vendor: update google/cadvisor and opencontainers/runc
Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
This commit is contained in:
14
vendor/github.com/opencontainers/runc/libcontainer/BUILD
generated
vendored
14
vendor/github.com/opencontainers/runc/libcontainer/BUILD
generated
vendored
@@ -16,6 +16,7 @@ go_library(
|
||||
"message_linux.go",
|
||||
"network_linux.go",
|
||||
"notify_linux.go",
|
||||
"notify_linux_v2.go",
|
||||
"process.go",
|
||||
"process_linux.go",
|
||||
"restored_process.go",
|
||||
@@ -36,10 +37,12 @@ go_library(
|
||||
"//vendor/github.com/opencontainers/runtime-spec/specs-go:go_default_library",
|
||||
] + select({
|
||||
"@io_bazel_rules_go//go/platform:android": [
|
||||
"//vendor/github.com/checkpoint-restore/go-criu/rpc:go_default_library",
|
||||
"//vendor/github.com/checkpoint-restore/go-criu/v4:go_default_library",
|
||||
"//vendor/github.com/checkpoint-restore/go-criu/v4/rpc:go_default_library",
|
||||
"//vendor/github.com/containerd/console:go_default_library",
|
||||
"//vendor/github.com/cyphar/filepath-securejoin:go_default_library",
|
||||
"//vendor/github.com/golang/protobuf/proto:go_default_library",
|
||||
"//vendor/github.com/moby/sys/mountinfo:go_default_library",
|
||||
"//vendor/github.com/mrunalp/fileutils:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/apparmor:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
|
||||
@@ -50,11 +53,11 @@ go_library(
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/intelrdt:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/keys:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/logs:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/mount:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/seccomp:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/system:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/user:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/types:go_default_library",
|
||||
"//vendor/github.com/opencontainers/selinux/go-selinux:go_default_library",
|
||||
"//vendor/github.com/opencontainers/selinux/go-selinux/label:go_default_library",
|
||||
"//vendor/github.com/pkg/errors:go_default_library",
|
||||
"//vendor/github.com/sirupsen/logrus:go_default_library",
|
||||
@@ -64,10 +67,12 @@ go_library(
|
||||
"//vendor/golang.org/x/sys/unix:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:linux": [
|
||||
"//vendor/github.com/checkpoint-restore/go-criu/rpc:go_default_library",
|
||||
"//vendor/github.com/checkpoint-restore/go-criu/v4:go_default_library",
|
||||
"//vendor/github.com/checkpoint-restore/go-criu/v4/rpc:go_default_library",
|
||||
"//vendor/github.com/containerd/console:go_default_library",
|
||||
"//vendor/github.com/cyphar/filepath-securejoin:go_default_library",
|
||||
"//vendor/github.com/golang/protobuf/proto:go_default_library",
|
||||
"//vendor/github.com/moby/sys/mountinfo:go_default_library",
|
||||
"//vendor/github.com/mrunalp/fileutils:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/apparmor:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
|
||||
@@ -78,11 +83,11 @@ go_library(
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/intelrdt:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/keys:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/logs:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/mount:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/seccomp:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/system:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/user:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/types:go_default_library",
|
||||
"//vendor/github.com/opencontainers/selinux/go-selinux:go_default_library",
|
||||
"//vendor/github.com/opencontainers/selinux/go-selinux/label:go_default_library",
|
||||
"//vendor/github.com/pkg/errors:go_default_library",
|
||||
"//vendor/github.com/sirupsen/logrus:go_default_library",
|
||||
@@ -112,7 +117,6 @@ filegroup(
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/intelrdt:all-srcs",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/keys:all-srcs",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/logs:all-srcs",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/mount:all-srcs",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/seccomp:all-srcs",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/stacktrace:all-srcs",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/system:all-srcs",
|
||||
|
5
vendor/github.com/opencontainers/runc/libcontainer/README.md
generated
vendored
5
vendor/github.com/opencontainers/runc/libcontainer/README.md
generated
vendored
@@ -155,8 +155,7 @@ config := &configs.Config{
|
||||
Parent: "system",
|
||||
Resources: &configs.Resources{
|
||||
MemorySwappiness: nil,
|
||||
AllowAllDevices: nil,
|
||||
AllowedDevices: configs.DefaultAllowedDevices,
|
||||
Devices: specconv.AllowedDevices,
|
||||
},
|
||||
},
|
||||
MaskPaths: []string{
|
||||
@@ -166,7 +165,7 @@ config := &configs.Config{
|
||||
ReadonlyPaths: []string{
|
||||
"/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus",
|
||||
},
|
||||
Devices: configs.DefaultAutoCreatedDevices,
|
||||
Devices: specconv.AllowedDevices,
|
||||
Hostname: "testing",
|
||||
Mounts: []*configs.Mount{
|
||||
{
|
||||
|
2
vendor/github.com/opencontainers/runc/libcontainer/cgroups/BUILD
generated
vendored
2
vendor/github.com/opencontainers/runc/libcontainer/cgroups/BUILD
generated
vendored
@@ -7,6 +7,7 @@ go_library(
|
||||
"cgroups_unsupported.go",
|
||||
"stats.go",
|
||||
"utils.go",
|
||||
"v1_utils.go",
|
||||
],
|
||||
importmap = "k8s.io/kubernetes/vendor/github.com/opencontainers/runc/libcontainer/cgroups",
|
||||
importpath = "github.com/opencontainers/runc/libcontainer/cgroups",
|
||||
@@ -37,6 +38,7 @@ filegroup(
|
||||
name = "all-srcs",
|
||||
srcs = [
|
||||
":package-srcs",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/devices:all-srcs",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf:all-srcs",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs:all-srcs",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2:all-srcs",
|
||||
|
55
vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go
generated
vendored
55
vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go
generated
vendored
@@ -3,8 +3,6 @@
|
||||
package cgroups
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
@@ -27,48 +25,27 @@ type Manager interface {
|
||||
// Destroys the cgroup set
|
||||
Destroy() error
|
||||
|
||||
// The option func SystemdCgroups() and Cgroupfs() require following attributes:
|
||||
// Paths map[string]string
|
||||
// Cgroups *configs.Cgroup
|
||||
// Paths maps cgroup subsystem to path at which it is mounted.
|
||||
// Cgroups specifies specific cgroup settings for the various subsystems
|
||||
|
||||
// Returns cgroup paths to save in a state file and to be able to
|
||||
// restore the object later.
|
||||
GetPaths() map[string]string
|
||||
|
||||
// GetUnifiedPath returns the unified path when running in unified mode.
|
||||
// The value corresponds to the all values of GetPaths() map.
|
||||
//
|
||||
// GetUnifiedPath returns error when running in hybrid mode as well as
|
||||
// in legacy mode.
|
||||
GetUnifiedPath() (string, error)
|
||||
// Path returns a cgroup path to the specified controller/subsystem.
|
||||
// For cgroupv2, the argument is unused and can be empty.
|
||||
Path(string) string
|
||||
|
||||
// Sets the cgroup as configured.
|
||||
Set(container *configs.Config) error
|
||||
|
||||
// Gets the cgroup as configured.
|
||||
// GetPaths returns cgroup path(s) to save in a state file in order to restore later.
|
||||
//
|
||||
// For cgroup v1, a key is cgroup subsystem name, and the value is the path
|
||||
// to the cgroup for this subsystem.
|
||||
//
|
||||
// For cgroup v2 unified hierarchy, a key is "", and the value is the unified path.
|
||||
GetPaths() map[string]string
|
||||
|
||||
// GetCgroups returns the cgroup data as configured.
|
||||
GetCgroups() (*configs.Cgroup, error)
|
||||
}
|
||||
|
||||
type NotFoundError struct {
|
||||
Subsystem string
|
||||
}
|
||||
// GetFreezerState retrieves the current FreezerState of the cgroup.
|
||||
GetFreezerState() (configs.FreezerState, error)
|
||||
|
||||
func (e *NotFoundError) Error() string {
|
||||
return fmt.Sprintf("mountpoint for %s not found", e.Subsystem)
|
||||
}
|
||||
|
||||
func NewNotFoundError(sub string) error {
|
||||
return &NotFoundError{
|
||||
Subsystem: sub,
|
||||
}
|
||||
}
|
||||
|
||||
func IsNotFound(err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
_, ok := err.(*NotFoundError)
|
||||
return ok
|
||||
// Whether the cgroup path exists or not
|
||||
Exists() bool
|
||||
}
|
||||
|
34
vendor/github.com/opencontainers/runc/libcontainer/cgroups/devices/BUILD
generated
vendored
Normal file
34
vendor/github.com/opencontainers/runc/libcontainer/cgroups/devices/BUILD
generated
vendored
Normal file
@@ -0,0 +1,34 @@
|
||||
load("@io_bazel_rules_go//go:def.bzl", "go_library")
|
||||
|
||||
go_library(
|
||||
name = "go_default_library",
|
||||
srcs = ["devices_emulator.go"],
|
||||
importmap = "k8s.io/kubernetes/vendor/github.com/opencontainers/runc/libcontainer/cgroups/devices",
|
||||
importpath = "github.com/opencontainers/runc/libcontainer/cgroups/devices",
|
||||
visibility = ["//visibility:public"],
|
||||
deps = select({
|
||||
"@io_bazel_rules_go//go/platform:android": [
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
|
||||
"//vendor/github.com/pkg/errors:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:linux": [
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
|
||||
"//vendor/github.com/pkg/errors:go_default_library",
|
||||
],
|
||||
"//conditions:default": [],
|
||||
}),
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "package-srcs",
|
||||
srcs = glob(["**"]),
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:private"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "all-srcs",
|
||||
srcs = [":package-srcs"],
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
373
vendor/github.com/opencontainers/runc/libcontainer/cgroups/devices/devices_emulator.go
generated
vendored
Normal file
373
vendor/github.com/opencontainers/runc/libcontainer/cgroups/devices/devices_emulator.go
generated
vendored
Normal file
@@ -0,0 +1,373 @@
|
||||
// +build linux
|
||||
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
/*
|
||||
* Copyright (C) 2020 Aleksa Sarai <cyphar@cyphar.com>
|
||||
* Copyright (C) 2020 SUSE LLC
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package devices
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"io"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// deviceMeta is a DeviceRule without the Allow or Permissions fields, and no
|
||||
// wildcard-type support. It's effectively the "match" portion of a metadata
|
||||
// rule, for the purposes of our emulation.
|
||||
type deviceMeta struct {
|
||||
node configs.DeviceType
|
||||
major int64
|
||||
minor int64
|
||||
}
|
||||
|
||||
// deviceRule is effectively the tuple (deviceMeta, DevicePermissions).
|
||||
type deviceRule struct {
|
||||
meta deviceMeta
|
||||
perms configs.DevicePermissions
|
||||
}
|
||||
|
||||
// deviceRules is a mapping of device metadata rules to the associated
|
||||
// permissions in the ruleset.
|
||||
type deviceRules map[deviceMeta]configs.DevicePermissions
|
||||
|
||||
func (r deviceRules) orderedEntries() []deviceRule {
|
||||
var rules []deviceRule
|
||||
for meta, perms := range r {
|
||||
rules = append(rules, deviceRule{meta: meta, perms: perms})
|
||||
}
|
||||
sort.Slice(rules, func(i, j int) bool {
|
||||
// Sort by (major, minor, type).
|
||||
a, b := rules[i].meta, rules[j].meta
|
||||
return a.major < b.major ||
|
||||
(a.major == b.major && a.minor < b.minor) ||
|
||||
(a.major == b.major && a.minor == b.minor && a.node < b.node)
|
||||
})
|
||||
return rules
|
||||
}
|
||||
|
||||
type Emulator struct {
|
||||
defaultAllow bool
|
||||
rules deviceRules
|
||||
}
|
||||
|
||||
func (e *Emulator) IsBlacklist() bool {
|
||||
return e.defaultAllow
|
||||
}
|
||||
|
||||
func (e *Emulator) IsAllowAll() bool {
|
||||
return e.IsBlacklist() && len(e.rules) == 0
|
||||
}
|
||||
|
||||
var devicesListRegexp = regexp.MustCompile(`^([abc])\s+(\d+|\*):(\d+|\*)\s+([rwm]+)$`)
|
||||
|
||||
func parseLine(line string) (*deviceRule, error) {
|
||||
matches := devicesListRegexp.FindStringSubmatch(line)
|
||||
if matches == nil {
|
||||
return nil, errors.Errorf("line doesn't match devices.list format")
|
||||
}
|
||||
var (
|
||||
rule deviceRule
|
||||
node = matches[1]
|
||||
major = matches[2]
|
||||
minor = matches[3]
|
||||
perms = matches[4]
|
||||
)
|
||||
|
||||
// Parse the node type.
|
||||
switch node {
|
||||
case "a":
|
||||
// Super-special case -- "a" always means every device with every
|
||||
// access mode. In fact, for devices.list this actually indicates that
|
||||
// the cgroup is in black-list mode.
|
||||
// TODO: Double-check that the entire file is "a *:* rwm".
|
||||
return nil, nil
|
||||
case "b":
|
||||
rule.meta.node = configs.BlockDevice
|
||||
case "c":
|
||||
rule.meta.node = configs.CharDevice
|
||||
default:
|
||||
// Should never happen!
|
||||
return nil, errors.Errorf("unknown device type %q", node)
|
||||
}
|
||||
|
||||
// Parse the major number.
|
||||
if major == "*" {
|
||||
rule.meta.major = configs.Wildcard
|
||||
} else {
|
||||
val, err := strconv.ParseUint(major, 10, 32)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "parse major number")
|
||||
}
|
||||
rule.meta.major = int64(val)
|
||||
}
|
||||
|
||||
// Parse the minor number.
|
||||
if minor == "*" {
|
||||
rule.meta.minor = configs.Wildcard
|
||||
} else {
|
||||
val, err := strconv.ParseUint(minor, 10, 32)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "parse minor number")
|
||||
}
|
||||
rule.meta.minor = int64(val)
|
||||
}
|
||||
|
||||
// Parse the access permissions.
|
||||
rule.perms = configs.DevicePermissions(perms)
|
||||
if !rule.perms.IsValid() || rule.perms.IsEmpty() {
|
||||
// Should never happen!
|
||||
return nil, errors.Errorf("parse access mode: contained unknown modes or is empty: %q", perms)
|
||||
}
|
||||
return &rule, nil
|
||||
}
|
||||
|
||||
func (e *Emulator) addRule(rule deviceRule) error {
|
||||
if e.rules == nil {
|
||||
e.rules = make(map[deviceMeta]configs.DevicePermissions)
|
||||
}
|
||||
|
||||
// Merge with any pre-existing permissions.
|
||||
oldPerms := e.rules[rule.meta]
|
||||
newPerms := rule.perms.Union(oldPerms)
|
||||
e.rules[rule.meta] = newPerms
|
||||
return nil
|
||||
}
|
||||
|
||||
func (e *Emulator) rmRule(rule deviceRule) error {
|
||||
// Give an error if any of the permissions requested to be removed are
|
||||
// present in a partially-matching wildcard rule, because such rules will
|
||||
// be ignored by cgroupv1.
|
||||
//
|
||||
// This is a diversion from cgroupv1, but is necessary to avoid leading
|
||||
// users into a false sense of security. cgroupv1 will silently(!) ignore
|
||||
// requests to remove partial exceptions, but we really shouldn't do that.
|
||||
//
|
||||
// It may seem like we could just "split" wildcard rules which hit this
|
||||
// issue, but unfortunately there are 2^32 possible major and minor
|
||||
// numbers, which would exhaust kernel memory quickly if we did this. Not
|
||||
// to mention it'd be really slow (the kernel side is implemented as a
|
||||
// linked-list of exceptions).
|
||||
for _, partialMeta := range []deviceMeta{
|
||||
{node: rule.meta.node, major: configs.Wildcard, minor: rule.meta.minor},
|
||||
{node: rule.meta.node, major: rule.meta.major, minor: configs.Wildcard},
|
||||
{node: rule.meta.node, major: configs.Wildcard, minor: configs.Wildcard},
|
||||
} {
|
||||
// This wildcard rule is equivalent to the requested rule, so skip it.
|
||||
if rule.meta == partialMeta {
|
||||
continue
|
||||
}
|
||||
// Only give an error if the set of permissions overlap.
|
||||
partialPerms := e.rules[partialMeta]
|
||||
if !partialPerms.Intersection(rule.perms).IsEmpty() {
|
||||
return errors.Errorf("requested rule [%v %v] not supported by devices cgroupv1 (cannot punch hole in existing wildcard rule [%v %v])", rule.meta, rule.perms, partialMeta, partialPerms)
|
||||
}
|
||||
}
|
||||
|
||||
// Subtract all of the permissions listed from the full match rule. If the
|
||||
// rule didn't exist, all of this is a no-op.
|
||||
newPerms := e.rules[rule.meta].Difference(rule.perms)
|
||||
if newPerms.IsEmpty() {
|
||||
delete(e.rules, rule.meta)
|
||||
} else {
|
||||
e.rules[rule.meta] = newPerms
|
||||
}
|
||||
// TODO: The actual cgroup code doesn't care if an exception didn't exist
|
||||
// during removal, so not erroring out here is /accurate/ but quite
|
||||
// worrying. Maybe we should do additional validation, but again we
|
||||
// have to worry about backwards-compatibility.
|
||||
return nil
|
||||
}
|
||||
|
||||
func (e *Emulator) allow(rule *deviceRule) error {
|
||||
// This cgroup is configured as a black-list. Reset the entire emulator,
|
||||
// and put is into black-list mode.
|
||||
if rule == nil || rule.meta.node == configs.WildcardDevice {
|
||||
*e = Emulator{
|
||||
defaultAllow: true,
|
||||
rules: nil,
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
var err error
|
||||
if e.defaultAllow {
|
||||
err = errors.Wrap(e.rmRule(*rule), "remove 'deny' exception")
|
||||
} else {
|
||||
err = errors.Wrap(e.addRule(*rule), "add 'allow' exception")
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (e *Emulator) deny(rule *deviceRule) error {
|
||||
// This cgroup is configured as a white-list. Reset the entire emulator,
|
||||
// and put is into white-list mode.
|
||||
if rule == nil || rule.meta.node == configs.WildcardDevice {
|
||||
*e = Emulator{
|
||||
defaultAllow: false,
|
||||
rules: nil,
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
var err error
|
||||
if e.defaultAllow {
|
||||
err = errors.Wrap(e.addRule(*rule), "add 'deny' exception")
|
||||
} else {
|
||||
err = errors.Wrap(e.rmRule(*rule), "remove 'allow' exception")
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (e *Emulator) Apply(rule configs.DeviceRule) error {
|
||||
if !rule.Type.CanCgroup() {
|
||||
return errors.Errorf("cannot add rule [%#v] with non-cgroup type %q", rule, rule.Type)
|
||||
}
|
||||
|
||||
innerRule := &deviceRule{
|
||||
meta: deviceMeta{
|
||||
node: rule.Type,
|
||||
major: rule.Major,
|
||||
minor: rule.Minor,
|
||||
},
|
||||
perms: rule.Permissions,
|
||||
}
|
||||
if innerRule.meta.node == configs.WildcardDevice {
|
||||
innerRule = nil
|
||||
}
|
||||
|
||||
if rule.Allow {
|
||||
return e.allow(innerRule)
|
||||
} else {
|
||||
return e.deny(innerRule)
|
||||
}
|
||||
}
|
||||
|
||||
// EmulatorFromList takes a reader to a "devices.list"-like source, and returns
|
||||
// a new Emulator that represents the state of the devices cgroup. Note that
|
||||
// black-list devices cgroups cannot be fully reconstructed, due to limitations
|
||||
// in the devices cgroup API. Instead, such cgroups are always treated as
|
||||
// "allow all" cgroups.
|
||||
func EmulatorFromList(list io.Reader) (*Emulator, error) {
|
||||
// Normally cgroups are in black-list mode by default, but the way we
|
||||
// figure out the current mode is whether or not devices.list has an
|
||||
// allow-all rule. So we default to a white-list, and the existence of an
|
||||
// "a *:* rwm" entry will tell us otherwise.
|
||||
e := &Emulator{
|
||||
defaultAllow: false,
|
||||
}
|
||||
|
||||
// Parse the "devices.list".
|
||||
s := bufio.NewScanner(list)
|
||||
for s.Scan() {
|
||||
line := s.Text()
|
||||
deviceRule, err := parseLine(line)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "parsing line %q", line)
|
||||
}
|
||||
// "devices.list" is an allow list. Note that this means that in
|
||||
// black-list mode, we have no idea what rules are in play. As a
|
||||
// result, we need to be very careful in Transition().
|
||||
if err := e.allow(deviceRule); err != nil {
|
||||
return nil, errors.Wrapf(err, "adding devices.list rule")
|
||||
}
|
||||
}
|
||||
if err := s.Err(); err != nil {
|
||||
return nil, errors.Wrap(err, "reading devices.list lines")
|
||||
}
|
||||
return e, nil
|
||||
}
|
||||
|
||||
// Transition calculates what is the minimally-disruptive set of rules need to
|
||||
// be applied to a devices cgroup in order to transition to the given target.
|
||||
// This means that any already-existing rules will not be applied, and
|
||||
// disruptive rules (like denying all device access) will only be applied if
|
||||
// necessary.
|
||||
//
|
||||
// This function is the sole reason for all of Emulator -- to allow us
|
||||
// to figure out how to update a containers' cgroups without causing spurrious
|
||||
// device errors (if possible).
|
||||
func (source *Emulator) Transition(target *Emulator) ([]*configs.DeviceRule, error) {
|
||||
var transitionRules []*configs.DeviceRule
|
||||
oldRules := source.rules
|
||||
|
||||
// If the default policy doesn't match, we need to include a "disruptive"
|
||||
// rule (either allow-all or deny-all) in order to switch the cgroup to the
|
||||
// correct default policy.
|
||||
//
|
||||
// However, due to a limitation in "devices.list" we cannot be sure what
|
||||
// deny rules are in place in a black-list cgroup. Thus if the source is a
|
||||
// black-list we also have to include a disruptive rule.
|
||||
if source.IsBlacklist() || source.defaultAllow != target.defaultAllow {
|
||||
transitionRules = append(transitionRules, &configs.DeviceRule{
|
||||
Type: 'a',
|
||||
Major: -1,
|
||||
Minor: -1,
|
||||
Permissions: configs.DevicePermissions("rwm"),
|
||||
Allow: target.defaultAllow,
|
||||
})
|
||||
// The old rules are only relevant if we aren't starting out with a
|
||||
// disruptive rule.
|
||||
oldRules = nil
|
||||
}
|
||||
|
||||
// NOTE: We traverse through the rules in a sorted order so we always write
|
||||
// the same set of rules (this is to aid testing).
|
||||
|
||||
// First, we create inverse rules for any old rules not in the new set.
|
||||
// This includes partial-inverse rules for specific permissions. This is a
|
||||
// no-op if we added a disruptive rule, since oldRules will be empty.
|
||||
for _, rule := range oldRules.orderedEntries() {
|
||||
meta, oldPerms := rule.meta, rule.perms
|
||||
newPerms := target.rules[meta]
|
||||
droppedPerms := oldPerms.Difference(newPerms)
|
||||
if !droppedPerms.IsEmpty() {
|
||||
transitionRules = append(transitionRules, &configs.DeviceRule{
|
||||
Type: meta.node,
|
||||
Major: meta.major,
|
||||
Minor: meta.minor,
|
||||
Permissions: droppedPerms,
|
||||
Allow: target.defaultAllow,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Add any additional rules which weren't in the old set. We happen to
|
||||
// filter out rules which are present in both sets, though this isn't
|
||||
// strictly necessary.
|
||||
for _, rule := range target.rules.orderedEntries() {
|
||||
meta, newPerms := rule.meta, rule.perms
|
||||
oldPerms := oldRules[meta]
|
||||
gainedPerms := newPerms.Difference(oldPerms)
|
||||
if !gainedPerms.IsEmpty() {
|
||||
transitionRules = append(transitionRules, &configs.DeviceRule{
|
||||
Type: meta.node,
|
||||
Major: meta.major,
|
||||
Minor: meta.minor,
|
||||
Permissions: gainedPerms,
|
||||
Allow: !target.defaultAllow,
|
||||
})
|
||||
}
|
||||
}
|
||||
return transitionRules, nil
|
||||
}
|
@@ -22,7 +22,7 @@ const (
|
||||
)
|
||||
|
||||
// DeviceFilter returns eBPF device filter program and its license string
|
||||
func DeviceFilter(devices []*configs.Device) (asm.Instructions, string, error) {
|
||||
func DeviceFilter(devices []*configs.DeviceRule) (asm.Instructions, string, error) {
|
||||
p := &program{}
|
||||
p.init()
|
||||
for i := len(devices) - 1; i >= 0; i-- {
|
||||
@@ -49,7 +49,8 @@ func (p *program) init() {
|
||||
*/
|
||||
// R2 <- type (lower 16 bit of u32 access_type at R1[0])
|
||||
p.insts = append(p.insts,
|
||||
asm.LoadMem(asm.R2, asm.R1, 0, asm.Half))
|
||||
asm.LoadMem(asm.R2, asm.R1, 0, asm.Word),
|
||||
asm.And.Imm32(asm.R2, 0xFFFF))
|
||||
|
||||
// R3 <- access (upper 16 bit of u32 access_type at R1[0])
|
||||
p.insts = append(p.insts,
|
||||
@@ -67,7 +68,7 @@ func (p *program) init() {
|
||||
}
|
||||
|
||||
// appendDevice needs to be called from the last element of OCI linux.resources.devices to the head element.
|
||||
func (p *program) appendDevice(dev *configs.Device) error {
|
||||
func (p *program) appendDevice(dev *configs.DeviceRule) error {
|
||||
if p.blockID < 0 {
|
||||
return errors.New("the program is finalized")
|
||||
}
|
||||
|
8
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/BUILD
generated
vendored
8
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/BUILD
generated
vendored
@@ -3,14 +3,13 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library")
|
||||
go_library(
|
||||
name = "go_default_library",
|
||||
srcs = [
|
||||
"apply_raw.go",
|
||||
"blkio.go",
|
||||
"cpu.go",
|
||||
"cpuacct.go",
|
||||
"cpuset.go",
|
||||
"devices.go",
|
||||
"freezer.go",
|
||||
"fs_unsupported.go",
|
||||
"fs.go",
|
||||
"hugetlb.go",
|
||||
"kmem.go",
|
||||
"memory.go",
|
||||
@@ -19,13 +18,16 @@ go_library(
|
||||
"net_prio.go",
|
||||
"perf_event.go",
|
||||
"pids.go",
|
||||
"unsupported.go",
|
||||
],
|
||||
importmap = "k8s.io/kubernetes/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs",
|
||||
importpath = "github.com/opencontainers/runc/libcontainer/cgroups/fs",
|
||||
visibility = ["//visibility:public"],
|
||||
deps = select({
|
||||
"@io_bazel_rules_go//go/platform:android": [
|
||||
"//vendor/github.com/moby/sys/mountinfo:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/devices:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/system:go_default_library",
|
||||
@@ -34,7 +36,9 @@ go_library(
|
||||
"//vendor/golang.org/x/sys/unix:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:linux": [
|
||||
"//vendor/github.com/moby/sys/mountinfo:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/devices:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/system:go_default_library",
|
||||
|
15
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go
generated
vendored
15
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go
generated
vendored
@@ -4,6 +4,7 @@ package fs
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
@@ -66,9 +67,21 @@ func (s *CpuGroup) SetRtSched(path string, cgroup *configs.Cgroup) error {
|
||||
|
||||
func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.CpuShares != 0 {
|
||||
if err := fscommon.WriteFile(path, "cpu.shares", strconv.FormatUint(cgroup.Resources.CpuShares, 10)); err != nil {
|
||||
shares := cgroup.Resources.CpuShares
|
||||
if err := fscommon.WriteFile(path, "cpu.shares", strconv.FormatUint(shares, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
// read it back
|
||||
sharesRead, err := fscommon.GetCgroupParamUint(path, "cpu.shares")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// ... and check
|
||||
if shares > sharesRead {
|
||||
return fmt.Errorf("the maximum allowed cpu-shares is %d", sharesRead)
|
||||
} else if shares < sharesRead {
|
||||
return fmt.Errorf("the minimum allowed cpu-shares is %d", sharesRead)
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.CpuPeriod != 0 {
|
||||
if err := fscommon.WriteFile(path, "cpu.cfs_period_us", strconv.FormatUint(cgroup.Resources.CpuPeriod, 10)); err != nil {
|
||||
|
69
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go
generated
vendored
69
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go
generated
vendored
@@ -3,8 +3,10 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
@@ -12,15 +14,24 @@ import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
)
|
||||
|
||||
const (
|
||||
cgroupCpuacctStat = "cpuacct.stat"
|
||||
nanosecondsInSecond = 1000000000
|
||||
)
|
||||
cgroupCpuacctStat = "cpuacct.stat"
|
||||
cgroupCpuacctUsageAll = "cpuacct.usage_all"
|
||||
|
||||
var clockTicks = uint64(system.GetClockTicks())
|
||||
nanosecondsInSecond = 1000000000
|
||||
|
||||
userModeColumn = 1
|
||||
kernelModeColumn = 2
|
||||
cuacctUsageAllColumnsNumber = 3
|
||||
|
||||
// The value comes from `C.sysconf(C._SC_CLK_TCK)`, and
|
||||
// on Linux it's a constant which is safe to be hard coded,
|
||||
// so we can avoid using cgo here. For details, see:
|
||||
// https://github.com/containerd/cgroups/pull/12
|
||||
clockTicks uint64 = 100
|
||||
)
|
||||
|
||||
type CpuacctGroup struct {
|
||||
}
|
||||
@@ -62,8 +73,15 @@ func (s *CpuacctGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return err
|
||||
}
|
||||
|
||||
percpuUsageInKernelmode, percpuUsageInUsermode, err := getPercpuUsageInModes(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CpuStats.CpuUsage.TotalUsage = totalUsage
|
||||
stats.CpuStats.CpuUsage.PercpuUsage = percpuUsage
|
||||
stats.CpuStats.CpuUsage.PercpuUsageInKernelmode = percpuUsageInKernelmode
|
||||
stats.CpuStats.CpuUsage.PercpuUsageInUsermode = percpuUsageInUsermode
|
||||
stats.CpuStats.CpuUsage.UsageInUsermode = userModeUsage
|
||||
stats.CpuStats.CpuUsage.UsageInKernelmode = kernelModeUsage
|
||||
return nil
|
||||
@@ -120,3 +138,44 @@ func getPercpuUsage(path string) ([]uint64, error) {
|
||||
}
|
||||
return percpuUsage, nil
|
||||
}
|
||||
|
||||
func getPercpuUsageInModes(path string) ([]uint64, []uint64, error) {
|
||||
usageKernelMode := []uint64{}
|
||||
usageUserMode := []uint64{}
|
||||
|
||||
file, err := os.Open(filepath.Join(path, cgroupCpuacctUsageAll))
|
||||
if os.IsNotExist(err) {
|
||||
return usageKernelMode, usageUserMode, nil
|
||||
} else if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
scanner := bufio.NewScanner(file)
|
||||
scanner.Scan() //skipping header line
|
||||
|
||||
for scanner.Scan() {
|
||||
lineFields := strings.SplitN(scanner.Text(), " ", cuacctUsageAllColumnsNumber+1)
|
||||
if len(lineFields) != cuacctUsageAllColumnsNumber {
|
||||
continue
|
||||
}
|
||||
|
||||
usageInKernelMode, err := strconv.ParseUint(lineFields[kernelModeColumn], 10, 64)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("Unable to convert CPU usage in kernel mode to uint64: %s", err)
|
||||
}
|
||||
usageKernelMode = append(usageKernelMode, usageInKernelMode)
|
||||
|
||||
usageInUserMode, err := strconv.ParseUint(lineFields[userModeColumn], 10, 64)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("Unable to convert CPU usage in user mode to uint64: %s", err)
|
||||
}
|
||||
usageUserMode = append(usageUserMode, usageInUserMode)
|
||||
}
|
||||
|
||||
if err := scanner.Err(); err != nil {
|
||||
return nil, nil, fmt.Errorf("Problem in reading %s line by line, %s", cgroupCpuacctUsageAll, err)
|
||||
}
|
||||
|
||||
return usageKernelMode, usageUserMode, nil
|
||||
}
|
||||
|
31
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go
generated
vendored
31
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go
generated
vendored
@@ -4,15 +4,16 @@ package fs
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/moby/sys/mountinfo"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
type CpusetGroup struct {
|
||||
@@ -52,17 +53,39 @@ func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Get the source mount point of directory passed in as argument.
|
||||
func getMount(dir string) (string, error) {
|
||||
mi, err := mountinfo.GetMounts(mountinfo.ParentsFilter(dir))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if len(mi) < 1 {
|
||||
return "", errors.Errorf("Can't find mount point of %s", dir)
|
||||
}
|
||||
|
||||
// find the longest mount point
|
||||
var idx, maxlen int
|
||||
for i := range mi {
|
||||
if len(mi[i].Mountpoint) > maxlen {
|
||||
maxlen = len(mi[i].Mountpoint)
|
||||
idx = i
|
||||
}
|
||||
}
|
||||
|
||||
return mi[idx].Mountpoint, nil
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) error {
|
||||
// This might happen if we have no cpuset cgroup mounted.
|
||||
// Just do nothing and don't fail.
|
||||
if dir == "" {
|
||||
return nil
|
||||
}
|
||||
mountInfo, err := ioutil.ReadFile("/proc/self/mountinfo")
|
||||
root, err := getMount(dir)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
root := filepath.Dir(cgroups.GetClosestMountpointAncestor(dir, string(mountInfo)))
|
||||
root = filepath.Dir(root)
|
||||
// 'ensureParent' start with parent because we don't want to
|
||||
// explicitly inherit from parent, it could conflict with
|
||||
// 'cpuset.cpu_exclusive'.
|
||||
@@ -108,7 +131,7 @@ func (s *CpusetGroup) ensureParent(current, root string) error {
|
||||
}
|
||||
// Avoid infinite recursion.
|
||||
if parent == current {
|
||||
return fmt.Errorf("cpuset: cgroup parent path outside cgroup root")
|
||||
return errors.New("cpuset: cgroup parent path outside cgroup root")
|
||||
}
|
||||
if err := s.ensureParent(parent, root); err != nil {
|
||||
return err
|
||||
|
87
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go
generated
vendored
87
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go
generated
vendored
@@ -3,13 +3,19 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"reflect"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/devices"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
)
|
||||
|
||||
type DevicesGroup struct {
|
||||
testingSkipFinalCheck bool
|
||||
}
|
||||
|
||||
func (s *DevicesGroup) Name() string {
|
||||
@@ -26,49 +32,74 @@ func (s *DevicesGroup) Apply(d *cgroupData) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func loadEmulator(path string) (*devices.Emulator, error) {
|
||||
list, err := fscommon.ReadFile(path, "devices.list")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return devices.EmulatorFromList(bytes.NewBufferString(list))
|
||||
}
|
||||
|
||||
func buildEmulator(rules []*configs.DeviceRule) (*devices.Emulator, error) {
|
||||
// This defaults to a white-list -- which is what we want!
|
||||
emu := &devices.Emulator{}
|
||||
for _, rule := range rules {
|
||||
if err := emu.Apply(*rule); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return emu, nil
|
||||
}
|
||||
|
||||
func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if system.RunningInUserNS() {
|
||||
return nil
|
||||
}
|
||||
|
||||
devices := cgroup.Resources.Devices
|
||||
if len(devices) > 0 {
|
||||
for _, dev := range devices {
|
||||
file := "devices.deny"
|
||||
if dev.Allow {
|
||||
file = "devices.allow"
|
||||
}
|
||||
if err := fscommon.WriteFile(path, file, dev.CgroupString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
// Generate two emulators, one for the current state of the cgroup and one
|
||||
// for the requested state by the user.
|
||||
current, err := loadEmulator(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
target, err := buildEmulator(cgroup.Resources.Devices)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if cgroup.Resources.AllowAllDevices != nil {
|
||||
if *cgroup.Resources.AllowAllDevices == false {
|
||||
if err := fscommon.WriteFile(path, "devices.deny", "a"); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, dev := range cgroup.Resources.AllowedDevices {
|
||||
if err := fscommon.WriteFile(path, "devices.allow", dev.CgroupString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
// Compute the minimal set of transition rules needed to achieve the
|
||||
// requested state.
|
||||
transitionRules, err := current.Transition(target)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, rule := range transitionRules {
|
||||
file := "devices.deny"
|
||||
if rule.Allow {
|
||||
file = "devices.allow"
|
||||
}
|
||||
|
||||
if err := fscommon.WriteFile(path, "devices.allow", "a"); err != nil {
|
||||
if err := fscommon.WriteFile(path, file, rule.CgroupString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
for _, dev := range cgroup.Resources.DeniedDevices {
|
||||
if err := fscommon.WriteFile(path, "devices.deny", dev.CgroupString()); err != nil {
|
||||
// Final safety check -- ensure that the resulting state is what was
|
||||
// requested. This is only really correct for white-lists, but for
|
||||
// black-lists we can at least check that the cgroup is in the right mode.
|
||||
//
|
||||
// This safety-check is skipped for the unit tests because we cannot
|
||||
// currently mock devices.list correctly.
|
||||
if !s.testingSkipFinalCheck {
|
||||
currentAfter, err := loadEmulator(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !target.IsBlacklist() && !reflect.DeepEqual(currentAfter, target) {
|
||||
return errors.New("resulting devices cgroup doesn't precisely match target")
|
||||
} else if target.IsBlacklist() != currentAfter.IsBlacklist() {
|
||||
return errors.New("resulting devices cgroup doesn't match target mode")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
34
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go
generated
vendored
34
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go
generated
vendored
@@ -3,13 +3,16 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
type FreezerGroup struct {
|
||||
@@ -39,11 +42,11 @@ func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
return err
|
||||
}
|
||||
|
||||
state, err := fscommon.ReadFile(path, "freezer.state")
|
||||
state, err := s.GetState(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if strings.TrimSpace(state) == string(cgroup.Resources.Freezer) {
|
||||
if state == cgroup.Resources.Freezer {
|
||||
break
|
||||
}
|
||||
|
||||
@@ -65,3 +68,30 @@ func (s *FreezerGroup) Remove(d *cgroupData) error {
|
||||
func (s *FreezerGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *FreezerGroup) GetState(path string) (configs.FreezerState, error) {
|
||||
for {
|
||||
state, err := fscommon.ReadFile(path, "freezer.state")
|
||||
if err != nil {
|
||||
// If the kernel is too old, then we just treat the freezer as
|
||||
// being in an "undefined" state.
|
||||
if os.IsNotExist(err) || errors.Is(err, unix.ENODEV) {
|
||||
err = nil
|
||||
}
|
||||
return configs.Undefined, err
|
||||
}
|
||||
switch strings.TrimSpace(state) {
|
||||
case "THAWED":
|
||||
return configs.Thawed, nil
|
||||
case "FROZEN":
|
||||
return configs.Frozen, nil
|
||||
case "FREEZING":
|
||||
// Make sure we get a stable freezer state, so retry if the cgroup
|
||||
// is still undergoing freezing. This should be a temporary delay.
|
||||
time.Sleep(1 * time.Millisecond)
|
||||
continue
|
||||
default:
|
||||
return configs.Undefined, fmt.Errorf("unknown freezer.state %q", state)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -3,10 +3,11 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
@@ -35,7 +36,7 @@ var (
|
||||
HugePageSizes, _ = cgroups.GetHugePageSize()
|
||||
)
|
||||
|
||||
var errSubsystemDoesNotExist = fmt.Errorf("cgroup: subsystem does not exist")
|
||||
var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist")
|
||||
|
||||
type subsystemSet []subsystem
|
||||
|
||||
@@ -61,11 +62,19 @@ type subsystem interface {
|
||||
Set(path string, cgroup *configs.Cgroup) error
|
||||
}
|
||||
|
||||
type Manager struct {
|
||||
type manager struct {
|
||||
mu sync.Mutex
|
||||
Cgroups *configs.Cgroup
|
||||
Rootless bool // ignore permission-related errors
|
||||
Paths map[string]string
|
||||
cgroups *configs.Cgroup
|
||||
rootless bool // ignore permission-related errors
|
||||
paths map[string]string
|
||||
}
|
||||
|
||||
func NewManager(cg *configs.Cgroup, paths map[string]string, rootless bool) cgroups.Manager {
|
||||
return &manager{
|
||||
cgroups: cg,
|
||||
paths: paths,
|
||||
rootless: rootless,
|
||||
}
|
||||
}
|
||||
|
||||
// The absolute path to the root of the cgroup hierarchies.
|
||||
@@ -81,10 +90,43 @@ func getCgroupRoot() (string, error) {
|
||||
return cgroupRoot, nil
|
||||
}
|
||||
|
||||
root, err := cgroups.FindCgroupMountpointDir()
|
||||
f, err := os.Open("/proc/self/mountinfo")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var root string
|
||||
scanner := bufio.NewScanner(f)
|
||||
for scanner.Scan() {
|
||||
text := scanner.Text()
|
||||
fields := strings.Split(text, " ")
|
||||
// Safe as mountinfo encodes mountpoints with spaces as \040.
|
||||
index := strings.Index(text, " - ")
|
||||
postSeparatorFields := strings.Fields(text[index+3:])
|
||||
numPostFields := len(postSeparatorFields)
|
||||
|
||||
// This is an error as we can't detect if the mount is for "cgroup"
|
||||
if numPostFields == 0 {
|
||||
return "", fmt.Errorf("mountinfo: found no fields post '-' in %q", text)
|
||||
}
|
||||
|
||||
if postSeparatorFields[0] == "cgroup" {
|
||||
// Check that the mount is properly formatted.
|
||||
if numPostFields < 3 {
|
||||
return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
|
||||
}
|
||||
|
||||
root = filepath.Dir(fields[4])
|
||||
break
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return "", err
|
||||
}
|
||||
if root == "" {
|
||||
return "", errors.New("no cgroup mount found in mountinfo")
|
||||
}
|
||||
|
||||
if _, err := os.Stat(root); err != nil {
|
||||
return "", err
|
||||
@@ -110,43 +152,39 @@ func isIgnorableError(rootless bool, err error) bool {
|
||||
if !rootless {
|
||||
return false
|
||||
}
|
||||
// TODO: rm errors.Cause once we switch to %w everywhere
|
||||
err = errors.Cause(err)
|
||||
// Is it an ordinary EPERM?
|
||||
if os.IsPermission(errors.Cause(err)) {
|
||||
if errors.Is(err, os.ErrPermission) {
|
||||
return true
|
||||
}
|
||||
|
||||
// Try to handle other errnos.
|
||||
var errno error
|
||||
switch err := errors.Cause(err).(type) {
|
||||
case *os.PathError:
|
||||
errno = err.Err
|
||||
case *os.LinkError:
|
||||
errno = err.Err
|
||||
case *os.SyscallError:
|
||||
errno = err.Err
|
||||
// Handle some specific syscall errors.
|
||||
var errno unix.Errno
|
||||
if errors.As(err, &errno) {
|
||||
return errno == unix.EROFS || errno == unix.EPERM || errno == unix.EACCES
|
||||
}
|
||||
return errno == unix.EROFS || errno == unix.EPERM || errno == unix.EACCES
|
||||
return false
|
||||
}
|
||||
|
||||
func (m *Manager) getSubsystems() subsystemSet {
|
||||
func (m *manager) getSubsystems() subsystemSet {
|
||||
return subsystemsLegacy
|
||||
}
|
||||
|
||||
func (m *Manager) Apply(pid int) (err error) {
|
||||
if m.Cgroups == nil {
|
||||
func (m *manager) Apply(pid int) (err error) {
|
||||
if m.cgroups == nil {
|
||||
return nil
|
||||
}
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
var c = m.Cgroups
|
||||
var c = m.cgroups
|
||||
|
||||
d, err := getCgroupData(m.Cgroups, pid)
|
||||
d, err := getCgroupData(m.cgroups, pid)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
m.Paths = make(map[string]string)
|
||||
m.paths = make(map[string]string)
|
||||
if c.Paths != nil {
|
||||
for name, path := range c.Paths {
|
||||
_, err := d.path(name)
|
||||
@@ -156,15 +194,12 @@ func (m *Manager) Apply(pid int) (err error) {
|
||||
}
|
||||
return err
|
||||
}
|
||||
m.Paths[name] = path
|
||||
m.paths[name] = path
|
||||
}
|
||||
return cgroups.EnterPid(m.Paths, pid)
|
||||
return cgroups.EnterPid(m.paths, pid)
|
||||
}
|
||||
|
||||
for _, sys := range m.getSubsystems() {
|
||||
// TODO: Apply should, ideally, be reentrant or be broken up into a separate
|
||||
// create and join phase so that the cgroup hierarchy for a container can be
|
||||
// created then join consists of writing the process pids to cgroup.procs
|
||||
p, err := d.path(sys.Name())
|
||||
if err != nil {
|
||||
// The non-presence of the devices subsystem is
|
||||
@@ -174,15 +209,15 @@ func (m *Manager) Apply(pid int) (err error) {
|
||||
}
|
||||
return err
|
||||
}
|
||||
m.Paths[sys.Name()] = p
|
||||
m.paths[sys.Name()] = p
|
||||
|
||||
if err := sys.Apply(d); err != nil {
|
||||
// In the case of rootless (including euid=0 in userns), where an explicit cgroup path hasn't
|
||||
// been set, we don't bail on error in case of permission problems.
|
||||
// Cases where limits have been set (and we couldn't create our own
|
||||
// cgroup) are handled by Set.
|
||||
if isIgnorableError(m.Rootless, err) && m.Cgroups.Path == "" {
|
||||
delete(m.Paths, sys.Name())
|
||||
// In the case of rootless (including euid=0 in userns), where an
|
||||
// explicit cgroup path hasn't been set, we don't bail on error in
|
||||
// case of permission problems. Cases where limits have been set
|
||||
// (and we couldn't create our own cgroup) are handled by Set.
|
||||
if isIgnorableError(m.rootless, err) && m.cgroups.Path == "" {
|
||||
delete(m.paths, sys.Name())
|
||||
continue
|
||||
}
|
||||
return err
|
||||
@@ -192,35 +227,30 @@ func (m *Manager) Apply(pid int) (err error) {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) Destroy() error {
|
||||
if m.Cgroups == nil || m.Cgroups.Paths != nil {
|
||||
func (m *manager) Destroy() error {
|
||||
if m.cgroups == nil || m.cgroups.Paths != nil {
|
||||
return nil
|
||||
}
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
if err := cgroups.RemovePaths(m.Paths); err != nil {
|
||||
if err := cgroups.RemovePaths(m.paths); err != nil {
|
||||
return err
|
||||
}
|
||||
m.Paths = make(map[string]string)
|
||||
m.paths = make(map[string]string)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) GetPaths() map[string]string {
|
||||
func (m *manager) Path(subsys string) string {
|
||||
m.mu.Lock()
|
||||
paths := m.Paths
|
||||
m.mu.Unlock()
|
||||
return paths
|
||||
defer m.mu.Unlock()
|
||||
return m.paths[subsys]
|
||||
}
|
||||
|
||||
func (m *Manager) GetUnifiedPath() (string, error) {
|
||||
return "", errors.New("unified path is only supported when running in unified mode")
|
||||
}
|
||||
|
||||
func (m *Manager) GetStats() (*cgroups.Stats, error) {
|
||||
func (m *manager) GetStats() (*cgroups.Stats, error) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
stats := cgroups.NewStats()
|
||||
for name, path := range m.Paths {
|
||||
for name, path := range m.paths {
|
||||
sys, err := m.getSubsystems().Get(name)
|
||||
if err == errSubsystemDoesNotExist || !cgroups.PathExists(path) {
|
||||
continue
|
||||
@@ -232,22 +262,23 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) {
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
func (m *Manager) Set(container *configs.Config) error {
|
||||
func (m *manager) Set(container *configs.Config) error {
|
||||
if container.Cgroups == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// If Paths are set, then we are just joining cgroups paths
|
||||
// and there is no need to set any values.
|
||||
if m.Cgroups != nil && m.Cgroups.Paths != nil {
|
||||
if m.cgroups != nil && m.cgroups.Paths != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
paths := m.GetPaths()
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
for _, sys := range m.getSubsystems() {
|
||||
path := paths[sys.Name()]
|
||||
path := m.paths[sys.Name()]
|
||||
if err := sys.Set(path, container.Cgroups); err != nil {
|
||||
if m.Rootless && sys.Name() == "devices" {
|
||||
if m.rootless && sys.Name() == "devices" {
|
||||
continue
|
||||
}
|
||||
// When m.Rootless is true, errors from the device subsystem are ignored because it is really not expected to work.
|
||||
@@ -262,45 +293,41 @@ func (m *Manager) Set(container *configs.Config) error {
|
||||
}
|
||||
}
|
||||
|
||||
if m.Paths["cpu"] != "" {
|
||||
if err := CheckCpushares(m.Paths["cpu"], container.Cgroups.Resources.CpuShares); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Freeze toggles the container's freezer cgroup depending on the state
|
||||
// provided
|
||||
func (m *Manager) Freeze(state configs.FreezerState) error {
|
||||
if m.Cgroups == nil {
|
||||
func (m *manager) Freeze(state configs.FreezerState) (Err error) {
|
||||
path := m.Path("freezer")
|
||||
if m.cgroups == nil || path == "" {
|
||||
return errors.New("cannot toggle freezer: cgroups not configured for container")
|
||||
}
|
||||
|
||||
paths := m.GetPaths()
|
||||
dir := paths["freezer"]
|
||||
prevState := m.Cgroups.Resources.Freezer
|
||||
m.Cgroups.Resources.Freezer = state
|
||||
prevState := m.cgroups.Resources.Freezer
|
||||
m.cgroups.Resources.Freezer = state
|
||||
defer func() {
|
||||
if Err != nil {
|
||||
m.cgroups.Resources.Freezer = prevState
|
||||
}
|
||||
}()
|
||||
|
||||
freezer, err := m.getSubsystems().Get("freezer")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = freezer.Set(dir, m.Cgroups)
|
||||
if err != nil {
|
||||
m.Cgroups.Resources.Freezer = prevState
|
||||
if err := freezer.Set(path, m.cgroups); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) GetPids() ([]int, error) {
|
||||
paths := m.GetPaths()
|
||||
return cgroups.GetPids(paths["devices"])
|
||||
func (m *manager) GetPids() ([]int, error) {
|
||||
return cgroups.GetPids(m.Path("devices"))
|
||||
}
|
||||
|
||||
func (m *Manager) GetAllPids() ([]int, error) {
|
||||
paths := m.GetPaths()
|
||||
return cgroups.GetAllPids(paths["devices"])
|
||||
func (m *manager) GetAllPids() ([]int, error) {
|
||||
return cgroups.GetAllPids(m.Path("devices"))
|
||||
}
|
||||
|
||||
func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
|
||||
@@ -310,7 +337,7 @@ func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
|
||||
}
|
||||
|
||||
if (c.Name != "" || c.Parent != "") && c.Path != "" {
|
||||
return nil, fmt.Errorf("cgroup: either Path or Name and Parent should be used")
|
||||
return nil, errors.New("cgroup: either Path or Name and Parent should be used")
|
||||
}
|
||||
|
||||
// XXX: Do not remove this code. Path safety is important! -- cyphar
|
||||
@@ -379,33 +406,27 @@ func removePath(p string, err error) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func CheckCpushares(path string, c uint64) error {
|
||||
var cpuShares uint64
|
||||
|
||||
if c == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
fd, err := os.Open(filepath.Join(path, "cpu.shares"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer fd.Close()
|
||||
|
||||
_, err = fmt.Fscanf(fd, "%d", &cpuShares)
|
||||
if err != nil && err != io.EOF {
|
||||
return err
|
||||
}
|
||||
|
||||
if c > cpuShares {
|
||||
return fmt.Errorf("The maximum allowed cpu-shares is %d", cpuShares)
|
||||
} else if c < cpuShares {
|
||||
return fmt.Errorf("The minimum allowed cpu-shares is %d", cpuShares)
|
||||
}
|
||||
|
||||
return nil
|
||||
func (m *manager) GetPaths() map[string]string {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
return m.paths
|
||||
}
|
||||
|
||||
func (m *Manager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.Cgroups, nil
|
||||
func (m *manager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.cgroups, nil
|
||||
}
|
||||
|
||||
func (m *manager) GetFreezerState() (configs.FreezerState, error) {
|
||||
dir := m.Path("freezer")
|
||||
freezer, err := m.getSubsystems().Get("freezer")
|
||||
|
||||
// If the container doesn't have the freezer cgroup, say it's undefined.
|
||||
if err != nil || dir == "" {
|
||||
return configs.Undefined, nil
|
||||
}
|
||||
return freezer.(*FreezerGroup).GetState(dir)
|
||||
}
|
||||
|
||||
func (m *manager) Exists() bool {
|
||||
return cgroups.PathExists(m.Path("devices"))
|
||||
}
|
10
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/kmem.go
generated
vendored
10
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/kmem.go
generated
vendored
@@ -6,10 +6,8 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"syscall" // for Errno type only
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"golang.org/x/sys/unix"
|
||||
@@ -49,12 +47,8 @@ func setKernelMemory(path string, kernelMemoryLimit int64) error {
|
||||
// The EBUSY signal is returned on attempts to write to the
|
||||
// memory.kmem.limit_in_bytes file if the cgroup has children or
|
||||
// once tasks have been attached to the cgroup
|
||||
if pathErr, ok := err.(*os.PathError); ok {
|
||||
if errNo, ok := pathErr.Err.(syscall.Errno); ok {
|
||||
if errNo == unix.EBUSY {
|
||||
return fmt.Errorf("failed to set %s, because either tasks have already joined this cgroup or it has children", cgroupKernelMemoryLimit)
|
||||
}
|
||||
}
|
||||
if errors.Is(err, unix.EBUSY) {
|
||||
return fmt.Errorf("failed to set %s, because either tasks have already joined this cgroup or it has children", cgroupKernelMemoryLimit)
|
||||
}
|
||||
return fmt.Errorf("failed to write %v to %v: %v", kernelMemoryLimit, cgroupKernelMemoryLimit, err)
|
||||
}
|
||||
|
103
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go
generated
vendored
103
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go
generated
vendored
@@ -5,7 +5,9 @@ package fs
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
@@ -16,8 +18,16 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes"
|
||||
cgroupMemoryLimit = "memory.limit_in_bytes"
|
||||
numaNodeSymbol = "N"
|
||||
numaStatColumnSeparator = " "
|
||||
numaStatKeyValueSeparator = "="
|
||||
numaStatMaxColumns = math.MaxUint8 + 1
|
||||
numaStatValueIndex = 1
|
||||
numaStatTypeIndex = 0
|
||||
numaStatColumnSliceLength = 2
|
||||
cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes"
|
||||
cgroupMemoryLimit = "memory.limit_in_bytes"
|
||||
cgroupMemoryPagesByNuma = "memory.numa_stat"
|
||||
)
|
||||
|
||||
type MemoryGroup struct {
|
||||
@@ -64,9 +74,9 @@ func (s *MemoryGroup) Apply(d *cgroupData) (err error) {
|
||||
}
|
||||
|
||||
func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error {
|
||||
// If the memory update is set to -1 we should also
|
||||
// set swap to -1, it means unlimited memory.
|
||||
if cgroup.Resources.Memory == -1 {
|
||||
// If the memory update is set to -1 and the swap is not explicitly
|
||||
// set, we should also set swap to -1, it means unlimited memory.
|
||||
if cgroup.Resources.Memory == -1 && cgroup.Resources.MemorySwap == 0 {
|
||||
// Only set swap if it's enabled in kernel
|
||||
if cgroups.PathExists(filepath.Join(path, cgroupMemorySwapLimit)) {
|
||||
cgroup.Resources.MemorySwap = -1
|
||||
@@ -209,6 +219,13 @@ func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
if value == 1 {
|
||||
stats.MemoryStats.UseHierarchy = true
|
||||
}
|
||||
|
||||
pagesByNUMA, err := getPageUsageByNUMA(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.PageUsageByNUMA = pagesByNUMA
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -269,3 +286,79 @@ func getMemoryData(path, name string) (cgroups.MemoryData, error) {
|
||||
|
||||
return memoryData, nil
|
||||
}
|
||||
|
||||
func getPageUsageByNUMA(cgroupPath string) (cgroups.PageUsageByNUMA, error) {
|
||||
stats := cgroups.PageUsageByNUMA{}
|
||||
|
||||
file, err := os.Open(path.Join(cgroupPath, cgroupMemoryPagesByNuma))
|
||||
if os.IsNotExist(err) {
|
||||
return stats, nil
|
||||
} else if err != nil {
|
||||
return stats, err
|
||||
}
|
||||
|
||||
scanner := bufio.NewScanner(file)
|
||||
for scanner.Scan() {
|
||||
var statsType string
|
||||
statsByType := cgroups.PageStats{Nodes: map[uint8]uint64{}}
|
||||
columns := strings.SplitN(scanner.Text(), numaStatColumnSeparator, numaStatMaxColumns)
|
||||
|
||||
for _, column := range columns {
|
||||
pagesByNode := strings.SplitN(column, numaStatKeyValueSeparator, numaStatColumnSliceLength)
|
||||
|
||||
if strings.HasPrefix(pagesByNode[numaStatTypeIndex], numaNodeSymbol) {
|
||||
nodeID, err := strconv.ParseUint(pagesByNode[numaStatTypeIndex][1:], 10, 8)
|
||||
if err != nil {
|
||||
return cgroups.PageUsageByNUMA{}, err
|
||||
}
|
||||
|
||||
statsByType.Nodes[uint8(nodeID)], err = strconv.ParseUint(pagesByNode[numaStatValueIndex], 0, 64)
|
||||
if err != nil {
|
||||
return cgroups.PageUsageByNUMA{}, err
|
||||
}
|
||||
} else {
|
||||
statsByType.Total, err = strconv.ParseUint(pagesByNode[numaStatValueIndex], 0, 64)
|
||||
if err != nil {
|
||||
return cgroups.PageUsageByNUMA{}, err
|
||||
}
|
||||
|
||||
statsType = pagesByNode[numaStatTypeIndex]
|
||||
}
|
||||
|
||||
err := addNUMAStatsByType(&stats, statsByType, statsType)
|
||||
if err != nil {
|
||||
return cgroups.PageUsageByNUMA{}, err
|
||||
}
|
||||
}
|
||||
}
|
||||
err = scanner.Err()
|
||||
if err != nil {
|
||||
return cgroups.PageUsageByNUMA{}, err
|
||||
}
|
||||
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
func addNUMAStatsByType(stats *cgroups.PageUsageByNUMA, byTypeStats cgroups.PageStats, statsType string) error {
|
||||
switch statsType {
|
||||
case "total":
|
||||
stats.Total = byTypeStats
|
||||
case "file":
|
||||
stats.File = byTypeStats
|
||||
case "anon":
|
||||
stats.Anon = byTypeStats
|
||||
case "unevictable":
|
||||
stats.Unevictable = byTypeStats
|
||||
case "hierarchical_total":
|
||||
stats.Hierarchical.Total = byTypeStats
|
||||
case "hierarchical_file":
|
||||
stats.Hierarchical.File = byTypeStats
|
||||
case "hierarchical_anon":
|
||||
stats.Hierarchical.Anon = byTypeStats
|
||||
case "hierarchical_unevictable":
|
||||
stats.Hierarchical.Unevictable = byTypeStats
|
||||
default:
|
||||
return fmt.Errorf("unsupported NUMA page type found: %s", statsType)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
4
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/BUILD
generated
vendored
4
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/BUILD
generated
vendored
@@ -5,10 +5,12 @@ go_library(
|
||||
srcs = [
|
||||
"cpu.go",
|
||||
"cpuset.go",
|
||||
"create.go",
|
||||
"defaultpath.go",
|
||||
"devices.go",
|
||||
"freezer.go",
|
||||
"fs2.go",
|
||||
"hugetlb.go",
|
||||
"io.go",
|
||||
"memory.go",
|
||||
"pids.go",
|
||||
@@ -22,7 +24,6 @@ go_library(
|
||||
"//vendor/github.com/pkg/errors:go_default_library",
|
||||
] + select({
|
||||
"@io_bazel_rules_go//go/platform:android": [
|
||||
"//vendor/github.com/cyphar/filepath-securejoin:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter:go_default_library",
|
||||
@@ -30,7 +31,6 @@ go_library(
|
||||
"//vendor/golang.org/x/sys/unix:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:linux": [
|
||||
"//vendor/github.com/cyphar/filepath-securejoin:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter:go_default_library",
|
||||
|
29
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpu.go
generated
vendored
29
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpu.go
generated
vendored
@@ -13,15 +13,36 @@ import (
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func isCpuSet(cgroup *configs.Cgroup) bool {
|
||||
return cgroup.Resources.CpuWeight != 0 || cgroup.Resources.CpuQuota != 0 || cgroup.Resources.CpuPeriod != 0
|
||||
}
|
||||
|
||||
func setCpu(dirPath string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.CpuWeight != 0 {
|
||||
if err := fscommon.WriteFile(dirPath, "cpu.weight", strconv.FormatUint(cgroup.Resources.CpuWeight, 10)); err != nil {
|
||||
if !isCpuSet(cgroup) {
|
||||
return nil
|
||||
}
|
||||
r := cgroup.Resources
|
||||
|
||||
// NOTE: .CpuShares is not used here. Conversion is the caller's responsibility.
|
||||
if r.CpuWeight != 0 {
|
||||
if err := fscommon.WriteFile(dirPath, "cpu.weight", strconv.FormatUint(r.CpuWeight, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if cgroup.Resources.CpuMax != "" {
|
||||
if err := fscommon.WriteFile(dirPath, "cpu.max", cgroup.Resources.CpuMax); err != nil {
|
||||
if r.CpuQuota != 0 || r.CpuPeriod != 0 {
|
||||
str := "max"
|
||||
if r.CpuQuota > 0 {
|
||||
str = strconv.FormatInt(r.CpuQuota, 10)
|
||||
}
|
||||
period := r.CpuPeriod
|
||||
if period == 0 {
|
||||
// This default value is documented in
|
||||
// https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html
|
||||
period = 100000
|
||||
}
|
||||
str += " " + strconv.FormatUint(period, 10)
|
||||
if err := fscommon.WriteFile(dirPath, "cpu.max", str); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
8
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpuset.go
generated
vendored
8
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpuset.go
generated
vendored
@@ -7,7 +7,15 @@ import (
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func isCpusetSet(cgroup *configs.Cgroup) bool {
|
||||
return cgroup.Resources.CpusetCpus != "" || cgroup.Resources.CpusetMems != ""
|
||||
}
|
||||
|
||||
func setCpuset(dirPath string, cgroup *configs.Cgroup) error {
|
||||
if !isCpusetSet(cgroup) {
|
||||
return nil
|
||||
}
|
||||
|
||||
if cgroup.Resources.CpusetCpus != "" {
|
||||
if err := fscommon.WriteFile(dirPath, "cpuset.cpus", cgroup.Resources.CpusetCpus); err != nil {
|
||||
return err
|
||||
|
151
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/create.go
generated
vendored
Normal file
151
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/create.go
generated
vendored
Normal file
@@ -0,0 +1,151 @@
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func supportedControllers(cgroup *configs.Cgroup) ([]byte, error) {
|
||||
const file = UnifiedMountpoint + "/cgroup.controllers"
|
||||
return ioutil.ReadFile(file)
|
||||
}
|
||||
|
||||
// needAnyControllers returns whether we enable some supported controllers or not,
|
||||
// based on (1) controllers available and (2) resources that are being set.
|
||||
// We don't check "pseudo" controllers such as
|
||||
// "freezer" and "devices".
|
||||
func needAnyControllers(cgroup *configs.Cgroup) (bool, error) {
|
||||
if cgroup == nil {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// list of all available controllers
|
||||
content, err := supportedControllers(cgroup)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
avail := make(map[string]struct{})
|
||||
for _, ctr := range strings.Fields(string(content)) {
|
||||
avail[ctr] = struct{}{}
|
||||
}
|
||||
|
||||
// check whether the controller is available or not
|
||||
have := func(controller string) bool {
|
||||
_, ok := avail[controller]
|
||||
return ok
|
||||
}
|
||||
|
||||
if isPidsSet(cgroup) && have("pids") {
|
||||
return true, nil
|
||||
}
|
||||
if isMemorySet(cgroup) && have("memory") {
|
||||
return true, nil
|
||||
}
|
||||
if isIoSet(cgroup) && have("io") {
|
||||
return true, nil
|
||||
}
|
||||
if isCpuSet(cgroup) && have("cpu") {
|
||||
return true, nil
|
||||
}
|
||||
if isCpusetSet(cgroup) && have("cpuset") {
|
||||
return true, nil
|
||||
}
|
||||
if isHugeTlbSet(cgroup) && have("hugetlb") {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// containsDomainController returns whether the current config contains domain controller or not.
|
||||
// Refer to: http://man7.org/linux/man-pages/man7/cgroups.7.html
|
||||
// As at Linux 4.19, the following controllers are threaded: cpu, perf_event, and pids.
|
||||
func containsDomainController(cg *configs.Cgroup) bool {
|
||||
return isMemorySet(cg) || isIoSet(cg) || isCpuSet(cg) || isHugeTlbSet(cg)
|
||||
}
|
||||
|
||||
// CreateCgroupPath creates cgroupv2 path, enabling all the supported controllers.
|
||||
func CreateCgroupPath(path string, c *configs.Cgroup) (Err error) {
|
||||
if !strings.HasPrefix(path, UnifiedMountpoint) {
|
||||
return fmt.Errorf("invalid cgroup path %s", path)
|
||||
}
|
||||
|
||||
content, err := supportedControllers(c)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
ctrs := bytes.Fields(content)
|
||||
res := append([]byte("+"), bytes.Join(ctrs, []byte(" +"))...)
|
||||
|
||||
elements := strings.Split(path, "/")
|
||||
elements = elements[3:]
|
||||
current := "/sys/fs"
|
||||
for i, e := range elements {
|
||||
current = filepath.Join(current, e)
|
||||
if i > 0 {
|
||||
if err := os.Mkdir(current, 0755); err != nil {
|
||||
if !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
// If the directory was created, be sure it is not left around on errors.
|
||||
current := current
|
||||
defer func() {
|
||||
if Err != nil {
|
||||
os.Remove(current)
|
||||
}
|
||||
}()
|
||||
}
|
||||
cgTypeFile := filepath.Join(current, "cgroup.type")
|
||||
cgType, _ := ioutil.ReadFile(cgTypeFile)
|
||||
switch strings.TrimSpace(string(cgType)) {
|
||||
// If the cgroup is in an invalid mode (usually this means there's an internal
|
||||
// process in the cgroup tree, because we created a cgroup under an
|
||||
// already-populated-by-other-processes cgroup), then we have to error out if
|
||||
// the user requested controllers which are not thread-aware. However, if all
|
||||
// the controllers requested are thread-aware we can simply put the cgroup into
|
||||
// threaded mode.
|
||||
case "domain invalid":
|
||||
if containsDomainController(c) {
|
||||
return fmt.Errorf("cannot enter cgroupv2 %q with domain controllers -- it is in an invalid state", current)
|
||||
} else {
|
||||
// Not entirely correct (in theory we'd always want to be a domain --
|
||||
// since that means we're a properly delegated cgroup subtree) but in
|
||||
// this case there's not much we can do and it's better than giving an
|
||||
// error.
|
||||
_ = ioutil.WriteFile(cgTypeFile, []byte("threaded"), 0644)
|
||||
}
|
||||
// If the cgroup is in (threaded) or (domain threaded) mode, we can only use thread-aware controllers
|
||||
// (and you cannot usually take a cgroup out of threaded mode).
|
||||
case "domain threaded":
|
||||
fallthrough
|
||||
case "threaded":
|
||||
if containsDomainController(c) {
|
||||
return fmt.Errorf("cannot enter cgroupv2 %q with domain controllers -- it is in %s mode", current, strings.TrimSpace(string(cgType)))
|
||||
}
|
||||
}
|
||||
}
|
||||
// enable all supported controllers
|
||||
if i < len(elements)-1 {
|
||||
file := filepath.Join(current, "cgroup.subtree_control")
|
||||
if err := ioutil.WriteFile(file, res, 0644); err != nil {
|
||||
// try write one by one
|
||||
allCtrs := bytes.Split(res, []byte(" "))
|
||||
for _, ctr := range allCtrs {
|
||||
_ = ioutil.WriteFile(file, ctr, 0644)
|
||||
}
|
||||
}
|
||||
// Some controllers might not be enabled when rootless or containerized,
|
||||
// but we don't catch the error here. (Caught in setXXX() functions.)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
24
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/defaultpath.go
generated
vendored
24
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/defaultpath.go
generated
vendored
@@ -44,14 +44,10 @@ func defaultDirPath(c *configs.Cgroup) (string, error) {
|
||||
cgParent := libcontainerUtils.CleanPath(c.Parent)
|
||||
cgName := libcontainerUtils.CleanPath(c.Name)
|
||||
|
||||
ownCgroup, err := parseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return _defaultDirPath(UnifiedMountpoint, cgPath, cgParent, cgName, ownCgroup)
|
||||
return _defaultDirPath(UnifiedMountpoint, cgPath, cgParent, cgName)
|
||||
}
|
||||
|
||||
func _defaultDirPath(root, cgPath, cgParent, cgName, ownCgroup string) (string, error) {
|
||||
func _defaultDirPath(root, cgPath, cgParent, cgName string) (string, error) {
|
||||
if (cgName != "" || cgParent != "") && cgPath != "" {
|
||||
return "", errors.New("cgroup: either Path or Name and Parent should be used")
|
||||
}
|
||||
@@ -62,6 +58,16 @@ func _defaultDirPath(root, cgPath, cgParent, cgName, ownCgroup string) (string,
|
||||
if filepath.IsAbs(innerPath) {
|
||||
return filepath.Join(root, innerPath), nil
|
||||
}
|
||||
|
||||
ownCgroup, err := parseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
// The current user scope most probably has tasks in it already,
|
||||
// making it impossible to enable controllers for its sub-cgroup.
|
||||
// A parent cgroup (with no tasks in it) is what we need.
|
||||
ownCgroup = filepath.Dir(ownCgroup)
|
||||
|
||||
return filepath.Join(root, ownCgroup, innerPath), nil
|
||||
}
|
||||
|
||||
@@ -80,9 +86,6 @@ func parseCgroupFromReader(r io.Reader) (string, error) {
|
||||
s = bufio.NewScanner(r)
|
||||
)
|
||||
for s.Scan() {
|
||||
if err := s.Err(); err != nil {
|
||||
return "", err
|
||||
}
|
||||
var (
|
||||
text = s.Text()
|
||||
parts = strings.SplitN(text, ":", 3)
|
||||
@@ -95,5 +98,8 @@ func parseCgroupFromReader(r io.Reader) (string, error) {
|
||||
return parts[2], nil
|
||||
}
|
||||
}
|
||||
if err := s.Err(); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return "", errors.New("cgroup path not found")
|
||||
}
|
||||
|
39
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/devices.go
generated
vendored
39
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/devices.go
generated
vendored
@@ -10,12 +10,10 @@ import (
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func isRWM(cgroupPermissions string) bool {
|
||||
r := false
|
||||
w := false
|
||||
m := false
|
||||
for _, rn := range cgroupPermissions {
|
||||
switch rn {
|
||||
func isRWM(perms configs.DevicePermissions) bool {
|
||||
var r, w, m bool
|
||||
for _, perm := range perms {
|
||||
switch perm {
|
||||
case 'r':
|
||||
r = true
|
||||
case 'w':
|
||||
@@ -39,22 +37,10 @@ func canSkipEBPFError(cgroup *configs.Cgroup) bool {
|
||||
}
|
||||
|
||||
func setDevices(dirPath string, cgroup *configs.Cgroup) error {
|
||||
// XXX: This is currently a white-list (but all callers pass a blacklist of
|
||||
// devices). This is bad for a whole variety of reasons, but will need
|
||||
// to be fixed with co-ordinated effort with downstreams.
|
||||
devices := cgroup.Devices
|
||||
if allowAllDevices := cgroup.Resources.AllowAllDevices; allowAllDevices != nil {
|
||||
// never set by OCI specconv, but *allowAllDevices=false is still used by the integration test
|
||||
if *allowAllDevices == true {
|
||||
return errors.New("libcontainer AllowAllDevices is not supported, use Devices")
|
||||
}
|
||||
for _, ad := range cgroup.Resources.AllowedDevices {
|
||||
d := *ad
|
||||
d.Allow = true
|
||||
devices = append(devices, &d)
|
||||
}
|
||||
}
|
||||
if len(cgroup.Resources.DeniedDevices) != 0 {
|
||||
// never set by OCI specconv
|
||||
return errors.New("libcontainer DeniedDevices is not supported, use Devices")
|
||||
}
|
||||
insts, license, err := devicefilter.DeviceFilter(devices)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -64,6 +50,17 @@ func setDevices(dirPath string, cgroup *configs.Cgroup) error {
|
||||
return errors.Errorf("cannot get dir FD for %s", dirPath)
|
||||
}
|
||||
defer unix.Close(dirFD)
|
||||
// XXX: This code is currently incorrect when it comes to updating an
|
||||
// existing cgroup with new rules (new rulesets are just appended to
|
||||
// the program list because this uses BPF_F_ALLOW_MULTI). If we didn't
|
||||
// use BPF_F_ALLOW_MULTI we could actually atomically swap the
|
||||
// programs.
|
||||
//
|
||||
// The real issue is that BPF_F_ALLOW_MULTI makes it hard to have a
|
||||
// race-free blacklist because it acts as a whitelist by default, and
|
||||
// having a deny-everything program cannot be overridden by other
|
||||
// programs. You could temporarily insert a deny-everything program
|
||||
// but that would result in spurious failures during updates.
|
||||
if _, err := ebpf.LoadAttachCgroupDeviceFilter(insts, license, dirFD); err != nil {
|
||||
if !canSkipEBPFError(cgroup) {
|
||||
return err
|
||||
|
63
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/freezer.go
generated
vendored
63
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/freezer.go
generated
vendored
@@ -3,32 +3,49 @@
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
stdErrors "errors"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func setFreezer(dirPath string, state configs.FreezerState) error {
|
||||
var desired int
|
||||
if err := supportsFreezer(dirPath); err != nil {
|
||||
// We can ignore this request as long as the user didn't ask us to
|
||||
// freeze the container (since without the freezer cgroup, that's a
|
||||
// no-op).
|
||||
if state == configs.Undefined || state == configs.Thawed {
|
||||
err = nil
|
||||
}
|
||||
return errors.Wrap(err, "freezer not supported")
|
||||
}
|
||||
|
||||
var stateStr string
|
||||
switch state {
|
||||
case configs.Undefined:
|
||||
return nil
|
||||
case configs.Frozen:
|
||||
desired = 1
|
||||
stateStr = "1"
|
||||
case configs.Thawed:
|
||||
desired = 0
|
||||
stateStr = "0"
|
||||
default:
|
||||
return errors.Errorf("unknown freezer state %+v", state)
|
||||
return errors.Errorf("invalid freezer state %q requested", state)
|
||||
}
|
||||
supportedErr := supportsFreezer(dirPath)
|
||||
if supportedErr != nil && desired != 0 {
|
||||
// can ignore error if desired == 1
|
||||
return errors.Wrap(supportedErr, "freezer not supported")
|
||||
|
||||
if err := fscommon.WriteFile(dirPath, "cgroup.freeze", stateStr); err != nil {
|
||||
return err
|
||||
}
|
||||
return freezeWithInt(dirPath, desired)
|
||||
// Confirm that the cgroup did actually change states.
|
||||
if actualState, err := getFreezer(dirPath); err != nil {
|
||||
return err
|
||||
} else if actualState != state {
|
||||
return errors.Errorf(`expected "cgroup.freeze" to be in state %q but was in %q`, state, actualState)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func supportsFreezer(dirPath string) error {
|
||||
@@ -36,18 +53,22 @@ func supportsFreezer(dirPath string) error {
|
||||
return err
|
||||
}
|
||||
|
||||
// freeze writes desired int to "cgroup.freeze".
|
||||
func freezeWithInt(dirPath string, desired int) error {
|
||||
desiredS := strconv.Itoa(desired)
|
||||
if err := fscommon.WriteFile(dirPath, "cgroup.freeze", desiredS); err != nil {
|
||||
return err
|
||||
}
|
||||
got, err := fscommon.ReadFile(dirPath, "cgroup.freeze")
|
||||
func getFreezer(dirPath string) (configs.FreezerState, error) {
|
||||
state, err := fscommon.ReadFile(dirPath, "cgroup.freeze")
|
||||
if err != nil {
|
||||
return err
|
||||
// If the kernel is too old, then we just treat the freezer as being in
|
||||
// an "undefined" state.
|
||||
if os.IsNotExist(err) || stdErrors.Is(err, unix.ENODEV) {
|
||||
err = nil
|
||||
}
|
||||
return configs.Undefined, err
|
||||
}
|
||||
if gotS := strings.TrimSpace(string(got)); gotS != desiredS {
|
||||
return errors.Errorf("expected \"cgroup.freeze\" in %q to be %q, got %q", dirPath, desiredS, gotS)
|
||||
switch strings.TrimSpace(state) {
|
||||
case "0":
|
||||
return configs.Thawed, nil
|
||||
case "1":
|
||||
return configs.Frozen, nil
|
||||
default:
|
||||
return configs.Undefined, errors.Errorf(`unknown "cgroup.freeze" state: %q`, state)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
266
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go
generated
vendored
266
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go
generated
vendored
@@ -8,64 +8,12 @@ import (
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
securejoin "github.com/cyphar/filepath-securejoin"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// NewManager creates a manager for cgroup v2 unified hierarchy.
|
||||
// dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope".
|
||||
// If dirPath is empty, it is automatically set using config.
|
||||
func NewManager(config *configs.Cgroup, dirPath string, rootless bool) (cgroups.Manager, error) {
|
||||
if config == nil {
|
||||
config = &configs.Cgroup{}
|
||||
}
|
||||
if dirPath != "" {
|
||||
if filepath.Clean(dirPath) != dirPath || !filepath.IsAbs(dirPath) {
|
||||
return nil, errors.Errorf("invalid dir path %q", dirPath)
|
||||
}
|
||||
} else {
|
||||
var err error
|
||||
dirPath, err = defaultDirPath(config)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
controllers, err := detectControllers(dirPath)
|
||||
if err != nil && !rootless {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
m := &manager{
|
||||
config: config,
|
||||
dirPath: dirPath,
|
||||
controllers: controllers,
|
||||
rootless: rootless,
|
||||
}
|
||||
return m, nil
|
||||
}
|
||||
|
||||
func detectControllers(dirPath string) (map[string]struct{}, error) {
|
||||
if err := os.MkdirAll(dirPath, 0755); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
controllersPath, err := securejoin.SecureJoin(dirPath, "cgroup.controllers")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
controllersData, err := ioutil.ReadFile(controllersPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
controllersFields := strings.Fields(string(controllersData))
|
||||
controllers := make(map[string]struct{}, len(controllersFields))
|
||||
for _, c := range controllersFields {
|
||||
controllers[c] = struct{}{}
|
||||
}
|
||||
return controllers, nil
|
||||
}
|
||||
|
||||
type manager struct {
|
||||
config *configs.Cgroup
|
||||
// dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope"
|
||||
@@ -76,8 +24,68 @@ type manager struct {
|
||||
rootless bool
|
||||
}
|
||||
|
||||
// NewManager creates a manager for cgroup v2 unified hierarchy.
|
||||
// dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope".
|
||||
// If dirPath is empty, it is automatically set using config.
|
||||
func NewManager(config *configs.Cgroup, dirPath string, rootless bool) (cgroups.Manager, error) {
|
||||
if config == nil {
|
||||
config = &configs.Cgroup{}
|
||||
}
|
||||
if dirPath == "" {
|
||||
var err error
|
||||
dirPath, err = defaultDirPath(config)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
m := &manager{
|
||||
config: config,
|
||||
dirPath: dirPath,
|
||||
rootless: rootless,
|
||||
}
|
||||
return m, nil
|
||||
}
|
||||
|
||||
func (m *manager) getControllers() error {
|
||||
if m.controllers != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
file := filepath.Join(m.dirPath, "cgroup.controllers")
|
||||
data, err := ioutil.ReadFile(file)
|
||||
if err != nil {
|
||||
if m.rootless && m.config.Path == "" {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
fields := strings.Fields(string(data))
|
||||
m.controllers = make(map[string]struct{}, len(fields))
|
||||
for _, c := range fields {
|
||||
m.controllers[c] = struct{}{}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *manager) Apply(pid int) error {
|
||||
if err := cgroups.WriteCgroupProc(m.dirPath, pid); err != nil && !m.rootless {
|
||||
if err := CreateCgroupPath(m.dirPath, m.config); err != nil {
|
||||
// Related tests:
|
||||
// - "runc create (no limits + no cgrouppath + no permission) succeeds"
|
||||
// - "runc create (rootless + no limits + cgrouppath + no permission) fails with permission error"
|
||||
// - "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
|
||||
if m.rootless {
|
||||
if m.config.Path == "" {
|
||||
if blNeed, nErr := needAnyControllers(m.config); nErr == nil && !blNeed {
|
||||
return nil
|
||||
}
|
||||
return errors.Wrap(err, "rootless needs no limits + no cgrouppath when no permission is granted for cgroups")
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
||||
if err := cgroups.WriteCgroupProc(m.dirPath, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
@@ -93,41 +101,52 @@ func (m *manager) GetAllPids() ([]int, error) {
|
||||
|
||||
func (m *manager) GetStats() (*cgroups.Stats, error) {
|
||||
var (
|
||||
st cgroups.Stats
|
||||
errs []error
|
||||
)
|
||||
|
||||
st := cgroups.NewStats()
|
||||
if err := m.getControllers(); err != nil {
|
||||
return st, err
|
||||
}
|
||||
|
||||
// pids (since kernel 4.5)
|
||||
if _, ok := m.controllers["pids"]; ok {
|
||||
if err := statPids(m.dirPath, &st); err != nil {
|
||||
if err := statPids(m.dirPath, st); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
} else {
|
||||
if err := statPidsWithoutController(m.dirPath, &st); err != nil {
|
||||
if err := statPidsWithoutController(m.dirPath, st); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
// memory (since kenrel 4.5)
|
||||
// memory (since kernel 4.5)
|
||||
if _, ok := m.controllers["memory"]; ok {
|
||||
if err := statMemory(m.dirPath, &st); err != nil {
|
||||
if err := statMemory(m.dirPath, st); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
// io (since kernel 4.5)
|
||||
if _, ok := m.controllers["io"]; ok {
|
||||
if err := statIo(m.dirPath, &st); err != nil {
|
||||
if err := statIo(m.dirPath, st); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
// cpu (since kernel 4.15)
|
||||
if _, ok := m.controllers["cpu"]; ok {
|
||||
if err := statCpu(m.dirPath, &st); err != nil {
|
||||
if err := statCpu(m.dirPath, st); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
// hugetlb (since kernel 5.6)
|
||||
if _, ok := m.controllers["hugetlb"]; ok {
|
||||
if err := statHugeTlb(m.dirPath, st); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
if len(errs) > 0 && !m.rootless {
|
||||
return &st, errors.Errorf("error while statting cgroup v2: %+v", errs)
|
||||
return st, errors.Errorf("error while statting cgroup v2: %+v", errs)
|
||||
}
|
||||
return &st, nil
|
||||
return st, nil
|
||||
}
|
||||
|
||||
func (m *manager) Freeze(state configs.FreezerState) error {
|
||||
@@ -138,77 +157,112 @@ func (m *manager) Freeze(state configs.FreezerState) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func rmdir(path string) error {
|
||||
err := unix.Rmdir(path)
|
||||
if err == nil || err == unix.ENOENT {
|
||||
return nil
|
||||
}
|
||||
return &os.PathError{Op: "rmdir", Path: path, Err: err}
|
||||
}
|
||||
|
||||
// removeCgroupPath removes the cgroup path recursively,
|
||||
// because there may be sub-cgroups in it.
|
||||
func removeCgroupPath(path string) error {
|
||||
// try the fast path first
|
||||
if err := rmdir(path); err == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
infos, err := ioutil.ReadDir(path)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
err = nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
for _, info := range infos {
|
||||
if info.IsDir() {
|
||||
// We should remove subcgroups dir first
|
||||
if err = removeCgroupPath(filepath.Join(path, info.Name())); err != nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if err == nil {
|
||||
err = rmdir(path)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (m *manager) Destroy() error {
|
||||
return os.RemoveAll(m.dirPath)
|
||||
return removeCgroupPath(m.dirPath)
|
||||
}
|
||||
|
||||
// GetPaths is for compatibility purpose and should be removed in future
|
||||
func (m *manager) GetPaths() map[string]string {
|
||||
paths := map[string]string{
|
||||
// pseudo-controller for compatibility
|
||||
"devices": m.dirPath,
|
||||
"freezer": m.dirPath,
|
||||
}
|
||||
for c := range m.controllers {
|
||||
paths[c] = m.dirPath
|
||||
}
|
||||
return paths
|
||||
}
|
||||
|
||||
func (m *manager) GetUnifiedPath() (string, error) {
|
||||
return m.dirPath, nil
|
||||
func (m *manager) Path(_ string) string {
|
||||
return m.dirPath
|
||||
}
|
||||
|
||||
func (m *manager) Set(container *configs.Config) error {
|
||||
if container == nil || container.Cgroups == nil {
|
||||
return nil
|
||||
}
|
||||
var errs []error
|
||||
if err := m.getControllers(); err != nil {
|
||||
return err
|
||||
}
|
||||
// pids (since kernel 4.5)
|
||||
if _, ok := m.controllers["pids"]; ok {
|
||||
if err := setPids(m.dirPath, container.Cgroups); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
if err := setPids(m.dirPath, container.Cgroups); err != nil {
|
||||
return err
|
||||
}
|
||||
// memory (since kernel 4.5)
|
||||
if _, ok := m.controllers["memory"]; ok {
|
||||
if err := setMemory(m.dirPath, container.Cgroups); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
if err := setMemory(m.dirPath, container.Cgroups); err != nil {
|
||||
return err
|
||||
}
|
||||
// io (since kernel 4.5)
|
||||
if _, ok := m.controllers["io"]; ok {
|
||||
if err := setIo(m.dirPath, container.Cgroups); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
if err := setIo(m.dirPath, container.Cgroups); err != nil {
|
||||
return err
|
||||
}
|
||||
// cpu (since kernel 4.15)
|
||||
if _, ok := m.controllers["cpu"]; ok {
|
||||
if err := setCpu(m.dirPath, container.Cgroups); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
if err := setCpu(m.dirPath, container.Cgroups); err != nil {
|
||||
return err
|
||||
}
|
||||
// devices (since kernel 4.15, pseudo-controller)
|
||||
if err := setDevices(m.dirPath, container.Cgroups); err != nil {
|
||||
errs = append(errs, err)
|
||||
//
|
||||
// When m.Rootless is true, errors from the device subsystem are ignored because it is really not expected to work.
|
||||
// However, errors from other subsystems are not ignored.
|
||||
// see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
|
||||
if err := setDevices(m.dirPath, container.Cgroups); err != nil && !m.rootless {
|
||||
return err
|
||||
}
|
||||
// cpuset (since kernel 5.0)
|
||||
if _, ok := m.controllers["cpuset"]; ok {
|
||||
if err := setCpuset(m.dirPath, container.Cgroups); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
if err := setCpuset(m.dirPath, container.Cgroups); err != nil {
|
||||
return err
|
||||
}
|
||||
// hugetlb (since kernel 5.6)
|
||||
if err := setHugeTlb(m.dirPath, container.Cgroups); err != nil {
|
||||
return err
|
||||
}
|
||||
// freezer (since kernel 5.2, pseudo-controller)
|
||||
if err := setFreezer(m.dirPath, container.Cgroups.Freezer); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
if len(errs) > 0 && !m.rootless {
|
||||
return errors.Errorf("error while setting cgroup v2: %+v", errs)
|
||||
return err
|
||||
}
|
||||
m.config = container.Cgroups
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *manager) GetPaths() map[string]string {
|
||||
paths := make(map[string]string, 1)
|
||||
paths[""] = m.dirPath
|
||||
return paths
|
||||
}
|
||||
|
||||
func (m *manager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.config, nil
|
||||
}
|
||||
|
||||
func (m *manager) GetFreezerState() (configs.FreezerState, error) {
|
||||
return getFreezer(m.dirPath)
|
||||
}
|
||||
|
||||
func (m *manager) Exists() bool {
|
||||
return cgroups.PathExists(m.dirPath)
|
||||
}
|
||||
|
66
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/hugetlb.go
generated
vendored
Normal file
66
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/hugetlb.go
generated
vendored
Normal file
@@ -0,0 +1,66 @@
|
||||
// +build linux
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func isHugeTlbSet(cgroup *configs.Cgroup) bool {
|
||||
return len(cgroup.Resources.HugetlbLimit) > 0
|
||||
}
|
||||
|
||||
func setHugeTlb(dirPath string, cgroup *configs.Cgroup) error {
|
||||
if !isHugeTlbSet(cgroup) {
|
||||
return nil
|
||||
}
|
||||
for _, hugetlb := range cgroup.Resources.HugetlbLimit {
|
||||
if err := fscommon.WriteFile(dirPath, strings.Join([]string{"hugetlb", hugetlb.Pagesize, "max"}, "."), strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func statHugeTlb(dirPath string, stats *cgroups.Stats) error {
|
||||
hugePageSizes, err := cgroups.GetHugePageSize()
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "failed to fetch hugetlb info")
|
||||
}
|
||||
hugetlbStats := cgroups.HugetlbStats{}
|
||||
|
||||
for _, pagesize := range hugePageSizes {
|
||||
usage := strings.Join([]string{"hugetlb", pagesize, "current"}, ".")
|
||||
value, err := fscommon.GetCgroupParamUint(dirPath, usage)
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "failed to parse hugetlb.%s.current file", pagesize)
|
||||
}
|
||||
hugetlbStats.Usage = value
|
||||
|
||||
fileName := strings.Join([]string{"hugetlb", pagesize, "events"}, ".")
|
||||
filePath := filepath.Join(dirPath, fileName)
|
||||
contents, err := ioutil.ReadFile(filePath)
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "failed to parse hugetlb.%s.events file", pagesize)
|
||||
}
|
||||
_, value, err = fscommon.GetCgroupParamKeyValue(string(contents))
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "failed to parse hugetlb.%s.events file", pagesize)
|
||||
}
|
||||
hugetlbStats.Failcnt = value
|
||||
|
||||
stats.HugetlbStats[pagesize] = hugetlbStats
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
20
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/io.go
generated
vendored
20
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/io.go
generated
vendored
@@ -14,14 +14,26 @@ import (
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func isIoSet(cgroup *configs.Cgroup) bool {
|
||||
return cgroup.Resources.BlkioWeight != 0 ||
|
||||
len(cgroup.Resources.BlkioThrottleReadBpsDevice) > 0 ||
|
||||
len(cgroup.Resources.BlkioThrottleWriteBpsDevice) > 0 ||
|
||||
len(cgroup.Resources.BlkioThrottleReadIOPSDevice) > 0 ||
|
||||
len(cgroup.Resources.BlkioThrottleWriteIOPSDevice) > 0
|
||||
}
|
||||
|
||||
func setIo(dirPath string, cgroup *configs.Cgroup) error {
|
||||
if !isIoSet(cgroup) {
|
||||
return nil
|
||||
}
|
||||
|
||||
if cgroup.Resources.BlkioWeight != 0 {
|
||||
filename := "io.bfq.weight"
|
||||
if err := fscommon.WriteFile(dirPath, filename, strconv.FormatUint(uint64(cgroup.Resources.BlkioWeight), 10)); err != nil {
|
||||
if err := fscommon.WriteFile(dirPath, filename,
|
||||
strconv.FormatUint(cgroups.ConvertBlkIOToCgroupV2Value(cgroup.Resources.BlkioWeight), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
for _, td := range cgroup.Resources.BlkioThrottleReadBpsDevice {
|
||||
if err := fscommon.WriteFile(dirPath, "io.max", td.StringName("rbps")); err != nil {
|
||||
return err
|
||||
@@ -81,11 +93,11 @@ func statIo(dirPath string, stats *cgroups.Stats) error {
|
||||
if len(d) != 2 {
|
||||
continue
|
||||
}
|
||||
minor, err := strconv.ParseUint(d[0], 10, 0)
|
||||
major, err := strconv.ParseUint(d[0], 10, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
major, err := strconv.ParseUint(d[1], 10, 0)
|
||||
minor, err := strconv.ParseUint(d[1], 10, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
48
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go
generated
vendored
48
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go
generated
vendored
@@ -15,22 +15,58 @@ import (
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// numToStr renders an int64 limit as the text written to cgroup v2 files
// with a .min, .max, .low or .high suffix.
//
//   - 0 yields the empty string (callers in this file skip the write).
//   - -1 yields "max", mirroring the cgroup v1 convention of writing -1 to
//     remove a limit.
//   - any other value is formatted in base 10.
func numToStr(value int64) (ret string) {
	if value == 0 {
		return ""
	}
	if value == -1 {
		return "max"
	}
	return strconv.FormatInt(value, 10)
}
|
||||
|
||||
func isMemorySet(cgroup *configs.Cgroup) bool {
|
||||
return cgroup.Resources.MemoryReservation != 0 ||
|
||||
cgroup.Resources.Memory != 0 || cgroup.Resources.MemorySwap != 0
|
||||
}
|
||||
|
||||
func setMemory(dirPath string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.MemorySwap != 0 {
|
||||
if err := fscommon.WriteFile(dirPath, "memory.swap.max", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
|
||||
if !isMemorySet(cgroup) {
|
||||
return nil
|
||||
}
|
||||
swap, err := cgroups.ConvertMemorySwapToCgroupV2Value(cgroup.Resources.MemorySwap, cgroup.Resources.Memory)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
swapStr := numToStr(swap)
|
||||
if swapStr == "" && swap == 0 && cgroup.Resources.MemorySwap > 0 {
|
||||
// memory and memorySwap set to the same value -- disable swap
|
||||
swapStr = "0"
|
||||
}
|
||||
// never write empty string to `memory.swap.max`, it means set to 0.
|
||||
if swapStr != "" {
|
||||
if err := fscommon.WriteFile(dirPath, "memory.swap.max", swapStr); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.Memory != 0 {
|
||||
if err := fscommon.WriteFile(dirPath, "memory.max", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
|
||||
|
||||
if val := numToStr(cgroup.Resources.Memory); val != "" {
|
||||
if err := fscommon.WriteFile(dirPath, "memory.max", val); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// cgroup.Resources.KernelMemory is ignored
|
||||
|
||||
if cgroup.Resources.MemoryReservation != 0 {
|
||||
if err := fscommon.WriteFile(dirPath, "memory.low", strconv.FormatInt(cgroup.Resources.MemoryReservation, 10)); err != nil {
|
||||
if val := numToStr(cgroup.Resources.MemoryReservation); val != "" {
|
||||
if err := fscommon.WriteFile(dirPath, "memory.low", val); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
31
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/pids.go
generated
vendored
31
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/pids.go
generated
vendored
@@ -4,9 +4,7 @@ package fs2
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
@@ -16,16 +14,16 @@ import (
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// isPidsSet reports whether the cgroup configuration has a non-zero
// PidsLimit, i.e. whether there is anything for setPids to write.
func isPidsSet(cgroup *configs.Cgroup) bool {
	return cgroup.Resources.PidsLimit != 0
}
|
||||
|
||||
func setPids(dirPath string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.PidsLimit != 0 {
|
||||
// "max" is the fallback value.
|
||||
limit := "max"
|
||||
|
||||
if cgroup.Resources.PidsLimit > 0 {
|
||||
limit = strconv.FormatInt(cgroup.Resources.PidsLimit, 10)
|
||||
}
|
||||
|
||||
if err := fscommon.WriteFile(dirPath, "pids.max", limit); err != nil {
|
||||
if !isPidsSet(cgroup) {
|
||||
return nil
|
||||
}
|
||||
if val := numToStr(cgroup.Resources.PidsLimit); val != "" {
|
||||
if err := fscommon.WriteFile(dirPath, "pids.max", val); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -33,20 +31,11 @@ func setPids(dirPath string, cgroup *configs.Cgroup) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// isNOTSUP reports whether err is an *os.PathError whose underlying Err is
// unix.ENOTSUP. Any other error type (including nil) yields false; wrapped
// errors are not unwrapped.
func isNOTSUP(err error) bool {
	switch err := err.(type) {
	case *os.PathError:
		return err.Err == unix.ENOTSUP
	default:
		return false
	}
}
|
||||
|
||||
func statPidsWithoutController(dirPath string, stats *cgroups.Stats) error {
|
||||
// if the controller is not enabled, let's read PIDS from cgroups.procs
|
||||
// (or threads if cgroup.threads is enabled)
|
||||
contents, err := ioutil.ReadFile(filepath.Join(dirPath, "cgroup.procs"))
|
||||
if err != nil && isNOTSUP(err) {
|
||||
if errors.Is(err, unix.ENOTSUP) {
|
||||
contents, err = ioutil.ReadFile(filepath.Join(dirPath, "cgroup.threads"))
|
||||
}
|
||||
if err != nil {
|
||||
|
4
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/BUILD
generated
vendored
4
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/BUILD
generated
vendored
@@ -13,10 +13,14 @@ go_library(
|
||||
"@io_bazel_rules_go//go/platform:android": [
|
||||
"//vendor/github.com/cyphar/filepath-securejoin:go_default_library",
|
||||
"//vendor/github.com/pkg/errors:go_default_library",
|
||||
"//vendor/github.com/sirupsen/logrus:go_default_library",
|
||||
"//vendor/golang.org/x/sys/unix:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:linux": [
|
||||
"//vendor/github.com/cyphar/filepath-securejoin:go_default_library",
|
||||
"//vendor/github.com/pkg/errors:go_default_library",
|
||||
"//vendor/github.com/sirupsen/logrus:go_default_library",
|
||||
"//vendor/golang.org/x/sys/unix:go_default_library",
|
||||
],
|
||||
"//conditions:default": [],
|
||||
}),
|
||||
|
16
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/fscommon.go
generated
vendored
16
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/fscommon.go
generated
vendored
@@ -4,9 +4,12 @@ package fscommon
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"os"
|
||||
|
||||
securejoin "github.com/cyphar/filepath-securejoin"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func WriteFile(dir, file, data string) error {
|
||||
@@ -17,7 +20,7 @@ func WriteFile(dir, file, data string) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := ioutil.WriteFile(path, []byte(data), 0700); err != nil {
|
||||
if err := retryingWriteFile(path, []byte(data), 0700); err != nil {
|
||||
return errors.Wrapf(err, "failed to write %q to %q", data, path)
|
||||
}
|
||||
return nil
|
||||
@@ -34,3 +37,14 @@ func ReadFile(dir, file string) (string, error) {
|
||||
data, err := ioutil.ReadFile(path)
|
||||
return string(data), err
|
||||
}
|
||||
|
||||
func retryingWriteFile(filename string, data []byte, perm os.FileMode) error {
|
||||
for {
|
||||
err := ioutil.WriteFile(filename, data, perm)
|
||||
if errors.Is(err, unix.EINTR) {
|
||||
logrus.Infof("interrupted while writing %s to %s", string(data), filename)
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
27
vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go
generated
vendored
27
vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go
generated
vendored
@@ -20,6 +20,12 @@ type CpuUsage struct {
|
||||
// Total CPU time consumed per core.
|
||||
// Units: nanoseconds.
|
||||
PercpuUsage []uint64 `json:"percpu_usage,omitempty"`
|
||||
// CPU time consumed per core in kernel mode
|
||||
// Units: nanoseconds.
|
||||
PercpuUsageInKernelmode []uint64 `json:"percpu_usage_in_kernelmode"`
|
||||
// CPU time consumed per core in user mode
|
||||
// Units: nanoseconds.
|
||||
PercpuUsageInUsermode []uint64 `json:"percpu_usage_in_usermode"`
|
||||
// Time spent by tasks of the cgroup in kernel mode.
|
||||
// Units: nanoseconds.
|
||||
UsageInKernelmode uint64 `json:"usage_in_kernelmode"`
|
||||
@@ -51,12 +57,33 @@ type MemoryStats struct {
|
||||
KernelUsage MemoryData `json:"kernel_usage,omitempty"`
|
||||
// usage of kernel TCP memory
|
||||
KernelTCPUsage MemoryData `json:"kernel_tcp_usage,omitempty"`
|
||||
// usage of memory pages by NUMA node
|
||||
// see chapter 5.6 of memory controller documentation
|
||||
PageUsageByNUMA PageUsageByNUMA `json:"page_usage_by_numa,omitempty"`
|
||||
// if true, memory usage is accounted for throughout a hierarchy of cgroups.
|
||||
UseHierarchy bool `json:"use_hierarchy"`
|
||||
|
||||
Stats map[string]uint64 `json:"stats,omitempty"`
|
||||
}
|
||||
|
||||
// PageUsageByNUMA groups two sets of per-page-type statistics: the embedded
// PageUsageByNUMAInner and a second PageUsageByNUMAInner under Hierarchical.
// NOTE(review): presumably the embedded set is the cgroup's own usage and
// Hierarchical includes descendants -- confirm against the code that fills it.
type PageUsageByNUMA struct {
	// Embedding is used as types can't be recursive.
	PageUsageByNUMAInner
	// Hierarchical has the same shape as the embedded statistics.
	Hierarchical PageUsageByNUMAInner `json:"hierarchical,omitempty"`
}
|
||||
|
||||
// PageUsageByNUMAInner holds one PageStats record per page category:
// total, file, anon and unevictable.
type PageUsageByNUMAInner struct {
	Total       PageStats `json:"total,omitempty"`
	File        PageStats `json:"file,omitempty"`
	Anon        PageStats `json:"anon,omitempty"`
	Unevictable PageStats `json:"unevictable,omitempty"`
}
|
||||
|
||||
// PageStats is a total plus a per-key breakdown for one page category.
type PageStats struct {
	// Total is the aggregate value for the category.
	Total uint64 `json:"total,omitempty"`
	// Nodes is the breakdown keyed by a uint8.
	// NOTE(review): presumably the key is the NUMA node id -- confirm
	// against the parser that populates it.
	Nodes map[uint8]uint64 `json:"nodes,omitempty"`
}
|
||||
|
||||
type PidsStats struct {
|
||||
// number of pids in the cgroup
|
||||
Current uint64 `json:"current,omitempty"`
|
||||
|
41
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/BUILD
generated
vendored
41
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/BUILD
generated
vendored
@@ -3,73 +3,66 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library")
|
||||
go_library(
|
||||
name = "go_default_library",
|
||||
srcs = [
|
||||
"apply_nosystemd.go",
|
||||
"apply_systemd.go",
|
||||
"unified_hierarchy.go",
|
||||
"common.go",
|
||||
"unsupported.go",
|
||||
"user.go",
|
||||
"v1.go",
|
||||
"v2.go",
|
||||
],
|
||||
importmap = "k8s.io/kubernetes/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd",
|
||||
importpath = "github.com/opencontainers/runc/libcontainer/cgroups/systemd",
|
||||
visibility = ["//visibility:public"],
|
||||
deps = select({
|
||||
deps = [
|
||||
"//vendor/github.com/coreos/go-systemd/v22/dbus:go_default_library",
|
||||
"//vendor/github.com/godbus/dbus/v5:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/devices:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
|
||||
"//vendor/github.com/pkg/errors:go_default_library",
|
||||
"//vendor/github.com/sirupsen/logrus:go_default_library",
|
||||
] + select({
|
||||
"@io_bazel_rules_go//go/platform:android": [
|
||||
"//vendor/github.com/coreos/go-systemd/dbus:go_default_library",
|
||||
"//vendor/github.com/godbus/dbus:go_default_library",
|
||||
"//vendor/github.com/cyphar/filepath-securejoin:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
|
||||
"//vendor/github.com/pkg/errors:go_default_library",
|
||||
"//vendor/github.com/sirupsen/logrus:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/system:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:darwin": [
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:dragonfly": [
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:freebsd": [
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:ios": [
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:linux": [
|
||||
"//vendor/github.com/coreos/go-systemd/dbus:go_default_library",
|
||||
"//vendor/github.com/godbus/dbus:go_default_library",
|
||||
"//vendor/github.com/cyphar/filepath-securejoin:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
|
||||
"//vendor/github.com/pkg/errors:go_default_library",
|
||||
"//vendor/github.com/sirupsen/logrus:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/system:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:nacl": [
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:netbsd": [
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:openbsd": [
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:plan9": [
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:solaris": [
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:windows": [
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
|
||||
],
|
||||
"//conditions:default": [],
|
||||
}),
|
||||
|
424
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/common.go
generated
vendored
Normal file
424
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/common.go
generated
vendored
Normal file
@@ -0,0 +1,424 @@
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||
dbus "github.com/godbus/dbus/v5"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/devices"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
var (
|
||||
connOnce sync.Once
|
||||
connDbus *systemdDbus.Conn
|
||||
connErr error
|
||||
|
||||
versionOnce sync.Once
|
||||
version int
|
||||
versionErr error
|
||||
)
|
||||
|
||||
// IsRunningSystemd checks whether the host was booted with systemd as its
// init system. Like systemd's `sd_booted(3)`, it tests whether
// /run/systemd/system exists and is a directory (the path is inspected with
// Lstat, so a symlink at that location does not count).
// http://www.freedesktop.org/software/systemd/man/sd_booted.html
//
// NOTE: This function comes from package github.com/coreos/go-systemd/util.
// It was borrowed here to avoid a dependency on cgo.
func IsRunningSystemd() bool {
	if fi, err := os.Lstat("/run/systemd/system"); err == nil {
		return fi.IsDir()
	}
	return false
}
|
||||
|
||||
// ExpandSlice turns a systemd slice name into its cgroup path. systemd
// encodes the slice hierarchy with `-`, so every dash-separated prefix of the
// name becomes one path element: test-a-b.slice expands to
// /test.slice/test-a.slice/test-a-b.slice. The special name "-.slice" maps to
// the root, "/".
//
// An error is returned when the name does not end in ".slice", contains a
// "/", or has an empty component (".slice", "test--a.slice", "-test.slice").
func ExpandSlice(slice string) (string, error) {
	const suffix = ".slice"

	// Must end with ".slice" and must not contain path separators. A bare
	// ".slice" passes this check but is rejected below as an empty component.
	if !strings.HasSuffix(slice, suffix) || strings.Contains(slice, "/") {
		return "", fmt.Errorf("invalid slice name: %s", slice)
	}

	name := strings.TrimSuffix(slice, suffix)
	if name == "-" {
		// "-.slice" is the root slice.
		return "/", nil
	}

	var (
		expanded strings.Builder
		prefix   string
	)
	for _, part := range strings.Split(name, "-") {
		if part == "" {
			return "", fmt.Errorf("invalid slice name: %s", slice)
		}
		// Each level is named after all components seen so far.
		prefix += part
		expanded.WriteString("/" + prefix + suffix)
		prefix += "-"
	}
	return expanded.String(), nil
}
|
||||
|
||||
func groupPrefix(ruleType configs.DeviceType) (string, error) {
|
||||
switch ruleType {
|
||||
case configs.BlockDevice:
|
||||
return "block-", nil
|
||||
case configs.CharDevice:
|
||||
return "char-", nil
|
||||
default:
|
||||
return "", errors.Errorf("device type %v has no group prefix", ruleType)
|
||||
}
|
||||
}
|
||||
|
||||
// findDeviceGroup tries to find the device group name (as listed in
// /proc/devices) with the type prefixed as required for DeviceAllow, for a
// given (type, major) combination. If more than one device group exists, the
// first one listed in /proc/devices is returned.
//
// It returns the empty string with a nil error when no matching group is
// found, and an error when /proc/devices cannot be opened, read or parsed, or
// when ruleType has no group prefix.
func findDeviceGroup(ruleType configs.DeviceType, ruleMajor int64) (string, error) {
	fh, err := os.Open("/proc/devices")
	if err != nil {
		return "", err
	}
	defer fh.Close()

	prefix, err := groupPrefix(ruleType)
	if err != nil {
		return "", err
	}

	scanner := bufio.NewScanner(fh)
	// currentType tracks which section of the file ("Block devices:" or
	// "Character devices:") the scanner is currently in.
	var currentType configs.DeviceType
	for scanner.Scan() {
		// We need to strip spaces because the first number is column-aligned.
		line := strings.TrimSpace(scanner.Text())

		// Handle the "header" lines.
		switch line {
		case "Block devices:":
			currentType = configs.BlockDevice
			continue
		case "Character devices:":
			currentType = configs.CharDevice
			continue
		case "":
			continue
		}

		// Skip lines unrelated to our type.
		if currentType != ruleType {
			continue
		}

		// Parse out the (major, name).
		var (
			currMajor int64
			currName  string
		)
		if n, err := fmt.Sscanf(line, "%d %s", &currMajor, &currName); err != nil || n != 2 {
			if err == nil {
				err = errors.Errorf("wrong number of fields")
			}
			return "", errors.Wrapf(err, "scan /proc/devices line %q", line)
		}

		if currMajor == ruleMajor {
			return prefix + currName, nil
		}
	}
	if err := scanner.Err(); err != nil {
		return "", errors.Wrap(err, "reading /proc/devices")
	}
	// Couldn't find the device group.
	return "", nil
}
|
||||
|
||||
// generateDeviceProperties takes the configured device rules and generates a
// corresponding set of systemd properties to configure the devices correctly.
//
// The result always contains a DevicePolicy property and an empty DeviceAllow
// property (which resets the list); for allow-list rule sets a second
// DeviceAllow property with the translated entries is appended. Rules that
// cannot be expressed for systemd are logged and left out.
func generateDeviceProperties(rules []*configs.DeviceRule) ([]systemdDbus.Property, error) {
	// DeviceAllow is the type "a(ss)" which means we need a temporary struct
	// to represent it in Go.
	type deviceAllowEntry struct {
		Path  string
		Perms string
	}

	properties := []systemdDbus.Property{
		// Always run in the strictest white-list mode.
		newProp("DevicePolicy", "strict"),
		// Empty the DeviceAllow array before filling it.
		newProp("DeviceAllow", []deviceAllowEntry{}),
	}

	// Figure out the set of rules.
	configEmu := &devices.Emulator{}
	for _, rule := range rules {
		if err := configEmu.Apply(*rule); err != nil {
			return nil, errors.Wrap(err, "apply rule for systemd")
		}
	}
	// systemd doesn't support blacklists. So we log a warning, and tell
	// systemd to act as a deny-all whitelist. This ruleset will be replaced
	// with our normal fallback code. This may result in spurious errors, but
	// the only other option is to error out here.
	if configEmu.IsBlacklist() {
		// However, if we're dealing with an allow-all rule then we can do it.
		if configEmu.IsAllowAll() {
			return []systemdDbus.Property{
				// Allow everything by setting the policy to "auto" and
				// removing all DeviceAllow rules.
				// NOTE(review): the previous comment called this "white-list
				// mode"; presumably systemd applies no device restriction in
				// this configuration -- confirm against
				// systemd.resource-control(5).
				newProp("DevicePolicy", "auto"),
				newProp("DeviceAllow", []deviceAllowEntry{}),
			}, nil
		}
		logrus.Warn("systemd doesn't support blacklist device rules -- applying temporary deny-all rule")
		return properties, nil
	}

	// Now generate the set of rules we actually need to apply. Unlike the
	// normal devices cgroup, in "strict" mode systemd defaults to a deny-all
	// whitelist which is the default for devices.Emulator.
	baseEmu := &devices.Emulator{}
	finalRules, err := baseEmu.Transition(configEmu)
	if err != nil {
		return nil, errors.Wrap(err, "get simplified rules for systemd")
	}
	var deviceAllowList []deviceAllowEntry
	for _, rule := range finalRules {
		if !rule.Allow {
			// Should never happen.
			return nil, errors.Errorf("[internal error] cannot add deny rule to systemd DeviceAllow list: %v", *rule)
		}
		switch rule.Type {
		case configs.BlockDevice, configs.CharDevice:
		default:
			// Should never happen.
			return nil, errors.Errorf("invalid device type for DeviceAllow: %v", rule.Type)
		}

		entry := deviceAllowEntry{
			Perms: string(rule.Permissions),
		}

		// systemd has a fairly odd (though understandable) syntax here, and
		// because of the OCI configuration format we have to do quite a bit of
		// trickery to convert things:
		//
		//  * Concrete rules with non-wildcard major/minor numbers have to use
		//    /dev/{block,char} paths. This is slightly odd because it means
		//    that we cannot add whitelist rules for devices that don't exist,
		//    but there's not too much we can do about that.
		//
		//    However, path globbing is not supported for path-based rules so
		//    we need to handle wildcards in some other manner.
		//
		//  * Wildcard-minor rules have to specify a "device group name" (the
		//    second column in /proc/devices).
		//
		//  * Wildcard (major and minor) rules can just specify a glob with the
		//    type ("char-*" or "block-*").
		//
		// The only type of rule we can't handle is wildcard-major rules, and
		// so we'll give a warning in that case (note that the fallback code
		// will insert any rules systemd couldn't handle). What amazing fun.

		if rule.Major == configs.Wildcard {
			// "_ *:n _" rules aren't supported by systemd.
			if rule.Minor != configs.Wildcard {
				logrus.Warnf("systemd doesn't support '*:n' device rules -- temporarily ignoring rule: %v", *rule)
				continue
			}

			// "_ *:* _" rules just wildcard everything.
			prefix, err := groupPrefix(rule.Type)
			if err != nil {
				return nil, err
			}
			entry.Path = prefix + "*"
		} else if rule.Minor == configs.Wildcard {
			// "_ n:* _" rules require a device group from /proc/devices.
			group, err := findDeviceGroup(rule.Type, rule.Major)
			if err != nil {
				return nil, errors.Wrapf(err, "find device '%v/%d'", rule.Type, rule.Major)
			}
			if group == "" {
				// Couldn't find a group.
				logrus.Warnf("could not find device group for '%v/%d' in /proc/devices -- temporarily ignoring rule: %v", rule.Type, rule.Major, *rule)
				continue
			}
			entry.Path = group
		} else {
			// "_ n:m _" rules are just a path in /dev/{block,char}/.
			switch rule.Type {
			case configs.BlockDevice:
				entry.Path = fmt.Sprintf("/dev/block/%d:%d", rule.Major, rule.Minor)
			case configs.CharDevice:
				entry.Path = fmt.Sprintf("/dev/char/%d:%d", rule.Major, rule.Minor)
			}
		}
		deviceAllowList = append(deviceAllowList, entry)
	}

	properties = append(properties, newProp("DeviceAllow", deviceAllowList))
	return properties, nil
}
|
||||
|
||||
// getDbusConnection lazily initializes the package-level systemd dbus
// connection and returns it together with the error from that
// initialization.
//
// The initialization is guarded by connOnce, so only the first call's
// rootless value decides which constructor is used (NewUserSystemdDbus when
// rootless, systemdDbus.New otherwise). Later calls return the cached
// connDbus/connErr regardless of their argument, including a cached failure.
func getDbusConnection(rootless bool) (*systemdDbus.Conn, error) {
	connOnce.Do(func() {
		if rootless {
			connDbus, connErr = NewUserSystemdDbus()
		} else {
			connDbus, connErr = systemdDbus.New()
		}
	})
	return connDbus, connErr
}
|
||||
|
||||
func newProp(name string, units interface{}) systemdDbus.Property {
|
||||
return systemdDbus.Property{
|
||||
Name: name,
|
||||
Value: dbus.MakeVariant(units),
|
||||
}
|
||||
}
|
||||
|
||||
func getUnitName(c *configs.Cgroup) string {
|
||||
// by default, we create a scope unless the user explicitly asks for a slice.
|
||||
if !strings.HasSuffix(c.Name, ".slice") {
|
||||
return fmt.Sprintf("%s-%s.scope", c.ScopePrefix, c.Name)
|
||||
}
|
||||
return c.Name
|
||||
}
|
||||
|
||||
// isUnitExists returns true if the error is that a systemd unit already exists.
|
||||
func isUnitExists(err error) bool {
|
||||
if err != nil {
|
||||
if dbusError, ok := err.(dbus.Error); ok {
|
||||
return strings.Contains(dbusError.Name, "org.freedesktop.systemd1.UnitExists")
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func startUnit(dbusConnection *systemdDbus.Conn, unitName string, properties []systemdDbus.Property) error {
|
||||
statusChan := make(chan string, 1)
|
||||
if _, err := dbusConnection.StartTransientUnit(unitName, "replace", properties, statusChan); err == nil {
|
||||
select {
|
||||
case s := <-statusChan:
|
||||
close(statusChan)
|
||||
// Please refer to https://godoc.org/github.com/coreos/go-systemd/dbus#Conn.StartUnit
|
||||
if s != "done" {
|
||||
dbusConnection.ResetFailedUnit(unitName)
|
||||
return errors.Errorf("error creating systemd unit `%s`: got `%s`", unitName, s)
|
||||
}
|
||||
case <-time.After(time.Second):
|
||||
logrus.Warnf("Timed out while waiting for StartTransientUnit(%s) completion signal from dbus. Continuing...", unitName)
|
||||
}
|
||||
} else if !isUnitExists(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func stopUnit(dbusConnection *systemdDbus.Conn, unitName string) error {
|
||||
statusChan := make(chan string, 1)
|
||||
if _, err := dbusConnection.StopUnit(unitName, "replace", statusChan); err == nil {
|
||||
select {
|
||||
case s := <-statusChan:
|
||||
close(statusChan)
|
||||
// Please refer to https://godoc.org/github.com/coreos/go-systemd/dbus#Conn.StartUnit
|
||||
if s != "done" {
|
||||
logrus.Warnf("error removing unit `%s`: got `%s`. Continuing...", unitName, s)
|
||||
}
|
||||
case <-time.After(time.Second):
|
||||
logrus.Warnf("Timed out while waiting for StopUnit(%s) completion signal from dbus. Continuing...", unitName)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func systemdVersion(conn *systemdDbus.Conn) (int, error) {
|
||||
versionOnce.Do(func() {
|
||||
version = -1
|
||||
verStr, err := conn.GetManagerProperty("Version")
|
||||
if err != nil {
|
||||
versionErr = err
|
||||
return
|
||||
}
|
||||
|
||||
version, versionErr = systemdVersionAtoi(verStr)
|
||||
return
|
||||
})
|
||||
|
||||
return version, versionErr
|
||||
}
|
||||
|
||||
func systemdVersionAtoi(verStr string) (int, error) {
|
||||
// verStr should be of the form:
|
||||
// "v245.4-1.fc32", "245", "v245-1.fc32", "245-1.fc32"
|
||||
// all the input strings include quotes, and the output int should be 245
|
||||
// thus, we unconditionally remove the `"v`
|
||||
// and then match on the first integer we can grab
|
||||
re := regexp.MustCompile(`"?v?([0-9]+)`)
|
||||
matches := re.FindStringSubmatch(verStr)
|
||||
if len(matches) < 2 {
|
||||
return 0, errors.Errorf("can't parse version %s: incorrect number of matches %v", verStr, matches)
|
||||
}
|
||||
ver, err := strconv.Atoi(matches[1])
|
||||
return ver, errors.Wrapf(err, "can't parse version %s", verStr)
|
||||
}
|
||||
|
||||
func addCpuQuota(conn *systemdDbus.Conn, properties *[]systemdDbus.Property, quota int64, period uint64) {
|
||||
if period != 0 {
|
||||
// systemd only supports CPUQuotaPeriodUSec since v242
|
||||
sdVer, err := systemdVersion(conn)
|
||||
if err != nil {
|
||||
logrus.Warnf("systemdVersion: %s", err)
|
||||
} else if sdVer >= 242 {
|
||||
*properties = append(*properties,
|
||||
newProp("CPUQuotaPeriodUSec", period))
|
||||
}
|
||||
}
|
||||
if quota != 0 || period != 0 {
|
||||
// corresponds to USEC_INFINITY in systemd
|
||||
cpuQuotaPerSecUSec := uint64(math.MaxUint64)
|
||||
if quota > 0 {
|
||||
if period == 0 {
|
||||
// assume the default kernel value of 100000 us (100 ms), same for v1 and v2.
|
||||
// v1: https://www.kernel.org/doc/html/latest/scheduler/sched-bwc.html and
|
||||
// v2: https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html
|
||||
period = 100000
|
||||
}
|
||||
// systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota
|
||||
// (integer percentage of CPU) internally. This means that if a fractional percent of
|
||||
// CPU is indicated by Resources.CpuQuota, we need to round up to the nearest
|
||||
// 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect.
|
||||
cpuQuotaPerSecUSec = uint64(quota*1000000) / period
|
||||
if cpuQuotaPerSecUSec%10000 != 0 {
|
||||
cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
|
||||
}
|
||||
}
|
||||
*properties = append(*properties,
|
||||
newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
|
||||
}
|
||||
}
|
312
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/unified_hierarchy.go
generated
vendored
312
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/unified_hierarchy.go
generated
vendored
@@ -1,312 +0,0 @@
|
||||
// +build linux
|
||||
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/dbus"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type UnifiedManager struct {
|
||||
mu sync.Mutex
|
||||
Cgroups *configs.Cgroup
|
||||
Paths map[string]string
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) Apply(pid int) error {
|
||||
var (
|
||||
c = m.Cgroups
|
||||
unitName = getUnitName(c)
|
||||
slice = "system.slice"
|
||||
properties []systemdDbus.Property
|
||||
)
|
||||
|
||||
if c.Paths != nil {
|
||||
paths := make(map[string]string)
|
||||
for name, path := range c.Paths {
|
||||
_, err := getSubsystemPath(m.Cgroups, name)
|
||||
if err != nil {
|
||||
// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
|
||||
if cgroups.IsNotFound(err) {
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
paths[name] = path
|
||||
}
|
||||
m.Paths = paths
|
||||
return cgroups.EnterPid(m.Paths, pid)
|
||||
}
|
||||
|
||||
if c.Parent != "" {
|
||||
slice = c.Parent
|
||||
}
|
||||
|
||||
properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name))
|
||||
|
||||
// if we create a slice, the parent is defined via a Wants=
|
||||
if strings.HasSuffix(unitName, ".slice") {
|
||||
properties = append(properties, systemdDbus.PropWants(slice))
|
||||
} else {
|
||||
// otherwise, we use Slice=
|
||||
properties = append(properties, systemdDbus.PropSlice(slice))
|
||||
}
|
||||
|
||||
// only add pid if its valid, -1 is used w/ general slice creation.
|
||||
if pid != -1 {
|
||||
properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
|
||||
}
|
||||
|
||||
// Check if we can delegate. This is only supported on systemd versions 218 and above.
|
||||
if !strings.HasSuffix(unitName, ".slice") {
|
||||
// Assume scopes always support delegation.
|
||||
properties = append(properties, newProp("Delegate", true))
|
||||
}
|
||||
|
||||
// Always enable accounting, this gets us the same behaviour as the fs implementation,
|
||||
// plus the kernel has some problems with joining the memory cgroup at a later time.
|
||||
properties = append(properties,
|
||||
newProp("MemoryAccounting", true),
|
||||
newProp("CPUAccounting", true),
|
||||
newProp("BlockIOAccounting", true))
|
||||
|
||||
// Assume DefaultDependencies= will always work (the check for it was previously broken.)
|
||||
properties = append(properties,
|
||||
newProp("DefaultDependencies", false))
|
||||
|
||||
if c.Resources.Memory != 0 {
|
||||
properties = append(properties,
|
||||
newProp("MemoryLimit", uint64(c.Resources.Memory)))
|
||||
}
|
||||
|
||||
if c.Resources.CpuShares != 0 {
|
||||
properties = append(properties,
|
||||
newProp("CPUShares", c.Resources.CpuShares))
|
||||
}
|
||||
|
||||
// cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd.
|
||||
if c.Resources.CpuQuota != 0 && c.Resources.CpuPeriod != 0 {
|
||||
// corresponds to USEC_INFINITY in systemd
|
||||
// if USEC_INFINITY is provided, CPUQuota is left unbound by systemd
|
||||
// always setting a property value ensures we can apply a quota and remove it later
|
||||
cpuQuotaPerSecUSec := uint64(math.MaxUint64)
|
||||
if c.Resources.CpuQuota > 0 {
|
||||
// systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota
|
||||
// (integer percentage of CPU) internally. This means that if a fractional percent of
|
||||
// CPU is indicated by Resources.CpuQuota, we need to round up to the nearest
|
||||
// 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect.
|
||||
cpuQuotaPerSecUSec = uint64(c.Resources.CpuQuota*1000000) / c.Resources.CpuPeriod
|
||||
if cpuQuotaPerSecUSec%10000 != 0 {
|
||||
cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
|
||||
}
|
||||
}
|
||||
properties = append(properties,
|
||||
newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
|
||||
}
|
||||
|
||||
if c.Resources.BlkioWeight != 0 {
|
||||
properties = append(properties,
|
||||
newProp("BlockIOWeight", uint64(c.Resources.BlkioWeight)))
|
||||
}
|
||||
|
||||
if c.Resources.PidsLimit > 0 {
|
||||
properties = append(properties,
|
||||
newProp("TasksAccounting", true),
|
||||
newProp("TasksMax", uint64(c.Resources.PidsLimit)))
|
||||
}
|
||||
|
||||
// We have to set kernel memory here, as we can't change it once
|
||||
// processes have been attached to the cgroup.
|
||||
if c.Resources.KernelMemory != 0 {
|
||||
if err := setKernelMemory(c); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
statusChan := make(chan string, 1)
|
||||
if _, err := theConn.StartTransientUnit(unitName, "replace", properties, statusChan); err == nil {
|
||||
select {
|
||||
case <-statusChan:
|
||||
case <-time.After(time.Second):
|
||||
logrus.Warnf("Timed out while waiting for StartTransientUnit(%s) completion signal from dbus. Continuing...", unitName)
|
||||
}
|
||||
} else if !isUnitExists(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := joinCgroupsV2(c, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
path, err := getSubsystemPath(m.Cgroups, "")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.Paths = map[string]string{
|
||||
"pids": path,
|
||||
"memory": path,
|
||||
"io": path,
|
||||
"cpu": path,
|
||||
"devices": path,
|
||||
"cpuset": path,
|
||||
"freezer": path,
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) Destroy() error {
|
||||
if m.Cgroups.Paths != nil {
|
||||
return nil
|
||||
}
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
theConn.StopUnit(getUnitName(m.Cgroups), "replace", nil)
|
||||
if err := cgroups.RemovePaths(m.Paths); err != nil {
|
||||
return err
|
||||
}
|
||||
m.Paths = make(map[string]string)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) GetPaths() map[string]string {
|
||||
m.mu.Lock()
|
||||
paths := m.Paths
|
||||
m.mu.Unlock()
|
||||
return paths
|
||||
}
|
||||
func (m *UnifiedManager) GetUnifiedPath() (string, error) {
|
||||
unifiedPath := ""
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
for k, v := range m.Paths {
|
||||
if unifiedPath == "" {
|
||||
unifiedPath = v
|
||||
} else if v != unifiedPath {
|
||||
return unifiedPath,
|
||||
errors.Errorf("expected %q path to be unified path %q, got %q", k, unifiedPath, v)
|
||||
}
|
||||
}
|
||||
if unifiedPath == "" {
|
||||
// FIXME: unified path could be detected even when no controller is available
|
||||
return unifiedPath, errors.New("cannot detect unified path")
|
||||
}
|
||||
return unifiedPath, nil
|
||||
}
|
||||
// createCgroupsv2Path creates every missing directory of path below the
// cgroup v2 mount point (/sys/fs/cgroup) and, in each ancestor of the final
// directory, enables all controllers listed in the root cgroup.controllers
// file by writing them to cgroup.subtree_control.
//
// path must be /sys/fs/cgroup itself or a location beneath it (after
// cleaning); anything else is rejected before any file is touched. When the
// call fails, the directories it created itself are removed again, deepest
// first. If the root lists no controllers, directories are still created but
// nothing is written to cgroup.subtree_control.
//
// The named result Err exists so the deferred cleanup can observe the final
// outcome of the call.
func createCgroupsv2Path(path string) (Err error) {
	const root = "/sys/fs/cgroup"

	// Validate on the cleaned form so that neither a sibling such as
	// /sys/fs/cgroupfoo nor a ".." component can lead outside the mount.
	cleaned := filepath.Clean(path)
	if cleaned != root && !strings.HasPrefix(cleaned, root+"/") {
		return fmt.Errorf("invalid cgroup path %s", path)
	}

	content, err := ioutil.ReadFile(root + "/cgroup.controllers")
	if err != nil {
		return err
	}

	// Build "+ctrl1 +ctrl2 ...". Fields copes with any whitespace and yields
	// an empty list for an empty file, in which case nothing is written.
	var subtreeControl []byte
	if controllers := strings.Fields(string(content)); len(controllers) > 0 {
		subtreeControl = []byte("+" + strings.Join(controllers, " +"))
	}

	// cleaned is "/sys/fs/cgroup[/...]", so element 3 onwards is "cgroup"
	// followed by the components to create.
	current := "/sys/fs"
	elements := strings.Split(cleaned, "/")[3:]
	for i, e := range elements {
		current = filepath.Join(current, e)
		if i > 0 {
			if err := os.Mkdir(current, 0755); err != nil {
				if !os.IsExist(err) {
					return err
				}
			} else {
				// If the directory was created, be sure it is not left around
				// on errors. Capture this iteration's path: current keeps
				// changing, and the deferred call only runs at return.
				created := current
				defer func() {
					if Err != nil {
						os.Remove(created)
					}
				}()
			}
		}
		// Controllers are enabled for children, so the final directory is skipped.
		if i < len(elements)-1 && subtreeControl != nil {
			if err := ioutil.WriteFile(filepath.Join(current, "cgroup.subtree_control"), subtreeControl, 0755); err != nil {
				return err
			}
		}
	}
	return nil
}
|
||||
|
||||
func joinCgroupsV2(c *configs.Cgroup, pid int) error {
|
||||
path, err := getSubsystemPath(c, "memory")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return createCgroupsv2Path(path)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) fsManager() (cgroups.Manager, error) {
|
||||
path, err := m.GetUnifiedPath()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return fs2.NewManager(m.Cgroups, path, false)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) Freeze(state configs.FreezerState) error {
|
||||
fsMgr, err := m.fsManager()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return fsMgr.Freeze(state)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) GetPids() ([]int, error) {
|
||||
path, err := m.GetUnifiedPath()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cgroups.GetPids(path)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) GetAllPids() ([]int, error) {
|
||||
path, err := m.GetUnifiedPath()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cgroups.GetAllPids(path)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) GetStats() (*cgroups.Stats, error) {
|
||||
fsMgr, err := m.fsManager()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return fsMgr.GetStats()
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) Set(container *configs.Config) error {
|
||||
fsMgr, err := m.fsManager()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return fsMgr.Set(container)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.Cgroups, nil
|
||||
}
|
@@ -3,7 +3,7 @@
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"errors"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
@@ -14,54 +14,58 @@ type Manager struct {
|
||||
Paths map[string]string
|
||||
}
|
||||
|
||||
func UseSystemd() bool {
|
||||
func IsRunningSystemd() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func NewSystemdCgroupsManager() (func(config *configs.Cgroup, paths map[string]string) cgroups.Manager, error) {
|
||||
return nil, fmt.Errorf("Systemd not supported")
|
||||
return nil, errors.New("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) Apply(pid int) error {
|
||||
return fmt.Errorf("Systemd not supported")
|
||||
return errors.New("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) GetPids() ([]int, error) {
|
||||
return nil, fmt.Errorf("Systemd not supported")
|
||||
return nil, errors.New("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) GetAllPids() ([]int, error) {
|
||||
return nil, fmt.Errorf("Systemd not supported")
|
||||
return nil, errors.New("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) Destroy() error {
|
||||
return fmt.Errorf("Systemd not supported")
|
||||
return errors.New("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) GetPaths() map[string]string {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) GetUnifiedPath() (string, error) {
|
||||
return "", fmt.Errorf("Systemd not supported")
|
||||
func (m *Manager) Path(_ string) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func (m *Manager) GetStats() (*cgroups.Stats, error) {
|
||||
return nil, fmt.Errorf("Systemd not supported")
|
||||
return nil, errors.New("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) Set(container *configs.Config) error {
|
||||
return fmt.Errorf("Systemd not supported")
|
||||
return errors.New("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) Freeze(state configs.FreezerState) error {
|
||||
return fmt.Errorf("Systemd not supported")
|
||||
return errors.New("Systemd not supported")
|
||||
}
|
||||
|
||||
func Freeze(c *configs.Cgroup, state configs.FreezerState) error {
|
||||
return fmt.Errorf("Systemd not supported")
|
||||
return errors.New("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return nil, fmt.Errorf("Systemd not supported")
|
||||
return nil, errors.New("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) Exists() bool {
|
||||
return false
|
||||
}
|
106
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/user.go
generated
vendored
Normal file
106
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/user.go
generated
vendored
Normal file
@@ -0,0 +1,106 @@
|
||||
// +build linux
|
||||
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||
dbus "github.com/godbus/dbus/v5"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// NewUserSystemdDbus creates a connection for systemd user-instance.
|
||||
func NewUserSystemdDbus() (*systemdDbus.Conn, error) {
|
||||
addr, err := DetectUserDbusSessionBusAddress()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
uid, err := DetectUID()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return systemdDbus.NewConnection(func() (*dbus.Conn, error) {
|
||||
conn, err := dbus.Dial(addr)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "error while dialing %q", addr)
|
||||
}
|
||||
methods := []dbus.Auth{dbus.AuthExternal(strconv.Itoa(uid))}
|
||||
err = conn.Auth(methods)
|
||||
if err != nil {
|
||||
conn.Close()
|
||||
return nil, errors.Wrapf(err, "error while authenticating connection, address=%q, UID=%d", addr, uid)
|
||||
}
|
||||
if err = conn.Hello(); err != nil {
|
||||
conn.Close()
|
||||
return nil, errors.Wrapf(err, "error while sending Hello message, address=%q, UID=%d", addr, uid)
|
||||
}
|
||||
return conn, nil
|
||||
})
|
||||
}
|
||||
|
||||
// DetectUID detects UID from the OwnerUID field of `busctl --user status`
|
||||
// if running in userNS. The value corresponds to sd_bus_creds_get_owner_uid(3) .
|
||||
//
|
||||
// Otherwise returns os.Getuid() .
|
||||
func DetectUID() (int, error) {
|
||||
if !system.RunningInUserNS() {
|
||||
return os.Getuid(), nil
|
||||
}
|
||||
b, err := exec.Command("busctl", "--user", "--no-pager", "status").CombinedOutput()
|
||||
if err != nil {
|
||||
return -1, errors.Wrap(err, "could not execute `busctl --user --no-pager status`")
|
||||
}
|
||||
scanner := bufio.NewScanner(bytes.NewReader(b))
|
||||
for scanner.Scan() {
|
||||
s := strings.TrimSpace(scanner.Text())
|
||||
if strings.HasPrefix(s, "OwnerUID=") {
|
||||
uidStr := strings.TrimPrefix(s, "OwnerUID=")
|
||||
i, err := strconv.Atoi(uidStr)
|
||||
if err != nil {
|
||||
return -1, errors.Wrapf(err, "could not detect the OwnerUID: %s", s)
|
||||
}
|
||||
return i, nil
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return -1, err
|
||||
}
|
||||
return -1, errors.New("could not detect the OwnerUID")
|
||||
}
|
||||
|
||||
// DetectUserDbusSessionBusAddress returns $DBUS_SESSION_BUS_ADDRESS if set.
|
||||
// Otherwise returns "unix:path=$XDG_RUNTIME_DIR/bus" if $XDG_RUNTIME_DIR/bus exists.
|
||||
// Otherwise parses the value from `systemctl --user show-environment` .
|
||||
func DetectUserDbusSessionBusAddress() (string, error) {
|
||||
if env := os.Getenv("DBUS_SESSION_BUS_ADDRESS"); env != "" {
|
||||
return env, nil
|
||||
}
|
||||
if xdr := os.Getenv("XDG_RUNTIME_DIR"); xdr != "" {
|
||||
busPath := filepath.Join(xdr, "bus")
|
||||
if _, err := os.Stat(busPath); err == nil {
|
||||
busAddress := "unix:path=" + busPath
|
||||
return busAddress, nil
|
||||
}
|
||||
}
|
||||
b, err := exec.Command("systemctl", "--user", "--no-pager", "show-environment").CombinedOutput()
|
||||
if err != nil {
|
||||
return "", errors.Wrapf(err, "could not execute `systemctl --user --no-pager show-environment`, output=%q", string(b))
|
||||
}
|
||||
scanner := bufio.NewScanner(bytes.NewReader(b))
|
||||
for scanner.Scan() {
|
||||
s := strings.TrimSpace(scanner.Text())
|
||||
if strings.HasPrefix(s, "DBUS_SESSION_BUS_ADDRESS=") {
|
||||
return strings.TrimPrefix(s, "DBUS_SESSION_BUS_ADDRESS="), nil
|
||||
}
|
||||
}
|
||||
return "", errors.New("could not detect DBUS_SESSION_BUS_ADDRESS from `systemctl --user --no-pager show-environment`")
|
||||
}
|
@@ -4,27 +4,30 @@ package systemd
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/dbus"
|
||||
"github.com/godbus/dbus"
|
||||
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type LegacyManager struct {
|
||||
type legacyManager struct {
|
||||
mu sync.Mutex
|
||||
Cgroups *configs.Cgroup
|
||||
Paths map[string]string
|
||||
cgroups *configs.Cgroup
|
||||
paths map[string]string
|
||||
}
|
||||
|
||||
func NewLegacyManager(cg *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return &legacyManager{
|
||||
cgroups: cg,
|
||||
paths: paths,
|
||||
}
|
||||
}
|
||||
|
||||
type subsystem interface {
|
||||
@@ -65,88 +68,56 @@ var legacySubsystems = subsystemSet{
|
||||
&fs.NameGroup{GroupName: "name=systemd"},
|
||||
}
|
||||
|
||||
const (
|
||||
testScopeWait = 4
|
||||
testSliceWait = 4
|
||||
)
|
||||
func genV1ResourcesProperties(c *configs.Cgroup, conn *systemdDbus.Conn) ([]systemdDbus.Property, error) {
|
||||
var properties []systemdDbus.Property
|
||||
r := c.Resources
|
||||
|
||||
var (
|
||||
connLock sync.Mutex
|
||||
theConn *systemdDbus.Conn
|
||||
)
|
||||
|
||||
func newProp(name string, units interface{}) systemdDbus.Property {
|
||||
return systemdDbus.Property{
|
||||
Name: name,
|
||||
Value: dbus.MakeVariant(units),
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE: This function comes from package github.com/coreos/go-systemd/util
|
||||
// It was borrowed here to avoid a dependency on cgo.
|
||||
//
|
||||
// IsRunningSystemd checks whether the host was booted with systemd as its init
|
||||
// system. This functions similarly to systemd's `sd_booted(3)`: internally, it
|
||||
// checks whether /run/systemd/system/ exists and is a directory.
|
||||
// http://www.freedesktop.org/software/systemd/man/sd_booted.html
|
||||
func isRunningSystemd() bool {
|
||||
fi, err := os.Lstat("/run/systemd/system")
|
||||
deviceProperties, err := generateDeviceProperties(r.Devices)
|
||||
if err != nil {
|
||||
return false
|
||||
return nil, err
|
||||
}
|
||||
return fi.IsDir()
|
||||
properties = append(properties, deviceProperties...)
|
||||
|
||||
if r.Memory != 0 {
|
||||
properties = append(properties,
|
||||
newProp("MemoryLimit", uint64(r.Memory)))
|
||||
}
|
||||
|
||||
if r.CpuShares != 0 {
|
||||
properties = append(properties,
|
||||
newProp("CPUShares", r.CpuShares))
|
||||
}
|
||||
|
||||
addCpuQuota(conn, &properties, r.CpuQuota, r.CpuPeriod)
|
||||
|
||||
if r.BlkioWeight != 0 {
|
||||
properties = append(properties,
|
||||
newProp("BlockIOWeight", uint64(r.BlkioWeight)))
|
||||
}
|
||||
|
||||
if r.PidsLimit > 0 || r.PidsLimit == -1 {
|
||||
properties = append(properties,
|
||||
newProp("TasksAccounting", true),
|
||||
newProp("TasksMax", uint64(r.PidsLimit)))
|
||||
}
|
||||
|
||||
return properties, nil
|
||||
}
|
||||
|
||||
func UseSystemd() bool {
|
||||
if !isRunningSystemd() {
|
||||
return false
|
||||
}
|
||||
|
||||
connLock.Lock()
|
||||
defer connLock.Unlock()
|
||||
|
||||
if theConn == nil {
|
||||
var err error
|
||||
theConn, err = systemdDbus.New()
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func NewSystemdCgroupsManager() (func(config *configs.Cgroup, paths map[string]string) cgroups.Manager, error) {
|
||||
if !isRunningSystemd() {
|
||||
return nil, fmt.Errorf("systemd not running on this host, can't use systemd as a cgroups.Manager")
|
||||
}
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
return func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return &UnifiedManager{
|
||||
Cgroups: config,
|
||||
Paths: paths,
|
||||
}
|
||||
}, nil
|
||||
}
|
||||
return func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return &LegacyManager{
|
||||
Cgroups: config,
|
||||
Paths: paths,
|
||||
}
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (m *LegacyManager) Apply(pid int) error {
|
||||
func (m *legacyManager) Apply(pid int) error {
|
||||
var (
|
||||
c = m.Cgroups
|
||||
c = m.cgroups
|
||||
unitName = getUnitName(c)
|
||||
slice = "system.slice"
|
||||
properties []systemdDbus.Property
|
||||
)
|
||||
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
if c.Paths != nil {
|
||||
paths := make(map[string]string)
|
||||
for name, path := range c.Paths {
|
||||
_, err := getSubsystemPath(m.Cgroups, name)
|
||||
_, err := getSubsystemPath(m.cgroups, name)
|
||||
if err != nil {
|
||||
// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
|
||||
if cgroups.IsNotFound(err) {
|
||||
@@ -156,8 +127,8 @@ func (m *LegacyManager) Apply(pid int) error {
|
||||
}
|
||||
paths[name] = path
|
||||
}
|
||||
m.Paths = paths
|
||||
return cgroups.EnterPid(m.Paths, pid)
|
||||
m.paths = paths
|
||||
return cgroups.EnterPid(m.paths, pid)
|
||||
}
|
||||
|
||||
if c.Parent != "" {
|
||||
@@ -196,63 +167,26 @@ func (m *LegacyManager) Apply(pid int) error {
|
||||
properties = append(properties,
|
||||
newProp("DefaultDependencies", false))
|
||||
|
||||
if c.Resources.Memory != 0 {
|
||||
properties = append(properties,
|
||||
newProp("MemoryLimit", uint64(c.Resources.Memory)))
|
||||
dbusConnection, err := getDbusConnection(false)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if c.Resources.CpuShares != 0 {
|
||||
properties = append(properties,
|
||||
newProp("CPUShares", c.Resources.CpuShares))
|
||||
}
|
||||
|
||||
// cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd.
|
||||
if c.Resources.CpuQuota != 0 && c.Resources.CpuPeriod != 0 {
|
||||
// corresponds to USEC_INFINITY in systemd
|
||||
// if USEC_INFINITY is provided, CPUQuota is left unbound by systemd
|
||||
// always setting a property value ensures we can apply a quota and remove it later
|
||||
cpuQuotaPerSecUSec := uint64(math.MaxUint64)
|
||||
if c.Resources.CpuQuota > 0 {
|
||||
// systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota
|
||||
// (integer percentage of CPU) internally. This means that if a fractional percent of
|
||||
// CPU is indicated by Resources.CpuQuota, we need to round up to the nearest
|
||||
// 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect.
|
||||
cpuQuotaPerSecUSec = uint64(c.Resources.CpuQuota*1000000) / c.Resources.CpuPeriod
|
||||
if cpuQuotaPerSecUSec%10000 != 0 {
|
||||
cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
|
||||
}
|
||||
}
|
||||
properties = append(properties,
|
||||
newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
|
||||
}
|
||||
|
||||
if c.Resources.BlkioWeight != 0 {
|
||||
properties = append(properties,
|
||||
newProp("BlockIOWeight", uint64(c.Resources.BlkioWeight)))
|
||||
}
|
||||
|
||||
if c.Resources.PidsLimit > 0 {
|
||||
properties = append(properties,
|
||||
newProp("TasksAccounting", true),
|
||||
newProp("TasksMax", uint64(c.Resources.PidsLimit)))
|
||||
resourcesProperties, err := genV1ResourcesProperties(c, dbusConnection)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
properties = append(properties, resourcesProperties...)
|
||||
properties = append(properties, c.SystemdProps...)
|
||||
|
||||
// We have to set kernel memory here, as we can't change it once
|
||||
// processes have been attached to the cgroup.
|
||||
if c.Resources.KernelMemory != 0 {
|
||||
if err := setKernelMemory(c); err != nil {
|
||||
if err := enableKmem(c); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
statusChan := make(chan string, 1)
|
||||
if _, err := theConn.StartTransientUnit(unitName, "replace", properties, statusChan); err == nil {
|
||||
select {
|
||||
case <-statusChan:
|
||||
case <-time.After(time.Second):
|
||||
logrus.Warnf("Timed out while waiting for StartTransientUnit(%s) completion signal from dbus. Continuing...", unitName)
|
||||
}
|
||||
} else if !isUnitExists(err) {
|
||||
if err := startUnit(dbusConnection, unitName, properties); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -262,7 +196,7 @@ func (m *LegacyManager) Apply(pid int) error {
|
||||
|
||||
paths := make(map[string]string)
|
||||
for _, s := range legacySubsystems {
|
||||
subsystemPath, err := getSubsystemPath(m.Cgroups, s.Name())
|
||||
subsystemPath, err := getSubsystemPath(m.cgroups, s.Name())
|
||||
if err != nil {
|
||||
// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
|
||||
if cgroups.IsNotFound(err) {
|
||||
@@ -272,33 +206,33 @@ func (m *LegacyManager) Apply(pid int) error {
|
||||
}
|
||||
paths[s.Name()] = subsystemPath
|
||||
}
|
||||
m.Paths = paths
|
||||
m.paths = paths
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *LegacyManager) Destroy() error {
|
||||
if m.Cgroups.Paths != nil {
|
||||
func (m *legacyManager) Destroy() error {
|
||||
if m.cgroups.Paths != nil {
|
||||
return nil
|
||||
}
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
theConn.StopUnit(getUnitName(m.Cgroups), "replace", nil)
|
||||
if err := cgroups.RemovePaths(m.Paths); err != nil {
|
||||
|
||||
dbusConnection, err := getDbusConnection(false)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.Paths = make(map[string]string)
|
||||
unitName := getUnitName(m.cgroups)
|
||||
if err := stopUnit(dbusConnection, unitName); err != nil {
|
||||
return err
|
||||
}
|
||||
m.paths = make(map[string]string)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *LegacyManager) GetPaths() map[string]string {
|
||||
func (m *legacyManager) Path(subsys string) string {
|
||||
m.mu.Lock()
|
||||
paths := m.Paths
|
||||
m.mu.Unlock()
|
||||
return paths
|
||||
}
|
||||
|
||||
func (m *LegacyManager) GetUnifiedPath() (string, error) {
|
||||
return "", errors.New("unified path is only supported when running in unified mode")
|
||||
defer m.mu.Unlock()
|
||||
return m.paths[subsys]
|
||||
}
|
||||
|
||||
func join(c *configs.Cgroup, subsystem string, pid int) (string, error) {
|
||||
@@ -352,40 +286,6 @@ func joinCgroups(c *configs.Cgroup, pid int) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// ExpandSlice turns a systemd slice name into the cgroup path of that slice.
// systemd encodes the slice hierarchy with `-`, so each dash-separated prefix
// of the name is itself a slice: test-a-b.slice expands to
// /test.slice/test-a.slice/test-a-b.slice. The root slice, -.slice, expands
// to "/".
//
// The name must end in ".slice", must not contain a path separator and must
// not have empty components (".slice", "test--a.slice" and "-test.slice" are
// all rejected).
func ExpandSlice(slice string) (string, error) {
	const suffix = ".slice"

	invalid := func() (string, error) {
		return "", fmt.Errorf("invalid slice name: %s", slice)
	}

	switch {
	case !strings.HasSuffix(slice, suffix):
		// Also covers names shorter than the suffix.
		return invalid()
	case strings.Contains(slice, "/"):
		// Path-separators are not allowed.
		return invalid()
	}

	name := strings.TrimSuffix(slice, suffix)
	if name == "-" {
		// The root slice maps straight to the root path.
		return "/", nil
	}

	parts := strings.Split(name, "-")
	var expanded strings.Builder
	for i, part := range parts {
		// An empty component means a leading, trailing or doubled dash,
		// or a bare ".slice".
		if part == "" {
			return invalid()
		}
		// Each level is named after all components up to and including this one.
		expanded.WriteByte('/')
		expanded.WriteString(strings.Join(parts[:i+1], "-"))
		expanded.WriteString(suffix)
	}
	return expanded.String(), nil
}
|
||||
|
||||
func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
|
||||
mountpoint, err := cgroups.FindCgroupMountpoint(c.Path, subsystem)
|
||||
if err != nil {
|
||||
@@ -412,46 +312,46 @@ func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
|
||||
return filepath.Join(mountpoint, initPath, slice, getUnitName(c)), nil
|
||||
}
|
||||
|
||||
func (m *LegacyManager) Freeze(state configs.FreezerState) error {
|
||||
path, err := getSubsystemPath(m.Cgroups, "freezer")
|
||||
func (m *legacyManager) Freeze(state configs.FreezerState) error {
|
||||
path, err := getSubsystemPath(m.cgroups, "freezer")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
prevState := m.Cgroups.Resources.Freezer
|
||||
m.Cgroups.Resources.Freezer = state
|
||||
prevState := m.cgroups.Resources.Freezer
|
||||
m.cgroups.Resources.Freezer = state
|
||||
freezer, err := legacySubsystems.Get("freezer")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = freezer.Set(path, m.Cgroups)
|
||||
err = freezer.Set(path, m.cgroups)
|
||||
if err != nil {
|
||||
m.Cgroups.Resources.Freezer = prevState
|
||||
m.cgroups.Resources.Freezer = prevState
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *LegacyManager) GetPids() ([]int, error) {
|
||||
path, err := getSubsystemPath(m.Cgroups, "devices")
|
||||
func (m *legacyManager) GetPids() ([]int, error) {
|
||||
path, err := getSubsystemPath(m.cgroups, "devices")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cgroups.GetPids(path)
|
||||
}
|
||||
|
||||
func (m *LegacyManager) GetAllPids() ([]int, error) {
|
||||
path, err := getSubsystemPath(m.Cgroups, "devices")
|
||||
func (m *legacyManager) GetAllPids() ([]int, error) {
|
||||
path, err := getSubsystemPath(m.cgroups, "devices")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cgroups.GetAllPids(path)
|
||||
}
|
||||
|
||||
func (m *LegacyManager) GetStats() (*cgroups.Stats, error) {
|
||||
func (m *legacyManager) GetStats() (*cgroups.Stats, error) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
stats := cgroups.NewStats()
|
||||
for name, path := range m.Paths {
|
||||
for name, path := range m.paths {
|
||||
sys, err := legacySubsystems.Get(name)
|
||||
if err == errSubsystemDoesNotExist || !cgroups.PathExists(path) {
|
||||
continue
|
||||
@@ -464,41 +364,65 @@ func (m *LegacyManager) GetStats() (*cgroups.Stats, error) {
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
func (m *LegacyManager) Set(container *configs.Config) error {
|
||||
func (m *legacyManager) Set(container *configs.Config) error {
|
||||
// If Paths are set, then we are just joining cgroups paths
|
||||
// and there is no need to set any values.
|
||||
if m.Cgroups.Paths != nil {
|
||||
if m.cgroups.Paths != nil {
|
||||
return nil
|
||||
}
|
||||
dbusConnection, err := getDbusConnection(false)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
properties, err := genV1ResourcesProperties(container.Cgroups, dbusConnection)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Figure out the current freezer state, so we can revert to it after we
|
||||
// temporarily freeze the container.
|
||||
targetFreezerState, err := m.GetFreezerState()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if targetFreezerState == configs.Undefined {
|
||||
targetFreezerState = configs.Thawed
|
||||
}
|
||||
|
||||
// We have to freeze the container while systemd sets the cgroup settings.
|
||||
// The reason for this is that systemd's application of DeviceAllow rules
|
||||
// is done disruptively, resulting in spurious errors to common devices
|
||||
// (unlike our fs driver, they will happily write deny-all rules to running
|
||||
// containers). So we freeze the container to avoid them hitting the cgroup
|
||||
// error. But if the freezer cgroup isn't supported, we just warn about it.
|
||||
if err := m.Freeze(configs.Frozen); err != nil {
|
||||
logrus.Infof("freeze container before SetUnitProperties failed: %v", err)
|
||||
}
|
||||
|
||||
if err := dbusConnection.SetUnitProperties(getUnitName(container.Cgroups), true, properties...); err != nil {
|
||||
_ = m.Freeze(targetFreezerState)
|
||||
return err
|
||||
}
|
||||
|
||||
// Reset freezer state before we apply the configuration, to avoid clashing
|
||||
// with the freezer setting in the configuration.
|
||||
_ = m.Freeze(targetFreezerState)
|
||||
|
||||
for _, sys := range legacySubsystems {
|
||||
// Get the subsystem path, but don't error out for not found cgroups.
|
||||
path, err := getSubsystemPath(container.Cgroups, sys.Name())
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := sys.Set(path, container.Cgroups); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if m.Paths["cpu"] != "" {
|
||||
if err := fs.CheckCpushares(m.Paths["cpu"], container.Cgroups.Resources.CpuShares); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func getUnitName(c *configs.Cgroup) string {
|
||||
// by default, we create a scope unless the user explicitly asks for a slice.
|
||||
if !strings.HasSuffix(c.Name, ".slice") {
|
||||
return fmt.Sprintf("%s-%s.scope", c.ScopePrefix, c.Name)
|
||||
}
|
||||
return c.Name
|
||||
}
|
||||
|
||||
func setKernelMemory(c *configs.Cgroup) error {
|
||||
func enableKmem(c *configs.Cgroup) error {
|
||||
path, err := getSubsystemPath(c, "memory")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
@@ -519,16 +443,28 @@ func setKernelMemory(c *configs.Cgroup) error {
|
||||
return fs.EnableKernelMemoryAccounting(path)
|
||||
}
|
||||
|
||||
// isUnitExists returns true if the error is that a systemd unit already exists.
|
||||
func isUnitExists(err error) bool {
|
||||
if err != nil {
|
||||
if dbusError, ok := err.(dbus.Error); ok {
|
||||
return strings.Contains(dbusError.Name, "org.freedesktop.systemd1.UnitExists")
|
||||
}
|
||||
}
|
||||
return false
|
||||
func (m *legacyManager) GetPaths() map[string]string {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
return m.paths
|
||||
}
|
||||
|
||||
func (m *LegacyManager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.Cgroups, nil
|
||||
func (m *legacyManager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.cgroups, nil
|
||||
}
|
||||
|
||||
func (m *legacyManager) GetFreezerState() (configs.FreezerState, error) {
|
||||
path, err := getSubsystemPath(m.cgroups, "freezer")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return configs.Undefined, err
|
||||
}
|
||||
freezer, err := legacySubsystems.Get("freezer")
|
||||
if err != nil {
|
||||
return configs.Undefined, err
|
||||
}
|
||||
return freezer.(*fs.FreezerGroup).GetState(path)
|
||||
}
|
||||
|
||||
func (m *legacyManager) Exists() bool {
|
||||
return cgroups.PathExists(m.Path("devices"))
|
||||
}
|
357
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v2.go
generated
vendored
Normal file
357
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v2.go
generated
vendored
Normal file
@@ -0,0 +1,357 @@
|
||||
// +build linux
|
||||
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||
securejoin "github.com/cyphar/filepath-securejoin"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type unifiedManager struct {
|
||||
mu sync.Mutex
|
||||
cgroups *configs.Cgroup
|
||||
// path is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope"
|
||||
path string
|
||||
rootless bool
|
||||
}
|
||||
|
||||
func NewUnifiedManager(config *configs.Cgroup, path string, rootless bool) cgroups.Manager {
|
||||
return &unifiedManager{
|
||||
cgroups: config,
|
||||
path: path,
|
||||
rootless: rootless,
|
||||
}
|
||||
}
|
||||
|
||||
func genV2ResourcesProperties(c *configs.Cgroup, conn *systemdDbus.Conn) ([]systemdDbus.Property, error) {
|
||||
var properties []systemdDbus.Property
|
||||
r := c.Resources
|
||||
|
||||
// NOTE: This is of questionable correctness because we insert our own
|
||||
// devices eBPF program later. Two programs with identical rules
|
||||
// aren't the end of the world, but it is a bit concerning. However
|
||||
// it's unclear if systemd removes all eBPF programs attached when
|
||||
// doing SetUnitProperties...
|
||||
deviceProperties, err := generateDeviceProperties(r.Devices)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
properties = append(properties, deviceProperties...)
|
||||
|
||||
if r.Memory != 0 {
|
||||
properties = append(properties,
|
||||
newProp("MemoryMax", uint64(r.Memory)))
|
||||
}
|
||||
if r.MemoryReservation != 0 {
|
||||
properties = append(properties,
|
||||
newProp("MemoryLow", uint64(r.MemoryReservation)))
|
||||
}
|
||||
|
||||
swap, err := cgroups.ConvertMemorySwapToCgroupV2Value(r.MemorySwap, r.Memory)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if swap != 0 {
|
||||
properties = append(properties,
|
||||
newProp("MemorySwapMax", uint64(swap)))
|
||||
}
|
||||
|
||||
if r.CpuWeight != 0 {
|
||||
properties = append(properties,
|
||||
newProp("CPUWeight", r.CpuWeight))
|
||||
}
|
||||
|
||||
addCpuQuota(conn, &properties, r.CpuQuota, r.CpuPeriod)
|
||||
|
||||
if r.PidsLimit > 0 || r.PidsLimit == -1 {
|
||||
properties = append(properties,
|
||||
newProp("TasksAccounting", true),
|
||||
newProp("TasksMax", uint64(r.PidsLimit)))
|
||||
}
|
||||
|
||||
// ignore r.KernelMemory
|
||||
|
||||
return properties, nil
|
||||
}
|
||||
|
||||
func (m *unifiedManager) Apply(pid int) error {
|
||||
var (
|
||||
c = m.cgroups
|
||||
unitName = getUnitName(c)
|
||||
properties []systemdDbus.Property
|
||||
)
|
||||
|
||||
if c.Paths != nil {
|
||||
return cgroups.WriteCgroupProc(m.path, pid)
|
||||
}
|
||||
|
||||
slice := "system.slice"
|
||||
if m.rootless {
|
||||
slice = "user.slice"
|
||||
}
|
||||
if c.Parent != "" {
|
||||
slice = c.Parent
|
||||
}
|
||||
|
||||
properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name))
|
||||
|
||||
// if we create a slice, the parent is defined via a Wants=
|
||||
if strings.HasSuffix(unitName, ".slice") {
|
||||
properties = append(properties, systemdDbus.PropWants(slice))
|
||||
} else {
|
||||
// otherwise, we use Slice=
|
||||
properties = append(properties, systemdDbus.PropSlice(slice))
|
||||
}
|
||||
|
||||
// only add pid if its valid, -1 is used w/ general slice creation.
|
||||
if pid != -1 {
|
||||
properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
|
||||
}
|
||||
|
||||
// Check if we can delegate. This is only supported on systemd versions 218 and above.
|
||||
if !strings.HasSuffix(unitName, ".slice") {
|
||||
// Assume scopes always support delegation.
|
||||
properties = append(properties, newProp("Delegate", true))
|
||||
}
|
||||
|
||||
// Always enable accounting, this gets us the same behaviour as the fs implementation,
|
||||
// plus the kernel has some problems with joining the memory cgroup at a later time.
|
||||
properties = append(properties,
|
||||
newProp("MemoryAccounting", true),
|
||||
newProp("CPUAccounting", true),
|
||||
newProp("IOAccounting", true))
|
||||
|
||||
// Assume DefaultDependencies= will always work (the check for it was previously broken.)
|
||||
properties = append(properties,
|
||||
newProp("DefaultDependencies", false))
|
||||
|
||||
dbusConnection, err := getDbusConnection(m.rootless)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
resourcesProperties, err := genV2ResourcesProperties(c, dbusConnection)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
properties = append(properties, resourcesProperties...)
|
||||
properties = append(properties, c.SystemdProps...)
|
||||
|
||||
if err := startUnit(dbusConnection, unitName, properties); err != nil {
|
||||
return errors.Wrapf(err, "error while starting unit %q with properties %+v", unitName, properties)
|
||||
}
|
||||
|
||||
if err = m.initPath(); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := fs2.CreateCgroupPath(m.path, m.cgroups); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *unifiedManager) Destroy() error {
|
||||
if m.cgroups.Paths != nil {
|
||||
return nil
|
||||
}
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
dbusConnection, err := getDbusConnection(m.rootless)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
unitName := getUnitName(m.cgroups)
|
||||
if err := stopUnit(dbusConnection, unitName); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// XXX this is probably not needed, systemd should handle it
|
||||
err = os.Remove(m.path)
|
||||
if err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *unifiedManager) Path(_ string) string {
|
||||
return m.path
|
||||
}
|
||||
|
||||
// getSliceFull value is used in initPath.
|
||||
// The value is incompatible with systemdDbus.PropSlice.
|
||||
func (m *unifiedManager) getSliceFull() (string, error) {
|
||||
c := m.cgroups
|
||||
slice := "system.slice"
|
||||
if m.rootless {
|
||||
slice = "user.slice"
|
||||
}
|
||||
if c.Parent != "" {
|
||||
var err error
|
||||
slice, err = ExpandSlice(c.Parent)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
|
||||
if m.rootless {
|
||||
dbusConnection, err := getDbusConnection(m.rootless)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
// managerCGQuoted is typically "/user.slice/user-${uid}.slice/user@${uid}.service" including the quote symbols
|
||||
managerCGQuoted, err := dbusConnection.GetManagerProperty("ControlGroup")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
managerCG, err := strconv.Unquote(managerCGQuoted)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
slice = filepath.Join(managerCG, slice)
|
||||
}
|
||||
|
||||
// an example of the final slice in rootless: "/user.slice/user-1001.slice/user@1001.service/user.slice"
|
||||
// NOTE: systemdDbus.PropSlice requires the "/user.slice/user-1001.slice/user@1001.service/" prefix NOT to be specified.
|
||||
return slice, nil
|
||||
}
|
||||
|
||||
func (m *unifiedManager) initPath() error {
|
||||
if m.path != "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
sliceFull, err := m.getSliceFull()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
c := m.cgroups
|
||||
path := filepath.Join(sliceFull, getUnitName(c))
|
||||
path, err = securejoin.SecureJoin(fs2.UnifiedMountpoint, path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// an example of the final path in rootless:
|
||||
// "/sys/fs/cgroup/user.slice/user-1001.slice/user@1001.service/user.slice/libpod-132ff0d72245e6f13a3bbc6cdc5376886897b60ac59eaa8dea1df7ab959cbf1c.scope"
|
||||
m.path = path
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *unifiedManager) fsManager() (cgroups.Manager, error) {
|
||||
if err := m.initPath(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return fs2.NewManager(m.cgroups, m.path, m.rootless)
|
||||
}
|
||||
|
||||
func (m *unifiedManager) Freeze(state configs.FreezerState) error {
|
||||
fsMgr, err := m.fsManager()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return fsMgr.Freeze(state)
|
||||
}
|
||||
|
||||
func (m *unifiedManager) GetPids() ([]int, error) {
|
||||
if err := m.initPath(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cgroups.GetPids(m.path)
|
||||
}
|
||||
|
||||
func (m *unifiedManager) GetAllPids() ([]int, error) {
|
||||
if err := m.initPath(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cgroups.GetAllPids(m.path)
|
||||
}
|
||||
|
||||
func (m *unifiedManager) GetStats() (*cgroups.Stats, error) {
|
||||
fsMgr, err := m.fsManager()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return fsMgr.GetStats()
|
||||
}
|
||||
|
||||
func (m *unifiedManager) Set(container *configs.Config) error {
|
||||
dbusConnection, err := getDbusConnection(m.rootless)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
properties, err := genV2ResourcesProperties(m.cgroups, dbusConnection)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Figure out the current freezer state, so we can revert to it after we
|
||||
// temporarily freeze the container.
|
||||
targetFreezerState, err := m.GetFreezerState()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if targetFreezerState == configs.Undefined {
|
||||
targetFreezerState = configs.Thawed
|
||||
}
|
||||
|
||||
// We have to freeze the container while systemd sets the cgroup settings.
|
||||
// The reason for this is that systemd's application of DeviceAllow rules
|
||||
// is done disruptively, resulting in spurrious errors to common devices
|
||||
// (unlike our fs driver, they will happily write deny-all rules to running
|
||||
// containers). So we freeze the container to avoid them hitting the cgroup
|
||||
// error. But if the freezer cgroup isn't supported, we just warn about it.
|
||||
if err := m.Freeze(configs.Frozen); err != nil {
|
||||
logrus.Infof("freeze container before SetUnitProperties failed: %v", err)
|
||||
}
|
||||
|
||||
if err := dbusConnection.SetUnitProperties(getUnitName(m.cgroups), true, properties...); err != nil {
|
||||
_ = m.Freeze(targetFreezerState)
|
||||
return errors.Wrap(err, "error while setting unit properties")
|
||||
}
|
||||
|
||||
// Reset freezer state before we apply the configuration, to avoid clashing
|
||||
// with the freezer setting in the configuration.
|
||||
_ = m.Freeze(targetFreezerState)
|
||||
|
||||
fsMgr, err := m.fsManager()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return fsMgr.Set(container)
|
||||
}
|
||||
|
||||
func (m *unifiedManager) GetPaths() map[string]string {
|
||||
paths := make(map[string]string, 1)
|
||||
paths[""] = m.path
|
||||
return paths
|
||||
}
|
||||
|
||||
func (m *unifiedManager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.cgroups, nil
|
||||
}
|
||||
|
||||
func (m *unifiedManager) GetFreezerState() (configs.FreezerState, error) {
|
||||
fsMgr, err := m.fsManager()
|
||||
if err != nil {
|
||||
return configs.Undefined, err
|
||||
}
|
||||
return fsMgr.GetFreezerState()
|
||||
}
|
||||
|
||||
func (m *unifiedManager) Exists() bool {
|
||||
return cgroups.PathExists(m.path)
|
||||
}
|
357
vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
generated
vendored
357
vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
generated
vendored
@@ -4,6 +4,7 @@ package cgroups
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
@@ -12,7 +13,6 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
units "github.com/docker/go-units"
|
||||
@@ -20,7 +20,6 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
CgroupNamePrefix = "name="
|
||||
CgroupProcesses = "cgroup.procs"
|
||||
unifiedMountpoint = "/sys/fs/cgroup"
|
||||
)
|
||||
@@ -40,8 +39,8 @@ var HugePageSizeUnitList = []string{"B", "KB", "MB", "GB", "TB", "PB"}
|
||||
// IsCgroup2UnifiedMode returns whether we are running in cgroup v2 unified mode.
|
||||
func IsCgroup2UnifiedMode() bool {
|
||||
isUnifiedOnce.Do(func() {
|
||||
var st syscall.Statfs_t
|
||||
if err := syscall.Statfs(unifiedMountpoint, &st); err != nil {
|
||||
var st unix.Statfs_t
|
||||
if err := unix.Statfs(unifiedMountpoint, &st); err != nil {
|
||||
panic("cannot statfs cgroup root")
|
||||
}
|
||||
isUnified = st.Type == unix.CGROUP2_SUPER_MAGIC
|
||||
@@ -49,191 +48,19 @@ func IsCgroup2UnifiedMode() bool {
|
||||
return isUnified
|
||||
}
|
||||
|
||||
// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
|
||||
func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) {
|
||||
if IsCgroup2UnifiedMode() {
|
||||
return unifiedMountpoint, nil
|
||||
}
|
||||
mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem)
|
||||
return mnt, err
|
||||
}
|
||||
|
||||
func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, error) {
|
||||
// We are not using mount.GetMounts() because it's super-inefficient,
|
||||
// parsing it directly sped up x10 times because of not using Sscanf.
|
||||
// It was one of two major performance drawbacks in container start.
|
||||
if !isSubsystemAvailable(subsystem) {
|
||||
return "", "", NewNotFoundError(subsystem)
|
||||
}
|
||||
|
||||
f, err := os.Open("/proc/self/mountinfo")
|
||||
if err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
if IsCgroup2UnifiedMode() {
|
||||
subsystem = ""
|
||||
}
|
||||
|
||||
return findCgroupMountpointAndRootFromReader(f, cgroupPath, subsystem)
|
||||
}
|
||||
|
||||
func findCgroupMountpointAndRootFromReader(reader io.Reader, cgroupPath, subsystem string) (string, string, error) {
|
||||
scanner := bufio.NewScanner(reader)
|
||||
for scanner.Scan() {
|
||||
txt := scanner.Text()
|
||||
fields := strings.Fields(txt)
|
||||
if len(fields) < 9 {
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(fields[4], cgroupPath) {
|
||||
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
|
||||
if (subsystem == "" && fields[9] == "cgroup2") || opt == subsystem {
|
||||
return fields[4], fields[3], nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
|
||||
return "", "", NewNotFoundError(subsystem)
|
||||
}
|
||||
|
||||
func isSubsystemAvailable(subsystem string) bool {
|
||||
if IsCgroup2UnifiedMode() {
|
||||
controllers, err := GetAllSubsystems()
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
for _, c := range controllers {
|
||||
if c == subsystem {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
_, avail := cgroups[subsystem]
|
||||
return avail
|
||||
}
|
||||
|
||||
func GetClosestMountpointAncestor(dir, mountinfo string) string {
|
||||
deepestMountPoint := ""
|
||||
for _, mountInfoEntry := range strings.Split(mountinfo, "\n") {
|
||||
mountInfoParts := strings.Fields(mountInfoEntry)
|
||||
if len(mountInfoParts) < 5 {
|
||||
continue
|
||||
}
|
||||
mountPoint := mountInfoParts[4]
|
||||
if strings.HasPrefix(mountPoint, deepestMountPoint) && strings.HasPrefix(dir, mountPoint) {
|
||||
deepestMountPoint = mountPoint
|
||||
}
|
||||
}
|
||||
return deepestMountPoint
|
||||
}
|
||||
|
||||
func FindCgroupMountpointDir() (string, error) {
|
||||
f, err := os.Open("/proc/self/mountinfo")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
scanner := bufio.NewScanner(f)
|
||||
for scanner.Scan() {
|
||||
text := scanner.Text()
|
||||
fields := strings.Split(text, " ")
|
||||
// Safe as mountinfo encodes mountpoints with spaces as \040.
|
||||
index := strings.Index(text, " - ")
|
||||
postSeparatorFields := strings.Fields(text[index+3:])
|
||||
numPostFields := len(postSeparatorFields)
|
||||
|
||||
// This is an error as we can't detect if the mount is for "cgroup"
|
||||
if numPostFields == 0 {
|
||||
return "", fmt.Errorf("Found no fields post '-' in %q", text)
|
||||
}
|
||||
|
||||
if postSeparatorFields[0] == "cgroup" || postSeparatorFields[0] == "cgroup2" {
|
||||
// Check that the mount is properly formatted.
|
||||
if numPostFields < 3 {
|
||||
return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
|
||||
}
|
||||
|
||||
return filepath.Dir(fields[4]), nil
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return "", NewNotFoundError("cgroup")
|
||||
}
|
||||
|
||||
type Mount struct {
|
||||
Mountpoint string
|
||||
Root string
|
||||
Subsystems []string
|
||||
}
|
||||
|
||||
func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
|
||||
if len(m.Subsystems) == 0 {
|
||||
return "", fmt.Errorf("no subsystem for mount")
|
||||
}
|
||||
|
||||
return getControllerPath(m.Subsystems[0], cgroups)
|
||||
}
|
||||
|
||||
func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, error) {
|
||||
res := make([]Mount, 0, len(ss))
|
||||
scanner := bufio.NewScanner(mi)
|
||||
numFound := 0
|
||||
for scanner.Scan() && numFound < len(ss) {
|
||||
txt := scanner.Text()
|
||||
sepIdx := strings.Index(txt, " - ")
|
||||
if sepIdx == -1 {
|
||||
return nil, fmt.Errorf("invalid mountinfo format")
|
||||
}
|
||||
if txt[sepIdx+3:sepIdx+10] == "cgroup2" || txt[sepIdx+3:sepIdx+9] != "cgroup" {
|
||||
continue
|
||||
}
|
||||
fields := strings.Split(txt, " ")
|
||||
m := Mount{
|
||||
Mountpoint: fields[4],
|
||||
Root: fields[3],
|
||||
}
|
||||
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
|
||||
seen, known := ss[opt]
|
||||
if !known || (!all && seen) {
|
||||
continue
|
||||
}
|
||||
ss[opt] = true
|
||||
if strings.HasPrefix(opt, CgroupNamePrefix) {
|
||||
opt = opt[len(CgroupNamePrefix):]
|
||||
}
|
||||
m.Subsystems = append(m.Subsystems, opt)
|
||||
numFound++
|
||||
}
|
||||
if len(m.Subsystems) > 0 || all {
|
||||
res = append(res, m)
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// GetCgroupMounts returns the mounts for the cgroup subsystems.
|
||||
// all indicates whether to return just the first instance or all the mounts.
|
||||
// This function should not be used from cgroupv2 code, as in this case
|
||||
// all the controllers are available under the constant unifiedMountpoint.
|
||||
func GetCgroupMounts(all bool) ([]Mount, error) {
|
||||
if IsCgroup2UnifiedMode() {
|
||||
// TODO: remove cgroupv2 case once all external users are converted
|
||||
availableControllers, err := GetAllSubsystems()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -246,22 +73,7 @@ func GetCgroupMounts(all bool) ([]Mount, error) {
|
||||
return []Mount{m}, nil
|
||||
}
|
||||
|
||||
f, err := os.Open("/proc/self/mountinfo")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
allSubsystems, err := ParseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
allMap := make(map[string]bool)
|
||||
for s := range allSubsystems {
|
||||
allMap[s] = false
|
||||
}
|
||||
return getCgroupMountsHelper(allMap, f, all)
|
||||
return getCgroupMountsV1(all)
|
||||
}
|
||||
|
||||
// GetAllSubsystems returns all the cgroup subsystems supported by the kernel
|
||||
@@ -305,61 +117,8 @@ func GetAllSubsystems() ([]string, error) {
|
||||
return subsystems, nil
|
||||
}
|
||||
|
||||
// GetOwnCgroup returns the relative path to the cgroup docker is running in.
|
||||
func GetOwnCgroup(subsystem string) (string, error) {
|
||||
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return getControllerPath(subsystem, cgroups)
|
||||
}
|
||||
|
||||
func GetOwnCgroupPath(subsystem string) (string, error) {
|
||||
cgroup, err := GetOwnCgroup(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return getCgroupPathHelper(subsystem, cgroup)
|
||||
}
|
||||
|
||||
func GetInitCgroup(subsystem string) (string, error) {
|
||||
cgroups, err := ParseCgroupFile("/proc/1/cgroup")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return getControllerPath(subsystem, cgroups)
|
||||
}
|
||||
|
||||
func GetInitCgroupPath(subsystem string) (string, error) {
|
||||
cgroup, err := GetInitCgroup(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return getCgroupPathHelper(subsystem, cgroup)
|
||||
}
|
||||
|
||||
func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
|
||||
mnt, root, err := FindCgroupMountpointAndRoot("", subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// This is needed for nested containers, because in /proc/self/cgroup we
|
||||
// see paths from host, which don't exist in container.
|
||||
relCgroup, err := filepath.Rel(root, cgroup)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return filepath.Join(mnt, relCgroup), nil
|
||||
}
|
||||
|
||||
func readProcsFile(dir string) ([]int, error) {
|
||||
f, err := os.Open(filepath.Join(dir, CgroupProcesses))
|
||||
func readProcsFile(file string) ([]int, error) {
|
||||
f, err := os.Open(file)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -379,11 +138,18 @@ func readProcsFile(dir string) ([]int, error) {
|
||||
out = append(out, pid)
|
||||
}
|
||||
}
|
||||
return out, nil
|
||||
return out, s.Err()
|
||||
}
|
||||
|
||||
// ParseCgroupFile parses the given cgroup file, typically from
|
||||
// /proc/<pid>/cgroup, into a map of subgroups to cgroup names.
|
||||
// ParseCgroupFile parses the given cgroup file, typically /proc/self/cgroup
|
||||
// or /proc/<pid>/cgroup, into a map of subsystems to cgroup paths, e.g.
|
||||
// "cpu": "/user.slice/user-1000.slice"
|
||||
// "pids": "/user.slice/user-1000.slice"
|
||||
// etc.
|
||||
//
|
||||
// Note that for cgroup v2 unified hierarchy, there are no per-controller
|
||||
// cgroup paths, so the resulting map will have a single element where the key
|
||||
// is empty string ("") and the value is the cgroup path the <pid> is in.
|
||||
func ParseCgroupFile(path string) (map[string]string, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
@@ -423,22 +189,6 @@ func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
|
||||
return cgroups, nil
|
||||
}
|
||||
|
||||
func getControllerPath(subsystem string, cgroups map[string]string) (string, error) {
|
||||
if IsCgroup2UnifiedMode() {
|
||||
return "/", nil
|
||||
}
|
||||
|
||||
if p, ok := cgroups[subsystem]; ok {
|
||||
return p, nil
|
||||
}
|
||||
|
||||
if p, ok := cgroups[CgroupNamePrefix+subsystem]; ok {
|
||||
return p, nil
|
||||
}
|
||||
|
||||
return "", NewNotFoundError(subsystem)
|
||||
}
|
||||
|
||||
func PathExists(path string) bool {
|
||||
if _, err := os.Stat(path); err != nil {
|
||||
return false
|
||||
@@ -514,8 +264,8 @@ func getHugePageSizeFromFilenames(fileNames []string) ([]string, error) {
|
||||
}
|
||||
|
||||
// GetPids returns all pids, that were added to cgroup at path.
|
||||
func GetPids(path string) ([]int, error) {
|
||||
return readProcsFile(path)
|
||||
func GetPids(dir string) ([]int, error) {
|
||||
return readProcsFile(filepath.Join(dir, CgroupProcesses))
|
||||
}
|
||||
|
||||
// GetAllPids returns all pids, that were added to cgroup at path and to all its
|
||||
@@ -524,14 +274,13 @@ func GetAllPids(path string) ([]int, error) {
|
||||
var pids []int
|
||||
// collect pids from all sub-cgroups
|
||||
err := filepath.Walk(path, func(p string, info os.FileInfo, iErr error) error {
|
||||
dir, file := filepath.Split(p)
|
||||
if file != CgroupProcesses {
|
||||
return nil
|
||||
}
|
||||
if iErr != nil {
|
||||
return iErr
|
||||
}
|
||||
cPids, err := readProcsFile(dir)
|
||||
if info.IsDir() || info.Name() != CgroupProcesses {
|
||||
return nil
|
||||
}
|
||||
cPids, err := readProcsFile(p)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -568,7 +317,7 @@ func WriteCgroupProc(dir string, pid int) error {
|
||||
|
||||
// EINVAL might mean that the task being added to cgroup.procs is in state
|
||||
// TASK_NEW. We should attempt to do so again.
|
||||
if isEINVAL(err) {
|
||||
if errors.Is(err, unix.EINVAL) {
|
||||
time.Sleep(30 * time.Millisecond)
|
||||
continue
|
||||
}
|
||||
@@ -578,11 +327,53 @@ func WriteCgroupProc(dir string, pid int) error {
|
||||
return err
|
||||
}
|
||||
|
||||
func isEINVAL(err error) bool {
|
||||
switch err := err.(type) {
|
||||
case *os.PathError:
|
||||
return err.Err == unix.EINVAL
|
||||
default:
|
||||
return false
|
||||
// ConvertBlkIOToCgroupV2Value converts a cgroup v1 BlkIOWeight into the
// corresponding cgroup v2 value. The OCI spec is designed around cgroup v1,
// so the weight is mapped linearly from [10-1000] to [1-10000] using
// y = (1 + (x - 10) * 9999 / 990). A weight of 0 is returned as 0.
//
// NOTE(review): weights 1-9 lie below the documented input range; the
// unsigned subtraction wraps around for them. Confirm callers never pass
// such values.
func ConvertBlkIOToCgroupV2Value(blkIoWeight uint16) uint64 {
	weight := uint64(blkIoWeight)
	if weight == 0 {
		return 0
	}
	scaled := (weight - 10) * 9999 / 990
	return scaled + 1
}
|
||||
|
||||
// ConvertCPUSharesToCgroupV2Value converts cgroup v1 CPU shares into a
// cgroup v2 CPU weight. The OCI spec is designed around cgroup v1, so the
// shares are mapped from [2-262144] to [1-10000] using
// y = (1 + ((x - 2) * 9999) / 262142).
// 262144 comes from Linux kernel definition "#define MAX_SHARES (1UL << 18)".
//
// A value of 0 is returned as 0. Values outside the valid range are clamped
// to the nearest bound before conversion, so that a share count of 1 no
// longer underflows the unsigned subtraction and very large share counts no
// longer overflow the multiplication.
func ConvertCPUSharesToCgroupV2Value(cpuShares uint64) uint64 {
	const (
		minShares = 2
		maxShares = 262144 // MAX_SHARES (1UL << 18)
		minWeight = 1
		maxWeight = 10000
	)

	switch {
	case cpuShares == 0:
		return 0
	case cpuShares <= minShares:
		return minWeight
	case cpuShares >= maxShares:
		return maxWeight
	}

	// Within range the product is at most 262141*9999, far below the
	// uint64 limit.
	return minWeight + (cpuShares-minShares)*(maxWeight-minWeight)/(maxShares-minShares)
}
|
||||
|
||||
// ConvertMemorySwapToCgroupV2Value converts the MemorySwap value from the
// OCI spec into the value used by cgroup v2 drivers. Resources.MemorySwap
// is defined as memory+swap combined, whereas cgroup v2 tracks swap
// separately, so the memory limit is subtracted.
//
// Special values: -1 ("max") and 0 ("unset") are passed through, except
// that an unset swap together with unlimited memory (-1) yields -1.
// An error is returned when a swap limit is given without a valid memory
// limit, or when memorySwap is smaller than memory.
func ConvertMemorySwapToCgroupV2Value(memorySwap, memory int64) (int64, error) {
	switch memorySwap {
	case 0:
		// for compatibility with cgroup1 controller, set swap to unlimited in
		// case the memory is set to unlimited, and swap is not explicitly set,
		// treating the request as "set both memory and swap to unlimited".
		if memory == -1 {
			return -1, nil
		}
		// 0 is "unset", so treat as is
		return 0, nil
	case -1:
		// -1 is "max", so treat as is
		return -1, nil
	}

	// sanity checks
	switch {
	case memory == 0, memory == -1:
		return 0, errors.New("unable to set swap limit without memory limit")
	case memory < 0:
		return 0, fmt.Errorf("invalid memory value: %d", memory)
	case memorySwap < memory:
		return 0, errors.New("memory+swap limit should be >= memory limit")
	}

	return memorySwap - memory, nil
}
|
||||
|
250
vendor/github.com/opencontainers/runc/libcontainer/cgroups/v1_utils.go
generated
vendored
Normal file
250
vendor/github.com/opencontainers/runc/libcontainer/cgroups/v1_utils.go
generated
vendored
Normal file
@@ -0,0 +1,250 @@
|
||||
package cgroups
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Code in this source file is specific to cgroup v1,
|
||||
// and must not be used from any cgroup v2 code.
|
||||
|
||||
// CgroupNamePrefix is the "name=" prefix found on cgroup v1 hierarchy names.
const CgroupNamePrefix = "name="
|
||||
|
||||
// errUnified is returned by the cgroup v1 helpers in this file when they
// are called while the system runs in cgroup v2 unified mode.
var errUnified = errors.New("not implemented for cgroup v2 unified hierarchy")
|
||||
|
||||
// NotFoundError reports that no mountpoint was found for a cgroup subsystem.
type NotFoundError struct {
	// Subsystem is the name of the subsystem that was looked up.
	Subsystem string
}

// Error implements the error interface.
func (e *NotFoundError) Error() string {
	return fmt.Sprintf("mountpoint for %s not found", e.Subsystem)
}

// NewNotFoundError returns a *NotFoundError for the subsystem sub.
func NewNotFoundError(sub string) error {
	return &NotFoundError{
		Subsystem: sub,
	}
}

// IsNotFound reports whether err is a *NotFoundError, or wraps one
// anywhere in its Unwrap chain. It returns false for a nil error.
//
// errors.As is used instead of a bare type assertion so that errors that
// were annotated with extra context (e.g. via fmt.Errorf with %w) are
// still recognised.
func IsNotFound(err error) bool {
	var notFound *NotFoundError
	return errors.As(err, &notFound)
}
|
||||
|
||||
// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
|
||||
func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) {
|
||||
if IsCgroup2UnifiedMode() {
|
||||
return "", errUnified
|
||||
}
|
||||
mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem)
|
||||
return mnt, err
|
||||
}
|
||||
|
||||
func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, error) {
|
||||
if IsCgroup2UnifiedMode() {
|
||||
return "", "", errUnified
|
||||
}
|
||||
|
||||
// We are not using mount.GetMounts() because it's super-inefficient,
|
||||
// parsing it directly sped up x10 times because of not using Sscanf.
|
||||
// It was one of two major performance drawbacks in container start.
|
||||
if !isSubsystemAvailable(subsystem) {
|
||||
return "", "", NewNotFoundError(subsystem)
|
||||
}
|
||||
|
||||
f, err := os.Open("/proc/self/mountinfo")
|
||||
if err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
return findCgroupMountpointAndRootFromReader(f, cgroupPath, subsystem)
|
||||
}
|
||||
|
||||
func findCgroupMountpointAndRootFromReader(reader io.Reader, cgroupPath, subsystem string) (string, string, error) {
|
||||
scanner := bufio.NewScanner(reader)
|
||||
for scanner.Scan() {
|
||||
txt := scanner.Text()
|
||||
fields := strings.Fields(txt)
|
||||
if len(fields) < 9 {
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(fields[4], cgroupPath) {
|
||||
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
|
||||
if opt == subsystem {
|
||||
return fields[4], fields[3], nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
|
||||
return "", "", NewNotFoundError(subsystem)
|
||||
}
|
||||
|
||||
func isSubsystemAvailable(subsystem string) bool {
|
||||
if IsCgroup2UnifiedMode() {
|
||||
panic("don't call isSubsystemAvailable from cgroupv2 code")
|
||||
}
|
||||
|
||||
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
_, avail := cgroups[subsystem]
|
||||
return avail
|
||||
}
|
||||
|
||||
func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
|
||||
if len(m.Subsystems) == 0 {
|
||||
return "", fmt.Errorf("no subsystem for mount")
|
||||
}
|
||||
|
||||
return getControllerPath(m.Subsystems[0], cgroups)
|
||||
}
|
||||
|
||||
func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, error) {
|
||||
res := make([]Mount, 0, len(ss))
|
||||
scanner := bufio.NewScanner(mi)
|
||||
numFound := 0
|
||||
for scanner.Scan() && numFound < len(ss) {
|
||||
txt := scanner.Text()
|
||||
sepIdx := strings.Index(txt, " - ")
|
||||
if sepIdx == -1 {
|
||||
return nil, fmt.Errorf("invalid mountinfo format")
|
||||
}
|
||||
if txt[sepIdx+3:sepIdx+10] == "cgroup2" || txt[sepIdx+3:sepIdx+9] != "cgroup" {
|
||||
continue
|
||||
}
|
||||
fields := strings.Split(txt, " ")
|
||||
m := Mount{
|
||||
Mountpoint: fields[4],
|
||||
Root: fields[3],
|
||||
}
|
||||
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
|
||||
seen, known := ss[opt]
|
||||
if !known || (!all && seen) {
|
||||
continue
|
||||
}
|
||||
ss[opt] = true
|
||||
opt = strings.TrimPrefix(opt, CgroupNamePrefix)
|
||||
m.Subsystems = append(m.Subsystems, opt)
|
||||
numFound++
|
||||
}
|
||||
if len(m.Subsystems) > 0 || all {
|
||||
res = append(res, m)
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func getCgroupMountsV1(all bool) ([]Mount, error) {
|
||||
f, err := os.Open("/proc/self/mountinfo")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
allSubsystems, err := ParseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
allMap := make(map[string]bool)
|
||||
for s := range allSubsystems {
|
||||
allMap[s] = false
|
||||
}
|
||||
return getCgroupMountsHelper(allMap, f, all)
|
||||
}
|
||||
|
||||
// GetOwnCgroup returns the relative path to the cgroup docker is running in.
|
||||
func GetOwnCgroup(subsystem string) (string, error) {
|
||||
if IsCgroup2UnifiedMode() {
|
||||
return "", errUnified
|
||||
}
|
||||
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return getControllerPath(subsystem, cgroups)
|
||||
}
|
||||
|
||||
func GetOwnCgroupPath(subsystem string) (string, error) {
|
||||
cgroup, err := GetOwnCgroup(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return getCgroupPathHelper(subsystem, cgroup)
|
||||
}
|
||||
|
||||
func GetInitCgroup(subsystem string) (string, error) {
|
||||
if IsCgroup2UnifiedMode() {
|
||||
return "", errUnified
|
||||
}
|
||||
cgroups, err := ParseCgroupFile("/proc/1/cgroup")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return getControllerPath(subsystem, cgroups)
|
||||
}
|
||||
|
||||
func GetInitCgroupPath(subsystem string) (string, error) {
|
||||
cgroup, err := GetInitCgroup(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return getCgroupPathHelper(subsystem, cgroup)
|
||||
}
|
||||
|
||||
func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
|
||||
mnt, root, err := FindCgroupMountpointAndRoot("", subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// This is needed for nested containers, because in /proc/self/cgroup we
|
||||
// see paths from host, which don't exist in container.
|
||||
relCgroup, err := filepath.Rel(root, cgroup)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return filepath.Join(mnt, relCgroup), nil
|
||||
}
|
||||
|
||||
func getControllerPath(subsystem string, cgroups map[string]string) (string, error) {
|
||||
if IsCgroup2UnifiedMode() {
|
||||
return "", errUnified
|
||||
}
|
||||
|
||||
if p, ok := cgroups[subsystem]; ok {
|
||||
return p, nil
|
||||
}
|
||||
|
||||
if p, ok := cgroups[CgroupNamePrefix+subsystem]; ok {
|
||||
return p, nil
|
||||
}
|
||||
|
||||
return "", NewNotFoundError(subsystem)
|
||||
}
|
7
vendor/github.com/opencontainers/runc/libcontainer/configs/BUILD
generated
vendored
7
vendor/github.com/opencontainers/runc/libcontainer/configs/BUILD
generated
vendored
@@ -9,7 +9,6 @@ go_library(
|
||||
"config.go",
|
||||
"config_linux.go",
|
||||
"device.go",
|
||||
"device_defaults.go",
|
||||
"hugepage_limit.go",
|
||||
"intelrdt.go",
|
||||
"interface_priority_map.go",
|
||||
@@ -26,13 +25,15 @@ go_library(
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//vendor/github.com/opencontainers/runtime-spec/specs-go:go_default_library",
|
||||
"//vendor/github.com/pkg/errors:go_default_library",
|
||||
"//vendor/github.com/sirupsen/logrus:go_default_library",
|
||||
"//vendor/golang.org/x/sys/unix:go_default_library",
|
||||
] + select({
|
||||
"@io_bazel_rules_go//go/platform:android": [
|
||||
"//vendor/golang.org/x/sys/unix:go_default_library",
|
||||
"//vendor/github.com/coreos/go-systemd/v22/dbus:go_default_library",
|
||||
],
|
||||
"@io_bazel_rules_go//go/platform:linux": [
|
||||
"//vendor/golang.org/x/sys/unix:go_default_library",
|
||||
"//vendor/github.com/coreos/go-systemd/v22/dbus:go_default_library",
|
||||
],
|
||||
"//conditions:default": [],
|
||||
}),
|
||||
|
23
vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go
generated
vendored
23
vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go
generated
vendored
@@ -1,5 +1,9 @@
|
||||
package configs
|
||||
|
||||
import (
|
||||
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||
)
|
||||
|
||||
type FreezerState string
|
||||
|
||||
const (
|
||||
@@ -29,18 +33,16 @@ type Cgroup struct {
|
||||
|
||||
// Resources contains various cgroups settings to apply
|
||||
*Resources
|
||||
|
||||
// SystemdProps are any additional properties for systemd,
|
||||
// derived from org.systemd.property.xxx annotations.
|
||||
// Ignored unless systemd is used for managing cgroups.
|
||||
SystemdProps []systemdDbus.Property `json:"-"`
|
||||
}
|
||||
|
||||
type Resources struct {
|
||||
// If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list.
|
||||
// Deprecated
|
||||
AllowAllDevices *bool `json:"allow_all_devices,omitempty"`
|
||||
// Deprecated
|
||||
AllowedDevices []*Device `json:"allowed_devices,omitempty"`
|
||||
// Deprecated
|
||||
DeniedDevices []*Device `json:"denied_devices,omitempty"`
|
||||
|
||||
Devices []*Device `json:"devices"`
|
||||
// Devices is the set of access rules for devices in the container.
|
||||
Devices []*DeviceRule `json:"devices"`
|
||||
|
||||
// Memory limit (in bytes)
|
||||
Memory int64 `json:"memory"`
|
||||
@@ -124,7 +126,4 @@ type Resources struct {
|
||||
|
||||
// CpuWeight sets a proportional bandwidth limit.
|
||||
CpuWeight uint64 `json:"cpu_weight"`
|
||||
|
||||
// CpuMax sets she maximum bandwidth limit (format: max period).
|
||||
CpuMax string `json:"cpu_max"`
|
||||
}
|
||||
|
96
vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
generated
vendored
96
vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
generated
vendored
@@ -8,7 +8,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
@@ -70,9 +70,10 @@ type Arg struct {
|
||||
|
||||
// Syscall is a rule to match a syscall in Seccomp
|
||||
type Syscall struct {
|
||||
Name string `json:"name"`
|
||||
Action Action `json:"action"`
|
||||
Args []*Arg `json:"args"`
|
||||
Name string `json:"name"`
|
||||
Action Action `json:"action"`
|
||||
ErrnoRet *uint `json:"errnoRet"`
|
||||
Args []*Arg `json:"args"`
|
||||
}
|
||||
|
||||
// TODO Windows. Many of these fields should be factored out into those parts
|
||||
@@ -175,7 +176,7 @@ type Config struct {
|
||||
|
||||
// Hooks are a collection of actions to perform at various container lifecycle events.
|
||||
// CommandHooks are serialized to JSON, but other hooks are not.
|
||||
Hooks *Hooks
|
||||
Hooks Hooks
|
||||
|
||||
// Version is the version of opencontainer specification that is supported.
|
||||
Version string `json:"version"`
|
||||
@@ -202,17 +203,50 @@ type Config struct {
|
||||
RootlessCgroups bool `json:"rootless_cgroups,omitempty"`
|
||||
}
|
||||
|
||||
type Hooks struct {
|
||||
type HookName string
|
||||
type HookList []Hook
|
||||
type Hooks map[HookName]HookList
|
||||
|
||||
const (
|
||||
// Prestart commands are executed after the container namespaces are created,
|
||||
// but before the user supplied command is executed from init.
|
||||
Prestart []Hook
|
||||
// Note: This hook is now deprecated
|
||||
// Prestart commands are called in the Runtime namespace.
|
||||
Prestart HookName = "prestart"
|
||||
|
||||
// CreateRuntime commands MUST be called as part of the create operation after
|
||||
// the runtime environment has been created but before the pivot_root has been executed.
|
||||
// CreateRuntime is called immediately after the deprecated Prestart hook.
|
||||
// CreateRuntime commands are called in the Runtime Namespace.
|
||||
CreateRuntime = "createRuntime"
|
||||
|
||||
// CreateContainer commands MUST be called as part of the create operation after
|
||||
// the runtime environment has been created but before the pivot_root has been executed.
|
||||
// CreateContainer commands are called in the Container namespace.
|
||||
CreateContainer = "createContainer"
|
||||
|
||||
// StartContainer commands MUST be called as part of the start operation and before
|
||||
// the container process is started.
|
||||
// StartContainer commands are called in the Container namespace.
|
||||
StartContainer = "startContainer"
|
||||
|
||||
// Poststart commands are executed after the container init process starts.
|
||||
Poststart []Hook
|
||||
// Poststart commands are called in the Runtime Namespace.
|
||||
Poststart = "poststart"
|
||||
|
||||
// Poststop commands are executed after the container init process exits.
|
||||
Poststop []Hook
|
||||
}
|
||||
// Poststop commands are called in the Runtime Namespace.
|
||||
Poststop = "poststop"
|
||||
)
|
||||
|
||||
// TODO move this to runtime-spec
|
||||
// See: https://github.com/opencontainers/runtime-spec/pull/1046
|
||||
const (
|
||||
Creating = "creating"
|
||||
Created = "created"
|
||||
Running = "running"
|
||||
Stopped = "stopped"
|
||||
)
|
||||
|
||||
type Capabilities struct {
|
||||
// Bounding is the set of capabilities checked by the kernel.
|
||||
@@ -227,32 +261,39 @@ type Capabilities struct {
|
||||
Ambient []string
|
||||
}
|
||||
|
||||
func (hooks *Hooks) UnmarshalJSON(b []byte) error {
|
||||
var state struct {
|
||||
Prestart []CommandHook
|
||||
Poststart []CommandHook
|
||||
Poststop []CommandHook
|
||||
func (hooks HookList) RunHooks(state *specs.State) error {
|
||||
for i, h := range hooks {
|
||||
if err := h.Run(state); err != nil {
|
||||
return errors.Wrapf(err, "Running hook #%d:", i)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (hooks *Hooks) UnmarshalJSON(b []byte) error {
|
||||
var state map[HookName][]CommandHook
|
||||
|
||||
if err := json.Unmarshal(b, &state); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
deserialize := func(shooks []CommandHook) (hooks []Hook) {
|
||||
for _, shook := range shooks {
|
||||
hooks = append(hooks, shook)
|
||||
*hooks = Hooks{}
|
||||
for n, commandHooks := range state {
|
||||
if len(commandHooks) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
return hooks
|
||||
(*hooks)[n] = HookList{}
|
||||
for _, h := range commandHooks {
|
||||
(*hooks)[n] = append((*hooks)[n], h)
|
||||
}
|
||||
}
|
||||
|
||||
hooks.Prestart = deserialize(state.Prestart)
|
||||
hooks.Poststart = deserialize(state.Poststart)
|
||||
hooks.Poststop = deserialize(state.Poststop)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (hooks Hooks) MarshalJSON() ([]byte, error) {
|
||||
func (hooks *Hooks) MarshalJSON() ([]byte, error) {
|
||||
serialize := func(hooks []Hook) (serializableHooks []CommandHook) {
|
||||
for _, hook := range hooks {
|
||||
switch chook := hook.(type) {
|
||||
@@ -267,9 +308,12 @@ func (hooks Hooks) MarshalJSON() ([]byte, error) {
|
||||
}
|
||||
|
||||
return json.Marshal(map[string]interface{}{
|
||||
"prestart": serialize(hooks.Prestart),
|
||||
"poststart": serialize(hooks.Poststart),
|
||||
"poststop": serialize(hooks.Poststop),
|
||||
"prestart": serialize((*hooks)[Prestart]),
|
||||
"createRuntime": serialize((*hooks)[CreateRuntime]),
|
||||
"createContainer": serialize((*hooks)[CreateContainer]),
|
||||
"startContainer": serialize((*hooks)[StartContainer]),
|
||||
"poststart": serialize((*hooks)[Poststart]),
|
||||
"poststop": serialize((*hooks)[Poststop]),
|
||||
})
|
||||
}
|
||||
|
||||
|
175
vendor/github.com/opencontainers/runc/libcontainer/configs/device.go
generated
vendored
175
vendor/github.com/opencontainers/runc/libcontainer/configs/device.go
generated
vendored
@@ -1,8 +1,12 @@
|
||||
package configs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -12,21 +16,11 @@ const (
|
||||
// TODO Windows: This can be factored out in the future
|
||||
|
||||
type Device struct {
|
||||
// Device type, block, char, etc.
|
||||
Type rune `json:"type"`
|
||||
DeviceRule
|
||||
|
||||
// Path to the device.
|
||||
Path string `json:"path"`
|
||||
|
||||
// Major is the device's major number.
|
||||
Major int64 `json:"major"`
|
||||
|
||||
// Minor is the device's minor number.
|
||||
Minor int64 `json:"minor"`
|
||||
|
||||
// Cgroup permissions format, rwm.
|
||||
Permissions string `json:"permissions"`
|
||||
|
||||
// FileMode permission bits for the device.
|
||||
FileMode os.FileMode `json:"file_mode"`
|
||||
|
||||
@@ -35,23 +29,154 @@ type Device struct {
|
||||
|
||||
// Gid of the device.
|
||||
Gid uint32 `json:"gid"`
|
||||
}
|
||||
|
||||
// Write the file to the allowed list
|
||||
// DevicePermissions is a cgroupv1-style string describing device access. It
// stays a string for backward compatibility, which is why set operations are
// provided as methods.
type DevicePermissions string

// Bit flags used internally to represent a DevicePermissions value as a set.
const (
	deviceRead  uint = 1 << 0
	deviceWrite uint = 1 << 1
	deviceMknod uint = 1 << 2
)

// toSet converts p into its bit-set form. Characters other than 'r', 'w' and
// 'm' contribute nothing.
func (p DevicePermissions) toSet() uint {
	var bits uint
	// Byte-wise iteration is sufficient: the three recognised characters are
	// ASCII and can never occur inside a multi-byte UTF-8 sequence.
	for i := 0; i < len(p); i++ {
		switch p[i] {
		case 'r':
			bits |= deviceRead
		case 'w':
			bits |= deviceWrite
		case 'm':
			bits |= deviceMknod
		}
	}
	return bits
}

// fromSet renders a bit set as a DevicePermissions string, always emitting the
// characters in the order "r", "w", "m".
func fromSet(set uint) DevicePermissions {
	flags := [...]struct {
		bit    uint
		letter byte
	}{
		{deviceRead, 'r'},
		{deviceWrite, 'w'},
		{deviceMknod, 'm'},
	}
	out := make([]byte, 0, len(flags))
	for _, f := range flags {
		if set&f.bit != 0 {
			out = append(out, f.letter)
		}
	}
	return DevicePermissions(out)
}

// Union returns the union of the two sets of DevicePermissions.
func (p DevicePermissions) Union(o DevicePermissions) DevicePermissions {
	return fromSet(p.toSet() | o.toSet())
}

// Difference returns the set difference of the two sets of DevicePermissions.
// In set notation, A.Difference(B) gives you A\B.
func (p DevicePermissions) Difference(o DevicePermissions) DevicePermissions {
	return fromSet(p.toSet() &^ o.toSet())
}

// Intersection computes the intersection of the two sets of DevicePermissions.
func (p DevicePermissions) Intersection(o DevicePermissions) DevicePermissions {
	return fromSet(p.toSet() & o.toSet())
}

// IsEmpty returns whether the set of permissions in a DevicePermissions is
// empty.
func (p DevicePermissions) IsEmpty() bool {
	return len(p) == 0
}

// IsValid returns whether p round-trips unchanged through its set form, i.e.
// it consists only of 'r', 'w', 'm', each at most once and in that order.
func (p DevicePermissions) IsValid() bool {
	return fromSet(p.toSet()) == p
}
|
||||
|
||||
// DeviceType is the single-character device type code used in device rules.
type DeviceType rune

// Device type codes recognised by the DeviceType methods.
const (
	WildcardDevice DeviceType = 'a'
	BlockDevice    DeviceType = 'b'
	CharDevice     DeviceType = 'c' // or 'u'
	FifoDevice     DeviceType = 'p'
)

// IsValid reports whether t is one of the four declared device types.
func (t DeviceType) IsValid() bool {
	return t == WildcardDevice || t == BlockDevice || t == CharDevice || t == FifoDevice
}

// CanMknod reports whether t is a block, char or fifo device; the wildcard
// type is excluded.
func (t DeviceType) CanMknod() bool {
	return t == BlockDevice || t == CharDevice || t == FifoDevice
}

// CanCgroup reports whether t is a wildcard, block or char device; the fifo
// type is excluded.
func (t DeviceType) CanCgroup() bool {
	return t == WildcardDevice || t == BlockDevice || t == CharDevice
}
|
||||
|
||||
type DeviceRule struct {
|
||||
// Type of device ('c' for char, 'b' for block). If set to 'a', this rule
|
||||
// acts as a wildcard and all fields other than Allow are ignored.
|
||||
Type DeviceType `json:"type"`
|
||||
|
||||
// Major is the device's major number.
|
||||
Major int64 `json:"major"`
|
||||
|
||||
// Minor is the device's minor number.
|
||||
Minor int64 `json:"minor"`
|
||||
|
||||
// Permissions is the set of permissions that this rule applies to (in the
|
||||
// cgroupv1 format -- any combination of "rwm").
|
||||
Permissions DevicePermissions `json:"permissions"`
|
||||
|
||||
// Allow specifies whether this rule is allowed.
|
||||
Allow bool `json:"allow"`
|
||||
}
|
||||
|
||||
func (d *Device) CgroupString() string {
|
||||
return fmt.Sprintf("%c %s:%s %s", d.Type, deviceNumberString(d.Major), deviceNumberString(d.Minor), d.Permissions)
|
||||
}
|
||||
|
||||
func (d *Device) Mkdev() int {
|
||||
return int((d.Major << 8) | (d.Minor & 0xff) | ((d.Minor & 0xfff00) << 12))
|
||||
}
|
||||
|
||||
// deviceNumberString converts the device number to a string return result.
|
||||
func deviceNumberString(number int64) string {
|
||||
if number == Wildcard {
|
||||
return "*"
|
||||
func (d *DeviceRule) CgroupString() string {
|
||||
var (
|
||||
major = strconv.FormatInt(d.Major, 10)
|
||||
minor = strconv.FormatInt(d.Minor, 10)
|
||||
)
|
||||
if d.Major == Wildcard {
|
||||
major = "*"
|
||||
}
|
||||
return fmt.Sprint(number)
|
||||
if d.Minor == Wildcard {
|
||||
minor = "*"
|
||||
}
|
||||
return fmt.Sprintf("%c %s:%s %s", d.Type, major, minor, d.Permissions)
|
||||
}
|
||||
|
||||
func (d *DeviceRule) Mkdev() (uint64, error) {
|
||||
if d.Major == Wildcard || d.Minor == Wildcard {
|
||||
return 0, errors.New("cannot mkdev() device with wildcards")
|
||||
}
|
||||
return unix.Mkdev(uint32(d.Major), uint32(d.Minor)), nil
|
||||
}
|
||||
|
111
vendor/github.com/opencontainers/runc/libcontainer/configs/device_defaults.go
generated
vendored
111
vendor/github.com/opencontainers/runc/libcontainer/configs/device_defaults.go
generated
vendored
@@ -1,111 +0,0 @@
|
||||
// +build linux
|
||||
|
||||
package configs
|
||||
|
||||
var (
|
||||
// DefaultSimpleDevices are devices that are to be both allowed and created.
|
||||
DefaultSimpleDevices = []*Device{
|
||||
// /dev/null and zero
|
||||
{
|
||||
Path: "/dev/null",
|
||||
Type: 'c',
|
||||
Major: 1,
|
||||
Minor: 3,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
},
|
||||
{
|
||||
Path: "/dev/zero",
|
||||
Type: 'c',
|
||||
Major: 1,
|
||||
Minor: 5,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
},
|
||||
|
||||
{
|
||||
Path: "/dev/full",
|
||||
Type: 'c',
|
||||
Major: 1,
|
||||
Minor: 7,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
},
|
||||
|
||||
// consoles and ttys
|
||||
{
|
||||
Path: "/dev/tty",
|
||||
Type: 'c',
|
||||
Major: 5,
|
||||
Minor: 0,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
},
|
||||
|
||||
// /dev/urandom,/dev/random
|
||||
{
|
||||
Path: "/dev/urandom",
|
||||
Type: 'c',
|
||||
Major: 1,
|
||||
Minor: 9,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
},
|
||||
{
|
||||
Path: "/dev/random",
|
||||
Type: 'c',
|
||||
Major: 1,
|
||||
Minor: 8,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
},
|
||||
}
|
||||
DefaultAllowedDevices = append([]*Device{
|
||||
// allow mknod for any device
|
||||
{
|
||||
Type: 'c',
|
||||
Major: Wildcard,
|
||||
Minor: Wildcard,
|
||||
Permissions: "m",
|
||||
},
|
||||
{
|
||||
Type: 'b',
|
||||
Major: Wildcard,
|
||||
Minor: Wildcard,
|
||||
Permissions: "m",
|
||||
},
|
||||
|
||||
{
|
||||
Path: "/dev/console",
|
||||
Type: 'c',
|
||||
Major: 5,
|
||||
Minor: 1,
|
||||
Permissions: "rwm",
|
||||
},
|
||||
// /dev/pts/ - pts namespaces are "coming soon"
|
||||
{
|
||||
Path: "",
|
||||
Type: 'c',
|
||||
Major: 136,
|
||||
Minor: Wildcard,
|
||||
Permissions: "rwm",
|
||||
},
|
||||
{
|
||||
Path: "",
|
||||
Type: 'c',
|
||||
Major: 5,
|
||||
Minor: 2,
|
||||
Permissions: "rwm",
|
||||
},
|
||||
|
||||
// tuntap
|
||||
{
|
||||
Path: "",
|
||||
Type: 'c',
|
||||
Major: 10,
|
||||
Minor: 200,
|
||||
Permissions: "rwm",
|
||||
},
|
||||
}, DefaultSimpleDevices...)
|
||||
DefaultAutoCreatedDevices = append([]*Device{}, DefaultSimpleDevices...)
|
||||
)
|
11
vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go
generated
vendored
11
vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go
generated
vendored
@@ -1,6 +1,7 @@
|
||||
package validate
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
@@ -35,14 +36,14 @@ func hasIDMapping(id int, mappings []configs.IDMap) bool {
|
||||
|
||||
func rootlessEUIDMappings(config *configs.Config) error {
|
||||
if !config.Namespaces.Contains(configs.NEWUSER) {
|
||||
return fmt.Errorf("rootless container requires user namespaces")
|
||||
return errors.New("rootless container requires user namespaces")
|
||||
}
|
||||
|
||||
if len(config.UidMappings) == 0 {
|
||||
return fmt.Errorf("rootless containers requires at least one UID mapping")
|
||||
return errors.New("rootless containers requires at least one UID mapping")
|
||||
}
|
||||
if len(config.GidMappings) == 0 {
|
||||
return fmt.Errorf("rootless containers requires at least one GID mapping")
|
||||
return errors.New("rootless containers requires at least one GID mapping")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -67,7 +68,7 @@ func rootlessEUIDMount(config *configs.Config) error {
|
||||
continue
|
||||
}
|
||||
if !hasIDMapping(uid, config.UidMappings) {
|
||||
return fmt.Errorf("cannot specify uid= mount options for unmapped uid in rootless containers")
|
||||
return errors.New("cannot specify uid= mount options for unmapped uid in rootless containers")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -79,7 +80,7 @@ func rootlessEUIDMount(config *configs.Config) error {
|
||||
continue
|
||||
}
|
||||
if !hasIDMapping(gid, config.GidMappings) {
|
||||
return fmt.Errorf("cannot specify gid= mount options for unmapped gid in rootless containers")
|
||||
return errors.New("cannot specify gid= mount options for unmapped gid in rootless containers")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
25
vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go
generated
vendored
25
vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go
generated
vendored
@@ -1,6 +1,7 @@
|
||||
package validate
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
@@ -80,7 +81,7 @@ func (v *ConfigValidator) rootfs(config *configs.Config) error {
|
||||
func (v *ConfigValidator) network(config *configs.Config) error {
|
||||
if !config.Namespaces.Contains(configs.NEWNET) {
|
||||
if len(config.Networks) > 0 || len(config.Routes) > 0 {
|
||||
return fmt.Errorf("unable to apply network settings without a private NET namespace")
|
||||
return errors.New("unable to apply network settings without a private NET namespace")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
@@ -88,7 +89,7 @@ func (v *ConfigValidator) network(config *configs.Config) error {
|
||||
|
||||
func (v *ConfigValidator) hostname(config *configs.Config) error {
|
||||
if config.Hostname != "" && !config.Namespaces.Contains(configs.NEWUTS) {
|
||||
return fmt.Errorf("unable to set hostname without a private UTS namespace")
|
||||
return errors.New("unable to set hostname without a private UTS namespace")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -97,10 +98,10 @@ func (v *ConfigValidator) security(config *configs.Config) error {
|
||||
// restrict sys without mount namespace
|
||||
if (len(config.MaskPaths) > 0 || len(config.ReadonlyPaths) > 0) &&
|
||||
!config.Namespaces.Contains(configs.NEWNS) {
|
||||
return fmt.Errorf("unable to restrict sys entries without a private MNT namespace")
|
||||
return errors.New("unable to restrict sys entries without a private MNT namespace")
|
||||
}
|
||||
if config.ProcessLabel != "" && !selinux.GetEnabled() {
|
||||
return fmt.Errorf("selinux label is specified in config, but selinux is disabled or not supported")
|
||||
return errors.New("selinux label is specified in config, but selinux is disabled or not supported")
|
||||
}
|
||||
|
||||
return nil
|
||||
@@ -109,11 +110,11 @@ func (v *ConfigValidator) security(config *configs.Config) error {
|
||||
func (v *ConfigValidator) usernamespace(config *configs.Config) error {
|
||||
if config.Namespaces.Contains(configs.NEWUSER) {
|
||||
if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) {
|
||||
return fmt.Errorf("USER namespaces aren't enabled in the kernel")
|
||||
return errors.New("USER namespaces aren't enabled in the kernel")
|
||||
}
|
||||
} else {
|
||||
if config.UidMappings != nil || config.GidMappings != nil {
|
||||
return fmt.Errorf("User namespace mappings specified, but USER namespace isn't enabled in the config")
|
||||
return errors.New("User namespace mappings specified, but USER namespace isn't enabled in the config")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
@@ -122,7 +123,7 @@ func (v *ConfigValidator) usernamespace(config *configs.Config) error {
|
||||
func (v *ConfigValidator) cgroupnamespace(config *configs.Config) error {
|
||||
if config.Namespaces.Contains(configs.NEWCGROUP) {
|
||||
if _, err := os.Stat("/proc/self/ns/cgroup"); os.IsNotExist(err) {
|
||||
return fmt.Errorf("cgroup namespaces aren't enabled in the kernel")
|
||||
return errors.New("cgroup namespaces aren't enabled in the kernel")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
@@ -182,21 +183,21 @@ func (v *ConfigValidator) sysctl(config *configs.Config) error {
|
||||
func (v *ConfigValidator) intelrdt(config *configs.Config) error {
|
||||
if config.IntelRdt != nil {
|
||||
if !intelrdt.IsCatEnabled() && !intelrdt.IsMbaEnabled() {
|
||||
return fmt.Errorf("intelRdt is specified in config, but Intel RDT is not supported or enabled")
|
||||
return errors.New("intelRdt is specified in config, but Intel RDT is not supported or enabled")
|
||||
}
|
||||
|
||||
if !intelrdt.IsCatEnabled() && config.IntelRdt.L3CacheSchema != "" {
|
||||
return fmt.Errorf("intelRdt.l3CacheSchema is specified in config, but Intel RDT/CAT is not enabled")
|
||||
return errors.New("intelRdt.l3CacheSchema is specified in config, but Intel RDT/CAT is not enabled")
|
||||
}
|
||||
if !intelrdt.IsMbaEnabled() && config.IntelRdt.MemBwSchema != "" {
|
||||
return fmt.Errorf("intelRdt.memBwSchema is specified in config, but Intel RDT/MBA is not enabled")
|
||||
return errors.New("intelRdt.memBwSchema is specified in config, but Intel RDT/MBA is not enabled")
|
||||
}
|
||||
|
||||
if intelrdt.IsCatEnabled() && config.IntelRdt.L3CacheSchema == "" {
|
||||
return fmt.Errorf("Intel RDT/CAT is enabled and intelRdt is specified in config, but intelRdt.l3CacheSchema is empty")
|
||||
return errors.New("Intel RDT/CAT is enabled and intelRdt is specified in config, but intelRdt.l3CacheSchema is empty")
|
||||
}
|
||||
if intelrdt.IsMbaEnabled() && config.IntelRdt.MemBwSchema == "" {
|
||||
return fmt.Errorf("Intel RDT/MBA is enabled and intelRdt is specified in config, but intelRdt.memBwSchema is empty")
|
||||
return errors.New("Intel RDT/MBA is enabled and intelRdt is specified in config, but intelRdt.memBwSchema is empty")
|
||||
}
|
||||
}
|
||||
|
||||
|
487
vendor/github.com/opencontainers/runc/libcontainer/container_linux.go
generated
vendored
487
vendor/github.com/opencontainers/runc/libcontainer/container_linux.go
generated
vendored
@@ -16,7 +16,6 @@ import (
|
||||
"reflect"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall" // only for SysProcAttr and Signal
|
||||
"time"
|
||||
|
||||
securejoin "github.com/cyphar/filepath-securejoin"
|
||||
@@ -27,8 +26,10 @@ import (
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
|
||||
criurpc "github.com/checkpoint-restore/go-criu/rpc"
|
||||
"github.com/checkpoint-restore/go-criu/v4"
|
||||
criurpc "github.com/checkpoint-restore/go-criu/v4/rpc"
|
||||
"github.com/golang/protobuf/proto"
|
||||
errorsf "github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/vishvananda/netlink/nl"
|
||||
"golang.org/x/sys/unix"
|
||||
@@ -65,8 +66,12 @@ type State struct {
|
||||
// Set to true if BaseState.Config.RootlessEUID && BaseState.Config.RootlessCgroups
|
||||
Rootless bool `json:"rootless"`
|
||||
|
||||
// Path to all the cgroups setup for a container. Key is cgroup subsystem name
|
||||
// with the value as the path.
|
||||
// Paths to all the container's cgroups, as returned by (*cgroups.Manager).GetPaths
|
||||
//
|
||||
// For cgroup v1, a key is cgroup subsystem name, and the value is the path
|
||||
// to the cgroup for this subsystem.
|
||||
//
|
||||
// For cgroup v2 unified hierarchy, a key is "", and the value is the unified path.
|
||||
CgroupPaths map[string]string `json:"cgroup_paths"`
|
||||
|
||||
// NamespacePaths are filepaths to the container's namespaces. Key is the namespace type
|
||||
@@ -165,7 +170,17 @@ func (c *linuxContainer) OCIState() (*specs.State, error) {
|
||||
}
|
||||
|
||||
func (c *linuxContainer) Processes() ([]int, error) {
|
||||
pids, err := c.cgroupManager.GetAllPids()
|
||||
var pids []int
|
||||
status, err := c.currentStatus()
|
||||
if err != nil {
|
||||
return pids, err
|
||||
}
|
||||
// for systemd cgroup, the unit's cgroup path will be auto removed if all of the container's processes have exited
|
||||
if status == Stopped && !c.cgroupManager.Exists() {
|
||||
return pids, nil
|
||||
}
|
||||
|
||||
pids, err = c.cgroupManager.GetAllPids()
|
||||
if err != nil {
|
||||
return nil, newSystemErrorWithCause(err, "getting all container pids from cgroups")
|
||||
}
|
||||
@@ -206,7 +221,7 @@ func (c *linuxContainer) Set(config configs.Config) error {
|
||||
return err
|
||||
}
|
||||
if status == Stopped {
|
||||
return newGenericError(fmt.Errorf("container not running"), ContainerNotRunning)
|
||||
return newGenericError(errors.New("container not running"), ContainerNotRunning)
|
||||
}
|
||||
if err := c.cgroupManager.Set(&config); err != nil {
|
||||
// Set configs back
|
||||
@@ -218,6 +233,9 @@ func (c *linuxContainer) Set(config configs.Config) error {
|
||||
if c.intelRdtManager != nil {
|
||||
if err := c.intelRdtManager.Set(&config); err != nil {
|
||||
// Set configs back
|
||||
if err2 := c.cgroupManager.Set(c.config); err2 != nil {
|
||||
logrus.Warnf("Setting back cgroup configs failed due to error: %v, your state.json and actual configs might be inconsistent.", err2)
|
||||
}
|
||||
if err2 := c.intelRdtManager.Set(c.config); err2 != nil {
|
||||
logrus.Warnf("Setting back intelrdt configs failed due to error: %v, your state.json and actual configs might be inconsistent.", err2)
|
||||
}
|
||||
@@ -292,7 +310,7 @@ func readFromExecFifo(execFifo io.Reader) error {
|
||||
return err
|
||||
}
|
||||
if len(data) <= 0 {
|
||||
return fmt.Errorf("cannot start an already running container")
|
||||
return errors.New("cannot start an already running container")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -309,7 +327,7 @@ func awaitFifoOpen(path string) <-chan openResult {
|
||||
func fifoOpen(path string, block bool) openResult {
|
||||
flags := os.O_RDONLY
|
||||
if !block {
|
||||
flags |= syscall.O_NONBLOCK
|
||||
flags |= unix.O_NONBLOCK
|
||||
}
|
||||
f, err := os.OpenFile(path, flags, 0)
|
||||
if err != nil {
|
||||
@@ -365,13 +383,12 @@ func (c *linuxContainer) start(process *Process) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for i, hook := range c.config.Hooks.Poststart {
|
||||
if err := hook.Run(s); err != nil {
|
||||
if err := ignoreTerminateErrors(parent.terminate()); err != nil {
|
||||
logrus.Warn(err)
|
||||
}
|
||||
return newSystemErrorWithCausef(err, "running poststart hook %d", i)
|
||||
|
||||
if err := c.config.Hooks[configs.Poststart].RunHooks(s); err != nil {
|
||||
if err := ignoreTerminateErrors(parent.terminate()); err != nil {
|
||||
logrus.Warn(errorsf.Wrapf(err, "Running Poststart hook"))
|
||||
}
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -379,13 +396,19 @@ func (c *linuxContainer) start(process *Process) error {
|
||||
}
|
||||
|
||||
func (c *linuxContainer) Signal(s os.Signal, all bool) error {
|
||||
if all {
|
||||
return signalAllProcesses(c.cgroupManager, s)
|
||||
}
|
||||
c.m.Lock()
|
||||
defer c.m.Unlock()
|
||||
status, err := c.currentStatus()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if all {
|
||||
// for systemd cgroup, the unit's cgroup path will be auto removed if all of the container's processes have exited
|
||||
if status == Stopped && !c.cgroupManager.Exists() {
|
||||
return nil
|
||||
}
|
||||
return signalAllProcesses(c.cgroupManager, s)
|
||||
}
|
||||
// to avoid a PID reuse attack
|
||||
if status == Running || status == Created || status == Paused {
|
||||
if err := c.initProcess.signal(s); err != nil {
|
||||
@@ -393,7 +416,7 @@ func (c *linuxContainer) Signal(s os.Signal, all bool) error {
|
||||
}
|
||||
return nil
|
||||
}
|
||||
return newGenericError(fmt.Errorf("container not running"), ContainerNotRunning)
|
||||
return newGenericError(errors.New("container not running"), ContainerNotRunning)
|
||||
}
|
||||
|
||||
func (c *linuxContainer) createExecFifo() error {
|
||||
@@ -454,10 +477,7 @@ func (c *linuxContainer) newParentProcess(p *Process) (parentProcess, error) {
|
||||
}
|
||||
logFilePair := filePair{parentLogPipe, childLogPipe}
|
||||
|
||||
cmd, err := c.commandTemplate(p, childInitPipe, childLogPipe)
|
||||
if err != nil {
|
||||
return nil, newSystemErrorWithCause(err, "creating new command template")
|
||||
}
|
||||
cmd := c.commandTemplate(p, childInitPipe, childLogPipe)
|
||||
if !p.Init {
|
||||
return c.newSetnsProcess(p, cmd, messageSockPair, logFilePair)
|
||||
}
|
||||
@@ -473,7 +493,7 @@ func (c *linuxContainer) newParentProcess(p *Process) (parentProcess, error) {
|
||||
return c.newInitProcess(p, cmd, messageSockPair, logFilePair)
|
||||
}
|
||||
|
||||
func (c *linuxContainer) commandTemplate(p *Process, childInitPipe *os.File, childLogPipe *os.File) (*exec.Cmd, error) {
|
||||
func (c *linuxContainer) commandTemplate(p *Process, childInitPipe *os.File, childLogPipe *os.File) *exec.Cmd {
|
||||
cmd := exec.Command(c.initPath, c.initArgs[1:]...)
|
||||
cmd.Args[0] = c.initArgs[0]
|
||||
cmd.Stdin = p.Stdin
|
||||
@@ -481,7 +501,7 @@ func (c *linuxContainer) commandTemplate(p *Process, childInitPipe *os.File, chi
|
||||
cmd.Stderr = p.Stderr
|
||||
cmd.Dir = c.config.Rootfs
|
||||
if cmd.SysProcAttr == nil {
|
||||
cmd.SysProcAttr = &syscall.SysProcAttr{}
|
||||
cmd.SysProcAttr = &unix.SysProcAttr{}
|
||||
}
|
||||
cmd.Env = append(cmd.Env, fmt.Sprintf("GOMAXPROCS=%s", os.Getenv("GOMAXPROCS")))
|
||||
cmd.ExtraFiles = append(cmd.ExtraFiles, p.ExtraFiles...)
|
||||
@@ -507,9 +527,9 @@ func (c *linuxContainer) commandTemplate(p *Process, childInitPipe *os.File, chi
|
||||
// PID1 the pdeathsig is being delivered to the container's init process by the kernel for some reason
|
||||
// even with the parent still running.
|
||||
if c.config.ParentDeathSignal > 0 {
|
||||
cmd.SysProcAttr.Pdeathsig = syscall.Signal(c.config.ParentDeathSignal)
|
||||
cmd.SysProcAttr.Pdeathsig = unix.Signal(c.config.ParentDeathSignal)
|
||||
}
|
||||
return cmd, nil
|
||||
return cmd
|
||||
}
|
||||
|
||||
func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPair, logFilePair filePair) (*initProcess, error) {
|
||||
@@ -555,7 +575,7 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockP
|
||||
}
|
||||
return &setnsProcess{
|
||||
cmd: cmd,
|
||||
cgroupPaths: c.cgroupManager.GetPaths(),
|
||||
cgroupPaths: state.CgroupPaths,
|
||||
rootlessCgroups: c.config.RootlessCgroups,
|
||||
intelRdtPath: state.IntelRdtPath,
|
||||
messageSockPair: messageSockPair,
|
||||
@@ -563,6 +583,7 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockP
|
||||
config: c.newInitConfig(p),
|
||||
process: p,
|
||||
bootstrapData: data,
|
||||
initProcessPid: state.InitProcessPid,
|
||||
}, nil
|
||||
}
|
||||
|
||||
@@ -650,7 +671,11 @@ func (c *linuxContainer) NotifyOOM() (<-chan struct{}, error) {
|
||||
if c.config.RootlessCgroups {
|
||||
logrus.Warn("getting OOM notifications may fail if you don't have the full access to cgroups")
|
||||
}
|
||||
return notifyOnOOM(c.cgroupManager.GetPaths())
|
||||
path := c.cgroupManager.Path("memory")
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
return notifyOnOOMV2(path)
|
||||
}
|
||||
return notifyOnOOM(path)
|
||||
}
|
||||
|
||||
func (c *linuxContainer) NotifyMemoryPressure(level PressureLevel) (<-chan struct{}, error) {
|
||||
@@ -658,7 +683,7 @@ func (c *linuxContainer) NotifyMemoryPressure(level PressureLevel) (<-chan struc
|
||||
if c.config.RootlessCgroups {
|
||||
logrus.Warn("getting memory pressure notifications may fail if you don't have the full access to cgroups")
|
||||
}
|
||||
return notifyMemoryPressure(c.cgroupManager.GetPaths(), level)
|
||||
return notifyMemoryPressure(c.cgroupManager.Path("memory"), level)
|
||||
}
|
||||
|
||||
var criuFeatures *criurpc.CriuFeatures
|
||||
@@ -668,16 +693,6 @@ func (c *linuxContainer) checkCriuFeatures(criuOpts *CriuOpts, rpcOpts *criurpc.
|
||||
var t criurpc.CriuReqType
|
||||
t = criurpc.CriuReqType_FEATURE_CHECK
|
||||
|
||||
// criu 1.8 => 10800
|
||||
if err := c.checkCriuVersion(10800); err != nil {
|
||||
// Feature checking was introduced with CRIU 1.8.
|
||||
// Ignore the feature check if an older CRIU version is used
|
||||
// and just act as before.
|
||||
// As all automated PR testing is done using CRIU 1.7 this
|
||||
// code will not be tested by automated PR testing.
|
||||
return nil
|
||||
}
|
||||
|
||||
// make sure the features we are looking for are really not from
|
||||
// some previous check
|
||||
criuFeatures = nil
|
||||
@@ -691,10 +706,10 @@ func (c *linuxContainer) checkCriuFeatures(criuOpts *CriuOpts, rpcOpts *criurpc.
|
||||
Features: criuFeat,
|
||||
}
|
||||
|
||||
err := c.criuSwrk(nil, req, criuOpts, false, nil)
|
||||
err := c.criuSwrk(nil, req, criuOpts, nil)
|
||||
if err != nil {
|
||||
logrus.Debugf("%s", err)
|
||||
return fmt.Errorf("CRIU feature check failed")
|
||||
return errors.New("CRIU feature check failed")
|
||||
}
|
||||
|
||||
logrus.Debugf("Feature check says: %s", criuFeatures)
|
||||
@@ -721,56 +736,12 @@ func (c *linuxContainer) checkCriuFeatures(criuOpts *CriuOpts, rpcOpts *criurpc.
|
||||
}
|
||||
|
||||
if missingFeatures {
|
||||
return fmt.Errorf("CRIU is missing features")
|
||||
return errors.New("CRIU is missing features")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func parseCriuVersion(path string) (int, error) {
|
||||
var x, y, z int
|
||||
|
||||
out, err := exec.Command(path, "-V").Output()
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("Unable to execute CRIU command: %s", path)
|
||||
}
|
||||
|
||||
x = 0
|
||||
y = 0
|
||||
z = 0
|
||||
if ep := strings.Index(string(out), "-"); ep >= 0 {
|
||||
// criu Git version format
|
||||
var version string
|
||||
if sp := strings.Index(string(out), "GitID"); sp > 0 {
|
||||
version = string(out)[sp:ep]
|
||||
} else {
|
||||
return 0, fmt.Errorf("Unable to parse the CRIU version: %s", path)
|
||||
}
|
||||
|
||||
n, err := fmt.Sscanf(version, "GitID: v%d.%d.%d", &x, &y, &z) // 1.5.2
|
||||
if err != nil {
|
||||
n, err = fmt.Sscanf(version, "GitID: v%d.%d", &x, &y) // 1.6
|
||||
y++
|
||||
} else {
|
||||
z++
|
||||
}
|
||||
if n < 2 || err != nil {
|
||||
return 0, fmt.Errorf("Unable to parse the CRIU version: %s %d %s", version, n, err)
|
||||
}
|
||||
} else {
|
||||
// criu release version format
|
||||
n, err := fmt.Sscanf(string(out), "Version: %d.%d.%d\n", &x, &y, &z) // 1.5.2
|
||||
if err != nil {
|
||||
n, err = fmt.Sscanf(string(out), "Version: %d.%d\n", &x, &y) // 1.6
|
||||
}
|
||||
if n < 2 || err != nil {
|
||||
return 0, fmt.Errorf("Unable to parse the CRIU version: %s %d %s", out, n, err)
|
||||
}
|
||||
}
|
||||
|
||||
return x*10000 + y*100 + z, nil
|
||||
}
|
||||
|
||||
func compareCriuVersion(criuVersion int, minVersion int) error {
|
||||
// simple function to perform the actual version compare
|
||||
if criuVersion < minVersion {
|
||||
@@ -780,9 +751,6 @@ func compareCriuVersion(criuVersion int, minVersion int) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// This is used to store the result of criu version RPC
|
||||
var criuVersionRPC *criurpc.CriuVersion
|
||||
|
||||
// checkCriuVersion checks Criu version greater than or equal to minVersion
|
||||
func (c *linuxContainer) checkCriuVersion(minVersion int) error {
|
||||
|
||||
@@ -792,50 +760,13 @@ func (c *linuxContainer) checkCriuVersion(minVersion int) error {
|
||||
return compareCriuVersion(c.criuVersion, minVersion)
|
||||
}
|
||||
|
||||
// First try if this version of CRIU support the version RPC.
|
||||
// The CRIU version RPC was introduced with CRIU 3.0.
|
||||
|
||||
// First, reset the variable for the RPC answer to nil
|
||||
criuVersionRPC = nil
|
||||
|
||||
var t criurpc.CriuReqType
|
||||
t = criurpc.CriuReqType_VERSION
|
||||
req := &criurpc.CriuReq{
|
||||
Type: &t,
|
||||
}
|
||||
|
||||
err := c.criuSwrk(nil, req, nil, false, nil)
|
||||
criu := criu.MakeCriu()
|
||||
var err error
|
||||
c.criuVersion, err = criu.GetCriuVersion()
|
||||
if err != nil {
|
||||
return fmt.Errorf("CRIU version check failed: %s", err)
|
||||
}
|
||||
|
||||
if criuVersionRPC != nil {
|
||||
logrus.Debugf("CRIU version: %s", criuVersionRPC)
|
||||
// major and minor are always set
|
||||
c.criuVersion = int(*criuVersionRPC.Major) * 10000
|
||||
c.criuVersion += int(*criuVersionRPC.Minor) * 100
|
||||
if criuVersionRPC.Sublevel != nil {
|
||||
c.criuVersion += int(*criuVersionRPC.Sublevel)
|
||||
}
|
||||
if criuVersionRPC.Gitid != nil {
|
||||
// runc's convention is that a CRIU git release is
|
||||
// always the same as increasing the minor by 1
|
||||
c.criuVersion -= (c.criuVersion % 100)
|
||||
c.criuVersion += 100
|
||||
}
|
||||
return compareCriuVersion(c.criuVersion, minVersion)
|
||||
}
|
||||
|
||||
// This is CRIU without the version RPC and therefore
|
||||
// older than 3.0. Parsing the output is required.
|
||||
|
||||
// This can be remove once runc does not work with criu older than 3.0
|
||||
|
||||
c.criuVersion, err = parseCriuVersion(c.criuPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return compareCriuVersion(c.criuVersion, minVersion)
|
||||
}
|
||||
|
||||
@@ -876,26 +807,6 @@ func (c *linuxContainer) addMaskPaths(req *criurpc.CriuReq) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func waitForCriuLazyServer(r *os.File, status string) error {
|
||||
|
||||
data := make([]byte, 1)
|
||||
_, err := r.Read(data)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
fd, err := os.OpenFile(status, os.O_TRUNC|os.O_WRONLY, os.ModeAppend)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = fd.Write(data)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
fd.Close()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) handleCriuConfigurationFile(rpcOpts *criurpc.CriuOpts) {
|
||||
// CRIU will evaluate a configuration starting with release 3.11.
|
||||
// Settings in the configuration file will overwrite RPC settings.
|
||||
@@ -932,13 +843,13 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
||||
// support for doing unprivileged dumps, but the setup of
|
||||
// rootless containers might make this complicated.
|
||||
|
||||
// criu 1.5.2 => 10502
|
||||
if err := c.checkCriuVersion(10502); err != nil {
|
||||
// We are relying on the CRIU version RPC which was introduced with CRIU 3.0.0
|
||||
if err := c.checkCriuVersion(30000); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if criuOpts.ImagesDirectory == "" {
|
||||
return fmt.Errorf("invalid directory to save checkpoint")
|
||||
return errors.New("invalid directory to save checkpoint")
|
||||
}
|
||||
|
||||
// Since a container can be C/R'ed multiple times,
|
||||
@@ -1006,8 +917,8 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
||||
// CRIU expects the information about an external namespace
|
||||
// like this: --external net[<inode>]:<key>
|
||||
// This <key> is always 'extRootNetNS'.
|
||||
var netns syscall.Stat_t
|
||||
err = syscall.Stat(nsPath, &netns)
|
||||
var netns unix.Stat_t
|
||||
err = unix.Stat(nsPath, &netns)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -1016,9 +927,13 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
||||
}
|
||||
}
|
||||
|
||||
fcg := c.cgroupManager.GetPaths()["freezer"]
|
||||
if fcg != "" {
|
||||
rpcOpts.FreezeCgroup = proto.String(fcg)
|
||||
// CRIU can use cgroup freezer; when rpcOpts.FreezeCgroup
|
||||
// is not set, CRIU uses ptrace() to pause the processes.
|
||||
// Note cgroup v2 freezer is only supported since CRIU release 3.14.
|
||||
if !cgroups.IsCgroup2UnifiedMode() || c.checkCriuVersion(31400) == nil {
|
||||
if fcg := c.cgroupManager.Path("freezer"); fcg != "" {
|
||||
rpcOpts.FreezeCgroup = proto.String(fcg)
|
||||
}
|
||||
}
|
||||
|
||||
// append optional criu opts, e.g., page-server and port
|
||||
@@ -1037,10 +952,6 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
||||
|
||||
// append optional manage cgroups mode
|
||||
if criuOpts.ManageCgroupsMode != 0 {
|
||||
// criu 1.7 => 10700
|
||||
if err := c.checkCriuVersion(10700); err != nil {
|
||||
return err
|
||||
}
|
||||
mode := criurpc.CriuCgMode(criuOpts.ManageCgroupsMode)
|
||||
rpcOpts.ManageCgroupsMode = &mode
|
||||
}
|
||||
@@ -1059,36 +970,53 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
||||
} else {
|
||||
t = criurpc.CriuReqType_DUMP
|
||||
}
|
||||
req := &criurpc.CriuReq{
|
||||
Type: &t,
|
||||
Opts: &rpcOpts,
|
||||
}
|
||||
|
||||
if criuOpts.LazyPages {
|
||||
// lazy migration requested; check if criu supports it
|
||||
feat := criurpc.CriuFeatures{
|
||||
LazyPages: proto.Bool(true),
|
||||
}
|
||||
|
||||
if err := c.checkCriuFeatures(criuOpts, &rpcOpts, &feat); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
statusRead, statusWrite, err := os.Pipe()
|
||||
if err != nil {
|
||||
return err
|
||||
if fd := criuOpts.StatusFd; fd != -1 {
|
||||
// check that the FD is valid
|
||||
flags, err := unix.FcntlInt(uintptr(fd), unix.F_GETFL, 0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("invalid --status-fd argument %d: %w", fd, err)
|
||||
}
|
||||
// and writable
|
||||
if flags&unix.O_WRONLY == 0 {
|
||||
return fmt.Errorf("invalid --status-fd argument %d: not writable", fd)
|
||||
}
|
||||
|
||||
if c.checkCriuVersion(31500) != nil {
|
||||
// For criu 3.15+, use notifications (see case "status-ready"
|
||||
// in criuNotifications). Otherwise, rely on criu status fd.
|
||||
rpcOpts.StatusFd = proto.Int32(int32(fd))
|
||||
}
|
||||
}
|
||||
rpcOpts.StatusFd = proto.Int32(int32(statusWrite.Fd()))
|
||||
go waitForCriuLazyServer(statusRead, criuOpts.StatusFd)
|
||||
}
|
||||
|
||||
//no need to dump these information in pre-dump
|
||||
req := &criurpc.CriuReq{
|
||||
Type: &t,
|
||||
Opts: &rpcOpts,
|
||||
}
|
||||
|
||||
// no need to dump all this in pre-dump
|
||||
if !criuOpts.PreDump {
|
||||
hasCgroupns := c.config.Namespaces.Contains(configs.NEWCGROUP)
|
||||
for _, m := range c.config.Mounts {
|
||||
switch m.Device {
|
||||
case "bind":
|
||||
c.addCriuDumpMount(req, m)
|
||||
case "cgroup":
|
||||
if cgroups.IsCgroup2UnifiedMode() || hasCgroupns {
|
||||
// real mount(s)
|
||||
continue
|
||||
}
|
||||
// a set of "external" bind mounts
|
||||
binds, err := getCgroupMounts(m)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -1120,7 +1048,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
||||
}
|
||||
}
|
||||
|
||||
err = c.criuSwrk(nil, req, criuOpts, false, nil)
|
||||
err = c.criuSwrk(nil, req, criuOpts, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -1166,7 +1094,14 @@ func (c *linuxContainer) restoreNetwork(req *criurpc.CriuReq, criuOpts *CriuOpts
|
||||
func (c *linuxContainer) makeCriuRestoreMountpoints(m *configs.Mount) error {
|
||||
switch m.Device {
|
||||
case "cgroup":
|
||||
// Do nothing for cgroup, CRIU should handle it
|
||||
// No mount point(s) need to be created:
|
||||
//
|
||||
// * for v1, mount points are saved by CRIU because
|
||||
// /sys/fs/cgroup is a tmpfs mount
|
||||
//
|
||||
// * for v2, /sys/fs/cgroup is a real mount, but
|
||||
// the mountpoint appears as soon as /sys is mounted
|
||||
return nil
|
||||
case "bind":
|
||||
// The prepareBindMount() function checks if source
|
||||
// exists. So it cannot be used for other filesystem types.
|
||||
@@ -1174,7 +1109,7 @@ func (c *linuxContainer) makeCriuRestoreMountpoints(m *configs.Mount) error {
|
||||
return err
|
||||
}
|
||||
default:
|
||||
// for all other file-systems just create the mountpoints
|
||||
// for all other filesystems just create the mountpoints
|
||||
dest, err := securejoin.SecureJoin(c.config.Rootfs, m.Destination)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -1195,10 +1130,10 @@ func (c *linuxContainer) makeCriuRestoreMountpoints(m *configs.Mount) error {
|
||||
func isPathInPrefixList(path string, prefix []string) bool {
|
||||
for _, p := range prefix {
|
||||
if strings.HasPrefix(path, p+"/") {
|
||||
return false
|
||||
return true
|
||||
}
|
||||
}
|
||||
return true
|
||||
return false
|
||||
}
|
||||
|
||||
// prepareCriuRestoreMounts tries to set up the rootfs of the
|
||||
@@ -1220,7 +1155,7 @@ func (c *linuxContainer) prepareCriuRestoreMounts(mounts []*configs.Mount) error
|
||||
// if the mountpoints are not on a tmpfs, as CRIU will
|
||||
// restore the complete tmpfs content from its checkpoint.
|
||||
for _, m := range mounts {
|
||||
if isPathInPrefixList(m.Destination, tmpfs) {
|
||||
if !isPathInPrefixList(m.Destination, tmpfs) {
|
||||
if err := c.makeCriuRestoreMountpoints(m); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -1240,8 +1175,8 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
||||
// TODO(avagin): Figure out how to make this work nicely. CRIU doesn't have
|
||||
// support for unprivileged restore at the moment.
|
||||
|
||||
// criu 1.5.2 => 10502
|
||||
if err := c.checkCriuVersion(10502); err != nil {
|
||||
// We are relying on the CRIU version RPC which was introduced with CRIU 3.0.0
|
||||
if err := c.checkCriuVersion(30000); err != nil {
|
||||
return err
|
||||
}
|
||||
if criuOpts.WorkDirectory == "" {
|
||||
@@ -1258,7 +1193,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
||||
}
|
||||
defer workDir.Close()
|
||||
if criuOpts.ImagesDirectory == "" {
|
||||
return fmt.Errorf("invalid directory to restore checkpoint")
|
||||
return errors.New("invalid directory to restore checkpoint")
|
||||
}
|
||||
imageDir, err := os.Open(criuOpts.ImagesDirectory)
|
||||
if err != nil {
|
||||
@@ -1326,11 +1261,11 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
||||
// The <key> needs to be the same as during checkpointing.
|
||||
// We are always using 'extRootNetNS' as the key in this.
|
||||
netns, err := os.Open(nsPath)
|
||||
defer netns.Close()
|
||||
if err != nil {
|
||||
logrus.Errorf("If a specific network namespace is defined it must exist: %s", err)
|
||||
return fmt.Errorf("Requested network namespace %v does not exist", nsPath)
|
||||
}
|
||||
defer netns.Close()
|
||||
inheritFd := new(criurpc.InheritFd)
|
||||
inheritFd.Key = proto.String("extRootNetNS")
|
||||
// The offset of four is necessary because 0, 1, 2 and 3 are already
|
||||
@@ -1348,11 +1283,16 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
||||
return err
|
||||
}
|
||||
|
||||
hasCgroupns := c.config.Namespaces.Contains(configs.NEWCGROUP)
|
||||
for _, m := range c.config.Mounts {
|
||||
switch m.Device {
|
||||
case "bind":
|
||||
c.addCriuRestoreMount(req, m)
|
||||
case "cgroup":
|
||||
if cgroups.IsCgroup2UnifiedMode() || hasCgroupns {
|
||||
continue
|
||||
}
|
||||
// cgroup v1 is a set of bind mounts, unless cgroupns is used
|
||||
binds, err := getCgroupMounts(m)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -1379,10 +1319,6 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
||||
|
||||
// append optional manage cgroups mode
|
||||
if criuOpts.ManageCgroupsMode != 0 {
|
||||
// criu 1.7 => 10700
|
||||
if err := c.checkCriuVersion(10700); err != nil {
|
||||
return err
|
||||
}
|
||||
mode := criurpc.CriuCgMode(criuOpts.ManageCgroupsMode)
|
||||
req.Opts.ManageCgroupsMode = &mode
|
||||
}
|
||||
@@ -1406,10 +1342,15 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
||||
req.Opts.InheritFd = append(req.Opts.InheritFd, inheritFd)
|
||||
}
|
||||
}
|
||||
return c.criuSwrk(process, req, criuOpts, true, extraFiles)
|
||||
return c.criuSwrk(process, req, criuOpts, extraFiles)
|
||||
}
|
||||
|
||||
func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
|
||||
// need to apply cgroups only on restore
|
||||
if req.GetType() != criurpc.CriuReqType_RESTORE {
|
||||
return nil
|
||||
}
|
||||
|
||||
// XXX: Do we need to deal with this case? AFAIK criu still requires root.
|
||||
if err := c.cgroupManager.Apply(pid); err != nil {
|
||||
return err
|
||||
@@ -1419,6 +1360,11 @@ func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
|
||||
return newSystemError(err)
|
||||
}
|
||||
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
return nil
|
||||
}
|
||||
// the stuff below is cgroupv1-specific
|
||||
|
||||
path := fmt.Sprintf("/proc/%d/cgroup", pid)
|
||||
cgroupsPaths, err := cgroups.ParseCgroupFile(path)
|
||||
if err != nil {
|
||||
@@ -1436,7 +1382,7 @@ func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, applyCgroups bool, extraFiles []*os.File) error {
|
||||
func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, extraFiles []*os.File) error {
|
||||
fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_SEQPACKET|unix.SOCK_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -1484,26 +1430,29 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
|
||||
if err := cmd.Start(); err != nil {
|
||||
return err
|
||||
}
|
||||
// we close criuServer so that even if CRIU crashes or unexpectedly exits, runc will not hang.
|
||||
criuServer.Close()
|
||||
// cmd.Process will be replaced by a restored init.
|
||||
criuProcess := cmd.Process
|
||||
|
||||
var criuProcessState *os.ProcessState
|
||||
defer func() {
|
||||
criuClientCon.Close()
|
||||
_, err := cmd.Process.Wait()
|
||||
if err != nil {
|
||||
return
|
||||
if criuProcessState == nil {
|
||||
criuClientCon.Close()
|
||||
_, err := criuProcess.Wait()
|
||||
if err != nil {
|
||||
logrus.Warnf("wait on criuProcess returned %v", err)
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
if applyCgroups {
|
||||
err := c.criuApplyCgroups(cmd.Process.Pid, req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.criuApplyCgroups(criuProcess.Pid, req); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var extFds []string
|
||||
if process != nil {
|
||||
extFds, err = getPipeFds(cmd.Process.Pid)
|
||||
extFds, err = getPipeFds(criuProcess.Pid)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -1542,14 +1491,23 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
|
||||
oob := make([]byte, 4096)
|
||||
for true {
|
||||
n, oobn, _, _, err := criuClientCon.ReadMsgUnix(buf, oob)
|
||||
if req.Opts != nil && req.Opts.StatusFd != nil {
|
||||
// Close status_fd as soon as we got something back from criu,
|
||||
// assuming it has consumed (reopened) it by this time.
|
||||
// Otherwise it will might be left open forever and whoever
|
||||
// is waiting on it will wait forever.
|
||||
fd := int(*req.Opts.StatusFd)
|
||||
_ = unix.Close(fd)
|
||||
req.Opts.StatusFd = nil
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if n == 0 {
|
||||
return fmt.Errorf("unexpected EOF")
|
||||
return errors.New("unexpected EOF")
|
||||
}
|
||||
if n == len(buf) {
|
||||
return fmt.Errorf("buffer is too small")
|
||||
return errors.New("buffer is too small")
|
||||
}
|
||||
|
||||
resp := new(criurpc.CriuResp)
|
||||
@@ -1559,25 +1517,16 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
|
||||
}
|
||||
if !resp.GetSuccess() {
|
||||
typeString := req.GetType().String()
|
||||
if typeString == "VERSION" {
|
||||
// If the VERSION RPC fails this probably means that the CRIU
|
||||
// version is too old for this RPC. Just return 'nil'.
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("criu failed: type %s errno %d\nlog file: %s", typeString, resp.GetCrErrno(), logPath)
|
||||
}
|
||||
|
||||
t := resp.GetType()
|
||||
switch {
|
||||
case t == criurpc.CriuReqType_VERSION:
|
||||
logrus.Debugf("CRIU version: %s", resp)
|
||||
criuVersionRPC = resp.GetVersion()
|
||||
break
|
||||
case t == criurpc.CriuReqType_FEATURE_CHECK:
|
||||
logrus.Debugf("Feature check says: %s", resp)
|
||||
criuFeatures = resp.GetFeatures()
|
||||
case t == criurpc.CriuReqType_NOTIFY:
|
||||
if err := c.criuNotifications(resp, process, opts, extFds, oob[:oobn]); err != nil {
|
||||
if err := c.criuNotifications(resp, process, cmd, opts, extFds, oob[:oobn]); err != nil {
|
||||
return err
|
||||
}
|
||||
t = criurpc.CriuReqType_NOTIFY
|
||||
@@ -1607,7 +1556,7 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
|
||||
criuClientCon.CloseWrite()
|
||||
// cmd.Wait() waits for cmd.goroutines, which are used for proxying file descriptors.
|
||||
// Here we want to wait only for the CRIU process.
|
||||
st, err := cmd.Process.Wait()
|
||||
criuProcessState, err = criuProcess.Wait()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -1619,8 +1568,8 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
|
||||
// and not the whole series of pre-dump, pre-dump, ...m, dump
|
||||
// If we got the message CriuReqType_PRE_DUMP it means
|
||||
// CRIU was successful and we need to forcefully stop CRIU
|
||||
if !st.Success() && *req.Type != criurpc.CriuReqType_PRE_DUMP {
|
||||
return fmt.Errorf("criu failed: %s\nlog file: %s", st.String(), logPath)
|
||||
if !criuProcessState.Success() && *req.Type != criurpc.CriuReqType_PRE_DUMP {
|
||||
return fmt.Errorf("criu failed: %s\nlog file: %s", criuProcessState.String(), logPath)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -1653,43 +1602,53 @@ func unlockNetwork(config *configs.Config) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Process, opts *CriuOpts, fds []string, oob []byte) error {
|
||||
func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Process, cmd *exec.Cmd, opts *CriuOpts, fds []string, oob []byte) error {
|
||||
notify := resp.GetNotify()
|
||||
if notify == nil {
|
||||
return fmt.Errorf("invalid response: %s", resp.String())
|
||||
}
|
||||
logrus.Debugf("notify: %s\n", notify.GetScript())
|
||||
switch {
|
||||
case notify.GetScript() == "post-dump":
|
||||
script := notify.GetScript()
|
||||
logrus.Debugf("notify: %s\n", script)
|
||||
switch script {
|
||||
case "post-dump":
|
||||
f, err := os.Create(filepath.Join(c.root, "checkpoint"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
f.Close()
|
||||
case notify.GetScript() == "network-unlock":
|
||||
case "network-unlock":
|
||||
if err := unlockNetwork(c.config); err != nil {
|
||||
return err
|
||||
}
|
||||
case notify.GetScript() == "network-lock":
|
||||
case "network-lock":
|
||||
if err := lockNetwork(c.config); err != nil {
|
||||
return err
|
||||
}
|
||||
case notify.GetScript() == "setup-namespaces":
|
||||
case "setup-namespaces":
|
||||
if c.config.Hooks != nil {
|
||||
s, err := c.currentOCIState()
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
s.Pid = int(notify.GetPid())
|
||||
for i, hook := range c.config.Hooks.Prestart {
|
||||
if err := hook.Run(s); err != nil {
|
||||
return newSystemErrorWithCausef(err, "running prestart hook %d", i)
|
||||
}
|
||||
|
||||
if err := c.config.Hooks[configs.Prestart].RunHooks(s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.config.Hooks[configs.CreateRuntime].RunHooks(s); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
case notify.GetScript() == "post-restore":
|
||||
case "post-restore":
|
||||
pid := notify.GetPid()
|
||||
r, err := newRestoredProcess(int(pid), fds)
|
||||
|
||||
p, err := os.FindProcess(int(pid))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
cmd.Process = p
|
||||
|
||||
r, err := newRestoredProcess(cmd, fds)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -1710,7 +1669,7 @@ func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Proc
|
||||
logrus.Error(err)
|
||||
}
|
||||
}
|
||||
case notify.GetScript() == "orphan-pts-master":
|
||||
case "orphan-pts-master":
|
||||
scm, err := unix.ParseSocketControlMessage(oob)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -1727,6 +1686,16 @@ func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Proc
|
||||
if err := utils.SendFd(process.ConsoleSocket, master.Name(), master.Fd()); err != nil {
|
||||
return err
|
||||
}
|
||||
case "status-ready":
|
||||
if opts.StatusFd != -1 {
|
||||
// write \0 to status fd to notify that lazy page server is ready
|
||||
_, err := unix.Write(opts.StatusFd, []byte{0})
|
||||
if err != nil {
|
||||
logrus.Warnf("can't write \\0 to status fd: %v", err)
|
||||
}
|
||||
_ = unix.Close(opts.StatusFd)
|
||||
opts.StatusFd = -1
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -1746,13 +1715,30 @@ func (c *linuxContainer) updateState(process parentProcess) (*State, error) {
|
||||
return state, nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) saveState(s *State) error {
|
||||
f, err := os.Create(filepath.Join(c.root, stateFilename))
|
||||
func (c *linuxContainer) saveState(s *State) (retErr error) {
|
||||
tmpFile, err := ioutil.TempFile(c.root, "state-")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
return utils.WriteJSON(f, s)
|
||||
|
||||
defer func() {
|
||||
if retErr != nil {
|
||||
tmpFile.Close()
|
||||
os.Remove(tmpFile.Name())
|
||||
}
|
||||
}()
|
||||
|
||||
err = utils.WriteJSON(tmpFile, s)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = tmpFile.Close()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
stateFilePath := filepath.Join(c.root, stateFilename)
|
||||
return os.Rename(tmpFile.Name(), stateFilePath)
|
||||
}
|
||||
|
||||
func (c *linuxContainer) deleteState() error {
|
||||
@@ -1778,10 +1764,7 @@ func (c *linuxContainer) refreshState() error {
|
||||
if paused {
|
||||
return c.state.transition(&pausedState{c: c})
|
||||
}
|
||||
t, err := c.runType()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
t := c.runType()
|
||||
switch t {
|
||||
case Created:
|
||||
return c.state.transition(&createdState{c: c})
|
||||
@@ -1791,48 +1774,32 @@ func (c *linuxContainer) refreshState() error {
|
||||
return c.state.transition(&stoppedState{c: c})
|
||||
}
|
||||
|
||||
func (c *linuxContainer) runType() (Status, error) {
|
||||
func (c *linuxContainer) runType() Status {
|
||||
if c.initProcess == nil {
|
||||
return Stopped, nil
|
||||
return Stopped
|
||||
}
|
||||
pid := c.initProcess.pid()
|
||||
stat, err := system.Stat(pid)
|
||||
if err != nil {
|
||||
return Stopped, nil
|
||||
return Stopped
|
||||
}
|
||||
if stat.StartTime != c.initProcessStartTime || stat.State == system.Zombie || stat.State == system.Dead {
|
||||
return Stopped, nil
|
||||
return Stopped
|
||||
}
|
||||
// We create the exec fifo and block on it after the container is created,
|
||||
// and delete it once the container has been started.
|
||||
if _, err := os.Stat(filepath.Join(c.root, execFifoFilename)); err == nil {
|
||||
return Created, nil
|
||||
return Created
|
||||
}
|
||||
return Running, nil
|
||||
return Running
|
||||
}
|
||||
|
||||
func (c *linuxContainer) isPaused() (bool, error) {
|
||||
fcg := c.cgroupManager.GetPaths()["freezer"]
|
||||
if fcg == "" {
|
||||
// A container doesn't have a freezer cgroup
|
||||
return false, nil
|
||||
}
|
||||
pausedState := "FROZEN"
|
||||
filename := "freezer.state"
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
filename = "cgroup.freeze"
|
||||
pausedState = "1"
|
||||
}
|
||||
|
||||
data, err := ioutil.ReadFile(filepath.Join(fcg, filename))
|
||||
state, err := c.cgroupManager.GetFreezerState()
|
||||
if err != nil {
|
||||
// If freezer cgroup is not mounted, the container would just be not paused.
|
||||
if os.IsNotExist(err) || err == syscall.ENODEV {
|
||||
return false, nil
|
||||
}
|
||||
return false, newSystemErrorWithCause(err, "checking if container is paused")
|
||||
return false, err
|
||||
}
|
||||
return bytes.Equal(bytes.TrimSpace(data), []byte(pausedState)), nil
|
||||
return state == configs.Frozen, nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) currentState() (*State, error) {
|
||||
|
2
vendor/github.com/opencontainers/runc/libcontainer/criu_opts_linux.go
generated
vendored
2
vendor/github.com/opencontainers/runc/libcontainer/criu_opts_linux.go
generated
vendored
@@ -36,5 +36,5 @@ type CriuOpts struct {
|
||||
EmptyNs uint32 // don't c/r properties for namespace from this mask
|
||||
AutoDedup bool // auto deduplication for incremental dumps
|
||||
LazyPages bool // restore memory pages lazily using userfaultfd
|
||||
StatusFd string // fd for feedback when lazy server is ready
|
||||
StatusFd int // fd for feedback when lazy server is ready
|
||||
}
|
||||
|
105
vendor/github.com/opencontainers/runc/libcontainer/factory_linux.go
generated
vendored
105
vendor/github.com/opencontainers/runc/libcontainer/factory_linux.go
generated
vendored
@@ -11,7 +11,8 @@ import (
|
||||
"runtime/debug"
|
||||
"strconv"
|
||||
|
||||
"github.com/cyphar/filepath-securejoin"
|
||||
securejoin "github.com/cyphar/filepath-securejoin"
|
||||
"github.com/moby/sys/mountinfo"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
|
||||
@@ -19,7 +20,6 @@ import (
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/configs/validate"
|
||||
"github.com/opencontainers/runc/libcontainer/intelrdt"
|
||||
"github.com/opencontainers/runc/libcontainer/mount"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/pkg/errors"
|
||||
|
||||
@@ -50,28 +50,60 @@ func InitArgs(args ...string) func(*LinuxFactory) error {
|
||||
}
|
||||
}
|
||||
|
||||
// SystemdCgroups is an options func to configure a LinuxFactory to return
|
||||
// containers that use systemd to create and manage cgroups.
|
||||
func SystemdCgroups(l *LinuxFactory) error {
|
||||
systemdCgroupsManager, err := systemd.NewSystemdCgroupsManager()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
l.NewCgroupsManager = systemdCgroupsManager
|
||||
return nil
|
||||
}
|
||||
|
||||
func getUnifiedPath(paths map[string]string) string {
|
||||
unifiedPath := ""
|
||||
path := ""
|
||||
for k, v := range paths {
|
||||
if unifiedPath == "" {
|
||||
unifiedPath = v
|
||||
} else if v != unifiedPath {
|
||||
panic(errors.Errorf("expected %q path to be unified path %q, got %q", k, unifiedPath, v))
|
||||
if path == "" {
|
||||
path = v
|
||||
} else if v != path {
|
||||
panic(errors.Errorf("expected %q path to be unified path %q, got %q", k, path, v))
|
||||
}
|
||||
}
|
||||
// can be empty
|
||||
return unifiedPath
|
||||
if path != "" {
|
||||
if filepath.Clean(path) != path || !filepath.IsAbs(path) {
|
||||
panic(errors.Errorf("invalid dir path %q", path))
|
||||
}
|
||||
}
|
||||
|
||||
return path
|
||||
}
|
||||
|
||||
func systemdCgroupV2(l *LinuxFactory, rootless bool) error {
|
||||
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return systemd.NewUnifiedManager(config, getUnifiedPath(paths), rootless)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// SystemdCgroups is an options func to configure a LinuxFactory to return
|
||||
// containers that use systemd to create and manage cgroups.
|
||||
func SystemdCgroups(l *LinuxFactory) error {
|
||||
if !systemd.IsRunningSystemd() {
|
||||
return fmt.Errorf("systemd not running on this host, can't use systemd as cgroups manager")
|
||||
}
|
||||
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
return systemdCgroupV2(l, false)
|
||||
}
|
||||
|
||||
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return systemd.NewLegacyManager(config, paths)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// RootlessSystemdCgroups is rootless version of SystemdCgroups.
|
||||
func RootlessSystemdCgroups(l *LinuxFactory) error {
|
||||
if !systemd.IsRunningSystemd() {
|
||||
return fmt.Errorf("systemd not running on this host, can't use systemd as cgroups manager")
|
||||
}
|
||||
|
||||
if !cgroups.IsCgroup2UnifiedMode() {
|
||||
return fmt.Errorf("cgroup v2 not enabled on this host, can't use systemd (rootless) as cgroups manager")
|
||||
}
|
||||
return systemdCgroupV2(l, true)
|
||||
}
|
||||
|
||||
func cgroupfs2(l *LinuxFactory, rootless bool) error {
|
||||
@@ -85,20 +117,21 @@ func cgroupfs2(l *LinuxFactory, rootless bool) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func cgroupfs(l *LinuxFactory, rootless bool) error {
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
return cgroupfs2(l, rootless)
|
||||
}
|
||||
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return fs.NewManager(config, paths, rootless)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Cgroupfs is an options func to configure a LinuxFactory to return containers
|
||||
// that use the native cgroups filesystem implementation to create and manage
|
||||
// cgroups.
|
||||
func Cgroupfs(l *LinuxFactory) error {
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
return cgroupfs2(l, false)
|
||||
}
|
||||
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return &fs.Manager{
|
||||
Cgroups: config,
|
||||
Paths: paths,
|
||||
}
|
||||
}
|
||||
return nil
|
||||
return cgroupfs(l, false)
|
||||
}
|
||||
|
||||
// RootlessCgroupfs is an options func to configure a LinuxFactory to return
|
||||
@@ -108,17 +141,7 @@ func Cgroupfs(l *LinuxFactory) error {
|
||||
// during rootless container (including euid=0 in userns) setup (while still allowing cgroup usage if
|
||||
// they've been set up properly).
|
||||
func RootlessCgroupfs(l *LinuxFactory) error {
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
return cgroupfs2(l, true)
|
||||
}
|
||||
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return &fs.Manager{
|
||||
Cgroups: config,
|
||||
Rootless: true,
|
||||
Paths: paths,
|
||||
}
|
||||
}
|
||||
return nil
|
||||
return cgroupfs(l, true)
|
||||
}
|
||||
|
||||
// IntelRdtFs is an options func to configure a LinuxFactory to return
|
||||
@@ -137,7 +160,7 @@ func IntelRdtFs(l *LinuxFactory) error {
|
||||
|
||||
// TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs.
|
||||
func TmpfsRoot(l *LinuxFactory) error {
|
||||
mounted, err := mount.Mounted(l.Root)
|
||||
mounted, err := mountinfo.Mounted(l.Root)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
2
vendor/github.com/opencontainers/runc/libcontainer/generic_error.go
generated
vendored
2
vendor/github.com/opencontainers/runc/libcontainer/generic_error.go
generated
vendored
@@ -80,7 +80,7 @@ func (e *genericError) Error() string {
|
||||
return e.Message
|
||||
}
|
||||
frame := e.Stack.Frames[0]
|
||||
return fmt.Sprintf("%s:%d: %s caused %q", frame.File, frame.Line, e.Cause, e.Message)
|
||||
return fmt.Sprintf("%s:%d: %s caused: %s", frame.File, frame.Line, e.Cause, e.Message)
|
||||
}
|
||||
|
||||
func (e *genericError) Code() ErrorCode {
|
||||
|
11
vendor/github.com/opencontainers/runc/libcontainer/init_linux.go
generated
vendored
11
vendor/github.com/opencontainers/runc/libcontainer/init_linux.go
generated
vendored
@@ -10,7 +10,6 @@ import (
|
||||
"net"
|
||||
"os"
|
||||
"strings"
|
||||
"syscall" // only for Errno
|
||||
"unsafe"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
@@ -21,6 +20,7 @@ import (
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/opencontainers/runc/libcontainer/user"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/vishvananda/netlink"
|
||||
@@ -68,6 +68,7 @@ type initConfig struct {
|
||||
ConsoleHeight uint16 `json:"console_height"`
|
||||
RootlessEUID bool `json:"rootless_euid,omitempty"`
|
||||
RootlessCgroups bool `json:"rootless_cgroups,omitempty"`
|
||||
SpecState *specs.State `json:"spec_state,omitempty"`
|
||||
}
|
||||
|
||||
type initer interface {
|
||||
@@ -272,10 +273,10 @@ func setupUser(config *initConfig) error {
|
||||
// Rather than just erroring out later in setuid(2) and setgid(2), check
|
||||
// that the user is mapped here.
|
||||
if _, err := config.Config.HostUID(execUser.Uid); err != nil {
|
||||
return fmt.Errorf("cannot set uid to unmapped user in user namespace")
|
||||
return errors.New("cannot set uid to unmapped user in user namespace")
|
||||
}
|
||||
if _, err := config.Config.HostGID(execUser.Gid); err != nil {
|
||||
return fmt.Errorf("cannot set gid to unmapped user in user namespace")
|
||||
return errors.New("cannot set gid to unmapped user in user namespace")
|
||||
}
|
||||
|
||||
if config.RootlessEUID {
|
||||
@@ -284,7 +285,7 @@ func setupUser(config *initConfig) error {
|
||||
// this check earlier, but if libcontainer.Process.User was typesafe
|
||||
// this might work.
|
||||
if len(addGroups) > 0 {
|
||||
return fmt.Errorf("cannot set any additional groups in a rootless container")
|
||||
return errors.New("cannot set any additional groups in a rootless container")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -455,7 +456,7 @@ func isWaitable(pid int) (bool, error) {
|
||||
// isNoChildren returns true if err represents a unix.ECHILD (formerly syscall.ECHILD) false otherwise
|
||||
func isNoChildren(err error) bool {
|
||||
switch err := err.(type) {
|
||||
case syscall.Errno:
|
||||
case unix.Errno:
|
||||
if err == unix.ECHILD {
|
||||
return true
|
||||
}
|
||||
|
7
vendor/github.com/opencontainers/runc/libcontainer/intelrdt/BUILD
generated
vendored
7
vendor/github.com/opencontainers/runc/libcontainer/intelrdt/BUILD
generated
vendored
@@ -3,13 +3,18 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library")
|
||||
go_library(
|
||||
name = "go_default_library",
|
||||
srcs = [
|
||||
"cmt.go",
|
||||
"intelrdt.go",
|
||||
"mbm.go",
|
||||
"monitoring.go",
|
||||
"stats.go",
|
||||
],
|
||||
importmap = "k8s.io/kubernetes/vendor/github.com/opencontainers/runc/libcontainer/intelrdt",
|
||||
importpath = "github.com/opencontainers/runc/libcontainer/intelrdt",
|
||||
visibility = ["//visibility:public"],
|
||||
deps = select({
|
||||
deps = [
|
||||
"//vendor/github.com/sirupsen/logrus:go_default_library",
|
||||
] + select({
|
||||
"@io_bazel_rules_go//go/platform:android": [
|
||||
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
|
||||
],
|
||||
|
22
vendor/github.com/opencontainers/runc/libcontainer/intelrdt/cmt.go
generated
vendored
Normal file
22
vendor/github.com/opencontainers/runc/libcontainer/intelrdt/cmt.go
generated
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
package intelrdt
|
||||
|
||||
var (
|
||||
cmtEnabled bool
|
||||
)
|
||||
|
||||
// IsCMTEnabled reports whether Intel RDT/CMT is enabled.
|
||||
func IsCMTEnabled() bool {
|
||||
return cmtEnabled
|
||||
}
|
||||
|
||||
func getCMTNumaNodeStats(numaPath string) (*CMTNumaNodeStats, error) {
|
||||
stats := &CMTNumaNodeStats{}
|
||||
|
||||
llcOccupancy, err := getIntelRdtParamUint(numaPath, "llc_occupancy")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stats.LLCOccupancy = llcOccupancy
|
||||
|
||||
return stats, nil
|
||||
}
|
71
vendor/github.com/opencontainers/runc/libcontainer/intelrdt/intelrdt.go
generated
vendored
71
vendor/github.com/opencontainers/runc/libcontainer/intelrdt/intelrdt.go
generated
vendored
@@ -55,6 +55,10 @@ import (
|
||||
* | | |-- cbm_mask
|
||||
* | | |-- min_cbm_bits
|
||||
* | | |-- num_closids
|
||||
* | |-- L3_MON
|
||||
* | | |-- max_threshold_occupancy
|
||||
* | | |-- mon_features
|
||||
* | | |-- num_rmids
|
||||
* | |-- MB
|
||||
* | |-- bandwidth_gran
|
||||
* | |-- delay_linear
|
||||
@@ -191,8 +195,7 @@ type intelRdtData struct {
|
||||
// Check if Intel RDT sub-features are enabled in init()
|
||||
func init() {
|
||||
// 1. Check if hardware and kernel support Intel RDT sub-features
|
||||
// "cat_l3" flag for CAT and "mba" flag for MBA
|
||||
isCatFlagSet, isMbaFlagSet, err := parseCpuInfoFile("/proc/cpuinfo")
|
||||
flagsSet, err := parseCpuInfoFile("/proc/cpuinfo")
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
@@ -207,7 +210,7 @@ func init() {
|
||||
// "resource control" filesystem. Intel RDT sub-features can be
|
||||
// selectively disabled or enabled by kernel command line
|
||||
// (e.g., rdt=!l3cat,mba) in 4.14 and newer kernel
|
||||
if isCatFlagSet {
|
||||
if flagsSet.CAT {
|
||||
if _, err := os.Stat(filepath.Join(intelRdtRoot, "info", "L3")); err == nil {
|
||||
isCatEnabled = true
|
||||
}
|
||||
@@ -217,11 +220,23 @@ func init() {
|
||||
// MBA should be enabled because MBA Software Controller
|
||||
// depends on MBA
|
||||
isMbaEnabled = true
|
||||
} else if isMbaFlagSet {
|
||||
} else if flagsSet.MBA {
|
||||
if _, err := os.Stat(filepath.Join(intelRdtRoot, "info", "MB")); err == nil {
|
||||
isMbaEnabled = true
|
||||
}
|
||||
}
|
||||
|
||||
if flagsSet.MBMTotal || flagsSet.MBMLocal {
|
||||
if _, err := os.Stat(filepath.Join(intelRdtRoot, "info", "L3_MON")); err == nil {
|
||||
mbmEnabled = true
|
||||
cmtEnabled = true
|
||||
}
|
||||
|
||||
enabledMonFeatures, err = getMonFeatures(intelRdtRoot)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Return the mount point path of Intel RDT "resource control" filesystem
|
||||
@@ -298,40 +313,52 @@ func isIntelRdtMounted() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
func parseCpuInfoFile(path string) (bool, bool, error) {
|
||||
isCatFlagSet := false
|
||||
isMbaFlagSet := false
|
||||
type cpuInfoFlags struct {
|
||||
CAT bool // Cache Allocation Technology
|
||||
MBA bool // Memory Bandwidth Allocation
|
||||
|
||||
// Memory Bandwidth Monitoring related.
|
||||
MBMTotal bool
|
||||
MBMLocal bool
|
||||
}
|
||||
|
||||
func parseCpuInfoFile(path string) (cpuInfoFlags, error) {
|
||||
infoFlags := cpuInfoFlags{}
|
||||
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return false, false, err
|
||||
return infoFlags, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
s := bufio.NewScanner(f)
|
||||
for s.Scan() {
|
||||
if err := s.Err(); err != nil {
|
||||
return false, false, err
|
||||
}
|
||||
|
||||
line := s.Text()
|
||||
|
||||
// Search "cat_l3" and "mba" flags in first "flags" line
|
||||
if strings.Contains(line, "flags") {
|
||||
if strings.HasPrefix(line, "flags") {
|
||||
flags := strings.Split(line, " ")
|
||||
// "cat_l3" flag for CAT and "mba" flag for MBA
|
||||
for _, flag := range flags {
|
||||
switch flag {
|
||||
case "cat_l3":
|
||||
isCatFlagSet = true
|
||||
infoFlags.CAT = true
|
||||
case "mba":
|
||||
isMbaFlagSet = true
|
||||
infoFlags.MBA = true
|
||||
case "cqm_mbm_total":
|
||||
infoFlags.MBMTotal = true
|
||||
case "cqm_mbm_local":
|
||||
infoFlags.MBMLocal = true
|
||||
}
|
||||
}
|
||||
return isCatFlagSet, isMbaFlagSet, nil
|
||||
return infoFlags, nil
|
||||
}
|
||||
}
|
||||
return isCatFlagSet, isMbaFlagSet, nil
|
||||
if err := s.Err(); err != nil {
|
||||
return infoFlags, err
|
||||
}
|
||||
|
||||
return infoFlags, nil
|
||||
}
|
||||
|
||||
func parseUint(s string, base, bitSize int) (uint64, error) {
|
||||
@@ -586,7 +613,8 @@ func (m *IntelRdtManager) GetStats() (*Stats, error) {
|
||||
schemaRootStrings := strings.Split(tmpRootStrings, "\n")
|
||||
|
||||
// The L3 cache and memory bandwidth schemata in 'container_id' group
|
||||
tmpStrings, err := getIntelRdtParamString(m.GetPath(), "schemata")
|
||||
containerPath := m.GetPath()
|
||||
tmpStrings, err := getIntelRdtParamString(containerPath, "schemata")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -638,6 +666,11 @@ func (m *IntelRdtManager) GetStats() (*Stats, error) {
|
||||
}
|
||||
}
|
||||
|
||||
err = getMonitoringStats(containerPath, stats)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
@@ -758,7 +791,7 @@ type LastCmdError struct {
|
||||
}
|
||||
|
||||
func (e *LastCmdError) Error() string {
|
||||
return fmt.Sprintf(e.Err.Error() + ", last_cmd_status: " + e.LastCmdStatus)
|
||||
return e.Err.Error() + ", last_cmd_status: " + e.LastCmdStatus
|
||||
}
|
||||
|
||||
func NewLastCmdError(err error) error {
|
||||
|
34
vendor/github.com/opencontainers/runc/libcontainer/intelrdt/mbm.go
generated
vendored
Normal file
34
vendor/github.com/opencontainers/runc/libcontainer/intelrdt/mbm.go
generated
vendored
Normal file
@@ -0,0 +1,34 @@
|
||||
// +build linux
|
||||
|
||||
package intelrdt
|
||||
|
||||
var (
|
||||
// The flag to indicate if Intel RDT/MBM is enabled
|
||||
mbmEnabled bool
|
||||
)
|
||||
|
||||
// IsMBMEnabled reports whether Intel RDT/MBM is enabled.
|
||||
func IsMBMEnabled() bool {
|
||||
return mbmEnabled
|
||||
}
|
||||
|
||||
func getMBMNumaNodeStats(numaPath string) (*MBMNumaNodeStats, error) {
|
||||
stats := &MBMNumaNodeStats{}
|
||||
if enabledMonFeatures.mbmTotalBytes {
|
||||
mbmTotalBytes, err := getIntelRdtParamUint(numaPath, "mbm_total_bytes")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stats.MBMTotalBytes = mbmTotalBytes
|
||||
}
|
||||
|
||||
if enabledMonFeatures.mbmLocalBytes {
|
||||
mbmLocalBytes, err := getIntelRdtParamUint(numaPath, "mbm_local_bytes")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stats.MBMLocalBytes = mbmLocalBytes
|
||||
}
|
||||
|
||||
return stats, nil
|
||||
}
|
85
vendor/github.com/opencontainers/runc/libcontainer/intelrdt/monitoring.go
generated
vendored
Normal file
85
vendor/github.com/opencontainers/runc/libcontainer/intelrdt/monitoring.go
generated
vendored
Normal file
@@ -0,0 +1,85 @@
|
||||
package intelrdt
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"github.com/sirupsen/logrus"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
)
|
||||
|
||||
var (
|
||||
enabledMonFeatures monFeatures
|
||||
)
|
||||
|
||||
type monFeatures struct {
|
||||
mbmTotalBytes bool
|
||||
mbmLocalBytes bool
|
||||
llcOccupancy bool
|
||||
}
|
||||
|
||||
func getMonFeatures(intelRdtRoot string) (monFeatures, error) {
|
||||
file, err := os.Open(filepath.Join(intelRdtRoot, "info", "L3_MON", "mon_features"))
|
||||
defer file.Close()
|
||||
if err != nil {
|
||||
return monFeatures{}, err
|
||||
}
|
||||
return parseMonFeatures(file)
|
||||
}
|
||||
|
||||
func parseMonFeatures(reader io.Reader) (monFeatures, error) {
|
||||
scanner := bufio.NewScanner(reader)
|
||||
|
||||
monFeatures := monFeatures{}
|
||||
|
||||
for scanner.Scan() {
|
||||
switch feature := scanner.Text(); feature {
|
||||
case "mbm_total_bytes":
|
||||
monFeatures.mbmTotalBytes = true
|
||||
case "mbm_local_bytes":
|
||||
monFeatures.mbmLocalBytes = true
|
||||
case "llc_occupancy":
|
||||
monFeatures.llcOccupancy = true
|
||||
default:
|
||||
logrus.Warnf("Unsupported Intel RDT monitoring feature: %s", feature)
|
||||
}
|
||||
}
|
||||
|
||||
return monFeatures, scanner.Err()
|
||||
}
|
||||
|
||||
func getMonitoringStats(containerPath string, stats *Stats) error {
|
||||
numaFiles, err := ioutil.ReadDir(filepath.Join(containerPath, "mon_data"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var mbmStats []MBMNumaNodeStats
|
||||
var cmtStats []CMTNumaNodeStats
|
||||
|
||||
for _, file := range numaFiles {
|
||||
if file.IsDir() {
|
||||
numaPath := filepath.Join(containerPath, "mon_data", file.Name())
|
||||
if IsMBMEnabled() {
|
||||
numaMBMStats, err := getMBMNumaNodeStats(numaPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
mbmStats = append(mbmStats, *numaMBMStats)
|
||||
}
|
||||
if IsCMTEnabled() {
|
||||
numaCMTStats, err := getCMTNumaNodeStats(numaPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
cmtStats = append(cmtStats, *numaCMTStats)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stats.MBMStats = &mbmStats
|
||||
stats.CMTStats = &cmtStats
|
||||
|
||||
return err
|
||||
}
|
19
vendor/github.com/opencontainers/runc/libcontainer/intelrdt/stats.go
generated
vendored
19
vendor/github.com/opencontainers/runc/libcontainer/intelrdt/stats.go
generated
vendored
@@ -15,6 +15,19 @@ type MemBwInfo struct {
|
||||
NumClosids uint64 `json:"num_closids,omitempty"`
|
||||
}
|
||||
|
||||
type MBMNumaNodeStats struct {
|
||||
// The 'mbm_total_bytes' in 'container_id' group.
|
||||
MBMTotalBytes uint64 `json:"mbm_total_bytes,omitempty"`
|
||||
|
||||
// The 'mbm_local_bytes' in 'container_id' group.
|
||||
MBMLocalBytes uint64 `json:"mbm_local_bytes,omitempty"`
|
||||
}
|
||||
|
||||
type CMTNumaNodeStats struct {
|
||||
// The 'llc_occupancy' in 'container_id' group.
|
||||
LLCOccupancy uint64 `json:"llc_occupancy,omitempty"`
|
||||
}
|
||||
|
||||
type Stats struct {
|
||||
// The read-only L3 cache information
|
||||
L3CacheInfo *L3CacheInfo `json:"l3_cache_info,omitempty"`
|
||||
@@ -33,6 +46,12 @@ type Stats struct {
|
||||
|
||||
// The memory bandwidth schema in 'container_id' group
|
||||
MemBwSchema string `json:"mem_bw_schema,omitempty"`
|
||||
|
||||
// The memory bandwidth monitoring statistics from NUMA nodes in 'container_id' group
|
||||
MBMStats *[]MBMNumaNodeStats `json:"mbm_stats,omitempty"`
|
||||
|
||||
// The cache monitoring technology statistics from NUMA nodes in 'container_id' group
|
||||
CMTStats *[]CMTNumaNodeStats `json:"cmt_stats,omitempty"`
|
||||
}
|
||||
|
||||
func NewStats() *Stats {
|
||||
|
3
vendor/github.com/opencontainers/runc/libcontainer/keys/keyctl.go
generated
vendored
3
vendor/github.com/opencontainers/runc/libcontainer/keys/keyctl.go
generated
vendored
@@ -3,7 +3,6 @@
|
||||
package keys
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
@@ -33,7 +32,7 @@ func ModKeyringPerm(ringId KeySerial, mask, setbits uint32) error {
|
||||
|
||||
res := strings.Split(dest, ";")
|
||||
if len(res) < 5 {
|
||||
return fmt.Errorf("Destination buffer for key description is too small")
|
||||
return errors.New("Destination buffer for key description is too small")
|
||||
}
|
||||
|
||||
// parse permissions
|
||||
|
27
vendor/github.com/opencontainers/runc/libcontainer/mount/BUILD
generated
vendored
27
vendor/github.com/opencontainers/runc/libcontainer/mount/BUILD
generated
vendored
@@ -1,27 +0,0 @@
|
||||
load("@io_bazel_rules_go//go:def.bzl", "go_library")
|
||||
|
||||
go_library(
|
||||
name = "go_default_library",
|
||||
srcs = [
|
||||
"mount.go",
|
||||
"mount_linux.go",
|
||||
"mountinfo.go",
|
||||
],
|
||||
importmap = "k8s.io/kubernetes/vendor/github.com/opencontainers/runc/libcontainer/mount",
|
||||
importpath = "github.com/opencontainers/runc/libcontainer/mount",
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "package-srcs",
|
||||
srcs = glob(["**"]),
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:private"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "all-srcs",
|
||||
srcs = [":package-srcs"],
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
23
vendor/github.com/opencontainers/runc/libcontainer/mount/mount.go
generated
vendored
23
vendor/github.com/opencontainers/runc/libcontainer/mount/mount.go
generated
vendored
@@ -1,23 +0,0 @@
|
||||
package mount
|
||||
|
||||
// GetMounts retrieves a list of mounts for the current running process.
|
||||
func GetMounts() ([]*Info, error) {
|
||||
return parseMountTable()
|
||||
}
|
||||
|
||||
// Mounted looks at /proc/self/mountinfo to determine of the specified
|
||||
// mountpoint has been mounted
|
||||
func Mounted(mountpoint string) (bool, error) {
|
||||
entries, err := parseMountTable()
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
// Search the table for the mountpoint
|
||||
for _, e := range entries {
|
||||
if e.Mountpoint == mountpoint {
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
return false, nil
|
||||
}
|
82
vendor/github.com/opencontainers/runc/libcontainer/mount/mount_linux.go
generated
vendored
82
vendor/github.com/opencontainers/runc/libcontainer/mount/mount_linux.go
generated
vendored
@@ -1,82 +0,0 @@
|
||||
// +build linux
|
||||
|
||||
package mount
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const (
|
||||
/* 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
|
||||
(1)(2)(3) (4) (5) (6) (7) (8) (9) (10) (11)
|
||||
|
||||
(1) mount ID: unique identifier of the mount (may be reused after umount)
|
||||
(2) parent ID: ID of parent (or of self for the top of the mount tree)
|
||||
(3) major:minor: value of st_dev for files on filesystem
|
||||
(4) root: root of the mount within the filesystem
|
||||
(5) mount point: mount point relative to the process's root
|
||||
(6) mount options: per mount options
|
||||
(7) optional fields: zero or more fields of the form "tag[:value]"
|
||||
(8) separator: marks the end of the optional fields
|
||||
(9) filesystem type: name of filesystem of the form "type[.subtype]"
|
||||
(10) mount source: filesystem specific information or "none"
|
||||
(11) super options: per super block options*/
|
||||
mountinfoFormat = "%d %d %d:%d %s %s %s %s"
|
||||
)
|
||||
|
||||
// Parse /proc/self/mountinfo because comparing Dev and ino does not work from
|
||||
// bind mounts
|
||||
func parseMountTable() ([]*Info, error) {
|
||||
f, err := os.Open("/proc/self/mountinfo")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
return parseInfoFile(f)
|
||||
}
|
||||
|
||||
func parseInfoFile(r io.Reader) ([]*Info, error) {
|
||||
var (
|
||||
s = bufio.NewScanner(r)
|
||||
out = []*Info{}
|
||||
)
|
||||
|
||||
for s.Scan() {
|
||||
if err := s.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var (
|
||||
p = &Info{}
|
||||
text = s.Text()
|
||||
optionalFields string
|
||||
)
|
||||
|
||||
if _, err := fmt.Sscanf(text, mountinfoFormat,
|
||||
&p.ID, &p.Parent, &p.Major, &p.Minor,
|
||||
&p.Root, &p.Mountpoint, &p.Opts, &optionalFields); err != nil {
|
||||
return nil, fmt.Errorf("Scanning '%s' failed: %s", text, err)
|
||||
}
|
||||
// Safe as mountinfo encodes mountpoints with spaces as \040.
|
||||
index := strings.Index(text, " - ")
|
||||
postSeparatorFields := strings.Fields(text[index+3:])
|
||||
if len(postSeparatorFields) < 3 {
|
||||
return nil, fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
|
||||
}
|
||||
|
||||
if optionalFields != "-" {
|
||||
p.Optional = optionalFields
|
||||
}
|
||||
|
||||
p.Fstype = postSeparatorFields[0]
|
||||
p.Source = postSeparatorFields[1]
|
||||
p.VfsOpts = strings.Join(postSeparatorFields[2:], " ")
|
||||
out = append(out, p)
|
||||
}
|
||||
return out, nil
|
||||
}
|
40
vendor/github.com/opencontainers/runc/libcontainer/mount/mountinfo.go
generated
vendored
40
vendor/github.com/opencontainers/runc/libcontainer/mount/mountinfo.go
generated
vendored
@@ -1,40 +0,0 @@
|
||||
package mount
|
||||
|
||||
// Info describes a single mounted filesystem. Values of this type are filled
// in from the content of a /proc/<pid>/mountinfo file.
type Info struct {
	// ID uniquely identifies the mount; the same value may be handed out
	// again once the mount has been unmounted.
	// Parent is the ID of the parent mount. The mount at the top of the mount
	// tree carries its own ID here.
	ID, Parent int

	// Major and Minor are the two halves of the device ID: Major identifies
	// the device class, Minor a specific instance of device.
	Major, Minor int

	// Root is the root of the mount within the filesystem.
	// Mountpoint is the mount point, relative to the process's root.
	Root, Mountpoint string

	// Opts holds the mount-specific options.
	// Optional holds the optional fields.
	Opts, Optional string

	// Fstype is the filesystem type, such as EXT3.
	// Source is filesystem specific information, or "none".
	Fstype, Source string

	// VfsOpts holds the per super block options.
	VfsOpts string
}
|
13
vendor/github.com/opencontainers/runc/libcontainer/notify_linux.go
generated
vendored
13
vendor/github.com/opencontainers/runc/libcontainer/notify_linux.go
generated
vendored
@@ -3,6 +3,7 @@
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
@@ -11,8 +12,6 @@ import (
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const oomCgroupName = "memory"
|
||||
|
||||
type PressureLevel uint
|
||||
|
||||
const (
|
||||
@@ -66,19 +65,17 @@ func registerMemoryEvent(cgDir string, evName string, arg string) (<-chan struct
|
||||
|
||||
// notifyOnOOM returns channel on which you can expect event about OOM,
|
||||
// if process died without OOM this channel will be closed.
|
||||
func notifyOnOOM(paths map[string]string) (<-chan struct{}, error) {
|
||||
dir := paths[oomCgroupName]
|
||||
func notifyOnOOM(dir string) (<-chan struct{}, error) {
|
||||
if dir == "" {
|
||||
return nil, fmt.Errorf("path %q missing", oomCgroupName)
|
||||
return nil, errors.New("memory controller missing")
|
||||
}
|
||||
|
||||
return registerMemoryEvent(dir, "memory.oom_control", "")
|
||||
}
|
||||
|
||||
func notifyMemoryPressure(paths map[string]string, level PressureLevel) (<-chan struct{}, error) {
|
||||
dir := paths[oomCgroupName]
|
||||
func notifyMemoryPressure(dir string, level PressureLevel) (<-chan struct{}, error) {
|
||||
if dir == "" {
|
||||
return nil, fmt.Errorf("path %q missing", oomCgroupName)
|
||||
return nil, errors.New("memory controller missing")
|
||||
}
|
||||
|
||||
if level > CriticalPressure {
|
||||
|
102
vendor/github.com/opencontainers/runc/libcontainer/notify_linux_v2.go
generated
vendored
Normal file
102
vendor/github.com/opencontainers/runc/libcontainer/notify_linux_v2.go
generated
vendored
Normal file
@@ -0,0 +1,102 @@
|
||||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unsafe"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// getValueFromCgroup reads the file at path, which is expected to hold
// "<key> <value>" pairs one per line, and returns the integer value of the
// first line whose key equals key.
//
// Only lines that consist of exactly two single-space-separated tokens are
// considered. If no such line matches, 0 and a nil error are returned. An
// error is returned when the file cannot be read or when the matching value
// is not a valid integer.
func getValueFromCgroup(path, key string) (int, error) {
	raw, err := ioutil.ReadFile(path)
	if err != nil {
		return 0, err
	}

	for _, entry := range strings.Split(string(raw), "\n") {
		fields := strings.Split(entry, " ")
		if len(fields) != 2 || fields[0] != key {
			continue
		}
		// The first matching line decides the result, conversion error
		// included.
		return strconv.Atoi(fields[1])
	}
	return 0, nil
}
|
||||
|
||||
func registerMemoryEventV2(cgDir, evName, cgEvName string) (<-chan struct{}, error) {
|
||||
eventControlPath := filepath.Join(cgDir, evName)
|
||||
cgEvPath := filepath.Join(cgDir, cgEvName)
|
||||
fd, err := unix.InotifyInit()
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "unable to init inotify")
|
||||
}
|
||||
// watching oom kill
|
||||
evFd, err := unix.InotifyAddWatch(fd, eventControlPath, unix.IN_MODIFY)
|
||||
if err != nil {
|
||||
unix.Close(fd)
|
||||
return nil, errors.Wrap(err, "unable to add inotify watch")
|
||||
}
|
||||
// Because no `unix.IN_DELETE|unix.IN_DELETE_SELF` event for cgroup file system, so watching all process exited
|
||||
cgFd, err := unix.InotifyAddWatch(fd, cgEvPath, unix.IN_MODIFY)
|
||||
if err != nil {
|
||||
unix.Close(fd)
|
||||
return nil, errors.Wrap(err, "unable to add inotify watch")
|
||||
}
|
||||
ch := make(chan struct{})
|
||||
go func() {
|
||||
var (
|
||||
buffer [unix.SizeofInotifyEvent + unix.PathMax + 1]byte
|
||||
offset uint32
|
||||
)
|
||||
defer func() {
|
||||
unix.Close(fd)
|
||||
close(ch)
|
||||
}()
|
||||
|
||||
for {
|
||||
n, err := unix.Read(fd, buffer[:])
|
||||
if err != nil {
|
||||
logrus.Warnf("unable to read event data from inotify, got error: %v", err)
|
||||
return
|
||||
}
|
||||
if n < unix.SizeofInotifyEvent {
|
||||
logrus.Warnf("we should read at least %d bytes from inotify, but got %d bytes.", unix.SizeofInotifyEvent, n)
|
||||
return
|
||||
}
|
||||
offset = 0
|
||||
for offset <= uint32(n-unix.SizeofInotifyEvent) {
|
||||
rawEvent := (*unix.InotifyEvent)(unsafe.Pointer(&buffer[offset]))
|
||||
offset += unix.SizeofInotifyEvent + uint32(rawEvent.Len)
|
||||
if rawEvent.Mask&unix.IN_MODIFY != unix.IN_MODIFY {
|
||||
continue
|
||||
}
|
||||
switch int(rawEvent.Wd) {
|
||||
case evFd:
|
||||
oom, err := getValueFromCgroup(eventControlPath, "oom_kill")
|
||||
if err != nil || oom > 0 {
|
||||
ch <- struct{}{}
|
||||
}
|
||||
case cgFd:
|
||||
pids, err := getValueFromCgroup(cgEvPath, "populated")
|
||||
if err != nil || pids == 0 {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
return ch, nil
|
||||
}
|
||||
|
||||
// notifyOnOOMV2 returns channel on which you can expect event about OOM,
|
||||
// if process died without OOM this channel will be closed.
|
||||
func notifyOnOOMV2(path string) (<-chan struct{}, error) {
|
||||
return registerMemoryEventV2(path, "memory.events", "cgroup.events")
|
||||
}
|
118
vendor/github.com/opencontainers/runc/libcontainer/process_linux.go
generated
vendored
118
vendor/github.com/opencontainers/runc/libcontainer/process_linux.go
generated
vendored
@@ -11,15 +11,16 @@ import (
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"syscall" // only for Signal
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/intelrdt"
|
||||
"github.com/opencontainers/runc/libcontainer/logs"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
@@ -68,6 +69,7 @@ type setnsProcess struct {
|
||||
fds []string
|
||||
process *Process
|
||||
bootstrapData io.Reader
|
||||
initProcessPid int
|
||||
}
|
||||
|
||||
func (p *setnsProcess) startTime() (uint64, error) {
|
||||
@@ -76,7 +78,7 @@ func (p *setnsProcess) startTime() (uint64, error) {
|
||||
}
|
||||
|
||||
func (p *setnsProcess) signal(sig os.Signal) error {
|
||||
s, ok := sig.(syscall.Signal)
|
||||
s, ok := sig.(unix.Signal)
|
||||
if !ok {
|
||||
return errors.New("os: unsupported signal type")
|
||||
}
|
||||
@@ -102,7 +104,25 @@ func (p *setnsProcess) start() (err error) {
|
||||
}
|
||||
if len(p.cgroupPaths) > 0 {
|
||||
if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil && !p.rootlessCgroups {
|
||||
return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid())
|
||||
// On cgroup v2 + nesting + domain controllers, EnterPid may fail with EBUSY.
|
||||
// https://github.com/opencontainers/runc/issues/2356#issuecomment-621277643
|
||||
// Try to join the cgroup of InitProcessPid.
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
initProcCgroupFile := fmt.Sprintf("/proc/%d/cgroup", p.initProcessPid)
|
||||
initCg, initCgErr := cgroups.ParseCgroupFile(initProcCgroupFile)
|
||||
if initCgErr == nil {
|
||||
if initCgPath, ok := initCg[""]; ok {
|
||||
initCgDirpath := filepath.Join(fs2.UnifiedMountpoint, initCgPath)
|
||||
logrus.Debugf("adding pid %d to cgroups %v failed (%v), attempting to join %q (obtained from %s)",
|
||||
p.pid(), p.cgroupPaths, err, initCg, initCgDirpath)
|
||||
// NOTE: initCgDirPath is not guaranteed to exist because we didn't pause the container.
|
||||
err = cgroups.WriteCgroupProc(initCgDirpath, p.pid())
|
||||
}
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid())
|
||||
}
|
||||
}
|
||||
}
|
||||
if p.intelRdtPath != "" {
|
||||
@@ -132,7 +152,7 @@ func (p *setnsProcess) start() (err error) {
|
||||
// This shouldn't happen.
|
||||
panic("unexpected procHooks in setns")
|
||||
default:
|
||||
return newSystemError(fmt.Errorf("invalid JSON payload from child"))
|
||||
return newSystemError(errors.New("invalid JSON payload from child"))
|
||||
}
|
||||
})
|
||||
|
||||
@@ -279,7 +299,7 @@ func (p *initProcess) waitForChildExit(childPid int) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *initProcess) start() error {
|
||||
func (p *initProcess) start() (retErr error) {
|
||||
defer p.messageSockPair.parent.Close()
|
||||
err := p.cmd.Start()
|
||||
p.process.ops = p
|
||||
@@ -290,6 +310,15 @@ func (p *initProcess) start() error {
|
||||
p.process.ops = nil
|
||||
return newSystemErrorWithCause(err, "starting init process command")
|
||||
}
|
||||
defer func() {
|
||||
if retErr != nil {
|
||||
p.manager.Destroy()
|
||||
if p.intelRdtManager != nil {
|
||||
p.intelRdtManager.Destroy()
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
// Do this before syncing with child so that no children can escape the
|
||||
// cgroup. We don't need to worry about not doing this and not being root
|
||||
// because we'd be using the rootless cgroup manager in that case.
|
||||
@@ -301,16 +330,6 @@ func (p *initProcess) start() error {
|
||||
return newSystemErrorWithCause(err, "applying Intel RDT configuration for process")
|
||||
}
|
||||
}
|
||||
defer func() {
|
||||
if err != nil {
|
||||
// TODO: should not be the responsibility to call here
|
||||
p.manager.Destroy()
|
||||
if p.intelRdtManager != nil {
|
||||
p.intelRdtManager.Destroy()
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
if _, err := io.Copy(p.messageSockPair.parent, p.bootstrapData); err != nil {
|
||||
return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
|
||||
}
|
||||
@@ -327,16 +346,7 @@ func (p *initProcess) start() error {
|
||||
return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", childPid)
|
||||
}
|
||||
p.setExternalDescriptors(fds)
|
||||
// Do this before syncing with child so that no children
|
||||
// can escape the cgroup
|
||||
if err := p.manager.Apply(childPid); err != nil {
|
||||
return newSystemErrorWithCause(err, "applying cgroup configuration for process")
|
||||
}
|
||||
if p.intelRdtManager != nil {
|
||||
if err := p.intelRdtManager.Apply(childPid); err != nil {
|
||||
return newSystemErrorWithCause(err, "applying Intel RDT configuration for process")
|
||||
}
|
||||
}
|
||||
|
||||
// Now it's time to set up the cgroup namespace
|
||||
if p.config.Config.Namespaces.Contains(configs.NEWCGROUP) && p.config.Config.Namespaces.PathOf(configs.NEWCGROUP) == "" {
|
||||
if _, err := p.messageSockPair.parent.Write([]byte{createCgroupns}); err != nil {
|
||||
@@ -349,18 +359,12 @@ func (p *initProcess) start() error {
|
||||
return newSystemErrorWithCause(err, "waiting for our first child to exit")
|
||||
}
|
||||
|
||||
defer func() {
|
||||
if err != nil {
|
||||
// TODO: should not be the responsibility to call here
|
||||
p.manager.Destroy()
|
||||
if p.intelRdtManager != nil {
|
||||
p.intelRdtManager.Destroy()
|
||||
}
|
||||
}
|
||||
}()
|
||||
if err := p.createNetworkInterfaces(); err != nil {
|
||||
return newSystemErrorWithCause(err, "creating network interfaces")
|
||||
}
|
||||
if err := p.updateSpecState(); err != nil {
|
||||
return newSystemErrorWithCause(err, "updating the spec state")
|
||||
}
|
||||
if err := p.sendConfig(); err != nil {
|
||||
return newSystemErrorWithCause(err, "sending config to init process")
|
||||
}
|
||||
@@ -377,9 +381,9 @@ func (p *initProcess) start() error {
|
||||
if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
|
||||
return newSystemErrorWithCause(err, "setting rlimits for ready process")
|
||||
}
|
||||
// call prestart hooks
|
||||
// call prestart and CreateRuntime hooks
|
||||
if !p.config.Config.Namespaces.Contains(configs.NEWNS) {
|
||||
// Setup cgroup before prestart hook, so that the prestart hook could apply cgroup permissions.
|
||||
// Setup cgroup before the hook, so that the prestart and CreateRuntime hook could apply cgroup permissions.
|
||||
if err := p.manager.Set(p.config.Config); err != nil {
|
||||
return newSystemErrorWithCause(err, "setting cgroup config for ready process")
|
||||
}
|
||||
@@ -396,11 +400,14 @@ func (p *initProcess) start() error {
|
||||
}
|
||||
// initProcessStartTime hasn't been set yet.
|
||||
s.Pid = p.cmd.Process.Pid
|
||||
s.Status = "creating"
|
||||
for i, hook := range p.config.Config.Hooks.Prestart {
|
||||
if err := hook.Run(s); err != nil {
|
||||
return newSystemErrorWithCausef(err, "running prestart hook %d", i)
|
||||
}
|
||||
s.Status = configs.Creating
|
||||
hooks := p.config.Config.Hooks
|
||||
|
||||
if err := hooks[configs.Prestart].RunHooks(s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := hooks[configs.CreateRuntime].RunHooks(s); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -426,11 +433,14 @@ func (p *initProcess) start() error {
|
||||
}
|
||||
// initProcessStartTime hasn't been set yet.
|
||||
s.Pid = p.cmd.Process.Pid
|
||||
s.Status = "creating"
|
||||
for i, hook := range p.config.Config.Hooks.Prestart {
|
||||
if err := hook.Run(s); err != nil {
|
||||
return newSystemErrorWithCausef(err, "running prestart hook %d", i)
|
||||
}
|
||||
s.Status = configs.Creating
|
||||
hooks := p.config.Config.Hooks
|
||||
|
||||
if err := hooks[configs.Prestart].RunHooks(s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := hooks[configs.CreateRuntime].RunHooks(s); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
// Sync with child.
|
||||
@@ -439,7 +449,7 @@ func (p *initProcess) start() error {
|
||||
}
|
||||
sentResume = true
|
||||
default:
|
||||
return newSystemError(fmt.Errorf("invalid JSON payload from child"))
|
||||
return newSystemError(errors.New("invalid JSON payload from child"))
|
||||
}
|
||||
|
||||
return nil
|
||||
@@ -449,7 +459,7 @@ func (p *initProcess) start() error {
|
||||
return newSystemErrorWithCause(ierr, "container init")
|
||||
}
|
||||
if p.config.Config.Namespaces.Contains(configs.NEWNS) && !sentResume {
|
||||
return newSystemError(fmt.Errorf("could not synchronise after executing prestart hooks with container process"))
|
||||
return newSystemError(errors.New("could not synchronise after executing prestart and CreateRuntime hooks with container process"))
|
||||
}
|
||||
if err := unix.Shutdown(int(p.messageSockPair.parent.Fd()), unix.SHUT_WR); err != nil {
|
||||
return newSystemErrorWithCause(err, "shutting down init pipe")
|
||||
@@ -491,6 +501,16 @@ func (p *initProcess) startTime() (uint64, error) {
|
||||
return stat.StartTime, err
|
||||
}
|
||||
|
||||
func (p *initProcess) updateSpecState() error {
|
||||
s, err := p.container.currentOCIState()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
p.config.SpecState = s
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *initProcess) sendConfig() error {
|
||||
// send the config to the container's init process, we don't use JSON Encode
|
||||
// here because there might be a problem in JSON decoder in some cases, see:
|
||||
@@ -516,7 +536,7 @@ func (p *initProcess) createNetworkInterfaces() error {
|
||||
}
|
||||
|
||||
func (p *initProcess) signal(sig os.Signal) error {
|
||||
s, ok := sig.(syscall.Signal)
|
||||
s, ok := sig.(unix.Signal)
|
||||
if !ok {
|
||||
return errors.New("os: unsupported signal type")
|
||||
}
|
||||
|
25
vendor/github.com/opencontainers/runc/libcontainer/restored_process.go
generated
vendored
25
vendor/github.com/opencontainers/runc/libcontainer/restored_process.go
generated
vendored
@@ -5,31 +5,29 @@ package libcontainer
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
)
|
||||
|
||||
func newRestoredProcess(pid int, fds []string) (*restoredProcess, error) {
|
||||
func newRestoredProcess(cmd *exec.Cmd, fds []string) (*restoredProcess, error) {
|
||||
var (
|
||||
err error
|
||||
)
|
||||
proc, err := os.FindProcess(pid)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
pid := cmd.Process.Pid
|
||||
stat, err := system.Stat(pid)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &restoredProcess{
|
||||
proc: proc,
|
||||
cmd: cmd,
|
||||
processStartTime: stat.StartTime,
|
||||
fds: fds,
|
||||
}, nil
|
||||
}
|
||||
|
||||
type restoredProcess struct {
|
||||
proc *os.Process
|
||||
cmd *exec.Cmd
|
||||
processStartTime uint64
|
||||
fds []string
|
||||
}
|
||||
@@ -39,11 +37,11 @@ func (p *restoredProcess) start() error {
|
||||
}
|
||||
|
||||
func (p *restoredProcess) pid() int {
|
||||
return p.proc.Pid
|
||||
return p.cmd.Process.Pid
|
||||
}
|
||||
|
||||
func (p *restoredProcess) terminate() error {
|
||||
err := p.proc.Kill()
|
||||
err := p.cmd.Process.Kill()
|
||||
if _, werr := p.wait(); err == nil {
|
||||
err = werr
|
||||
}
|
||||
@@ -53,10 +51,13 @@ func (p *restoredProcess) terminate() error {
|
||||
func (p *restoredProcess) wait() (*os.ProcessState, error) {
|
||||
// TODO: how do we wait on the actual process?
|
||||
// maybe use --exec-cmd in criu
|
||||
st, err := p.proc.Wait()
|
||||
err := p.cmd.Wait()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
if _, ok := err.(*exec.ExitError); !ok {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
st := p.cmd.ProcessState
|
||||
return st, nil
|
||||
}
|
||||
|
||||
@@ -65,7 +66,7 @@ func (p *restoredProcess) startTime() (uint64, error) {
|
||||
}
|
||||
|
||||
func (p *restoredProcess) signal(s os.Signal) error {
|
||||
return p.proc.Signal(s)
|
||||
return p.cmd.Process.Signal(s)
|
||||
}
|
||||
|
||||
func (p *restoredProcess) externalDescriptors() []string {
|
||||
|
119
vendor/github.com/opencontainers/runc/libcontainer/rootfs_linux.go
generated
vendored
119
vendor/github.com/opencontainers/runc/libcontainer/rootfs_linux.go
generated
vendored
@@ -14,10 +14,10 @@ import (
|
||||
"time"
|
||||
|
||||
securejoin "github.com/cyphar/filepath-securejoin"
|
||||
"github.com/moby/sys/mountinfo"
|
||||
"github.com/mrunalp/fileutils"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/mount"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/opencontainers/selinux/go-selinux/label"
|
||||
@@ -55,7 +55,7 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) {
|
||||
}
|
||||
}
|
||||
if err := mountToRootfs(m, config.Rootfs, config.MountLabel, hasCgroupns); err != nil {
|
||||
return newSystemErrorWithCausef(err, "mounting %q to rootfs %q at %q", m.Source, config.Rootfs, m.Destination)
|
||||
return newSystemErrorWithCausef(err, "mounting %q to rootfs at %q", m.Source, m.Destination)
|
||||
}
|
||||
|
||||
for _, postcmd := range m.PostmountCmds {
|
||||
@@ -98,6 +98,13 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) {
|
||||
return newSystemErrorWithCausef(err, "changing dir to %q", config.Rootfs)
|
||||
}
|
||||
|
||||
s := iConfig.SpecState
|
||||
s.Pid = unix.Getpid()
|
||||
s.Status = configs.Creating
|
||||
if err := iConfig.Config.Hooks[configs.CreateContainer].RunHooks(s); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if config.NoPivotRoot {
|
||||
err = msMoveRoot(config.Rootfs)
|
||||
} else if config.Namespaces.Contains(configs.NEWNS) {
|
||||
@@ -244,7 +251,7 @@ func mountCgroupV1(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b
|
||||
}
|
||||
cgroupmount := &configs.Mount{
|
||||
Source: "cgroup",
|
||||
Device: "cgroup",
|
||||
Device: "cgroup", // this is actually fstype
|
||||
Destination: subsystemPath,
|
||||
Flags: flags,
|
||||
Data: filepath.Base(subsystemPath),
|
||||
@@ -402,27 +409,9 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string, enableCgroupns b
|
||||
}
|
||||
case "cgroup":
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
if err := mountCgroupV2(m, rootfs, mountLabel, enableCgroupns); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
|
||||
if err := mountCgroupV1(m, rootfs, mountLabel, enableCgroupns); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if m.Flags&unix.MS_RDONLY != 0 {
|
||||
// remount cgroup root as readonly
|
||||
mcgrouproot := &configs.Mount{
|
||||
Source: m.Destination,
|
||||
Device: "bind",
|
||||
Destination: m.Destination,
|
||||
Flags: defaultMountFlags | unix.MS_RDONLY | unix.MS_BIND,
|
||||
}
|
||||
if err := remount(mcgrouproot, rootfs); err != nil {
|
||||
return err
|
||||
}
|
||||
return mountCgroupV2(m, rootfs, mountLabel, enableCgroupns)
|
||||
}
|
||||
return mountCgroupV1(m, rootfs, mountLabel, enableCgroupns)
|
||||
default:
|
||||
// ensure that the destination of the mount is resolved of symlinks at mount time because
|
||||
// any previous mounts can invalidate the next mount's destination.
|
||||
@@ -624,11 +613,14 @@ func bindMountDeviceNode(dest string, node *configs.Device) error {
|
||||
|
||||
// Creates the device node in the rootfs of the container.
|
||||
func createDeviceNode(rootfs string, node *configs.Device, bind bool) error {
|
||||
if node.Path == "" {
|
||||
// The node only exists for cgroup reasons, ignore it here.
|
||||
return nil
|
||||
}
|
||||
dest := filepath.Join(rootfs, node.Path)
|
||||
if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if bind {
|
||||
return bindMountDeviceNode(dest, node)
|
||||
}
|
||||
@@ -646,61 +638,45 @@ func createDeviceNode(rootfs string, node *configs.Device, bind bool) error {
|
||||
func mknodDevice(dest string, node *configs.Device) error {
|
||||
fileMode := node.FileMode
|
||||
switch node.Type {
|
||||
case 'c', 'u':
|
||||
fileMode |= unix.S_IFCHR
|
||||
case 'b':
|
||||
case configs.BlockDevice:
|
||||
fileMode |= unix.S_IFBLK
|
||||
case 'p':
|
||||
case configs.CharDevice:
|
||||
fileMode |= unix.S_IFCHR
|
||||
case configs.FifoDevice:
|
||||
fileMode |= unix.S_IFIFO
|
||||
default:
|
||||
return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path)
|
||||
}
|
||||
if err := unix.Mknod(dest, uint32(fileMode), node.Mkdev()); err != nil {
|
||||
dev, err := node.Mkdev()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := unix.Mknod(dest, uint32(fileMode), int(dev)); err != nil {
|
||||
return err
|
||||
}
|
||||
return unix.Chown(dest, int(node.Uid), int(node.Gid))
|
||||
}
|
||||
|
||||
func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info {
|
||||
for _, m := range mountinfo {
|
||||
if m.Mountpoint == dir {
|
||||
return m
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Get the parent mount point of directory passed in as argument. Also return
|
||||
// optional fields.
|
||||
func getParentMount(rootfs string) (string, string, error) {
|
||||
var path string
|
||||
|
||||
mountinfos, err := mount.GetMounts()
|
||||
mi, err := mountinfo.GetMounts(mountinfo.ParentsFilter(rootfs))
|
||||
if err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
|
||||
mountinfo := getMountInfo(mountinfos, rootfs)
|
||||
if mountinfo != nil {
|
||||
return rootfs, mountinfo.Optional, nil
|
||||
if len(mi) < 1 {
|
||||
return "", "", fmt.Errorf("could not find parent mount of %s", rootfs)
|
||||
}
|
||||
|
||||
path = rootfs
|
||||
for {
|
||||
path = filepath.Dir(path)
|
||||
|
||||
mountinfo = getMountInfo(mountinfos, path)
|
||||
if mountinfo != nil {
|
||||
return path, mountinfo.Optional, nil
|
||||
}
|
||||
|
||||
if path == "/" {
|
||||
break
|
||||
// find the longest mount point
|
||||
var idx, maxlen int
|
||||
for i := range mi {
|
||||
if len(mi[i].Mountpoint) > maxlen {
|
||||
maxlen = len(mi[i].Mountpoint)
|
||||
idx = i
|
||||
}
|
||||
}
|
||||
|
||||
// If we are here, we did not find parent mount. Something is wrong.
|
||||
return "", "", fmt.Errorf("Could not find parent mount of %s", rootfs)
|
||||
return mi[idx].Mountpoint, mi[idx].Optional, nil
|
||||
}
|
||||
|
||||
// Make parent mount private if it was shared
|
||||
@@ -825,25 +801,22 @@ func pivotRoot(rootfs string) error {
|
||||
}
|
||||
|
||||
func msMoveRoot(rootfs string) error {
|
||||
mountinfos, err := mount.GetMounts()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
absRootfs, err := filepath.Abs(rootfs)
|
||||
mountinfos, err := mountinfo.GetMounts(func(info *mountinfo.Info) (skip, stop bool) {
|
||||
skip = false
|
||||
stop = false
|
||||
// Collect every sysfs and proc file systems, except those under the container rootfs
|
||||
if (info.Fstype != "proc" && info.Fstype != "sysfs") || strings.HasPrefix(info.Mountpoint, rootfs) {
|
||||
skip = true
|
||||
return
|
||||
}
|
||||
return
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, info := range mountinfos {
|
||||
p, err := filepath.Abs(info.Mountpoint)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Unmount every sysfs and proc file system, except those under the container rootfs
|
||||
if (info.Fstype != "proc" && info.Fstype != "sysfs") || filepath.HasPrefix(p, absRootfs) {
|
||||
continue
|
||||
}
|
||||
p := info.Mountpoint
|
||||
// Be sure umount events are not propagated to the host.
|
||||
if err := unix.Mount("", p, "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil {
|
||||
return err
|
||||
|
29
vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go
generated
vendored
29
vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go
generated
vendored
@@ -4,6 +4,7 @@ package seccomp
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
@@ -34,12 +35,12 @@ const (
|
||||
// of the init until they join the namespace
|
||||
func InitSeccomp(config *configs.Seccomp) error {
|
||||
if config == nil {
|
||||
return fmt.Errorf("cannot initialize Seccomp - nil config passed")
|
||||
return errors.New("cannot initialize Seccomp - nil config passed")
|
||||
}
|
||||
|
||||
defaultAction, err := getAction(config.DefaultAction)
|
||||
defaultAction, err := getAction(config.DefaultAction, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error initializing seccomp - invalid default action")
|
||||
return errors.New("error initializing seccomp - invalid default action")
|
||||
}
|
||||
|
||||
filter, err := libseccomp.NewFilter(defaultAction)
|
||||
@@ -67,7 +68,7 @@ func InitSeccomp(config *configs.Seccomp) error {
|
||||
// Add a rule for each syscall
|
||||
for _, call := range config.Syscalls {
|
||||
if call == nil {
|
||||
return fmt.Errorf("encountered nil syscall while initializing Seccomp")
|
||||
return errors.New("encountered nil syscall while initializing Seccomp")
|
||||
}
|
||||
|
||||
if err = matchCall(filter, call); err != nil {
|
||||
@@ -101,22 +102,28 @@ func IsEnabled() bool {
|
||||
}
|
||||
|
||||
// Convert Libcontainer Action to Libseccomp ScmpAction
|
||||
func getAction(act configs.Action) (libseccomp.ScmpAction, error) {
|
||||
func getAction(act configs.Action, errnoRet *uint) (libseccomp.ScmpAction, error) {
|
||||
switch act {
|
||||
case configs.Kill:
|
||||
return actKill, nil
|
||||
case configs.Errno:
|
||||
if errnoRet != nil {
|
||||
return libseccomp.ActErrno.SetReturnCode(int16(*errnoRet)), nil
|
||||
}
|
||||
return actErrno, nil
|
||||
case configs.Trap:
|
||||
return actTrap, nil
|
||||
case configs.Allow:
|
||||
return actAllow, nil
|
||||
case configs.Trace:
|
||||
if errnoRet != nil {
|
||||
return libseccomp.ActTrace.SetReturnCode(int16(*errnoRet)), nil
|
||||
}
|
||||
return actTrace, nil
|
||||
case configs.Log:
|
||||
return actLog, nil
|
||||
default:
|
||||
return libseccomp.ActInvalid, fmt.Errorf("invalid action, cannot use in rule")
|
||||
return libseccomp.ActInvalid, errors.New("invalid action, cannot use in rule")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -138,7 +145,7 @@ func getOperator(op configs.Operator) (libseccomp.ScmpCompareOp, error) {
|
||||
case configs.MaskEqualTo:
|
||||
return libseccomp.CompareMaskedEqual, nil
|
||||
default:
|
||||
return libseccomp.CompareInvalid, fmt.Errorf("invalid operator, cannot use in rule")
|
||||
return libseccomp.CompareInvalid, errors.New("invalid operator, cannot use in rule")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -147,7 +154,7 @@ func getCondition(arg *configs.Arg) (libseccomp.ScmpCondition, error) {
|
||||
cond := libseccomp.ScmpCondition{}
|
||||
|
||||
if arg == nil {
|
||||
return cond, fmt.Errorf("cannot convert nil to syscall condition")
|
||||
return cond, errors.New("cannot convert nil to syscall condition")
|
||||
}
|
||||
|
||||
op, err := getOperator(arg.Op)
|
||||
@@ -161,11 +168,11 @@ func getCondition(arg *configs.Arg) (libseccomp.ScmpCondition, error) {
|
||||
// Add a rule to match a single syscall
|
||||
func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall) error {
|
||||
if call == nil || filter == nil {
|
||||
return fmt.Errorf("cannot use nil as syscall to block")
|
||||
return errors.New("cannot use nil as syscall to block")
|
||||
}
|
||||
|
||||
if len(call.Name) == 0 {
|
||||
return fmt.Errorf("empty string is not a valid syscall")
|
||||
return errors.New("empty string is not a valid syscall")
|
||||
}
|
||||
|
||||
// If we can't resolve the syscall, assume it's not supported on this kernel
|
||||
@@ -176,7 +183,7 @@ func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall) error {
|
||||
}
|
||||
|
||||
// Convert the call's action to the libseccomp equivalent
|
||||
callAct, err := getAction(call.Action)
|
||||
callAct, err := getAction(call.Action, call.ErrnoRet)
|
||||
if err != nil {
|
||||
return fmt.Errorf("action in seccomp profile is invalid: %s", err)
|
||||
}
|
||||
|
11
vendor/github.com/opencontainers/runc/libcontainer/setns_init_linux.go
generated
vendored
11
vendor/github.com/opencontainers/runc/libcontainer/setns_init_linux.go
generated
vendored
@@ -11,9 +11,8 @@ import (
|
||||
"github.com/opencontainers/runc/libcontainer/keys"
|
||||
"github.com/opencontainers/runc/libcontainer/seccomp"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/opencontainers/selinux/go-selinux/label"
|
||||
"github.com/opencontainers/selinux/go-selinux"
|
||||
"github.com/pkg/errors"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
@@ -34,10 +33,10 @@ func (l *linuxSetnsInit) Init() error {
|
||||
defer runtime.UnlockOSThread()
|
||||
|
||||
if !l.config.Config.NoNewKeyring {
|
||||
if err := label.SetKeyLabel(l.config.ProcessLabel); err != nil {
|
||||
if err := selinux.SetKeyLabel(l.config.ProcessLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
defer label.SetKeyLabel("")
|
||||
defer selinux.SetKeyLabel("")
|
||||
// Do not inherit the parent's session keyring.
|
||||
if _, err := keys.JoinSessionKeyring(l.getSessionRingName()); err != nil {
|
||||
// Same justification as in standard_init_linux.go as to why we
|
||||
@@ -62,10 +61,10 @@ func (l *linuxSetnsInit) Init() error {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
|
||||
if err := selinux.SetExecLabel(l.config.ProcessLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
defer label.SetProcessLabel("")
|
||||
defer selinux.SetExecLabel("")
|
||||
// Without NoNewPrivileges seccomp is a privileged operation, so we need to
|
||||
// do this before dropping capabilities; otherwise do it as late as possible
|
||||
// just before execve so as few syscalls take place after it as possible.
|
||||
|
24
vendor/github.com/opencontainers/runc/libcontainer/standard_init_linux.go
generated
vendored
24
vendor/github.com/opencontainers/runc/libcontainer/standard_init_linux.go
generated
vendored
@@ -7,14 +7,13 @@ import (
|
||||
"os"
|
||||
"os/exec"
|
||||
"runtime"
|
||||
"syscall" //only for Exec
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/apparmor"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/keys"
|
||||
"github.com/opencontainers/runc/libcontainer/seccomp"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/opencontainers/selinux/go-selinux/label"
|
||||
"github.com/opencontainers/selinux/go-selinux"
|
||||
"github.com/pkg/errors"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
@@ -48,10 +47,10 @@ func (l *linuxStandardInit) Init() error {
|
||||
runtime.LockOSThread()
|
||||
defer runtime.UnlockOSThread()
|
||||
if !l.config.Config.NoNewKeyring {
|
||||
if err := label.SetKeyLabel(l.config.ProcessLabel); err != nil {
|
||||
if err := selinux.SetKeyLabel(l.config.ProcessLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
defer label.SetKeyLabel("")
|
||||
defer selinux.SetKeyLabel("")
|
||||
ringname, keepperms, newperms := l.getSessionRingParams()
|
||||
|
||||
// Do not inherit the parent's session keyring.
|
||||
@@ -84,7 +83,8 @@ func (l *linuxStandardInit) Init() error {
|
||||
return err
|
||||
}
|
||||
|
||||
label.Init()
|
||||
// initialises the labeling system
|
||||
selinux.GetEnabled()
|
||||
if err := prepareRootfs(l.pipe, l.config); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -146,10 +146,10 @@ func (l *linuxStandardInit) Init() error {
|
||||
if err := syncParentReady(l.pipe); err != nil {
|
||||
return errors.Wrap(err, "sync ready")
|
||||
}
|
||||
if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
|
||||
if err := selinux.SetExecLabel(l.config.ProcessLabel); err != nil {
|
||||
return errors.Wrap(err, "set process label")
|
||||
}
|
||||
defer label.SetProcessLabel("")
|
||||
defer selinux.SetExecLabel("")
|
||||
// Without NoNewPrivileges seccomp is a privileged operation, so we need to
|
||||
// do this before dropping capabilities; otherwise do it as late as possible
|
||||
// just before execve so as few syscalls take place after it as possible.
|
||||
@@ -207,7 +207,15 @@ func (l *linuxStandardInit) Init() error {
|
||||
return newSystemErrorWithCause(err, "init seccomp")
|
||||
}
|
||||
}
|
||||
if err := syscall.Exec(name, l.config.Args[0:], os.Environ()); err != nil {
|
||||
|
||||
s := l.config.SpecState
|
||||
s.Pid = unix.Getpid()
|
||||
s.Status = configs.Created
|
||||
if err := l.config.Config.Hooks[configs.StartContainer].RunHooks(s); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := unix.Exec(name, l.config.Args[0:], os.Environ()); err != nil {
|
||||
return newSystemErrorWithCause(err, "exec user process")
|
||||
}
|
||||
return nil
|
||||
|
41
vendor/github.com/opencontainers/runc/libcontainer/state_linux.go
generated
vendored
41
vendor/github.com/opencontainers/runc/libcontainer/state_linux.go
generated
vendored
@@ -61,17 +61,21 @@ func destroy(c *linuxContainer) error {
|
||||
}
|
||||
|
||||
func runPoststopHooks(c *linuxContainer) error {
|
||||
if c.config.Hooks != nil {
|
||||
s, err := c.currentOCIState()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, hook := range c.config.Hooks.Poststop {
|
||||
if err := hook.Run(s); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
hooks := c.config.Hooks
|
||||
if hooks == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
s, err := c.currentOCIState()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
s.Status = configs.Stopped
|
||||
|
||||
if err := hooks[configs.Poststop].RunHooks(s); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -111,11 +115,7 @@ func (r *runningState) status() Status {
|
||||
func (r *runningState) transition(s containerState) error {
|
||||
switch s.(type) {
|
||||
case *stoppedState:
|
||||
t, err := r.c.runType()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if t == Running {
|
||||
if r.c.runType() == Running {
|
||||
return newGenericError(fmt.Errorf("container still running"), ContainerNotStopped)
|
||||
}
|
||||
r.c.state = s
|
||||
@@ -130,11 +130,7 @@ func (r *runningState) transition(s containerState) error {
|
||||
}
|
||||
|
||||
func (r *runningState) destroy() error {
|
||||
t, err := r.c.runType()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if t == Running {
|
||||
if r.c.runType() == Running {
|
||||
return newGenericError(fmt.Errorf("container is not destroyed"), ContainerNotStopped)
|
||||
}
|
||||
return destroy(r.c)
|
||||
@@ -186,10 +182,7 @@ func (p *pausedState) transition(s containerState) error {
|
||||
}
|
||||
|
||||
func (p *pausedState) destroy() error {
|
||||
t, err := p.c.runType()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
t := p.c.runType()
|
||||
if t != Running && t != Created {
|
||||
if err := p.c.cgroupManager.Freeze(configs.Thawed); err != nil {
|
||||
return err
|
||||
|
33
vendor/github.com/opencontainers/runc/libcontainer/sync.go
generated
vendored
33
vendor/github.com/opencontainers/runc/libcontainer/sync.go
generated
vendored
@@ -2,6 +2,7 @@ package libcontainer
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
@@ -19,11 +20,6 @@ type syncType string
|
||||
// procHooks --> [run hooks]
|
||||
// <-- procResume
|
||||
//
|
||||
// procConsole -->
|
||||
// <-- procConsoleReq
|
||||
// [send(fd)] --> [recv(fd)]
|
||||
// <-- procConsoleAck
|
||||
//
|
||||
// procReady --> [final setup]
|
||||
// <-- procRun
|
||||
const (
|
||||
@@ -50,22 +46,23 @@ func readSync(pipe io.Reader, expected syncType) error {
|
||||
var procSync syncT
|
||||
if err := json.NewDecoder(pipe).Decode(&procSync); err != nil {
|
||||
if err == io.EOF {
|
||||
return fmt.Errorf("parent closed synchronisation channel")
|
||||
return errors.New("parent closed synchronisation channel")
|
||||
}
|
||||
return fmt.Errorf("failed reading error from parent: %v", err)
|
||||
}
|
||||
|
||||
if procSync.Type == procError {
|
||||
var ierr genericError
|
||||
|
||||
if err := json.NewDecoder(pipe).Decode(&ierr); err != nil {
|
||||
return fmt.Errorf("failed reading error from parent: %v", err)
|
||||
}
|
||||
|
||||
if procSync.Type == procError {
|
||||
var ierr genericError
|
||||
return &ierr
|
||||
}
|
||||
|
||||
if err := json.NewDecoder(pipe).Decode(&ierr); err != nil {
|
||||
return fmt.Errorf("failed reading error from parent: %v", err)
|
||||
}
|
||||
|
||||
return &ierr
|
||||
}
|
||||
|
||||
if procSync.Type != expected {
|
||||
return fmt.Errorf("invalid synchronisation flag from parent")
|
||||
}
|
||||
if procSync.Type != expected {
|
||||
return errors.New("invalid synchronisation flag from parent")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
3
vendor/github.com/opencontainers/runc/libcontainer/system/BUILD
generated
vendored
3
vendor/github.com/opencontainers/runc/libcontainer/system/BUILD
generated
vendored
@@ -7,12 +7,9 @@ go_library(
|
||||
"proc.go",
|
||||
"syscall_linux_32.go",
|
||||
"syscall_linux_64.go",
|
||||
"sysconfig.go",
|
||||
"sysconfig_notcgo.go",
|
||||
"unsupported.go",
|
||||
"xattrs_linux.go",
|
||||
],
|
||||
cgo = True,
|
||||
importmap = "k8s.io/kubernetes/vendor/github.com/opencontainers/runc/libcontainer/system",
|
||||
importpath = "github.com/opencontainers/runc/libcontainer/system",
|
||||
visibility = ["//visibility:public"],
|
||||
|
39
vendor/github.com/opencontainers/runc/libcontainer/system/linux.go
generated
vendored
39
vendor/github.com/opencontainers/runc/libcontainer/system/linux.go
generated
vendored
@@ -5,26 +5,13 @@ package system
|
||||
import (
|
||||
"os"
|
||||
"os/exec"
|
||||
"syscall" // only for exec
|
||||
"sync"
|
||||
"unsafe"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/user"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// If arg2 is nonzero, set the "child subreaper" attribute of the
|
||||
// calling process; if arg2 is zero, unset the attribute. When a
|
||||
// process is marked as a child subreaper, all of the children
|
||||
// that it creates, and their descendants, will be marked as
|
||||
// having a subreaper. In effect, a subreaper fulfills the role
|
||||
// of init(1) for its descendant processes. Upon termination of
|
||||
// a process that is orphaned (i.e., its immediate parent has
|
||||
// already terminated) and marked as having a subreaper, the
|
||||
// nearest still living ancestor subreaper will receive a SIGCHLD
|
||||
// signal and be able to wait(2) on the process to discover its
|
||||
// termination status.
|
||||
const PR_SET_CHILD_SUBREAPER = 36
|
||||
|
||||
type ParentDeathSignal int
|
||||
|
||||
func (p ParentDeathSignal) Restore() error {
|
||||
@@ -51,7 +38,7 @@ func Execv(cmd string, args []string, env []string) error {
|
||||
return err
|
||||
}
|
||||
|
||||
return syscall.Exec(name, args, env)
|
||||
return unix.Exec(name, args, env)
|
||||
}
|
||||
|
||||
func Prlimit(pid, resource int, limit unix.Rlimit) error {
|
||||
@@ -100,15 +87,23 @@ func Setctty() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
var (
|
||||
inUserNS bool
|
||||
nsOnce sync.Once
|
||||
)
|
||||
|
||||
// RunningInUserNS detects whether we are currently running in a user namespace.
|
||||
// Originally copied from github.com/lxc/lxd/shared/util.go
|
||||
func RunningInUserNS() bool {
|
||||
uidmap, err := user.CurrentProcessUIDMap()
|
||||
if err != nil {
|
||||
// This kernel-provided file only exists if user namespaces are supported
|
||||
return false
|
||||
}
|
||||
return UIDMapInUserNS(uidmap)
|
||||
nsOnce.Do(func() {
|
||||
uidmap, err := user.CurrentProcessUIDMap()
|
||||
if err != nil {
|
||||
// This kernel-provided file only exists if user namespaces are supported
|
||||
return
|
||||
}
|
||||
inUserNS = UIDMapInUserNS(uidmap)
|
||||
})
|
||||
return inUserNS
|
||||
}
|
||||
|
||||
func UIDMapInUserNS(uidmap []user.IDMap) bool {
|
||||
@@ -140,7 +135,7 @@ func GetParentNSeuid() int64 {
|
||||
|
||||
// SetSubreaper sets the value i as the subreaper setting for the calling process
|
||||
func SetSubreaper(i int) error {
|
||||
return unix.Prctl(PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0)
|
||||
return unix.Prctl(unix.PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0)
|
||||
}
|
||||
|
||||
// GetSubreaper returns the subreaper setting for the calling process
|
||||
|
12
vendor/github.com/opencontainers/runc/libcontainer/system/sysconfig.go
generated
vendored
12
vendor/github.com/opencontainers/runc/libcontainer/system/sysconfig.go
generated
vendored
@@ -1,12 +0,0 @@
|
||||
// +build cgo,linux
|
||||
|
||||
package system
|
||||
|
||||
/*
|
||||
#include <unistd.h>
|
||||
*/
|
||||
import "C"
|
||||
|
||||
func GetClockTicks() int {
|
||||
return int(C.sysconf(C._SC_CLK_TCK))
|
||||
}
|
15
vendor/github.com/opencontainers/runc/libcontainer/system/sysconfig_notcgo.go
generated
vendored
15
vendor/github.com/opencontainers/runc/libcontainer/system/sysconfig_notcgo.go
generated
vendored
@@ -1,15 +0,0 @@
|
||||
// +build !cgo windows
|
||||
|
||||
package system
|
||||
|
||||
func GetClockTicks() int {
|
||||
// TODO figure out a better alternative for platforms where we're missing cgo
|
||||
//
|
||||
// TODO Windows. This could be implemented using Win32 QueryPerformanceFrequency().
|
||||
// https://msdn.microsoft.com/en-us/library/windows/desktop/ms644905(v=vs.85).aspx
|
||||
//
|
||||
// An example of its usage can be found here.
|
||||
// https://msdn.microsoft.com/en-us/library/windows/desktop/dn553408(v=vs.85).aspx
|
||||
|
||||
return 100
|
||||
}
|
28
vendor/github.com/opencontainers/runc/libcontainer/user/user.go
generated
vendored
28
vendor/github.com/opencontainers/runc/libcontainer/user/user.go
generated
vendored
@@ -162,10 +162,6 @@ func ParsePasswdFilter(r io.Reader, filter func(User) bool) ([]User, error) {
|
||||
)
|
||||
|
||||
for s.Scan() {
|
||||
if err := s.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
line := strings.TrimSpace(s.Text())
|
||||
if line == "" {
|
||||
continue
|
||||
@@ -183,6 +179,9 @@ func ParsePasswdFilter(r io.Reader, filter func(User) bool) ([]User, error) {
|
||||
out = append(out, p)
|
||||
}
|
||||
}
|
||||
if err := s.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return out, nil
|
||||
}
|
||||
@@ -221,10 +220,6 @@ func ParseGroupFilter(r io.Reader, filter func(Group) bool) ([]Group, error) {
|
||||
)
|
||||
|
||||
for s.Scan() {
|
||||
if err := s.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
text := s.Text()
|
||||
if text == "" {
|
||||
continue
|
||||
@@ -242,6 +237,9 @@ func ParseGroupFilter(r io.Reader, filter func(Group) bool) ([]Group, error) {
|
||||
out = append(out, p)
|
||||
}
|
||||
}
|
||||
if err := s.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return out, nil
|
||||
}
|
||||
@@ -532,10 +530,6 @@ func ParseSubIDFilter(r io.Reader, filter func(SubID) bool) ([]SubID, error) {
|
||||
)
|
||||
|
||||
for s.Scan() {
|
||||
if err := s.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
line := strings.TrimSpace(s.Text())
|
||||
if line == "" {
|
||||
continue
|
||||
@@ -549,6 +543,9 @@ func ParseSubIDFilter(r io.Reader, filter func(SubID) bool) ([]SubID, error) {
|
||||
out = append(out, p)
|
||||
}
|
||||
}
|
||||
if err := s.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return out, nil
|
||||
}
|
||||
@@ -586,10 +583,6 @@ func ParseIDMapFilter(r io.Reader, filter func(IDMap) bool) ([]IDMap, error) {
|
||||
)
|
||||
|
||||
for s.Scan() {
|
||||
if err := s.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
line := strings.TrimSpace(s.Text())
|
||||
if line == "" {
|
||||
continue
|
||||
@@ -603,6 +596,9 @@ func ParseIDMapFilter(r io.Reader, filter func(IDMap) bool) ([]IDMap, error) {
|
||||
out = append(out, p)
|
||||
}
|
||||
}
|
||||
if err := s.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return out, nil
|
||||
}
|
||||
|
Reference in New Issue
Block a user