Update dependency opencontainer/runc

This commit is contained in:
Odin Ugedal
2019-06-20 20:34:03 +02:00
parent 81c8552d7e
commit 2bcdb944f0
42 changed files with 1415 additions and 631 deletions

View File

@@ -34,8 +34,10 @@ go_library(
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/stacktrace:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/utils:go_default_library",
"//vendor/github.com/opencontainers/runtime-spec/specs-go:go_default_library",
] + select({
"@io_bazel_rules_go//go/platform:linux": [
"//vendor/github.com/checkpoint-restore/go-criu/rpc:go_default_library",
"//vendor/github.com/containerd/console:go_default_library",
"//vendor/github.com/cyphar/filepath-securejoin:go_default_library",
"//vendor/github.com/golang/protobuf/proto:go_default_library",
@@ -45,9 +47,9 @@ go_library(
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/configs/validate:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/criurpc:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/intelrdt:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/keys:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/logs:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/mount:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/seccomp:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/system:go_default_library",
@@ -78,9 +80,9 @@ filegroup(
"//vendor/github.com/opencontainers/runc/libcontainer/apparmor:all-srcs",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:all-srcs",
"//vendor/github.com/opencontainers/runc/libcontainer/configs:all-srcs",
"//vendor/github.com/opencontainers/runc/libcontainer/criurpc:all-srcs",
"//vendor/github.com/opencontainers/runc/libcontainer/intelrdt:all-srcs",
"//vendor/github.com/opencontainers/runc/libcontainer/keys:all-srcs",
"//vendor/github.com/opencontainers/runc/libcontainer/logs:all-srcs",
"//vendor/github.com/opencontainers/runc/libcontainer/mount:all-srcs",
"//vendor/github.com/opencontainers/runc/libcontainer/seccomp:all-srcs",
"//vendor/github.com/opencontainers/runc/libcontainer/stacktrace:all-srcs",

View File

@@ -148,6 +148,7 @@ config := &configs.Config{
{Type: configs.NEWPID},
{Type: configs.NEWUSER},
{Type: configs.NEWNET},
{Type: configs.NEWCGROUP},
}),
Cgroups: &configs.Cgroup{
Name: "test-container",

View File

@@ -21,16 +21,17 @@ Minimum requirements:
### Namespaces
| Flag | Enabled |
| ------------ | ------- |
| CLONE_NEWPID | 1 |
| CLONE_NEWUTS | 1 |
| CLONE_NEWIPC | 1 |
| CLONE_NEWNET | 1 |
| CLONE_NEWNS | 1 |
| CLONE_NEWUSER | 1 |
| Flag | Enabled |
| --------------- | ------- |
| CLONE_NEWPID | 1 |
| CLONE_NEWUTS | 1 |
| CLONE_NEWIPC | 1 |
| CLONE_NEWNET | 1 |
| CLONE_NEWNS | 1 |
| CLONE_NEWUSER | 1 |
| CLONE_NEWCGROUP | 1 |
Namespaces are created for the container via the `clone` syscall.
Namespaces are created for the container via the `unshare` syscall.
### Filesystem
@@ -167,7 +168,8 @@ service (CLOS) and each CLOS has a capacity bitmask (CBM).
Memory Bandwidth Allocation (MBA) provides indirect and approximate throttle
over memory bandwidth for the software. A user controls the resource by
indicating the percentage of maximum memory bandwidth.
indicating the percentage of maximum memory bandwidth or memory bandwidth limit
in MBps unit if MBA Software Controller is enabled.
It can be used to handle L3 cache and memory bandwidth resources allocation
for containers if hardware and kernel support Intel RDT CAT and MBA features.
@@ -236,7 +238,7 @@ set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
Memory bandwidth schema:
It has allocation values for memory bandwidth on each socket, which contains
L3 cache id and memory bandwidth percentage.
L3 cache id and memory bandwidth.
```
Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
```
@@ -249,6 +251,18 @@ that is allocated is also dependent on the CPU model and can be looked up at
min_bw + N * bw_gran. Intermediate values are rounded to the next control
step available on the hardware.
If MBA Software Controller is enabled through mount option "-o mba_MBps"
mount -t resctrl resctrl -o mba_MBps /sys/fs/resctrl
We could specify memory bandwidth in "MBps" (Mega Bytes per second) unit
instead of "percentages". The kernel underneath would use a software feedback
mechanism or a "Software Controller" which reads the actual bandwidth using
MBM counters and adjust the memory bandwidth percentages to ensure:
"actual memory bandwidth < user specified memory bandwidth".
For example, on a two-socket machine, the schema line could be
"MB:0=5000;1=7000" which means 5000 MBps memory bandwidth limit on socket 0
and 7000 MBps memory bandwidth limit on socket 1.
For more information about Intel RDT kernel interface:
https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt

View File

@@ -15,6 +15,7 @@ go_library(
"@io_bazel_rules_go//go/platform:linux": [
"//vendor/github.com/docker/go-units:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
"//vendor/golang.org/x/sys/unix:go_default_library",
],
"//conditions:default": [],
}),

View File

@@ -317,7 +317,7 @@ func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
}
func (raw *cgroupData) path(subsystem string) (string, error) {
mnt, err := cgroups.FindCgroupMountpoint(subsystem)
mnt, err := cgroups.FindCgroupMountpoint(raw.root, subsystem)
// If we didn't mount the subsystem, there is no point we make the path.
if err != nil {
return "", err

View File

@@ -3,6 +3,7 @@
package fs
import (
"errors"
"fmt"
"io/ioutil"
"os"
@@ -17,7 +18,12 @@ import (
const cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes"
func EnableKernelMemoryAccounting(path string) error {
// Check if kernel memory is enabled
// Ensure that kernel memory is available in this kernel build. If it
// isn't, we just ignore it because EnableKernelMemoryAccounting is
// automatically called for all memory limits.
if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) {
return nil
}
// We have to limit the kernel memory here as it won't be accounted at all
// until a limit is set on the cgroup and limit cannot be set once the
// cgroup has children, or if there are already tasks in the cgroup.
@@ -34,8 +40,9 @@ func setKernelMemory(path string, kernelMemoryLimit int64) error {
return fmt.Errorf("no such directory for %s", cgroupKernelMemoryLimit)
}
if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) {
// kernel memory is not enabled on the system so we should do nothing
return nil
// We have specifically been asked to set a kmem limit. If the kernel
// doesn't support it we *must* error out.
return errors.New("kernel memory accounting not supported by this kernel")
}
if err := ioutil.WriteFile(filepath.Join(path, cgroupKernelMemoryLimit), []byte(strconv.FormatInt(kernelMemoryLimit, 10)), 0700); err != nil {
// Check if the error number returned by the syscall is "EBUSY"

View File

@@ -2,10 +2,14 @@
package fs
import (
"errors"
)
func EnableKernelMemoryAccounting(path string) error {
return nil
}
func setKernelMemory(path string, kernelMemoryLimit int64) error {
return nil
return errors.New("kernel memory accounting disabled in this runc build")
}

View File

@@ -18,6 +18,10 @@ func UseSystemd() bool {
return false
}
func NewSystemdCgroupsManager() (func(config *configs.Cgroup, paths map[string]string) cgroups.Manager, error) {
return nil, fmt.Errorf("Systemd not supported")
}
func (m *Manager) Apply(pid int) error {
return fmt.Errorf("Systemd not supported")
}

View File

@@ -5,6 +5,7 @@ package systemd
import (
"errors"
"fmt"
"io/ioutil"
"math"
"os"
"path/filepath"
@@ -71,13 +72,11 @@ const (
)
var (
connLock sync.Mutex
theConn *systemdDbus.Conn
hasStartTransientUnit bool
hasStartTransientSliceUnit bool
hasTransientDefaultDependencies bool
hasDelegateScope bool
hasDelegateSlice bool
connLock sync.Mutex
theConn *systemdDbus.Conn
hasStartTransientUnit bool
hasStartTransientSliceUnit bool
hasDelegateSlice bool
)
func newProp(name string, units interface{}) systemdDbus.Property {
@@ -115,53 +114,6 @@ func UseSystemd() bool {
}
}
// Ensure the scope name we use doesn't exist. Use the Pid to
// avoid collisions between multiple libcontainer users on a
// single host.
scope := fmt.Sprintf("libcontainer-%d-systemd-test-default-dependencies.scope", os.Getpid())
testScopeExists := true
for i := 0; i <= testScopeWait; i++ {
if _, err := theConn.StopUnit(scope, "replace", nil); err != nil {
if dbusError, ok := err.(dbus.Error); ok {
if strings.Contains(dbusError.Name, "org.freedesktop.systemd1.NoSuchUnit") {
testScopeExists = false
break
}
}
}
time.Sleep(time.Millisecond)
}
// Bail out if we can't kill this scope without testing for DefaultDependencies
if testScopeExists {
return hasStartTransientUnit
}
// Assume StartTransientUnit on a scope allows DefaultDependencies
hasTransientDefaultDependencies = true
ddf := newProp("DefaultDependencies", false)
if _, err := theConn.StartTransientUnit(scope, "replace", []systemdDbus.Property{ddf}, nil); err != nil {
if dbusError, ok := err.(dbus.Error); ok {
if strings.Contains(dbusError.Name, "org.freedesktop.DBus.Error.PropertyReadOnly") {
hasTransientDefaultDependencies = false
}
}
}
// Not critical because of the stop unit logic above.
theConn.StopUnit(scope, "replace", nil)
// Assume StartTransientUnit on a scope allows Delegate
hasDelegateScope = true
dlScope := newProp("Delegate", true)
if _, err := theConn.StartTransientUnit(scope, "replace", []systemdDbus.Property{dlScope}, nil); err != nil {
if dbusError, ok := err.(dbus.Error); ok {
if strings.Contains(dbusError.Name, "org.freedesktop.DBus.Error.PropertyReadOnly") {
hasDelegateScope = false
}
}
}
// Assume we have the ability to start a transient unit as a slice
// This was broken until systemd v229, but has been back-ported on RHEL environments >= 219
// For details, see: https://bugzilla.redhat.com/show_bug.cgi?id=1370299
@@ -206,12 +158,23 @@ func UseSystemd() bool {
}
// Not critical because of the stop unit logic above.
theConn.StopUnit(scope, "replace", nil)
theConn.StopUnit(slice, "replace", nil)
}
return hasStartTransientUnit
}
func NewSystemdCgroupsManager() (func(config *configs.Cgroup, paths map[string]string) cgroups.Manager, error) {
if !systemdUtil.IsRunningSystemd() {
return nil, fmt.Errorf("systemd not running on this host, can't use systemd as a cgroups.Manager")
}
return func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
return &Manager{
Cgroups: config,
Paths: paths,
}
}, nil
}
func (m *Manager) Apply(pid int) error {
var (
c = m.Cgroups
@@ -267,9 +230,8 @@ func (m *Manager) Apply(pid int) error {
properties = append(properties, newProp("Delegate", true))
}
} else {
if hasDelegateScope {
properties = append(properties, newProp("Delegate", true))
}
// Assume scopes always support delegation.
properties = append(properties, newProp("Delegate", true))
}
// Always enable accounting, this gets us the same behaviour as the fs implementation,
@@ -279,10 +241,9 @@ func (m *Manager) Apply(pid int) error {
newProp("CPUAccounting", true),
newProp("BlockIOAccounting", true))
if hasTransientDefaultDependencies {
properties = append(properties,
newProp("DefaultDependencies", false))
}
// Assume DefaultDependencies= will always work (the check for it was previously broken.)
properties = append(properties,
newProp("DefaultDependencies", false))
if c.Resources.Memory != 0 {
properties = append(properties,
@@ -470,7 +431,7 @@ func ExpandSlice(slice string) (string, error) {
}
func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
mountpoint, err := cgroups.FindCgroupMountpoint(subsystem)
mountpoint, err := cgroups.FindCgroupMountpoint(c.Path, subsystem)
if err != nil {
return "", err
}
@@ -590,6 +551,15 @@ func setKernelMemory(c *configs.Cgroup) error {
if err := os.MkdirAll(path, 0755); err != nil {
return err
}
// do not try to enable the kernel memory if we already have
// tasks in the cgroup.
content, err := ioutil.ReadFile(filepath.Join(path, "tasks"))
if err != nil {
return err
}
if len(content) > 0 {
return nil
}
return fs.EnableKernelMemoryAccounting(path)
}

View File

@@ -14,39 +14,57 @@ import (
"time"
units "github.com/docker/go-units"
"golang.org/x/sys/unix"
)
const (
cgroupNamePrefix = "name="
CgroupNamePrefix = "name="
CgroupProcesses = "cgroup.procs"
)
// HugePageSizeUnitList is a list of the units used by the linux kernel when
// naming the HugePage control files.
// https://www.kernel.org/doc/Documentation/cgroup-v1/hugetlb.txt
// TODO Since the kernel only use KB, MB and GB; TB and PB should be removed,
// depends on https://github.com/docker/go-units/commit/a09cd47f892041a4fac473133d181f5aea6fa393
var HugePageSizeUnitList = []string{"B", "KB", "MB", "GB", "TB", "PB"}
// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
func FindCgroupMountpoint(subsystem string) (string, error) {
mnt, _, err := FindCgroupMountpointAndRoot(subsystem)
func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) {
mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem)
return mnt, err
}
func FindCgroupMountpointAndRoot(subsystem string) (string, string, error) {
func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, error) {
// We are not using mount.GetMounts() because it's super-inefficient,
// parsing it directly sped up x10 times because of not using Sscanf.
// It was one of two major performance drawbacks in container start.
if !isSubsystemAvailable(subsystem) {
return "", "", NewNotFoundError(subsystem)
}
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return "", "", err
}
defer f.Close()
scanner := bufio.NewScanner(f)
return findCgroupMountpointAndRootFromReader(f, cgroupPath, subsystem)
}
func findCgroupMountpointAndRootFromReader(reader io.Reader, cgroupPath, subsystem string) (string, string, error) {
scanner := bufio.NewScanner(reader)
for scanner.Scan() {
txt := scanner.Text()
fields := strings.Split(txt, " ")
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
if opt == subsystem {
return fields[4], fields[3], nil
fields := strings.Fields(txt)
if len(fields) < 5 {
continue
}
if strings.HasPrefix(fields[4], cgroupPath) {
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
if opt == subsystem {
return fields[4], fields[3], nil
}
}
}
}
@@ -156,8 +174,8 @@ func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount,
continue
}
ss[opt] = true
if strings.HasPrefix(opt, cgroupNamePrefix) {
opt = opt[len(cgroupNamePrefix):]
if strings.HasPrefix(opt, CgroupNamePrefix) {
opt = opt[len(CgroupNamePrefix):]
}
m.Subsystems = append(m.Subsystems, opt)
numFound++
@@ -257,7 +275,7 @@ func GetInitCgroupPath(subsystem string) (string, error) {
}
func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
mnt, root, err := FindCgroupMountpointAndRoot(subsystem)
mnt, root, err := FindCgroupMountpointAndRoot("", subsystem)
if err != nil {
return "", err
}
@@ -343,7 +361,7 @@ func getControllerPath(subsystem string, cgroups map[string]string) (string, err
return p, nil
}
if p, ok := cgroups[cgroupNamePrefix+subsystem]; ok {
if p, ok := cgroups[CgroupNamePrefix+subsystem]; ok {
return p, nil
}
@@ -398,19 +416,26 @@ func RemovePaths(paths map[string]string) (err error) {
}
func GetHugePageSize() ([]string, error) {
var pageSizes []string
sizeList := []string{"B", "kB", "MB", "GB", "TB", "PB"}
files, err := ioutil.ReadDir("/sys/kernel/mm/hugepages")
if err != nil {
return pageSizes, err
return []string{}, err
}
var fileNames []string
for _, st := range files {
nameArray := strings.Split(st.Name(), "-")
fileNames = append(fileNames, st.Name())
}
return getHugePageSizeFromFilenames(fileNames)
}
func getHugePageSizeFromFilenames(fileNames []string) ([]string, error) {
var pageSizes []string
for _, fileName := range fileNames {
nameArray := strings.Split(fileName, "-")
pageSize, err := units.RAMInBytes(nameArray[1])
if err != nil {
return []string{}, err
}
sizeString := units.CustomSize("%g%s", float64(pageSize), 1024.0, sizeList)
sizeString := units.CustomSize("%g%s", float64(pageSize), 1024.0, HugePageSizeUnitList)
pageSizes = append(pageSizes, sizeString)
}
@@ -454,10 +479,39 @@ func WriteCgroupProc(dir string, pid int) error {
}
// Dont attach any pid to the cgroup if -1 is specified as a pid
if pid != -1 {
if err := ioutil.WriteFile(filepath.Join(dir, CgroupProcesses), []byte(strconv.Itoa(pid)), 0700); err != nil {
return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err)
}
if pid == -1 {
return nil
}
cgroupProcessesFile, err := os.OpenFile(filepath.Join(dir, CgroupProcesses), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0700)
if err != nil {
return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err)
}
defer cgroupProcessesFile.Close()
for i := 0; i < 5; i++ {
_, err = cgroupProcessesFile.WriteString(strconv.Itoa(pid))
if err == nil {
return nil
}
// EINVAL might mean that the task being added to cgroup.procs is in state
// TASK_NEW. We should attempt to do so again.
if isEINVAL(err) {
time.Sleep(30 * time.Millisecond)
continue
}
return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err)
}
return err
}
func isEINVAL(err error) bool {
switch err := err.(type) {
case *os.PathError:
return err.Err == unix.EINVAL
default:
return false
}
return nil
}

View File

@@ -272,26 +272,23 @@ func (hooks Hooks) MarshalJSON() ([]byte, error) {
})
}
// HookState is the payload provided to a hook on execution.
type HookState specs.State
type Hook interface {
// Run executes the hook with the provided state.
Run(HookState) error
Run(*specs.State) error
}
// NewFunctionHook will call the provided function when the hook is run.
func NewFunctionHook(f func(HookState) error) FuncHook {
func NewFunctionHook(f func(*specs.State) error) FuncHook {
return FuncHook{
run: f,
}
}
type FuncHook struct {
run func(HookState) error
run func(*specs.State) error
}
func (f FuncHook) Run(s HookState) error {
func (f FuncHook) Run(s *specs.State) error {
return f.run(s)
}
@@ -314,7 +311,7 @@ type CommandHook struct {
Command
}
func (c Command) Run(s HookState) error {
func (c Command) Run(s *specs.State) error {
b, err := json.Marshal(s)
if err != nil {
return err

View File

@@ -5,7 +5,9 @@ type IntelRdt struct {
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
// The schema of memory bandwidth percentage per L3 cache id
// The schema of memory bandwidth per L3 cache id
// Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
// The unit of memory bandwidth is specified in "percentages" by
// default, and in "MBps" if MBA Software Controller is enabled.
MemBwSchema string `json:"memBwSchema,omitempty"`
}

View File

@@ -7,12 +7,13 @@ import (
)
const (
NEWNET NamespaceType = "NEWNET"
NEWPID NamespaceType = "NEWPID"
NEWNS NamespaceType = "NEWNS"
NEWUTS NamespaceType = "NEWUTS"
NEWIPC NamespaceType = "NEWIPC"
NEWUSER NamespaceType = "NEWUSER"
NEWNET NamespaceType = "NEWNET"
NEWPID NamespaceType = "NEWPID"
NEWNS NamespaceType = "NEWNS"
NEWUTS NamespaceType = "NEWUTS"
NEWIPC NamespaceType = "NEWIPC"
NEWUSER NamespaceType = "NEWUSER"
NEWCGROUP NamespaceType = "NEWCGROUP"
)
var (
@@ -35,6 +36,8 @@ func NsName(ns NamespaceType) string {
return "user"
case NEWUTS:
return "uts"
case NEWCGROUP:
return "cgroup"
}
return ""
}
@@ -68,6 +71,7 @@ func NamespaceTypes() []NamespaceType {
NEWNET,
NEWPID,
NEWNS,
NEWCGROUP,
}
}

View File

@@ -9,12 +9,13 @@ func (n *Namespace) Syscall() int {
}
var namespaceInfo = map[NamespaceType]int{
NEWNET: unix.CLONE_NEWNET,
NEWNS: unix.CLONE_NEWNS,
NEWUSER: unix.CLONE_NEWUSER,
NEWIPC: unix.CLONE_NEWIPC,
NEWUTS: unix.CLONE_NEWUTS,
NEWPID: unix.CLONE_NEWPID,
NEWNET: unix.CLONE_NEWNET,
NEWNS: unix.CLONE_NEWNS,
NEWUSER: unix.CLONE_NEWUSER,
NEWIPC: unix.CLONE_NEWIPC,
NEWUTS: unix.CLONE_NEWUTS,
NEWPID: unix.CLONE_NEWPID,
NEWCGROUP: unix.CLONE_NEWCGROUP,
}
// CloneFlags parses the container's Namespaces options to set the correct

View File

@@ -38,6 +38,9 @@ func (v *ConfigValidator) Validate(config *configs.Config) error {
if err := v.usernamespace(config); err != nil {
return err
}
if err := v.cgroupnamespace(config); err != nil {
return err
}
if err := v.sysctl(config); err != nil {
return err
}
@@ -116,6 +119,15 @@ func (v *ConfigValidator) usernamespace(config *configs.Config) error {
return nil
}
func (v *ConfigValidator) cgroupnamespace(config *configs.Config) error {
if config.Namespaces.Contains(configs.NEWCGROUP) {
if _, err := os.Stat("/proc/self/ns/cgroup"); os.IsNotExist(err) {
return fmt.Errorf("cgroup namespaces aren't enabled in the kernel")
}
}
return nil
}
// sysctl validates that the specified sysctl keys are valid or not.
// /proc/sys isn't completely namespaced and depending on which namespaces
// are specified, a subset of sysctls are permitted.

View File

@@ -9,6 +9,7 @@ import (
"time"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runtime-spec/specs-go"
)
// Status is the status of a container.
@@ -85,6 +86,12 @@ type BaseContainer interface {
// SystemError - System error.
State() (*State, error)
// OCIState returns the current container's state information.
//
// errors:
// SystemError - System error.
OCIState() (*specs.State, error)
// Returns the current config of the container.
Config() configs.Config

View File

@@ -19,13 +19,15 @@ import (
"syscall" // only for SysProcAttr and Signal
"time"
"github.com/cyphar/filepath-securejoin"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/criurpc"
"github.com/opencontainers/runc/libcontainer/intelrdt"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/utils"
"github.com/opencontainers/runtime-spec/specs-go"
criurpc "github.com/checkpoint-restore/go-criu/rpc"
"github.com/golang/protobuf/proto"
"github.com/sirupsen/logrus"
"github.com/vishvananda/netlink/nl"
@@ -156,6 +158,12 @@ func (c *linuxContainer) State() (*State, error) {
return c.currentState()
}
func (c *linuxContainer) OCIState() (*specs.State, error) {
c.m.Lock()
defer c.m.Unlock()
return c.currentOCIState()
}
func (c *linuxContainer) Processes() ([]int, error) {
pids, err := c.cgroupManager.GetAllPids()
if err != nil {
@@ -329,6 +337,7 @@ func (c *linuxContainer) start(process *Process) error {
if err != nil {
return newSystemErrorWithCause(err, "creating new parent process")
}
parent.forwardChildLogs()
if err := parent.start(); err != nil {
// terminate the process to ensure that it properly is reaped.
if err := ignoreTerminateErrors(parent.terminate()); err != nil {
@@ -349,13 +358,9 @@ func (c *linuxContainer) start(process *Process) error {
c.initProcessStartTime = state.InitProcessStartTime
if c.config.Hooks != nil {
bundle, annotations := utils.Annotations(c.config.Labels)
s := configs.HookState{
Version: c.config.Version,
ID: c.id,
Pid: parent.pid(),
Bundle: bundle,
Annotations: annotations,
s, err := c.currentOCIState()
if err != nil {
return err
}
for i, hook := range c.config.Hooks.Poststart {
if err := hook.Run(s); err != nil {
@@ -374,10 +379,18 @@ func (c *linuxContainer) Signal(s os.Signal, all bool) error {
if all {
return signalAllProcesses(c.cgroupManager, s)
}
if err := c.initProcess.signal(s); err != nil {
return newSystemErrorWithCause(err, "signaling init process")
status, err := c.currentStatus()
if err != nil {
return err
}
return nil
// to avoid a PID reuse attack
if status == Running || status == Created || status == Paused {
if err := c.initProcess.signal(s); err != nil {
return newSystemErrorWithCause(err, "signaling init process")
}
return nil
}
return newGenericError(fmt.Errorf("container not running"), ContainerNotRunning)
}
func (c *linuxContainer) createExecFifo() error {
@@ -426,16 +439,24 @@ func (c *linuxContainer) includeExecFifo(cmd *exec.Cmd) error {
}
func (c *linuxContainer) newParentProcess(p *Process) (parentProcess, error) {
parentPipe, childPipe, err := utils.NewSockPair("init")
parentInitPipe, childInitPipe, err := utils.NewSockPair("init")
if err != nil {
return nil, newSystemErrorWithCause(err, "creating new init pipe")
}
cmd, err := c.commandTemplate(p, childPipe)
messageSockPair := filePair{parentInitPipe, childInitPipe}
parentLogPipe, childLogPipe, err := os.Pipe()
if err != nil {
return nil, fmt.Errorf("Unable to create the log pipe: %s", err)
}
logFilePair := filePair{parentLogPipe, childLogPipe}
cmd, err := c.commandTemplate(p, childInitPipe, childLogPipe)
if err != nil {
return nil, newSystemErrorWithCause(err, "creating new command template")
}
if !p.Init {
return c.newSetnsProcess(p, cmd, parentPipe, childPipe)
return c.newSetnsProcess(p, cmd, messageSockPair, logFilePair)
}
// We only set up fifoFd if we're not doing a `runc exec`. The historic
@@ -446,10 +467,10 @@ func (c *linuxContainer) newParentProcess(p *Process) (parentProcess, error) {
if err := c.includeExecFifo(cmd); err != nil {
return nil, newSystemErrorWithCause(err, "including execfifo in cmd.Exec setup")
}
return c.newInitProcess(p, cmd, parentPipe, childPipe)
return c.newInitProcess(p, cmd, messageSockPair, logFilePair)
}
func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.Cmd, error) {
func (c *linuxContainer) commandTemplate(p *Process, childInitPipe *os.File, childLogPipe *os.File) (*exec.Cmd, error) {
cmd := exec.Command(c.initPath, c.initArgs[1:]...)
cmd.Args[0] = c.initArgs[0]
cmd.Stdin = p.Stdin
@@ -467,10 +488,18 @@ func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.
fmt.Sprintf("_LIBCONTAINER_CONSOLE=%d", stdioFdCount+len(cmd.ExtraFiles)-1),
)
}
cmd.ExtraFiles = append(cmd.ExtraFiles, childPipe)
cmd.ExtraFiles = append(cmd.ExtraFiles, childInitPipe)
cmd.Env = append(cmd.Env,
fmt.Sprintf("_LIBCONTAINER_INITPIPE=%d", stdioFdCount+len(cmd.ExtraFiles)-1),
fmt.Sprintf("_LIBCONTAINER_STATEDIR=%s", c.root),
)
cmd.ExtraFiles = append(cmd.ExtraFiles, childLogPipe)
cmd.Env = append(cmd.Env,
fmt.Sprintf("_LIBCONTAINER_LOGPIPE=%d", stdioFdCount+len(cmd.ExtraFiles)-1),
fmt.Sprintf("_LIBCONTAINER_LOGLEVEL=%s", p.LogLevel),
)
// NOTE: when running a container with no PID namespace and the parent process spawning the container is
// PID1 the pdeathsig is being delivered to the container's init process by the kernel for some reason
// even with the parent still running.
@@ -480,7 +509,7 @@ func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.
return cmd, nil
}
func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*initProcess, error) {
func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPair, logFilePair filePair) (*initProcess, error) {
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard))
nsMaps := make(map[configs.NamespaceType]string)
for _, ns := range c.config.Namespaces {
@@ -493,10 +522,10 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c
if err != nil {
return nil, err
}
return &initProcess{
init := &initProcess{
cmd: cmd,
childPipe: childPipe,
parentPipe: parentPipe,
messageSockPair: messageSockPair,
logFilePair: logFilePair,
manager: c.cgroupManager,
intelRdtManager: c.intelRdtManager,
config: c.newInitConfig(p),
@@ -504,10 +533,12 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c
process: p,
bootstrapData: data,
sharePidns: sharePidns,
}, nil
}
c.initProcess = init
return init, nil
}
func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*setnsProcess, error) {
func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockPair, logFilePair filePair) (*setnsProcess, error) {
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initSetns))
state, err := c.currentState()
if err != nil {
@@ -524,8 +555,8 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe,
cgroupPaths: c.cgroupManager.GetPaths(),
rootlessCgroups: c.config.RootlessCgroups,
intelRdtPath: state.IntelRdtPath,
childPipe: childPipe,
parentPipe: parentPipe,
messageSockPair: messageSockPair,
logFilePair: logFilePair,
config: c.newInitConfig(p),
process: p,
bootstrapData: data,
@@ -862,6 +893,32 @@ func waitForCriuLazyServer(r *os.File, status string) error {
return nil
}
func (c *linuxContainer) handleCriuConfigurationFile(rpcOpts *criurpc.CriuOpts) {
// CRIU will evaluate a configuration starting with release 3.11.
// Settings in the configuration file will overwrite RPC settings.
// Look for annotations. The annotation 'org.criu.config'
// specifies if CRIU should use a different, container specific
// configuration file.
_, annotations := utils.Annotations(c.config.Labels)
configFile, exists := annotations["org.criu.config"]
if exists {
// If the annotation 'org.criu.config' exists and is set
// to a non-empty string, tell CRIU to use that as a
// configuration file. If the file does not exist, CRIU
// will just ignore it.
if configFile != "" {
rpcOpts.ConfigFile = proto.String(configFile)
}
// If 'org.criu.config' exists and is set to an empty
// string, a runc specific CRIU configuration file will
// be not set at all.
} else {
// If the mentioned annotation has not been found, specify
// a default CRIU configuration file.
rpcOpts.ConfigFile = proto.String("/etc/criu/runc.conf")
}
}
func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
c.m.Lock()
defer c.m.Unlock()
@@ -927,6 +984,8 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
LazyPages: proto.Bool(criuOpts.LazyPages),
}
c.handleCriuConfigurationFile(&rpcOpts)
// If the container is running in a network namespace and has
// a path to the network namespace configured, we will dump
// that network namespace as an external namespace and we
@@ -1098,6 +1157,75 @@ func (c *linuxContainer) restoreNetwork(req *criurpc.CriuReq, criuOpts *CriuOpts
}
}
// makeCriuRestoreMountpoints makes the actual mountpoints for the
// restore using CRIU. This function is inspired from the code in
// rootfs_linux.go
func (c *linuxContainer) makeCriuRestoreMountpoints(m *configs.Mount) error {
switch m.Device {
case "cgroup":
// Do nothing for cgroup, CRIU should handle it
case "bind":
// The prepareBindMount() function checks if source
// exists. So it cannot be used for other filesystem types.
if err := prepareBindMount(m, c.config.Rootfs); err != nil {
return err
}
default:
// for all other file-systems just create the mountpoints
dest, err := securejoin.SecureJoin(c.config.Rootfs, m.Destination)
if err != nil {
return err
}
if err := checkMountDestination(c.config.Rootfs, dest); err != nil {
return err
}
m.Destination = dest
if err := os.MkdirAll(dest, 0755); err != nil {
return err
}
}
return nil
}
// isPathInPrefixList is a small function for CRIU restore to make sure
// mountpoints, which are on a tmpfs, are not created in the roofs
func isPathInPrefixList(path string, prefix []string) bool {
for _, p := range prefix {
if strings.HasPrefix(path, p+"/") {
return false
}
}
return true
}
// prepareCriuRestoreMounts tries to set up the rootfs of the
// container to be restored in the same way runc does it for
// initial container creation. Even for a read-only rootfs container
// runc modifies the rootfs to add mountpoints which do not exist.
// This function also creates missing mountpoints as long as they
// are not on top of a tmpfs, as CRIU will restore tmpfs content anyway.
func (c *linuxContainer) prepareCriuRestoreMounts(mounts []*configs.Mount) error {
// First get a list of a all tmpfs mounts
tmpfs := []string{}
for _, m := range mounts {
switch m.Device {
case "tmpfs":
tmpfs = append(tmpfs, m.Destination)
}
}
// Now go through all mounts and create the mountpoints
// if the mountpoints are not on a tmpfs, as CRIU will
// restore the complete tmpfs content from its checkpoint.
for _, m := range mounts {
if isPathInPrefixList(m.Destination, tmpfs) {
if err := c.makeCriuRestoreMountpoints(m); err != nil {
return err
}
}
}
return nil
}
func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
c.m.Lock()
defer c.m.Unlock()
@@ -1177,6 +1305,8 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
},
}
c.handleCriuConfigurationFile(req.Opts)
// Same as during checkpointing. If the container has a specific network namespace
// assigned to it, this now expects that the checkpoint will be restored in a
// already created network namespace.
@@ -1209,6 +1339,12 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
}
}
// This will modify the rootfs of the container in the same way runc
// modifies the container during initial creation.
if err := c.prepareCriuRestoreMounts(c.config.Mounts); err != nil {
return err
}
for _, m := range c.config.Mounts {
switch m.Device {
case "bind":
@@ -1537,14 +1673,11 @@ func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Proc
}
case notify.GetScript() == "setup-namespaces":
if c.config.Hooks != nil {
bundle, annotations := utils.Annotations(c.config.Labels)
s := configs.HookState{
Version: c.config.Version,
ID: c.id,
Pid: int(notify.GetPid()),
Bundle: bundle,
Annotations: annotations,
s, err := c.currentOCIState()
if err != nil {
return nil
}
s.Pid = int(notify.GetPid())
for i, hook := range c.config.Hooks.Prestart {
if err := hook.Run(s); err != nil {
return newSystemErrorWithCausef(err, "running prestart hook %d", i)
@@ -1738,11 +1871,31 @@ func (c *linuxContainer) currentState() (*State, error) {
return state, nil
}
func (c *linuxContainer) currentOCIState() (*specs.State, error) {
bundle, annotations := utils.Annotations(c.config.Labels)
state := &specs.State{
Version: specs.Version,
ID: c.ID(),
Bundle: bundle,
Annotations: annotations,
}
status, err := c.currentStatus()
if err != nil {
return nil, err
}
state.Status = status.String()
if status != Stopped {
if c.initProcess != nil {
state.Pid = c.initProcess.pid()
}
}
return state, nil
}
// orderNamespacePaths sorts namespace paths into a list of paths that we
// can setns in order.
func (c *linuxContainer) orderNamespacePaths(namespaces map[configs.NamespaceType]string) ([]string, error) {
paths := []string{}
for _, ns := range configs.NamespaceTypes() {
// Remove namespaces that we don't need to join.

View File

@@ -1,2 +0,0 @@
gen: criurpc.proto
protoc --go_out=. criurpc.proto

File diff suppressed because it is too large Load Diff

View File

@@ -1,209 +0,0 @@
syntax = "proto2";
message criu_page_server_info {
optional string address = 1;
optional int32 port = 2;
optional int32 pid = 3;
optional int32 fd = 4;
}
message criu_veth_pair {
required string if_in = 1;
required string if_out = 2;
};
message ext_mount_map {
required string key = 1;
required string val = 2;
};
message join_namespace {
required string ns = 1;
required string ns_file = 2;
optional string extra_opt = 3;
}
message inherit_fd {
required string key = 1;
required int32 fd = 2;
};
message cgroup_root {
optional string ctrl = 1;
required string path = 2;
};
message unix_sk {
required uint32 inode = 1;
};
enum criu_cg_mode {
IGNORE = 0;
CG_NONE = 1;
PROPS = 2;
SOFT = 3;
FULL = 4;
STRICT = 5;
DEFAULT = 6;
};
message criu_opts {
required int32 images_dir_fd = 1;
optional int32 pid = 2; /* if not set on dump, will dump requesting process */
optional bool leave_running = 3;
optional bool ext_unix_sk = 4;
optional bool tcp_established = 5;
optional bool evasive_devices = 6;
optional bool shell_job = 7;
optional bool file_locks = 8;
optional int32 log_level = 9 [default = 2];
optional string log_file = 10; /* No subdirs are allowed. Consider using work-dir */
optional criu_page_server_info ps = 11;
optional bool notify_scripts = 12;
optional string root = 13;
optional string parent_img = 14;
optional bool track_mem = 15;
optional bool auto_dedup = 16;
optional int32 work_dir_fd = 17;
optional bool link_remap = 18;
repeated criu_veth_pair veths = 19; /* DEPRECATED, use external instead */
optional uint32 cpu_cap = 20 [default = 0xffffffff];
optional bool force_irmap = 21;
repeated string exec_cmd = 22;
repeated ext_mount_map ext_mnt = 23; /* DEPRECATED, use external instead */
optional bool manage_cgroups = 24; /* backward compatibility */
repeated cgroup_root cg_root = 25;
optional bool rst_sibling = 26; /* swrk only */
repeated inherit_fd inherit_fd = 27; /* swrk only */
optional bool auto_ext_mnt = 28;
optional bool ext_sharing = 29;
optional bool ext_masters = 30;
repeated string skip_mnt = 31;
repeated string enable_fs = 32;
repeated unix_sk unix_sk_ino = 33; /* DEPRECATED, use external instead */
optional criu_cg_mode manage_cgroups_mode = 34;
optional uint32 ghost_limit = 35 [default = 0x100000];
repeated string irmap_scan_paths = 36;
repeated string external = 37;
optional uint32 empty_ns = 38;
repeated join_namespace join_ns = 39;
optional string cgroup_props = 41;
optional string cgroup_props_file = 42;
repeated string cgroup_dump_controller = 43;
optional string freeze_cgroup = 44;
optional uint32 timeout = 45;
optional bool tcp_skip_in_flight = 46;
optional bool weak_sysctls = 47;
optional bool lazy_pages = 48;
optional int32 status_fd = 49;
optional bool orphan_pts_master = 50;
}
message criu_dump_resp {
optional bool restored = 1;
}
message criu_restore_resp {
required int32 pid = 1;
}
message criu_notify {
optional string script = 1;
optional int32 pid = 2;
}
enum criu_req_type {
EMPTY = 0;
DUMP = 1;
RESTORE = 2;
CHECK = 3;
PRE_DUMP = 4;
PAGE_SERVER = 5;
NOTIFY = 6;
CPUINFO_DUMP = 7;
CPUINFO_CHECK = 8;
FEATURE_CHECK = 9;
VERSION = 10;
}
/*
* List of features which can queried via
* CRIU_REQ_TYPE__FEATURE_CHECK
*/
message criu_features {
optional bool mem_track = 1;
optional bool lazy_pages = 2;
}
/*
* Request -- each type corresponds to must-be-there
* request arguments of respective type
*/
message criu_req {
required criu_req_type type = 1;
optional criu_opts opts = 2;
optional bool notify_success = 3;
/*
* When set service won't close the connection but
* will wait for more req-s to appear. Works not
* for all request types.
*/
optional bool keep_open = 4;
/*
* 'features' can be used to query which features
* are supported by the installed criu/kernel
* via RPC.
*/
optional criu_features features = 5;
}
/*
* Response -- it states whether the request was served
* and additional request-specific information
*/
message criu_resp {
required criu_req_type type = 1;
required bool success = 2;
optional criu_dump_resp dump = 3;
optional criu_restore_resp restore = 4;
optional criu_notify notify = 5;
optional criu_page_server_info ps = 6;
optional int32 cr_errno = 7;
optional criu_features features = 8;
optional string cr_errmsg = 9;
optional criu_version version = 10;
}
/* Answer for criu_req_type.VERSION requests */
message criu_version {
required int32 major = 1;
required int32 minor = 2;
optional string gitid = 3;
optional int32 sublevel = 4;
optional int32 extra = 5;
optional string name = 6;
}

View File

@@ -51,12 +51,11 @@ func InitArgs(args ...string) func(*LinuxFactory) error {
// SystemdCgroups is an options func to configure a LinuxFactory to return
// containers that use systemd to create and manage cgroups.
func SystemdCgroups(l *LinuxFactory) error {
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
return &systemd.Manager{
Cgroups: config,
Paths: paths,
}
systemdCgroupsManager, err := systemd.NewSystemdCgroupsManager()
if err != nil {
return err
}
l.NewCgroupsManager = systemdCgroupsManager
return nil
}

View File

@@ -28,7 +28,8 @@ import (
*
* Memory Bandwidth Allocation (MBA) provides indirect and approximate throttle
* over memory bandwidth for the software. A user controls the resource by
* indicating the percentage of maximum memory bandwidth.
* indicating the percentage of maximum memory bandwidth or memory bandwidth
* limit in MBps unit if MBA Software Controller is enabled.
*
* More details about Intel RDT CAT and MBA can be found in the section 17.18
* of Intel Software Developer Manual:
@@ -95,7 +96,7 @@ import (
*
* Memory bandwidth schema:
* It has allocation values for memory bandwidth on each socket, which contains
* L3 cache id and memory bandwidth percentage.
* L3 cache id and memory bandwidth.
* Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
* For example, on a two-socket machine, the schema line could be "MB:0=20;1=70"
*
@@ -106,6 +107,18 @@ import (
* min_bw + N * bw_gran. Intermediate values are rounded to the next control
* step available on the hardware.
*
* If MBA Software Controller is enabled through mount option "-o mba_MBps":
* mount -t resctrl resctrl -o mba_MBps /sys/fs/resctrl
* We could specify memory bandwidth in "MBps" (Mega Bytes per second) unit
* instead of "percentages". The kernel underneath would use a software feedback
* mechanism or a "Software Controller" which reads the actual bandwidth using
* MBM counters and adjust the memory bandwidth percentages to ensure:
* "actual memory bandwidth < user specified memory bandwidth".
*
* For example, on a two-socket machine, the schema line could be
* "MB:0=5000;1=7000" which means 5000 MBps memory bandwidth limit on socket 0
* and 7000 MBps memory bandwidth limit on socket 1.
*
* For more information about Intel RDT kernel interface:
* https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt
*
@@ -165,6 +178,8 @@ var (
isCatEnabled bool
// The flag to indicate if Intel RDT/MBA is enabled
isMbaEnabled bool
// The flag to indicate if Intel RDT/MBA Software Controller is enabled
isMbaScEnabled bool
)
type intelRdtData struct {
@@ -197,7 +212,12 @@ func init() {
isCatEnabled = true
}
}
if isMbaFlagSet {
if isMbaScEnabled {
// We confirm MBA Software Controller is enabled in step 2,
// MBA should be enabled because MBA Software Controller
// depends on MBA
isMbaEnabled = true
} else if isMbaFlagSet {
if _, err := os.Stat(filepath.Join(intelRdtRoot, "info", "MB")); err == nil {
isMbaEnabled = true
}
@@ -232,6 +252,11 @@ func findIntelRdtMountpointDir() (string, error) {
return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
}
// Check if MBA Software Controller is enabled through mount option "-o mba_MBps"
if strings.Contains(postSeparatorFields[2], "mba_MBps") {
isMbaScEnabled = true
}
return fields[4], nil
}
}
@@ -461,7 +486,7 @@ func WriteIntelRdtTasks(dir string, pid int) error {
return fmt.Errorf("no such directory for %s", IntelRdtTasks)
}
// Dont attach any pid if -1 is specified as a pid
// Don't attach any pid if -1 is specified as a pid
if pid != -1 {
if err := ioutil.WriteFile(filepath.Join(dir, IntelRdtTasks), []byte(strconv.Itoa(pid)), 0700); err != nil {
return fmt.Errorf("failed to write %v to %v: %v", pid, IntelRdtTasks, err)
@@ -480,6 +505,11 @@ func IsMbaEnabled() bool {
return isMbaEnabled
}
// Check if Intel RDT/MBA Software Controller is enabled
func IsMbaScEnabled() bool {
return isMbaScEnabled
}
// Get the 'container_id' path in Intel RDT "resource control" filesystem
func GetIntelRdtPath(id string) (string, error) {
rootPath, err := getIntelRdtRoot()
@@ -517,7 +547,7 @@ func (m *IntelRdtManager) Apply(pid int) (err error) {
func (m *IntelRdtManager) Destroy() error {
m.mu.Lock()
defer m.mu.Unlock()
if err := os.RemoveAll(m.Path); err != nil {
if err := os.RemoveAll(m.GetPath()); err != nil {
return err
}
m.Path = ""
@@ -633,7 +663,7 @@ func (m *IntelRdtManager) Set(container *configs.Config) error {
//
// About memory bandwidth schema:
// It has allocation values for memory bandwidth on each socket, which
// contains L3 cache id and memory bandwidth percentage.
// contains L3 cache id and memory bandwidth.
// Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
// For example, on a two-socket machine, the schema line could be:
// "MB:0=20;1=70"
@@ -645,6 +675,19 @@ func (m *IntelRdtManager) Set(container *configs.Config) error {
// The available bandwidth control steps are: min_bw + N * bw_gran.
// Intermediate values are rounded to the next control step available
// on the hardware.
//
// If MBA Software Controller is enabled through mount option
// "-o mba_MBps": mount -t resctrl resctrl -o mba_MBps /sys/fs/resctrl
// We could specify memory bandwidth in "MBps" (Mega Bytes per second)
// unit instead of "percentages". The kernel underneath would use a
// software feedback mechanism or a "Software Controller" which reads
// the actual bandwidth using MBM counters and adjust the memory
// bandwidth percentages to ensure:
// "actual memory bandwidth < user specified memory bandwidth".
//
// For example, on a two-socket machine, the schema line could be
// "MB:0=5000;1=7000" which means 5000 MBps memory bandwidth limit on
// socket 0 and 7000 MBps memory bandwidth limit on socket 1.
if container.IntelRdt != nil {
path := m.GetPath()
l3CacheSchema := container.IntelRdt.L3CacheSchema

View File

@@ -2,11 +2,11 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library")
go_library(
name = "go_default_library",
srcs = ["criurpc.pb.go"],
importmap = "k8s.io/kubernetes/vendor/github.com/opencontainers/runc/libcontainer/criurpc",
importpath = "github.com/opencontainers/runc/libcontainer/criurpc",
srcs = ["logs.go"],
importmap = "k8s.io/kubernetes/vendor/github.com/opencontainers/runc/libcontainer/logs",
importpath = "github.com/opencontainers/runc/libcontainer/logs",
visibility = ["//visibility:public"],
deps = ["//vendor/github.com/golang/protobuf/proto:go_default_library"],
deps = ["//vendor/github.com/sirupsen/logrus:go_default_library"],
)
filegroup(

View File

@@ -0,0 +1,102 @@
package logs
import (
"bufio"
"encoding/json"
"fmt"
"io"
"os"
"strconv"
"sync"
"github.com/sirupsen/logrus"
)
var (
configureMutex = sync.Mutex{}
// loggingConfigured will be set once logging has been configured via invoking `ConfigureLogging`.
// Subsequent invocations of `ConfigureLogging` would be no-op
loggingConfigured = false
)
type Config struct {
LogLevel logrus.Level
LogFormat string
LogFilePath string
LogPipeFd string
}
func ForwardLogs(logPipe io.Reader) {
lineReader := bufio.NewReader(logPipe)
for {
line, err := lineReader.ReadBytes('\n')
if len(line) > 0 {
processEntry(line)
}
if err == io.EOF {
logrus.Debugf("log pipe has been closed: %+v", err)
return
}
if err != nil {
logrus.Errorf("log pipe read error: %+v", err)
}
}
}
func processEntry(text []byte) {
type jsonLog struct {
Level string `json:"level"`
Msg string `json:"msg"`
}
var jl jsonLog
if err := json.Unmarshal(text, &jl); err != nil {
logrus.Errorf("failed to decode %q to json: %+v", text, err)
return
}
lvl, err := logrus.ParseLevel(jl.Level)
if err != nil {
logrus.Errorf("failed to parse log level %q: %v\n", jl.Level, err)
return
}
logrus.StandardLogger().Logf(lvl, jl.Msg)
}
func ConfigureLogging(config Config) error {
configureMutex.Lock()
defer configureMutex.Unlock()
if loggingConfigured {
logrus.Debug("logging has already been configured")
return nil
}
logrus.SetLevel(config.LogLevel)
if config.LogPipeFd != "" {
logPipeFdInt, err := strconv.Atoi(config.LogPipeFd)
if err != nil {
return fmt.Errorf("failed to convert _LIBCONTAINER_LOGPIPE environment variable value %q to int: %v", config.LogPipeFd, err)
}
logrus.SetOutput(os.NewFile(uintptr(logPipeFdInt), "logpipe"))
} else if config.LogFilePath != "" {
f, err := os.OpenFile(config.LogFilePath, os.O_CREATE|os.O_WRONLY|os.O_APPEND|os.O_SYNC, 0644)
if err != nil {
return err
}
logrus.SetOutput(f)
}
switch config.LogFormat {
case "text":
// retain logrus's default.
case "json":
logrus.SetFormatter(new(logrus.JSONFormatter))
default:
return fmt.Errorf("unknown log-format %q", config.LogFormat)
}
loggingConfigured = true
return nil
}

View File

@@ -76,6 +76,8 @@ type Process struct {
Init bool
ops processOperations
LogLevel string
}
// Wait waits for the process to exit.

View File

@@ -16,12 +16,17 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/intelrdt"
"github.com/opencontainers/runc/libcontainer/logs"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/utils"
"golang.org/x/sys/unix"
)
// Synchronisation value for cgroup namespace setup.
// The same constant is defined in nsexec.c as "CREATECGROUPNS".
const createCgroupns = 0x80
type parentProcess interface {
// pid returns the pid for the running process.
pid() int
@@ -43,12 +48,19 @@ type parentProcess interface {
externalDescriptors() []string
setExternalDescriptors(fds []string)
forwardChildLogs()
}
type filePair struct {
parent *os.File
child *os.File
}
type setnsProcess struct {
cmd *exec.Cmd
parentPipe *os.File
childPipe *os.File
messageSockPair filePair
logFilePair filePair
cgroupPaths map[string]string
rootlessCgroups bool
intelRdtPath string
@@ -72,14 +84,16 @@ func (p *setnsProcess) signal(sig os.Signal) error {
}
func (p *setnsProcess) start() (err error) {
defer p.parentPipe.Close()
defer p.messageSockPair.parent.Close()
err = p.cmd.Start()
p.childPipe.Close()
// close the write-side of the pipes (controlled by child)
p.messageSockPair.child.Close()
p.logFilePair.child.Close()
if err != nil {
return newSystemErrorWithCause(err, "starting setns process")
}
if p.bootstrapData != nil {
if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
if _, err := io.Copy(p.messageSockPair.parent, p.bootstrapData); err != nil {
return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
}
}
@@ -105,11 +119,11 @@ func (p *setnsProcess) start() (err error) {
if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
return newSystemErrorWithCause(err, "setting rlimits for process")
}
if err := utils.WriteJSON(p.parentPipe, p.config); err != nil {
if err := utils.WriteJSON(p.messageSockPair.parent, p.config); err != nil {
return newSystemErrorWithCause(err, "writing config to pipe")
}
ierr := parseSync(p.parentPipe, func(sync *syncT) error {
ierr := parseSync(p.messageSockPair.parent, func(sync *syncT) error {
switch sync.Type {
case procReady:
// This shouldn't happen.
@@ -122,7 +136,7 @@ func (p *setnsProcess) start() (err error) {
}
})
if err := unix.Shutdown(int(p.parentPipe.Fd()), unix.SHUT_WR); err != nil {
if err := unix.Shutdown(int(p.messageSockPair.parent.Fd()), unix.SHUT_WR); err != nil {
return newSystemErrorWithCause(err, "calling shutdown on init pipe")
}
// Must be done after Shutdown so the child will exit and we can wait for it.
@@ -148,16 +162,14 @@ func (p *setnsProcess) execSetns() error {
return newSystemError(&exec.ExitError{ProcessState: status})
}
var pid *pid
if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil {
if err := json.NewDecoder(p.messageSockPair.parent).Decode(&pid); err != nil {
p.cmd.Wait()
return newSystemErrorWithCause(err, "reading pid from init pipe")
}
// Clean up the zombie parent process
firstChildProcess, err := os.FindProcess(pid.PidFirstChild)
if err != nil {
return err
}
// On Unix systems FindProcess always succeeds.
firstChildProcess, _ := os.FindProcess(pid.PidFirstChild)
// Ignore the error in case the child has already been reaped for any reason
_, _ = firstChildProcess.Wait()
@@ -203,10 +215,14 @@ func (p *setnsProcess) setExternalDescriptors(newFds []string) {
p.fds = newFds
}
func (p *setnsProcess) forwardChildLogs() {
go logs.ForwardLogs(p.logFilePair.parent)
}
type initProcess struct {
cmd *exec.Cmd
parentPipe *os.File
childPipe *os.File
messageSockPair filePair
logFilePair filePair
config *initConfig
manager cgroups.Manager
intelRdtManager intelrdt.Manager
@@ -225,12 +241,25 @@ func (p *initProcess) externalDescriptors() []string {
return p.fds
}
// execSetns runs the process that executes C code to perform the setns calls
// because setns support requires the C process to fork off a child and perform the setns
// before the go runtime boots, we wait on the process to die and receive the child's pid
// over the provided pipe.
// This is called by initProcess.start function
func (p *initProcess) execSetns() error {
// getChildPid receives the final child's pid over the provided pipe.
func (p *initProcess) getChildPid() (int, error) {
var pid pid
if err := json.NewDecoder(p.messageSockPair.parent).Decode(&pid); err != nil {
p.cmd.Wait()
return -1, err
}
// Clean up the zombie parent process
// On Unix systems FindProcess always succeeds.
firstChildProcess, _ := os.FindProcess(pid.PidFirstChild)
// Ignore the error in case the child has already been reaped for any reason
_, _ = firstChildProcess.Wait()
return pid.Pid, nil
}
func (p *initProcess) waitForChildExit(childPid int) error {
status, err := p.cmd.Process.Wait()
if err != nil {
p.cmd.Wait()
@@ -240,22 +269,8 @@ func (p *initProcess) execSetns() error {
p.cmd.Wait()
return &exec.ExitError{ProcessState: status}
}
var pid *pid
if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil {
p.cmd.Wait()
return err
}
// Clean up the zombie parent process
firstChildProcess, err := os.FindProcess(pid.PidFirstChild)
if err != nil {
return err
}
// Ignore the error in case the child has already been reaped for any reason
_, _ = firstChildProcess.Wait()
process, err := os.FindProcess(pid.Pid)
process, err := os.FindProcess(childPid)
if err != nil {
return err
}
@@ -265,10 +280,12 @@ func (p *initProcess) execSetns() error {
}
func (p *initProcess) start() error {
defer p.parentPipe.Close()
defer p.messageSockPair.parent.Close()
err := p.cmd.Start()
p.process.ops = p
p.childPipe.Close()
// close the write-side of the pipes (controlled by child)
p.messageSockPair.child.Close()
p.logFilePair.child.Close()
if err != nil {
p.process.ops = nil
return newSystemErrorWithCause(err, "starting init process command")
@@ -294,22 +311,53 @@ func (p *initProcess) start() error {
}
}()
if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
if _, err := io.Copy(p.messageSockPair.parent, p.bootstrapData); err != nil {
return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
}
if err := p.execSetns(); err != nil {
return newSystemErrorWithCause(err, "running exec setns process for init")
childPid, err := p.getChildPid()
if err != nil {
return newSystemErrorWithCause(err, "getting the final child's pid from pipe")
}
// Save the standard descriptor names before the container process
// can potentially move them (e.g., via dup2()). If we don't do this now,
// we won't know at checkpoint time which file descriptor to look up.
fds, err := getPipeFds(p.pid())
fds, err := getPipeFds(childPid)
if err != nil {
return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid())
return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", childPid)
}
p.setExternalDescriptors(fds)
// Do this before syncing with child so that no children
// can escape the cgroup
if err := p.manager.Apply(childPid); err != nil {
return newSystemErrorWithCause(err, "applying cgroup configuration for process")
}
if p.intelRdtManager != nil {
if err := p.intelRdtManager.Apply(childPid); err != nil {
return newSystemErrorWithCause(err, "applying Intel RDT configuration for process")
}
}
// Now it's time to setup cgroup namesapce
if p.config.Config.Namespaces.Contains(configs.NEWCGROUP) && p.config.Config.Namespaces.PathOf(configs.NEWCGROUP) == "" {
if _, err := p.messageSockPair.parent.Write([]byte{createCgroupns}); err != nil {
return newSystemErrorWithCause(err, "sending synchronization value to init process")
}
}
// Wait for our first child to exit
if err := p.waitForChildExit(childPid); err != nil {
return newSystemErrorWithCause(err, "waiting for our first child to exit")
}
defer func() {
if err != nil {
// TODO: should not be the responsibility to call here
p.manager.Destroy()
if p.intelRdtManager != nil {
p.intelRdtManager.Destroy()
}
}
}()
if err := p.createNetworkInterfaces(); err != nil {
return newSystemErrorWithCause(err, "creating network interfaces")
}
@@ -321,7 +369,7 @@ func (p *initProcess) start() error {
sentResume bool
)
ierr := parseSync(p.parentPipe, func(sync *syncT) error {
ierr := parseSync(p.messageSockPair.parent, func(sync *syncT) error {
switch sync.Type {
case procReady:
// set rlimits, this has to be done here because we lose permissions
@@ -342,14 +390,13 @@ func (p *initProcess) start() error {
}
if p.config.Config.Hooks != nil {
bundle, annotations := utils.Annotations(p.container.config.Labels)
s := configs.HookState{
Version: p.container.config.Version,
ID: p.container.id,
Pid: p.pid(),
Bundle: bundle,
Annotations: annotations,
s, err := p.container.currentOCIState()
if err != nil {
return err
}
// initProcessStartTime hasn't been set yet.
s.Pid = p.cmd.Process.Pid
s.Status = "creating"
for i, hook := range p.config.Config.Hooks.Prestart {
if err := hook.Run(s); err != nil {
return newSystemErrorWithCausef(err, "running prestart hook %d", i)
@@ -358,7 +405,7 @@ func (p *initProcess) start() error {
}
}
// Sync with child.
if err := writeSync(p.parentPipe, procRun); err != nil {
if err := writeSync(p.messageSockPair.parent, procRun); err != nil {
return newSystemErrorWithCause(err, "writing syncT 'run'")
}
sentRun = true
@@ -373,14 +420,13 @@ func (p *initProcess) start() error {
}
}
if p.config.Config.Hooks != nil {
bundle, annotations := utils.Annotations(p.container.config.Labels)
s := configs.HookState{
Version: p.container.config.Version,
ID: p.container.id,
Pid: p.pid(),
Bundle: bundle,
Annotations: annotations,
s, err := p.container.currentOCIState()
if err != nil {
return err
}
// initProcessStartTime hasn't been set yet.
s.Pid = p.cmd.Process.Pid
s.Status = "creating"
for i, hook := range p.config.Config.Hooks.Prestart {
if err := hook.Run(s); err != nil {
return newSystemErrorWithCausef(err, "running prestart hook %d", i)
@@ -388,7 +434,7 @@ func (p *initProcess) start() error {
}
}
// Sync with child.
if err := writeSync(p.parentPipe, procResume); err != nil {
if err := writeSync(p.messageSockPair.parent, procResume); err != nil {
return newSystemErrorWithCause(err, "writing syncT 'resume'")
}
sentResume = true
@@ -405,7 +451,7 @@ func (p *initProcess) start() error {
if p.config.Config.Namespaces.Contains(configs.NEWNS) && !sentResume {
return newSystemError(fmt.Errorf("could not synchronise after executing prestart hooks with container process"))
}
if err := unix.Shutdown(int(p.parentPipe.Fd()), unix.SHUT_WR); err != nil {
if err := unix.Shutdown(int(p.messageSockPair.parent.Fd()), unix.SHUT_WR); err != nil {
return newSystemErrorWithCause(err, "shutting down init pipe")
}
@@ -449,7 +495,7 @@ func (p *initProcess) sendConfig() error {
// send the config to the container's init process, we don't use JSON Encode
// here because there might be a problem in JSON decoder in some cases, see:
// https://github.com/docker/docker/issues/14203#issuecomment-174177790
return utils.WriteJSON(p.parentPipe, p.config)
return utils.WriteJSON(p.messageSockPair.parent, p.config)
}
func (p *initProcess) createNetworkInterfaces() error {
@@ -481,6 +527,10 @@ func (p *initProcess) setExternalDescriptors(newFds []string) {
p.fds = newFds
}
func (p *initProcess) forwardChildLogs() {
go logs.ForwardLogs(p.logFilePair.parent)
}
func getPipeFds(pid int) ([]string, error) {
fds := make([]string, 3)

View File

@@ -76,6 +76,9 @@ func (p *restoredProcess) setExternalDescriptors(newFds []string) {
p.fds = newFds
}
func (p *restoredProcess) forwardChildLogs() {
}
// nonChildProcess represents a process where the calling process is not
// the parent process. This process is created when a factory loads a container from
// a persisted state.
@@ -120,3 +123,6 @@ func (p *nonChildProcess) externalDescriptors() []string {
func (p *nonChildProcess) setExternalDescriptors(newFds []string) {
p.fds = newFds
}
func (p *nonChildProcess) forwardChildLogs() {
}

View File

@@ -46,6 +46,7 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) {
return newSystemErrorWithCause(err, "preparing rootfs")
}
hasCgroupns := config.Namespaces.Contains(configs.NEWCGROUP)
setupDev := needsSetupDev(config)
for _, m := range config.Mounts {
for _, precmd := range m.PremountCmds {
@@ -53,8 +54,7 @@ func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) {
return newSystemErrorWithCause(err, "running premount command")
}
}
if err := mountToRootfs(m, config.Rootfs, config.MountLabel); err != nil {
if err := mountToRootfs(m, config.Rootfs, config.MountLabel, hasCgroupns); err != nil {
return newSystemErrorWithCausef(err, "mounting %q to rootfs %q at %q", m.Source, config.Rootfs, m.Destination)
}
@@ -182,7 +182,34 @@ func mountCmd(cmd configs.Command) error {
return nil
}
func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
func prepareBindMount(m *configs.Mount, rootfs string) error {
stat, err := os.Stat(m.Source)
if err != nil {
// error out if the source of a bind mount does not exist as we will be
// unable to bind anything to it.
return err
}
// ensure that the destination of the bind mount is resolved of symlinks at mount time because
// any previous mounts can invalidate the next mount's destination.
// this can happen when a user specifies mounts within other mounts to cause breakouts or other
// evil stuff to try to escape the container's rootfs.
var dest string
if dest, err = securejoin.SecureJoin(rootfs, m.Destination); err != nil {
return err
}
if err := checkMountDestination(rootfs, dest); err != nil {
return err
}
// update the mount with the correct dest after symlinks are resolved.
m.Destination = dest
if err := createIfNotExists(dest, stat.IsDir()); err != nil {
return err
}
return nil
}
func mountToRootfs(m *configs.Mount, rootfs, mountLabel string, enableCgroupns bool) error {
var (
dest = m.Destination
)
@@ -257,25 +284,7 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
}
return nil
case "bind":
stat, err := os.Stat(m.Source)
if err != nil {
// error out if the source of a bind mount does not exist as we will be
// unable to bind anything to it.
return err
}
// ensure that the destination of the bind mount is resolved of symlinks at mount time because
// any previous mounts can invalidate the next mount's destination.
// this can happen when a user specifies mounts within other mounts to cause breakouts or other
// evil stuff to try to escape the container's rootfs.
if dest, err = securejoin.SecureJoin(rootfs, m.Destination); err != nil {
return err
}
if err := checkMountDestination(rootfs, dest); err != nil {
return err
}
// update the mount with the correct dest after symlinks are resolved.
m.Destination = dest
if err := createIfNotExists(dest, stat.IsDir()); err != nil {
if err := prepareBindMount(m, rootfs); err != nil {
return err
}
if err := mountPropagate(m, rootfs, mountLabel); err != nil {
@@ -319,12 +328,33 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
Data: "mode=755",
PropagationFlags: m.PropagationFlags,
}
if err := mountToRootfs(tmpfs, rootfs, mountLabel); err != nil {
if err := mountToRootfs(tmpfs, rootfs, mountLabel, enableCgroupns); err != nil {
return err
}
for _, b := range binds {
if err := mountToRootfs(b, rootfs, mountLabel); err != nil {
return err
if enableCgroupns {
subsystemPath := filepath.Join(rootfs, b.Destination)
if err := os.MkdirAll(subsystemPath, 0755); err != nil {
return err
}
flags := defaultMountFlags
if m.Flags&unix.MS_RDONLY != 0 {
flags = flags | unix.MS_RDONLY
}
cgroupmount := &configs.Mount{
Source: "cgroup",
Device: "cgroup",
Destination: subsystemPath,
Flags: flags,
Data: filepath.Base(subsystemPath),
}
if err := mountNewCgroup(cgroupmount); err != nil {
return err
}
} else {
if err := mountToRootfs(b, rootfs, mountLabel, enableCgroupns); err != nil {
return err
}
}
}
for _, mc := range merged {
@@ -727,6 +757,41 @@ func pivotRoot(rootfs string) error {
}
func msMoveRoot(rootfs string) error {
mountinfos, err := mount.GetMounts()
if err != nil {
return err
}
absRootfs, err := filepath.Abs(rootfs)
if err != nil {
return err
}
for _, info := range mountinfos {
p, err := filepath.Abs(info.Mountpoint)
if err != nil {
return err
}
// Umount every syfs and proc file systems, except those under the container rootfs
if (info.Fstype != "proc" && info.Fstype != "sysfs") || filepath.HasPrefix(p, absRootfs) {
continue
}
// Be sure umount events are not propagated to the host.
if err := unix.Mount("", p, "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil {
return err
}
if err := unix.Unmount(p, unix.MNT_DETACH); err != nil {
if err != unix.EINVAL && err != unix.EPERM {
return err
} else {
// If we have not privileges for umounting (e.g. rootless), then
// cover the path.
if err := unix.Mount("tmpfs", p, "tmpfs", 0, ""); err != nil {
return err
}
}
}
}
if err := unix.Mount(rootfs, "/", "", unix.MS_MOVE, ""); err != nil {
return err
}
@@ -859,3 +924,18 @@ func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error {
}
return nil
}
func mountNewCgroup(m *configs.Mount) error {
var (
data = m.Data
source = m.Source
)
if data == "systemd" {
data = cgroups.CgroupNamePrefix + data
source = "systemd"
}
if err := unix.Mount(source, m.Destination, m.Device, uintptr(m.Flags), data); err != nil {
return err
}
return nil
}

View File

@@ -34,6 +34,10 @@ func (l *linuxSetnsInit) Init() error {
defer runtime.UnlockOSThread()
if !l.config.Config.NoNewKeyring {
if err := label.SetKeyLabel(l.config.ProcessLabel); err != nil {
return err
}
defer label.SetKeyLabel("")
// Do not inherit the parent's session keyring.
if _, err := keys.JoinSessionKeyring(l.getSessionRingName()); err != nil {
// Same justification as in standart_init_linux.go as to why we

View File

@@ -48,6 +48,10 @@ func (l *linuxStandardInit) Init() error {
runtime.LockOSThread()
defer runtime.UnlockOSThread()
if !l.config.Config.NoNewKeyring {
if err := label.SetKeyLabel(l.config.ProcessLabel); err != nil {
return err
}
defer label.SetKeyLabel("")
ringname, keepperms, newperms := l.getSessionRingParams()
// Do not inherit the parent's session keyring.

View File

@@ -8,7 +8,6 @@ import (
"path/filepath"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/utils"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
@@ -63,12 +62,9 @@ func destroy(c *linuxContainer) error {
func runPoststopHooks(c *linuxContainer) error {
if c.config.Hooks != nil {
bundle, annotations := utils.Annotations(c.config.Labels)
s := configs.HookState{
Version: c.config.Version,
ID: c.id,
Bundle: bundle,
Annotations: annotations,
s, err := c.currentOCIState()
if err != nil {
return err
}
for _, hook := range c.config.Hooks.Poststop {
if err := hook.Run(s); err != nil {