Update runc to v1.0.0-rc91

https://github.com/opencontainers/runc/releases/tag/v1.0.0-rc91

Signed-off-by: Davanum Srinivas <davanum@gmail.com>
This commit is contained in:
Davanum Srinivas
2020-07-01 22:06:59 -04:00
parent c91c72c867
commit 963625d7bc
275 changed files with 9060 additions and 18508 deletions

View File

@@ -155,8 +155,7 @@ config := &configs.Config{
Parent: "system",
Resources: &configs.Resources{
MemorySwappiness: nil,
AllowAllDevices: nil,
AllowedDevices: configs.DefaultAllowedDevices,
Devices: specconv.AllowedDevices,
},
},
MaskPaths: []string{
@@ -166,7 +165,7 @@ config := &configs.Config{
ReadonlyPaths: []string{
"/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus",
},
Devices: configs.DefaultAutoCreatedDevices,
Devices: specconv.AllowedDevices,
Hostname: "testing",
Mounts: []*configs.Mount{
{

View File

@@ -1,5 +1,9 @@
package configs
import (
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
)
type FreezerState string
const (
@@ -29,18 +33,16 @@ type Cgroup struct {
// Resources contains various cgroups settings to apply
*Resources
// SystemdProps are any additional properties for systemd,
// derived from org.systemd.property.xxx annotations.
// Ignored unless systemd is used for managing cgroups.
SystemdProps []systemdDbus.Property `json:"-"`
}
type Resources struct {
// If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list.
// Deprecated
AllowAllDevices *bool `json:"allow_all_devices,omitempty"`
// Deprecated
AllowedDevices []*Device `json:"allowed_devices,omitempty"`
// Deprecated
DeniedDevices []*Device `json:"denied_devices,omitempty"`
Devices []*Device `json:"devices"`
// Devices is the set of access rules for devices in the container.
Devices []*DeviceRule `json:"devices"`
// Memory limit (in bytes)
Memory int64 `json:"memory"`
@@ -124,7 +126,4 @@ type Resources struct {
// CpuWeight sets a proportional bandwidth limit.
CpuWeight uint64 `json:"cpu_weight"`
// CpuMax sets the maximum bandwidth limit (format: max period).
CpuMax string `json:"cpu_max"`
}

View File

@@ -8,7 +8,7 @@ import (
"time"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
@@ -70,9 +70,10 @@ type Arg struct {
// Syscall is a rule to match a syscall in Seccomp
type Syscall struct {
Name string `json:"name"`
Action Action `json:"action"`
Args []*Arg `json:"args"`
Name string `json:"name"`
Action Action `json:"action"`
ErrnoRet *uint `json:"errnoRet"`
Args []*Arg `json:"args"`
}
// TODO Windows. Many of these fields should be factored out into those parts
@@ -175,7 +176,7 @@ type Config struct {
// Hooks are a collection of actions to perform at various container lifecycle events.
// CommandHooks are serialized to JSON, but other hooks are not.
Hooks *Hooks
Hooks Hooks
// Version is the version of opencontainer specification that is supported.
Version string `json:"version"`
@@ -202,17 +203,50 @@ type Config struct {
RootlessCgroups bool `json:"rootless_cgroups,omitempty"`
}
type Hooks struct {
type HookName string
type HookList []Hook
type Hooks map[HookName]HookList
const (
// Prestart commands are executed after the container namespaces are created,
// but before the user supplied command is executed from init.
Prestart []Hook
// Note: This hook is now deprecated
// Prestart commands are called in the Runtime namespace.
Prestart HookName = "prestart"
// CreateRuntime commands MUST be called as part of the create operation after
// the runtime environment has been created but before the pivot_root has been executed.
// CreateRuntime is called immediately after the deprecated Prestart hook.
// CreateRuntime commands are called in the Runtime Namespace.
CreateRuntime = "createRuntime"
// CreateContainer commands MUST be called as part of the create operation after
// the runtime environment has been created but before the pivot_root has been executed.
// CreateContainer commands are called in the Container namespace.
CreateContainer = "createContainer"
// StartContainer commands MUST be called as part of the start operation and before
// the container process is started.
// StartContainer commands are called in the Container namespace.
StartContainer = "startContainer"
// Poststart commands are executed after the container init process starts.
Poststart []Hook
// Poststart commands are called in the Runtime Namespace.
Poststart = "poststart"
// Poststop commands are executed after the container init process exits.
Poststop []Hook
}
// Poststop commands are called in the Runtime Namespace.
Poststop = "poststop"
)
// String constants "creating", "created", "running" and "stopped".
// NOTE(review): these look like container lifecycle status values; confirm
// against the runtime-spec pull request linked below before relying on that.
// TODO move this to runtime-spec
// See: https://github.com/opencontainers/runtime-spec/pull/1046
const (
Creating = "creating"
Created = "created"
Running = "running"
Stopped = "stopped"
)
type Capabilities struct {
// Bounding is the set of capabilities checked by the kernel.
@@ -227,32 +261,39 @@ type Capabilities struct {
Ambient []string
}
func (hooks *Hooks) UnmarshalJSON(b []byte) error {
var state struct {
Prestart []CommandHook
Poststart []CommandHook
Poststop []CommandHook
// RunHooks runs every hook in the list, in order, passing each one the given
// state. It stops at the first hook whose Run returns an error and returns
// that error wrapped with the zero-based index of the failing hook. It
// returns nil when every hook succeeds (including when the list is empty).
func (hooks HookList) RunHooks(state *specs.State) error {
	for idx := range hooks {
		err := hooks[idx].Run(state)
		if err == nil {
			continue
		}
		return errors.Wrapf(err, "Running hook #%d:", idx)
	}
	return nil
}
func (hooks *Hooks) UnmarshalJSON(b []byte) error {
var state map[HookName][]CommandHook
if err := json.Unmarshal(b, &state); err != nil {
return err
}
deserialize := func(shooks []CommandHook) (hooks []Hook) {
for _, shook := range shooks {
hooks = append(hooks, shook)
*hooks = Hooks{}
for n, commandHooks := range state {
if len(commandHooks) == 0 {
continue
}
return hooks
(*hooks)[n] = HookList{}
for _, h := range commandHooks {
(*hooks)[n] = append((*hooks)[n], h)
}
}
hooks.Prestart = deserialize(state.Prestart)
hooks.Poststart = deserialize(state.Poststart)
hooks.Poststop = deserialize(state.Poststop)
return nil
}
func (hooks Hooks) MarshalJSON() ([]byte, error) {
func (hooks *Hooks) MarshalJSON() ([]byte, error) {
serialize := func(hooks []Hook) (serializableHooks []CommandHook) {
for _, hook := range hooks {
switch chook := hook.(type) {
@@ -267,9 +308,12 @@ func (hooks Hooks) MarshalJSON() ([]byte, error) {
}
return json.Marshal(map[string]interface{}{
"prestart": serialize(hooks.Prestart),
"poststart": serialize(hooks.Poststart),
"poststop": serialize(hooks.Poststop),
"prestart": serialize((*hooks)[Prestart]),
"createRuntime": serialize((*hooks)[CreateRuntime]),
"createContainer": serialize((*hooks)[CreateContainer]),
"startContainer": serialize((*hooks)[StartContainer]),
"poststart": serialize((*hooks)[Poststart]),
"poststop": serialize((*hooks)[Poststop]),
})
}

View File

@@ -1,8 +1,12 @@
package configs
import (
"errors"
"fmt"
"os"
"strconv"
"golang.org/x/sys/unix"
)
const (
@@ -12,21 +16,11 @@ const (
// TODO Windows: This can be factored out in the future
type Device struct {
// Device type, block, char, etc.
Type rune `json:"type"`
DeviceRule
// Path to the device.
Path string `json:"path"`
// Major is the device's major number.
Major int64 `json:"major"`
// Minor is the device's minor number.
Minor int64 `json:"minor"`
// Cgroup permissions format, rwm.
Permissions string `json:"permissions"`
// FileMode permission bits for the device.
FileMode os.FileMode `json:"file_mode"`
@@ -35,23 +29,154 @@ type Device struct {
// Gid of the device.
Gid uint32 `json:"gid"`
}
// Write the file to the allowed list
// DevicePermissions is a cgroupv1-style permission string for device access,
// built from the characters 'r', 'w' and 'm'. It stays a string for backward
// compatibility, so the set operations below work by converting to and from
// an internal bitmask.
type DevicePermissions string

// Bit flags backing the internal set representation of DevicePermissions.
const (
	deviceRead uint = (1 << iota)
	deviceWrite
	deviceMknod
)

// toSet converts the permission string into a bitmask. Characters other than
// 'r', 'w' and 'm' are ignored, and repeated characters have no extra effect.
func (p DevicePermissions) toSet() uint {
	var bits uint
	for _, c := range p {
		if c == 'r' {
			bits |= deviceRead
		} else if c == 'w' {
			bits |= deviceWrite
		} else if c == 'm' {
			bits |= deviceMknod
		}
	}
	return bits
}

// fromSet converts a bitmask back into a permission string. The result is
// always in the fixed order "r", "w", "m"; bits outside the three known flags
// are dropped.
func fromSet(set uint) DevicePermissions {
	flags := [...]struct {
		bit  uint
		char byte
	}{
		{deviceRead, 'r'},
		{deviceWrite, 'w'},
		{deviceMknod, 'm'},
	}
	out := make([]byte, 0, len(flags))
	for _, f := range flags {
		if set&f.bit != 0 {
			out = append(out, f.char)
		}
	}
	return DevicePermissions(out)
}

// Union returns the permissions present in either p or o.
func (p DevicePermissions) Union(o DevicePermissions) DevicePermissions {
	return fromSet(p.toSet() | o.toSet())
}

// Difference returns the permissions present in p but not in o.
// In set notation, A.Difference(B) gives you A\B.
func (p DevicePermissions) Difference(o DevicePermissions) DevicePermissions {
	return fromSet(p.toSet() &^ o.toSet())
}

// Intersection returns the permissions present in both p and o.
func (p DevicePermissions) Intersection(o DevicePermissions) DevicePermissions {
	return fromSet(p.toSet() & o.toSet())
}

// IsEmpty reports whether p is the empty permission string.
func (p DevicePermissions) IsEmpty() bool {
	return len(p) == 0
}

// IsValid reports whether p survives a round trip through the bitmask
// unchanged. That means it contains only 'r', 'w' and 'm', each at most once,
// in that order; the empty string is valid.
func (p DevicePermissions) IsValid() bool {
	canonical := fromSet(p.toSet())
	return canonical == p
}
// DeviceType is the single-character code identifying a kind of device.
type DeviceType rune

// Known device type codes.
const (
	WildcardDevice DeviceType = 'a'
	BlockDevice    DeviceType = 'b'
	CharDevice     DeviceType = 'c' // or 'u'
	FifoDevice     DeviceType = 'p'
)

// IsValid reports whether t is one of the four known device types.
func (t DeviceType) IsValid() bool {
	return t == WildcardDevice ||
		t == BlockDevice ||
		t == CharDevice ||
		t == FifoDevice
}

// CanMknod reports whether t is a block, char or fifo device; the wildcard
// type and unknown types return false.
func (t DeviceType) CanMknod() bool {
	return t == BlockDevice ||
		t == CharDevice ||
		t == FifoDevice
}

// CanCgroup reports whether t is a wildcard, block or char device; the fifo
// type and unknown types return false.
func (t DeviceType) CanCgroup() bool {
	return t == WildcardDevice ||
		t == BlockDevice ||
		t == CharDevice
}
// DeviceRule describes one device access rule: a device type, major and minor
// numbers, the permissions the rule covers, and an Allow flag. All fields are
// serialized to JSON under the lower-case names given in the tags.
type DeviceRule struct {
// Type of device ('c' for char, 'b' for block). If set to 'a', this rule
// acts as a wildcard and all fields other than Allow are ignored.
Type DeviceType `json:"type"`
// Major is the device's major number.
// NOTE(review): presumably may also hold the Wildcard value to match any
// major number -- confirm against the code that consumes these rules.
Major int64 `json:"major"`
// Minor is the device's minor number.
// NOTE(review): presumably may also hold the Wildcard value to match any
// minor number -- confirm against the code that consumes these rules.
Minor int64 `json:"minor"`
// Permissions is the set of permissions that this rule applies to (in the
// cgroupv1 format -- any combination of "rwm").
Permissions DevicePermissions `json:"permissions"`
// Allow specifies whether this rule is allowed.
Allow bool `json:"allow"`
}
// CgroupString renders the device as "<type> <major>:<minor> <permissions>",
// with the major and minor numbers formatted by deviceNumberString.
func (d *Device) CgroupString() string {
	major := deviceNumberString(d.Major)
	minor := deviceNumberString(d.Minor)
	return fmt.Sprintf("%c %s:%s %s", d.Type, major, minor, d.Permissions)
}
// Mkdev packs Major and Minor into a single int: Major shifted left by 8,
// OR-ed with the low 8 bits of Minor, OR-ed with bits 8-19 of Minor shifted
// left by a further 12.
func (d *Device) Mkdev() int {
	majorBits := d.Major << 8
	minorLow := d.Minor & 0xff
	minorHigh := (d.Minor & 0xfff00) << 12
	return int(majorBits | minorLow | minorHigh)
}
// deviceNumberString converts a device number to its string form, returning "*" for Wildcard.
func deviceNumberString(number int64) string {
if number == Wildcard {
return "*"
func (d *DeviceRule) CgroupString() string {
var (
major = strconv.FormatInt(d.Major, 10)
minor = strconv.FormatInt(d.Minor, 10)
)
if d.Major == Wildcard {
major = "*"
}
return fmt.Sprint(number)
if d.Minor == Wildcard {
minor = "*"
}
return fmt.Sprintf("%c %s:%s %s", d.Type, major, minor, d.Permissions)
}
// Mkdev returns the device number produced by unix.Mkdev from the rule's
// Major and Minor, each converted to uint32. It returns an error, and a zero
// device number, if either Major or Minor is Wildcard.
func (d *DeviceRule) Mkdev() (uint64, error) {
	hasWildcard := d.Major == Wildcard || d.Minor == Wildcard
	if hasWildcard {
		return 0, errors.New("cannot mkdev() device with wildcards")
	}
	major, minor := uint32(d.Major), uint32(d.Minor)
	return unix.Mkdev(major, minor), nil
}

View File

@@ -1,111 +0,0 @@
// +build linux
package configs
// Default device tables. DefaultSimpleDevices is the base list; the other two
// slices are built from it with append, so they hold the same *Device pointers
// as DefaultSimpleDevices rather than copies of the structs.
var (
// DefaultSimpleDevices are devices that are to be both allowed and created.
DefaultSimpleDevices = []*Device{
// /dev/null, /dev/zero and /dev/full
{
Path: "/dev/null",
Type: 'c',
Major: 1,
Minor: 3,
Permissions: "rwm",
FileMode: 0666,
},
{
Path: "/dev/zero",
Type: 'c',
Major: 1,
Minor: 5,
Permissions: "rwm",
FileMode: 0666,
},
{
Path: "/dev/full",
Type: 'c',
Major: 1,
Minor: 7,
Permissions: "rwm",
FileMode: 0666,
},
// consoles and ttys
{
Path: "/dev/tty",
Type: 'c',
Major: 5,
Minor: 0,
Permissions: "rwm",
FileMode: 0666,
},
// /dev/urandom,/dev/random
{
Path: "/dev/urandom",
Type: 'c',
Major: 1,
Minor: 9,
Permissions: "rwm",
FileMode: 0666,
},
{
Path: "/dev/random",
Type: 'c',
Major: 1,
Minor: 8,
Permissions: "rwm",
FileMode: 0666,
},
}
// DefaultAllowedDevices is the list of entries below followed by every
// entry of DefaultSimpleDevices. The entries declared here set no FileMode,
// and several have an empty Path.
DefaultAllowedDevices = append([]*Device{
// allow mknod for any device
{
Type: 'c',
Major: Wildcard,
Minor: Wildcard,
Permissions: "m",
},
{
Type: 'b',
Major: Wildcard,
Minor: Wildcard,
Permissions: "m",
},
{
Path: "/dev/console",
Type: 'c',
Major: 5,
Minor: 1,
Permissions: "rwm",
},
// /dev/pts/ - pts namespaces are "coming soon"
{
Path: "",
Type: 'c',
Major: 136,
Minor: Wildcard,
Permissions: "rwm",
},
// NOTE(review): char device 5:2 with no path -- presumably /dev/ptmx;
// confirm.
{
Path: "",
Type: 'c',
Major: 5,
Minor: 2,
Permissions: "rwm",
},
// tuntap
{
Path: "",
Type: 'c',
Major: 10,
Minor: 200,
Permissions: "rwm",
},
}, DefaultSimpleDevices...)
// DefaultAutoCreatedDevices is a new slice holding the same entries as
// DefaultSimpleDevices.
DefaultAutoCreatedDevices = append([]*Device{}, DefaultSimpleDevices...)
)

View File

@@ -31,33 +31,33 @@ func DeviceFromPath(path, permissions string) (*configs.Device, error) {
}
var (
devType configs.DeviceType
mode = stat.Mode
devNumber = uint64(stat.Rdev)
major = unix.Major(devNumber)
minor = unix.Minor(devNumber)
)
if major == 0 {
return nil, ErrNotADevice
}
var (
devType rune
mode = stat.Mode
)
switch {
case mode&unix.S_IFBLK == unix.S_IFBLK:
devType = 'b'
devType = configs.BlockDevice
case mode&unix.S_IFCHR == unix.S_IFCHR:
devType = 'c'
devType = configs.CharDevice
case mode&unix.S_IFIFO == unix.S_IFIFO:
devType = configs.FifoDevice
default:
return nil, ErrNotADevice
}
return &configs.Device{
Type: devType,
Path: path,
Major: int64(major),
Minor: int64(minor),
Permissions: permissions,
FileMode: os.FileMode(mode),
Uid: stat.Uid,
Gid: stat.Gid,
DeviceRule: configs.DeviceRule{
Type: devType,
Major: int64(major),
Minor: int64(minor),
Permissions: configs.DevicePermissions(permissions),
},
Path: path,
FileMode: os.FileMode(mode),
Uid: stat.Uid,
Gid: stat.Gid,
}, nil
}

View File

@@ -1,7 +1,14 @@
// SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
/*
* Copyright (C) 2019 Aleksa Sarai <cyphar@cyphar.com>
* Copyright (C) 2019 SUSE LLC
*
* This work is dual licensed under the following licenses. You may use,
* redistribute, and/or modify the work under the conditions of either (or
* both) licenses.
*
* === Apache-2.0 ===
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
@@ -13,6 +20,23 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* === LGPL-2.1-or-later ===
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library. If not, see
* <https://www.gnu.org/licenses/>.
*
*/
#define _GNU_SOURCE
@@ -95,8 +119,10 @@ static int is_self_cloned(void)
struct statfs fsbuf = {};
fd = open("/proc/self/exe", O_RDONLY|O_CLOEXEC);
if (fd < 0)
if (fd < 0) {
fprintf(stderr, "you have no read access to runc binary file\n");
return -ENOTRECOVERABLE;
}
/*
* Is the binary a fully-sealed memfd? We don't need CLONED_BINARY_ENV for

View File

@@ -714,12 +714,12 @@ void nsexec(void)
* ready, so we can receive all possible error codes
* generated by children.
*/
syncfd = sync_child_pipe[1];
close(sync_child_pipe[0]);
while (!ready) {
enum sync_t s;
syncfd = sync_child_pipe[1];
close(sync_child_pipe[0]);
if (read(syncfd, &s, sizeof(s)) != sizeof(s))
bail("failed to sync with child: next state");
@@ -789,13 +789,13 @@ void nsexec(void)
/* Now sync with grandchild. */
syncfd = sync_grandchild_pipe[1];
close(sync_grandchild_pipe[0]);
ready = false;
while (!ready) {
enum sync_t s;
syncfd = sync_grandchild_pipe[1];
close(sync_grandchild_pipe[0]);
s = SYNC_GRANDCHILD;
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
kill(child, SIGKILL);

View File

@@ -4,6 +4,7 @@ package seccomp
import (
"bufio"
"errors"
"fmt"
"os"
"strings"
@@ -34,12 +35,12 @@ const (
// of the init until they join the namespace
func InitSeccomp(config *configs.Seccomp) error {
if config == nil {
return fmt.Errorf("cannot initialize Seccomp - nil config passed")
return errors.New("cannot initialize Seccomp - nil config passed")
}
defaultAction, err := getAction(config.DefaultAction)
defaultAction, err := getAction(config.DefaultAction, nil)
if err != nil {
return fmt.Errorf("error initializing seccomp - invalid default action")
return errors.New("error initializing seccomp - invalid default action")
}
filter, err := libseccomp.NewFilter(defaultAction)
@@ -67,7 +68,7 @@ func InitSeccomp(config *configs.Seccomp) error {
// Add a rule for each syscall
for _, call := range config.Syscalls {
if call == nil {
return fmt.Errorf("encountered nil syscall while initializing Seccomp")
return errors.New("encountered nil syscall while initializing Seccomp")
}
if err = matchCall(filter, call); err != nil {
@@ -101,22 +102,28 @@ func IsEnabled() bool {
}
// Convert Libcontainer Action to Libseccomp ScmpAction
func getAction(act configs.Action) (libseccomp.ScmpAction, error) {
func getAction(act configs.Action, errnoRet *uint) (libseccomp.ScmpAction, error) {
switch act {
case configs.Kill:
return actKill, nil
case configs.Errno:
if errnoRet != nil {
return libseccomp.ActErrno.SetReturnCode(int16(*errnoRet)), nil
}
return actErrno, nil
case configs.Trap:
return actTrap, nil
case configs.Allow:
return actAllow, nil
case configs.Trace:
if errnoRet != nil {
return libseccomp.ActTrace.SetReturnCode(int16(*errnoRet)), nil
}
return actTrace, nil
case configs.Log:
return actLog, nil
default:
return libseccomp.ActInvalid, fmt.Errorf("invalid action, cannot use in rule")
return libseccomp.ActInvalid, errors.New("invalid action, cannot use in rule")
}
}
@@ -138,7 +145,7 @@ func getOperator(op configs.Operator) (libseccomp.ScmpCompareOp, error) {
case configs.MaskEqualTo:
return libseccomp.CompareMaskedEqual, nil
default:
return libseccomp.CompareInvalid, fmt.Errorf("invalid operator, cannot use in rule")
return libseccomp.CompareInvalid, errors.New("invalid operator, cannot use in rule")
}
}
@@ -147,7 +154,7 @@ func getCondition(arg *configs.Arg) (libseccomp.ScmpCondition, error) {
cond := libseccomp.ScmpCondition{}
if arg == nil {
return cond, fmt.Errorf("cannot convert nil to syscall condition")
return cond, errors.New("cannot convert nil to syscall condition")
}
op, err := getOperator(arg.Op)
@@ -161,11 +168,11 @@ func getCondition(arg *configs.Arg) (libseccomp.ScmpCondition, error) {
// Add a rule to match a single syscall
func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall) error {
if call == nil || filter == nil {
return fmt.Errorf("cannot use nil as syscall to block")
return errors.New("cannot use nil as syscall to block")
}
if len(call.Name) == 0 {
return fmt.Errorf("empty string is not a valid syscall")
return errors.New("empty string is not a valid syscall")
}
// If we can't resolve the syscall, assume it's not supported on this kernel
@@ -176,7 +183,7 @@ func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall) error {
}
// Convert the call's action to the libseccomp equivalent
callAct, err := getAction(call.Action)
callAct, err := getAction(call.Action, call.ErrnoRet)
if err != nil {
return fmt.Errorf("action in seccomp profile is invalid: %s", err)
}

View File

@@ -162,10 +162,6 @@ func ParsePasswdFilter(r io.Reader, filter func(User) bool) ([]User, error) {
)
for s.Scan() {
if err := s.Err(); err != nil {
return nil, err
}
line := strings.TrimSpace(s.Text())
if line == "" {
continue
@@ -183,6 +179,9 @@ func ParsePasswdFilter(r io.Reader, filter func(User) bool) ([]User, error) {
out = append(out, p)
}
}
if err := s.Err(); err != nil {
return nil, err
}
return out, nil
}
@@ -221,10 +220,6 @@ func ParseGroupFilter(r io.Reader, filter func(Group) bool) ([]Group, error) {
)
for s.Scan() {
if err := s.Err(); err != nil {
return nil, err
}
text := s.Text()
if text == "" {
continue
@@ -242,6 +237,9 @@ func ParseGroupFilter(r io.Reader, filter func(Group) bool) ([]Group, error) {
out = append(out, p)
}
}
if err := s.Err(); err != nil {
return nil, err
}
return out, nil
}
@@ -532,10 +530,6 @@ func ParseSubIDFilter(r io.Reader, filter func(SubID) bool) ([]SubID, error) {
)
for s.Scan() {
if err := s.Err(); err != nil {
return nil, err
}
line := strings.TrimSpace(s.Text())
if line == "" {
continue
@@ -549,6 +543,9 @@ func ParseSubIDFilter(r io.Reader, filter func(SubID) bool) ([]SubID, error) {
out = append(out, p)
}
}
if err := s.Err(); err != nil {
return nil, err
}
return out, nil
}
@@ -586,10 +583,6 @@ func ParseIDMapFilter(r io.Reader, filter func(IDMap) bool) ([]IDMap, error) {
)
for s.Scan() {
if err := s.Err(); err != nil {
return nil, err
}
line := strings.TrimSpace(s.Text())
if line == "" {
continue
@@ -603,6 +596,9 @@ func ParseIDMapFilter(r io.Reader, filter func(IDMap) bool) ([]IDMap, error) {
out = append(out, p)
}
}
if err := s.Err(); err != nil {
return nil, err
}
return out, nil
}