update runc to v1.0.0-rc93

full diff: https://github.com/opencontainers/runc/compare/v1.0.0-rc92...v1.0.0-rc93

also removes dependency on libcontainer/configs

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
This commit is contained in:
Sebastiaan van Stijn
2020-11-10 10:39:35 +01:00
parent 54cc3483ff
commit 04d061fa6a
57 changed files with 1100 additions and 1324 deletions

View File

@@ -1,66 +0,0 @@
package configs
import "fmt"
// blockIODevice holds major:minor format supported in blkio cgroup
type blockIODevice struct {
// Major is the device's major number
Major int64 `json:"major"`
// Minor is the device's minor number
Minor int64 `json:"minor"`
}
// WeightDevice struct holds a `major:minor weight`|`major:minor leaf_weight` pair
type WeightDevice struct {
blockIODevice
// Weight is the bandwidth rate for the device, range is from 10 to 1000
Weight uint16 `json:"weight"`
// LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only
LeafWeight uint16 `json:"leafWeight"`
}
// NewWeightDevice returns a configured WeightDevice pointer
func NewWeightDevice(major, minor int64, weight, leafWeight uint16) *WeightDevice {
wd := &WeightDevice{}
wd.Major = major
wd.Minor = minor
wd.Weight = weight
wd.LeafWeight = leafWeight
return wd
}
// WeightString formats the struct to be writable to the cgroup specific file
func (wd *WeightDevice) WeightString() string {
return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.Weight)
}
// LeafWeightString formats the struct to be writable to the cgroup specific file
func (wd *WeightDevice) LeafWeightString() string {
return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.LeafWeight)
}
// ThrottleDevice struct holds a `major:minor rate_per_second` pair
type ThrottleDevice struct {
blockIODevice
// Rate is the IO rate limit per cgroup per device
Rate uint64 `json:"rate"`
}
// NewThrottleDevice returns a configured ThrottleDevice pointer
func NewThrottleDevice(major, minor int64, rate uint64) *ThrottleDevice {
td := &ThrottleDevice{}
td.Major = major
td.Minor = minor
td.Rate = rate
return td
}
// String formats the struct to be writable to the cgroup specific file
func (td *ThrottleDevice) String() string {
return fmt.Sprintf("%d:%d %d", td.Major, td.Minor, td.Rate)
}
// StringName formats the struct to be writable to the cgroup specific file
func (td *ThrottleDevice) StringName(name string) string {
return fmt.Sprintf("%d:%d %s=%d", td.Major, td.Minor, name, td.Rate)
}

View File

@@ -1,136 +0,0 @@
package configs
import (
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
)
type FreezerState string
const (
Undefined FreezerState = ""
Frozen FreezerState = "FROZEN"
Thawed FreezerState = "THAWED"
)
type Cgroup struct {
// Deprecated, use Path instead
Name string `json:"name,omitempty"`
// name of parent of cgroup or slice
// Deprecated, use Path instead
Parent string `json:"parent,omitempty"`
// Path specifies the path to cgroups that are created and/or joined by the container.
// The path is assumed to be relative to the host system cgroup mountpoint.
Path string `json:"path"`
// ScopePrefix describes prefix for the scope name
ScopePrefix string `json:"scope_prefix"`
// Paths represent the absolute cgroups paths to join.
// This takes precedence over Path.
Paths map[string]string
// Resources contains various cgroups settings to apply
*Resources
// SystemdProps are any additional properties for systemd,
// derived from org.systemd.property.xxx annotations.
// Ignored unless systemd is used for managing cgroups.
SystemdProps []systemdDbus.Property `json:"-"`
}
type Resources struct {
// Devices is the set of access rules for devices in the container.
Devices []*DeviceRule `json:"devices"`
// Memory limit (in bytes)
Memory int64 `json:"memory"`
// Memory reservation or soft_limit (in bytes)
MemoryReservation int64 `json:"memory_reservation"`
// Total memory usage (memory + swap); set `-1` to enable unlimited swap
MemorySwap int64 `json:"memory_swap"`
// Kernel memory limit (in bytes)
KernelMemory int64 `json:"kernel_memory"`
// Kernel memory limit for TCP use (in bytes)
KernelMemoryTCP int64 `json:"kernel_memory_tcp"`
// CPU shares (relative weight vs. other containers)
CpuShares uint64 `json:"cpu_shares"`
// CPU hardcap limit (in usecs). Allowed cpu time in a given period.
CpuQuota int64 `json:"cpu_quota"`
// CPU period to be used for hardcapping (in usecs). 0 to use system default.
CpuPeriod uint64 `json:"cpu_period"`
// How many time CPU will use in realtime scheduling (in usecs).
CpuRtRuntime int64 `json:"cpu_rt_quota"`
// CPU period to be used for realtime scheduling (in usecs).
CpuRtPeriod uint64 `json:"cpu_rt_period"`
// CPU to use
CpusetCpus string `json:"cpuset_cpus"`
// MEM to use
CpusetMems string `json:"cpuset_mems"`
// Process limit; set <= `0' to disable limit.
PidsLimit int64 `json:"pids_limit"`
// Specifies per cgroup weight, range is from 10 to 1000.
BlkioWeight uint16 `json:"blkio_weight"`
// Specifies tasks' weight in the given cgroup while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only
BlkioLeafWeight uint16 `json:"blkio_leaf_weight"`
// Weight per cgroup per device, can override BlkioWeight.
BlkioWeightDevice []*WeightDevice `json:"blkio_weight_device"`
// IO read rate limit per cgroup per device, bytes per second.
BlkioThrottleReadBpsDevice []*ThrottleDevice `json:"blkio_throttle_read_bps_device"`
// IO write rate limit per cgroup per device, bytes per second.
BlkioThrottleWriteBpsDevice []*ThrottleDevice `json:"blkio_throttle_write_bps_device"`
// IO read rate limit per cgroup per device, IO per second.
BlkioThrottleReadIOPSDevice []*ThrottleDevice `json:"blkio_throttle_read_iops_device"`
// IO write rate limit per cgroup per device, IO per second.
BlkioThrottleWriteIOPSDevice []*ThrottleDevice `json:"blkio_throttle_write_iops_device"`
// set the freeze value for the process
Freezer FreezerState `json:"freezer"`
// Hugetlb limit (in bytes)
HugetlbLimit []*HugepageLimit `json:"hugetlb_limit"`
// Whether to disable OOM Killer
OomKillDisable bool `json:"oom_kill_disable"`
// Tuning swappiness behaviour per cgroup
MemorySwappiness *uint64 `json:"memory_swappiness"`
// Set priority of network traffic for container
NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap"`
// Set class identifier for container's network packets
NetClsClassid uint32 `json:"net_cls_classid_u"`
// Used on cgroups v2:
// CpuWeight sets a proportional bandwidth limit.
CpuWeight uint64 `json:"cpu_weight"`
// SkipDevices allows to skip configuring device permissions.
// Used by e.g. kubelet while creating a parent cgroup (kubepods)
// common for many containers.
//
// NOTE it is impossible to start a container which has this flag set.
SkipDevices bool `json:"skip_devices"`
}

View File

@@ -1,8 +0,0 @@
// +build !linux
package configs
// TODO Windows: This can ultimately be entirely factored out on Windows as
// cgroups are a Unix-specific construct.
type Cgroup struct {
}

View File

@@ -1,389 +0,0 @@
package configs
import (
"bytes"
"encoding/json"
"fmt"
"os/exec"
"time"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
type Rlimit struct {
Type int `json:"type"`
Hard uint64 `json:"hard"`
Soft uint64 `json:"soft"`
}
// IDMap represents UID/GID Mappings for User Namespaces.
type IDMap struct {
ContainerID int `json:"container_id"`
HostID int `json:"host_id"`
Size int `json:"size"`
}
// Seccomp represents syscall restrictions
// By default, only the native architecture of the kernel is allowed to be used
// for syscalls. Additional architectures can be added by specifying them in
// Architectures.
type Seccomp struct {
DefaultAction Action `json:"default_action"`
Architectures []string `json:"architectures"`
Syscalls []*Syscall `json:"syscalls"`
}
// Action is taken upon rule match in Seccomp
type Action int
const (
Kill Action = iota + 1
Errno
Trap
Allow
Trace
Log
)
// Operator is a comparison operator to be used when matching syscall arguments in Seccomp
type Operator int
const (
EqualTo Operator = iota + 1
NotEqualTo
GreaterThan
GreaterThanOrEqualTo
LessThan
LessThanOrEqualTo
MaskEqualTo
)
// Arg is a rule to match a specific syscall argument in Seccomp
type Arg struct {
Index uint `json:"index"`
Value uint64 `json:"value"`
ValueTwo uint64 `json:"value_two"`
Op Operator `json:"op"`
}
// Syscall is a rule to match a syscall in Seccomp
type Syscall struct {
Name string `json:"name"`
Action Action `json:"action"`
ErrnoRet *uint `json:"errnoRet"`
Args []*Arg `json:"args"`
}
// TODO Windows. Many of these fields should be factored out into those parts
// which are common across platforms, and those which are platform specific.
// Config defines configuration options for executing a process inside a contained environment.
type Config struct {
// NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs
// This is a common option when the container is running in ramdisk
NoPivotRoot bool `json:"no_pivot_root"`
// ParentDeathSignal specifies the signal that is sent to the container's process in the case
// that the parent process dies.
ParentDeathSignal int `json:"parent_death_signal"`
// Path to a directory containing the container's root filesystem.
Rootfs string `json:"rootfs"`
// Readonlyfs will remount the container's rootfs as readonly where only externally mounted
// bind mounts are writtable.
Readonlyfs bool `json:"readonlyfs"`
// Specifies the mount propagation flags to be applied to /.
RootPropagation int `json:"rootPropagation"`
// Mounts specify additional source and destination paths that will be mounted inside the container's
// rootfs and mount namespace if specified
Mounts []*Mount `json:"mounts"`
// The device nodes that should be automatically created within the container upon container start. Note, make sure that the node is marked as allowed in the cgroup as well!
Devices []*Device `json:"devices"`
MountLabel string `json:"mount_label"`
// Hostname optionally sets the container's hostname if provided
Hostname string `json:"hostname"`
// Namespaces specifies the container's namespaces that it should setup when cloning the init process
// If a namespace is not provided that namespace is shared from the container's parent process
Namespaces Namespaces `json:"namespaces"`
// Capabilities specify the capabilities to keep when executing the process inside the container
// All capabilities not specified will be dropped from the processes capability mask
Capabilities *Capabilities `json:"capabilities"`
// Networks specifies the container's network setup to be created
Networks []*Network `json:"networks"`
// Routes can be specified to create entries in the route table as the container is started
Routes []*Route `json:"routes"`
// Cgroups specifies specific cgroup settings for the various subsystems that the container is
// placed into to limit the resources the container has available
Cgroups *Cgroup `json:"cgroups"`
// AppArmorProfile specifies the profile to apply to the process running in the container and is
// change at the time the process is execed
AppArmorProfile string `json:"apparmor_profile,omitempty"`
// ProcessLabel specifies the label to apply to the process running in the container. It is
// commonly used by selinux
ProcessLabel string `json:"process_label,omitempty"`
// Rlimits specifies the resource limits, such as max open files, to set in the container
// If Rlimits are not set, the container will inherit rlimits from the parent process
Rlimits []Rlimit `json:"rlimits,omitempty"`
// OomScoreAdj specifies the adjustment to be made by the kernel when calculating oom scores
// for a process. Valid values are between the range [-1000, '1000'], where processes with
// higher scores are preferred for being killed. If it is unset then we don't touch the current
// value.
// More information about kernel oom score calculation here: https://lwn.net/Articles/317814/
OomScoreAdj *int `json:"oom_score_adj,omitempty"`
// UidMappings is an array of User ID mappings for User Namespaces
UidMappings []IDMap `json:"uid_mappings"`
// GidMappings is an array of Group ID mappings for User Namespaces
GidMappings []IDMap `json:"gid_mappings"`
// MaskPaths specifies paths within the container's rootfs to mask over with a bind
// mount pointing to /dev/null as to prevent reads of the file.
MaskPaths []string `json:"mask_paths"`
// ReadonlyPaths specifies paths within the container's rootfs to remount as read-only
// so that these files prevent any writes.
ReadonlyPaths []string `json:"readonly_paths"`
// Sysctl is a map of properties and their values. It is the equivalent of using
// sysctl -w my.property.name value in Linux.
Sysctl map[string]string `json:"sysctl"`
// Seccomp allows actions to be taken whenever a syscall is made within the container.
// A number of rules are given, each having an action to be taken if a syscall matches it.
// A default action to be taken if no rules match is also given.
Seccomp *Seccomp `json:"seccomp"`
// NoNewPrivileges controls whether processes in the container can gain additional privileges.
NoNewPrivileges bool `json:"no_new_privileges,omitempty"`
// Hooks are a collection of actions to perform at various container lifecycle events.
// CommandHooks are serialized to JSON, but other hooks are not.
Hooks Hooks
// Version is the version of opencontainer specification that is supported.
Version string `json:"version"`
// Labels are user defined metadata that is stored in the config and populated on the state
Labels []string `json:"labels"`
// NoNewKeyring will not allocated a new session keyring for the container. It will use the
// callers keyring in this case.
NoNewKeyring bool `json:"no_new_keyring"`
// IntelRdt specifies settings for Intel RDT group that the container is placed into
// to limit the resources (e.g., L3 cache, memory bandwidth) the container has available
IntelRdt *IntelRdt `json:"intel_rdt,omitempty"`
// RootlessEUID is set when the runc was launched with non-zero EUID.
// Note that RootlessEUID is set to false when launched with EUID=0 in userns.
// When RootlessEUID is set, runc creates a new userns for the container.
// (config.json needs to contain userns settings)
RootlessEUID bool `json:"rootless_euid,omitempty"`
// RootlessCgroups is set when unlikely to have the full access to cgroups.
// When RootlessCgroups is set, cgroups errors are ignored.
RootlessCgroups bool `json:"rootless_cgroups,omitempty"`
}
type HookName string
type HookList []Hook
type Hooks map[HookName]HookList
const (
// Prestart commands are executed after the container namespaces are created,
// but before the user supplied command is executed from init.
// Note: This hook is now deprecated
// Prestart commands are called in the Runtime namespace.
Prestart HookName = "prestart"
// CreateRuntime commands MUST be called as part of the create operation after
// the runtime environment has been created but before the pivot_root has been executed.
// CreateRuntime is called immediately after the deprecated Prestart hook.
// CreateRuntime commands are called in the Runtime Namespace.
CreateRuntime = "createRuntime"
// CreateContainer commands MUST be called as part of the create operation after
// the runtime environment has been created but before the pivot_root has been executed.
// CreateContainer commands are called in the Container namespace.
CreateContainer = "createContainer"
// StartContainer commands MUST be called as part of the start operation and before
// the container process is started.
// StartContainer commands are called in the Container namespace.
StartContainer = "startContainer"
// Poststart commands are executed after the container init process starts.
// Poststart commands are called in the Runtime Namespace.
Poststart = "poststart"
// Poststop commands are executed after the container init process exits.
// Poststop commands are called in the Runtime Namespace.
Poststop = "poststop"
)
type Capabilities struct {
// Bounding is the set of capabilities checked by the kernel.
Bounding []string
// Effective is the set of capabilities checked by the kernel.
Effective []string
// Inheritable is the capabilities preserved across execve.
Inheritable []string
// Permitted is the limiting superset for effective capabilities.
Permitted []string
// Ambient is the ambient set of capabilities that are kept.
Ambient []string
}
func (hooks HookList) RunHooks(state *specs.State) error {
for i, h := range hooks {
if err := h.Run(state); err != nil {
return errors.Wrapf(err, "Running hook #%d:", i)
}
}
return nil
}
func (hooks *Hooks) UnmarshalJSON(b []byte) error {
var state map[HookName][]CommandHook
if err := json.Unmarshal(b, &state); err != nil {
return err
}
*hooks = Hooks{}
for n, commandHooks := range state {
if len(commandHooks) == 0 {
continue
}
(*hooks)[n] = HookList{}
for _, h := range commandHooks {
(*hooks)[n] = append((*hooks)[n], h)
}
}
return nil
}
func (hooks *Hooks) MarshalJSON() ([]byte, error) {
serialize := func(hooks []Hook) (serializableHooks []CommandHook) {
for _, hook := range hooks {
switch chook := hook.(type) {
case CommandHook:
serializableHooks = append(serializableHooks, chook)
default:
logrus.Warnf("cannot serialize hook of type %T, skipping", hook)
}
}
return serializableHooks
}
return json.Marshal(map[string]interface{}{
"prestart": serialize((*hooks)[Prestart]),
"createRuntime": serialize((*hooks)[CreateRuntime]),
"createContainer": serialize((*hooks)[CreateContainer]),
"startContainer": serialize((*hooks)[StartContainer]),
"poststart": serialize((*hooks)[Poststart]),
"poststop": serialize((*hooks)[Poststop]),
})
}
type Hook interface {
// Run executes the hook with the provided state.
Run(*specs.State) error
}
// NewFunctionHook will call the provided function when the hook is run.
func NewFunctionHook(f func(*specs.State) error) FuncHook {
return FuncHook{
run: f,
}
}
type FuncHook struct {
run func(*specs.State) error
}
func (f FuncHook) Run(s *specs.State) error {
return f.run(s)
}
type Command struct {
Path string `json:"path"`
Args []string `json:"args"`
Env []string `json:"env"`
Dir string `json:"dir"`
Timeout *time.Duration `json:"timeout"`
}
// NewCommandHook will execute the provided command when the hook is run.
func NewCommandHook(cmd Command) CommandHook {
return CommandHook{
Command: cmd,
}
}
type CommandHook struct {
Command
}
func (c Command) Run(s *specs.State) error {
b, err := json.Marshal(s)
if err != nil {
return err
}
var stdout, stderr bytes.Buffer
cmd := exec.Cmd{
Path: c.Path,
Args: c.Args,
Env: c.Env,
Stdin: bytes.NewReader(b),
Stdout: &stdout,
Stderr: &stderr,
}
if err := cmd.Start(); err != nil {
return err
}
errC := make(chan error, 1)
go func() {
err := cmd.Wait()
if err != nil {
err = fmt.Errorf("error running hook: %v, stdout: %s, stderr: %s", err, stdout.String(), stderr.String())
}
errC <- err
}()
var timerCh <-chan time.Time
if c.Timeout != nil {
timer := time.NewTimer(*c.Timeout)
defer timer.Stop()
timerCh = timer.C
}
select {
case err := <-errC:
return err
case <-timerCh:
cmd.Process.Kill()
cmd.Wait()
return fmt.Errorf("hook ran past specified timeout of %.1fs", c.Timeout.Seconds())
}
}

View File

@@ -1,61 +0,0 @@
package configs
import "fmt"
// HostUID gets the translated uid for the process on host which could be
// different when user namespaces are enabled.
func (c Config) HostUID(containerId int) (int, error) {
if c.Namespaces.Contains(NEWUSER) {
if c.UidMappings == nil {
return -1, fmt.Errorf("User namespaces enabled, but no uid mappings found.")
}
id, found := c.hostIDFromMapping(containerId, c.UidMappings)
if !found {
return -1, fmt.Errorf("User namespaces enabled, but no user mapping found.")
}
return id, nil
}
// Return unchanged id.
return containerId, nil
}
// HostRootUID gets the root uid for the process on host which could be non-zero
// when user namespaces are enabled.
func (c Config) HostRootUID() (int, error) {
return c.HostUID(0)
}
// HostGID gets the translated gid for the process on host which could be
// different when user namespaces are enabled.
func (c Config) HostGID(containerId int) (int, error) {
if c.Namespaces.Contains(NEWUSER) {
if c.GidMappings == nil {
return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.")
}
id, found := c.hostIDFromMapping(containerId, c.GidMappings)
if !found {
return -1, fmt.Errorf("User namespaces enabled, but no group mapping found.")
}
return id, nil
}
// Return unchanged id.
return containerId, nil
}
// HostRootGID gets the root gid for the process on host which could be non-zero
// when user namespaces are enabled.
func (c Config) HostRootGID() (int, error) {
return c.HostGID(0)
}
// Utility function that gets a host ID for a container ID from user namespace map
// if that ID is present in the map.
func (c Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) {
for _, m := range uMap {
if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) {
hostID := m.HostID + (containerID - m.ContainerID)
return hostID, true
}
}
return -1, false
}

View File

@@ -1,5 +0,0 @@
package configs
func (d *DeviceRule) Mkdev() (uint64, error) {
return 0, nil
}

View File

@@ -1,9 +0,0 @@
package configs
type HugepageLimit struct {
// which type of hugepage to limit.
Pagesize string `json:"page_size"`
// usage limit for hugepage.
Limit uint64 `json:"limit"`
}

View File

@@ -1,13 +0,0 @@
package configs
type IntelRdt struct {
// The schema for L3 cache id and capacity bitmask (CBM)
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
// The schema of memory bandwidth per L3 cache id
// Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
// The unit of memory bandwidth is specified in "percentages" by
// default, and in "MBps" if MBA Software Controller is enabled.
MemBwSchema string `json:"memBwSchema,omitempty"`
}

View File

@@ -1,14 +0,0 @@
package configs
import (
"fmt"
)
type IfPrioMap struct {
Interface string `json:"interface"`
Priority int64 `json:"priority"`
}
func (i *IfPrioMap) CgroupString() string {
return fmt.Sprintf("%s %d", i.Interface, i.Priority)
}

View File

@@ -1,39 +0,0 @@
package configs
const (
// EXT_COPYUP is a directive to copy up the contents of a directory when
// a tmpfs is mounted over it.
EXT_COPYUP = 1 << iota
)
type Mount struct {
// Source path for the mount.
Source string `json:"source"`
// Destination path for the mount inside the container.
Destination string `json:"destination"`
// Device the mount is for.
Device string `json:"device"`
// Mount flags.
Flags int `json:"flags"`
// Propagation Flags
PropagationFlags []int `json:"propagation_flags"`
// Mount data applied to the mount.
Data string `json:"data"`
// Relabel source if set, "z" indicates shared, "Z" indicates unshared.
Relabel string `json:"relabel"`
// Extensions are additional flags that are specific to runc.
Extensions int `json:"extensions"`
// Optional Command to be run before Source is mounted.
PremountCmds []Command `json:"premount_cmds"`
// Optional Command to be run after Source is mounted.
PostmountCmds []Command `json:"postmount_cmds"`
}

View File

@@ -1,5 +0,0 @@
package configs
type NamespaceType string
type Namespaces []Namespace

View File

@@ -1,126 +0,0 @@
package configs
import (
"fmt"
"os"
"sync"
)
const (
NEWNET NamespaceType = "NEWNET"
NEWPID NamespaceType = "NEWPID"
NEWNS NamespaceType = "NEWNS"
NEWUTS NamespaceType = "NEWUTS"
NEWIPC NamespaceType = "NEWIPC"
NEWUSER NamespaceType = "NEWUSER"
NEWCGROUP NamespaceType = "NEWCGROUP"
)
var (
nsLock sync.Mutex
supportedNamespaces = make(map[NamespaceType]bool)
)
// NsName converts the namespace type to its filename
func NsName(ns NamespaceType) string {
switch ns {
case NEWNET:
return "net"
case NEWNS:
return "mnt"
case NEWPID:
return "pid"
case NEWIPC:
return "ipc"
case NEWUSER:
return "user"
case NEWUTS:
return "uts"
case NEWCGROUP:
return "cgroup"
}
return ""
}
// IsNamespaceSupported returns whether a namespace is available or
// not
func IsNamespaceSupported(ns NamespaceType) bool {
nsLock.Lock()
defer nsLock.Unlock()
supported, ok := supportedNamespaces[ns]
if ok {
return supported
}
nsFile := NsName(ns)
// if the namespace type is unknown, just return false
if nsFile == "" {
return false
}
_, err := os.Stat(fmt.Sprintf("/proc/self/ns/%s", nsFile))
// a namespace is supported if it exists and we have permissions to read it
supported = err == nil
supportedNamespaces[ns] = supported
return supported
}
func NamespaceTypes() []NamespaceType {
return []NamespaceType{
NEWUSER, // Keep user NS always first, don't move it.
NEWIPC,
NEWUTS,
NEWNET,
NEWPID,
NEWNS,
NEWCGROUP,
}
}
// Namespace defines configuration for each namespace. It specifies an
// alternate path that is able to be joined via setns.
type Namespace struct {
Type NamespaceType `json:"type"`
Path string `json:"path"`
}
func (n *Namespace) GetPath(pid int) string {
return fmt.Sprintf("/proc/%d/ns/%s", pid, NsName(n.Type))
}
func (n *Namespaces) Remove(t NamespaceType) bool {
i := n.index(t)
if i == -1 {
return false
}
*n = append((*n)[:i], (*n)[i+1:]...)
return true
}
func (n *Namespaces) Add(t NamespaceType, path string) {
i := n.index(t)
if i == -1 {
*n = append(*n, Namespace{Type: t, Path: path})
return
}
(*n)[i].Path = path
}
func (n *Namespaces) index(t NamespaceType) int {
for i, ns := range *n {
if ns.Type == t {
return i
}
}
return -1
}
func (n *Namespaces) Contains(t NamespaceType) bool {
return n.index(t) != -1
}
func (n *Namespaces) PathOf(t NamespaceType) string {
i := n.index(t)
if i == -1 {
return ""
}
return (*n)[i].Path
}

View File

@@ -1,32 +0,0 @@
// +build linux
package configs
import "golang.org/x/sys/unix"
func (n *Namespace) Syscall() int {
return namespaceInfo[n.Type]
}
var namespaceInfo = map[NamespaceType]int{
NEWNET: unix.CLONE_NEWNET,
NEWNS: unix.CLONE_NEWNS,
NEWUSER: unix.CLONE_NEWUSER,
NEWIPC: unix.CLONE_NEWIPC,
NEWUTS: unix.CLONE_NEWUTS,
NEWPID: unix.CLONE_NEWPID,
NEWCGROUP: unix.CLONE_NEWCGROUP,
}
// CloneFlags parses the container's Namespaces options to set the correct
// flags on clone, unshare. This function returns flags only for new namespaces.
func (n *Namespaces) CloneFlags() uintptr {
var flag int
for _, v := range *n {
if v.Path != "" {
continue
}
flag |= namespaceInfo[v.Type]
}
return uintptr(flag)
}

View File

@@ -1,13 +0,0 @@
// +build !linux,!windows
package configs
func (n *Namespace) Syscall() int {
panic("No namespace syscall support")
}
// CloneFlags parses the container's Namespaces options to set the correct
// flags on clone, unshare. This function returns flags only for new namespaces.
func (n *Namespaces) CloneFlags() uintptr {
panic("No namespace syscall support")
}

View File

@@ -1,8 +0,0 @@
// +build !linux
package configs
// Namespace defines configuration for each namespace. It specifies an
// alternate path that is able to be joined via setns.
type Namespace struct {
}

View File

@@ -1,72 +0,0 @@
package configs
// Network defines configuration for a container's networking stack
//
// The network configuration can be omitted from a container causing the
// container to be setup with the host's networking stack
type Network struct {
// Type sets the networks type, commonly veth and loopback
Type string `json:"type"`
// Name of the network interface
Name string `json:"name"`
// The bridge to use.
Bridge string `json:"bridge"`
// MacAddress contains the MAC address to set on the network interface
MacAddress string `json:"mac_address"`
// Address contains the IPv4 and mask to set on the network interface
Address string `json:"address"`
// Gateway sets the gateway address that is used as the default for the interface
Gateway string `json:"gateway"`
// IPv6Address contains the IPv6 and mask to set on the network interface
IPv6Address string `json:"ipv6_address"`
// IPv6Gateway sets the ipv6 gateway address that is used as the default for the interface
IPv6Gateway string `json:"ipv6_gateway"`
// Mtu sets the mtu value for the interface and will be mirrored on both the host and
// container's interfaces if a pair is created, specifically in the case of type veth
// Note: This does not apply to loopback interfaces.
Mtu int `json:"mtu"`
// TxQueueLen sets the tx_queuelen value for the interface and will be mirrored on both the host and
// container's interfaces if a pair is created, specifically in the case of type veth
// Note: This does not apply to loopback interfaces.
TxQueueLen int `json:"txqueuelen"`
// HostInterfaceName is a unique name of a veth pair that resides on in the host interface of the
// container.
HostInterfaceName string `json:"host_interface_name"`
// HairpinMode specifies if hairpin NAT should be enabled on the virtual interface
// bridge port in the case of type veth
// Note: This is unsupported on some systems.
// Note: This does not apply to loopback interfaces.
HairpinMode bool `json:"hairpin_mode"`
}
// Routes can be specified to create entries in the route table as the container is started
//
// All of destination, source, and gateway should be either IPv4 or IPv6.
// One of the three options must be present, and omitted entries will use their
// IP family default for the route table. For IPv4 for example, setting the
// gateway to 1.2.3.4 and the interface to eth0 will set up a standard
// destination of 0.0.0.0(or *) when viewed in the route table.
type Route struct {
// Sets the destination and mask, should be a CIDR. Accepts IPv4 and IPv6
Destination string `json:"destination"`
// Sets the source and mask, should be a CIDR. Accepts IPv4 and IPv6
Source string `json:"source"`
// Sets the gateway. Accepts IPv4 and IPv6
Gateway string `json:"gateway"`
// The device to set this route up for, for example: eth0
InterfaceName string `json:"interface_name"`
}

View File

@@ -1,4 +1,4 @@
package configs
package devices
import (
"fmt"
@@ -11,7 +11,7 @@ const (
)
type Device struct {
DeviceRule
Rule
// Path to the device.
Path string `json:"path"`
@@ -26,10 +26,10 @@ type Device struct {
Gid uint32 `json:"gid"`
}
// DevicePermissions is a cgroupv1-style string to represent device access. It
// Permissions is a cgroupv1-style string to represent device access. It
// has to be a string for backward compatibility reasons, hence why it has
// methods to do set operations.
type DevicePermissions string
type Permissions string
const (
deviceRead uint = (1 << iota)
@@ -37,7 +37,7 @@ const (
deviceMknod
)
func (p DevicePermissions) toSet() uint {
func (p Permissions) toSet() uint {
var set uint
for _, perm := range p {
switch perm {
@@ -52,7 +52,7 @@ func (p DevicePermissions) toSet() uint {
return set
}
func fromSet(set uint) DevicePermissions {
func fromSet(set uint) Permissions {
var perm string
if set&deviceRead == deviceRead {
perm += "r"
@@ -63,53 +63,53 @@ func fromSet(set uint) DevicePermissions {
if set&deviceMknod == deviceMknod {
perm += "m"
}
return DevicePermissions(perm)
return Permissions(perm)
}
// Union returns the union of the two sets of DevicePermissions.
func (p DevicePermissions) Union(o DevicePermissions) DevicePermissions {
// Union returns the union of the two sets of Permissions.
func (p Permissions) Union(o Permissions) Permissions {
lhs := p.toSet()
rhs := o.toSet()
return fromSet(lhs | rhs)
}
// Difference returns the set difference of the two sets of DevicePermissions.
// Difference returns the set difference of the two sets of Permissions.
// In set notation, A.Difference(B) gives you A\B.
func (p DevicePermissions) Difference(o DevicePermissions) DevicePermissions {
func (p Permissions) Difference(o Permissions) Permissions {
lhs := p.toSet()
rhs := o.toSet()
return fromSet(lhs &^ rhs)
}
// Intersection computes the intersection of the two sets of DevicePermissions.
func (p DevicePermissions) Intersection(o DevicePermissions) DevicePermissions {
// Intersection computes the intersection of the two sets of Permissions.
func (p Permissions) Intersection(o Permissions) Permissions {
lhs := p.toSet()
rhs := o.toSet()
return fromSet(lhs & rhs)
}
// IsEmpty returns whether the set of permissions in a DevicePermissions is
// IsEmpty returns whether the set of permissions in a Permissions is
// empty.
func (p DevicePermissions) IsEmpty() bool {
return p == DevicePermissions("")
func (p Permissions) IsEmpty() bool {
return p == Permissions("")
}
// IsValid returns whether the set of permissions is a subset of valid
// permissions (namely, {r,w,m}).
func (p DevicePermissions) IsValid() bool {
func (p Permissions) IsValid() bool {
return p == fromSet(p.toSet())
}
type DeviceType rune
type Type rune
const (
WildcardDevice DeviceType = 'a'
BlockDevice DeviceType = 'b'
CharDevice DeviceType = 'c' // or 'u'
FifoDevice DeviceType = 'p'
WildcardDevice Type = 'a'
BlockDevice Type = 'b'
CharDevice Type = 'c' // or 'u'
FifoDevice Type = 'p'
)
func (t DeviceType) IsValid() bool {
func (t Type) IsValid() bool {
switch t {
case WildcardDevice, BlockDevice, CharDevice, FifoDevice:
return true
@@ -118,7 +118,7 @@ func (t DeviceType) IsValid() bool {
}
}
func (t DeviceType) CanMknod() bool {
func (t Type) CanMknod() bool {
switch t {
case BlockDevice, CharDevice, FifoDevice:
return true
@@ -127,7 +127,7 @@ func (t DeviceType) CanMknod() bool {
}
}
func (t DeviceType) CanCgroup() bool {
func (t Type) CanCgroup() bool {
switch t {
case WildcardDevice, BlockDevice, CharDevice:
return true
@@ -136,10 +136,10 @@ func (t DeviceType) CanCgroup() bool {
}
}
type DeviceRule struct {
type Rule struct {
// Type of device ('c' for char, 'b' for block). If set to 'a', this rule
// acts as a wildcard and all fields other than Allow are ignored.
Type DeviceType `json:"type"`
Type Type `json:"type"`
// Major is the device's major number.
Major int64 `json:"major"`
@@ -149,13 +149,13 @@ type DeviceRule struct {
// Permissions is the set of permissions that this rule applies to (in the
// cgroupv1 format -- any combination of "rwm").
Permissions DevicePermissions `json:"permissions"`
Permissions Permissions `json:"permissions"`
// Allow specifies whether this rule is allowed.
Allow bool `json:"allow"`
}
func (d *DeviceRule) CgroupString() string {
func (d *Rule) CgroupString() string {
var (
major = strconv.FormatInt(d.Major, 10)
minor = strconv.FormatInt(d.Minor, 10)

View File

@@ -1,6 +1,6 @@
// +build !windows
package configs
package devices
import (
"errors"
@@ -8,7 +8,7 @@ import (
"golang.org/x/sys/unix"
)
func (d *DeviceRule) Mkdev() (uint64, error) {
func (d *Rule) Mkdev() (uint64, error) {
if d.Major == Wildcard || d.Minor == Wildcard {
return 0, errors.New("cannot mkdev() device with wildcards")
}

View File

@@ -0,0 +1,5 @@
package devices
func (d *Rule) Mkdev() (uint64, error) {
return 0, nil
}

View File

@@ -6,7 +6,6 @@ import (
"os"
"path/filepath"
"github.com/opencontainers/runc/libcontainer/configs"
"golang.org/x/sys/unix"
)
@@ -23,7 +22,7 @@ var (
// Given the path to a device and its cgroup_permissions(which cannot be easily queried) look up the
// information about a linux device and return that information as a Device struct.
func DeviceFromPath(path, permissions string) (*configs.Device, error) {
func DeviceFromPath(path, permissions string) (*Device, error) {
var stat unix.Stat_t
err := unixLstat(path, &stat)
if err != nil {
@@ -31,7 +30,7 @@ func DeviceFromPath(path, permissions string) (*configs.Device, error) {
}
var (
devType configs.DeviceType
devType Type
mode = stat.Mode
devNumber = uint64(stat.Rdev)
major = unix.Major(devNumber)
@@ -39,20 +38,20 @@ func DeviceFromPath(path, permissions string) (*configs.Device, error) {
)
switch mode & unix.S_IFMT {
case unix.S_IFBLK:
devType = configs.BlockDevice
devType = BlockDevice
case unix.S_IFCHR:
devType = configs.CharDevice
devType = CharDevice
case unix.S_IFIFO:
devType = configs.FifoDevice
devType = FifoDevice
default:
return nil, ErrNotADevice
}
return &configs.Device{
DeviceRule: configs.DeviceRule{
return &Device{
Rule: Rule{
Type: devType,
Major: int64(major),
Minor: int64(minor),
Permissions: configs.DevicePermissions(permissions),
Permissions: Permissions(permissions),
},
Path: path,
FileMode: os.FileMode(mode),
@@ -62,18 +61,18 @@ func DeviceFromPath(path, permissions string) (*configs.Device, error) {
}
// HostDevices returns all devices that can be found under /dev directory.
func HostDevices() ([]*configs.Device, error) {
func HostDevices() ([]*Device, error) {
return GetDevices("/dev")
}
// GetDevices recursively traverses a directory specified by path
// and returns all devices found there.
func GetDevices(path string) ([]*configs.Device, error) {
func GetDevices(path string) ([]*Device, error) {
files, err := ioutilReadDir(path)
if err != nil {
return nil, err
}
var out []*configs.Device
var out []*Device
for _, f := range files {
switch {
case f.IsDir():
@@ -104,7 +103,7 @@ func GetDevices(path string) ([]*configs.Device, error) {
}
return nil, err
}
if device.Type == configs.FifoDevice {
if device.Type == FifoDevice {
continue
}
out = append(out, device)

View File

@@ -3,8 +3,8 @@
package user
import (
"fmt"
"os/user"
"strconv"
)
func lookupUser(username string) (User, error) {
@@ -16,7 +16,7 @@ func lookupUser(username string) (User, error) {
}
func lookupUid(uid int) (User, error) {
u, err := user.LookupId(fmt.Sprintf("%d", uid))
u, err := user.LookupId(strconv.Itoa(uid))
if err != nil {
return User{}, err
}
@@ -32,7 +32,7 @@ func lookupGroup(groupname string) (Group, error) {
}
func lookupGid(gid int) (Group, error) {
g, err := user.LookupGroupId(fmt.Sprintf("%d", gid))
g, err := user.LookupGroupId(strconv.Itoa(gid))
if err != nil {
return Group{}, err
}

View File

@@ -466,7 +466,7 @@ func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, err
// we asked for a group but didn't find it. let's check to see
// if we wanted a numeric group
if !found {
gid, err := strconv.Atoi(ag)
gid, err := strconv.ParseInt(ag, 10, 64)
if err != nil {
return nil, fmt.Errorf("Unable to find group %s", ag)
}
@@ -474,7 +474,7 @@ func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, err
if gid < minId || gid > maxId {
return nil, ErrRange
}
gidMap[gid] = struct{}{}
gidMap[int(gid)] = struct{}{}
}
}
gids := []int{}

View File

@@ -60,7 +60,7 @@ type Process struct {
SelinuxLabel string `json:"selinuxLabel,omitempty" platform:"linux"`
}
// LinuxCapabilities specifies the whitelist of capabilities that are kept for a process.
// LinuxCapabilities specifies the list of allowed capabilities that are kept for a process.
// http://man7.org/linux/man-pages/man7/capabilities.7.html
type LinuxCapabilities struct {
// Bounding is the set of capabilities checked by the kernel.
@@ -354,7 +354,7 @@ type LinuxRdma struct {
// LinuxResources has container runtime resource constraints
type LinuxResources struct {
// Devices configures the device whitelist.
// Devices configures the device allowlist.
Devices []LinuxDeviceCgroup `json:"devices,omitempty"`
// Memory restriction configuration
Memory *LinuxMemory `json:"memory,omitempty"`
@@ -372,6 +372,8 @@ type LinuxResources struct {
// Limits are a set of key value pairs that define RDMA resource limits,
// where the key is device name and value is resource limits.
Rdma map[string]LinuxRdma `json:"rdma,omitempty"`
// Unified resources.
Unified map[string]string `json:"unified,omitempty"`
}
// LinuxDevice represents the mknod information for a Linux special device file
@@ -392,7 +394,8 @@ type LinuxDevice struct {
GID *uint32 `json:"gid,omitempty"`
}
// LinuxDeviceCgroup represents a device rule for the whitelist controller
// LinuxDeviceCgroup represents a device rule for the devices specified to
// the device controller
type LinuxDeviceCgroup struct {
// Allow or deny
Allow bool `json:"allow"`
@@ -628,6 +631,7 @@ const (
ArchS390X Arch = "SCMP_ARCH_S390X"
ArchPARISC Arch = "SCMP_ARCH_PARISC"
ArchPARISC64 Arch = "SCMP_ARCH_PARISC64"
ArchRISCV64 Arch = "SCMP_ARCH_RISCV64"
)
// LinuxSeccompAction taken upon Seccomp rule match