update cadvisor, docker, and runc godeps

This commit is contained in:
David Ashpole
2017-09-05 12:38:57 -07:00
parent 2f543f321d
commit e5a6a79fd7
618 changed files with 47108 additions and 13510 deletions

View File

@@ -10,13 +10,14 @@ go_library(
"generic_error.go",
"process.go",
"stats.go",
"sync.go",
] + select({
"@io_bazel_rules_go//go/platform:linux_amd64": [
"capabilities_linux.go",
"compat_1.5_linux.go",
"console_linux.go",
"container_linux.go",
"criu_opts_unix.go",
"criu_opts_linux.go",
"factory_linux.go",
"init_linux.go",
"message_linux.go",
@@ -43,27 +44,30 @@ go_library(
deps = [
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/stacktrace:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/utils:go_default_library",
] + select({
"@io_bazel_rules_go//go/platform:linux_amd64": [
"//vendor/github.com/Sirupsen/logrus:go_default_library",
"//vendor/github.com/docker/docker/pkg/mount:go_default_library",
"//vendor/github.com/docker/docker/pkg/symlink:go_default_library",
"//vendor/github.com/golang/protobuf/proto:go_default_library",
"//vendor/github.com/mrunalp/fileutils:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/apparmor:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/rootless:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/configs/validate:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/criurpc:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/keys:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/label:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/seccomp:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/system:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/user:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/utils:go_default_library",
"//vendor/github.com/opencontainers/selinux/go-selinux/label:go_default_library",
"//vendor/github.com/sirupsen/logrus:go_default_library",
"//vendor/github.com/syndtr/gocapability/capability:go_default_library",
"//vendor/github.com/vishvananda/netlink:go_default_library",
"//vendor/github.com/vishvananda/netlink/nl:go_default_library",
"//vendor/golang.org/x/sys/unix:go_default_library",
],
"//conditions:default": [],
}),
@@ -85,9 +89,7 @@ filegroup(
"//vendor/github.com/opencontainers/runc/libcontainer/configs:all-srcs",
"//vendor/github.com/opencontainers/runc/libcontainer/criurpc:all-srcs",
"//vendor/github.com/opencontainers/runc/libcontainer/keys:all-srcs",
"//vendor/github.com/opencontainers/runc/libcontainer/label:all-srcs",
"//vendor/github.com/opencontainers/runc/libcontainer/seccomp:all-srcs",
"//vendor/github.com/opencontainers/runc/libcontainer/selinux:all-srcs",
"//vendor/github.com/opencontainers/runc/libcontainer/stacktrace:all-srcs",
"//vendor/github.com/opencontainers/runc/libcontainer/system:all-srcs",
"//vendor/github.com/opencontainers/runc/libcontainer/user:all-srcs",

View File

@@ -1,3 +1,7 @@
# libcontainer
[![GoDoc](https://godoc.org/github.com/opencontainers/runc/libcontainer?status.svg)](https://godoc.org/github.com/opencontainers/runc/libcontainer)
Libcontainer provides a native Go implementation for creating containers
with namespaces, cgroups, capabilities, and filesystem access controls.
It allows you to manage the lifecycle of the container performing additional operations
@@ -16,7 +20,14 @@ the current binary (/proc/self/exe) to be executed as the init process, and use
arg "init", we call the first step process "bootstrap", so you always need a "init"
function as the entry of "bootstrap".
In addition to the go init function the early stage bootstrap is handled by importing
[nsenter](https://github.com/opencontainers/runc/blob/master/libcontainer/nsenter/README.md).
```go
import (
_ "github.com/opencontainers/runc/libcontainer/nsenter"
)
func init() {
if len(os.Args) > 1 && os.Args[1] == "init" {
runtime.GOMAXPROCS(1)
@@ -45,25 +56,91 @@ Once you have an instance of the factory created we can create a configuration
struct describing how the container is to be created. A sample would look similar to this:
```go
defaultMountFlags := syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
defaultMountFlags := unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
config := &configs.Config{
Rootfs: "/your/path/to/rootfs",
Capabilities: []string{
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE",
},
Capabilities: &configs.Capabilities{
Bounding: []string{
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE",
},
Effective: []string{
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE",
},
Inheritable: []string{
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE",
},
Permitted: []string{
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE",
},
Ambient: []string{
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE",
},
},
Namespaces: configs.Namespaces([]configs.Namespace{
{Type: configs.NEWNS},
{Type: configs.NEWUTS},
@@ -101,14 +178,14 @@ config := &configs.Config{
Source: "tmpfs",
Destination: "/dev",
Device: "tmpfs",
Flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME,
Flags: unix.MS_NOSUID | unix.MS_STRICTATIME,
Data: "mode=755",
},
{
Source: "devpts",
Destination: "/dev/pts",
Device: "devpts",
Flags: syscall.MS_NOSUID | syscall.MS_NOEXEC,
Flags: unix.MS_NOSUID | unix.MS_NOEXEC,
Data: "newinstance,ptmxmode=0666,mode=0620,gid=5",
},
{
@@ -128,7 +205,7 @@ config := &configs.Config{
Source: "sysfs",
Destination: "/sys",
Device: "sysfs",
Flags: defaultMountFlags | syscall.MS_RDONLY,
Flags: defaultMountFlags | unix.MS_RDONLY,
},
},
UidMappings: []configs.IDMap{
@@ -154,7 +231,7 @@ config := &configs.Config{
},
Rlimits: []configs.Rlimit{
{
Type: syscall.RLIMIT_NOFILE,
Type: unix.RLIMIT_NOFILE,
Hard: uint64(1025),
Soft: uint64(1025),
},
@@ -223,6 +300,12 @@ container.Signal(signal)
// update container resource constraints.
container.Set(config)
// get current status of the container.
status, err := container.Status()
// get current container's state information.
state, err := container.State()
```

View File

@@ -7,6 +7,7 @@ import (
"os"
"strings"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/syndtr/gocapability/capability"
)
@@ -30,40 +31,84 @@ func init() {
}
}
func newCapWhitelist(caps []string) (*whitelist, error) {
l := []capability.Cap{}
for _, c := range caps {
func newContainerCapList(capConfig *configs.Capabilities) (*containerCapabilities, error) {
bounding := []capability.Cap{}
for _, c := range capConfig.Bounding {
v, ok := capabilityMap[c]
if !ok {
return nil, fmt.Errorf("unknown capability %q", c)
}
l = append(l, v)
bounding = append(bounding, v)
}
effective := []capability.Cap{}
for _, c := range capConfig.Effective {
v, ok := capabilityMap[c]
if !ok {
return nil, fmt.Errorf("unknown capability %q", c)
}
effective = append(effective, v)
}
inheritable := []capability.Cap{}
for _, c := range capConfig.Inheritable {
v, ok := capabilityMap[c]
if !ok {
return nil, fmt.Errorf("unknown capability %q", c)
}
inheritable = append(inheritable, v)
}
permitted := []capability.Cap{}
for _, c := range capConfig.Permitted {
v, ok := capabilityMap[c]
if !ok {
return nil, fmt.Errorf("unknown capability %q", c)
}
permitted = append(permitted, v)
}
ambient := []capability.Cap{}
for _, c := range capConfig.Ambient {
v, ok := capabilityMap[c]
if !ok {
return nil, fmt.Errorf("unknown capability %q", c)
}
ambient = append(ambient, v)
}
pid, err := capability.NewPid(os.Getpid())
if err != nil {
return nil, err
}
return &whitelist{
keep: l,
pid: pid,
return &containerCapabilities{
bounding: bounding,
effective: effective,
inheritable: inheritable,
permitted: permitted,
ambient: ambient,
pid: pid,
}, nil
}
type whitelist struct {
pid capability.Capabilities
keep []capability.Cap
type containerCapabilities struct {
pid capability.Capabilities
bounding []capability.Cap
effective []capability.Cap
inheritable []capability.Cap
permitted []capability.Cap
ambient []capability.Cap
}
// dropBoundingSet drops the capability bounding set to those specified in the whitelist.
func (w *whitelist) dropBoundingSet() error {
w.pid.Clear(capability.BOUNDS)
w.pid.Set(capability.BOUNDS, w.keep...)
return w.pid.Apply(capability.BOUNDS)
// ApplyBoundingSet sets the capability bounding set to those specified in the whitelist.
func (c *containerCapabilities) ApplyBoundingSet() error {
c.pid.Clear(capability.BOUNDS)
c.pid.Set(capability.BOUNDS, c.bounding...)
return c.pid.Apply(capability.BOUNDS)
}
// drop drops all capabilities for the current process except those specified in the whitelist.
func (w *whitelist) drop() error {
w.pid.Clear(allCapabilityTypes)
w.pid.Set(allCapabilityTypes, w.keep...)
return w.pid.Apply(allCapabilityTypes)
// Apply sets all the capabilities for the current process in the config.
func (c *containerCapabilities) ApplyCaps() error {
c.pid.Clear(allCapabilityTypes)
c.pid.Set(capability.BOUNDS, c.bounding...)
c.pid.Set(capability.PERMITTED, c.permitted...)
c.pid.Set(capability.INHERITABLE, c.inheritable...)
c.pid.Set(capability.EFFECTIVE, c.effective...)
c.pid.Set(capability.AMBIENT, c.ambient...)
return c.pid.Apply(allCapabilityTypes)
}

View File

@@ -34,6 +34,7 @@ filegroup(
srcs = [
":package-srcs",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs:all-srcs",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/rootless:all-srcs",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd:all-srcs",
],
tags = ["automanaged"],

View File

@@ -27,9 +27,9 @@ type Manager interface {
// Destroys the cgroup set
Destroy() error
// NewCgroupManager() and LoadCgroupManager() require following attributes:
// The option func SystemdCgroups() and Cgroupfs() require following attributes:
// Paths map[string]string
// Cgroups *cgroups.Cgroup
// Cgroups *configs.Cgroup
// Paths maps cgroup subsystem to path at which it is mounted.
// Cgroups specifies specific cgroup settings for the various subsystems

View File

@@ -31,6 +31,7 @@ go_library(
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/system:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/utils:go_default_library",
"//vendor/golang.org/x/sys/unix:go_default_library",
],
"//conditions:default": [],
}),

View File

@@ -114,8 +114,8 @@ func (m *Manager) Apply(pid int) (err error) {
return err
}
m.Paths = make(map[string]string)
if c.Paths != nil {
paths := make(map[string]string)
for name, path := range c.Paths {
_, err := d.path(name)
if err != nil {
@@ -124,17 +124,12 @@ func (m *Manager) Apply(pid int) (err error) {
}
return err
}
paths[name] = path
m.Paths[name] = path
}
m.Paths = paths
return cgroups.EnterPid(m.Paths, pid)
}
paths := make(map[string]string)
for _, sys := range subsystems {
if err := sys.Apply(d); err != nil {
return err
}
// TODO: Apply should, ideally, be reentrant or be broken up into a separate
// create and join phase so that the cgroup hierarchy for a container can be
// created then join consists of writing the process pids to cgroup.procs
@@ -147,9 +142,12 @@ func (m *Manager) Apply(pid int) (err error) {
}
return err
}
paths[sys.Name()] = p
m.Paths[sys.Name()] = p
if err := sys.Apply(d); err != nil {
return err
}
}
m.Paths = paths
return nil
}
@@ -269,25 +267,8 @@ func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
}, nil
}
func (raw *cgroupData) parentPath(subsystem, mountpoint, root string) (string, error) {
// Use GetThisCgroupDir instead of GetInitCgroupDir, because the creating
// process could in container and shared pid namespace with host, and
// /proc/1/cgroup could point to whole other world of cgroups.
initPath, err := cgroups.GetThisCgroupDir(subsystem)
if err != nil {
return "", err
}
// This is needed for nested containers, because in /proc/self/cgroup we
// see pathes from host, which don't exist in container.
relDir, err := filepath.Rel(root, initPath)
if err != nil {
return "", err
}
return filepath.Join(mountpoint, relDir), nil
}
func (raw *cgroupData) path(subsystem string) (string, error) {
mnt, root, err := cgroups.FindCgroupMountpointAndRoot(subsystem)
mnt, err := cgroups.FindCgroupMountpoint(subsystem)
// If we didn't mount the subsystem, there is no point we make the path.
if err != nil {
return "", err
@@ -299,7 +280,10 @@ func (raw *cgroupData) path(subsystem string) (string, error) {
return filepath.Join(raw.root, filepath.Base(mnt), raw.innerPath), nil
}
parentPath, err := raw.parentPath(subsystem, mnt, root)
// Use GetOwnCgroupPath instead of GetInitCgroupPath, because the creating
// process could in container and shared pid namespace with host, and
// /proc/1/cgroup could point to whole other world of cgroups.
parentPath, err := cgroups.GetOwnCgroupPath(subsystem)
if err != nil {
return "", err
}
@@ -348,8 +332,8 @@ func removePath(p string, err error) error {
return nil
}
func CheckCpushares(path string, c int64) error {
var cpuShares int64
func CheckCpushares(path string, c uint64) error {
var cpuShares uint64
if c == 0 {
return nil

View File

@@ -55,7 +55,7 @@ func (s *CpuGroup) ApplyDir(path string, cgroup *configs.Cgroup, pid int) error
func (s *CpuGroup) SetRtSched(path string, cgroup *configs.Cgroup) error {
if cgroup.Resources.CpuRtPeriod != 0 {
if err := writeFile(path, "cpu.rt_period_us", strconv.FormatInt(cgroup.Resources.CpuRtPeriod, 10)); err != nil {
if err := writeFile(path, "cpu.rt_period_us", strconv.FormatUint(cgroup.Resources.CpuRtPeriod, 10)); err != nil {
return err
}
}
@@ -69,12 +69,12 @@ func (s *CpuGroup) SetRtSched(path string, cgroup *configs.Cgroup) error {
func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error {
if cgroup.Resources.CpuShares != 0 {
if err := writeFile(path, "cpu.shares", strconv.FormatInt(cgroup.Resources.CpuShares, 10)); err != nil {
if err := writeFile(path, "cpu.shares", strconv.FormatUint(cgroup.Resources.CpuShares, 10)); err != nil {
return err
}
}
if cgroup.Resources.CpuPeriod != 0 {
if err := writeFile(path, "cpu.cfs_period_us", strconv.FormatInt(cgroup.Resources.CpuPeriod, 10)); err != nil {
if err := writeFile(path, "cpu.cfs_period_us", strconv.FormatUint(cgroup.Resources.CpuPeriod, 10)); err != nil {
return err
}
}

View File

@@ -57,13 +57,31 @@ func (s *CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) erro
if dir == "" {
return nil
}
root, err := getCgroupRoot()
mountInfo, err := ioutil.ReadFile("/proc/self/mountinfo")
if err != nil {
return err
}
if err := s.ensureParent(dir, root); err != nil {
root := filepath.Dir(cgroups.GetClosestMountpointAncestor(dir, string(mountInfo)))
// 'ensureParent' start with parent because we don't want to
// explicitly inherit from parent, it could conflict with
// 'cpuset.cpu_exclusive'.
if err := s.ensureParent(filepath.Dir(dir), root); err != nil {
return err
}
if err := os.MkdirAll(dir, 0755); err != nil {
return err
}
// We didn't inherit cpuset configs from parent, but we have
// to ensure cpuset configs are set before moving task into the
// cgroup.
// The logic is, if user specified cpuset configs, use these
// specified configs, otherwise, inherit from parent. This makes
// cpuset configs work correctly with 'cpuset.cpu_exclusive', and
// keep backward compatbility.
if err := s.ensureCpusAndMems(dir, cgroup); err != nil {
return err
}
// because we are not using d.join we need to place the pid into the procs file
// unlike the other subsystems
if err := cgroups.WriteCgroupProc(dir, pid); err != nil {
@@ -136,3 +154,10 @@ func (s *CpusetGroup) copyIfNeeded(current, parent string) error {
func (s *CpusetGroup) isEmpty(b []byte) bool {
return len(bytes.Trim(b, "\n")) == 0
}
func (s *CpusetGroup) ensureCpusAndMems(path string, cgroup *configs.Cgroup) error {
if err := s.Set(path, cgroup); err != nil {
return err
}
return s.copyIfNeeded(path, filepath.Dir(path))
}

View File

@@ -10,13 +10,19 @@ import (
"path/filepath"
"strconv"
"strings"
"syscall"
"syscall" // only for Errno
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"golang.org/x/sys/unix"
)
const cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes"
const (
cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes"
cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes"
cgroupMemoryLimit = "memory.limit_in_bytes"
)
type MemoryGroup struct {
}
@@ -29,14 +35,18 @@ func (s *MemoryGroup) Apply(d *cgroupData) (err error) {
path, err := d.path("memory")
if err != nil && !cgroups.IsNotFound(err) {
return err
} else if path == "" {
return nil
}
if memoryAssigned(d.config) {
if path != "" {
if _, err := os.Stat(path); os.IsNotExist(err) {
if err := os.MkdirAll(path, 0755); err != nil {
return err
}
}
if d.config.KernelMemory != 0 {
// Only enable kernel memory accouting when this cgroup
// is created by libcontainer, otherwise we might get
// error when people use `cgroupsPath` to join an existed
// cgroup whose kernel memory is not initialized.
if err := EnableKernelMemoryAccounting(path); err != nil {
return err
}
@@ -85,7 +95,7 @@ func setKernelMemory(path string, kernelMemoryLimit int64) error {
// once tasks have been attached to the cgroup
if pathErr, ok := err.(*os.PathError); ok {
if errNo, ok := pathErr.Err.(syscall.Errno); ok {
if errNo == syscall.EBUSY {
if errNo == unix.EBUSY {
return fmt.Errorf("failed to set %s, because either tasks have already joined this cgroup or it has children", cgroupKernelMemoryLimit)
}
}
@@ -96,9 +106,18 @@ func setKernelMemory(path string, kernelMemoryLimit int64) error {
}
func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error {
// If the memory update is set to -1 we should also
// set swap to -1, it means unlimited memory.
if cgroup.Resources.Memory == -1 {
// Only set swap if it's enabled in kernel
if cgroups.PathExists(filepath.Join(path, cgroupMemorySwapLimit)) {
cgroup.Resources.MemorySwap = -1
}
}
// When memory and swap memory are both set, we need to handle the cases
// for updating container.
if cgroup.Resources.Memory != 0 && cgroup.Resources.MemorySwap > 0 {
if cgroup.Resources.Memory != 0 && cgroup.Resources.MemorySwap != 0 {
memoryUsage, err := getMemoryData(path, "")
if err != nil {
return err
@@ -107,29 +126,29 @@ func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error {
// When update memory limit, we should adapt the write sequence
// for memory and swap memory, so it won't fail because the new
// value and the old value don't fit kernel's validation.
if memoryUsage.Limit < uint64(cgroup.Resources.MemorySwap) {
if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
if cgroup.Resources.MemorySwap == -1 || memoryUsage.Limit < uint64(cgroup.Resources.MemorySwap) {
if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
return err
}
if err := writeFile(path, "memory.limit_in_bytes", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
return err
}
} else {
if err := writeFile(path, "memory.limit_in_bytes", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
return err
}
if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
return err
}
}
} else {
if cgroup.Resources.Memory != 0 {
if err := writeFile(path, "memory.limit_in_bytes", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
return err
}
}
if cgroup.Resources.MemorySwap > 0 {
if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
if cgroup.Resources.MemorySwap != 0 {
if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
return err
}
}
@@ -167,12 +186,12 @@ func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
}
if cgroup.Resources.MemorySwappiness == nil || int64(*cgroup.Resources.MemorySwappiness) == -1 {
return nil
} else if int64(*cgroup.Resources.MemorySwappiness) >= 0 && int64(*cgroup.Resources.MemorySwappiness) <= 100 {
if err := writeFile(path, "memory.swappiness", strconv.FormatInt(*cgroup.Resources.MemorySwappiness, 10)); err != nil {
} else if *cgroup.Resources.MemorySwappiness <= 100 {
if err := writeFile(path, "memory.swappiness", strconv.FormatUint(*cgroup.Resources.MemorySwappiness, 10)); err != nil {
return err
}
} else {
return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", int64(*cgroup.Resources.MemorySwappiness))
return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", *cgroup.Resources.MemorySwappiness)
}
return nil
@@ -224,6 +243,14 @@ func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
}
stats.MemoryStats.KernelTCPUsage = kernelTCPUsage
useHierarchy := strings.Join([]string{"memory", "use_hierarchy"}, ".")
value, err := getCgroupParamUint(path, useHierarchy)
if err != nil {
return err
}
if value == 1 {
stats.MemoryStats.UseHierarchy = true
}
return nil
}
@@ -234,7 +261,7 @@ func memoryAssigned(cgroup *configs.Cgroup) bool {
cgroup.Resources.KernelMemory > 0 ||
cgroup.Resources.KernelMemoryTCP > 0 ||
cgroup.Resources.OomKillDisable ||
(cgroup.Resources.MemorySwappiness != nil && *cgroup.Resources.MemorySwappiness != -1)
(cgroup.Resources.MemorySwappiness != nil && int64(*cgroup.Resources.MemorySwappiness) != -1)
}
func getMemoryData(path, name string) (cgroups.MemoryData, error) {

View File

@@ -4,14 +4,17 @@ go_library(
name = "go_default_library",
srcs = select({
"@io_bazel_rules_go//go/platform:linux_amd64": [
"selinux.go",
"rootless.go",
],
"//conditions:default": [],
}),
visibility = ["//visibility:public"],
deps = select({
"@io_bazel_rules_go//go/platform:linux_amd64": [
"//vendor/github.com/opencontainers/runc/libcontainer/system:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/configs/validate:go_default_library",
],
"//conditions:default": [],
}),

View File

@@ -0,0 +1,128 @@
// +build linux
package rootless
import (
"fmt"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/configs/validate"
)
// TODO: This is copied from libcontainer/cgroups/fs, which duplicates this code
// needlessly. We should probably export this list.
var subsystems = []subsystem{
&fs.CpusetGroup{},
&fs.DevicesGroup{},
&fs.MemoryGroup{},
&fs.CpuGroup{},
&fs.CpuacctGroup{},
&fs.PidsGroup{},
&fs.BlkioGroup{},
&fs.HugetlbGroup{},
&fs.NetClsGroup{},
&fs.NetPrioGroup{},
&fs.PerfEventGroup{},
&fs.FreezerGroup{},
&fs.NameGroup{GroupName: "name=systemd"},
}
type subsystem interface {
// Name returns the name of the subsystem.
Name() string
// Returns the stats, as 'stats', corresponding to the cgroup under 'path'.
GetStats(path string, stats *cgroups.Stats) error
}
// The noop cgroup manager is used for rootless containers, because we currently
// cannot manage cgroups if we are in a rootless setup. This manager is chosen
// by factory if we are in rootless mode. We error out if any cgroup options are
// set in the config -- this may change in the future with upcoming kernel features
// like the cgroup namespace.
type Manager struct {
Cgroups *configs.Cgroup
Paths map[string]string
}
func (m *Manager) Apply(pid int) error {
// If there are no cgroup settings, there's nothing to do.
if m.Cgroups == nil {
return nil
}
// We can't set paths.
// TODO(cyphar): Implement the case where the runner of a rootless container
// owns their own cgroup, which would allow us to set up a
// cgroup for each path.
if m.Cgroups.Paths != nil {
return fmt.Errorf("cannot change cgroup path in rootless container")
}
// We load the paths into the manager.
paths := make(map[string]string)
for _, sys := range subsystems {
name := sys.Name()
path, err := cgroups.GetOwnCgroupPath(name)
if err != nil {
// Ignore paths we couldn't resolve.
continue
}
paths[name] = path
}
m.Paths = paths
return nil
}
func (m *Manager) GetPaths() map[string]string {
return m.Paths
}
func (m *Manager) Set(container *configs.Config) error {
// We have to re-do the validation here, since someone might decide to
// update a rootless container.
return validate.New().Validate(container)
}
func (m *Manager) GetPids() ([]int, error) {
dir, err := cgroups.GetOwnCgroupPath("devices")
if err != nil {
return nil, err
}
return cgroups.GetPids(dir)
}
func (m *Manager) GetAllPids() ([]int, error) {
dir, err := cgroups.GetOwnCgroupPath("devices")
if err != nil {
return nil, err
}
return cgroups.GetAllPids(dir)
}
func (m *Manager) GetStats() (*cgroups.Stats, error) {
// TODO(cyphar): We can make this work if we figure out a way to allow usage
// of cgroups with a rootless container. While this doesn't
// actually require write access to a cgroup directory, the
// statistics are not useful if they can be affected by
// non-container processes.
return nil, fmt.Errorf("cannot get cgroup stats in rootless container")
}
func (m *Manager) Freeze(state configs.FreezerState) error {
// TODO(cyphar): We can make this work if we figure out a way to allow usage
// of cgroups with a rootless container.
return fmt.Errorf("cannot use freezer cgroup in rootless container")
}
func (m *Manager) Destroy() error {
// We don't have to do anything here because we didn't do any setup.
return nil
}

View File

@@ -51,6 +51,8 @@ type MemoryStats struct {
KernelUsage MemoryData `json:"kernel_usage,omitempty"`
// usage of kernel TCP memory
KernelTCPUsage MemoryData `json:"kernel_tcp_usage,omitempty"`
// if true, memory usage is accounted for throughout a hierarchy of cgroups.
UseHierarchy bool `json:"use_hierarchy"`
Stats map[string]uint64 `json:"stats,omitempty"`
}

View File

@@ -5,7 +5,6 @@ package systemd
import (
"errors"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strings"
@@ -266,7 +265,14 @@ func (m *Manager) Apply(pid int) error {
if c.Resources.CpuShares != 0 {
properties = append(properties,
newProp("CPUShares", uint64(c.Resources.CpuShares)))
newProp("CPUShares", c.Resources.CpuShares))
}
// cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd.
if c.Resources.CpuQuota != 0 && c.Resources.CpuPeriod != 0 {
cpuQuotaPerSecUSec := uint64(c.Resources.CpuQuota*1000000) / c.Resources.CpuPeriod
properties = append(properties,
newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
}
if c.Resources.BlkioWeight != 0 {
@@ -327,15 +333,6 @@ func (m *Manager) GetPaths() map[string]string {
return paths
}
func writeFile(dir, file, data string) error {
// Normally dir should not be empty, one case is that cgroup subsystem
// is not mounted, we will get empty dir, and we want it fail here.
if dir == "" {
return fmt.Errorf("no such directory for %s", file)
}
return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
}
func join(c *configs.Cgroup, subsystem string, pid int) (string, error) {
path, err := getSubsystemPath(c, subsystem)
if err != nil {
@@ -356,7 +353,6 @@ func joinCgroups(c *configs.Cgroup, pid int) error {
switch name {
case "name=systemd":
// let systemd handle this
break
case "cpuset":
path, err := getSubsystemPath(c, name)
if err != nil && !cgroups.IsNotFound(err) {
@@ -366,7 +362,6 @@ func joinCgroups(c *configs.Cgroup, pid int) error {
if err := s.ApplyDir(path, c, pid); err != nil {
return err
}
break
default:
_, err := join(c, name, pid)
if err != nil {
@@ -429,7 +424,7 @@ func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
return "", err
}
initPath, err := cgroups.GetInitCgroupDir(subsystem)
initPath, err := cgroups.GetInitCgroup(subsystem)
if err != nil {
return "", err
}

View File

@@ -23,36 +23,14 @@ const (
// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
func FindCgroupMountpoint(subsystem string) (string, error) {
// We are not using mount.GetMounts() because it's super-inefficient,
// parsing it directly sped up x10 times because of not using Sscanf.
// It was one of two major performance drawbacks in container start.
if !isSubsystemAvailable(subsystem) {
return "", NewNotFoundError(subsystem)
}
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return "", err
}
defer f.Close()
scanner := bufio.NewScanner(f)
for scanner.Scan() {
txt := scanner.Text()
fields := strings.Split(txt, " ")
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
if opt == subsystem {
return fields[4], nil
}
}
}
if err := scanner.Err(); err != nil {
return "", err
}
return "", NewNotFoundError(subsystem)
mnt, _, err := FindCgroupMountpointAndRoot(subsystem)
return mnt, err
}
func FindCgroupMountpointAndRoot(subsystem string) (string, string, error) {
// We are not using mount.GetMounts() because it's super-inefficient,
// parsing it directly sped up x10 times because of not using Sscanf.
// It was one of two major performance drawbacks in container start.
if !isSubsystemAvailable(subsystem) {
return "", "", NewNotFoundError(subsystem)
}
@@ -88,6 +66,21 @@ func isSubsystemAvailable(subsystem string) bool {
return avail
}
func GetClosestMountpointAncestor(dir, mountinfo string) string {
deepestMountPoint := ""
for _, mountInfoEntry := range strings.Split(mountinfo, "\n") {
mountInfoParts := strings.Fields(mountInfoEntry)
if len(mountInfoParts) < 5 {
continue
}
mountPoint := mountInfoParts[4]
if strings.HasPrefix(mountPoint, deepestMountPoint) && strings.HasPrefix(dir, mountPoint) {
deepestMountPoint = mountPoint
}
}
return deepestMountPoint
}
func FindCgroupMountpointDir() (string, error) {
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
@@ -131,7 +124,7 @@ type Mount struct {
Subsystems []string
}
func (m Mount) GetThisCgroupDir(cgroups map[string]string) (string, error) {
func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
if len(m.Subsystems) == 0 {
return "", fmt.Errorf("no subsystem for mount")
}
@@ -149,7 +142,7 @@ func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount,
if sepIdx == -1 {
return nil, fmt.Errorf("invalid mountinfo format")
}
if txt[sepIdx+3:sepIdx+9] != "cgroup" {
if txt[sepIdx+3:sepIdx+10] == "cgroup2" || txt[sepIdx+3:sepIdx+9] != "cgroup" {
continue
}
fields := strings.Split(txt, " ")
@@ -211,9 +204,6 @@ func GetAllSubsystems() ([]string, error) {
s := bufio.NewScanner(f)
for s.Scan() {
if err := s.Err(); err != nil {
return nil, err
}
text := s.Text()
if text[0] != '#' {
parts := strings.Fields(text)
@@ -222,11 +212,14 @@ func GetAllSubsystems() ([]string, error) {
}
}
}
if err := s.Err(); err != nil {
return nil, err
}
return subsystems, nil
}
// GetThisCgroupDir returns the relative path to the cgroup docker is running in.
func GetThisCgroupDir(subsystem string) (string, error) {
// GetOwnCgroup returns the relative path to the cgroup docker is running in.
func GetOwnCgroup(subsystem string) (string, error) {
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return "", err
@@ -235,8 +228,16 @@ func GetThisCgroupDir(subsystem string) (string, error) {
return getControllerPath(subsystem, cgroups)
}
func GetInitCgroupDir(subsystem string) (string, error) {
func GetOwnCgroupPath(subsystem string) (string, error) {
cgroup, err := GetOwnCgroup(subsystem)
if err != nil {
return "", err
}
return getCgroupPathHelper(subsystem, cgroup)
}
func GetInitCgroup(subsystem string) (string, error) {
cgroups, err := ParseCgroupFile("/proc/1/cgroup")
if err != nil {
return "", err
@@ -245,6 +246,31 @@ func GetInitCgroupDir(subsystem string) (string, error) {
return getControllerPath(subsystem, cgroups)
}
func GetInitCgroupPath(subsystem string) (string, error) {
cgroup, err := GetInitCgroup(subsystem)
if err != nil {
return "", err
}
return getCgroupPathHelper(subsystem, cgroup)
}
func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
mnt, root, err := FindCgroupMountpointAndRoot(subsystem)
if err != nil {
return "", err
}
// This is needed for nested containers, because in /proc/self/cgroup we
// see pathes from host, which don't exist in container.
relCgroup, err := filepath.Rel(root, cgroup)
if err != nil {
return "", err
}
return filepath.Join(mnt, relCgroup), nil
}
func readProcsFile(dir string) ([]int, error) {
f, err := os.Open(filepath.Join(dir, CgroupProcesses))
if err != nil {
@@ -287,10 +313,6 @@ func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
cgroups := make(map[string]string)
for s.Scan() {
if err := s.Err(); err != nil {
return nil, err
}
text := s.Text()
// from cgroups(7):
// /proc/[pid]/cgroup
@@ -307,6 +329,10 @@ func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
cgroups[subs] = parts[2]
}
}
if err := s.Err(); err != nil {
return nil, err
}
return cgroups, nil
}

View File

@@ -16,11 +16,11 @@ go_library(
"network.go",
] + select({
"@io_bazel_rules_go//go/platform:linux_amd64": [
"cgroup_unix.go",
"config_unix.go",
"cgroup_linux.go",
"config_linux.go",
"device_defaults.go",
"namespaces_linux.go",
"namespaces_syscall.go",
"namespaces_unix.go",
],
"@io_bazel_rules_go//go/platform:windows_amd64": [
"cgroup_windows.go",
@@ -28,7 +28,15 @@ go_library(
"//conditions:default": [],
}),
visibility = ["//visibility:public"],
deps = ["//vendor/github.com/Sirupsen/logrus:go_default_library"],
deps = [
"//vendor/github.com/opencontainers/runtime-spec/specs-go:go_default_library",
"//vendor/github.com/sirupsen/logrus:go_default_library",
] + select({
"@io_bazel_rules_go//go/platform:linux_amd64": [
"//vendor/golang.org/x/sys/unix:go_default_library",
],
"//conditions:default": [],
}),
)
filegroup(

View File

@@ -1,5 +1,3 @@
// +build linux freebsd
package configs
type FreezerState string
@@ -60,19 +58,19 @@ type Resources struct {
KernelMemoryTCP int64 `json:"kernel_memory_tcp"`
// CPU shares (relative weight vs. other containers)
CpuShares int64 `json:"cpu_shares"`
CpuShares uint64 `json:"cpu_shares"`
// CPU hardcap limit (in usecs). Allowed cpu time in a given period.
CpuQuota int64 `json:"cpu_quota"`
// CPU period to be used for hardcapping (in usecs). 0 to use system default.
CpuPeriod int64 `json:"cpu_period"`
CpuPeriod uint64 `json:"cpu_period"`
// How many time CPU will use in realtime scheduling (in usecs).
CpuRtRuntime int64 `json:"cpu_rt_quota"`
// CPU period to be used for realtime scheduling (in usecs).
CpuRtPeriod int64 `json:"cpu_rt_period"`
CpuRtPeriod uint64 `json:"cpu_rt_period"`
// CPU to use
CpusetCpus string `json:"cpuset_cpus"`
@@ -114,7 +112,7 @@ type Resources struct {
OomKillDisable bool `json:"oom_kill_disable"`
// Tuning swappiness behaviour per cgroup
MemorySwappiness *int64 `json:"memory_swappiness"`
MemorySwappiness *uint64 `json:"memory_swappiness"`
// Set priority of network traffic for container
NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap"`

View File

@@ -7,7 +7,9 @@ import (
"os/exec"
"time"
"github.com/Sirupsen/logrus"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
)
type Rlimit struct {
@@ -85,11 +87,6 @@ type Config struct {
// that the parent process dies.
ParentDeathSignal int `json:"parent_death_signal"`
// PivotDir allows a custom directory inside the container's root filesystem to be used as pivot, when NoPivotRoot is not set.
// When a custom PivotDir not set, a temporary dir inside the root filesystem will be used. The pivot dir needs to be writeable.
// This is required when using read only root filesystems. In these cases, a read/writeable path can be (bind) mounted somewhere inside the root filesystem to act as pivot.
PivotDir string `json:"pivot_dir"`
// Path to a directory containing the container's root filesystem.
Rootfs string `json:"rootfs"`
@@ -117,8 +114,8 @@ type Config struct {
Namespaces Namespaces `json:"namespaces"`
// Capabilities specify the capabilities to keep when executing the process inside the container
// All capbilities not specified will be dropped from the processes capability mask
Capabilities []string `json:"capabilities"`
// All capabilities not specified will be dropped from the processes capability mask
Capabilities *Capabilities `json:"capabilities"`
// Networks specifies the container's network setup to be created
Networks []*Network `json:"networks"`
@@ -187,6 +184,9 @@ type Config struct {
// NoNewKeyring will not allocated a new session keyring for the container. It will use the
// callers keyring in this case.
NoNewKeyring bool `json:"no_new_keyring"`
// Rootless specifies whether the container is a rootless container.
Rootless bool `json:"rootless"`
}
type Hooks struct {
@@ -201,6 +201,19 @@ type Hooks struct {
Poststop []Hook
}
type Capabilities struct {
// Bounding is the set of capabilities checked by the kernel.
Bounding []string
// Effective is the set of capabilities checked by the kernel.
Effective []string
// Inheritable is the capabilities preserved across execve.
Inheritable []string
// Permitted is the limiting superset for effective capabilities.
Permitted []string
// Ambient is the ambient set of capabilities that are kept.
Ambient []string
}
func (hooks *Hooks) UnmarshalJSON(b []byte) error {
var state struct {
Prestart []CommandHook
@@ -248,13 +261,7 @@ func (hooks Hooks) MarshalJSON() ([]byte, error) {
}
// HookState is the payload provided to a hook on execution.
type HookState struct {
Version string `json:"ociVersion"`
ID string `json:"id"`
Pid int `json:"pid"`
Root string `json:"root"`
BundlePath string `json:"bundlePath"`
}
type HookState specs.State
type Hook interface {
// Run executes the hook with the provided state.

View File

@@ -0,0 +1,61 @@
package configs
import "fmt"
// HostUID gets the translated uid for the process on host which could be
// different when user namespaces are enabled.
func (c Config) HostUID(containerId int) (int, error) {
if c.Namespaces.Contains(NEWUSER) {
if c.UidMappings == nil {
return -1, fmt.Errorf("User namespaces enabled, but no uid mappings found.")
}
id, found := c.hostIDFromMapping(containerId, c.UidMappings)
if !found {
return -1, fmt.Errorf("User namespaces enabled, but no user mapping found.")
}
return id, nil
}
// Return unchanged id.
return containerId, nil
}
// HostRootUID gets the root uid for the process on host which could be non-zero
// when user namespaces are enabled.
func (c Config) HostRootUID() (int, error) {
return c.HostUID(0)
}
// HostGID gets the translated gid for the process on host which could be
// different when user namespaces are enabled.
func (c Config) HostGID(containerId int) (int, error) {
if c.Namespaces.Contains(NEWUSER) {
if c.GidMappings == nil {
return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.")
}
id, found := c.hostIDFromMapping(containerId, c.GidMappings)
if !found {
return -1, fmt.Errorf("User namespaces enabled, but no group mapping found.")
}
return id, nil
}
// Return unchanged id.
return containerId, nil
}
// HostRootGID gets the root gid for the process on host which could be non-zero
// when user namespaces are enabled.
func (c Config) HostRootGID() (int, error) {
return c.HostGID(0)
}
// Utility function that gets a host ID for a container ID from user namespace map
// if that ID is present in the map.
func (c Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) {
for _, m := range uMap {
if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) {
hostID := m.HostID + (containerID - m.ContainerID)
return hostID, true
}
}
return -1, false
}

View File

@@ -1,51 +0,0 @@
// +build freebsd linux
package configs
import "fmt"
// HostUID gets the root uid for the process on host which could be non-zero
// when user namespaces are enabled.
func (c Config) HostUID() (int, error) {
if c.Namespaces.Contains(NEWUSER) {
if c.UidMappings == nil {
return -1, fmt.Errorf("User namespaces enabled, but no user mappings found.")
}
id, found := c.hostIDFromMapping(0, c.UidMappings)
if !found {
return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.")
}
return id, nil
}
// Return default root uid 0
return 0, nil
}
// HostGID gets the root gid for the process on host which could be non-zero
// when user namespaces are enabled.
func (c Config) HostGID() (int, error) {
if c.Namespaces.Contains(NEWUSER) {
if c.GidMappings == nil {
return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.")
}
id, found := c.hostIDFromMapping(0, c.GidMappings)
if !found {
return -1, fmt.Errorf("User namespaces enabled, but no root group mapping found.")
}
return id, nil
}
// Return default root gid 0
return 0, nil
}
// Utility function that gets a host ID for a container ID from user namespace map
// if that ID is present in the map.
func (c Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) {
for _, m := range uMap {
if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) {
hostID := m.HostID + (containerID - m.ContainerID)
return hostID, true
}
}
return -1, false
}

View File

@@ -1,5 +1,11 @@
package configs
const (
// EXT_COPYUP is a directive to copy up the contents of a directory when
// a tmpfs is mounted over it.
EXT_COPYUP = 1 << iota
)
type Mount struct {
// Source path for the mount.
Source string `json:"source"`
@@ -22,6 +28,9 @@ type Mount struct {
// Relabel source if set, "z" indicates shared, "Z" indicates unshared.
Relabel string `json:"relabel"`
// Extensions are additional flags that are specific to runc.
Extensions int `json:"extensions"`
// Optional Command to be run before Source is mounted.
PremountCmds []Command `json:"premount_cmds"`

View File

@@ -1,5 +1,3 @@
// +build linux freebsd
package configs
import (
@@ -22,8 +20,8 @@ var (
supportedNamespaces = make(map[NamespaceType]bool)
)
// nsToFile converts the namespace type to its filename
func nsToFile(ns NamespaceType) string {
// NsName converts the namespace type to its filename
func NsName(ns NamespaceType) string {
switch ns {
case NEWNET:
return "net"
@@ -50,7 +48,7 @@ func IsNamespaceSupported(ns NamespaceType) bool {
if ok {
return supported
}
nsFile := nsToFile(ns)
nsFile := NsName(ns)
// if the namespace type is unknown, just return false
if nsFile == "" {
return false
@@ -64,12 +62,12 @@ func IsNamespaceSupported(ns NamespaceType) bool {
func NamespaceTypes() []NamespaceType {
return []NamespaceType{
NEWUSER, // Keep user NS always first, don't move it.
NEWIPC,
NEWUTS,
NEWNET,
NEWPID,
NEWNS,
NEWUTS,
NEWIPC,
NEWUSER,
}
}
@@ -81,10 +79,7 @@ type Namespace struct {
}
func (n *Namespace) GetPath(pid int) string {
if n.Path != "" {
return n.Path
}
return fmt.Sprintf("/proc/%d/ns/%s", pid, nsToFile(n.Type))
return fmt.Sprintf("/proc/%d/ns/%s", pid, NsName(n.Type))
}
func (n *Namespaces) Remove(t NamespaceType) bool {

View File

@@ -2,19 +2,19 @@
package configs
import "syscall"
import "golang.org/x/sys/unix"
func (n *Namespace) Syscall() int {
return namespaceInfo[n.Type]
}
var namespaceInfo = map[NamespaceType]int{
NEWNET: syscall.CLONE_NEWNET,
NEWNS: syscall.CLONE_NEWNS,
NEWUSER: syscall.CLONE_NEWUSER,
NEWIPC: syscall.CLONE_NEWIPC,
NEWUTS: syscall.CLONE_NEWUTS,
NEWPID: syscall.CLONE_NEWPID,
NEWNET: unix.CLONE_NEWNET,
NEWNS: unix.CLONE_NEWNS,
NEWUSER: unix.CLONE_NEWUSER,
NEWIPC: unix.CLONE_NEWIPC,
NEWUTS: unix.CLONE_NEWUTS,
NEWPID: unix.CLONE_NEWPID,
}
// CloneFlags parses the container's Namespaces options to set the correct

View File

@@ -4,12 +4,10 @@ package configs
func (n *Namespace) Syscall() int {
panic("No namespace syscall support")
return 0
}
// CloneFlags parses the container's Namespaces options to set the correct
// flags on clone, unshare. This function returns flags only for new namespaces.
func (n *Namespaces) CloneFlags() uintptr {
panic("No namespace syscall support")
return uintptr(0)
}

View File

@@ -1,4 +1,4 @@
// +build !linux,!freebsd
// +build !linux
package configs

View File

@@ -2,11 +2,14 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library")
go_library(
name = "go_default_library",
srcs = ["validator.go"],
srcs = [
"rootless.go",
"validator.go",
],
visibility = ["//visibility:public"],
deps = [
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/selinux:go_default_library",
"//vendor/github.com/opencontainers/selinux/go-selinux:go_default_library",
],
)

View File

@@ -0,0 +1,117 @@
package validate
import (
"fmt"
"os"
"reflect"
"strings"
"github.com/opencontainers/runc/libcontainer/configs"
)
var (
geteuid = os.Geteuid
getegid = os.Getegid
)
func (v *ConfigValidator) rootless(config *configs.Config) error {
if err := rootlessMappings(config); err != nil {
return err
}
if err := rootlessMount(config); err != nil {
return err
}
// Currently, cgroups cannot effectively be used in rootless containers.
// The new cgroup namespace doesn't really help us either because it doesn't
// have nice interactions with the user namespace (we're working with upstream
// to fix this).
if err := rootlessCgroup(config); err != nil {
return err
}
// XXX: We currently can't verify the user config at all, because
// configs.Config doesn't store the user-related configs. So this
// has to be verified by setupUser() in init_linux.go.
return nil
}
func rootlessMappings(config *configs.Config) error {
rootuid, err := config.HostRootUID()
if err != nil {
return fmt.Errorf("failed to get root uid from uidMappings: %v", err)
}
if euid := geteuid(); euid != 0 {
if !config.Namespaces.Contains(configs.NEWUSER) {
return fmt.Errorf("rootless containers require user namespaces")
}
if rootuid != euid {
return fmt.Errorf("rootless containers cannot map container root to a different host user")
}
}
rootgid, err := config.HostRootGID()
if err != nil {
return fmt.Errorf("failed to get root gid from gidMappings: %v", err)
}
// Similar to the above test, we need to make sure that we aren't trying to
// map to a group ID that we don't have the right to be.
if rootgid != getegid() {
return fmt.Errorf("rootless containers cannot map container root to a different host group")
}
// We can only map one user and group inside a container (our own).
if len(config.UidMappings) != 1 || config.UidMappings[0].Size != 1 {
return fmt.Errorf("rootless containers cannot map more than one user")
}
if len(config.GidMappings) != 1 || config.GidMappings[0].Size != 1 {
return fmt.Errorf("rootless containers cannot map more than one group")
}
return nil
}
// cgroup verifies that the user isn't trying to set any cgroup limits or paths.
func rootlessCgroup(config *configs.Config) error {
// Nothing set at all.
if config.Cgroups == nil || config.Cgroups.Resources == nil {
return nil
}
// Used for comparing to the zero value.
left := reflect.ValueOf(*config.Cgroups.Resources)
right := reflect.Zero(left.Type())
// This is all we need to do, since specconv won't add cgroup options in
// rootless mode.
if !reflect.DeepEqual(left.Interface(), right.Interface()) {
return fmt.Errorf("cannot specify resource limits in rootless container")
}
return nil
}
// mount verifies that the user isn't trying to set up any mounts they don't have
// the rights to do. In addition, it makes sure that no mount has a `uid=` or
// `gid=` option that doesn't resolve to root.
func rootlessMount(config *configs.Config) error {
// XXX: We could whitelist allowed devices at this point, but I'm not
// convinced that's a good idea. The kernel is the best arbiter of
// access control.
for _, mount := range config.Mounts {
// Check that the options list doesn't contain any uid= or gid= entries
// that don't resolve to root.
for _, opt := range strings.Split(mount.Data, ",") {
if strings.HasPrefix(opt, "uid=") && opt != "uid=0" {
return fmt.Errorf("cannot specify uid= mount options in rootless containers where argument isn't 0")
}
if strings.HasPrefix(opt, "gid=") && opt != "gid=0" {
return fmt.Errorf("cannot specify gid= mount options in rootless containers where argument isn't 0")
}
}
}
return nil
}

View File

@@ -7,7 +7,7 @@ import (
"strings"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/selinux"
selinux "github.com/opencontainers/selinux/go-selinux"
)
type Validator interface {
@@ -40,12 +40,23 @@ func (v *ConfigValidator) Validate(config *configs.Config) error {
if err := v.sysctl(config); err != nil {
return err
}
if config.Rootless {
if err := v.rootless(config); err != nil {
return err
}
}
return nil
}
// rootfs validates if the rootfs is an absolute path and is not a symlink
// to the container's root filesystem.
func (v *ConfigValidator) rootfs(config *configs.Config) error {
if _, err := os.Stat(config.Rootfs); err != nil {
if os.IsNotExist(err) {
return fmt.Errorf("rootfs (%s) does not exist", config.Rootfs)
}
return err
}
cleaned, err := filepath.Abs(config.Rootfs)
if err != nil {
return err
@@ -81,7 +92,7 @@ func (v *ConfigValidator) security(config *configs.Config) error {
!config.Namespaces.Contains(configs.NEWNS) {
return fmt.Errorf("unable to restrict sys entries without a private MNT namespace")
}
if config.ProcessLabel != "" && !selinux.SelinuxEnabled() {
if config.ProcessLabel != "" && !selinux.GetEnabled() {
return fmt.Errorf("selinux label is specified in config, but selinux is disabled or not supported")
}
@@ -126,6 +137,11 @@ func (v *ConfigValidator) sysctl(config *configs.Config) error {
}
if strings.HasPrefix(s, "net.") {
if config.Namespaces.Contains(configs.NEWNET) {
if path := config.Namespaces.PathOf(configs.NEWNET); path != "" {
if err := checkHostNs(s, path); err != nil {
return err
}
}
continue
} else {
return fmt.Errorf("sysctl %q is not allowed in the hosts network namespace", s)
@@ -136,3 +152,44 @@ func (v *ConfigValidator) sysctl(config *configs.Config) error {
return nil
}
func isSymbolicLink(path string) (bool, error) {
fi, err := os.Lstat(path)
if err != nil {
return false, err
}
return fi.Mode()&os.ModeSymlink == os.ModeSymlink, nil
}
// checkHostNs checks whether network sysctl is used in host namespace.
func checkHostNs(sysctlConfig string, path string) error {
var currentProcessNetns = "/proc/self/ns/net"
// readlink on the current processes network namespace
destOfCurrentProcess, err := os.Readlink(currentProcessNetns)
if err != nil {
return fmt.Errorf("read soft link %q error", currentProcessNetns)
}
// First check if the provided path is a symbolic link
symLink, err := isSymbolicLink(path)
if err != nil {
return fmt.Errorf("could not check that %q is a symlink: %v", path, err)
}
if symLink == false {
// The provided namespace is not a symbolic link,
// it is not the host namespace.
return nil
}
// readlink on the path provided in the struct
destOfContainer, err := os.Readlink(path)
if err != nil {
return fmt.Errorf("read soft link %q error", path)
}
if destOfContainer == destOfCurrentProcess {
return fmt.Errorf("sysctl %q is not allowed in the hosts network namespace", sysctlConfig)
}
return nil
}

View File

@@ -1,15 +1,17 @@
package libcontainer
import "io"
import (
"io"
"os"
)
// Console represents a pseudo TTY.
type Console interface {
io.ReadWriter
io.Closer
io.ReadWriteCloser
// Path returns the filesystem path to the slave side of the pty.
Path() string
// Fd returns the fd for the master of the pty.
Fd() uintptr
File() *os.File
}

View File

@@ -6,8 +6,8 @@ import (
"errors"
)
// NewConsole returns an initalized console that can be used within a container by copying bytes
// newConsole returns an initialized console that can be used within a container by copying bytes
// from the master side to the slave that is attached as the tty for the container's init process.
func NewConsole(uid, gid int) (Console, error) {
func newConsole() (Console, error) {
return nil, errors.New("libcontainer console is not supported on FreeBSD")
}

View File

@@ -3,17 +3,21 @@ package libcontainer
import (
"fmt"
"os"
"path/filepath"
"syscall"
"unsafe"
"github.com/opencontainers/runc/libcontainer/label"
"golang.org/x/sys/unix"
)
// NewConsole returns an initalized console that can be used within a container by copying bytes
func ConsoleFromFile(f *os.File) Console {
return &linuxConsole{
master: f,
}
}
// newConsole returns an initialized console that can be used within a container by copying bytes
// from the master side to the slave that is attached as the tty for the container's init process.
func NewConsole(uid, gid int) (Console, error) {
master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0)
func newConsole() (Console, error) {
master, err := os.OpenFile("/dev/ptmx", unix.O_RDWR|unix.O_NOCTTY|unix.O_CLOEXEC, 0)
if err != nil {
return nil, err
}
@@ -24,34 +28,20 @@ func NewConsole(uid, gid int) (Console, error) {
if err := unlockpt(master); err != nil {
return nil, err
}
if err := os.Chmod(console, 0600); err != nil {
return nil, err
}
if err := os.Chown(console, uid, gid); err != nil {
return nil, err
}
return &linuxConsole{
slavePath: console,
master: master,
}, nil
}
// newConsoleFromPath is an internal function returning an initialized console for use inside
// a container's MNT namespace.
func newConsoleFromPath(slavePath string) *linuxConsole {
return &linuxConsole{
slavePath: slavePath,
}
}
// linuxConsole is a linux pseudo TTY for use within a container.
type linuxConsole struct {
master *os.File
slavePath string
}
func (c *linuxConsole) Fd() uintptr {
return c.master.Fd()
func (c *linuxConsole) File() *os.File {
return c.master
}
func (c *linuxConsole) Path() string {
@@ -75,33 +65,29 @@ func (c *linuxConsole) Close() error {
// mount initializes the console inside the rootfs mounting with the specified mount label
// and applying the correct ownership of the console.
func (c *linuxConsole) mount(rootfs, mountLabel string) error {
oldMask := syscall.Umask(0000)
defer syscall.Umask(oldMask)
if err := label.SetFileLabel(c.slavePath, mountLabel); err != nil {
return err
}
dest := filepath.Join(rootfs, "/dev/console")
f, err := os.Create(dest)
func (c *linuxConsole) mount() error {
oldMask := unix.Umask(0000)
defer unix.Umask(oldMask)
f, err := os.Create("/dev/console")
if err != nil && !os.IsExist(err) {
return err
}
if f != nil {
f.Close()
}
return syscall.Mount(c.slavePath, dest, "bind", syscall.MS_BIND, "")
return unix.Mount(c.slavePath, "/dev/console", "bind", unix.MS_BIND, "")
}
// dupStdio opens the slavePath for the console and dups the fds to the current
// processes stdio, fd 0,1,2.
func (c *linuxConsole) dupStdio() error {
slave, err := c.open(syscall.O_RDWR)
slave, err := c.open(unix.O_RDWR)
if err != nil {
return err
}
fd := int(slave.Fd())
for _, i := range []int{0, 1, 2} {
if err := syscall.Dup3(fd, i, 0); err != nil {
if err := unix.Dup3(fd, i, 0); err != nil {
return err
}
}
@@ -110,7 +96,7 @@ func (c *linuxConsole) dupStdio() error {
// open is a clone of os.OpenFile without the O_CLOEXEC used to open the pty slave.
func (c *linuxConsole) open(flag int) (*os.File, error) {
r, e := syscall.Open(c.slavePath, flag, 0)
r, e := unix.Open(c.slavePath, flag, 0)
if e != nil {
return nil, &os.PathError{
Op: "open",
@@ -122,7 +108,7 @@ func (c *linuxConsole) open(flag int) (*os.File, error) {
}
func ioctl(fd uintptr, flag, data uintptr) error {
if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, flag, data); err != 0 {
if _, _, err := unix.Syscall(unix.SYS_IOCTL, fd, flag, data); err != 0 {
return err
}
return nil
@@ -132,14 +118,35 @@ func ioctl(fd uintptr, flag, data uintptr) error {
// unlockpt should be called before opening the slave side of a pty.
func unlockpt(f *os.File) error {
var u int32
return ioctl(f.Fd(), syscall.TIOCSPTLCK, uintptr(unsafe.Pointer(&u)))
return ioctl(f.Fd(), unix.TIOCSPTLCK, uintptr(unsafe.Pointer(&u)))
}
// ptsname retrieves the name of the first available pts for the given master.
func ptsname(f *os.File) (string, error) {
var n int32
if err := ioctl(f.Fd(), syscall.TIOCGPTN, uintptr(unsafe.Pointer(&n))); err != nil {
n, err := unix.IoctlGetInt(int(f.Fd()), unix.TIOCGPTN)
if err != nil {
return "", err
}
return fmt.Sprintf("/dev/pts/%d", n), nil
}
// SaneTerminal sets the necessary tty_ioctl(4)s to ensure that a pty pair
// created by us acts normally. In particular, a not-very-well-known default of
// Linux unix98 ptys is that they have +onlcr by default. While this isn't a
// problem for terminal emulators, because we relay data from the terminal we
// also relay that funky line discipline.
func SaneTerminal(terminal *os.File) error {
termios, err := unix.IoctlGetTermios(int(terminal.Fd()), unix.TCGETS)
if err != nil {
return fmt.Errorf("ioctl(tty, tcgets): %s", err.Error())
}
// Set -onlcr so we don't have to deal with \r.
termios.Oflag &^= unix.ONLCR
if err := unix.IoctlSetTermios(int(terminal.Fd()), unix.TCSETS, termios); err != nil {
return fmt.Errorf("ioctl(tty, tcsets): %s", err.Error())
}
return nil
}

View File

@@ -4,8 +4,8 @@ import (
"errors"
)
// NewConsole returns an initalized console that can be used within a container by copying bytes
// newConsole returns an initialized console that can be used within a container by copying bytes
// from the master side to the slave that is attached as the tty for the container's init process.
func NewConsole(uid, gid int) (Console, error) {
func newConsole() (Console, error) {
return nil, errors.New("libcontainer console is not supported on Solaris")
}

View File

@@ -1,7 +1,7 @@
package libcontainer
// NewConsole returns an initalized console that can be used within a container
func NewConsole(uid, gid int) (Console, error) {
// newConsole returns an initialized console that can be used within a container
func newConsole() (Console, error) {
return &windowsConsole{}, nil
}

View File

@@ -54,7 +54,7 @@ type BaseState struct {
InitProcessPid int `json:"init_process_pid"`
// InitProcessStartTime is the init process start time in clock cycles since boot time.
InitProcessStartTime string `json:"init_process_start"`
InitProcessStartTime uint64 `json:"init_process_start"`
// Created is the unix timestamp for the creation time of the container in UTC
Created time.Time `json:"created"`
@@ -123,7 +123,7 @@ type BaseContainer interface {
// SystemError - System error.
Start(process *Process) (err error)
// Run immediately starts the process inside the conatiner. Returns error if process
// Run immediately starts the process inside the container. Returns error if process
// fails to start. It does not block waiting for the exec fifo after start returns but
// opens the fifo after start returns.
//
@@ -134,20 +134,29 @@ type BaseContainer interface {
// SystemError - System error.
Run(process *Process) (err error)
// Destroys the container after killing all running processes.
// Destroys the container, if its in a valid state, after killing any
// remaining running processes.
//
// Any event registrations are removed before the container is destroyed.
// No error is returned if the container is already destroyed.
//
// Running containers must first be stopped using Signal(..).
// Paused containers must first be resumed using Resume(..).
//
// errors:
// ContainerNotStopped - Container is still running,
// ContainerPaused - Container is paused,
// SystemError - System error.
Destroy() error
// Signal sends the provided signal code to the container's initial process.
//
// If all is specified the signal is sent to all processes in the container
// including the initial process.
//
// errors:
// SystemError - System error.
Signal(s os.Signal) error
Signal(s os.Signal, all bool) error
// Exec signals the container to exec the users process at the end of the init.
//

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,3 @@
// +build linux freebsd
package libcontainer
// cgroup restoring strategy provided by criu
@@ -25,13 +23,16 @@ type VethPairName struct {
type CriuOpts struct {
ImagesDirectory string // directory for storing image files
WorkDirectory string // directory to cd and write logs/pidfiles/stats to
ParentImage string // direcotry for storing parent image files in pre-dump and dump
LeaveRunning bool // leave container in running state after checkpoint
TcpEstablished bool // checkpoint/restore established TCP connections
ExternalUnixConnections bool // allow external unix connections
ShellJob bool // allow to dump and restore shell jobs
FileLocks bool // handle file locks, for safety
PreDump bool // call criu predump to perform iterative checkpoint
PageServer CriuPageServerInfo // allow to dump to criu page server
VethPairs []VethPairName // pass the veth to criu when restore
ManageCgroupsMode cgMode // dump or restore cgroup mode
EmptyNs uint32 // don't c/r properties for namespace from this mask
AutoDedup bool // auto deduplication for incremental dumps
}

View File

@@ -12,6 +12,7 @@ It has these top-level messages:
CriuPageServerInfo
CriuVethPair
ExtMountMap
JoinNamespace
InheritFd
CgroupRoot
UnixSk
@@ -22,21 +23,30 @@ It has these top-level messages:
CriuFeatures
CriuReq
CriuResp
CriuVersion
*/
package criurpc
import proto "github.com/golang/protobuf/proto"
import fmt "fmt"
import math "math"
// Reference imports to suppress errors if they are not otherwise used.
var _ = proto.Marshal
var _ = fmt.Errorf
var _ = math.Inf
// This is a compile-time assertion to ensure that this generated file
// is compatible with the proto package it is being compiled against.
// A compilation error at this line likely means your copy of the
// proto package needs to be updated.
const _ = proto.ProtoPackageIsVersion2 // please upgrade the proto package
type CriuCgMode int32
const (
CriuCgMode_IGNORE CriuCgMode = 0
CriuCgMode_NONE CriuCgMode = 1
CriuCgMode_CG_NONE CriuCgMode = 1
CriuCgMode_PROPS CriuCgMode = 2
CriuCgMode_SOFT CriuCgMode = 3
CriuCgMode_FULL CriuCgMode = 4
@@ -46,7 +56,7 @@ const (
var CriuCgMode_name = map[int32]string{
0: "IGNORE",
1: "NONE",
1: "CG_NONE",
2: "PROPS",
3: "SOFT",
4: "FULL",
@@ -55,7 +65,7 @@ var CriuCgMode_name = map[int32]string{
}
var CriuCgMode_value = map[string]int32{
"IGNORE": 0,
"NONE": 1,
"CG_NONE": 1,
"PROPS": 2,
"SOFT": 3,
"FULL": 4,
@@ -79,6 +89,7 @@ func (x *CriuCgMode) UnmarshalJSON(data []byte) error {
*x = CriuCgMode(value)
return nil
}
func (CriuCgMode) EnumDescriptor() ([]byte, []int) { return fileDescriptor0, []int{0} }
type CriuReqType int32
@@ -93,19 +104,21 @@ const (
CriuReqType_CPUINFO_DUMP CriuReqType = 7
CriuReqType_CPUINFO_CHECK CriuReqType = 8
CriuReqType_FEATURE_CHECK CriuReqType = 9
CriuReqType_VERSION CriuReqType = 10
)
var CriuReqType_name = map[int32]string{
0: "EMPTY",
1: "DUMP",
2: "RESTORE",
3: "CHECK",
4: "PRE_DUMP",
5: "PAGE_SERVER",
6: "NOTIFY",
7: "CPUINFO_DUMP",
8: "CPUINFO_CHECK",
9: "FEATURE_CHECK",
0: "EMPTY",
1: "DUMP",
2: "RESTORE",
3: "CHECK",
4: "PRE_DUMP",
5: "PAGE_SERVER",
6: "NOTIFY",
7: "CPUINFO_DUMP",
8: "CPUINFO_CHECK",
9: "FEATURE_CHECK",
10: "VERSION",
}
var CriuReqType_value = map[string]int32{
"EMPTY": 0,
@@ -118,6 +131,7 @@ var CriuReqType_value = map[string]int32{
"CPUINFO_DUMP": 7,
"CPUINFO_CHECK": 8,
"FEATURE_CHECK": 9,
"VERSION": 10,
}
func (x CriuReqType) Enum() *CriuReqType {
@@ -136,6 +150,7 @@ func (x *CriuReqType) UnmarshalJSON(data []byte) error {
*x = CriuReqType(value)
return nil
}
func (CriuReqType) EnumDescriptor() ([]byte, []int) { return fileDescriptor0, []int{1} }
type CriuPageServerInfo struct {
Address *string `protobuf:"bytes,1,opt,name=address" json:"address,omitempty"`
@@ -145,9 +160,10 @@ type CriuPageServerInfo struct {
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuPageServerInfo) Reset() { *m = CriuPageServerInfo{} }
func (m *CriuPageServerInfo) String() string { return proto.CompactTextString(m) }
func (*CriuPageServerInfo) ProtoMessage() {}
func (m *CriuPageServerInfo) Reset() { *m = CriuPageServerInfo{} }
func (m *CriuPageServerInfo) String() string { return proto.CompactTextString(m) }
func (*CriuPageServerInfo) ProtoMessage() {}
func (*CriuPageServerInfo) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{0} }
func (m *CriuPageServerInfo) GetAddress() string {
if m != nil && m.Address != nil {
@@ -178,14 +194,15 @@ func (m *CriuPageServerInfo) GetFd() int32 {
}
type CriuVethPair struct {
IfIn *string `protobuf:"bytes,1,req,name=if_in" json:"if_in,omitempty"`
IfOut *string `protobuf:"bytes,2,req,name=if_out" json:"if_out,omitempty"`
IfIn *string `protobuf:"bytes,1,req,name=if_in,json=ifIn" json:"if_in,omitempty"`
IfOut *string `protobuf:"bytes,2,req,name=if_out,json=ifOut" json:"if_out,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuVethPair) Reset() { *m = CriuVethPair{} }
func (m *CriuVethPair) String() string { return proto.CompactTextString(m) }
func (*CriuVethPair) ProtoMessage() {}
func (m *CriuVethPair) Reset() { *m = CriuVethPair{} }
func (m *CriuVethPair) String() string { return proto.CompactTextString(m) }
func (*CriuVethPair) ProtoMessage() {}
func (*CriuVethPair) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{1} }
func (m *CriuVethPair) GetIfIn() string {
if m != nil && m.IfIn != nil {
@@ -207,9 +224,10 @@ type ExtMountMap struct {
XXX_unrecognized []byte `json:"-"`
}
func (m *ExtMountMap) Reset() { *m = ExtMountMap{} }
func (m *ExtMountMap) String() string { return proto.CompactTextString(m) }
func (*ExtMountMap) ProtoMessage() {}
func (m *ExtMountMap) Reset() { *m = ExtMountMap{} }
func (m *ExtMountMap) String() string { return proto.CompactTextString(m) }
func (*ExtMountMap) ProtoMessage() {}
func (*ExtMountMap) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{2} }
func (m *ExtMountMap) GetKey() string {
if m != nil && m.Key != nil {
@@ -225,15 +243,49 @@ func (m *ExtMountMap) GetVal() string {
return ""
}
type JoinNamespace struct {
Ns *string `protobuf:"bytes,1,req,name=ns" json:"ns,omitempty"`
NsFile *string `protobuf:"bytes,2,req,name=ns_file,json=nsFile" json:"ns_file,omitempty"`
ExtraOpt *string `protobuf:"bytes,3,opt,name=extra_opt,json=extraOpt" json:"extra_opt,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *JoinNamespace) Reset() { *m = JoinNamespace{} }
func (m *JoinNamespace) String() string { return proto.CompactTextString(m) }
func (*JoinNamespace) ProtoMessage() {}
func (*JoinNamespace) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{3} }
func (m *JoinNamespace) GetNs() string {
if m != nil && m.Ns != nil {
return *m.Ns
}
return ""
}
func (m *JoinNamespace) GetNsFile() string {
if m != nil && m.NsFile != nil {
return *m.NsFile
}
return ""
}
func (m *JoinNamespace) GetExtraOpt() string {
if m != nil && m.ExtraOpt != nil {
return *m.ExtraOpt
}
return ""
}
type InheritFd struct {
Key *string `protobuf:"bytes,1,req,name=key" json:"key,omitempty"`
Fd *int32 `protobuf:"varint,2,req,name=fd" json:"fd,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *InheritFd) Reset() { *m = InheritFd{} }
func (m *InheritFd) String() string { return proto.CompactTextString(m) }
func (*InheritFd) ProtoMessage() {}
func (m *InheritFd) Reset() { *m = InheritFd{} }
func (m *InheritFd) String() string { return proto.CompactTextString(m) }
func (*InheritFd) ProtoMessage() {}
func (*InheritFd) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{4} }
func (m *InheritFd) GetKey() string {
if m != nil && m.Key != nil {
@@ -255,9 +307,10 @@ type CgroupRoot struct {
XXX_unrecognized []byte `json:"-"`
}
func (m *CgroupRoot) Reset() { *m = CgroupRoot{} }
func (m *CgroupRoot) String() string { return proto.CompactTextString(m) }
func (*CgroupRoot) ProtoMessage() {}
func (m *CgroupRoot) Reset() { *m = CgroupRoot{} }
func (m *CgroupRoot) String() string { return proto.CompactTextString(m) }
func (*CgroupRoot) ProtoMessage() {}
func (*CgroupRoot) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{5} }
func (m *CgroupRoot) GetCtrl() string {
if m != nil && m.Ctrl != nil {
@@ -278,9 +331,10 @@ type UnixSk struct {
XXX_unrecognized []byte `json:"-"`
}
func (m *UnixSk) Reset() { *m = UnixSk{} }
func (m *UnixSk) String() string { return proto.CompactTextString(m) }
func (*UnixSk) ProtoMessage() {}
func (m *UnixSk) Reset() { *m = UnixSk{} }
func (m *UnixSk) String() string { return proto.CompactTextString(m) }
func (*UnixSk) ProtoMessage() {}
func (*UnixSk) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{6} }
func (m *UnixSk) GetInode() uint32 {
if m != nil && m.Inode != nil {
@@ -290,51 +344,62 @@ func (m *UnixSk) GetInode() uint32 {
}
type CriuOpts struct {
ImagesDirFd *int32 `protobuf:"varint,1,req,name=images_dir_fd" json:"images_dir_fd,omitempty"`
Pid *int32 `protobuf:"varint,2,opt,name=pid" json:"pid,omitempty"`
LeaveRunning *bool `protobuf:"varint,3,opt,name=leave_running" json:"leave_running,omitempty"`
ExtUnixSk *bool `protobuf:"varint,4,opt,name=ext_unix_sk" json:"ext_unix_sk,omitempty"`
TcpEstablished *bool `protobuf:"varint,5,opt,name=tcp_established" json:"tcp_established,omitempty"`
EvasiveDevices *bool `protobuf:"varint,6,opt,name=evasive_devices" json:"evasive_devices,omitempty"`
ShellJob *bool `protobuf:"varint,7,opt,name=shell_job" json:"shell_job,omitempty"`
FileLocks *bool `protobuf:"varint,8,opt,name=file_locks" json:"file_locks,omitempty"`
LogLevel *int32 `protobuf:"varint,9,opt,name=log_level,def=2" json:"log_level,omitempty"`
LogFile *string `protobuf:"bytes,10,opt,name=log_file" json:"log_file,omitempty"`
Ps *CriuPageServerInfo `protobuf:"bytes,11,opt,name=ps" json:"ps,omitempty"`
NotifyScripts *bool `protobuf:"varint,12,opt,name=notify_scripts" json:"notify_scripts,omitempty"`
Root *string `protobuf:"bytes,13,opt,name=root" json:"root,omitempty"`
ParentImg *string `protobuf:"bytes,14,opt,name=parent_img" json:"parent_img,omitempty"`
TrackMem *bool `protobuf:"varint,15,opt,name=track_mem" json:"track_mem,omitempty"`
AutoDedup *bool `protobuf:"varint,16,opt,name=auto_dedup" json:"auto_dedup,omitempty"`
WorkDirFd *int32 `protobuf:"varint,17,opt,name=work_dir_fd" json:"work_dir_fd,omitempty"`
LinkRemap *bool `protobuf:"varint,18,opt,name=link_remap" json:"link_remap,omitempty"`
Veths []*CriuVethPair `protobuf:"bytes,19,rep,name=veths" json:"veths,omitempty"`
CpuCap *uint32 `protobuf:"varint,20,opt,name=cpu_cap,def=4294967295" json:"cpu_cap,omitempty"`
ForceIrmap *bool `protobuf:"varint,21,opt,name=force_irmap" json:"force_irmap,omitempty"`
ExecCmd []string `protobuf:"bytes,22,rep,name=exec_cmd" json:"exec_cmd,omitempty"`
ExtMnt []*ExtMountMap `protobuf:"bytes,23,rep,name=ext_mnt" json:"ext_mnt,omitempty"`
ManageCgroups *bool `protobuf:"varint,24,opt,name=manage_cgroups" json:"manage_cgroups,omitempty"`
CgRoot []*CgroupRoot `protobuf:"bytes,25,rep,name=cg_root" json:"cg_root,omitempty"`
RstSibling *bool `protobuf:"varint,26,opt,name=rst_sibling" json:"rst_sibling,omitempty"`
InheritFd []*InheritFd `protobuf:"bytes,27,rep,name=inherit_fd" json:"inherit_fd,omitempty"`
AutoExtMnt *bool `protobuf:"varint,28,opt,name=auto_ext_mnt" json:"auto_ext_mnt,omitempty"`
ExtSharing *bool `protobuf:"varint,29,opt,name=ext_sharing" json:"ext_sharing,omitempty"`
ExtMasters *bool `protobuf:"varint,30,opt,name=ext_masters" json:"ext_masters,omitempty"`
SkipMnt []string `protobuf:"bytes,31,rep,name=skip_mnt" json:"skip_mnt,omitempty"`
EnableFs []string `protobuf:"bytes,32,rep,name=enable_fs" json:"enable_fs,omitempty"`
UnixSkIno []*UnixSk `protobuf:"bytes,33,rep,name=unix_sk_ino" json:"unix_sk_ino,omitempty"`
ManageCgroupsMode *CriuCgMode `protobuf:"varint,34,opt,name=manage_cgroups_mode,enum=CriuCgMode" json:"manage_cgroups_mode,omitempty"`
GhostLimit *uint32 `protobuf:"varint,35,opt,name=ghost_limit,def=1048576" json:"ghost_limit,omitempty"`
IrmapScanPaths []string `protobuf:"bytes,36,rep,name=irmap_scan_paths" json:"irmap_scan_paths,omitempty"`
External []string `protobuf:"bytes,37,rep,name=external" json:"external,omitempty"`
EmptyNs *uint32 `protobuf:"varint,38,opt,name=empty_ns" json:"empty_ns,omitempty"`
NoSeccomp *bool `protobuf:"varint,39,opt,name=no_seccomp" json:"no_seccomp,omitempty"`
XXX_unrecognized []byte `json:"-"`
ImagesDirFd *int32 `protobuf:"varint,1,req,name=images_dir_fd,json=imagesDirFd" json:"images_dir_fd,omitempty"`
Pid *int32 `protobuf:"varint,2,opt,name=pid" json:"pid,omitempty"`
LeaveRunning *bool `protobuf:"varint,3,opt,name=leave_running,json=leaveRunning" json:"leave_running,omitempty"`
ExtUnixSk *bool `protobuf:"varint,4,opt,name=ext_unix_sk,json=extUnixSk" json:"ext_unix_sk,omitempty"`
TcpEstablished *bool `protobuf:"varint,5,opt,name=tcp_established,json=tcpEstablished" json:"tcp_established,omitempty"`
EvasiveDevices *bool `protobuf:"varint,6,opt,name=evasive_devices,json=evasiveDevices" json:"evasive_devices,omitempty"`
ShellJob *bool `protobuf:"varint,7,opt,name=shell_job,json=shellJob" json:"shell_job,omitempty"`
FileLocks *bool `protobuf:"varint,8,opt,name=file_locks,json=fileLocks" json:"file_locks,omitempty"`
LogLevel *int32 `protobuf:"varint,9,opt,name=log_level,json=logLevel,def=2" json:"log_level,omitempty"`
LogFile *string `protobuf:"bytes,10,opt,name=log_file,json=logFile" json:"log_file,omitempty"`
Ps *CriuPageServerInfo `protobuf:"bytes,11,opt,name=ps" json:"ps,omitempty"`
NotifyScripts *bool `protobuf:"varint,12,opt,name=notify_scripts,json=notifyScripts" json:"notify_scripts,omitempty"`
Root *string `protobuf:"bytes,13,opt,name=root" json:"root,omitempty"`
ParentImg *string `protobuf:"bytes,14,opt,name=parent_img,json=parentImg" json:"parent_img,omitempty"`
TrackMem *bool `protobuf:"varint,15,opt,name=track_mem,json=trackMem" json:"track_mem,omitempty"`
AutoDedup *bool `protobuf:"varint,16,opt,name=auto_dedup,json=autoDedup" json:"auto_dedup,omitempty"`
WorkDirFd *int32 `protobuf:"varint,17,opt,name=work_dir_fd,json=workDirFd" json:"work_dir_fd,omitempty"`
LinkRemap *bool `protobuf:"varint,18,opt,name=link_remap,json=linkRemap" json:"link_remap,omitempty"`
Veths []*CriuVethPair `protobuf:"bytes,19,rep,name=veths" json:"veths,omitempty"`
CpuCap *uint32 `protobuf:"varint,20,opt,name=cpu_cap,json=cpuCap,def=4294967295" json:"cpu_cap,omitempty"`
ForceIrmap *bool `protobuf:"varint,21,opt,name=force_irmap,json=forceIrmap" json:"force_irmap,omitempty"`
ExecCmd []string `protobuf:"bytes,22,rep,name=exec_cmd,json=execCmd" json:"exec_cmd,omitempty"`
ExtMnt []*ExtMountMap `protobuf:"bytes,23,rep,name=ext_mnt,json=extMnt" json:"ext_mnt,omitempty"`
ManageCgroups *bool `protobuf:"varint,24,opt,name=manage_cgroups,json=manageCgroups" json:"manage_cgroups,omitempty"`
CgRoot []*CgroupRoot `protobuf:"bytes,25,rep,name=cg_root,json=cgRoot" json:"cg_root,omitempty"`
RstSibling *bool `protobuf:"varint,26,opt,name=rst_sibling,json=rstSibling" json:"rst_sibling,omitempty"`
InheritFd []*InheritFd `protobuf:"bytes,27,rep,name=inherit_fd,json=inheritFd" json:"inherit_fd,omitempty"`
AutoExtMnt *bool `protobuf:"varint,28,opt,name=auto_ext_mnt,json=autoExtMnt" json:"auto_ext_mnt,omitempty"`
ExtSharing *bool `protobuf:"varint,29,opt,name=ext_sharing,json=extSharing" json:"ext_sharing,omitempty"`
ExtMasters *bool `protobuf:"varint,30,opt,name=ext_masters,json=extMasters" json:"ext_masters,omitempty"`
SkipMnt []string `protobuf:"bytes,31,rep,name=skip_mnt,json=skipMnt" json:"skip_mnt,omitempty"`
EnableFs []string `protobuf:"bytes,32,rep,name=enable_fs,json=enableFs" json:"enable_fs,omitempty"`
UnixSkIno []*UnixSk `protobuf:"bytes,33,rep,name=unix_sk_ino,json=unixSkIno" json:"unix_sk_ino,omitempty"`
ManageCgroupsMode *CriuCgMode `protobuf:"varint,34,opt,name=manage_cgroups_mode,json=manageCgroupsMode,enum=CriuCgMode" json:"manage_cgroups_mode,omitempty"`
GhostLimit *uint32 `protobuf:"varint,35,opt,name=ghost_limit,json=ghostLimit,def=1048576" json:"ghost_limit,omitempty"`
IrmapScanPaths []string `protobuf:"bytes,36,rep,name=irmap_scan_paths,json=irmapScanPaths" json:"irmap_scan_paths,omitempty"`
External []string `protobuf:"bytes,37,rep,name=external" json:"external,omitempty"`
EmptyNs *uint32 `protobuf:"varint,38,opt,name=empty_ns,json=emptyNs" json:"empty_ns,omitempty"`
JoinNs []*JoinNamespace `protobuf:"bytes,39,rep,name=join_ns,json=joinNs" json:"join_ns,omitempty"`
CgroupProps *string `protobuf:"bytes,41,opt,name=cgroup_props,json=cgroupProps" json:"cgroup_props,omitempty"`
CgroupPropsFile *string `protobuf:"bytes,42,opt,name=cgroup_props_file,json=cgroupPropsFile" json:"cgroup_props_file,omitempty"`
CgroupDumpController []string `protobuf:"bytes,43,rep,name=cgroup_dump_controller,json=cgroupDumpController" json:"cgroup_dump_controller,omitempty"`
FreezeCgroup *string `protobuf:"bytes,44,opt,name=freeze_cgroup,json=freezeCgroup" json:"freeze_cgroup,omitempty"`
Timeout *uint32 `protobuf:"varint,45,opt,name=timeout" json:"timeout,omitempty"`
TcpSkipInFlight *bool `protobuf:"varint,46,opt,name=tcp_skip_in_flight,json=tcpSkipInFlight" json:"tcp_skip_in_flight,omitempty"`
WeakSysctls *bool `protobuf:"varint,47,opt,name=weak_sysctls,json=weakSysctls" json:"weak_sysctls,omitempty"`
LazyPages *bool `protobuf:"varint,48,opt,name=lazy_pages,json=lazyPages" json:"lazy_pages,omitempty"`
StatusFd *int32 `protobuf:"varint,49,opt,name=status_fd,json=statusFd" json:"status_fd,omitempty"`
OrphanPtsMaster *bool `protobuf:"varint,50,opt,name=orphan_pts_master,json=orphanPtsMaster" json:"orphan_pts_master,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuOpts) Reset() { *m = CriuOpts{} }
func (m *CriuOpts) String() string { return proto.CompactTextString(m) }
func (*CriuOpts) ProtoMessage() {}
func (m *CriuOpts) Reset() { *m = CriuOpts{} }
func (m *CriuOpts) String() string { return proto.CompactTextString(m) }
func (*CriuOpts) ProtoMessage() {}
func (*CriuOpts) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{7} }
const Default_CriuOpts_LogLevel int32 = 2
const Default_CriuOpts_CpuCap uint32 = 4294967295
@@ -606,9 +671,79 @@ func (m *CriuOpts) GetEmptyNs() uint32 {
return 0
}
func (m *CriuOpts) GetNoSeccomp() bool {
if m != nil && m.NoSeccomp != nil {
return *m.NoSeccomp
func (m *CriuOpts) GetJoinNs() []*JoinNamespace {
if m != nil {
return m.JoinNs
}
return nil
}
func (m *CriuOpts) GetCgroupProps() string {
if m != nil && m.CgroupProps != nil {
return *m.CgroupProps
}
return ""
}
func (m *CriuOpts) GetCgroupPropsFile() string {
if m != nil && m.CgroupPropsFile != nil {
return *m.CgroupPropsFile
}
return ""
}
func (m *CriuOpts) GetCgroupDumpController() []string {
if m != nil {
return m.CgroupDumpController
}
return nil
}
func (m *CriuOpts) GetFreezeCgroup() string {
if m != nil && m.FreezeCgroup != nil {
return *m.FreezeCgroup
}
return ""
}
func (m *CriuOpts) GetTimeout() uint32 {
if m != nil && m.Timeout != nil {
return *m.Timeout
}
return 0
}
func (m *CriuOpts) GetTcpSkipInFlight() bool {
if m != nil && m.TcpSkipInFlight != nil {
return *m.TcpSkipInFlight
}
return false
}
func (m *CriuOpts) GetWeakSysctls() bool {
if m != nil && m.WeakSysctls != nil {
return *m.WeakSysctls
}
return false
}
func (m *CriuOpts) GetLazyPages() bool {
if m != nil && m.LazyPages != nil {
return *m.LazyPages
}
return false
}
func (m *CriuOpts) GetStatusFd() int32 {
if m != nil && m.StatusFd != nil {
return *m.StatusFd
}
return 0
}
func (m *CriuOpts) GetOrphanPtsMaster() bool {
if m != nil && m.OrphanPtsMaster != nil {
return *m.OrphanPtsMaster
}
return false
}
@@ -618,9 +753,10 @@ type CriuDumpResp struct {
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuDumpResp) Reset() { *m = CriuDumpResp{} }
func (m *CriuDumpResp) String() string { return proto.CompactTextString(m) }
func (*CriuDumpResp) ProtoMessage() {}
func (m *CriuDumpResp) Reset() { *m = CriuDumpResp{} }
func (m *CriuDumpResp) String() string { return proto.CompactTextString(m) }
func (*CriuDumpResp) ProtoMessage() {}
func (*CriuDumpResp) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{8} }
func (m *CriuDumpResp) GetRestored() bool {
if m != nil && m.Restored != nil {
@@ -634,9 +770,10 @@ type CriuRestoreResp struct {
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuRestoreResp) Reset() { *m = CriuRestoreResp{} }
func (m *CriuRestoreResp) String() string { return proto.CompactTextString(m) }
func (*CriuRestoreResp) ProtoMessage() {}
func (m *CriuRestoreResp) Reset() { *m = CriuRestoreResp{} }
func (m *CriuRestoreResp) String() string { return proto.CompactTextString(m) }
func (*CriuRestoreResp) ProtoMessage() {}
func (*CriuRestoreResp) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{9} }
func (m *CriuRestoreResp) GetPid() int32 {
if m != nil && m.Pid != nil {
@@ -651,9 +788,10 @@ type CriuNotify struct {
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuNotify) Reset() { *m = CriuNotify{} }
func (m *CriuNotify) String() string { return proto.CompactTextString(m) }
func (*CriuNotify) ProtoMessage() {}
func (m *CriuNotify) Reset() { *m = CriuNotify{} }
func (m *CriuNotify) String() string { return proto.CompactTextString(m) }
func (*CriuNotify) ProtoMessage() {}
func (*CriuNotify) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{10} }
func (m *CriuNotify) GetScript() string {
if m != nil && m.Script != nil {
@@ -673,13 +811,15 @@ func (m *CriuNotify) GetPid() int32 {
// List of features which can queried via
// CRIU_REQ_TYPE__FEATURE_CHECK
type CriuFeatures struct {
MemTrack *bool `protobuf:"varint,1,opt,name=mem_track" json:"mem_track,omitempty"`
MemTrack *bool `protobuf:"varint,1,opt,name=mem_track,json=memTrack" json:"mem_track,omitempty"`
LazyPages *bool `protobuf:"varint,2,opt,name=lazy_pages,json=lazyPages" json:"lazy_pages,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuFeatures) Reset() { *m = CriuFeatures{} }
func (m *CriuFeatures) String() string { return proto.CompactTextString(m) }
func (*CriuFeatures) ProtoMessage() {}
func (m *CriuFeatures) Reset() { *m = CriuFeatures{} }
func (m *CriuFeatures) String() string { return proto.CompactTextString(m) }
func (*CriuFeatures) ProtoMessage() {}
func (*CriuFeatures) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{11} }
func (m *CriuFeatures) GetMemTrack() bool {
if m != nil && m.MemTrack != nil {
@@ -688,15 +828,22 @@ func (m *CriuFeatures) GetMemTrack() bool {
return false
}
func (m *CriuFeatures) GetLazyPages() bool {
if m != nil && m.LazyPages != nil {
return *m.LazyPages
}
return false
}
type CriuReq struct {
Type *CriuReqType `protobuf:"varint,1,req,name=type,enum=CriuReqType" json:"type,omitempty"`
Opts *CriuOpts `protobuf:"bytes,2,opt,name=opts" json:"opts,omitempty"`
NotifySuccess *bool `protobuf:"varint,3,opt,name=notify_success" json:"notify_success,omitempty"`
NotifySuccess *bool `protobuf:"varint,3,opt,name=notify_success,json=notifySuccess" json:"notify_success,omitempty"`
//
// When set service won't close the connection but
// will wait for more req-s to appear. Works not
// for all request types.
KeepOpen *bool `protobuf:"varint,4,opt,name=keep_open" json:"keep_open,omitempty"`
KeepOpen *bool `protobuf:"varint,4,opt,name=keep_open,json=keepOpen" json:"keep_open,omitempty"`
//
// 'features' can be used to query which features
// are supported by the installed criu/kernel
@@ -705,9 +852,10 @@ type CriuReq struct {
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuReq) Reset() { *m = CriuReq{} }
func (m *CriuReq) String() string { return proto.CompactTextString(m) }
func (*CriuReq) ProtoMessage() {}
func (m *CriuReq) Reset() { *m = CriuReq{} }
func (m *CriuReq) String() string { return proto.CompactTextString(m) }
func (*CriuReq) ProtoMessage() {}
func (*CriuReq) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{12} }
func (m *CriuReq) GetType() CriuReqType {
if m != nil && m.Type != nil {
@@ -751,14 +899,17 @@ type CriuResp struct {
Restore *CriuRestoreResp `protobuf:"bytes,4,opt,name=restore" json:"restore,omitempty"`
Notify *CriuNotify `protobuf:"bytes,5,opt,name=notify" json:"notify,omitempty"`
Ps *CriuPageServerInfo `protobuf:"bytes,6,opt,name=ps" json:"ps,omitempty"`
CrErrno *int32 `protobuf:"varint,7,opt,name=cr_errno" json:"cr_errno,omitempty"`
CrErrno *int32 `protobuf:"varint,7,opt,name=cr_errno,json=crErrno" json:"cr_errno,omitempty"`
Features *CriuFeatures `protobuf:"bytes,8,opt,name=features" json:"features,omitempty"`
CrErrmsg *string `protobuf:"bytes,9,opt,name=cr_errmsg,json=crErrmsg" json:"cr_errmsg,omitempty"`
Version *CriuVersion `protobuf:"bytes,10,opt,name=version" json:"version,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuResp) Reset() { *m = CriuResp{} }
func (m *CriuResp) String() string { return proto.CompactTextString(m) }
func (*CriuResp) ProtoMessage() {}
func (m *CriuResp) Reset() { *m = CriuResp{} }
func (m *CriuResp) String() string { return proto.CompactTextString(m) }
func (*CriuResp) ProtoMessage() {}
func (*CriuResp) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{13} }
func (m *CriuResp) GetType() CriuReqType {
if m != nil && m.Type != nil {
@@ -816,7 +967,212 @@ func (m *CriuResp) GetFeatures() *CriuFeatures {
return nil
}
func (m *CriuResp) GetCrErrmsg() string {
if m != nil && m.CrErrmsg != nil {
return *m.CrErrmsg
}
return ""
}
func (m *CriuResp) GetVersion() *CriuVersion {
if m != nil {
return m.Version
}
return nil
}
// Answer for criu_req_type.VERSION requests
type CriuVersion struct {
Major *int32 `protobuf:"varint,1,req,name=major" json:"major,omitempty"`
Minor *int32 `protobuf:"varint,2,req,name=minor" json:"minor,omitempty"`
Gitid *string `protobuf:"bytes,3,opt,name=gitid" json:"gitid,omitempty"`
Sublevel *int32 `protobuf:"varint,4,opt,name=sublevel" json:"sublevel,omitempty"`
Extra *int32 `protobuf:"varint,5,opt,name=extra" json:"extra,omitempty"`
Name *string `protobuf:"bytes,6,opt,name=name" json:"name,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuVersion) Reset() { *m = CriuVersion{} }
func (m *CriuVersion) String() string { return proto.CompactTextString(m) }
func (*CriuVersion) ProtoMessage() {}
func (*CriuVersion) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{14} }
func (m *CriuVersion) GetMajor() int32 {
if m != nil && m.Major != nil {
return *m.Major
}
return 0
}
func (m *CriuVersion) GetMinor() int32 {
if m != nil && m.Minor != nil {
return *m.Minor
}
return 0
}
func (m *CriuVersion) GetGitid() string {
if m != nil && m.Gitid != nil {
return *m.Gitid
}
return ""
}
func (m *CriuVersion) GetSublevel() int32 {
if m != nil && m.Sublevel != nil {
return *m.Sublevel
}
return 0
}
func (m *CriuVersion) GetExtra() int32 {
if m != nil && m.Extra != nil {
return *m.Extra
}
return 0
}
func (m *CriuVersion) GetName() string {
if m != nil && m.Name != nil {
return *m.Name
}
return ""
}
func init() {
proto.RegisterType((*CriuPageServerInfo)(nil), "criu_page_server_info")
proto.RegisterType((*CriuVethPair)(nil), "criu_veth_pair")
proto.RegisterType((*ExtMountMap)(nil), "ext_mount_map")
proto.RegisterType((*JoinNamespace)(nil), "join_namespace")
proto.RegisterType((*InheritFd)(nil), "inherit_fd")
proto.RegisterType((*CgroupRoot)(nil), "cgroup_root")
proto.RegisterType((*UnixSk)(nil), "unix_sk")
proto.RegisterType((*CriuOpts)(nil), "criu_opts")
proto.RegisterType((*CriuDumpResp)(nil), "criu_dump_resp")
proto.RegisterType((*CriuRestoreResp)(nil), "criu_restore_resp")
proto.RegisterType((*CriuNotify)(nil), "criu_notify")
proto.RegisterType((*CriuFeatures)(nil), "criu_features")
proto.RegisterType((*CriuReq)(nil), "criu_req")
proto.RegisterType((*CriuResp)(nil), "criu_resp")
proto.RegisterType((*CriuVersion)(nil), "criu_version")
proto.RegisterEnum("CriuCgMode", CriuCgMode_name, CriuCgMode_value)
proto.RegisterEnum("CriuReqType", CriuReqType_name, CriuReqType_value)
}
func init() { proto.RegisterFile("criurpc.proto", fileDescriptor0) }
var fileDescriptor0 = []byte{
// 1781 bytes of a gzipped FileDescriptorProto
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x56, 0xdd, 0x72, 0x5b, 0xb7,
0x11, 0x0e, 0x29, 0xfe, 0x1c, 0x82, 0x3f, 0xa6, 0x10, 0xdb, 0x81, 0x93, 0xda, 0x62, 0xe8, 0x28,
0x51, 0x15, 0x97, 0x4d, 0x58, 0x3b, 0xae, 0x33, 0xed, 0x85, 0x47, 0x22, 0x5d, 0x36, 0x92, 0xc8,
0x01, 0x25, 0xcf, 0xe4, 0x0a, 0x73, 0x74, 0x0e, 0x48, 0xc1, 0x3c, 0x7f, 0x05, 0x40, 0x45, 0xf2,
0x83, 0xf4, 0x29, 0xfa, 0x0c, 0x7d, 0x84, 0xbe, 0x4e, 0x6f, 0x3b, 0xbb, 0x00, 0x65, 0x29, 0xc9,
0xb4, 0xbd, 0xc3, 0x7e, 0x58, 0x00, 0xbb, 0xfb, 0xed, 0x0f, 0x48, 0x3b, 0xd2, 0x6a, 0xad, 0x8b,
0x68, 0x50, 0xe8, 0xdc, 0xe6, 0xfd, 0x25, 0x79, 0x00, 0x80, 0x28, 0xc2, 0xa5, 0x14, 0x46, 0xea,
0x4b, 0xa9, 0x85, 0xca, 0x16, 0x39, 0x65, 0xa4, 0x1e, 0xc6, 0xb1, 0x96, 0xc6, 0xb0, 0x52, 0xaf,
0xb4, 0xd7, 0xe0, 0x1b, 0x91, 0x52, 0x52, 0x29, 0x72, 0x6d, 0x59, 0xb9, 0x57, 0xda, 0xab, 0x72,
0x5c, 0xd3, 0x2e, 0xd9, 0x2a, 0x54, 0xcc, 0xb6, 0x10, 0x82, 0x25, 0xed, 0x90, 0xf2, 0x22, 0x66,
0x15, 0x04, 0xca, 0x8b, 0xb8, 0xff, 0x27, 0xd2, 0xc1, 0x87, 0x2e, 0xa5, 0xbd, 0x10, 0x45, 0xa8,
0x34, 0xfd, 0x98, 0x54, 0xd5, 0x42, 0xa8, 0x8c, 0x95, 0x7a, 0xe5, 0xbd, 0x06, 0xaf, 0xa8, 0xc5,
0x24, 0xa3, 0x0f, 0x48, 0x4d, 0x2d, 0x44, 0xbe, 0x86, 0xeb, 0x01, 0xad, 0xaa, 0xc5, 0x74, 0x6d,
0xfb, 0x7f, 0x20, 0x6d, 0x79, 0x65, 0x45, 0x9a, 0xaf, 0x33, 0x2b, 0xd2, 0xb0, 0x80, 0x07, 0x57,
0xf2, 0xda, 0x1f, 0x85, 0x25, 0x20, 0x97, 0x61, 0xe2, 0x8f, 0xc1, 0xb2, 0xff, 0x96, 0x74, 0xde,
0xe5, 0x2a, 0x13, 0x59, 0x98, 0x4a, 0x53, 0x84, 0x91, 0x04, 0xa3, 0x32, 0xe3, 0x0f, 0x95, 0x33,
0x43, 0x3f, 0x21, 0xf5, 0xcc, 0x88, 0x85, 0x4a, 0xa4, 0x3f, 0x57, 0xcb, 0xcc, 0x58, 0x25, 0x92,
0x7e, 0x46, 0x1a, 0xf2, 0xca, 0xea, 0x50, 0xe4, 0x85, 0x45, 0xaf, 0x1a, 0x3c, 0x40, 0x60, 0x5a,
0xd8, 0xfe, 0x80, 0x10, 0x95, 0x5d, 0x48, 0xad, 0xac, 0x58, 0xc4, 0xbf, 0x62, 0x89, 0x73, 0x1d,
0x2e, 0x74, 0xae, 0xbf, 0x20, 0xcd, 0x68, 0xa9, 0xf3, 0x75, 0x21, 0x74, 0x9e, 0x5b, 0x88, 0x5f,
0x64, 0x75, 0xe2, 0xc3, 0x8a, 0x6b, 0x8c, 0x69, 0x68, 0x2f, 0xbc, 0x15, 0xb8, 0xee, 0xef, 0x90,
0xfa, 0x3a, 0x53, 0x57, 0xc2, 0xac, 0xe8, 0x7d, 0x52, 0x55, 0x59, 0x1e, 0x4b, 0x7c, 0xa5, 0xcd,
0x9d, 0xd0, 0xff, 0x57, 0x9b, 0x34, 0x30, 0xa6, 0x79, 0x61, 0x0d, 0xed, 0x93, 0xb6, 0x4a, 0xc3,
0xa5, 0x34, 0x22, 0x56, 0x5a, 0x2c, 0x62, 0xd4, 0xad, 0xf2, 0xa6, 0x03, 0x0f, 0x95, 0x1e, 0xc7,
0x1b, 0x9a, 0xca, 0x1f, 0x68, 0x7a, 0x4a, 0xda, 0x89, 0x0c, 0x2f, 0xa5, 0xd0, 0xeb, 0x2c, 0x53,
0xd9, 0x12, 0x9d, 0x0d, 0x78, 0x0b, 0x41, 0xee, 0x30, 0xfa, 0x84, 0x34, 0x21, 0xfa, 0xde, 0x1a,
0x24, 0x35, 0xe0, 0x10, 0xa0, 0xb3, 0x4c, 0x5d, 0xcd, 0x57, 0xf4, 0x2b, 0x72, 0xcf, 0x46, 0x85,
0x90, 0xc6, 0x86, 0xe7, 0x89, 0x32, 0x17, 0x32, 0x66, 0x55, 0xd4, 0xe9, 0xd8, 0xa8, 0x18, 0x7d,
0x40, 0x41, 0x51, 0x5e, 0x86, 0x46, 0x5d, 0x4a, 0x11, 0xcb, 0x4b, 0x15, 0x49, 0xc3, 0x6a, 0x4e,
0xd1, 0xc3, 0x87, 0x0e, 0x85, 0xf8, 0x9b, 0x0b, 0x99, 0x24, 0xe2, 0x5d, 0x7e, 0xce, 0xea, 0xa8,
0x12, 0x20, 0xf0, 0xd7, 0xfc, 0x9c, 0x3e, 0x26, 0x04, 0x28, 0x13, 0x49, 0x1e, 0xad, 0x0c, 0x0b,
0x9c, 0x35, 0x80, 0x1c, 0x01, 0x40, 0x9f, 0x90, 0x46, 0x92, 0x2f, 0x45, 0x22, 0x2f, 0x65, 0xc2,
0x1a, 0xe0, 0xea, 0xf7, 0xa5, 0x21, 0x0f, 0x92, 0x7c, 0x79, 0x04, 0x10, 0x7d, 0x44, 0x60, 0xed,
0x58, 0x27, 0x2e, 0xb5, 0x93, 0x7c, 0x89, 0xb4, 0x7f, 0x49, 0xca, 0x85, 0x61, 0xcd, 0x5e, 0x69,
0xaf, 0x39, 0x7c, 0x38, 0xf8, 0xd5, 0xc2, 0xe0, 0xe5, 0xc2, 0xd0, 0x5d, 0xd2, 0xc9, 0x72, 0xab,
0x16, 0xd7, 0xc2, 0x44, 0x5a, 0x15, 0xd6, 0xb0, 0x16, 0x5a, 0xd1, 0x76, 0xe8, 0xdc, 0x81, 0xc0,
0x2a, 0x30, 0xce, 0xda, 0x8e, 0x69, 0x64, 0xff, 0x31, 0x21, 0x45, 0xa8, 0x65, 0x66, 0x85, 0x4a,
0x97, 0xac, 0x83, 0x3b, 0x0d, 0x87, 0x4c, 0xd2, 0x25, 0x38, 0x6e, 0x75, 0x18, 0xad, 0x44, 0x2a,
0x53, 0x76, 0xcf, 0x39, 0x8e, 0xc0, 0xb1, 0x4c, 0xe1, 0x6c, 0xb8, 0xb6, 0xb9, 0x88, 0x65, 0xbc,
0x2e, 0x58, 0xd7, 0x39, 0x0e, 0xc8, 0x21, 0x00, 0x40, 0xd3, 0x4f, 0xb9, 0x5e, 0x6d, 0xf8, 0xdf,
0x46, 0x96, 0x1b, 0x00, 0x39, 0xf6, 0x1f, 0x13, 0x92, 0xa8, 0x6c, 0x25, 0xb4, 0x4c, 0xc3, 0x82,
0x51, 0x77, 0x1c, 0x10, 0x0e, 0x00, 0xdd, 0x25, 0x55, 0x28, 0x4e, 0xc3, 0x3e, 0xee, 0x6d, 0xed,
0x35, 0x87, 0xf7, 0x06, 0x77, 0xeb, 0x95, 0xbb, 0x5d, 0xfa, 0x94, 0xd4, 0xa3, 0x62, 0x2d, 0xa2,
0xb0, 0x60, 0xf7, 0x7b, 0xa5, 0xbd, 0xf6, 0xf7, 0xe4, 0xf9, 0xf0, 0xd5, 0xf3, 0x57, 0xdf, 0xbd,
0x1c, 0xbe, 0x7a, 0xc1, 0x6b, 0x51, 0xb1, 0x3e, 0x08, 0x0b, 0xba, 0x43, 0x9a, 0x8b, 0x5c, 0x47,
0x52, 0x28, 0x0d, 0x6f, 0x3d, 0xc0, 0xb7, 0x08, 0x42, 0x13, 0x40, 0x80, 0x04, 0x79, 0x25, 0x23,
0x11, 0xa5, 0x31, 0x7b, 0xd8, 0xdb, 0x02, 0x12, 0x40, 0x3e, 0x48, 0x21, 0x49, 0xea, 0x58, 0xeb,
0x99, 0x65, 0x9f, 0xa0, 0x25, 0x9d, 0xc1, 0x9d, 0xda, 0xe7, 0x35, 0x79, 0x65, 0x8f, 0x33, 0x0b,
0x2c, 0xa4, 0x61, 0x06, 0xfc, 0xb8, 0xf2, 0x32, 0x8c, 0x39, 0x16, 0x1c, 0x7a, 0xe0, 0x40, 0xba,
0x4b, 0xea, 0xd1, 0x12, 0x4b, 0x8f, 0x3d, 0xc2, 0xfb, 0x5a, 0x83, 0x5b, 0xe5, 0xc8, 0x6b, 0xd1,
0x92, 0x03, 0x31, 0x3b, 0xa4, 0xa9, 0x8d, 0x15, 0x46, 0x9d, 0x27, 0x50, 0x07, 0x9f, 0x3a, 0x93,
0xb5, 0xb1, 0x73, 0x87, 0xd0, 0xfd, 0xdb, 0x65, 0xcf, 0x3e, 0xc3, 0xab, 0x9a, 0x83, 0x0f, 0x10,
0x6f, 0xf8, 0xf5, 0x38, 0xa6, 0x3d, 0xd2, 0x42, 0xa6, 0x36, 0x8e, 0xfc, 0xc6, 0xdd, 0x06, 0xd8,
0xc8, 0x19, 0xbf, 0xe3, 0x6a, 0xca, 0x5c, 0x84, 0x1a, 0x9e, 0x7b, 0xec, 0x14, 0xe4, 0x95, 0x9d,
0x3b, 0x64, 0xa3, 0x90, 0x86, 0xc6, 0x4a, 0x6d, 0xd8, 0x93, 0x1b, 0x85, 0x63, 0x87, 0x40, 0x08,
0xcd, 0x4a, 0x15, 0x78, 0xff, 0x8e, 0x0b, 0x21, 0xc8, 0x70, 0x39, 0xb4, 0xaf, 0x2c, 0x3c, 0x4f,
0xa4, 0x58, 0x18, 0xd6, 0xc3, 0xbd, 0xc0, 0x01, 0x63, 0x43, 0xf7, 0x48, 0xd3, 0x57, 0xb2, 0x50,
0x59, 0xce, 0x3e, 0x47, 0x47, 0x82, 0x81, 0xc7, 0x78, 0x63, 0x8d, 0x45, 0x3d, 0xc9, 0x72, 0xfa,
0x67, 0xf2, 0xf1, 0xdd, 0x00, 0x8b, 0x14, 0x9a, 0x50, 0xbf, 0x57, 0xda, 0xeb, 0x0c, 0xdb, 0x2e,
0x3f, 0xa2, 0x25, 0x82, 0x7c, 0xfb, 0x4e, 0xd0, 0x8f, 0xf3, 0x58, 0xc2, 0x43, 0xcb, 0x8b, 0xdc,
0x58, 0x91, 0xa8, 0x54, 0x59, 0xf6, 0x14, 0xb3, 0xa5, 0xfe, 0xed, 0x37, 0xcf, 0xff, 0xf8, 0xe2,
0xe5, 0x77, 0x9c, 0xe0, 0xde, 0x11, 0x6c, 0xd1, 0x3d, 0xd2, 0xc5, 0x44, 0x11, 0x26, 0x0a, 0x33,
0x01, 0xdd, 0xcf, 0xb0, 0x2f, 0xd0, 0xec, 0x0e, 0xe2, 0xf3, 0x28, 0xcc, 0x66, 0x80, 0xd2, 0x4f,
0x21, 0x6f, 0xac, 0xd4, 0x59, 0x98, 0xb0, 0x5d, 0xef, 0x98, 0x97, 0x31, 0xa7, 0xd2, 0xc2, 0x5e,
0x8b, 0xcc, 0xb0, 0x2f, 0xe1, 0x31, 0x5e, 0x47, 0xf9, 0x04, 0x7c, 0xae, 0xbb, 0x51, 0x60, 0xd8,
0x57, 0x3e, 0xbb, 0xef, 0x8e, 0x06, 0x5e, 0x03, 0xf9, 0xc4, 0xd0, 0xcf, 0x49, 0xcb, 0x67, 0x47,
0xa1, 0xf3, 0xc2, 0xb0, 0xdf, 0x62, 0x85, 0xfa, 0x06, 0x3e, 0x03, 0x88, 0xee, 0x93, 0xed, 0xdb,
0x2a, 0xae, 0x93, 0xec, 0xa3, 0xde, 0xbd, 0x5b, 0x7a, 0xd8, 0x51, 0x9e, 0x93, 0x87, 0x5e, 0x37,
0x5e, 0xa7, 0x85, 0x88, 0xf2, 0xcc, 0xea, 0x3c, 0x49, 0xa4, 0x66, 0x5f, 0xa3, 0xf5, 0xf7, 0xdd,
0xee, 0xe1, 0x3a, 0x2d, 0x0e, 0x6e, 0xf6, 0xa0, 0x2b, 0x2f, 0xb4, 0x94, 0xef, 0x37, 0x81, 0x67,
0xcf, 0xf0, 0xf6, 0x96, 0x03, 0x5d, 0x8c, 0x61, 0x42, 0x5b, 0x95, 0x4a, 0x98, 0x95, 0xbf, 0x73,
0xde, 0x7a, 0x91, 0x7e, 0x4d, 0x28, 0xf4, 0x63, 0xcc, 0x0e, 0x95, 0x89, 0x45, 0xa2, 0x96, 0x17,
0x96, 0x0d, 0x30, 0x83, 0xa0, 0x53, 0xcf, 0x57, 0xaa, 0x98, 0x64, 0x63, 0x84, 0xc1, 0xe1, 0x9f,
0x64, 0xb8, 0x12, 0xe6, 0xda, 0x44, 0x36, 0x31, 0xec, 0xf7, 0xa8, 0xd6, 0x04, 0x6c, 0xee, 0x20,
0x6c, 0x1c, 0xe1, 0xfb, 0x6b, 0xec, 0x85, 0x86, 0x7d, 0xe3, 0x1b, 0x47, 0xf8, 0xfe, 0x7a, 0x06,
0x00, 0x36, 0x6b, 0x1b, 0xda, 0xb5, 0x81, 0xba, 0xf8, 0x16, 0xbb, 0x4e, 0xe0, 0x80, 0x71, 0x0c,
0xc1, 0xca, 0x75, 0x71, 0x01, 0xb4, 0x5a, 0xe3, 0xb3, 0x99, 0x0d, 0x9d, 0x29, 0x6e, 0x63, 0x66,
0x8d, 0x4b, 0xe9, 0xfe, 0x33, 0xff, 0x47, 0xc0, 0x50, 0x69, 0x69, 0x0a, 0xa0, 0x5b, 0x4b, 0x63,
0x73, 0x2d, 0x63, 0x9c, 0x97, 0x01, 0xbf, 0x91, 0xfb, 0xbb, 0x64, 0x1b, 0xb5, 0x3d, 0xe0, 0x0e,
0xf8, 0x09, 0xe7, 0x66, 0x1f, 0x2c, 0xfb, 0x2f, 0x49, 0x13, 0xd5, 0x5c, 0x6b, 0xa6, 0x0f, 0x49,
0xcd, 0xf5, 0x6c, 0x3f, 0x7f, 0xbd, 0xf4, 0xcb, 0xd1, 0xd8, 0xff, 0xc1, 0xfd, 0x95, 0xc4, 0x42,
0x86, 0x76, 0xad, 0x9d, 0x9f, 0xa9, 0x4c, 0x05, 0xb6, 0xe3, 0x8d, 0x35, 0xa9, 0x4c, 0x4f, 0x41,
0xfe, 0x59, 0x8c, 0xca, 0x3f, 0x8b, 0x51, 0xff, 0x9f, 0x25, 0x12, 0x78, 0x6b, 0xff, 0x46, 0xfb,
0xa4, 0x62, 0xaf, 0x0b, 0x37, 0xcd, 0x3b, 0xc3, 0xce, 0x60, 0xb3, 0x21, 0x00, 0xe5, 0xb8, 0x47,
0x9f, 0x90, 0x0a, 0x8c, 0x75, 0xbc, 0xa9, 0x39, 0x24, 0x83, 0x9b, 0x41, 0xcf, 0x11, 0xbf, 0x3d,
0x82, 0xd6, 0x51, 0x04, 0xdf, 0xb4, 0xad, 0x3b, 0x23, 0xc8, 0x81, 0x60, 0xf3, 0x4a, 0xca, 0x42,
0xe4, 0x85, 0xcc, 0xfc, 0xe0, 0x0e, 0x00, 0x98, 0x16, 0x32, 0xa3, 0xfb, 0x24, 0xd8, 0x38, 0x87,
0x03, 0xbb, 0xb9, 0xb1, 0x65, 0x83, 0xf2, 0x9b, 0xfd, 0xfe, 0xbf, 0xcb, 0xfe, 0xb3, 0x81, 0x61,
0xfe, 0x7f, 0x3c, 0x60, 0xa4, 0xbe, 0x31, 0x0d, 0xbe, 0x35, 0x01, 0xdf, 0x88, 0xf4, 0x29, 0xa9,
0x00, 0xc5, 0x68, 0xf1, 0xcd, 0xa0, 0xb9, 0x21, 0x9d, 0xe3, 0x26, 0x7d, 0x46, 0xea, 0x9e, 0x59,
0xb4, 0xbb, 0x39, 0xa4, 0x83, 0x5f, 0xd0, 0xcd, 0x37, 0x2a, 0xf4, 0x0b, 0x52, 0x73, 0x8e, 0x7b,
0x47, 0x5a, 0x83, 0x5b, 0xa4, 0x73, 0xbf, 0xe7, 0xe7, 0x7b, 0xed, 0x7f, 0xce, 0xf7, 0x47, 0x40,
0x96, 0x90, 0x5a, 0x67, 0x39, 0xfe, 0x3e, 0xaa, 0xbc, 0x1e, 0xe9, 0x11, 0x88, 0x77, 0x62, 0x16,
0xfc, 0xf7, 0x98, 0x41, 0xf0, 0xdd, 0x35, 0xa9, 0x59, 0xe2, 0x4f, 0xa4, 0xc1, 0x03, 0xbc, 0x27,
0x35, 0x4b, 0x18, 0x73, 0x97, 0x52, 0x1b, 0x95, 0x67, 0xf8, 0x0b, 0x69, 0x6e, 0x1a, 0xaa, 0x07,
0xf9, 0x66, 0xb7, 0xff, 0xf7, 0x12, 0x69, 0xdd, 0xde, 0x81, 0xdf, 0x60, 0x1a, 0xbe, 0xcb, 0xb5,
0xcf, 0x72, 0x27, 0x20, 0xaa, 0xb2, 0x5c, 0xfb, 0x8f, 0xa7, 0x13, 0x00, 0x5d, 0x2a, 0xeb, 0xbf,
0xe6, 0x0d, 0xee, 0x04, 0x28, 0x2b, 0xb3, 0x3e, 0x77, 0x3f, 0xa4, 0x8a, 0x2f, 0x58, 0x2f, 0xc3,
0x09, 0xfc, 0xe9, 0x62, 0x20, 0xab, 0xdc, 0x09, 0xf0, 0x95, 0x81, 0x5e, 0x89, 0xb1, 0x6b, 0x70,
0x5c, 0xef, 0x0b, 0x6f, 0x97, 0x1f, 0x01, 0x94, 0x90, 0xda, 0xe4, 0xcd, 0xc9, 0x94, 0x8f, 0xba,
0x1f, 0xd1, 0x26, 0xa9, 0x1f, 0xbc, 0x11, 0x27, 0xd3, 0x93, 0x51, 0xb7, 0x44, 0x1b, 0xa4, 0x3a,
0xe3, 0xd3, 0xd9, 0xbc, 0x5b, 0xa6, 0x01, 0xa9, 0xcc, 0xa7, 0xe3, 0xd3, 0xee, 0x16, 0xac, 0xc6,
0x67, 0x47, 0x47, 0xdd, 0x0a, 0x9c, 0x9b, 0x9f, 0xf2, 0xc9, 0xc1, 0x69, 0xb7, 0x0a, 0xe7, 0x0e,
0x47, 0xe3, 0xd7, 0x67, 0x47, 0xa7, 0xdd, 0xda, 0xfe, 0x3f, 0x4a, 0xbe, 0x04, 0x37, 0x99, 0x05,
0x37, 0x8d, 0x8e, 0x67, 0xa7, 0x3f, 0x76, 0x3f, 0x82, 0xf3, 0x87, 0x67, 0xc7, 0xb3, 0x6e, 0x09,
0xce, 0xf0, 0xd1, 0xfc, 0x14, 0x1e, 0x2e, 0x83, 0xc6, 0xc1, 0x5f, 0x46, 0x07, 0x3f, 0x74, 0xb7,
0x68, 0x8b, 0x04, 0x33, 0x3e, 0x12, 0xa8, 0x55, 0xa1, 0xf7, 0x48, 0x73, 0xf6, 0xfa, 0xcd, 0x48,
0xcc, 0x47, 0xfc, 0xed, 0x88, 0x77, 0xab, 0xf0, 0xec, 0xc9, 0xf4, 0x74, 0x32, 0xfe, 0xb1, 0x5b,
0xa3, 0x5d, 0xd2, 0x3a, 0x98, 0x9d, 0x4d, 0x4e, 0xc6, 0x53, 0xa7, 0x5e, 0xa7, 0xdb, 0xa4, 0xbd,
0x41, 0xdc, 0x7d, 0x01, 0x40, 0xe3, 0xd1, 0xeb, 0xd3, 0x33, 0x3e, 0xf2, 0x50, 0x03, 0x9e, 0x7e,
0x3b, 0xe2, 0xf3, 0xc9, 0xf4, 0xa4, 0x4b, 0xfe, 0x13, 0x00, 0x00, 0xff, 0xff, 0x5f, 0x2a, 0xaf,
0x49, 0x5b, 0x0d, 0x00, 0x00,
}

View File

@@ -1,3 +1,5 @@
syntax = "proto2";
message criu_page_server_info {
optional string address = 1;
optional int32 port = 2;
@@ -15,6 +17,12 @@ message ext_mount_map {
required string val = 2;
};
message join_namespace {
required string ns = 1;
required string ns_file = 2;
optional string extra_opt = 3;
}
message inherit_fd {
required string key = 1;
required int32 fd = 2;
@@ -31,7 +39,7 @@ message unix_sk {
enum criu_cg_mode {
IGNORE = 0;
NONE = 1;
CG_NONE = 1;
PROPS = 2;
SOFT = 3;
FULL = 4;
@@ -63,13 +71,13 @@ message criu_opts {
optional int32 work_dir_fd = 17;
optional bool link_remap = 18;
repeated criu_veth_pair veths = 19;
repeated criu_veth_pair veths = 19; /* DEPRECATED, use external instead */
optional uint32 cpu_cap = 20 [default = 0xffffffff];
optional bool force_irmap = 21;
repeated string exec_cmd = 22;
repeated ext_mount_map ext_mnt = 23;
repeated ext_mount_map ext_mnt = 23; /* DEPRECATED, use external instead */
optional bool manage_cgroups = 24; /* backward compatibility */
repeated cgroup_root cg_root = 25;
@@ -83,14 +91,26 @@ message criu_opts {
repeated string skip_mnt = 31;
repeated string enable_fs = 32;
repeated unix_sk unix_sk_ino = 33;
repeated unix_sk unix_sk_ino = 33; /* DEPRECATED, use external instead */
optional criu_cg_mode manage_cgroups_mode = 34;
optional uint32 ghost_limit = 35 [default = 0x100000];
repeated string irmap_scan_paths = 36;
repeated string external = 37;
optional uint32 empty_ns = 38;
optional bool no_seccomp = 39;
repeated join_namespace join_ns = 39;
optional string cgroup_props = 41;
optional string cgroup_props_file = 42;
repeated string cgroup_dump_controller = 43;
optional string freeze_cgroup = 44;
optional uint32 timeout = 45;
optional bool tcp_skip_in_flight = 46;
optional bool weak_sysctls = 47;
optional bool lazy_pages = 48;
optional int32 status_fd = 49;
optional bool orphan_pts_master = 50;
}
message criu_dump_resp {
@@ -120,6 +140,8 @@ enum criu_req_type {
CPUINFO_CHECK = 8;
FEATURE_CHECK = 9;
VERSION = 10;
}
/*
@@ -128,6 +150,7 @@ enum criu_req_type {
*/
message criu_features {
optional bool mem_track = 1;
optional bool lazy_pages = 2;
}
/*
@@ -171,4 +194,16 @@ message criu_resp {
optional int32 cr_errno = 7;
optional criu_features features = 8;
optional string cr_errmsg = 9;
optional criu_version version = 10;
}
/* Answer for criu_req_type.VERSION requests */
message criu_version {
required int32 major = 1;
required int32 minor = 2;
optional string gitid = 3;
optional int32 sublevel = 4;
optional int32 extra = 5;
optional string name = 6;
}

View File

@@ -60,9 +60,9 @@ func (c ErrorCode) String() string {
type Error interface {
error
// Returns a verbose string including the error message
// and a representation of the stack trace suitable for
// printing.
// Returns an error if it failed to write the detail of the Error to w.
// The detail of the Error may include the error message and a
// representation of the stack trace.
Detail(w io.Writer) error
// Returns the error code for this error.

View File

@@ -10,7 +10,7 @@ type Factory interface {
// between 1 and 1024 characters, inclusive.
//
// The id must not already be in use by an existing container. Containers created using
// a factory with the same path (and file system) must have distinct ids.
// a factory with the same path (and filesystem) must have distinct ids.
//
// Returns the new container with a running process.
//
@@ -28,7 +28,6 @@ type Factory interface {
//
// errors:
// Path does not exist
// Container is stopped
// System error
Load(id string) (Container, error)

View File

@@ -10,15 +10,17 @@ import (
"regexp"
"runtime/debug"
"strconv"
"syscall"
"github.com/docker/docker/pkg/mount"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/cgroups/rootless"
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/configs/validate"
"github.com/opencontainers/runc/libcontainer/utils"
"golang.org/x/sys/unix"
)
const (
@@ -26,15 +28,20 @@ const (
execFifoFilename = "exec.fifo"
)
var (
idRegex = regexp.MustCompile(`^[\w+-\.]+$`)
maxIdLen = 1024
)
var idRegex = regexp.MustCompile(`^[\w+-\.]+$`)
// InitArgs returns an options func to configure a LinuxFactory with the
// provided init binary path and arguments.
func InitArgs(args ...string) func(*LinuxFactory) error {
return func(l *LinuxFactory) error {
return func(l *LinuxFactory) (err error) {
if len(args) > 0 {
// Resolve relative paths to ensure that its available
// after directory changes.
if args[0], err = filepath.Abs(args[0]); err != nil {
return newGenericError(err, ConfigInvalid)
}
}
l.InitArgs = args
return nil
}
@@ -65,6 +72,20 @@ func Cgroupfs(l *LinuxFactory) error {
return nil
}
// RootlessCgroups is an options func to configure a LinuxFactory to
// return containers that use the "rootless" cgroup manager, which will
// fail to do any operations not possible to do with an unprivileged user.
// It should only be used in conjunction with rootless containers.
func RootlessCgroups(l *LinuxFactory) error {
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
return &rootless.Manager{
Cgroups: config,
Paths: paths,
}
}
return nil
}
// TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs.
func TmpfsRoot(l *LinuxFactory) error {
mounted, err := mount.Mounted(l.Root)
@@ -72,7 +93,7 @@ func TmpfsRoot(l *LinuxFactory) error {
return err
}
if !mounted {
if err := syscall.Mount("tmpfs", l.Root, "tmpfs", 0, ""); err != nil {
if err := unix.Mount("tmpfs", l.Root, "tmpfs", 0, ""); err != nil {
return err
}
}
@@ -141,14 +162,6 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
if err := l.Validator.Validate(config); err != nil {
return nil, newGenericError(err, ConfigInvalid)
}
uid, err := config.HostUID()
if err != nil {
return nil, newGenericError(err, SystemError)
}
gid, err := config.HostGID()
if err != nil {
return nil, newGenericError(err, SystemError)
}
containerRoot := filepath.Join(l.Root, id)
if _, err := os.Stat(containerRoot); err == nil {
return nil, newGenericError(fmt.Errorf("container with id exists: %v", id), IdInUse)
@@ -158,18 +171,11 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
if err := os.MkdirAll(containerRoot, 0711); err != nil {
return nil, newGenericError(err, SystemError)
}
if err := os.Chown(containerRoot, uid, gid); err != nil {
if err := os.Chown(containerRoot, unix.Geteuid(), unix.Getegid()); err != nil {
return nil, newGenericError(err, SystemError)
}
fifoName := filepath.Join(containerRoot, execFifoFilename)
oldMask := syscall.Umask(0000)
if err := syscall.Mkfifo(fifoName, 0622); err != nil {
syscall.Umask(oldMask)
return nil, newGenericError(err, SystemError)
}
syscall.Umask(oldMask)
if err := os.Chown(fifoName, uid, gid); err != nil {
return nil, newGenericError(err, SystemError)
if config.Rootless {
RootlessCgroups(l)
}
c := &linuxContainer{
id: id,
@@ -197,6 +203,10 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
processStartTime: state.InitProcessStartTime,
fds: state.ExternalDescriptors,
}
// We have to use the RootlessManager.
if state.Rootless {
RootlessCgroups(l)
}
c := &linuxContainer{
initProcess: r,
initProcessStartTime: state.InitProcessStartTime,
@@ -222,58 +232,71 @@ func (l *LinuxFactory) Type() string {
// StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state
// This is a low level implementation detail of the reexec and should not be consumed externally
func (l *LinuxFactory) StartInitialization() (err error) {
var pipefd, rootfd int
for _, pair := range []struct {
k string
v *int
}{
{"_LIBCONTAINER_INITPIPE", &pipefd},
{"_LIBCONTAINER_STATEDIR", &rootfd},
} {
var (
pipefd, fifofd int
consoleSocket *os.File
envInitPipe = os.Getenv("_LIBCONTAINER_INITPIPE")
envFifoFd = os.Getenv("_LIBCONTAINER_FIFOFD")
envConsole = os.Getenv("_LIBCONTAINER_CONSOLE")
)
s := os.Getenv(pair.k)
i, err := strconv.Atoi(s)
if err != nil {
return fmt.Errorf("unable to convert %s=%s to int", pair.k, s)
}
*pair.v = i
// Get the INITPIPE.
pipefd, err = strconv.Atoi(envInitPipe)
if err != nil {
return fmt.Errorf("unable to convert _LIBCONTAINER_INITPIPE=%s to int: %s", envInitPipe, err)
}
var (
pipe = os.NewFile(uintptr(pipefd), "pipe")
it = initType(os.Getenv("_LIBCONTAINER_INITTYPE"))
)
defer pipe.Close()
// Only init processes have FIFOFD.
fifofd = -1
if it == initStandard {
if fifofd, err = strconv.Atoi(envFifoFd); err != nil {
return fmt.Errorf("unable to convert _LIBCONTAINER_FIFOFD=%s to int: %s", envFifoFd, err)
}
}
if envConsole != "" {
console, err := strconv.Atoi(envConsole)
if err != nil {
return fmt.Errorf("unable to convert _LIBCONTAINER_CONSOLE=%s to int: %s", envConsole, err)
}
consoleSocket = os.NewFile(uintptr(console), "console-socket")
defer consoleSocket.Close()
}
// clear the current process's environment to clean any libcontainer
// specific env vars.
os.Clearenv()
var i initer
defer func() {
// We have an error during the initialization of the container's init,
// send it back to the parent process in the form of an initError.
// If container's init successed, syscall.Exec will not return, hence
// this defer function will never be called.
if _, ok := i.(*linuxStandardInit); ok {
// Synchronisation only necessary for standard init.
if werr := utils.WriteJSON(pipe, syncT{procError}); werr != nil {
panic(err)
}
if werr := utils.WriteJSON(pipe, syncT{procError}); werr != nil {
fmt.Fprintln(os.Stderr, err)
return
}
if werr := utils.WriteJSON(pipe, newSystemError(err)); werr != nil {
panic(err)
fmt.Fprintln(os.Stderr, err)
return
}
// ensure that this pipe is always closed
pipe.Close()
}()
defer func() {
if e := recover(); e != nil {
err = fmt.Errorf("panic from initialization: %v, %v", e, string(debug.Stack()))
}
}()
i, err = newContainerInit(it, pipe, rootfd)
i, err := newContainerInit(it, pipe, consoleSocket, fifofd)
if err != nil {
return err
}
// If Init succeeds, syscall.Exec will not return, hence none of the defers will be called.
return i.Init()
}
@@ -297,8 +320,6 @@ func (l *LinuxFactory) validateID(id string) error {
if !idRegex.MatchString(id) {
return newGenericError(fmt.Errorf("invalid id format: %v", id), InvalidIdFormat)
}
if len(id) > maxIdLen {
return newGenericError(fmt.Errorf("invalid id format: %v", id), InvalidIdFormat)
}
return nil
}

View File

@@ -9,20 +9,6 @@ import (
"github.com/opencontainers/runc/libcontainer/stacktrace"
)
type syncType uint8
const (
procReady syncType = iota
procError
procRun
procHooks
procResume
)
type syncT struct {
Type syncType `json:"type"`
}
var errorTemplate = template.Must(template.New("error").Parse(`Timestamp: {{.Timestamp}}
Code: {{.ECode}}
{{if .Message }}

View File

@@ -6,20 +6,21 @@ import (
"encoding/json"
"fmt"
"io"
"io/ioutil"
"net"
"os"
"strconv"
"strings"
"syscall"
"syscall" // only for Errno
"unsafe"
"github.com/Sirupsen/logrus"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/user"
"github.com/opencontainers/runc/libcontainer/utils"
"github.com/sirupsen/logrus"
"github.com/vishvananda/netlink"
"golang.org/x/sys/unix"
)
type initType string
@@ -30,7 +31,8 @@ const (
)
type pid struct {
Pid int `json:"pid"`
Pid int `json:"pid"`
PidFirstChild int `json:"pid_first"`
}
// network is an internal struct used to setup container networks.
@@ -44,29 +46,29 @@ type network struct {
// initConfig is used for transferring parameters from Exec() to Init()
type initConfig struct {
Args []string `json:"args"`
Env []string `json:"env"`
Cwd string `json:"cwd"`
Capabilities []string `json:"capabilities"`
ProcessLabel string `json:"process_label"`
AppArmorProfile string `json:"apparmor_profile"`
NoNewPrivileges bool `json:"no_new_privileges"`
User string `json:"user"`
AdditionalGroups []string `json:"additional_groups"`
Config *configs.Config `json:"config"`
Console string `json:"console"`
Networks []*network `json:"network"`
PassedFilesCount int `json:"passed_files_count"`
ContainerId string `json:"containerid"`
Rlimits []configs.Rlimit `json:"rlimits"`
ExecFifoPath string `json:"start_pipe_path"`
Args []string `json:"args"`
Env []string `json:"env"`
Cwd string `json:"cwd"`
Capabilities *configs.Capabilities `json:"capabilities"`
ProcessLabel string `json:"process_label"`
AppArmorProfile string `json:"apparmor_profile"`
NoNewPrivileges bool `json:"no_new_privileges"`
User string `json:"user"`
AdditionalGroups []string `json:"additional_groups"`
Config *configs.Config `json:"config"`
Networks []*network `json:"network"`
PassedFilesCount int `json:"passed_files_count"`
ContainerId string `json:"containerid"`
Rlimits []configs.Rlimit `json:"rlimits"`
CreateConsole bool `json:"create_console"`
Rootless bool `json:"rootless"`
}
type initer interface {
Init() error
}
func newContainerInit(t initType, pipe *os.File, stateDirFD int) (initer, error) {
func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd int) (initer, error) {
var config *initConfig
if err := json.NewDecoder(pipe).Decode(&config); err != nil {
return nil, err
@@ -77,14 +79,17 @@ func newContainerInit(t initType, pipe *os.File, stateDirFD int) (initer, error)
switch t {
case initSetns:
return &linuxSetnsInit{
config: config,
pipe: pipe,
consoleSocket: consoleSocket,
config: config,
}, nil
case initStandard:
return &linuxStandardInit{
pipe: pipe,
parentPid: syscall.Getppid(),
config: config,
stateDirFD: stateDirFD,
pipe: pipe,
consoleSocket: consoleSocket,
parentPid: unix.Getppid(),
config: config,
fifoFd: fifoFd,
}, nil
}
return nil, fmt.Errorf("unknown init type %q", t)
@@ -116,16 +121,18 @@ func finalizeNamespace(config *initConfig) error {
return err
}
capabilities := config.Config.Capabilities
capabilities := &configs.Capabilities{}
if config.Capabilities != nil {
capabilities = config.Capabilities
} else if config.Config.Capabilities != nil {
capabilities = config.Config.Capabilities
}
w, err := newCapWhitelist(capabilities)
w, err := newContainerCapList(capabilities)
if err != nil {
return err
}
// drop capabilities in bounding set before changing user
if err := w.dropBoundingSet(); err != nil {
if err := w.ApplyBoundingSet(); err != nil {
return err
}
// preserve existing capabilities while we change users
@@ -138,36 +145,70 @@ func finalizeNamespace(config *initConfig) error {
if err := system.ClearKeepCaps(); err != nil {
return err
}
// drop all other capabilities
if err := w.drop(); err != nil {
if err := w.ApplyCaps(); err != nil {
return err
}
if config.Cwd != "" {
if err := syscall.Chdir(config.Cwd); err != nil {
if err := unix.Chdir(config.Cwd); err != nil {
return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %v", config.Cwd, err)
}
}
return nil
}
// setupConsole sets up the console from inside the container, and sends the
// master pty fd to the config.Pipe (using cmsg). This is done to ensure that
// consoles are scoped to a container properly (see runc#814 and the many
// issues related to that). This has to be run *after* we've pivoted to the new
// rootfs (and the users' configuration is entirely set up).
func setupConsole(socket *os.File, config *initConfig, mount bool) error {
defer socket.Close()
// At this point, /dev/ptmx points to something that we would expect. We
// used to change the owner of the slave path, but since the /dev/pts mount
// can have gid=X set (at the users' option). So touching the owner of the
// slave PTY is not necessary, as the kernel will handle that for us. Note
// however, that setupUser (specifically fixStdioPermissions) *will* change
// the UID owner of the console to be the user the process will run as (so
// they can actually control their console).
console, err := newConsole()
if err != nil {
return err
}
// After we return from here, we don't need the console anymore.
defer console.Close()
linuxConsole, ok := console.(*linuxConsole)
if !ok {
return fmt.Errorf("failed to cast console to *linuxConsole")
}
// Mount the console inside our rootfs.
if mount {
if err := linuxConsole.mount(); err != nil {
return err
}
}
// While we can access console.master, using the API is a good idea.
if err := utils.SendFd(socket, linuxConsole.File()); err != nil {
return err
}
// Now, dup over all the things.
return linuxConsole.dupStdio()
}
// syncParentReady sends to the given pipe a JSON payload which indicates that
// the init is ready to Exec the child process. It then waits for the parent to
// indicate that it is cleared to Exec.
func syncParentReady(pipe io.ReadWriter) error {
// Tell parent.
if err := utils.WriteJSON(pipe, syncT{procReady}); err != nil {
if err := writeSync(pipe, procReady); err != nil {
return err
}
// Wait for parent to give the all-clear.
var procSync syncT
if err := json.NewDecoder(pipe).Decode(&procSync); err != nil {
if err == io.EOF {
return fmt.Errorf("parent closed synchronisation channel")
}
if procSync.Type != procRun {
return fmt.Errorf("invalid synchronisation flag from parent")
}
if err := readSync(pipe, procRun); err != nil {
return err
}
return nil
}
@@ -176,19 +217,15 @@ func syncParentReady(pipe io.ReadWriter) error {
// indicate that it is cleared to resume.
func syncParentHooks(pipe io.ReadWriter) error {
// Tell parent.
if err := utils.WriteJSON(pipe, syncT{procHooks}); err != nil {
if err := writeSync(pipe, procHooks); err != nil {
return err
}
// Wait for parent to give the all-clear.
var procSync syncT
if err := json.NewDecoder(pipe).Decode(&procSync); err != nil {
if err == io.EOF {
return fmt.Errorf("parent closed synchronisation channel")
}
if procSync.Type != procResume {
return fmt.Errorf("invalid synchronisation flag from parent")
}
if err := readSync(pipe, procResume); err != nil {
return err
}
return nil
}
@@ -196,18 +233,21 @@ func syncParentHooks(pipe io.ReadWriter) error {
func setupUser(config *initConfig) error {
// Set up defaults.
defaultExecUser := user.ExecUser{
Uid: syscall.Getuid(),
Gid: syscall.Getgid(),
Uid: 0,
Gid: 0,
Home: "/",
}
passwdPath, err := user.GetPasswdPath()
if err != nil {
return err
}
groupPath, err := user.GetGroupPath()
if err != nil {
return err
}
execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath)
if err != nil {
return err
@@ -220,22 +260,49 @@ func setupUser(config *initConfig) error {
return err
}
}
if config.Rootless {
if execUser.Uid != 0 {
return fmt.Errorf("cannot run as a non-root user in a rootless container")
}
if execUser.Gid != 0 {
return fmt.Errorf("cannot run as a non-root group in a rootless container")
}
// We cannot set any additional groups in a rootless container and thus we
// bail if the user asked us to do so. TODO: We currently can't do this
// earlier, but if libcontainer.Process.User was typesafe this might work.
if len(addGroups) > 0 {
return fmt.Errorf("cannot set any additional groups in a rootless container")
}
}
// before we change to the container's user make sure that the processes STDIO
// is correctly owned by the user that we are switching to.
if err := fixStdioPermissions(execUser); err != nil {
if err := fixStdioPermissions(config, execUser); err != nil {
return err
}
suppGroups := append(execUser.Sgids, addGroups...)
if err := syscall.Setgroups(suppGroups); err != nil {
return err
// This isn't allowed in an unprivileged user namespace since Linux 3.19.
// There's nothing we can do about /etc/group entries, so we silently
// ignore setting groups here (since the user didn't explicitly ask us to
// set the group).
if !config.Rootless {
suppGroups := append(execUser.Sgids, addGroups...)
if err := unix.Setgroups(suppGroups); err != nil {
return err
}
}
if err := system.Setgid(execUser.Gid); err != nil {
return err
}
if err := system.Setuid(execUser.Uid); err != nil {
return err
}
// if we didn't get HOME already, set it based on the user's HOME
if envHome := os.Getenv("HOME"); envHome == "" {
if err := os.Setenv("HOME", execUser.Home); err != nil {
@@ -248,9 +315,9 @@ func setupUser(config *initConfig) error {
// fixStdioPermissions fixes the permissions of PID 1's STDIO within the container to the specified user.
// The ownership needs to match because it is created outside of the container and needs to be
// localized.
func fixStdioPermissions(u *user.ExecUser) error {
var null syscall.Stat_t
if err := syscall.Stat("/dev/null", &null); err != nil {
func fixStdioPermissions(config *initConfig, u *user.ExecUser) error {
var null unix.Stat_t
if err := unix.Stat("/dev/null", &null); err != nil {
return err
}
for _, fd := range []uintptr{
@@ -258,15 +325,31 @@ func fixStdioPermissions(u *user.ExecUser) error {
os.Stderr.Fd(),
os.Stdout.Fd(),
} {
var s syscall.Stat_t
if err := syscall.Fstat(int(fd), &s); err != nil {
var s unix.Stat_t
if err := unix.Fstat(int(fd), &s); err != nil {
return err
}
// skip chown of /dev/null if it was used as one of the STDIO fds.
// Skip chown of /dev/null if it was used as one of the STDIO fds.
if s.Rdev == null.Rdev {
continue
}
if err := syscall.Fchown(int(fd), u.Uid, u.Gid); err != nil {
// Skip chown if s.Gid is actually an unmapped gid in the host. While
// this is a bit dodgy if it just so happens that the console _is_
// owned by overflow_gid, there's no way for us to disambiguate this as
// a userspace program.
if _, err := config.Config.HostGID(int(s.Gid)); err != nil {
continue
}
// We only change the uid owner (as it is possible for the mount to
// prefer a different gid, and there's no reason for us to change it).
// The reason why we don't just leave the default uid=X mount setup is
// that users expect to be able to actually use their console. Without
// this code, you couldn't effectively run as a non-root user inside a
// container and also have a console set up.
if err := unix.Fchown(int(fd), u.Uid, int(s.Gid)); err != nil {
return err
}
}
@@ -321,23 +404,58 @@ func setupRoute(config *configs.Config) error {
func setupRlimits(limits []configs.Rlimit, pid int) error {
for _, rlimit := range limits {
if err := system.Prlimit(pid, rlimit.Type, syscall.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}); err != nil {
if err := system.Prlimit(pid, rlimit.Type, unix.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}); err != nil {
return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err)
}
}
return nil
}
func setOomScoreAdj(oomScoreAdj int, pid int) error {
path := fmt.Sprintf("/proc/%d/oom_score_adj", pid)
const _P_PID = 1
return ioutil.WriteFile(path, []byte(strconv.Itoa(oomScoreAdj)), 0600)
type siginfo struct {
si_signo int32
si_errno int32
si_code int32
// below here is a union; si_pid is the only field we use
si_pid int32
// Pad to 128 bytes as detailed in blockUntilWaitable
pad [96]byte
}
// killCgroupProcesses freezes then iterates over all the processes inside the
// manager's cgroups sending a SIGKILL to each process then waiting for them to
// exit.
func killCgroupProcesses(m cgroups.Manager) error {
// isWaitable returns true if the process has exited false otherwise.
// Its based off blockUntilWaitable in src/os/wait_waitid.go
func isWaitable(pid int) (bool, error) {
si := &siginfo{}
_, _, e := unix.Syscall6(unix.SYS_WAITID, _P_PID, uintptr(pid), uintptr(unsafe.Pointer(si)), unix.WEXITED|unix.WNOWAIT|unix.WNOHANG, 0, 0)
if e != 0 {
return false, os.NewSyscallError("waitid", e)
}
return si.si_pid != 0, nil
}
// isNoChildren returns true if err represents a unix.ECHILD (formerly syscall.ECHILD) false otherwise
func isNoChildren(err error) bool {
switch err := err.(type) {
case syscall.Errno:
if err == unix.ECHILD {
return true
}
case *os.SyscallError:
if err.Err == unix.ECHILD {
return true
}
}
return false
}
// signalAllProcesses freezes then iterates over all the processes inside the
// manager's cgroups sending the signal s to them.
// If s is SIGKILL then it will wait for each process to exit.
// For all other signals it will check if the process is ready to report its
// exit status and only if it is will a wait be performed.
func signalAllProcesses(m cgroups.Manager, s os.Signal) error {
var procs []*os.Process
if err := m.Freeze(configs.Frozen); err != nil {
logrus.Warn(err)
@@ -354,16 +472,31 @@ func killCgroupProcesses(m cgroups.Manager) error {
continue
}
procs = append(procs, p)
if err := p.Kill(); err != nil {
if err := p.Signal(s); err != nil {
logrus.Warn(err)
}
}
if err := m.Freeze(configs.Thawed); err != nil {
logrus.Warn(err)
}
for _, p := range procs {
if s != unix.SIGKILL {
if ok, err := isWaitable(p.Pid); err != nil {
if !isNoChildren(err) {
logrus.Warn("signalAllProcesses: ", p.Pid, err)
}
continue
} else if !ok {
// Not ready to report so don't wait
continue
}
}
if _, err := p.Wait(); err != nil {
logrus.Warn(err)
if !isNoChildren(err) {
logrus.Warn("wait: ", err)
}
}
}
return nil

View File

@@ -9,6 +9,12 @@ go_library(
"//conditions:default": [],
}),
visibility = ["//visibility:public"],
deps = select({
"@io_bazel_rules_go//go/platform:linux_amd64": [
"//vendor/golang.org/x/sys/unix:go_default_library",
],
"//conditions:default": [],
}),
)
filegroup(

View File

@@ -6,30 +6,16 @@ import (
"fmt"
"strconv"
"strings"
"syscall"
"unsafe"
)
const KEYCTL_JOIN_SESSION_KEYRING = 1
const KEYCTL_SETPERM = 5
const KEYCTL_DESCRIBE = 6
"golang.org/x/sys/unix"
)
type KeySerial uint32
func JoinSessionKeyring(name string) (KeySerial, error) {
var _name *byte
var err error
if len(name) > 0 {
_name, err = syscall.BytePtrFromString(name)
if err != nil {
return KeySerial(0), err
}
}
sessKeyId, _, errn := syscall.Syscall(syscall.SYS_KEYCTL, KEYCTL_JOIN_SESSION_KEYRING, uintptr(unsafe.Pointer(_name)), 0)
if errn != 0 {
return 0, fmt.Errorf("could not create session key: %v", errn)
sessKeyId, err := unix.KeyctlJoinSessionKeyring(name)
if err != nil {
return 0, fmt.Errorf("could not create session key: %v", err)
}
return KeySerial(sessKeyId), nil
}
@@ -38,10 +24,8 @@ func JoinSessionKeyring(name string) (KeySerial, error) {
// anding the bits with the given mask (clearing permissions) and setting
// additional permission bits
func ModKeyringPerm(ringId KeySerial, mask, setbits uint32) error {
dest := make([]byte, 1024)
destBytes := unsafe.Pointer(&dest[0])
if _, _, err := syscall.Syscall6(syscall.SYS_KEYCTL, uintptr(KEYCTL_DESCRIBE), uintptr(ringId), uintptr(destBytes), uintptr(len(dest)), 0, 0); err != 0 {
dest, err := unix.KeyctlString(unix.KEYCTL_DESCRIBE, int(ringId))
if err != nil {
return err
}
@@ -58,7 +42,7 @@ func ModKeyringPerm(ringId KeySerial, mask, setbits uint32) error {
perm := (uint32(perm64) & mask) | setbits
if _, _, err := syscall.Syscall(syscall.SYS_KEYCTL, uintptr(KEYCTL_SETPERM), uintptr(ringId), uintptr(perm)); err != 0 {
if err := unix.KeyctlSetperm(int(ringId), perm); err != nil {
return err
}

View File

@@ -1,21 +0,0 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")
go_library(
name = "go_default_library",
srcs = ["label.go"],
visibility = ["//visibility:public"],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -1,84 +0,0 @@
// +build !selinux !linux
package label
// InitLabels returns the process label and file labels to be used within
// the container. A list of options can be passed into this function to alter
// the labels.
func InitLabels(options []string) (string, string, error) {
return "", "", nil
}
func GetROMountLabel() string {
return ""
}
func GenLabels(options string) (string, string, error) {
return "", "", nil
}
func FormatMountLabel(src string, mountLabel string) string {
return src
}
func SetProcessLabel(processLabel string) error {
return nil
}
func GetFileLabel(path string) (string, error) {
return "", nil
}
func SetFileLabel(path string, fileLabel string) error {
return nil
}
func SetFileCreateLabel(fileLabel string) error {
return nil
}
func Relabel(path string, fileLabel string, shared bool) error {
return nil
}
func GetPidLabel(pid int) (string, error) {
return "", nil
}
func Init() {
}
func ReserveLabel(label string) error {
return nil
}
func UnreserveLabel(label string) error {
return nil
}
// DupSecOpt takes a process label and returns security options that
// can be used to set duplicate labels on future container processes
func DupSecOpt(src string) []string {
return nil
}
// DisableSecOpt returns a security opt that can disable labeling
// support for future container processes
func DisableSecOpt() []string {
return nil
}
// Validate checks that the label does not include unexpected options
func Validate(label string) error {
return nil
}
// RelabelNeeded checks whether the user requested a relabel
func RelabelNeeded(label string) bool {
return false
}
// IsShared checks that the label includes a "shared" mark
func IsShared(label string) bool {
return false
}

View File

@@ -1,208 +0,0 @@
// +build selinux,linux
package label
import (
"fmt"
"strings"
"github.com/opencontainers/runc/libcontainer/selinux"
)
// Valid Label Options
var validOptions = map[string]bool{
"disable": true,
"type": true,
"user": true,
"role": true,
"level": true,
}
var ErrIncompatibleLabel = fmt.Errorf("Bad SELinux option z and Z can not be used together")
// InitLabels returns the process label and file labels to be used within
// the container. A list of options can be passed into this function to alter
// the labels. The labels returned will include a random MCS String, that is
// guaranteed to be unique.
func InitLabels(options []string) (string, string, error) {
if !selinux.SelinuxEnabled() {
return "", "", nil
}
processLabel, mountLabel := selinux.GetLxcContexts()
if processLabel != "" {
pcon := selinux.NewContext(processLabel)
mcon := selinux.NewContext(mountLabel)
for _, opt := range options {
val := strings.SplitN(opt, "=", 2)
if val[0] != "label" {
continue
}
if len(val) < 2 {
return "", "", fmt.Errorf("bad label option %q, valid options 'disable' or \n'user, role, level, type' followed by ':' and a value", opt)
}
if val[1] == "disable" {
return "", "", nil
}
con := strings.SplitN(val[1], ":", 2)
if len(con) < 2 || !validOptions[con[0]] {
return "", "", fmt.Errorf("bad label option %q, valid options 'disable, user, role, level, type'", con[0])
}
pcon[con[0]] = con[1]
if con[0] == "level" || con[0] == "user" {
mcon[con[0]] = con[1]
}
}
processLabel = pcon.Get()
mountLabel = mcon.Get()
}
return processLabel, mountLabel, nil
}
func GetROMountLabel() string {
return selinux.GetROFileLabel()
}
// DEPRECATED: The GenLabels function is only to be used during the transition to the official API.
func GenLabels(options string) (string, string, error) {
return InitLabels(strings.Fields(options))
}
// FormatMountLabel returns a string to be used by the mount command.
// The format of this string will be used to alter the labeling of the mountpoint.
// The string returned is suitable to be used as the options field of the mount command.
// If you need to have additional mount point options, you can pass them in as
// the first parameter. Second parameter is the label that you wish to apply
// to all content in the mount point.
func FormatMountLabel(src, mountLabel string) string {
if mountLabel != "" {
switch src {
case "":
src = fmt.Sprintf("context=%q", mountLabel)
default:
src = fmt.Sprintf("%s,context=%q", src, mountLabel)
}
}
return src
}
// SetProcessLabel takes a process label and tells the kernel to assign the
// label to the next program executed by the current process.
func SetProcessLabel(processLabel string) error {
if processLabel == "" {
return nil
}
return selinux.Setexeccon(processLabel)
}
// GetProcessLabel returns the process label that the kernel will assign
// to the next program executed by the current process. If "" is returned
// this indicates that the default labeling will happen for the process.
func GetProcessLabel() (string, error) {
return selinux.Getexeccon()
}
// GetFileLabel returns the label for specified path
func GetFileLabel(path string) (string, error) {
return selinux.Getfilecon(path)
}
// SetFileLabel modifies the "path" label to the specified file label
func SetFileLabel(path string, fileLabel string) error {
if selinux.SelinuxEnabled() && fileLabel != "" {
return selinux.Setfilecon(path, fileLabel)
}
return nil
}
// SetFileCreateLabel tells the kernel the label for all files to be created
func SetFileCreateLabel(fileLabel string) error {
if selinux.SelinuxEnabled() {
return selinux.Setfscreatecon(fileLabel)
}
return nil
}
// Relabel changes the label of path to the filelabel string.
// It changes the MCS label to s0 if shared is true.
// This will allow all containers to share the content.
func Relabel(path string, fileLabel string, shared bool) error {
if !selinux.SelinuxEnabled() {
return nil
}
if fileLabel == "" {
return nil
}
exclude_paths := map[string]bool{"/": true, "/usr": true, "/etc": true}
if exclude_paths[path] {
return fmt.Errorf("SELinux relabeling of %s is not allowed", path)
}
if shared {
c := selinux.NewContext(fileLabel)
c["level"] = "s0"
fileLabel = c.Get()
}
if err := selinux.Chcon(path, fileLabel, true); err != nil {
return fmt.Errorf("SELinux relabeling of %s is not allowed: %q", path, err)
}
return nil
}
// GetPidLabel will return the label of the process running with the specified pid
func GetPidLabel(pid int) (string, error) {
return selinux.Getpidcon(pid)
}
// Init initialises the labeling system
func Init() {
selinux.SelinuxEnabled()
}
// ReserveLabel will record the fact that the MCS label has already been used.
// This will prevent InitLabels from using the MCS label in a newly created
// container
func ReserveLabel(label string) error {
selinux.ReserveLabel(label)
return nil
}
// UnreserveLabel will remove the reservation of the MCS label.
// This will allow InitLabels to use the MCS label in a newly created
// containers
func UnreserveLabel(label string) error {
selinux.FreeLxcContexts(label)
return nil
}
// DupSecOpt takes an process label and returns security options that
// can be used to set duplicate labels on future container processes
func DupSecOpt(src string) []string {
return selinux.DupSecOpt(src)
}
// DisableSecOpt returns a security opt that can disable labeling
// support for future container processes
func DisableSecOpt() []string {
return selinux.DisableSecOpt()
}
// Validate checks that the label does not include unexpected options
func Validate(label string) error {
if strings.Contains(label, "z") && strings.Contains(label, "Z") {
return ErrIncompatibleLabel
}
return nil
}
// RelabelNeeded checks whether the user requested a relabel
func RelabelNeeded(label string) bool {
return strings.Contains(label, "z") || strings.Contains(label, "Z")
}
// IsShared checks that the label includes a "shared" mark
func IsShared(label string) bool {
return strings.Contains(label, "z")
}

View File

@@ -3,9 +3,8 @@
package libcontainer
import (
"syscall"
"github.com/vishvananda/netlink/nl"
"golang.org/x/sys/unix"
)
// list of known message types we want to send to bootstrap program
@@ -13,13 +12,12 @@ import (
const (
InitMsg uint16 = 62000
CloneFlagsAttr uint16 = 27281
ConsolePathAttr uint16 = 27282
NsPathsAttr uint16 = 27283
UidmapAttr uint16 = 27284
GidmapAttr uint16 = 27285
SetgroupAttr uint16 = 27286
// When syscall.NLA_HDRLEN is in gccgo, take this out.
syscall_NLA_HDRLEN = (syscall.SizeofNlAttr + syscall.NLA_ALIGNTO - 1) & ^(syscall.NLA_ALIGNTO - 1)
NsPathsAttr uint16 = 27282
UidmapAttr uint16 = 27283
GidmapAttr uint16 = 27284
SetgroupAttr uint16 = 27285
OomScoreAdjAttr uint16 = 27286
RootlessAttr uint16 = 27287
)
type Int32msg struct {
@@ -41,7 +39,7 @@ func (msg *Int32msg) Serialize() []byte {
}
func (msg *Int32msg) Len() int {
return syscall_NLA_HDRLEN + 4
return unix.NLA_HDRLEN + 4
}
// Bytemsg has the following representation
@@ -54,7 +52,7 @@ type Bytemsg struct {
func (msg *Bytemsg) Serialize() []byte {
l := msg.Len()
buf := make([]byte, (l+syscall.NLA_ALIGNTO-1) & ^(syscall.NLA_ALIGNTO-1))
buf := make([]byte, (l+unix.NLA_ALIGNTO-1) & ^(unix.NLA_ALIGNTO-1))
native := nl.NativeEndian()
native.PutUint16(buf[0:2], uint16(l))
native.PutUint16(buf[2:4], msg.Type)
@@ -63,7 +61,7 @@ func (msg *Bytemsg) Serialize() []byte {
}
func (msg *Bytemsg) Len() int {
return syscall_NLA_HDRLEN + len(msg.Value) + 1 // null-terminated
return unix.NLA_HDRLEN + len(msg.Value) + 1 // null-terminated
}
type Boolmsg struct {
@@ -85,5 +83,5 @@ func (msg *Boolmsg) Serialize() []byte {
}
func (msg *Boolmsg) Len() int {
return syscall_NLA_HDRLEN + 1
return unix.NLA_HDRLEN + 1
}

View File

@@ -7,7 +7,8 @@ import (
"io/ioutil"
"os"
"path/filepath"
"syscall"
"golang.org/x/sys/unix"
)
const oomCgroupName = "memory"
@@ -25,13 +26,13 @@ func registerMemoryEvent(cgDir string, evName string, arg string) (<-chan struct
if err != nil {
return nil, err
}
fd, _, syserr := syscall.RawSyscall(syscall.SYS_EVENTFD2, 0, syscall.FD_CLOEXEC, 0)
if syserr != 0 {
fd, err := unix.Eventfd(0, unix.EFD_CLOEXEC)
if err != nil {
evFile.Close()
return nil, syserr
return nil, err
}
eventfd := os.NewFile(fd, "eventfd")
eventfd := os.NewFile(uintptr(fd), "eventfd")
eventControlPath := filepath.Join(cgDir, "cgroup.event_control")
data := fmt.Sprintf("%d %d %s", eventfd.Fd(), evFile.Fd(), arg)
@@ -43,9 +44,9 @@ func registerMemoryEvent(cgDir string, evName string, arg string) (<-chan struct
ch := make(chan struct{})
go func() {
defer func() {
close(ch)
eventfd.Close()
evFile.Close()
close(ch)
}()
buf := make([]byte, 8)
for {

View File

@@ -47,12 +47,9 @@ type Process struct {
// ExtraFiles specifies additional open files to be inherited by the container
ExtraFiles []*os.File
// consolePath is the path to the console allocated to the container.
consolePath string
// Capabilities specify the capabilities to keep when executing the process inside the container
// All capabilities not specified will be dropped from the processes capability mask
Capabilities []string
Capabilities *configs.Capabilities
// AppArmorProfile specifies the profile to apply to the process and is
// changed at the time the process is execed
@@ -68,6 +65,9 @@ type Process struct {
// If Rlimits are not set, the container will inherit rlimits from the parent process
Rlimits []configs.Rlimit
// ConsoleSocket provides the masterfd console.
ConsoleSocket *os.File
ops processOperations
}
@@ -104,22 +104,3 @@ type IO struct {
Stdout io.ReadCloser
Stderr io.ReadCloser
}
// NewConsole creates new console for process and returns it
func (p *Process) NewConsole(rootuid, rootgid int) (Console, error) {
console, err := NewConsole(rootuid, rootgid)
if err != nil {
return nil, err
}
p.consolePath = console.Path()
return console, nil
}
// ConsoleFromPath sets the process's console with the path provided
func (p *Process) ConsoleFromPath(path string) error {
if p.consolePath != "" {
return newGenericError(fmt.Errorf("console path already exists for process"), ConsoleExists)
}
p.consolePath = path
return nil
}

View File

@@ -11,12 +11,14 @@ import (
"os/exec"
"path/filepath"
"strconv"
"syscall"
"syscall" // only for Signal
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/utils"
"golang.org/x/sys/unix"
)
type parentProcess interface {
@@ -33,7 +35,7 @@ type parentProcess interface {
wait() (*os.ProcessState, error)
// startTime returns the process start time.
startTime() (string, error)
startTime() (uint64, error)
signal(os.Signal) error
@@ -51,11 +53,11 @@ type setnsProcess struct {
fds []string
process *Process
bootstrapData io.Reader
rootDir *os.File
}
func (p *setnsProcess) startTime() (string, error) {
return system.GetProcessStartTime(p.pid())
func (p *setnsProcess) startTime() (uint64, error) {
stat, err := system.Stat(p.pid())
return stat.StartTime, err
}
func (p *setnsProcess) signal(sig os.Signal) error {
@@ -63,14 +65,13 @@ func (p *setnsProcess) signal(sig os.Signal) error {
if !ok {
return errors.New("os: unsupported signal type")
}
return syscall.Kill(p.pid(), s)
return unix.Kill(p.pid(), s)
}
func (p *setnsProcess) start() (err error) {
defer p.parentPipe.Close()
err = p.cmd.Start()
p.childPipe.Close()
p.rootDir.Close()
if err != nil {
return newSystemErrorWithCause(err, "starting setns process")
}
@@ -82,15 +83,12 @@ func (p *setnsProcess) start() (err error) {
if err = p.execSetns(); err != nil {
return newSystemErrorWithCause(err, "executing setns process")
}
if len(p.cgroupPaths) > 0 {
// We can't join cgroups if we're in a rootless container.
if !p.config.Rootless && len(p.cgroupPaths) > 0 {
if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil {
return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid())
}
}
// set oom_score_adj
if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil {
return newSystemErrorWithCause(err, "setting oom score")
}
// set rlimits, this has to be done here because we lose permissions
// to raise the limits once we enter a user-namespace
if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
@@ -100,15 +98,22 @@ func (p *setnsProcess) start() (err error) {
return newSystemErrorWithCause(err, "writing config to pipe")
}
if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil {
ierr := parseSync(p.parentPipe, func(sync *syncT) error {
switch sync.Type {
case procReady:
// This shouldn't happen.
panic("unexpected procReady in setns")
case procHooks:
// This shouldn't happen.
panic("unexpected procHooks in setns")
default:
return newSystemError(fmt.Errorf("invalid JSON payload from child"))
}
})
if err := unix.Shutdown(int(p.parentPipe.Fd()), unix.SHUT_WR); err != nil {
return newSystemErrorWithCause(err, "calling shutdown on init pipe")
}
// wait for the child process to fully complete and receive an error message
// if one was encoutered
var ierr *genericError
if err := json.NewDecoder(p.parentPipe).Decode(&ierr); err != nil && err != io.EOF {
return newSystemErrorWithCause(err, "decoding init error from pipe")
}
// Must be done after Shutdown so the child will exit and we can wait for it.
if ierr != nil {
p.wait()
@@ -136,6 +141,16 @@ func (p *setnsProcess) execSetns() error {
p.cmd.Wait()
return newSystemErrorWithCause(err, "reading pid from init pipe")
}
// Clean up the zombie parent process
firstChildProcess, err := os.FindProcess(pid.PidFirstChild)
if err != nil {
return err
}
// Ignore the error in case the child has already been reaped for any reason
_, _ = firstChildProcess.Wait()
process, err := os.FindProcess(pid.Pid)
if err != nil {
return err
@@ -188,7 +203,6 @@ type initProcess struct {
process *Process
bootstrapData io.Reader
sharePidns bool
rootDir *os.File
}
func (p *initProcess) pid() int {
@@ -219,6 +233,16 @@ func (p *initProcess) execSetns() error {
p.cmd.Wait()
return err
}
// Clean up the zombie parent process
firstChildProcess, err := os.FindProcess(pid.PidFirstChild)
if err != nil {
return err
}
// Ignore the error in case the child has already been reaped for any reason
_, _ = firstChildProcess.Wait()
process, err := os.FindProcess(pid.Pid)
if err != nil {
return err
@@ -233,13 +257,12 @@ func (p *initProcess) start() error {
err := p.cmd.Start()
p.process.ops = p
p.childPipe.Close()
p.rootDir.Close()
if err != nil {
p.process.ops = nil
return newSystemErrorWithCause(err, "starting init process command")
}
if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
return err
return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
}
if err := p.execSetns(); err != nil {
return newSystemErrorWithCause(err, "running exec setns process for init")
@@ -252,8 +275,9 @@ func (p *initProcess) start() error {
return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid())
}
p.setExternalDescriptors(fds)
// Do this before syncing with child so that no children
// can escape the cgroup
// Do this before syncing with child so that no children can escape the
// cgroup. We don't need to worry about not doing this and not being root
// because we'd be using the rootless cgroup manager in that case.
if err := p.manager.Apply(p.pid()); err != nil {
return newSystemErrorWithCause(err, "applying cgroup configuration for process")
}
@@ -270,30 +294,13 @@ func (p *initProcess) start() error {
return newSystemErrorWithCause(err, "sending config to init process")
}
var (
procSync syncT
sentRun bool
sentResume bool
ierr *genericError
)
dec := json.NewDecoder(p.parentPipe)
loop:
for {
if err := dec.Decode(&procSync); err != nil {
if err == io.EOF {
break loop
}
return newSystemErrorWithCause(err, "decoding sync type from init pipe")
}
switch procSync.Type {
ierr := parseSync(p.parentPipe, func(sync *syncT) error {
switch sync.Type {
case procReady:
if err := p.manager.Set(p.config.Config); err != nil {
return newSystemErrorWithCause(err, "setting cgroup config for ready process")
}
// set oom_score_adj
if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil {
return newSystemErrorWithCause(err, "setting oom score for ready process")
}
// set rlimits, this has to be done here because we lose permissions
// to raise the limits once we enter a user-namespace
if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
@@ -301,12 +308,17 @@ loop:
}
// call prestart hooks
if !p.config.Config.Namespaces.Contains(configs.NEWNS) {
// Setup cgroup before prestart hook, so that the prestart hook could apply cgroup permissions.
if err := p.manager.Set(p.config.Config); err != nil {
return newSystemErrorWithCause(err, "setting cgroup config for ready process")
}
if p.config.Config.Hooks != nil {
s := configs.HookState{
Version: p.container.config.Version,
ID: p.container.id,
Pid: p.pid(),
Root: p.config.Config.Rootfs,
Bundle: utils.SearchLabels(p.config.Config.Labels, "bundle"),
}
for i, hook := range p.config.Config.Hooks.Prestart {
if err := hook.Run(s); err != nil {
@@ -316,18 +328,21 @@ loop:
}
}
// Sync with child.
if err := utils.WriteJSON(p.parentPipe, syncT{procRun}); err != nil {
return newSystemErrorWithCause(err, "reading syncT run type")
if err := writeSync(p.parentPipe, procRun); err != nil {
return newSystemErrorWithCause(err, "writing syncT 'run'")
}
sentRun = true
case procHooks:
// Setup cgroup before prestart hook, so that the prestart hook could apply cgroup permissions.
if err := p.manager.Set(p.config.Config); err != nil {
return newSystemErrorWithCause(err, "setting cgroup config for procHooks process")
}
if p.config.Config.Hooks != nil {
s := configs.HookState{
Version: p.container.config.Version,
ID: p.container.id,
Pid: p.pid(),
Root: p.config.Config.Rootfs,
BundlePath: utils.SearchLabels(p.config.Config.Labels, "bundle"),
Version: p.container.config.Version,
ID: p.container.id,
Pid: p.pid(),
Bundle: utils.SearchLabels(p.config.Config.Labels, "bundle"),
}
for i, hook := range p.config.Config.Hooks.Prestart {
if err := hook.Run(s); err != nil {
@@ -336,34 +351,27 @@ loop:
}
}
// Sync with child.
if err := utils.WriteJSON(p.parentPipe, syncT{procResume}); err != nil {
return newSystemErrorWithCause(err, "reading syncT resume type")
if err := writeSync(p.parentPipe, procResume); err != nil {
return newSystemErrorWithCause(err, "writing syncT 'resume'")
}
sentResume = true
case procError:
// wait for the child process to fully complete and receive an error message
// if one was encoutered
if err := dec.Decode(&ierr); err != nil && err != io.EOF {
return newSystemErrorWithCause(err, "decoding proc error from init")
}
if ierr != nil {
break loop
}
// Programmer error.
panic("No error following JSON procError payload.")
default:
return newSystemError(fmt.Errorf("invalid JSON payload from child"))
}
}
return nil
})
if !sentRun {
return newSystemErrorWithCause(ierr, "container init")
}
if p.config.Config.Namespaces.Contains(configs.NEWNS) && !sentResume {
return newSystemError(fmt.Errorf("could not synchronise after executing prestart hooks with container process"))
}
if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil {
if err := unix.Shutdown(int(p.parentPipe.Fd()), unix.SHUT_WR); err != nil {
return newSystemErrorWithCause(err, "shutting down init pipe")
}
// Must be done after Shutdown so the child will exit and we can wait for it.
if ierr != nil {
p.wait()
@@ -379,7 +387,7 @@ func (p *initProcess) wait() (*os.ProcessState, error) {
}
// we should kill all processes in cgroup when init is died if we use host PID namespace
if p.sharePidns {
killCgroupProcesses(p.manager)
signalAllProcesses(p.manager, unix.SIGKILL)
}
return p.cmd.ProcessState, nil
}
@@ -395,8 +403,9 @@ func (p *initProcess) terminate() error {
return err
}
func (p *initProcess) startTime() (string, error) {
return system.GetProcessStartTime(p.pid())
func (p *initProcess) startTime() (uint64, error) {
stat, err := system.Stat(p.pid())
return stat.StartTime, err
}
func (p *initProcess) sendConfig() error {
@@ -428,7 +437,7 @@ func (p *initProcess) signal(sig os.Signal) error {
if !ok {
return errors.New("os: unsupported signal type")
}
return syscall.Kill(p.pid(), s)
return unix.Kill(p.pid(), s)
}
func (p *initProcess) setExternalDescriptors(newFds []string) {
@@ -440,9 +449,17 @@ func getPipeFds(pid int) ([]string, error) {
dirPath := filepath.Join("/proc", strconv.Itoa(pid), "/fd")
for i := 0; i < 3; i++ {
// XXX: This breaks if the path is not a valid symlink (which can
// happen in certain particularly unlucky mount namespace setups).
f := filepath.Join(dirPath, strconv.Itoa(i))
target, err := os.Readlink(f)
if err != nil {
// Ignore permission errors, for rootless containers and other
// non-dumpable processes. if we can't get the fd for a particular
// file, there's not much we can do.
if os.IsPermission(err) {
continue
}
return fds, err
}
fds[i] = target
@@ -450,8 +467,10 @@ func getPipeFds(pid int) ([]string, error) {
return fds, nil
}
// InitializeIO creates pipes for use with the process's STDIO
// and returns the opposite side for each
// InitializeIO creates pipes for use with the process's stdio and returns the
// opposite side for each. Do not use this if you want to have a pseudoterminal
// set up for you by libcontainer (TODO: fix that too).
// TODO: This is mostly unnecessary, and should be handled by clients.
func (p *Process) InitializeIO(rootuid, rootgid int) (i *IO, err error) {
var fds []uintptr
i = &IO{}
@@ -459,7 +478,7 @@ func (p *Process) InitializeIO(rootuid, rootgid int) (i *IO, err error) {
defer func() {
if err != nil {
for _, fd := range fds {
syscall.Close(int(fd))
unix.Close(int(fd))
}
}
}()
@@ -484,7 +503,7 @@ func (p *Process) InitializeIO(rootuid, rootgid int) (i *IO, err error) {
p.Stderr, i.Stderr = w, r
// change ownership of the pipes incase we are in a user namespace
for _, fd := range fds {
if err := syscall.Fchown(int(fd), rootuid, rootgid); err != nil {
if err := unix.Fchown(int(fd), rootuid, rootgid); err != nil {
return nil, err
}
}

View File

@@ -17,20 +17,20 @@ func newRestoredProcess(pid int, fds []string) (*restoredProcess, error) {
if err != nil {
return nil, err
}
started, err := system.GetProcessStartTime(pid)
stat, err := system.Stat(pid)
if err != nil {
return nil, err
}
return &restoredProcess{
proc: proc,
processStartTime: started,
processStartTime: stat.StartTime,
fds: fds,
}, nil
}
type restoredProcess struct {
proc *os.Process
processStartTime string
processStartTime uint64
fds []string
}
@@ -60,7 +60,7 @@ func (p *restoredProcess) wait() (*os.ProcessState, error) {
return st, nil
}
func (p *restoredProcess) startTime() (string, error) {
func (p *restoredProcess) startTime() (uint64, error) {
return p.processStartTime, nil
}
@@ -81,7 +81,7 @@ func (p *restoredProcess) setExternalDescriptors(newFds []string) {
// a persisted state.
type nonChildProcess struct {
processPid int
processStartTime string
processStartTime uint64
fds []string
}
@@ -101,7 +101,7 @@ func (p *nonChildProcess) wait() (*os.ProcessState, error) {
return nil, newGenericError(fmt.Errorf("restored process cannot be waited on"), SystemError)
}
func (p *nonChildProcess) startTime() (string, error) {
func (p *nonChildProcess) startTime() (uint64, error) {
return p.processStartTime, nil
}

View File

@@ -11,19 +11,21 @@ import (
"path"
"path/filepath"
"strings"
"syscall"
"time"
"github.com/docker/docker/pkg/mount"
"github.com/docker/docker/pkg/symlink"
"github.com/mrunalp/fileutils"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/label"
"github.com/opencontainers/runc/libcontainer/system"
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
"github.com/opencontainers/selinux/go-selinux/label"
"golang.org/x/sys/unix"
)
const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
const defaultMountFlags = unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
// needsSetupDev returns true if /dev needs to be set up.
func needsSetupDev(config *configs.Config) bool {
@@ -35,9 +37,10 @@ func needsSetupDev(config *configs.Config) bool {
return true
}
// setupRootfs sets up the devices, mount points, and filesystems for use inside a
// new mount namespace.
func setupRootfs(config *configs.Config, console *linuxConsole, pipe io.ReadWriter) (err error) {
// prepareRootfs sets up the devices, mount points, and filesystems for use
// inside a new mount namespace. It doesn't set anything as ro. You must call
// finalizeRootfs after this function to finish setting up the rootfs.
func prepareRootfs(pipe io.ReadWriter, config *configs.Config) (err error) {
if err := prepareRoot(config); err != nil {
return newSystemErrorWithCause(err, "preparing rootfs")
}
@@ -49,6 +52,7 @@ func setupRootfs(config *configs.Config, console *linuxConsole, pipe io.ReadWrit
return newSystemErrorWithCause(err, "running premount command")
}
}
if err := mountToRootfs(m, config.Rootfs, config.MountLabel); err != nil {
return newSystemErrorWithCausef(err, "mounting %q to rootfs %q at %q", m.Source, config.Rootfs, m.Destination)
}
@@ -59,17 +63,19 @@ func setupRootfs(config *configs.Config, console *linuxConsole, pipe io.ReadWrit
}
}
}
if setupDev {
if err := createDevices(config); err != nil {
return newSystemErrorWithCause(err, "creating device nodes")
}
if err := setupPtmx(config, console); err != nil {
if err := setupPtmx(config); err != nil {
return newSystemErrorWithCause(err, "setting up ptmx")
}
if err := setupDevSymlinks(config.Rootfs); err != nil {
return newSystemErrorWithCause(err, "setting up /dev symlinks")
}
}
// Signal the parent to run the pre-start hooks.
// The hooks are run after the mounts are setup, but before we switch to the new
// root, so that the old root is still available in the hooks for any mount
@@ -77,40 +83,60 @@ func setupRootfs(config *configs.Config, console *linuxConsole, pipe io.ReadWrit
if err := syncParentHooks(pipe); err != nil {
return err
}
if err := syscall.Chdir(config.Rootfs); err != nil {
// The reason these operations are done here rather than in finalizeRootfs
// is because the console-handling code gets quite sticky if we have to set
// up the console before doing the pivot_root(2). This is because the
// Console API has to also work with the ExecIn case, which means that the
// API must be able to deal with being inside as well as outside the
// container. It's just cleaner to do this here (at the expense of the
// operation not being perfectly split).
if err := unix.Chdir(config.Rootfs); err != nil {
return newSystemErrorWithCausef(err, "changing dir to %q", config.Rootfs)
}
if config.NoPivotRoot {
err = msMoveRoot(config.Rootfs)
} else {
err = pivotRoot(config.Rootfs, config.PivotDir)
err = pivotRoot(config.Rootfs)
}
if err != nil {
return newSystemErrorWithCause(err, "jailing process inside rootfs")
}
if setupDev {
if err := reOpenDevNull(); err != nil {
return newSystemErrorWithCause(err, "reopening /dev/null inside container")
}
}
return nil
}
// finalizeRootfs sets anything to ro if necessary. You must call
// prepareRootfs first.
func finalizeRootfs(config *configs.Config) (err error) {
// remount dev as ro if specified
for _, m := range config.Mounts {
if libcontainerUtils.CleanPath(m.Destination) == "/dev" {
if m.Flags&syscall.MS_RDONLY != 0 {
if err := remountReadonly(m.Destination); err != nil {
if m.Flags&unix.MS_RDONLY == unix.MS_RDONLY {
if err := remountReadonly(m); err != nil {
return newSystemErrorWithCausef(err, "remounting %q as readonly", m.Destination)
}
}
break
}
}
// set rootfs ( / ) as readonly
if config.Readonlyfs {
if err := setReadonly(); err != nil {
return newSystemErrorWithCause(err, "setting rootfs as readonly")
}
}
syscall.Umask(0022)
unix.Umask(0022)
return nil
}
@@ -152,15 +178,41 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
}
return nil
case "tmpfs":
copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP
tmpDir := ""
stat, err := os.Stat(dest)
if err != nil {
if err := os.MkdirAll(dest, 0755); err != nil {
return err
}
}
if copyUp {
tmpDir, err = ioutil.TempDir("/tmp", "runctmpdir")
if err != nil {
return newSystemErrorWithCause(err, "tmpcopyup: failed to create tmpdir")
}
defer os.RemoveAll(tmpDir)
m.Destination = tmpDir
}
if err := mountPropagate(m, rootfs, mountLabel); err != nil {
return err
}
if copyUp {
if err := fileutils.CopyDirectory(dest, tmpDir); err != nil {
errMsg := fmt.Errorf("tmpcopyup: failed to copy %s to %s: %v", dest, tmpDir, err)
if err1 := unix.Unmount(tmpDir, unix.MNT_DETACH); err1 != nil {
return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg)
}
return errMsg
}
if err := unix.Mount(tmpDir, dest, "", unix.MS_MOVE, ""); err != nil {
errMsg := fmt.Errorf("tmpcopyup: failed to move mount %s to %s: %v", tmpDir, dest, err)
if err1 := unix.Unmount(tmpDir, unix.MNT_DETACH); err1 != nil {
return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg)
}
return errMsg
}
}
if stat != nil {
if err = os.Chmod(dest, stat.Mode()); err != nil {
return err
@@ -178,7 +230,7 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
// any previous mounts can invalidate the next mount's destination.
// this can happen when a user specifies mounts within other mounts to cause breakouts or other
// evil stuff to try to escape the container's rootfs.
if dest, err = symlink.FollowSymlinkInScope(filepath.Join(rootfs, m.Destination), rootfs); err != nil {
if dest, err = symlink.FollowSymlinkInScope(dest, rootfs); err != nil {
return err
}
if err := checkMountDestination(rootfs, dest); err != nil {
@@ -194,7 +246,7 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
}
// bind mount won't change mount options, we need remount to make mount options effective.
// first check that we have non-default options required before attempting a remount
if m.Flags&^(syscall.MS_REC|syscall.MS_REMOUNT|syscall.MS_BIND) != 0 {
if m.Flags&^(unix.MS_REC|unix.MS_REMOUNT|unix.MS_BIND) != 0 {
// only remount if unique mount options are set
if err := remount(m, rootfs); err != nil {
return err
@@ -248,19 +300,32 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
}
}
}
if m.Flags&syscall.MS_RDONLY != 0 {
if m.Flags&unix.MS_RDONLY != 0 {
// remount cgroup root as readonly
mcgrouproot := &configs.Mount{
Source: m.Destination,
Device: "bind",
Destination: m.Destination,
Flags: defaultMountFlags | syscall.MS_RDONLY | syscall.MS_BIND,
Flags: defaultMountFlags | unix.MS_RDONLY | unix.MS_BIND,
}
if err := remount(mcgrouproot, rootfs); err != nil {
return err
}
}
default:
// ensure that the destination of the mount is resolved of symlinks at mount time because
// any previous mounts can invalidate the next mount's destination.
// this can happen when a user specifies mounts within other mounts to cause breakouts or other
// evil stuff to try to escape the container's rootfs.
var err error
if dest, err = symlink.FollowSymlinkInScope(dest, rootfs); err != nil {
return err
}
if err := checkMountDestination(rootfs, dest); err != nil {
return err
}
// update the mount with the correct dest after symlinks are resolved.
m.Destination = dest
if err := os.MkdirAll(dest, 0755); err != nil {
return err
}
@@ -283,7 +348,7 @@ func getCgroupMounts(m *configs.Mount) ([]*configs.Mount, error) {
var binds []*configs.Mount
for _, mm := range mounts {
dir, err := mm.GetThisCgroupDir(cgroupPaths)
dir, err := mm.GetOwnCgroup(cgroupPaths)
if err != nil {
return nil, err
}
@@ -294,8 +359,8 @@ func getCgroupMounts(m *configs.Mount) ([]*configs.Mount, error) {
binds = append(binds, &configs.Mount{
Device: "bind",
Source: filepath.Join(mm.Mountpoint, relDir),
Destination: filepath.Join(m.Destination, strings.Join(mm.Subsystems, ",")),
Flags: syscall.MS_BIND | syscall.MS_REC | m.Flags,
Destination: filepath.Join(m.Destination, filepath.Base(mm.Mountpoint)),
Flags: unix.MS_BIND | unix.MS_REC | m.Flags,
PropagationFlags: m.PropagationFlags,
})
}
@@ -371,22 +436,22 @@ func setupDevSymlinks(rootfs string) error {
// needs to be called after we chroot/pivot into the container's rootfs so that any
// symlinks are resolved locally.
func reOpenDevNull() error {
var stat, devNullStat syscall.Stat_t
var stat, devNullStat unix.Stat_t
file, err := os.OpenFile("/dev/null", os.O_RDWR, 0)
if err != nil {
return fmt.Errorf("Failed to open /dev/null - %s", err)
}
defer file.Close()
if err := syscall.Fstat(int(file.Fd()), &devNullStat); err != nil {
if err := unix.Fstat(int(file.Fd()), &devNullStat); err != nil {
return err
}
for fd := 0; fd < 3; fd++ {
if err := syscall.Fstat(fd, &stat); err != nil {
if err := unix.Fstat(fd, &stat); err != nil {
return err
}
if stat.Rdev == devNullStat.Rdev {
// Close and re-open the fd.
if err := syscall.Dup3(int(file.Fd()), fd, 0); err != nil {
if err := unix.Dup3(int(file.Fd()), fd, 0); err != nil {
return err
}
}
@@ -397,16 +462,16 @@ func reOpenDevNull() error {
// Create the device nodes in the container.
func createDevices(config *configs.Config) error {
useBindMount := system.RunningInUserNS() || config.Namespaces.Contains(configs.NEWUSER)
oldMask := syscall.Umask(0000)
oldMask := unix.Umask(0000)
for _, node := range config.Devices {
// containers running in a user namespace are not allowed to mknod
// devices so we can just bind mount it from the host.
if err := createDeviceNode(config.Rootfs, node, useBindMount); err != nil {
syscall.Umask(oldMask)
unix.Umask(oldMask)
return err
}
}
syscall.Umask(oldMask)
unix.Umask(oldMask)
return nil
}
@@ -418,7 +483,7 @@ func bindMountDeviceNode(dest string, node *configs.Device) error {
if f != nil {
f.Close()
}
return syscall.Mount(node.Path, dest, "bind", syscall.MS_BIND, "")
return unix.Mount(node.Path, dest, "bind", unix.MS_BIND, "")
}
// Creates the device node in the rootfs of the container.
@@ -445,17 +510,19 @@ func createDeviceNode(rootfs string, node *configs.Device, bind bool) error {
func mknodDevice(dest string, node *configs.Device) error {
fileMode := node.FileMode
switch node.Type {
case 'c':
fileMode |= syscall.S_IFCHR
case 'c', 'u':
fileMode |= unix.S_IFCHR
case 'b':
fileMode |= syscall.S_IFBLK
fileMode |= unix.S_IFBLK
case 'p':
fileMode |= unix.S_IFIFO
default:
return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path)
}
if err := syscall.Mknod(dest, uint32(fileMode), node.Mkdev()); err != nil {
if err := unix.Mknod(dest, uint32(fileMode), node.Mkdev()); err != nil {
return err
}
return syscall.Chown(dest, int(node.Uid), int(node.Gid))
return unix.Chown(dest, int(node.Uid), int(node.Gid))
}
func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info {
@@ -522,34 +589,36 @@ func rootfsParentMountPrivate(rootfs string) error {
// shared. Secondly when we bind mount rootfs it will propagate to
// parent namespace and we don't want that to happen.
if sharedMount {
return syscall.Mount("", parentMount, "", syscall.MS_PRIVATE, "")
return unix.Mount("", parentMount, "", unix.MS_PRIVATE, "")
}
return nil
}
func prepareRoot(config *configs.Config) error {
flag := syscall.MS_SLAVE | syscall.MS_REC
flag := unix.MS_SLAVE | unix.MS_REC
if config.RootPropagation != 0 {
flag = config.RootPropagation
}
if err := syscall.Mount("", "/", "", uintptr(flag), ""); err != nil {
if err := unix.Mount("", "/", "", uintptr(flag), ""); err != nil {
return err
}
if config.NoPivotRoot {
if err := rootfsParentMountPrivate(config.Rootfs); err != nil {
return err
}
// Make parent mount private to make sure following bind mount does
// not propagate in other namespaces. Also it will help with kernel
// check pass in pivot_root. (IS_SHARED(new_mnt->mnt_parent))
if err := rootfsParentMountPrivate(config.Rootfs); err != nil {
return err
}
return syscall.Mount(config.Rootfs, config.Rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, "")
return unix.Mount(config.Rootfs, config.Rootfs, "bind", unix.MS_BIND|unix.MS_REC, "")
}
func setReadonly() error {
return syscall.Mount("/", "/", "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, "")
return unix.Mount("/", "/", "bind", unix.MS_BIND|unix.MS_REMOUNT|unix.MS_RDONLY|unix.MS_REC, "")
}
func setupPtmx(config *configs.Config, console *linuxConsole) error {
func setupPtmx(config *configs.Config) error {
ptmx := filepath.Join(config.Rootfs, "dev/ptmx")
if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) {
return err
@@ -557,66 +626,73 @@ func setupPtmx(config *configs.Config, console *linuxConsole) error {
if err := os.Symlink("pts/ptmx", ptmx); err != nil {
return fmt.Errorf("symlink dev ptmx %s", err)
}
if console != nil {
return console.mount(config.Rootfs, config.MountLabel)
}
return nil
}
func pivotRoot(rootfs, pivotBaseDir string) (err error) {
if pivotBaseDir == "" {
pivotBaseDir = "/"
}
tmpDir := filepath.Join(rootfs, pivotBaseDir)
if err := os.MkdirAll(tmpDir, 0755); err != nil {
return fmt.Errorf("can't create tmp dir %s, error %v", tmpDir, err)
}
pivotDir, err := ioutil.TempDir(tmpDir, ".pivot_root")
if err != nil {
return fmt.Errorf("can't create pivot_root dir %s, error %v", pivotDir, err)
}
defer func() {
errVal := os.Remove(pivotDir)
if err == nil {
err = errVal
}
}()
if err := syscall.PivotRoot(rootfs, pivotDir); err != nil {
// Make the parent mount private
if err := rootfsParentMountPrivate(rootfs); err != nil {
return err
}
// Try again
if err := syscall.PivotRoot(rootfs, pivotDir); err != nil {
return fmt.Errorf("pivot_root %s", err)
}
}
if err := syscall.Chdir("/"); err != nil {
return fmt.Errorf("chdir / %s", err)
}
// path to pivot dir now changed, update
pivotDir = filepath.Join(pivotBaseDir, filepath.Base(pivotDir))
// pivotRoot will call pivot_root such that rootfs becomes the new root
// filesystem, and everything else is cleaned up.
func pivotRoot(rootfs string) error {
// While the documentation may claim otherwise, pivot_root(".", ".") is
// actually valid. What this results in is / being the new root but
// /proc/self/cwd being the old root. Since we can play around with the cwd
// with pivot_root this allows us to pivot without creating directories in
// the rootfs. Shout-outs to the LXC developers for giving us this idea.
// Make pivotDir rprivate to make sure any of the unmounts don't
// propagate to parent.
if err := syscall.Mount("", pivotDir, "", syscall.MS_PRIVATE|syscall.MS_REC, ""); err != nil {
oldroot, err := unix.Open("/", unix.O_DIRECTORY|unix.O_RDONLY, 0)
if err != nil {
return err
}
defer unix.Close(oldroot)
newroot, err := unix.Open(rootfs, unix.O_DIRECTORY|unix.O_RDONLY, 0)
if err != nil {
return err
}
defer unix.Close(newroot)
// Change to the new root so that the pivot_root actually acts on it.
if err := unix.Fchdir(newroot); err != nil {
return err
}
if err := syscall.Unmount(pivotDir, syscall.MNT_DETACH); err != nil {
return fmt.Errorf("unmount pivot_root dir %s", err)
if err := unix.PivotRoot(".", "."); err != nil {
return fmt.Errorf("pivot_root %s", err)
}
// Currently our "." is oldroot (according to the current kernel code).
// However, purely for safety, we will fchdir(oldroot) since there isn't
// really any guarantee from the kernel what /proc/self/cwd will be after a
// pivot_root(2).
if err := unix.Fchdir(oldroot); err != nil {
return err
}
// Make oldroot rprivate to make sure our unmounts don't propagate to the
// host (and thus bork the machine).
if err := unix.Mount("", ".", "", unix.MS_PRIVATE|unix.MS_REC, ""); err != nil {
return err
}
// Preform the unmount. MNT_DETACH allows us to unmount /proc/self/cwd.
if err := unix.Unmount(".", unix.MNT_DETACH); err != nil {
return err
}
// Switch back to our shiny new root.
if err := unix.Chdir("/"); err != nil {
return fmt.Errorf("chdir / %s", err)
}
return nil
}
func msMoveRoot(rootfs string) error {
if err := syscall.Mount(rootfs, "/", "", syscall.MS_MOVE, ""); err != nil {
if err := unix.Mount(rootfs, "/", "", unix.MS_MOVE, ""); err != nil {
return err
}
if err := syscall.Chroot("."); err != nil {
if err := unix.Chroot("."); err != nil {
return err
}
return syscall.Chdir("/")
return unix.Chdir("/")
}
// createIfNotExists creates a file or a directory only if it does not already exist.
@@ -639,18 +715,27 @@ func createIfNotExists(path string, isDir bool) error {
return nil
}
// remountReadonly will bind over the top of an existing path and ensure that it is read-only.
func remountReadonly(path string) error {
// readonlyPath will make a path read only.
func readonlyPath(path string) error {
if err := unix.Mount(path, path, "", unix.MS_BIND|unix.MS_REC, ""); err != nil {
if os.IsNotExist(err) {
return nil
}
return err
}
return unix.Mount(path, path, "", unix.MS_BIND|unix.MS_REMOUNT|unix.MS_RDONLY|unix.MS_REC, "")
}
// remountReadonly will remount an existing mount point and ensure that it is read-only.
func remountReadonly(m *configs.Mount) error {
var (
dest = m.Destination
flags = m.Flags
)
for i := 0; i < 5; i++ {
if err := syscall.Mount("", path, "", syscall.MS_REMOUNT|syscall.MS_RDONLY, ""); err != nil && !os.IsNotExist(err) {
if err := unix.Mount("", dest, "", uintptr(flags|unix.MS_REMOUNT|unix.MS_RDONLY), ""); err != nil {
switch err {
case syscall.EINVAL:
// Probably not a mountpoint, use bind-mount
if err := syscall.Mount(path, path, "", syscall.MS_BIND, ""); err != nil {
return err
}
return syscall.Mount(path, path, "", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC|defaultMountFlags, "")
case syscall.EBUSY:
case unix.EBUSY:
time.Sleep(100 * time.Millisecond)
continue
default:
@@ -659,7 +744,7 @@ func remountReadonly(path string) error {
}
return nil
}
return fmt.Errorf("unable to mount %s as readonly max retries reached", path)
return fmt.Errorf("unable to mount %s as readonly max retries reached", dest)
}
// maskPath masks the top of the specified path inside a container to avoid
@@ -668,9 +753,9 @@ func remountReadonly(path string) error {
// For files, maskPath bind mounts /dev/null over the top of the specified path.
// For directories, maskPath mounts read-only tmpfs over the top of the specified path.
func maskPath(path string) error {
if err := syscall.Mount("/dev/null", path, "", syscall.MS_BIND, ""); err != nil && !os.IsNotExist(err) {
if err == syscall.ENOTDIR {
return syscall.Mount("tmpfs", path, "tmpfs", syscall.MS_RDONLY, "")
if err := unix.Mount("/dev/null", path, "", unix.MS_BIND, ""); err != nil && !os.IsNotExist(err) {
if err == unix.ENOTDIR {
return unix.Mount("tmpfs", path, "tmpfs", unix.MS_RDONLY, "")
}
return err
}
@@ -691,7 +776,7 @@ func remount(m *configs.Mount, rootfs string) error {
if !strings.HasPrefix(dest, rootfs) {
dest = filepath.Join(rootfs, dest)
}
if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags|syscall.MS_REMOUNT), ""); err != nil {
if err := unix.Mount(m.Source, dest, m.Device, uintptr(m.Flags|unix.MS_REMOUNT), ""); err != nil {
return err
}
return nil
@@ -706,18 +791,20 @@ func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error {
flags = m.Flags
)
if libcontainerUtils.CleanPath(dest) == "/dev" {
flags &= ^syscall.MS_RDONLY
flags &= ^unix.MS_RDONLY
}
if !strings.HasPrefix(dest, rootfs) {
copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP
if !(copyUp || strings.HasPrefix(dest, rootfs)) {
dest = filepath.Join(rootfs, dest)
}
if err := syscall.Mount(m.Source, dest, m.Device, uintptr(flags), data); err != nil {
if err := unix.Mount(m.Source, dest, m.Device, uintptr(flags), data); err != nil {
return err
}
for _, pflag := range m.PropagationFlags {
if err := syscall.Mount("", dest, "", uintptr(pflag), ""); err != nil {
if err := unix.Mount("", dest, "", uintptr(pflag), ""); err != nil {
return err
}
}

View File

@@ -7,21 +7,19 @@ import (
"fmt"
"os"
"strings"
"syscall"
"github.com/opencontainers/runc/libcontainer/configs"
libseccomp "github.com/seccomp/libseccomp-golang"
"golang.org/x/sys/unix"
)
var (
actAllow = libseccomp.ActAllow
actTrap = libseccomp.ActTrap
actKill = libseccomp.ActKill
actTrace = libseccomp.ActTrace.SetReturnCode(int16(syscall.EPERM))
actErrno = libseccomp.ActErrno.SetReturnCode(int16(syscall.EPERM))
// SeccompModeFilter refers to the syscall argument SECCOMP_MODE_FILTER.
SeccompModeFilter = uintptr(2)
actTrace = libseccomp.ActTrace.SetReturnCode(int16(unix.EPERM))
actErrno = libseccomp.ActErrno.SetReturnCode(int16(unix.EPERM))
)
// Filters given syscalls in a container, preventing them from being used
@@ -84,9 +82,9 @@ func IsEnabled() bool {
s, err := parseStatusFile("/proc/self/status")
if err != nil {
// Check if Seccomp is supported, via CONFIG_SECCOMP.
if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_GET_SECCOMP, 0, 0); err != syscall.EINVAL {
if err := unix.Prctl(unix.PR_GET_SECCOMP, 0, 0, 0, 0); err != unix.EINVAL {
// Make sure the kernel has CONFIG_SECCOMP_FILTER.
if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_SECCOMP, SeccompModeFilter, 0); err != syscall.EINVAL {
if err := unix.Prctl(unix.PR_SET_SECCOMP, unix.SECCOMP_MODE_FILTER, 0, 0, 0); err != unix.EINVAL {
return true
}
}
@@ -212,10 +210,6 @@ func parseStatusFile(path string) (map[string]string, error) {
status := make(map[string]string)
for s.Scan() {
if err := s.Err(); err != nil {
return nil, err
}
text := s.Text()
parts := strings.Split(text, ":")
@@ -225,5 +219,9 @@ func parseStatusFile(path string) (map[string]string, error) {
status[parts[0]] = parts[1]
}
if err := s.Err(); err != nil {
return nil, err
}
return status, nil
}

View File

@@ -1,511 +0,0 @@
// +build linux
package selinux
import (
"bufio"
"crypto/rand"
"encoding/binary"
"fmt"
"io"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
"sync"
"syscall"
"github.com/opencontainers/runc/libcontainer/system"
)
const (
Enforcing = 1
Permissive = 0
Disabled = -1
selinuxDir = "/etc/selinux/"
selinuxConfig = selinuxDir + "config"
selinuxTypeTag = "SELINUXTYPE"
selinuxTag = "SELINUX"
selinuxPath = "/sys/fs/selinux"
xattrNameSelinux = "security.selinux"
stRdOnly = 0x01
)
var (
assignRegex = regexp.MustCompile(`^([^=]+)=(.*)$`)
mcsList = make(map[string]bool)
mcsLock sync.Mutex
selinuxfs = "unknown"
selinuxEnabled = false // Stores whether selinux is currently enabled
selinuxEnabledChecked = false // Stores whether selinux enablement has been checked or established yet
)
type SELinuxContext map[string]string
// SetDisabled disables selinux support for the package
func SetDisabled() {
selinuxEnabled, selinuxEnabledChecked = false, true
}
// getSelinuxMountPoint returns the path to the mountpoint of an selinuxfs
// filesystem or an empty string if no mountpoint is found. Selinuxfs is
// a proc-like pseudo-filesystem that exposes the selinux policy API to
// processes. The existence of an selinuxfs mount is used to determine
// whether selinux is currently enabled or not.
func getSelinuxMountPoint() string {
if selinuxfs != "unknown" {
return selinuxfs
}
selinuxfs = ""
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return selinuxfs
}
defer f.Close()
scanner := bufio.NewScanner(f)
for scanner.Scan() {
txt := scanner.Text()
// Safe as mountinfo encodes mountpoints with spaces as \040.
sepIdx := strings.Index(txt, " - ")
if sepIdx == -1 {
continue
}
if !strings.Contains(txt[sepIdx:], "selinuxfs") {
continue
}
fields := strings.Split(txt, " ")
if len(fields) < 5 {
continue
}
selinuxfs = fields[4]
break
}
if selinuxfs != "" {
var buf syscall.Statfs_t
syscall.Statfs(selinuxfs, &buf)
if (buf.Flags & stRdOnly) == 1 {
selinuxfs = ""
}
}
return selinuxfs
}
// SelinuxEnabled returns whether selinux is currently enabled.
func SelinuxEnabled() bool {
if selinuxEnabledChecked {
return selinuxEnabled
}
selinuxEnabledChecked = true
if fs := getSelinuxMountPoint(); fs != "" {
if con, _ := Getcon(); con != "kernel" {
selinuxEnabled = true
}
}
return selinuxEnabled
}
func readConfig(target string) (value string) {
var (
val, key string
bufin *bufio.Reader
)
in, err := os.Open(selinuxConfig)
if err != nil {
return ""
}
defer in.Close()
bufin = bufio.NewReader(in)
for done := false; !done; {
var line string
if line, err = bufin.ReadString('\n'); err != nil {
if err != io.EOF {
return ""
}
done = true
}
line = strings.TrimSpace(line)
if len(line) == 0 {
// Skip blank lines
continue
}
if line[0] == ';' || line[0] == '#' {
// Skip comments
continue
}
if groups := assignRegex.FindStringSubmatch(line); groups != nil {
key, val = strings.TrimSpace(groups[1]), strings.TrimSpace(groups[2])
if key == target {
return strings.Trim(val, "\"")
}
}
}
return ""
}
func getSELinuxPolicyRoot() string {
return selinuxDir + readConfig(selinuxTypeTag)
}
func readCon(name string) (string, error) {
var val string
in, err := os.Open(name)
if err != nil {
return "", err
}
defer in.Close()
_, err = fmt.Fscanf(in, "%s", &val)
return val, err
}
// Setfilecon sets the SELinux label for this path or returns an error.
func Setfilecon(path string, scon string) error {
return system.Lsetxattr(path, xattrNameSelinux, []byte(scon), 0)
}
// Getfilecon returns the SELinux label for this path or returns an error.
func Getfilecon(path string) (string, error) {
con, err := system.Lgetxattr(path, xattrNameSelinux)
if err != nil {
return "", err
}
// Trim the NUL byte at the end of the byte buffer, if present.
if len(con) > 0 && con[len(con)-1] == '\x00' {
con = con[:len(con)-1]
}
return string(con), nil
}
func Setfscreatecon(scon string) error {
return writeCon(fmt.Sprintf("/proc/self/task/%d/attr/fscreate", syscall.Gettid()), scon)
}
func Getfscreatecon() (string, error) {
return readCon(fmt.Sprintf("/proc/self/task/%d/attr/fscreate", syscall.Gettid()))
}
// Getcon returns the SELinux label of the current process thread, or an error.
func Getcon() (string, error) {
return readCon(fmt.Sprintf("/proc/self/task/%d/attr/current", syscall.Gettid()))
}
// Getpidcon returns the SELinux label of the given pid, or an error.
func Getpidcon(pid int) (string, error) {
return readCon(fmt.Sprintf("/proc/%d/attr/current", pid))
}
func Getexeccon() (string, error) {
return readCon(fmt.Sprintf("/proc/self/task/%d/attr/exec", syscall.Gettid()))
}
func writeCon(name string, val string) error {
out, err := os.OpenFile(name, os.O_WRONLY, 0)
if err != nil {
return err
}
defer out.Close()
if val != "" {
_, err = out.Write([]byte(val))
} else {
_, err = out.Write(nil)
}
return err
}
func Setexeccon(scon string) error {
return writeCon(fmt.Sprintf("/proc/self/task/%d/attr/exec", syscall.Gettid()), scon)
}
func (c SELinuxContext) Get() string {
return fmt.Sprintf("%s:%s:%s:%s", c["user"], c["role"], c["type"], c["level"])
}
func NewContext(scon string) SELinuxContext {
c := make(SELinuxContext)
if len(scon) != 0 {
con := strings.SplitN(scon, ":", 4)
c["user"] = con[0]
c["role"] = con[1]
c["type"] = con[2]
c["level"] = con[3]
}
return c
}
func ReserveLabel(scon string) {
if len(scon) != 0 {
con := strings.SplitN(scon, ":", 4)
mcsAdd(con[3])
}
}
func selinuxEnforcePath() string {
return fmt.Sprintf("%s/enforce", selinuxPath)
}
func SelinuxGetEnforce() int {
var enforce int
enforceS, err := readCon(selinuxEnforcePath())
if err != nil {
return -1
}
enforce, err = strconv.Atoi(string(enforceS))
if err != nil {
return -1
}
return enforce
}
func SelinuxSetEnforce(mode int) error {
return writeCon(selinuxEnforcePath(), fmt.Sprintf("%d", mode))
}
func SelinuxGetEnforceMode() int {
switch readConfig(selinuxTag) {
case "enforcing":
return Enforcing
case "permissive":
return Permissive
}
return Disabled
}
func mcsAdd(mcs string) error {
mcsLock.Lock()
defer mcsLock.Unlock()
if mcsList[mcs] {
return fmt.Errorf("MCS Label already exists")
}
mcsList[mcs] = true
return nil
}
func mcsDelete(mcs string) {
mcsLock.Lock()
mcsList[mcs] = false
mcsLock.Unlock()
}
func IntToMcs(id int, catRange uint32) string {
var (
SETSIZE = int(catRange)
TIER = SETSIZE
ORD = id
)
if id < 1 || id > 523776 {
return ""
}
for ORD > TIER {
ORD = ORD - TIER
TIER--
}
TIER = SETSIZE - TIER
ORD = ORD + TIER
return fmt.Sprintf("s0:c%d,c%d", TIER, ORD)
}
func uniqMcs(catRange uint32) string {
var (
n uint32
c1, c2 uint32
mcs string
)
for {
binary.Read(rand.Reader, binary.LittleEndian, &n)
c1 = n % catRange
binary.Read(rand.Reader, binary.LittleEndian, &n)
c2 = n % catRange
if c1 == c2 {
continue
} else {
if c1 > c2 {
t := c1
c1 = c2
c2 = t
}
}
mcs = fmt.Sprintf("s0:c%d,c%d", c1, c2)
if err := mcsAdd(mcs); err != nil {
continue
}
break
}
return mcs
}
func FreeLxcContexts(scon string) {
if len(scon) != 0 {
con := strings.SplitN(scon, ":", 4)
mcsDelete(con[3])
}
}
var roFileLabel string
func GetROFileLabel() (fileLabel string) {
return roFileLabel
}
func GetLxcContexts() (processLabel string, fileLabel string) {
var (
val, key string
bufin *bufio.Reader
)
if !SelinuxEnabled() {
return "", ""
}
lxcPath := fmt.Sprintf("%s/contexts/lxc_contexts", getSELinuxPolicyRoot())
in, err := os.Open(lxcPath)
if err != nil {
return "", ""
}
defer in.Close()
bufin = bufio.NewReader(in)
for done := false; !done; {
var line string
if line, err = bufin.ReadString('\n'); err != nil {
if err == io.EOF {
done = true
} else {
goto exit
}
}
line = strings.TrimSpace(line)
if len(line) == 0 {
// Skip blank lines
continue
}
if line[0] == ';' || line[0] == '#' {
// Skip comments
continue
}
if groups := assignRegex.FindStringSubmatch(line); groups != nil {
key, val = strings.TrimSpace(groups[1]), strings.TrimSpace(groups[2])
if key == "process" {
processLabel = strings.Trim(val, "\"")
}
if key == "file" {
fileLabel = strings.Trim(val, "\"")
}
if key == "ro_file" {
roFileLabel = strings.Trim(val, "\"")
}
}
}
if processLabel == "" || fileLabel == "" {
return "", ""
}
if roFileLabel == "" {
roFileLabel = fileLabel
}
exit:
// mcs := IntToMcs(os.Getpid(), 1024)
mcs := uniqMcs(1024)
scon := NewContext(processLabel)
scon["level"] = mcs
processLabel = scon.Get()
scon = NewContext(fileLabel)
scon["level"] = mcs
fileLabel = scon.Get()
return processLabel, fileLabel
}
func SecurityCheckContext(val string) error {
return writeCon(fmt.Sprintf("%s.context", selinuxPath), val)
}
func CopyLevel(src, dest string) (string, error) {
if src == "" {
return "", nil
}
if err := SecurityCheckContext(src); err != nil {
return "", err
}
if err := SecurityCheckContext(dest); err != nil {
return "", err
}
scon := NewContext(src)
tcon := NewContext(dest)
mcsDelete(tcon["level"])
mcsAdd(scon["level"])
tcon["level"] = scon["level"]
return tcon.Get(), nil
}
// Prevent users from relabing system files
func badPrefix(fpath string) error {
var badprefixes = []string{"/usr"}
for _, prefix := range badprefixes {
if fpath == prefix || strings.HasPrefix(fpath, fmt.Sprintf("%s/", prefix)) {
return fmt.Errorf("Relabeling content in %s is not allowed.", prefix)
}
}
return nil
}
// Chcon changes the fpath file object to the SELinux label scon.
// If the fpath is a directory and recurse is true Chcon will walk the
// directory tree setting the label
func Chcon(fpath string, scon string, recurse bool) error {
if scon == "" {
return nil
}
if err := badPrefix(fpath); err != nil {
return err
}
callback := func(p string, info os.FileInfo, err error) error {
return Setfilecon(p, scon)
}
if recurse {
return filepath.Walk(fpath, callback)
}
return Setfilecon(fpath, scon)
}
// DupSecOpt takes an SELinux process label and returns security options that
// can will set the SELinux Type and Level for future container processes
func DupSecOpt(src string) []string {
if src == "" {
return nil
}
con := NewContext(src)
if con["user"] == "" ||
con["role"] == "" ||
con["type"] == "" ||
con["level"] == "" {
return nil
}
return []string{"label=user:" + con["user"],
"label=role:" + con["role"],
"label=type:" + con["type"],
"label=level:" + con["level"]}
}
// DisableSecOpt returns a security opt that can be used to disabling SELinux
// labeling support for future container processes
func DisableSecOpt() []string {
return []string{"label=disable"}
}

View File

@@ -8,15 +8,19 @@ import (
"github.com/opencontainers/runc/libcontainer/apparmor"
"github.com/opencontainers/runc/libcontainer/keys"
"github.com/opencontainers/runc/libcontainer/label"
"github.com/opencontainers/runc/libcontainer/seccomp"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/selinux/go-selinux/label"
"golang.org/x/sys/unix"
)
// linuxSetnsInit performs the container's initialization for running a new process
// inside an existing container.
type linuxSetnsInit struct {
config *initConfig
pipe *os.File
consoleSocket *os.File
config *initConfig
}
func (l *linuxSetnsInit) getSessionRingName() string {
@@ -30,8 +34,16 @@ func (l *linuxSetnsInit) Init() error {
return err
}
}
if l.config.CreateConsole {
if err := setupConsole(l.consoleSocket, l.config, false); err != nil {
return err
}
if err := system.Setctty(); err != nil {
return err
}
}
if l.config.NoNewPrivileges {
if err := system.Prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
return err
}
}

View File

@@ -4,24 +4,26 @@ package libcontainer
import (
"fmt"
"io"
"os"
"os/exec"
"syscall"
"syscall" //only for Exec
"github.com/opencontainers/runc/libcontainer/apparmor"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/keys"
"github.com/opencontainers/runc/libcontainer/label"
"github.com/opencontainers/runc/libcontainer/seccomp"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/selinux/go-selinux/label"
"golang.org/x/sys/unix"
)
type linuxStandardInit struct {
pipe io.ReadWriteCloser
parentPid int
stateDirFD int
config *initConfig
pipe *os.File
consoleSocket *os.File
parentPid int
fifoFd int
config *initConfig
}
func (l *linuxStandardInit) getSessionRingParams() (string, uint32, uint32) {
@@ -40,10 +42,6 @@ func (l *linuxStandardInit) getSessionRingParams() (string, uint32, uint32) {
return fmt.Sprintf("_ses.%s", l.config.ContainerId), 0xffffffff, newperms
}
// PR_SET_NO_NEW_PRIVS isn't exposed in Golang so we define it ourselves copying the value
// the kernel
const PR_SET_NO_NEW_PRIVS = 0x26
func (l *linuxStandardInit) Init() error {
if !l.config.Config.NoNewKeyring {
ringname, keepperms, newperms := l.getSessionRingParams()
@@ -59,18 +57,6 @@ func (l *linuxStandardInit) Init() error {
}
}
var console *linuxConsole
if l.config.Console != "" {
console = newConsoleFromPath(l.config.Console)
if err := console.dupStdio(); err != nil {
return err
}
}
if console != nil {
if err := system.Setctty(); err != nil {
return err
}
}
if err := setupNetwork(l.config); err != nil {
return err
}
@@ -79,14 +65,35 @@ func (l *linuxStandardInit) Init() error {
}
label.Init()
// InitializeMountNamespace() can be executed only for a new mount namespace
// prepareRootfs() can be executed only for a new mount namespace.
if l.config.Config.Namespaces.Contains(configs.NEWNS) {
if err := setupRootfs(l.config.Config, console, l.pipe); err != nil {
if err := prepareRootfs(l.pipe, l.config.Config); err != nil {
return err
}
}
// Set up the console. This has to be done *before* we finalize the rootfs,
// but *after* we've given the user the chance to set up all of the mounts
// they wanted.
if l.config.CreateConsole {
if err := setupConsole(l.consoleSocket, l.config, true); err != nil {
return err
}
if err := system.Setctty(); err != nil {
return err
}
}
// Finish the rootfs setup.
if l.config.Config.Namespaces.Contains(configs.NEWNS) {
if err := finalizeRootfs(l.config.Config); err != nil {
return err
}
}
if hostname := l.config.Config.Hostname; hostname != "" {
if err := syscall.Sethostname([]byte(hostname)); err != nil {
if err := unix.Sethostname([]byte(hostname)); err != nil {
return err
}
}
@@ -103,7 +110,7 @@ func (l *linuxStandardInit) Init() error {
}
}
for _, path := range l.config.Config.ReadonlyPaths {
if err := remountReadonly(path); err != nil {
if err := readonlyPath(path); err != nil {
return err
}
}
@@ -117,7 +124,7 @@ func (l *linuxStandardInit) Init() error {
return err
}
if l.config.NoNewPrivileges {
if err := system.Prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
return err
}
}
@@ -146,8 +153,8 @@ func (l *linuxStandardInit) Init() error {
// compare the parent from the initial start of the init process and make sure that it did not change.
// if the parent changes that means it died and we were reparented to something else so we should
// just kill ourself and not cause problems for someone else.
if syscall.Getppid() != l.parentPid {
return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
if unix.Getppid() != l.parentPid {
return unix.Kill(unix.Getpid(), unix.SIGKILL)
}
// check for the arg before waiting to make sure it exists and it is returned
// as a create time error.
@@ -157,13 +164,15 @@ func (l *linuxStandardInit) Init() error {
}
// close the pipe to signal that we have completed our init.
l.pipe.Close()
// wait for the fifo to be opened on the other side before
// exec'ing the users process.
fd, err := syscall.Openat(l.stateDirFD, execFifoFilename, os.O_WRONLY|syscall.O_CLOEXEC, 0)
// Wait for the FIFO to be opened on the other side before exec-ing the
// user process. We open it through /proc/self/fd/$fd, because the fd that
// was given to us was an O_PATH fd to the fifo itself. Linux allows us to
// re-open an O_PATH fd through /proc.
fd, err := unix.Open(fmt.Sprintf("/proc/self/fd/%d", l.fifoFd), unix.O_WRONLY|unix.O_CLOEXEC, 0)
if err != nil {
return newSystemErrorWithCause(err, "openat exec fifo")
}
if _, err := syscall.Write(fd, []byte("0")); err != nil {
if _, err := unix.Write(fd, []byte("0")); err != nil {
return newSystemErrorWithCause(err, "write 0 exec fifo")
}
if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges {
@@ -171,6 +180,9 @@ func (l *linuxStandardInit) Init() error {
return newSystemErrorWithCause(err, "init seccomp")
}
}
// close the statedir fd before exec because the kernel resets dumpable in the wrong order
// https://github.com/torvalds/linux/blob/v4.9/fs/exec.c#L1290-L1318
unix.Close(l.fifoFd)
if err := syscall.Exec(name, l.config.Args[0:], os.Environ()); err != nil {
return newSystemErrorWithCause(err, "exec user process")
}

View File

@@ -6,11 +6,12 @@ import (
"fmt"
"os"
"path/filepath"
"syscall"
"github.com/Sirupsen/logrus"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/utils"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)
func newStateTransitionError(from, to containerState) error {
@@ -39,7 +40,7 @@ type containerState interface {
func destroy(c *linuxContainer) error {
if !c.config.Namespaces.Contains(configs.NEWPID) {
if err := killCgroupProcesses(c.cgroupManager); err != nil {
if err := signalAllProcesses(c.cgroupManager, unix.SIGKILL); err != nil {
logrus.Warn(err)
}
}
@@ -58,10 +59,9 @@ func destroy(c *linuxContainer) error {
func runPoststopHooks(c *linuxContainer) error {
if c.config.Hooks != nil {
s := configs.HookState{
Version: c.config.Version,
ID: c.id,
Root: c.config.Rootfs,
BundlePath: utils.SearchLabels(c.config.Labels, "bundle"),
Version: c.config.Version,
ID: c.id,
Bundle: utils.SearchLabels(c.config.Labels, "bundle"),
}
for _, hook := range c.config.Hooks.Poststop {
if err := hook.Run(s); err != nil {
@@ -83,10 +83,7 @@ func (b *stoppedState) status() Status {
func (b *stoppedState) transition(s containerState) error {
switch s.(type) {
case *runningState:
b.c.state = s
return nil
case *restoredState:
case *runningState, *restoredState:
b.c.state = s
return nil
case *stoppedState:
@@ -160,7 +157,7 @@ func (i *createdState) transition(s containerState) error {
}
func (i *createdState) destroy() error {
i.c.initProcess.signal(syscall.SIGKILL)
i.c.initProcess.signal(unix.SIGKILL)
return destroy(i.c)
}
@@ -199,7 +196,7 @@ func (p *pausedState) destroy() error {
return newGenericError(fmt.Errorf("container is paused"), ContainerPaused)
}
// restoredState is the same as the running state but also has accociated checkpoint
// restoredState is the same as the running state but also has associated checkpoint
// information that maybe need destroyed when the container is stopped and destroy is called.
type restoredState struct {
imageDir string
@@ -212,9 +209,7 @@ func (r *restoredState) status() Status {
func (r *restoredState) transition(s containerState) error {
switch s.(type) {
case *stoppedState:
return nil
case *runningState:
case *stoppedState, *runningState:
return nil
}
return newStateTransitionError(r, s)

View File

@@ -0,0 +1,107 @@
package libcontainer
import (
"encoding/json"
"fmt"
"io"
"github.com/opencontainers/runc/libcontainer/utils"
)
type syncType string
// Constants that are used for synchronisation between the parent and child
// during container setup. They come in pairs (with procError being a generic
// response which is followed by a &genericError).
//
// [ child ] <-> [ parent ]
//
// procHooks --> [run hooks]
// <-- procResume
//
// procConsole -->
// <-- procConsoleReq
// [send(fd)] --> [recv(fd)]
// <-- procConsoleAck
//
// procReady --> [final setup]
// <-- procRun
const (
procError syncType = "procError"
procReady syncType = "procReady"
procRun syncType = "procRun"
procHooks syncType = "procHooks"
procResume syncType = "procResume"
)
type syncT struct {
Type syncType `json:"type"`
}
// writeSync is used to write to a synchronisation pipe. An error is returned
// if there was a problem writing the payload.
func writeSync(pipe io.Writer, sync syncType) error {
if err := utils.WriteJSON(pipe, syncT{sync}); err != nil {
return err
}
return nil
}
// readSync is used to read from a synchronisation pipe. An error is returned
// if we got a genericError, the pipe was closed, or we got an unexpected flag.
func readSync(pipe io.Reader, expected syncType) error {
var procSync syncT
if err := json.NewDecoder(pipe).Decode(&procSync); err != nil {
if err == io.EOF {
return fmt.Errorf("parent closed synchronisation channel")
}
if procSync.Type == procError {
var ierr genericError
if err := json.NewDecoder(pipe).Decode(&ierr); err != nil {
return fmt.Errorf("failed reading error from parent: %v", err)
}
return &ierr
}
if procSync.Type != expected {
return fmt.Errorf("invalid synchronisation flag from parent")
}
}
return nil
}
// parseSync runs the given callback function on each syncT received from the
// child. It will return once io.EOF is returned from the given pipe.
func parseSync(pipe io.Reader, fn func(*syncT) error) error {
dec := json.NewDecoder(pipe)
for {
var sync syncT
if err := dec.Decode(&sync); err != nil {
if err == io.EOF {
break
}
return err
}
// We handle this case outside fn for cleanliness reasons.
var ierr *genericError
if sync.Type == procError {
if err := dec.Decode(&ierr); err != nil && err != io.EOF {
return newSystemErrorWithCause(err, "decoding proc error from init")
}
if ierr != nil {
return ierr
}
// Programmer error.
panic("No error following JSON procError payload.")
}
if err := fn(&sync); err != nil {
return err
}
}
return nil
}

View File

@@ -8,7 +8,6 @@ go_library(
] + select({
"@io_bazel_rules_go//go/platform:linux_amd64": [
"linux.go",
"setns_linux.go",
"syscall_linux_64.go",
"sysconfig.go",
"xattrs_linux.go",
@@ -20,6 +19,12 @@ go_library(
}),
cgo = True,
visibility = ["//visibility:public"],
deps = select({
"@io_bazel_rules_go//go/platform:linux_amd64": [
"//vendor/golang.org/x/sys/unix:go_default_library",
],
"//conditions:default": [],
}),
)
filegroup(

View File

@@ -7,8 +7,10 @@ import (
"fmt"
"os"
"os/exec"
"syscall"
"syscall" // only for exec
"unsafe"
"golang.org/x/sys/unix"
)
// If arg2 is nonzero, set the "child subreaper" attribute of the
@@ -53,8 +55,8 @@ func Execv(cmd string, args []string, env []string) error {
return syscall.Exec(name, args, env)
}
func Prlimit(pid, resource int, limit syscall.Rlimit) error {
_, _, err := syscall.RawSyscall6(syscall.SYS_PRLIMIT64, uintptr(pid), uintptr(resource), uintptr(unsafe.Pointer(&limit)), uintptr(unsafe.Pointer(&limit)), 0, 0)
func Prlimit(pid, resource int, limit unix.Rlimit) error {
_, _, err := unix.RawSyscall6(unix.SYS_PRLIMIT64, uintptr(pid), uintptr(resource), uintptr(unsafe.Pointer(&limit)), uintptr(unsafe.Pointer(&limit)), 0, 0)
if err != 0 {
return err
}
@@ -62,7 +64,7 @@ func Prlimit(pid, resource int, limit syscall.Rlimit) error {
}
func SetParentDeathSignal(sig uintptr) error {
if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_PDEATHSIG, sig, 0); err != 0 {
if err := unix.Prctl(unix.PR_SET_PDEATHSIG, sig, 0, 0, 0); err != nil {
return err
}
return nil
@@ -70,15 +72,14 @@ func SetParentDeathSignal(sig uintptr) error {
func GetParentDeathSignal() (ParentDeathSignal, error) {
var sig int
_, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_GET_PDEATHSIG, uintptr(unsafe.Pointer(&sig)), 0)
if err != 0 {
if err := unix.Prctl(unix.PR_GET_PDEATHSIG, uintptr(unsafe.Pointer(&sig)), 0, 0, 0); err != nil {
return -1, err
}
return ParentDeathSignal(sig), nil
}
func SetKeepCaps() error {
if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_KEEPCAPS, 1, 0); err != 0 {
if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 1, 0, 0, 0); err != nil {
return err
}
@@ -86,7 +87,7 @@ func SetKeepCaps() error {
}
func ClearKeepCaps() error {
if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_KEEPCAPS, 0, 0); err != 0 {
if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 0, 0, 0, 0); err != nil {
return err
}
@@ -94,7 +95,7 @@ func ClearKeepCaps() error {
}
func Setctty() error {
if _, _, err := syscall.RawSyscall(syscall.SYS_IOCTL, 0, uintptr(syscall.TIOCSCTTY), 0); err != 0 {
if err := unix.IoctlSetInt(0, unix.TIOCSCTTY, 0); err != nil {
return err
}
return nil
@@ -131,13 +132,5 @@ func RunningInUserNS() bool {
// SetSubreaper sets the value i as the subreaper setting for the calling process
func SetSubreaper(i int) error {
return Prctl(PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0)
}
func Prctl(option int, arg2, arg3, arg4, arg5 uintptr) (err error) {
_, _, e1 := syscall.Syscall6(syscall.SYS_PRCTL, uintptr(option), arg2, arg3, arg4, arg5, 0)
if e1 != 0 {
err = e1
}
return
return unix.Prctl(PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0)
}

View File

@@ -1,27 +1,113 @@
package system
import (
"fmt"
"io/ioutil"
"path/filepath"
"strconv"
"strings"
)
// look in /proc to find the process start time so that we can verify
// that this pid has started after ourself
// State is the status of a process.
type State rune
const ( // Only values for Linux 3.14 and later are listed here
Dead State = 'X'
DiskSleep State = 'D'
Running State = 'R'
Sleeping State = 'S'
Stopped State = 'T'
TracingStop State = 't'
Zombie State = 'Z'
)
// String forms of the state from proc(5)'s documentation for
// /proc/[pid]/status' "State" field.
func (s State) String() string {
switch s {
case Dead:
return "dead"
case DiskSleep:
return "disk sleep"
case Running:
return "running"
case Sleeping:
return "sleeping"
case Stopped:
return "stopped"
case TracingStop:
return "tracing stop"
case Zombie:
return "zombie"
default:
return fmt.Sprintf("unknown (%c)", s)
}
}
// Stat_t represents the information from /proc/[pid]/stat, as
// described in proc(5) with names based on the /proc/[pid]/status
// fields.
type Stat_t struct {
// PID is the process ID.
PID uint
// Name is the command run by the process.
Name string
// State is the state of the process.
State State
// StartTime is the number of clock ticks after system boot (since
// Linux 2.6).
StartTime uint64
}
// Stat returns a Stat_t instance for the specified process.
func Stat(pid int) (stat Stat_t, err error) {
bytes, err := ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat"))
if err != nil {
return stat, err
}
return parseStat(string(bytes))
}
// GetProcessStartTime is deprecated. Use Stat(pid) and
// Stat_t.StartTime instead.
func GetProcessStartTime(pid int) (string, error) {
data, err := ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat"))
stat, err := Stat(pid)
if err != nil {
return "", err
}
parts := strings.Split(string(data), " ")
// the starttime is located at pos 22
// from the man page
//
// starttime %llu (was %lu before Linux 2.6)
// (22) The time the process started after system boot. In kernels before Linux 2.6, this
// value was expressed in jiffies. Since Linux 2.6, the value is expressed in clock ticks
// (divide by sysconf(_SC_CLK_TCK)).
return parts[22-1], nil // starts at 1
return fmt.Sprintf("%d", stat.StartTime), nil
}
func parseStat(data string) (stat Stat_t, err error) {
// From proc(5), field 2 could contain space and is inside `(` and `)`.
// The following is an example:
// 89653 (gunicorn: maste) S 89630 89653 89653 0 -1 4194560 29689 28896 0 3 146 32 76 19 20 0 1 0 2971844 52965376 3920 18446744073709551615 1 1 0 0 0 0 0 16781312 137447943 0 0 0 17 1 0 0 0 0 0 0 0 0 0 0 0 0 0
i := strings.LastIndex(data, ")")
if i <= 2 || i >= len(data)-1 {
return stat, fmt.Errorf("invalid stat data: %q", data)
}
parts := strings.SplitN(data[:i], "(", 2)
if len(parts) != 2 {
return stat, fmt.Errorf("invalid stat data: %q", data)
}
stat.Name = parts[1]
_, err = fmt.Sscanf(parts[0], "%d", &stat.PID)
if err != nil {
return stat, err
}
// parts indexes should be offset by 3 from the field number given
// proc(5), because parts is zero-indexed and we've removed fields
// one (PID) and two (Name) in the paren-split.
parts = strings.Split(data[i+2:], " ")
var state int
fmt.Sscanf(parts[3-3], "%c", &state)
stat.State = State(state)
fmt.Sscanf(parts[22-3], "%d", &stat.StartTime)
return stat, nil
}

View File

@@ -1,40 +0,0 @@
package system
import (
"fmt"
"runtime"
"syscall"
)
// Via http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=7b21fddd087678a70ad64afc0f632e0f1071b092
//
// We need different setns values for the different platforms and arch
// We are declaring the macro here because the SETNS syscall does not exist in th stdlib
var setNsMap = map[string]uintptr{
"linux/386": 346,
"linux/arm64": 268,
"linux/amd64": 308,
"linux/arm": 375,
"linux/ppc": 350,
"linux/ppc64": 350,
"linux/ppc64le": 350,
"linux/s390x": 339,
}
var sysSetns = setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)]
func SysSetns() uint32 {
return uint32(sysSetns)
}
func Setns(fd uintptr, flags uintptr) error {
ns, exists := setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)]
if !exists {
return fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH)
}
_, _, err := syscall.RawSyscall(ns, fd, flags, 0)
if err != 0 {
return err
}
return nil
}

View File

@@ -3,12 +3,12 @@
package system
import (
"syscall"
"golang.org/x/sys/unix"
)
// Setuid sets the uid of the calling thread to the specified uid.
func Setuid(uid int) (err error) {
_, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID32, uintptr(uid), 0, 0)
_, _, e1 := unix.RawSyscall(unix.SYS_SETUID32, uintptr(uid), 0, 0)
if e1 != 0 {
err = e1
}
@@ -17,7 +17,7 @@ func Setuid(uid int) (err error) {
// Setgid sets the gid of the calling thread to the specified gid.
func Setgid(gid int) (err error) {
_, _, e1 := syscall.RawSyscall(syscall.SYS_SETGID32, uintptr(gid), 0, 0)
_, _, e1 := unix.RawSyscall(unix.SYS_SETGID32, uintptr(gid), 0, 0)
if e1 != 0 {
err = e1
}

View File

@@ -3,12 +3,12 @@
package system
import (
"syscall"
"golang.org/x/sys/unix"
)
// Setuid sets the uid of the calling thread to the specified uid.
func Setuid(uid int) (err error) {
_, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID, uintptr(uid), 0, 0)
_, _, e1 := unix.RawSyscall(unix.SYS_SETUID, uintptr(uid), 0, 0)
if e1 != 0 {
err = e1
}
@@ -17,7 +17,7 @@ func Setuid(uid int) (err error) {
// Setgid sets the gid of the calling thread to the specified gid.
func Setgid(gid int) (err error) {
_, _, e1 := syscall.RawSyscall(syscall.SYS_SETGID, uintptr(gid), 0, 0)
_, _, e1 := unix.RawSyscall(unix.SYS_SETGID, uintptr(gid), 0, 0)
if e1 != 0 {
err = e1
}

View File

@@ -3,12 +3,12 @@
package system
import (
"syscall"
"golang.org/x/sys/unix"
)
// Setuid sets the uid of the calling thread to the specified uid.
func Setuid(uid int) (err error) {
_, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID32, uintptr(uid), 0, 0)
_, _, e1 := unix.RawSyscall(unix.SYS_SETUID32, uintptr(uid), 0, 0)
if e1 != 0 {
err = e1
}
@@ -17,7 +17,7 @@ func Setuid(uid int) (err error) {
// Setgid sets the gid of the calling thread to the specified gid.
func Setgid(gid int) (err error) {
_, _, e1 := syscall.RawSyscall(syscall.SYS_SETGID32, uintptr(gid), 0, 0)
_, _, e1 := unix.RawSyscall(unix.SYS_SETGID32, uintptr(gid), 0, 0)
if e1 != 0 {
err = e1
}

View File

@@ -1,99 +1,35 @@
package system
import (
"syscall"
"unsafe"
)
var _zero uintptr
// Returns the size of xattrs and nil error
// Requires path, takes allocated []byte or nil as last argument
func Llistxattr(path string, dest []byte) (size int, err error) {
pathBytes, err := syscall.BytePtrFromString(path)
if err != nil {
return -1, err
}
var newpathBytes unsafe.Pointer
if len(dest) > 0 {
newpathBytes = unsafe.Pointer(&dest[0])
} else {
newpathBytes = unsafe.Pointer(&_zero)
}
_size, _, errno := syscall.Syscall6(syscall.SYS_LLISTXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(newpathBytes), uintptr(len(dest)), 0, 0, 0)
size = int(_size)
if errno != 0 {
return -1, errno
}
return size, nil
}
import "golang.org/x/sys/unix"
// Returns a []byte slice if the xattr is set and nil otherwise
// Requires path and its attribute as arguments
func Lgetxattr(path string, attr string) ([]byte, error) {
var sz int
pathBytes, err := syscall.BytePtrFromString(path)
if err != nil {
return nil, err
}
attrBytes, err := syscall.BytePtrFromString(attr)
if err != nil {
return nil, err
}
// Start with a 128 length byte array
sz = 128
dest := make([]byte, sz)
destBytes := unsafe.Pointer(&dest[0])
_sz, _, errno := syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(destBytes), uintptr(len(dest)), 0, 0)
dest := make([]byte, 128)
sz, errno := unix.Lgetxattr(path, attr, dest)
switch {
case errno == syscall.ENODATA:
case errno == unix.ENODATA:
return nil, errno
case errno == syscall.ENOTSUP:
case errno == unix.ENOTSUP:
return nil, errno
case errno == syscall.ERANGE:
case errno == unix.ERANGE:
// 128 byte array might just not be good enough,
// A dummy buffer is used ``uintptr(0)`` to get real size
// A dummy buffer is used to get the real size
// of the xattrs on disk
_sz, _, errno = syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(unsafe.Pointer(nil)), uintptr(0), 0, 0)
sz = int(_sz)
if sz < 0 {
sz, errno = unix.Lgetxattr(path, attr, []byte{})
if errno != nil {
return nil, errno
}
dest = make([]byte, sz)
destBytes := unsafe.Pointer(&dest[0])
_sz, _, errno = syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(destBytes), uintptr(len(dest)), 0, 0)
if errno != 0 {
sz, errno = unix.Lgetxattr(path, attr, dest)
if errno != nil {
return nil, errno
}
case errno != 0:
case errno != nil:
return nil, errno
}
sz = int(_sz)
return dest[:sz], nil
}
func Lsetxattr(path string, attr string, data []byte, flags int) error {
pathBytes, err := syscall.BytePtrFromString(path)
if err != nil {
return err
}
attrBytes, err := syscall.BytePtrFromString(attr)
if err != nil {
return err
}
var dataBytes unsafe.Pointer
if len(data) > 0 {
dataBytes = unsafe.Pointer(&data[0])
} else {
dataBytes = unsafe.Pointer(&_zero)
}
_, _, errno := syscall.Syscall6(syscall.SYS_LSETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(dataBytes), uintptr(len(data)), uintptr(flags), 0)
if errno != 0 {
return errno
}
return nil
}

View File

@@ -16,6 +16,15 @@ go_library(
"//conditions:default": [],
}),
visibility = ["//visibility:public"],
deps = select({
"@io_bazel_rules_go//go/platform:darwin_amd64": [
"//vendor/golang.org/x/sys/unix:go_default_library",
],
"@io_bazel_rules_go//go/platform:linux_amd64": [
"//vendor/golang.org/x/sys/unix:go_default_library",
],
"//conditions:default": [],
}),
)
filegroup(

View File

@@ -2,7 +2,6 @@ package user
import (
"errors"
"syscall"
)
var (
@@ -36,13 +35,6 @@ func lookupUser(filter func(u User) bool) (User, error) {
return users[0], nil
}
// CurrentUser looks up the current user by their user id in /etc/passwd. If the
// user cannot be found (or there is no /etc/passwd file on the filesystem),
// then CurrentUser returns an error.
func CurrentUser() (User, error) {
return LookupUid(syscall.Getuid())
}
// LookupUser looks up a user by their username in /etc/passwd. If the user
// cannot be found (or there is no /etc/passwd file on the filesystem), then
// LookupUser returns an error.
@@ -84,13 +76,6 @@ func lookupGroup(filter func(g Group) bool) (Group, error) {
return groups[0], nil
}
// CurrentGroup looks up the current user's group by their primary group id's
// entry in /etc/passwd. If the group cannot be found (or there is no
// /etc/group file on the filesystem), then CurrentGroup returns an error.
func CurrentGroup() (Group, error) {
return LookupGid(syscall.Getgid())
}
// LookupGroup looks up a group by its name in /etc/group. If the group cannot
// be found (or there is no /etc/group file on the filesystem), then LookupGroup
// returns an error.

View File

@@ -5,6 +5,8 @@ package user
import (
"io"
"os"
"golang.org/x/sys/unix"
)
// Unix-specific path to the passwd and group formatted files.
@@ -28,3 +30,17 @@ func GetGroupPath() (string, error) {
func GetGroup() (io.ReadCloser, error) {
return os.Open(unixGroupPath)
}
// CurrentUser looks up the current user by their user id in /etc/passwd. If the
// user cannot be found (or there is no /etc/passwd file on the filesystem),
// then CurrentUser returns an error.
func CurrentUser() (User, error) {
return LookupUid(unix.Getuid())
}
// CurrentGroup looks up the current user's group by their primary group id's
// entry in /etc/passwd. If the group cannot be found (or there is no
// /etc/group file on the filesystem), then CurrentGroup returns an error.
func CurrentGroup() (Group, error) {
return LookupGid(unix.Getgid())
}

View File

@@ -2,7 +2,10 @@
package user
import "io"
import (
"io"
"syscall"
)
func GetPasswdPath() (string, error) {
return "", ErrUnsupported
@@ -19,3 +22,17 @@ func GetGroupPath() (string, error) {
func GetGroup() (io.ReadCloser, error) {
return nil, ErrUnsupported
}
// CurrentUser looks up the current user by their user id in /etc/passwd. If the
// user cannot be found (or there is no /etc/passwd file on the filesystem),
// then CurrentUser returns an error.
func CurrentUser() (User, error) {
return LookupUid(syscall.Getuid())
}
// CurrentGroup looks up the current user's group by their primary group id's
// entry in /etc/passwd. If the group cannot be found (or there is no
// /etc/group file on the filesystem), then CurrentGroup returns an error.
func CurrentGroup() (Group, error) {
return LookupGid(syscall.Getgid())
}

View File

@@ -199,18 +199,16 @@ type ExecUser struct {
// files cannot be opened for any reason, the error is ignored and a nil
// io.Reader is passed instead.
func GetExecUserPath(userSpec string, defaults *ExecUser, passwdPath, groupPath string) (*ExecUser, error) {
passwd, err := os.Open(passwdPath)
if err != nil {
passwd = nil
} else {
defer passwd.Close()
var passwd, group io.Reader
if passwdFile, err := os.Open(passwdPath); err == nil {
passwd = passwdFile
defer passwdFile.Close()
}
group, err := os.Open(groupPath)
if err != nil {
group = nil
} else {
defer group.Close()
if groupFile, err := os.Open(groupPath); err == nil {
group = groupFile
defer groupFile.Close()
}
return GetExecUser(userSpec, defaults, passwd, group)
@@ -343,7 +341,7 @@ func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) (
if len(groups) > 0 {
// First match wins, even if there's more than one matching entry.
user.Gid = groups[0].Gid
} else if groupArg != "" {
} else {
// If we can't find a group with the given name, the only other valid
// option is if it's a numeric group name with no associated entry in group.
@@ -433,9 +431,11 @@ func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, err
// that opens the groupPath given and gives it as an argument to
// GetAdditionalGroups.
func GetAdditionalGroupsPath(additionalGroups []string, groupPath string) ([]int, error) {
group, err := os.Open(groupPath)
if err == nil {
defer group.Close()
var group io.Reader
if groupFile, err := os.Open(groupPath); err == nil {
group = groupFile
defer groupFile.Close()
}
return GetAdditionalGroups(additionalGroups, group)
}

View File

@@ -5,8 +5,14 @@ go_library(
srcs = [
"utils.go",
"utils_unix.go",
],
] + select({
"@io_bazel_rules_go//go/platform:linux_amd64": [
"cmsg.go",
],
"//conditions:default": [],
}),
visibility = ["//visibility:public"],
deps = ["//vendor/golang.org/x/sys/unix:go_default_library"],
)
filegroup(

View File

@@ -0,0 +1,95 @@
// +build linux
package utils
/*
* Copyright 2016, 2017 SUSE LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import (
"fmt"
"os"
"golang.org/x/sys/unix"
)
// MaxSendfdLen is the maximum length of the name of a file descriptor being
// sent using SendFd. The name of the file handle returned by RecvFd will never
// be larger than this value.
const MaxNameLen = 4096
// oobSpace is the size of the oob slice required to store a single FD. Note
// that unix.UnixRights appears to make the assumption that fd is always int32,
// so sizeof(fd) = 4.
var oobSpace = unix.CmsgSpace(4)
// RecvFd waits for a file descriptor to be sent over the given AF_UNIX
// socket. The file name of the remote file descriptor will be recreated
// locally (it is sent as non-auxiliary data in the same payload).
func RecvFd(socket *os.File) (*os.File, error) {
// For some reason, unix.Recvmsg uses the length rather than the capacity
// when passing the msg_controllen and other attributes to recvmsg. So we
// have to actually set the length.
name := make([]byte, MaxNameLen)
oob := make([]byte, oobSpace)
sockfd := socket.Fd()
n, oobn, _, _, err := unix.Recvmsg(int(sockfd), name, oob, 0)
if err != nil {
return nil, err
}
if n >= MaxNameLen || oobn != oobSpace {
return nil, fmt.Errorf("recvfd: incorrect number of bytes read (n=%d oobn=%d)", n, oobn)
}
// Truncate.
name = name[:n]
oob = oob[:oobn]
scms, err := unix.ParseSocketControlMessage(oob)
if err != nil {
return nil, err
}
if len(scms) != 1 {
return nil, fmt.Errorf("recvfd: number of SCMs is not 1: %d", len(scms))
}
scm := scms[0]
fds, err := unix.ParseUnixRights(&scm)
if err != nil {
return nil, err
}
if len(fds) != 1 {
return nil, fmt.Errorf("recvfd: number of fds is not 1: %d", len(fds))
}
fd := uintptr(fds[0])
return os.NewFile(fd, string(name)), nil
}
// SendFd sends a file descriptor over the given AF_UNIX socket. In
// addition, the file.Name() of the given file will also be sent as
// non-auxiliary data in the same payload (allowing to send contextual
// information for a file descriptor).
func SendFd(socket, file *os.File) error {
name := []byte(file.Name())
if len(name) >= MaxNameLen {
return fmt.Errorf("sendfd: filename too long: %s", file.Name())
}
oob := unix.UnixRights(int(file.Fd()))
return unix.Sendmsg(int(socket.Fd()), name, oob, nil, 0)
}

View File

@@ -8,8 +8,9 @@ import (
"os"
"path/filepath"
"strings"
"syscall"
"unsafe"
"golang.org/x/sys/unix"
)
const (
@@ -41,7 +42,7 @@ func ResolveRootfs(uncleanRootfs string) (string, error) {
// ExitStatus returns the correct exit status for a process based on if it
// was signaled or exited cleanly
func ExitStatus(status syscall.WaitStatus) int {
func ExitStatus(status unix.WaitStatus) int {
if status.Signaled() {
return exitSignalOffset + int(status.Signal())
}

View File

@@ -4,8 +4,10 @@ package utils
import (
"io/ioutil"
"os"
"strconv"
"syscall"
"golang.org/x/sys/unix"
)
func CloseExecFrom(minFd int) error {
@@ -25,9 +27,18 @@ func CloseExecFrom(minFd int) error {
continue
}
// intentionally ignore errors from syscall.CloseOnExec
syscall.CloseOnExec(fd)
// intentionally ignore errors from unix.CloseOnExec
unix.CloseOnExec(fd)
// the cases where this might fail are basically file descriptors that have already been closed (including and especially the one that was created when ioutil.ReadDir did the "opendir" syscall)
}
return nil
}
// NewSockPair returns a new unix socket pair
func NewSockPair(name string) (parent *os.File, child *os.File, err error) {
fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0)
if err != nil {
return nil, nil, err
}
return os.NewFile(uintptr(fds[1]), name+"-p"), os.NewFile(uintptr(fds[0]), name+"-c"), nil
}