Bump cadvisor dependencies to latest head.

This commit is contained in:
Lantao Liu
2016-07-20 20:10:46 -07:00
committed by Random-Liu
parent 01a5ddd782
commit cb1b3c86d3
169 changed files with 7413 additions and 3322 deletions

View File

@@ -186,8 +186,8 @@ process := &libcontainer.Process{
err := container.Start(process)
if err != nil {
container.Destroy()
logrus.Fatal(err)
container.Destroy()
return
}
@@ -219,9 +219,6 @@ container.Resume()
// send signal to container's init process.
container.Signal(signal)
// update container resource constraints.
container.Set(config)
```

View File

@@ -90,7 +90,7 @@ in tmpfs.
After `/dev/null` has been setup we check for any external links between
the container's io, STDIN, STDOUT, STDERR. If the container's io is pointing
to `/dev/null` outside the container we close and `dup2` the `/dev/null`
to `/dev/null` outside the container we close and `dup2` the the `/dev/null`
that is local to the container's rootfs.
@@ -297,7 +297,7 @@ a container.
| -------------- | ------------------------------------------------------------------ |
| Get processes | Return all the pids for processes running inside a container |
| Get Stats | Return resource statistics for the container as a whole |
| Wait | Waits on the container's init process ( pid 1 ) |
| Wait | Wait waits on the container's init process ( pid 1 ) |
| Wait Process | Wait on any of the container's processes returning the exit status |
| Destroy | Kill the container's init process and remove any filesystem state |
| Signal | Send a signal to the container's init process |

View File

@@ -7,7 +7,6 @@ package apparmor
// #include <stdlib.h>
import "C"
import (
"fmt"
"io/ioutil"
"os"
"unsafe"
@@ -33,7 +32,7 @@ func ApplyProfile(name string) error {
cName := C.CString(name)
defer C.free(unsafe.Pointer(cName))
if _, err := C.aa_change_onexec(cName); err != nil {
return fmt.Errorf("apparmor failed to apply profile: %s", err)
return err
}
return nil
}

View File

@@ -9,6 +9,7 @@ import (
"io/ioutil"
"os"
"path/filepath"
"strconv"
"sync"
"github.com/opencontainers/runc/libcontainer/cgroups"
@@ -32,6 +33,7 @@ var (
&FreezerGroup{},
&NameGroup{GroupName: "name=systemd", Join: true},
}
CgroupProcesses = "cgroup.procs"
HugePageSizes, _ = cgroups.GetHugePageSize()
)
@@ -140,9 +142,7 @@ func (m *Manager) Apply(pid int) (err error) {
// created then join consists of writing the process pids to cgroup.procs
p, err := d.path(sys.Name())
if err != nil {
// The non-presence of the devices subsystem is
// considered fatal for security reasons.
if cgroups.IsNotFound(err) && sys.Name() != "devices" {
if cgroups.IsNotFound(err) {
continue
}
return err
@@ -339,7 +339,7 @@ func (raw *cgroupData) join(subsystem string) (string, error) {
if err := os.MkdirAll(path, 0755); err != nil {
return "", err
}
if err := cgroups.WriteCgroupProc(path, raw.pid); err != nil {
if err := writeFile(path, CgroupProcesses, strconv.Itoa(raw.pid)); err != nil {
return "", err
}
return path, nil
@@ -349,7 +349,7 @@ func writeFile(dir, file, data string) error {
// Normally dir should not be empty, one case is that cgroup subsystem
// is not mounted, we will get empty dir, and we want it fail here.
if dir == "" {
return fmt.Errorf("no such directory for %s", file)
return fmt.Errorf("no such directory for %s.", file)
}
if err := ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700); err != nil {
return fmt.Errorf("failed to write %v to %v: %v", data, file, err)

View File

@@ -8,6 +8,7 @@ import (
"io/ioutil"
"os"
"path/filepath"
"strconv"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
@@ -66,7 +67,7 @@ func (s *CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) erro
}
// because we are not using d.join we need to place the pid into the procs file
// unlike the other subsystems
if err := cgroups.WriteCgroupProc(dir, pid); err != nil {
if err := writeFile(dir, "cgroup.procs", strconv.Itoa(pid)); err != nil {
return err
}

View File

@@ -9,7 +9,6 @@ import (
"path/filepath"
"strconv"
"strings"
"time"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
@@ -27,75 +26,38 @@ func (s *MemoryGroup) Apply(d *cgroupData) (err error) {
if err != nil && !cgroups.IsNotFound(err) {
return err
}
// reset error.
err = nil
if path == "" {
// Invalid input.
return fmt.Errorf("invalid path for memory cgroups: %+v", d)
if memoryAssigned(d.config) {
if path != "" {
if err := os.MkdirAll(path, 0755); err != nil {
return err
}
}
// We have to set kernel memory here, as we can't change it once
// processes have been attached.
if err := s.SetKernelMemory(path, d.config); err != nil {
return err
}
}
defer func() {
if err != nil {
os.RemoveAll(path)
}
}()
if !cgroups.PathExists(path) {
if err = os.MkdirAll(path, 0755); err != nil {
return err
}
}
if memoryAssigned(d.config) {
// We have to set kernel memory here, as we can't change it once
// processes have been attached to the cgroup.
if err = s.SetKernelMemory(path, d.config); err != nil {
return err
}
}
// We need to join memory cgroup after set memory limits, because
// kmem.limit_in_bytes can only be set when the cgroup is empty.
if _, jerr := d.join("memory"); jerr != nil && !cgroups.IsNotFound(jerr) {
err = jerr
_, err = d.join("memory")
if err != nil && !cgroups.IsNotFound(err) {
return err
}
return nil
}
func getModifyTime(path string) (time.Time, error) {
stat, err := os.Stat(path)
if err != nil {
return time.Time{}, fmt.Errorf("failed to get memory cgroups creation time: %v", err)
}
return stat.ModTime(), nil
}
func (s *MemoryGroup) SetKernelMemory(path string, cgroup *configs.Cgroup) error {
// This has to be done separately because it has special
// constraints (it can only be initialized before setting up a
// hierarchy or adding a task to the cgroups. However, if
// sucessfully initialized, it can be updated anytime afterwards)
if cgroup.Resources.KernelMemory != 0 {
// Is kmem.limit_in_bytes already set?
// memory.kmem.max_usage_in_bytes is a read-only file. Use it to get cgroups creation time.
kmemCreationTime, err := getModifyTime(filepath.Join(path, "memory.kmem.max_usage_in_bytes"))
if err != nil {
return err
}
kmemLimitsUpdateTime, err := getModifyTime(filepath.Join(path, "memory.kmem.limit_in_bytes"))
if err != nil {
return err
}
// kmem.limit_in_bytes has already been set if its update time is after that of creation time.
// We use `!=` op instead of `>` because updates are losing precision compared to creation.
kmemInitialized := !kmemLimitsUpdateTime.Equal(kmemCreationTime)
if !kmemInitialized {
// If there's already tasks in the cgroup, we can't change the limit either
tasks, err := getCgroupParamString(path, "tasks")
if err != nil {
return err
}
if tasks != "" {
return fmt.Errorf("cannot set kmem.limit_in_bytes after task have joined this cgroup")
}
}
// This has to be done separately because it has special constraints (it
// can't be done after there are processes attached to the cgroup).
if cgroup.Resources.KernelMemory > 0 {
if err := writeFile(path, "memory.kmem.limit_in_bytes", strconv.FormatInt(cgroup.Resources.KernelMemory, 10)); err != nil {
return err
}
@@ -151,10 +113,6 @@ func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
return err
}
if err := s.SetKernelMemory(path, cgroup); err != nil {
return err
}
if cgroup.Resources.MemoryReservation != 0 {
if err := writeFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemoryReservation, 10)); err != nil {
return err

View File

@@ -12,6 +12,7 @@ import (
)
var (
ErrNotSupportStat = errors.New("stats are not supported for subsystem")
ErrNotValidFormat = errors.New("line is not a valid key value format")
)

View File

@@ -11,7 +11,6 @@ type ThrottlingData struct {
ThrottledTime uint64 `json:"throttled_time,omitempty"`
}
// CpuUsage denotes the usage of a CPU.
// All CPU stats are aggregate since container inception.
type CpuUsage struct {
// Total CPU time consumed.

View File

@@ -74,7 +74,6 @@ var (
theConn *systemdDbus.Conn
hasStartTransientUnit bool
hasTransientDefaultDependencies bool
hasDelegate bool
)
func newProp(name string, units interface{}) systemdDbus.Property {
@@ -147,20 +146,6 @@ func UseSystemd() bool {
// Not critical because of the stop unit logic above.
theConn.StopUnit(scope, "replace", nil)
// Assume StartTransientUnit on a scope allows Delegate
hasDelegate = true
dl := newProp("Delegate", true)
if _, err := theConn.StartTransientUnit(scope, "replace", []systemdDbus.Property{dl}, nil); err != nil {
if dbusError, ok := err.(dbus.Error); ok {
if strings.Contains(dbusError.Name, "org.freedesktop.DBus.Error.PropertyReadOnly") {
hasDelegate = false
}
}
}
// Not critical because of the stop unit logic above.
theConn.StopUnit(scope, "replace", nil)
}
return hasStartTransientUnit
}
@@ -198,12 +183,9 @@ func (m *Manager) Apply(pid int) error {
systemdDbus.PropSlice(slice),
systemdDbus.PropDescription("docker container "+c.Name),
newProp("PIDs", []uint32{uint32(pid)}),
)
if hasDelegate {
// This is only supported on systemd versions 218 and above.
properties = append(properties, newProp("Delegate", true))
}
newProp("Delegate", true),
)
// Always enable accounting, this gets us the same behaviour as the fs implementation,
// plus the kernel has some problems with joining the memory cgroup at a later time.
@@ -232,9 +214,11 @@ func (m *Manager) Apply(pid int) error {
newProp("BlockIOWeight", uint64(c.Resources.BlkioWeight)))
}
// We have to set kernel memory here, as we can't change it once
// processes have been attached to the cgroup.
if c.Resources.KernelMemory != 0 {
// We need to set kernel memory before processes join cgroup because
// kmem.limit_in_bytes can only be set when the cgroup is empty.
// And swap memory limit needs to be set after memory limit, only
// memory limit is handled by systemd, so it's kind of ugly here.
if c.Resources.KernelMemory > 0 {
if err := setKernelMemory(c); err != nil {
return err
}
@@ -289,7 +273,7 @@ func writeFile(dir, file, data string) error {
// Normally dir should not be empty, one case is that cgroup subsystem
// is not mounted, we will get empty dir, and we want it fail here.
if dir == "" {
return fmt.Errorf("no such directory for %s", file)
return fmt.Errorf("no such directory for %s.", file)
}
return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
}
@@ -485,5 +469,11 @@ func setKernelMemory(c *configs.Cgroup) error {
return err
}
return os.MkdirAll(path, 0755)
if err := os.MkdirAll(path, 0755); err != nil {
return err
}
// This doesn't get called by manager.Set, so we need to do it here.
s := &fs.MemoryGroup{}
return s.SetKernelMemory(path, c)
}

View File

@@ -16,19 +16,13 @@ import (
"github.com/docker/go-units"
)
const (
cgroupNamePrefix = "name="
CgroupProcesses = "cgroup.procs"
)
const cgroupNamePrefix = "name="
// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
// https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt
func FindCgroupMountpoint(subsystem string) (string, error) {
// We are not using mount.GetMounts() because it's super-inefficient,
// parsing it directly sped up x10 times because of not using Sscanf.
// It was one of two major performance drawbacks in container start.
if !isSubsystemAvailable(subsystem) {
return "", NewNotFoundError(subsystem)
}
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return "", err
@@ -53,9 +47,6 @@ func FindCgroupMountpoint(subsystem string) (string, error) {
}
func FindCgroupMountpointAndRoot(subsystem string) (string, string, error) {
if !isSubsystemAvailable(subsystem) {
return "", "", NewNotFoundError(subsystem)
}
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return "", "", err
@@ -79,15 +70,6 @@ func FindCgroupMountpointAndRoot(subsystem string) (string, string, error) {
return "", "", NewNotFoundError(subsystem)
}
func isSubsystemAvailable(subsystem string) bool {
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return false
}
_, avail := cgroups[subsystem]
return avail
}
func FindCgroupMountpointDir() (string, error) {
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
@@ -142,8 +124,7 @@ func (m Mount) GetThisCgroupDir(cgroups map[string]string) (string, error) {
func getCgroupMountsHelper(ss map[string]bool, mi io.Reader) ([]Mount, error) {
res := make([]Mount, 0, len(ss))
scanner := bufio.NewScanner(mi)
numFound := 0
for scanner.Scan() && numFound < len(ss) {
for scanner.Scan() {
txt := scanner.Text()
sepIdx := strings.Index(txt, " - ")
if sepIdx == -1 {
@@ -158,15 +139,12 @@ func getCgroupMountsHelper(ss map[string]bool, mi io.Reader) ([]Mount, error) {
Root: fields[3],
}
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
if !ss[opt] {
continue
}
if strings.HasPrefix(opt, cgroupNamePrefix) {
m.Subsystems = append(m.Subsystems, opt[len(cgroupNamePrefix):])
} else {
}
if ss[opt] {
m.Subsystems = append(m.Subsystems, opt)
}
numFound++
}
res = append(res, m)
}
@@ -183,19 +161,19 @@ func GetCgroupMounts() ([]Mount, error) {
}
defer f.Close()
all, err := ParseCgroupFile("/proc/self/cgroup")
all, err := GetAllSubsystems()
if err != nil {
return nil, err
}
allMap := make(map[string]bool)
for s := range all {
for _, s := range all {
allMap[s] = true
}
return getCgroupMountsHelper(allMap, f)
}
// GetAllSubsystems returns all the cgroup subsystems supported by the kernel
// Returns all the cgroup subsystems supported by the kernel
func GetAllSubsystems() ([]string, error) {
f, err := os.Open("/proc/cgroups")
if err != nil {
@@ -221,7 +199,7 @@ func GetAllSubsystems() ([]string, error) {
return subsystems, nil
}
// GetThisCgroupDir returns the relative path to the cgroup docker is running in.
// Returns the relative path to the cgroup docker is running in.
func GetThisCgroupDir(subsystem string) (string, error) {
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
@@ -242,7 +220,7 @@ func GetInitCgroupDir(subsystem string) (string, error) {
}
func readProcsFile(dir string) ([]int, error) {
f, err := os.Open(filepath.Join(dir, CgroupProcesses))
f, err := os.Open(filepath.Join(dir, "cgroup.procs"))
if err != nil {
return nil, err
}
@@ -265,8 +243,6 @@ func readProcsFile(dir string) ([]int, error) {
return out, nil
}
// ParseCgroupFile parses the given cgroup file, typically from
// /proc/<pid>/cgroup, into a map of subgroups to cgroup names.
func ParseCgroupFile(path string) (map[string]string, error) {
f, err := os.Open(path)
if err != nil {
@@ -274,12 +250,7 @@ func ParseCgroupFile(path string) (map[string]string, error) {
}
defer f.Close()
return parseCgroupFromReader(f)
}
// helper function for ParseCgroupFile to make testing easier
func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
s := bufio.NewScanner(r)
s := bufio.NewScanner(f)
cgroups := make(map[string]string)
for s.Scan() {
@@ -288,16 +259,7 @@ func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
}
text := s.Text()
// from cgroups(7):
// /proc/[pid]/cgroup
// ...
// For each cgroup hierarchy ... there is one entry
// containing three colon-separated fields of the form:
// hierarchy-ID:subsystem-list:cgroup-path
parts := strings.SplitN(text, ":", 3)
if len(parts) < 3 {
return nil, fmt.Errorf("invalid cgroup entry: must contain at least two colons: %v", text)
}
parts := strings.Split(text, ":")
for _, subs := range strings.Split(parts[1], ",") {
cgroups[subs] = parts[2]
@@ -329,7 +291,8 @@ func PathExists(path string) bool {
func EnterPid(cgroupPaths map[string]string, pid int) error {
for _, path := range cgroupPaths {
if PathExists(path) {
if err := WriteCgroupProc(path, pid); err != nil {
if err := ioutil.WriteFile(filepath.Join(path, "cgroup.procs"),
[]byte(strconv.Itoa(pid)), 0700); err != nil {
return err
}
}
@@ -398,7 +361,7 @@ func GetAllPids(path string) ([]int, error) {
// collect pids from all sub-cgroups
err := filepath.Walk(path, func(p string, info os.FileInfo, iErr error) error {
dir, file := filepath.Split(p)
if file != CgroupProcesses {
if file != "cgroup.procs" {
return nil
}
if iErr != nil {
@@ -413,20 +376,3 @@ func GetAllPids(path string) ([]int, error) {
})
return pids, err
}
// WriteCgroupProc writes the specified pid into the cgroup's cgroup.procs file
func WriteCgroupProc(dir string, pid int) error {
// Normally dir should not be empty, one case is that cgroup subsystem
// is not mounted, we will get empty dir, and we want it fail here.
if dir == "" {
return fmt.Errorf("no such directory for %s", CgroupProcesses)
}
// Dont attach any pid to the cgroup if -1 is specified as a pid
if pid != -1 {
if err := ioutil.WriteFile(filepath.Join(dir, CgroupProcesses), []byte(strconv.Itoa(pid)), 0700); err != nil {
return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err)
}
}
return nil
}

View File

@@ -33,7 +33,7 @@ type Seccomp struct {
Syscalls []*Syscall `json:"syscalls"`
}
// Action is taken upon rule match in Seccomp
// An action to be taken upon rule match in Seccomp
type Action int
const (
@@ -44,7 +44,7 @@ const (
Trace
)
// Operator is a comparison operator to be used when matching syscall arguments in Seccomp
// A comparison operator to be used when matching syscall arguments in Seccomp
type Operator int
const (
@@ -57,7 +57,7 @@ const (
MaskEqualTo
)
// Arg is a rule to match a specific syscall argument in Seccomp
// A rule to match a specific syscall argument in Seccomp
type Arg struct {
Index uint `json:"index"`
Value uint64 `json:"value"`
@@ -65,7 +65,7 @@ type Arg struct {
Op Operator `json:"op"`
}
// Syscall is a rule to match a syscall in Seccomp
// An rule to match a syscall in Seccomp
type Syscall struct {
Name string `json:"name"`
Action Action `json:"action"`
@@ -187,10 +187,6 @@ type Config struct {
// Labels are user defined metadata that is stored in the config and populated on the state
Labels []string `json:"labels"`
// NoNewKeyring will not allocated a new session keyring for the container. It will use the
// callers keyring in this case.
NoNewKeyring bool `json:"no_new_keyring"`
}
type Hooks struct {
@@ -265,7 +261,7 @@ type Hook interface {
Run(HookState) error
}
// NewFunctionHook will call the provided function when the hook is run.
// NewFunctionHooks will call the provided function when the hook is run.
func NewFunctionHook(f func(HookState) error) FuncHook {
return FuncHook{
run: f,
@@ -288,7 +284,7 @@ type Command struct {
Timeout *time.Duration `json:"timeout"`
}
// NewCommandHook will execute the provided command when the hook is run.
// NewCommandHooks will execute the provided command when the hook is run.
func NewCommandHook(cmd Command) CommandHook {
return CommandHook{
Command: cmd,

View File

@@ -4,7 +4,7 @@ package configs
import "fmt"
// HostUID gets the root uid for the process on host which could be non-zero
// Gets the root uid for the process on host which could be non-zero
// when user namespaces are enabled.
func (c Config) HostUID() (int, error) {
if c.Namespaces.Contains(NEWUSER) {
@@ -21,7 +21,7 @@ func (c Config) HostUID() (int, error) {
return 0, nil
}
// HostGID gets the root gid for the process on host which could be non-zero
// Gets the root gid for the process on host which could be non-zero
// when user namespaces are enabled.
func (c Config) HostGID() (int, error) {
if c.Namespaces.Contains(NEWUSER) {

View File

@@ -3,7 +3,7 @@
package configs
var (
// DefaultSimpleDevices are devices that are to be both allowed and created.
// These are devices that are to be both allowed and created.
DefaultSimpleDevices = []*Device{
// /dev/null and zero
{

View File

@@ -7,7 +7,6 @@ import (
"strings"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/selinux"
)
type Validator interface {
@@ -81,10 +80,6 @@ func (v *ConfigValidator) security(config *configs.Config) error {
!config.Namespaces.Contains(configs.NEWNS) {
return fmt.Errorf("unable to restrict sys entries without a private MNT namespace")
}
if config.ProcessLabel != "" && !selinux.SelinuxEnabled() {
return fmt.Errorf("selinux label is specified in config, but selinux is disabled or not supported")
}
return nil
}

View File

@@ -1,11 +0,0 @@
package libcontainer
import (
"errors"
)
// NewConsole returns an initalized console that can be used within a container by copying bytes
// from the master side to the slave that is attached as the tty for the container's init process.
func NewConsole(uid, gid int) (Console, error) {
return nil, errors.New("libcontainer console is not supported on Solaris")
}

View File

@@ -1,4 +1,4 @@
// Package libcontainer provides a native Go implementation for creating containers
// Libcontainer provides a native Go implementation for creating containers
// with namespaces, cgroups, capabilities, and filesystem access controls.
// It allows you to manage the lifecycle of the container performing additional operations
// after the container is created.
@@ -11,20 +11,24 @@ import (
"github.com/opencontainers/runc/libcontainer/configs"
)
// Status is the status of a container.
// The status of a container.
type Status int
const (
// Created is the status that denotes the container exists but has not been run yet.
// The container exists but has not been run yet
Created Status = iota
// Running is the status that denotes the container exists and is running.
// The container exists and is running.
Running
// Pausing is the status that denotes the container exists, it is in the process of being paused.
// The container exists, it is in the process of being paused.
Pausing
// Paused is the status that denotes the container exists, but all its processes are paused.
// The container exists, but all its processes are paused.
Paused
// Stopped is the status that denotes the container does not have a created or running process.
Stopped
// The container does not exist.
Destroyed
)
func (s Status) String() string {
@@ -37,8 +41,8 @@ func (s Status) String() string {
return "pausing"
case Paused:
return "paused"
case Stopped:
return "stopped"
case Destroyed:
return "destroyed"
default:
return "unknown"
}
@@ -63,7 +67,7 @@ type BaseState struct {
Config configs.Config `json:"config"`
}
// BaseContainer is a libcontainer container object.
// A libcontainer container object.
//
// Each container is thread-safe within the same process. Since a container can
// be destroyed by a separate process, any function may return that the container
@@ -76,13 +80,13 @@ type BaseContainer interface {
//
// errors:
// ContainerDestroyed - Container no longer exists,
// SystemError - System error.
// Systemerror - System error.
Status() (Status, error)
// State returns the current container's state information.
//
// errors:
// SystemError - System error.
// Systemerror - System error.
State() (*State, error)
// Returns the current config of the container.
@@ -92,7 +96,7 @@ type BaseContainer interface {
//
// errors:
// ContainerDestroyed - Container no longer exists,
// SystemError - System error.
// Systemerror - System error.
//
// Some of the returned PIDs may no longer refer to processes in the Container, unless
// the Container state is PAUSED in which case every PID in the slice is valid.
@@ -102,7 +106,7 @@ type BaseContainer interface {
//
// errors:
// ContainerDestroyed - Container no longer exists,
// SystemError - System error.
// Systemerror - System error.
Stats() (*Stats, error)
// Set resources of container as configured
@@ -110,7 +114,7 @@ type BaseContainer interface {
// We can use this to change resources when containers are running.
//
// errors:
// SystemError - System error.
// Systemerror - System error.
Set(config configs.Config) error
// Start a process inside the container. Returns error if process fails to
@@ -120,38 +124,21 @@ type BaseContainer interface {
// ContainerDestroyed - Container no longer exists,
// ConfigInvalid - config is invalid,
// ContainerPaused - Container is paused,
// SystemError - System error.
// Systemerror - System error.
Start(process *Process) (err error)
// Run immediatly starts the process inside the conatiner. Returns error if process
// fails to start. It does not block waiting for the exec fifo after start returns but
// opens the fifo after start returns.
//
// errors:
// ContainerDestroyed - Container no longer exists,
// ConfigInvalid - config is invalid,
// ContainerPaused - Container is paused,
// SystemError - System error.
Run(process *Process) (err error)
// Destroys the container after killing all running processes.
//
// Any event registrations are removed before the container is destroyed.
// No error is returned if the container is already destroyed.
//
// errors:
// SystemError - System error.
// Systemerror - System error.
Destroy() error
// Signal sends the provided signal code to the container's initial process.
//
// errors:
// SystemError - System error.
// Systemerror - System error.
Signal(s os.Signal) error
// Exec signals the container to exec the users process at the end of the init.
//
// errors:
// SystemError - System error.
Exec() error
}

View File

@@ -22,7 +22,6 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/criurpc"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/utils"
"github.com/syndtr/gocapability/capability"
"github.com/vishvananda/netlink/nl"
@@ -31,19 +30,18 @@ import (
const stdioFdCount = 3
type linuxContainer struct {
id string
root string
config *configs.Config
cgroupManager cgroups.Manager
initPath string
initArgs []string
initProcess parentProcess
initProcessStartTime string
criuPath string
m sync.Mutex
criuVersion int
state containerState
created time.Time
id string
root string
config *configs.Config
cgroupManager cgroups.Manager
initPath string
initArgs []string
initProcess parentProcess
criuPath string
m sync.Mutex
criuVersion int
state containerState
created time.Time
}
// State represents a running container's state
@@ -64,7 +62,7 @@ type State struct {
ExternalDescriptors []string `json:"external_descriptors,omitempty"`
}
// Container is a libcontainer container object.
// A libcontainer container object.
//
// Each container is thread-safe within the same process. Since a container can
// be destroyed by a separate process, any function may return that the container
@@ -86,7 +84,7 @@ type Container interface {
// Systemerror - System error.
Restore(process *Process, criuOpts *CriuOpts) error
// If the Container state is RUNNING, sets the Container state to PAUSING and pauses
// If the Container state is RUNNING or PAUSING, sets the Container state to PAUSING and pauses
// the execution of any user processes. Asynchronously, when the container finished being paused the
// state is changed to PAUSED.
// If the Container state is PAUSED, do nothing.
@@ -143,7 +141,7 @@ func (c *linuxContainer) State() (*State, error) {
func (c *linuxContainer) Processes() ([]int, error) {
pids, err := c.cgroupManager.GetAllPids()
if err != nil {
return nil, newSystemErrorWithCause(err, "getting all container pids from cgroups")
return nil, newSystemError(err)
}
return pids, nil
}
@@ -154,14 +152,14 @@ func (c *linuxContainer) Stats() (*Stats, error) {
stats = &Stats{}
)
if stats.CgroupStats, err = c.cgroupManager.GetStats(); err != nil {
return stats, newSystemErrorWithCause(err, "getting container stats from cgroups")
return stats, newSystemError(err)
}
for _, iface := range c.config.Networks {
switch iface.Type {
case "veth":
istats, err := getNetworkInterfaceStats(iface.HostInterfaceName)
if err != nil {
return stats, newSystemErrorWithCausef(err, "getting network stats for interface %q", iface.HostInterfaceName)
return stats, newSystemError(err)
}
stats.Interfaces = append(stats.Interfaces, istats)
}
@@ -172,13 +170,6 @@ func (c *linuxContainer) Stats() (*Stats, error) {
func (c *linuxContainer) Set(config configs.Config) error {
c.m.Lock()
defer c.m.Unlock()
status, err := c.currentStatus()
if err != nil {
return err
}
if status == Stopped {
return newGenericError(fmt.Errorf("container not running"), ContainerNotRunning)
}
c.config = &config
return c.cgroupManager.Set(c.config)
}
@@ -190,76 +181,28 @@ func (c *linuxContainer) Start(process *Process) error {
if err != nil {
return err
}
return c.start(process, status == Stopped)
}
func (c *linuxContainer) Run(process *Process) error {
c.m.Lock()
defer c.m.Unlock()
status, err := c.currentStatus()
doInit := status == Destroyed
parent, err := c.newParentProcess(process, doInit)
if err != nil {
return err
}
if err := c.start(process, status == Stopped); err != nil {
return err
}
if status == Stopped {
return c.exec()
}
return nil
}
func (c *linuxContainer) Exec() error {
c.m.Lock()
defer c.m.Unlock()
return c.exec()
}
func (c *linuxContainer) exec() error {
path := filepath.Join(c.root, execFifoFilename)
f, err := os.OpenFile(path, os.O_RDONLY, 0)
if err != nil {
return newSystemErrorWithCause(err, "open exec fifo for reading")
}
defer f.Close()
data, err := ioutil.ReadAll(f)
if err != nil {
return err
}
if len(data) > 0 {
os.Remove(path)
return nil
}
return fmt.Errorf("cannot start an already running container")
}
func (c *linuxContainer) start(process *Process, isInit bool) error {
parent, err := c.newParentProcess(process, isInit)
if err != nil {
return newSystemErrorWithCause(err, "creating new parent process")
return newSystemError(err)
}
if err := parent.start(); err != nil {
// terminate the process to ensure that it properly is reaped.
if err := parent.terminate(); err != nil {
logrus.Warn(err)
}
return newSystemErrorWithCause(err, "starting container process")
return newSystemError(err)
}
// generate a timestamp indicating when the container was started
c.created = time.Now().UTC()
c.state = &runningState{
c: c,
}
if isInit {
c.state = &createdState{
c: c,
}
state, err := c.updateState(parent)
if err != nil {
if doInit {
if err := c.updateState(parent); err != nil {
return err
}
c.initProcessStartTime = state.InitProcessStartTime
if c.config.Hooks != nil {
s := configs.HookState{
Version: c.config.Version,
@@ -268,12 +211,12 @@ func (c *linuxContainer) start(process *Process, isInit bool) error {
Root: c.config.Rootfs,
BundlePath: utils.SearchLabels(c.config.Labels, "bundle"),
}
for i, hook := range c.config.Hooks.Poststart {
for _, hook := range c.config.Hooks.Poststart {
if err := hook.Run(s); err != nil {
if err := parent.terminate(); err != nil {
logrus.Warn(err)
}
return newSystemErrorWithCausef(err, "running poststart hook %d", i)
return newSystemError(err)
}
}
}
@@ -283,7 +226,7 @@ func (c *linuxContainer) start(process *Process, isInit bool) error {
func (c *linuxContainer) Signal(s os.Signal) error {
if err := c.initProcess.signal(s); err != nil {
return newSystemErrorWithCause(err, "signaling init process")
return newSystemError(err)
}
return nil
}
@@ -291,23 +234,19 @@ func (c *linuxContainer) Signal(s os.Signal) error {
func (c *linuxContainer) newParentProcess(p *Process, doInit bool) (parentProcess, error) {
parentPipe, childPipe, err := newPipe()
if err != nil {
return nil, newSystemErrorWithCause(err, "creating new init pipe")
return nil, newSystemError(err)
}
rootDir, err := os.Open(c.root)
cmd, err := c.commandTemplate(p, childPipe)
if err != nil {
return nil, err
}
cmd, err := c.commandTemplate(p, childPipe, rootDir)
if err != nil {
return nil, newSystemErrorWithCause(err, "creating new command template")
return nil, newSystemError(err)
}
if !doInit {
return c.newSetnsProcess(p, cmd, parentPipe, childPipe, rootDir)
return c.newSetnsProcess(p, cmd, parentPipe, childPipe)
}
return c.newInitProcess(p, cmd, parentPipe, childPipe, rootDir)
return c.newInitProcess(p, cmd, parentPipe, childPipe)
}
func (c *linuxContainer) commandTemplate(p *Process, childPipe, rootDir *os.File) (*exec.Cmd, error) {
func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.Cmd, error) {
cmd := &exec.Cmd{
Path: c.initPath,
Args: c.initArgs,
@@ -319,10 +258,8 @@ func (c *linuxContainer) commandTemplate(p *Process, childPipe, rootDir *os.File
if cmd.SysProcAttr == nil {
cmd.SysProcAttr = &syscall.SysProcAttr{}
}
cmd.ExtraFiles = append(p.ExtraFiles, childPipe, rootDir)
cmd.Env = append(cmd.Env,
fmt.Sprintf("_LIBCONTAINER_INITPIPE=%d", stdioFdCount+len(cmd.ExtraFiles)-2),
fmt.Sprintf("_LIBCONTAINER_STATEDIR=%d", stdioFdCount+len(cmd.ExtraFiles)-1))
cmd.ExtraFiles = append(p.ExtraFiles, childPipe)
cmd.Env = append(cmd.Env, fmt.Sprintf("_LIBCONTAINER_INITPIPE=%d", stdioFdCount+len(cmd.ExtraFiles)-1))
// NOTE: when running a container with no PID namespace and the parent process spawning the container is
// PID1 the pdeathsig is being delivered to the container's init process by the kernel for some reason
// even with the parent still running.
@@ -332,7 +269,7 @@ func (c *linuxContainer) commandTemplate(p *Process, childPipe, rootDir *os.File
return cmd, nil
}
func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe, rootDir *os.File) (*initProcess, error) {
func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*initProcess, error) {
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard))
nsMaps := make(map[configs.NamespaceType]string)
for _, ns := range c.config.Namespaces {
@@ -355,15 +292,14 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c
process: p,
bootstrapData: data,
sharePidns: sharePidns,
rootDir: rootDir,
}, nil
}
func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe, rootDir *os.File) (*setnsProcess, error) {
func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*setnsProcess, error) {
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initSetns))
state, err := c.currentState()
if err != nil {
return nil, newSystemErrorWithCause(err, "getting container's current state")
return nil, newSystemError(err)
}
// for setns process, we dont have to set cloneflags as the process namespaces
// will only be set via setns syscall
@@ -380,7 +316,6 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe,
config: c.newInitConfig(p),
process: p,
bootstrapData: data,
rootDir: rootDir,
}, nil
}
@@ -399,7 +334,6 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
AppArmorProfile: c.config.AppArmorProfile,
ProcessLabel: c.config.ProcessLabel,
Rlimits: c.config.Rlimits,
ExecFifoPath: filepath.Join(c.root, execFifoFilename),
}
if process.NoNewPrivileges != nil {
cfg.NoNewPrivileges = *process.NoNewPrivileges
@@ -437,16 +371,15 @@ func (c *linuxContainer) Pause() error {
if err != nil {
return err
}
switch status {
case Running, Created:
if err := c.cgroupManager.Freeze(configs.Frozen); err != nil {
return err
}
return c.state.transition(&pausedState{
c: c,
})
if status != Running {
return newGenericError(fmt.Errorf("container not running"), ContainerNotRunning)
}
return newGenericError(fmt.Errorf("container not running: %s", status), ContainerNotRunning)
if err := c.cgroupManager.Freeze(configs.Frozen); err != nil {
return err
}
return c.state.transition(&pausedState{
c: c,
})
}
func (c *linuxContainer) Resume() error {
@@ -475,13 +408,13 @@ func (c *linuxContainer) NotifyMemoryPressure(level PressureLevel) (<-chan struc
return notifyMemoryPressure(c.cgroupManager.GetPaths(), level)
}
// checkCriuVersion checks Criu version greater than or equal to minVersion
func (c *linuxContainer) checkCriuVersion(minVersion string) error {
// check Criu version greater than or equal to min_version
func (c *linuxContainer) checkCriuVersion(min_version string) error {
var x, y, z, versionReq int
_, err := fmt.Sscanf(minVersion, "%d.%d.%d\n", &x, &y, &z) // 1.5.2
_, err := fmt.Sscanf(min_version, "%d.%d.%d\n", &x, &y, &z) // 1.5.2
if err != nil {
_, err = fmt.Sscanf(minVersion, "Version: %d.%d\n", &x, &y) // 1.6
_, err = fmt.Sscanf(min_version, "Version: %d.%d\n", &x, &y) // 1.6
}
versionReq = x*10000 + y*100 + z
@@ -526,7 +459,7 @@ func (c *linuxContainer) checkCriuVersion(minVersion string) error {
c.criuVersion = x*10000 + y*100 + z
if c.criuVersion < versionReq {
return fmt.Errorf("CRIU version must be %s or higher", minVersion)
return fmt.Errorf("CRIU version must be %s or higher", min_version)
}
return nil
@@ -674,27 +607,6 @@ func (c *linuxContainer) addCriuRestoreMount(req *criurpc.CriuReq, m *configs.Mo
req.Opts.ExtMnt = append(req.Opts.ExtMnt, extMnt)
}
func (c *linuxContainer) restoreNetwork(req *criurpc.CriuReq, criuOpts *CriuOpts) {
for _, iface := range c.config.Networks {
switch iface.Type {
case "veth":
veth := new(criurpc.CriuVethPair)
veth.IfOut = proto.String(iface.HostInterfaceName)
veth.IfIn = proto.String(iface.Name)
req.Opts.Veths = append(req.Opts.Veths, veth)
break
case "loopback":
break
}
}
for _, i := range criuOpts.VethPairs {
veth := new(criurpc.CriuVethPair)
veth.IfOut = proto.String(i.HostInterfaceName)
veth.IfIn = proto.String(i.ContainerInterfaceName)
req.Opts.Veths = append(req.Opts.Veths, veth)
}
}
func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
c.m.Lock()
defer c.m.Unlock()
@@ -778,9 +690,23 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
break
}
}
if criuOpts.EmptyNs&syscall.CLONE_NEWNET == 0 {
c.restoreNetwork(req, criuOpts)
for _, iface := range c.config.Networks {
switch iface.Type {
case "veth":
veth := new(criurpc.CriuVethPair)
veth.IfOut = proto.String(iface.HostInterfaceName)
veth.IfIn = proto.String(iface.Name)
req.Opts.Veths = append(req.Opts.Veths, veth)
break
case "loopback":
break
}
}
for _, i := range criuOpts.VethPairs {
veth := new(criurpc.CriuVethPair)
veth.IfOut = proto.String(i.HostInterfaceName)
veth.IfIn = proto.String(i.ContainerInterfaceName)
req.Opts.Veths = append(req.Opts.Veths, veth)
}
// append optional manage cgroups mode
@@ -1029,9 +955,9 @@ func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Proc
Pid: int(notify.GetPid()),
Root: c.config.Rootfs,
}
for i, hook := range c.config.Hooks.Prestart {
for _, hook := range c.config.Hooks.Prestart {
if err := hook.Run(s); err != nil {
return newSystemErrorWithCausef(err, "running prestart hook %d", i)
return newSystemError(err)
}
}
}
@@ -1048,7 +974,7 @@ func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Proc
}); err != nil {
return err
}
if _, err := c.updateState(r); err != nil {
if err := c.updateState(r); err != nil {
return err
}
if err := os.Remove(filepath.Join(c.root, "checkpoint")); err != nil {
@@ -1060,17 +986,13 @@ func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Proc
return nil
}
func (c *linuxContainer) updateState(process parentProcess) (*State, error) {
func (c *linuxContainer) updateState(process parentProcess) error {
c.initProcess = process
state, err := c.currentState()
if err != nil {
return nil, err
return err
}
err = c.saveState(state)
if err != nil {
return nil, err
}
return state, nil
return c.saveState(state)
}
func (c *linuxContainer) saveState(s *State) error {
@@ -1105,75 +1027,37 @@ func (c *linuxContainer) refreshState() error {
if paused {
return c.state.transition(&pausedState{c: c})
}
t, err := c.runType()
running, err := c.isRunning()
if err != nil {
return err
}
switch t {
case Created:
return c.state.transition(&createdState{c: c})
case Running:
if running {
return c.state.transition(&runningState{c: c})
}
return c.state.transition(&stoppedState{c: c})
}
// doesInitProcessExist checks if the init process is still the same process
// as the initial one, it could happen that the original process has exited
// and a new process has been created with the same pid, in this case, the
// container would already be stopped.
func (c *linuxContainer) doesInitProcessExist(initPid int) (bool, error) {
startTime, err := system.GetProcessStartTime(initPid)
if err != nil {
return false, newSystemErrorWithCausef(err, "getting init process %d start time", initPid)
}
if c.initProcessStartTime != startTime {
func (c *linuxContainer) isRunning() (bool, error) {
if c.initProcess == nil {
return false, nil
}
return true, nil
}
func (c *linuxContainer) runType() (Status, error) {
if c.initProcess == nil {
return Stopped, nil
}
pid := c.initProcess.pid()
// return Running if the init process is alive
if err := syscall.Kill(pid, 0); err != nil {
if err := syscall.Kill(c.initProcess.pid(), 0); err != nil {
if err == syscall.ESRCH {
// It means the process does not exist anymore, could happen when the
// process exited just when we call the function, we should not return
// error in this case.
return Stopped, nil
return false, nil
}
return Stopped, newSystemErrorWithCausef(err, "sending signal 0 to pid %d", pid)
return false, newSystemError(err)
}
// check if the process is still the original init process.
exist, err := c.doesInitProcessExist(pid)
if !exist || err != nil {
return Stopped, err
}
// check if the process that is running is the init process or the user's process.
// this is the difference between the container Running and Created.
environ, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/environ", pid))
if err != nil {
return Stopped, newSystemErrorWithCausef(err, "reading /proc/%d/environ", pid)
}
check := []byte("_LIBCONTAINER")
if bytes.Contains(environ, check) {
return Created, nil
}
return Running, nil
return true, nil
}
func (c *linuxContainer) isPaused() (bool, error) {
data, err := ioutil.ReadFile(filepath.Join(c.cgroupManager.GetPaths()["freezer"], "freezer.state"))
if err != nil {
// If freezer cgroup is not mounted, the container would just be not paused.
if os.IsNotExist(err) {
return false, nil
}
return false, newSystemErrorWithCause(err, "checking if container is paused")
return false, newSystemError(err)
}
return bytes.Equal(bytes.TrimSpace(data), []byte("FROZEN")), nil
}
@@ -1241,7 +1125,7 @@ func (c *linuxContainer) orderNamespacePaths(namespaces map[configs.NamespaceTyp
}
// only set to join this namespace if it exists
if _, err := os.Lstat(p); err != nil {
return nil, newSystemErrorWithCausef(err, "running lstat on namespace path %q", p)
return nil, newSystemError(err)
}
// do not allow namespace path with comma as we use it to separate
// the namespace paths

View File

@@ -1,20 +0,0 @@
package libcontainer
// State represents a running container's state
type State struct {
BaseState
// Platform specific fields below here
}
// A libcontainer container object.
//
// Each container is thread-safe within the same process. Since a container can
// be destroyed by a separate process, any function may return that the container
// was not found.
type Container interface {
BaseContainer
// Methods below here are platform specific
}

View File

@@ -3,13 +3,13 @@
package libcontainer
// cgroup restoring strategy provided by criu
type cgMode uint32
type cg_mode uint32
const (
CRIU_CG_MODE_SOFT cgMode = 3 + iota // restore cgroup properties if only dir created by criu
CRIU_CG_MODE_FULL // always restore all cgroups and their properties
CRIU_CG_MODE_STRICT // restore all, requiring them to not present in the system
CRIU_CG_MODE_DEFAULT // the same as CRIU_CG_MODE_SOFT
CRIU_CG_MODE_SOFT cg_mode = 3 + iota // restore cgroup properties if only dir created by criu
CRIU_CG_MODE_FULL // always restore all cgroups and their properties
CRIU_CG_MODE_STRICT // restore all, requiring them to not present in the system
CRIU_CG_MODE_DEFAULT // the same as CRIU_CG_MODE_SOFT
)
type CriuPageServerInfo struct {
@@ -32,6 +32,6 @@ type CriuOpts struct {
FileLocks bool // handle file locks, for safety
PageServer CriuPageServerInfo // allow to dump to criu page server
VethPairs []VethPairName // pass the veth to criu when restore
ManageCgroupsMode cgMode // dump or restore cgroup mode
ManageCgroupsMode cg_mode // dump or restore cgroup mode
EmptyNs uint32 // don't c/r properties for namespace from this mask
}

View File

@@ -2,7 +2,7 @@ package libcontainer
import "io"
// ErrorCode is the API error code type.
// API error code type.
type ErrorCode int
// API error codes.
@@ -56,7 +56,7 @@ func (c ErrorCode) String() string {
}
}
// Error is the API error type.
// API Error type.
type Error interface {
error

View File

@@ -23,12 +23,11 @@ import (
)
const (
stateFilename = "state.json"
execFifoFilename = "exec.fifo"
stateFilename = "state.json"
)
var (
idRegex = regexp.MustCompile(`^[\w+-\.]+$`)
idRegex = regexp.MustCompile(`^[\w-\.]+$`)
maxIdLen = 1024
)
@@ -159,34 +158,13 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
if err := l.Validator.Validate(config); err != nil {
return nil, newGenericError(err, ConfigInvalid)
}
uid, err := config.HostUID()
if err != nil {
return nil, newGenericError(err, SystemError)
}
gid, err := config.HostGID()
if err != nil {
return nil, newGenericError(err, SystemError)
}
containerRoot := filepath.Join(l.Root, id)
if _, err := os.Stat(containerRoot); err == nil {
return nil, newGenericError(fmt.Errorf("container with id exists: %v", id), IdInUse)
} else if !os.IsNotExist(err) {
return nil, newGenericError(err, SystemError)
}
if err := os.MkdirAll(containerRoot, 0711); err != nil {
return nil, newGenericError(err, SystemError)
}
if err := os.Chown(containerRoot, uid, gid); err != nil {
return nil, newGenericError(err, SystemError)
}
fifoName := filepath.Join(containerRoot, execFifoFilename)
oldMask := syscall.Umask(0000)
if err := syscall.Mkfifo(fifoName, 0622); err != nil {
syscall.Umask(oldMask)
return nil, newGenericError(err, SystemError)
}
syscall.Umask(oldMask)
if err := os.Chown(fifoName, uid, gid); err != nil {
if err := os.MkdirAll(containerRoot, 0700); err != nil {
return nil, newGenericError(err, SystemError)
}
c := &linuxContainer{
@@ -217,18 +195,17 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
fds: state.ExternalDescriptors,
}
c := &linuxContainer{
initProcess: r,
initProcessStartTime: state.InitProcessStartTime,
id: id,
config: &state.Config,
initPath: l.InitPath,
initArgs: l.InitArgs,
criuPath: l.CriuPath,
cgroupManager: l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths),
root: containerRoot,
created: state.Created,
initProcess: r,
id: id,
config: &state.Config,
initPath: l.InitPath,
initArgs: l.InitArgs,
criuPath: l.CriuPath,
cgroupManager: l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths),
root: containerRoot,
created: state.Created,
}
c.state = &loadedState{c: c}
c.state = &createdState{c: c, s: Created}
if err := c.refreshState(); err != nil {
return nil, err
}
@@ -242,18 +219,10 @@ func (l *LinuxFactory) Type() string {
// StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state
// This is a low level implementation detail of the reexec and should not be consumed externally
func (l *LinuxFactory) StartInitialization() (err error) {
var pipefd, rootfd int
for k, v := range map[string]*int{
"_LIBCONTAINER_INITPIPE": &pipefd,
"_LIBCONTAINER_STATEDIR": &rootfd,
} {
s := os.Getenv(k)
i, err := strconv.Atoi(s)
if err != nil {
return fmt.Errorf("unable to convert %s=%s to int", k, s)
}
*v = i
fdStr := os.Getenv("_LIBCONTAINER_INITPIPE")
pipefd, err := strconv.Atoi(fdStr)
if err != nil {
return fmt.Errorf("error converting env var _LIBCONTAINER_INITPIPE(%q) to an int: %s", fdStr, err)
}
var (
pipe = os.NewFile(uintptr(pipefd), "pipe")
@@ -262,7 +231,6 @@ func (l *LinuxFactory) StartInitialization() (err error) {
// clear the current process's environment to clean any libcontainer
// specific env vars.
os.Clearenv()
var i initer
defer func() {
// We have an error during the initialization of the container's init,
@@ -271,22 +239,24 @@ func (l *LinuxFactory) StartInitialization() (err error) {
// this defer function will never be called.
if _, ok := i.(*linuxStandardInit); ok {
// Synchronisation only necessary for standard init.
if werr := utils.WriteJSON(pipe, syncT{procError}); werr != nil {
if err := utils.WriteJSON(pipe, syncT{procError}); err != nil {
panic(err)
}
}
if werr := utils.WriteJSON(pipe, newSystemError(err)); werr != nil {
if err := utils.WriteJSON(pipe, newSystemError(err)); err != nil {
panic(err)
}
// ensure that this pipe is always closed
pipe.Close()
}()
defer func() {
if e := recover(); e != nil {
err = fmt.Errorf("panic from initialization: %v, %v", e, string(debug.Stack()))
}
}()
i, err = newContainerInit(it, pipe, rootfd)
i, err = newContainerInit(it, pipe)
if err != nil {
return err
}

View File

@@ -1,7 +1,6 @@
package libcontainer
import (
"fmt"
"io"
"text/template"
"time"
@@ -52,21 +51,6 @@ func newGenericError(err error, c ErrorCode) Error {
}
func newSystemError(err error) Error {
return createSystemError(err, "")
}
func newSystemErrorWithCausef(err error, cause string, v ...interface{}) Error {
return createSystemError(err, fmt.Sprintf(cause, v...))
}
func newSystemErrorWithCause(err error, cause string) Error {
return createSystemError(err, cause)
}
// createSystemError creates the specified error with the correct number of
// stack frames skipped. This is only to be called by the other functions for
// formatting the error.
func createSystemError(err error, cause string) Error {
if le, ok := err.(Error); ok {
return le
}
@@ -74,8 +58,7 @@ func createSystemError(err error, cause string) Error {
Timestamp: time.Now(),
Err: err,
ECode: SystemError,
Cause: cause,
Stack: stacktrace.Capture(2),
Stack: stacktrace.Capture(1),
}
if err != nil {
gerr.Message = err.Error()
@@ -87,17 +70,12 @@ type genericError struct {
Timestamp time.Time
ECode ErrorCode
Err error `json:"-"`
Cause string
Message string
Stack stacktrace.Stacktrace
}
func (e *genericError) Error() string {
if e.Cause == "" {
return e.Message
}
frame := e.Stack.Frames[0]
return fmt.Sprintf("%s:%d: %s caused %q", frame.File, frame.Line, e.Cause, e.Message)
return e.Message
}
func (e *genericError) Code() ErrorCode {

View File

@@ -58,14 +58,13 @@ type initConfig struct {
PassedFilesCount int `json:"passed_files_count"`
ContainerId string `json:"containerid"`
Rlimits []configs.Rlimit `json:"rlimits"`
ExecFifoPath string `json:"start_pipe_path"`
}
type initer interface {
Init() error
}
func newContainerInit(t initType, pipe *os.File, stateDirFD int) (initer, error) {
func newContainerInit(t initType, pipe *os.File) (initer, error) {
var config *initConfig
if err := json.NewDecoder(pipe).Decode(&config); err != nil {
return nil, err
@@ -80,10 +79,9 @@ func newContainerInit(t initType, pipe *os.File, stateDirFD int) (initer, error)
}, nil
case initStandard:
return &linuxStandardInit{
pipe: pipe,
parentPid: syscall.Getppid(),
config: config,
stateDirFD: stateDirFD,
pipe: pipe,
parentPid: syscall.Getppid(),
config: config,
}, nil
}
return nil, fmt.Errorf("unknown init type %q", t)

View File

@@ -4,9 +4,9 @@ package keyctl
import (
"fmt"
"strconv"
"strings"
"syscall"
"strings"
"strconv"
"unsafe"
)
@@ -17,7 +17,7 @@ const KEYCTL_DESCRIBE = 6
type KeySerial uint32
func JoinSessionKeyring(name string) (KeySerial, error) {
var _name *byte
var _name *byte = nil
var err error
if len(name) > 0 {
@@ -34,7 +34,7 @@ func JoinSessionKeyring(name string) (KeySerial, error) {
return KeySerial(sessKeyId), nil
}
// ModKeyringPerm modifies permissions on a keyring by reading the current permissions,
// modify permissions on a keyring by reading the current permissions,
// anding the bits with the given mask (clearing permissions) and setting
// additional permission bits
func ModKeyringPerm(ringId KeySerial, mask, setbits uint32) error {
@@ -64,3 +64,4 @@ func ModKeyringPerm(ringId KeySerial, mask, setbits uint32) error {
return nil
}

View File

@@ -107,7 +107,7 @@ func SetFileLabel(path string, fileLabel string) error {
return nil
}
// SetFileCreateLabel tells the kernel the label for all files to be created
// Tell the kernel the label for all files to be created
func SetFileCreateLabel(fileLabel string) error {
if selinux.SelinuxEnabled() {
return selinux.Setfscreatecon(fileLabel)
@@ -115,7 +115,7 @@ func SetFileCreateLabel(fileLabel string) error {
return nil
}
// Relabel changes the label of path to the filelabel string.
// Change the label of path to the filelabel string.
// It changes the MCS label to s0 if shared is true.
// This will allow all containers to share the content.
func Relabel(path string, fileLabel string, shared bool) error {

View File

@@ -27,8 +27,7 @@ type Int32msg struct {
Value uint32
}
// Serialize serializes the message.
// Int32msg has the following representation
// int32msg has the following representation
// | nlattr len | nlattr type |
// | uint32 value |
func (msg *Int32msg) Serialize() []byte {
@@ -44,7 +43,7 @@ func (msg *Int32msg) Len() int {
return syscall_NLA_HDRLEN + 4
}
// Bytemsg has the following representation
// bytemsg has the following representation
// | nlattr len | nlattr type |
// | value | pad |
type Bytemsg struct {

View File

@@ -102,8 +102,8 @@ type IO struct {
}
// NewConsole creates new console for process and returns it
func (p *Process) NewConsole(rootuid, rootgid int) (Console, error) {
console, err := NewConsole(rootuid, rootgid)
func (p *Process) NewConsole(rootuid int) (Console, error) {
console, err := NewConsole(rootuid, rootuid)
if err != nil {
return nil, err
}

View File

@@ -51,7 +51,6 @@ type setnsProcess struct {
fds []string
process *Process
bootstrapData io.Reader
rootDir *os.File
}
func (p *setnsProcess) startTime() (string, error) {
@@ -70,49 +69,48 @@ func (p *setnsProcess) start() (err error) {
defer p.parentPipe.Close()
err = p.cmd.Start()
p.childPipe.Close()
p.rootDir.Close()
if err != nil {
return newSystemErrorWithCause(err, "starting setns process")
return newSystemError(err)
}
if p.bootstrapData != nil {
if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
return newSystemError(err)
}
}
if err = p.execSetns(); err != nil {
return newSystemErrorWithCause(err, "executing setns process")
return newSystemError(err)
}
if len(p.cgroupPaths) > 0 {
if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil {
return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid())
return newSystemError(err)
}
}
// set oom_score_adj
if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil {
return newSystemErrorWithCause(err, "setting oom score")
return newSystemError(err)
}
// set rlimits, this has to be done here because we lose permissions
// to raise the limits once we enter a user-namespace
if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
return newSystemErrorWithCause(err, "setting rlimits for process")
return newSystemError(err)
}
if err := utils.WriteJSON(p.parentPipe, p.config); err != nil {
return newSystemErrorWithCause(err, "writing config to pipe")
return newSystemError(err)
}
if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil {
return newSystemErrorWithCause(err, "calling shutdown on init pipe")
return newSystemError(err)
}
// wait for the child process to fully complete and receive an error message
// if one was encoutered
var ierr *genericError
if err := json.NewDecoder(p.parentPipe).Decode(&ierr); err != nil && err != io.EOF {
return newSystemErrorWithCause(err, "decoding init error from pipe")
return newSystemError(err)
}
// Must be done after Shutdown so the child will exit and we can wait for it.
if ierr != nil {
p.wait()
return ierr
return newSystemError(ierr)
}
return nil
}
@@ -125,7 +123,7 @@ func (p *setnsProcess) execSetns() error {
status, err := p.cmd.Process.Wait()
if err != nil {
p.cmd.Wait()
return newSystemErrorWithCause(err, "waiting on setns process to finish")
return newSystemError(err)
}
if !status.Success() {
p.cmd.Wait()
@@ -134,7 +132,7 @@ func (p *setnsProcess) execSetns() error {
var pid *pid
if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil {
p.cmd.Wait()
return newSystemErrorWithCause(err, "reading pid from init pipe")
return newSystemError(err)
}
process, err := os.FindProcess(pid.Pid)
if err != nil {
@@ -188,7 +186,6 @@ type initProcess struct {
process *Process
bootstrapData io.Reader
sharePidns bool
rootDir *os.File
}
func (p *initProcess) pid() int {
@@ -224,7 +221,6 @@ func (p *initProcess) execSetns() error {
return err
}
p.cmd.Process = process
p.process.ops = p
return nil
}
@@ -233,29 +229,28 @@ func (p *initProcess) start() error {
err := p.cmd.Start()
p.process.ops = p
p.childPipe.Close()
p.rootDir.Close()
if err != nil {
p.process.ops = nil
return newSystemErrorWithCause(err, "starting init process command")
return newSystemError(err)
}
if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
return err
}
if err := p.execSetns(); err != nil {
return newSystemErrorWithCause(err, "running exec setns process for init")
return newSystemError(err)
}
// Save the standard descriptor names before the container process
// can potentially move them (e.g., via dup2()). If we don't do this now,
// we won't know at checkpoint time which file descriptor to look up.
fds, err := getPipeFds(p.pid())
if err != nil {
return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid())
return newSystemError(err)
}
p.setExternalDescriptors(fds)
// Do this before syncing with child so that no children
// can escape the cgroup
if err := p.manager.Apply(p.pid()); err != nil {
return newSystemErrorWithCause(err, "applying cgroup configuration for process")
return newSystemError(err)
}
defer func() {
if err != nil {
@@ -264,10 +259,10 @@ func (p *initProcess) start() error {
}
}()
if err := p.createNetworkInterfaces(); err != nil {
return newSystemErrorWithCause(err, "creating nework interfaces")
return newSystemError(err)
}
if err := p.sendConfig(); err != nil {
return newSystemErrorWithCause(err, "sending config to init process")
return newSystemError(err)
}
var (
procSync syncT
@@ -283,21 +278,21 @@ loop:
if err == io.EOF {
break loop
}
return newSystemErrorWithCause(err, "decoding sync type from init pipe")
return newSystemError(err)
}
switch procSync.Type {
case procReady:
if err := p.manager.Set(p.config.Config); err != nil {
return newSystemErrorWithCause(err, "setting cgroup config for ready process")
return newSystemError(err)
}
// set oom_score_adj
if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil {
return newSystemErrorWithCause(err, "setting oom score for ready process")
return newSystemError(err)
}
// set rlimits, this has to be done here because we lose permissions
// to raise the limits once we enter a user-namespace
if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
return newSystemErrorWithCause(err, "setting rlimits for ready process")
return newSystemError(err)
}
// call prestart hooks
if !p.config.Config.Namespaces.Contains(configs.NEWNS) {
@@ -308,16 +303,16 @@ loop:
Pid: p.pid(),
Root: p.config.Config.Rootfs,
}
for i, hook := range p.config.Config.Hooks.Prestart {
for _, hook := range p.config.Config.Hooks.Prestart {
if err := hook.Run(s); err != nil {
return newSystemErrorWithCausef(err, "running prestart hook %d", i)
return newSystemError(err)
}
}
}
}
// Sync with child.
if err := utils.WriteJSON(p.parentPipe, syncT{procRun}); err != nil {
return newSystemErrorWithCause(err, "reading syncT run type")
return newSystemError(err)
}
sentRun = true
case procHooks:
@@ -329,22 +324,22 @@ loop:
Root: p.config.Config.Rootfs,
BundlePath: utils.SearchLabels(p.config.Config.Labels, "bundle"),
}
for i, hook := range p.config.Config.Hooks.Prestart {
for _, hook := range p.config.Config.Hooks.Prestart {
if err := hook.Run(s); err != nil {
return newSystemErrorWithCausef(err, "running prestart hook %d", i)
return newSystemError(err)
}
}
}
// Sync with child.
if err := utils.WriteJSON(p.parentPipe, syncT{procResume}); err != nil {
return newSystemErrorWithCause(err, "reading syncT resume type")
return newSystemError(err)
}
sentResume = true
case procError:
// wait for the child process to fully complete and receive an error message
// if one was encoutered
if err := dec.Decode(&ierr); err != nil && err != io.EOF {
return newSystemErrorWithCause(err, "decoding proc error from init")
return newSystemError(err)
}
if ierr != nil {
break loop
@@ -352,22 +347,22 @@ loop:
// Programmer error.
panic("No error following JSON procError payload.")
default:
return newSystemError(fmt.Errorf("invalid JSON payload from child"))
return newSystemError(fmt.Errorf("invalid JSON synchronisation payload from child"))
}
}
if !sentRun {
return newSystemErrorWithCause(ierr, "container init failed")
return newSystemError(fmt.Errorf("could not synchronise with container process: %v", ierr))
}
if p.config.Config.Namespaces.Contains(configs.NEWNS) && !sentResume {
return newSystemError(fmt.Errorf("could not synchronise after executing prestart hooks with container process"))
}
if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil {
return newSystemErrorWithCause(err, "shutting down init pipe")
return newSystemError(err)
}
// Must be done after Shutdown so the child will exit and we can wait for it.
if ierr != nil {
p.wait()
return ierr
return newSystemError(ierr)
}
return nil
}
@@ -452,7 +447,7 @@ func getPipeFds(pid int) ([]string, error) {
// InitializeIO creates pipes for use with the process's STDIO
// and returns the opposite side for each
func (p *Process) InitializeIO(rootuid, rootgid int) (i *IO, err error) {
func (p *Process) InitializeIO(rootuid int) (i *IO, err error) {
var fds []uintptr
i = &IO{}
// cleanup in case of an error
@@ -484,7 +479,7 @@ func (p *Process) InitializeIO(rootuid, rootgid int) (i *IO, err error) {
p.Stderr, i.Stderr = w, r
// change ownership of the pipes incase we are in a user namespace
for _, fd := range fds {
if err := syscall.Fchown(int(fd), rootuid, rootgid); err != nil {
if err := syscall.Fchown(int(fd), rootuid, rootuid); err != nil {
return nil, err
}
}

View File

@@ -25,10 +25,10 @@ import (
const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
// needsSetupDev returns true if /dev needs to be set up.
// setupDev returns true if /dev needs to be set up.
func needsSetupDev(config *configs.Config) bool {
for _, m := range config.Mounts {
if m.Device == "bind" && libcontainerUtils.CleanPath(m.Destination) == "/dev" {
if m.Device == "bind" && (m.Destination == "/dev" || m.Destination == "/dev/") {
return false
}
}
@@ -39,35 +39,35 @@ func needsSetupDev(config *configs.Config) bool {
// new mount namespace.
func setupRootfs(config *configs.Config, console *linuxConsole, pipe io.ReadWriter) (err error) {
if err := prepareRoot(config); err != nil {
return newSystemErrorWithCause(err, "preparing rootfs")
return newSystemError(err)
}
setupDev := needsSetupDev(config)
for _, m := range config.Mounts {
for _, precmd := range m.PremountCmds {
if err := mountCmd(precmd); err != nil {
return newSystemErrorWithCause(err, "running premount command")
return newSystemError(err)
}
}
if err := mountToRootfs(m, config.Rootfs, config.MountLabel); err != nil {
return newSystemErrorWithCausef(err, "mounting %q to rootfs %q", m.Destination, config.Rootfs)
return newSystemError(err)
}
for _, postcmd := range m.PostmountCmds {
if err := mountCmd(postcmd); err != nil {
return newSystemErrorWithCause(err, "running postmount command")
return newSystemError(err)
}
}
}
if setupDev {
if err := createDevices(config); err != nil {
return newSystemErrorWithCause(err, "creating device nodes")
return newSystemError(err)
}
if err := setupPtmx(config, console); err != nil {
return newSystemErrorWithCause(err, "setting up ptmx")
return newSystemError(err)
}
if err := setupDevSymlinks(config.Rootfs); err != nil {
return newSystemErrorWithCause(err, "setting up /dev symlinks")
return newSystemError(err)
}
}
// Signal the parent to run the pre-start hooks.
@@ -78,7 +78,7 @@ func setupRootfs(config *configs.Config, console *linuxConsole, pipe io.ReadWrit
return err
}
if err := syscall.Chdir(config.Rootfs); err != nil {
return newSystemErrorWithCausef(err, "changing dir to %q", config.Rootfs)
return newSystemError(err)
}
if config.NoPivotRoot {
err = msMoveRoot(config.Rootfs)
@@ -86,19 +86,19 @@ func setupRootfs(config *configs.Config, console *linuxConsole, pipe io.ReadWrit
err = pivotRoot(config.Rootfs, config.PivotDir)
}
if err != nil {
return newSystemErrorWithCause(err, "jailing process inside rootfs")
return newSystemError(err)
}
if setupDev {
if err := reOpenDevNull(); err != nil {
return newSystemErrorWithCause(err, "reopening /dev/null inside container")
return newSystemError(err)
}
}
// remount dev as ro if specifed
for _, m := range config.Mounts {
if libcontainerUtils.CleanPath(m.Destination) == "/dev" {
if m.Destination == "/dev" {
if m.Flags&syscall.MS_RDONLY != 0 {
if err := remountReadonly(m.Destination); err != nil {
return newSystemErrorWithCausef(err, "remounting %q as readonly", m.Destination)
return newSystemError(err)
}
}
break
@@ -107,7 +107,7 @@ func setupRootfs(config *configs.Config, console *linuxConsole, pipe io.ReadWrit
// set rootfs ( / ) as readonly
if config.Readonlyfs {
if err := setReadonly(); err != nil {
return newSystemErrorWithCause(err, "setting rootfs as readonly")
return newSystemError(err)
}
}
syscall.Umask(0022)
@@ -115,12 +115,14 @@ func setupRootfs(config *configs.Config, console *linuxConsole, pipe io.ReadWrit
}
func mountCmd(cmd configs.Command) error {
command := exec.Command(cmd.Path, cmd.Args[:]...)
command.Env = cmd.Env
command.Dir = cmd.Dir
if out, err := command.CombinedOutput(); err != nil {
return fmt.Errorf("%#v failed: %s: %v", cmd, string(out), err)
}
return nil
}
@@ -238,23 +240,34 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
return err
}
}
// create symlinks for merged cgroups
cwd, err := os.Getwd()
if err != nil {
return err
}
if err := os.Chdir(filepath.Join(rootfs, m.Destination)); err != nil {
return err
}
for _, mc := range merged {
for _, ss := range strings.Split(mc, ",") {
// symlink(2) is very dumb, it will just shove the path into
// the link and doesn't do any checks or relative path
// conversion. Also, don't error out if the cgroup already exists.
if err := os.Symlink(mc, filepath.Join(rootfs, m.Destination, ss)); err != nil && !os.IsExist(err) {
if err := os.Symlink(mc, ss); err != nil {
// if cgroup already exists, then okay(it could have been created before)
if os.IsExist(err) {
continue
}
os.Chdir(cwd)
return err
}
}
}
if err := os.Chdir(cwd); err != nil {
return err
}
if m.Flags&syscall.MS_RDONLY != 0 {
// remount cgroup root as readonly
mcgrouproot := &configs.Mount{
Source: m.Destination,
Device: "bind",
Destination: m.Destination,
Flags: defaultMountFlags | syscall.MS_RDONLY | syscall.MS_BIND,
Flags: defaultMountFlags | syscall.MS_RDONLY,
}
if err := remount(mcgrouproot, rootfs); err != nil {
return err
@@ -502,10 +515,10 @@ func getParentMount(rootfs string) (string, string, error) {
}
// Make parent mount private if it was shared
func rootfsParentMountPrivate(rootfs string) error {
func rootfsParentMountPrivate(config *configs.Config) error {
sharedMount := false
parentMount, optionalOpts, err := getParentMount(rootfs)
parentMount, optionalOpts, err := getParentMount(config.Rootfs)
if err != nil {
return err
}
@@ -537,10 +550,9 @@ func prepareRoot(config *configs.Config) error {
if err := syscall.Mount("", "/", "", uintptr(flag), ""); err != nil {
return err
}
if config.NoPivotRoot {
if err := rootfsParentMountPrivate(config.Rootfs); err != nil {
return err
}
if err := rootfsParentMountPrivate(config); err != nil {
return err
}
return syscall.Mount(config.Rootfs, config.Rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, "")
@@ -583,14 +595,7 @@ func pivotRoot(rootfs, pivotBaseDir string) (err error) {
}
}()
if err := syscall.PivotRoot(rootfs, pivotDir); err != nil {
// Make the parent mount private
if err := rootfsParentMountPrivate(rootfs); err != nil {
return err
}
// Try again
if err := syscall.PivotRoot(rootfs, pivotDir); err != nil {
return fmt.Errorf("pivot_root %s", err)
}
return fmt.Errorf("pivot_root %s", err)
}
if err := syscall.Chdir("/"); err != nil {
return fmt.Errorf("chdir / %s", err)
@@ -700,7 +705,7 @@ func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error {
data = label.FormatMountLabel(m.Data, mountLabel)
flags = m.Flags
)
if libcontainerUtils.CleanPath(dest) == "/dev" {
if dest == "/dev" {
flags &= ^syscall.MS_RDONLY
}
if !strings.HasPrefix(dest, rootfs) {

View File

@@ -36,11 +36,6 @@ var archs = map[string]string{
"SCMP_ARCH_MIPSEL": "mipsel",
"SCMP_ARCH_MIPSEL64": "mipsel64",
"SCMP_ARCH_MIPSEL64N32": "mipsel64n32",
"SCMP_ARCH_PPC": "ppc",
"SCMP_ARCH_PPC64": "ppc64",
"SCMP_ARCH_PPC64LE": "ppc64le",
"SCMP_ARCH_S390": "s390",
"SCMP_ARCH_S390X": "s390x",
}
// ConvertStringToOperator converts a string into a Seccomp comparison operator.

View File

@@ -10,7 +10,7 @@ import (
var ErrSeccompNotEnabled = errors.New("seccomp: config provided but seccomp not supported")
// InitSeccomp does nothing because seccomp is not supported.
// Seccomp not supported, do nothing
func InitSeccomp(config *configs.Seccomp) error {
if config != nil {
return ErrSeccompNotEnabled

View File

@@ -16,6 +16,7 @@ import (
"sync"
"syscall"
"github.com/docker/docker/pkg/mount"
"github.com/opencontainers/runc/libcontainer/system"
)
@@ -59,31 +60,16 @@ func getSelinuxMountPoint() string {
}
selinuxfs = ""
f, err := os.Open("/proc/self/mountinfo")
mounts, err := mount.GetMounts()
if err != nil {
return selinuxfs
}
defer f.Close()
scanner := bufio.NewScanner(f)
for scanner.Scan() {
txt := scanner.Text()
// Safe as mountinfo encodes mountpoints with spaces as \040.
sepIdx := strings.Index(txt, " - ")
if sepIdx == -1 {
continue
for _, mount := range mounts {
if mount.Fstype == "selinuxfs" {
selinuxfs = mount.Mountpoint
break
}
if !strings.Contains(txt[sepIdx:], "selinuxfs") {
continue
}
fields := strings.Split(txt, " ")
if len(fields) < 5 {
continue
}
selinuxfs = fields[4]
break
}
if selinuxfs != "" {
var buf syscall.Statfs_t
syscall.Statfs(selinuxfs, &buf)
@@ -311,7 +297,7 @@ func IntToMcs(id int, catRange uint32) string {
for ORD > TIER {
ORD = ORD - TIER
TIER--
TIER -= 1
}
TIER = SETSIZE - TIER
ORD = ORD + TIER
@@ -452,7 +438,7 @@ func badPrefix(fpath string) error {
return nil
}
// Chcon changes the fpath file object to the SELinux label scon.
// Change the fpath file object to the SELinux label scon.
// If the fpath is a directory and recurse is true Chcon will walk the
// directory tree setting the label
func Chcon(fpath string, scon string, recurse bool) error {
@@ -486,14 +472,14 @@ func DupSecOpt(src string) []string {
con["level"] == "" {
return nil
}
return []string{"label=user:" + con["user"],
"label=role:" + con["role"],
"label=type:" + con["type"],
"label=level:" + con["level"]}
return []string{"label:user:" + con["user"],
"label:role:" + con["role"],
"label:type:" + con["type"],
"label:level:" + con["level"]}
}
// DisableSecOpt returns a security opt that can be used to disabling SELinux
// labeling support for future container processes
func DisableSecOpt() []string {
return []string{"label=disable"}
return []string{"label:disable"}
}

View File

@@ -24,11 +24,9 @@ func (l *linuxSetnsInit) getSessionRingName() string {
}
func (l *linuxSetnsInit) Init() error {
if !l.config.Config.NoNewKeyring {
// do not inherit the parent's session keyring
if _, err := keyctl.JoinSessionKeyring(l.getSessionRingName()); err != nil {
return err
}
// do not inherit the parent's session keyring
if _, err := keyctl.JoinSessionKeyring(l.getSessionRingName()); err != nil {
return err
}
if l.config.NoNewPrivileges {
if err := system.Prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
@@ -46,8 +44,10 @@ func (l *linuxSetnsInit) Init() error {
if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
return err
}
if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
return err
if l.config.ProcessLabel != "" {
if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
return err
}
}
return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
}

View File

@@ -2,14 +2,14 @@ package stacktrace
import "runtime"
// Capture captures a stacktrace for the current calling go program
// Caputure captures a stacktrace for the current calling go program
//
// skip is the number of frames to skip
func Capture(userSkip int) Stacktrace {
var (
skip = userSkip + 1 // add one for our own function
frames []Frame
prevPc uintptr
prevPc uintptr = 0
)
for i := skip; ; i++ {
pc, file, line, ok := runtime.Caller(i)

View File

@@ -6,7 +6,6 @@ import (
"fmt"
"io"
"os"
"os/exec"
"syscall"
"github.com/opencontainers/runc/libcontainer/apparmor"
@@ -18,10 +17,9 @@ import (
)
type linuxStandardInit struct {
pipe io.ReadWriteCloser
parentPid int
stateDirFD int
config *initConfig
pipe io.ReadWriter
parentPid int
config *initConfig
}
func (l *linuxStandardInit) getSessionRingParams() (string, uint32, uint32) {
@@ -45,18 +43,16 @@ func (l *linuxStandardInit) getSessionRingParams() (string, uint32, uint32) {
const PR_SET_NO_NEW_PRIVS = 0x26
func (l *linuxStandardInit) Init() error {
if !l.config.Config.NoNewKeyring {
ringname, keepperms, newperms := l.getSessionRingParams()
ringname, keepperms, newperms := l.getSessionRingParams()
// do not inherit the parent's session keyring
sessKeyId, err := keyctl.JoinSessionKeyring(ringname)
if err != nil {
return err
}
// make session keyring searcheable
if err := keyctl.ModKeyringPerm(sessKeyId, keepperms, newperms); err != nil {
return err
}
// do not inherit the parent's session keyring
sessKeyId, err := keyctl.JoinSessionKeyring(ringname)
if err != nil {
return err
}
// make session keyring searcheable
if err := keyctl.ModKeyringPerm(sessKeyId, keepperms, newperms); err != nil {
return err
}
var console *linuxConsole
@@ -127,10 +123,7 @@ func (l *linuxStandardInit) Init() error {
if err := syncParentReady(l.pipe); err != nil {
return err
}
// Without NoNewPrivileges seccomp is a privileged operation, so we need to
// do this before dropping capabilities; otherwise do it as late as possible
// just before execve so as few syscalls take place after it as possible.
if l.config.Config.Seccomp != nil && !l.config.NoNewPrivileges {
if l.config.Config.Seccomp != nil {
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
return err
}
@@ -149,30 +142,6 @@ func (l *linuxStandardInit) Init() error {
if syscall.Getppid() != l.parentPid {
return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
}
// check for the arg before waiting to make sure it exists and it is returned
// as a create time error.
name, err := exec.LookPath(l.config.Args[0])
if err != nil {
return err
}
// close the pipe to signal that we have completed our init.
l.pipe.Close()
// wait for the fifo to be opened on the other side before
// exec'ing the users process.
fd, err := syscall.Openat(l.stateDirFD, execFifoFilename, os.O_WRONLY|syscall.O_CLOEXEC, 0)
if err != nil {
return newSystemErrorWithCause(err, "openat exec fifo")
}
if _, err := syscall.Write(fd, []byte("0")); err != nil {
return newSystemErrorWithCause(err, "write 0 exec fifo")
}
if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges {
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
return newSystemErrorWithCause(err, "init seccomp")
}
}
if err := syscall.Exec(name, l.config.Args[0:], os.Environ()); err != nil {
return newSystemErrorWithCause(err, "exec user process")
}
return nil
return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
}

View File

@@ -6,7 +6,6 @@ import (
"fmt"
"os"
"path/filepath"
"syscall"
"github.com/Sirupsen/logrus"
"github.com/opencontainers/runc/libcontainer/configs"
@@ -78,7 +77,7 @@ type stoppedState struct {
}
func (b *stoppedState) status() Status {
return Stopped
return Destroyed
}
func (b *stoppedState) transition(s containerState) error {
@@ -111,11 +110,11 @@ func (r *runningState) status() Status {
func (r *runningState) transition(s containerState) error {
switch s.(type) {
case *stoppedState:
t, err := r.c.runType()
running, err := r.c.isRunning()
if err != nil {
return err
}
if t == Running {
if running {
return newGenericError(fmt.Errorf("container still running"), ContainerNotStopped)
}
r.c.state = s
@@ -130,40 +129,16 @@ func (r *runningState) transition(s containerState) error {
}
func (r *runningState) destroy() error {
t, err := r.c.runType()
running, err := r.c.isRunning()
if err != nil {
return err
}
if t == Running {
if running {
return newGenericError(fmt.Errorf("container is not destroyed"), ContainerNotStopped)
}
return destroy(r.c)
}
type createdState struct {
c *linuxContainer
}
func (i *createdState) status() Status {
return Created
}
func (i *createdState) transition(s containerState) error {
switch s.(type) {
case *runningState, *pausedState, *stoppedState:
i.c.state = s
return nil
case *createdState:
return nil
}
return newStateTransitionError(i, s)
}
func (i *createdState) destroy() error {
i.c.initProcess.signal(syscall.SIGKILL)
return destroy(i.c)
}
// pausedState represents a container that is currently pause. It cannot be destroyed in a
// paused state and must transition back to running first.
type pausedState struct {
@@ -186,11 +161,11 @@ func (p *pausedState) transition(s containerState) error {
}
func (p *pausedState) destroy() error {
t, err := p.c.runType()
isRunning, err := p.c.isRunning()
if err != nil {
return err
}
if t != Running && t != Created {
if !isRunning {
if err := p.c.cgroupManager.Freeze(configs.Thawed); err != nil {
return err
}
@@ -200,7 +175,7 @@ func (p *pausedState) destroy() error {
}
// restoredState is the same as the running state but also has accociated checkpoint
// information that maybe need destroyed when the container is stopped and destroy is called.
// information that maybe need destroyed when the container is stopped and destory is called.
type restoredState struct {
imageDir string
c *linuxContainer
@@ -229,23 +204,23 @@ func (r *restoredState) destroy() error {
return destroy(r.c)
}
// loadedState is used whenever a container is restored, loaded, or setting additional
// createdState is used whenever a container is restored, loaded, or setting additional
// processes inside and it should not be destroyed when it is exiting.
type loadedState struct {
type createdState struct {
c *linuxContainer
s Status
}
func (n *loadedState) status() Status {
func (n *createdState) status() Status {
return n.s
}
func (n *loadedState) transition(s containerState) error {
func (n *createdState) transition(s containerState) error {
n.c.state = s
return nil
}
func (n *loadedState) destroy() error {
func (n *createdState) destroy() error {
if err := n.c.refreshState(); err != nil {
return err
}

View File

@@ -1,7 +0,0 @@
package libcontainer
// Solaris - TODO
type Stats struct {
Interfaces []*NetworkInterface
}

View File

@@ -100,12 +100,17 @@ func Setctty() error {
return nil
}
// RunningInUserNS detects whether we are currently running in a user namespace.
// Copied from github.com/lxc/lxd/shared/util.go
/*
* Detect whether we are currently running in a user namespace.
* Copied from github.com/lxc/lxd/shared/util.go
*/
func RunningInUserNS() bool {
file, err := os.Open("/proc/self/uid_map")
if err != nil {
// This kernel-provided file only exists if user namespaces are supported
/*
* This kernel-provided file only exists if user namespaces are
* supported
*/
return false
}
defer file.Close()

View File

@@ -100,22 +100,3 @@ func SearchLabels(labels []string, query string) string {
}
return ""
}
// Annotations returns the bundle path and user defined annotations from the
// libcontianer state. We need to remove the bundle because that is a label
// added by libcontainer.
func Annotations(labels []string) (bundle string, userAnnotations map[string]string) {
userAnnotations = make(map[string]string)
for _, l := range labels {
parts := strings.SplitN(l, "=", 2)
if len(parts) < 2 {
continue
}
if parts[0] == "bundle" {
bundle = parts[1]
} else {
userAnnotations[parts[0]] = parts[1]
}
}
return
}