Update libcontainer dependency
This commit is contained in:
517
vendor/github.com/opencontainers/runc/libcontainer/container_linux.go
generated
vendored
517
vendor/github.com/opencontainers/runc/libcontainer/container_linux.go
generated
vendored
@@ -15,30 +15,35 @@ import (
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/Sirupsen/logrus"
|
||||
"github.com/golang/protobuf/proto"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/criurpc"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/syndtr/gocapability/capability"
|
||||
"github.com/vishvananda/netlink/nl"
|
||||
)
|
||||
|
||||
const stdioFdCount = 3
|
||||
|
||||
type linuxContainer struct {
|
||||
id string
|
||||
root string
|
||||
config *configs.Config
|
||||
cgroupManager cgroups.Manager
|
||||
initPath string
|
||||
initArgs []string
|
||||
initProcess parentProcess
|
||||
criuPath string
|
||||
m sync.Mutex
|
||||
criuVersion int
|
||||
state containerState
|
||||
id string
|
||||
root string
|
||||
config *configs.Config
|
||||
cgroupManager cgroups.Manager
|
||||
initPath string
|
||||
initArgs []string
|
||||
initProcess parentProcess
|
||||
initProcessStartTime string
|
||||
criuPath string
|
||||
m sync.Mutex
|
||||
criuVersion int
|
||||
state containerState
|
||||
created time.Time
|
||||
}
|
||||
|
||||
// State represents a running container's state
|
||||
@@ -59,7 +64,7 @@ type State struct {
|
||||
ExternalDescriptors []string `json:"external_descriptors,omitempty"`
|
||||
}
|
||||
|
||||
// A libcontainer container object.
|
||||
// Container is a libcontainer container object.
|
||||
//
|
||||
// Each container is thread-safe within the same process. Since a container can
|
||||
// be destroyed by a separate process, any function may return that the container
|
||||
@@ -75,13 +80,13 @@ type Container interface {
|
||||
// Systemerror - System error.
|
||||
Checkpoint(criuOpts *CriuOpts) error
|
||||
|
||||
// Restore restores the checkpointed container to a running state using the criu(8) utiity.
|
||||
// Restore restores the checkpointed container to a running state using the criu(8) utility.
|
||||
//
|
||||
// errors:
|
||||
// Systemerror - System error.
|
||||
Restore(process *Process, criuOpts *CriuOpts) error
|
||||
|
||||
// If the Container state is RUNNING or PAUSING, sets the Container state to PAUSING and pauses
|
||||
// If the Container state is RUNNING, sets the Container state to PAUSING and pauses
|
||||
// the execution of any user processes. Asynchronously, when the container finished being paused the
|
||||
// state is changed to PAUSED.
|
||||
// If the Container state is PAUSED, do nothing.
|
||||
@@ -138,7 +143,7 @@ func (c *linuxContainer) State() (*State, error) {
|
||||
func (c *linuxContainer) Processes() ([]int, error) {
|
||||
pids, err := c.cgroupManager.GetAllPids()
|
||||
if err != nil {
|
||||
return nil, newSystemError(err)
|
||||
return nil, newSystemErrorWithCause(err, "getting all container pids from cgroups")
|
||||
}
|
||||
return pids, nil
|
||||
}
|
||||
@@ -149,14 +154,14 @@ func (c *linuxContainer) Stats() (*Stats, error) {
|
||||
stats = &Stats{}
|
||||
)
|
||||
if stats.CgroupStats, err = c.cgroupManager.GetStats(); err != nil {
|
||||
return stats, newSystemError(err)
|
||||
return stats, newSystemErrorWithCause(err, "getting container stats from cgroups")
|
||||
}
|
||||
for _, iface := range c.config.Networks {
|
||||
switch iface.Type {
|
||||
case "veth":
|
||||
istats, err := getNetworkInterfaceStats(iface.HostInterfaceName)
|
||||
if err != nil {
|
||||
return stats, newSystemError(err)
|
||||
return stats, newSystemErrorWithCausef(err, "getting network stats for interface %q", iface.HostInterfaceName)
|
||||
}
|
||||
stats.Interfaces = append(stats.Interfaces, istats)
|
||||
}
|
||||
@@ -167,6 +172,13 @@ func (c *linuxContainer) Stats() (*Stats, error) {
|
||||
func (c *linuxContainer) Set(config configs.Config) error {
|
||||
c.m.Lock()
|
||||
defer c.m.Unlock()
|
||||
status, err := c.currentStatus()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if status == Stopped {
|
||||
return newGenericError(fmt.Errorf("container not running"), ContainerNotRunning)
|
||||
}
|
||||
c.config = &config
|
||||
return c.cgroupManager.Set(c.config)
|
||||
}
|
||||
@@ -178,38 +190,90 @@ func (c *linuxContainer) Start(process *Process) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
doInit := status == Destroyed
|
||||
parent, err := c.newParentProcess(process, doInit)
|
||||
return c.start(process, status == Stopped)
|
||||
}
|
||||
|
||||
func (c *linuxContainer) Run(process *Process) error {
|
||||
c.m.Lock()
|
||||
defer c.m.Unlock()
|
||||
status, err := c.currentStatus()
|
||||
if err != nil {
|
||||
return newSystemError(err)
|
||||
return err
|
||||
}
|
||||
if err := c.start(process, status == Stopped); err != nil {
|
||||
return err
|
||||
}
|
||||
if status == Stopped {
|
||||
return c.exec()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) Exec() error {
|
||||
c.m.Lock()
|
||||
defer c.m.Unlock()
|
||||
return c.exec()
|
||||
}
|
||||
|
||||
func (c *linuxContainer) exec() error {
|
||||
path := filepath.Join(c.root, execFifoFilename)
|
||||
f, err := os.OpenFile(path, os.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
return newSystemErrorWithCause(err, "open exec fifo for reading")
|
||||
}
|
||||
defer f.Close()
|
||||
data, err := ioutil.ReadAll(f)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if len(data) > 0 {
|
||||
os.Remove(path)
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("cannot start an already running container")
|
||||
}
|
||||
|
||||
func (c *linuxContainer) start(process *Process, isInit bool) error {
|
||||
parent, err := c.newParentProcess(process, isInit)
|
||||
if err != nil {
|
||||
return newSystemErrorWithCause(err, "creating new parent process")
|
||||
}
|
||||
if err := parent.start(); err != nil {
|
||||
// terminate the process to ensure that it properly is reaped.
|
||||
if err := parent.terminate(); err != nil {
|
||||
logrus.Warn(err)
|
||||
}
|
||||
return newSystemError(err)
|
||||
return newSystemErrorWithCause(err, "starting container process")
|
||||
}
|
||||
// generate a timestamp indicating when the container was started
|
||||
c.created = time.Now().UTC()
|
||||
c.state = &runningState{
|
||||
c: c,
|
||||
}
|
||||
if doInit {
|
||||
if err := c.updateState(parent); err != nil {
|
||||
if isInit {
|
||||
c.state = &createdState{
|
||||
c: c,
|
||||
}
|
||||
state, err := c.updateState(parent)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
c.initProcessStartTime = state.InitProcessStartTime
|
||||
|
||||
if c.config.Hooks != nil {
|
||||
s := configs.HookState{
|
||||
Version: c.config.Version,
|
||||
ID: c.id,
|
||||
Pid: parent.pid(),
|
||||
Root: c.config.Rootfs,
|
||||
Version: c.config.Version,
|
||||
ID: c.id,
|
||||
Pid: parent.pid(),
|
||||
Root: c.config.Rootfs,
|
||||
BundlePath: utils.SearchLabels(c.config.Labels, "bundle"),
|
||||
}
|
||||
for _, hook := range c.config.Hooks.Poststart {
|
||||
for i, hook := range c.config.Hooks.Poststart {
|
||||
if err := hook.Run(s); err != nil {
|
||||
if err := parent.terminate(); err != nil {
|
||||
logrus.Warn(err)
|
||||
}
|
||||
return newSystemError(err)
|
||||
return newSystemErrorWithCausef(err, "running poststart hook %d", i)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -219,7 +283,7 @@ func (c *linuxContainer) Start(process *Process) error {
|
||||
|
||||
func (c *linuxContainer) Signal(s os.Signal) error {
|
||||
if err := c.initProcess.signal(s); err != nil {
|
||||
return newSystemError(err)
|
||||
return newSystemErrorWithCause(err, "signaling init process")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -227,19 +291,23 @@ func (c *linuxContainer) Signal(s os.Signal) error {
|
||||
func (c *linuxContainer) newParentProcess(p *Process, doInit bool) (parentProcess, error) {
|
||||
parentPipe, childPipe, err := newPipe()
|
||||
if err != nil {
|
||||
return nil, newSystemError(err)
|
||||
return nil, newSystemErrorWithCause(err, "creating new init pipe")
|
||||
}
|
||||
cmd, err := c.commandTemplate(p, childPipe)
|
||||
rootDir, err := os.Open(c.root)
|
||||
if err != nil {
|
||||
return nil, newSystemError(err)
|
||||
return nil, err
|
||||
}
|
||||
cmd, err := c.commandTemplate(p, childPipe, rootDir)
|
||||
if err != nil {
|
||||
return nil, newSystemErrorWithCause(err, "creating new command template")
|
||||
}
|
||||
if !doInit {
|
||||
return c.newSetnsProcess(p, cmd, parentPipe, childPipe)
|
||||
return c.newSetnsProcess(p, cmd, parentPipe, childPipe, rootDir)
|
||||
}
|
||||
return c.newInitProcess(p, cmd, parentPipe, childPipe)
|
||||
return c.newInitProcess(p, cmd, parentPipe, childPipe, rootDir)
|
||||
}
|
||||
|
||||
func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.Cmd, error) {
|
||||
func (c *linuxContainer) commandTemplate(p *Process, childPipe, rootDir *os.File) (*exec.Cmd, error) {
|
||||
cmd := &exec.Cmd{
|
||||
Path: c.initPath,
|
||||
Args: c.initArgs,
|
||||
@@ -251,8 +319,10 @@ func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.
|
||||
if cmd.SysProcAttr == nil {
|
||||
cmd.SysProcAttr = &syscall.SysProcAttr{}
|
||||
}
|
||||
cmd.ExtraFiles = append(p.ExtraFiles, childPipe)
|
||||
cmd.Env = append(cmd.Env, fmt.Sprintf("_LIBCONTAINER_INITPIPE=%d", stdioFdCount+len(cmd.ExtraFiles)-1))
|
||||
cmd.ExtraFiles = append(p.ExtraFiles, childPipe, rootDir)
|
||||
cmd.Env = append(cmd.Env,
|
||||
fmt.Sprintf("_LIBCONTAINER_INITPIPE=%d", stdioFdCount+len(cmd.ExtraFiles)-2),
|
||||
fmt.Sprintf("_LIBCONTAINER_STATEDIR=%d", stdioFdCount+len(cmd.ExtraFiles)-1))
|
||||
// NOTE: when running a container with no PID namespace and the parent process spawning the container is
|
||||
// PID1 the pdeathsig is being delivered to the container's init process by the kernel for some reason
|
||||
// even with the parent still running.
|
||||
@@ -262,38 +332,42 @@ func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.
|
||||
return cmd, nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*initProcess, error) {
|
||||
t := "_LIBCONTAINER_INITTYPE=" + string(initStandard)
|
||||
cloneFlags := c.config.Namespaces.CloneFlags()
|
||||
if cloneFlags&syscall.CLONE_NEWUSER != 0 {
|
||||
if err := c.addUidGidMappings(cmd.SysProcAttr); err != nil {
|
||||
// user mappings are not supported
|
||||
return nil, err
|
||||
}
|
||||
enableSetgroups(cmd.SysProcAttr)
|
||||
// Default to root user when user namespaces are enabled.
|
||||
if cmd.SysProcAttr.Credential == nil {
|
||||
cmd.SysProcAttr.Credential = &syscall.Credential{}
|
||||
func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe, rootDir *os.File) (*initProcess, error) {
|
||||
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard))
|
||||
nsMaps := make(map[configs.NamespaceType]string)
|
||||
for _, ns := range c.config.Namespaces {
|
||||
if ns.Path != "" {
|
||||
nsMaps[ns.Type] = ns.Path
|
||||
}
|
||||
}
|
||||
cmd.Env = append(cmd.Env, t)
|
||||
cmd.SysProcAttr.Cloneflags = cloneFlags
|
||||
_, sharePidns := nsMaps[configs.NEWPID]
|
||||
data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), nsMaps, "")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &initProcess{
|
||||
cmd: cmd,
|
||||
childPipe: childPipe,
|
||||
parentPipe: parentPipe,
|
||||
manager: c.cgroupManager,
|
||||
config: c.newInitConfig(p),
|
||||
container: c,
|
||||
process: p,
|
||||
cmd: cmd,
|
||||
childPipe: childPipe,
|
||||
parentPipe: parentPipe,
|
||||
manager: c.cgroupManager,
|
||||
config: c.newInitConfig(p),
|
||||
container: c,
|
||||
process: p,
|
||||
bootstrapData: data,
|
||||
sharePidns: sharePidns,
|
||||
rootDir: rootDir,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*setnsProcess, error) {
|
||||
func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe, rootDir *os.File) (*setnsProcess, error) {
|
||||
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initSetns))
|
||||
state, err := c.currentState()
|
||||
if err != nil {
|
||||
return nil, newSystemErrorWithCause(err, "getting container's current state")
|
||||
}
|
||||
// for setns process, we dont have to set cloneflags as the process namespaces
|
||||
// will only be set via setns syscall
|
||||
data, err := c.bootstrapData(0, c.initProcess.pid(), p.consolePath)
|
||||
data, err := c.bootstrapData(0, state.NamespacePaths, p.consolePath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -306,11 +380,12 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe,
|
||||
config: c.newInitConfig(p),
|
||||
process: p,
|
||||
bootstrapData: data,
|
||||
rootDir: rootDir,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
|
||||
return &initConfig{
|
||||
cfg := &initConfig{
|
||||
Config: c.config,
|
||||
Args: process.Args,
|
||||
Env: process.Env,
|
||||
@@ -319,7 +394,26 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig {
|
||||
Console: process.consolePath,
|
||||
Capabilities: process.Capabilities,
|
||||
PassedFilesCount: len(process.ExtraFiles),
|
||||
ContainerId: c.ID(),
|
||||
NoNewPrivileges: c.config.NoNewPrivileges,
|
||||
AppArmorProfile: c.config.AppArmorProfile,
|
||||
ProcessLabel: c.config.ProcessLabel,
|
||||
Rlimits: c.config.Rlimits,
|
||||
ExecFifoPath: filepath.Join(c.root, execFifoFilename),
|
||||
}
|
||||
if process.NoNewPrivileges != nil {
|
||||
cfg.NoNewPrivileges = *process.NoNewPrivileges
|
||||
}
|
||||
if process.AppArmorProfile != "" {
|
||||
cfg.AppArmorProfile = process.AppArmorProfile
|
||||
}
|
||||
if process.Label != "" {
|
||||
cfg.ProcessLabel = process.Label
|
||||
}
|
||||
if len(process.Rlimits) > 0 {
|
||||
cfg.Rlimits = process.Rlimits
|
||||
}
|
||||
return cfg
|
||||
}
|
||||
|
||||
func newPipe() (parent *os.File, child *os.File, err error) {
|
||||
@@ -343,15 +437,16 @@ func (c *linuxContainer) Pause() error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if status != Running {
|
||||
return newGenericError(fmt.Errorf("container not running"), ContainerNotRunning)
|
||||
switch status {
|
||||
case Running, Created:
|
||||
if err := c.cgroupManager.Freeze(configs.Frozen); err != nil {
|
||||
return err
|
||||
}
|
||||
return c.state.transition(&pausedState{
|
||||
c: c,
|
||||
})
|
||||
}
|
||||
if err := c.cgroupManager.Freeze(configs.Frozen); err != nil {
|
||||
return err
|
||||
}
|
||||
return c.state.transition(&pausedState{
|
||||
c: c,
|
||||
})
|
||||
return newGenericError(fmt.Errorf("container not running: %s", status), ContainerNotRunning)
|
||||
}
|
||||
|
||||
func (c *linuxContainer) Resume() error {
|
||||
@@ -380,23 +475,13 @@ func (c *linuxContainer) NotifyMemoryPressure(level PressureLevel) (<-chan struc
|
||||
return notifyMemoryPressure(c.cgroupManager.GetPaths(), level)
|
||||
}
|
||||
|
||||
// XXX debug support, remove when debugging done.
|
||||
func addArgsFromEnv(evar string, args *[]string) {
|
||||
if e := os.Getenv(evar); e != "" {
|
||||
for _, f := range strings.Fields(e) {
|
||||
*args = append(*args, f)
|
||||
}
|
||||
}
|
||||
fmt.Printf(">>> criu %v\n", *args)
|
||||
}
|
||||
|
||||
// check Criu version greater than or equal to min_version
|
||||
func (c *linuxContainer) checkCriuVersion(min_version string) error {
|
||||
// checkCriuVersion checks Criu version greater than or equal to minVersion
|
||||
func (c *linuxContainer) checkCriuVersion(minVersion string) error {
|
||||
var x, y, z, versionReq int
|
||||
|
||||
_, err := fmt.Sscanf(min_version, "%d.%d.%d\n", &x, &y, &z) // 1.5.2
|
||||
_, err := fmt.Sscanf(minVersion, "%d.%d.%d\n", &x, &y, &z) // 1.5.2
|
||||
if err != nil {
|
||||
_, err = fmt.Sscanf(min_version, "Version: %d.%d\n", &x, &y) // 1.6
|
||||
_, err = fmt.Sscanf(minVersion, "Version: %d.%d\n", &x, &y) // 1.6
|
||||
}
|
||||
versionReq = x*10000 + y*100 + z
|
||||
|
||||
@@ -441,7 +526,7 @@ func (c *linuxContainer) checkCriuVersion(min_version string) error {
|
||||
c.criuVersion = x*10000 + y*100 + z
|
||||
|
||||
if c.criuVersion < versionReq {
|
||||
return fmt.Errorf("CRIU version must be %s or higher", min_version)
|
||||
return fmt.Errorf("CRIU version must be %s or higher", minVersion)
|
||||
}
|
||||
|
||||
return nil
|
||||
@@ -514,6 +599,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
||||
TcpEstablished: proto.Bool(criuOpts.TcpEstablished),
|
||||
ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections),
|
||||
FileLocks: proto.Bool(criuOpts.FileLocks),
|
||||
EmptyNs: proto.Uint32(criuOpts.EmptyNs),
|
||||
}
|
||||
|
||||
// append optional criu opts, e.g., page-server and port
|
||||
@@ -529,7 +615,8 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
||||
if err := c.checkCriuVersion("1.7"); err != nil {
|
||||
return err
|
||||
}
|
||||
rpcOpts.ManageCgroupsMode = proto.Uint32(uint32(criuOpts.ManageCgroupsMode))
|
||||
mode := criurpc.CriuCgMode(criuOpts.ManageCgroupsMode)
|
||||
rpcOpts.ManageCgroupsMode = &mode
|
||||
}
|
||||
|
||||
t := criurpc.CriuReqType_DUMP
|
||||
@@ -587,6 +674,27 @@ func (c *linuxContainer) addCriuRestoreMount(req *criurpc.CriuReq, m *configs.Mo
|
||||
req.Opts.ExtMnt = append(req.Opts.ExtMnt, extMnt)
|
||||
}
|
||||
|
||||
func (c *linuxContainer) restoreNetwork(req *criurpc.CriuReq, criuOpts *CriuOpts) {
|
||||
for _, iface := range c.config.Networks {
|
||||
switch iface.Type {
|
||||
case "veth":
|
||||
veth := new(criurpc.CriuVethPair)
|
||||
veth.IfOut = proto.String(iface.HostInterfaceName)
|
||||
veth.IfIn = proto.String(iface.Name)
|
||||
req.Opts.Veths = append(req.Opts.Veths, veth)
|
||||
break
|
||||
case "loopback":
|
||||
break
|
||||
}
|
||||
}
|
||||
for _, i := range criuOpts.VethPairs {
|
||||
veth := new(criurpc.CriuVethPair)
|
||||
veth.IfOut = proto.String(i.HostInterfaceName)
|
||||
veth.IfIn = proto.String(i.ContainerInterfaceName)
|
||||
req.Opts.Veths = append(req.Opts.Veths, veth)
|
||||
}
|
||||
}
|
||||
|
||||
func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
||||
c.m.Lock()
|
||||
defer c.m.Unlock()
|
||||
@@ -650,6 +758,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
||||
ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections),
|
||||
TcpEstablished: proto.Bool(criuOpts.TcpEstablished),
|
||||
FileLocks: proto.Bool(criuOpts.FileLocks),
|
||||
EmptyNs: proto.Uint32(criuOpts.EmptyNs),
|
||||
},
|
||||
}
|
||||
|
||||
@@ -669,23 +778,9 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
||||
break
|
||||
}
|
||||
}
|
||||
for _, iface := range c.config.Networks {
|
||||
switch iface.Type {
|
||||
case "veth":
|
||||
veth := new(criurpc.CriuVethPair)
|
||||
veth.IfOut = proto.String(iface.HostInterfaceName)
|
||||
veth.IfIn = proto.String(iface.Name)
|
||||
req.Opts.Veths = append(req.Opts.Veths, veth)
|
||||
break
|
||||
case "loopback":
|
||||
break
|
||||
}
|
||||
}
|
||||
for _, i := range criuOpts.VethPairs {
|
||||
veth := new(criurpc.CriuVethPair)
|
||||
veth.IfOut = proto.String(i.HostInterfaceName)
|
||||
veth.IfIn = proto.String(i.ContainerInterfaceName)
|
||||
req.Opts.Veths = append(req.Opts.Veths, veth)
|
||||
|
||||
if criuOpts.EmptyNs&syscall.CLONE_NEWNET == 0 {
|
||||
c.restoreNetwork(req, criuOpts)
|
||||
}
|
||||
|
||||
// append optional manage cgroups mode
|
||||
@@ -693,7 +788,8 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
||||
if err := c.checkCriuVersion("1.7"); err != nil {
|
||||
return err
|
||||
}
|
||||
req.Opts.ManageCgroupsMode = proto.Uint32(uint32(criuOpts.ManageCgroupsMode))
|
||||
mode := criurpc.CriuCgMode(criuOpts.ManageCgroupsMode)
|
||||
req.Opts.ManageCgroupsMode = &mode
|
||||
}
|
||||
|
||||
var (
|
||||
@@ -850,7 +946,7 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
n, err = criuClient.Write(data)
|
||||
_, err = criuClient.Write(data)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -925,6 +1021,20 @@ func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Proc
|
||||
if err := lockNetwork(c.config); err != nil {
|
||||
return err
|
||||
}
|
||||
case notify.GetScript() == "setup-namespaces":
|
||||
if c.config.Hooks != nil {
|
||||
s := configs.HookState{
|
||||
Version: c.config.Version,
|
||||
ID: c.id,
|
||||
Pid: int(notify.GetPid()),
|
||||
Root: c.config.Rootfs,
|
||||
}
|
||||
for i, hook := range c.config.Hooks.Prestart {
|
||||
if err := hook.Run(s); err != nil {
|
||||
return newSystemErrorWithCausef(err, "running prestart hook %d", i)
|
||||
}
|
||||
}
|
||||
}
|
||||
case notify.GetScript() == "post-restore":
|
||||
pid := notify.GetPid()
|
||||
r, err := newRestoredProcess(int(pid), fds)
|
||||
@@ -938,7 +1048,7 @@ func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Proc
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.updateState(r); err != nil {
|
||||
if _, err := c.updateState(r); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.Remove(filepath.Join(c.root, "checkpoint")); err != nil {
|
||||
@@ -950,13 +1060,17 @@ func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Proc
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) updateState(process parentProcess) error {
|
||||
func (c *linuxContainer) updateState(process parentProcess) (*State, error) {
|
||||
c.initProcess = process
|
||||
state, err := c.currentState()
|
||||
if err != nil {
|
||||
return err
|
||||
return nil, err
|
||||
}
|
||||
return c.saveState(state)
|
||||
err = c.saveState(state)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return state, nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) saveState(s *State) error {
|
||||
@@ -991,37 +1105,75 @@ func (c *linuxContainer) refreshState() error {
|
||||
if paused {
|
||||
return c.state.transition(&pausedState{c: c})
|
||||
}
|
||||
running, err := c.isRunning()
|
||||
t, err := c.runType()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if running {
|
||||
switch t {
|
||||
case Created:
|
||||
return c.state.transition(&createdState{c: c})
|
||||
case Running:
|
||||
return c.state.transition(&runningState{c: c})
|
||||
}
|
||||
return c.state.transition(&stoppedState{c: c})
|
||||
}
|
||||
|
||||
func (c *linuxContainer) isRunning() (bool, error) {
|
||||
if c.initProcess == nil {
|
||||
// doesInitProcessExist checks if the init process is still the same process
|
||||
// as the initial one, it could happen that the original process has exited
|
||||
// and a new process has been created with the same pid, in this case, the
|
||||
// container would already be stopped.
|
||||
func (c *linuxContainer) doesInitProcessExist(initPid int) (bool, error) {
|
||||
startTime, err := system.GetProcessStartTime(initPid)
|
||||
if err != nil {
|
||||
return false, newSystemErrorWithCausef(err, "getting init process %d start time", initPid)
|
||||
}
|
||||
if c.initProcessStartTime != startTime {
|
||||
return false, nil
|
||||
}
|
||||
// return Running if the init process is alive
|
||||
if err := syscall.Kill(c.initProcess.pid(), 0); err != nil {
|
||||
if err == syscall.ESRCH {
|
||||
return false, nil
|
||||
}
|
||||
return false, newSystemError(err)
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) runType() (Status, error) {
|
||||
if c.initProcess == nil {
|
||||
return Stopped, nil
|
||||
}
|
||||
pid := c.initProcess.pid()
|
||||
// return Running if the init process is alive
|
||||
if err := syscall.Kill(pid, 0); err != nil {
|
||||
if err == syscall.ESRCH {
|
||||
// It means the process does not exist anymore, could happen when the
|
||||
// process exited just when we call the function, we should not return
|
||||
// error in this case.
|
||||
return Stopped, nil
|
||||
}
|
||||
return Stopped, newSystemErrorWithCausef(err, "sending signal 0 to pid %d", pid)
|
||||
}
|
||||
// check if the process is still the original init process.
|
||||
exist, err := c.doesInitProcessExist(pid)
|
||||
if !exist || err != nil {
|
||||
return Stopped, err
|
||||
}
|
||||
// check if the process that is running is the init process or the user's process.
|
||||
// this is the difference between the container Running and Created.
|
||||
environ, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/environ", pid))
|
||||
if err != nil {
|
||||
return Stopped, newSystemErrorWithCausef(err, "reading /proc/%d/environ", pid)
|
||||
}
|
||||
check := []byte("_LIBCONTAINER")
|
||||
if bytes.Contains(environ, check) {
|
||||
return Created, nil
|
||||
}
|
||||
return Running, nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) isPaused() (bool, error) {
|
||||
data, err := ioutil.ReadFile(filepath.Join(c.cgroupManager.GetPaths()["freezer"], "freezer.state"))
|
||||
if err != nil {
|
||||
// If freezer cgroup is not mounted, the container would just be not paused.
|
||||
if os.IsNotExist(err) {
|
||||
return false, nil
|
||||
}
|
||||
return false, newSystemError(err)
|
||||
return false, newSystemErrorWithCause(err, "checking if container is paused")
|
||||
}
|
||||
return bytes.Equal(bytes.TrimSpace(data), []byte("FROZEN")), nil
|
||||
}
|
||||
@@ -1043,6 +1195,7 @@ func (c *linuxContainer) currentState() (*State, error) {
|
||||
Config: *c.config,
|
||||
InitProcessPid: pid,
|
||||
InitProcessStartTime: startTime,
|
||||
Created: c.created,
|
||||
},
|
||||
CgroupPaths: c.cgroupManager.GetPaths(),
|
||||
NamespacePaths: make(map[configs.NamespaceType]string),
|
||||
@@ -1053,6 +1206,9 @@ func (c *linuxContainer) currentState() (*State, error) {
|
||||
state.NamespacePaths[ns.Type] = ns.GetPath(pid)
|
||||
}
|
||||
for _, nsType := range configs.NamespaceTypes() {
|
||||
if !configs.IsNamespaceSupported(nsType) {
|
||||
continue
|
||||
}
|
||||
if _, ok := state.NamespacePaths[nsType]; !ok {
|
||||
ns := configs.Namespace{Type: nsType}
|
||||
state.NamespacePaths[ns.Type] = ns.GetPath(pid)
|
||||
@@ -1062,18 +1218,69 @@ func (c *linuxContainer) currentState() (*State, error) {
|
||||
return state, nil
|
||||
}
|
||||
|
||||
// bootstrapData encodes the necessary data in netlink binary format as a io.Reader.
|
||||
// Consumer can write the data to a bootstrap program such as one that uses
|
||||
// nsenter package to bootstrap the container's init process correctly, i.e. with
|
||||
// correct namespaces, uid/gid mapping etc.
|
||||
func (c *linuxContainer) bootstrapData(cloneFlags uintptr, pid int, consolePath string) (io.Reader, error) {
|
||||
// orderNamespacePaths sorts namespace paths into a list of paths that we
|
||||
// can setns in order.
|
||||
func (c *linuxContainer) orderNamespacePaths(namespaces map[configs.NamespaceType]string) ([]string, error) {
|
||||
paths := []string{}
|
||||
nsTypes := []configs.NamespaceType{
|
||||
configs.NEWIPC,
|
||||
configs.NEWUTS,
|
||||
configs.NEWNET,
|
||||
configs.NEWPID,
|
||||
configs.NEWNS,
|
||||
}
|
||||
// join userns if the init process explicitly requires NEWUSER
|
||||
if c.config.Namespaces.Contains(configs.NEWUSER) {
|
||||
nsTypes = append(nsTypes, configs.NEWUSER)
|
||||
}
|
||||
for _, nsType := range nsTypes {
|
||||
if p, ok := namespaces[nsType]; ok && p != "" {
|
||||
// check if the requested namespace is supported
|
||||
if !configs.IsNamespaceSupported(nsType) {
|
||||
return nil, newSystemError(fmt.Errorf("namespace %s is not supported", nsType))
|
||||
}
|
||||
// only set to join this namespace if it exists
|
||||
if _, err := os.Lstat(p); err != nil {
|
||||
return nil, newSystemErrorWithCausef(err, "running lstat on namespace path %q", p)
|
||||
}
|
||||
// do not allow namespace path with comma as we use it to separate
|
||||
// the namespace paths
|
||||
if strings.ContainsRune(p, ',') {
|
||||
return nil, newSystemError(fmt.Errorf("invalid path %s", p))
|
||||
}
|
||||
paths = append(paths, p)
|
||||
}
|
||||
}
|
||||
return paths, nil
|
||||
}
|
||||
|
||||
func encodeIDMapping(idMap []configs.IDMap) ([]byte, error) {
|
||||
data := bytes.NewBuffer(nil)
|
||||
for _, im := range idMap {
|
||||
line := fmt.Sprintf("%d %d %d\n", im.ContainerID, im.HostID, im.Size)
|
||||
if _, err := data.WriteString(line); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return data.Bytes(), nil
|
||||
}
|
||||
|
||||
// bootstrapData encodes the necessary data in netlink binary format
|
||||
// as a io.Reader.
|
||||
// Consumer can write the data to a bootstrap program
|
||||
// such as one that uses nsenter package to bootstrap the container's
|
||||
// init process correctly, i.e. with correct namespaces, uid/gid
|
||||
// mapping etc.
|
||||
func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.NamespaceType]string, consolePath string) (io.Reader, error) {
|
||||
// create the netlink message
|
||||
r := nl.NewNetlinkRequest(int(InitMsg), 0)
|
||||
// write pid
|
||||
|
||||
// write cloneFlags
|
||||
r.AddData(&Int32msg{
|
||||
Type: PidAttr,
|
||||
Value: uint32(pid),
|
||||
Type: CloneFlagsAttr,
|
||||
Value: uint32(cloneFlags),
|
||||
})
|
||||
|
||||
// write console path
|
||||
if consolePath != "" {
|
||||
r.AddData(&Bytemsg{
|
||||
@@ -1081,5 +1288,57 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, pid int, consolePath
|
||||
Value: []byte(consolePath),
|
||||
})
|
||||
}
|
||||
|
||||
// write custom namespace paths
|
||||
if len(nsMaps) > 0 {
|
||||
nsPaths, err := c.orderNamespacePaths(nsMaps)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
r.AddData(&Bytemsg{
|
||||
Type: NsPathsAttr,
|
||||
Value: []byte(strings.Join(nsPaths, ",")),
|
||||
})
|
||||
}
|
||||
|
||||
// write namespace paths only when we are not joining an existing user ns
|
||||
_, joinExistingUser := nsMaps[configs.NEWUSER]
|
||||
if !joinExistingUser {
|
||||
// write uid mappings
|
||||
if len(c.config.UidMappings) > 0 {
|
||||
b, err := encodeIDMapping(c.config.UidMappings)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
r.AddData(&Bytemsg{
|
||||
Type: UidmapAttr,
|
||||
Value: b,
|
||||
})
|
||||
}
|
||||
|
||||
// write gid mappings
|
||||
if len(c.config.GidMappings) > 0 {
|
||||
b, err := encodeIDMapping(c.config.GidMappings)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
r.AddData(&Bytemsg{
|
||||
Type: GidmapAttr,
|
||||
Value: b,
|
||||
})
|
||||
// check if we have CAP_SETGID to setgroup properly
|
||||
pid, err := capability.NewPid(os.Getpid())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if !pid.Get(capability.EFFECTIVE, capability.CAP_SETGID) {
|
||||
r.AddData(&Boolmsg{
|
||||
Type: SetgroupAttr,
|
||||
Value: true,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return bytes.NewReader(r.Serialize()), nil
|
||||
}
|
||||
|
Reference in New Issue
Block a user