vendor: update google/cadvisor and opencontainers/runc

Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
This commit is contained in:
Giuseppe Scrivano
2020-06-24 10:56:34 +02:00
parent 78d295d168
commit a6a3bf2eb4
632 changed files with 36493 additions and 89280 deletions

View File

@@ -16,7 +16,6 @@ import (
"reflect"
"strings"
"sync"
"syscall" // only for SysProcAttr and Signal
"time"
securejoin "github.com/cyphar/filepath-securejoin"
@@ -27,8 +26,10 @@ import (
"github.com/opencontainers/runc/libcontainer/utils"
"github.com/opencontainers/runtime-spec/specs-go"
criurpc "github.com/checkpoint-restore/go-criu/rpc"
"github.com/checkpoint-restore/go-criu/v4"
criurpc "github.com/checkpoint-restore/go-criu/v4/rpc"
"github.com/golang/protobuf/proto"
errorsf "github.com/pkg/errors"
"github.com/sirupsen/logrus"
"github.com/vishvananda/netlink/nl"
"golang.org/x/sys/unix"
@@ -65,8 +66,12 @@ type State struct {
// Set to true if BaseState.Config.RootlessEUID && BaseState.Config.RootlessCgroups
Rootless bool `json:"rootless"`
// Path to all the cgroups setup for a container. Key is cgroup subsystem name
// with the value as the path.
// Paths to all the container's cgroups, as returned by (*cgroups.Manager).GetPaths
//
// For cgroup v1, a key is cgroup subsystem name, and the value is the path
// to the cgroup for this subsystem.
//
// For cgroup v2 unified hierarchy, a key is "", and the value is the unified path.
CgroupPaths map[string]string `json:"cgroup_paths"`
// NamespacePaths are filepaths to the container's namespaces. Key is the namespace type
@@ -165,7 +170,17 @@ func (c *linuxContainer) OCIState() (*specs.State, error) {
}
func (c *linuxContainer) Processes() ([]int, error) {
pids, err := c.cgroupManager.GetAllPids()
var pids []int
status, err := c.currentStatus()
if err != nil {
return pids, err
}
// for systemd cgroup, the unit's cgroup path will be auto removed if container's all processes exited
if status == Stopped && !c.cgroupManager.Exists() {
return pids, nil
}
pids, err = c.cgroupManager.GetAllPids()
if err != nil {
return nil, newSystemErrorWithCause(err, "getting all container pids from cgroups")
}
@@ -206,7 +221,7 @@ func (c *linuxContainer) Set(config configs.Config) error {
return err
}
if status == Stopped {
return newGenericError(fmt.Errorf("container not running"), ContainerNotRunning)
return newGenericError(errors.New("container not running"), ContainerNotRunning)
}
if err := c.cgroupManager.Set(&config); err != nil {
// Set configs back
@@ -218,6 +233,9 @@ func (c *linuxContainer) Set(config configs.Config) error {
if c.intelRdtManager != nil {
if err := c.intelRdtManager.Set(&config); err != nil {
// Set configs back
if err2 := c.cgroupManager.Set(c.config); err2 != nil {
logrus.Warnf("Setting back cgroup configs failed due to error: %v, your state.json and actual configs might be inconsistent.", err2)
}
if err2 := c.intelRdtManager.Set(c.config); err2 != nil {
logrus.Warnf("Setting back intelrdt configs failed due to error: %v, your state.json and actual configs might be inconsistent.", err2)
}
@@ -292,7 +310,7 @@ func readFromExecFifo(execFifo io.Reader) error {
return err
}
if len(data) <= 0 {
return fmt.Errorf("cannot start an already running container")
return errors.New("cannot start an already running container")
}
return nil
}
@@ -309,7 +327,7 @@ func awaitFifoOpen(path string) <-chan openResult {
func fifoOpen(path string, block bool) openResult {
flags := os.O_RDONLY
if !block {
flags |= syscall.O_NONBLOCK
flags |= unix.O_NONBLOCK
}
f, err := os.OpenFile(path, flags, 0)
if err != nil {
@@ -365,13 +383,12 @@ func (c *linuxContainer) start(process *Process) error {
if err != nil {
return err
}
for i, hook := range c.config.Hooks.Poststart {
if err := hook.Run(s); err != nil {
if err := ignoreTerminateErrors(parent.terminate()); err != nil {
logrus.Warn(err)
}
return newSystemErrorWithCausef(err, "running poststart hook %d", i)
if err := c.config.Hooks[configs.Poststart].RunHooks(s); err != nil {
if err := ignoreTerminateErrors(parent.terminate()); err != nil {
logrus.Warn(errorsf.Wrapf(err, "Running Poststart hook"))
}
return err
}
}
}
@@ -379,13 +396,19 @@ func (c *linuxContainer) start(process *Process) error {
}
func (c *linuxContainer) Signal(s os.Signal, all bool) error {
if all {
return signalAllProcesses(c.cgroupManager, s)
}
c.m.Lock()
defer c.m.Unlock()
status, err := c.currentStatus()
if err != nil {
return err
}
if all {
// for systemd cgroup, the unit's cgroup path will be auto removed if container's all processes exited
if status == Stopped && !c.cgroupManager.Exists() {
return nil
}
return signalAllProcesses(c.cgroupManager, s)
}
// to avoid a PID reuse attack
if status == Running || status == Created || status == Paused {
if err := c.initProcess.signal(s); err != nil {
@@ -393,7 +416,7 @@ func (c *linuxContainer) Signal(s os.Signal, all bool) error {
}
return nil
}
return newGenericError(fmt.Errorf("container not running"), ContainerNotRunning)
return newGenericError(errors.New("container not running"), ContainerNotRunning)
}
func (c *linuxContainer) createExecFifo() error {
@@ -454,10 +477,7 @@ func (c *linuxContainer) newParentProcess(p *Process) (parentProcess, error) {
}
logFilePair := filePair{parentLogPipe, childLogPipe}
cmd, err := c.commandTemplate(p, childInitPipe, childLogPipe)
if err != nil {
return nil, newSystemErrorWithCause(err, "creating new command template")
}
cmd := c.commandTemplate(p, childInitPipe, childLogPipe)
if !p.Init {
return c.newSetnsProcess(p, cmd, messageSockPair, logFilePair)
}
@@ -473,7 +493,7 @@ func (c *linuxContainer) newParentProcess(p *Process) (parentProcess, error) {
return c.newInitProcess(p, cmd, messageSockPair, logFilePair)
}
func (c *linuxContainer) commandTemplate(p *Process, childInitPipe *os.File, childLogPipe *os.File) (*exec.Cmd, error) {
func (c *linuxContainer) commandTemplate(p *Process, childInitPipe *os.File, childLogPipe *os.File) *exec.Cmd {
cmd := exec.Command(c.initPath, c.initArgs[1:]...)
cmd.Args[0] = c.initArgs[0]
cmd.Stdin = p.Stdin
@@ -481,7 +501,7 @@ func (c *linuxContainer) commandTemplate(p *Process, childInitPipe *os.File, chi
cmd.Stderr = p.Stderr
cmd.Dir = c.config.Rootfs
if cmd.SysProcAttr == nil {
cmd.SysProcAttr = &syscall.SysProcAttr{}
cmd.SysProcAttr = &unix.SysProcAttr{}
}
cmd.Env = append(cmd.Env, fmt.Sprintf("GOMAXPROCS=%s", os.Getenv("GOMAXPROCS")))
cmd.ExtraFiles = append(cmd.ExtraFiles, p.ExtraFiles...)
@@ -507,9 +527,9 @@ func (c *linuxContainer) commandTemplate(p *Process, childInitPipe *os.File, chi
// PID1 the pdeathsig is being delivered to the container's init process by the kernel for some reason
// even with the parent still running.
if c.config.ParentDeathSignal > 0 {
cmd.SysProcAttr.Pdeathsig = syscall.Signal(c.config.ParentDeathSignal)
cmd.SysProcAttr.Pdeathsig = unix.Signal(c.config.ParentDeathSignal)
}
return cmd, nil
return cmd
}
func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPair, logFilePair filePair) (*initProcess, error) {
@@ -555,7 +575,7 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockP
}
return &setnsProcess{
cmd: cmd,
cgroupPaths: c.cgroupManager.GetPaths(),
cgroupPaths: state.CgroupPaths,
rootlessCgroups: c.config.RootlessCgroups,
intelRdtPath: state.IntelRdtPath,
messageSockPair: messageSockPair,
@@ -563,6 +583,7 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockP
config: c.newInitConfig(p),
process: p,
bootstrapData: data,
initProcessPid: state.InitProcessPid,
}, nil
}
@@ -650,7 +671,11 @@ func (c *linuxContainer) NotifyOOM() (<-chan struct{}, error) {
if c.config.RootlessCgroups {
logrus.Warn("getting OOM notifications may fail if you don't have the full access to cgroups")
}
return notifyOnOOM(c.cgroupManager.GetPaths())
path := c.cgroupManager.Path("memory")
if cgroups.IsCgroup2UnifiedMode() {
return notifyOnOOMV2(path)
}
return notifyOnOOM(path)
}
func (c *linuxContainer) NotifyMemoryPressure(level PressureLevel) (<-chan struct{}, error) {
@@ -658,7 +683,7 @@ func (c *linuxContainer) NotifyMemoryPressure(level PressureLevel) (<-chan struc
if c.config.RootlessCgroups {
logrus.Warn("getting memory pressure notifications may fail if you don't have the full access to cgroups")
}
return notifyMemoryPressure(c.cgroupManager.GetPaths(), level)
return notifyMemoryPressure(c.cgroupManager.Path("memory"), level)
}
var criuFeatures *criurpc.CriuFeatures
@@ -668,16 +693,6 @@ func (c *linuxContainer) checkCriuFeatures(criuOpts *CriuOpts, rpcOpts *criurpc.
var t criurpc.CriuReqType
t = criurpc.CriuReqType_FEATURE_CHECK
// criu 1.8 => 10800
if err := c.checkCriuVersion(10800); err != nil {
// Feature checking was introduced with CRIU 1.8.
// Ignore the feature check if an older CRIU version is used
// and just act as before.
// As all automated PR testing is done using CRIU 1.7 this
// code will not be tested by automated PR testing.
return nil
}
// make sure the features we are looking for are really not from
// some previous check
criuFeatures = nil
@@ -691,10 +706,10 @@ func (c *linuxContainer) checkCriuFeatures(criuOpts *CriuOpts, rpcOpts *criurpc.
Features: criuFeat,
}
err := c.criuSwrk(nil, req, criuOpts, false, nil)
err := c.criuSwrk(nil, req, criuOpts, nil)
if err != nil {
logrus.Debugf("%s", err)
return fmt.Errorf("CRIU feature check failed")
return errors.New("CRIU feature check failed")
}
logrus.Debugf("Feature check says: %s", criuFeatures)
@@ -721,56 +736,12 @@ func (c *linuxContainer) checkCriuFeatures(criuOpts *CriuOpts, rpcOpts *criurpc.
}
if missingFeatures {
return fmt.Errorf("CRIU is missing features")
return errors.New("CRIU is missing features")
}
return nil
}
func parseCriuVersion(path string) (int, error) {
var x, y, z int
out, err := exec.Command(path, "-V").Output()
if err != nil {
return 0, fmt.Errorf("Unable to execute CRIU command: %s", path)
}
x = 0
y = 0
z = 0
if ep := strings.Index(string(out), "-"); ep >= 0 {
// criu Git version format
var version string
if sp := strings.Index(string(out), "GitID"); sp > 0 {
version = string(out)[sp:ep]
} else {
return 0, fmt.Errorf("Unable to parse the CRIU version: %s", path)
}
n, err := fmt.Sscanf(version, "GitID: v%d.%d.%d", &x, &y, &z) // 1.5.2
if err != nil {
n, err = fmt.Sscanf(version, "GitID: v%d.%d", &x, &y) // 1.6
y++
} else {
z++
}
if n < 2 || err != nil {
return 0, fmt.Errorf("Unable to parse the CRIU version: %s %d %s", version, n, err)
}
} else {
// criu release version format
n, err := fmt.Sscanf(string(out), "Version: %d.%d.%d\n", &x, &y, &z) // 1.5.2
if err != nil {
n, err = fmt.Sscanf(string(out), "Version: %d.%d\n", &x, &y) // 1.6
}
if n < 2 || err != nil {
return 0, fmt.Errorf("Unable to parse the CRIU version: %s %d %s", out, n, err)
}
}
return x*10000 + y*100 + z, nil
}
func compareCriuVersion(criuVersion int, minVersion int) error {
// simple function to perform the actual version compare
if criuVersion < minVersion {
@@ -780,9 +751,6 @@ func compareCriuVersion(criuVersion int, minVersion int) error {
return nil
}
// This is used to store the result of criu version RPC
var criuVersionRPC *criurpc.CriuVersion
// checkCriuVersion checks Criu version greater than or equal to minVersion
func (c *linuxContainer) checkCriuVersion(minVersion int) error {
@@ -792,50 +760,13 @@ func (c *linuxContainer) checkCriuVersion(minVersion int) error {
return compareCriuVersion(c.criuVersion, minVersion)
}
// First try if this version of CRIU support the version RPC.
// The CRIU version RPC was introduced with CRIU 3.0.
// First, reset the variable for the RPC answer to nil
criuVersionRPC = nil
var t criurpc.CriuReqType
t = criurpc.CriuReqType_VERSION
req := &criurpc.CriuReq{
Type: &t,
}
err := c.criuSwrk(nil, req, nil, false, nil)
criu := criu.MakeCriu()
var err error
c.criuVersion, err = criu.GetCriuVersion()
if err != nil {
return fmt.Errorf("CRIU version check failed: %s", err)
}
if criuVersionRPC != nil {
logrus.Debugf("CRIU version: %s", criuVersionRPC)
// major and minor are always set
c.criuVersion = int(*criuVersionRPC.Major) * 10000
c.criuVersion += int(*criuVersionRPC.Minor) * 100
if criuVersionRPC.Sublevel != nil {
c.criuVersion += int(*criuVersionRPC.Sublevel)
}
if criuVersionRPC.Gitid != nil {
// runc's convention is that a CRIU git release is
// always the same as increasing the minor by 1
c.criuVersion -= (c.criuVersion % 100)
c.criuVersion += 100
}
return compareCriuVersion(c.criuVersion, minVersion)
}
// This is CRIU without the version RPC and therefore
// older than 3.0. Parsing the output is required.
// This can be remove once runc does not work with criu older than 3.0
c.criuVersion, err = parseCriuVersion(c.criuPath)
if err != nil {
return err
}
return compareCriuVersion(c.criuVersion, minVersion)
}
@@ -876,26 +807,6 @@ func (c *linuxContainer) addMaskPaths(req *criurpc.CriuReq) error {
return nil
}
func waitForCriuLazyServer(r *os.File, status string) error {
data := make([]byte, 1)
_, err := r.Read(data)
if err != nil {
return err
}
fd, err := os.OpenFile(status, os.O_TRUNC|os.O_WRONLY, os.ModeAppend)
if err != nil {
return err
}
_, err = fd.Write(data)
if err != nil {
return err
}
fd.Close()
return nil
}
func (c *linuxContainer) handleCriuConfigurationFile(rpcOpts *criurpc.CriuOpts) {
// CRIU will evaluate a configuration starting with release 3.11.
// Settings in the configuration file will overwrite RPC settings.
@@ -932,13 +843,13 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
// support for doing unprivileged dumps, but the setup of
// rootless containers might make this complicated.
// criu 1.5.2 => 10502
if err := c.checkCriuVersion(10502); err != nil {
// We are relying on the CRIU version RPC which was introduced with CRIU 3.0.0
if err := c.checkCriuVersion(30000); err != nil {
return err
}
if criuOpts.ImagesDirectory == "" {
return fmt.Errorf("invalid directory to save checkpoint")
return errors.New("invalid directory to save checkpoint")
}
// Since a container can be C/R'ed multiple times,
@@ -1006,8 +917,8 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
// CRIU expects the information about an external namespace
// like this: --external net[<inode>]:<key>
// This <key> is always 'extRootNetNS'.
var netns syscall.Stat_t
err = syscall.Stat(nsPath, &netns)
var netns unix.Stat_t
err = unix.Stat(nsPath, &netns)
if err != nil {
return err
}
@@ -1016,9 +927,13 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
}
}
fcg := c.cgroupManager.GetPaths()["freezer"]
if fcg != "" {
rpcOpts.FreezeCgroup = proto.String(fcg)
// CRIU can use cgroup freezer; when rpcOpts.FreezeCgroup
// is not set, CRIU uses ptrace() to pause the processes.
// Note cgroup v2 freezer is only supported since CRIU release 3.14.
if !cgroups.IsCgroup2UnifiedMode() || c.checkCriuVersion(31400) == nil {
if fcg := c.cgroupManager.Path("freezer"); fcg != "" {
rpcOpts.FreezeCgroup = proto.String(fcg)
}
}
// append optional criu opts, e.g., page-server and port
@@ -1037,10 +952,6 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
// append optional manage cgroups mode
if criuOpts.ManageCgroupsMode != 0 {
// criu 1.7 => 10700
if err := c.checkCriuVersion(10700); err != nil {
return err
}
mode := criurpc.CriuCgMode(criuOpts.ManageCgroupsMode)
rpcOpts.ManageCgroupsMode = &mode
}
@@ -1059,36 +970,53 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
} else {
t = criurpc.CriuReqType_DUMP
}
req := &criurpc.CriuReq{
Type: &t,
Opts: &rpcOpts,
}
if criuOpts.LazyPages {
// lazy migration requested; check if criu supports it
feat := criurpc.CriuFeatures{
LazyPages: proto.Bool(true),
}
if err := c.checkCriuFeatures(criuOpts, &rpcOpts, &feat); err != nil {
return err
}
statusRead, statusWrite, err := os.Pipe()
if err != nil {
return err
if fd := criuOpts.StatusFd; fd != -1 {
// check that the FD is valid
flags, err := unix.FcntlInt(uintptr(fd), unix.F_GETFL, 0)
if err != nil {
return fmt.Errorf("invalid --status-fd argument %d: %w", fd, err)
}
// and writable
if flags&unix.O_WRONLY == 0 {
return fmt.Errorf("invalid --status-fd argument %d: not writable", fd)
}
if c.checkCriuVersion(31500) != nil {
// For criu 3.15+, use notifications (see case "status-ready"
// in criuNotifications). Otherwise, rely on criu status fd.
rpcOpts.StatusFd = proto.Int32(int32(fd))
}
}
rpcOpts.StatusFd = proto.Int32(int32(statusWrite.Fd()))
go waitForCriuLazyServer(statusRead, criuOpts.StatusFd)
}
//no need to dump these information in pre-dump
req := &criurpc.CriuReq{
Type: &t,
Opts: &rpcOpts,
}
// no need to dump all this in pre-dump
if !criuOpts.PreDump {
hasCgroupns := c.config.Namespaces.Contains(configs.NEWCGROUP)
for _, m := range c.config.Mounts {
switch m.Device {
case "bind":
c.addCriuDumpMount(req, m)
case "cgroup":
if cgroups.IsCgroup2UnifiedMode() || hasCgroupns {
// real mount(s)
continue
}
// a set of "external" bind mounts
binds, err := getCgroupMounts(m)
if err != nil {
return err
@@ -1120,7 +1048,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
}
}
err = c.criuSwrk(nil, req, criuOpts, false, nil)
err = c.criuSwrk(nil, req, criuOpts, nil)
if err != nil {
return err
}
@@ -1166,7 +1094,14 @@ func (c *linuxContainer) restoreNetwork(req *criurpc.CriuReq, criuOpts *CriuOpts
func (c *linuxContainer) makeCriuRestoreMountpoints(m *configs.Mount) error {
switch m.Device {
case "cgroup":
// Do nothing for cgroup, CRIU should handle it
// No mount point(s) need to be created:
//
// * for v1, mount points are saved by CRIU because
// /sys/fs/cgroup is a tmpfs mount
//
// * for v2, /sys/fs/cgroup is a real mount, but
// the mountpoint appears as soon as /sys is mounted
return nil
case "bind":
// The prepareBindMount() function checks if source
// exists. So it cannot be used for other filesystem types.
@@ -1174,7 +1109,7 @@ func (c *linuxContainer) makeCriuRestoreMountpoints(m *configs.Mount) error {
return err
}
default:
// for all other file-systems just create the mountpoints
// for all other filesystems just create the mountpoints
dest, err := securejoin.SecureJoin(c.config.Rootfs, m.Destination)
if err != nil {
return err
@@ -1195,10 +1130,10 @@ func (c *linuxContainer) makeCriuRestoreMountpoints(m *configs.Mount) error {
func isPathInPrefixList(path string, prefix []string) bool {
for _, p := range prefix {
if strings.HasPrefix(path, p+"/") {
return false
return true
}
}
return true
return false
}
// prepareCriuRestoreMounts tries to set up the rootfs of the
@@ -1220,7 +1155,7 @@ func (c *linuxContainer) prepareCriuRestoreMounts(mounts []*configs.Mount) error
// if the mountpoints are not on a tmpfs, as CRIU will
// restore the complete tmpfs content from its checkpoint.
for _, m := range mounts {
if isPathInPrefixList(m.Destination, tmpfs) {
if !isPathInPrefixList(m.Destination, tmpfs) {
if err := c.makeCriuRestoreMountpoints(m); err != nil {
return err
}
@@ -1240,8 +1175,8 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
// TODO(avagin): Figure out how to make this work nicely. CRIU doesn't have
// support for unprivileged restore at the moment.
// criu 1.5.2 => 10502
if err := c.checkCriuVersion(10502); err != nil {
// We are relying on the CRIU version RPC which was introduced with CRIU 3.0.0
if err := c.checkCriuVersion(30000); err != nil {
return err
}
if criuOpts.WorkDirectory == "" {
@@ -1258,7 +1193,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
}
defer workDir.Close()
if criuOpts.ImagesDirectory == "" {
return fmt.Errorf("invalid directory to restore checkpoint")
return errors.New("invalid directory to restore checkpoint")
}
imageDir, err := os.Open(criuOpts.ImagesDirectory)
if err != nil {
@@ -1326,11 +1261,11 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
// The <key> needs to be the same as during checkpointing.
// We are always using 'extRootNetNS' as the key in this.
netns, err := os.Open(nsPath)
defer netns.Close()
if err != nil {
logrus.Errorf("If a specific network namespace is defined it must exist: %s", err)
return fmt.Errorf("Requested network namespace %v does not exist", nsPath)
}
defer netns.Close()
inheritFd := new(criurpc.InheritFd)
inheritFd.Key = proto.String("extRootNetNS")
// The offset of four is necessary because 0, 1, 2 and 3 is already
@@ -1348,11 +1283,16 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
return err
}
hasCgroupns := c.config.Namespaces.Contains(configs.NEWCGROUP)
for _, m := range c.config.Mounts {
switch m.Device {
case "bind":
c.addCriuRestoreMount(req, m)
case "cgroup":
if cgroups.IsCgroup2UnifiedMode() || hasCgroupns {
continue
}
// cgroup v1 is a set of bind mounts, unless cgroupns is used
binds, err := getCgroupMounts(m)
if err != nil {
return err
@@ -1379,10 +1319,6 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
// append optional manage cgroups mode
if criuOpts.ManageCgroupsMode != 0 {
// criu 1.7 => 10700
if err := c.checkCriuVersion(10700); err != nil {
return err
}
mode := criurpc.CriuCgMode(criuOpts.ManageCgroupsMode)
req.Opts.ManageCgroupsMode = &mode
}
@@ -1406,10 +1342,15 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
req.Opts.InheritFd = append(req.Opts.InheritFd, inheritFd)
}
}
return c.criuSwrk(process, req, criuOpts, true, extraFiles)
return c.criuSwrk(process, req, criuOpts, extraFiles)
}
func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
// need to apply cgroups only on restore
if req.GetType() != criurpc.CriuReqType_RESTORE {
return nil
}
// XXX: Do we need to deal with this case? AFAIK criu still requires root.
if err := c.cgroupManager.Apply(pid); err != nil {
return err
@@ -1419,6 +1360,11 @@ func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
return newSystemError(err)
}
if cgroups.IsCgroup2UnifiedMode() {
return nil
}
// the stuff below is cgroupv1-specific
path := fmt.Sprintf("/proc/%d/cgroup", pid)
cgroupsPaths, err := cgroups.ParseCgroupFile(path)
if err != nil {
@@ -1436,7 +1382,7 @@ func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
return nil
}
func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, applyCgroups bool, extraFiles []*os.File) error {
func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, extraFiles []*os.File) error {
fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_SEQPACKET|unix.SOCK_CLOEXEC, 0)
if err != nil {
return err
@@ -1484,26 +1430,29 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
if err := cmd.Start(); err != nil {
return err
}
// we close criuServer so that even if CRIU crashes or unexpectedly exits, runc will not hang.
criuServer.Close()
// cmd.Process will be replaced by a restored init.
criuProcess := cmd.Process
var criuProcessState *os.ProcessState
defer func() {
criuClientCon.Close()
_, err := cmd.Process.Wait()
if err != nil {
return
if criuProcessState == nil {
criuClientCon.Close()
_, err := criuProcess.Wait()
if err != nil {
logrus.Warnf("wait on criuProcess returned %v", err)
}
}
}()
if applyCgroups {
err := c.criuApplyCgroups(cmd.Process.Pid, req)
if err != nil {
return err
}
if err := c.criuApplyCgroups(criuProcess.Pid, req); err != nil {
return err
}
var extFds []string
if process != nil {
extFds, err = getPipeFds(cmd.Process.Pid)
extFds, err = getPipeFds(criuProcess.Pid)
if err != nil {
return err
}
@@ -1542,14 +1491,23 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
oob := make([]byte, 4096)
for true {
n, oobn, _, _, err := criuClientCon.ReadMsgUnix(buf, oob)
if req.Opts != nil && req.Opts.StatusFd != nil {
// Close status_fd as soon as we got something back from criu,
// assuming it has consumed (reopened) it by this time.
// Otherwise it will might be left open forever and whoever
// is waiting on it will wait forever.
fd := int(*req.Opts.StatusFd)
_ = unix.Close(fd)
req.Opts.StatusFd = nil
}
if err != nil {
return err
}
if n == 0 {
return fmt.Errorf("unexpected EOF")
return errors.New("unexpected EOF")
}
if n == len(buf) {
return fmt.Errorf("buffer is too small")
return errors.New("buffer is too small")
}
resp := new(criurpc.CriuResp)
@@ -1559,25 +1517,16 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
}
if !resp.GetSuccess() {
typeString := req.GetType().String()
if typeString == "VERSION" {
// If the VERSION RPC fails this probably means that the CRIU
// version is too old for this RPC. Just return 'nil'.
return nil
}
return fmt.Errorf("criu failed: type %s errno %d\nlog file: %s", typeString, resp.GetCrErrno(), logPath)
}
t := resp.GetType()
switch {
case t == criurpc.CriuReqType_VERSION:
logrus.Debugf("CRIU version: %s", resp)
criuVersionRPC = resp.GetVersion()
break
case t == criurpc.CriuReqType_FEATURE_CHECK:
logrus.Debugf("Feature check says: %s", resp)
criuFeatures = resp.GetFeatures()
case t == criurpc.CriuReqType_NOTIFY:
if err := c.criuNotifications(resp, process, opts, extFds, oob[:oobn]); err != nil {
if err := c.criuNotifications(resp, process, cmd, opts, extFds, oob[:oobn]); err != nil {
return err
}
t = criurpc.CriuReqType_NOTIFY
@@ -1607,7 +1556,7 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
criuClientCon.CloseWrite()
// cmd.Wait() waits cmd.goroutines which are used for proxying file descriptors.
// Here we want to wait only the CRIU process.
st, err := cmd.Process.Wait()
criuProcessState, err = criuProcess.Wait()
if err != nil {
return err
}
@@ -1619,8 +1568,8 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
// and not the whole series of pre-dump, pre-dump, ...m, dump
// If we got the message CriuReqType_PRE_DUMP it means
// CRIU was successful and we need to forcefully stop CRIU
if !st.Success() && *req.Type != criurpc.CriuReqType_PRE_DUMP {
return fmt.Errorf("criu failed: %s\nlog file: %s", st.String(), logPath)
if !criuProcessState.Success() && *req.Type != criurpc.CriuReqType_PRE_DUMP {
return fmt.Errorf("criu failed: %s\nlog file: %s", criuProcessState.String(), logPath)
}
return nil
}
@@ -1653,43 +1602,53 @@ func unlockNetwork(config *configs.Config) error {
return nil
}
func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Process, opts *CriuOpts, fds []string, oob []byte) error {
func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Process, cmd *exec.Cmd, opts *CriuOpts, fds []string, oob []byte) error {
notify := resp.GetNotify()
if notify == nil {
return fmt.Errorf("invalid response: %s", resp.String())
}
logrus.Debugf("notify: %s\n", notify.GetScript())
switch {
case notify.GetScript() == "post-dump":
script := notify.GetScript()
logrus.Debugf("notify: %s\n", script)
switch script {
case "post-dump":
f, err := os.Create(filepath.Join(c.root, "checkpoint"))
if err != nil {
return err
}
f.Close()
case notify.GetScript() == "network-unlock":
case "network-unlock":
if err := unlockNetwork(c.config); err != nil {
return err
}
case notify.GetScript() == "network-lock":
case "network-lock":
if err := lockNetwork(c.config); err != nil {
return err
}
case notify.GetScript() == "setup-namespaces":
case "setup-namespaces":
if c.config.Hooks != nil {
s, err := c.currentOCIState()
if err != nil {
return nil
}
s.Pid = int(notify.GetPid())
for i, hook := range c.config.Hooks.Prestart {
if err := hook.Run(s); err != nil {
return newSystemErrorWithCausef(err, "running prestart hook %d", i)
}
if err := c.config.Hooks[configs.Prestart].RunHooks(s); err != nil {
return err
}
if err := c.config.Hooks[configs.CreateRuntime].RunHooks(s); err != nil {
return err
}
}
case notify.GetScript() == "post-restore":
case "post-restore":
pid := notify.GetPid()
r, err := newRestoredProcess(int(pid), fds)
p, err := os.FindProcess(int(pid))
if err != nil {
return err
}
cmd.Process = p
r, err := newRestoredProcess(cmd, fds)
if err != nil {
return err
}
@@ -1710,7 +1669,7 @@ func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Proc
logrus.Error(err)
}
}
case notify.GetScript() == "orphan-pts-master":
case "orphan-pts-master":
scm, err := unix.ParseSocketControlMessage(oob)
if err != nil {
return err
@@ -1727,6 +1686,16 @@ func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Proc
if err := utils.SendFd(process.ConsoleSocket, master.Name(), master.Fd()); err != nil {
return err
}
case "status-ready":
if opts.StatusFd != -1 {
// write \0 to status fd to notify that lazy page server is ready
_, err := unix.Write(opts.StatusFd, []byte{0})
if err != nil {
logrus.Warnf("can't write \\0 to status fd: %v", err)
}
_ = unix.Close(opts.StatusFd)
opts.StatusFd = -1
}
}
return nil
}
@@ -1746,13 +1715,30 @@ func (c *linuxContainer) updateState(process parentProcess) (*State, error) {
return state, nil
}
func (c *linuxContainer) saveState(s *State) error {
f, err := os.Create(filepath.Join(c.root, stateFilename))
func (c *linuxContainer) saveState(s *State) (retErr error) {
tmpFile, err := ioutil.TempFile(c.root, "state-")
if err != nil {
return err
}
defer f.Close()
return utils.WriteJSON(f, s)
defer func() {
if retErr != nil {
tmpFile.Close()
os.Remove(tmpFile.Name())
}
}()
err = utils.WriteJSON(tmpFile, s)
if err != nil {
return err
}
err = tmpFile.Close()
if err != nil {
return err
}
stateFilePath := filepath.Join(c.root, stateFilename)
return os.Rename(tmpFile.Name(), stateFilePath)
}
func (c *linuxContainer) deleteState() error {
@@ -1778,10 +1764,7 @@ func (c *linuxContainer) refreshState() error {
if paused {
return c.state.transition(&pausedState{c: c})
}
t, err := c.runType()
if err != nil {
return err
}
t := c.runType()
switch t {
case Created:
return c.state.transition(&createdState{c: c})
@@ -1791,48 +1774,32 @@ func (c *linuxContainer) refreshState() error {
return c.state.transition(&stoppedState{c: c})
}
func (c *linuxContainer) runType() (Status, error) {
func (c *linuxContainer) runType() Status {
if c.initProcess == nil {
return Stopped, nil
return Stopped
}
pid := c.initProcess.pid()
stat, err := system.Stat(pid)
if err != nil {
return Stopped, nil
return Stopped
}
if stat.StartTime != c.initProcessStartTime || stat.State == system.Zombie || stat.State == system.Dead {
return Stopped, nil
return Stopped
}
// We'll create exec fifo and blocking on it after container is created,
// and delete it after start container.
if _, err := os.Stat(filepath.Join(c.root, execFifoFilename)); err == nil {
return Created, nil
return Created
}
return Running, nil
return Running
}
func (c *linuxContainer) isPaused() (bool, error) {
fcg := c.cgroupManager.GetPaths()["freezer"]
if fcg == "" {
// A container doesn't have a freezer cgroup
return false, nil
}
pausedState := "FROZEN"
filename := "freezer.state"
if cgroups.IsCgroup2UnifiedMode() {
filename = "cgroup.freeze"
pausedState = "1"
}
data, err := ioutil.ReadFile(filepath.Join(fcg, filename))
state, err := c.cgroupManager.GetFreezerState()
if err != nil {
// If freezer cgroup is not mounted, the container would just be not paused.
if os.IsNotExist(err) || err == syscall.ENODEV {
return false, nil
}
return false, newSystemErrorWithCause(err, "checking if container is paused")
return false, err
}
return bytes.Equal(bytes.TrimSpace(data), []byte(pausedState)), nil
return state == configs.Frozen, nil
}
func (c *linuxContainer) currentState() (*State, error) {