vendor: update google/cadvisor and opencontainers/runc
Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
This commit is contained in:
487
vendor/github.com/opencontainers/runc/libcontainer/container_linux.go
generated
vendored
487
vendor/github.com/opencontainers/runc/libcontainer/container_linux.go
generated
vendored
@@ -16,7 +16,6 @@ import (
|
||||
"reflect"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall" // only for SysProcAttr and Signal
|
||||
"time"
|
||||
|
||||
securejoin "github.com/cyphar/filepath-securejoin"
|
||||
@@ -27,8 +26,10 @@ import (
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
|
||||
criurpc "github.com/checkpoint-restore/go-criu/rpc"
|
||||
"github.com/checkpoint-restore/go-criu/v4"
|
||||
criurpc "github.com/checkpoint-restore/go-criu/v4/rpc"
|
||||
"github.com/golang/protobuf/proto"
|
||||
errorsf "github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/vishvananda/netlink/nl"
|
||||
"golang.org/x/sys/unix"
|
||||
@@ -65,8 +66,12 @@ type State struct {
|
||||
// Set to true if BaseState.Config.RootlessEUID && BaseState.Config.RootlessCgroups
|
||||
Rootless bool `json:"rootless"`
|
||||
|
||||
// Path to all the cgroups setup for a container. Key is cgroup subsystem name
|
||||
// with the value as the path.
|
||||
// Paths to all the container's cgroups, as returned by (*cgroups.Manager).GetPaths
|
||||
//
|
||||
// For cgroup v1, a key is cgroup subsystem name, and the value is the path
|
||||
// to the cgroup for this subsystem.
|
||||
//
|
||||
// For cgroup v2 unified hierarchy, a key is "", and the value is the unified path.
|
||||
CgroupPaths map[string]string `json:"cgroup_paths"`
|
||||
|
||||
// NamespacePaths are filepaths to the container's namespaces. Key is the namespace type
|
||||
@@ -165,7 +170,17 @@ func (c *linuxContainer) OCIState() (*specs.State, error) {
|
||||
}
|
||||
|
||||
func (c *linuxContainer) Processes() ([]int, error) {
|
||||
pids, err := c.cgroupManager.GetAllPids()
|
||||
var pids []int
|
||||
status, err := c.currentStatus()
|
||||
if err != nil {
|
||||
return pids, err
|
||||
}
|
||||
// for systemd cgroup, the unit's cgroup path will be auto removed if container's all processes exited
|
||||
if status == Stopped && !c.cgroupManager.Exists() {
|
||||
return pids, nil
|
||||
}
|
||||
|
||||
pids, err = c.cgroupManager.GetAllPids()
|
||||
if err != nil {
|
||||
return nil, newSystemErrorWithCause(err, "getting all container pids from cgroups")
|
||||
}
|
||||
@@ -206,7 +221,7 @@ func (c *linuxContainer) Set(config configs.Config) error {
|
||||
return err
|
||||
}
|
||||
if status == Stopped {
|
||||
return newGenericError(fmt.Errorf("container not running"), ContainerNotRunning)
|
||||
return newGenericError(errors.New("container not running"), ContainerNotRunning)
|
||||
}
|
||||
if err := c.cgroupManager.Set(&config); err != nil {
|
||||
// Set configs back
|
||||
@@ -218,6 +233,9 @@ func (c *linuxContainer) Set(config configs.Config) error {
|
||||
if c.intelRdtManager != nil {
|
||||
if err := c.intelRdtManager.Set(&config); err != nil {
|
||||
// Set configs back
|
||||
if err2 := c.cgroupManager.Set(c.config); err2 != nil {
|
||||
logrus.Warnf("Setting back cgroup configs failed due to error: %v, your state.json and actual configs might be inconsistent.", err2)
|
||||
}
|
||||
if err2 := c.intelRdtManager.Set(c.config); err2 != nil {
|
||||
logrus.Warnf("Setting back intelrdt configs failed due to error: %v, your state.json and actual configs might be inconsistent.", err2)
|
||||
}
|
||||
@@ -292,7 +310,7 @@ func readFromExecFifo(execFifo io.Reader) error {
|
||||
return err
|
||||
}
|
||||
if len(data) <= 0 {
|
||||
return fmt.Errorf("cannot start an already running container")
|
||||
return errors.New("cannot start an already running container")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -309,7 +327,7 @@ func awaitFifoOpen(path string) <-chan openResult {
|
||||
func fifoOpen(path string, block bool) openResult {
|
||||
flags := os.O_RDONLY
|
||||
if !block {
|
||||
flags |= syscall.O_NONBLOCK
|
||||
flags |= unix.O_NONBLOCK
|
||||
}
|
||||
f, err := os.OpenFile(path, flags, 0)
|
||||
if err != nil {
|
||||
@@ -365,13 +383,12 @@ func (c *linuxContainer) start(process *Process) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for i, hook := range c.config.Hooks.Poststart {
|
||||
if err := hook.Run(s); err != nil {
|
||||
if err := ignoreTerminateErrors(parent.terminate()); err != nil {
|
||||
logrus.Warn(err)
|
||||
}
|
||||
return newSystemErrorWithCausef(err, "running poststart hook %d", i)
|
||||
|
||||
if err := c.config.Hooks[configs.Poststart].RunHooks(s); err != nil {
|
||||
if err := ignoreTerminateErrors(parent.terminate()); err != nil {
|
||||
logrus.Warn(errorsf.Wrapf(err, "Running Poststart hook"))
|
||||
}
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -379,13 +396,19 @@ func (c *linuxContainer) start(process *Process) error {
|
||||
}
|
||||
|
||||
func (c *linuxContainer) Signal(s os.Signal, all bool) error {
|
||||
if all {
|
||||
return signalAllProcesses(c.cgroupManager, s)
|
||||
}
|
||||
c.m.Lock()
|
||||
defer c.m.Unlock()
|
||||
status, err := c.currentStatus()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if all {
|
||||
// for systemd cgroup, the unit's cgroup path will be auto removed if container's all processes exited
|
||||
if status == Stopped && !c.cgroupManager.Exists() {
|
||||
return nil
|
||||
}
|
||||
return signalAllProcesses(c.cgroupManager, s)
|
||||
}
|
||||
// to avoid a PID reuse attack
|
||||
if status == Running || status == Created || status == Paused {
|
||||
if err := c.initProcess.signal(s); err != nil {
|
||||
@@ -393,7 +416,7 @@ func (c *linuxContainer) Signal(s os.Signal, all bool) error {
|
||||
}
|
||||
return nil
|
||||
}
|
||||
return newGenericError(fmt.Errorf("container not running"), ContainerNotRunning)
|
||||
return newGenericError(errors.New("container not running"), ContainerNotRunning)
|
||||
}
|
||||
|
||||
func (c *linuxContainer) createExecFifo() error {
|
||||
@@ -454,10 +477,7 @@ func (c *linuxContainer) newParentProcess(p *Process) (parentProcess, error) {
|
||||
}
|
||||
logFilePair := filePair{parentLogPipe, childLogPipe}
|
||||
|
||||
cmd, err := c.commandTemplate(p, childInitPipe, childLogPipe)
|
||||
if err != nil {
|
||||
return nil, newSystemErrorWithCause(err, "creating new command template")
|
||||
}
|
||||
cmd := c.commandTemplate(p, childInitPipe, childLogPipe)
|
||||
if !p.Init {
|
||||
return c.newSetnsProcess(p, cmd, messageSockPair, logFilePair)
|
||||
}
|
||||
@@ -473,7 +493,7 @@ func (c *linuxContainer) newParentProcess(p *Process) (parentProcess, error) {
|
||||
return c.newInitProcess(p, cmd, messageSockPair, logFilePair)
|
||||
}
|
||||
|
||||
func (c *linuxContainer) commandTemplate(p *Process, childInitPipe *os.File, childLogPipe *os.File) (*exec.Cmd, error) {
|
||||
func (c *linuxContainer) commandTemplate(p *Process, childInitPipe *os.File, childLogPipe *os.File) *exec.Cmd {
|
||||
cmd := exec.Command(c.initPath, c.initArgs[1:]...)
|
||||
cmd.Args[0] = c.initArgs[0]
|
||||
cmd.Stdin = p.Stdin
|
||||
@@ -481,7 +501,7 @@ func (c *linuxContainer) commandTemplate(p *Process, childInitPipe *os.File, chi
|
||||
cmd.Stderr = p.Stderr
|
||||
cmd.Dir = c.config.Rootfs
|
||||
if cmd.SysProcAttr == nil {
|
||||
cmd.SysProcAttr = &syscall.SysProcAttr{}
|
||||
cmd.SysProcAttr = &unix.SysProcAttr{}
|
||||
}
|
||||
cmd.Env = append(cmd.Env, fmt.Sprintf("GOMAXPROCS=%s", os.Getenv("GOMAXPROCS")))
|
||||
cmd.ExtraFiles = append(cmd.ExtraFiles, p.ExtraFiles...)
|
||||
@@ -507,9 +527,9 @@ func (c *linuxContainer) commandTemplate(p *Process, childInitPipe *os.File, chi
|
||||
// PID1 the pdeathsig is being delivered to the container's init process by the kernel for some reason
|
||||
// even with the parent still running.
|
||||
if c.config.ParentDeathSignal > 0 {
|
||||
cmd.SysProcAttr.Pdeathsig = syscall.Signal(c.config.ParentDeathSignal)
|
||||
cmd.SysProcAttr.Pdeathsig = unix.Signal(c.config.ParentDeathSignal)
|
||||
}
|
||||
return cmd, nil
|
||||
return cmd
|
||||
}
|
||||
|
||||
func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPair, logFilePair filePair) (*initProcess, error) {
|
||||
@@ -555,7 +575,7 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockP
|
||||
}
|
||||
return &setnsProcess{
|
||||
cmd: cmd,
|
||||
cgroupPaths: c.cgroupManager.GetPaths(),
|
||||
cgroupPaths: state.CgroupPaths,
|
||||
rootlessCgroups: c.config.RootlessCgroups,
|
||||
intelRdtPath: state.IntelRdtPath,
|
||||
messageSockPair: messageSockPair,
|
||||
@@ -563,6 +583,7 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockP
|
||||
config: c.newInitConfig(p),
|
||||
process: p,
|
||||
bootstrapData: data,
|
||||
initProcessPid: state.InitProcessPid,
|
||||
}, nil
|
||||
}
|
||||
|
||||
@@ -650,7 +671,11 @@ func (c *linuxContainer) NotifyOOM() (<-chan struct{}, error) {
|
||||
if c.config.RootlessCgroups {
|
||||
logrus.Warn("getting OOM notifications may fail if you don't have the full access to cgroups")
|
||||
}
|
||||
return notifyOnOOM(c.cgroupManager.GetPaths())
|
||||
path := c.cgroupManager.Path("memory")
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
return notifyOnOOMV2(path)
|
||||
}
|
||||
return notifyOnOOM(path)
|
||||
}
|
||||
|
||||
func (c *linuxContainer) NotifyMemoryPressure(level PressureLevel) (<-chan struct{}, error) {
|
||||
@@ -658,7 +683,7 @@ func (c *linuxContainer) NotifyMemoryPressure(level PressureLevel) (<-chan struc
|
||||
if c.config.RootlessCgroups {
|
||||
logrus.Warn("getting memory pressure notifications may fail if you don't have the full access to cgroups")
|
||||
}
|
||||
return notifyMemoryPressure(c.cgroupManager.GetPaths(), level)
|
||||
return notifyMemoryPressure(c.cgroupManager.Path("memory"), level)
|
||||
}
|
||||
|
||||
var criuFeatures *criurpc.CriuFeatures
|
||||
@@ -668,16 +693,6 @@ func (c *linuxContainer) checkCriuFeatures(criuOpts *CriuOpts, rpcOpts *criurpc.
|
||||
var t criurpc.CriuReqType
|
||||
t = criurpc.CriuReqType_FEATURE_CHECK
|
||||
|
||||
// criu 1.8 => 10800
|
||||
if err := c.checkCriuVersion(10800); err != nil {
|
||||
// Feature checking was introduced with CRIU 1.8.
|
||||
// Ignore the feature check if an older CRIU version is used
|
||||
// and just act as before.
|
||||
// As all automated PR testing is done using CRIU 1.7 this
|
||||
// code will not be tested by automated PR testing.
|
||||
return nil
|
||||
}
|
||||
|
||||
// make sure the features we are looking for are really not from
|
||||
// some previous check
|
||||
criuFeatures = nil
|
||||
@@ -691,10 +706,10 @@ func (c *linuxContainer) checkCriuFeatures(criuOpts *CriuOpts, rpcOpts *criurpc.
|
||||
Features: criuFeat,
|
||||
}
|
||||
|
||||
err := c.criuSwrk(nil, req, criuOpts, false, nil)
|
||||
err := c.criuSwrk(nil, req, criuOpts, nil)
|
||||
if err != nil {
|
||||
logrus.Debugf("%s", err)
|
||||
return fmt.Errorf("CRIU feature check failed")
|
||||
return errors.New("CRIU feature check failed")
|
||||
}
|
||||
|
||||
logrus.Debugf("Feature check says: %s", criuFeatures)
|
||||
@@ -721,56 +736,12 @@ func (c *linuxContainer) checkCriuFeatures(criuOpts *CriuOpts, rpcOpts *criurpc.
|
||||
}
|
||||
|
||||
if missingFeatures {
|
||||
return fmt.Errorf("CRIU is missing features")
|
||||
return errors.New("CRIU is missing features")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func parseCriuVersion(path string) (int, error) {
|
||||
var x, y, z int
|
||||
|
||||
out, err := exec.Command(path, "-V").Output()
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("Unable to execute CRIU command: %s", path)
|
||||
}
|
||||
|
||||
x = 0
|
||||
y = 0
|
||||
z = 0
|
||||
if ep := strings.Index(string(out), "-"); ep >= 0 {
|
||||
// criu Git version format
|
||||
var version string
|
||||
if sp := strings.Index(string(out), "GitID"); sp > 0 {
|
||||
version = string(out)[sp:ep]
|
||||
} else {
|
||||
return 0, fmt.Errorf("Unable to parse the CRIU version: %s", path)
|
||||
}
|
||||
|
||||
n, err := fmt.Sscanf(version, "GitID: v%d.%d.%d", &x, &y, &z) // 1.5.2
|
||||
if err != nil {
|
||||
n, err = fmt.Sscanf(version, "GitID: v%d.%d", &x, &y) // 1.6
|
||||
y++
|
||||
} else {
|
||||
z++
|
||||
}
|
||||
if n < 2 || err != nil {
|
||||
return 0, fmt.Errorf("Unable to parse the CRIU version: %s %d %s", version, n, err)
|
||||
}
|
||||
} else {
|
||||
// criu release version format
|
||||
n, err := fmt.Sscanf(string(out), "Version: %d.%d.%d\n", &x, &y, &z) // 1.5.2
|
||||
if err != nil {
|
||||
n, err = fmt.Sscanf(string(out), "Version: %d.%d\n", &x, &y) // 1.6
|
||||
}
|
||||
if n < 2 || err != nil {
|
||||
return 0, fmt.Errorf("Unable to parse the CRIU version: %s %d %s", out, n, err)
|
||||
}
|
||||
}
|
||||
|
||||
return x*10000 + y*100 + z, nil
|
||||
}
|
||||
|
||||
func compareCriuVersion(criuVersion int, minVersion int) error {
|
||||
// simple function to perform the actual version compare
|
||||
if criuVersion < minVersion {
|
||||
@@ -780,9 +751,6 @@ func compareCriuVersion(criuVersion int, minVersion int) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// This is used to store the result of criu version RPC
|
||||
var criuVersionRPC *criurpc.CriuVersion
|
||||
|
||||
// checkCriuVersion checks Criu version greater than or equal to minVersion
|
||||
func (c *linuxContainer) checkCriuVersion(minVersion int) error {
|
||||
|
||||
@@ -792,50 +760,13 @@ func (c *linuxContainer) checkCriuVersion(minVersion int) error {
|
||||
return compareCriuVersion(c.criuVersion, minVersion)
|
||||
}
|
||||
|
||||
// First try if this version of CRIU support the version RPC.
|
||||
// The CRIU version RPC was introduced with CRIU 3.0.
|
||||
|
||||
// First, reset the variable for the RPC answer to nil
|
||||
criuVersionRPC = nil
|
||||
|
||||
var t criurpc.CriuReqType
|
||||
t = criurpc.CriuReqType_VERSION
|
||||
req := &criurpc.CriuReq{
|
||||
Type: &t,
|
||||
}
|
||||
|
||||
err := c.criuSwrk(nil, req, nil, false, nil)
|
||||
criu := criu.MakeCriu()
|
||||
var err error
|
||||
c.criuVersion, err = criu.GetCriuVersion()
|
||||
if err != nil {
|
||||
return fmt.Errorf("CRIU version check failed: %s", err)
|
||||
}
|
||||
|
||||
if criuVersionRPC != nil {
|
||||
logrus.Debugf("CRIU version: %s", criuVersionRPC)
|
||||
// major and minor are always set
|
||||
c.criuVersion = int(*criuVersionRPC.Major) * 10000
|
||||
c.criuVersion += int(*criuVersionRPC.Minor) * 100
|
||||
if criuVersionRPC.Sublevel != nil {
|
||||
c.criuVersion += int(*criuVersionRPC.Sublevel)
|
||||
}
|
||||
if criuVersionRPC.Gitid != nil {
|
||||
// runc's convention is that a CRIU git release is
|
||||
// always the same as increasing the minor by 1
|
||||
c.criuVersion -= (c.criuVersion % 100)
|
||||
c.criuVersion += 100
|
||||
}
|
||||
return compareCriuVersion(c.criuVersion, minVersion)
|
||||
}
|
||||
|
||||
// This is CRIU without the version RPC and therefore
|
||||
// older than 3.0. Parsing the output is required.
|
||||
|
||||
// This can be remove once runc does not work with criu older than 3.0
|
||||
|
||||
c.criuVersion, err = parseCriuVersion(c.criuPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return compareCriuVersion(c.criuVersion, minVersion)
|
||||
}
|
||||
|
||||
@@ -876,26 +807,6 @@ func (c *linuxContainer) addMaskPaths(req *criurpc.CriuReq) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func waitForCriuLazyServer(r *os.File, status string) error {
|
||||
|
||||
data := make([]byte, 1)
|
||||
_, err := r.Read(data)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
fd, err := os.OpenFile(status, os.O_TRUNC|os.O_WRONLY, os.ModeAppend)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = fd.Write(data)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
fd.Close()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) handleCriuConfigurationFile(rpcOpts *criurpc.CriuOpts) {
|
||||
// CRIU will evaluate a configuration starting with release 3.11.
|
||||
// Settings in the configuration file will overwrite RPC settings.
|
||||
@@ -932,13 +843,13 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
||||
// support for doing unprivileged dumps, but the setup of
|
||||
// rootless containers might make this complicated.
|
||||
|
||||
// criu 1.5.2 => 10502
|
||||
if err := c.checkCriuVersion(10502); err != nil {
|
||||
// We are relying on the CRIU version RPC which was introduced with CRIU 3.0.0
|
||||
if err := c.checkCriuVersion(30000); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if criuOpts.ImagesDirectory == "" {
|
||||
return fmt.Errorf("invalid directory to save checkpoint")
|
||||
return errors.New("invalid directory to save checkpoint")
|
||||
}
|
||||
|
||||
// Since a container can be C/R'ed multiple times,
|
||||
@@ -1006,8 +917,8 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
||||
// CRIU expects the information about an external namespace
|
||||
// like this: --external net[<inode>]:<key>
|
||||
// This <key> is always 'extRootNetNS'.
|
||||
var netns syscall.Stat_t
|
||||
err = syscall.Stat(nsPath, &netns)
|
||||
var netns unix.Stat_t
|
||||
err = unix.Stat(nsPath, &netns)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -1016,9 +927,13 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
||||
}
|
||||
}
|
||||
|
||||
fcg := c.cgroupManager.GetPaths()["freezer"]
|
||||
if fcg != "" {
|
||||
rpcOpts.FreezeCgroup = proto.String(fcg)
|
||||
// CRIU can use cgroup freezer; when rpcOpts.FreezeCgroup
|
||||
// is not set, CRIU uses ptrace() to pause the processes.
|
||||
// Note cgroup v2 freezer is only supported since CRIU release 3.14.
|
||||
if !cgroups.IsCgroup2UnifiedMode() || c.checkCriuVersion(31400) == nil {
|
||||
if fcg := c.cgroupManager.Path("freezer"); fcg != "" {
|
||||
rpcOpts.FreezeCgroup = proto.String(fcg)
|
||||
}
|
||||
}
|
||||
|
||||
// append optional criu opts, e.g., page-server and port
|
||||
@@ -1037,10 +952,6 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
||||
|
||||
// append optional manage cgroups mode
|
||||
if criuOpts.ManageCgroupsMode != 0 {
|
||||
// criu 1.7 => 10700
|
||||
if err := c.checkCriuVersion(10700); err != nil {
|
||||
return err
|
||||
}
|
||||
mode := criurpc.CriuCgMode(criuOpts.ManageCgroupsMode)
|
||||
rpcOpts.ManageCgroupsMode = &mode
|
||||
}
|
||||
@@ -1059,36 +970,53 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
||||
} else {
|
||||
t = criurpc.CriuReqType_DUMP
|
||||
}
|
||||
req := &criurpc.CriuReq{
|
||||
Type: &t,
|
||||
Opts: &rpcOpts,
|
||||
}
|
||||
|
||||
if criuOpts.LazyPages {
|
||||
// lazy migration requested; check if criu supports it
|
||||
feat := criurpc.CriuFeatures{
|
||||
LazyPages: proto.Bool(true),
|
||||
}
|
||||
|
||||
if err := c.checkCriuFeatures(criuOpts, &rpcOpts, &feat); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
statusRead, statusWrite, err := os.Pipe()
|
||||
if err != nil {
|
||||
return err
|
||||
if fd := criuOpts.StatusFd; fd != -1 {
|
||||
// check that the FD is valid
|
||||
flags, err := unix.FcntlInt(uintptr(fd), unix.F_GETFL, 0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("invalid --status-fd argument %d: %w", fd, err)
|
||||
}
|
||||
// and writable
|
||||
if flags&unix.O_WRONLY == 0 {
|
||||
return fmt.Errorf("invalid --status-fd argument %d: not writable", fd)
|
||||
}
|
||||
|
||||
if c.checkCriuVersion(31500) != nil {
|
||||
// For criu 3.15+, use notifications (see case "status-ready"
|
||||
// in criuNotifications). Otherwise, rely on criu status fd.
|
||||
rpcOpts.StatusFd = proto.Int32(int32(fd))
|
||||
}
|
||||
}
|
||||
rpcOpts.StatusFd = proto.Int32(int32(statusWrite.Fd()))
|
||||
go waitForCriuLazyServer(statusRead, criuOpts.StatusFd)
|
||||
}
|
||||
|
||||
//no need to dump these information in pre-dump
|
||||
req := &criurpc.CriuReq{
|
||||
Type: &t,
|
||||
Opts: &rpcOpts,
|
||||
}
|
||||
|
||||
// no need to dump all this in pre-dump
|
||||
if !criuOpts.PreDump {
|
||||
hasCgroupns := c.config.Namespaces.Contains(configs.NEWCGROUP)
|
||||
for _, m := range c.config.Mounts {
|
||||
switch m.Device {
|
||||
case "bind":
|
||||
c.addCriuDumpMount(req, m)
|
||||
case "cgroup":
|
||||
if cgroups.IsCgroup2UnifiedMode() || hasCgroupns {
|
||||
// real mount(s)
|
||||
continue
|
||||
}
|
||||
// a set of "external" bind mounts
|
||||
binds, err := getCgroupMounts(m)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -1120,7 +1048,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
||||
}
|
||||
}
|
||||
|
||||
err = c.criuSwrk(nil, req, criuOpts, false, nil)
|
||||
err = c.criuSwrk(nil, req, criuOpts, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -1166,7 +1094,14 @@ func (c *linuxContainer) restoreNetwork(req *criurpc.CriuReq, criuOpts *CriuOpts
|
||||
func (c *linuxContainer) makeCriuRestoreMountpoints(m *configs.Mount) error {
|
||||
switch m.Device {
|
||||
case "cgroup":
|
||||
// Do nothing for cgroup, CRIU should handle it
|
||||
// No mount point(s) need to be created:
|
||||
//
|
||||
// * for v1, mount points are saved by CRIU because
|
||||
// /sys/fs/cgroup is a tmpfs mount
|
||||
//
|
||||
// * for v2, /sys/fs/cgroup is a real mount, but
|
||||
// the mountpoint appears as soon as /sys is mounted
|
||||
return nil
|
||||
case "bind":
|
||||
// The prepareBindMount() function checks if source
|
||||
// exists. So it cannot be used for other filesystem types.
|
||||
@@ -1174,7 +1109,7 @@ func (c *linuxContainer) makeCriuRestoreMountpoints(m *configs.Mount) error {
|
||||
return err
|
||||
}
|
||||
default:
|
||||
// for all other file-systems just create the mountpoints
|
||||
// for all other filesystems just create the mountpoints
|
||||
dest, err := securejoin.SecureJoin(c.config.Rootfs, m.Destination)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -1195,10 +1130,10 @@ func (c *linuxContainer) makeCriuRestoreMountpoints(m *configs.Mount) error {
|
||||
func isPathInPrefixList(path string, prefix []string) bool {
|
||||
for _, p := range prefix {
|
||||
if strings.HasPrefix(path, p+"/") {
|
||||
return false
|
||||
return true
|
||||
}
|
||||
}
|
||||
return true
|
||||
return false
|
||||
}
|
||||
|
||||
// prepareCriuRestoreMounts tries to set up the rootfs of the
|
||||
@@ -1220,7 +1155,7 @@ func (c *linuxContainer) prepareCriuRestoreMounts(mounts []*configs.Mount) error
|
||||
// if the mountpoints are not on a tmpfs, as CRIU will
|
||||
// restore the complete tmpfs content from its checkpoint.
|
||||
for _, m := range mounts {
|
||||
if isPathInPrefixList(m.Destination, tmpfs) {
|
||||
if !isPathInPrefixList(m.Destination, tmpfs) {
|
||||
if err := c.makeCriuRestoreMountpoints(m); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -1240,8 +1175,8 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
||||
// TODO(avagin): Figure out how to make this work nicely. CRIU doesn't have
|
||||
// support for unprivileged restore at the moment.
|
||||
|
||||
// criu 1.5.2 => 10502
|
||||
if err := c.checkCriuVersion(10502); err != nil {
|
||||
// We are relying on the CRIU version RPC which was introduced with CRIU 3.0.0
|
||||
if err := c.checkCriuVersion(30000); err != nil {
|
||||
return err
|
||||
}
|
||||
if criuOpts.WorkDirectory == "" {
|
||||
@@ -1258,7 +1193,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
||||
}
|
||||
defer workDir.Close()
|
||||
if criuOpts.ImagesDirectory == "" {
|
||||
return fmt.Errorf("invalid directory to restore checkpoint")
|
||||
return errors.New("invalid directory to restore checkpoint")
|
||||
}
|
||||
imageDir, err := os.Open(criuOpts.ImagesDirectory)
|
||||
if err != nil {
|
||||
@@ -1326,11 +1261,11 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
||||
// The <key> needs to be the same as during checkpointing.
|
||||
// We are always using 'extRootNetNS' as the key in this.
|
||||
netns, err := os.Open(nsPath)
|
||||
defer netns.Close()
|
||||
if err != nil {
|
||||
logrus.Errorf("If a specific network namespace is defined it must exist: %s", err)
|
||||
return fmt.Errorf("Requested network namespace %v does not exist", nsPath)
|
||||
}
|
||||
defer netns.Close()
|
||||
inheritFd := new(criurpc.InheritFd)
|
||||
inheritFd.Key = proto.String("extRootNetNS")
|
||||
// The offset of four is necessary because 0, 1, 2 and 3 is already
|
||||
@@ -1348,11 +1283,16 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
||||
return err
|
||||
}
|
||||
|
||||
hasCgroupns := c.config.Namespaces.Contains(configs.NEWCGROUP)
|
||||
for _, m := range c.config.Mounts {
|
||||
switch m.Device {
|
||||
case "bind":
|
||||
c.addCriuRestoreMount(req, m)
|
||||
case "cgroup":
|
||||
if cgroups.IsCgroup2UnifiedMode() || hasCgroupns {
|
||||
continue
|
||||
}
|
||||
// cgroup v1 is a set of bind mounts, unless cgroupns is used
|
||||
binds, err := getCgroupMounts(m)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -1379,10 +1319,6 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
||||
|
||||
// append optional manage cgroups mode
|
||||
if criuOpts.ManageCgroupsMode != 0 {
|
||||
// criu 1.7 => 10700
|
||||
if err := c.checkCriuVersion(10700); err != nil {
|
||||
return err
|
||||
}
|
||||
mode := criurpc.CriuCgMode(criuOpts.ManageCgroupsMode)
|
||||
req.Opts.ManageCgroupsMode = &mode
|
||||
}
|
||||
@@ -1406,10 +1342,15 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
||||
req.Opts.InheritFd = append(req.Opts.InheritFd, inheritFd)
|
||||
}
|
||||
}
|
||||
return c.criuSwrk(process, req, criuOpts, true, extraFiles)
|
||||
return c.criuSwrk(process, req, criuOpts, extraFiles)
|
||||
}
|
||||
|
||||
func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
|
||||
// need to apply cgroups only on restore
|
||||
if req.GetType() != criurpc.CriuReqType_RESTORE {
|
||||
return nil
|
||||
}
|
||||
|
||||
// XXX: Do we need to deal with this case? AFAIK criu still requires root.
|
||||
if err := c.cgroupManager.Apply(pid); err != nil {
|
||||
return err
|
||||
@@ -1419,6 +1360,11 @@ func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
|
||||
return newSystemError(err)
|
||||
}
|
||||
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
return nil
|
||||
}
|
||||
// the stuff below is cgroupv1-specific
|
||||
|
||||
path := fmt.Sprintf("/proc/%d/cgroup", pid)
|
||||
cgroupsPaths, err := cgroups.ParseCgroupFile(path)
|
||||
if err != nil {
|
||||
@@ -1436,7 +1382,7 @@ func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, applyCgroups bool, extraFiles []*os.File) error {
|
||||
func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, extraFiles []*os.File) error {
|
||||
fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_SEQPACKET|unix.SOCK_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -1484,26 +1430,29 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
|
||||
if err := cmd.Start(); err != nil {
|
||||
return err
|
||||
}
|
||||
// we close criuServer so that even if CRIU crashes or unexpectedly exits, runc will not hang.
|
||||
criuServer.Close()
|
||||
// cmd.Process will be replaced by a restored init.
|
||||
criuProcess := cmd.Process
|
||||
|
||||
var criuProcessState *os.ProcessState
|
||||
defer func() {
|
||||
criuClientCon.Close()
|
||||
_, err := cmd.Process.Wait()
|
||||
if err != nil {
|
||||
return
|
||||
if criuProcessState == nil {
|
||||
criuClientCon.Close()
|
||||
_, err := criuProcess.Wait()
|
||||
if err != nil {
|
||||
logrus.Warnf("wait on criuProcess returned %v", err)
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
if applyCgroups {
|
||||
err := c.criuApplyCgroups(cmd.Process.Pid, req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.criuApplyCgroups(criuProcess.Pid, req); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var extFds []string
|
||||
if process != nil {
|
||||
extFds, err = getPipeFds(cmd.Process.Pid)
|
||||
extFds, err = getPipeFds(criuProcess.Pid)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -1542,14 +1491,23 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
|
||||
oob := make([]byte, 4096)
|
||||
for true {
|
||||
n, oobn, _, _, err := criuClientCon.ReadMsgUnix(buf, oob)
|
||||
if req.Opts != nil && req.Opts.StatusFd != nil {
|
||||
// Close status_fd as soon as we got something back from criu,
|
||||
// assuming it has consumed (reopened) it by this time.
|
||||
// Otherwise it will might be left open forever and whoever
|
||||
// is waiting on it will wait forever.
|
||||
fd := int(*req.Opts.StatusFd)
|
||||
_ = unix.Close(fd)
|
||||
req.Opts.StatusFd = nil
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if n == 0 {
|
||||
return fmt.Errorf("unexpected EOF")
|
||||
return errors.New("unexpected EOF")
|
||||
}
|
||||
if n == len(buf) {
|
||||
return fmt.Errorf("buffer is too small")
|
||||
return errors.New("buffer is too small")
|
||||
}
|
||||
|
||||
resp := new(criurpc.CriuResp)
|
||||
@@ -1559,25 +1517,16 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
|
||||
}
|
||||
if !resp.GetSuccess() {
|
||||
typeString := req.GetType().String()
|
||||
if typeString == "VERSION" {
|
||||
// If the VERSION RPC fails this probably means that the CRIU
|
||||
// version is too old for this RPC. Just return 'nil'.
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("criu failed: type %s errno %d\nlog file: %s", typeString, resp.GetCrErrno(), logPath)
|
||||
}
|
||||
|
||||
t := resp.GetType()
|
||||
switch {
|
||||
case t == criurpc.CriuReqType_VERSION:
|
||||
logrus.Debugf("CRIU version: %s", resp)
|
||||
criuVersionRPC = resp.GetVersion()
|
||||
break
|
||||
case t == criurpc.CriuReqType_FEATURE_CHECK:
|
||||
logrus.Debugf("Feature check says: %s", resp)
|
||||
criuFeatures = resp.GetFeatures()
|
||||
case t == criurpc.CriuReqType_NOTIFY:
|
||||
if err := c.criuNotifications(resp, process, opts, extFds, oob[:oobn]); err != nil {
|
||||
if err := c.criuNotifications(resp, process, cmd, opts, extFds, oob[:oobn]); err != nil {
|
||||
return err
|
||||
}
|
||||
t = criurpc.CriuReqType_NOTIFY
|
||||
@@ -1607,7 +1556,7 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
|
||||
criuClientCon.CloseWrite()
|
||||
// cmd.Wait() waits cmd.goroutines which are used for proxying file descriptors.
|
||||
// Here we want to wait only the CRIU process.
|
||||
st, err := cmd.Process.Wait()
|
||||
criuProcessState, err = criuProcess.Wait()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -1619,8 +1568,8 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
|
||||
// and not the whole series of pre-dump, pre-dump, ...m, dump
|
||||
// If we got the message CriuReqType_PRE_DUMP it means
|
||||
// CRIU was successful and we need to forcefully stop CRIU
|
||||
if !st.Success() && *req.Type != criurpc.CriuReqType_PRE_DUMP {
|
||||
return fmt.Errorf("criu failed: %s\nlog file: %s", st.String(), logPath)
|
||||
if !criuProcessState.Success() && *req.Type != criurpc.CriuReqType_PRE_DUMP {
|
||||
return fmt.Errorf("criu failed: %s\nlog file: %s", criuProcessState.String(), logPath)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -1653,43 +1602,53 @@ func unlockNetwork(config *configs.Config) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Process, opts *CriuOpts, fds []string, oob []byte) error {
|
||||
func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Process, cmd *exec.Cmd, opts *CriuOpts, fds []string, oob []byte) error {
|
||||
notify := resp.GetNotify()
|
||||
if notify == nil {
|
||||
return fmt.Errorf("invalid response: %s", resp.String())
|
||||
}
|
||||
logrus.Debugf("notify: %s\n", notify.GetScript())
|
||||
switch {
|
||||
case notify.GetScript() == "post-dump":
|
||||
script := notify.GetScript()
|
||||
logrus.Debugf("notify: %s\n", script)
|
||||
switch script {
|
||||
case "post-dump":
|
||||
f, err := os.Create(filepath.Join(c.root, "checkpoint"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
f.Close()
|
||||
case notify.GetScript() == "network-unlock":
|
||||
case "network-unlock":
|
||||
if err := unlockNetwork(c.config); err != nil {
|
||||
return err
|
||||
}
|
||||
case notify.GetScript() == "network-lock":
|
||||
case "network-lock":
|
||||
if err := lockNetwork(c.config); err != nil {
|
||||
return err
|
||||
}
|
||||
case notify.GetScript() == "setup-namespaces":
|
||||
case "setup-namespaces":
|
||||
if c.config.Hooks != nil {
|
||||
s, err := c.currentOCIState()
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
s.Pid = int(notify.GetPid())
|
||||
for i, hook := range c.config.Hooks.Prestart {
|
||||
if err := hook.Run(s); err != nil {
|
||||
return newSystemErrorWithCausef(err, "running prestart hook %d", i)
|
||||
}
|
||||
|
||||
if err := c.config.Hooks[configs.Prestart].RunHooks(s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.config.Hooks[configs.CreateRuntime].RunHooks(s); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
case notify.GetScript() == "post-restore":
|
||||
case "post-restore":
|
||||
pid := notify.GetPid()
|
||||
r, err := newRestoredProcess(int(pid), fds)
|
||||
|
||||
p, err := os.FindProcess(int(pid))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
cmd.Process = p
|
||||
|
||||
r, err := newRestoredProcess(cmd, fds)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -1710,7 +1669,7 @@ func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Proc
|
||||
logrus.Error(err)
|
||||
}
|
||||
}
|
||||
case notify.GetScript() == "orphan-pts-master":
|
||||
case "orphan-pts-master":
|
||||
scm, err := unix.ParseSocketControlMessage(oob)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -1727,6 +1686,16 @@ func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Proc
|
||||
if err := utils.SendFd(process.ConsoleSocket, master.Name(), master.Fd()); err != nil {
|
||||
return err
|
||||
}
|
||||
case "status-ready":
|
||||
if opts.StatusFd != -1 {
|
||||
// write \0 to status fd to notify that lazy page server is ready
|
||||
_, err := unix.Write(opts.StatusFd, []byte{0})
|
||||
if err != nil {
|
||||
logrus.Warnf("can't write \\0 to status fd: %v", err)
|
||||
}
|
||||
_ = unix.Close(opts.StatusFd)
|
||||
opts.StatusFd = -1
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -1746,13 +1715,30 @@ func (c *linuxContainer) updateState(process parentProcess) (*State, error) {
|
||||
return state, nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) saveState(s *State) error {
|
||||
f, err := os.Create(filepath.Join(c.root, stateFilename))
|
||||
func (c *linuxContainer) saveState(s *State) (retErr error) {
|
||||
tmpFile, err := ioutil.TempFile(c.root, "state-")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
return utils.WriteJSON(f, s)
|
||||
|
||||
defer func() {
|
||||
if retErr != nil {
|
||||
tmpFile.Close()
|
||||
os.Remove(tmpFile.Name())
|
||||
}
|
||||
}()
|
||||
|
||||
err = utils.WriteJSON(tmpFile, s)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = tmpFile.Close()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
stateFilePath := filepath.Join(c.root, stateFilename)
|
||||
return os.Rename(tmpFile.Name(), stateFilePath)
|
||||
}
|
||||
|
||||
func (c *linuxContainer) deleteState() error {
|
||||
@@ -1778,10 +1764,7 @@ func (c *linuxContainer) refreshState() error {
|
||||
if paused {
|
||||
return c.state.transition(&pausedState{c: c})
|
||||
}
|
||||
t, err := c.runType()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
t := c.runType()
|
||||
switch t {
|
||||
case Created:
|
||||
return c.state.transition(&createdState{c: c})
|
||||
@@ -1791,48 +1774,32 @@ func (c *linuxContainer) refreshState() error {
|
||||
return c.state.transition(&stoppedState{c: c})
|
||||
}
|
||||
|
||||
func (c *linuxContainer) runType() (Status, error) {
|
||||
func (c *linuxContainer) runType() Status {
|
||||
if c.initProcess == nil {
|
||||
return Stopped, nil
|
||||
return Stopped
|
||||
}
|
||||
pid := c.initProcess.pid()
|
||||
stat, err := system.Stat(pid)
|
||||
if err != nil {
|
||||
return Stopped, nil
|
||||
return Stopped
|
||||
}
|
||||
if stat.StartTime != c.initProcessStartTime || stat.State == system.Zombie || stat.State == system.Dead {
|
||||
return Stopped, nil
|
||||
return Stopped
|
||||
}
|
||||
// We'll create exec fifo and blocking on it after container is created,
|
||||
// and delete it after start container.
|
||||
if _, err := os.Stat(filepath.Join(c.root, execFifoFilename)); err == nil {
|
||||
return Created, nil
|
||||
return Created
|
||||
}
|
||||
return Running, nil
|
||||
return Running
|
||||
}
|
||||
|
||||
func (c *linuxContainer) isPaused() (bool, error) {
|
||||
fcg := c.cgroupManager.GetPaths()["freezer"]
|
||||
if fcg == "" {
|
||||
// A container doesn't have a freezer cgroup
|
||||
return false, nil
|
||||
}
|
||||
pausedState := "FROZEN"
|
||||
filename := "freezer.state"
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
filename = "cgroup.freeze"
|
||||
pausedState = "1"
|
||||
}
|
||||
|
||||
data, err := ioutil.ReadFile(filepath.Join(fcg, filename))
|
||||
state, err := c.cgroupManager.GetFreezerState()
|
||||
if err != nil {
|
||||
// If freezer cgroup is not mounted, the container would just be not paused.
|
||||
if os.IsNotExist(err) || err == syscall.ENODEV {
|
||||
return false, nil
|
||||
}
|
||||
return false, newSystemErrorWithCause(err, "checking if container is paused")
|
||||
return false, err
|
||||
}
|
||||
return bytes.Equal(bytes.TrimSpace(data), []byte(pausedState)), nil
|
||||
return state == configs.Frozen, nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) currentState() (*State, error) {
|
||||
|
Reference in New Issue
Block a user