Update dependency opencontainer/runc
This commit is contained in:
219
vendor/github.com/opencontainers/runc/libcontainer/container_linux.go
generated
vendored
219
vendor/github.com/opencontainers/runc/libcontainer/container_linux.go
generated
vendored
@@ -19,13 +19,15 @@ import (
|
||||
"syscall" // only for SysProcAttr and Signal
|
||||
"time"
|
||||
|
||||
"github.com/cyphar/filepath-securejoin"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/criurpc"
|
||||
"github.com/opencontainers/runc/libcontainer/intelrdt"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
|
||||
criurpc "github.com/checkpoint-restore/go-criu/rpc"
|
||||
"github.com/golang/protobuf/proto"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/vishvananda/netlink/nl"
|
||||
@@ -156,6 +158,12 @@ func (c *linuxContainer) State() (*State, error) {
|
||||
return c.currentState()
|
||||
}
|
||||
|
||||
func (c *linuxContainer) OCIState() (*specs.State, error) {
|
||||
c.m.Lock()
|
||||
defer c.m.Unlock()
|
||||
return c.currentOCIState()
|
||||
}
|
||||
|
||||
func (c *linuxContainer) Processes() ([]int, error) {
|
||||
pids, err := c.cgroupManager.GetAllPids()
|
||||
if err != nil {
|
||||
@@ -329,6 +337,7 @@ func (c *linuxContainer) start(process *Process) error {
|
||||
if err != nil {
|
||||
return newSystemErrorWithCause(err, "creating new parent process")
|
||||
}
|
||||
parent.forwardChildLogs()
|
||||
if err := parent.start(); err != nil {
|
||||
// terminate the process to ensure that it properly is reaped.
|
||||
if err := ignoreTerminateErrors(parent.terminate()); err != nil {
|
||||
@@ -349,13 +358,9 @@ func (c *linuxContainer) start(process *Process) error {
|
||||
c.initProcessStartTime = state.InitProcessStartTime
|
||||
|
||||
if c.config.Hooks != nil {
|
||||
bundle, annotations := utils.Annotations(c.config.Labels)
|
||||
s := configs.HookState{
|
||||
Version: c.config.Version,
|
||||
ID: c.id,
|
||||
Pid: parent.pid(),
|
||||
Bundle: bundle,
|
||||
Annotations: annotations,
|
||||
s, err := c.currentOCIState()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for i, hook := range c.config.Hooks.Poststart {
|
||||
if err := hook.Run(s); err != nil {
|
||||
@@ -374,10 +379,18 @@ func (c *linuxContainer) Signal(s os.Signal, all bool) error {
|
||||
if all {
|
||||
return signalAllProcesses(c.cgroupManager, s)
|
||||
}
|
||||
if err := c.initProcess.signal(s); err != nil {
|
||||
return newSystemErrorWithCause(err, "signaling init process")
|
||||
status, err := c.currentStatus()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
// to avoid a PID reuse attack
|
||||
if status == Running || status == Created || status == Paused {
|
||||
if err := c.initProcess.signal(s); err != nil {
|
||||
return newSystemErrorWithCause(err, "signaling init process")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
return newGenericError(fmt.Errorf("container not running"), ContainerNotRunning)
|
||||
}
|
||||
|
||||
func (c *linuxContainer) createExecFifo() error {
|
||||
@@ -426,16 +439,24 @@ func (c *linuxContainer) includeExecFifo(cmd *exec.Cmd) error {
|
||||
}
|
||||
|
||||
func (c *linuxContainer) newParentProcess(p *Process) (parentProcess, error) {
|
||||
parentPipe, childPipe, err := utils.NewSockPair("init")
|
||||
parentInitPipe, childInitPipe, err := utils.NewSockPair("init")
|
||||
if err != nil {
|
||||
return nil, newSystemErrorWithCause(err, "creating new init pipe")
|
||||
}
|
||||
cmd, err := c.commandTemplate(p, childPipe)
|
||||
messageSockPair := filePair{parentInitPipe, childInitPipe}
|
||||
|
||||
parentLogPipe, childLogPipe, err := os.Pipe()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Unable to create the log pipe: %s", err)
|
||||
}
|
||||
logFilePair := filePair{parentLogPipe, childLogPipe}
|
||||
|
||||
cmd, err := c.commandTemplate(p, childInitPipe, childLogPipe)
|
||||
if err != nil {
|
||||
return nil, newSystemErrorWithCause(err, "creating new command template")
|
||||
}
|
||||
if !p.Init {
|
||||
return c.newSetnsProcess(p, cmd, parentPipe, childPipe)
|
||||
return c.newSetnsProcess(p, cmd, messageSockPair, logFilePair)
|
||||
}
|
||||
|
||||
// We only set up fifoFd if we're not doing a `runc exec`. The historic
|
||||
@@ -446,10 +467,10 @@ func (c *linuxContainer) newParentProcess(p *Process) (parentProcess, error) {
|
||||
if err := c.includeExecFifo(cmd); err != nil {
|
||||
return nil, newSystemErrorWithCause(err, "including execfifo in cmd.Exec setup")
|
||||
}
|
||||
return c.newInitProcess(p, cmd, parentPipe, childPipe)
|
||||
return c.newInitProcess(p, cmd, messageSockPair, logFilePair)
|
||||
}
|
||||
|
||||
func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.Cmd, error) {
|
||||
func (c *linuxContainer) commandTemplate(p *Process, childInitPipe *os.File, childLogPipe *os.File) (*exec.Cmd, error) {
|
||||
cmd := exec.Command(c.initPath, c.initArgs[1:]...)
|
||||
cmd.Args[0] = c.initArgs[0]
|
||||
cmd.Stdin = p.Stdin
|
||||
@@ -467,10 +488,18 @@ func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.
|
||||
fmt.Sprintf("_LIBCONTAINER_CONSOLE=%d", stdioFdCount+len(cmd.ExtraFiles)-1),
|
||||
)
|
||||
}
|
||||
cmd.ExtraFiles = append(cmd.ExtraFiles, childPipe)
|
||||
cmd.ExtraFiles = append(cmd.ExtraFiles, childInitPipe)
|
||||
cmd.Env = append(cmd.Env,
|
||||
fmt.Sprintf("_LIBCONTAINER_INITPIPE=%d", stdioFdCount+len(cmd.ExtraFiles)-1),
|
||||
fmt.Sprintf("_LIBCONTAINER_STATEDIR=%s", c.root),
|
||||
)
|
||||
|
||||
cmd.ExtraFiles = append(cmd.ExtraFiles, childLogPipe)
|
||||
cmd.Env = append(cmd.Env,
|
||||
fmt.Sprintf("_LIBCONTAINER_LOGPIPE=%d", stdioFdCount+len(cmd.ExtraFiles)-1),
|
||||
fmt.Sprintf("_LIBCONTAINER_LOGLEVEL=%s", p.LogLevel),
|
||||
)
|
||||
|
||||
// NOTE: when running a container with no PID namespace and the parent process spawning the container is
|
||||
// PID1 the pdeathsig is being delivered to the container's init process by the kernel for some reason
|
||||
// even with the parent still running.
|
||||
@@ -480,7 +509,7 @@ func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.
|
||||
return cmd, nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*initProcess, error) {
|
||||
func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPair, logFilePair filePair) (*initProcess, error) {
|
||||
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard))
|
||||
nsMaps := make(map[configs.NamespaceType]string)
|
||||
for _, ns := range c.config.Namespaces {
|
||||
@@ -493,10 +522,10 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &initProcess{
|
||||
init := &initProcess{
|
||||
cmd: cmd,
|
||||
childPipe: childPipe,
|
||||
parentPipe: parentPipe,
|
||||
messageSockPair: messageSockPair,
|
||||
logFilePair: logFilePair,
|
||||
manager: c.cgroupManager,
|
||||
intelRdtManager: c.intelRdtManager,
|
||||
config: c.newInitConfig(p),
|
||||
@@ -504,10 +533,12 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c
|
||||
process: p,
|
||||
bootstrapData: data,
|
||||
sharePidns: sharePidns,
|
||||
}, nil
|
||||
}
|
||||
c.initProcess = init
|
||||
return init, nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*setnsProcess, error) {
|
||||
func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockPair, logFilePair filePair) (*setnsProcess, error) {
|
||||
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initSetns))
|
||||
state, err := c.currentState()
|
||||
if err != nil {
|
||||
@@ -524,8 +555,8 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe,
|
||||
cgroupPaths: c.cgroupManager.GetPaths(),
|
||||
rootlessCgroups: c.config.RootlessCgroups,
|
||||
intelRdtPath: state.IntelRdtPath,
|
||||
childPipe: childPipe,
|
||||
parentPipe: parentPipe,
|
||||
messageSockPair: messageSockPair,
|
||||
logFilePair: logFilePair,
|
||||
config: c.newInitConfig(p),
|
||||
process: p,
|
||||
bootstrapData: data,
|
||||
@@ -862,6 +893,32 @@ func waitForCriuLazyServer(r *os.File, status string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) handleCriuConfigurationFile(rpcOpts *criurpc.CriuOpts) {
|
||||
// CRIU will evaluate a configuration starting with release 3.11.
|
||||
// Settings in the configuration file will overwrite RPC settings.
|
||||
// Look for annotations. The annotation 'org.criu.config'
|
||||
// specifies if CRIU should use a different, container specific
|
||||
// configuration file.
|
||||
_, annotations := utils.Annotations(c.config.Labels)
|
||||
configFile, exists := annotations["org.criu.config"]
|
||||
if exists {
|
||||
// If the annotation 'org.criu.config' exists and is set
|
||||
// to a non-empty string, tell CRIU to use that as a
|
||||
// configuration file. If the file does not exist, CRIU
|
||||
// will just ignore it.
|
||||
if configFile != "" {
|
||||
rpcOpts.ConfigFile = proto.String(configFile)
|
||||
}
|
||||
// If 'org.criu.config' exists and is set to an empty
|
||||
// string, a runc specific CRIU configuration file will
|
||||
// be not set at all.
|
||||
} else {
|
||||
// If the mentioned annotation has not been found, specify
|
||||
// a default CRIU configuration file.
|
||||
rpcOpts.ConfigFile = proto.String("/etc/criu/runc.conf")
|
||||
}
|
||||
}
|
||||
|
||||
func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
||||
c.m.Lock()
|
||||
defer c.m.Unlock()
|
||||
@@ -927,6 +984,8 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
||||
LazyPages: proto.Bool(criuOpts.LazyPages),
|
||||
}
|
||||
|
||||
c.handleCriuConfigurationFile(&rpcOpts)
|
||||
|
||||
// If the container is running in a network namespace and has
|
||||
// a path to the network namespace configured, we will dump
|
||||
// that network namespace as an external namespace and we
|
||||
@@ -1098,6 +1157,75 @@ func (c *linuxContainer) restoreNetwork(req *criurpc.CriuReq, criuOpts *CriuOpts
|
||||
}
|
||||
}
|
||||
|
||||
// makeCriuRestoreMountpoints makes the actual mountpoints for the
|
||||
// restore using CRIU. This function is inspired from the code in
|
||||
// rootfs_linux.go
|
||||
func (c *linuxContainer) makeCriuRestoreMountpoints(m *configs.Mount) error {
|
||||
switch m.Device {
|
||||
case "cgroup":
|
||||
// Do nothing for cgroup, CRIU should handle it
|
||||
case "bind":
|
||||
// The prepareBindMount() function checks if source
|
||||
// exists. So it cannot be used for other filesystem types.
|
||||
if err := prepareBindMount(m, c.config.Rootfs); err != nil {
|
||||
return err
|
||||
}
|
||||
default:
|
||||
// for all other file-systems just create the mountpoints
|
||||
dest, err := securejoin.SecureJoin(c.config.Rootfs, m.Destination)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := checkMountDestination(c.config.Rootfs, dest); err != nil {
|
||||
return err
|
||||
}
|
||||
m.Destination = dest
|
||||
if err := os.MkdirAll(dest, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// isPathInPrefixList is a small function for CRIU restore to make sure
|
||||
// mountpoints, which are on a tmpfs, are not created in the roofs
|
||||
func isPathInPrefixList(path string, prefix []string) bool {
|
||||
for _, p := range prefix {
|
||||
if strings.HasPrefix(path, p+"/") {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// prepareCriuRestoreMounts tries to set up the rootfs of the
|
||||
// container to be restored in the same way runc does it for
|
||||
// initial container creation. Even for a read-only rootfs container
|
||||
// runc modifies the rootfs to add mountpoints which do not exist.
|
||||
// This function also creates missing mountpoints as long as they
|
||||
// are not on top of a tmpfs, as CRIU will restore tmpfs content anyway.
|
||||
func (c *linuxContainer) prepareCriuRestoreMounts(mounts []*configs.Mount) error {
|
||||
// First get a list of a all tmpfs mounts
|
||||
tmpfs := []string{}
|
||||
for _, m := range mounts {
|
||||
switch m.Device {
|
||||
case "tmpfs":
|
||||
tmpfs = append(tmpfs, m.Destination)
|
||||
}
|
||||
}
|
||||
// Now go through all mounts and create the mountpoints
|
||||
// if the mountpoints are not on a tmpfs, as CRIU will
|
||||
// restore the complete tmpfs content from its checkpoint.
|
||||
for _, m := range mounts {
|
||||
if isPathInPrefixList(m.Destination, tmpfs) {
|
||||
if err := c.makeCriuRestoreMountpoints(m); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
||||
c.m.Lock()
|
||||
defer c.m.Unlock()
|
||||
@@ -1177,6 +1305,8 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
||||
},
|
||||
}
|
||||
|
||||
c.handleCriuConfigurationFile(req.Opts)
|
||||
|
||||
// Same as during checkpointing. If the container has a specific network namespace
|
||||
// assigned to it, this now expects that the checkpoint will be restored in a
|
||||
// already created network namespace.
|
||||
@@ -1209,6 +1339,12 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
||||
}
|
||||
}
|
||||
|
||||
// This will modify the rootfs of the container in the same way runc
|
||||
// modifies the container during initial creation.
|
||||
if err := c.prepareCriuRestoreMounts(c.config.Mounts); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, m := range c.config.Mounts {
|
||||
switch m.Device {
|
||||
case "bind":
|
||||
@@ -1537,14 +1673,11 @@ func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Proc
|
||||
}
|
||||
case notify.GetScript() == "setup-namespaces":
|
||||
if c.config.Hooks != nil {
|
||||
bundle, annotations := utils.Annotations(c.config.Labels)
|
||||
s := configs.HookState{
|
||||
Version: c.config.Version,
|
||||
ID: c.id,
|
||||
Pid: int(notify.GetPid()),
|
||||
Bundle: bundle,
|
||||
Annotations: annotations,
|
||||
s, err := c.currentOCIState()
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
s.Pid = int(notify.GetPid())
|
||||
for i, hook := range c.config.Hooks.Prestart {
|
||||
if err := hook.Run(s); err != nil {
|
||||
return newSystemErrorWithCausef(err, "running prestart hook %d", i)
|
||||
@@ -1738,11 +1871,31 @@ func (c *linuxContainer) currentState() (*State, error) {
|
||||
return state, nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) currentOCIState() (*specs.State, error) {
|
||||
bundle, annotations := utils.Annotations(c.config.Labels)
|
||||
state := &specs.State{
|
||||
Version: specs.Version,
|
||||
ID: c.ID(),
|
||||
Bundle: bundle,
|
||||
Annotations: annotations,
|
||||
}
|
||||
status, err := c.currentStatus()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
state.Status = status.String()
|
||||
if status != Stopped {
|
||||
if c.initProcess != nil {
|
||||
state.Pid = c.initProcess.pid()
|
||||
}
|
||||
}
|
||||
return state, nil
|
||||
}
|
||||
|
||||
// orderNamespacePaths sorts namespace paths into a list of paths that we
|
||||
// can setns in order.
|
||||
func (c *linuxContainer) orderNamespacePaths(namespaces map[configs.NamespaceType]string) ([]string, error) {
|
||||
paths := []string{}
|
||||
|
||||
for _, ns := range configs.NamespaceTypes() {
|
||||
|
||||
// Remove namespaces that we don't need to join.
|
||||
|
Reference in New Issue
Block a user