Update dependency opencontainer/runc

This commit is contained in:
Odin Ugedal
2019-06-20 20:34:03 +02:00
parent 81c8552d7e
commit 2bcdb944f0
42 changed files with 1415 additions and 631 deletions

View File

@@ -19,13 +19,15 @@ import (
"syscall" // only for SysProcAttr and Signal
"time"
"github.com/cyphar/filepath-securejoin"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/criurpc"
"github.com/opencontainers/runc/libcontainer/intelrdt"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/utils"
"github.com/opencontainers/runtime-spec/specs-go"
criurpc "github.com/checkpoint-restore/go-criu/rpc"
"github.com/golang/protobuf/proto"
"github.com/sirupsen/logrus"
"github.com/vishvananda/netlink/nl"
@@ -156,6 +158,12 @@ func (c *linuxContainer) State() (*State, error) {
return c.currentState()
}
func (c *linuxContainer) OCIState() (*specs.State, error) {
c.m.Lock()
defer c.m.Unlock()
return c.currentOCIState()
}
func (c *linuxContainer) Processes() ([]int, error) {
pids, err := c.cgroupManager.GetAllPids()
if err != nil {
@@ -329,6 +337,7 @@ func (c *linuxContainer) start(process *Process) error {
if err != nil {
return newSystemErrorWithCause(err, "creating new parent process")
}
parent.forwardChildLogs()
if err := parent.start(); err != nil {
// terminate the process to ensure that it properly is reaped.
if err := ignoreTerminateErrors(parent.terminate()); err != nil {
@@ -349,13 +358,9 @@ func (c *linuxContainer) start(process *Process) error {
c.initProcessStartTime = state.InitProcessStartTime
if c.config.Hooks != nil {
bundle, annotations := utils.Annotations(c.config.Labels)
s := configs.HookState{
Version: c.config.Version,
ID: c.id,
Pid: parent.pid(),
Bundle: bundle,
Annotations: annotations,
s, err := c.currentOCIState()
if err != nil {
return err
}
for i, hook := range c.config.Hooks.Poststart {
if err := hook.Run(s); err != nil {
@@ -374,10 +379,18 @@ func (c *linuxContainer) Signal(s os.Signal, all bool) error {
if all {
return signalAllProcesses(c.cgroupManager, s)
}
if err := c.initProcess.signal(s); err != nil {
return newSystemErrorWithCause(err, "signaling init process")
status, err := c.currentStatus()
if err != nil {
return err
}
return nil
// to avoid a PID reuse attack
if status == Running || status == Created || status == Paused {
if err := c.initProcess.signal(s); err != nil {
return newSystemErrorWithCause(err, "signaling init process")
}
return nil
}
return newGenericError(fmt.Errorf("container not running"), ContainerNotRunning)
}
func (c *linuxContainer) createExecFifo() error {
@@ -426,16 +439,24 @@ func (c *linuxContainer) includeExecFifo(cmd *exec.Cmd) error {
}
func (c *linuxContainer) newParentProcess(p *Process) (parentProcess, error) {
parentPipe, childPipe, err := utils.NewSockPair("init")
parentInitPipe, childInitPipe, err := utils.NewSockPair("init")
if err != nil {
return nil, newSystemErrorWithCause(err, "creating new init pipe")
}
cmd, err := c.commandTemplate(p, childPipe)
messageSockPair := filePair{parentInitPipe, childInitPipe}
parentLogPipe, childLogPipe, err := os.Pipe()
if err != nil {
return nil, fmt.Errorf("Unable to create the log pipe: %s", err)
}
logFilePair := filePair{parentLogPipe, childLogPipe}
cmd, err := c.commandTemplate(p, childInitPipe, childLogPipe)
if err != nil {
return nil, newSystemErrorWithCause(err, "creating new command template")
}
if !p.Init {
return c.newSetnsProcess(p, cmd, parentPipe, childPipe)
return c.newSetnsProcess(p, cmd, messageSockPair, logFilePair)
}
// We only set up fifoFd if we're not doing a `runc exec`. The historic
@@ -446,10 +467,10 @@ func (c *linuxContainer) newParentProcess(p *Process) (parentProcess, error) {
if err := c.includeExecFifo(cmd); err != nil {
return nil, newSystemErrorWithCause(err, "including execfifo in cmd.Exec setup")
}
return c.newInitProcess(p, cmd, parentPipe, childPipe)
return c.newInitProcess(p, cmd, messageSockPair, logFilePair)
}
func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.Cmd, error) {
func (c *linuxContainer) commandTemplate(p *Process, childInitPipe *os.File, childLogPipe *os.File) (*exec.Cmd, error) {
cmd := exec.Command(c.initPath, c.initArgs[1:]...)
cmd.Args[0] = c.initArgs[0]
cmd.Stdin = p.Stdin
@@ -467,10 +488,18 @@ func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.
fmt.Sprintf("_LIBCONTAINER_CONSOLE=%d", stdioFdCount+len(cmd.ExtraFiles)-1),
)
}
cmd.ExtraFiles = append(cmd.ExtraFiles, childPipe)
cmd.ExtraFiles = append(cmd.ExtraFiles, childInitPipe)
cmd.Env = append(cmd.Env,
fmt.Sprintf("_LIBCONTAINER_INITPIPE=%d", stdioFdCount+len(cmd.ExtraFiles)-1),
fmt.Sprintf("_LIBCONTAINER_STATEDIR=%s", c.root),
)
cmd.ExtraFiles = append(cmd.ExtraFiles, childLogPipe)
cmd.Env = append(cmd.Env,
fmt.Sprintf("_LIBCONTAINER_LOGPIPE=%d", stdioFdCount+len(cmd.ExtraFiles)-1),
fmt.Sprintf("_LIBCONTAINER_LOGLEVEL=%s", p.LogLevel),
)
// NOTE: when running a container with no PID namespace and the parent process spawning the container is
// PID1 the pdeathsig is being delivered to the container's init process by the kernel for some reason
// even with the parent still running.
@@ -480,7 +509,7 @@ func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.
return cmd, nil
}
func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*initProcess, error) {
func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPair, logFilePair filePair) (*initProcess, error) {
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard))
nsMaps := make(map[configs.NamespaceType]string)
for _, ns := range c.config.Namespaces {
@@ -493,10 +522,10 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c
if err != nil {
return nil, err
}
return &initProcess{
init := &initProcess{
cmd: cmd,
childPipe: childPipe,
parentPipe: parentPipe,
messageSockPair: messageSockPair,
logFilePair: logFilePair,
manager: c.cgroupManager,
intelRdtManager: c.intelRdtManager,
config: c.newInitConfig(p),
@@ -504,10 +533,12 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c
process: p,
bootstrapData: data,
sharePidns: sharePidns,
}, nil
}
c.initProcess = init
return init, nil
}
func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*setnsProcess, error) {
func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockPair, logFilePair filePair) (*setnsProcess, error) {
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initSetns))
state, err := c.currentState()
if err != nil {
@@ -524,8 +555,8 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe,
cgroupPaths: c.cgroupManager.GetPaths(),
rootlessCgroups: c.config.RootlessCgroups,
intelRdtPath: state.IntelRdtPath,
childPipe: childPipe,
parentPipe: parentPipe,
messageSockPair: messageSockPair,
logFilePair: logFilePair,
config: c.newInitConfig(p),
process: p,
bootstrapData: data,
@@ -862,6 +893,32 @@ func waitForCriuLazyServer(r *os.File, status string) error {
return nil
}
func (c *linuxContainer) handleCriuConfigurationFile(rpcOpts *criurpc.CriuOpts) {
// CRIU will evaluate a configuration starting with release 3.11.
// Settings in the configuration file will overwrite RPC settings.
// Look for annotations. The annotation 'org.criu.config'
// specifies if CRIU should use a different, container specific
// configuration file.
_, annotations := utils.Annotations(c.config.Labels)
configFile, exists := annotations["org.criu.config"]
if exists {
// If the annotation 'org.criu.config' exists and is set
// to a non-empty string, tell CRIU to use that as a
// configuration file. If the file does not exist, CRIU
// will just ignore it.
if configFile != "" {
rpcOpts.ConfigFile = proto.String(configFile)
}
// If 'org.criu.config' exists and is set to an empty
// string, a runc specific CRIU configuration file will
// be not set at all.
} else {
// If the mentioned annotation has not been found, specify
// a default CRIU configuration file.
rpcOpts.ConfigFile = proto.String("/etc/criu/runc.conf")
}
}
func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
c.m.Lock()
defer c.m.Unlock()
@@ -927,6 +984,8 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
LazyPages: proto.Bool(criuOpts.LazyPages),
}
c.handleCriuConfigurationFile(&rpcOpts)
// If the container is running in a network namespace and has
// a path to the network namespace configured, we will dump
// that network namespace as an external namespace and we
@@ -1098,6 +1157,75 @@ func (c *linuxContainer) restoreNetwork(req *criurpc.CriuReq, criuOpts *CriuOpts
}
}
// makeCriuRestoreMountpoints makes the actual mountpoints for the
// restore using CRIU. This function is inspired from the code in
// rootfs_linux.go
func (c *linuxContainer) makeCriuRestoreMountpoints(m *configs.Mount) error {
switch m.Device {
case "cgroup":
// Do nothing for cgroup, CRIU should handle it
case "bind":
// The prepareBindMount() function checks if source
// exists. So it cannot be used for other filesystem types.
if err := prepareBindMount(m, c.config.Rootfs); err != nil {
return err
}
default:
// for all other file-systems just create the mountpoints
dest, err := securejoin.SecureJoin(c.config.Rootfs, m.Destination)
if err != nil {
return err
}
if err := checkMountDestination(c.config.Rootfs, dest); err != nil {
return err
}
m.Destination = dest
if err := os.MkdirAll(dest, 0755); err != nil {
return err
}
}
return nil
}
// isPathInPrefixList is a small function for CRIU restore to make sure
// mountpoints, which are on a tmpfs, are not created in the roofs
func isPathInPrefixList(path string, prefix []string) bool {
for _, p := range prefix {
if strings.HasPrefix(path, p+"/") {
return false
}
}
return true
}
// prepareCriuRestoreMounts tries to set up the rootfs of the
// container to be restored in the same way runc does it for
// initial container creation. Even for a read-only rootfs container
// runc modifies the rootfs to add mountpoints which do not exist.
// This function also creates missing mountpoints as long as they
// are not on top of a tmpfs, as CRIU will restore tmpfs content anyway.
func (c *linuxContainer) prepareCriuRestoreMounts(mounts []*configs.Mount) error {
// First get a list of a all tmpfs mounts
tmpfs := []string{}
for _, m := range mounts {
switch m.Device {
case "tmpfs":
tmpfs = append(tmpfs, m.Destination)
}
}
// Now go through all mounts and create the mountpoints
// if the mountpoints are not on a tmpfs, as CRIU will
// restore the complete tmpfs content from its checkpoint.
for _, m := range mounts {
if isPathInPrefixList(m.Destination, tmpfs) {
if err := c.makeCriuRestoreMountpoints(m); err != nil {
return err
}
}
}
return nil
}
func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
c.m.Lock()
defer c.m.Unlock()
@@ -1177,6 +1305,8 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
},
}
c.handleCriuConfigurationFile(req.Opts)
// Same as during checkpointing. If the container has a specific network namespace
// assigned to it, this now expects that the checkpoint will be restored in a
// already created network namespace.
@@ -1209,6 +1339,12 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
}
}
// This will modify the rootfs of the container in the same way runc
// modifies the container during initial creation.
if err := c.prepareCriuRestoreMounts(c.config.Mounts); err != nil {
return err
}
for _, m := range c.config.Mounts {
switch m.Device {
case "bind":
@@ -1537,14 +1673,11 @@ func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Proc
}
case notify.GetScript() == "setup-namespaces":
if c.config.Hooks != nil {
bundle, annotations := utils.Annotations(c.config.Labels)
s := configs.HookState{
Version: c.config.Version,
ID: c.id,
Pid: int(notify.GetPid()),
Bundle: bundle,
Annotations: annotations,
s, err := c.currentOCIState()
if err != nil {
return nil
}
s.Pid = int(notify.GetPid())
for i, hook := range c.config.Hooks.Prestart {
if err := hook.Run(s); err != nil {
return newSystemErrorWithCausef(err, "running prestart hook %d", i)
@@ -1738,11 +1871,31 @@ func (c *linuxContainer) currentState() (*State, error) {
return state, nil
}
func (c *linuxContainer) currentOCIState() (*specs.State, error) {
bundle, annotations := utils.Annotations(c.config.Labels)
state := &specs.State{
Version: specs.Version,
ID: c.ID(),
Bundle: bundle,
Annotations: annotations,
}
status, err := c.currentStatus()
if err != nil {
return nil, err
}
state.Status = status.String()
if status != Stopped {
if c.initProcess != nil {
state.Pid = c.initProcess.pid()
}
}
return state, nil
}
// orderNamespacePaths sorts namespace paths into a list of paths that we
// can setns in order.
func (c *linuxContainer) orderNamespacePaths(namespaces map[configs.NamespaceType]string) ([]string, error) {
paths := []string{}
for _, ns := range configs.NamespaceTypes() {
// Remove namespaces that we don't need to join.