Update dependency opencontainer/runc

This commit is contained in:
Odin Ugedal
2019-06-20 20:34:03 +02:00
parent 81c8552d7e
commit 2bcdb944f0
42 changed files with 1415 additions and 631 deletions

View File

@@ -16,12 +16,17 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/intelrdt"
"github.com/opencontainers/runc/libcontainer/logs"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/utils"
"golang.org/x/sys/unix"
)
// Synchronisation value for cgroup namespace setup.
// The same constant is defined in nsexec.c as "CREATECGROUPNS".
const createCgroupns = 0x80
type parentProcess interface {
// pid returns the pid for the running process.
pid() int
@@ -43,12 +48,19 @@ type parentProcess interface {
externalDescriptors() []string
setExternalDescriptors(fds []string)
forwardChildLogs()
}
type filePair struct {
parent *os.File
child *os.File
}
type setnsProcess struct {
cmd *exec.Cmd
parentPipe *os.File
childPipe *os.File
messageSockPair filePair
logFilePair filePair
cgroupPaths map[string]string
rootlessCgroups bool
intelRdtPath string
@@ -72,14 +84,16 @@ func (p *setnsProcess) signal(sig os.Signal) error {
}
func (p *setnsProcess) start() (err error) {
defer p.parentPipe.Close()
defer p.messageSockPair.parent.Close()
err = p.cmd.Start()
p.childPipe.Close()
// close the write-side of the pipes (controlled by child)
p.messageSockPair.child.Close()
p.logFilePair.child.Close()
if err != nil {
return newSystemErrorWithCause(err, "starting setns process")
}
if p.bootstrapData != nil {
if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
if _, err := io.Copy(p.messageSockPair.parent, p.bootstrapData); err != nil {
return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
}
}
@@ -105,11 +119,11 @@ func (p *setnsProcess) start() (err error) {
if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
return newSystemErrorWithCause(err, "setting rlimits for process")
}
if err := utils.WriteJSON(p.parentPipe, p.config); err != nil {
if err := utils.WriteJSON(p.messageSockPair.parent, p.config); err != nil {
return newSystemErrorWithCause(err, "writing config to pipe")
}
ierr := parseSync(p.parentPipe, func(sync *syncT) error {
ierr := parseSync(p.messageSockPair.parent, func(sync *syncT) error {
switch sync.Type {
case procReady:
// This shouldn't happen.
@@ -122,7 +136,7 @@ func (p *setnsProcess) start() (err error) {
}
})
if err := unix.Shutdown(int(p.parentPipe.Fd()), unix.SHUT_WR); err != nil {
if err := unix.Shutdown(int(p.messageSockPair.parent.Fd()), unix.SHUT_WR); err != nil {
return newSystemErrorWithCause(err, "calling shutdown on init pipe")
}
// Must be done after Shutdown so the child will exit and we can wait for it.
@@ -148,16 +162,14 @@ func (p *setnsProcess) execSetns() error {
return newSystemError(&exec.ExitError{ProcessState: status})
}
var pid *pid
if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil {
if err := json.NewDecoder(p.messageSockPair.parent).Decode(&pid); err != nil {
p.cmd.Wait()
return newSystemErrorWithCause(err, "reading pid from init pipe")
}
// Clean up the zombie parent process
firstChildProcess, err := os.FindProcess(pid.PidFirstChild)
if err != nil {
return err
}
// On Unix systems FindProcess always succeeds.
firstChildProcess, _ := os.FindProcess(pid.PidFirstChild)
// Ignore the error in case the child has already been reaped for any reason
_, _ = firstChildProcess.Wait()
@@ -203,10 +215,14 @@ func (p *setnsProcess) setExternalDescriptors(newFds []string) {
p.fds = newFds
}
func (p *setnsProcess) forwardChildLogs() {
go logs.ForwardLogs(p.logFilePair.parent)
}
type initProcess struct {
cmd *exec.Cmd
parentPipe *os.File
childPipe *os.File
messageSockPair filePair
logFilePair filePair
config *initConfig
manager cgroups.Manager
intelRdtManager intelrdt.Manager
@@ -225,12 +241,25 @@ func (p *initProcess) externalDescriptors() []string {
return p.fds
}
// execSetns runs the process that executes C code to perform the setns calls
// because setns support requires the C process to fork off a child and perform the setns
// before the go runtime boots, we wait on the process to die and receive the child's pid
// over the provided pipe.
// This is called by initProcess.start function
func (p *initProcess) execSetns() error {
// getChildPid receives the final child's pid over the provided pipe.
func (p *initProcess) getChildPid() (int, error) {
var pid pid
if err := json.NewDecoder(p.messageSockPair.parent).Decode(&pid); err != nil {
p.cmd.Wait()
return -1, err
}
// Clean up the zombie parent process
// On Unix systems FindProcess always succeeds.
firstChildProcess, _ := os.FindProcess(pid.PidFirstChild)
// Ignore the error in case the child has already been reaped for any reason
_, _ = firstChildProcess.Wait()
return pid.Pid, nil
}
func (p *initProcess) waitForChildExit(childPid int) error {
status, err := p.cmd.Process.Wait()
if err != nil {
p.cmd.Wait()
@@ -240,22 +269,8 @@ func (p *initProcess) execSetns() error {
p.cmd.Wait()
return &exec.ExitError{ProcessState: status}
}
var pid *pid
if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil {
p.cmd.Wait()
return err
}
// Clean up the zombie parent process
firstChildProcess, err := os.FindProcess(pid.PidFirstChild)
if err != nil {
return err
}
// Ignore the error in case the child has already been reaped for any reason
_, _ = firstChildProcess.Wait()
process, err := os.FindProcess(pid.Pid)
process, err := os.FindProcess(childPid)
if err != nil {
return err
}
@@ -265,10 +280,12 @@ func (p *initProcess) execSetns() error {
}
func (p *initProcess) start() error {
defer p.parentPipe.Close()
defer p.messageSockPair.parent.Close()
err := p.cmd.Start()
p.process.ops = p
p.childPipe.Close()
// close the write-side of the pipes (controlled by child)
p.messageSockPair.child.Close()
p.logFilePair.child.Close()
if err != nil {
p.process.ops = nil
return newSystemErrorWithCause(err, "starting init process command")
@@ -294,22 +311,53 @@ func (p *initProcess) start() error {
}
}()
if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
if _, err := io.Copy(p.messageSockPair.parent, p.bootstrapData); err != nil {
return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
}
if err := p.execSetns(); err != nil {
return newSystemErrorWithCause(err, "running exec setns process for init")
childPid, err := p.getChildPid()
if err != nil {
return newSystemErrorWithCause(err, "getting the final child's pid from pipe")
}
// Save the standard descriptor names before the container process
// can potentially move them (e.g., via dup2()). If we don't do this now,
// we won't know at checkpoint time which file descriptor to look up.
fds, err := getPipeFds(p.pid())
fds, err := getPipeFds(childPid)
if err != nil {
return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid())
return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", childPid)
}
p.setExternalDescriptors(fds)
// Do this before syncing with child so that no children
// can escape the cgroup
if err := p.manager.Apply(childPid); err != nil {
return newSystemErrorWithCause(err, "applying cgroup configuration for process")
}
if p.intelRdtManager != nil {
if err := p.intelRdtManager.Apply(childPid); err != nil {
return newSystemErrorWithCause(err, "applying Intel RDT configuration for process")
}
}
// Now it's time to setup cgroup namesapce
if p.config.Config.Namespaces.Contains(configs.NEWCGROUP) && p.config.Config.Namespaces.PathOf(configs.NEWCGROUP) == "" {
if _, err := p.messageSockPair.parent.Write([]byte{createCgroupns}); err != nil {
return newSystemErrorWithCause(err, "sending synchronization value to init process")
}
}
// Wait for our first child to exit
if err := p.waitForChildExit(childPid); err != nil {
return newSystemErrorWithCause(err, "waiting for our first child to exit")
}
defer func() {
if err != nil {
// TODO: should not be the responsibility to call here
p.manager.Destroy()
if p.intelRdtManager != nil {
p.intelRdtManager.Destroy()
}
}
}()
if err := p.createNetworkInterfaces(); err != nil {
return newSystemErrorWithCause(err, "creating network interfaces")
}
@@ -321,7 +369,7 @@ func (p *initProcess) start() error {
sentResume bool
)
ierr := parseSync(p.parentPipe, func(sync *syncT) error {
ierr := parseSync(p.messageSockPair.parent, func(sync *syncT) error {
switch sync.Type {
case procReady:
// set rlimits, this has to be done here because we lose permissions
@@ -342,14 +390,13 @@ func (p *initProcess) start() error {
}
if p.config.Config.Hooks != nil {
bundle, annotations := utils.Annotations(p.container.config.Labels)
s := configs.HookState{
Version: p.container.config.Version,
ID: p.container.id,
Pid: p.pid(),
Bundle: bundle,
Annotations: annotations,
s, err := p.container.currentOCIState()
if err != nil {
return err
}
// initProcessStartTime hasn't been set yet.
s.Pid = p.cmd.Process.Pid
s.Status = "creating"
for i, hook := range p.config.Config.Hooks.Prestart {
if err := hook.Run(s); err != nil {
return newSystemErrorWithCausef(err, "running prestart hook %d", i)
@@ -358,7 +405,7 @@ func (p *initProcess) start() error {
}
}
// Sync with child.
if err := writeSync(p.parentPipe, procRun); err != nil {
if err := writeSync(p.messageSockPair.parent, procRun); err != nil {
return newSystemErrorWithCause(err, "writing syncT 'run'")
}
sentRun = true
@@ -373,14 +420,13 @@ func (p *initProcess) start() error {
}
}
if p.config.Config.Hooks != nil {
bundle, annotations := utils.Annotations(p.container.config.Labels)
s := configs.HookState{
Version: p.container.config.Version,
ID: p.container.id,
Pid: p.pid(),
Bundle: bundle,
Annotations: annotations,
s, err := p.container.currentOCIState()
if err != nil {
return err
}
// initProcessStartTime hasn't been set yet.
s.Pid = p.cmd.Process.Pid
s.Status = "creating"
for i, hook := range p.config.Config.Hooks.Prestart {
if err := hook.Run(s); err != nil {
return newSystemErrorWithCausef(err, "running prestart hook %d", i)
@@ -388,7 +434,7 @@ func (p *initProcess) start() error {
}
}
// Sync with child.
if err := writeSync(p.parentPipe, procResume); err != nil {
if err := writeSync(p.messageSockPair.parent, procResume); err != nil {
return newSystemErrorWithCause(err, "writing syncT 'resume'")
}
sentResume = true
@@ -405,7 +451,7 @@ func (p *initProcess) start() error {
if p.config.Config.Namespaces.Contains(configs.NEWNS) && !sentResume {
return newSystemError(fmt.Errorf("could not synchronise after executing prestart hooks with container process"))
}
if err := unix.Shutdown(int(p.parentPipe.Fd()), unix.SHUT_WR); err != nil {
if err := unix.Shutdown(int(p.messageSockPair.parent.Fd()), unix.SHUT_WR); err != nil {
return newSystemErrorWithCause(err, "shutting down init pipe")
}
@@ -449,7 +495,7 @@ func (p *initProcess) sendConfig() error {
// send the config to the container's init process, we don't use JSON Encode
// here because there might be a problem in JSON decoder in some cases, see:
// https://github.com/docker/docker/issues/14203#issuecomment-174177790
return utils.WriteJSON(p.parentPipe, p.config)
return utils.WriteJSON(p.messageSockPair.parent, p.config)
}
func (p *initProcess) createNetworkInterfaces() error {
@@ -481,6 +527,10 @@ func (p *initProcess) setExternalDescriptors(newFds []string) {
p.fds = newFds
}
func (p *initProcess) forwardChildLogs() {
go logs.ForwardLogs(p.logFilePair.parent)
}
func getPipeFds(pid int) ([]string, error) {
fds := make([]string, 3)