From 600b4d11547f2482adadc090feabc011da7bffa0 Mon Sep 17 00:00:00 2001
From: Kenfe-Mickael Laventure
Date: Wed, 14 Sep 2016 11:31:09 -0700
Subject: [PATCH] Remove containerd as subreaper

Signed-off-by: Kenfe-Mickael Laventure
---
Review notes (this section is ignored when the patch is applied):

* runtime/process.go, hunk at -262,13: the result of
  unix.Kill(p.pid, syscall.SIGKILL) is now assigned to err. Previously the
  return value was discarded, so the added
  "err != nil && err != syscall.ESRCH" check tested a stale err and could
  never report a failed SIGKILL. This matches the form already used by the
  goroutine added to runtime/container.go in this same patch.
* The diffstat below reflects that one extra removed/added line pair.

 containerd/main.go   |  2 --
 runtime/container.go | 34 ++++++++++++++++++++++++++++------
 runtime/process.go   | 39 +++++++++++----------------------------
 3 files changed, 39 insertions(+), 36 deletions(-)

diff --git a/containerd/main.go b/containerd/main.go
index 9231e8616..fec36667f 100644
--- a/containerd/main.go
+++ b/containerd/main.go
@@ -23,7 +23,6 @@ import (
 	"github.com/docker/containerd/api/grpc/server"
 	"github.com/docker/containerd/api/grpc/types"
 	"github.com/docker/containerd/api/http/pprof"
-	"github.com/docker/containerd/osutils"
 	"github.com/docker/containerd/supervisor"
 	"github.com/docker/docker/pkg/listeners"
 	"github.com/rcrowley/go-metrics"
@@ -160,7 +159,6 @@ func main() {
 func daemon(context *cli.Context) error {
 	s := make(chan os.Signal, 2048)
 	signal.Notify(s, syscall.SIGTERM, syscall.SIGINT)
-	osutils.SetSubreaper(1)
 	sv, err := supervisor.New(
 		context.String("state-dir"),
 		context.String("runtime"),
diff --git a/runtime/container.go b/runtime/container.go
index 9e82a0c28..c4a2f7e2f 100644
--- a/runtime/container.go
+++ b/runtime/container.go
@@ -14,6 +14,7 @@ import (
 	"github.com/Sirupsen/logrus"
 	"github.com/docker/containerd/specs"
 	ocs "github.com/opencontainers/runtime-spec/specs-go"
+	"golang.org/x/sys/unix"
 )
 
 // Container defines the operations allowed on a container
@@ -480,12 +481,33 @@ func (c *container) createCmd(pid string, cmd *exec.Cmd, p *process) error {
 		}
 		return err
 	}
-	go func() {
-		err := p.cmd.Wait()
-		if err == nil {
-			p.cmdSuccess = true
-		}
-		close(p.cmdDoneCh)
+	// We need the pid file to have been written to run
+	defer func() {
+		go func() {
+			err := p.cmd.Wait()
+			if err == nil {
+				p.cmdSuccess = true
+			}
+
+			if same, err := p.isSameProcess(); same && p.pid > 0 {
+				// The process changed its PR_SET_PDEATHSIG, so force
+				// kill it
+				logrus.Infof("containerd: %s:%s (pid %v) has become an orphan, killing it", p.container.id, p.id, p.pid)
+				err = unix.Kill(p.pid, syscall.SIGKILL)
+				if err != nil && err != syscall.ESRCH {
+					logrus.Errorf("containerd: unable to SIGKILL %s:%s (pid %v): %v", p.container.id, p.id, p.pid, err)
+				} else {
+					for {
+						err = unix.Kill(p.pid, 0)
+						if err != nil {
+							break
+						}
+						time.Sleep(5 * time.Millisecond)
+					}
+				}
+			}
+			close(p.cmdDoneCh)
+		}()
 	}()
 	if err := c.waitForCreate(p, cmd); err != nil {
 		return err
diff --git a/runtime/process.go b/runtime/process.go
index a6a9da4e6..cba1eab63 100644
--- a/runtime/process.go
+++ b/runtime/process.go
@@ -229,6 +229,9 @@ func (p *process) Resize(w, h int) error {
 }
 
 func (p *process) updateExitStatusFile(status int) (int, error) {
+	p.stateLock.Lock()
+	p.state = Stopped
+	p.stateLock.Unlock()
 	err := ioutil.WriteFile(filepath.Join(p.root, ExitStatusFile), []byte(fmt.Sprintf("%d", status)), 0644)
 	return status, err
 }
@@ -247,9 +250,6 @@ func (p *process) handleSigkilledShim(rst int, rerr error) (int, error) {
 	// the status to 255
 	if same, err := p.isSameProcess(); !same {
 		logrus.Warnf("containerd: %s:%s (pid %d) is not the same process anymore (%v)", p.container.id, p.id, p.pid, err)
-		p.stateLock.Lock()
-		p.state = Stopped
-		p.stateLock.Unlock()
 		// Create the file so we get the exit event generated once monitor kicks in
 		// without having to go through all this process again
 		return p.updateExitStatusFile(255)
@@ -262,13 +262,17 @@ func (p *process) handleSigkilledShim(rst int, rerr error) (int, error) {
 	if ppid == "1" {
 		logrus.Warnf("containerd: %s:%s shim died, killing associated process", p.container.id, p.id)
-		unix.Kill(p.pid, syscall.SIGKILL)
+		err = unix.Kill(p.pid, syscall.SIGKILL)
+		if err != nil && err != syscall.ESRCH {
+			return 255, fmt.Errorf("containerd: unable to SIGKILL %s:%s (pid %v): %v", p.container.id, p.id, p.pid, err)
+		}
+
 		// wait for the process to die
 		for {
 			e := unix.Kill(p.pid, 0)
 			if e == syscall.ESRCH {
 				break
 			}
-			time.Sleep(10 * time.Millisecond)
+			time.Sleep(5 * time.Millisecond)
 		}
 		// Create the file so we get the exit event generated once monitor kicks in
 		// without having to go through all this process again
@@ -291,29 +295,8 @@ func (p *process) handleSigkilledShim(rst int, rerr error) (int, error) {
 
 	if shimStatus.Signaled() && shimStatus.Signal() == syscall.SIGKILL {
 		logrus.Debugf("containerd: ExitStatus(container: %s, process: %s): shim was SIGKILL'ed reaping its child with pid %d", p.container.id, p.id, p.pid)
-		var (
-			status unix.WaitStatus
-			rusage unix.Rusage
-			wpid   int
-		)
-
-		// Some processes change their PR_SET_PDEATHSIG, so force kill them
-		unix.Kill(p.pid, syscall.SIGKILL)
-
-		for wpid == 0 {
-			wpid, e = unix.Wait4(p.pid, &status, unix.WNOHANG, &rusage)
-			if e != nil {
-				logrus.Debugf("containerd: ExitStatus(container: %s, process: %s): Wait4(%d): %v", p.container.id, p.id, p.pid, rerr)
-				return rst, rerr
-			}
-		}
-
-		if wpid == p.pid {
-			rerr = nil
-			rst = 128 + int(shimStatus.Signal())
-		} else {
-			logrus.Errorf("containerd: ExitStatus(container: %s, process: %s): unexpected returned pid from wait4 %v (expected %v)", p.container.id, p.id, wpid, p.pid)
-		}
+		rerr = nil
+		rst = 128 + int(shimStatus.Signal())
 
 		p.stateLock.Lock()
 		p.state = Stopped