Update reaper for multiple subscribers

Depends on https://github.com/containerd/go-runc/pull/24

There is currently a race with the reaper where you could miss some exit
events from processes.

The problem before and why the reaper was so complex was because
processes could fork, getting a pid, and then fail on an execve before
we would have time to register the process with the reaper.  This could
cause pids to fill up in a map as a way to reduce the race.

This change makes the reaper handle multiple subscribers so that the
caller can handle locking, for when they want to wait for a specific
pid, without affecting other callers using the reaper code.

Exit events are broadcast to multiple subscribers, in this case, the runc
commands and container pids that we get from a pid-file.  Locking while
the entire container starts no longer affects runc commands where you want
to call `runc create` and wait until that has been completed.

Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
This commit is contained in:
Michael Crosby
2017-08-31 13:36:40 -04:00
parent c2e894c33a
commit 6b4c4a2937
8 changed files with 153 additions and 156 deletions

View File

@@ -3,10 +3,17 @@ package runc
import (
"os/exec"
"syscall"
"time"
)
var Monitor ProcessMonitor = &defaultMonitor{}
// Exit describes the termination of a process started through a
// ProcessMonitor. It is the value delivered on the channel returned by
// ProcessMonitor.Start.
type Exit struct {
// Timestamp is when the exit was recorded; the default monitor sets it to
// time.Now() after the command's Wait call returns.
Timestamp time.Time
// Pid is the process id of the command that exited.
Pid int
// Status is the exit status of the process. The default monitor reports 0
// when Wait succeeds, the status taken from the syscall.WaitStatus when one
// is available, and 255 when Wait fails without a usable wait status.
Status int
}
// ProcessMonitor is an interface for process monitoring
//
// It allows daemons using go-runc to have a SIGCHLD handler
@@ -18,8 +25,8 @@ type ProcessMonitor interface {
Output(*exec.Cmd) ([]byte, error)
CombinedOutput(*exec.Cmd) ([]byte, error)
Run(*exec.Cmd) error
Start(*exec.Cmd) error
Wait(*exec.Cmd) (int, error)
Start(*exec.Cmd) (chan Exit, error)
Wait(*exec.Cmd, chan Exit) (int, error)
}
type defaultMonitor struct {
@@ -37,18 +44,32 @@ func (m *defaultMonitor) Run(c *exec.Cmd) error {
return c.Run()
}
func (m *defaultMonitor) Start(c *exec.Cmd) error {
return c.Start()
}
func (m *defaultMonitor) Wait(c *exec.Cmd) (int, error) {
if err := c.Wait(); err != nil {
if exitErr, ok := err.(*exec.ExitError); ok {
if status, ok := exitErr.Sys().(syscall.WaitStatus); ok {
return status.ExitStatus(), nil
func (m *defaultMonitor) Start(c *exec.Cmd) (chan Exit, error) {
if err := c.Start(); err != nil {
return nil, err
}
ec := make(chan Exit, 1)
go func() {
var status int
if err := c.Wait(); err != nil {
status = 255
if exitErr, ok := err.(*exec.ExitError); ok {
if ws, ok := exitErr.Sys().(syscall.WaitStatus); ok {
status = ws.ExitStatus()
}
}
}
return -1, err
}
return 0, nil
ec <- Exit{
Timestamp: time.Now(),
Pid: c.Process.Pid,
Status: status,
}
close(ec)
}()
return ec, nil
}
// Wait blocks until a value can be received from ec and reports the Status
// carried by that Exit. The returned error is always nil, and the command c
// is not consulted: the channel is the only source of the result.
//
// A receive on a channel that has been closed and drained yields the zero
// Exit, so in that situation the reported status is 0.
func (m *defaultMonitor) Wait(c *exec.Cmd, ec chan Exit) (int, error) {
	return (<-ec).Status, nil
}

View File

@@ -41,7 +41,7 @@ type Runc struct {
PdeathSignal syscall.Signal
Setpgid bool
Criu string
SystemdCgroup string
SystemdCgroup bool
}
// List returns all containers created inside the provided runc root directory
@@ -134,7 +134,8 @@ func (r *Runc) Create(context context.Context, id, bundle string, opts *CreateOp
}
return nil
}
if err := Monitor.Start(cmd); err != nil {
ec, err := Monitor.Start(cmd)
if err != nil {
return err
}
if opts != nil && opts.IO != nil {
@@ -144,7 +145,7 @@ func (r *Runc) Create(context context.Context, id, bundle string, opts *CreateOp
}
}
}
_, err := Monitor.Wait(cmd)
_, err = Monitor.Wait(cmd, ec)
return err
}
@@ -209,7 +210,8 @@ func (r *Runc) Exec(context context.Context, id string, spec specs.Process, opts
}
return nil
}
if err := Monitor.Start(cmd); err != nil {
ec, err := Monitor.Start(cmd)
if err != nil {
return err
}
if opts != nil && opts.IO != nil {
@@ -219,7 +221,7 @@ func (r *Runc) Exec(context context.Context, id string, spec specs.Process, opts
}
}
}
_, err = Monitor.Wait(cmd)
_, err = Monitor.Wait(cmd, ec)
return err
}
@@ -238,10 +240,11 @@ func (r *Runc) Run(context context.Context, id, bundle string, opts *CreateOpts)
if opts != nil {
opts.Set(cmd)
}
if err := Monitor.Start(cmd); err != nil {
ec, err := Monitor.Start(cmd)
if err != nil {
return -1, err
}
return Monitor.Wait(cmd)
return Monitor.Wait(cmd, ec)
}
type DeleteOpts struct {
@@ -294,13 +297,14 @@ func (r *Runc) Stats(context context.Context, id string) (*Stats, error) {
if err != nil {
return nil, err
}
defer func() {
rd.Close()
Monitor.Wait(cmd)
}()
if err := Monitor.Start(cmd); err != nil {
ec, err := Monitor.Start(cmd)
if err != nil {
return nil, err
}
defer func() {
rd.Close()
Monitor.Wait(cmd, ec)
}()
var e Event
if err := json.NewDecoder(rd).Decode(&e); err != nil {
return nil, err
@@ -315,7 +319,8 @@ func (r *Runc) Events(context context.Context, id string, interval time.Duration
if err != nil {
return nil, err
}
if err := Monitor.Start(cmd); err != nil {
ec, err := Monitor.Start(cmd)
if err != nil {
rd.Close()
return nil, err
}
@@ -327,7 +332,7 @@ func (r *Runc) Events(context context.Context, id string, interval time.Duration
defer func() {
close(c)
rd.Close()
Monitor.Wait(cmd)
Monitor.Wait(cmd, ec)
}()
for {
var e Event
@@ -505,7 +510,8 @@ func (r *Runc) Restore(context context.Context, id, bundle string, opts *Restore
if opts != nil {
opts.Set(cmd)
}
if err := Monitor.Start(cmd); err != nil {
ec, err := Monitor.Start(cmd)
if err != nil {
return -1, err
}
if opts != nil && opts.IO != nil {
@@ -515,7 +521,7 @@ func (r *Runc) Restore(context context.Context, id, bundle string, opts *Restore
}
}
}
return Monitor.Wait(cmd)
return Monitor.Wait(cmd, ec)
}
// Update updates the current container with the provided resource spec
@@ -596,8 +602,8 @@ func (r *Runc) args() (out []string) {
if r.Criu != "" {
out = append(out, "--criu", r.Criu)
}
if r.SystemdCgroup != "" {
out = append(out, "--systemd-cgroup", r.SystemdCgroup)
if r.SystemdCgroup {
out = append(out, "--systemd-cgroup")
}
return out
}