Update reaper for multipe subscribers
Depends on https://github.com/containerd/go-runc/pull/24 The is currently a race with the reaper where you could miss some exit events from processes. The problem before and why the reaper was so complex was because processes could fork, getting a pid, and then fail on an execve before we would have time to register the process with the reaper. This could cause pids to fill up in a map as a way to reduce the race. This changes makes the reaper handle multiple subscribers so that the caller can handle locking, for when they want to wait for a specific pid, without affecting other callers using the reaper code. Exit events are broadcast to multiple subscribers, in the case, the runc commands and container pids that we get from a pid-file. Locking while the entire container stats no longs affects runc commands where you want to call `runc create` and wait until that has been completed. Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
This commit is contained in:
136
reaper/reaper.go
136
reaper/reaper.go
@@ -6,49 +6,45 @@ import (
|
||||
"bytes"
|
||||
"os/exec"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/containerd/containerd/sys"
|
||||
runc "github.com/containerd/go-runc"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
var (
|
||||
ErrNoSuchProcess = errors.New("no such process")
|
||||
)
|
||||
var ErrNoSuchProcess = errors.New("no such process")
|
||||
|
||||
const bufferSize = 2048
|
||||
|
||||
// Reap should be called when the process receives an SIGCHLD. Reap will reap
|
||||
// all exited processes and close their wait channels
|
||||
func Reap() error {
|
||||
now := time.Now()
|
||||
exits, err := sys.Reap(false)
|
||||
for _, e := range exits {
|
||||
Default.Lock()
|
||||
c, ok := Default.cmds[e.Pid]
|
||||
if !ok {
|
||||
Default.unknown[e.Pid] = e.Status
|
||||
Default.Unlock()
|
||||
continue
|
||||
Default.Lock()
|
||||
for c := range Default.subscribers {
|
||||
for _, e := range exits {
|
||||
c <- runc.Exit{
|
||||
Timestamp: now,
|
||||
Pid: e.Pid,
|
||||
Status: e.Status,
|
||||
}
|
||||
}
|
||||
Default.Unlock()
|
||||
if c.c != nil {
|
||||
// after we get an exit, call wait on the go process to make sure all
|
||||
// pipes are closed and finalizers are run on the process
|
||||
c.c.Wait()
|
||||
}
|
||||
c.exitStatus = e.Status
|
||||
close(c.ExitCh)
|
||||
|
||||
}
|
||||
Default.Unlock()
|
||||
return err
|
||||
}
|
||||
|
||||
var Default = &Monitor{
|
||||
cmds: make(map[int]*Cmd),
|
||||
unknown: make(map[int]int),
|
||||
subscribers: make(map[chan runc.Exit]struct{}),
|
||||
}
|
||||
|
||||
type Monitor struct {
|
||||
sync.Mutex
|
||||
|
||||
cmds map[int]*Cmd
|
||||
unknown map[int]int
|
||||
subscribers map[chan runc.Exit]struct{}
|
||||
}
|
||||
|
||||
func (m *Monitor) Output(c *exec.Cmd) ([]byte, error) {
|
||||
@@ -69,82 +65,50 @@ func (m *Monitor) CombinedOutput(c *exec.Cmd) ([]byte, error) {
|
||||
}
|
||||
|
||||
// Start starts the command a registers the process with the reaper
|
||||
func (m *Monitor) Start(c *exec.Cmd) error {
|
||||
rc := &Cmd{
|
||||
c: c,
|
||||
ExitCh: make(chan struct{}),
|
||||
func (m *Monitor) Start(c *exec.Cmd) (chan runc.Exit, error) {
|
||||
ec := m.Subscribe()
|
||||
if err := c.Start(); err != nil {
|
||||
m.Unsubscribe(ec)
|
||||
return nil, err
|
||||
}
|
||||
// start the process
|
||||
m.Lock()
|
||||
err := c.Start()
|
||||
if c.Process != nil {
|
||||
m.RegisterNL(c.Process.Pid, rc)
|
||||
}
|
||||
m.Unlock()
|
||||
return err
|
||||
return ec, nil
|
||||
}
|
||||
|
||||
// Run runs and waits for the command to finish
|
||||
func (m *Monitor) Run(c *exec.Cmd) error {
|
||||
if err := m.Start(c); err != nil {
|
||||
ec, err := m.Start(c)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err := m.Wait(c)
|
||||
_, err = m.Wait(c, ec)
|
||||
return err
|
||||
}
|
||||
|
||||
func (m *Monitor) Wait(c *exec.Cmd) (int, error) {
|
||||
return m.WaitPid(c.Process.Pid)
|
||||
func (m *Monitor) Wait(c *exec.Cmd, ec chan runc.Exit) (int, error) {
|
||||
for e := range ec {
|
||||
if e.Pid == c.Process.Pid {
|
||||
// make sure we flush all IO
|
||||
c.Wait()
|
||||
m.Unsubscribe(ec)
|
||||
return e.Status, nil
|
||||
}
|
||||
}
|
||||
// return no such process if the ec channel is closed and no more exit
|
||||
// events will be sent
|
||||
return -1, ErrNoSuchProcess
|
||||
}
|
||||
|
||||
func (m *Monitor) Register(pid int, c *Cmd) {
|
||||
func (m *Monitor) Subscribe() chan runc.Exit {
|
||||
c := make(chan runc.Exit, bufferSize)
|
||||
m.Lock()
|
||||
m.RegisterNL(pid, c)
|
||||
m.subscribers[c] = struct{}{}
|
||||
m.Unlock()
|
||||
return c
|
||||
}
|
||||
|
||||
func (m *Monitor) Unsubscribe(c chan runc.Exit) {
|
||||
m.Lock()
|
||||
delete(m.subscribers, c)
|
||||
close(c)
|
||||
m.Unlock()
|
||||
}
|
||||
|
||||
// RegisterNL does not grab the lock internally
|
||||
// the caller is responsible for locking the monitor
|
||||
func (m *Monitor) RegisterNL(pid int, c *Cmd) {
|
||||
if status, ok := m.unknown[pid]; ok {
|
||||
delete(m.unknown, pid)
|
||||
m.cmds[pid] = c
|
||||
c.exitStatus = status
|
||||
close(c.ExitCh)
|
||||
return
|
||||
}
|
||||
m.cmds[pid] = c
|
||||
}
|
||||
|
||||
func (m *Monitor) WaitPid(pid int) (int, error) {
|
||||
m.Lock()
|
||||
rc, ok := m.cmds[pid]
|
||||
m.Unlock()
|
||||
if !ok {
|
||||
return 255, errors.Wrapf(ErrNoSuchProcess, "pid %d", pid)
|
||||
}
|
||||
<-rc.ExitCh
|
||||
if rc.exitStatus != 0 {
|
||||
return rc.exitStatus, errors.Errorf("exit status %d", rc.exitStatus)
|
||||
}
|
||||
return rc.exitStatus, nil
|
||||
}
|
||||
|
||||
// Command returns the registered pid for the command created
|
||||
func (m *Monitor) Command(pid int) *Cmd {
|
||||
m.Lock()
|
||||
defer m.Unlock()
|
||||
return m.cmds[pid]
|
||||
}
|
||||
|
||||
func (m *Monitor) Delete(pid int) {
|
||||
m.Lock()
|
||||
delete(m.cmds, pid)
|
||||
m.Unlock()
|
||||
}
|
||||
|
||||
type Cmd struct {
|
||||
c *exec.Cmd
|
||||
ExitCh chan struct{}
|
||||
exitStatus int
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user