Update reaper for multiple subscribers

Depends on https://github.com/containerd/go-runc/pull/24

There is currently a race with the reaper where you could miss some exit
events from processes.

The problem before and why the reaper was so complex was because
processes could fork, getting a pid, and then fail on an execve before
we would have time to register the process with the reaper.  This could
cause pids to fill up in a map as a way to reduce the race.

This change makes the reaper handle multiple subscribers so that the
caller can handle locking, for when they want to wait for a specific
pid, without affecting other callers using the reaper code.

Exit events are broadcast to multiple subscribers, in this case, the runc
commands and container pids that we get from a pid-file.  Locking while
the entire container starts no longer affects runc commands where you want
to call `runc create` and wait until that has been completed.

Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
This commit is contained in:
Michael Crosby
2017-08-31 13:36:40 -04:00
parent c2e894c33a
commit 6b4c4a2937
8 changed files with 153 additions and 156 deletions

View File

@@ -144,6 +144,9 @@ func (r *Runtime) Create(ctx context.Context, id string, opts runtime.CreateOpts
return nil, errors.Wrapf(err, "invalid task id")
}
ec := reaper.Default.Subscribe()
defer reaper.Default.Unsubscribe(ec)
bundle, err := newBundle(
namespace, id,
filepath.Join(r.state, namespace),
@@ -177,7 +180,7 @@ func (r *Runtime) Create(ctx context.Context, id string, opts runtime.CreateOpts
"id": id,
"namespace": namespace,
}).Warn("cleaning up after killed shim")
err = r.cleanupAfterDeadShim(context.Background(), bundle, namespace, id, lc.pid, true)
err = r.cleanupAfterDeadShim(context.Background(), bundle, namespace, id, lc.pid, ec)
if err == nil {
r.tasks.Delete(ctx, lc)
} else {
@@ -320,7 +323,7 @@ func (r *Runtime) loadTasks(ctx context.Context, ns string) ([]*Task, error) {
"namespace": ns,
}).Error("connecting to shim")
pid, _ := runc.ReadPidFile(filepath.Join(bundle.path, client.InitPidFile))
err := r.cleanupAfterDeadShim(ctx, bundle, ns, id, pid, false)
err := r.cleanupAfterDeadShim(ctx, bundle, ns, id, pid, nil)
if err != nil {
log.G(ctx).WithError(err).WithField("bundle", bundle.path).
Error("cleaning up after dead shim")
@@ -336,18 +339,20 @@ func (r *Runtime) loadTasks(ctx context.Context, ns string) ([]*Task, error) {
return o, nil
}
func (r *Runtime) cleanupAfterDeadShim(ctx context.Context, bundle *bundle, ns, id string, pid int, reap bool) error {
func (r *Runtime) cleanupAfterDeadShim(ctx context.Context, bundle *bundle, ns, id string, pid int, ec chan runc.Exit) error {
ctx = namespaces.WithNamespace(ctx, ns)
if err := r.terminate(ctx, bundle, ns, id); err != nil {
return errors.New("failed to terminate task, leaving bundle for debugging")
}
if reap {
if ec != nil {
// if sub-reaper is set, reap our new child
if v, err := sys.GetSubreaper(); err == nil && v == 1 {
reaper.Default.Register(pid, &reaper.Cmd{ExitCh: make(chan struct{})})
reaper.Default.WaitPid(pid)
reaper.Default.Delete(pid)
for e := range ec {
if e.Pid == pid {
break
}
}
}
}