Fix exit event handling in shim

Could issues where when exec processes fail the wait block is not
released.

Second, you could not dump stacks if the reaper loop locks up.

Third, the publisher was not waiting on the correct pid.

Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
This commit is contained in:
Michael Crosby 2017-11-28 14:17:22 -05:00
parent 723f37d846
commit 74b3cb3391
4 changed files with 18 additions and 7 deletions

View File

@ -11,6 +11,7 @@ import (
"net"
"os"
"os/exec"
"os/signal"
"runtime"
"strings"
"sync"
@ -80,6 +81,9 @@ func executeShim() error {
if err != nil {
return err
}
dump := make(chan os.Signal, 32)
signal.Notify(dump, syscall.SIGUSR1)
path, err := os.Getwd()
if err != nil {
return err
@ -111,6 +115,11 @@ func executeShim() error {
"path": path,
"namespace": namespaceFlag,
})
go func() {
for range dump {
dumpStacks(logger)
}
}()
return handleSignals(logger, signals, server, sv)
}
@ -171,8 +180,6 @@ func handleSignals(logger *logrus.Entry, signals chan os.Signal, server *ttrpc.S
sv.Delete(context.Background(), &ptypes.Empty{})
close(done)
})
case unix.SIGUSR1:
dumpStacks(logger)
}
}
}
@ -213,8 +220,11 @@ func (l *remoteEventsPublisher) Publish(ctx context.Context, topic string, event
if err != nil {
return err
}
exit := <-c
if exit.Status != 0 {
status, err := reaper.Default.Wait(cmd, c)
if err != nil {
return err
}
if status != 0 {
return errors.New("failed to publish event")
}
return nil

View File

@ -3,6 +3,7 @@ package main
import (
"os"
"os/signal"
"syscall"
"github.com/containerd/containerd/reaper"
"github.com/containerd/containerd/sys"
@ -14,7 +15,7 @@ import (
// sub-reaper so that the container processes are reparented
func setupSignals() (chan os.Signal, error) {
signals := make(chan os.Signal, 2048)
signal.Notify(signals)
signal.Notify(signals, syscall.SIGTERM, syscall.SIGINT, syscall.SIGCHLD)
// make sure runc is setup to use the monitor
// for waiting on processes
runc.Monitor = reaper.Default

View File

@ -143,6 +143,7 @@ func (e *execProcess) start(ctx context.Context) (err error) {
opts.ConsoleSocket = socket
}
if err := e.parent.runtime.Exec(ctx, e.parent.id, e.spec, opts); err != nil {
close(e.waitBlock)
return e.parent.runtimeError(err, "OCI runtime exec failed")
}
if e.stdio.Stdin != "" {

View File

@ -148,7 +148,6 @@ func (s *Service) Delete(ctx context.Context, r *ptypes.Empty) (*shimapi.DeleteR
if p == nil {
return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
}
if err := p.Delete(ctx); err != nil {
return nil, err
}
@ -480,7 +479,7 @@ func (s *Service) getContainerPids(ctx context.Context, id string) ([]uint32, er
func (s *Service) forward(publisher events.Publisher) {
for e := range s.events {
if err := publisher.Publish(s.context, getTopic(s.context, e), e); err != nil {
logrus.WithError(err).Error("post event")
log.G(s.context).WithError(err).Error("post event")
}
}
}