Merge pull request #1319 from mlaventure/handle-sigkilled-shim

Handle sigkilled shim
This commit is contained in:
Michael Crosby
2017-08-29 14:06:17 -04:00
committed by GitHub
18 changed files with 481 additions and 147 deletions

View File

@@ -10,6 +10,8 @@ import (
"os"
"os/exec"
"strings"
"sync"
"syscall"
"time"
"golang.org/x/sys/unix"
@@ -28,7 +30,7 @@ import (
type ClientOpt func(context.Context, Config) (shim.ShimClient, io.Closer, error)
// WithStart executes a new shim process
func WithStart(binary, address string, debug bool) ClientOpt {
func WithStart(binary, address string, debug bool, exitHandler func()) ClientOpt {
return func(ctx context.Context, config Config) (_ shim.ShimClient, _ io.Closer, err error) {
socket, err := newSocket(config)
if err != nil {
@@ -47,9 +49,14 @@ func WithStart(binary, address string, debug bool) ClientOpt {
}
defer func() {
if err != nil {
terminate(cmd)
cmd.Process.Kill()
}
}()
go func() {
reaper.Default.Wait(cmd)
reaper.Default.Delete(cmd.Process.Pid)
exitHandler()
}()
log.G(ctx).WithFields(logrus.Fields{
"pid": cmd.Process.Pid,
"address": config.Address,
@@ -72,11 +79,6 @@ func WithStart(binary, address string, debug bool) ClientOpt {
}
}
func terminate(cmd *exec.Cmd) {
cmd.Process.Kill()
reaper.Default.Wait(cmd)
}
func newCommand(binary, address string, debug bool, config Config, socket *os.File) *exec.Cmd {
args := []string{
"--namespace", config.Namespace,
@@ -178,15 +180,20 @@ func New(ctx context.Context, config Config, opt ClientOpt) (*Client, error) {
return &Client{
ShimClient: s,
c: c,
exitCh: make(chan struct{}),
}, nil
}
type Client struct {
shim.ShimClient
c io.Closer
c io.Closer
exitCh chan struct{}
exitOnce sync.Once
}
// IsAlive returns true if the shim can be contacted.
// NOTE: a negative answer doesn't mean that the process is gone.
func (c *Client) IsAlive(ctx context.Context) (bool, error) {
_, err := c.ShimInfo(ctx, empty)
if err != nil {
@@ -198,8 +205,24 @@ func (c *Client) IsAlive(ctx context.Context) (bool, error) {
return true, nil
}
// KillShim kills the shim forcefully
// StopShim signals the shim to exit and wait for the process to disappear
func (c *Client) StopShim(ctx context.Context) error {
return c.signalShim(ctx, unix.SIGTERM)
}
// KillShim kills the shim forcefully and wait for the process to disappear
func (c *Client) KillShim(ctx context.Context) error {
return c.signalShim(ctx, unix.SIGKILL)
}
func (c *Client) Close() error {
if c.c == nil {
return nil
}
return c.c.Close()
}
func (c *Client) signalShim(ctx context.Context, sig syscall.Signal) error {
info, err := c.ShimInfo(ctx, empty)
if err != nil {
return err
@@ -209,23 +232,29 @@ func (c *Client) KillShim(ctx context.Context) error {
if os.Getpid() == pid {
return nil
}
if err := unix.Kill(pid, unix.SIGKILL); err != nil {
if err := unix.Kill(pid, sig); err != nil && err != unix.ESRCH {
return err
}
// wait for shim to die after being SIGKILL'd
for {
// use kill(pid, 0) here because the shim could have been reparented
// and we are no longer able to waitpid(pid, ...) on the shim
if err := unix.Kill(pid, 0); err != nil && err == unix.ESRCH {
return nil
}
time.Sleep(10 * time.Millisecond)
// wait for shim to die after being signaled
select {
case <-ctx.Done():
return ctx.Err()
case <-c.waitForExit(pid):
return nil
}
}
func (c *Client) Close() error {
if c.c == nil {
return nil
}
return c.c.Close()
func (c *Client) waitForExit(pid int) <-chan struct{} {
c.exitOnce.Do(func() {
for {
// use kill(pid, 0) here because the shim could have been reparented
// and we are no longer able to waitpid(pid, ...) on the shim
if err := unix.Kill(pid, 0); err == unix.ESRCH {
close(c.exitCh)
return
}
time.Sleep(10 * time.Millisecond)
}
})
return c.exitCh
}

View File

@@ -27,6 +27,8 @@ import (
"github.com/pkg/errors"
)
const InitPidFile = "init.pid"
type initProcess struct {
sync.WaitGroup
initState
@@ -131,7 +133,7 @@ func newInitProcess(context context.Context, plat platform, path, namespace, wor
return nil, errors.Wrap(err, "failed to create OCI runtime io pipes")
}
}
pidFile := filepath.Join(path, "init.pid")
pidFile := filepath.Join(path, InitPidFile)
if r.Checkpoint != "" {
opts := &runc.RestoreOpts{
CheckpointOpts: runc.CheckpointOpts{

View File

@@ -22,6 +22,7 @@ import (
"github.com/containerd/containerd/runtime"
google_protobuf "github.com/golang/protobuf/ptypes/empty"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/net/context"
)
@@ -35,6 +36,11 @@ func NewService(path, namespace, workDir string, publisher events.Publisher) (*S
return nil, fmt.Errorf("shim namespace cannot be empty")
}
context := namespaces.WithNamespace(context.Background(), namespace)
context = log.WithLogger(context, logrus.WithFields(logrus.Fields{
"namespace": namespace,
"pid": os.Getpid(),
"path": path,
}))
s := &Service{
path: path,
processes: make(map[string]process),
@@ -58,7 +64,6 @@ type platform interface {
}
type Service struct {
initProcess *initProcess
path string
id string
bundle string
@@ -83,12 +88,11 @@ func (s *Service) Create(ctx context.Context, r *shimapi.CreateTaskRequest) (*sh
// save the main task id and bundle to the shim for additional requests
s.id = r.ID
s.bundle = r.Bundle
s.initProcess = process
pid := process.Pid()
s.processes[r.ID] = process
s.mu.Unlock()
cmd := &reaper.Cmd{
ExitCh: make(chan int, 1),
ExitCh: make(chan struct{}),
}
reaper.Default.Register(pid, cmd)
s.events <- &eventsapi.TaskCreate{
@@ -111,8 +115,8 @@ func (s *Service) Create(ctx context.Context, r *shimapi.CreateTaskRequest) (*sh
}
func (s *Service) Start(ctx context.Context, r *shimapi.StartRequest) (*shimapi.StartResponse, error) {
p, ok := s.processes[r.ID]
if !ok {
p := s.getProcess(r.ID)
if p == nil {
return nil, errdefs.ToGRPCf(errdefs.ErrNotFound, "process %s not found", r.ID)
}
if err := p.Start(ctx); err != nil {
@@ -121,12 +125,12 @@ func (s *Service) Start(ctx context.Context, r *shimapi.StartRequest) (*shimapi.
if r.ID == s.id {
s.events <- &eventsapi.TaskStart{
ContainerID: s.id,
Pid: uint32(s.initProcess.Pid()),
Pid: uint32(p.Pid()),
}
} else {
pid := p.Pid()
cmd := &reaper.Cmd{
ExitCh: make(chan int, 1),
ExitCh: make(chan struct{}),
}
reaper.Default.Register(pid, cmd)
go s.waitExit(p, pid, cmd)
@@ -143,16 +147,15 @@ func (s *Service) Start(ctx context.Context, r *shimapi.StartRequest) (*shimapi.
}
func (s *Service) Delete(ctx context.Context, r *google_protobuf.Empty) (*shimapi.DeleteResponse, error) {
if s.initProcess == nil {
p := s.getProcess(s.id)
if p == nil {
return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
}
p := s.initProcess
if err := p.Delete(ctx); err != nil {
return nil, err
}
s.mu.Lock()
delete(s.processes, p.ID())
s.mu.Unlock()
s.deleteProcess(p.ID())
s.events <- &eventsapi.TaskDelete{
ContainerID: s.id,
ExitStatus: uint32(p.ExitStatus()),
@@ -167,24 +170,17 @@ func (s *Service) Delete(ctx context.Context, r *google_protobuf.Empty) (*shimap
}
func (s *Service) DeleteProcess(ctx context.Context, r *shimapi.DeleteProcessRequest) (*shimapi.DeleteResponse, error) {
if s.initProcess == nil {
return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
}
if r.ID == s.initProcess.id {
if r.ID == s.id {
return nil, grpc.Errorf(codes.InvalidArgument, "cannot delete init process with DeleteProcess")
}
s.mu.Lock()
p, ok := s.processes[r.ID]
s.mu.Unlock()
if !ok {
return nil, errors.Wrapf(errdefs.ErrNotFound, "process %s not found", r.ID)
p := s.getProcess(r.ID)
if p == nil {
return nil, errors.Wrapf(errdefs.ErrNotFound, "process %s", r.ID)
}
if err := p.Delete(ctx); err != nil {
return nil, err
}
s.mu.Lock()
delete(s.processes, p.ID())
s.mu.Unlock()
s.deleteProcess(r.ID)
return &shimapi.DeleteResponse{
ExitStatus: uint32(p.ExitStatus()),
ExitedAt: p.ExitedAt(),
@@ -193,13 +189,19 @@ func (s *Service) DeleteProcess(ctx context.Context, r *shimapi.DeleteProcessReq
}
func (s *Service) Exec(ctx context.Context, r *shimapi.ExecProcessRequest) (*google_protobuf.Empty, error) {
if s.initProcess == nil {
return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
}
s.mu.Lock()
defer s.mu.Unlock()
process, err := newExecProcess(ctx, s.path, r, s.initProcess, r.ID)
if p := s.processes[r.ID]; p != nil {
return nil, errdefs.ToGRPCf(errdefs.ErrAlreadyExists, "id %s", r.ID)
}
p := s.processes[s.id]
if p == nil {
return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
}
process, err := newExecProcess(ctx, s.path, r, p.(*initProcess), r.ID)
if err != nil {
return nil, errdefs.ToGRPC(err)
}
@@ -220,10 +222,8 @@ func (s *Service) ResizePty(ctx context.Context, r *shimapi.ResizePtyRequest) (*
Width: uint16(r.Width),
Height: uint16(r.Height),
}
s.mu.Lock()
p, ok := s.processes[r.ID]
s.mu.Unlock()
if !ok {
p := s.getProcess(r.ID)
if p == nil {
return nil, errors.Errorf("process does not exist %s", r.ID)
}
if err := p.Resize(ws); err != nil {
@@ -233,8 +233,8 @@ func (s *Service) ResizePty(ctx context.Context, r *shimapi.ResizePtyRequest) (*
}
func (s *Service) State(ctx context.Context, r *shimapi.StateRequest) (*shimapi.StateResponse, error) {
p, ok := s.processes[r.ID]
if !ok {
p := s.getProcess(r.ID)
if p == nil {
return nil, errdefs.ToGRPCf(errdefs.ErrNotFound, "process id %s not found", r.ID)
}
st, err := p.Status(ctx)
@@ -270,10 +270,11 @@ func (s *Service) State(ctx context.Context, r *shimapi.StateRequest) (*shimapi.
}
func (s *Service) Pause(ctx context.Context, r *google_protobuf.Empty) (*google_protobuf.Empty, error) {
if s.initProcess == nil {
p := s.getProcess(s.id)
if p == nil {
return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
}
if err := s.initProcess.Pause(ctx); err != nil {
if err := p.(*initProcess).Pause(ctx); err != nil {
return nil, err
}
s.events <- &eventsapi.TaskPaused{
@@ -283,10 +284,11 @@ func (s *Service) Pause(ctx context.Context, r *google_protobuf.Empty) (*google_
}
func (s *Service) Resume(ctx context.Context, r *google_protobuf.Empty) (*google_protobuf.Empty, error) {
if s.initProcess == nil {
p := s.getProcess(s.id)
if p == nil {
return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
}
if err := s.initProcess.Resume(ctx); err != nil {
if err := p.(*initProcess).Resume(ctx); err != nil {
return nil, err
}
s.events <- &eventsapi.TaskResumed{
@@ -296,17 +298,19 @@ func (s *Service) Resume(ctx context.Context, r *google_protobuf.Empty) (*google
}
func (s *Service) Kill(ctx context.Context, r *shimapi.KillRequest) (*google_protobuf.Empty, error) {
if s.initProcess == nil {
return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
}
if r.ID == "" {
if err := s.initProcess.Kill(ctx, r.Signal, r.All); err != nil {
p := s.getProcess(s.id)
if p == nil {
return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
}
if err := p.Kill(ctx, r.Signal, r.All); err != nil {
return nil, errdefs.ToGRPC(err)
}
return empty, nil
}
p, ok := s.processes[r.ID]
if !ok {
p := s.getProcess(r.ID)
if p == nil {
return nil, errdefs.ToGRPCf(errdefs.ErrNotFound, "process id %s not found", r.ID)
}
if err := p.Kill(ctx, r.Signal, r.All); err != nil {
@@ -326,8 +330,8 @@ func (s *Service) ListPids(ctx context.Context, r *shimapi.ListPidsRequest) (*sh
}
func (s *Service) CloseIO(ctx context.Context, r *shimapi.CloseIORequest) (*google_protobuf.Empty, error) {
p, ok := s.processes[r.ID]
if !ok {
p := s.getProcess(r.ID)
if p == nil {
return nil, errdefs.ToGRPCf(errdefs.ErrNotFound, "process does not exist %s", r.ID)
}
if stdin := p.Stdin(); stdin != nil {
@@ -339,10 +343,11 @@ func (s *Service) CloseIO(ctx context.Context, r *shimapi.CloseIORequest) (*goog
}
func (s *Service) Checkpoint(ctx context.Context, r *shimapi.CheckpointTaskRequest) (*google_protobuf.Empty, error) {
if s.initProcess == nil {
p := s.getProcess(s.id)
if p == nil {
return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
}
if err := s.initProcess.Checkpoint(ctx, r); err != nil {
if err := p.(*initProcess).Checkpoint(ctx, r); err != nil {
return nil, errdefs.ToGRPC(err)
}
s.events <- &eventsapi.TaskCheckpointed{
@@ -358,17 +363,37 @@ func (s *Service) ShimInfo(ctx context.Context, r *google_protobuf.Empty) (*shim
}
func (s *Service) Update(ctx context.Context, r *shimapi.UpdateTaskRequest) (*google_protobuf.Empty, error) {
if s.initProcess == nil {
p := s.getProcess(s.id)
if p == nil {
return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
}
if err := s.initProcess.Update(ctx, r); err != nil {
if err := p.(*initProcess).Update(ctx, r); err != nil {
return nil, errdefs.ToGRPC(err)
}
return empty, nil
}
func (s *Service) addProcess(id string, p process) {
s.mu.Lock()
s.processes[id] = p
s.mu.Unlock()
}
func (s *Service) getProcess(id string) process {
s.mu.Lock()
p := s.processes[id]
s.mu.Unlock()
return p
}
func (s *Service) deleteProcess(id string) {
s.mu.Lock()
delete(s.processes, id)
s.mu.Unlock()
}
func (s *Service) waitExit(p process, pid int, cmd *reaper.Cmd) {
status := <-cmd.ExitCh
status, _ := reaper.Default.WaitPid(pid)
p.SetExited(status)
reaper.Default.Delete(pid)
@@ -382,12 +407,17 @@ func (s *Service) waitExit(p process, pid int, cmd *reaper.Cmd) {
}
func (s *Service) getContainerPids(ctx context.Context, id string) ([]uint32, error) {
p, err := s.initProcess.runtime.Ps(ctx, id)
p := s.getProcess(s.id)
if p == nil {
return nil, errors.Wrapf(errdefs.ErrFailedPrecondition, "container must be created")
}
ps, err := p.(*initProcess).runtime.Ps(ctx, id)
if err != nil {
return nil, err
}
pids := make([]uint32, 0, len(p))
for _, pid := range p {
pids := make([]uint32, 0, len(ps))
for _, pid := range ps {
pids = append(pids, uint32(pid))
}
return pids, nil
@@ -395,13 +425,13 @@ func (s *Service) getContainerPids(ctx context.Context, id string) ([]uint32, er
func (s *Service) forward(publisher events.Publisher) {
for e := range s.events {
if err := publisher.Publish(s.context, getTopic(e), e); err != nil {
if err := publisher.Publish(s.context, getTopic(s.context, e), e); err != nil {
log.G(s.context).WithError(err).Error("post event")
}
}
}
func getTopic(e interface{}) string {
func getTopic(ctx context.Context, e interface{}) string {
switch e.(type) {
case *eventsapi.TaskCreate:
return runtime.TaskCreateEventTopic
@@ -415,12 +445,16 @@ func getTopic(e interface{}) string {
return runtime.TaskDeleteEventTopic
case *eventsapi.TaskExecAdded:
return runtime.TaskExecAddedEventTopic
case *eventsapi.TaskExecStarted:
return runtime.TaskExecStartedEventTopic
case *eventsapi.TaskPaused:
return runtime.TaskPausedEventTopic
case *eventsapi.TaskResumed:
return runtime.TaskResumedEventTopic
case *eventsapi.TaskCheckpointed:
return runtime.TaskCheckpointedEventTopic
default:
log.G(ctx).Warnf("no topic for type %#v", e)
}
return "?"
return runtime.TaskUnknownTopic
}