diff --git a/client_test.go b/client_test.go index 7c277e2d9..22495735b 100644 --- a/client_test.go +++ b/client_test.go @@ -132,7 +132,9 @@ func TestMain(m *testing.M) { } } if err := cmd.Wait(); err != nil { - fmt.Fprintln(os.Stderr, "failed to wait for containerd", err) + if _, ok := err.(*exec.ExitError); !ok { + fmt.Fprintln(os.Stderr, "failed to wait for containerd", err) + } } if err := os.RemoveAll(defaultRoot); err != nil { fmt.Fprintln(os.Stderr, "failed to remove test root dir", err) diff --git a/linux/runtime.go b/linux/runtime.go index 43c0b20ed..0430c6943 100644 --- a/linux/runtime.go +++ b/linux/runtime.go @@ -211,9 +211,14 @@ func (r *Runtime) Delete(ctx context.Context, c runtime.Task) (*runtime.Exit, er } r.tasks.Delete(ctx, lc) - bundle := loadBundle(filepath.Join(r.state, namespace, lc.id), filepath.Join(r.root, namespace, lc.id), namespace, r.events) + bundle := loadBundle( + filepath.Join(r.state, namespace, lc.id), + filepath.Join(r.root, namespace, lc.id), + namespace, + r.events, + ) if err := bundle.Delete(); err != nil { - return nil, err + log.G(ctx).WithError(err).Error("failed to delete bundle") } return &runtime.Exit{ Status: rsp.ExitStatus, diff --git a/linux/shim/client.go b/linux/shim/client.go index ea634a8f8..2b5016fc5 100644 --- a/linux/shim/client.go +++ b/linux/shim/client.go @@ -209,7 +209,18 @@ func (c *Client) KillShim(ctx context.Context) error { if os.Getpid() == pid { return nil } - return unix.Kill(pid, unix.SIGKILL) + if err := unix.Kill(pid, unix.SIGKILL); err != nil { + return err + } + // wait for shim to die after being SIGKILL'd + for { + // use kill(pid, 0) here because the shim could have been reparented + // and we are no longer able to waitpid(pid, ...) on the shim + if err := unix.Kill(pid, 0); err != nil && err == unix.ESRCH { + return nil + } + time.Sleep(10 * time.Millisecond) + } } func (c *Client) Close() error { diff --git a/linux/shim/init.go b/linux/shim/init.go index 4f4f42933..6777940c8 100644 --- a/linux/shim/init.go +++ b/linux/shim/init.go @@ -237,31 +237,33 @@ func (p *initProcess) SetExited(status int) { } func (p *initProcess) Delete(context context.Context) error { - status, err := p.Status(context) - if err != nil { - return err - } - if status != "stopped" { - return fmt.Errorf("cannot delete a running container") - } p.killAll(context) p.Wait() - err = p.runtime.Delete(context, p.id, nil) + err := p.runtime.Delete(context, p.id, nil) + // ignore errors if a runtime has already deleted the process + // but we still hold metadata and pipes + // + // this is common during a checkpoint, runc will delete the container state + // after a checkpoint and the container will no longer exist within runc + if err != nil { + if strings.Contains(err.Error(), "does not exist") { + err = nil + } else { + err = p.runtimeError(err, "failed to delete task") + } + } if p.io != nil { for _, c := range p.closers { c.Close() } p.io.Close() } - err = p.runtimeError(err, "OCI runtime delete failed") - if err2 := mount.UnmountAll(p.rootfs, 0); err2 != nil { - log.G(context).WithError(err2).Warn("Failed to cleanup rootfs mount") + log.G(context).WithError(err2).Warn("failed to cleanup rootfs mount") if err == nil { - err = errors.Wrap(err2, "Failed rootfs umount") + err = errors.Wrap(err2, "failed rootfs umount") } } - return err } diff --git a/linux/shim/service.go b/linux/shim/service.go index be54104dc..7406035a1 100644 --- a/linux/shim/service.go +++ b/linux/shim/service.go @@ -147,8 +147,9 @@ func (s *Service) Delete(ctx context.Context, r *google_protobuf.Empty) (*shimap return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") } p := s.initProcess - // TODO (@crosbymichael): how to handle errors here - p.Delete(ctx) + if err := p.Delete(ctx); err != nil { + return nil, err + } s.mu.Lock() delete(s.processes, p.ID()) s.mu.Unlock() @@ -178,8 +179,9 @@ func (s *Service) DeleteProcess(ctx context.Context, r *shimapi.DeleteProcessReq if !ok { return nil, errors.Wrapf(errdefs.ErrNotFound, "process %s not found", r.ID) } - // TODO (@crosbymichael): how to handle errors here - p.Delete(ctx) + if err := p.Delete(ctx); err != nil { + return nil, err + } s.mu.Lock() delete(s.processes, p.ID()) s.mu.Unlock()