Merge pull request #1320 from crosbymichael/cr-race

Fix race with task checkpoint
This commit is contained in:
Michael Crosby 2017-08-11 10:30:51 -04:00 committed by GitHub
commit f27f8dd120
5 changed files with 43 additions and 21 deletions

View File

@ -132,7 +132,9 @@ func TestMain(m *testing.M) {
} }
} }
if err := cmd.Wait(); err != nil { if err := cmd.Wait(); err != nil {
fmt.Fprintln(os.Stderr, "failed to wait for containerd", err) if _, ok := err.(*exec.ExitError); !ok {
fmt.Fprintln(os.Stderr, "failed to wait for containerd", err)
}
} }
if err := os.RemoveAll(defaultRoot); err != nil { if err := os.RemoveAll(defaultRoot); err != nil {
fmt.Fprintln(os.Stderr, "failed to remove test root dir", err) fmt.Fprintln(os.Stderr, "failed to remove test root dir", err)

View File

@ -211,9 +211,14 @@ func (r *Runtime) Delete(ctx context.Context, c runtime.Task) (*runtime.Exit, er
} }
r.tasks.Delete(ctx, lc) r.tasks.Delete(ctx, lc)
bundle := loadBundle(filepath.Join(r.state, namespace, lc.id), filepath.Join(r.root, namespace, lc.id), namespace, r.events) bundle := loadBundle(
filepath.Join(r.state, namespace, lc.id),
filepath.Join(r.root, namespace, lc.id),
namespace,
r.events,
)
if err := bundle.Delete(); err != nil { if err := bundle.Delete(); err != nil {
return nil, err log.G(ctx).WithError(err).Error("failed to delete bundle")
} }
return &runtime.Exit{ return &runtime.Exit{
Status: rsp.ExitStatus, Status: rsp.ExitStatus,

View File

@ -209,7 +209,18 @@ func (c *Client) KillShim(ctx context.Context) error {
if os.Getpid() == pid { if os.Getpid() == pid {
return nil return nil
} }
return unix.Kill(pid, unix.SIGKILL) if err := unix.Kill(pid, unix.SIGKILL); err != nil {
return err
}
// wait for shim to die after being SIGKILL'd
for {
// use kill(pid, 0) here because the shim could have been reparented
// and we are no longer able to waitpid(pid, ...) on the shim
if err := unix.Kill(pid, 0); err != nil && err == unix.ESRCH {
return nil
}
time.Sleep(10 * time.Millisecond)
}
} }
func (c *Client) Close() error { func (c *Client) Close() error {

View File

@ -237,31 +237,33 @@ func (p *initProcess) SetExited(status int) {
} }
func (p *initProcess) Delete(context context.Context) error { func (p *initProcess) Delete(context context.Context) error {
status, err := p.Status(context)
if err != nil {
return err
}
if status != "stopped" {
return fmt.Errorf("cannot delete a running container")
}
p.killAll(context) p.killAll(context)
p.Wait() p.Wait()
err = p.runtime.Delete(context, p.id, nil) err := p.runtime.Delete(context, p.id, nil)
// ignore errors if a runtime has already deleted the process
// but we still hold metadata and pipes
//
// this is common during a checkpoint, runc will delete the container state
// after a checkpoint and the container will no longer exist within runc
if err != nil {
if strings.Contains(err.Error(), "does not exist") {
err = nil
} else {
err = p.runtimeError(err, "failed to delete task")
}
}
if p.io != nil { if p.io != nil {
for _, c := range p.closers { for _, c := range p.closers {
c.Close() c.Close()
} }
p.io.Close() p.io.Close()
} }
err = p.runtimeError(err, "OCI runtime delete failed")
if err2 := mount.UnmountAll(p.rootfs, 0); err2 != nil { if err2 := mount.UnmountAll(p.rootfs, 0); err2 != nil {
log.G(context).WithError(err2).Warn("Failed to cleanup rootfs mount") log.G(context).WithError(err2).Warn("failed to cleanup rootfs mount")
if err == nil { if err == nil {
err = errors.Wrap(err2, "Failed rootfs umount") err = errors.Wrap(err2, "failed rootfs umount")
} }
} }
return err return err
} }

View File

@ -147,8 +147,9 @@ func (s *Service) Delete(ctx context.Context, r *google_protobuf.Empty) (*shimap
return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created") return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
} }
p := s.initProcess p := s.initProcess
// TODO (@crosbymichael): how to handle errors here if err := p.Delete(ctx); err != nil {
p.Delete(ctx) return nil, err
}
s.mu.Lock() s.mu.Lock()
delete(s.processes, p.ID()) delete(s.processes, p.ID())
s.mu.Unlock() s.mu.Unlock()
@ -178,8 +179,9 @@ func (s *Service) DeleteProcess(ctx context.Context, r *shimapi.DeleteProcessReq
if !ok { if !ok {
return nil, errors.Wrapf(errdefs.ErrNotFound, "process %s not found", r.ID) return nil, errors.Wrapf(errdefs.ErrNotFound, "process %s not found", r.ID)
} }
// TODO (@crosbymichael): how to handle errors here if err := p.Delete(ctx); err != nil {
p.Delete(ctx) return nil, err
}
s.mu.Lock() s.mu.Lock()
delete(s.processes, p.ID()) delete(s.processes, p.ID())
s.mu.Unlock() s.mu.Unlock()