Merge pull request #1320 from crosbymichael/cr-race
Fix race with task checkpoint
This commit is contained in:
commit
f27f8dd120
@ -132,8 +132,10 @@ func TestMain(m *testing.M) {
|
||||
}
|
||||
}
|
||||
if err := cmd.Wait(); err != nil {
|
||||
if _, ok := err.(*exec.ExitError); !ok {
|
||||
fmt.Fprintln(os.Stderr, "failed to wait for containerd", err)
|
||||
}
|
||||
}
|
||||
if err := os.RemoveAll(defaultRoot); err != nil {
|
||||
fmt.Fprintln(os.Stderr, "failed to remove test root dir", err)
|
||||
os.Exit(1)
|
||||
|
@ -211,9 +211,14 @@ func (r *Runtime) Delete(ctx context.Context, c runtime.Task) (*runtime.Exit, er
|
||||
}
|
||||
r.tasks.Delete(ctx, lc)
|
||||
|
||||
bundle := loadBundle(filepath.Join(r.state, namespace, lc.id), filepath.Join(r.root, namespace, lc.id), namespace, r.events)
|
||||
bundle := loadBundle(
|
||||
filepath.Join(r.state, namespace, lc.id),
|
||||
filepath.Join(r.root, namespace, lc.id),
|
||||
namespace,
|
||||
r.events,
|
||||
)
|
||||
if err := bundle.Delete(); err != nil {
|
||||
return nil, err
|
||||
log.G(ctx).WithError(err).Error("failed to delete bundle")
|
||||
}
|
||||
return &runtime.Exit{
|
||||
Status: rsp.ExitStatus,
|
||||
|
@ -209,7 +209,18 @@ func (c *Client) KillShim(ctx context.Context) error {
|
||||
if os.Getpid() == pid {
|
||||
return nil
|
||||
}
|
||||
return unix.Kill(pid, unix.SIGKILL)
|
||||
if err := unix.Kill(pid, unix.SIGKILL); err != nil {
|
||||
return err
|
||||
}
|
||||
// wait for shim to die after being SIGKILL'd
|
||||
for {
|
||||
// use kill(pid, 0) here because the shim could have been reparented
|
||||
// and we are no longer able to waitpid(pid, ...) on the shim
|
||||
if err := unix.Kill(pid, 0); err != nil && err == unix.ESRCH {
|
||||
return nil
|
||||
}
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
}
|
||||
}
|
||||
|
||||
func (c *Client) Close() error {
|
||||
|
@ -237,31 +237,33 @@ func (p *initProcess) SetExited(status int) {
|
||||
}
|
||||
|
||||
func (p *initProcess) Delete(context context.Context) error {
|
||||
status, err := p.Status(context)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if status != "stopped" {
|
||||
return fmt.Errorf("cannot delete a running container")
|
||||
}
|
||||
p.killAll(context)
|
||||
p.Wait()
|
||||
err = p.runtime.Delete(context, p.id, nil)
|
||||
err := p.runtime.Delete(context, p.id, nil)
|
||||
// ignore errors if a runtime has already deleted the process
|
||||
// but we still hold metadata and pipes
|
||||
//
|
||||
// this is common during a checkpoint, runc will delete the container state
|
||||
// after a checkpoint and the container will no longer exist within runc
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "does not exist") {
|
||||
err = nil
|
||||
} else {
|
||||
err = p.runtimeError(err, "failed to delete task")
|
||||
}
|
||||
}
|
||||
if p.io != nil {
|
||||
for _, c := range p.closers {
|
||||
c.Close()
|
||||
}
|
||||
p.io.Close()
|
||||
}
|
||||
err = p.runtimeError(err, "OCI runtime delete failed")
|
||||
|
||||
if err2 := mount.UnmountAll(p.rootfs, 0); err2 != nil {
|
||||
log.G(context).WithError(err2).Warn("Failed to cleanup rootfs mount")
|
||||
log.G(context).WithError(err2).Warn("failed to cleanup rootfs mount")
|
||||
if err == nil {
|
||||
err = errors.Wrap(err2, "Failed rootfs umount")
|
||||
err = errors.Wrap(err2, "failed rootfs umount")
|
||||
}
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
|
@ -147,8 +147,9 @@ func (s *Service) Delete(ctx context.Context, r *google_protobuf.Empty) (*shimap
|
||||
return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
|
||||
}
|
||||
p := s.initProcess
|
||||
// TODO (@crosbymichael): how to handle errors here
|
||||
p.Delete(ctx)
|
||||
if err := p.Delete(ctx); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
s.mu.Lock()
|
||||
delete(s.processes, p.ID())
|
||||
s.mu.Unlock()
|
||||
@ -178,8 +179,9 @@ func (s *Service) DeleteProcess(ctx context.Context, r *shimapi.DeleteProcessReq
|
||||
if !ok {
|
||||
return nil, errors.Wrapf(errdefs.ErrNotFound, "process %s not found", r.ID)
|
||||
}
|
||||
// TODO (@crosbymichael): how to handle errors here
|
||||
p.Delete(ctx)
|
||||
if err := p.Delete(ctx); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
s.mu.Lock()
|
||||
delete(s.processes, p.ID())
|
||||
s.mu.Unlock()
|
||||
|
Loading…
Reference in New Issue
Block a user