Merge pull request #1320 from crosbymichael/cr-race
Fix race with task checkpoint
This commit is contained in:
commit
f27f8dd120
@ -132,7 +132,9 @@ func TestMain(m *testing.M) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if err := cmd.Wait(); err != nil {
|
if err := cmd.Wait(); err != nil {
|
||||||
fmt.Fprintln(os.Stderr, "failed to wait for containerd", err)
|
if _, ok := err.(*exec.ExitError); !ok {
|
||||||
|
fmt.Fprintln(os.Stderr, "failed to wait for containerd", err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if err := os.RemoveAll(defaultRoot); err != nil {
|
if err := os.RemoveAll(defaultRoot); err != nil {
|
||||||
fmt.Fprintln(os.Stderr, "failed to remove test root dir", err)
|
fmt.Fprintln(os.Stderr, "failed to remove test root dir", err)
|
||||||
|
@ -211,9 +211,14 @@ func (r *Runtime) Delete(ctx context.Context, c runtime.Task) (*runtime.Exit, er
|
|||||||
}
|
}
|
||||||
r.tasks.Delete(ctx, lc)
|
r.tasks.Delete(ctx, lc)
|
||||||
|
|
||||||
bundle := loadBundle(filepath.Join(r.state, namespace, lc.id), filepath.Join(r.root, namespace, lc.id), namespace, r.events)
|
bundle := loadBundle(
|
||||||
|
filepath.Join(r.state, namespace, lc.id),
|
||||||
|
filepath.Join(r.root, namespace, lc.id),
|
||||||
|
namespace,
|
||||||
|
r.events,
|
||||||
|
)
|
||||||
if err := bundle.Delete(); err != nil {
|
if err := bundle.Delete(); err != nil {
|
||||||
return nil, err
|
log.G(ctx).WithError(err).Error("failed to delete bundle")
|
||||||
}
|
}
|
||||||
return &runtime.Exit{
|
return &runtime.Exit{
|
||||||
Status: rsp.ExitStatus,
|
Status: rsp.ExitStatus,
|
||||||
|
@ -209,7 +209,18 @@ func (c *Client) KillShim(ctx context.Context) error {
|
|||||||
if os.Getpid() == pid {
|
if os.Getpid() == pid {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
return unix.Kill(pid, unix.SIGKILL)
|
if err := unix.Kill(pid, unix.SIGKILL); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// wait for shim to die after being SIGKILL'd
|
||||||
|
for {
|
||||||
|
// use kill(pid, 0) here because the shim could have been reparented
|
||||||
|
// and we are no longer able to waitpid(pid, ...) on the shim
|
||||||
|
if err := unix.Kill(pid, 0); err != nil && err == unix.ESRCH {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
time.Sleep(10 * time.Millisecond)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Client) Close() error {
|
func (c *Client) Close() error {
|
||||||
|
@ -237,31 +237,33 @@ func (p *initProcess) SetExited(status int) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (p *initProcess) Delete(context context.Context) error {
|
func (p *initProcess) Delete(context context.Context) error {
|
||||||
status, err := p.Status(context)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if status != "stopped" {
|
|
||||||
return fmt.Errorf("cannot delete a running container")
|
|
||||||
}
|
|
||||||
p.killAll(context)
|
p.killAll(context)
|
||||||
p.Wait()
|
p.Wait()
|
||||||
err = p.runtime.Delete(context, p.id, nil)
|
err := p.runtime.Delete(context, p.id, nil)
|
||||||
|
// ignore errors if a runtime has already deleted the process
|
||||||
|
// but we still hold metadata and pipes
|
||||||
|
//
|
||||||
|
// this is common during a checkpoint, runc will delete the container state
|
||||||
|
// after a checkpoint and the container will no longer exist within runc
|
||||||
|
if err != nil {
|
||||||
|
if strings.Contains(err.Error(), "does not exist") {
|
||||||
|
err = nil
|
||||||
|
} else {
|
||||||
|
err = p.runtimeError(err, "failed to delete task")
|
||||||
|
}
|
||||||
|
}
|
||||||
if p.io != nil {
|
if p.io != nil {
|
||||||
for _, c := range p.closers {
|
for _, c := range p.closers {
|
||||||
c.Close()
|
c.Close()
|
||||||
}
|
}
|
||||||
p.io.Close()
|
p.io.Close()
|
||||||
}
|
}
|
||||||
err = p.runtimeError(err, "OCI runtime delete failed")
|
|
||||||
|
|
||||||
if err2 := mount.UnmountAll(p.rootfs, 0); err2 != nil {
|
if err2 := mount.UnmountAll(p.rootfs, 0); err2 != nil {
|
||||||
log.G(context).WithError(err2).Warn("Failed to cleanup rootfs mount")
|
log.G(context).WithError(err2).Warn("failed to cleanup rootfs mount")
|
||||||
if err == nil {
|
if err == nil {
|
||||||
err = errors.Wrap(err2, "Failed rootfs umount")
|
err = errors.Wrap(err2, "failed rootfs umount")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -147,8 +147,9 @@ func (s *Service) Delete(ctx context.Context, r *google_protobuf.Empty) (*shimap
|
|||||||
return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
|
return nil, errdefs.ToGRPCf(errdefs.ErrFailedPrecondition, "container must be created")
|
||||||
}
|
}
|
||||||
p := s.initProcess
|
p := s.initProcess
|
||||||
// TODO (@crosbymichael): how to handle errors here
|
if err := p.Delete(ctx); err != nil {
|
||||||
p.Delete(ctx)
|
return nil, err
|
||||||
|
}
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
delete(s.processes, p.ID())
|
delete(s.processes, p.ID())
|
||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
@ -178,8 +179,9 @@ func (s *Service) DeleteProcess(ctx context.Context, r *shimapi.DeleteProcessReq
|
|||||||
if !ok {
|
if !ok {
|
||||||
return nil, errors.Wrapf(errdefs.ErrNotFound, "process %s not found", r.ID)
|
return nil, errors.Wrapf(errdefs.ErrNotFound, "process %s not found", r.ID)
|
||||||
}
|
}
|
||||||
// TODO (@crosbymichael): how to handle errors here
|
if err := p.Delete(ctx); err != nil {
|
||||||
p.Delete(ctx)
|
return nil, err
|
||||||
|
}
|
||||||
s.mu.Lock()
|
s.mu.Lock()
|
||||||
delete(s.processes, p.ID())
|
delete(s.processes, p.ID())
|
||||||
s.mu.Unlock()
|
s.mu.Unlock()
|
||||||
|
Loading…
Reference in New Issue
Block a user