runtime/v2: cleanup dead shim before delete bundle

The shim delete action needs the bundle information to clean up the
resources created by the shim. If the dead-shim cleanup runs after the
bundle has been deleted, some of those resources may leak.

The ttrpc client's UserOnCloseWait() makes sure that those resources are
cleaned up before the bundle is deleted, which synchronizes task deletion
with the dead-shim cleanup. It might slow down task deletion, but it
guarantees that the resources are released and avoids the EBUSY-on-umount
case. For example, a sandbox container like Kata or Firecracker might have
mount points over the rootfs. If containerd handled task deletion and
dead-shim cleanup in parallel, task deletion could hit EBUSY during umount
and fail to clean up the bundle, which makes things worse.
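
As an illustration, a minimal sketch of the ordering this change enforces;
the deleteTask helper and bundlePath argument are hypothetical stand-ins,
not containerd's actual task-deletion code:

    package shimclient

    import (
    	"context"
    	"os"

    	"github.com/containerd/ttrpc"
    )

    // deleteTask is a hypothetical helper showing the enforced ordering.
    func deleteTask(ctx context.Context, client *ttrpc.Client, bundlePath string) error {
    	// Close starts the client shutdown; the on-close callback (the
    	// dead-shim cleanup) runs from the client's run loop.
    	client.Close()

    	// Block until the on-close callback has finished, so the cleanup
    	// still sees an intact bundle.
    	if err := client.UserOnCloseWait(ctx); err != nil {
    		return err
    	}

    	// Only now remove the bundle: mount points over the rootfs have
    	// been torn down, so umount won't return EBUSY.
    	return os.RemoveAll(bundlePath)
    }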

Also update cleanupAfterDeadshim to make sure it is only called after the
shim has disconnected. In some cases the shim fails to run runc-create for
some reason, but runc-create has already moved runc-init into the ready
state. If containerd doesn't call shim deletion, the runc-init process
leaks and keeps holding the cgroup, which leaves the pod stuck in
terminating :(.
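
For reference, a minimal sketch of how such a cleanup callback is wired up,
assuming ttrpc.WithOnClose (the option behind the userCloseFunc field shown
in the diff below); the cleanupAfterDeadshim signature here is simplified,
not containerd's exact one:

    package shimclient

    import (
    	"context"
    	"net"

    	"github.com/containerd/ttrpc"
    )

    // cleanupAfterDeadshim stands in for containerd's cleanup; simplified args.
    func cleanupAfterDeadshim(ctx context.Context, id string) {
    	// Reap leftovers such as a ready-state runc-init holding the cgroup.
    }

    func newShimClient(ctx context.Context, conn net.Conn, id string) *ttrpc.Client {
    	// The callback only fires once the shim connection is closed, so the
    	// cleanup cannot race with a still-connected shim.
    	return ttrpc.NewClient(conn, ttrpc.WithOnClose(func() {
    		cleanupAfterDeadshim(ctx, id)
    	}))
    }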

Signed-off-by: Wei Fu <fuweid89@gmail.com>
Author: Wei Fu <fuweid89@gmail.com>
Date:   2020-09-07 18:23:01 +08:00
Parent: fabebe5d55
Commit: 4b05d03903

4 changed files with 44 additions and 29 deletions


@@ -47,8 +47,9 @@ type Client struct {
 	ctx    context.Context
 	closed func()
 
-	closeOnce     sync.Once
-	userCloseFunc func()
+	closeOnce       sync.Once
+	userCloseFunc   func()
+	userCloseWaitCh chan struct{}
 
 	errOnce     sync.Once
 	err         error
@@ -75,14 +76,15 @@ func WithUnaryClientInterceptor(i UnaryClientInterceptor) ClientOpts {
 func NewClient(conn net.Conn, opts ...ClientOpts) *Client {
 	ctx, cancel := context.WithCancel(context.Background())
 	c := &Client{
-		codec:         codec{},
-		conn:          conn,
-		channel:       newChannel(conn),
-		calls:         make(chan *callRequest),
-		closed:        cancel,
-		ctx:           ctx,
-		userCloseFunc: func() {},
-		interceptor:   defaultClientInterceptor,
+		codec:           codec{},
+		conn:            conn,
+		channel:         newChannel(conn),
+		calls:           make(chan *callRequest),
+		closed:          cancel,
+		ctx:             ctx,
+		userCloseFunc:   func() {},
+		userCloseWaitCh: make(chan struct{}),
+		interceptor:     defaultClientInterceptor,
 	}
 
 	for _, o := range opts {
@@ -175,6 +177,17 @@ func (c *Client) Close() error {
 	return nil
 }
 
+// UserOnCloseWait blocks until the user's on-close callback
+// finishes.
+func (c *Client) UserOnCloseWait(ctx context.Context) error {
+	select {
+	case <-c.userCloseWaitCh:
+		return nil
+	case <-ctx.Done():
+		return ctx.Err()
+	}
+}
+
 type message struct {
 	messageHeader
 	p   []byte
@@ -251,6 +264,7 @@ func (c *Client) run() {
 	defer func() {
 		c.conn.Close()
 		c.userCloseFunc()
+		close(c.userCloseWaitCh)
 	}()
 
 	for {
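
Closing userCloseWaitCh in the run loop's defer is what unblocks
UserOnCloseWait above: closing a channel acts as a broadcast, releasing
every current and future waiter exactly once, and only after
userCloseFunc() has returned. A standalone sketch of the pattern:

    package main

    import (
    	"fmt"
    	"sync"
    )

    func main() {
    	done := make(chan struct{})
    	var wg sync.WaitGroup
    	for i := 0; i < 3; i++ {
    		wg.Add(1)
    		go func(i int) {
    			defer wg.Done()
    			<-done // receive succeeds once done is closed
    			fmt.Println("waiter", i, "released")
    		}(i)
    	}
    	close(done) // one close releases all waiters
    	wg.Wait()
    }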
@@ -339,7 +353,8 @@ func filterCloseErr(err error) error {
 		return ErrClosed
 	default:
 		// if we have an epipe on a write or econnreset on a read, we cast to errclosed
-		if oerr, ok := err.(*net.OpError); ok && (oerr.Op == "write" || oerr.Op == "read") {
+		var oerr *net.OpError
+		if errors.As(err, &oerr) && (oerr.Op == "write" || oerr.Op == "read") {
 			serr, sok := oerr.Err.(*os.SyscallError)
 			if sok && ((serr.Err == syscall.EPIPE && oerr.Op == "write") ||
 				(serr.Err == syscall.ECONNRESET && oerr.Op == "read")) {
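
The switch from a type assertion to errors.As also changes behavior for
wrapped errors: a plain assertion only matches the top-level error, while
errors.As walks the wrap chain. A small sketch:

    package main

    import (
    	"errors"
    	"fmt"
    	"net"
    	"syscall"
    )

    func main() {
    	base := &net.OpError{Op: "write", Err: syscall.EPIPE}
    	wrapped := fmt.Errorf("send failed: %w", base)

    	_, ok := wrapped.(*net.OpError) // false: top level is fmt's wrap error
    	fmt.Println(ok)

    	var oerr *net.OpError
    	fmt.Println(errors.As(wrapped, &oerr)) // true: errors.As unwraps to base
    }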