From 3e51312a61ea9d727d082818e487b76a7739385a Mon Sep 17 00:00:00 2001 From: botieking98 Date: Wed, 27 Oct 2021 14:58:31 +0800 Subject: [PATCH] fix shim reaper blocking while waiting for a command to finish Waiting without a timeout can cause the event publish process to hang in some special scenarios. Signed-off-by: botieking98 --- cmd/containerd-shim/main_unix.go | 2 +- sys/reaper/reaper_unix.go | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/cmd/containerd-shim/main_unix.go b/cmd/containerd-shim/main_unix.go index 0ba452d35..e1d3eb13f 100644 --- a/cmd/containerd-shim/main_unix.go +++ b/cmd/containerd-shim/main_unix.go @@ -308,7 +308,7 @@ func (l *remoteEventsPublisher) Publish(ctx context.Context, topic string, event if err != nil { return err } - status, err := reaper.Default.Wait(cmd, c) + status, err := reaper.Default.WaitTimeout(cmd, c, 30*time.Second) if err != nil { return errors.Wrapf(err, "failed to publish event: %s", b.String()) } diff --git a/sys/reaper/reaper_unix.go b/sys/reaper/reaper_unix.go index a735e1cbd..54cfe9eed 100644 --- a/sys/reaper/reaper_unix.go +++ b/sys/reaper/reaper_unix.go @@ -21,6 +21,7 @@ package reaper import ( "sync" + "syscall" "time" runc "github.com/containerd/go-runc" @@ -116,6 +117,28 @@ func (m *Monitor) Wait(c *exec.Cmd, ec chan runc.Exit) (int, error) { return -1, ErrNoSuchProcess } +// WaitTimeout waits for the command to exit and, if it is still running after timeout, kills it and returns an error. 
+func (m *Monitor) WaitTimeout(c *exec.Cmd, ec chan runc.Exit, timeout time.Duration) (int, error) {
+	type waitResult struct {
+		status int
+		err    error
+	}
+	// Capacity 1 lets the goroutine deliver its result and exit even if the timeout fired first.
+	resCh := make(chan waitResult, 1)
+	go func() {
+		status, err := m.Wait(c, ec)
+		resCh <- waitResult{status: status, err: err}
+	}()
+	select {
+	case <-time.After(timeout):
+		// Best effort: the command may have exited in the meantime, so the Kill error is ignored.
+		_ = syscall.Kill(c.Process.Pid, syscall.SIGKILL)
+		return 0, errors.Errorf("timeout %ds for cmd(pid=%d): %s, %s", timeout/time.Second, c.Process.Pid, c.Path, c.Args)
+	case res := <-resCh:
+		return res.status, res.err
+	}
+}
+
 // Subscribe to process exit changes
 func (m *Monitor) Subscribe() chan runc.Exit {
 	c := make(chan runc.Exit, bufferSize)