diff --git a/pkg/server/container_execsync.go b/pkg/server/container_execsync.go index 6116ee523..2358b9941 100644 --- a/pkg/server/container_execsync.go +++ b/pkg/server/container_execsync.go @@ -164,16 +164,15 @@ func (c *criService) execInContainer(ctx context.Context, id string, opts execOp }, }) - var timeoutCh <-chan time.Time - if opts.timeout == 0 { - // Do not set timeout if it's 0. - timeoutCh = make(chan time.Time) - } else { - timeoutCh = time.After(opts.timeout) + execCtx := ctx + if opts.timeout > 0 { + var execCtxCancel context.CancelFunc + execCtx, execCtxCancel = context.WithTimeout(ctx, opts.timeout) + defer execCtxCancel() } + select { - case <-timeoutCh: - //TODO(Abhi) Use context.WithDeadline instead of timeout. + case <-execCtx.Done(): // Ignore the not found error because the process may exit itself before killing. if err := process.Kill(ctx, syscall.SIGKILL); err != nil && !errdefs.IsNotFound(err) { return nil, errors.Wrapf(err, "failed to kill exec %q", execID) @@ -184,7 +183,7 @@ func (c *criService) execInContainer(ctx context.Context, id string, opts execOp execID, exitRes.ExitCode(), exitRes.Error()) <-attachDone logrus.Debugf("Stream pipe for exec process %q done", execID) - return nil, errors.Errorf("timeout %v exceeded", opts.timeout) + return nil, errors.Wrapf(execCtx.Err(), "timeout %v exceeded", opts.timeout) case exitRes := <-exitCh: code, _, err := exitRes.Result() logrus.Infof("Exec process %q exits with exit code %d and error %v", execID, code, err) diff --git a/pkg/server/container_stop.go b/pkg/server/container_stop.go index 0165bcf46..388ea65c7 100644 --- a/pkg/server/container_stop.go +++ b/pkg/server/container_stop.go @@ -33,13 +33,6 @@ import ( containerstore "github.com/containerd/cri/pkg/store/container" ) -// killContainerTimeout is the timeout that we wait for the container to -// be SIGKILLed. -// The timeout is set to 1 min, because the default CRI operation timeout -// for StopContainer is (2 min + stop timeout). Set to 1 min, so that we -// have enough time for kill(all=true) and kill(all=false). -const killContainerTimeout = 1 * time.Minute - // StopContainer stops a running container with a grace period (i.e., timeout). func (c *criService) StopContainer(ctx context.Context, r *runtime.StopContainerRequest) (*runtime.StopContainerResponse, error) { // Get container config from container store. @@ -141,11 +134,21 @@ func (c *criService) stopContainer(ctx context.Context, container containerstore return errors.Wrapf(err, "failed to stop container %q", id) } - if err = c.waitContainerStop(ctx, container, timeout); err == nil || errors.Cause(err) == ctx.Err() { - // Do not SIGKILL container if the context is cancelled. - return err + sigTermCtx, sigTermCtxCancel := context.WithTimeout(ctx, timeout) + err = c.waitContainerStop(sigTermCtx, container) + if err == nil { + // Container stopped on first signal no need for SIGKILL + sigTermCtxCancel() + return nil } - logrus.WithError(err).Errorf("An error occurs during waiting for container %q to be stopped", id) + // If the parent context was cancelled or exceeded return immediately + if ctx.Err() != nil { + sigTermCtxCancel() + return ctx.Err() + } + sigTermCtxCancel() + // sigTermCtx was exceeded. Send SIGKILL + logrus.Debugf("Stop container %q with signal %v timed out", id, sig) } logrus.Infof("Kill container %q", id) @@ -154,21 +157,19 @@ func (c *criService) stopContainer(ctx context.Context, container containerstore } // Wait for a fixed timeout until container stop is observed by event monitor. - if err = c.waitContainerStop(ctx, container, killContainerTimeout); err == nil { - return nil + err = c.waitContainerStop(ctx, container) + if err != nil { + return errors.Wrapf(err, "an error occurs during waiting for container %q to be killed", id) } - return errors.Wrapf(err, "an error occurs during waiting for container %q to be killed", id) + return nil } -// waitContainerStop waits for container to be stopped until timeout exceeds or context is cancelled. -func (c *criService) waitContainerStop(ctx context.Context, container containerstore.Container, timeout time.Duration) error { - timeoutTimer := time.NewTimer(timeout) - defer timeoutTimer.Stop() +// waitContainerStop waits for container to be stopped until context is +// cancelled or the context deadline is exceeded. +func (c *criService) waitContainerStop(ctx context.Context, container containerstore.Container) error { select { case <-ctx.Done(): - return errors.Wrapf(ctx.Err(), "wait container %q is cancelled", container.ID) - case <-timeoutTimer.C: - return errors.Errorf("wait container %q stop timeout", container.ID) + return errors.Wrapf(ctx.Err(), "wait container %q", container.ID) case <-container.Stopped(): return nil } diff --git a/pkg/server/container_stop_test.go b/pkg/server/container_stop_test.go index 9da58a0c4..7274a05f5 100644 --- a/pkg/server/container_stop_test.go +++ b/pkg/server/container_stop_test.go @@ -74,7 +74,12 @@ func TestWaitContainerStop(t *testing.T) { cancel() ctx = cancelledCtx } - err = c.waitContainerStop(ctx, container, test.timeout) + if test.timeout > 0 { + timeoutCtx, cancel := context.WithTimeout(ctx, test.timeout) + defer cancel() + ctx = timeoutCtx + } + err = c.waitContainerStop(ctx, container) assert.Equal(t, test.expectErr, err != nil, desc) } } diff --git a/pkg/server/sandbox_stop.go b/pkg/server/sandbox_stop.go index 2ca8c9661..a3d5a2f11 100644 --- a/pkg/server/sandbox_stop.go +++ b/pkg/server/sandbox_stop.go @@ -142,18 +142,15 @@ func (c *criService) stopSandboxContainer(ctx context.Context, sandbox sandboxst return errors.Wrap(err, "failed to kill sandbox container") } - return c.waitSandboxStop(ctx, sandbox, killContainerTimeout) + return c.waitSandboxStop(ctx, sandbox) } -// waitSandboxStop waits for sandbox to be stopped until timeout exceeds or context is cancelled. -func (c *criService) waitSandboxStop(ctx context.Context, sandbox sandboxstore.Sandbox, timeout time.Duration) error { - timeoutTimer := time.NewTimer(timeout) - defer timeoutTimer.Stop() +// waitSandboxStop waits for sandbox to be stopped until context is cancelled or +// the context deadline is exceeded. +func (c *criService) waitSandboxStop(ctx context.Context, sandbox sandboxstore.Sandbox) error { select { case <-ctx.Done(): - return errors.Wrapf(ctx.Err(), "wait sandbox container %q is cancelled", sandbox.ID) - case <-timeoutTimer.C: - return errors.Errorf("wait sandbox container %q stop timeout", sandbox.ID) + return errors.Wrapf(ctx.Err(), "wait sandbox container %q", sandbox.ID) case <-sandbox.Stopped(): return nil } diff --git a/pkg/server/sandbox_stop_test.go b/pkg/server/sandbox_stop_test.go index 93a4f29a8..a1ba500bf 100644 --- a/pkg/server/sandbox_stop_test.go +++ b/pkg/server/sandbox_stop_test.go @@ -62,7 +62,12 @@ func TestWaitSandboxStop(t *testing.T) { cancel() ctx = cancelledCtx } - err := c.waitSandboxStop(ctx, sandbox, test.timeout) + if test.timeout > 0 { + timeoutCtx, cancel := context.WithTimeout(ctx, test.timeout) + defer cancel() + ctx = timeoutCtx + } + err := c.waitSandboxStop(ctx, sandbox) assert.Equal(t, test.expectErr, err != nil, desc) } }