From 71cecedc4404b0851675de05db321f0a2d37d2d0 Mon Sep 17 00:00:00 2001 From: "Justin Terry (VM)" Date: Tue, 21 May 2019 13:53:46 -0700 Subject: [PATCH] StopContainer should block unless client context is canceled A call to StopContainer should only return if the client context is canceled or its deadline was exceeded. The Timeout parameter on StopContainerRequest is now used as the time AFTER sending the stop signal before the SIGKILL is delivered. The call will remain until the container has exited or the client context has finished. Signed-off-by: Justin Terry (VM) --- pkg/server/container_stop.go | 43 ++++++++++++++++--------------- pkg/server/container_stop_test.go | 7 ++++- 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/pkg/server/container_stop.go b/pkg/server/container_stop.go index 0165bcf46..388ea65c7 100644 --- a/pkg/server/container_stop.go +++ b/pkg/server/container_stop.go @@ -33,13 +33,6 @@ import ( containerstore "github.com/containerd/cri/pkg/store/container" ) -// killContainerTimeout is the timeout that we wait for the container to -// be SIGKILLed. -// The timeout is set to 1 min, because the default CRI operation timeout -// for StopContainer is (2 min + stop timeout). Set to 1 min, so that we -// have enough time for kill(all=true) and kill(all=false). -const killContainerTimeout = 1 * time.Minute - // StopContainer stops a running container with a grace period (i.e., timeout). func (c *criService) StopContainer(ctx context.Context, r *runtime.StopContainerRequest) (*runtime.StopContainerResponse, error) { // Get container config from container store. @@ -141,11 +134,21 @@ func (c *criService) stopContainer(ctx context.Context, container containerstore return errors.Wrapf(err, "failed to stop container %q", id) } - if err = c.waitContainerStop(ctx, container, timeout); err == nil || errors.Cause(err) == ctx.Err() { - // Do not SIGKILL container if the context is cancelled. - return err + sigTermCtx, sigTermCtxCancel := context.WithTimeout(ctx, timeout) + err = c.waitContainerStop(sigTermCtx, container) + if err == nil { + // Container stopped on first signal no need for SIGKILL + sigTermCtxCancel() + return nil } - logrus.WithError(err).Errorf("An error occurs during waiting for container %q to be stopped", id) + // If the parent context was cancelled or exceeded return immediately + if ctx.Err() != nil { + sigTermCtxCancel() + return ctx.Err() + } + sigTermCtxCancel() + // sigTermCtx was exceeded. Send SIGKILL + logrus.Debugf("Stop container %q with signal %v timed out", id, sig) } logrus.Infof("Kill container %q", id) @@ -154,21 +157,19 @@ func (c *criService) stopContainer(ctx context.Context, container containerstore } // Wait for a fixed timeout until container stop is observed by event monitor. - if err = c.waitContainerStop(ctx, container, killContainerTimeout); err == nil { - return nil + err = c.waitContainerStop(ctx, container) + if err != nil { + return errors.Wrapf(err, "an error occurs during waiting for container %q to be killed", id) } - return errors.Wrapf(err, "an error occurs during waiting for container %q to be killed", id) + return nil } -// waitContainerStop waits for container to be stopped until timeout exceeds or context is cancelled. -func (c *criService) waitContainerStop(ctx context.Context, container containerstore.Container, timeout time.Duration) error { - timeoutTimer := time.NewTimer(timeout) - defer timeoutTimer.Stop() +// waitContainerStop waits for container to be stopped until context is +// cancelled or the context deadline is exceeded. +func (c *criService) waitContainerStop(ctx context.Context, container containerstore.Container) error { select { case <-ctx.Done(): - return errors.Wrapf(ctx.Err(), "wait container %q is cancelled", container.ID) - case <-timeoutTimer.C: - return errors.Errorf("wait container %q stop timeout", container.ID) + return errors.Wrapf(ctx.Err(), "wait container %q", container.ID) case <-container.Stopped(): return nil } diff --git a/pkg/server/container_stop_test.go b/pkg/server/container_stop_test.go index 9da58a0c4..7274a05f5 100644 --- a/pkg/server/container_stop_test.go +++ b/pkg/server/container_stop_test.go @@ -74,7 +74,12 @@ func TestWaitContainerStop(t *testing.T) { cancel() ctx = cancelledCtx } - err = c.waitContainerStop(ctx, container, test.timeout) + if test.timeout > 0 { + timeoutCtx, cancel := context.WithTimeout(ctx, test.timeout) + defer cancel() + ctx = timeoutCtx + } + err = c.waitContainerStop(ctx, container) assert.Equal(t, test.expectErr, err != nil, desc) } }