StopContainer should block unless client context is canceled
A call to StopContainer should only return if the client context is canceled or its deadline was exceeded. The Timeout parameter on StopContainerRequest is now used as the time AFTER sending the stop signal before the SIGKILL is delivered. The call will remain until the container has exited or the client context has finished. Signed-off-by: Justin Terry (VM) <juterry@microsoft.com>
This commit is contained in:
parent
d7c3ecd0fb
commit
71cecedc44
@ -33,13 +33,6 @@ import (
|
|||||||
containerstore "github.com/containerd/cri/pkg/store/container"
|
containerstore "github.com/containerd/cri/pkg/store/container"
|
||||||
)
|
)
|
||||||
|
|
||||||
// killContainerTimeout is the timeout that we wait for the container to
|
|
||||||
// be SIGKILLed.
|
|
||||||
// The timeout is set to 1 min, because the default CRI operation timeout
|
|
||||||
// for StopContainer is (2 min + stop timeout). Set to 1 min, so that we
|
|
||||||
// have enough time for kill(all=true) and kill(all=false).
|
|
||||||
const killContainerTimeout = 1 * time.Minute
|
|
||||||
|
|
||||||
// StopContainer stops a running container with a grace period (i.e., timeout).
|
// StopContainer stops a running container with a grace period (i.e., timeout).
|
||||||
func (c *criService) StopContainer(ctx context.Context, r *runtime.StopContainerRequest) (*runtime.StopContainerResponse, error) {
|
func (c *criService) StopContainer(ctx context.Context, r *runtime.StopContainerRequest) (*runtime.StopContainerResponse, error) {
|
||||||
// Get container config from container store.
|
// Get container config from container store.
|
||||||
@ -141,11 +134,21 @@ func (c *criService) stopContainer(ctx context.Context, container containerstore
|
|||||||
return errors.Wrapf(err, "failed to stop container %q", id)
|
return errors.Wrapf(err, "failed to stop container %q", id)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err = c.waitContainerStop(ctx, container, timeout); err == nil || errors.Cause(err) == ctx.Err() {
|
sigTermCtx, sigTermCtxCancel := context.WithTimeout(ctx, timeout)
|
||||||
// Do not SIGKILL container if the context is cancelled.
|
err = c.waitContainerStop(sigTermCtx, container)
|
||||||
return err
|
if err == nil {
|
||||||
|
// Container stopped on first signal no need for SIGKILL
|
||||||
|
sigTermCtxCancel()
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
logrus.WithError(err).Errorf("An error occurs during waiting for container %q to be stopped", id)
|
// If the parent context was cancelled or exceeded return immediately
|
||||||
|
if ctx.Err() != nil {
|
||||||
|
sigTermCtxCancel()
|
||||||
|
return ctx.Err()
|
||||||
|
}
|
||||||
|
sigTermCtxCancel()
|
||||||
|
// sigTermCtx was exceeded. Send SIGKILL
|
||||||
|
logrus.Debugf("Stop container %q with signal %v timed out", id, sig)
|
||||||
}
|
}
|
||||||
|
|
||||||
logrus.Infof("Kill container %q", id)
|
logrus.Infof("Kill container %q", id)
|
||||||
@ -154,21 +157,19 @@ func (c *criService) stopContainer(ctx context.Context, container containerstore
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Wait for a fixed timeout until container stop is observed by event monitor.
|
// Wait for a fixed timeout until container stop is observed by event monitor.
|
||||||
if err = c.waitContainerStop(ctx, container, killContainerTimeout); err == nil {
|
err = c.waitContainerStop(ctx, container)
|
||||||
return nil
|
if err != nil {
|
||||||
}
|
|
||||||
return errors.Wrapf(err, "an error occurs during waiting for container %q to be killed", id)
|
return errors.Wrapf(err, "an error occurs during waiting for container %q to be killed", id)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// waitContainerStop waits for container to be stopped until timeout exceeds or context is cancelled.
|
// waitContainerStop waits for container to be stopped until context is
|
||||||
func (c *criService) waitContainerStop(ctx context.Context, container containerstore.Container, timeout time.Duration) error {
|
// cancelled or the context deadline is exceeded.
|
||||||
timeoutTimer := time.NewTimer(timeout)
|
func (c *criService) waitContainerStop(ctx context.Context, container containerstore.Container) error {
|
||||||
defer timeoutTimer.Stop()
|
|
||||||
select {
|
select {
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
return errors.Wrapf(ctx.Err(), "wait container %q is cancelled", container.ID)
|
return errors.Wrapf(ctx.Err(), "wait container %q", container.ID)
|
||||||
case <-timeoutTimer.C:
|
|
||||||
return errors.Errorf("wait container %q stop timeout", container.ID)
|
|
||||||
case <-container.Stopped():
|
case <-container.Stopped():
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -74,7 +74,12 @@ func TestWaitContainerStop(t *testing.T) {
|
|||||||
cancel()
|
cancel()
|
||||||
ctx = cancelledCtx
|
ctx = cancelledCtx
|
||||||
}
|
}
|
||||||
err = c.waitContainerStop(ctx, container, test.timeout)
|
if test.timeout > 0 {
|
||||||
|
timeoutCtx, cancel := context.WithTimeout(ctx, test.timeout)
|
||||||
|
defer cancel()
|
||||||
|
ctx = timeoutCtx
|
||||||
|
}
|
||||||
|
err = c.waitContainerStop(ctx, container)
|
||||||
assert.Equal(t, test.expectErr, err != nil, desc)
|
assert.Equal(t, test.expectErr, err != nil, desc)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user