Merge pull request #1152 from jterry75/deadline_exceeded

Return gRPC codes.DeadlineExceeded for all timeout operations
This commit is contained in:
Lantao Liu 2019-07-25 14:58:29 -07:00 committed by GitHub
commit 74d2b6947c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 47 additions and 40 deletions

View File

@ -164,16 +164,15 @@ func (c *criService) execInContainer(ctx context.Context, id string, opts execOp
},
})
var timeoutCh <-chan time.Time
if opts.timeout == 0 {
// Do not set timeout if it's 0.
timeoutCh = make(chan time.Time)
} else {
timeoutCh = time.After(opts.timeout)
execCtx := ctx
if opts.timeout > 0 {
var execCtxCancel context.CancelFunc
execCtx, execCtxCancel = context.WithTimeout(ctx, opts.timeout)
defer execCtxCancel()
}
select {
case <-timeoutCh:
//TODO(Abhi) Use context.WithDeadline instead of timeout.
case <-execCtx.Done():
// Ignore the not found error because the process may exit itself before killing.
if err := process.Kill(ctx, syscall.SIGKILL); err != nil && !errdefs.IsNotFound(err) {
return nil, errors.Wrapf(err, "failed to kill exec %q", execID)
@ -184,7 +183,7 @@ func (c *criService) execInContainer(ctx context.Context, id string, opts execOp
execID, exitRes.ExitCode(), exitRes.Error())
<-attachDone
logrus.Debugf("Stream pipe for exec process %q done", execID)
return nil, errors.Errorf("timeout %v exceeded", opts.timeout)
return nil, errors.Wrapf(execCtx.Err(), "timeout %v exceeded", opts.timeout)
case exitRes := <-exitCh:
code, _, err := exitRes.Result()
logrus.Infof("Exec process %q exits with exit code %d and error %v", execID, code, err)

View File

@ -33,13 +33,6 @@ import (
containerstore "github.com/containerd/cri/pkg/store/container"
)
// killContainerTimeout is the timeout that we wait for the container to
// be SIGKILLed.
// The timeout is set to 1 min, because the default CRI operation timeout
// for StopContainer is (2 min + stop timeout). Set to 1 min, so that we
// have enough time for kill(all=true) and kill(all=false).
const killContainerTimeout = 1 * time.Minute
// StopContainer stops a running container with a grace period (i.e., timeout).
func (c *criService) StopContainer(ctx context.Context, r *runtime.StopContainerRequest) (*runtime.StopContainerResponse, error) {
// Get container config from container store.
@ -141,11 +134,21 @@ func (c *criService) stopContainer(ctx context.Context, container containerstore
return errors.Wrapf(err, "failed to stop container %q", id)
}
if err = c.waitContainerStop(ctx, container, timeout); err == nil || errors.Cause(err) == ctx.Err() {
// Do not SIGKILL container if the context is cancelled.
return err
sigTermCtx, sigTermCtxCancel := context.WithTimeout(ctx, timeout)
err = c.waitContainerStop(sigTermCtx, container)
if err == nil {
// Container stopped on first signal no need for SIGKILL
sigTermCtxCancel()
return nil
}
logrus.WithError(err).Errorf("An error occurs during waiting for container %q to be stopped", id)
// If the parent context was cancelled or exceeded return immediately
if ctx.Err() != nil {
sigTermCtxCancel()
return ctx.Err()
}
sigTermCtxCancel()
// sigTermCtx was exceeded. Send SIGKILL
logrus.Debugf("Stop container %q with signal %v timed out", id, sig)
}
logrus.Infof("Kill container %q", id)
@ -154,21 +157,19 @@ func (c *criService) stopContainer(ctx context.Context, container containerstore
}
// Wait for a fixed timeout until container stop is observed by event monitor.
if err = c.waitContainerStop(ctx, container, killContainerTimeout); err == nil {
return nil
err = c.waitContainerStop(ctx, container)
if err != nil {
return errors.Wrapf(err, "an error occurs during waiting for container %q to be killed", id)
}
return errors.Wrapf(err, "an error occurs during waiting for container %q to be killed", id)
return nil
}
// waitContainerStop waits for container to be stopped until timeout exceeds or context is cancelled.
func (c *criService) waitContainerStop(ctx context.Context, container containerstore.Container, timeout time.Duration) error {
timeoutTimer := time.NewTimer(timeout)
defer timeoutTimer.Stop()
// waitContainerStop waits for container to be stopped until context is
// cancelled or the context deadline is exceeded.
func (c *criService) waitContainerStop(ctx context.Context, container containerstore.Container) error {
select {
case <-ctx.Done():
return errors.Wrapf(ctx.Err(), "wait container %q is cancelled", container.ID)
case <-timeoutTimer.C:
return errors.Errorf("wait container %q stop timeout", container.ID)
return errors.Wrapf(ctx.Err(), "wait container %q", container.ID)
case <-container.Stopped():
return nil
}

View File

@ -74,7 +74,12 @@ func TestWaitContainerStop(t *testing.T) {
cancel()
ctx = cancelledCtx
}
err = c.waitContainerStop(ctx, container, test.timeout)
if test.timeout > 0 {
timeoutCtx, cancel := context.WithTimeout(ctx, test.timeout)
defer cancel()
ctx = timeoutCtx
}
err = c.waitContainerStop(ctx, container)
assert.Equal(t, test.expectErr, err != nil, desc)
}
}

View File

@ -142,18 +142,15 @@ func (c *criService) stopSandboxContainer(ctx context.Context, sandbox sandboxst
return errors.Wrap(err, "failed to kill sandbox container")
}
return c.waitSandboxStop(ctx, sandbox, killContainerTimeout)
return c.waitSandboxStop(ctx, sandbox)
}
// waitSandboxStop waits for sandbox to be stopped until timeout exceeds or context is cancelled.
func (c *criService) waitSandboxStop(ctx context.Context, sandbox sandboxstore.Sandbox, timeout time.Duration) error {
timeoutTimer := time.NewTimer(timeout)
defer timeoutTimer.Stop()
// waitSandboxStop waits for sandbox to be stopped until context is cancelled or
// the context deadline is exceeded.
func (c *criService) waitSandboxStop(ctx context.Context, sandbox sandboxstore.Sandbox) error {
select {
case <-ctx.Done():
return errors.Wrapf(ctx.Err(), "wait sandbox container %q is cancelled", sandbox.ID)
case <-timeoutTimer.C:
return errors.Errorf("wait sandbox container %q stop timeout", sandbox.ID)
return errors.Wrapf(ctx.Err(), "wait sandbox container %q", sandbox.ID)
case <-sandbox.Stopped():
return nil
}

View File

@ -62,7 +62,12 @@ func TestWaitSandboxStop(t *testing.T) {
cancel()
ctx = cancelledCtx
}
err := c.waitSandboxStop(ctx, sandbox, test.timeout)
if test.timeout > 0 {
timeoutCtx, cancel := context.WithTimeout(ctx, test.timeout)
defer cancel()
ctx = timeoutCtx
}
err := c.waitSandboxStop(ctx, sandbox)
assert.Equal(t, test.expectErr, err != nil, desc)
}
}