From 1a0228d5207d16a8988aee51f488b4543ab767ab Mon Sep 17 00:00:00 2001 From: Lantao Liu Date: Wed, 27 Mar 2019 00:49:41 -0700 Subject: [PATCH] Do not SIGKILL container if container stop is cancelled. Signed-off-by: Lantao Liu --- integration/container_stop_test.go | 66 ++++++++++++++++++++++++++++++ pkg/server/container_stop.go | 7 ++-- pkg/server/sandbox_stop.go | 2 +- 3 files changed, 71 insertions(+), 4 deletions(-) diff --git a/integration/container_stop_test.go b/integration/container_stop_test.go index 3eb994a32..c681d7d8d 100644 --- a/integration/container_stop_test.go +++ b/integration/container_stop_test.go @@ -17,7 +17,9 @@ limitations under the License. package integration import ( + "context" "testing" + "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -71,3 +73,67 @@ func TestSharedPidMultiProcessContainerStop(t *testing.T) { }) } } + +func TestContainerStopCancellation(t *testing.T) { + t.Log("Create a pod sandbox") + sbConfig := PodSandboxConfig("sandbox", "cancel-container-stop") + sb, err := runtimeService.RunPodSandbox(sbConfig, *runtimeHandler) + require.NoError(t, err) + defer func() { + assert.NoError(t, runtimeService.StopPodSandbox(sb)) + assert.NoError(t, runtimeService.RemovePodSandbox(sb)) + }() + + const ( + testImage = "busybox" + containerName = "test-container" + ) + t.Logf("Pull test image %q", testImage) + img, err := imageService.PullImage(&runtime.ImageSpec{Image: testImage}, nil, sbConfig) + require.NoError(t, err) + defer func() { + assert.NoError(t, imageService.RemoveImage(&runtime.ImageSpec{Image: img})) + }() + + t.Log("Create a container which traps sigterm") + cnConfig := ContainerConfig( + containerName, + testImage, + WithCommand("sh", "-c", `trap "echo ignore sigterm" TERM; sleep 1000`), + ) + cn, err := runtimeService.CreateContainer(sb, cnConfig, sbConfig) + require.NoError(t, err) + + t.Log("Start the container") + require.NoError(t, runtimeService.StartContainer(cn)) + + t.Log("Stop the container with 3s timeout, but 1s context timeout") + // Note that with container pid namespace, the sleep process + // is pid 1, and SIGTERM sent by `StopContainer` will be ignored. + rawClient, err := RawRuntimeClient() + require.NoError(t, err) + ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) + defer cancel() + _, err = rawClient.StopContainer(ctx, &runtime.StopContainerRequest{ + ContainerId: cn, + Timeout: 3, + }) + assert.Error(t, err) + + t.Log("The container should still be running even after 5 seconds") + assert.NoError(t, Consistently(func() (bool, error) { + s, err := runtimeService.ContainerStatus(cn) + if err != nil { + return false, err + } + return s.GetState() == runtime.ContainerState_CONTAINER_RUNNING, nil + }, 100*time.Millisecond, 5*time.Second)) + + t.Log("Stop the container with 1s timeout, without shorter context timeout") + assert.NoError(t, runtimeService.StopContainer(cn, 1)) + + t.Log("The container state should be exited") + s, err := runtimeService.ContainerStatus(cn) + require.NoError(t, err) + assert.Equal(t, s.GetState(), runtime.ContainerState_CONTAINER_EXITED) +} diff --git a/pkg/server/container_stop.go b/pkg/server/container_stop.go index c18091cce..0ad59deb6 100644 --- a/pkg/server/container_stop.go +++ b/pkg/server/container_stop.go @@ -142,8 +142,9 @@ func (c *criService) stopContainer(ctx context.Context, container containerstore return errors.Wrapf(err, "failed to stop container %q", id) } - if err = c.waitContainerStop(ctx, container, timeout); err == nil { - return nil + if err = c.waitContainerStop(ctx, container, timeout); err == nil || errors.Cause(err) == ctx.Err() { + // Do not SIGKILL container if the context is cancelled. + return err } logrus.WithError(err).Errorf("An error occurs during waiting for container %q to be stopped", id) } @@ -166,7 +167,7 @@ func (c *criService) waitContainerStop(ctx context.Context, container containers defer timeoutTimer.Stop() select { case <-ctx.Done(): - return errors.Errorf("wait container %q is cancelled", container.ID) + return errors.Wrapf(ctx.Err(), "wait container %q is cancelled", container.ID) case <-timeoutTimer.C: return errors.Errorf("wait container %q stop timeout", container.ID) case <-container.Stopped(): diff --git a/pkg/server/sandbox_stop.go b/pkg/server/sandbox_stop.go index 69e2038b1..a648f6fa2 100644 --- a/pkg/server/sandbox_stop.go +++ b/pkg/server/sandbox_stop.go @@ -144,7 +144,7 @@ func (c *criService) waitSandboxStop(ctx context.Context, sandbox sandboxstore.S defer timeoutTimer.Stop() select { case <-ctx.Done(): - return errors.Errorf("wait sandbox container %q is cancelled", sandbox.ID) + return errors.Wrapf(ctx.Err(), "wait sandbox container %q is cancelled", sandbox.ID) case <-timeoutTimer.C: return errors.Errorf("wait sandbox container %q stop timeout", sandbox.ID) case <-sandbox.Stopped():