From 53367bbd142472ec3919aa8103996ce5c77d30af Mon Sep 17 00:00:00 2001 From: Lantao Liu Date: Wed, 14 Jun 2017 02:02:20 +0000 Subject: [PATCH] Stop/remove all containers when stop/remove sandbox. Signed-off-by: Lantao Liu --- pkg/server/container_stop.go | 33 +++++---- pkg/server/sandbox_remove.go | 21 +++++- pkg/server/sandbox_remove_test.go | 55 +++++++++++++++ pkg/server/sandbox_stop.go | 20 +++++- pkg/server/sandbox_stop_test.go | 107 ++++++++++++++++++++++++++++++ 5 files changed, 221 insertions(+), 15 deletions(-) diff --git a/pkg/server/container_stop.go b/pkg/server/container_stop.go index 6fd49aa95..95abc7968 100644 --- a/pkg/server/container_stop.go +++ b/pkg/server/container_stop.go @@ -55,59 +55,68 @@ func (c *criContainerdService) StopContainer(ctx context.Context, r *runtime.Sto if err != nil { return nil, fmt.Errorf("an error occurred when try to find container %q: %v", r.GetContainerId(), err) } - id := r.GetContainerId() + + if err := c.stopContainer(ctx, meta, time.Duration(r.GetTimeout())*time.Second); err != nil { + return nil, err + } + + return &runtime.StopContainerResponse{}, nil +} + +// stopContainer stops a container based on the container metadata. +func (c *criContainerdService) stopContainer(ctx context.Context, meta *metadata.ContainerMetadata, timeout time.Duration) error { + id := meta.ID // Return without error if container is not running. This makes sure that // stop only takes real action after the container is started. if meta.State() != runtime.ContainerState_CONTAINER_RUNNING { glog.V(2).Infof("Container to stop %q is not running, current state %q", id, criContainerStateToString(meta.State())) - return &runtime.StopContainerResponse{}, nil + return nil } - if r.GetTimeout() > 0 { + if timeout > 0 { // TODO(random-liu): [P1] Get stop signal from image config. stopSignal := unix.SIGTERM glog.V(2).Infof("Stop container %q with signal %v", id, stopSignal) - _, err = c.taskService.Kill(ctx, &execution.KillRequest{ + _, err := c.taskService.Kill(ctx, &execution.KillRequest{ ContainerID: id, Signal: uint32(stopSignal), PidOrAll: &execution.KillRequest_All{All: true}, }) if err != nil { if !isContainerdGRPCNotFoundError(err) && !isRuncProcessAlreadyFinishedError(err) { - return nil, fmt.Errorf("failed to stop container %q: %v", id, err) + return fmt.Errorf("failed to stop container %q: %v", id, err) } // Move on to make sure container status is updated. } - err = c.waitContainerStop(ctx, id, time.Duration(r.GetTimeout())*time.Second) + err = c.waitContainerStop(ctx, id, timeout) if err == nil { - return &runtime.StopContainerResponse{}, nil + return nil } glog.Errorf("Stop container %q timed out: %v", id, err) } // Event handler will Delete the container from containerd after it handles the Exited event. glog.V(2).Infof("Kill container %q", id) - _, err = c.taskService.Kill(ctx, &execution.KillRequest{ + _, err := c.taskService.Kill(ctx, &execution.KillRequest{ ContainerID: id, Signal: uint32(unix.SIGKILL), PidOrAll: &execution.KillRequest_All{All: true}, }) if err != nil { if !isContainerdGRPCNotFoundError(err) && !isRuncProcessAlreadyFinishedError(err) { - return nil, fmt.Errorf("failed to kill container %q: %v", id, err) + return fmt.Errorf("failed to kill container %q: %v", id, err) } // Move on to make sure container status is updated. } // Wait for a fixed timeout until container stop is observed by event monitor. if err := c.waitContainerStop(ctx, id, killContainerTimeout); err != nil { - return nil, fmt.Errorf("an error occurs during waiting for container %q to stop: %v", - id, err) + return fmt.Errorf("an error occurs during waiting for container %q to stop: %v", id, err) } - return &runtime.StopContainerResponse{}, nil + return nil } // waitContainerStop polls container state until timeout exceeds or container is stopped. diff --git a/pkg/server/sandbox_remove.go b/pkg/server/sandbox_remove.go index d7d018aa3..298bb888e 100644 --- a/pkg/server/sandbox_remove.go +++ b/pkg/server/sandbox_remove.go @@ -53,8 +53,6 @@ func (c *criContainerdService) RemovePodSandbox(ctx context.Context, r *runtime. // Use the full sandbox id. id := sandbox.ID - // TODO(random-liu): [P2] Remove all containers in the sandbox. - // Return error if sandbox container is not fully stopped. // TODO(random-liu): [P0] Make sure network is torn down, may need to introduce a state. _, err = c.taskService.Info(ctx, &execution.InfoRequest{ContainerID: id}) @@ -73,6 +71,25 @@ func (c *criContainerdService) RemovePodSandbox(ctx context.Context, r *runtime. glog.V(5).Infof("Remove called for snapshot %q that does not exist", id) } + // Remove all containers inside the sandbox. + // NOTE(random-liu): container could still be created after this point, Kubelet should + // not rely on this behavior. + // TODO(random-liu): Introduce an intermediate state to avoid container creation after + // this point. + cntrs, err := c.containerStore.List() + if err != nil { + return nil, fmt.Errorf("failed to list all containers: %v", err) + } + for _, cntr := range cntrs { + if cntr.SandboxID != id { + continue + } + _, err = c.RemoveContainer(ctx, &runtime.RemoveContainerRequest{ContainerId: cntr.ID}) + if err != nil { + return nil, fmt.Errorf("failed to remove container %q: %v", cntr.ID, err) + } + } + // TODO(random-liu): [P1] Remove permanent namespace once used. // Cleanup the sandbox root directory. diff --git a/pkg/server/sandbox_remove_test.go b/pkg/server/sandbox_remove_test.go index 378d6fee5..13ffae32c 100644 --- a/pkg/server/sandbox_remove_test.go +++ b/pkg/server/sandbox_remove_test.go @@ -17,6 +17,7 @@ package server import ( "fmt" "testing" + "time" "github.com/containerd/containerd/api/services/containers" snapshotapi "github.com/containerd/containerd/api/services/snapshot" @@ -176,3 +177,57 @@ func TestRemovePodSandbox(t *testing.T) { assert.NotNil(t, res, "remove should be idempotent") } } + +func TestRemoveContainersInSandbox(t *testing.T) { + testID := "test-id" + testName := "test-name" + testMetadata := metadata.SandboxMetadata{ + ID: testID, + Name: testName, + } + testContainersMetadata := []*metadata.ContainerMetadata{ + { + ID: "test-cid-1", + Name: "test-cname-1", + SandboxID: testID, + FinishedAt: time.Now().UnixNano(), + }, + { + ID: "test-cid-2", + Name: "test-cname-2", + SandboxID: testID, + FinishedAt: time.Now().UnixNano(), + }, + { + ID: "test-cid-3", + Name: "test-cname-3", + SandboxID: "other-sandbox-id", + FinishedAt: time.Now().UnixNano(), + }, + } + + c := newTestCRIContainerdService() + WithFakeSnapshotClient(c) + assert.NoError(t, c.sandboxNameIndex.Reserve(testName, testID)) + assert.NoError(t, c.sandboxIDIndex.Add(testID)) + assert.NoError(t, c.sandboxStore.Create(testMetadata)) + for _, cntr := range testContainersMetadata { + assert.NoError(t, c.containerNameIndex.Reserve(cntr.Name, cntr.ID)) + assert.NoError(t, c.containerStore.Create(*cntr)) + } + + res, err := c.RemovePodSandbox(context.Background(), &runtime.RemovePodSandboxRequest{ + PodSandboxId: testID, + }) + assert.NoError(t, err) + assert.NotNil(t, res) + + meta, err := c.sandboxStore.Get(testID) + assert.Error(t, err) + assert.True(t, metadata.IsNotExistError(err)) + assert.Nil(t, meta, "sandbox metadata should be removed") + + cntrs, err := c.containerStore.List() + assert.NoError(t, err) + assert.Equal(t, testContainersMetadata[2:], cntrs, "container metadata should be removed") +} diff --git a/pkg/server/sandbox_stop.go b/pkg/server/sandbox_stop.go index 8482353da..2474951a0 100644 --- a/pkg/server/sandbox_stop.go +++ b/pkg/server/sandbox_stop.go @@ -46,6 +46,25 @@ func (c *criContainerdService) StopPodSandbox(ctx context.Context, r *runtime.St // Use the full sandbox id. id := sandbox.ID + // Stop all containers inside the sandbox. This terminates the container forcibly, + // and container may still be so production should not rely on this behavior. + // TODO(random-liu): Delete the sandbox container before this after permanent network namespace + // is introduced, so that no container will be started after that. + containers, err := c.containerStore.List() + if err != nil { + return nil, fmt.Errorf("failed to list all containers: %v", err) + } + for _, container := range containers { + if container.SandboxID != id { + continue + } + // Forcibly stop the container. Do not use `StopContainer`, because it introduces a race + // if a container is removed after list. + if err = c.stopContainer(ctx, container, 0); err != nil { + return nil, fmt.Errorf("failed to stop container %q: %v", container.ID, err) + } + } + // Teardown network for sandbox. _, err = c.os.Stat(sandbox.NetNS) if err == nil { @@ -70,6 +89,5 @@ func (c *criContainerdService) StopPodSandbox(ctx context.Context, r *runtime.St return nil, fmt.Errorf("failed to delete sandbox container %q: %v", id, err) } - // TODO(random-liu): [P2] Stop all containers inside the sandbox. return &runtime.StopPodSandboxResponse{}, nil } diff --git a/pkg/server/sandbox_stop_test.go b/pkg/server/sandbox_stop_test.go index d039da477..e23ee1468 100644 --- a/pkg/server/sandbox_stop_test.go +++ b/pkg/server/sandbox_stop_test.go @@ -20,7 +20,9 @@ import ( "errors" "os" "testing" + "time" + "github.com/containerd/containerd/api/services/execution" "github.com/containerd/containerd/api/types/task" "github.com/stretchr/testify/assert" "golang.org/x/net/context" @@ -166,3 +168,108 @@ func TestStopPodSandbox(t *testing.T) { assert.Equal(t, test.expectedCNICalls, fakeCNIPlugin.GetCalledNames()) } } + +func TestStopContainersInSandbox(t *testing.T) { + testID := "test-id" + testSandbox := metadata.SandboxMetadata{ + ID: testID, + Name: "test-name", + Config: &runtime.PodSandboxConfig{ + Metadata: &runtime.PodSandboxMetadata{ + Name: "test-name", + Uid: "test-uid", + Namespace: "test-ns", + }}, + NetNS: "test-netns", + } + testContainers := []metadata.ContainerMetadata{ + { + ID: "test-cid-1", + Name: "test-cname-1", + SandboxID: testID, + Pid: 2, + StartedAt: time.Now().UnixNano(), + }, + { + ID: "test-cid-2", + Name: "test-cname-2", + SandboxID: testID, + Pid: 3, + StartedAt: time.Now().UnixNano(), + }, + { + ID: "test-cid-3", + Name: "test-cname-3", + SandboxID: "other-sandbox-id", + Pid: 4, + StartedAt: time.Now().UnixNano(), + }, + } + testContainerdContainers := []task.Task{ + { + ID: testID, + Pid: 1, + Status: task.StatusRunning, + }, + { + ID: "test-cid-1", + Pid: 2, + Status: task.StatusRunning, + }, + { + ID: "test-cid-2", + Pid: 3, + Status: task.StatusRunning, + }, + { + ID: "test-cid-3", + Pid: 4, + Status: task.StatusRunning, + }, + } + + c := newTestCRIContainerdService() + fake := servertesting.NewFakeExecutionClient().WithEvents() + defer fake.Stop() + c.taskService = fake + fake.SetFakeTasks(testContainerdContainers) + assert.NoError(t, c.sandboxStore.Create(testSandbox)) + assert.NoError(t, c.sandboxIDIndex.Add(testID)) + for _, cntr := range testContainers { + assert.NoError(t, c.containerStore.Create(cntr)) + } + + fakeCNIPlugin := c.netPlugin.(*servertesting.FakeCNIPlugin) + fakeCNIPlugin.SetFakePodNetwork(testSandbox.NetNS, testSandbox.Config.GetMetadata().GetNamespace(), + testSandbox.Config.GetMetadata().GetName(), testID, sandboxStatusTestIP) + + eventClient, err := fake.Events(context.Background(), &execution.EventsRequest{}) + assert.NoError(t, err) + // Start a simple test event monitor. + go func(e execution.Tasks_EventsClient) { + for { + if err := c.handleEventStream(e); err != nil { // nolint: vetshadow + return + } + } + }(eventClient) + res, err := c.StopPodSandbox(context.Background(), &runtime.StopPodSandboxRequest{ + PodSandboxId: testID, + }) + assert.NoError(t, err) + assert.NotNil(t, res) + + cntrs, err := c.containerStore.List() + assert.NoError(t, err) + assert.Len(t, cntrs, 3) + expectedStates := map[string]runtime.ContainerState{ + "test-cid-1": runtime.ContainerState_CONTAINER_EXITED, + "test-cid-2": runtime.ContainerState_CONTAINER_EXITED, + "test-cid-3": runtime.ContainerState_CONTAINER_RUNNING, + } + for id, expected := range expectedStates { + cntr, err := c.containerStore.Get(id) + assert.NoError(t, err) + assert.Equal(t, expected, cntr.State()) + } +}