From 31a710c492ecb225ecbc4d43fddc2c36ec962f16 Mon Sep 17 00:00:00 2001 From: Wei Fu Date: Fri, 21 Jan 2022 00:20:34 +0800 Subject: [PATCH] fix: should not send 137 code event if cmd is notfound ShimV2 has shim.Delete command to cleanup task's temporary resource, like bundle folder. Since the shim server exits and no persistent store is for task's exit code, the result of shim.Delete is always 137 exit code, like the task has been killed. And the result of shim.Delete can be used as task event only when the shim server is killed somehow after container is running. Therefore, dockerd, which watches task exit event to update status of container, can report correct status. Back to the issue #6429, the container is not running because the entrypoint is not found. Based on this design, we should not send 137 exitcode event to subscriber. This commit is aimed to remove shim instance first and then the `cleanupAfterDeadShim` should not send event. Similar Issue: #4769 Fix #6429 Signed-off-by: Wei Fu --- integration/client/container_test.go | 61 ++++++++++++++++++++++++++++ runtime/v2/manager.go | 8 ++-- 2 files changed, 65 insertions(+), 4 deletions(-) diff --git a/integration/client/container_test.go b/integration/client/container_test.go index c8aae1eb1..7d6078af0 100644 --- a/integration/client/container_test.go +++ b/integration/client/container_test.go @@ -1946,6 +1946,67 @@ func TestRegressionIssue4769(t *testing.T) { } } +// TestRegressionIssue6429 should not send exit event out if command is not found. +// +// Issue: https://github.com/containerd/containerd/issues/6429. +func TestRegressionIssue6429(t *testing.T) { + t.Parallel() + + if runtime.GOOS == "windows" { + t.Skip("Test relies on runc") + } + + client, err := newClient(t, address) + if err != nil { + t.Fatal(err) + } + defer client.Close() + + // use unique namespace to get unique task events + id := t.Name() + ns := fmt.Sprintf("%s-%s", testNamespace, id) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + ctx = namespaces.WithNamespace(ctx, ns) + ctx = logtest.WithT(ctx, t) + + image, err := client.Pull(ctx, testImage, WithPullUnpack) + if err != nil { + t.Fatal(err) + } + defer client.ImageService().Delete(ctx, testImage, images.SynchronousDelete()) + + container, err := client.NewContainer(ctx, id, + WithNewSnapshot(id, image), + WithNewSpec(oci.WithImageConfig(image), withProcessArgs("notfound404")), + WithRuntime(client.Runtime(), nil), + ) + if err != nil { + t.Fatal(err) + } + defer container.Delete(ctx, WithSnapshotCleanup) + + eventStream, errC := client.EventService().Subscribe(ctx, "namespace=="+ns+",topic~=|^/tasks/exit|") + + if _, err := container.NewTask(ctx, empty()); err == nil { + t.Fatalf("expected error but got nil") + } + + var timeout = 10 * time.Second + + // start to check events + select { + case et := <-eventStream: + t.Fatal(fmt.Errorf("unexpected task exit event: %+v", et)) + case err := <-errC: + t.Fatal(fmt.Errorf("unexpected error from event service: %w", err)) + + case <-time.After(timeout): + } +} + func TestDaemonRestart(t *testing.T) { client, err := newClient(t, address) if err != nil { diff --git a/runtime/v2/manager.go b/runtime/v2/manager.go index 5b8b579ea..1927cbb3f 100644 --- a/runtime/v2/manager.go +++ b/runtime/v2/manager.go @@ -375,13 +375,13 @@ func (m *TaskManager) Create(ctx context.Context, taskID string, opts runtime.Cr shim := process.(*shimTask) t, err := shim.Create(ctx, opts) if err != nil { + // NOTE: ctx contains required namespace information. + m.manager.shims.Delete(ctx, taskID) + dctx, cancel := timeout.WithContext(context.Background(), cleanupTimeout) defer cancel() - _, errShim := shim.delete(dctx, func(ctx context.Context, id string) { - m.manager.shims.Delete(ctx, id) - }) - + _, errShim := shim.delete(dctx, func(context.Context, string) {}) if errShim != nil { if errdefs.IsDeadlineExceeded(errShim) { dctx, cancel = timeout.WithContext(context.Background(), cleanupTimeout)