fix: should not send 137 code event if cmd is notfound

ShimV2 has shim.Delete command to cleanup task's temporary resource,
like bundle folder. Since the shim server exits and no persistent store
is for task's exit code, the result of shim.Delete is always 137 exit
code, like the task has been killed.

And the result of shim.Delete can be used as task event only when the
shim server is killed somehow after container is running. Therefore,
dockerd, which watches task exit event to update status of container,
can report correct status.

Back to the issue #6429, the container is not running because the
entrypoint is not found. Based on this design, we should not send
137 exitcode event to subscriber.

This commit is aimed to remove shim instance first and then the
`cleanupAfterDeadShim` should not send event.

Similar Issue: #4769
Fix #6429

Signed-off-by: Wei Fu <fuweid89@gmail.com>
This commit is contained in:
Wei Fu 2022-01-21 00:20:34 +08:00
parent ad771115b8
commit 31a710c492
2 changed files with 65 additions and 4 deletions

View File

@ -1946,6 +1946,67 @@ func TestRegressionIssue4769(t *testing.T) {
}
}
// TestRegressionIssue6429 should not send exit event out if command is not found.
//
// Issue: https://github.com/containerd/containerd/issues/6429.
func TestRegressionIssue6429(t *testing.T) {
t.Parallel()
if runtime.GOOS == "windows" {
t.Skip("Test relies on runc")
}
client, err := newClient(t, address)
if err != nil {
t.Fatal(err)
}
defer client.Close()
// use unique namespace to get unique task events
id := t.Name()
ns := fmt.Sprintf("%s-%s", testNamespace, id)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
ctx = namespaces.WithNamespace(ctx, ns)
ctx = logtest.WithT(ctx, t)
image, err := client.Pull(ctx, testImage, WithPullUnpack)
if err != nil {
t.Fatal(err)
}
defer client.ImageService().Delete(ctx, testImage, images.SynchronousDelete())
container, err := client.NewContainer(ctx, id,
WithNewSnapshot(id, image),
WithNewSpec(oci.WithImageConfig(image), withProcessArgs("notfound404")),
WithRuntime(client.Runtime(), nil),
)
if err != nil {
t.Fatal(err)
}
defer container.Delete(ctx, WithSnapshotCleanup)
eventStream, errC := client.EventService().Subscribe(ctx, "namespace=="+ns+",topic~=|^/tasks/exit|")
if _, err := container.NewTask(ctx, empty()); err == nil {
t.Fatalf("expected error but got nil")
}
var timeout = 10 * time.Second
// start to check events
select {
case et := <-eventStream:
t.Fatal(fmt.Errorf("unexpected task exit event: %+v", et))
case err := <-errC:
t.Fatal(fmt.Errorf("unexpected error from event service: %w", err))
case <-time.After(timeout):
}
}
func TestDaemonRestart(t *testing.T) {
client, err := newClient(t, address)
if err != nil {

View File

@ -375,13 +375,13 @@ func (m *TaskManager) Create(ctx context.Context, taskID string, opts runtime.Cr
shim := process.(*shimTask)
t, err := shim.Create(ctx, opts)
if err != nil {
// NOTE: ctx contains required namespace information.
m.manager.shims.Delete(ctx, taskID)
dctx, cancel := timeout.WithContext(context.Background(), cleanupTimeout)
defer cancel()
_, errShim := shim.delete(dctx, func(ctx context.Context, id string) {
m.manager.shims.Delete(ctx, id)
})
_, errShim := shim.delete(dctx, func(context.Context, string) {})
if errShim != nil {
if errdefs.IsDeadlineExceeded(errShim) {
dctx, cancel = timeout.WithContext(context.Background(), cleanupTimeout)