From 27c8f4085cdaca80bf302fbafcb636a6d4642706 Mon Sep 17 00:00:00 2001 From: ruiwen-zhao Date: Fri, 3 Feb 2023 20:09:33 +0000 Subject: [PATCH 1/2] Move PLEG event generation back to sbserver to avoid missing pod sandbox status Signed-off-by: ruiwen-zhao --- integration/container_event_test.go | 10 +++------- pkg/cri/sbserver/events.go | 4 +--- pkg/cri/sbserver/podsandbox/sandbox_delete.go | 4 ++-- pkg/cri/sbserver/podsandbox/sandbox_run.go | 8 -------- pkg/cri/sbserver/sandbox_run.go | 8 ++++++++ 5 files changed, 14 insertions(+), 20 deletions(-) diff --git a/integration/container_event_test.go b/integration/container_event_test.go index 935b44747..ef13e44ba 100644 --- a/integration/container_event_test.go +++ b/integration/container_event_test.go @@ -18,7 +18,6 @@ package integration import ( "context" - "fmt" "testing" "time" @@ -134,21 +133,19 @@ func drainContainerEventsChan(containerEventsChan chan *runtime.ContainerEventRe } } -func checkContainerEventResponse(t *testing.T, containerEventsChan chan *runtime.ContainerEventResponse, expectedType runtime.ContainerEventType, expectedPodSandboxStatus *runtime.PodSandboxStatus, expectedContainerStates []runtime.ContainerState) error { +func checkContainerEventResponse(t *testing.T, containerEventsChan chan *runtime.ContainerEventResponse, expectedType runtime.ContainerEventType, expectedPodSandboxStatus *runtime.PodSandboxStatus, expectedContainerStates []runtime.ContainerState) { t.Helper() var resp *runtime.ContainerEventResponse select { case resp = <-containerEventsChan: case <-time.After(readContainerEventChannelTimeout): - return fmt.Errorf("assertContainerEventResponse: timeout waiting for events from channel") + t.Error("assertContainerEventResponse: timeout waiting for events from channel") } t.Logf("Container Event response received: %+v", *resp) assert.Equal(t, expectedType, resp.ContainerEventType) // Checking only the State field of PodSandboxStatus - if expectedPodSandboxStatus == nil { - assert.Nil(t, resp.PodSandboxStatus) - } else { + if expectedPodSandboxStatus != nil { assert.Equal(t, expectedPodSandboxStatus.State, resp.PodSandboxStatus.State) } @@ -156,5 +153,4 @@ func checkContainerEventResponse(t *testing.T, containerEventsChan chan *runtime for i, cs := range resp.ContainersStatuses { assert.Equal(t, expectedContainerStates[i], cs.State) } - return nil } diff --git a/pkg/cri/sbserver/events.go b/pkg/cri/sbserver/events.go index b14ee8c51..d231a06a6 100644 --- a/pkg/cri/sbserver/events.go +++ b/pkg/cri/sbserver/events.go @@ -434,8 +434,6 @@ func handleSandboxExit(ctx context.Context, e *eventtypes.TaskExit, sb sandboxst } // Move on to make sure container status is updated. } - - c.generateAndSendContainerEvent(ctx, sb.ID, sb.ID, runtime.ContainerEventType_CONTAINER_STOPPED_EVENT) } } @@ -449,7 +447,7 @@ func handleSandboxExit(ctx context.Context, e *eventtypes.TaskExit, sb sandboxst // Using channel to propagate the information of sandbox stop sb.Stop() - + c.generateAndSendContainerEvent(ctx, sb.ID, sb.ID, runtime.ContainerEventType_CONTAINER_STOPPED_EVENT) return nil } diff --git a/pkg/cri/sbserver/podsandbox/sandbox_delete.go b/pkg/cri/sbserver/podsandbox/sandbox_delete.go index eda4ec178..678362053 100644 --- a/pkg/cri/sbserver/podsandbox/sandbox_delete.go +++ b/pkg/cri/sbserver/podsandbox/sandbox_delete.go @@ -59,10 +59,10 @@ func (c *Controller) Shutdown(ctx context.Context, sandboxID string) error { } } - c.sandboxStore.Delete(sandboxID) - // Send CONTAINER_DELETED event with ContainerId equal to SandboxId. c.cri.GenerateAndSendContainerEvent(ctx, sandboxID, sandboxID, runtime.ContainerEventType_CONTAINER_DELETED_EVENT) + c.sandboxStore.Delete(sandboxID) + return nil } diff --git a/pkg/cri/sbserver/podsandbox/sandbox_run.go b/pkg/cri/sbserver/podsandbox/sandbox_run.go index ed8f1aef7..0917d7065 100644 --- a/pkg/cri/sbserver/podsandbox/sandbox_run.go +++ b/pkg/cri/sbserver/podsandbox/sandbox_run.go @@ -159,11 +159,6 @@ func (c *Controller) Start(ctx context.Context, id string) (cin sandbox.Controll } }() - // Send CONTAINER_CREATED event with both ContainerId and SandboxId equal to SandboxId. - // Note that this has to be done after sandboxStore.Add() because we need to get - // SandboxStatus from the store and include it in the event. - c.cri.GenerateAndSendContainerEvent(ctx, id, id, runtime.ContainerEventType_CONTAINER_CREATED_EVENT) - // Create sandbox container root directories. sandboxRootDir := c.getSandboxRootDir(id) if err := c.os.MkdirAll(sandboxRootDir, 0755); err != nil { @@ -264,9 +259,6 @@ func (c *Controller) Start(ctx context.Context, id string) (cin sandbox.Controll return cin, fmt.Errorf("failed to start sandbox container task %q: %w", id, err) } - // Send CONTAINER_STARTED event with ContainerId equal to SandboxId. - c.cri.GenerateAndSendContainerEvent(ctx, id, id, runtime.ContainerEventType_CONTAINER_STARTED_EVENT) - cin.SandboxID = id cin.Pid = task.Pid() cin.CreatedAt = info.CreatedAt diff --git a/pkg/cri/sbserver/sandbox_run.go b/pkg/cri/sbserver/sandbox_run.go index 0dde4fa84..d77dcaa83 100644 --- a/pkg/cri/sbserver/sandbox_run.go +++ b/pkg/cri/sbserver/sandbox_run.go @@ -277,6 +277,11 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox return nil, fmt.Errorf("failed to add sandbox %+v into store: %w", sandbox, err) } + // Send CONTAINER_CREATED event with both ContainerId and SandboxId equal to SandboxId. + // Note that this has to be done after sandboxStore.Add() because we need to get + // SandboxStatus from the store and include it in the event. + c.generateAndSendContainerEvent(ctx, id, id, runtime.ContainerEventType_CONTAINER_CREATED_EVENT) + // TODO: Use sandbox client instead exitCh := make(chan containerd.ExitStatus, 1) go func() { @@ -300,6 +305,9 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox // but we don't care about sandbox TaskOOM right now, so it is fine. c.eventMonitor.startSandboxExitMonitor(context.Background(), id, ctrl.Pid, exitCh) + // Send CONTAINER_STARTED event with ContainerId equal to SandboxId. + c.generateAndSendContainerEvent(ctx, id, id, runtime.ContainerEventType_CONTAINER_STARTED_EVENT) + sandboxRuntimeCreateTimer.WithValues(labels["oci_runtime_type"]).UpdateSince(runtimeStart) return &runtime.RunPodSandboxResponse{PodSandboxId: id}, nil From 51a8db233d5db8142705424d4b0ccf2db13c4140 Mon Sep 17 00:00:00 2001 From: ruiwen-zhao Date: Fri, 3 Feb 2023 20:09:50 +0000 Subject: [PATCH 2/2] Send container events with nil PodSandboxStatus Signed-off-by: ruiwen-zhao --- pkg/cri/sbserver/helpers.go | 6 ++---- pkg/cri/server/helpers.go | 6 ++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/pkg/cri/sbserver/helpers.go b/pkg/cri/sbserver/helpers.go index eb7afb660..9c0367dc6 100644 --- a/pkg/cri/sbserver/helpers.go +++ b/pkg/cri/sbserver/helpers.go @@ -525,10 +525,8 @@ func copyResourcesToStatus(spec *runtimespec.Spec, status containerstore.Status) func (c *criService) generateAndSendContainerEvent(ctx context.Context, containerID string, sandboxID string, eventType runtime.ContainerEventType) { podSandboxStatus, err := c.getPodSandboxStatus(ctx, sandboxID) if err != nil { - // TODO(https://github.com/containerd/containerd/issues/7785): - // Do not skip events with nil PodSandboxStatus. - logrus.Errorf("Failed to get podSandbox status for container event for sandboxID %q: %v. Skipping sending the event.", sandboxID, err) - return + logrus.Warnf("Failed to get podSandbox status for container event for sandboxID %q: %v. Sending the event with nil podSandboxStatus.", sandboxID, err) + podSandboxStatus = nil } containerStatuses, err := c.getContainerStatuses(ctx, sandboxID) if err != nil { diff --git a/pkg/cri/server/helpers.go b/pkg/cri/server/helpers.go index 9844b5564..7a9e6b647 100644 --- a/pkg/cri/server/helpers.go +++ b/pkg/cri/server/helpers.go @@ -523,10 +523,8 @@ func copyResourcesToStatus(spec *runtimespec.Spec, status containerstore.Status) func (c *criService) generateAndSendContainerEvent(ctx context.Context, containerID string, sandboxID string, eventType runtime.ContainerEventType) { podSandboxStatus, err := c.getPodSandboxStatus(ctx, sandboxID) if err != nil { - // TODO(https://github.com/containerd/containerd/issues/7785): - // Do not skip events with nil PodSandboxStatus. - logrus.Errorf("Failed to get podSandbox status for container event for sandboxID %q: %v. Skipping sending the event.", sandboxID, err) - return + logrus.Warnf("Failed to get podSandbox status for container event for sandboxID %q: %v. Sending the event with nil podSandboxStatus.", sandboxID, err) + podSandboxStatus = nil } containerStatuses, err := c.getContainerStatuses(ctx, sandboxID) if err != nil {