From d9f3e387c6d8e7c89a68e6c5839813630e833764 Mon Sep 17 00:00:00 2001 From: Kirtana Ashok Date: Tue, 18 Apr 2023 16:26:02 -0700 Subject: [PATCH] Remove entry for container from container store on error If containerd does not see a container but criservice's container store does, then we should try to recover from this error state by removing the container from criservice's container store as well. Signed-off-by: Kirtana Ashok --- pkg/cri/sbserver/container_remove.go | 18 ++++++++++++++---- pkg/cri/server/container_remove.go | 18 ++++++++++++++---- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/pkg/cri/sbserver/container_remove.go b/pkg/cri/sbserver/container_remove.go index ff4062944..e40d1fae0 100644 --- a/pkg/cri/sbserver/container_remove.go +++ b/pkg/cri/sbserver/container_remove.go @@ -33,19 +33,29 @@ import ( // RemoveContainer removes the container. func (c *criService) RemoveContainer(ctx context.Context, r *runtime.RemoveContainerRequest) (_ *runtime.RemoveContainerResponse, retErr error) { start := time.Now() - container, err := c.containerStore.Get(r.GetContainerId()) + ctrID := r.GetContainerId() + container, err := c.containerStore.Get(ctrID) if err != nil { if !errdefs.IsNotFound(err) { - return nil, fmt.Errorf("an error occurred when try to find container %q: %w", r.GetContainerId(), err) + return nil, fmt.Errorf("an error occurred when try to find container %q: %w", ctrID, err) } // Do not return error if container metadata doesn't exist. - log.G(ctx).Tracef("RemoveContainer called for container %q that does not exist", r.GetContainerId()) + log.G(ctx).Tracef("RemoveContainer called for container %q that does not exist", ctrID) return &runtime.RemoveContainerResponse{}, nil } id := container.ID i, err := container.Container.Info(ctx) if err != nil { - return nil, fmt.Errorf("get container info: %w", err) + if !errdefs.IsNotFound(err) { + return nil, fmt.Errorf("get container info: %w", err) + } + // Since containerd doesn't see the container and criservice's content store does, + // we should try to recover from this state by removing entry for this container + // from the container store as well and return successfully. + log.G(ctx).WithError(err).Warn("get container info failed") + c.containerStore.Delete(ctrID) + c.containerNameIndex.ReleaseByKey(ctrID) + return &runtime.RemoveContainerResponse{}, nil } // Forcibly stop the containers if they are in running or unknown state diff --git a/pkg/cri/server/container_remove.go b/pkg/cri/server/container_remove.go index f1537c959..8f95e05f1 100644 --- a/pkg/cri/server/container_remove.go +++ b/pkg/cri/server/container_remove.go @@ -33,19 +33,29 @@ import ( // RemoveContainer removes the container. func (c *criService) RemoveContainer(ctx context.Context, r *runtime.RemoveContainerRequest) (_ *runtime.RemoveContainerResponse, retErr error) { start := time.Now() - container, err := c.containerStore.Get(r.GetContainerId()) + ctrID := r.GetContainerId() + container, err := c.containerStore.Get(ctrID) if err != nil { if !errdefs.IsNotFound(err) { - return nil, fmt.Errorf("an error occurred when try to find container %q: %w", r.GetContainerId(), err) + return nil, fmt.Errorf("an error occurred when try to find container %q: %w", ctrID, err) } // Do not return error if container metadata doesn't exist. - log.G(ctx).Tracef("RemoveContainer called for container %q that does not exist", r.GetContainerId()) + log.G(ctx).Tracef("RemoveContainer called for container %q that does not exist", ctrID) return &runtime.RemoveContainerResponse{}, nil } id := container.ID i, err := container.Container.Info(ctx) if err != nil { - return nil, fmt.Errorf("get container info: %w", err) + if !errdefs.IsNotFound(err) { + return nil, fmt.Errorf("get container info: %w", err) + } + // Since containerd doesn't see the container and criservice's content store does, + // we should try to recover from this state by removing entry for this container + // from the container store as well and return successfully. + log.G(ctx).WithError(err).Warn("get container info failed") + c.containerStore.Delete(ctrID) + c.containerNameIndex.ReleaseByKey(ctrID) + return &runtime.RemoveContainerResponse{}, nil } // Forcibly stop the containers if they are in running or unknown state