diff --git a/runtime/v2/manager.go b/runtime/v2/manager.go index 99ee7d16d..17f39fbfc 100644 --- a/runtime/v2/manager.go +++ b/runtime/v2/manager.go @@ -38,6 +38,7 @@ import ( "github.com/containerd/containerd/protobuf" "github.com/containerd/containerd/runtime" shimbinary "github.com/containerd/containerd/runtime/v2/shim" + "github.com/containerd/containerd/sandbox" ocispec "github.com/opencontainers/image-spec/specs-go/v1" ) @@ -78,6 +79,7 @@ func init() { return nil, err } cs := metadata.NewContainerStore(m.(*metadata.DB)) + ss := metadata.NewSandboxStore(m.(*metadata.DB)) events := ep.(*exchange.Exchange) shimManager, err := NewShimManager(ic.Context, &ManagerConfig{ @@ -88,6 +90,7 @@ func init() { Events: events, Store: cs, SchedCore: config.SchedCore, + SandboxStore: ss, }) if err != nil { return nil, err @@ -124,6 +127,7 @@ type ManagerConfig struct { Address string TTRPCAddress string SchedCore bool + SandboxStore sandbox.Store } // NewShimManager creates a manager for v2 shims @@ -143,6 +147,7 @@ func NewShimManager(ctx context.Context, config *ManagerConfig) (*ShimManager, e events: config.Events, containers: config.Store, schedCore: config.SchedCore, + sandboxStore: config.SandboxStore, } if err := m.loadExistingTasks(ctx); err != nil { @@ -167,6 +172,7 @@ type ShimManager struct { containers containers.Store // runtimePaths is a cache of `runtime names` -> `resolved fs path` runtimePaths sync.Map + sandboxStore sandbox.Store } // ID of the shim manager diff --git a/runtime/v2/shim_load.go b/runtime/v2/shim_load.go index afd2dfb4f..760a6a7f2 100644 --- a/runtime/v2/shim_load.go +++ b/runtime/v2/shim_load.go @@ -18,9 +18,11 @@ package v2 import ( "context" + "errors" "os" "path/filepath" + "github.com/containerd/containerd/errdefs" "github.com/containerd/containerd/log" "github.com/containerd/containerd/mount" "github.com/containerd/containerd/namespaces" @@ -139,7 +141,26 @@ func (m *ShimManager) loadShims(ctx context.Context) error { 
cleanupAfterDeadShim(ctx, id, ns, m.shims, m.events, binaryCall) continue } - m.shims.Add(ctx, shim) + + // There are 3 possibilities for the loaded shim here: + // 1. It could be a shim that is running a task. + // 2. It could be a sandbox shim. + // 3. Or it could be a shim that was created for running a task but + // something happened (probably a containerd crash) and the task was never + // created. This shim process should be cleaned up here. Look at + // containerd/containerd#6860 for further details. + + _, sgetErr := m.sandboxStore.Get(ctx, id) + pInfo, pidErr := shim.Pids(ctx) + if sgetErr != nil && errors.Is(sgetErr, errdefs.ErrNotFound) && (len(pInfo) == 0 || errors.Is(pidErr, errdefs.ErrNotFound)) { + log.G(ctx).WithField("id", id).Info("cleaning leaked shim process") + // We are unable to get Pids from the shim and it's not a sandbox + // shim. We should clean it up here. + // No need to do anything for removeTask since we never added this shim. + shim.delete(ctx, false, func(ctx context.Context, id string) {}) + } else { + m.shims.Add(ctx, shim) + } } return nil }