Merge pull request #6866 from ambarve/cleanup_leaked_shim

Cleanup leaked shim process
This commit is contained in:
Derek McGowan 2022-05-02 16:52:31 -07:00 committed by GitHub
commit 6fda809e1b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 28 additions and 1 deletion

View File

@ -38,6 +38,7 @@ import (
"github.com/containerd/containerd/protobuf"
"github.com/containerd/containerd/runtime"
shimbinary "github.com/containerd/containerd/runtime/v2/shim"
"github.com/containerd/containerd/sandbox"
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
)
@ -78,6 +79,7 @@ func init() {
return nil, err
}
cs := metadata.NewContainerStore(m.(*metadata.DB))
ss := metadata.NewSandboxStore(m.(*metadata.DB))
events := ep.(*exchange.Exchange)
shimManager, err := NewShimManager(ic.Context, &ManagerConfig{
@ -88,6 +90,7 @@ func init() {
Events: events,
Store: cs,
SchedCore: config.SchedCore,
SandboxStore: ss,
})
if err != nil {
return nil, err
@ -124,6 +127,7 @@ type ManagerConfig struct {
Address string
TTRPCAddress string
SchedCore bool
SandboxStore sandbox.Store
}
// NewShimManager creates a manager for v2 shims
@ -143,6 +147,7 @@ func NewShimManager(ctx context.Context, config *ManagerConfig) (*ShimManager, e
events: config.Events,
containers: config.Store,
schedCore: config.SchedCore,
sandboxStore: config.SandboxStore,
}
if err := m.loadExistingTasks(ctx); err != nil {
@ -167,6 +172,7 @@ type ShimManager struct {
containers containers.Store
// runtimePaths is a cache of `runtime names` -> `resolved fs path`
runtimePaths sync.Map
sandboxStore sandbox.Store
}
// ID of the shim manager

View File

@ -18,9 +18,11 @@ package v2
import (
"context"
"errors"
"os"
"path/filepath"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/log"
"github.com/containerd/containerd/mount"
"github.com/containerd/containerd/namespaces"
@ -139,7 +141,26 @@ func (m *ShimManager) loadShims(ctx context.Context) error {
cleanupAfterDeadShim(ctx, id, ns, m.shims, m.events, binaryCall)
continue
}
m.shims.Add(ctx, shim)
// There are 3 possibilities for the loaded shim here:
// 1. It could be a shim that is running a task.
// 2. It could be a sandbox shim.
// 3. Or it could be a shim that was created for running a task but
// something happened (probably a containerd crash) and the task was never
// created. This shim process should be cleaned up here. Look at
// containerd/containerd#6860 for further details.
_, sgetErr := m.sandboxStore.Get(ctx, id)
pInfo, pidErr := shim.Pids(ctx)
if sgetErr != nil && errors.Is(sgetErr, errdefs.ErrNotFound) && (len(pInfo) == 0 || errors.Is(pidErr, errdefs.ErrNotFound)) {
log.G(ctx).WithField("id", id).Info("cleaning leaked shim process")
// We are unable to get Pids from the shim and it's not a sandbox
// shim. We should clean it up here.
// No need to do anything for removeTask since we never added this shim.
shim.delete(ctx, false, func(ctx context.Context, id string) {})
} else {
m.shims.Add(ctx, shim)
}
}
return nil
}