sandbox: make a independent shim plugin

Signed-off-by: Abel Feng <fshb1988@gmail.com>
This commit is contained in:
Abel Feng
2024-02-22 16:05:45 +08:00
parent f8fbdfdd6f
commit a12acedfad
6 changed files with 388 additions and 301 deletions

View File

@@ -19,19 +19,23 @@ package v2
import (
"context"
"errors"
"fmt"
"os"
"path/filepath"
"github.com/containerd/errdefs"
"github.com/containerd/log"
"github.com/containerd/containerd/v2/core/mount"
"github.com/containerd/containerd/v2/internal/cleanup"
"github.com/containerd/containerd/v2/pkg/namespaces"
"github.com/containerd/containerd/v2/pkg/timeout"
"github.com/containerd/errdefs"
"github.com/containerd/log"
)
func (m *ShimManager) loadExistingTasks(ctx context.Context) error {
nsDirs, err := os.ReadDir(m.state)
// LoadExistingShims loads existing shims from the path specified by stateDir
// rootDir is for cleaning up the unused paths of removed shims.
func (m *ShimManager) LoadExistingShims(ctx context.Context, stateDir string, rootDir string) error {
nsDirs, err := os.ReadDir(stateDir)
if err != nil {
return err
}
@@ -45,11 +49,11 @@ func (m *ShimManager) loadExistingTasks(ctx context.Context) error {
continue
}
log.G(ctx).WithField("namespace", ns).Debug("loading tasks in namespace")
if err := m.loadShims(namespaces.WithNamespace(ctx, ns)); err != nil {
if err := m.loadShims(namespaces.WithNamespace(ctx, ns), stateDir); err != nil {
log.G(ctx).WithField("namespace", ns).WithError(err).Error("loading tasks in namespace")
continue
}
if err := m.cleanupWorkDirs(namespaces.WithNamespace(ctx, ns)); err != nil {
if err := m.cleanupWorkDirs(namespaces.WithNamespace(ctx, ns), rootDir); err != nil {
log.G(ctx).WithField("namespace", ns).WithError(err).Error("cleanup working directory in namespace")
continue
}
@@ -57,14 +61,14 @@ func (m *ShimManager) loadExistingTasks(ctx context.Context) error {
return nil
}
func (m *ShimManager) loadShims(ctx context.Context) error {
func (m *ShimManager) loadShims(ctx context.Context, stateDir string) error {
ns, err := namespaces.NamespaceRequired(ctx)
if err != nil {
return err
}
ctx = log.WithLogger(ctx, log.G(ctx).WithField("namespace", ns))
shimDirs, err := os.ReadDir(filepath.Join(m.state, ns))
shimDirs, err := os.ReadDir(filepath.Join(stateDir, ns))
if err != nil {
return err
}
@@ -77,7 +81,7 @@ func (m *ShimManager) loadShims(ctx context.Context) error {
if len(id) > 0 && id[0] == '.' {
continue
}
bundle, err := LoadBundle(ctx, m.state, id)
bundle, err := LoadBundle(ctx, stateDir, id)
if err != nil {
// fine to return error here, it is a programmer error if the context
// does not have a namespace
@@ -102,78 +106,87 @@ func (m *ShimManager) loadShims(ctx context.Context) error {
bundle.Delete()
continue
}
var (
runtime string
)
// If we're on 1.6+ and specified custom path to the runtime binary, path will be saved in 'shim-binary-path' file.
if data, err := os.ReadFile(filepath.Join(bundle.Path, "shim-binary-path")); err == nil {
runtime = string(data)
} else if err != nil && !os.IsNotExist(err) {
log.G(ctx).WithError(err).Error("failed to read `runtime` path from bundle")
}
// Query runtime name from metadata store
if runtime == "" {
container, err := m.containers.Get(ctx, id)
if err != nil {
log.G(ctx).WithError(err).Errorf("loading container %s", id)
if err := mount.UnmountRecursive(filepath.Join(bundle.Path, "rootfs"), 0); err != nil {
log.G(ctx).WithError(err).Errorf("failed to unmount of rootfs %s", id)
}
bundle.Delete()
continue
}
runtime = container.Runtime.Name
}
runtime, err = m.resolveRuntimePath(runtime)
if err != nil {
if err := m.loadShim(ctx, bundle); err != nil {
log.G(ctx).WithError(err).Errorf("failed to load shim %s", bundle.Path)
bundle.Delete()
log.G(ctx).WithError(err).Error("failed to resolve runtime path")
continue
}
binaryCall := shimBinary(bundle,
shimBinaryConfig{
runtime: runtime,
address: m.containerdAddress,
ttrpcAddress: m.containerdTTRPCAddress,
schedCore: m.schedCore,
})
shim, err := loadShimTask(ctx, bundle, func() {
log.G(ctx).WithField("id", id).Info("shim disconnected")
}
return nil
}
cleanupAfterDeadShim(cleanup.Background(ctx), id, m.shims, m.events, binaryCall)
// Remove self from the runtime task list.
m.shims.Delete(ctx, id)
})
func (m *ShimManager) loadShim(ctx context.Context, bundle *Bundle) error {
var (
runtime string
id = bundle.ID
)
// If we're on 1.6+ and specified custom path to the runtime binary, path will be saved in 'shim-binary-path' file.
if data, err := os.ReadFile(filepath.Join(bundle.Path, "shim-binary-path")); err == nil {
runtime = string(data)
} else if err != nil && !os.IsNotExist(err) {
log.G(ctx).WithError(err).Error("failed to read `runtime` path from bundle")
}
// Query runtime name from metadata store
if runtime == "" {
container, err := m.containers.Get(ctx, id)
if err != nil {
log.G(ctx).WithError(err).Errorf("unable to load shim %q", id)
cleanupAfterDeadShim(ctx, id, m.shims, m.events, binaryCall)
continue
log.G(ctx).WithError(err).Errorf("loading container %s", id)
if err := mount.UnmountRecursive(filepath.Join(bundle.Path, "rootfs"), 0); err != nil {
log.G(ctx).WithError(err).Errorf("failed to unmount of rootfs %s", id)
}
return err
}
runtime = container.Runtime.Name
}
// There are 3 possibilities for the loaded shim here:
// 1. It could be a shim that is running a task.
// 2. It could be a sandbox shim.
// 3. Or it could be a shim that was created for running a task but
// something happened (probably a containerd crash) and the task was never
// created. This shim process should be cleaned up here. Look at
// containerd/containerd#6860 for further details.
runtime, err := m.resolveRuntimePath(runtime)
if err != nil {
bundle.Delete()
_, sgetErr := m.sandboxStore.Get(ctx, id)
pInfo, pidErr := shim.Pids(ctx)
if sgetErr != nil && errors.Is(sgetErr, errdefs.ErrNotFound) && (len(pInfo) == 0 || errors.Is(pidErr, errdefs.ErrNotFound)) {
log.G(ctx).WithField("id", id).Info("cleaning leaked shim process")
// We are unable to get Pids from the shim and it's not a sandbox
// shim. We should clean it up her.
// No need to do anything for removeTask since we never added this shim.
shim.delete(ctx, false, func(ctx context.Context, id string) {})
} else {
m.shims.Add(ctx, shim.ShimInstance)
}
return fmt.Errorf("failed to resolve runtime path: %w", err)
}
binaryCall := shimBinary(bundle,
shimBinaryConfig{
runtime: runtime,
address: m.containerdAddress,
ttrpcAddress: m.containerdTTRPCAddress,
schedCore: m.schedCore,
})
// TODO: It seems we can only call loadShim here if it is a sandbox shim?
shim, err := loadShimTask(ctx, bundle, func() {
log.G(ctx).WithField("id", id).Info("shim disconnected")
cleanupAfterDeadShim(cleanup.Background(ctx), id, m.shims, m.events, binaryCall)
// Remove self from the runtime task list.
m.shims.Delete(ctx, id)
})
if err != nil {
cleanupAfterDeadShim(ctx, id, m.shims, m.events, binaryCall)
return fmt.Errorf("unable to load shim %q: %w", id, err)
}
// There are 3 possibilities for the loaded shim here:
// 1. It could be a shim that is running a task.
// 2. It could be a sandbox shim.
// 3. Or it could be a shim that was created for running a task but
// something happened (probably a containerd crash) and the task was never
// created. This shim process should be cleaned up here. Look at
// containerd/containerd#6860 for further details.
_, sgetErr := m.sandboxStore.Get(ctx, id)
pInfo, pidErr := shim.Pids(ctx)
if sgetErr != nil && errors.Is(sgetErr, errdefs.ErrNotFound) && (len(pInfo) == 0 || errors.Is(pidErr, errdefs.ErrNotFound)) {
log.G(ctx).WithField("id", id).Info("cleaning leaked shim process")
// We are unable to get Pids from the shim and it's not a sandbox
// shim. We should clean it up her.
// No need to do anything for removeTask since we never added this shim.
shim.delete(ctx, false, func(ctx context.Context, id string) {})
} else {
m.shims.Add(ctx, shim.ShimInstance)
}
return nil
}
@@ -198,13 +211,13 @@ func loadShimTask(ctx context.Context, bundle *Bundle, onClose func()) (_ *shimT
return s, nil
}
func (m *ShimManager) cleanupWorkDirs(ctx context.Context) error {
func (m *ShimManager) cleanupWorkDirs(ctx context.Context, rootDir string) error {
ns, err := namespaces.NamespaceRequired(ctx)
if err != nil {
return err
}
f, err := os.Open(filepath.Join(m.root, ns))
f, err := os.Open(filepath.Join(rootDir, ns))
if err != nil {
return err
}
@@ -220,7 +233,7 @@ func (m *ShimManager) cleanupWorkDirs(ctx context.Context) error {
// this can happen on a reboot where /run for the bundle state is cleaned up
// but that persistent working dir is left
if _, err := m.shims.Get(ctx, dir); err != nil {
path := filepath.Join(m.root, ns, dir)
path := filepath.Join(rootDir, ns, dir)
if err := os.RemoveAll(path); err != nil {
log.G(ctx).WithError(err).Errorf("cleanup working dir %s", path)
}