From 23fbdbaf13e054cb94d6d3452ed93b491f605f18 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 30 Jul 2018 09:41:42 -0400 Subject: [PATCH] Cleanup workdirs on manager load This cleans up persistent work dirs on TaskManager boot. These dirs can be left behind in a machine reboot. The state in /run will not exist but the work dir in the root does, we should cleanup work dirs when tasks are not loaded. This also improves error handling that would prevent the task manager from loading when a single task fails to load or cleanup. Signed-off-by: Michael Crosby --- runtime/restart/monitor/monitor.go | 3 ++- runtime/v2/manager.go | 40 +++++++++++++++++++++++++++--- 2 files changed, 38 insertions(+), 5 deletions(-) diff --git a/runtime/restart/monitor/monitor.go b/runtime/restart/monitor/monitor.go index b357c3455..7b293fead 100644 --- a/runtime/restart/monitor/monitor.go +++ b/runtime/restart/monitor/monitor.go @@ -168,7 +168,8 @@ func (m *monitor) reconcile(ctx context.Context) error { ctx = namespaces.WithNamespace(ctx, name) changes, err := m.monitor(ctx) if err != nil { - return err + logrus.WithError(err).Error("monitor for changes") + continue } for _, c := range changes { if err := c.apply(ctx, m.client); err != nil { diff --git a/runtime/v2/manager.go b/runtime/v2/manager.go index 6aa250241..fb45823ea 100644 --- a/runtime/v2/manager.go +++ b/runtime/v2/manager.go @@ -34,7 +34,6 @@ import ( "github.com/containerd/containerd/plugin" "github.com/containerd/containerd/runtime" ocispec "github.com/opencontainers/image-spec/specs-go/v1" - "github.com/pkg/errors" ) func init() { @@ -150,7 +149,12 @@ func (m *TaskManager) loadExistingTasks(ctx context.Context) error { ns := nsd.Name() log.G(ctx).WithField("namespace", ns).Debug("loading tasks in namespace") if err := m.loadTasks(namespaces.WithNamespace(ctx, ns)); err != nil { - return err + log.G(ctx).WithField("namespace", ns).WithError(err).Error("loading tasks in namespace") + continue + } + if err := m.cleanupWorkDirs(namespaces.WithNamespace(ctx, ns)); err != nil { + log.G(ctx).WithField("namespace", ns).WithError(err).Error("cleanup working directory in namespace") + continue } } return nil @@ -172,12 +176,16 @@ func (m *TaskManager) loadTasks(ctx context.Context) error { id := sd.Name() bundle, err := LoadBundle(ctx, m.state, id) if err != nil { + // fine to return error here, it is a programmer error if the context + // does not have a namespace return err } // fast path bf, err := ioutil.ReadDir(bundle.Path) if err != nil { - return err + bundle.Delete() + log.G(ctx).WithError(err).Errorf("fast path read bundle path for %s", bundle.Path) + continue } if len(bf) == 0 { bundle.Delete() @@ -197,7 +205,8 @@ func (m *TaskManager) loadTasks(ctx context.Context) error { } binaryCall := shimBinary(ctx, bundle, container.Runtime.Name, m.containerdAddress, m.events, m.tasks) if _, err := binaryCall.Delete(ctx); err != nil { - return errors.Wrapf(err, "remove disk state %s", id) + log.G(ctx).WithError(err).Errorf("binary call to delete for %s", id) + continue } continue } @@ -218,3 +227,26 @@ func (m *TaskManager) container(ctx context.Context, id string) (*containers.Con } return &container, nil } + +func (m *TaskManager) cleanupWorkDirs(ctx context.Context) error { + ns, err := namespaces.NamespaceRequired(ctx) + if err != nil { + return err + } + dirs, err := ioutil.ReadDir(filepath.Join(m.root, ns)) + if err != nil { + return err + } + for _, d := range dirs { + // if the task was not loaded, cleanup and empty working directory + // this can happen on a reboot where /run for the bundle state is cleaned up + // but that persistent working dir is left + if _, err := m.tasks.Get(ctx, d.Name()); err != nil { + path := filepath.Join(m.root, ns, d.Name()) + if err := os.RemoveAll(path); err != nil { + log.G(ctx).WithError(err).Errorf("cleanup working dir %s", path) + } + } + } + return nil +}