From a3685262fe7d501a2a236c7ce39209a7161f2519 Mon Sep 17 00:00:00 2001 From: Eric Ren Date: Thu, 25 Apr 2019 14:30:15 +0800 Subject: [PATCH] snapshots/devmapper: do not stop snapshot GC when one snapshot removing fails Snapshots GC takes use of pruneBranch() function to remove snapshots, but GC will stop if snapshotter.Remove() returns error and the error number is not ErrFailedPrecondition. This results in thousands of dm snapshots not deleted if one snapshot is not deleted, due to errors like "contains a filesystem in use". So return ErrFailedPrecondition error number in Remove() function where appropriate, and let GC process go on collecting other snapshots. Fix: #3923 Signed-off-by: Eryu Guan Signed-off-by: Eric Ren --- snapshots/devmapper/snapshotter.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/snapshots/devmapper/snapshotter.go b/snapshots/devmapper/snapshotter.go index c244cf8e6..c6ed8340b 100644 --- a/snapshots/devmapper/snapshotter.go +++ b/snapshots/devmapper/snapshotter.go @@ -27,6 +27,7 @@ import ( "strings" "sync" + "github.com/containerd/containerd/errdefs" "github.com/containerd/containerd/log" "github.com/containerd/containerd/mount" "github.com/containerd/containerd/plugin" @@ -306,7 +307,11 @@ func (s *Snapshotter) removeDevice(ctx context.Context, key string) error { if !s.config.AsyncRemove { if err := s.pool.RemoveDevice(ctx, deviceName); err != nil { log.G(ctx).WithError(err).Errorf("failed to remove device") - return err + // Tell snapshot GC continue to collect other snapshots. + // Otherwise, one snapshot collection failure will stop + // the GC, and all snapshots won't be collected even though + // having no relationship with the failed one. + return errdefs.ErrFailedPrecondition } } else { // The asynchronous cleanup will do the real device remove work.