diff --git a/snapshots/devmapper/pool_device.go b/snapshots/devmapper/pool_device.go index 95e94a4a6..52b045658 100644 --- a/snapshots/devmapper/pool_device.go +++ b/snapshots/devmapper/pool_device.go @@ -347,6 +347,16 @@ func (p *PoolDevice) SuspendDevice(ctx context.Context, deviceName string) error return nil } +func (p *PoolDevice) ResumeDevice(ctx context.Context, deviceName string) error { + if err := p.transition(ctx, deviceName, Resuming, Resumed, func() error { + return dmsetup.ResumeDevice(deviceName) + }); err != nil { + return errors.Wrapf(err, "failed to resume device %q", deviceName) + } + + return nil +} + // DeactivateDevice deactivates thin device func (p *PoolDevice) DeactivateDevice(ctx context.Context, deviceName string, deferred, withForce bool) error { if !p.IsLoaded(deviceName) { diff --git a/snapshots/devmapper/snapshotter.go b/snapshots/devmapper/snapshotter.go index c6ed8340b..f75327f21 100644 --- a/snapshots/devmapper/snapshotter.go +++ b/snapshots/devmapper/snapshotter.go @@ -277,14 +277,26 @@ func (s *Snapshotter) Commit(ctx context.Context, name, key string, opts ...snap return err } - // The thin snapshot is not used for IO after committed, so - // suspend to flush the IO and deactivate the device. + // After committed, the snapshot device will not be directly + // used anymore. We'd better deactivate it to make it *invisible* + // in userspace, so that tools like LVM2 and fdisk cannot touch it, + // and avoid useless IOs on it. + // + // Before deactivation, we need to flush the outstanding IO by suspend. + // Afterward, we resume it again to prevent a race window which may cause + // a process IO hang. 
See the issue below for details: + // (https://github.com/containerd/containerd/issues/4234) err = s.pool.SuspendDevice(ctx, deviceName) if err != nil { return err } - return s.pool.DeactivateDevice(ctx, deviceName, true, false) + err = s.pool.ResumeDevice(ctx, deviceName) + if err != nil { + return err + } + + return s.pool.DeactivateDevice(ctx, deviceName, false, false) }) }