From 3887053177198e9ebd4cbc4a438a3f77aa048f83 Mon Sep 17 00:00:00 2001 From: "renzhen.rz" Date: Tue, 7 May 2019 21:06:43 +0800 Subject: [PATCH 1/2] snapshots/devmapper: deactivate thin device after committed 1. reason to deactivate committed snapshot The thin device will not be used for IO after committed, and further thin snapshotting is OK using an inactive thin device as origin. The benefits to deactivate are: - device is not unneccesary visible avoiding any unexpected IO; - save useless kernel data structs for maintaining active dm. Quote from kernel doc (Documentation/device-mapper/provisioning.txt): " ii) Using an internal snapshot. Once created, the user doesn't have to worry about any connection between the origin and the snapshot. Indeed the snapshot is no different from any other thinly-provisioned device and can be snapshotted itself via the same method. It's perfectly legal to have only one of them active, and there's no ordering requirement on activating or removing them both. (This differs from conventional device-mapper snapshots.) " 2. an thinpool metadata bug is naturally removed An problem happens when failed to suspend/resume origin thin device when creating snapshot: "failed to create snapshot device from parent vg0-mythinpool-snap-3" error="failed to save initial metadata for snapshot "vg0-mythinpool-snap-19": object already exists" This issue occurs because when failed to create snapshot, the snapshotter.store can be rollbacked, but the thin pool metadata boltdb failed to rollback in PoolDevice.CreateSnapshotDevice(), therefore metadata becomes inconsistent: the snapshotID is not taken in snapshotter.store, but saved in pool metadata boltdb. The cause is, in PoolDevice.CreateSnapshotDevice(), the defer calls are invoked on "first-in-last-out" order. When the error happens on the "resume device" defer call, the metadata is saved and snapshot is created, which has no chance to be rollbacked. Signed-off-by: Eric Ren --- snapshots/devmapper/dmsetup/dmsetup.go | 15 +++++- snapshots/devmapper/pool_device.go | 63 ++++++++++--------------- snapshots/devmapper/pool_device_test.go | 19 +++++--- snapshots/devmapper/snapshotter.go | 13 ++++- 4 files changed, 62 insertions(+), 48 deletions(-) diff --git a/snapshots/devmapper/dmsetup/dmsetup.go b/snapshots/devmapper/dmsetup/dmsetup.go index 0b75cda3f..ce07235f6 100644 --- a/snapshots/devmapper/dmsetup/dmsetup.go +++ b/snapshots/devmapper/dmsetup/dmsetup.go @@ -201,6 +201,13 @@ func RemoveDevice(deviceName string, opts ...RemoveDeviceOpt) error { args = append(args, GetFullDevicePath(deviceName)) _, err := dmsetup(args...) + if err == unix.ENXIO { + // Ignore "No such device or address" error because we dmsetup + // remove with "deferred" option, there is chance for the device + // having been removed. + return nil + } + return err } @@ -372,9 +379,13 @@ func parseDmsetupError(output string) string { return "" } - const failedSubstr = "failed: " - line := lines[0] + // Handle output like "Device /dev/mapper/snapshotter-suite-pool-snap-1 not found" + if strings.HasSuffix(line, "not found") { + return unix.ENXIO.Error() + } + + const failedSubstr = "failed: " idx := strings.LastIndex(line, failedSubstr) if idx == -1 { return "" diff --git a/snapshots/devmapper/pool_device.go b/snapshots/devmapper/pool_device.go index cd14b9412..c9cc0d5ec 100644 --- a/snapshots/devmapper/pool_device.go +++ b/snapshots/devmapper/pool_device.go @@ -178,19 +178,6 @@ func (p *PoolDevice) CreateSnapshotDevice(ctx context.Context, deviceName string return errors.Wrapf(err, "failed to query device metadata for %q", deviceName) } - // Suspend thin device if it was activated previously to avoid corruptions - isActivated := p.IsActivated(baseInfo.Name) - if isActivated { - if err := p.suspendDevice(ctx, baseInfo); err != nil { - return err - } - - // Resume back base thin device on exit - defer func() { - retErr = multierror.Append(retErr, p.resumeDevice(ctx, baseInfo)).ErrorOrNil() - }() - } - snapInfo := &DeviceInfo{ Name: snapshotName, Size: virtualSizeBytes, @@ -230,26 +217,6 @@ func (p *PoolDevice) CreateSnapshotDevice(ctx context.Context, deviceName string return p.activateDevice(ctx, snapInfo) } -func (p *PoolDevice) suspendDevice(ctx context.Context, info *DeviceInfo) error { - if err := p.transition(ctx, info.Name, Suspending, Suspended, func() error { - return dmsetup.SuspendDevice(info.Name) - }); err != nil { - return errors.Wrapf(err, "failed to suspend device %q", info.Name) - } - - return nil -} - -func (p *PoolDevice) resumeDevice(ctx context.Context, info *DeviceInfo) error { - if err := p.transition(ctx, info.Name, Resuming, Resumed, func() error { - return dmsetup.ResumeDevice(info.Name) - }); err != nil { - return errors.Wrapf(err, "failed to resume device %q", info.Name) - } - - return nil -} - func (p *PoolDevice) createSnapshot(ctx context.Context, baseInfo, snapInfo *DeviceInfo) error { if err := p.transition(ctx, snapInfo.Name, Creating, Created, func() error { return dmsetup.CreateSnapshot(p.poolName, snapInfo.DeviceID, baseInfo.DeviceID) @@ -265,16 +232,30 @@ func (p *PoolDevice) createSnapshot(ctx context.Context, baseInfo, snapInfo *Dev return nil } +// SuspendDevice flushes the outstanding IO and blocks the further IO +func (p *PoolDevice) SuspendDevice(ctx context.Context, deviceName string) error { + if err := p.transition(ctx, deviceName, Suspending, Suspended, func() error { + return dmsetup.SuspendDevice(deviceName) + }); err != nil { + return errors.Wrapf(err, "failed to suspend device %q", deviceName) + } + + return nil +} + // DeactivateDevice deactivates thin device -func (p *PoolDevice) DeactivateDevice(ctx context.Context, deviceName string, deferred bool) error { - if !p.IsActivated(deviceName) { +func (p *PoolDevice) DeactivateDevice(ctx context.Context, deviceName string, deferred, withForce bool) error { + if !p.IsLoaded(deviceName) { return nil } - opts := []dmsetup.RemoveDeviceOpt{dmsetup.RemoveWithForce, dmsetup.RemoveWithRetries} + opts := []dmsetup.RemoveDeviceOpt{dmsetup.RemoveWithRetries} if deferred { opts = append(opts, dmsetup.RemoveDeferred) } + if withForce { + opts = append(opts, dmsetup.RemoveWithForce) + } if err := p.transition(ctx, deviceName, Deactivating, Deactivated, func() error { return dmsetup.RemoveDevice(deviceName, opts...) @@ -300,6 +281,12 @@ func (p *PoolDevice) IsActivated(deviceName string) bool { return true } +// IsLoaded returns true if thin-device is visible for dmsetup +func (p *PoolDevice) IsLoaded(deviceName string) bool { + _, err := dmsetup.Info(deviceName) + return err == nil +} + // GetUsage reports total size in bytes consumed by a thin-device. // It relies on the number of used blocks reported by 'dmsetup status'. // The output looks like: @@ -330,7 +317,7 @@ func (p *PoolDevice) RemoveDevice(ctx context.Context, deviceName string) error return errors.Wrapf(err, "can't query metadata for device %q", deviceName) } - if err := p.DeactivateDevice(ctx, deviceName, true); err != nil { + if err := p.DeactivateDevice(ctx, deviceName, true, true); err != nil { return err } @@ -368,7 +355,7 @@ func (p *PoolDevice) RemovePool(ctx context.Context) error { // Deactivate devices if any for _, name := range deviceNames { - if err := p.DeactivateDevice(ctx, name, true); err != nil { + if err := p.DeactivateDevice(ctx, name, true, true); err != nil { result = multierror.Append(result, errors.Wrapf(err, "failed to remove %q", name)) } } diff --git a/snapshots/devmapper/pool_device_test.go b/snapshots/devmapper/pool_device_test.go index 4a54bda60..83253121b 100644 --- a/snapshots/devmapper/pool_device_test.go +++ b/snapshots/devmapper/pool_device_test.go @@ -107,19 +107,24 @@ func TestPoolDevice(t *testing.T) { testMakeFileSystem(t, pool) }) - // Mount 'thin-1' + // Mount 'thin-1' and write v1 test file on 'thin-1' device err = mount.WithTempMount(ctx, getMounts(thinDevice1), func(thin1MountPath string) error { // Write v1 test file on 'thin-1' device thin1TestFilePath := filepath.Join(thin1MountPath, "TEST") err := ioutil.WriteFile(thin1TestFilePath, []byte("test file (v1)"), 0700) assert.NilError(t, err, "failed to write test file v1 on '%s' volume", thinDevice1) - // Take snapshot of 'thin-1' - t.Run("CreateSnapshotDevice", func(t *testing.T) { - testCreateSnapshot(t, pool) - }) + return nil + }) - // Update TEST file on 'thin-1' to v2 + // Take snapshot of 'thin-1' + t.Run("CreateSnapshotDevice", func(t *testing.T) { + testCreateSnapshot(t, pool) + }) + + // Update TEST file on 'thin-1' to v2 + err = mount.WithTempMount(ctx, getMounts(thinDevice1), func(thin1MountPath string) error { + thin1TestFilePath := filepath.Join(thin1MountPath, "TEST") err = ioutil.WriteFile(thin1TestFilePath, []byte("test file (v2)"), 0700) assert.NilError(t, err, "failed to write test file v2 on 'thin-1' volume after taking snapshot") @@ -204,7 +209,7 @@ func testDeactivateThinDevice(t *testing.T, pool *PoolDevice) { for _, deviceName := range deviceList { assert.Assert(t, pool.IsActivated(deviceName)) - err := pool.DeactivateDevice(context.Background(), deviceName, false) + err := pool.DeactivateDevice(context.Background(), deviceName, false, true) assert.NilError(t, err, "failed to remove '%s'", deviceName) assert.Assert(t, !pool.IsActivated(deviceName)) diff --git a/snapshots/devmapper/snapshotter.go b/snapshots/devmapper/snapshotter.go index 5f8336ad7..c6e0332fe 100644 --- a/snapshots/devmapper/snapshotter.go +++ b/snapshots/devmapper/snapshotter.go @@ -272,7 +272,18 @@ func (s *Snapshotter) Commit(ctx context.Context, name, key string, opts ...snap } _, err = storage.CommitActive(ctx, key, name, usage, opts...) - return err + if err != nil { + return err + } + + // The thin snapshot is not used for IO after committed, so + // suspend to flush the IO and deactivate the device. + err = s.pool.SuspendDevice(ctx, deviceName) + if err != nil { + return err + } + + return s.pool.DeactivateDevice(ctx, deviceName, true, false) }) } From 6f463d35057134cfdf6736d3c19daedc41b101e3 Mon Sep 17 00:00:00 2001 From: Eric Ren Date: Wed, 8 May 2019 11:19:23 +0800 Subject: [PATCH 2/2] test/snapshots: umount before committing snapshot For block device like devicemapper, umount before committing active snapshot can sync data onto disk. Otherewise: 1. deactivating a block device in use will fail or IO error when forced; 2. new snapshot on it will not catch the data not laid on disk yet. Signed-off-by: Eric Ren --- snapshots/testsuite/testsuite.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/snapshots/testsuite/testsuite.go b/snapshots/testsuite/testsuite.go index 8fa64fe61..ea2a88b6a 100644 --- a/snapshots/testsuite/testsuite.go +++ b/snapshots/testsuite/testsuite.go @@ -149,11 +149,12 @@ func checkSnapshotterBasic(ctx context.Context, t *testing.T, snapshotter snapsh if err := mount.All(mounts, preparing); err != nil { t.Fatalf("failure reason: %+v", err) } - defer testutil.Unmount(t, preparing) if err := initialApplier.Apply(preparing); err != nil { t.Fatalf("failure reason: %+v", err) } + // unmount before commit + testutil.Unmount(t, preparing) committed := filepath.Join(work, "committed") if err := snapshotter.Commit(ctx, committed, preparing, opt); err != nil { @@ -185,7 +186,6 @@ func checkSnapshotterBasic(ctx context.Context, t *testing.T, snapshotter snapsh if err := mount.All(mounts, next); err != nil { t.Fatalf("failure reason: %+v", err) } - defer testutil.Unmount(t, next) if err := fstest.CheckDirectoryEqualWithApplier(next, initialApplier); err != nil { t.Fatalf("failure reason: %+v", err) @@ -194,6 +194,8 @@ func checkSnapshotterBasic(ctx context.Context, t *testing.T, snapshotter snapsh if err := diffApplier.Apply(next); err != nil { t.Fatalf("failure reason: %+v", err) } + // unmount before commit + testutil.Unmount(t, next) ni, err := snapshotter.Stat(ctx, next) if err != nil {