snapshots/devmapper: deactivate thin device after committed
1. reason to deactivate committed snapshot The thin device will not be used for IO after committed, and further thin snapshotting is OK using an inactive thin device as origin. The benefits to deactivate are: - device is not unneccesary visible avoiding any unexpected IO; - save useless kernel data structs for maintaining active dm. Quote from kernel doc (Documentation/device-mapper/provisioning.txt): " ii) Using an internal snapshot. Once created, the user doesn't have to worry about any connection between the origin and the snapshot. Indeed the snapshot is no different from any other thinly-provisioned device and can be snapshotted itself via the same method. It's perfectly legal to have only one of them active, and there's no ordering requirement on activating or removing them both. (This differs from conventional device-mapper snapshots.) " 2. an thinpool metadata bug is naturally removed An problem happens when failed to suspend/resume origin thin device when creating snapshot: "failed to create snapshot device from parent vg0-mythinpool-snap-3" error="failed to save initial metadata for snapshot "vg0-mythinpool-snap-19": object already exists" This issue occurs because when failed to create snapshot, the snapshotter.store can be rollbacked, but the thin pool metadata boltdb failed to rollback in PoolDevice.CreateSnapshotDevice(), therefore metadata becomes inconsistent: the snapshotID is not taken in snapshotter.store, but saved in pool metadata boltdb. The cause is, in PoolDevice.CreateSnapshotDevice(), the defer calls are invoked on "first-in-last-out" order. When the error happens on the "resume device" defer call, the metadata is saved and snapshot is created, which has no chance to be rollbacked. Signed-off-by: Eric Ren <renzhen@linux.alibaba.com>
This commit is contained in:
parent
d68b593de4
commit
3887053177
@ -201,6 +201,13 @@ func RemoveDevice(deviceName string, opts ...RemoveDeviceOpt) error {
|
|||||||
args = append(args, GetFullDevicePath(deviceName))
|
args = append(args, GetFullDevicePath(deviceName))
|
||||||
|
|
||||||
_, err := dmsetup(args...)
|
_, err := dmsetup(args...)
|
||||||
|
if err == unix.ENXIO {
|
||||||
|
// Ignore "No such device or address" error because we dmsetup
|
||||||
|
// remove with "deferred" option, there is chance for the device
|
||||||
|
// having been removed.
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -372,9 +379,13 @@ func parseDmsetupError(output string) string {
|
|||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
const failedSubstr = "failed: "
|
|
||||||
|
|
||||||
line := lines[0]
|
line := lines[0]
|
||||||
|
// Handle output like "Device /dev/mapper/snapshotter-suite-pool-snap-1 not found"
|
||||||
|
if strings.HasSuffix(line, "not found") {
|
||||||
|
return unix.ENXIO.Error()
|
||||||
|
}
|
||||||
|
|
||||||
|
const failedSubstr = "failed: "
|
||||||
idx := strings.LastIndex(line, failedSubstr)
|
idx := strings.LastIndex(line, failedSubstr)
|
||||||
if idx == -1 {
|
if idx == -1 {
|
||||||
return ""
|
return ""
|
||||||
|
@ -178,19 +178,6 @@ func (p *PoolDevice) CreateSnapshotDevice(ctx context.Context, deviceName string
|
|||||||
return errors.Wrapf(err, "failed to query device metadata for %q", deviceName)
|
return errors.Wrapf(err, "failed to query device metadata for %q", deviceName)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Suspend thin device if it was activated previously to avoid corruptions
|
|
||||||
isActivated := p.IsActivated(baseInfo.Name)
|
|
||||||
if isActivated {
|
|
||||||
if err := p.suspendDevice(ctx, baseInfo); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Resume back base thin device on exit
|
|
||||||
defer func() {
|
|
||||||
retErr = multierror.Append(retErr, p.resumeDevice(ctx, baseInfo)).ErrorOrNil()
|
|
||||||
}()
|
|
||||||
}
|
|
||||||
|
|
||||||
snapInfo := &DeviceInfo{
|
snapInfo := &DeviceInfo{
|
||||||
Name: snapshotName,
|
Name: snapshotName,
|
||||||
Size: virtualSizeBytes,
|
Size: virtualSizeBytes,
|
||||||
@ -230,26 +217,6 @@ func (p *PoolDevice) CreateSnapshotDevice(ctx context.Context, deviceName string
|
|||||||
return p.activateDevice(ctx, snapInfo)
|
return p.activateDevice(ctx, snapInfo)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *PoolDevice) suspendDevice(ctx context.Context, info *DeviceInfo) error {
|
|
||||||
if err := p.transition(ctx, info.Name, Suspending, Suspended, func() error {
|
|
||||||
return dmsetup.SuspendDevice(info.Name)
|
|
||||||
}); err != nil {
|
|
||||||
return errors.Wrapf(err, "failed to suspend device %q", info.Name)
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *PoolDevice) resumeDevice(ctx context.Context, info *DeviceInfo) error {
|
|
||||||
if err := p.transition(ctx, info.Name, Resuming, Resumed, func() error {
|
|
||||||
return dmsetup.ResumeDevice(info.Name)
|
|
||||||
}); err != nil {
|
|
||||||
return errors.Wrapf(err, "failed to resume device %q", info.Name)
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *PoolDevice) createSnapshot(ctx context.Context, baseInfo, snapInfo *DeviceInfo) error {
|
func (p *PoolDevice) createSnapshot(ctx context.Context, baseInfo, snapInfo *DeviceInfo) error {
|
||||||
if err := p.transition(ctx, snapInfo.Name, Creating, Created, func() error {
|
if err := p.transition(ctx, snapInfo.Name, Creating, Created, func() error {
|
||||||
return dmsetup.CreateSnapshot(p.poolName, snapInfo.DeviceID, baseInfo.DeviceID)
|
return dmsetup.CreateSnapshot(p.poolName, snapInfo.DeviceID, baseInfo.DeviceID)
|
||||||
@ -265,16 +232,30 @@ func (p *PoolDevice) createSnapshot(ctx context.Context, baseInfo, snapInfo *Dev
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SuspendDevice flushes the outstanding IO and blocks the further IO
|
||||||
|
func (p *PoolDevice) SuspendDevice(ctx context.Context, deviceName string) error {
|
||||||
|
if err := p.transition(ctx, deviceName, Suspending, Suspended, func() error {
|
||||||
|
return dmsetup.SuspendDevice(deviceName)
|
||||||
|
}); err != nil {
|
||||||
|
return errors.Wrapf(err, "failed to suspend device %q", deviceName)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// DeactivateDevice deactivates thin device
|
// DeactivateDevice deactivates thin device
|
||||||
func (p *PoolDevice) DeactivateDevice(ctx context.Context, deviceName string, deferred bool) error {
|
func (p *PoolDevice) DeactivateDevice(ctx context.Context, deviceName string, deferred, withForce bool) error {
|
||||||
if !p.IsActivated(deviceName) {
|
if !p.IsLoaded(deviceName) {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
opts := []dmsetup.RemoveDeviceOpt{dmsetup.RemoveWithForce, dmsetup.RemoveWithRetries}
|
opts := []dmsetup.RemoveDeviceOpt{dmsetup.RemoveWithRetries}
|
||||||
if deferred {
|
if deferred {
|
||||||
opts = append(opts, dmsetup.RemoveDeferred)
|
opts = append(opts, dmsetup.RemoveDeferred)
|
||||||
}
|
}
|
||||||
|
if withForce {
|
||||||
|
opts = append(opts, dmsetup.RemoveWithForce)
|
||||||
|
}
|
||||||
|
|
||||||
if err := p.transition(ctx, deviceName, Deactivating, Deactivated, func() error {
|
if err := p.transition(ctx, deviceName, Deactivating, Deactivated, func() error {
|
||||||
return dmsetup.RemoveDevice(deviceName, opts...)
|
return dmsetup.RemoveDevice(deviceName, opts...)
|
||||||
@ -300,6 +281,12 @@ func (p *PoolDevice) IsActivated(deviceName string) bool {
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IsLoaded returns true if thin-device is visible for dmsetup
|
||||||
|
func (p *PoolDevice) IsLoaded(deviceName string) bool {
|
||||||
|
_, err := dmsetup.Info(deviceName)
|
||||||
|
return err == nil
|
||||||
|
}
|
||||||
|
|
||||||
// GetUsage reports total size in bytes consumed by a thin-device.
|
// GetUsage reports total size in bytes consumed by a thin-device.
|
||||||
// It relies on the number of used blocks reported by 'dmsetup status'.
|
// It relies on the number of used blocks reported by 'dmsetup status'.
|
||||||
// The output looks like:
|
// The output looks like:
|
||||||
@ -330,7 +317,7 @@ func (p *PoolDevice) RemoveDevice(ctx context.Context, deviceName string) error
|
|||||||
return errors.Wrapf(err, "can't query metadata for device %q", deviceName)
|
return errors.Wrapf(err, "can't query metadata for device %q", deviceName)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := p.DeactivateDevice(ctx, deviceName, true); err != nil {
|
if err := p.DeactivateDevice(ctx, deviceName, true, true); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -368,7 +355,7 @@ func (p *PoolDevice) RemovePool(ctx context.Context) error {
|
|||||||
|
|
||||||
// Deactivate devices if any
|
// Deactivate devices if any
|
||||||
for _, name := range deviceNames {
|
for _, name := range deviceNames {
|
||||||
if err := p.DeactivateDevice(ctx, name, true); err != nil {
|
if err := p.DeactivateDevice(ctx, name, true, true); err != nil {
|
||||||
result = multierror.Append(result, errors.Wrapf(err, "failed to remove %q", name))
|
result = multierror.Append(result, errors.Wrapf(err, "failed to remove %q", name))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -107,19 +107,24 @@ func TestPoolDevice(t *testing.T) {
|
|||||||
testMakeFileSystem(t, pool)
|
testMakeFileSystem(t, pool)
|
||||||
})
|
})
|
||||||
|
|
||||||
// Mount 'thin-1'
|
// Mount 'thin-1' and write v1 test file on 'thin-1' device
|
||||||
err = mount.WithTempMount(ctx, getMounts(thinDevice1), func(thin1MountPath string) error {
|
err = mount.WithTempMount(ctx, getMounts(thinDevice1), func(thin1MountPath string) error {
|
||||||
// Write v1 test file on 'thin-1' device
|
// Write v1 test file on 'thin-1' device
|
||||||
thin1TestFilePath := filepath.Join(thin1MountPath, "TEST")
|
thin1TestFilePath := filepath.Join(thin1MountPath, "TEST")
|
||||||
err := ioutil.WriteFile(thin1TestFilePath, []byte("test file (v1)"), 0700)
|
err := ioutil.WriteFile(thin1TestFilePath, []byte("test file (v1)"), 0700)
|
||||||
assert.NilError(t, err, "failed to write test file v1 on '%s' volume", thinDevice1)
|
assert.NilError(t, err, "failed to write test file v1 on '%s' volume", thinDevice1)
|
||||||
|
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
|
||||||
// Take snapshot of 'thin-1'
|
// Take snapshot of 'thin-1'
|
||||||
t.Run("CreateSnapshotDevice", func(t *testing.T) {
|
t.Run("CreateSnapshotDevice", func(t *testing.T) {
|
||||||
testCreateSnapshot(t, pool)
|
testCreateSnapshot(t, pool)
|
||||||
})
|
})
|
||||||
|
|
||||||
// Update TEST file on 'thin-1' to v2
|
// Update TEST file on 'thin-1' to v2
|
||||||
|
err = mount.WithTempMount(ctx, getMounts(thinDevice1), func(thin1MountPath string) error {
|
||||||
|
thin1TestFilePath := filepath.Join(thin1MountPath, "TEST")
|
||||||
err = ioutil.WriteFile(thin1TestFilePath, []byte("test file (v2)"), 0700)
|
err = ioutil.WriteFile(thin1TestFilePath, []byte("test file (v2)"), 0700)
|
||||||
assert.NilError(t, err, "failed to write test file v2 on 'thin-1' volume after taking snapshot")
|
assert.NilError(t, err, "failed to write test file v2 on 'thin-1' volume after taking snapshot")
|
||||||
|
|
||||||
@ -204,7 +209,7 @@ func testDeactivateThinDevice(t *testing.T, pool *PoolDevice) {
|
|||||||
for _, deviceName := range deviceList {
|
for _, deviceName := range deviceList {
|
||||||
assert.Assert(t, pool.IsActivated(deviceName))
|
assert.Assert(t, pool.IsActivated(deviceName))
|
||||||
|
|
||||||
err := pool.DeactivateDevice(context.Background(), deviceName, false)
|
err := pool.DeactivateDevice(context.Background(), deviceName, false, true)
|
||||||
assert.NilError(t, err, "failed to remove '%s'", deviceName)
|
assert.NilError(t, err, "failed to remove '%s'", deviceName)
|
||||||
|
|
||||||
assert.Assert(t, !pool.IsActivated(deviceName))
|
assert.Assert(t, !pool.IsActivated(deviceName))
|
||||||
|
@ -272,7 +272,18 @@ func (s *Snapshotter) Commit(ctx context.Context, name, key string, opts ...snap
|
|||||||
}
|
}
|
||||||
|
|
||||||
_, err = storage.CommitActive(ctx, key, name, usage, opts...)
|
_, err = storage.CommitActive(ctx, key, name, usage, opts...)
|
||||||
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// The thin snapshot is not used for IO after committed, so
|
||||||
|
// suspend to flush the IO and deactivate the device.
|
||||||
|
err = s.pool.SuspendDevice(ctx, deviceName)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return s.pool.DeactivateDevice(ctx, deviceName, true, false)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user