snapshots/devmapper: deactivate thin device after commit
1. Reason to deactivate a committed snapshot

The thin device is not used for IO after it has been committed, and further thin snapshots can still be taken using an inactive thin device as the origin. The benefits of deactivating it are:
- the device is not unnecessarily visible, which avoids any unexpected IO;
- the kernel does not have to keep otherwise useless data structures for maintaining an active dm device.

Quote from the kernel doc (Documentation/device-mapper/thin-provisioning.txt):
"
ii) Using an internal snapshot.
Once created, the user doesn't have to worry about any connection between the origin and the snapshot. Indeed the snapshot is no different from any other thinly-provisioned device and can be snapshotted itself via the same method. It's perfectly legal to have only one of them active, and there's no ordering requirement on activating or removing them both. (This differs from conventional device-mapper snapshots.)
"

2. A thin-pool metadata bug is removed as a side effect

A problem occurs when suspending/resuming the origin thin device fails while creating a snapshot:
"failed to create snapshot device from parent vg0-mythinpool-snap-3" error="failed to save initial metadata for snapshot "vg0-mythinpool-snap-19": object already exists"

This issue occurs because, when snapshot creation fails, the snapshotter.store can be rolled back, but the thin-pool metadata boltdb is not rolled back in PoolDevice.CreateSnapshotDevice(). The metadata therefore becomes inconsistent: the snapshotID is not taken in snapshotter.store, but it is saved in the pool metadata boltdb.

The cause is that the deferred calls in PoolDevice.CreateSnapshotDevice() are invoked in "first-in-last-out" order. When the error happens in the "resume device" deferred call, the metadata has already been saved and the snapshot has already been created, so there is no chance to roll them back.

Signed-off-by: Eric Ren <renzhen@linux.alibaba.com>
This commit is contained in:
@@ -178,19 +178,6 @@ func (p *PoolDevice) CreateSnapshotDevice(ctx context.Context, deviceName string
|
||||
return errors.Wrapf(err, "failed to query device metadata for %q", deviceName)
|
||||
}
|
||||
|
||||
// Suspend thin device if it was activated previously to avoid corruptions
|
||||
isActivated := p.IsActivated(baseInfo.Name)
|
||||
if isActivated {
|
||||
if err := p.suspendDevice(ctx, baseInfo); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Resume back base thin device on exit
|
||||
defer func() {
|
||||
retErr = multierror.Append(retErr, p.resumeDevice(ctx, baseInfo)).ErrorOrNil()
|
||||
}()
|
||||
}
|
||||
|
||||
snapInfo := &DeviceInfo{
|
||||
Name: snapshotName,
|
||||
Size: virtualSizeBytes,
|
||||
@@ -230,26 +217,6 @@ func (p *PoolDevice) CreateSnapshotDevice(ctx context.Context, deviceName string
|
||||
return p.activateDevice(ctx, snapInfo)
|
||||
}
|
||||
|
||||
func (p *PoolDevice) suspendDevice(ctx context.Context, info *DeviceInfo) error {
|
||||
if err := p.transition(ctx, info.Name, Suspending, Suspended, func() error {
|
||||
return dmsetup.SuspendDevice(info.Name)
|
||||
}); err != nil {
|
||||
return errors.Wrapf(err, "failed to suspend device %q", info.Name)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *PoolDevice) resumeDevice(ctx context.Context, info *DeviceInfo) error {
|
||||
if err := p.transition(ctx, info.Name, Resuming, Resumed, func() error {
|
||||
return dmsetup.ResumeDevice(info.Name)
|
||||
}); err != nil {
|
||||
return errors.Wrapf(err, "failed to resume device %q", info.Name)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *PoolDevice) createSnapshot(ctx context.Context, baseInfo, snapInfo *DeviceInfo) error {
|
||||
if err := p.transition(ctx, snapInfo.Name, Creating, Created, func() error {
|
||||
return dmsetup.CreateSnapshot(p.poolName, snapInfo.DeviceID, baseInfo.DeviceID)
|
||||
@@ -265,16 +232,30 @@ func (p *PoolDevice) createSnapshot(ctx context.Context, baseInfo, snapInfo *Dev
|
||||
return nil
|
||||
}
|
||||
|
||||
// SuspendDevice flushes the outstanding IO and blocks the further IO
|
||||
func (p *PoolDevice) SuspendDevice(ctx context.Context, deviceName string) error {
|
||||
if err := p.transition(ctx, deviceName, Suspending, Suspended, func() error {
|
||||
return dmsetup.SuspendDevice(deviceName)
|
||||
}); err != nil {
|
||||
return errors.Wrapf(err, "failed to suspend device %q", deviceName)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// DeactivateDevice deactivates thin device
|
||||
func (p *PoolDevice) DeactivateDevice(ctx context.Context, deviceName string, deferred bool) error {
|
||||
if !p.IsActivated(deviceName) {
|
||||
func (p *PoolDevice) DeactivateDevice(ctx context.Context, deviceName string, deferred, withForce bool) error {
|
||||
if !p.IsLoaded(deviceName) {
|
||||
return nil
|
||||
}
|
||||
|
||||
opts := []dmsetup.RemoveDeviceOpt{dmsetup.RemoveWithForce, dmsetup.RemoveWithRetries}
|
||||
opts := []dmsetup.RemoveDeviceOpt{dmsetup.RemoveWithRetries}
|
||||
if deferred {
|
||||
opts = append(opts, dmsetup.RemoveDeferred)
|
||||
}
|
||||
if withForce {
|
||||
opts = append(opts, dmsetup.RemoveWithForce)
|
||||
}
|
||||
|
||||
if err := p.transition(ctx, deviceName, Deactivating, Deactivated, func() error {
|
||||
return dmsetup.RemoveDevice(deviceName, opts...)
|
||||
@@ -300,6 +281,12 @@ func (p *PoolDevice) IsActivated(deviceName string) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// IsLoaded returns true if thin-device is visible for dmsetup
|
||||
func (p *PoolDevice) IsLoaded(deviceName string) bool {
|
||||
_, err := dmsetup.Info(deviceName)
|
||||
return err == nil
|
||||
}
|
||||
|
||||
// GetUsage reports total size in bytes consumed by a thin-device.
|
||||
// It relies on the number of used blocks reported by 'dmsetup status'.
|
||||
// The output looks like:
|
||||
@@ -330,7 +317,7 @@ func (p *PoolDevice) RemoveDevice(ctx context.Context, deviceName string) error
|
||||
return errors.Wrapf(err, "can't query metadata for device %q", deviceName)
|
||||
}
|
||||
|
||||
if err := p.DeactivateDevice(ctx, deviceName, true); err != nil {
|
||||
if err := p.DeactivateDevice(ctx, deviceName, true, true); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -368,7 +355,7 @@ func (p *PoolDevice) RemovePool(ctx context.Context) error {
|
||||
|
||||
// Deactivate devices if any
|
||||
for _, name := range deviceNames {
|
||||
if err := p.DeactivateDevice(ctx, name, true); err != nil {
|
||||
if err := p.DeactivateDevice(ctx, name, true, true); err != nil {
|
||||
result = multierror.Append(result, errors.Wrapf(err, "failed to remove %q", name))
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user