
When an error is returned here, unlike the other error returns in the function, nothing is done to mark the added device as faulty or to remove it. I have observed that this causes later snapshot creations to keep trying to use the same ID (from the sequence) for new devices; they are then blocked because the device already exists, since it was never rolled back here. Hopefully fixes #5110. Signed-off-by: Jeremy Williams <ctrlaltdel121@gmail.com>
561 lines
16 KiB
Go
561 lines
16 KiB
Go
// +build linux
|
|
|
|
/*
|
|
Copyright The containerd Authors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package devmapper
|
|
|
|
import (
|
|
"context"
|
|
"path/filepath"
|
|
"strconv"
|
|
"time"
|
|
|
|
"github.com/hashicorp/go-multierror"
|
|
"github.com/pkg/errors"
|
|
"golang.org/x/sys/unix"
|
|
|
|
"github.com/containerd/containerd/log"
|
|
"github.com/containerd/containerd/snapshots/devmapper/dmsetup"
|
|
)
|
|
|
|
// PoolDevice ties together data and metadata volumes, represents thin-pool and manages volumes, snapshots and device ids.
|
|
type PoolDevice struct {
|
|
poolName string
|
|
metadata *PoolMetadata
|
|
}
|
|
|
|
// NewPoolDevice creates new thin-pool from existing data and metadata volumes.
|
|
// If pool 'poolName' already exists, it'll be reloaded with new parameters.
|
|
func NewPoolDevice(ctx context.Context, config *Config) (*PoolDevice, error) {
|
|
log.G(ctx).Infof("initializing pool device %q", config.PoolName)
|
|
|
|
version, err := dmsetup.Version()
|
|
if err != nil {
|
|
log.G(ctx).Errorf("dmsetup not available")
|
|
return nil, err
|
|
}
|
|
|
|
log.G(ctx).Infof("using dmsetup:\n%s", version)
|
|
|
|
dbpath := filepath.Join(config.RootPath, config.PoolName+".db")
|
|
poolMetaStore, err := NewPoolMetadata(dbpath)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Make sure pool exists and available
|
|
poolPath := dmsetup.GetFullDevicePath(config.PoolName)
|
|
if _, err := dmsetup.Info(poolPath); err != nil {
|
|
return nil, errors.Wrapf(err, "failed to query pool %q", poolPath)
|
|
}
|
|
|
|
poolDevice := &PoolDevice{
|
|
poolName: config.PoolName,
|
|
metadata: poolMetaStore,
|
|
}
|
|
|
|
if err := poolDevice.ensureDeviceStates(ctx); err != nil {
|
|
return nil, errors.Wrap(err, "failed to check devices state")
|
|
}
|
|
|
|
return poolDevice, nil
|
|
}
|
|
|
|
func retry(ctx context.Context, f func() error) error {
|
|
var (
|
|
maxRetries = 100
|
|
retryDelay = 100 * time.Millisecond
|
|
retryErr error
|
|
)
|
|
|
|
for attempt := 1; attempt <= maxRetries; attempt++ {
|
|
retryErr = f()
|
|
if retryErr == nil {
|
|
return nil
|
|
} else if retryErr != unix.EBUSY {
|
|
return retryErr
|
|
}
|
|
|
|
// Don't spam logs
|
|
if attempt%10 == 0 {
|
|
log.G(ctx).WithError(retryErr).Warnf("retrying... (%d of %d)", attempt, maxRetries)
|
|
}
|
|
|
|
// Devmapper device is busy, give it a bit of time and retry removal
|
|
time.Sleep(retryDelay)
|
|
}
|
|
|
|
return retryErr
|
|
}
|
|
|
|
// ensureDeviceStates updates devices to their real state:
|
|
// - marks devices with incomplete states (after crash) as 'Faulty'
|
|
// - activates devices if they are marked as 'Activated' but the dm
|
|
// device is not active, which can happen to a stopped container
|
|
// after a reboot
|
|
func (p *PoolDevice) ensureDeviceStates(ctx context.Context) error {
|
|
var faultyDevices []*DeviceInfo
|
|
var activatedDevices []*DeviceInfo
|
|
|
|
if err := p.WalkDevices(ctx, func(info *DeviceInfo) error {
|
|
switch info.State {
|
|
case Suspended, Resumed, Deactivated, Removed, Faulty:
|
|
case Activated:
|
|
activatedDevices = append(activatedDevices, info)
|
|
default:
|
|
faultyDevices = append(faultyDevices, info)
|
|
}
|
|
return nil
|
|
}); err != nil {
|
|
return errors.Wrap(err, "failed to query devices from metastore")
|
|
}
|
|
|
|
var result *multierror.Error
|
|
for _, dev := range activatedDevices {
|
|
if p.IsActivated(dev.Name) {
|
|
continue
|
|
}
|
|
|
|
log.G(ctx).Warnf("devmapper device %q marked as %q but not active, activating it", dev.Name, dev.State)
|
|
if err := p.activateDevice(ctx, dev); err != nil {
|
|
result = multierror.Append(result, err)
|
|
}
|
|
}
|
|
|
|
for _, dev := range faultyDevices {
|
|
log.G(ctx).
|
|
WithField("dev_id", dev.DeviceID).
|
|
WithField("parent", dev.ParentName).
|
|
WithField("error", dev.Error).
|
|
Warnf("devmapper device %q has invalid state %q, marking as faulty", dev.Name, dev.State)
|
|
|
|
if err := p.metadata.MarkFaulty(ctx, dev.Name); err != nil {
|
|
result = multierror.Append(result, err)
|
|
}
|
|
}
|
|
|
|
return multierror.Prefix(result.ErrorOrNil(), "devmapper:")
|
|
}
|
|
|
|
// transition invokes 'updateStateFn' callback to perform devmapper operation and reflects device state changes/errors in meta store.
|
|
// 'tryingState' will be set before invoking callback. If callback succeeded 'successState' will be set, otherwise
|
|
// error details will be recorded in meta store.
|
|
func (p *PoolDevice) transition(ctx context.Context, deviceName string, tryingState DeviceState, successState DeviceState, updateStateFn func() error) error {
|
|
// Set device to trying state
|
|
uerr := p.metadata.UpdateDevice(ctx, deviceName, func(deviceInfo *DeviceInfo) error {
|
|
deviceInfo.State = tryingState
|
|
return nil
|
|
})
|
|
|
|
if uerr != nil {
|
|
return errors.Wrapf(uerr, "failed to set device %q state to %q", deviceName, tryingState)
|
|
}
|
|
|
|
var result *multierror.Error
|
|
|
|
// Invoke devmapper operation
|
|
err := updateStateFn()
|
|
|
|
if err != nil {
|
|
result = multierror.Append(result, err)
|
|
}
|
|
|
|
// If operation succeeded transition to success state, otherwise save error details
|
|
uerr = p.metadata.UpdateDevice(ctx, deviceName, func(deviceInfo *DeviceInfo) error {
|
|
if err == nil {
|
|
deviceInfo.State = successState
|
|
deviceInfo.Error = ""
|
|
} else {
|
|
deviceInfo.Error = err.Error()
|
|
}
|
|
return nil
|
|
})
|
|
|
|
if uerr != nil {
|
|
result = multierror.Append(result, uerr)
|
|
}
|
|
|
|
return unwrapError(result)
|
|
}
|
|
|
|
// unwrapError converts multierror.Error to the original error when it is possible.
|
|
// multierror 1.1.0 has the similar function named Unwrap, but it requires Go 1.14.
|
|
func unwrapError(e *multierror.Error) error {
|
|
if e == nil {
|
|
return nil
|
|
}
|
|
|
|
// If the error can be expressed without multierror, return the original error.
|
|
if len(e.Errors) == 1 {
|
|
return e.Errors[0]
|
|
}
|
|
|
|
return e.ErrorOrNil()
|
|
}
|
|
|
|
// CreateThinDevice creates new devmapper thin-device with given name and size.
|
|
// Device ID for thin-device will be allocated from metadata store.
|
|
// If allocation successful, device will be activated with /dev/mapper/<deviceName>
|
|
func (p *PoolDevice) CreateThinDevice(ctx context.Context, deviceName string, virtualSizeBytes uint64) (retErr error) {
|
|
info := &DeviceInfo{
|
|
Name: deviceName,
|
|
Size: virtualSizeBytes,
|
|
State: Unknown,
|
|
}
|
|
|
|
var (
|
|
metaErr error
|
|
devErr error
|
|
activeErr error
|
|
)
|
|
|
|
defer func() {
|
|
// We've created a devmapper device, but failed to activate it, try rollback everything
|
|
if activeErr != nil {
|
|
retErr = p.rollbackActivate(ctx, info, activeErr)
|
|
return
|
|
}
|
|
|
|
// We're unable to create the devmapper device, most likely something wrong with the deviceID
|
|
if devErr != nil {
|
|
retErr = multierror.Append(retErr, p.metadata.MarkFaulty(ctx, info.Name))
|
|
return
|
|
}
|
|
}()
|
|
|
|
// Save initial device metadata and allocate new device ID from store
|
|
metaErr = p.metadata.AddDevice(ctx, info)
|
|
if metaErr != nil {
|
|
return metaErr
|
|
}
|
|
|
|
// Create thin device
|
|
devErr = p.createDevice(ctx, info)
|
|
if devErr != nil {
|
|
return devErr
|
|
}
|
|
|
|
// Activate thin device
|
|
activeErr = p.activateDevice(ctx, info)
|
|
if activeErr != nil {
|
|
return activeErr
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (p *PoolDevice) rollbackActivate(ctx context.Context, info *DeviceInfo, activateErr error) error {
|
|
// Delete the device first.
|
|
delErr := p.deleteDevice(ctx, info)
|
|
if delErr != nil {
|
|
// Failed to rollback, mark the device as faulty and keep metadata in order to
|
|
// preserve the faulty device ID
|
|
return multierror.Append(activateErr, delErr, p.metadata.MarkFaulty(ctx, info.Name))
|
|
}
|
|
|
|
// The devmapper device has been successfully deleted, deallocate device ID
|
|
if err := p.RemoveDevice(ctx, info.Name); err != nil {
|
|
return multierror.Append(activateErr, err)
|
|
}
|
|
|
|
return activateErr
|
|
}
|
|
|
|
// createDevice creates thin device
|
|
func (p *PoolDevice) createDevice(ctx context.Context, info *DeviceInfo) error {
|
|
if err := p.transition(ctx, info.Name, Creating, Created, func() error {
|
|
return dmsetup.CreateDevice(p.poolName, info.DeviceID)
|
|
}); err != nil {
|
|
return errors.Wrapf(err, "failed to create new thin device %q (dev: %d)", info.Name, info.DeviceID)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// activateDevice activates thin device
|
|
func (p *PoolDevice) activateDevice(ctx context.Context, info *DeviceInfo) error {
|
|
if err := p.transition(ctx, info.Name, Activating, Activated, func() error {
|
|
return dmsetup.ActivateDevice(p.poolName, info.Name, info.DeviceID, info.Size, "")
|
|
}); err != nil {
|
|
return errors.Wrapf(err, "failed to activate new thin device %q (dev: %d)", info.Name, info.DeviceID)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// CreateSnapshotDevice creates and activates new thin-device from parent thin-device (makes snapshot)
|
|
func (p *PoolDevice) CreateSnapshotDevice(ctx context.Context, deviceName string, snapshotName string, virtualSizeBytes uint64) (retErr error) {
|
|
baseInfo, err := p.metadata.GetDevice(ctx, deviceName)
|
|
if err != nil {
|
|
return errors.Wrapf(err, "failed to query device metadata for %q", deviceName)
|
|
}
|
|
|
|
snapInfo := &DeviceInfo{
|
|
Name: snapshotName,
|
|
Size: virtualSizeBytes,
|
|
ParentName: deviceName,
|
|
State: Unknown,
|
|
}
|
|
|
|
var (
|
|
metaErr error
|
|
devErr error
|
|
activeErr error
|
|
)
|
|
|
|
defer func() {
|
|
// We've created a devmapper device, but failed to activate it, try rollback everything
|
|
if activeErr != nil {
|
|
retErr = p.rollbackActivate(ctx, snapInfo, activeErr)
|
|
return
|
|
}
|
|
|
|
// We're unable to create the devmapper device, most likely something wrong with the deviceID
|
|
if devErr != nil {
|
|
retErr = multierror.Append(retErr, p.metadata.MarkFaulty(ctx, snapInfo.Name))
|
|
return
|
|
}
|
|
}()
|
|
|
|
// The base device must be suspend before taking a snapshot to
|
|
// avoid corruption.
|
|
// https://github.com/torvalds/linux/blob/v5.7/Documentation/admin-guide/device-mapper/thin-provisioning.rst#internal-snapshots
|
|
if p.IsLoaded(deviceName) {
|
|
log.G(ctx).Debugf("suspending %q before taking its snapshot", deviceName)
|
|
suspendErr := p.SuspendDevice(ctx, deviceName)
|
|
if suspendErr != nil {
|
|
return suspendErr
|
|
}
|
|
defer func() {
|
|
err := p.ResumeDevice(ctx, deviceName)
|
|
if err != nil {
|
|
log.G(ctx).WithError(err).Errorf("failed to resume base device %q after taking its snapshot", baseInfo.Name)
|
|
}
|
|
}()
|
|
}
|
|
|
|
// Save snapshot metadata and allocate new device ID
|
|
metaErr = p.metadata.AddDevice(ctx, snapInfo)
|
|
if metaErr != nil {
|
|
return metaErr
|
|
}
|
|
|
|
// Create thin device snapshot
|
|
devErr = p.createSnapshot(ctx, baseInfo, snapInfo)
|
|
if devErr != nil {
|
|
return devErr
|
|
}
|
|
|
|
// Activate the snapshot device
|
|
activeErr = p.activateDevice(ctx, snapInfo)
|
|
if activeErr != nil {
|
|
return activeErr
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (p *PoolDevice) createSnapshot(ctx context.Context, baseInfo, snapInfo *DeviceInfo) error {
|
|
if err := p.transition(ctx, snapInfo.Name, Creating, Created, func() error {
|
|
return dmsetup.CreateSnapshot(p.poolName, snapInfo.DeviceID, baseInfo.DeviceID)
|
|
}); err != nil {
|
|
return errors.Wrapf(err,
|
|
"failed to create snapshot %q (dev: %d) from %q (dev: %d)",
|
|
snapInfo.Name,
|
|
snapInfo.DeviceID,
|
|
baseInfo.Name,
|
|
baseInfo.DeviceID)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// SuspendDevice flushes the outstanding IO and blocks the further IO
|
|
func (p *PoolDevice) SuspendDevice(ctx context.Context, deviceName string) error {
|
|
if err := p.transition(ctx, deviceName, Suspending, Suspended, func() error {
|
|
return dmsetup.SuspendDevice(deviceName)
|
|
}); err != nil {
|
|
return errors.Wrapf(err, "failed to suspend device %q", deviceName)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (p *PoolDevice) ResumeDevice(ctx context.Context, deviceName string) error {
|
|
if err := p.transition(ctx, deviceName, Resuming, Resumed, func() error {
|
|
return dmsetup.ResumeDevice(deviceName)
|
|
}); err != nil {
|
|
return errors.Wrapf(err, "failed to resume device %q", deviceName)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// DeactivateDevice deactivates thin device
|
|
func (p *PoolDevice) DeactivateDevice(ctx context.Context, deviceName string, deferred, withForce bool) error {
|
|
if !p.IsLoaded(deviceName) {
|
|
return nil
|
|
}
|
|
|
|
opts := []dmsetup.RemoveDeviceOpt{dmsetup.RemoveWithRetries}
|
|
if deferred {
|
|
opts = append(opts, dmsetup.RemoveDeferred)
|
|
}
|
|
if withForce {
|
|
opts = append(opts, dmsetup.RemoveWithForce)
|
|
}
|
|
|
|
if err := p.transition(ctx, deviceName, Deactivating, Deactivated, func() error {
|
|
return retry(ctx, func() error {
|
|
if err := dmsetup.RemoveDevice(deviceName, opts...); err != nil {
|
|
return errors.Wrap(err, "failed to deactivate device")
|
|
}
|
|
|
|
return nil
|
|
})
|
|
}); err != nil {
|
|
return errors.Wrapf(err, "failed to deactivate device %q", deviceName)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// IsActivated returns true if thin-device is activated
|
|
func (p *PoolDevice) IsActivated(deviceName string) bool {
|
|
infos, err := dmsetup.Info(deviceName)
|
|
if err != nil || len(infos) != 1 {
|
|
// Couldn't query device info, device not active
|
|
return false
|
|
}
|
|
|
|
if devInfo := infos[0]; devInfo.TableLive {
|
|
return true
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// IsLoaded returns true if thin-device is visible for dmsetup
|
|
func (p *PoolDevice) IsLoaded(deviceName string) bool {
|
|
_, err := dmsetup.Info(deviceName)
|
|
return err == nil
|
|
}
|
|
|
|
// GetUsage reports total size in bytes consumed by a thin-device.
|
|
// It relies on the number of used blocks reported by 'dmsetup status'.
|
|
// The output looks like:
|
|
// device2: 0 204800 thin 17280 204799
|
|
// Where 17280 is the number of used sectors
|
|
func (p *PoolDevice) GetUsage(deviceName string) (int64, error) {
|
|
status, err := dmsetup.Status(deviceName)
|
|
if err != nil {
|
|
return 0, errors.Wrapf(err, "can't get status for device %q", deviceName)
|
|
}
|
|
|
|
if len(status.Params) == 0 {
|
|
return 0, errors.Errorf("failed to get the number of used blocks, unexpected output from dmsetup status")
|
|
}
|
|
|
|
count, err := strconv.ParseInt(status.Params[0], 10, 64)
|
|
if err != nil {
|
|
return 0, errors.Wrapf(err, "failed to parse status params: %q", status.Params[0])
|
|
}
|
|
|
|
return count * dmsetup.SectorSize, nil
|
|
}
|
|
|
|
// RemoveDevice completely wipes out thin device from thin-pool and frees it's device ID
|
|
func (p *PoolDevice) RemoveDevice(ctx context.Context, deviceName string) error {
|
|
info, err := p.metadata.GetDevice(ctx, deviceName)
|
|
if err != nil {
|
|
return errors.Wrapf(err, "can't query metadata for device %q", deviceName)
|
|
}
|
|
|
|
if err := p.DeactivateDevice(ctx, deviceName, false, true); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := p.deleteDevice(ctx, info); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Remove record from meta store and free device ID
|
|
if err := p.metadata.RemoveDevice(ctx, deviceName); err != nil {
|
|
return errors.Wrapf(err, "can't remove device %q metadata from store after removal", deviceName)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (p *PoolDevice) deleteDevice(ctx context.Context, info *DeviceInfo) error {
|
|
if err := p.transition(ctx, info.Name, Removing, Removed, func() error {
|
|
return retry(ctx, func() error {
|
|
// Send 'delete' message to thin-pool
|
|
e := dmsetup.DeleteDevice(p.poolName, info.DeviceID)
|
|
// Ignores the error if the device has been deleted already.
|
|
if e != nil && !errors.Is(e, unix.ENODATA) {
|
|
return e
|
|
}
|
|
return nil
|
|
})
|
|
}); err != nil {
|
|
return errors.Wrapf(err, "failed to delete device %q (dev id: %d)", info.Name, info.DeviceID)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// RemovePool deactivates all child thin-devices and removes thin-pool device
|
|
func (p *PoolDevice) RemovePool(ctx context.Context) error {
|
|
deviceNames, err := p.metadata.GetDeviceNames(ctx)
|
|
if err != nil {
|
|
return errors.Wrap(err, "can't query device names")
|
|
}
|
|
|
|
var result *multierror.Error
|
|
|
|
// Deactivate devices if any
|
|
for _, name := range deviceNames {
|
|
if err := p.DeactivateDevice(ctx, name, true, true); err != nil {
|
|
result = multierror.Append(result, errors.Wrapf(err, "failed to remove %q", name))
|
|
}
|
|
}
|
|
|
|
if err := dmsetup.RemoveDevice(p.poolName, dmsetup.RemoveWithForce, dmsetup.RemoveWithRetries, dmsetup.RemoveDeferred); err != nil {
|
|
result = multierror.Append(result, errors.Wrapf(err, "failed to remove pool %q", p.poolName))
|
|
}
|
|
|
|
return result.ErrorOrNil()
|
|
}
|
|
|
|
// MarkDeviceState changes the device's state in metastore
|
|
func (p *PoolDevice) MarkDeviceState(ctx context.Context, name string, state DeviceState) error {
|
|
return p.metadata.ChangeDeviceState(ctx, name, state)
|
|
}
|
|
|
|
// WalkDevices iterates all devices in pool metadata
|
|
func (p *PoolDevice) WalkDevices(ctx context.Context, cb func(info *DeviceInfo) error) error {
|
|
return p.metadata.WalkDevices(ctx, func(info *DeviceInfo) error {
|
|
return cb(info)
|
|
})
|
|
}
|
|
|
|
// Close closes pool device (thin-pool will not be removed)
|
|
func (p *PoolDevice) Close() error {
|
|
return p.metadata.Close()
|
|
}
|