containerd/snapshots/devmapper/pool_device.go
Jeremy Williams 51a72f0492 mark device faulty after parent fails to suspend
When an error is returned here, unlike the other error returns in the function, nothing is done to mark the added device as faulty or remove it.
I have observed this causing future snapshot creations to continue to attempt to use the same ID (from the sequence) to create new devices
and get blocked because the device already exists because it was not rolled back here.

Hopefully fixes #5110

Signed-off-by: Jeremy Williams <ctrlaltdel121@gmail.com>
2021-03-03 17:02:07 -05:00

561 lines
16 KiB
Go

// +build linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package devmapper
import (
"context"
"path/filepath"
"strconv"
"time"
"github.com/hashicorp/go-multierror"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
"github.com/containerd/containerd/log"
"github.com/containerd/containerd/snapshots/devmapper/dmsetup"
)
// PoolDevice ties together data and metadata volumes, represents thin-pool and manages volumes, snapshots and device ids.
type PoolDevice struct {
poolName string
metadata *PoolMetadata
}
// NewPoolDevice creates new thin-pool from existing data and metadata volumes.
// If pool 'poolName' already exists, it'll be reloaded with new parameters.
func NewPoolDevice(ctx context.Context, config *Config) (*PoolDevice, error) {
log.G(ctx).Infof("initializing pool device %q", config.PoolName)
version, err := dmsetup.Version()
if err != nil {
log.G(ctx).Errorf("dmsetup not available")
return nil, err
}
log.G(ctx).Infof("using dmsetup:\n%s", version)
dbpath := filepath.Join(config.RootPath, config.PoolName+".db")
poolMetaStore, err := NewPoolMetadata(dbpath)
if err != nil {
return nil, err
}
// Make sure pool exists and available
poolPath := dmsetup.GetFullDevicePath(config.PoolName)
if _, err := dmsetup.Info(poolPath); err != nil {
return nil, errors.Wrapf(err, "failed to query pool %q", poolPath)
}
poolDevice := &PoolDevice{
poolName: config.PoolName,
metadata: poolMetaStore,
}
if err := poolDevice.ensureDeviceStates(ctx); err != nil {
return nil, errors.Wrap(err, "failed to check devices state")
}
return poolDevice, nil
}
func retry(ctx context.Context, f func() error) error {
var (
maxRetries = 100
retryDelay = 100 * time.Millisecond
retryErr error
)
for attempt := 1; attempt <= maxRetries; attempt++ {
retryErr = f()
if retryErr == nil {
return nil
} else if retryErr != unix.EBUSY {
return retryErr
}
// Don't spam logs
if attempt%10 == 0 {
log.G(ctx).WithError(retryErr).Warnf("retrying... (%d of %d)", attempt, maxRetries)
}
// Devmapper device is busy, give it a bit of time and retry removal
time.Sleep(retryDelay)
}
return retryErr
}
// ensureDeviceStates updates devices to their real state:
// - marks devices with incomplete states (after crash) as 'Faulty'
// - activates devices if they are marked as 'Activated' but the dm
// device is not active, which can happen to a stopped container
// after a reboot
func (p *PoolDevice) ensureDeviceStates(ctx context.Context) error {
var faultyDevices []*DeviceInfo
var activatedDevices []*DeviceInfo
if err := p.WalkDevices(ctx, func(info *DeviceInfo) error {
switch info.State {
case Suspended, Resumed, Deactivated, Removed, Faulty:
case Activated:
activatedDevices = append(activatedDevices, info)
default:
faultyDevices = append(faultyDevices, info)
}
return nil
}); err != nil {
return errors.Wrap(err, "failed to query devices from metastore")
}
var result *multierror.Error
for _, dev := range activatedDevices {
if p.IsActivated(dev.Name) {
continue
}
log.G(ctx).Warnf("devmapper device %q marked as %q but not active, activating it", dev.Name, dev.State)
if err := p.activateDevice(ctx, dev); err != nil {
result = multierror.Append(result, err)
}
}
for _, dev := range faultyDevices {
log.G(ctx).
WithField("dev_id", dev.DeviceID).
WithField("parent", dev.ParentName).
WithField("error", dev.Error).
Warnf("devmapper device %q has invalid state %q, marking as faulty", dev.Name, dev.State)
if err := p.metadata.MarkFaulty(ctx, dev.Name); err != nil {
result = multierror.Append(result, err)
}
}
return multierror.Prefix(result.ErrorOrNil(), "devmapper:")
}
// transition invokes 'updateStateFn' callback to perform devmapper operation and reflects device state changes/errors in meta store.
// 'tryingState' will be set before invoking callback. If callback succeeded 'successState' will be set, otherwise
// error details will be recorded in meta store.
func (p *PoolDevice) transition(ctx context.Context, deviceName string, tryingState DeviceState, successState DeviceState, updateStateFn func() error) error {
// Set device to trying state
uerr := p.metadata.UpdateDevice(ctx, deviceName, func(deviceInfo *DeviceInfo) error {
deviceInfo.State = tryingState
return nil
})
if uerr != nil {
return errors.Wrapf(uerr, "failed to set device %q state to %q", deviceName, tryingState)
}
var result *multierror.Error
// Invoke devmapper operation
err := updateStateFn()
if err != nil {
result = multierror.Append(result, err)
}
// If operation succeeded transition to success state, otherwise save error details
uerr = p.metadata.UpdateDevice(ctx, deviceName, func(deviceInfo *DeviceInfo) error {
if err == nil {
deviceInfo.State = successState
deviceInfo.Error = ""
} else {
deviceInfo.Error = err.Error()
}
return nil
})
if uerr != nil {
result = multierror.Append(result, uerr)
}
return unwrapError(result)
}
// unwrapError converts multierror.Error to the original error when it is possible.
// multierror 1.1.0 has the similar function named Unwrap, but it requires Go 1.14.
func unwrapError(e *multierror.Error) error {
if e == nil {
return nil
}
// If the error can be expressed without multierror, return the original error.
if len(e.Errors) == 1 {
return e.Errors[0]
}
return e.ErrorOrNil()
}
// CreateThinDevice creates new devmapper thin-device with given name and size.
// Device ID for thin-device will be allocated from metadata store.
// If allocation successful, device will be activated with /dev/mapper/<deviceName>
func (p *PoolDevice) CreateThinDevice(ctx context.Context, deviceName string, virtualSizeBytes uint64) (retErr error) {
info := &DeviceInfo{
Name: deviceName,
Size: virtualSizeBytes,
State: Unknown,
}
var (
metaErr error
devErr error
activeErr error
)
defer func() {
// We've created a devmapper device, but failed to activate it, try rollback everything
if activeErr != nil {
retErr = p.rollbackActivate(ctx, info, activeErr)
return
}
// We're unable to create the devmapper device, most likely something wrong with the deviceID
if devErr != nil {
retErr = multierror.Append(retErr, p.metadata.MarkFaulty(ctx, info.Name))
return
}
}()
// Save initial device metadata and allocate new device ID from store
metaErr = p.metadata.AddDevice(ctx, info)
if metaErr != nil {
return metaErr
}
// Create thin device
devErr = p.createDevice(ctx, info)
if devErr != nil {
return devErr
}
// Activate thin device
activeErr = p.activateDevice(ctx, info)
if activeErr != nil {
return activeErr
}
return nil
}
func (p *PoolDevice) rollbackActivate(ctx context.Context, info *DeviceInfo, activateErr error) error {
// Delete the device first.
delErr := p.deleteDevice(ctx, info)
if delErr != nil {
// Failed to rollback, mark the device as faulty and keep metadata in order to
// preserve the faulty device ID
return multierror.Append(activateErr, delErr, p.metadata.MarkFaulty(ctx, info.Name))
}
// The devmapper device has been successfully deleted, deallocate device ID
if err := p.RemoveDevice(ctx, info.Name); err != nil {
return multierror.Append(activateErr, err)
}
return activateErr
}
// createDevice creates thin device
func (p *PoolDevice) createDevice(ctx context.Context, info *DeviceInfo) error {
if err := p.transition(ctx, info.Name, Creating, Created, func() error {
return dmsetup.CreateDevice(p.poolName, info.DeviceID)
}); err != nil {
return errors.Wrapf(err, "failed to create new thin device %q (dev: %d)", info.Name, info.DeviceID)
}
return nil
}
// activateDevice activates thin device
func (p *PoolDevice) activateDevice(ctx context.Context, info *DeviceInfo) error {
if err := p.transition(ctx, info.Name, Activating, Activated, func() error {
return dmsetup.ActivateDevice(p.poolName, info.Name, info.DeviceID, info.Size, "")
}); err != nil {
return errors.Wrapf(err, "failed to activate new thin device %q (dev: %d)", info.Name, info.DeviceID)
}
return nil
}
// CreateSnapshotDevice creates and activates new thin-device from parent thin-device (makes snapshot)
func (p *PoolDevice) CreateSnapshotDevice(ctx context.Context, deviceName string, snapshotName string, virtualSizeBytes uint64) (retErr error) {
baseInfo, err := p.metadata.GetDevice(ctx, deviceName)
if err != nil {
return errors.Wrapf(err, "failed to query device metadata for %q", deviceName)
}
snapInfo := &DeviceInfo{
Name: snapshotName,
Size: virtualSizeBytes,
ParentName: deviceName,
State: Unknown,
}
var (
metaErr error
devErr error
activeErr error
)
defer func() {
// We've created a devmapper device, but failed to activate it, try rollback everything
if activeErr != nil {
retErr = p.rollbackActivate(ctx, snapInfo, activeErr)
return
}
// We're unable to create the devmapper device, most likely something wrong with the deviceID
if devErr != nil {
retErr = multierror.Append(retErr, p.metadata.MarkFaulty(ctx, snapInfo.Name))
return
}
}()
// The base device must be suspend before taking a snapshot to
// avoid corruption.
// https://github.com/torvalds/linux/blob/v5.7/Documentation/admin-guide/device-mapper/thin-provisioning.rst#internal-snapshots
if p.IsLoaded(deviceName) {
log.G(ctx).Debugf("suspending %q before taking its snapshot", deviceName)
suspendErr := p.SuspendDevice(ctx, deviceName)
if suspendErr != nil {
return suspendErr
}
defer func() {
err := p.ResumeDevice(ctx, deviceName)
if err != nil {
log.G(ctx).WithError(err).Errorf("failed to resume base device %q after taking its snapshot", baseInfo.Name)
}
}()
}
// Save snapshot metadata and allocate new device ID
metaErr = p.metadata.AddDevice(ctx, snapInfo)
if metaErr != nil {
return metaErr
}
// Create thin device snapshot
devErr = p.createSnapshot(ctx, baseInfo, snapInfo)
if devErr != nil {
return devErr
}
// Activate the snapshot device
activeErr = p.activateDevice(ctx, snapInfo)
if activeErr != nil {
return activeErr
}
return nil
}
func (p *PoolDevice) createSnapshot(ctx context.Context, baseInfo, snapInfo *DeviceInfo) error {
if err := p.transition(ctx, snapInfo.Name, Creating, Created, func() error {
return dmsetup.CreateSnapshot(p.poolName, snapInfo.DeviceID, baseInfo.DeviceID)
}); err != nil {
return errors.Wrapf(err,
"failed to create snapshot %q (dev: %d) from %q (dev: %d)",
snapInfo.Name,
snapInfo.DeviceID,
baseInfo.Name,
baseInfo.DeviceID)
}
return nil
}
// SuspendDevice flushes the outstanding IO and blocks the further IO
func (p *PoolDevice) SuspendDevice(ctx context.Context, deviceName string) error {
if err := p.transition(ctx, deviceName, Suspending, Suspended, func() error {
return dmsetup.SuspendDevice(deviceName)
}); err != nil {
return errors.Wrapf(err, "failed to suspend device %q", deviceName)
}
return nil
}
func (p *PoolDevice) ResumeDevice(ctx context.Context, deviceName string) error {
if err := p.transition(ctx, deviceName, Resuming, Resumed, func() error {
return dmsetup.ResumeDevice(deviceName)
}); err != nil {
return errors.Wrapf(err, "failed to resume device %q", deviceName)
}
return nil
}
// DeactivateDevice deactivates thin device
func (p *PoolDevice) DeactivateDevice(ctx context.Context, deviceName string, deferred, withForce bool) error {
if !p.IsLoaded(deviceName) {
return nil
}
opts := []dmsetup.RemoveDeviceOpt{dmsetup.RemoveWithRetries}
if deferred {
opts = append(opts, dmsetup.RemoveDeferred)
}
if withForce {
opts = append(opts, dmsetup.RemoveWithForce)
}
if err := p.transition(ctx, deviceName, Deactivating, Deactivated, func() error {
return retry(ctx, func() error {
if err := dmsetup.RemoveDevice(deviceName, opts...); err != nil {
return errors.Wrap(err, "failed to deactivate device")
}
return nil
})
}); err != nil {
return errors.Wrapf(err, "failed to deactivate device %q", deviceName)
}
return nil
}
// IsActivated returns true if thin-device is activated
func (p *PoolDevice) IsActivated(deviceName string) bool {
infos, err := dmsetup.Info(deviceName)
if err != nil || len(infos) != 1 {
// Couldn't query device info, device not active
return false
}
if devInfo := infos[0]; devInfo.TableLive {
return true
}
return false
}
// IsLoaded returns true if thin-device is visible for dmsetup
func (p *PoolDevice) IsLoaded(deviceName string) bool {
_, err := dmsetup.Info(deviceName)
return err == nil
}
// GetUsage reports total size in bytes consumed by a thin-device.
// It relies on the number of used blocks reported by 'dmsetup status'.
// The output looks like:
// device2: 0 204800 thin 17280 204799
// Where 17280 is the number of used sectors
func (p *PoolDevice) GetUsage(deviceName string) (int64, error) {
status, err := dmsetup.Status(deviceName)
if err != nil {
return 0, errors.Wrapf(err, "can't get status for device %q", deviceName)
}
if len(status.Params) == 0 {
return 0, errors.Errorf("failed to get the number of used blocks, unexpected output from dmsetup status")
}
count, err := strconv.ParseInt(status.Params[0], 10, 64)
if err != nil {
return 0, errors.Wrapf(err, "failed to parse status params: %q", status.Params[0])
}
return count * dmsetup.SectorSize, nil
}
// RemoveDevice completely wipes out thin device from thin-pool and frees it's device ID
func (p *PoolDevice) RemoveDevice(ctx context.Context, deviceName string) error {
info, err := p.metadata.GetDevice(ctx, deviceName)
if err != nil {
return errors.Wrapf(err, "can't query metadata for device %q", deviceName)
}
if err := p.DeactivateDevice(ctx, deviceName, false, true); err != nil {
return err
}
if err := p.deleteDevice(ctx, info); err != nil {
return err
}
// Remove record from meta store and free device ID
if err := p.metadata.RemoveDevice(ctx, deviceName); err != nil {
return errors.Wrapf(err, "can't remove device %q metadata from store after removal", deviceName)
}
return nil
}
func (p *PoolDevice) deleteDevice(ctx context.Context, info *DeviceInfo) error {
if err := p.transition(ctx, info.Name, Removing, Removed, func() error {
return retry(ctx, func() error {
// Send 'delete' message to thin-pool
e := dmsetup.DeleteDevice(p.poolName, info.DeviceID)
// Ignores the error if the device has been deleted already.
if e != nil && !errors.Is(e, unix.ENODATA) {
return e
}
return nil
})
}); err != nil {
return errors.Wrapf(err, "failed to delete device %q (dev id: %d)", info.Name, info.DeviceID)
}
return nil
}
// RemovePool deactivates all child thin-devices and removes thin-pool device
func (p *PoolDevice) RemovePool(ctx context.Context) error {
deviceNames, err := p.metadata.GetDeviceNames(ctx)
if err != nil {
return errors.Wrap(err, "can't query device names")
}
var result *multierror.Error
// Deactivate devices if any
for _, name := range deviceNames {
if err := p.DeactivateDevice(ctx, name, true, true); err != nil {
result = multierror.Append(result, errors.Wrapf(err, "failed to remove %q", name))
}
}
if err := dmsetup.RemoveDevice(p.poolName, dmsetup.RemoveWithForce, dmsetup.RemoveWithRetries, dmsetup.RemoveDeferred); err != nil {
result = multierror.Append(result, errors.Wrapf(err, "failed to remove pool %q", p.poolName))
}
return result.ErrorOrNil()
}
// MarkDeviceState changes the device's state in metastore
func (p *PoolDevice) MarkDeviceState(ctx context.Context, name string, state DeviceState) error {
return p.metadata.ChangeDeviceState(ctx, name, state)
}
// WalkDevices iterates all devices in pool metadata
func (p *PoolDevice) WalkDevices(ctx context.Context, cb func(info *DeviceInfo) error) error {
return p.metadata.WalkDevices(ctx, func(info *DeviceInfo) error {
return cb(info)
})
}
// Close closes pool device (thin-pool will not be removed)
func (p *PoolDevice) Close() error {
return p.metadata.Close()
}