diff --git a/BUILDING.md b/BUILDING.md index 4cf1729d4..dd8573bb2 100644 --- a/BUILDING.md +++ b/BUILDING.md @@ -104,6 +104,7 @@ make generate > * `no_btrfs`: A build tag disables building the btrfs snapshot driver. > * `no_cri`: A build tag disables building Kubernetes [CRI](http://blog.kubernetes.io/2016/12/container-runtime-interface-cri-in-kubernetes.html) support into containerd. > See [here](https://github.com/containerd/cri-containerd#build-tags) for build tags of CRI plugin. +> * `no_devmapper`: A build tag disables building the device mapper snapshot driver. > > For example, adding `BUILDTAGS=no_btrfs` to your environment before calling the **binaries** > Makefile target will disable the btrfs driver within the containerd Go build. diff --git a/cmd/containerd/builtins_devmapper_linux.go b/cmd/containerd/builtins_devmapper_linux.go new file mode 100644 index 000000000..aada976b3 --- /dev/null +++ b/cmd/containerd/builtins_devmapper_linux.go @@ -0,0 +1,21 @@ +// +build !no_devmapper + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package main + +import _ "github.com/containerd/containerd/snapshots/devmapper" diff --git a/snapshots/devmapper/README.md b/snapshots/devmapper/README.md new file mode 100644 index 000000000..f188a88a0 --- /dev/null +++ b/snapshots/devmapper/README.md @@ -0,0 +1,42 @@ +## Devmapper snapshotter + +Devmapper is a `containerd` snapshotter plugin that stores snapshots in ext4-formatted filesystem images +in a devicemapper thin pool. + +## Setup + +To make it work you need to prepare `thin-pool` in advance and update containerd's configuration file. +This file is typically located at `/etc/containerd/config.toml`. + +Here's minimal sample entry that can be made in the configuration file: + +``` +[plugins] + ... + [plugins.devmapper] + pool_name = "containerd-pool" + base_image_size = "128MB" + ... +``` + +The following configuration flags are supported: +* `root_path` - a directory where the metadata will be available (if empty + default location for `containerd` plugins will be used) +* `pool_name` - a name to use for the devicemapper thin pool. Pool name + should be the same as in `/dev/mapper/` directory +* `base_image_size` - defines how much space to allocate when creating the base device + +Pool name and base image size are required snapshotter parameters. + +## Run +Give it a try with the following commands: + +```bash +ctr images pull --snapshotter devmapper docker.io/library/hello-world:latest +ctr run --snapshotter devmapper docker.io/library/hello-world:latest test +``` + +## Requirements + +The devicemapper snapshotter requires `dmsetup` (>= 1.02.110) command line tool to be installed and +available on your computer. On Ubuntu, it can be installed with `apt-get install dmsetup` command. \ No newline at end of file diff --git a/snapshots/devmapper/config.go b/snapshots/devmapper/config.go new file mode 100644 index 000000000..1cec2d258 --- /dev/null +++ b/snapshots/devmapper/config.go @@ -0,0 +1,98 @@ +// +build linux + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package devmapper + +import ( + "fmt" + "os" + + "github.com/BurntSushi/toml" + "github.com/docker/go-units" + "github.com/hashicorp/go-multierror" + "github.com/pkg/errors" +) + +// Config represents device mapper configuration loaded from file. +// Size units can be specified in human-readable string format (like "32KIB", "32GB", "32Tb") +type Config struct { + // Device snapshotter root directory for metadata + RootPath string `toml:"root_path"` + + // Name for 'thin-pool' device to be used by snapshotter (without /dev/mapper/ prefix) + PoolName string `toml:"pool_name"` + + // Defines how much space to allocate when creating base image for container + BaseImageSize string `toml:"base_image_size"` + BaseImageSizeBytes uint64 `toml:"-"` +} + +// LoadConfig reads devmapper configuration file from disk in TOML format +func LoadConfig(path string) (*Config, error) { + if _, err := os.Stat(path); err != nil { + if os.IsNotExist(err) { + return nil, os.ErrNotExist + } + + return nil, err + } + + config := Config{} + if _, err := toml.DecodeFile(path, &config); err != nil { + return nil, errors.Wrapf(err, "failed to unmarshal data at '%s'", path) + } + + if err := config.parse(); err != nil { + return nil, err + } + + if err := config.Validate(); err != nil { + return nil, err + } + + return &config, nil +} + +func (c *Config) parse() error { + baseImageSize, err := units.RAMInBytes(c.BaseImageSize) + if err != nil { + return errors.Wrapf(err, "failed to parse base image size: '%s'", c.BaseImageSize) + } + + c.BaseImageSizeBytes = uint64(baseImageSize) + return nil +} + +// Validate makes sure configuration fields are valid +func (c *Config) Validate() error { + var result *multierror.Error + + if c.PoolName == "" { + result = multierror.Append(result, fmt.Errorf("pool_name is required")) + } + + if c.RootPath == "" { + result = multierror.Append(result, fmt.Errorf("root_path is required")) + } + + if c.BaseImageSize == "" { + result = multierror.Append(result, fmt.Errorf("base_image_size is required")) + } + + return result.ErrorOrNil() +} diff --git a/snapshots/devmapper/config_test.go b/snapshots/devmapper/config_test.go new file mode 100644 index 000000000..b6b9cde2f --- /dev/null +++ b/snapshots/devmapper/config_test.go @@ -0,0 +1,103 @@ +// +build linux + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package devmapper + +import ( + "io/ioutil" + "os" + "testing" + + "github.com/BurntSushi/toml" + "github.com/hashicorp/go-multierror" + "gotest.tools/assert" + is "gotest.tools/assert/cmp" +) + +func TestLoadConfig(t *testing.T) { + expected := Config{ + RootPath: "/tmp", + PoolName: "test", + BaseImageSize: "128Mb", + } + + file, err := ioutil.TempFile("", "devmapper-config-") + assert.NilError(t, err) + + encoder := toml.NewEncoder(file) + err = encoder.Encode(&expected) + assert.NilError(t, err) + + defer func() { + err := file.Close() + assert.NilError(t, err) + + err = os.Remove(file.Name()) + assert.NilError(t, err) + }() + + loaded, err := LoadConfig(file.Name()) + assert.NilError(t, err) + + assert.Equal(t, loaded.RootPath, expected.RootPath) + assert.Equal(t, loaded.PoolName, expected.PoolName) + assert.Equal(t, loaded.BaseImageSize, expected.BaseImageSize) + + assert.Assert(t, loaded.BaseImageSizeBytes == 128*1024*1024) +} + +func TestLoadConfigInvalidPath(t *testing.T) { + _, err := LoadConfig("") + assert.Equal(t, os.ErrNotExist, err) + + _, err = LoadConfig("/dev/null") + assert.Assert(t, err != nil) +} + +func TestParseInvalidData(t *testing.T) { + config := Config{ + BaseImageSize: "y", + } + + err := config.parse() + assert.Error(t, err, "failed to parse base image size: 'y': invalid size: 'y'") +} + +func TestFieldValidation(t *testing.T) { + config := &Config{} + err := config.Validate() + assert.Assert(t, err != nil) + + multErr := (err).(*multierror.Error) + assert.Assert(t, is.Len(multErr.Errors, 3)) + + assert.Assert(t, multErr.Errors[0] != nil, "pool_name is empty") + assert.Assert(t, multErr.Errors[1] != nil, "root_path is empty") + assert.Assert(t, multErr.Errors[2] != nil, "base_image_size is empty") +} + +func TestExistingPoolFieldValidation(t *testing.T) { + config := &Config{ + PoolName: "test", + RootPath: "test", + BaseImageSize: "10mb", + } + + err := config.Validate() + assert.NilError(t, err) +} diff --git a/snapshots/devmapper/device_info.go b/snapshots/devmapper/device_info.go new file mode 100644 index 000000000..6d65d557a --- /dev/null +++ b/snapshots/devmapper/device_info.go @@ -0,0 +1,106 @@ +// +build linux + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package devmapper + +import ( + "fmt" +) + +const ( + maxDeviceID = 0xffffff // Device IDs are 24-bit numbers +) + +// DeviceState represents current devmapper device state reflected in meta store +type DeviceState int + +const ( + // Unknown means that device just allocated and no operations were performed + Unknown DeviceState = iota + // Creating means that device is going to be created + Creating + // Created means that devices successfully created + Created + // Activating means that device is going to be activated + Activating + // Activated means that device successfully activated + Activated + // Suspending means that device is going to be suspended + Suspending + // Suspended means that device successfully suspended + Suspended + // Resuming means that device is going to be resumed from suspended state + Resuming + // Resumed means that device successfully resumed + Resumed + // Deactivating means that device is going to be deactivated + Deactivating + // Deactivated means that device successfully deactivated + Deactivated + // Removing means that device is going to be removed + Removing + // Removed means that device successfully removed but not yet deleted from meta store + Removed +) + +func (s DeviceState) String() string { + switch s { + case Creating: + return "Creating" + case Created: + return "Created" + case Activating: + return "Activating" + case Activated: + return "Activated" + case Suspending: + return "Suspending" + case Suspended: + return "Suspended" + case Resuming: + return "Resuming" + case Resumed: + return "Resumed" + case Deactivating: + return "Deactivating" + case Deactivated: + return "Deactivated" + case Removing: + return "Removing" + case Removed: + return "Removed" + default: + return fmt.Sprintf("unknown %d", s) + } +} + +// DeviceInfo represents metadata for thin device within thin-pool +type DeviceInfo struct { + // DeviceID is a 24-bit number assigned to a device within thin-pool device + DeviceID uint32 `json:"device_id"` + // Size is a thin device size + Size uint64 `json:"size"` + // Name is a device name to be used in /dev/mapper/ + Name string `json:"name"` + // ParentName is a name of parent device (if snapshot) + ParentName string `json:"parent_name"` + // State represents current device state + State DeviceState `json:"state"` + // Error details if device state change failed + Error string `json:"error"` +} diff --git a/snapshots/devmapper/dmsetup/dmsetup.go b/snapshots/devmapper/dmsetup/dmsetup.go new file mode 100644 index 000000000..0b75cda3f --- /dev/null +++ b/snapshots/devmapper/dmsetup/dmsetup.go @@ -0,0 +1,390 @@ +// +build linux + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package dmsetup + +import ( + "fmt" + "os/exec" + "strconv" + "strings" + + "github.com/pkg/errors" + "golang.org/x/sys/unix" +) + +const ( + // DevMapperDir represents devmapper devices location + DevMapperDir = "/dev/mapper/" + // SectorSize represents the number of bytes in one sector on devmapper devices + SectorSize = 512 +) + +// DeviceInfo represents device info returned by "dmsetup info". +// dmsetup(8) provides more information on each of these fields. +type DeviceInfo struct { + Name string + BlockDeviceName string + TableLive bool + TableInactive bool + Suspended bool + ReadOnly bool + Major uint32 + Minor uint32 + OpenCount uint32 // Open reference count + TargetCount uint32 // Number of targets in the live table + EventNumber uint32 // Last event sequence number (used by wait) +} + +var errTable map[string]unix.Errno + +func init() { + // Precompute map of = for optimal lookup + errTable = make(map[string]unix.Errno) + for errno := unix.EPERM; errno <= unix.EHWPOISON; errno++ { + errTable[errno.Error()] = errno + } +} + +// CreatePool creates a device with the given name, data and metadata file and block size (see "dmsetup create") +func CreatePool(poolName, dataFile, metaFile string, blockSizeSectors uint32) error { + thinPool, err := makeThinPoolMapping(dataFile, metaFile, blockSizeSectors) + if err != nil { + return err + } + + _, err = dmsetup("create", poolName, "--table", thinPool) + return err +} + +// ReloadPool reloads existing thin-pool (see "dmsetup reload") +func ReloadPool(deviceName, dataFile, metaFile string, blockSizeSectors uint32) error { + thinPool, err := makeThinPoolMapping(dataFile, metaFile, blockSizeSectors) + if err != nil { + return err + } + + _, err = dmsetup("reload", deviceName, "--table", thinPool) + return err +} + +const ( + lowWaterMark = 32768 // Picked arbitrary, might need tuning + skipZeroing = "skip_block_zeroing" // Skipping zeroing to reduce latency for device creation +) + +// makeThinPoolMapping makes thin-pool table entry +func makeThinPoolMapping(dataFile, metaFile string, blockSizeSectors uint32) (string, error) { + dataDeviceSizeBytes, err := BlockDeviceSize(dataFile) + if err != nil { + return "", errors.Wrapf(err, "failed to get block device size: %s", dataFile) + } + + // Thin-pool mapping target has the following format: + // start - starting block in virtual device + // length - length of this segment + // metadata_dev - the metadata device + // data_dev - the data device + // data_block_size - the data block size in sectors + // low_water_mark - the low water mark, expressed in blocks of size data_block_size + // feature_args - the number of feature arguments + // args + lengthSectors := dataDeviceSizeBytes / SectorSize + target := fmt.Sprintf("0 %d thin-pool %s %s %d %d 1 %s", + lengthSectors, + metaFile, + dataFile, + blockSizeSectors, + lowWaterMark, + skipZeroing) + + return target, nil +} + +// CreateDevice sends "create_thin " message to the given thin-pool +func CreateDevice(poolName string, deviceID uint32) error { + _, err := dmsetup("message", poolName, "0", fmt.Sprintf("create_thin %d", deviceID)) + return err +} + +// ActivateDevice activates the given thin-device using the 'thin' target +func ActivateDevice(poolName string, deviceName string, deviceID uint32, size uint64, external string) error { + mapping := makeThinMapping(poolName, deviceID, size, external) + _, err := dmsetup("create", deviceName, "--table", mapping) + return err +} + +// makeThinMapping makes thin target table entry +func makeThinMapping(poolName string, deviceID uint32, sizeBytes uint64, externalOriginDevice string) string { + lengthSectors := sizeBytes / SectorSize + + // Thin target has the following format: + // start - starting block in virtual device + // length - length of this segment + // pool_dev - the thin-pool device, can be /dev/mapper/pool_name or 253:0 + // dev_id - the internal device id of the device to be activated + // external_origin_dev - an optional block device outside the pool to be treated as a read-only snapshot origin. + target := fmt.Sprintf("0 %d thin %s %d %s", lengthSectors, GetFullDevicePath(poolName), deviceID, externalOriginDevice) + return strings.TrimSpace(target) +} + +// SuspendDevice suspends the given device (see "dmsetup suspend") +func SuspendDevice(deviceName string) error { + _, err := dmsetup("suspend", deviceName) + return err +} + +// ResumeDevice resumes the given device (see "dmsetup resume") +func ResumeDevice(deviceName string) error { + _, err := dmsetup("resume", deviceName) + return err +} + +// Table returns the current table for the device +func Table(deviceName string) (string, error) { + return dmsetup("table", deviceName) +} + +// CreateSnapshot sends "create_snap" message to the given thin-pool. +// Caller needs to suspend and resume device if it is active. +func CreateSnapshot(poolName string, deviceID uint32, baseDeviceID uint32) error { + _, err := dmsetup("message", poolName, "0", fmt.Sprintf("create_snap %d %d", deviceID, baseDeviceID)) + return err +} + +// DeleteDevice sends "delete " message to the given thin-pool +func DeleteDevice(poolName string, deviceID uint32) error { + _, err := dmsetup("message", poolName, "0", fmt.Sprintf("delete %d", deviceID)) + return err +} + +// RemoveDeviceOpt represents command line arguments for "dmsetup remove" command +type RemoveDeviceOpt string + +const ( + // RemoveWithForce flag replaces the table with one that fails all I/O if + // open device can't be removed + RemoveWithForce RemoveDeviceOpt = "--force" + // RemoveWithRetries option will cause the operation to be retried + // for a few seconds before failing + RemoveWithRetries RemoveDeviceOpt = "--retry" + // RemoveDeferred flag will enable deferred removal of open devices, + // the device will be removed when the last user closes it + RemoveDeferred RemoveDeviceOpt = "--deferred" +) + +// RemoveDevice removes a device (see "dmsetup remove") +func RemoveDevice(deviceName string, opts ...RemoveDeviceOpt) error { + args := []string{ + "remove", + } + + for _, opt := range opts { + args = append(args, string(opt)) + } + + args = append(args, GetFullDevicePath(deviceName)) + + _, err := dmsetup(args...) + return err +} + +// Info outputs device information (see "dmsetup info"). +// If device name is empty, all device infos will be returned. +func Info(deviceName string) ([]*DeviceInfo, error) { + output, err := dmsetup( + "info", + "--columns", + "--noheadings", + "-o", + "name,blkdevname,attr,major,minor,open,segments,events", + "--separator", + " ", + deviceName) + + if err != nil { + return nil, err + } + + var ( + lines = strings.Split(output, "\n") + devices = make([]*DeviceInfo, len(lines)) + ) + + for i, line := range lines { + var ( + attr = "" + info = &DeviceInfo{} + ) + + _, err := fmt.Sscan(line, + &info.Name, + &info.BlockDeviceName, + &attr, + &info.Major, + &info.Minor, + &info.OpenCount, + &info.TargetCount, + &info.EventNumber) + + if err != nil { + return nil, errors.Wrapf(err, "failed to parse line %q", line) + } + + // Parse attributes (see "man 8 dmsetup" for details) + info.Suspended = strings.Contains(attr, "s") + info.ReadOnly = strings.Contains(attr, "r") + info.TableLive = strings.Contains(attr, "L") + info.TableInactive = strings.Contains(attr, "I") + + devices[i] = info + } + + return devices, nil +} + +// Version returns "dmsetup version" output +func Version() (string, error) { + return dmsetup("version") +} + +// DeviceStatus represents devmapper device status information +type DeviceStatus struct { + Offset int64 + Length int64 + Target string + Params []string +} + +// Status provides status information for devmapper device +func Status(deviceName string) (*DeviceStatus, error) { + var ( + err error + status DeviceStatus + ) + + output, err := dmsetup("status", deviceName) + if err != nil { + return nil, err + } + + // Status output format: + // Offset (int64) + // Length (int64) + // Target type (string) + // Params (Array of strings) + const MinParseCount = 4 + parts := strings.Split(output, " ") + if len(parts) < MinParseCount { + return nil, errors.Errorf("failed to parse output: %q", output) + } + + status.Offset, err = strconv.ParseInt(parts[0], 10, 64) + if err != nil { + return nil, errors.Wrapf(err, "failed to parse offset: %q", parts[0]) + } + + status.Length, err = strconv.ParseInt(parts[1], 10, 64) + if err != nil { + return nil, errors.Wrapf(err, "failed to parse length: %q", parts[1]) + } + + status.Target = parts[2] + status.Params = parts[3:] + + return &status, nil +} + +// GetFullDevicePath returns full path for the given device name (like "/dev/mapper/name") +func GetFullDevicePath(deviceName string) string { + if strings.HasPrefix(deviceName, DevMapperDir) { + return deviceName + } + + return DevMapperDir + deviceName +} + +// BlockDeviceSize returns size of block device in bytes +func BlockDeviceSize(devicePath string) (uint64, error) { + data, err := exec.Command("blockdev", "--getsize64", "-q", devicePath).CombinedOutput() + output := string(data) + if err != nil { + return 0, errors.Wrapf(err, output) + } + + output = strings.TrimSuffix(output, "\n") + return strconv.ParseUint(output, 10, 64) +} + +func dmsetup(args ...string) (string, error) { + data, err := exec.Command("dmsetup", args...).CombinedOutput() + output := string(data) + if err != nil { + // Try find Linux error code otherwise return generic error with dmsetup output + if errno, ok := tryGetUnixError(output); ok { + return "", errno + } + + return "", errors.Wrapf(err, "dmsetup %s\nerror: %s\n", strings.Join(args, " "), output) + } + + output = strings.TrimSuffix(output, "\n") + output = strings.TrimSpace(output) + + return output, nil +} + +// tryGetUnixError tries to find Linux error code from dmsetup output +func tryGetUnixError(output string) (unix.Errno, bool) { + // It's useful to have Linux error codes like EBUSY, EPERM, ..., instead of just text. + // Unfortunately there is no better way than extracting/comparing error text. + text := parseDmsetupError(output) + if text == "" { + return 0, false + } + + err, ok := errTable[text] + return err, ok +} + +// dmsetup returns error messages in format: +// device-mapper: message ioctl on failed: File exists\n +// Command failed\n +// parseDmsetupError extracts text between "failed: " and "\n" +func parseDmsetupError(output string) string { + lines := strings.SplitN(output, "\n", 2) + if len(lines) < 2 { + return "" + } + + const failedSubstr = "failed: " + + line := lines[0] + idx := strings.LastIndex(line, failedSubstr) + if idx == -1 { + return "" + } + + str := line[idx:] + + // Strip "failed: " prefix + str = strings.TrimPrefix(str, failedSubstr) + + str = strings.ToLower(str) + return str +} diff --git a/snapshots/devmapper/dmsetup/dmsetup_test.go b/snapshots/devmapper/dmsetup/dmsetup_test.go new file mode 100644 index 000000000..97fdeb9b1 --- /dev/null +++ b/snapshots/devmapper/dmsetup/dmsetup_test.go @@ -0,0 +1,208 @@ +// +build linux + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package dmsetup + +import ( + "io/ioutil" + "os" + "strings" + "testing" + + "github.com/docker/go-units" + "golang.org/x/sys/unix" + "gotest.tools/assert" + is "gotest.tools/assert/cmp" + + "github.com/containerd/containerd/pkg/testutil" + "github.com/containerd/containerd/snapshots/devmapper/losetup" +) + +const ( + testPoolName = "test-pool" + testDeviceName = "test-device" + deviceID = 1 + snapshotID = 2 +) + +func TestDMSetup(t *testing.T) { + testutil.RequiresRoot(t) + + tempDir, err := ioutil.TempDir("", "dmsetup-tests-") + assert.NilError(t, err, "failed to make temp dir for tests") + + defer func() { + err := os.RemoveAll(tempDir) + assert.NilError(t, err) + }() + + dataImage, loopDataDevice := createLoopbackDevice(t, tempDir) + metaImage, loopMetaDevice := createLoopbackDevice(t, tempDir) + + defer func() { + err = losetup.RemoveLoopDevicesAssociatedWithImage(dataImage) + assert.NilError(t, err, "failed to detach loop devices for data image: %s", dataImage) + + err = losetup.RemoveLoopDevicesAssociatedWithImage(metaImage) + assert.NilError(t, err, "failed to detach loop devices for meta image: %s", metaImage) + }() + + t.Run("CreatePool", func(t *testing.T) { + err := CreatePool(testPoolName, loopDataDevice, loopMetaDevice, 128) + assert.NilError(t, err, "failed to create thin-pool") + + table, err := Table(testPoolName) + t.Logf("table: %s", table) + assert.NilError(t, err) + assert.Assert(t, strings.HasPrefix(table, "0 32768 thin-pool")) + assert.Assert(t, strings.HasSuffix(table, "128 32768 1 skip_block_zeroing")) + }) + + t.Run("ReloadPool", func(t *testing.T) { + err := ReloadPool(testPoolName, loopDataDevice, loopMetaDevice, 256) + assert.NilError(t, err, "failed to reload thin-pool") + }) + + t.Run("CreateDevice", testCreateDevice) + + t.Run("CreateSnapshot", testCreateSnapshot) + t.Run("DeleteSnapshot", testDeleteSnapshot) + + t.Run("ActivateDevice", testActivateDevice) + t.Run("DeviceStatus", testDeviceStatus) + t.Run("SuspendResumeDevice", testSuspendResumeDevice) + t.Run("RemoveDevice", testRemoveDevice) + + t.Run("RemovePool", func(t *testing.T) { + err = RemoveDevice(testPoolName, RemoveWithForce, RemoveWithRetries) + assert.NilError(t, err, "failed to remove thin-pool") + }) + + t.Run("Version", testVersion) +} + +func testCreateDevice(t *testing.T) { + err := CreateDevice(testPoolName, deviceID) + assert.NilError(t, err, "failed to create test device") + + err = CreateDevice(testPoolName, deviceID) + assert.Assert(t, err == unix.EEXIST) + + infos, err := Info(testPoolName) + assert.NilError(t, err) + assert.Assert(t, is.Len(infos, 1), "got unexpected number of device infos") +} + +func testCreateSnapshot(t *testing.T) { + err := CreateSnapshot(testPoolName, snapshotID, deviceID) + assert.NilError(t, err) +} + +func testDeleteSnapshot(t *testing.T) { + err := DeleteDevice(testPoolName, snapshotID) + assert.NilError(t, err, "failed to send delete message") + + err = DeleteDevice(testPoolName, snapshotID) + assert.Assert(t, err == unix.ENODATA) +} + +func testActivateDevice(t *testing.T) { + err := ActivateDevice(testPoolName, testDeviceName, 1, 1024, "") + assert.NilError(t, err, "failed to activate device") + + err = ActivateDevice(testPoolName, testDeviceName, 1, 1024, "") + assert.Equal(t, err, unix.EBUSY) + + if _, err := os.Stat("/dev/mapper/" + testDeviceName); err != nil && !os.IsExist(err) { + assert.NilError(t, err, "failed to stat device") + } + + list, err := Info(testPoolName) + assert.NilError(t, err) + assert.Assert(t, is.Len(list, 1)) + + info := list[0] + assert.Equal(t, testPoolName, info.Name) + assert.Assert(t, info.TableLive) +} + +func testDeviceStatus(t *testing.T) { + status, err := Status(testDeviceName) + assert.NilError(t, err) + + assert.Equal(t, int64(0), status.Offset) + assert.Equal(t, int64(2), status.Length) + assert.Equal(t, "thin", status.Target) + assert.DeepEqual(t, status.Params, []string{"0", "-"}) +} + +func testSuspendResumeDevice(t *testing.T) { + err := SuspendDevice(testDeviceName) + assert.NilError(t, err) + + err = SuspendDevice(testDeviceName) + assert.NilError(t, err) + + list, err := Info(testDeviceName) + assert.NilError(t, err) + assert.Assert(t, is.Len(list, 1)) + + info := list[0] + assert.Assert(t, info.Suspended) + + err = ResumeDevice(testDeviceName) + assert.NilError(t, err) + + err = ResumeDevice(testDeviceName) + assert.NilError(t, err) +} + +func testRemoveDevice(t *testing.T) { + err := RemoveDevice(testPoolName) + assert.Assert(t, err == unix.EBUSY, "removing thin-pool with dependencies shouldn't be allowed") + + err = RemoveDevice(testDeviceName, RemoveWithRetries) + assert.NilError(t, err, "failed to remove thin-device") +} + +func testVersion(t *testing.T) { + version, err := Version() + assert.NilError(t, err) + assert.Assert(t, version != "") +} + +func createLoopbackDevice(t *testing.T, dir string) (string, string) { + file, err := ioutil.TempFile(dir, "dmsetup-tests-") + assert.NilError(t, err) + + size, err := units.RAMInBytes("16Mb") + assert.NilError(t, err) + + err = file.Truncate(size) + assert.NilError(t, err) + + err = file.Close() + assert.NilError(t, err) + + imagePath := file.Name() + + loopDevice, err := losetup.AttachLoopDevice(imagePath) + assert.NilError(t, err) + + return imagePath, loopDevice +} diff --git a/snapshots/devmapper/losetup/losetup.go b/snapshots/devmapper/losetup/losetup.go new file mode 100644 index 000000000..89faebf81 --- /dev/null +++ b/snapshots/devmapper/losetup/losetup.go @@ -0,0 +1,85 @@ +// +build linux + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package losetup + +import ( + "os/exec" + "strings" + + "github.com/pkg/errors" +) + +// FindAssociatedLoopDevices returns a list of loop devices attached to a given image +func FindAssociatedLoopDevices(imagePath string) ([]string, error) { + output, err := losetup("--list", "--output", "NAME", "--associated", imagePath) + if err != nil { + return nil, errors.Wrapf(err, "failed to get loop devices: '%s'", output) + } + + if output == "" { + return []string{}, nil + } + + items := strings.Split(output, "\n") + if len(items) <= 1 { + return []string{}, nil + } + + // Skip header with column names + return items[1:], nil +} + +// AttachLoopDevice finds first available loop device and associates it with an image. +func AttachLoopDevice(imagePath string) (string, error) { + return losetup("--find", "--show", imagePath) +} + +// DetachLoopDevice detaches loop devices +func DetachLoopDevice(loopDevice ...string) error { + args := append([]string{"--detach"}, loopDevice...) + _, err := losetup(args...) + return err +} + +// RemoveLoopDevicesAssociatedWithImage detaches all loop devices attached to a given sparse image +func RemoveLoopDevicesAssociatedWithImage(imagePath string) error { + loopDevices, err := FindAssociatedLoopDevices(imagePath) + if err != nil { + return err + } + + for _, loopDevice := range loopDevices { + if err = DetachLoopDevice(loopDevice); err != nil { + return err + } + } + + return nil +} + +// losetup is a wrapper around losetup command line tool +func losetup(args ...string) (string, error) { + data, err := exec.Command("losetup", args...).CombinedOutput() + output := string(data) + if err != nil { + return "", errors.Wrapf(err, "losetup %s\nerror: %s\n", strings.Join(args, " "), output) + } + + return strings.TrimSuffix(output, "\n"), err +} diff --git a/snapshots/devmapper/losetup/losetup_test.go b/snapshots/devmapper/losetup/losetup_test.go new file mode 100644 index 000000000..9eb4da558 --- /dev/null +++ b/snapshots/devmapper/losetup/losetup_test.go @@ -0,0 +1,117 @@ +// +build linux + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package losetup + +import ( + "io/ioutil" + "os" + "testing" + + "github.com/containerd/containerd/pkg/testutil" + "github.com/docker/go-units" + "gotest.tools/assert" + is "gotest.tools/assert/cmp" +) + +func TestLosetup(t *testing.T) { + testutil.RequiresRoot(t) + + var ( + imagePath = createSparseImage(t) + loopDevice1 string + loopDevice2 string + ) + + defer func() { + err := os.Remove(imagePath) + assert.NilError(t, err) + }() + + t.Run("AttachLoopDevice", func(t *testing.T) { + dev1, err := AttachLoopDevice(imagePath) + assert.NilError(t, err) + assert.Assert(t, dev1 != "") + + dev2, err := AttachLoopDevice(imagePath) + assert.NilError(t, err) + assert.Assert(t, dev2 != dev1, "should attach different loop device") + + loopDevice1 = dev1 + loopDevice2 = dev2 + }) + + t.Run("AttachEmptyLoopDevice", func(t *testing.T) { + _, err := AttachLoopDevice("") + assert.Assert(t, err != nil, "shouldn't attach empty path") + }) + + t.Run("FindAssociatedLoopDevices", func(t *testing.T) { + devices, err := FindAssociatedLoopDevices(imagePath) + assert.NilError(t, err) + assert.Assert(t, is.Len(devices, 2), "unexpected number of attached devices") + assert.Assert(t, is.Contains(devices, loopDevice1)) + assert.Assert(t, is.Contains(devices, loopDevice2)) + }) + + t.Run("FindAssociatedLoopDevicesForInvalidImage", func(t *testing.T) { + devices, err := FindAssociatedLoopDevices("") + assert.NilError(t, err) + assert.Assert(t, is.Len(devices, 0)) + }) + + t.Run("DetachLoopDevice", func(t *testing.T) { + err := DetachLoopDevice(loopDevice2) + assert.NilError(t, err, "failed to detach %q", loopDevice2) + }) + + t.Run("DetachEmptyDevice", func(t *testing.T) { + err := DetachLoopDevice("") + assert.Assert(t, err != nil, "shouldn't detach empty path") + }) + + t.Run("RemoveLoopDevicesAssociatedWithImage", func(t *testing.T) { + err := RemoveLoopDevicesAssociatedWithImage(imagePath) + assert.NilError(t, err) + + devices, err := FindAssociatedLoopDevices(imagePath) + assert.NilError(t, err) + assert.Assert(t, is.Len(devices, 0)) + }) + + t.Run("RemoveLoopDevicesAssociatedWithInvalidImage", func(t *testing.T) { + err := RemoveLoopDevicesAssociatedWithImage("") + assert.NilError(t, err) + }) +} + +func createSparseImage(t *testing.T) string { + file, err := ioutil.TempFile("", "losetup-tests-") + assert.NilError(t, err) + + size, err := units.RAMInBytes("16Mb") + assert.NilError(t, err) + + err = file.Truncate(size) + assert.NilError(t, err) + + err = file.Close() + assert.NilError(t, err) + + return file.Name() +} diff --git a/snapshots/devmapper/metadata.go b/snapshots/devmapper/metadata.go new file mode 100644 index 000000000..e40430b9b --- /dev/null +++ b/snapshots/devmapper/metadata.go @@ -0,0 +1,315 @@ +// +build linux + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package devmapper + +import ( + "context" + "encoding/json" + "fmt" + "strconv" + + "github.com/pkg/errors" + bolt "go.etcd.io/bbolt" +) + +type ( + // DeviceInfoCallback is a callback used for device updates + DeviceInfoCallback func(deviceInfo *DeviceInfo) error +) + +type deviceIDState byte + +const ( + deviceFree deviceIDState = iota + deviceTaken +) + +// Bucket names +var ( + devicesBucketName = []byte("devices") // Contains thin devices metadata = + deviceIDBucketName = []byte("device_ids") // Tracks used device ids = +) + +var ( + // ErrNotFound represents an error returned when object not found in meta store + ErrNotFound = errors.New("not found") + // ErrAlreadyExists represents an error returned when object can't be duplicated in meta store + ErrAlreadyExists = errors.New("object already exists") +) + +// PoolMetadata keeps device info for the given thin-pool device, it also responsible for +// generating next available device ids and tracking devmapper transaction numbers +type PoolMetadata struct { + db *bolt.DB +} + +// NewPoolMetadata creates new or open existing pool metadata database +func NewPoolMetadata(dbfile string) (*PoolMetadata, error) { + db, err := bolt.Open(dbfile, 0600, nil) + if err != nil { + return nil, err + } + + metadata := &PoolMetadata{db: db} + if err := metadata.ensureDatabaseInitialized(); err != nil { + return nil, errors.Wrap(err, "failed to initialize database") + } + + return metadata, nil +} + +// ensureDatabaseInitialized creates buckets required for metadata store in order +// to avoid bucket existence checks across the code +func (m *PoolMetadata) ensureDatabaseInitialized() error { + return m.db.Update(func(tx *bolt.Tx) error { + if _, err := tx.CreateBucketIfNotExists(devicesBucketName); err != nil { + return err + } + + if _, err := tx.CreateBucketIfNotExists(deviceIDBucketName); err != nil { + return err + } + + return nil + }) +} + +// AddDevice saves device info to database. +func (m *PoolMetadata) AddDevice(ctx context.Context, info *DeviceInfo) error { + return m.db.Update(func(tx *bolt.Tx) error { + devicesBucket := tx.Bucket(devicesBucketName) + + // Make sure device name is unique + if err := getObject(devicesBucket, info.Name, nil); err == nil { + return ErrAlreadyExists + } + + // Find next available device ID + deviceID, err := getNextDeviceID(tx) + if err != nil { + return err + } + + info.DeviceID = deviceID + + return putObject(devicesBucket, info.Name, info, false) + }) +} + +// getNextDeviceID finds the next free device ID by taking a cursor +// through the deviceIDBucketName bucket and finding the next sequentially +// unassigned ID. Device ID state is marked by a byte deviceFree or +// deviceTaken. Low device IDs will be reused sooner. +func getNextDeviceID(tx *bolt.Tx) (uint32, error) { + bucket := tx.Bucket(deviceIDBucketName) + cursor := bucket.Cursor() + + // Check if any device id can be reused. + // Bolt stores its keys in byte-sorted order within a bucket. + // This makes sequential iteration extremely fast. + for key, taken := cursor.First(); key != nil; key, taken = cursor.Next() { + isFree := taken[0] == byte(deviceFree) + if !isFree { + continue + } + + parsedID, err := strconv.ParseUint(string(key), 10, 32) + if err != nil { + return 0, err + } + + id := uint32(parsedID) + if err := markDeviceID(tx, id, deviceTaken); err != nil { + return 0, err + } + + return id, nil + } + + // Try allocate new device ID + seq, err := bucket.NextSequence() + if err != nil { + return 0, err + } + + if seq >= maxDeviceID { + return 0, errors.Errorf("dm-meta: couldn't find free device key") + } + + id := uint32(seq) + if err := markDeviceID(tx, id, deviceTaken); err != nil { + return 0, err + } + + return id, nil +} + +// markDeviceID marks a device as deviceFree or deviceTaken +func markDeviceID(tx *bolt.Tx, deviceID uint32, state deviceIDState) error { + var ( + bucket = tx.Bucket(deviceIDBucketName) + key = strconv.FormatUint(uint64(deviceID), 10) + value = []byte{byte(state)} + ) + + if err := bucket.Put([]byte(key), value); err != nil { + return errors.Wrapf(err, "failed to free device id %q", key) + } + + return nil +} + +// UpdateDevice updates device info in metadata store. +// The callback should be used to indicate whether device info update was successful or not. +// An error returned from the callback will rollback the update transaction in the database. +// Name and Device ID are not allowed to change. +func (m *PoolMetadata) UpdateDevice(ctx context.Context, name string, fn DeviceInfoCallback) error { + return m.db.Update(func(tx *bolt.Tx) error { + var ( + device = &DeviceInfo{} + bucket = tx.Bucket(devicesBucketName) + ) + + if err := getObject(bucket, name, device); err != nil { + return err + } + + // Don't allow changing these values, keep things in sync with devmapper + name := device.Name + devID := device.DeviceID + + if err := fn(device); err != nil { + return err + } + + if name != device.Name { + return fmt.Errorf("failed to update device info, name didn't match: %q %q", name, device.Name) + } + + if devID != device.DeviceID { + return fmt.Errorf("failed to update device info, device id didn't match: %d %d", devID, device.DeviceID) + } + + return putObject(bucket, name, device, true) + }) +} + +// GetDevice retrieves device info by name from database +func (m *PoolMetadata) GetDevice(ctx context.Context, name string) (*DeviceInfo, error) { + var ( + dev DeviceInfo + err error + ) + + err = m.db.View(func(tx *bolt.Tx) error { + bucket := tx.Bucket(devicesBucketName) + return getObject(bucket, name, &dev) + }) + + return &dev, err +} + +// RemoveDevice removes device info from store. +func (m *PoolMetadata) RemoveDevice(ctx context.Context, name string) error { + return m.db.Update(func(tx *bolt.Tx) error { + var ( + device = &DeviceInfo{} + bucket = tx.Bucket(devicesBucketName) + ) + + if err := getObject(bucket, name, device); err != nil { + return err + } + + if err := bucket.Delete([]byte(name)); err != nil { + return errors.Wrapf(err, "failed to delete device info for %q", name) + } + + if err := markDeviceID(tx, device.DeviceID, deviceFree); err != nil { + return err + } + + return nil + }) +} + +// GetDeviceNames retrieves the list of device names currently stored in database +func (m *PoolMetadata) GetDeviceNames(ctx context.Context) ([]string, error) { + var ( + names []string + err error + ) + + err = m.db.View(func(tx *bolt.Tx) error { + bucket := tx.Bucket(devicesBucketName) + return bucket.ForEach(func(k, _ []byte) error { + names = append(names, string(k)) + return nil + }) + }) + + if err != nil { + return nil, err + } + + return names, nil +} + +// Close closes metadata store +func (m *PoolMetadata) Close() error { + if err := m.db.Close(); err != nil && err != bolt.ErrDatabaseNotOpen { + return err + } + + return nil +} + +func putObject(bucket *bolt.Bucket, key string, obj interface{}, overwrite bool) error { + keyBytes := []byte(key) + + if !overwrite && bucket.Get(keyBytes) != nil { + return errors.Errorf("object with key %q already exists", key) + } + + data, err := json.Marshal(obj) + if err != nil { + return errors.Wrapf(err, "failed to marshal object with key %q", key) + } + + if err := bucket.Put(keyBytes, data); err != nil { + return errors.Wrapf(err, "failed to insert object with key %q", key) + } + + return nil +} + +func getObject(bucket *bolt.Bucket, key string, obj interface{}) error { + data := bucket.Get([]byte(key)) + if data == nil { + return ErrNotFound + } + + if obj != nil { + if err := json.Unmarshal(data, obj); err != nil { + return errors.Wrapf(err, "failed to unmarshal object with key %q", key) + } + } + + return nil +} diff --git a/snapshots/devmapper/metadata_test.go b/snapshots/devmapper/metadata_test.go new file mode 100644 index 000000000..1149b79b1 --- /dev/null +++ b/snapshots/devmapper/metadata_test.go @@ -0,0 +1,189 @@ +// +build linux + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package devmapper + +import ( + "context" + "io/ioutil" + "os" + "path/filepath" + "testing" + + "gotest.tools/assert" + is "gotest.tools/assert/cmp" +) + +var ( + testCtx = context.Background() +) + +func TestPoolMetadata_AddDevice(t *testing.T) { + tempDir, store := createStore(t) + defer cleanupStore(t, tempDir, store) + + expected := &DeviceInfo{ + Name: "test2", + ParentName: "test1", + Size: 1, + State: Activated, + } + + err := store.AddDevice(testCtx, expected) + assert.NilError(t, err) + + result, err := store.GetDevice(testCtx, "test2") + assert.NilError(t, err) + + assert.Equal(t, expected.Name, result.Name) + assert.Equal(t, expected.ParentName, result.ParentName) + assert.Equal(t, expected.Size, result.Size) + assert.Equal(t, expected.State, result.State) + assert.Assert(t, result.DeviceID != 0) + assert.Equal(t, expected.DeviceID, result.DeviceID) +} + +func TestPoolMetadata_AddDeviceRollback(t *testing.T) { + tempDir, store := createStore(t) + defer cleanupStore(t, tempDir, store) + + err := store.AddDevice(testCtx, &DeviceInfo{Name: ""}) + assert.Assert(t, err != nil) + + _, err = store.GetDevice(testCtx, "") + assert.Equal(t, ErrNotFound, err) +} + +func TestPoolMetadata_AddDeviceDuplicate(t *testing.T) { + tempDir, store := createStore(t) + defer cleanupStore(t, tempDir, store) + + err := store.AddDevice(testCtx, &DeviceInfo{Name: "test"}) + assert.NilError(t, err) + + err = store.AddDevice(testCtx, &DeviceInfo{Name: "test"}) + assert.Equal(t, ErrAlreadyExists, err) +} + +func TestPoolMetadata_ReuseDeviceID(t *testing.T) { + tempDir, store := createStore(t) + defer cleanupStore(t, tempDir, store) + + info1 := &DeviceInfo{Name: "test1"} + err := store.AddDevice(testCtx, info1) + assert.NilError(t, err) + + info2 := &DeviceInfo{Name: "test2"} + err = store.AddDevice(testCtx, info2) + assert.NilError(t, err) + + assert.Assert(t, info1.DeviceID != info2.DeviceID) + assert.Assert(t, info1.DeviceID != 0) + + err = store.RemoveDevice(testCtx, info2.Name) + assert.NilError(t, err) + + info3 := &DeviceInfo{Name: "test3"} + err = store.AddDevice(testCtx, info3) + assert.NilError(t, err) + + assert.Equal(t, info2.DeviceID, info3.DeviceID) +} + +func TestPoolMetadata_RemoveDevice(t *testing.T) { + tempDir, store := createStore(t) + defer cleanupStore(t, tempDir, store) + + err := store.AddDevice(testCtx, &DeviceInfo{Name: "test"}) + assert.NilError(t, err) + + err = store.RemoveDevice(testCtx, "test") + assert.NilError(t, err) + + _, err = store.GetDevice(testCtx, "test") + assert.Equal(t, ErrNotFound, err) +} + +func TestPoolMetadata_UpdateDevice(t *testing.T) { + tempDir, store := createStore(t) + defer cleanupStore(t, tempDir, store) + + oldInfo := &DeviceInfo{ + Name: "test1", + ParentName: "test2", + Size: 3, + State: Activated, + } + + err := store.AddDevice(testCtx, oldInfo) + assert.NilError(t, err) + + err = store.UpdateDevice(testCtx, oldInfo.Name, func(info *DeviceInfo) error { + info.ParentName = "test5" + info.Size = 6 + info.State = Created + return nil + }) + + assert.NilError(t, err) + + newInfo, err := store.GetDevice(testCtx, "test1") + assert.NilError(t, err) + + assert.Equal(t, "test1", newInfo.Name) + assert.Equal(t, "test5", newInfo.ParentName) + assert.Assert(t, newInfo.Size == 6) + assert.Equal(t, Created, newInfo.State) +} + +func TestPoolMetadata_GetDeviceNames(t *testing.T) { + tempDir, store := createStore(t) + defer cleanupStore(t, tempDir, store) + + err := store.AddDevice(testCtx, &DeviceInfo{Name: "test1"}) + assert.NilError(t, err) + + err = store.AddDevice(testCtx, &DeviceInfo{Name: "test2"}) + assert.NilError(t, err) + + names, err := store.GetDeviceNames(testCtx) + assert.NilError(t, err) + assert.Assert(t, is.Len(names, 2)) + + assert.Equal(t, "test1", names[0]) + assert.Equal(t, "test2", names[1]) +} + +func createStore(t *testing.T) (tempDir string, store *PoolMetadata) { + tempDir, err := ioutil.TempDir("", "pool-metadata-") + assert.NilError(t, err, "couldn't create temp directory for metadata tests") + + path := filepath.Join(tempDir, "test.db") + metadata, err := NewPoolMetadata(path) + assert.NilError(t, err) + + return tempDir, metadata +} + +func cleanupStore(t *testing.T, tempDir string, store *PoolMetadata) { + err := store.Close() + assert.NilError(t, err, "failed to close metadata store") + + err = os.RemoveAll(tempDir) + assert.NilError(t, err, "failed to cleanup temp directory") +} diff --git a/snapshots/devmapper/pool_device.go b/snapshots/devmapper/pool_device.go new file mode 100644 index 000000000..cd14b9412 --- /dev/null +++ b/snapshots/devmapper/pool_device.go @@ -0,0 +1,386 @@ +// +build linux + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package devmapper + +import ( + "context" + "path/filepath" + "strconv" + + "github.com/hashicorp/go-multierror" + "github.com/pkg/errors" + + "github.com/containerd/containerd/log" + "github.com/containerd/containerd/snapshots/devmapper/dmsetup" +) + +// PoolDevice ties together data and metadata volumes, represents thin-pool and manages volumes, snapshots and device ids. +type PoolDevice struct { + poolName string + metadata *PoolMetadata +} + +// NewPoolDevice creates new thin-pool from existing data and metadata volumes. +// If pool 'poolName' already exists, it'll be reloaded with new parameters. +func NewPoolDevice(ctx context.Context, config *Config) (*PoolDevice, error) { + log.G(ctx).Infof("initializing pool device %q", config.PoolName) + + version, err := dmsetup.Version() + if err != nil { + log.G(ctx).Errorf("dmsetup not available") + return nil, err + } + + log.G(ctx).Infof("using dmsetup:\n%s", version) + + dbpath := filepath.Join(config.RootPath, config.PoolName+".db") + poolMetaStore, err := NewPoolMetadata(dbpath) + if err != nil { + return nil, err + } + + // Make sure pool exists and available + poolPath := dmsetup.GetFullDevicePath(config.PoolName) + if _, err := dmsetup.Info(poolPath); err != nil { + return nil, errors.Wrapf(err, "failed to query pool %q", poolPath) + } + + return &PoolDevice{ + poolName: config.PoolName, + metadata: poolMetaStore, + }, nil +} + +// transition invokes 'updateStateFn' callback to perform devmapper operation and reflects device state changes/errors in meta store. +// 'tryingState' will be set before invoking callback. If callback succeeded 'successState' will be set, otherwise +// error details will be recorded in meta store. +func (p *PoolDevice) transition(ctx context.Context, deviceName string, tryingState DeviceState, successState DeviceState, updateStateFn func() error) error { + // Set device to trying state + uerr := p.metadata.UpdateDevice(ctx, deviceName, func(deviceInfo *DeviceInfo) error { + deviceInfo.State = tryingState + return nil + }) + + if uerr != nil { + return errors.Wrapf(uerr, "failed to set device %q state to %q", deviceName, tryingState) + } + + var result *multierror.Error + + // Invoke devmapper operation + err := updateStateFn() + + if err != nil { + result = multierror.Append(result, err) + } + + // If operation succeeded transition to success state, otherwise save error details + uerr = p.metadata.UpdateDevice(ctx, deviceName, func(deviceInfo *DeviceInfo) error { + if err == nil { + deviceInfo.State = successState + deviceInfo.Error = "" + } else { + deviceInfo.Error = err.Error() + } + return nil + }) + + if uerr != nil { + result = multierror.Append(result, uerr) + } + + return result.ErrorOrNil() +} + +// CreateThinDevice creates new devmapper thin-device with given name and size. +// Device ID for thin-device will be allocated from metadata store. +// If allocation successful, device will be activated with /dev/mapper/ +func (p *PoolDevice) CreateThinDevice(ctx context.Context, deviceName string, virtualSizeBytes uint64) (retErr error) { + info := &DeviceInfo{ + Name: deviceName, + Size: virtualSizeBytes, + State: Unknown, + } + + // Save initial device metadata and allocate new device ID from store + if err := p.metadata.AddDevice(ctx, info); err != nil { + return errors.Wrapf(err, "failed to save initial metadata for new thin device %q", deviceName) + } + + defer func() { + if retErr == nil { + return + } + + // Rollback metadata + retErr = multierror.Append(retErr, p.metadata.RemoveDevice(ctx, info.Name)) + }() + + // Create thin device + if err := p.createDevice(ctx, info); err != nil { + return err + } + + defer func() { + if retErr == nil { + return + } + + // Rollback creation + retErr = multierror.Append(retErr, p.deleteDevice(ctx, info)) + }() + + return p.activateDevice(ctx, info) +} + +// createDevice creates thin device +func (p *PoolDevice) createDevice(ctx context.Context, info *DeviceInfo) error { + if err := p.transition(ctx, info.Name, Creating, Created, func() error { + return dmsetup.CreateDevice(p.poolName, info.DeviceID) + }); err != nil { + return errors.Wrapf(err, "failed to create new thin device %q (dev: %d)", info.Name, info.DeviceID) + } + + return nil +} + +// activateDevice activates thin device +func (p *PoolDevice) activateDevice(ctx context.Context, info *DeviceInfo) error { + if err := p.transition(ctx, info.Name, Activating, Activated, func() error { + return dmsetup.ActivateDevice(p.poolName, info.Name, info.DeviceID, info.Size, "") + }); err != nil { + return errors.Wrapf(err, "failed to activate new thin device %q (dev: %d)", info.Name, info.DeviceID) + } + + return nil +} + +// CreateSnapshotDevice creates and activates new thin-device from parent thin-device (makes snapshot) +func (p *PoolDevice) CreateSnapshotDevice(ctx context.Context, deviceName string, snapshotName string, virtualSizeBytes uint64) (retErr error) { + baseInfo, err := p.metadata.GetDevice(ctx, deviceName) + if err != nil { + return errors.Wrapf(err, "failed to query device metadata for %q", deviceName) + } + + // Suspend thin device if it was activated previously to avoid corruptions + isActivated := p.IsActivated(baseInfo.Name) + if isActivated { + if err := p.suspendDevice(ctx, baseInfo); err != nil { + return err + } + + // Resume back base thin device on exit + defer func() { + retErr = multierror.Append(retErr, p.resumeDevice(ctx, baseInfo)).ErrorOrNil() + }() + } + + snapInfo := &DeviceInfo{ + Name: snapshotName, + Size: virtualSizeBytes, + ParentName: deviceName, + State: Unknown, + } + + // Save snapshot metadata and allocate new device ID + if err := p.metadata.AddDevice(ctx, snapInfo); err != nil { + return errors.Wrapf(err, "failed to save initial metadata for snapshot %q", snapshotName) + } + + defer func() { + if retErr == nil { + return + } + + // Rollback metadata + retErr = multierror.Append(retErr, p.metadata.RemoveDevice(ctx, snapInfo.Name)) + }() + + // Create thin device snapshot + if err := p.createSnapshot(ctx, baseInfo, snapInfo); err != nil { + return err + } + + defer func() { + if retErr == nil { + return + } + + // Rollback snapshot creation + retErr = multierror.Append(retErr, p.deleteDevice(ctx, snapInfo)) + }() + + // Activate snapshot device + return p.activateDevice(ctx, snapInfo) +} + +func (p *PoolDevice) suspendDevice(ctx context.Context, info *DeviceInfo) error { + if err := p.transition(ctx, info.Name, Suspending, Suspended, func() error { + return dmsetup.SuspendDevice(info.Name) + }); err != nil { + return errors.Wrapf(err, "failed to suspend device %q", info.Name) + } + + return nil +} + +func (p *PoolDevice) resumeDevice(ctx context.Context, info *DeviceInfo) error { + if err := p.transition(ctx, info.Name, Resuming, Resumed, func() error { + return dmsetup.ResumeDevice(info.Name) + }); err != nil { + return errors.Wrapf(err, "failed to resume device %q", info.Name) + } + + return nil +} + +func (p *PoolDevice) createSnapshot(ctx context.Context, baseInfo, snapInfo *DeviceInfo) error { + if err := p.transition(ctx, snapInfo.Name, Creating, Created, func() error { + return dmsetup.CreateSnapshot(p.poolName, snapInfo.DeviceID, baseInfo.DeviceID) + }); err != nil { + return errors.Wrapf(err, + "failed to create snapshot %q (dev: %d) from %q (dev: %d)", + snapInfo.Name, + snapInfo.DeviceID, + baseInfo.Name, + baseInfo.DeviceID) + } + + return nil +} + +// DeactivateDevice deactivates thin device +func (p *PoolDevice) DeactivateDevice(ctx context.Context, deviceName string, deferred bool) error { + if !p.IsActivated(deviceName) { + return nil + } + + opts := []dmsetup.RemoveDeviceOpt{dmsetup.RemoveWithForce, dmsetup.RemoveWithRetries} + if deferred { + opts = append(opts, dmsetup.RemoveDeferred) + } + + if err := p.transition(ctx, deviceName, Deactivating, Deactivated, func() error { + return dmsetup.RemoveDevice(deviceName, opts...) + }); err != nil { + return errors.Wrapf(err, "failed to deactivate device %q", deviceName) + } + + return nil +} + +// IsActivated returns true if thin-device is activated and not suspended +func (p *PoolDevice) IsActivated(deviceName string) bool { + infos, err := dmsetup.Info(deviceName) + if err != nil || len(infos) != 1 { + // Couldn't query device info, device not active + return false + } + + if devInfo := infos[0]; devInfo.Suspended { + return false + } + + return true +} + +// GetUsage reports total size in bytes consumed by a thin-device. +// It relies on the number of used blocks reported by 'dmsetup status'. +// The output looks like: +// device2: 0 204800 thin 17280 204799 +// Where 17280 is the number of used sectors +func (p *PoolDevice) GetUsage(deviceName string) (int64, error) { + status, err := dmsetup.Status(deviceName) + if err != nil { + return 0, errors.Wrapf(err, "can't get status for device %q", deviceName) + } + + if len(status.Params) == 0 { + return 0, errors.Errorf("failed to get the number of used blocks, unexpected output from dmsetup status") + } + + count, err := strconv.ParseInt(status.Params[0], 10, 64) + if err != nil { + return 0, errors.Wrapf(err, "failed to parse status params: %q", status.Params[0]) + } + + return count * dmsetup.SectorSize, nil +} + +// RemoveDevice completely wipes out thin device from thin-pool and frees it's device ID +func (p *PoolDevice) RemoveDevice(ctx context.Context, deviceName string) error { + info, err := p.metadata.GetDevice(ctx, deviceName) + if err != nil { + return errors.Wrapf(err, "can't query metadata for device %q", deviceName) + } + + if err := p.DeactivateDevice(ctx, deviceName, true); err != nil { + return err + } + + if err := p.deleteDevice(ctx, info); err != nil { + return err + } + + // Remove record from meta store and free device ID + if err := p.metadata.RemoveDevice(ctx, deviceName); err != nil { + return errors.Wrapf(err, "can't remove device %q metadata from store after removal", deviceName) + } + + return nil +} + +func (p *PoolDevice) deleteDevice(ctx context.Context, info *DeviceInfo) error { + if err := p.transition(ctx, info.Name, Removing, Removed, func() error { + // Send 'delete' message to thin-pool + return dmsetup.DeleteDevice(p.poolName, info.DeviceID) + }); err != nil { + return errors.Wrapf(err, "failed to delete device %q (dev id: %d)", info.Name, info.DeviceID) + } + + return nil +} + +// RemovePool deactivates all child thin-devices and removes thin-pool device +func (p *PoolDevice) RemovePool(ctx context.Context) error { + deviceNames, err := p.metadata.GetDeviceNames(ctx) + if err != nil { + return errors.Wrap(err, "can't query device names") + } + + var result *multierror.Error + + // Deactivate devices if any + for _, name := range deviceNames { + if err := p.DeactivateDevice(ctx, name, true); err != nil { + result = multierror.Append(result, errors.Wrapf(err, "failed to remove %q", name)) + } + } + + if err := dmsetup.RemoveDevice(p.poolName, dmsetup.RemoveWithForce, dmsetup.RemoveWithRetries, dmsetup.RemoveDeferred); err != nil { + result = multierror.Append(result, errors.Wrapf(err, "failed to remove pool %q", p.poolName)) + } + + return result.ErrorOrNil() +} + +// Close closes pool device (thin-pool will not be removed) +func (p *PoolDevice) Close() error { + return p.metadata.Close() +} diff --git a/snapshots/devmapper/pool_device_test.go b/snapshots/devmapper/pool_device_test.go new file mode 100644 index 000000000..4a54bda60 --- /dev/null +++ b/snapshots/devmapper/pool_device_test.go @@ -0,0 +1,247 @@ +// +build linux + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package devmapper + +import ( + "context" + "fmt" + "io/ioutil" + "os" + "os/exec" + "path/filepath" + "testing" + "time" + + "github.com/containerd/containerd/mount" + "github.com/containerd/containerd/pkg/testutil" + "github.com/containerd/containerd/snapshots/devmapper/dmsetup" + "github.com/containerd/containerd/snapshots/devmapper/losetup" + "github.com/docker/go-units" + "github.com/sirupsen/logrus" + "gotest.tools/assert" +) + +const ( + thinDevice1 = "thin-1" + thinDevice2 = "thin-2" + snapDevice1 = "snap-1" + device1Size = 100000 + device2Size = 200000 + testsPrefix = "devmapper-snapshotter-tests-" +) + +// TestPoolDevice runs integration tests for pool device. +// The following scenario implemented: +// - Create pool device with name 'test-pool-device' +// - Create two thin volumes 'thin-1' and 'thin-2' +// - Write ext4 file system on 'thin-1' and make sure it'errs moutable +// - Write v1 test file on 'thin-1' volume +// - Take 'thin-1' snapshot 'snap-1' +// - Change v1 file to v2 on 'thin-1' +// - Mount 'snap-1' and make sure test file is v1 +// - Unmount volumes and remove all devices +func TestPoolDevice(t *testing.T) { + testutil.RequiresRoot(t) + + logrus.SetLevel(logrus.DebugLevel) + ctx := context.Background() + + tempDir, err := ioutil.TempDir("", "pool-device-test-") + assert.NilError(t, err, "couldn't get temp directory for testing") + + _, loopDataDevice := createLoopbackDevice(t, tempDir) + _, loopMetaDevice := createLoopbackDevice(t, tempDir) + + poolName := fmt.Sprintf("test-pool-device-%d", time.Now().Nanosecond()) + err = dmsetup.CreatePool(poolName, loopDataDevice, loopMetaDevice, 64*1024/dmsetup.SectorSize) + assert.NilError(t, err, "failed to create pool %q", poolName) + + defer func() { + // Detach loop devices and remove images + err := losetup.DetachLoopDevice(loopDataDevice, loopMetaDevice) + assert.NilError(t, err) + + err = os.RemoveAll(tempDir) + assert.NilError(t, err, "couldn't cleanup temp directory") + }() + + config := &Config{ + PoolName: poolName, + RootPath: tempDir, + BaseImageSize: "16mb", + BaseImageSizeBytes: 16 * 1024 * 1024, + } + + pool, err := NewPoolDevice(ctx, config) + assert.NilError(t, err, "can't create device pool") + assert.Assert(t, pool != nil) + + defer func() { + err := pool.RemovePool(ctx) + assert.NilError(t, err, "can't close device pool") + }() + + // Create thin devices + t.Run("CreateThinDevice", func(t *testing.T) { + testCreateThinDevice(t, pool) + }) + + // Make ext4 filesystem on 'thin-1' + t.Run("MakeFileSystem", func(t *testing.T) { + testMakeFileSystem(t, pool) + }) + + // Mount 'thin-1' + err = mount.WithTempMount(ctx, getMounts(thinDevice1), func(thin1MountPath string) error { + // Write v1 test file on 'thin-1' device + thin1TestFilePath := filepath.Join(thin1MountPath, "TEST") + err := ioutil.WriteFile(thin1TestFilePath, []byte("test file (v1)"), 0700) + assert.NilError(t, err, "failed to write test file v1 on '%s' volume", thinDevice1) + + // Take snapshot of 'thin-1' + t.Run("CreateSnapshotDevice", func(t *testing.T) { + testCreateSnapshot(t, pool) + }) + + // Update TEST file on 'thin-1' to v2 + err = ioutil.WriteFile(thin1TestFilePath, []byte("test file (v2)"), 0700) + assert.NilError(t, err, "failed to write test file v2 on 'thin-1' volume after taking snapshot") + + return nil + }) + + assert.NilError(t, err) + + // Mount 'snap-1' and make sure TEST file is v1 + err = mount.WithTempMount(ctx, getMounts(snapDevice1), func(snap1MountPath string) error { + // Read test file from snapshot device and make sure it's v1 + fileData, err := ioutil.ReadFile(filepath.Join(snap1MountPath, "TEST")) + assert.NilError(t, err, "couldn't read test file from '%s' device", snapDevice1) + assert.Equal(t, "test file (v1)", string(fileData), "test file content is invalid on snapshot") + + return nil + }) + + assert.NilError(t, err) + + t.Run("DeactivateDevice", func(t *testing.T) { + testDeactivateThinDevice(t, pool) + }) + + t.Run("RemoveDevice", func(t *testing.T) { + testRemoveThinDevice(t, pool) + }) +} + +func testCreateThinDevice(t *testing.T, pool *PoolDevice) { + ctx := context.Background() + + err := pool.CreateThinDevice(ctx, thinDevice1, device1Size) + assert.NilError(t, err, "can't create first thin device") + + err = pool.CreateThinDevice(ctx, thinDevice1, device1Size) + assert.Assert(t, err != nil, "device pool allows duplicated device names") + + err = pool.CreateThinDevice(ctx, thinDevice2, device2Size) + assert.NilError(t, err, "can't create second thin device") + + deviceInfo1, err := pool.metadata.GetDevice(ctx, thinDevice1) + assert.NilError(t, err) + + deviceInfo2, err := pool.metadata.GetDevice(ctx, thinDevice2) + assert.NilError(t, err) + + assert.Assert(t, deviceInfo1.DeviceID != deviceInfo2.DeviceID, "assigned device ids should be different") + + usage, err := pool.GetUsage(thinDevice1) + assert.NilError(t, err) + assert.Equal(t, usage, int64(0)) +} + +func testMakeFileSystem(t *testing.T, pool *PoolDevice) { + devicePath := dmsetup.GetFullDevicePath(thinDevice1) + args := []string{ + devicePath, + "-E", + "nodiscard,lazy_itable_init=0,lazy_journal_init=0", + } + + output, err := exec.Command("mkfs.ext4", args...).CombinedOutput() + assert.NilError(t, err, "failed to make filesystem on '%s': %s", thinDevice1, string(output)) + + usage, err := pool.GetUsage(thinDevice1) + assert.NilError(t, err) + assert.Assert(t, usage > 0) +} + +func testCreateSnapshot(t *testing.T, pool *PoolDevice) { + err := pool.CreateSnapshotDevice(context.Background(), thinDevice1, snapDevice1, device1Size) + assert.NilError(t, err, "failed to create snapshot from '%s' volume", thinDevice1) +} + +func testDeactivateThinDevice(t *testing.T, pool *PoolDevice) { + deviceList := []string{ + thinDevice2, + snapDevice1, + } + + for _, deviceName := range deviceList { + assert.Assert(t, pool.IsActivated(deviceName)) + + err := pool.DeactivateDevice(context.Background(), deviceName, false) + assert.NilError(t, err, "failed to remove '%s'", deviceName) + + assert.Assert(t, !pool.IsActivated(deviceName)) + } +} + +func testRemoveThinDevice(t *testing.T, pool *PoolDevice) { + err := pool.RemoveDevice(testCtx, thinDevice1) + assert.NilError(t, err, "should delete thin device from pool") +} + +func getMounts(thinDeviceName string) []mount.Mount { + return []mount.Mount{ + { + Source: dmsetup.GetFullDevicePath(thinDeviceName), + Type: "ext4", + }, + } +} + +func createLoopbackDevice(t *testing.T, dir string) (string, string) { + file, err := ioutil.TempFile(dir, testsPrefix) + assert.NilError(t, err) + + size, err := units.RAMInBytes("128Mb") + assert.NilError(t, err) + + err = file.Truncate(size) + assert.NilError(t, err) + + err = file.Close() + assert.NilError(t, err) + + imagePath := file.Name() + + loopDevice, err := losetup.AttachLoopDevice(imagePath) + assert.NilError(t, err) + + return imagePath, loopDevice +} diff --git a/snapshots/devmapper/snapshotter.go b/snapshots/devmapper/snapshotter.go new file mode 100644 index 000000000..5f8336ad7 --- /dev/null +++ b/snapshots/devmapper/snapshotter.go @@ -0,0 +1,477 @@ +// +build linux + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package devmapper + +import ( + "context" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + "sync" + + "github.com/containerd/containerd/log" + "github.com/containerd/containerd/mount" + "github.com/containerd/containerd/plugin" + "github.com/containerd/containerd/snapshots" + "github.com/containerd/containerd/snapshots/devmapper/dmsetup" + "github.com/containerd/containerd/snapshots/storage" + "github.com/hashicorp/go-multierror" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" +) + +func init() { + plugin.Register(&plugin.Registration{ + Type: plugin.SnapshotPlugin, + ID: "devmapper", + Config: &Config{}, + InitFn: func(ic *plugin.InitContext) (interface{}, error) { + ic.Meta.Platforms = append(ic.Meta.Platforms, ocispec.Platform{ + OS: "linux", + Architecture: "amd64", + }) + + config, ok := ic.Config.(*Config) + if !ok { + return nil, errors.New("invalid devmapper configuration") + } + + if config.PoolName == "" { + return nil, errors.New("devmapper not configured") + } + + if config.RootPath == "" { + config.RootPath = ic.Root + } + + return NewSnapshotter(ic.Context, config) + }, + }) +} + +const ( + metadataFileName = "metadata.db" + fsTypeExt4 = "ext4" +) + +type closeFunc func() error + +// Snapshotter implements containerd's snapshotter (https://godoc.org/github.com/containerd/containerd/snapshots#Snapshotter) +// based on Linux device-mapper targets. +type Snapshotter struct { + store *storage.MetaStore + pool *PoolDevice + config *Config + cleanupFn []closeFunc + closeOnce sync.Once +} + +// NewSnapshotter creates new device mapper snapshotter. +// Internally it creates thin-pool device (or reloads if it's already exists) and +// initializes a database file for metadata. +func NewSnapshotter(ctx context.Context, config *Config) (*Snapshotter, error) { + // Make sure snapshotter configuration valid before running + if err := config.parse(); err != nil { + return nil, err + } + + if err := config.Validate(); err != nil { + return nil, err + } + + var cleanupFn []closeFunc + + if err := os.MkdirAll(config.RootPath, 0750); err != nil && !os.IsExist(err) { + return nil, errors.Wrapf(err, "failed to create root directory: %s", config.RootPath) + } + + store, err := storage.NewMetaStore(filepath.Join(config.RootPath, metadataFileName)) + if err != nil { + return nil, errors.Wrap(err, "failed to create metastore") + } + + cleanupFn = append(cleanupFn, store.Close) + + poolDevice, err := NewPoolDevice(ctx, config) + if err != nil { + return nil, err + } + + cleanupFn = append(cleanupFn, poolDevice.Close) + + return &Snapshotter{ + store: store, + config: config, + pool: poolDevice, + cleanupFn: cleanupFn, + }, nil +} + +// Stat returns the info for an active or committed snapshot from store +func (s *Snapshotter) Stat(ctx context.Context, key string) (snapshots.Info, error) { + log.G(ctx).WithField("key", key).Debug("stat") + + var ( + info snapshots.Info + err error + ) + + err = s.withTransaction(ctx, false, func(ctx context.Context) error { + _, info, _, err = storage.GetInfo(ctx, key) + return err + }) + + return info, err +} + +// Update updates an existing snapshot info's data +func (s *Snapshotter) Update(ctx context.Context, info snapshots.Info, fieldpaths ...string) (snapshots.Info, error) { + log.G(ctx).Debugf("update: %s", strings.Join(fieldpaths, ", ")) + + var err error + err = s.withTransaction(ctx, true, func(ctx context.Context) error { + info, err = storage.UpdateInfo(ctx, info, fieldpaths...) + return err + }) + + return info, err +} + +// Usage returns the resource usage of an active or committed snapshot excluding the usage of parent snapshots. +func (s *Snapshotter) Usage(ctx context.Context, key string) (snapshots.Usage, error) { + log.G(ctx).WithField("key", key).Debug("usage") + + var ( + id string + err error + info snapshots.Info + usage snapshots.Usage + ) + + err = s.withTransaction(ctx, false, func(ctx context.Context) error { + id, info, usage, err = storage.GetInfo(ctx, key) + if err != nil { + return err + } + + if info.Kind == snapshots.KindActive { + deviceName := s.getDeviceName(id) + usage.Size, err = s.pool.GetUsage(deviceName) + if err != nil { + return err + } + } + + if info.Parent != "" { + // GetInfo returns total number of bytes used by a snapshot (including parent). + // So subtract parent usage in order to get delta consumed by layer itself. + _, _, parentUsage, err := storage.GetInfo(ctx, info.Parent) + if err != nil { + return err + } + + usage.Size -= parentUsage.Size + } + + return err + }) + + return usage, err +} + +// Mounts return the list of mounts for the active or view snapshot +func (s *Snapshotter) Mounts(ctx context.Context, key string) ([]mount.Mount, error) { + log.G(ctx).WithField("key", key).Debug("mounts") + + var ( + snap storage.Snapshot + err error + ) + + err = s.withTransaction(ctx, false, func(ctx context.Context) error { + snap, err = storage.GetSnapshot(ctx, key) + return err + }) + + return s.buildMounts(snap), nil +} + +// Prepare creates thin device for an active snapshot identified by key +func (s *Snapshotter) Prepare(ctx context.Context, key, parent string, opts ...snapshots.Opt) ([]mount.Mount, error) { + log.G(ctx).WithFields(logrus.Fields{"key": key, "parent": parent}).Debug("prepare") + + var ( + mounts []mount.Mount + err error + ) + + err = s.withTransaction(ctx, true, func(ctx context.Context) error { + mounts, err = s.createSnapshot(ctx, snapshots.KindActive, key, parent, opts...) + return err + }) + + return mounts, err +} + +// View creates readonly thin device for the given snapshot key +func (s *Snapshotter) View(ctx context.Context, key, parent string, opts ...snapshots.Opt) ([]mount.Mount, error) { + log.G(ctx).WithFields(logrus.Fields{"key": key, "parent": parent}).Debug("prepare") + + var ( + mounts []mount.Mount + err error + ) + + err = s.withTransaction(ctx, true, func(ctx context.Context) error { + mounts, err = s.createSnapshot(ctx, snapshots.KindView, key, parent, opts...) + return err + }) + + return mounts, err +} + +// Commit marks an active snapshot as committed in meta store. +// Block device unmount operation captures snapshot changes by itself, so no +// additional actions needed within Commit operation. +func (s *Snapshotter) Commit(ctx context.Context, name, key string, opts ...snapshots.Opt) error { + log.G(ctx).WithFields(logrus.Fields{"name": name, "key": key}).Debug("commit") + + return s.withTransaction(ctx, true, func(ctx context.Context) error { + id, _, _, err := storage.GetInfo(ctx, key) + if err != nil { + return err + } + + deviceName := s.getDeviceName(id) + size, err := s.pool.GetUsage(deviceName) + if err != nil { + return err + } + + usage := snapshots.Usage{ + Size: size, + } + + _, err = storage.CommitActive(ctx, key, name, usage, opts...) + return err + }) +} + +// Remove removes thin device and snapshot metadata by key +func (s *Snapshotter) Remove(ctx context.Context, key string) error { + log.G(ctx).WithField("key", key).Debug("remove") + + return s.withTransaction(ctx, true, func(ctx context.Context) error { + return s.removeDevice(ctx, key) + }) +} + +func (s *Snapshotter) removeDevice(ctx context.Context, key string) error { + snapID, _, err := storage.Remove(ctx, key) + if err != nil { + return err + } + + deviceName := s.getDeviceName(snapID) + if err := s.pool.RemoveDevice(ctx, deviceName); err != nil { + log.G(ctx).WithError(err).Errorf("failed to remove device") + return err + } + + return nil +} + +// Walk iterates through all metadata Info for the stored snapshots and calls the provided function for each. +func (s *Snapshotter) Walk(ctx context.Context, fn func(context.Context, snapshots.Info) error) error { + log.G(ctx).Debug("walk") + return s.withTransaction(ctx, false, func(ctx context.Context) error { + return storage.WalkInfo(ctx, fn) + }) +} + +// ResetPool deactivates and deletes all thin devices in thin-pool. +// Used for cleaning pool after benchmarking. +func (s *Snapshotter) ResetPool(ctx context.Context) error { + names, err := s.pool.metadata.GetDeviceNames(ctx) + if err != nil { + return err + } + + var result *multierror.Error + for _, name := range names { + if err := s.pool.RemoveDevice(ctx, name); err != nil { + result = multierror.Append(result, err) + } + } + + return result.ErrorOrNil() +} + +// Close releases devmapper snapshotter resources. +// All subsequent Close calls will be ignored. +func (s *Snapshotter) Close() error { + log.L.Debug("close") + + var result *multierror.Error + s.closeOnce.Do(func() { + for _, fn := range s.cleanupFn { + if err := fn(); err != nil { + result = multierror.Append(result, err) + } + } + }) + + return result.ErrorOrNil() +} + +func (s *Snapshotter) createSnapshot(ctx context.Context, kind snapshots.Kind, key, parent string, opts ...snapshots.Opt) ([]mount.Mount, error) { + snap, err := storage.CreateSnapshot(ctx, kind, key, parent, opts...) + if err != nil { + return nil, err + } + + if len(snap.ParentIDs) == 0 { + deviceName := s.getDeviceName(snap.ID) + log.G(ctx).Debugf("creating new thin device '%s'", deviceName) + + err := s.pool.CreateThinDevice(ctx, deviceName, s.config.BaseImageSizeBytes) + if err != nil { + log.G(ctx).WithError(err).Errorf("failed to create thin device for snapshot %s", snap.ID) + return nil, err + } + + if err := s.mkfs(ctx, deviceName); err != nil { + // Rollback thin device creation if mkfs failed + return nil, multierror.Append(err, + s.pool.RemoveDevice(ctx, deviceName)) + } + } else { + parentDeviceName := s.getDeviceName(snap.ParentIDs[0]) + snapDeviceName := s.getDeviceName(snap.ID) + log.G(ctx).Debugf("creating snapshot device '%s' from '%s'", snapDeviceName, parentDeviceName) + + err := s.pool.CreateSnapshotDevice(ctx, parentDeviceName, snapDeviceName, s.config.BaseImageSizeBytes) + if err != nil { + log.G(ctx).WithError(err).Errorf("failed to create snapshot device from parent %s", parentDeviceName) + return nil, err + } + } + + mounts := s.buildMounts(snap) + + // Remove default directories not expected by the container image + _ = mount.WithTempMount(ctx, mounts, func(root string) error { + return os.Remove(filepath.Join(root, "lost+found")) + }) + + return mounts, nil +} + +// mkfs creates ext4 filesystem on the given devmapper device +func (s *Snapshotter) mkfs(ctx context.Context, deviceName string) error { + args := []string{ + "-E", + // We don't want any zeroing in advance when running mkfs on thin devices (see "man mkfs.ext4") + "nodiscard,lazy_itable_init=0,lazy_journal_init=0", + dmsetup.GetFullDevicePath(deviceName), + } + + log.G(ctx).Debugf("mkfs.ext4 %s", strings.Join(args, " ")) + output, err := exec.Command("mkfs.ext4", args...).CombinedOutput() + if err != nil { + log.G(ctx).WithError(err).Errorf("failed to write fs:\n%s", string(output)) + return err + } + + log.G(ctx).Debugf("mkfs:\n%s", string(output)) + return nil +} + +func (s *Snapshotter) getDeviceName(snapID string) string { + // Add pool name as prefix to avoid collisions with devices from other pools + return fmt.Sprintf("%s-snap-%s", s.config.PoolName, snapID) +} + +func (s *Snapshotter) getDevicePath(snap storage.Snapshot) string { + name := s.getDeviceName(snap.ID) + return dmsetup.GetFullDevicePath(name) +} + +func (s *Snapshotter) buildMounts(snap storage.Snapshot) []mount.Mount { + var options []string + + if snap.Kind != snapshots.KindActive { + options = append(options, "ro") + } + + mounts := []mount.Mount{ + { + Source: s.getDevicePath(snap), + Type: fsTypeExt4, + Options: options, + }, + } + + return mounts +} + +// withTransaction wraps fn callback with containerd's meta store transaction. +// If callback returns an error or transaction is not writable, database transaction will be discarded. +func (s *Snapshotter) withTransaction(ctx context.Context, writable bool, fn func(ctx context.Context) error) error { + ctx, trans, err := s.store.TransactionContext(ctx, writable) + if err != nil { + return err + } + + var result *multierror.Error + + err = fn(ctx) + if err != nil { + result = multierror.Append(result, err) + } + + // Always rollback if transaction is not writable + if err != nil || !writable { + if terr := trans.Rollback(); terr != nil { + log.G(ctx).WithError(terr).Error("failed to rollback transaction") + result = multierror.Append(result, errors.Wrap(terr, "rollback failed")) + } + } else { + if terr := trans.Commit(); terr != nil { + log.G(ctx).WithError(terr).Error("failed to commit transaction") + result = multierror.Append(result, errors.Wrap(terr, "commit failed")) + } + } + + if err := result.ErrorOrNil(); err != nil { + log.G(ctx).WithError(err).Debug("snapshotter error") + + // Unwrap if just one error + if len(result.Errors) == 1 { + return result.Errors[0] + } + + return err + } + + return nil +} diff --git a/snapshots/devmapper/snapshotter_test.go b/snapshots/devmapper/snapshotter_test.go new file mode 100644 index 000000000..1ecde0528 --- /dev/null +++ b/snapshots/devmapper/snapshotter_test.go @@ -0,0 +1,142 @@ +// +build linux + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package devmapper + +import ( + "context" + _ "crypto/sha256" + "fmt" + "io/ioutil" + "os" + "testing" + "time" + + "github.com/containerd/continuity/fs/fstest" + "github.com/hashicorp/go-multierror" + "github.com/sirupsen/logrus" + "gotest.tools/assert" + + "github.com/containerd/containerd/mount" + "github.com/containerd/containerd/namespaces" + "github.com/containerd/containerd/pkg/testutil" + "github.com/containerd/containerd/snapshots" + "github.com/containerd/containerd/snapshots/devmapper/dmsetup" + "github.com/containerd/containerd/snapshots/devmapper/losetup" + "github.com/containerd/containerd/snapshots/testsuite" +) + +func TestSnapshotterSuite(t *testing.T) { + testutil.RequiresRoot(t) + + logrus.SetLevel(logrus.DebugLevel) + + snapshotterFn := func(ctx context.Context, root string) (snapshots.Snapshotter, func() error, error) { + // Create loopback devices for each test case + _, loopDataDevice := createLoopbackDevice(t, root) + _, loopMetaDevice := createLoopbackDevice(t, root) + + poolName := fmt.Sprintf("containerd-snapshotter-suite-pool-%d", time.Now().Nanosecond()) + err := dmsetup.CreatePool(poolName, loopDataDevice, loopMetaDevice, 64*1024/dmsetup.SectorSize) + assert.NilError(t, err, "failed to create pool %q", poolName) + + config := &Config{ + RootPath: root, + PoolName: poolName, + BaseImageSize: "16Mb", + } + + snap, err := NewSnapshotter(context.Background(), config) + if err != nil { + return nil, nil, err + } + + // Remove device mapper pool and detach loop devices after test completes + removePool := func() error { + result := multierror.Append( + snap.pool.RemovePool(ctx), + losetup.DetachLoopDevice(loopDataDevice, loopMetaDevice)) + + return result.ErrorOrNil() + } + + // Pool cleanup should be called before closing metadata store (as we need to retrieve device names) + snap.cleanupFn = append([]closeFunc{removePool}, snap.cleanupFn...) + + return snap, snap.Close, nil + } + + testsuite.SnapshotterSuite(t, "devmapper", snapshotterFn) + + ctx := context.Background() + ctx = namespaces.WithNamespace(ctx, "testsuite") + + t.Run("DevMapperUsage", func(t *testing.T) { + tempDir, err := ioutil.TempDir("", "snapshot-suite-usage") + assert.NilError(t, err) + defer os.RemoveAll(tempDir) + + snapshotter, closer, err := snapshotterFn(ctx, tempDir) + assert.NilError(t, err) + defer closer() + + testUsage(t, snapshotter) + }) +} + +// testUsage tests devmapper's Usage implementation. This is an approximate test as it's hard to +// predict how many blocks will be consumed under different conditions and parameters. +func testUsage(t *testing.T, snapshotter snapshots.Snapshotter) { + ctx := context.Background() + + // Create empty base layer + _, err := snapshotter.Prepare(ctx, "prepare-1", "") + assert.NilError(t, err) + + emptyLayerUsage, err := snapshotter.Usage(ctx, "prepare-1") + assert.NilError(t, err) + + // Should be > 0 as just written file system also consumes blocks + assert.Assert(t, emptyLayerUsage.Size > 0) + + err = snapshotter.Commit(ctx, "layer-1", "prepare-1") + assert.NilError(t, err) + + // Create child layer with 1MB file + + var ( + sizeBytes int64 = 1048576 // 1MB + baseApplier = fstest.Apply(fstest.CreateRandomFile("/a", 12345679, sizeBytes, 0777)) + ) + + mounts, err := snapshotter.Prepare(ctx, "prepare-2", "layer-1") + assert.NilError(t, err) + + err = mount.WithTempMount(ctx, mounts, baseApplier.Apply) + assert.NilError(t, err) + + err = snapshotter.Commit(ctx, "layer-2", "prepare-2") + assert.NilError(t, err) + + layer2Usage, err := snapshotter.Usage(ctx, "layer-2") + assert.NilError(t, err) + + // Should be at least 1 MB + fs metadata + assert.Assert(t, layer2Usage.Size > sizeBytes) + assert.Assert(t, layer2Usage.Size < sizeBytes+256*dmsetup.SectorSize) +}