From f1d79d33b8479ddfa741e61b83dffda259fa16aa Mon Sep 17 00:00:00 2001 From: Kern Walster Date: Mon, 19 Jul 2021 16:37:46 +0000 Subject: [PATCH] Discard blocks when removing a thin device dmsetup does not discard blocks when removing a thin device. The unused blocks are reused by the thin-pool, but will remain allocated in the underlying device indefinitely. For loop device backed thin-pools, this results in "lost" disk space in the underlying file system as the blocks remain allocated in the loop device's backing file. This change adds an option, discard_blocks, to the devmapper snapshotter which causes the snapshotter to issue blkdiscard ioctls on the thin device before removal. With this option enabled, loop device setups will see disk space return to the underlying filesystem immediately on exiting a container. Fixes #5691 Signed-off-by: Kern Walster --- snapshots/devmapper/README.md | 2 + snapshots/devmapper/blkdiscard/blkdiscard.go | 41 ++++++++++++++++++++ snapshots/devmapper/config.go | 3 ++ snapshots/devmapper/dmsetup/dmsetup.go | 35 +++++++++++++++++ snapshots/devmapper/dmsetup/dmsetup_test.go | 6 +++ snapshots/devmapper/pool_device.go | 30 ++++++++++++-- snapshots/devmapper/pool_device_test.go | 1 + 7 files changed, 114 insertions(+), 4 deletions(-) create mode 100644 snapshots/devmapper/blkdiscard/blkdiscard.go diff --git a/snapshots/devmapper/README.md b/snapshots/devmapper/README.md index aa578cd74..6279db466 100644 --- a/snapshots/devmapper/README.md +++ b/snapshots/devmapper/README.md @@ -26,6 +26,7 @@ The following configuration flags are supported: should be the same as in `/dev/mapper/` directory * `base_image_size` - defines how much space to allocate when creating the base device * `async_remove` - flag to async remove device using snapshot GC's cleanup callback +* `discard_blocks` - whether to discard blocks when removing a device. This is especially useful for returning disk space to the filesystem when using loopback devices. Pool name and base image size are required snapshotter parameters. @@ -93,6 +94,7 @@ cat << EOF pool_name = "${POOL_NAME}" root_path = "${DATA_DIR}" base_image_size = "10GB" + discard_blocks = true EOF ``` diff --git a/snapshots/devmapper/blkdiscard/blkdiscard.go b/snapshots/devmapper/blkdiscard/blkdiscard.go new file mode 100644 index 000000000..f04b8ce25 --- /dev/null +++ b/snapshots/devmapper/blkdiscard/blkdiscard.go @@ -0,0 +1,41 @@ +// +build linux + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package blkdiscard + +import "os/exec" + +// Version returns the output of "blkdiscard --version" +func Version() (string, error) { + return blkdiscard("--version") +} + +// BlkDiscard discards all blocks of a device. +// devicePath is expected to be a fully qualified path. +// BlkDiscard expects the caller to verify that the device is not in use. +func BlkDiscard(devicePath string) (string, error) { + return blkdiscard(devicePath) +} + +func blkdiscard(args ...string) (string, error) { + output, err := exec.Command("blkdiscard", args...).CombinedOutput() + if err != nil { + return "", err + } + return string(output), nil +} diff --git a/snapshots/devmapper/config.go b/snapshots/devmapper/config.go index 24e36b856..8f9aaa6ae 100644 --- a/snapshots/devmapper/config.go +++ b/snapshots/devmapper/config.go @@ -43,6 +43,9 @@ type Config struct { // Flag to async remove device using Cleanup() callback in snapshots GC AsyncRemove bool `toml:"async_remove"` + + // Whether to discard blocks when removing a thin device. + DiscardBlocks bool `toml:"discard_blocks"` } // LoadConfig reads devmapper configuration file from disk in TOML format diff --git a/snapshots/devmapper/dmsetup/dmsetup.go b/snapshots/devmapper/dmsetup/dmsetup.go index c3e3c785d..f39067e41 100644 --- a/snapshots/devmapper/dmsetup/dmsetup.go +++ b/snapshots/devmapper/dmsetup/dmsetup.go @@ -16,6 +16,8 @@ limitations under the License. */ +// Copyright 2012-2017 Docker, Inc. + package dmsetup import ( @@ -26,6 +28,7 @@ import ( "strconv" "strings" + blkdiscard "github.com/containerd/containerd/snapshots/devmapper/blkdiscard" "github.com/pkg/errors" "golang.org/x/sys/unix" ) @@ -37,6 +40,9 @@ const ( SectorSize = 512 ) +// ErrInUse represents an error mutating a device because it is in use elsewhere +var ErrInUse = errors.New("device is in use") + // DeviceInfo represents device info returned by "dmsetup info". // dmsetup(8) provides more information on each of these fields. type DeviceInfo struct { @@ -345,6 +351,24 @@ func BlockDeviceSize(path string) (int64, error) { return size, nil } +// DiscardBlocks discards all blocks for the given thin device +// ported from https://github.com/moby/moby/blob/7b9275c0da707b030e62c96b679a976f31f929d3/pkg/devicemapper/devmapper.go#L416 +func DiscardBlocks(deviceName string) error { + inUse, err := isInUse(deviceName) + if err != nil { + return err + } + if inUse { + return ErrInUse + } + path := GetFullDevicePath(deviceName) + _, err = blkdiscard.BlkDiscard(path) + if err != nil { + return err + } + return nil +} + func dmsetup(args ...string) (string, error) { data, err := exec.Command("dmsetup", args...).CombinedOutput() output := string(data) @@ -406,3 +430,14 @@ func parseDmsetupError(output string) string { str = strings.ToLower(str) return str } + +func isInUse(deviceName string) (bool, error) { + info, err := Info(deviceName) + if err != nil { + return true, err + } + if len(info) != 1 { + return true, errors.New("could not get device info") + } + return info[0].OpenCount != 0, nil +} diff --git a/snapshots/devmapper/dmsetup/dmsetup_test.go b/snapshots/devmapper/dmsetup/dmsetup_test.go index e8b95330b..9f64f9cc0 100644 --- a/snapshots/devmapper/dmsetup/dmsetup_test.go +++ b/snapshots/devmapper/dmsetup/dmsetup_test.go @@ -83,6 +83,7 @@ func TestDMSetup(t *testing.T) { t.Run("ActivateDevice", testActivateDevice) t.Run("DeviceStatus", testDeviceStatus) t.Run("SuspendResumeDevice", testSuspendResumeDevice) + t.Run("DiscardBlocks", testDiscardBlocks) t.Run("RemoveDevice", testRemoveDevice) t.Run("RemovePool", func(t *testing.T) { @@ -169,6 +170,11 @@ func testSuspendResumeDevice(t *testing.T) { assert.NilError(t, err) } +func testDiscardBlocks(t *testing.T) { + err := DiscardBlocks(testDeviceName) + assert.NilError(t, err, "failed to discard blocks") +} + func testRemoveDevice(t *testing.T) { err := RemoveDevice(testPoolName) assert.Assert(t, err == unix.EBUSY, "removing thin-pool with dependencies shouldn't be allowed") diff --git a/snapshots/devmapper/pool_device.go b/snapshots/devmapper/pool_device.go index 77a90bd8a..c5ef52695 100644 --- a/snapshots/devmapper/pool_device.go +++ b/snapshots/devmapper/pool_device.go @@ -29,13 +29,15 @@ import ( "golang.org/x/sys/unix" "github.com/containerd/containerd/log" + blkdiscard "github.com/containerd/containerd/snapshots/devmapper/blkdiscard" "github.com/containerd/containerd/snapshots/devmapper/dmsetup" ) // PoolDevice ties together data and metadata volumes, represents thin-pool and manages volumes, snapshots and device ids. type PoolDevice struct { - poolName string - metadata *PoolMetadata + poolName string + metadata *PoolMetadata + discardBlocks bool } // NewPoolDevice creates new thin-pool from existing data and metadata volumes. @@ -51,6 +53,15 @@ func NewPoolDevice(ctx context.Context, config *Config) (*PoolDevice, error) { log.G(ctx).Infof("using dmsetup:\n%s", version) + if config.DiscardBlocks { + blkdiscardVersion, err := blkdiscard.Version() + if err != nil { + log.G(ctx).Error("blkdiscard is not available") + return nil, err + } + log.G(ctx).Infof("using blkdiscard:\n%s", blkdiscardVersion) + } + dbpath := filepath.Join(config.RootPath, config.PoolName+".db") poolMetaStore, err := NewPoolMetadata(dbpath) if err != nil { @@ -64,8 +75,9 @@ func NewPoolDevice(ctx context.Context, config *Config) (*PoolDevice, error) { } poolDevice := &PoolDevice{ - poolName: config.PoolName, - metadata: poolMetaStore, + poolName: config.PoolName, + metadata: poolMetaStore, + discardBlocks: config.DiscardBlocks, } if err := poolDevice.ensureDeviceStates(ctx); err != nil { @@ -422,6 +434,16 @@ func (p *PoolDevice) DeactivateDevice(ctx context.Context, deviceName string, de if err := p.transition(ctx, deviceName, Deactivating, Deactivated, func() error { return retry(ctx, func() error { + if !deferred && p.discardBlocks { + err := dmsetup.DiscardBlocks(deviceName) + if err != nil { + if err == dmsetup.ErrInUse { + log.G(ctx).Warnf("device %q is in use, skipping blkdiscard", deviceName) + } else { + return err + } + } + } if err := dmsetup.RemoveDevice(deviceName, opts...); err != nil { return errors.Wrap(err, "failed to deactivate device") } diff --git a/snapshots/devmapper/pool_device_test.go b/snapshots/devmapper/pool_device_test.go index 8525c0028..f1b21c0b4 100644 --- a/snapshots/devmapper/pool_device_test.go +++ b/snapshots/devmapper/pool_device_test.go @@ -85,6 +85,7 @@ func TestPoolDevice(t *testing.T) { RootPath: tempDir, BaseImageSize: "16mb", BaseImageSizeBytes: 16 * 1024 * 1024, + DiscardBlocks: true, } pool, err := NewPoolDevice(ctx, config)