Merge pull request #8511 from dmcgowan/blockfile-snapshotter

Add blockfile snapshotter
This commit is contained in:
Akihiro Suda 2023-05-18 11:52:37 +09:00 committed by GitHub
commit 878132923d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 633 additions and 2 deletions

View File

@ -20,6 +20,7 @@ import (
_ "github.com/containerd/containerd/metrics/cgroups" _ "github.com/containerd/containerd/metrics/cgroups"
_ "github.com/containerd/containerd/metrics/cgroups/v2" _ "github.com/containerd/containerd/metrics/cgroups/v2"
_ "github.com/containerd/containerd/runtime/v2/runc/options" _ "github.com/containerd/containerd/runtime/v2/runc/options"
_ "github.com/containerd/containerd/snapshots/blockfile/plugin"
_ "github.com/containerd/containerd/snapshots/native/plugin" _ "github.com/containerd/containerd/snapshots/native/plugin"
_ "github.com/containerd/containerd/snapshots/overlay/plugin" _ "github.com/containerd/containerd/snapshots/overlay/plugin"
) )

View File

@ -19,5 +19,6 @@
package builtins package builtins
import ( import (
_ "github.com/containerd/containerd/snapshots/blockfile/plugin"
_ "github.com/containerd/containerd/snapshots/native/plugin" _ "github.com/containerd/containerd/snapshots/native/plugin"
) )

View File

@ -10,10 +10,13 @@ Generic:
- `overlayfs` (default): OverlayFS. This driver is akin to Docker/Moby's "overlay2" storage driver, but containerd's implementation is not called "overlay2". - `overlayfs` (default): OverlayFS. This driver is akin to Docker/Moby's "overlay2" storage driver, but containerd's implementation is not called "overlay2".
- `native`: Native file copying driver. Akin to Docker/Moby's "vfs" driver. - `native`: Native file copying driver. Akin to Docker/Moby's "vfs" driver.
Block-based:
- `blockfile`: A driver using raw block files for each snapshot. Block files are copied from a parent or base empty block file. Mounting requires a virtual machine or support for loopback mounts.
- `devmapper`: ext4/xfs device mapper. See [`devmapper.md`](./devmapper.md).
Filesystem-specific: Filesystem-specific:
- `btrfs`: btrfs. Needs the plugin root (`/var/lib/containerd/io.containerd.snapshotter.v1.btrfs`) to be mounted as btrfs. - `btrfs`: btrfs. Needs the plugin root (`/var/lib/containerd/io.containerd.snapshotter.v1.btrfs`) to be mounted as btrfs.
- `zfs`: ZFS. Needs the plugin root (`/var/lib/containerd/io.containerd.snapshotter.v1.zfs`) to be mounted as ZFS. See also https://github.com/containerd/zfs . - `zfs`: ZFS. Needs the plugin root (`/var/lib/containerd/io.containerd.snapshotter.v1.zfs`) to be mounted as ZFS. See also https://github.com/containerd/zfs .
- `devmapper`: ext4/xfs device mapper. See [`devmapper.md`](./devmapper.md).
[Deprecated](https://github.com/containerd/containerd/blob/main/RELEASES.md#deprecated-features): [Deprecated](https://github.com/containerd/containerd/blob/main/RELEASES.md#deprecated-features):
- `aufs`: AUFS. Deprecated since containerd 1.5. Removed in containerd 2.0. See also https://github.com/containerd/aufs . - `aufs`: AUFS. Deprecated since containerd 1.5. Removed in containerd 2.0. See also https://github.com/containerd/aufs .

View File

@ -0,0 +1,389 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package blockfile
import (
"context"
"fmt"
"os"
"path/filepath"
"github.com/containerd/containerd/log"
"github.com/containerd/containerd/mount"
"github.com/containerd/containerd/plugin"
"github.com/containerd/containerd/snapshots"
"github.com/containerd/containerd/snapshots/storage"
"github.com/containerd/continuity/fs"
)
// SnapshotterConfig holds the configurable properties for the blockfile snapshotter
type SnapshotterConfig struct {
// recreateScratch is whether scratch should be recreated even
// if it already exists
recreateScratch bool
// scratchGenerator, when set, is called with the path of the scratch
// file to produce it. NewSnapshotter returns an error when a scratch
// file has to be created and no generator is configured.
scratchGenerator func(string) error
// fsType is the filesystem type for the mount (defaults to ext4)
fsType string
}
// Opt is an option to configure the blockfile snapshotter. It is called
// with the snapshotter root directory and the configuration to modify.
type Opt func(string, *SnapshotterConfig)
// WithScratchFile provides a scratch file which will get copied on startup
// if the scratch file needs to be generated.
//
// src is the path of the block file to copy. The generator installed on the
// configuration copies src to the destination path it is invoked with and
// wraps any copy failure in a "failed to copy scratch" error.
func WithScratchFile(src string) Opt {
	return func(_ string, config *SnapshotterConfig) {
		config.scratchGenerator = func(dst string) error {
			// Copy to the destination chosen by the caller instead of
			// re-deriving the scratch path from root, so the generator
			// and its caller can never disagree about the target file.
			if err := fs.CopyFile(dst, src); err != nil {
				return fmt.Errorf("failed to copy scratch: %w", err)
			}
			return nil
		}
	}
}
// WithFSType returns an option that records fsType as the filesystem type
// used for mounts of the blockfile. The root directory passed to the option
// is not consulted.
func WithFSType(fsType string) Opt {
	setFSType := func(_ string, cfg *SnapshotterConfig) {
		cfg.fsType = fsType
	}
	return setFSType
}
// snapshotter is the blockfile snapshotter returned by NewSnapshotter.
type snapshotter struct {
// root is the directory containing the scratch file, the metadata
// database and the "snapshots" directory
root string
// scratch is the path of the block file copied for snapshots that
// have no parent
scratch string
// fsType is the filesystem type reported in returned mounts
fsType string
// ms is the metadata store used for all snapshot transactions
ms *storage.MetaStore
}
// NewSnapshotter returns a Snapshotter which stores each snapshot as a block
// file under root. The root directory is created if needed and holds the
// "scratch" block file, the "metadata.db" metadata store and the "snapshots"
// directory.
//
// When the scratch file does not exist (or recreation was requested) the
// configured scratch generator is run; without a generator the returned
// error wraps plugin.ErrSkipPlugin. The filesystem type defaults to "ext4".
func NewSnapshotter(root string, opts ...Opt) (snapshots.Snapshotter, error) {
	if err := os.MkdirAll(root, 0700); err != nil {
		return nil, err
	}

	var cfg SnapshotterConfig
	for _, apply := range opts {
		apply(root, &cfg)
	}

	scratchPath := filepath.Join(root, "scratch")

	// Decide whether the scratch file has to be (re)generated.
	needScratch := cfg.recreateScratch
	if !needScratch {
		_, statErr := os.Stat(scratchPath)
		switch {
		case statErr == nil:
			// An existing scratch file is reused as is.
		case os.IsNotExist(statErr):
			needScratch = true
		default:
			return nil, fmt.Errorf("unable to stat scratch file: %w", statErr)
		}
	}

	if needScratch {
		generate := cfg.scratchGenerator
		if generate == nil {
			return nil, fmt.Errorf("no scratch file generator: %w", plugin.ErrSkipPlugin)
		}
		if err := generate(scratchPath); err != nil {
			return nil, fmt.Errorf("failed to generate scratch file: %w", err)
		}
	}

	fsType := cfg.fsType
	if fsType == "" {
		fsType = "ext4"
	}

	metaStore, err := storage.NewMetaStore(filepath.Join(root, "metadata.db"))
	if err != nil {
		return nil, err
	}

	// The snapshots directory may be left over from a previous run.
	if err := os.Mkdir(filepath.Join(root, "snapshots"), 0700); err != nil && !os.IsExist(err) {
		return nil, err
	}

	sn := &snapshotter{
		root:    root,
		scratch: scratchPath,
		fsType:  fsType,
		ms:      metaStore,
	}
	return sn, nil
}
// Stat looks up the info of an active or committed snapshot by name or key.
//
// It is meant for parent resolution, existence checks and for finding out
// what kind of snapshot a key refers to. On failure the zero Info is
// returned together with the error.
func (o *snapshotter) Stat(ctx context.Context, key string) (info snapshots.Info, err error) {
	err = o.ms.WithTransaction(ctx, false, func(ctx context.Context) error {
		_, found, _, getErr := storage.GetInfo(ctx, key)
		if getErr != nil {
			return getErr
		}
		info = found
		return nil
	})
	if err != nil {
		return snapshots.Info{}, err
	}
	return info, nil
}
// Update applies info to the stored snapshot via storage.UpdateInfo inside a
// metadata transaction, restricted to fieldpaths when given, and returns the
// info reported back by the store. On failure the zero Info is returned
// together with the error.
func (o *snapshotter) Update(ctx context.Context, info snapshots.Info, fieldpaths ...string) (_ snapshots.Info, err error) {
	var updated snapshots.Info
	txErr := o.ms.WithTransaction(ctx, true, func(ctx context.Context) error {
		var updateErr error
		updated, updateErr = storage.UpdateInfo(ctx, info, fieldpaths...)
		return updateErr
	})
	if txErr != nil {
		return snapshots.Info{}, txErr
	}
	return updated, nil
}
// Usage reports the resources attributed to the snapshot identified by key.
//
// The stored usage is the starting point. For an active snapshot the size is
// replaced by the current size of its block file and the inode count is set
// to 1. When the snapshot has a parent, the parent's stored size is
// subtracted. On failure the zero Usage is returned together with the error.
func (o *snapshotter) Usage(ctx context.Context, key string) (usage snapshots.Usage, err error) {
	err = o.ms.WithTransaction(ctx, false, func(ctx context.Context) error {
		id, info, recorded, getErr := storage.GetInfo(ctx, key)
		if getErr != nil {
			return getErr
		}
		usage = recorded

		// Current usage calculation is an approximation based on the size
		// of the block file - the size of its parent. This does not consider
		// that the filesystem may not support shared extents between the block
		// file and its parents, in which case the accurate calculation would just
		// be the size of the block file. Additionally, this does not take into
		// consideration that a file may have been removed before being added,
		// making the number of shared extents between the parent and the block
		// file smaller than the parent, under reporting actual usage.
		//
		// A more ideal calculation would look like:
		//  size(block) - usage(extent_intersection(block,parent))
		//  OR
		//  usage(extent_union(block,parent)) - size(parent)

		if info.Kind == snapshots.KindActive {
			// TODO: Use size calculator from fs package
			blockStat, statErr := os.Stat(o.getBlockFile(id))
			if statErr != nil {
				return statErr
			}
			usage.Size = blockStat.Size()
			usage.Inodes = 1
		}

		if info.Parent == "" {
			return nil
		}

		// GetInfo returns total number of bytes used by a snapshot (including parent).
		// So subtract parent usage in order to get delta consumed by layer itself.
		_, _, parentUsage, parentErr := storage.GetInfo(ctx, info.Parent)
		if parentErr != nil {
			return parentErr
		}
		usage.Size -= parentUsage.Size
		return nil
	})
	if err != nil {
		return snapshots.Usage{}, err
	}
	return usage, nil
}
// Prepare creates an active snapshot identified by key on top of parent and
// returns its mounts. It delegates to createSnapshot with
// snapshots.KindActive.
func (o *snapshotter) Prepare(ctx context.Context, key, parent string, opts ...snapshots.Opt) ([]mount.Mount, error) {
return o.createSnapshot(ctx, snapshots.KindActive, key, parent, opts)
}
// View creates a view snapshot identified by key on top of parent and
// returns its mounts. It delegates to createSnapshot with
// snapshots.KindView.
func (o *snapshotter) View(ctx context.Context, key, parent string, opts ...snapshots.Opt) ([]mount.Mount, error) {
return o.createSnapshot(ctx, snapshots.KindView, key, parent, opts)
}
// Mounts returns the mounts for the transaction identified by key. Can be
// called on a read-write or readonly transaction.
//
// This can be used to recover mounts after calling View or Prepare. A lookup
// failure is reported wrapped in a "failed to get snapshot mount" error.
func (o *snapshotter) Mounts(ctx context.Context, key string) (_ []mount.Mount, err error) {
	var snap storage.Snapshot
	err = o.ms.WithTransaction(ctx, false, func(ctx context.Context) error {
		found, getErr := storage.GetSnapshot(ctx, key)
		if getErr != nil {
			return fmt.Errorf("failed to get snapshot mount: %w", getErr)
		}
		snap = found
		return nil
	})
	if err != nil {
		return nil, err
	}
	return o.mounts(snap), nil
}
// Commit turns the active snapshot identified by key into a committed
// snapshot called name. The usage recorded with the commit is the size of
// the snapshot's block file at commit time with an inode count of 1.
func (o *snapshotter) Commit(ctx context.Context, name, key string, opts ...snapshots.Opt) error {
	commit := func(ctx context.Context) error {
		id, _, _, err := storage.GetInfo(ctx, key)
		if err != nil {
			return err
		}

		blockStat, err := os.Stat(o.getBlockFile(id))
		if err != nil {
			return err
		}

		recorded := snapshots.Usage{
			Inodes: 1,
			Size:   blockStat.Size(),
		}
		_, err = storage.CommitActive(ctx, key, name, recorded, opts...)
		if err != nil {
			return fmt.Errorf("failed to commit snapshot: %w", err)
		}
		return nil
	}
	return o.ms.WithTransaction(ctx, true, commit)
}
// Remove abandons the transaction identified by key. All resources
// associated with the key will be removed.
//
// The block file is renamed to an "rm-" prefixed name inside the metadata
// transaction and is only deleted after the transaction has succeeded. If
// the transaction fails after the rename, the rename is reverted.
func (o *snapshotter) Remove(ctx context.Context, key string) (err error) {
var (
renamed, path string
restore bool
)
err = o.ms.WithTransaction(ctx, true, func(ctx context.Context) error {
id, _, err := storage.Remove(ctx, key)
if err != nil {
return fmt.Errorf("failed to remove: %w", err)
}
path = o.getBlockFile(id)
renamed = filepath.Join(o.root, "snapshots", "rm-"+id)
if err = os.Rename(path, renamed); err != nil {
if !os.IsNotExist(err) {
return fmt.Errorf("failed to rename: %w", err)
}
// There is no block file for this snapshot, so there is nothing
// to delete or restore later
renamed = ""
}
// From this point a failing transaction must undo the rename
restore = true
return nil
})
if err != nil {
if renamed != "" && restore {
// Put the block file back under its original name since the
// metadata removal did not take effect
if err1 := os.Rename(renamed, path); err1 != nil {
// May cause inconsistent data on disk
log.G(ctx).WithError(err1).WithField("path", renamed).Error("failed to rename after failed commit")
}
}
return err
}
if renamed != "" {
// A failed delete is only logged; Remove still returns nil
if err := os.Remove(renamed); err != nil {
// Must be cleaned up, any "rm-*" could be removed if no active transactions
log.G(ctx).WithError(err).WithField("path", renamed).Warnf("failed to remove root filesystem")
}
}
return nil
}
// Walk calls storage.WalkInfo with fn and the filters fs inside a metadata
// store transaction and returns the resulting error.
func (o *snapshotter) Walk(ctx context.Context, fn snapshots.WalkFunc, fs ...string) error {
return o.ms.WithTransaction(ctx, false, func(ctx context.Context) error {
return storage.WalkInfo(ctx, fn, fs...)
})
}
// createSnapshot records a new snapshot of the given kind in the metadata
// store and, when the snapshot needs its own block file, creates that file.
//
// A snapshot gets its own block file when it has no parent or when it is
// active: the file is copied from the direct parent's block file if there is
// one and from the scratch file otherwise. A view with a parent gets no file
// of its own.
//
// If the copy or the surrounding transaction fails, the block file written
// for the snapshot is removed again so that a failed creation does not leave
// an orphaned (and possibly large) file behind. The mounts of the new
// snapshot are returned on success.
func (o *snapshotter) createSnapshot(ctx context.Context, kind snapshots.Kind, key, parent string, opts []snapshots.Opt) (_ []mount.Mount, err error) {
	var (
		s storage.Snapshot
		// created is the block file this call started writing, if any.
		created string
	)

	err = o.ms.WithTransaction(ctx, true, func(ctx context.Context) error {
		s, err = storage.CreateSnapshot(ctx, kind, key, parent, opts...)
		if err != nil {
			return fmt.Errorf("failed to create snapshot: %w", err)
		}

		if len(s.ParentIDs) == 0 || s.Kind == snapshots.KindActive {
			// Remember the path before copying so that a partially
			// written file is cleaned up as well.
			created = o.getBlockFile(s.ID)

			if len(s.ParentIDs) > 0 {
				if err = fs.CopyFile(created, o.getBlockFile(s.ParentIDs[0])); err != nil {
					return fmt.Errorf("copying of parent failed: %w", err)
				}
			} else {
				if err = fs.CopyFile(created, o.scratch); err != nil {
					return fmt.Errorf("copying of scratch failed: %w", err)
				}
			}
		}

		return nil
	})
	if err != nil {
		if created != "" {
			// The metadata was not committed, so nothing references this
			// block file. Removal is best effort and only logged.
			if rmErr := os.Remove(created); rmErr != nil && !os.IsNotExist(rmErr) {
				log.G(ctx).WithError(rmErr).WithField("path", created).Warn("failed to remove block file after failed snapshot creation")
			}
		}
		return nil, err
	}

	return o.mounts(s), nil
}
// getBlockFile returns the path of the block file for the snapshot with the
// given id, located in the "snapshots" directory under the snapshotter root.
func (o *snapshotter) getBlockFile(id string) string {
return filepath.Join(o.root, "snapshots", id)
}
// mounts builds the single loop mount describing snapshot s.
//
// Views are mounted "ro", everything else "rw". The mount source is the
// snapshot's own block file when it has no parent or is active; otherwise it
// is the block file of the direct parent.
func (o *snapshotter) mounts(s storage.Snapshot) []mount.Mount {
	access := "rw"
	if s.Kind == snapshots.KindView {
		access = "ro"
	}

	blockID := s.ID
	if len(s.ParentIDs) > 0 && s.Kind != snapshots.KindActive {
		blockID = s.ParentIDs[0]
	}

	blockMount := mount.Mount{
		Source:  o.getBlockFile(blockID),
		Type:    o.fsType,
		Options: []string{"loop", access},
	}
	return []mount.Mount{blockMount}
}
// Close closes the snapshotter by closing its metadata store and returns
// the error reported by that close.
func (o *snapshotter) Close() error {
return o.ms.Close()
}

View File

@ -0,0 +1,86 @@
//go:build !windows && !darwin
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package blockfile
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"testing"
"github.com/containerd/containerd/mount"
"github.com/containerd/continuity/fs"
"github.com/containerd/continuity/testutil/loopback"
"golang.org/x/sys/unix"
)
// setupSnapshotter prepares the options needed to run the blockfile
// snapshotter in tests.
//
// It formats a loopback device with ext4, checks the device can be mounted,
// and copies the backing file to a scratch file in a test temp directory
// which is handed to the snapshotter through WithScratchFile. The test is
// skipped when mkfs.ext4 cannot be found.
func setupSnapshotter(t *testing.T) ([]Opt, error) {
	mkfsPath, lookErr := exec.LookPath("mkfs.ext4")
	if lookErr != nil {
		t.Skipf("Could not find mkfs.ext4: %v", lookErr)
	}

	// A larger image is used when the page size exceeds 4096 bytes.
	var imageSize int64 = 128 << 20 // 128 MB
	if os.Getpagesize() > 4096 {
		imageSize = 650 << 20 // 650 MB
	}

	loop, err := loopback.New(imageSize)
	if err != nil {
		return nil, err
	}
	defer loop.Close()

	mkfsOut, err := exec.Command(mkfsPath, loop.Device).CombinedOutput()
	if err != nil {
		return nil, fmt.Errorf("failed to make ext4 filesystem (out: %q): %w", mkfsOut, err)
	}

	// sync after a mkfs on the loopback before trying to mount the device
	unix.Sync()

	if err := testMount(t, loop.Device); err != nil {
		return nil, err
	}

	scratchFile := filepath.Join(t.TempDir(), "scratch")
	if err := fs.CopyFile(scratchFile, loop.File); err != nil {
		return nil, err
	}

	return []Opt{WithScratchFile(scratchFile)}, nil
}
// testMount verifies that device can be mounted: it mounts the device on a
// fresh temporary directory using the mount command, removes the
// "lost+found" entry from the mounted filesystem and unmounts again.
//
// The unmount is attempted even when removing "lost+found" fails, so that an
// error never leaves the device mounted while the temporary directory is
// being deleted. The removal error takes precedence in the returned error.
func testMount(t *testing.T, device string) error {
	root, err := os.MkdirTemp(t.TempDir(), "")
	if err != nil {
		return err
	}
	defer os.RemoveAll(root)

	if out, err := exec.Command("mount", device, root).CombinedOutput(); err != nil {
		return fmt.Errorf("failed to mount device %s (out: %q): %w", device, out, err)
	}

	removeErr := os.Remove(filepath.Join(root, "lost+found"))

	// Always unmount before the deferred RemoveAll runs; otherwise RemoveAll
	// would operate on the still-mounted filesystem.
	unmountErr := mount.UnmountAll(root, unix.MNT_DETACH)

	if removeErr != nil {
		if unmountErr != nil {
			return fmt.Errorf("%w (unmount also failed: %v)", removeErr, unmountErr)
		}
		return removeErr
	}
	return unmountErr
}

View File

@ -0,0 +1,26 @@
//go:build windows || darwin
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package blockfile
import "testing"
// setupSnapshotter skips the calling test with the message "No support for
// loopback mounts"; this variant is the one built on windows and darwin.
func setupSnapshotter(t *testing.T) ([]Opt, error) {
t.Skip("No support for loopback mounts")
return nil, nil
}

View File

@ -0,0 +1,47 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package blockfile
import (
"context"
"testing"
"github.com/containerd/containerd/pkg/testutil"
"github.com/containerd/containerd/snapshots"
"github.com/containerd/containerd/snapshots/testsuite"
)
// newSnapshotter returns the constructor handed to the snapshotter test
// suite. The snapshotter options are obtained once from setupSnapshotter
// (failing the test if that returns an error); every call of the returned
// function creates a blockfile snapshotter under the given root and returns
// it together with its Close method as cleanup.
func newSnapshotter(t *testing.T) func(ctx context.Context, root string) (snapshots.Snapshotter, func() error, error) {
	snapshotterOpts, setupErr := setupSnapshotter(t)
	if setupErr != nil {
		t.Fatal("failed to get snapshotter options:", setupErr)
	}

	create := func(_ context.Context, root string) (snapshots.Snapshotter, func() error, error) {
		sn, err := NewSnapshotter(root, snapshotterOpts...)
		if err != nil {
			return nil, nil, err
		}
		return sn, sn.Close, nil
	}
	return create
}
// TestBlockfile runs the shared snapshotter test suite against the blockfile
// snapshotter under the name "Blockfile". testutil.RequiresRoot is checked
// first.
func TestBlockfile(t *testing.T) {
testutil.RequiresRoot(t)
testsuite.SnapshotterSuite(t, "Blockfile", newSnapshotter(t))
}

View File

@ -0,0 +1,67 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package plugin
import (
"errors"
"github.com/containerd/containerd/platforms"
"github.com/containerd/containerd/plugin"
"github.com/containerd/containerd/snapshots/blockfile"
)
// Config represents configuration for the blockfile plugin.
type Config struct {
// Root directory for the plugin; when empty the root provided by the
// plugin init context is used
RootPath string `toml:"root_path"`
// ScratchFile is the scratch block file to use as an empty block
ScratchFile string `toml:"scratch_file"`
// FSType is the filesystem type for the mount
FSType string `toml:"fs_type"`
}
// init registers the blockfile snapshotter as a snapshot plugin with the ID
// "blockfile". On initialization the plugin configuration is translated into
// blockfile options and the snapshotter is created under the configured root
// path, or under the plugin root when no root path is configured.
func init() {
	plugin.Register(&plugin.Registration{
		Type:   plugin.SnapshotPlugin,
		ID:     "blockfile",
		Config: &Config{},
		InitFn: func(ic *plugin.InitContext) (interface{}, error) {
			ic.Meta.Platforms = append(ic.Meta.Platforms, platforms.DefaultSpec())

			cfg, isConfig := ic.Config.(*Config)
			if !isConfig {
				return nil, errors.New("invalid blockfile configuration")
			}

			// An explicitly configured root path replaces the plugin root.
			snapshotRoot := cfg.RootPath
			if snapshotRoot == "" {
				snapshotRoot = ic.Root
			}

			var snapshotterOpts []blockfile.Opt
			if scratch := cfg.ScratchFile; scratch != "" {
				snapshotterOpts = append(snapshotterOpts, blockfile.WithScratchFile(scratch))
			}
			if fsType := cfg.FSType; fsType != "" {
				snapshotterOpts = append(snapshotterOpts, blockfile.WithFSType(fsType))
			}

			return blockfile.NewSnapshotter(snapshotRoot, snapshotterOpts...)
		},
	})
}

View File

@ -959,6 +959,7 @@ func check128LayersMount(name string) func(ctx context.Context, t *testing.T, sn
t.Fatalf("[layer %d] preparing doesn't equal to flat after apply: %+v", i, err) t.Fatalf("[layer %d] preparing doesn't equal to flat after apply: %+v", i, err)
} }
sync()
testutil.Unmount(t, preparing) testutil.Unmount(t, preparing)
parent = filepath.Join(work, fmt.Sprintf("committed-%d", i)) parent = filepath.Join(work, fmt.Sprintf("committed-%d", i))

View File

@ -18,7 +18,11 @@
package testsuite package testsuite
import "syscall" import (
"syscall"
"golang.org/x/sys/unix"
)
func clearMask() func() { func clearMask() func() {
oldumask := syscall.Umask(0) oldumask := syscall.Umask(0)
@ -26,3 +30,7 @@ func clearMask() func() {
syscall.Umask(oldumask) syscall.Umask(oldumask)
} }
} }
// sync calls unix.Sync. The test suite invokes it before unmounting a
// prepared snapshot.
func sync() {
unix.Sync()
}

View File

@ -19,3 +19,5 @@ package testsuite
func clearMask() func() { func clearMask() func() {
return func() {} return func() {}
} }
// sync is a no-op in this variant of the test suite helpers.
func sync() {}