containerd/snapshots/overlay/overlay.go
Akihiro Suda 9ca6fd9e6e
Merge pull request #9117 from kinvolk/rata/userns-chown-opt-in
Require opt-in for rootfs chown when idmap mounts is not supported
2023-09-28 02:34:41 +09:00

658 lines
18 KiB
Go

//go:build linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package overlay
import (
"context"
"fmt"
"os"
"path/filepath"
"strings"
"syscall"
"github.com/containerd/containerd/mount"
"github.com/containerd/containerd/snapshots"
"github.com/containerd/containerd/snapshots/overlay/overlayutils"
"github.com/containerd/containerd/snapshots/storage"
"github.com/containerd/continuity/fs"
"github.com/containerd/log"
)
// upperdirKey is a key of an optional label to each snapshot.
// This optional label of a snapshot contains the location of "upperdir" where
// the change set between this snapshot and its parent is stored.
const upperdirKey = "containerd.io/snapshot/overlay.upperdir"
// SnapshotterConfig is used to configure the overlay snapshotter instance
type SnapshotterConfig struct {
asyncRemove bool
upperdirLabel bool
ms MetaStore
mountOptions []string
remapIds bool
slowChown bool
}
// Opt is an option to configure the overlay snapshotter
type Opt func(config *SnapshotterConfig) error
// AsynchronousRemove defers removal of filesystem content until
// the Cleanup method is called. Removals will make the snapshot
// referred to by the key unavailable and make the key immediately
// available for re-use.
func AsynchronousRemove(config *SnapshotterConfig) error {
config.asyncRemove = true
return nil
}
// WithUpperdirLabel adds as an optional label
// "containerd.io/snapshot/overlay.upperdir". This stores the location
// of the upperdir that contains the changeset between the labelled
// snapshot and its parent.
func WithUpperdirLabel(config *SnapshotterConfig) error {
config.upperdirLabel = true
return nil
}
// WithMountOptions defines the default mount options used for the overlay mount.
// NOTE: Options are not applied to bind mounts.
func WithMountOptions(options []string) Opt {
return func(config *SnapshotterConfig) error {
config.mountOptions = append(config.mountOptions, options...)
return nil
}
}
type MetaStore interface {
TransactionContext(ctx context.Context, writable bool) (context.Context, storage.Transactor, error)
WithTransaction(ctx context.Context, writable bool, fn storage.TransactionCallback) error
Close() error
}
// WithMetaStore allows the MetaStore to be created outside the snapshotter
// and passed in.
func WithMetaStore(ms MetaStore) Opt {
return func(config *SnapshotterConfig) error {
config.ms = ms
return nil
}
}
func WithRemapIds(config *SnapshotterConfig) error {
config.remapIds = true
return nil
}
func WithSlowChown(config *SnapshotterConfig) error {
config.slowChown = true
return nil
}
type snapshotter struct {
root string
ms MetaStore
asyncRemove bool
upperdirLabel bool
options []string
remapIds bool
slowChown bool
}
// NewSnapshotter returns a Snapshotter which uses overlayfs. The overlayfs
// diffs are stored under the provided root. A metadata file is stored under
// the root.
func NewSnapshotter(root string, opts ...Opt) (snapshots.Snapshotter, error) {
var config SnapshotterConfig
for _, opt := range opts {
if err := opt(&config); err != nil {
return nil, err
}
}
if err := os.MkdirAll(root, 0700); err != nil {
return nil, err
}
supportsDType, err := fs.SupportsDType(root)
if err != nil {
return nil, err
}
if !supportsDType {
return nil, fmt.Errorf("%s does not support d_type. If the backing filesystem is xfs, please reformat with ftype=1 to enable d_type support", root)
}
if config.ms == nil {
config.ms, err = storage.NewMetaStore(filepath.Join(root, "metadata.db"))
if err != nil {
return nil, err
}
}
if err := os.Mkdir(filepath.Join(root, "snapshots"), 0700); err != nil && !os.IsExist(err) {
return nil, err
}
if !hasOption(config.mountOptions, "userxattr", false) {
// figure out whether "userxattr" option is recognized by the kernel && needed
userxattr, err := overlayutils.NeedsUserXAttr(root)
if err != nil {
log.L.WithError(err).Warnf("cannot detect whether \"userxattr\" option needs to be used, assuming to be %v", userxattr)
}
if userxattr {
config.mountOptions = append(config.mountOptions, "userxattr")
}
}
if !hasOption(config.mountOptions, "index", false) && supportsIndex() {
config.mountOptions = append(config.mountOptions, "index=off")
}
return &snapshotter{
root: root,
ms: config.ms,
asyncRemove: config.asyncRemove,
upperdirLabel: config.upperdirLabel,
options: config.mountOptions,
remapIds: config.remapIds,
slowChown: config.slowChown,
}, nil
}
func hasOption(options []string, key string, hasValue bool) bool {
for _, option := range options {
if hasValue {
if strings.HasPrefix(option, key) && len(option) > len(key) && option[len(key)] == '=' {
return true
}
} else if option == key {
return true
}
}
return false
}
// Stat returns the info for an active or committed snapshot by name or
// key.
//
// Should be used for parent resolution, existence checks and to discern
// the kind of snapshot.
func (o *snapshotter) Stat(ctx context.Context, key string) (info snapshots.Info, err error) {
var id string
if err := o.ms.WithTransaction(ctx, false, func(ctx context.Context) error {
id, info, _, err = storage.GetInfo(ctx, key)
return err
}); err != nil {
return info, err
}
if o.upperdirLabel {
if info.Labels == nil {
info.Labels = make(map[string]string)
}
info.Labels[upperdirKey] = o.upperPath(id)
}
return info, nil
}
func (o *snapshotter) Update(ctx context.Context, info snapshots.Info, fieldpaths ...string) (newInfo snapshots.Info, err error) {
err = o.ms.WithTransaction(ctx, true, func(ctx context.Context) error {
newInfo, err = storage.UpdateInfo(ctx, info, fieldpaths...)
if err != nil {
return err
}
if o.upperdirLabel {
id, _, _, err := storage.GetInfo(ctx, newInfo.Name)
if err != nil {
return err
}
if newInfo.Labels == nil {
newInfo.Labels = make(map[string]string)
}
newInfo.Labels[upperdirKey] = o.upperPath(id)
}
return nil
})
return newInfo, err
}
// Usage returns the resources taken by the snapshot identified by key.
//
// For active snapshots, this will scan the usage of the overlay "diff" (aka
// "upper") directory and may take some time.
//
// For committed snapshots, the value is returned from the metadata database.
func (o *snapshotter) Usage(ctx context.Context, key string) (_ snapshots.Usage, err error) {
var (
usage snapshots.Usage
info snapshots.Info
id string
)
if err := o.ms.WithTransaction(ctx, false, func(ctx context.Context) error {
id, info, usage, err = storage.GetInfo(ctx, key)
return err
}); err != nil {
return usage, err
}
if info.Kind == snapshots.KindActive {
upperPath := o.upperPath(id)
du, err := fs.DiskUsage(ctx, upperPath)
if err != nil {
// TODO(stevvooe): Consider not reporting an error in this case.
return snapshots.Usage{}, err
}
usage = snapshots.Usage(du)
}
return usage, nil
}
func (o *snapshotter) Prepare(ctx context.Context, key, parent string, opts ...snapshots.Opt) ([]mount.Mount, error) {
return o.createSnapshot(ctx, snapshots.KindActive, key, parent, opts)
}
func (o *snapshotter) View(ctx context.Context, key, parent string, opts ...snapshots.Opt) ([]mount.Mount, error) {
return o.createSnapshot(ctx, snapshots.KindView, key, parent, opts)
}
// Mounts returns the mounts for the transaction identified by key. Can be
// called on an read-write or readonly transaction.
//
// This can be used to recover mounts after calling View or Prepare.
func (o *snapshotter) Mounts(ctx context.Context, key string) (_ []mount.Mount, err error) {
var s storage.Snapshot
var info snapshots.Info
if err := o.ms.WithTransaction(ctx, false, func(ctx context.Context) error {
s, err = storage.GetSnapshot(ctx, key)
if err != nil {
return fmt.Errorf("failed to get active mount: %w", err)
}
_, info, _, err = storage.GetInfo(ctx, key)
if err != nil {
return fmt.Errorf("failed to get snapshot info: %w", err)
}
return nil
}); err != nil {
return nil, err
}
return o.mounts(s, info), nil
}
func (o *snapshotter) Commit(ctx context.Context, name, key string, opts ...snapshots.Opt) error {
return o.ms.WithTransaction(ctx, true, func(ctx context.Context) error {
// grab the existing id
id, _, _, err := storage.GetInfo(ctx, key)
if err != nil {
return err
}
usage, err := fs.DiskUsage(ctx, o.upperPath(id))
if err != nil {
return err
}
if _, err = storage.CommitActive(ctx, key, name, snapshots.Usage(usage), opts...); err != nil {
return fmt.Errorf("failed to commit snapshot %s: %w", key, err)
}
return nil
})
}
// Remove abandons the snapshot identified by key. The snapshot will
// immediately become unavailable and unrecoverable. Disk space will
// be freed up on the next call to `Cleanup`.
func (o *snapshotter) Remove(ctx context.Context, key string) (err error) {
var removals []string
// Remove directories after the transaction is closed, failures must not
// return error since the transaction is committed with the removal
// key no longer available.
defer func() {
if err == nil {
for _, dir := range removals {
if err := os.RemoveAll(dir); err != nil {
log.G(ctx).WithError(err).WithField("path", dir).Warn("failed to remove directory")
}
}
}
}()
return o.ms.WithTransaction(ctx, true, func(ctx context.Context) error {
_, _, err = storage.Remove(ctx, key)
if err != nil {
return fmt.Errorf("failed to remove snapshot %s: %w", key, err)
}
if !o.asyncRemove {
removals, err = o.getCleanupDirectories(ctx)
if err != nil {
return fmt.Errorf("unable to get directories for removal: %w", err)
}
}
return nil
})
}
// Walk the snapshots.
func (o *snapshotter) Walk(ctx context.Context, fn snapshots.WalkFunc, fs ...string) error {
return o.ms.WithTransaction(ctx, false, func(ctx context.Context) error {
if o.upperdirLabel {
return storage.WalkInfo(ctx, func(ctx context.Context, info snapshots.Info) error {
id, _, _, err := storage.GetInfo(ctx, info.Name)
if err != nil {
return err
}
if info.Labels == nil {
info.Labels = make(map[string]string)
}
info.Labels[upperdirKey] = o.upperPath(id)
return fn(ctx, info)
}, fs...)
}
return storage.WalkInfo(ctx, fn, fs...)
})
}
// Cleanup cleans up disk resources from removed or abandoned snapshots
func (o *snapshotter) Cleanup(ctx context.Context) error {
cleanup, err := o.cleanupDirectories(ctx)
if err != nil {
return err
}
for _, dir := range cleanup {
if err := os.RemoveAll(dir); err != nil {
log.G(ctx).WithError(err).WithField("path", dir).Warn("failed to remove directory")
}
}
return nil
}
func (o *snapshotter) cleanupDirectories(ctx context.Context) (_ []string, err error) {
var cleanupDirs []string
// Get a write transaction to ensure no other write transaction can be entered
// while the cleanup is scanning.
if err := o.ms.WithTransaction(ctx, true, func(ctx context.Context) error {
cleanupDirs, err = o.getCleanupDirectories(ctx)
return err
}); err != nil {
return nil, err
}
return cleanupDirs, nil
}
func (o *snapshotter) getCleanupDirectories(ctx context.Context) ([]string, error) {
ids, err := storage.IDMap(ctx)
if err != nil {
return nil, err
}
snapshotDir := filepath.Join(o.root, "snapshots")
fd, err := os.Open(snapshotDir)
if err != nil {
return nil, err
}
defer fd.Close()
dirs, err := fd.Readdirnames(0)
if err != nil {
return nil, err
}
cleanup := []string{}
for _, d := range dirs {
if _, ok := ids[d]; ok {
continue
}
cleanup = append(cleanup, filepath.Join(snapshotDir, d))
}
return cleanup, nil
}
func validateIDMapping(mapping string) error {
var (
hostID int
ctrID int
length int
)
if _, err := fmt.Sscanf(mapping, "%d:%d:%d", &ctrID, &hostID, &length); err != nil {
return err
}
// Almost impossible, but snapshots.WithLabels doesn't check it
if ctrID < 0 || hostID < 0 || length < 0 {
return fmt.Errorf("invalid mapping \"%d:%d:%d\"", ctrID, hostID, length)
}
if ctrID != 0 {
return fmt.Errorf("container mapping of 0 is only supported")
}
return nil
}
func hostID(mapping string) (int, error) {
var (
hostID int
ctrID int
length int
)
if err := validateIDMapping(mapping); err != nil {
return -1, fmt.Errorf("invalid mapping: %w", err)
}
if _, err := fmt.Sscanf(mapping, "%d:%d:%d", &ctrID, &hostID, &length); err != nil {
return -1, err
}
return hostID, nil
}
func (o *snapshotter) createSnapshot(ctx context.Context, kind snapshots.Kind, key, parent string, opts []snapshots.Opt) (_ []mount.Mount, err error) {
var (
s storage.Snapshot
td, path string
info snapshots.Info
)
defer func() {
if err != nil {
if td != "" {
if err1 := os.RemoveAll(td); err1 != nil {
log.G(ctx).WithError(err1).Warn("failed to cleanup temp snapshot directory")
}
}
if path != "" {
if err1 := os.RemoveAll(path); err1 != nil {
log.G(ctx).WithError(err1).WithField("path", path).Error("failed to reclaim snapshot directory, directory may need removal")
err = fmt.Errorf("failed to remove path: %v: %w", err1, err)
}
}
}
}()
if err := o.ms.WithTransaction(ctx, true, func(ctx context.Context) (err error) {
snapshotDir := filepath.Join(o.root, "snapshots")
td, err = o.prepareDirectory(ctx, snapshotDir, kind)
if err != nil {
return fmt.Errorf("failed to create prepare snapshot dir: %w", err)
}
s, err = storage.CreateSnapshot(ctx, kind, key, parent, opts...)
if err != nil {
return fmt.Errorf("failed to create snapshot: %w", err)
}
_, info, _, err = storage.GetInfo(ctx, key)
if err != nil {
return fmt.Errorf("failed to get snapshot info: %w", err)
}
mappedUID := -1
mappedGID := -1
// NOTE: if idmapped mounts' supported by hosted kernel there may be
// no parents at all, so overlayfs will not work and snapshotter
// will use bind mount. To be able to create file objects inside the
// rootfs -- just chown this only bound directory according to provided
// {uid,gid}map. In case of one/multiple parents -- chown upperdir.
if v, ok := info.Labels[snapshots.LabelSnapshotUIDMapping]; ok {
if mappedUID, err = hostID(v); err != nil {
return fmt.Errorf("failed to parse UID mapping: %w", err)
}
}
if v, ok := info.Labels[snapshots.LabelSnapshotGIDMapping]; ok {
if mappedGID, err = hostID(v); err != nil {
return fmt.Errorf("failed to parse GID mapping: %w", err)
}
}
if mappedUID == -1 || mappedGID == -1 {
if len(s.ParentIDs) > 0 {
st, err := os.Stat(o.upperPath(s.ParentIDs[0]))
if err != nil {
return fmt.Errorf("failed to stat parent: %w", err)
}
stat, ok := st.Sys().(*syscall.Stat_t)
if !ok {
return fmt.Errorf("incompatible types after stat call: *syscall.Stat_t expected")
}
mappedUID = int(stat.Uid)
mappedGID = int(stat.Gid)
}
}
if mappedUID != -1 && mappedGID != -1 {
if err := os.Lchown(filepath.Join(td, "fs"), mappedUID, mappedGID); err != nil {
return fmt.Errorf("failed to chown: %w", err)
}
}
path = filepath.Join(snapshotDir, s.ID)
if err = os.Rename(td, path); err != nil {
return fmt.Errorf("failed to rename: %w", err)
}
td = ""
return nil
}); err != nil {
return nil, err
}
return o.mounts(s, info), nil
}
func (o *snapshotter) prepareDirectory(ctx context.Context, snapshotDir string, kind snapshots.Kind) (string, error) {
td, err := os.MkdirTemp(snapshotDir, "new-")
if err != nil {
return "", fmt.Errorf("failed to create temp dir: %w", err)
}
if err := os.Mkdir(filepath.Join(td, "fs"), 0755); err != nil {
return td, err
}
if kind == snapshots.KindActive {
if err := os.Mkdir(filepath.Join(td, "work"), 0711); err != nil {
return td, err
}
}
return td, nil
}
func (o *snapshotter) mounts(s storage.Snapshot, info snapshots.Info) []mount.Mount {
var options []string
if o.remapIds {
if v, ok := info.Labels[snapshots.LabelSnapshotUIDMapping]; ok {
options = append(options, fmt.Sprintf("uidmap=%s", v))
}
if v, ok := info.Labels[snapshots.LabelSnapshotGIDMapping]; ok {
options = append(options, fmt.Sprintf("gidmap=%s", v))
}
}
if len(s.ParentIDs) == 0 {
// if we only have one layer/no parents then just return a bind mount as overlay
// will not work
roFlag := "rw"
if s.Kind == snapshots.KindView {
roFlag = "ro"
}
return []mount.Mount{
{
Source: o.upperPath(s.ID),
Type: "bind",
Options: append(options,
roFlag,
"rbind",
),
},
}
}
if s.Kind == snapshots.KindActive {
options = append(options,
fmt.Sprintf("workdir=%s", o.workPath(s.ID)),
fmt.Sprintf("upperdir=%s", o.upperPath(s.ID)),
)
} else if len(s.ParentIDs) == 1 {
return []mount.Mount{
{
Source: o.upperPath(s.ParentIDs[0]),
Type: "bind",
Options: append(options,
"ro",
"rbind",
),
},
}
}
parentPaths := make([]string, len(s.ParentIDs))
for i := range s.ParentIDs {
parentPaths[i] = o.upperPath(s.ParentIDs[i])
}
options = append(options, fmt.Sprintf("lowerdir=%s", strings.Join(parentPaths, ":")))
options = append(options, o.options...)
return []mount.Mount{
{
Type: "overlay",
Source: "overlay",
Options: options,
},
}
}
func (o *snapshotter) upperPath(id string) string {
return filepath.Join(o.root, "snapshots", id, "fs")
}
func (o *snapshotter) workPath(id string) string {
return filepath.Join(o.root, "snapshots", id, "work")
}
// Close closes the snapshotter
func (o *snapshotter) Close() error {
return o.ms.Close()
}
// supportsIndex checks whether the "index=off" option is supported by the kernel.
func supportsIndex() bool {
if _, err := os.Stat("/sys/module/overlay/parameters/index"); err == nil {
return true
}
return false
}