Move metadata to core/metadata

Signed-off-by: Derek McGowan <derek@mcg.dev>
This commit is contained in:
Derek McGowan
2024-01-17 09:52:03 -08:00
parent 18b3cbe4fa
commit 1a1e0e8c81
43 changed files with 26 additions and 26 deletions

177
core/metadata/adaptors.go Normal file
View File

@@ -0,0 +1,177 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metadata
import (
"strings"
"github.com/containerd/containerd/v2/core/containers"
"github.com/containerd/containerd/v2/core/content"
"github.com/containerd/containerd/v2/core/images"
"github.com/containerd/containerd/v2/core/leases"
"github.com/containerd/containerd/v2/filters"
"github.com/containerd/containerd/v2/sandbox"
"github.com/containerd/containerd/v2/snapshots"
)
// adaptImage wraps an images.Image in a filters.Adaptor so filter expressions
// can be evaluated against it.
//
// Supported field paths are "name", "target.digest", "target.mediatype",
// "labels.<key>" and "annotations.<key>" (the latter reads the target
// descriptor's annotations). o must hold an images.Image; the unchecked type
// assertion panics for any other dynamic type.
func adaptImage(o interface{}) filters.Adaptor {
	img := o.(images.Image)

	lookup := func(fieldpath []string) (string, bool) {
		if len(fieldpath) == 0 {
			return "", false
		}

		switch head, rest := fieldpath[0], fieldpath[1:]; head {
		case "name":
			return img.Name, len(img.Name) != 0
		case "target":
			// A bare "target" is not filterable; a sub-field is required.
			if len(rest) == 0 {
				return "", false
			}
			switch rest[0] {
			case "digest":
				return img.Target.Digest.String(), len(img.Target.Digest) != 0
			case "mediatype":
				return img.Target.MediaType, len(img.Target.MediaType) != 0
			}
		case "labels":
			return checkMap(rest, img.Labels)
			// TODO(stevvooe): Greater/Less than filters would be awesome for
			// size. Let's do it!
		case "annotations":
			return checkMap(rest, img.Target.Annotations)
		}
		return "", false
	}
	return filters.AdapterFunc(lookup)
}
// adaptContainer exposes a containers.Container to the filter engine.
//
// Recognized field paths are "id", "runtime.name", "image" and
// "labels.<key>". o must hold a containers.Container; the unchecked type
// assertion panics otherwise.
func adaptContainer(o interface{}) filters.Adaptor {
	ctr := o.(containers.Container)

	lookup := func(fieldpath []string) (string, bool) {
		if len(fieldpath) == 0 {
			return "", false
		}

		switch head, rest := fieldpath[0], fieldpath[1:]; head {
		case "id":
			return ctr.ID, len(ctr.ID) != 0
		case "runtime":
			// Only the runtime name is filterable, and it must be requested
			// explicitly as a sub-field.
			if len(rest) > 0 && rest[0] == "name" {
				return ctr.Runtime.Name, len(ctr.Runtime.Name) != 0
			}
		case "image":
			return ctr.Image, len(ctr.Image) != 0
		case "labels":
			return checkMap(rest, ctr.Labels)
		}
		return "", false
	}
	return filters.AdapterFunc(lookup)
}
// adaptContentStatus exposes a content.Status to the filter engine. The only
// supported field is "ref", which is always reported as present, even when
// the reference is empty.
func adaptContentStatus(status content.Status) filters.Adaptor {
	return filters.AdapterFunc(func(fieldpath []string) (string, bool) {
		if len(fieldpath) > 0 && fieldpath[0] == "ref" {
			return status.Ref, true
		}
		return "", false
	})
}
// adaptLease exposes a leases.Lease to the filter engine. Supported field
// paths are "id" (present only when non-empty) and "labels.<key>".
func adaptLease(lease leases.Lease) filters.Adaptor {
	lookup := func(fieldpath []string) (string, bool) {
		if len(fieldpath) == 0 {
			return "", false
		}

		switch field, rest := fieldpath[0], fieldpath[1:]; field {
		case "id":
			return lease.ID, len(lease.ID) != 0
		case "labels":
			return checkMap(rest, lease.Labels)
		default:
			return "", false
		}
	}
	return filters.AdapterFunc(lookup)
}
// adaptSnapshot exposes a snapshots.Info to the filter engine.
//
// Supported field paths are "kind" (rendered as "active", "view" or
// "committed"; any other kind is reported as absent), "name", "parent" and
// "labels.<key>". "name" and "parent" are always reported as present, even
// when empty.
func adaptSnapshot(info snapshots.Info) filters.Adaptor {
	lookup := func(fieldpath []string) (string, bool) {
		if len(fieldpath) == 0 {
			return "", false
		}

		switch fieldpath[0] {
		case "name":
			return info.Name, true
		case "parent":
			return info.Parent, true
		case "labels":
			return checkMap(fieldpath[1:], info.Labels)
		case "kind":
			switch info.Kind {
			case snapshots.KindView:
				return "view", true
			case snapshots.KindActive:
				return "active", true
			case snapshots.KindCommitted:
				return "committed", true
			}
		}
		return "", false
	}
	return filters.AdapterFunc(lookup)
}
// adaptSandbox exposes a sandbox to the filter engine. Supported field paths
// are "id" (always reported as present) and "labels.<key>". instance is
// dereferenced lazily, when the adaptor is queried.
func adaptSandbox(instance *sandbox.Sandbox) filters.Adaptor {
	lookup := func(fieldpath []string) (string, bool) {
		if len(fieldpath) == 0 {
			return "", false
		}

		switch fieldpath[0] {
		case "id":
			return instance.ID, true
		case "labels":
			return checkMap(fieldpath[1:], instance.Labels)
		}
		return "", false
	}
	return filters.AdapterFunc(lookup)
}
// checkMap looks up the key formed by joining fieldpath with "." in m and
// reports whether it is present. Joining lets keys that themselves contain
// dots (for example "labels.a.b" -> key "a.b") be addressed.
func checkMap(fieldpath []string, m map[string]string) (string, bool) {
	if len(m) > 0 {
		if found, present := m[strings.Join(fieldpath, ".")]; present {
			return found, true
		}
	}
	return "", false
}

61
core/metadata/bolt.go Normal file
View File

@@ -0,0 +1,61 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metadata
import (
"context"
"fmt"
bolt "go.etcd.io/bbolt"
)
// transactionKey is the context key type under which WithTransactionContext
// stores a *bolt.Tx. Being an unexported type, it cannot collide with context
// keys defined by other packages.
type transactionKey struct{}
// WithTransactionContext returns a new context holding the provided
// bolt transaction. Functions which require a bolt transaction will
// first check to see if a transaction is already created on the
// context before creating their own.
//
// The view and update helpers in this file look the transaction up again;
// update additionally requires the stored transaction to be writable.
func WithTransactionContext(ctx context.Context, tx *bolt.Tx) context.Context {
return context.WithValue(ctx, transactionKey{}, tx)
}
// transactor is the subset of a bolt database used by view and update to
// start a transaction when the context does not already carry one.
type transactor interface {
// View runs fn inside a transaction started by the implementation.
View(fn func(*bolt.Tx) error) error
// Update runs fn inside a transaction started by the implementation.
Update(fn func(*bolt.Tx) error) error
}
// view runs fn with the bolt transaction stored on ctx when there is one;
// otherwise it starts a new transaction through db.View. A transaction taken
// from the context is used as-is, whether or not it is writable.
func view(ctx context.Context, db transactor, fn func(*bolt.Tx) error) error {
	if existing, found := ctx.Value(transactionKey{}).(*bolt.Tx); found {
		return fn(existing)
	}
	return db.View(fn)
}
// update runs fn with a writable bolt transaction. The transaction stored on
// ctx is reused when present; if that transaction is read-only an error
// wrapping bolt.ErrTxNotWritable is returned and fn is not called. Without a
// transaction on ctx, a new one is started through db.Update.
func update(ctx context.Context, db transactor, fn func(*bolt.Tx) error) error {
	existing, found := ctx.Value(transactionKey{}).(*bolt.Tx)
	switch {
	case !found:
		return db.Update(fn)
	case !existing.Writable():
		return fmt.Errorf("unable to use transaction from context: %w", bolt.ErrTxNotWritable)
	default:
		return fn(existing)
	}
}

View File

@@ -0,0 +1,239 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package boltutil
import (
"fmt"
"time"
"github.com/containerd/containerd/v2/protobuf"
"github.com/containerd/containerd/v2/protobuf/proto"
"github.com/containerd/containerd/v2/protobuf/types"
"github.com/containerd/typeurl/v2"
bolt "go.etcd.io/bbolt"
)
// Bucket and value keys used by the read/write helpers in this package.
var (
bucketKeyAnnotations = []byte("annotations")
bucketKeyLabels = []byte("labels")
bucketKeyCreatedAt = []byte("createdat")
bucketKeyUpdatedAt = []byte("updatedat")
bucketKeyExtensions = []byte("extensions")
)
// ReadLabels reads the labels stored in the "labels" sub-bucket of bkt.
// It returns a nil map and a nil error when that sub-bucket does not exist.
func ReadLabels(bkt *bolt.Bucket) (map[string]string, error) {
return readMap(bkt, bucketKeyLabels)
}
// ReadAnnotations reads the OCI Descriptor Annotations stored in the
// "annotations" sub-bucket of bkt. It returns a nil map and a nil error when
// that sub-bucket does not exist.
func ReadAnnotations(bkt *bolt.Bucket) (map[string]string, error) {
return readMap(bkt, bucketKeyAnnotations)
}
// readMap copies every key/value pair of the sub-bucket bucketName of bkt
// into a new string map. A missing sub-bucket yields (nil, nil); an existing
// but empty one yields an empty, non-nil map.
func readMap(bkt *bolt.Bucket, bucketName []byte) (map[string]string, error) {
	sub := bkt.Bucket(bucketName)
	if sub == nil {
		return nil, nil
	}

	entries := make(map[string]string)
	collect := func(key, value []byte) error {
		entries[string(key)] = string(value)
		return nil
	}
	if err := sub.ForEach(collect); err != nil {
		return nil, err
	}
	return entries, nil
}
// WriteLabels will write a new labels bucket to the provided bucket at key
// bucketKeyLabels ("labels"), replacing the contents of the bucket with the
// provided map.
//
// The provided map labels will be modified to have the final contents of the
// bucket: entries whose value is the empty string are not stored and are
// deleted from the map.
func WriteLabels(bkt *bolt.Bucket, labels map[string]string) error {
return writeMap(bkt, bucketKeyLabels, labels)
}
// WriteAnnotations writes the OCI Descriptor Annotations to the
// "annotations" sub-bucket of bkt, replacing any previous contents. As with
// WriteLabels, entries with an empty value are not stored and are deleted
// from the provided map.
func WriteAnnotations(bkt *bolt.Bucket, labels map[string]string) error {
return writeMap(bkt, bucketKeyAnnotations, labels)
}
// writeMap replaces the sub-bucket bucketName of bkt with the contents of
// labels.
//
// Any existing sub-bucket is deleted first so old entries are never merged
// with new ones. When labels is empty the sub-bucket is left absent. Entries
// with an empty value are skipped and removed from labels, so on success the
// map mirrors what was stored.
func writeMap(bkt *bolt.Bucket, bucketName []byte, labels map[string]string) error {
	if bkt.Bucket(bucketName) != nil {
		if err := bkt.DeleteBucket(bucketName); err != nil {
			return err
		}
	}

	if len(labels) == 0 {
		return nil
	}

	target, err := bkt.CreateBucket(bucketName)
	if err != nil {
		return err
	}

	for key, value := range labels {
		if value != "" {
			if err := target.Put([]byte(key), []byte(value)); err != nil {
				return fmt.Errorf("failed to set label %q=%q: %w", key, value, err)
			}
			continue
		}
		// Not stored, so drop it from the caller's map as well.
		delete(labels, key)
	}
	return nil
}
// ReadTimestamps decodes the "createdat" and "updatedat" values of bkt into
// created and updated. A key that is absent leaves its destination
// untouched; the first decoding error is returned.
func ReadTimestamps(bkt *bolt.Bucket, created, updated *time.Time) error {
	load := func(key []byte, dst *time.Time) error {
		raw := bkt.Get(key)
		if raw == nil {
			return nil
		}
		return dst.UnmarshalBinary(raw)
	}

	if err := load(bucketKeyCreatedAt, created); err != nil {
		return err
	}
	return load(bucketKeyUpdatedAt, updated)
}
// WriteTimestamps stores created and updated in bkt under the keys
// "createdat" and "updatedat", using time.Time's binary encoding. Both
// values are encoded before anything is written, so an encoding failure
// leaves the bucket unchanged.
func WriteTimestamps(bkt *bolt.Bucket, created, updated time.Time) error {
	encCreated, err := created.MarshalBinary()
	if err != nil {
		return err
	}
	encUpdated, err := updated.MarshalBinary()
	if err != nil {
		return err
	}

	if err := bkt.Put(bucketKeyCreatedAt, encCreated); err != nil {
		return err
	}
	return bkt.Put(bucketKeyUpdatedAt, encUpdated)
}
// WriteExtensions stores each entry of extensions in the "extensions"
// sub-bucket of bkt, keyed by name, with the value marshaled as a protobuf
// Any.
//
// The sub-bucket is created on demand and is not cleared first: entries
// already present under other names are left in place. An empty map is a
// no-op.
func WriteExtensions(bkt *bolt.Bucket, extensions map[string]typeurl.Any) error {
	if len(extensions) == 0 {
		return nil
	}

	ebkt, err := bkt.CreateBucketIfNotExists(bucketKeyExtensions)
	if err != nil {
		return err
	}

	for name, ext := range extensions {
		encoded, err := proto.Marshal(protobuf.FromAny(ext))
		if err != nil {
			return err
		}
		if err := ebkt.Put([]byte(name), encoded); err != nil {
			return err
		}
	}
	return nil
}
// ReadExtensions loads the extensions previously stored by WriteExtensions
// from the "extensions" sub-bucket of bkt. The returned map is never nil on
// success; it is empty when the sub-bucket does not exist.
func ReadExtensions(bkt *bolt.Bucket) (map[string]typeurl.Any, error) {
	result := map[string]typeurl.Any{}

	ebkt := bkt.Bucket(bucketKeyExtensions)
	if ebkt == nil {
		return result, nil
	}

	err := ebkt.ForEach(func(name, raw []byte) error {
		decoded := new(types.Any)
		if err := proto.Unmarshal(raw, decoded); err != nil {
			return err
		}
		result[string(name)] = decoded
		return nil
	})
	if err != nil {
		return nil, err
	}
	return result, nil
}
// WriteAny marshals any as a protobuf Any and stores it in bkt under name.
// When the conversion through protobuf.FromAny yields nil, nothing is
// written and nil is returned.
func WriteAny(bkt *bolt.Bucket, name []byte, any typeurl.Any) error {
	msg := protobuf.FromAny(any)
	if msg == nil {
		return nil
	}

	encoded, marshalErr := proto.Marshal(msg)
	if marshalErr != nil {
		return fmt.Errorf("failed to marshal: %w", marshalErr)
	}

	if putErr := bkt.Put(name, encoded); putErr != nil {
		return fmt.Errorf("put failed: %w", putErr)
	}
	return nil
}
// ReadAny decodes the protobuf Any stored in bkt under name. It returns
// (nil, nil) when the key is absent.
func ReadAny(bkt *bolt.Bucket, name []byte) (*types.Any, error) {
	raw := bkt.Get(name)
	if raw == nil {
		return nil, nil
	}

	decoded := new(types.Any)
	if err := proto.Unmarshal(raw, decoded); err != nil {
		return nil, fmt.Errorf("failed to unmarshal any: %w", err)
	}
	return decoded, nil
}

314
core/metadata/buckets.go Normal file
View File

@@ -0,0 +1,314 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package metadata stores all labels and object specific metadata by namespace.
// This package also contains the main garbage collection logic for cleaning up
// resources consistently and atomically. Resources used by backends will be
// tracked in the metadata store to be exposed to consumers of this package.
//
// The layout where a "/" delineates a bucket is described in the following
// section. Please try to follow this as closely as possible when adding
// functionality. We can bolster this with helpers and more structure if that
// becomes an issue.
//
// Generically, we try to do the following:
//
// <version>/<namespace>/<object>/<key> -> <field>
//
// version: Currently, this is "v1". Additions can be made to v1 in a backwards
// compatible way. If the layout changes, a new version must be made, along
// with a migration.
//
// namespace: the namespace to which this object belongs.
//
// object: defines which object set is stored in the bucket. There are two
// special objects, "labels" and "indexes". The "labels" bucket stores the
// labels for the parent namespace. The "indexes" object is reserved for
// indexing objects, if we require in the future.
//
// key: object-specific key identifying the storage bucket for the objects
// contents.
//
// Below is the current database schema. This should be updated each time
// the structure is changed in addition to adding a migration and incrementing
// the database version.
// Notes:
//
// - `╘══*...*` refers to maps with arbitrary keys
//
// - `version` is a key to a numeric value identifying the minor revisions
// of schema version
//
// - a namespace in a schema bucket cannot be named "version"
//
/*
v1 - Schema version bucket
version : <varint> - Latest version, see migrations
*namespace*
labels
*key* : <string> - Label value
images
*image name*
createdat : <binary time> - Created at
updatedat : <binary time> - Updated at
target
digest : <digest> - Descriptor digest
mediatype : <string> - Descriptor media type
size : <varint> - Descriptor size
labels
*key* : <string> - Label value
containers
*container id*
createdat : <binary time> - Created at
updatedat : <binary time> - Updated at
spec : <binary> - Proto marshaled spec
image : <string> - Image name
snapshotter : <string> - Snapshotter name
snapshotKey : <string> - Snapshot key
runtime
name : <string> - Runtime name
options : <binary> - Proto marshaled options
extensions
*name* : <binary> - Proto marshaled extension
labels
*key* : <string> - Label value
snapshots
*snapshotter*
*snapshot key*
name : <string> - Snapshot name in backend
createdat : <binary time> - Created at
updatedat : <binary time> - Updated at
parent : <string> - Parent snapshot name
children
*snapshot key* : <nil> - Child snapshot reference
labels
*key* : <string> - Label value
content
blob
*blob digest*
createdat : <binary time> - Created at
updatedat : <binary time> - Updated at
size : <varint> - Blob size
labels
*key* : <string> - Label value
ingests
*ingest reference*
ref : <string> - Ingest reference in backend
expireat : <binary time> - Time to expire ingest
expected : <digest> - Expected commit digest
sandboxes
*sandbox id*
createdat : <binary time> - Created at
updatedat : <binary time> - Updated at
spec : <binary> - Proto marshaled spec
sandboxer : <string> - Sandboxer name
runtime
name : <string> - Runtime name
options : <binary> - Proto marshaled options
extensions
*name* : <binary> - Proto marshaled extension
labels
*key* : <string> - Label value
leases
*lease id*
createdat : <binary time> - Created at
labels
*key* : <string> - Label value
snapshots
*snapshotter*
*snapshot key* : <nil> - Snapshot reference
content
*blob digest* : <nil> - Content blob reference
ingests
*ingest reference* : <nil> - Content ingest reference
*/
package metadata
import (
digest "github.com/opencontainers/go-digest"
bolt "go.etcd.io/bbolt"
)
// Keys used to address buckets and values in the metadata database. The
// bucketKeyObject* keys name the per-namespace object buckets; the remaining
// keys name fields or sub-buckets inside individual objects.
var (
bucketKeyVersion = []byte(schemaVersion)
bucketKeyDBVersion = []byte("version") // stores the version of the schema
bucketKeyObjectLabels = []byte("labels") // stores the labels for a namespace.
bucketKeyObjectImages = []byte("images") // stores image objects
bucketKeyObjectContainers = []byte("containers") // stores container objects
bucketKeyObjectSnapshots = []byte("snapshots") // stores snapshot references
bucketKeyObjectContent = []byte("content") // stores content references
bucketKeyObjectBlob = []byte("blob") // stores content links
bucketKeyObjectIngests = []byte("ingests") // stores ingest objects
bucketKeyObjectLeases = []byte("leases") // stores leases
bucketKeyObjectSandboxes = []byte("sandboxes") // stores sandboxes
bucketKeyDigest = []byte("digest")
bucketKeyMediaType = []byte("mediatype")
bucketKeySize = []byte("size")
bucketKeyImage = []byte("image")
bucketKeyRuntime = []byte("runtime")
bucketKeyName = []byte("name")
bucketKeyParent = []byte("parent")
bucketKeyChildren = []byte("children")
bucketKeyOptions = []byte("options")
bucketKeySpec = []byte("spec")
bucketKeySnapshotKey = []byte("snapshotKey")
bucketKeySnapshotter = []byte("snapshotter")
bucketKeyTarget = []byte("target")
bucketKeyExtensions = []byte("extensions")
bucketKeyCreatedAt = []byte("createdat")
bucketKeyExpected = []byte("expected")
bucketKeyRef = []byte("ref")
bucketKeyExpireAt = []byte("expireat")
bucketKeySandboxID = []byte("sandboxid")
bucketKeySandboxer = []byte("sandboxer")
deprecatedBucketKeyObjectIngest = []byte("ingest") // stores ingest links, deprecated in v1.2
)
// getBucket walks the nested bucket path keys starting at the root of tx and
// returns the innermost bucket, or nil as soon as any bucket along the path
// is missing. At least one key must be supplied; an empty keys list panics.
func getBucket(tx *bolt.Tx, keys ...[]byte) *bolt.Bucket {
	current := tx.Bucket(keys[0])
	for i := 1; i < len(keys) && current != nil; i++ {
		current = current.Bucket(keys[i])
	}
	return current
}
// createBucketIfNotExists walks the nested bucket path keys starting at the
// root of tx, creating every bucket that does not exist yet, and returns the
// innermost bucket. On failure it returns a nil bucket and the error. At
// least one key must be supplied; an empty keys list panics.
func createBucketIfNotExists(tx *bolt.Tx, keys ...[]byte) (*bolt.Bucket, error) {
	root, nested := keys[0], keys[1:]

	current, err := tx.CreateBucketIfNotExists(root)
	if err != nil {
		return nil, err
	}

	for i := range nested {
		if current, err = current.CreateBucketIfNotExists(nested[i]); err != nil {
			return nil, err
		}
	}
	return current, nil
}
// namespaceLabelsBucketPath returns the bucket path
// <version>/<namespace>/labels holding the labels of namespace.
func namespaceLabelsBucketPath(namespace string) [][]byte {
return [][]byte{bucketKeyVersion, []byte(namespace), bucketKeyObjectLabels}
}
// withNamespacesLabelsBucket calls fn with the labels bucket of namespace,
// creating the bucket and any missing parents first. fn is not called when
// the bucket cannot be created.
func withNamespacesLabelsBucket(tx *bolt.Tx, namespace string, fn func(bkt *bolt.Bucket) error) error {
	path := namespaceLabelsBucketPath(namespace)

	labelsBkt, err := createBucketIfNotExists(tx, path...)
	if err != nil {
		return err
	}
	return fn(labelsBkt)
}
// getNamespaceLabelsBucket returns the labels bucket of namespace, or nil if
// it (or any parent bucket) does not exist.
func getNamespaceLabelsBucket(tx *bolt.Tx, namespace string) *bolt.Bucket {
return getBucket(tx, namespaceLabelsBucketPath(namespace)...)
}
// imagesBucketPath returns the bucket path <version>/<namespace>/images
// holding the image objects of namespace.
func imagesBucketPath(namespace string) [][]byte {
return [][]byte{bucketKeyVersion, []byte(namespace), bucketKeyObjectImages}
}
// createImagesBucket returns the images bucket of namespace, creating it and
// any missing parent buckets.
func createImagesBucket(tx *bolt.Tx, namespace string) (*bolt.Bucket, error) {
return createBucketIfNotExists(tx, imagesBucketPath(namespace)...)
}
// getImagesBucket returns the images bucket of namespace, or nil if it (or
// any parent bucket) does not exist.
func getImagesBucket(tx *bolt.Tx, namespace string) *bolt.Bucket {
return getBucket(tx, imagesBucketPath(namespace)...)
}
// createContainersBucket returns the bucket <version>/<namespace>/containers,
// creating it and any missing parent buckets.
func createContainersBucket(tx *bolt.Tx, namespace string) (*bolt.Bucket, error) {
return createBucketIfNotExists(tx, bucketKeyVersion, []byte(namespace), bucketKeyObjectContainers)
}
// getContainersBucket returns the bucket <version>/<namespace>/containers, or
// nil if it (or any parent bucket) does not exist.
func getContainersBucket(tx *bolt.Tx, namespace string) *bolt.Bucket {
return getBucket(tx, bucketKeyVersion, []byte(namespace), bucketKeyObjectContainers)
}
// getContainerBucket returns the bucket of the container id in namespace
// (<version>/<namespace>/containers/<id>), or nil if it does not exist.
func getContainerBucket(tx *bolt.Tx, namespace, id string) *bolt.Bucket {
return getBucket(tx, bucketKeyVersion, []byte(namespace), bucketKeyObjectContainers, []byte(id))
}
// createSnapshotterBucket returns the bucket
// <version>/<namespace>/snapshots/<snapshotter>, creating it and any missing
// parent buckets. On failure the bucket is nil and the error from
// createBucketIfNotExists is returned unchanged.
func createSnapshotterBucket(tx *bolt.Tx, namespace, snapshotter string) (*bolt.Bucket, error) {
	return createBucketIfNotExists(
		tx,
		bucketKeyVersion,
		[]byte(namespace),
		bucketKeyObjectSnapshots,
		[]byte(snapshotter),
	)
}
// getSnapshottersBucket returns the bucket <version>/<namespace>/snapshots,
// which holds one sub-bucket per snapshotter, or nil if it does not exist.
func getSnapshottersBucket(tx *bolt.Tx, namespace string) *bolt.Bucket {
return getBucket(tx, bucketKeyVersion, []byte(namespace), bucketKeyObjectSnapshots)
}
// getSnapshotterBucket returns the bucket
// <version>/<namespace>/snapshots/<snapshotter>, or nil if it does not exist.
func getSnapshotterBucket(tx *bolt.Tx, namespace, snapshotter string) *bolt.Bucket {
return getBucket(tx, bucketKeyVersion, []byte(namespace), bucketKeyObjectSnapshots, []byte(snapshotter))
}
// createBlobBucket creates the bucket for the blob dgst under
// <version>/<namespace>/content/blob, creating missing parent buckets.
// Unlike the parents, the blob bucket itself is created with CreateBucket,
// so the call fails when a bucket for dgst already exists.
func createBlobBucket(tx *bolt.Tx, namespace string, dgst digest.Digest) (*bolt.Bucket, error) {
	blobs, err := createBucketIfNotExists(
		tx,
		bucketKeyVersion,
		[]byte(namespace),
		bucketKeyObjectContent,
		bucketKeyObjectBlob,
	)
	if err != nil {
		return nil, err
	}

	name := []byte(dgst.String())
	return blobs.CreateBucket(name)
}
// getBlobsBucket returns the bucket <version>/<namespace>/content/blob, which
// holds one sub-bucket per blob digest, or nil if it does not exist.
func getBlobsBucket(tx *bolt.Tx, namespace string) *bolt.Bucket {
return getBucket(tx, bucketKeyVersion, []byte(namespace), bucketKeyObjectContent, bucketKeyObjectBlob)
}
// getBlobBucket returns the bucket of the blob dgst in namespace
// (<version>/<namespace>/content/blob/<digest>), or nil if it does not exist.
func getBlobBucket(tx *bolt.Tx, namespace string, dgst digest.Digest) *bolt.Bucket {
return getBucket(tx, bucketKeyVersion, []byte(namespace), bucketKeyObjectContent, bucketKeyObjectBlob, []byte(dgst.String()))
}
// getIngestsBucket returns the bucket <version>/<namespace>/content/ingests,
// which holds one sub-bucket per ingest reference, or nil if it does not
// exist.
func getIngestsBucket(tx *bolt.Tx, namespace string) *bolt.Bucket {
return getBucket(tx, bucketKeyVersion, []byte(namespace), bucketKeyObjectContent, bucketKeyObjectIngests)
}
// createIngestBucket returns the bucket of the ingest ref in namespace
// (<version>/<namespace>/content/ingests/<ref>), creating it and any missing
// parent buckets. An already existing ingest bucket is returned as-is. On
// failure the bucket is nil and the error is returned unchanged.
func createIngestBucket(tx *bolt.Tx, namespace, ref string) (*bolt.Bucket, error) {
	return createBucketIfNotExists(
		tx,
		bucketKeyVersion,
		[]byte(namespace),
		bucketKeyObjectContent,
		bucketKeyObjectIngests,
		[]byte(ref),
	)
}
// getIngestBucket returns the bucket of the ingest ref in namespace
// (<version>/<namespace>/content/ingests/<ref>), or nil if it does not exist.
func getIngestBucket(tx *bolt.Tx, namespace, ref string) *bolt.Bucket {
return getBucket(tx, bucketKeyVersion, []byte(namespace), bucketKeyObjectContent, bucketKeyObjectIngests, []byte(ref))
}
// createSandboxBucket returns the bucket holding the sandboxes of namespace,
// creating it and any missing parent buckets.
//
// The bucket lives at <version>/<namespace>/sandboxes, matching the schema
// documented at the top of this file and every other per-namespace object
// bucket. It must stay in sync with getSandboxBucket.
//
// NOTE(review): the path previously omitted the schema version bucket, which
// placed sandboxes in a top-level <namespace> bucket. Confirm whether any
// database written with that layout needs a migration.
func createSandboxBucket(tx *bolt.Tx, namespace string) (*bolt.Bucket, error) {
	return createBucketIfNotExists(
		tx,
		bucketKeyVersion,
		[]byte(namespace),
		bucketKeyObjectSandboxes,
	)
}

// getSandboxBucket returns the bucket holding the sandboxes of namespace
// (<version>/<namespace>/sandboxes), or nil if it (or any parent bucket) does
// not exist. The path must stay in sync with createSandboxBucket.
func getSandboxBucket(tx *bolt.Tx, namespace string) *bolt.Bucket {
	return getBucket(
		tx,
		bucketKeyVersion,
		[]byte(namespace),
		bucketKeyObjectSandboxes,
	)
}

View File

@@ -0,0 +1,74 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metadata
import (
"bytes"
"reflect"
"time"
"github.com/containerd/typeurl/v2"
"github.com/google/go-cmp/cmp"
)
// isNil reports whether x is an untyped nil or a nil pointer wrapped in an
// interface. Only pointer kinds are inspected; nil maps, slices, channels and
// funcs are reported as non-nil.
func isNil(x interface{}) bool {
	if x == nil {
		return true
	}
	if rv := reflect.ValueOf(x); rv.Kind() == reflect.Ptr {
		return rv.IsNil()
	}
	return false
}
// compareNil is a cmp option that treats two values as equal when both are
// nil according to isNil, i.e. an untyped nil and a typed nil pointer compare
// equal. It does not apply when only one side is nil.
var compareNil = cmp.FilterValues(
func(x, y interface{}) bool {
return isNil(x) && isNil(y)
},
cmp.Comparer(func(_, _ interface{}) bool {
return true
}),
)
// ignoreTime is a cmp option that treats any two time.Time values as equal,
// so timestamps do not influence a comparison.
var ignoreTime = cmp.FilterValues(
func(x, y interface{}) bool {
_, xok := x.(time.Time)
_, yok := y.(time.Time)
return xok && yok
},
cmp.Comparer(func(_, _ interface{}) bool {
return true
}),
)
// compareAny is a cmp option for pairs of typeurl.Any values: two values are
// equal when their type URLs match and their encoded payloads are
// byte-for-byte identical.
var compareAny = cmp.FilterValues(
	func(x, y interface{}) bool {
		if _, ok := x.(typeurl.Any); !ok {
			return false
		}
		_, ok := y.(typeurl.Any)
		return ok
	},
	cmp.Comparer(func(x, y interface{}) bool {
		ax, xok := x.(typeurl.Any)
		ay, yok := y.(typeurl.Any)
		if !(xok && yok) {
			return false
		}
		sameType := ax.GetTypeUrl() == ay.GetTypeUrl()
		return sameType && bytes.Equal(ax.GetValue(), ay.GetValue())
	}),
)

417
core/metadata/containers.go Normal file
View File

@@ -0,0 +1,417 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metadata
import (
"context"
"fmt"
"strings"
"sync/atomic"
"time"
"github.com/containerd/containerd/v2/core/containers"
"github.com/containerd/containerd/v2/core/metadata/boltutil"
"github.com/containerd/containerd/v2/errdefs"
"github.com/containerd/containerd/v2/filters"
"github.com/containerd/containerd/v2/identifiers"
"github.com/containerd/containerd/v2/labels"
"github.com/containerd/containerd/v2/namespaces"
"github.com/containerd/containerd/v2/protobuf/proto"
"github.com/containerd/containerd/v2/protobuf/types"
"github.com/containerd/typeurl/v2"
bolt "go.etcd.io/bbolt"
)
// containerStore is the containers.Store implementation returned by
// NewContainerStore. All operations run against db.
type containerStore struct {
db *DB
}
// NewContainerStore returns a containers.Store backed by the given metadata
// DB, which is itself backed by an underlying bolt DB.
func NewContainerStore(db *DB) containers.Store {
return &containerStore{
db: db,
}
}
// Get returns the container id from the namespace carried by ctx.
//
// It fails when ctx has no namespace, returns an error wrapping
// errdefs.ErrNotFound when the container does not exist, and returns a
// zero-value container alongside any error.
func (s *containerStore) Get(ctx context.Context, id string) (containers.Container, error) {
	var none containers.Container

	namespace, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return none, err
	}

	found := containers.Container{ID: id}
	load := func(tx *bolt.Tx) error {
		bkt := getContainerBucket(tx, namespace, id)
		if bkt == nil {
			return fmt.Errorf("container %q in namespace %q: %w", id, namespace, errdefs.ErrNotFound)
		}
		if err := readContainer(&found, bkt); err != nil {
			return fmt.Errorf("failed to read container %q: %w", id, err)
		}
		return nil
	}

	if err := view(ctx, s.db, load); err != nil {
		return none, err
	}
	return found, nil
}
// List returns the containers of the namespace carried by ctx that match the
// filter expressions fs.
//
// A filter that fails to parse yields an error wrapping
// errdefs.ErrInvalidArgument. A namespace without a containers bucket yields
// a nil slice and no error.
func (s *containerStore) List(ctx context.Context, fs ...string) ([]containers.Container, error) {
	namespace, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return nil, err
	}

	filter, err := filters.ParseAll(fs...)
	if err != nil {
		return nil, fmt.Errorf("%s: %w", err.Error(), errdefs.ErrInvalidArgument)
	}

	var matched []containers.Container
	err = view(ctx, s.db, func(tx *bolt.Tx) error {
		all := getContainersBucket(tx, namespace)
		if all == nil {
			return nil // empty store
		}

		return all.ForEach(func(k, _ []byte) error {
			// Entries that are not buckets are not containers; skip them.
			cbkt := all.Bucket(k)
			if cbkt == nil {
				return nil
			}

			id := string(k)
			candidate := containers.Container{ID: id}
			if err := readContainer(&candidate, cbkt); err != nil {
				return fmt.Errorf("failed to read container %q: %w", id, err)
			}

			if filter.Match(adaptContainer(candidate)) {
				matched = append(matched, candidate)
			}
			return nil
		})
	})
	if err != nil {
		return nil, err
	}
	return matched, nil
}
// Create validates container and stores it in the namespace carried by ctx.
//
// CreatedAt and UpdatedAt are both set to the current UTC time. If a
// container with the same ID already exists, the error wraps
// errdefs.ErrAlreadyExists. The stored container is returned on success, a
// zero-value container on error.
func (s *containerStore) Create(ctx context.Context, container containers.Container) (containers.Container, error) {
	var none containers.Container

	namespace, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return none, err
	}

	if err := validateContainer(&container); err != nil {
		return none, fmt.Errorf("create container failed validation: %w", err)
	}

	persist := func(tx *bolt.Tx) error {
		parent, err := createContainersBucket(tx, namespace)
		if err != nil {
			return err
		}

		cbkt, err := parent.CreateBucket([]byte(container.ID))
		switch {
		case err == bolt.ErrBucketExists:
			return fmt.Errorf("container %q: %w", container.ID, errdefs.ErrAlreadyExists)
		case err != nil:
			return err
		}

		now := time.Now().UTC()
		container.CreatedAt, container.UpdatedAt = now, now
		if err := writeContainer(cbkt, &container); err != nil {
			return fmt.Errorf("failed to write container %q: %w", container.ID, err)
		}
		return nil
	}

	if err := update(ctx, s.db, persist); err != nil {
		return none, err
	}
	return container, nil
}
// Update modifies the stored container identified by container.ID in the
// namespace carried by ctx and returns the stored result.
//
// With no fieldpaths, the labels, spec, extensions, image and snapshotkey
// fields are replaced from container, and a differing Snapshotter or
// Runtime.Name is rejected with errdefs.ErrInvalidArgument. With fieldpaths,
// only the named fields are copied; "labels.<key>" and "extensions.<key>"
// address a single map entry. Any other path yields
// errdefs.ErrInvalidArgument. A missing containers bucket or container
// yields errdefs.ErrNotFound. CreatedAt is preserved and UpdatedAt is set to
// the current UTC time.
func (s *containerStore) Update(ctx context.Context, container containers.Container, fieldpaths ...string) (containers.Container, error) {
namespace, err := namespaces.NamespaceRequired(ctx)
if err != nil {
return containers.Container{}, err
}
if container.ID == "" {
return containers.Container{}, fmt.Errorf("must specify a container id: %w", errdefs.ErrInvalidArgument)
}
var updated containers.Container
if err := update(ctx, s.db, func(tx *bolt.Tx) error {
bkt := getContainersBucket(tx, namespace)
if bkt == nil {
return fmt.Errorf("cannot update container %q in namespace %q: %w", container.ID, namespace, errdefs.ErrNotFound)
}
cbkt := bkt.Bucket([]byte(container.ID))
if cbkt == nil {
return fmt.Errorf("container %q: %w", container.ID, errdefs.ErrNotFound)
}
// Start from the stored state; the field mask below overlays the
// requested changes on top of it.
if err := readContainer(&updated, cbkt); err != nil {
return fmt.Errorf("failed to read container %q: %w", container.ID, err)
}
createdat := updated.CreatedAt
updated.ID = container.ID
if len(fieldpaths) == 0 {
// only allow updates to these field on full replace.
fieldpaths = []string{"labels", "spec", "extensions", "image", "snapshotkey"}
// Fields that are immutable must cause an error when no field paths
// are provided. This allows these fields to become mutable in the
// future.
if updated.Snapshotter != container.Snapshotter {
return fmt.Errorf("container.Snapshotter field is immutable: %w", errdefs.ErrInvalidArgument)
}
if updated.Runtime.Name != container.Runtime.Name {
return fmt.Errorf("container.Runtime.Name field is immutable: %w", errdefs.ErrInvalidArgument)
}
}
// apply the field mask. If you update this code, you better follow the
// field mask rules in field_mask.proto. If you don't know what this
// is, do not update this code.
for _, path := range fieldpaths {
// "labels.<key>" replaces a single label with the value from container.
if strings.HasPrefix(path, "labels.") {
if updated.Labels == nil {
updated.Labels = map[string]string{}
}
key := strings.TrimPrefix(path, "labels.")
updated.Labels[key] = container.Labels[key]
continue
}
// "extensions.<key>" replaces a single extension with the one from container.
if strings.HasPrefix(path, "extensions.") {
if updated.Extensions == nil {
updated.Extensions = map[string]typeurl.Any{}
}
key := strings.TrimPrefix(path, "extensions.")
updated.Extensions[key] = container.Extensions[key]
continue
}
switch path {
case "labels":
updated.Labels = container.Labels
case "spec":
updated.Spec = container.Spec
case "extensions":
updated.Extensions = container.Extensions
case "image":
updated.Image = container.Image
case "snapshotkey":
updated.SnapshotKey = container.SnapshotKey
default:
return fmt.Errorf("cannot update %q field on %q: %w", path, container.ID, errdefs.ErrInvalidArgument)
}
}
if err := validateContainer(&updated); err != nil {
return fmt.Errorf("update failed validation: %w", err)
}
updated.CreatedAt = createdat
updated.UpdatedAt = time.Now().UTC()
if err := writeContainer(cbkt, &updated); err != nil {
return fmt.Errorf("failed to write container %q: %w", container.ID, err)
}
return nil
}); err != nil {
return containers.Container{}, err
}
return updated, nil
}
// Delete removes the container id from the namespace carried by ctx.
//
// A missing containers bucket or container yields an error wrapping
// errdefs.ErrNotFound. After a successful removal the database's dirty
// counter is incremented atomically.
func (s *containerStore) Delete(ctx context.Context, id string) error {
	namespace, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return err
	}

	remove := func(tx *bolt.Tx) error {
		bkt := getContainersBucket(tx, namespace)
		if bkt == nil {
			return fmt.Errorf("cannot delete container %q in namespace %q: %w", id, namespace, errdefs.ErrNotFound)
		}

		switch err := bkt.DeleteBucket([]byte(id)); {
		case err == bolt.ErrBucketNotFound:
			return fmt.Errorf("container %v: %w", id, errdefs.ErrNotFound)
		case err != nil:
			return err
		}

		atomic.AddUint32(&s.db.dirty, 1)
		return nil
	}
	return update(ctx, s.db, remove)
}
// validateContainer checks that container is well-formed enough to be
// stored. It requires a valid ID, no empty extension key, valid labels, a
// runtime name, a spec, and a snapshotter whenever a snapshot key is set.
// Apart from ID and label validation, whose errors are wrapped as returned,
// every failure wraps errdefs.ErrInvalidArgument.
func validateContainer(container *containers.Container) error {
	if err := identifiers.Validate(container.ID); err != nil {
		return fmt.Errorf("container.ID: %w", err)
	}

	if _, hasEmptyKey := container.Extensions[""]; hasEmptyKey {
		return fmt.Errorf("container.Extension keys must not be zero-length: %w", errdefs.ErrInvalidArgument)
	}

	// image has no validation
	for key, value := range container.Labels {
		if err := labels.Validate(key, value); err != nil {
			return fmt.Errorf("containers.Labels: %w", err)
		}
	}

	switch {
	case container.Runtime.Name == "":
		return fmt.Errorf("container.Runtime.Name must be set: %w", errdefs.ErrInvalidArgument)
	case container.Spec == nil:
		return fmt.Errorf("container.Spec must be set: %w", errdefs.ErrInvalidArgument)
	case container.SnapshotKey != "" && container.Snapshotter == "":
		return fmt.Errorf("container.Snapshotter must be set if container.SnapshotKey is set: %w", errdefs.ErrInvalidArgument)
	}
	return nil
}
// readContainer populates container from the contents of bkt.
//
// Labels and timestamps are read first through the boltutil helpers; the
// remaining fields are filled in while iterating over the keys of bkt. Keys
// that are not recognized are ignored.
func readContainer(container *containers.Container, bkt *bolt.Bucket) error {
	lbls, err := boltutil.ReadLabels(bkt)
	if err != nil {
		return err
	}
	container.Labels = lbls

	if err = boltutil.ReadTimestamps(bkt, &container.CreatedAt, &container.UpdatedAt); err != nil {
		return err
	}

	visit := func(key, val []byte) error {
		switch field := string(key); field {
		case string(bucketKeyImage):
			container.Image = string(val)

		case string(bucketKeyRuntime):
			runtimeBkt := bkt.Bucket(bucketKeyRuntime)
			if runtimeBkt == nil {
				return nil // skip runtime. should be an error?
			}
			if name := runtimeBkt.Get(bucketKeyName); name != nil {
				container.Runtime.Name = string(name)
			}
			opts, err := boltutil.ReadAny(runtimeBkt, bucketKeyOptions)
			if err != nil {
				return err
			}
			container.Runtime.Options = opts

		case string(bucketKeySpec):
			spec := new(types.Any)
			if err := proto.Unmarshal(val, spec); err != nil {
				return err
			}
			container.Spec = spec

		case string(bucketKeySnapshotKey):
			container.SnapshotKey = string(val)

		case string(bucketKeySnapshotter):
			container.Snapshotter = string(val)

		case string(bucketKeyExtensions):
			exts, err := boltutil.ReadExtensions(bkt)
			if err != nil {
				return err
			}
			container.Extensions = exts

		case string(bucketKeySandboxID):
			container.SandboxID = string(val)
		}
		return nil
	}

	return bkt.ForEach(visit)
}
// writeContainer persists every stored field of container into bkt.
//
// The runtime sub-bucket is dropped and recreated on each call, so its
// contents always reflect container.Runtime exactly. The first error
// encountered is returned.
func writeContainer(bkt *bolt.Bucket, container *containers.Container) error {
	if err := boltutil.WriteTimestamps(bkt, container.CreatedAt, container.UpdatedAt); err != nil {
		return err
	}

	if err := boltutil.WriteAny(bkt, bucketKeySpec, container.Spec); err != nil {
		return err
	}

	// Plain string fields stored directly under the container bucket.
	stringFields := []struct {
		key   []byte
		value string
	}{
		{bucketKeyImage, container.Image},
		{bucketKeySnapshotter, container.Snapshotter},
		{bucketKeySnapshotKey, container.SnapshotKey},
	}
	for _, field := range stringFields {
		if err := bkt.Put(field.key, []byte(field.value)); err != nil {
			return err
		}
	}

	// Start from an empty runtime bucket so no stale keys survive.
	if bkt.Bucket(bucketKeyRuntime) != nil {
		if err := bkt.DeleteBucket(bucketKeyRuntime); err != nil {
			return err
		}
	}
	runtimeBkt, err := bkt.CreateBucket(bucketKeyRuntime)
	if err != nil {
		return err
	}

	if err := runtimeBkt.Put(bucketKeyName, []byte(container.Runtime.Name)); err != nil {
		return err
	}

	if err := boltutil.WriteExtensions(bkt, container.Extensions); err != nil {
		return err
	}

	if err := boltutil.WriteAny(runtimeBkt, bucketKeyOptions, container.Runtime.Options); err != nil {
		return err
	}

	if err := bkt.Put(bucketKeySandboxID, []byte(container.SandboxID)); err != nil {
		return err
	}

	return boltutil.WriteLabels(bkt, container.Labels)
}

View File

@@ -0,0 +1,725 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metadata
import (
"context"
"errors"
"fmt"
"path/filepath"
"runtime"
"testing"
"time"
"github.com/containerd/containerd/v2/core/containers"
"github.com/containerd/containerd/v2/errdefs"
"github.com/containerd/containerd/v2/filters"
"github.com/containerd/containerd/v2/namespaces"
"github.com/containerd/containerd/v2/protobuf"
"github.com/containerd/containerd/v2/protobuf/types"
"github.com/containerd/log/logtest"
"github.com/containerd/typeurl/v2"
"github.com/google/go-cmp/cmp"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
bolt "go.etcd.io/bbolt"
)
func init() {
typeurl.Register(&specs.Spec{}, "types.containerd.io/opencontainers/runtime-spec", "v1", "Spec")
}
// TestContainersList creates a fixed set of containers, verifies that List
// returns exactly the containers selected by each filter set, and finally
// checks that deleting a container a second time reports NotFound.
func TestContainersList(t *testing.T) {
	ctx, db := testEnv(t)
	store := NewContainerStore(NewDB(db, nil, nil))
	spec := &specs.Spec{}
	encoded, err := protobuf.MarshalAnyToProto(spec)
	require.NoError(t, err)

	testset := map[string]*containers.Container{}
	for i := 0; i < 4; i++ {
		id := "container-" + fmt.Sprint(i)
		testset[id] = &containers.Container{
			ID: id,
			Labels: map[string]string{
				"idlabel": id,
				"even":    fmt.Sprint(i%2 == 0),
				"odd":     fmt.Sprint(i%2 != 0),
			},
			Spec:        encoded,
			SnapshotKey: "test-snapshot-key",
			Snapshotter: "snapshotter",
			Runtime: containers.RuntimeInfo{
				Name: "testruntime",
			},
			Image: "test image",
		}

		if err := db.Update(func(tx *bolt.Tx) error {
			now := time.Now()
			result, err := store.Create(WithTransactionContext(ctx, tx), *testset[id])
			if err != nil {
				return err
			}

			checkContainerTimestamps(t, &result, now, true)
			// Timestamps are assigned by the store; copy them so the
			// expected value can be compared as a whole.
			testset[id].UpdatedAt, testset[id].CreatedAt = result.UpdatedAt, result.CreatedAt
			checkContainersEqual(t, &result, testset[id], "ensure that containers were created as expected for list")
			return nil
		}); err != nil {
			t.Fatal(err)
		}
	}

	for _, testcase := range []struct {
		name    string
		filters []string
	}{
		{
			name: "FullSet",
		},
		{
			name:    "FullSetFiltered", // full set, but because we have OR filter
			filters: []string{"labels.even==true", "labels.odd==true"},
		},
		{
			name:    "Even",
			filters: []string{"labels.even==true"},
		},
		{
			name:    "Odd",
			filters: []string{"labels.odd==true"},
		},
		{
			name:    "ByID",
			filters: []string{"id==container-0"},
		},
		{
			name:    "ByIDLabelEven",
			filters: []string{"labels.idlabel==container-0,labels.even==true"},
		},
		{
			name:    "ByRuntime",
			filters: []string{"runtime.name==testruntime"},
		},
	} {
		t.Run(testcase.name, func(t *testing.T) {
			// Compute the expected subset by applying the same filters
			// locally through the container adaptor.
			expected := testset
			if len(testcase.filters) > 0 {
				fs, err := filters.ParseAll(testcase.filters...)
				if err != nil {
					t.Fatal(err)
				}

				filtered := make(map[string]*containers.Container, len(testset))
				for k, v := range testset {
					if fs.Match(adaptContainer(*v)) {
						filtered[k] = v
					}
				}
				expected = filtered
			}

			results, err := store.List(ctx, testcase.filters...)
			if err != nil {
				t.Fatal(err)
			}

			if len(results) == 0 { // all tests return a non-empty result set
				t.Fatal("no results returned")
			}

			if len(results) != len(expected) {
				t.Fatalf("length of result does not match testset: %v != %v", len(results), len(expected))
			}

			for _, result := range results {
				result := result
				want, ok := expected[result.ID]
				if !ok {
					// Without this guard a nil pointer would be handed to
					// the comparison and the failure would be unexplained.
					t.Fatalf("unexpected container %q in list results", result.ID)
				}
				checkContainersEqual(t, &result, want, "list results did not match")
			}
		})
	}

	// delete everything to test it
	for id := range testset {
		if err := store.Delete(ctx, id); err != nil {
			t.Fatal(err)
		}

		// try it again, get NotFound
		if err := store.Delete(ctx, id); err == nil {
			t.Fatalf("expected error deleting non-existent container")
		} else if !errdefs.IsNotFound(err) {
			t.Fatalf("unexpected error %v", err)
		}
	}
}
// TestContainersCreateUpdateDelete ensures that updates are taken in an
// expected manner.
//
// Each case creates `original`, applies `input` restricted to `fieldpaths`
// through Update, and compares the result (and a subsequent Get) against
// `expected`. `createerr` and `cause` name the error expected from Create
// and Update respectively; when either is set the case stops after that
// call.
func TestContainersCreateUpdateDelete(t *testing.T) {
	var (
		ctx, db = testEnv(t)
		store   = NewContainerStore(NewDB(db, nil, nil))
		spec    = &specs.Spec{}
	)
	encoded, err := protobuf.MarshalAnyToProto(spec)
	require.NoError(t, err)
	// Mutate the spec and encode it again to obtain a distinct value for
	// the spec update case.
	spec.Annotations = map[string]string{"updated": "true"}
	encodedUpdated, err := protobuf.MarshalAnyToProto(spec)
	require.NoError(t, err)
	for _, testcase := range []struct {
		name       string
		original   containers.Container
		createerr  error
		input      containers.Container
		fieldpaths []string
		expected   containers.Container
		cause      error
	}{
		{
			name: "UpdateIDFail",
			original: containers.Container{
				Spec:        encoded,
				SnapshotKey: "test-snapshot-key",
				Snapshotter: "snapshotter",
				Runtime: containers.RuntimeInfo{
					Name: "testruntime",
				},
			},
			input: containers.Container{
				ID:   "newid",
				Spec: encoded,
				Runtime: containers.RuntimeInfo{
					Name: "testruntime",
				},
			},
			fieldpaths: []string{"id"},
			cause:      errdefs.ErrNotFound,
		},
		{
			name: "UpdateRuntimeFail",
			original: containers.Container{
				SnapshotKey: "test-snapshot-key",
				Snapshotter: "snapshotter",
				Spec:        encoded,
				Runtime: containers.RuntimeInfo{
					Name: "testruntime",
				},
			},
			input: containers.Container{
				Spec: encoded,
				Runtime: containers.RuntimeInfo{
					Name: "testruntimedifferent",
				},
			},
			fieldpaths: []string{"runtime"},
			cause:      errdefs.ErrInvalidArgument,
		},
		{
			name: "UpdateRuntimeClearFail",
			original: containers.Container{
				Spec:        encoded,
				SnapshotKey: "test-snapshot-key",
				Snapshotter: "snapshotter",
				Runtime: containers.RuntimeInfo{
					Name: "testruntime",
				},
			},
			input: containers.Container{
				Spec: encoded,
			},
			fieldpaths: []string{"runtime"},
			cause:      errdefs.ErrInvalidArgument,
		},
		{
			name: "UpdateSpec",
			original: containers.Container{
				Spec:        encoded,
				SnapshotKey: "test-snapshot-key",
				Snapshotter: "snapshotter",
				Runtime: containers.RuntimeInfo{
					Name: "testruntime",
				},
				Image: "test image",
			},
			input: containers.Container{
				Spec: encodedUpdated,
			},
			fieldpaths: []string{"spec"},
			expected: containers.Container{
				Runtime: containers.RuntimeInfo{
					Name: "testruntime",
				},
				Spec:        encodedUpdated,
				SnapshotKey: "test-snapshot-key",
				Snapshotter: "snapshotter",
				Image:       "test image",
			},
		},
		{
			name: "UpdateSnapshot",
			original: containers.Container{
				Spec:        encoded,
				SnapshotKey: "test-snapshot-key",
				Snapshotter: "snapshotter",
				Runtime: containers.RuntimeInfo{
					Name: "testruntime",
				},
				Image: "test image",
			},
			input: containers.Container{
				SnapshotKey: "test2-snapshot-key",
			},
			fieldpaths: []string{"snapshotkey"},
			expected: containers.Container{
				Spec:        encoded,
				SnapshotKey: "test2-snapshot-key",
				Snapshotter: "snapshotter",
				Runtime: containers.RuntimeInfo{
					Name: "testruntime",
				},
				Image: "test image",
			},
		},
		{
			name: "UpdateImage",
			original: containers.Container{
				Spec:        encoded,
				SnapshotKey: "test-snapshot-key",
				Snapshotter: "snapshotter",
				Runtime: containers.RuntimeInfo{
					Name: "testruntime",
				},
				Image: "test image",
			},
			input: containers.Container{
				Image: "test2 image",
			},
			fieldpaths: []string{"image"},
			expected: containers.Container{
				Spec:        encoded,
				SnapshotKey: "test-snapshot-key",
				Snapshotter: "snapshotter",
				Runtime: containers.RuntimeInfo{
					Name: "testruntime",
				},
				Image: "test2 image",
			},
		},
		{
			name: "UpdateLabel",
			original: containers.Container{
				Labels: map[string]string{
					"foo": "one",
					"bar": "two",
				},
				Spec:        encoded,
				SnapshotKey: "test-snapshot-key",
				Snapshotter: "snapshotter",
				Runtime: containers.RuntimeInfo{
					Name: "testruntime",
				},
				Image: "test image",
			},
			input: containers.Container{
				Labels: map[string]string{
					"bar": "baz",
				},
			},
			fieldpaths: []string{"labels.bar"},
			expected: containers.Container{
				Labels: map[string]string{
					"foo": "one",
					"bar": "baz",
				},
				Spec:        encoded,
				SnapshotKey: "test-snapshot-key",
				Snapshotter: "snapshotter",
				Runtime: containers.RuntimeInfo{
					Name: "testruntime",
				},
				Image: "test image",
			},
		},
		{
			name: "DeleteAllLabels",
			original: containers.Container{
				Labels: map[string]string{
					"foo": "one",
					"bar": "two",
				},
				Spec:        encoded,
				SnapshotKey: "test-snapshot-key",
				Snapshotter: "snapshotter",
				Runtime: containers.RuntimeInfo{
					Name: "testruntime",
				},
				Image: "test image",
			},
			input: containers.Container{
				Labels: nil,
			},
			fieldpaths: []string{"labels"},
			expected: containers.Container{
				Spec:        encoded,
				SnapshotKey: "test-snapshot-key",
				Snapshotter: "snapshotter",
				Runtime: containers.RuntimeInfo{
					Name: "testruntime",
				},
				Image: "test image",
			},
		},
		{
			name: "DeleteLabel",
			original: containers.Container{
				Labels: map[string]string{
					"foo": "one",
					"bar": "two",
				},
				Spec:        encoded,
				SnapshotKey: "test-snapshot-key",
				Snapshotter: "snapshotter",
				Runtime: containers.RuntimeInfo{
					Name: "testruntime",
				},
				Image: "test image",
			},
			input: containers.Container{
				Labels: map[string]string{
					"bar": "",
				},
			},
			fieldpaths: []string{"labels.bar"},
			expected: containers.Container{
				Labels: map[string]string{
					"foo": "one",
				},
				Spec:        encoded,
				SnapshotKey: "test-snapshot-key",
				Snapshotter: "snapshotter",
				Runtime: containers.RuntimeInfo{
					Name: "testruntime",
				},
				Image: "test image",
			},
		},
		{
			name: "UpdateSnapshotKeyImmutable",
			original: containers.Container{
				Spec:        encoded,
				SnapshotKey: "",
				Snapshotter: "",
				Runtime: containers.RuntimeInfo{
					Name: "testruntime",
				},
			},
			input: containers.Container{
				SnapshotKey: "something",
				Snapshotter: "something",
			},
			fieldpaths: []string{"snapshotkey", "snapshotter"},
			cause:      errdefs.ErrInvalidArgument,
		},
		{
			// Creation itself is expected to fail for this case.
			name: "SnapshotKeyWithoutSnapshot",
			original: containers.Container{
				Spec:        encoded,
				SnapshotKey: "/nosnapshot",
				Snapshotter: "",
				Runtime: containers.RuntimeInfo{
					Name: "testruntime",
				},
			},
			createerr: errdefs.ErrInvalidArgument,
		},
		{
			name: "UpdateExtensionsFull",
			original: containers.Container{
				Spec: encoded,
				Runtime: containers.RuntimeInfo{
					Name: "testruntime",
				},
				Extensions: map[string]typeurl.Any{
					"hello": &types.Any{
						TypeUrl: "test.update.extensions",
						Value:   []byte("hello"),
					},
				},
			},
			input: containers.Container{
				Spec: encoded,
				Runtime: containers.RuntimeInfo{
					Name: "testruntime",
				},
				Extensions: map[string]typeurl.Any{
					"hello": &types.Any{
						TypeUrl: "test.update.extensions",
						Value:   []byte("world"),
					},
				},
			},
			expected: containers.Container{
				Spec: encoded,
				Runtime: containers.RuntimeInfo{
					Name: "testruntime",
				},
				Extensions: map[string]typeurl.Any{
					"hello": &types.Any{
						TypeUrl: "test.update.extensions",
						Value:   []byte("world"),
					},
				},
			},
		},
		{
			name: "UpdateExtensionsNotInFieldpath",
			original: containers.Container{
				Spec: encoded,
				Runtime: containers.RuntimeInfo{
					Name: "testruntime",
				},
				Extensions: map[string]typeurl.Any{
					"hello": &types.Any{
						TypeUrl: "test.update.extensions",
						Value:   []byte("hello"),
					},
				},
			},
			input: containers.Container{
				Spec: encoded,
				Runtime: containers.RuntimeInfo{
					Name: "testruntime",
				},
				Extensions: map[string]typeurl.Any{
					"hello": &types.Any{
						TypeUrl: "test.update.extensions",
						Value:   []byte("world"),
					},
				},
			},
			fieldpaths: []string{"labels"},
			expected: containers.Container{
				Spec: encoded,
				Runtime: containers.RuntimeInfo{
					Name: "testruntime",
				},
				Extensions: map[string]typeurl.Any{
					"hello": &types.Any{
						TypeUrl: "test.update.extensions",
						Value:   []byte("hello"),
					},
				},
			},
		},
		{
			name: "UpdateExtensionsFieldPath",
			original: containers.Container{
				Spec: encoded,
				Runtime: containers.RuntimeInfo{
					Name: "testruntime",
				},
				Extensions: map[string]typeurl.Any{
					"hello": &types.Any{
						TypeUrl: "test.update.extensions",
						Value:   []byte("hello"),
					},
				},
			},
			input: containers.Container{
				Labels: map[string]string{
					"foo": "one",
				},
				Extensions: map[string]typeurl.Any{
					"hello": &types.Any{
						TypeUrl: "test.update.extensions",
						Value:   []byte("world"),
					},
				},
			},
			fieldpaths: []string{"extensions"},
			expected: containers.Container{
				Spec: encoded,
				Runtime: containers.RuntimeInfo{
					Name: "testruntime",
				},
				Extensions: map[string]typeurl.Any{
					"hello": &types.Any{
						TypeUrl: "test.update.extensions",
						Value:   []byte("world"),
					},
				},
			},
		},
		{
			name: "UpdateExtensionsFieldPathIsolated",
			original: containers.Container{
				Spec: encoded,
				Runtime: containers.RuntimeInfo{
					Name: "testruntime",
				},
				Extensions: map[string]typeurl.Any{
					// leaves hello in place.
					"hello": &types.Any{
						TypeUrl: "test.update.extensions",
						Value:   []byte("hello"),
					},
				},
			},
			input: containers.Container{
				Extensions: map[string]typeurl.Any{
					"hello": &types.Any{
						TypeUrl: "test.update.extensions",
						Value:   []byte("universe"), // this will be ignored
					},
					"bar": &types.Any{
						TypeUrl: "test.update.extensions",
						Value:   []byte("foo"), // this will be added
					},
				},
			},
			fieldpaths: []string{"extensions.bar"}, // only the "bar" extension is selected for update
			expected: containers.Container{
				Spec: encoded,
				Runtime: containers.RuntimeInfo{
					Name: "testruntime",
				},
				Extensions: map[string]typeurl.Any{
					"hello": &types.Any{
						TypeUrl: "test.update.extensions",
						Value:   []byte("hello"), // remains as hello
					},
					"bar": &types.Any{
						TypeUrl: "test.update.extensions",
						Value:   []byte("foo"), // this will be added
					},
				},
			},
		},
	} {
		testcase := testcase
		t.Run(testcase.name, func(t *testing.T) {
			// The case name doubles as the container ID; an input that sets
			// its own ID (UpdateIDFail) keeps it.
			testcase.original.ID = testcase.name
			if testcase.input.ID == "" {
				testcase.input.ID = testcase.name
			}
			testcase.expected.ID = testcase.name
			now := time.Now().UTC()
			result, err := store.Create(ctx, testcase.original)
			if !errors.Is(err, testcase.createerr) {
				if testcase.createerr == nil {
					t.Fatalf("unexpected error: %v", err)
				} else {
					t.Fatalf("cause of %v (cause: %v) != %v", err, errors.Unwrap(err), testcase.createerr)
				}
			} else if testcase.createerr != nil {
				// The expected creation failure occurred; nothing to update.
				return
			}
			checkContainerTimestamps(t, &result, now, true)
			// ensure that createdat is never tampered with
			testcase.original.CreatedAt = result.CreatedAt
			testcase.expected.CreatedAt = result.CreatedAt
			testcase.original.UpdatedAt = result.UpdatedAt
			testcase.expected.UpdatedAt = result.UpdatedAt
			checkContainersEqual(t, &result, &testcase.original, "unexpected result on container update")
			now = time.Now()
			result, err = store.Update(ctx, testcase.input, testcase.fieldpaths...)
			if !errors.Is(err, testcase.cause) {
				if testcase.cause == nil {
					t.Fatalf("unexpected error: %v", err)
				} else {
					t.Fatalf("cause of %v (cause: %v) != %v", err, errors.Unwrap(err), testcase.cause)
				}
			} else if testcase.cause != nil {
				// The expected update failure occurred; skip result checks.
				return
			}
			checkContainerTimestamps(t, &result, now, false)
			// UpdatedAt is assigned by the store, so adopt it before comparing.
			testcase.expected.UpdatedAt = result.UpdatedAt
			checkContainersEqual(t, &result, &testcase.expected, "updated failed to get expected result")
			// Read the container back to confirm the update was persisted.
			result, err = store.Get(ctx, testcase.original.ID)
			if err != nil {
				t.Fatal(err)
			}
			checkContainersEqual(t, &result, &testcase.expected, "get after failed to get expected result")
		})
	}
}
// checkContainerTimestamps fails the test when the timestamps on c are not
// plausible for an operation that started at now.
//
// Both timestamps must be set. When oncreate is true they must be equal;
// otherwise UpdatedAt must be strictly after CreatedAt, except on Windows
// where equal values are tolerated. In every case UpdatedAt must not be
// earlier than now.
func checkContainerTimestamps(t *testing.T, c *containers.Container, now time.Time, oncreate bool) {
	t.Helper()

	if c.UpdatedAt.IsZero() || c.CreatedAt.IsZero() {
		t.Fatalf("timestamps not set")
	}

	if oncreate {
		if !c.CreatedAt.Equal(c.UpdatedAt) {
			t.Fatal("timestamps should be equal on create")
		}
	} else if !c.UpdatedAt.After(c.CreatedAt) {
		// ensure that updatedat is always after createdat
		//
		// Windows' time.Now resolution is lower than Linux, due to Go.
		// https://github.com/golang/go/issues/31160
		// Equal timestamps are therefore accepted there. Equal is used
		// rather than == so that only the instant is compared.
		tolerated := runtime.GOOS == "windows" && c.UpdatedAt.Equal(c.CreatedAt)
		if !tolerated {
			t.Fatalf("timestamp for updatedat not after createdat: %v <= %v", c.UpdatedAt, c.CreatedAt)
		}
	}

	if c.UpdatedAt.Before(now) {
		t.Fatal("updatedat time incorrect: should not be before the start of the operation")
	}
}
// checkContainersEqual marks the test as failed (without stopping it) when a
// and b differ under the package's compareNil and compareAny comparison
// options. format and args describe the check and are attached to the
// failure so it can be told apart from other comparisons in the same test.
func checkContainersEqual(t *testing.T, a, b *containers.Container, format string, args ...interface{}) {
	t.Helper()

	// testify treats the first element as a format string when further
	// arguments follow, and as a plain message otherwise.
	msgAndArgs := append([]interface{}{format}, args...)
	assert.True(t, cmp.Equal(a, b, compareNil, compareAny), msgAndArgs...)
}
// testEnv builds the fixtures shared by the tests: a cancellable context in
// the "testing" namespace with logging routed to t, and a bolt database
// opened in a fresh temporary directory. Closing the database and cancelling
// the context are registered as test cleanup.
func testEnv(t *testing.T) (context.Context, *bolt.DB) {
	base, cancel := context.WithCancel(context.Background())
	ctx := logtest.WithT(namespaces.WithNamespace(base, "testing"), t)

	dbPath := filepath.Join(t.TempDir(), "meta.db")
	db, err := bolt.Open(dbPath, 0644, nil)
	require.NoError(t, err)

	cleanup := func() {
		assert.NoError(t, db.Close())
		cancel()
	}
	t.Cleanup(cleanup)

	return ctx, db
}

912
core/metadata/content.go Normal file
View File

@@ -0,0 +1,912 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metadata
import (
"context"
"encoding/binary"
"fmt"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/containerd/containerd/v2/core/content"
"github.com/containerd/containerd/v2/core/metadata/boltutil"
"github.com/containerd/containerd/v2/errdefs"
"github.com/containerd/containerd/v2/filters"
"github.com/containerd/containerd/v2/labels"
"github.com/containerd/containerd/v2/namespaces"
"github.com/containerd/log"
digest "github.com/opencontainers/go-digest"
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
bolt "go.etcd.io/bbolt"
)
// contentStore is a namespaced view over a backing content.Store whose
// per-namespace bookkeeping (blobs, ingests, leases) lives in the metadata
// database.
type contentStore struct {
	// Store is the backing content store; methods not overridden by
	// contentStore are promoted from it.
	content.Store
	// db is the metadata database used for all transactions.
	db *DB
	// shared selects the sharing policy: when true, Writer may resolve an
	// expected digest against the backend instead of requiring the data.
	shared bool
	// l is read-locked by the mutating operations in this file (Update,
	// Delete, Abort, Writer and namespacedWriter.Commit).
	// NOTE(review): the write-lock holder is not visible here — confirm.
	l sync.RWMutex
}
// newContentStore wraps the content store cs in a namespaced content store
// whose metadata is kept in db.
//
// shared encodes the policy for sharing content between namespaces. Under
// either policy committed content ends up in shared storage in the backend.
// Pick the "shared" policy (true) so that separate namespaces do not have to
// pull the same content twice; pick the "isolated" policy (false) when
// content must not be shared between namespaces.
//
// With the "shared" policy, writes try to resolve the "expected" digest
// against the backend, which allows content to be imported from other
// namespaces. With the "isolated" policy, a client must prove it has the
// content by supplying the entire blob before the content can be added to
// another namespace.
//
// Only two policies exist right now, so a bool is the simplest internal
// representation.
func newContentStore(db *DB, shared bool, cs content.Store) *contentStore {
	store := new(contentStore)
	store.Store = cs
	store.db = db
	store.shared = shared
	return store
}
// Info returns the metadata recorded for dgst in the namespace carried by
// ctx. An error wrapping errdefs.ErrNotFound is returned when the namespace
// has no blob bucket for dgst.
func (cs *contentStore) Info(ctx context.Context, dgst digest.Digest) (content.Info, error) {
	ns, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return content.Info{}, err
	}

	info := content.Info{Digest: dgst}
	lookup := func(tx *bolt.Tx) error {
		blob := getBlobBucket(tx, ns, dgst)
		if blob == nil {
			return fmt.Errorf("content digest %v: %w", dgst, errdefs.ErrNotFound)
		}
		return readInfo(&info, blob)
	}

	if err := view(ctx, cs.db, lookup); err != nil {
		// Never hand back a partially populated value on failure.
		return content.Info{}, err
	}
	return info, nil
}
// Update modifies the mutable metadata of the content identified by
// info.Digest and returns the stored result.
//
// With no fieldpaths, all labels are replaced by info.Labels. Otherwise each
// path must be either "labels" (replace all labels) or "labels.<key>" (copy
// that single key from info.Labels); any other path yields an error wrapping
// errdefs.ErrInvalidArgument. UpdatedAt is set to the current UTC time.
func (cs *contentStore) Update(ctx context.Context, info content.Info, fieldpaths ...string) (content.Info, error) {
	ns, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return content.Info{}, err
	}

	cs.l.RLock()
	defer cs.l.RUnlock()

	updated := content.Info{Digest: info.Digest}
	apply := func(tx *bolt.Tx) error {
		blob := getBlobBucket(tx, ns, info.Digest)
		if blob == nil {
			return fmt.Errorf("content digest %v: %w", info.Digest, errdefs.ErrNotFound)
		}

		if err := readInfo(&updated, blob); err != nil {
			return fmt.Errorf("info %q: %w", info.Digest, err)
		}

		if len(fieldpaths) == 0 {
			// Set mutable fields
			updated.Labels = info.Labels
		}
		for _, path := range fieldpaths {
			switch {
			case strings.HasPrefix(path, "labels."):
				if updated.Labels == nil {
					updated.Labels = map[string]string{}
				}
				name := strings.TrimPrefix(path, "labels.")
				updated.Labels[name] = info.Labels[name]
			case path == "labels":
				updated.Labels = info.Labels
			default:
				return fmt.Errorf("cannot update %q field on content info %q: %w", path, info.Digest, errdefs.ErrInvalidArgument)
			}
		}

		if err := validateInfo(&updated); err != nil {
			return err
		}

		updated.UpdatedAt = time.Now().UTC()
		return writeInfo(&updated, blob)
	}

	if err := update(ctx, cs.db, apply); err != nil {
		return content.Info{}, err
	}
	return updated, nil
}
// Walk calls fn for every blob in the namespace carried by ctx whose info
// matches the filters fs.
//
// All matching infos are collected inside a read transaction first and fn is
// invoked afterwards, outside of the transaction. Walking stops at the first
// error returned by fn, which is returned to the caller. Keys that do not
// parse as digests, or that are not buckets, are skipped.
func (cs *contentStore) Walk(ctx context.Context, fn content.WalkFunc, fs ...string) error {
	ns, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return err
	}

	filter, err := filters.ParseAll(fs...)
	if err != nil {
		return err
	}

	// TODO: Batch results to keep from reading all info into memory
	var infos []content.Info
	if err := view(ctx, cs.db, func(tx *bolt.Tx) error {
		bkt := getBlobsBucket(tx, ns)
		if bkt == nil {
			return nil
		}

		return bkt.ForEach(func(k, v []byte) error {
			dgst, err := digest.Parse(string(k))
			if err != nil {
				// Not a digest, skip
				return nil
			}
			bbkt := bkt.Bucket(k)
			if bbkt == nil {
				return nil
			}
			info := content.Info{
				Digest: dgst,
			}
			// Reuse the bucket that was just nil-checked instead of
			// looking it up a second time.
			if err := readInfo(&info, bbkt); err != nil {
				return err
			}
			if filter.Match(content.AdaptInfo(info)) {
				infos = append(infos, info)
			}
			return nil
		})
	}); err != nil {
		return err
	}

	for _, info := range infos {
		if err := fn(info); err != nil {
			return err
		}
	}

	return nil
}
// Delete removes the blob record for dgst from the namespace carried by ctx
// together with its content lease entry, and flags the database and content
// store as dirty. An error wrapping errdefs.ErrNotFound is returned when the
// namespace has no record of dgst.
func (cs *contentStore) Delete(ctx context.Context, dgst digest.Digest) error {
	ns, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return err
	}

	cs.l.RLock()
	defer cs.l.RUnlock()

	remove := func(tx *bolt.Tx) error {
		if getBlobBucket(tx, ns, dgst) == nil {
			return fmt.Errorf("content digest %v: %w", dgst, errdefs.ErrNotFound)
		}

		blobs := getBlobsBucket(tx, ns)
		if err := blobs.DeleteBucket([]byte(dgst.String())); err != nil {
			return err
		}

		if err := removeContentLease(ctx, tx, dgst); err != nil {
			return err
		}

		// Mark content store as dirty for triggering garbage collection
		atomic.AddUint32(&cs.db.dirty, 1)
		cs.db.dirtyCS = true
		return nil
	}

	return update(ctx, cs.db, remove)
}
// ListStatuses returns the status of every active ingest in the namespace
// carried by ctx that matches the filters fs.
//
// Namespace refs are first mapped to backend refs inside a read transaction;
// the backing store is then asked for each status. Ingests the backend no
// longer knows about are skipped, and each returned status carries the
// namespace ref rather than the backend ref.
func (cs *contentStore) ListStatuses(ctx context.Context, fs ...string) ([]content.Status, error) {
	ns, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return nil, err
	}

	filter, err := filters.ParseAll(fs...)
	if err != nil {
		return nil, err
	}

	// backendRefs maps each namespace ref to the ref used in the backend.
	backendRefs := map[string]string{}
	collect := func(tx *bolt.Tx) error {
		ingests := getIngestsBucket(tx, ns)
		if ingests == nil {
			return nil
		}
		return ingests.ForEach(func(k, v []byte) error {
			if v != nil {
				// Only entries with a nil value are considered.
				return nil
			}
			// TODO(dmcgowan): match name and potentially labels here
			backendRefs[string(k)] = string(ingests.Bucket(k).Get(bucketKeyRef))
			return nil
		})
	}
	if err := view(ctx, cs.db, collect); err != nil {
		return nil, err
	}

	statuses := make([]content.Status, 0, len(backendRefs))
	for ref, bref := range backendRefs {
		status, err := cs.Store.Status(ctx, bref)
		switch {
		case err == nil:
		case errdefs.IsNotFound(err):
			continue
		default:
			return nil, err
		}

		status.Ref = ref
		if filter.Match(adaptContentStatus(status)) {
			statuses = append(statuses, status)
		}
	}

	return statuses, nil
}
// getRef returns the backend ref stored for the ingest ref in namespace ns,
// or the empty string when the ingest bucket or its ref entry is missing or
// empty.
func getRef(tx *bolt.Tx, ns, ref string) string {
	if ingest := getIngestBucket(tx, ns, ref); ingest != nil {
		// Converting a nil or empty value yields "", which covers the
		// missing-entry case.
		return string(ingest.Get(bucketKeyRef))
	}
	return ""
}
// Status returns the status of the ingest identified by ref in the namespace
// carried by ctx. The namespace ref is resolved to a backend ref, the backing
// store is queried, and the result is reported under the caller's ref. An
// error wrapping errdefs.ErrNotFound is returned when ref is unknown.
func (cs *contentStore) Status(ctx context.Context, ref string) (content.Status, error) {
	ns, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return content.Status{}, err
	}

	var backendRef string
	resolve := func(tx *bolt.Tx) error {
		if backendRef = getRef(tx, ns, ref); backendRef == "" {
			return fmt.Errorf("reference %v: %w", ref, errdefs.ErrNotFound)
		}
		return nil
	}
	if err := view(ctx, cs.db, resolve); err != nil {
		return content.Status{}, err
	}

	status, err := cs.Store.Status(ctx, backendRef)
	if err != nil {
		return content.Status{}, err
	}
	status.Ref = ref
	return status, nil
}
// Abort cancels the ingest identified by ref in the namespace carried by ctx.
//
// The ingest bucket and its lease entry are removed. When the ingest has no
// recorded expected digest, the active ingest in the backing store is aborted
// as well. An error wrapping errdefs.ErrNotFound is returned when ref is
// unknown.
func (cs *contentStore) Abort(ctx context.Context, ref string) error {
	ns, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return err
	}

	cs.l.RLock()
	defer cs.l.RUnlock()

	notFound := func() error {
		return fmt.Errorf("reference %v: %w", ref, errdefs.ErrNotFound)
	}

	return update(ctx, cs.db, func(tx *bolt.Tx) error {
		ingests := getIngestsBucket(tx, ns)
		if ingests == nil {
			return notFound()
		}

		refKey := []byte(ref)
		ingest := ingests.Bucket(refKey)
		if ingest == nil {
			return notFound()
		}

		backendRef := string(ingest.Get(bucketKeyRef))
		if backendRef == "" {
			return notFound()
		}
		// Read the expected digest before the bucket is deleted.
		expected := string(ingest.Get(bucketKeyExpected))

		if err := ingests.DeleteBucket(refKey); err != nil {
			return err
		}

		if err := removeIngestLease(ctx, tx, ref); err != nil {
			return err
		}

		if expected != "" {
			return nil
		}
		// if not shared content, delete active ingest on backend
		return cs.Store.Abort(ctx, backendRef)
	})
}
// Writer opens a namespaced writer for the ingest described by opts.
//
// A non-empty ref is required. If the expected digest is already present in
// the namespace, the content is added to the current lease and an error
// wrapping errdefs.ErrAlreadyExists is returned. If the content can be shared
// from the backend (the store policy is shared, or isSharedContent reports
// so, and the backend has the digest with a compatible size), no backend
// writer is opened and only the expected digest is recorded on the ingest.
// Otherwise a backend writer is opened under a generated backend ref.
func (cs *contentStore) Writer(ctx context.Context, opts ...content.WriterOpt) (content.Writer, error) {
	var wOpts content.WriterOpts
	for _, opt := range opts {
		if err := opt(&wOpts); err != nil {
			return nil, err
		}
	}
	// TODO(AkihiroSuda): we could create a random string or one calculated based on the context
	// https://github.com/containerd/containerd/issues/2129#issuecomment-380255019
	if wOpts.Ref == "" {
		return nil, fmt.Errorf("ref must not be empty: %w", errdefs.ErrInvalidArgument)
	}
	ns, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return nil, err
	}
	cs.l.RLock()
	defer cs.l.RUnlock()
	// These are assigned inside the transaction and consumed after it.
	var (
		w      content.Writer
		exists bool
		bref   string
	)
	if err := update(ctx, cs.db, func(tx *bolt.Tx) error {
		var shared bool
		if wOpts.Desc.Digest != "" {
			cbkt := getBlobBucket(tx, ns, wOpts.Desc.Digest)
			if cbkt != nil {
				// Add content to lease to prevent other reference removals
				// from affecting this object during a provided lease
				if err := addContentLease(ctx, tx, wOpts.Desc.Digest); err != nil {
					return fmt.Errorf("unable to lease content: %w", err)
				}
				// Return error outside of transaction to ensure
				// commit succeeds with the lease.
				exists = true
				return nil
			}
			if cs.shared || isSharedContent(tx, wOpts.Desc.Digest) {
				if st, err := cs.Store.Info(ctx, wOpts.Desc.Digest); err == nil {
					// Ensure the expected size is the same, it is likely
					// an error if the size is mismatched but the caller
					// must resolve this on commit
					if wOpts.Desc.Size == 0 || wOpts.Desc.Size == st.Size {
						shared = true
						wOpts.Desc.Size = st.Size
					}
				}
			}
		}
		bkt, err := createIngestBucket(tx, ns, wOpts.Ref)
		if err != nil {
			return err
		}
		leased, err := addIngestLease(ctx, tx, wOpts.Ref)
		if err != nil {
			return err
		}
		// Reuse the backend ref of an existing ingest; otherwise derive a
		// new one from the bucket's next sequence number.
		brefb := bkt.Get(bucketKeyRef)
		if brefb == nil {
			sid, err := bkt.NextSequence()
			if err != nil {
				return err
			}
			bref = createKey(sid, ns, wOpts.Ref)
			if err := bkt.Put(bucketKeyRef, []byte(bref)); err != nil {
				return err
			}
		} else {
			bref = string(brefb)
		}
		if !leased {
			// Add timestamp to allow aborting once stale
			// When lease is set the ingest should be aborted
			// after lease it belonged to is deleted.
			// Expiration can be configurable in the future to
			// give more control to the daemon, however leases
			// already give users more control of expiration.
			expireAt := time.Now().UTC().Add(24 * time.Hour)
			if err := writeExpireAt(expireAt, bkt); err != nil {
				return err
			}
		}
		if shared {
			if err := bkt.Put(bucketKeyExpected, []byte(wOpts.Desc.Digest)); err != nil {
				return err
			}
		} else {
			// Do not use the passed in expected value here since it was
			// already checked against the user metadata. The content must
			// be committed in the namespace before it will be seen as
			// available in the current namespace.
			desc := wOpts.Desc
			desc.Digest = ""
			w, err = cs.Store.Writer(ctx, content.WithRef(bref), content.WithDescriptor(desc))
		}
		// err is nil on the shared path and carries the backend Writer
		// result otherwise.
		return err
	}); err != nil {
		return nil, err
	}
	if exists {
		return nil, fmt.Errorf("content %v: %w", wOpts.Desc.Digest, errdefs.ErrAlreadyExists)
	}
	// w is nil here when the content is shared; the namespaced writer only
	// creates a backend writer if it is later written to or truncated.
	return &namespacedWriter{
		ctx:       ctx,
		ref:       wOpts.Ref,
		namespace: ns,
		db:        cs.db,
		provider:  cs.Store,
		l:         &cs.l,
		w:         w,
		bref:      bref,
		started:   time.Now(),
		desc:      wOpts.Desc,
	}, nil
}
// namespacedWriter is the writer returned by contentStore.Writer. It forwards
// to a backend writer when one exists and records the result in the
// namespace's metadata on Commit.
type namespacedWriter struct {
	// ctx is the context the writer was created with; it is reused when
	// Write or Truncate must create the backend writer.
	ctx context.Context
	// ref is the caller-visible ingest reference.
	ref string
	// namespace is the namespace the ingest belongs to; Commit applies it
	// to its context.
	namespace string
	// db runs the metadata transaction in Commit.
	db transactor
	// provider supplies the backend writer and reader used by
	// createAndCopy.
	provider interface {
		content.Provider
		content.Ingester
	}
	// l points at the owning content store's lock and is read-locked for
	// the duration of Commit.
	l *sync.RWMutex
	// w is the backend writer; it is nil until createAndCopy runs when the
	// writer was opened for shared content.
	w content.Writer
	// bref is the reference used for the ingest in the backend.
	bref string
	// started is the creation time, reported by Status while w is nil.
	started time.Time
	// desc is the descriptor supplied when the writer was opened.
	desc ocispec.Descriptor
}
// Close closes the backend writer if one has been created; otherwise it is a
// no-op that returns nil.
func (nw *namespacedWriter) Close() error {
	if nw.w == nil {
		return nil
	}
	return nw.w.Close()
}
// Write writes p to the backend writer. When no backend writer exists yet,
// an empty p is accepted without side effects; a non-empty p first triggers
// createAndCopy so that the write lands on a private copy of the content.
func (nw *namespacedWriter) Write(p []byte) (int, error) {
	if nw.w != nil {
		return nw.w.Write(p)
	}

	// if no writer, first copy and unshare before performing write
	if len(p) == 0 {
		return 0, nil
	}
	if err := nw.createAndCopy(nw.ctx, nw.desc); err != nil {
		return 0, err
	}
	return nw.w.Write(p)
}
// Digest returns the backend writer's current digest, or the digest of the
// descriptor the writer was opened with when no backend writer exists.
func (nw *namespacedWriter) Digest() digest.Digest {
	if nw.w == nil {
		return nw.desc.Digest
	}
	return nw.w.Digest()
}
// Truncate truncates the backend writer to size. When no backend writer
// exists yet, one is created via createAndCopy using a copy of the
// descriptor with its size set to size and its digest cleared.
func (nw *namespacedWriter) Truncate(size int64) error {
	if nw.w == nil {
		target := nw.desc
		target.Size = size
		target.Digest = ""
		return nw.createAndCopy(nw.ctx, target)
	}
	return nw.w.Truncate(size)
}
// createAndCopy opens a backend writer under nw.bref and, when desc.Size is
// positive, copies desc.Size bytes of the content described by nw.desc into
// it. The new writer is stored in nw.w only on success; on failure it is
// closed and the error is returned.
func (nw *namespacedWriter) createAndCopy(ctx context.Context, desc ocispec.Descriptor) error {
	// The backend writer is always opened without an expected digest.
	withoutDigest := desc
	withoutDigest.Digest = ""

	w, err := nw.provider.Writer(ctx, content.WithRef(nw.bref), content.WithDescriptor(withoutDigest))
	if err != nil {
		return err
	}

	if desc.Size <= 0 {
		nw.w = w
		return nil
	}

	ra, err := nw.provider.ReaderAt(ctx, nw.desc)
	if err != nil {
		w.Close()
		return err
	}
	defer ra.Close()

	if err := content.CopyReaderAt(w, ra, desc.Size); err != nil {
		w.Close()
		return err
	}

	nw.w = w
	return nil
}
// Commit finalizes the ingest inside a single metadata transaction: the
// content is committed, the ingest bucket and ingest lease are removed, and
// the committed digest is added to the current lease.
//
// If the content already exists, the bookkeeping is still performed and the
// AlreadyExists error is returned once the transaction has succeeded.
func (nw *namespacedWriter) Commit(ctx context.Context, size int64, expected digest.Digest, opts ...content.Opt) error {
	ctx = namespaces.WithNamespace(ctx, nw.namespace)

	nw.l.RLock()
	defer nw.l.RUnlock()

	// existsErr holds an AlreadyExists error so it can be reported after
	// the transaction has been committed.
	var existsErr error
	txErr := update(ctx, nw.db, func(tx *bolt.Tx) error {
		dgst, err := nw.commit(ctx, tx, size, expected, opts...)
		if err != nil {
			if !errdefs.IsAlreadyExists(err) {
				return err
			}
			existsErr = err
		}

		if ingests := getIngestsBucket(tx, nw.namespace); ingests != nil {
			delErr := ingests.DeleteBucket([]byte(nw.ref))
			if delErr != nil && delErr != bolt.ErrBucketNotFound {
				return delErr
			}
		}

		if err := removeIngestLease(ctx, tx, nw.ref); err != nil {
			return err
		}
		return addContentLease(ctx, tx, dgst)
	})
	if txErr != nil {
		return txErr
	}

	return existsErr
}
// commit performs the work of Commit inside the transaction tx and returns
// the digest of the committed content.
//
// The commit options are applied to a fresh content.Info and validated.
// Without a backend writer, size and expected are checked against the
// descriptor the writer was opened with; with one, size is checked against
// the writer's offset and the backend commit is performed. A blob bucket is
// then created for the digest and filled with timestamps, labels and size.
// When the bucket already exists the digest is returned together with an
// error wrapping errdefs.ErrAlreadyExists.
func (nw *namespacedWriter) commit(ctx context.Context, tx *bolt.Tx, size int64, expected digest.Digest, opts ...content.Opt) (digest.Digest, error) {
	// closeBackend closes the backend writer if one exists.
	closeBackend := func() {
		if nw.w != nil {
			nw.w.Close()
		}
	}

	var base content.Info
	for _, apply := range opts {
		if err := apply(&base); err != nil {
			closeBackend()
			return "", err
		}
	}
	if err := validateInfo(&base); err != nil {
		closeBackend()
		return "", err
	}

	var actual digest.Digest
	if nw.w != nil {
		status, err := nw.w.Status()
		if err != nil {
			nw.w.Close()
			return "", err
		}
		if size != 0 && size != status.Offset {
			nw.w.Close()
			return "", fmt.Errorf("%q failed size validation: %v != %v: %w", nw.ref, status.Offset, size, errdefs.ErrFailedPrecondition)
		}
		size = status.Offset

		// The backend already holding the content is not a failure here.
		if err := nw.w.Commit(ctx, size, expected); err != nil && !errdefs.IsAlreadyExists(err) {
			return "", err
		}
		actual = nw.w.Digest()
	} else {
		if size != 0 && size != nw.desc.Size {
			return "", fmt.Errorf("%q failed size validation: %v != %v: %w", nw.ref, nw.desc.Size, size, errdefs.ErrFailedPrecondition)
		}
		if expected != "" && expected != nw.desc.Digest {
			return "", fmt.Errorf("%q unexpected digest: %w", nw.ref, errdefs.ErrFailedPrecondition)
		}
		size = nw.desc.Size
		actual = nw.desc.Digest
	}

	blob, err := createBlobBucket(tx, nw.namespace, actual)
	switch {
	case err == nil:
	case err == bolt.ErrBucketExists:
		return actual, fmt.Errorf("content %v: %w", actual, errdefs.ErrAlreadyExists)
	default:
		return "", err
	}

	committedAt := time.Now().UTC()
	encodedSize, err := encodeInt(size)
	if err != nil {
		return "", err
	}
	if err := boltutil.WriteTimestamps(blob, committedAt, committedAt); err != nil {
		return "", err
	}
	if err := boltutil.WriteLabels(blob, base.Labels); err != nil {
		return "", err
	}
	return actual, blob.Put(bucketKeySize, encodedSize)
}
// Status reports the state of the ingest. With an open backend writer the
// backend status is returned with Ref rewritten to the namespaced ref nw.ref;
// a backend error is returned with the backend's status untouched. Without a
// backend writer the status is synthesized from the descriptor: offset and
// total equal the descriptor size, both timestamps equal the start time, and
// the expected digest is the descriptor digest.
func (nw *namespacedWriter) Status() (st content.Status, err error) {
	if nw.w == nil {
		st.Offset = nw.desc.Size
		st.Total = nw.desc.Size
		st.StartedAt = nw.started
		st.UpdatedAt = nw.started
		st.Expected = nw.desc.Digest
		st.Ref = nw.ref
		return st, nil
	}

	st, err = nw.w.Status()
	if err != nil {
		return st, err
	}
	st.Ref = nw.ref
	return st, nil
}
// ReaderAt returns a reader for the content described by desc from the
// backing store, after verifying that the digest is accessible from the
// namespace carried by ctx.
func (cs *contentStore) ReaderAt(ctx context.Context, desc ocispec.Descriptor) (content.ReaderAt, error) {
	err := cs.checkAccess(ctx, desc.Digest)
	if err != nil {
		return nil, err
	}
	return cs.Store.ReaderAt(ctx, desc)
}
// checkAccess verifies that a blob bucket for dgst exists in the namespace
// found in ctx. It returns the namespace lookup error if ctx carries no
// namespace, and an error wrapping errdefs.ErrNotFound if the namespace has
// no record of the digest.
func (cs *contentStore) checkAccess(ctx context.Context, dgst digest.Digest) error {
	ns, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return err
	}

	lookup := func(tx *bolt.Tx) error {
		if getBlobBucket(tx, ns, dgst) != nil {
			return nil
		}
		return fmt.Errorf("content digest %v: %w", dgst, errdefs.ErrNotFound)
	}
	return view(ctx, cs.db, lookup)
}
// isSharedContent reports whether any namespace whose labels bucket carries
// labels.LabelSharedNamespace with the value "true" has a blob bucket for
// dgst.
func isSharedContent(tx *bolt.Tx, dgst digest.Digest) bool {
	root := tx.Bucket(bucketKeyVersion)
	if root == nil {
		return false
	}

	sharedKey := []byte(labels.LabelSharedNamespace)

	// Walk every key under the version bucket, treating each as a namespace.
	cur := root.Cursor()
	for key, _ := cur.First(); key != nil; key, _ = cur.Next() {
		ns := string(key)

		nsLabels := getNamespaceLabelsBucket(tx, ns)
		if nsLabels == nil {
			continue
		}
		// A missing label yields nil, which never compares equal to "true".
		if string(nsLabels.Get(sharedKey)) != "true" {
			continue
		}
		if getBlobBucket(tx, ns, dgst) != nil {
			return true
		}
	}
	return false
}
// validateInfo checks every label on info with labels.Validate and returns
// the first failure, wrapped with an "info.Labels" prefix.
func validateInfo(info *content.Info) error {
	for key, value := range info.Labels {
		err := labels.Validate(key, value)
		if err == nil {
			continue
		}
		return fmt.Errorf("info.Labels: %w", err)
	}
	return nil
}
// readInfo populates info's timestamps, labels and size from bkt. The size is
// left unchanged when the bucket holds no size value.
func readInfo(info *content.Info, bkt *bolt.Bucket) error {
	err := boltutil.ReadTimestamps(bkt, &info.CreatedAt, &info.UpdatedAt)
	if err != nil {
		return err
	}

	stored, err := boltutil.ReadLabels(bkt)
	if err != nil {
		return err
	}
	info.Labels = stored

	raw := bkt.Get(bucketKeySize)
	if len(raw) == 0 {
		return nil
	}
	// The size is stored as a varint; the byte count is not needed.
	info.Size, _ = binary.Varint(raw)
	return nil
}
// writeInfo stores info's timestamps, labels and size into bkt, in that
// order, stopping at the first error.
func writeInfo(info *content.Info, bkt *bolt.Bucket) error {
	err := boltutil.WriteTimestamps(bkt, info.CreatedAt, info.UpdatedAt)
	if err != nil {
		return err
	}

	err = boltutil.WriteLabels(bkt, info.Labels)
	if err != nil {
		return fmt.Errorf("writing labels for info %v: %w", info.Digest, err)
	}

	encoded, err := encodeInt(info.Size)
	if err != nil {
		return err
	}
	return bkt.Put(bucketKeySize, encoded)
}
// readExpireAt decodes the expiration time stored in bkt. It returns
// (nil, nil) when the bucket has no expiration entry, and the decoding error
// when the stored bytes cannot be unmarshalled.
func readExpireAt(bkt *bolt.Bucket) (*time.Time, error) {
	raw := bkt.Get(bucketKeyExpireAt)
	if raw == nil {
		return nil, nil
	}

	var expireAt time.Time
	if err := expireAt.UnmarshalBinary(raw); err != nil {
		return nil, err
	}
	return &expireAt, nil
}
// writeExpireAt stores expire in bkt using time.Time's binary encoding.
func writeExpireAt(expire time.Time, bkt *bolt.Bucket) error {
	encoded, err := expire.MarshalBinary()
	if err == nil {
		err = bkt.Put(bucketKeyExpireAt, encoded)
	}
	return err
}
// garbageCollect removes all contents that are no longer used.
//
// It holds cs.l exclusively for the whole run. In a read-only transaction it
// collects, across every namespace bucket, the digests recorded in blob
// buckets, the expected digests recorded on ingests, and the backend refs of
// those ingests. It then deletes every blob in the backing store whose digest
// was not collected and aborts every backend ingest whose ref was not
// collected.
//
// The returned duration is the elapsed time and is only set when err is nil.
func (cs *contentStore) garbageCollect(ctx context.Context) (d time.Duration, err error) {
cs.l.Lock()
t1 := time.Now()
defer func() {
if err == nil {
d = time.Since(t1)
}
cs.l.Unlock()
}()
// contentSeen holds digests that must be kept; ingestSeen holds backend
// ingest refs that must be kept.
contentSeen := map[string]struct{}{}
ingestSeen := map[string]struct{}{}
if err := cs.db.View(func(tx *bolt.Tx) error {
v1bkt := tx.Bucket(bucketKeyVersion)
if v1bkt == nil {
return nil
}
// iterate through each namespace
v1c := v1bkt.Cursor()
for k, v := v1c.First(); k != nil; k, v = v1c.Next() {
// only entries without a value are treated as namespace buckets
if v != nil {
continue
}
cbkt := v1bkt.Bucket(k).Bucket(bucketKeyObjectContent)
if cbkt == nil {
continue
}
bbkt := cbkt.Bucket(bucketKeyObjectBlob)
if bbkt != nil {
if err := bbkt.ForEach(func(ck, cv []byte) error {
if cv == nil {
contentSeen[string(ck)] = struct{}{}
}
return nil
}); err != nil {
return err
}
}
ibkt := cbkt.Bucket(bucketKeyObjectIngests)
if ibkt != nil {
if err := ibkt.ForEach(func(ref, v []byte) error {
if v == nil {
bkt := ibkt.Bucket(ref)
// expected here may be from a different namespace
// so must be explicitly retained from the ingest
// in case it was removed from the other namespace
expected := bkt.Get(bucketKeyExpected)
if len(expected) > 0 {
contentSeen[string(expected)] = struct{}{}
}
bref := bkt.Get(bucketKeyRef)
if len(bref) > 0 {
ingestSeen[string(bref)] = struct{}{}
}
}
return nil
}); err != nil {
return err
}
}
}
return nil
}); err != nil {
return 0, err
}
// Delete every blob in the backing store that no namespace references.
err = cs.Store.Walk(ctx, func(info content.Info) error {
if _, ok := contentSeen[info.Digest.String()]; !ok {
if err := cs.Store.Delete(ctx, info.Digest); err != nil {
return err
}
log.G(ctx).WithField("digest", info.Digest).Debug("removed content")
}
return nil
})
if err != nil {
return
}
// If the content store has implemented a more efficient walk function
// then use that else fallback to reading all statuses which may
// cause reading of unneeded metadata.
type statusWalker interface {
WalkStatusRefs(context.Context, func(string) error) error
}
if w, ok := cs.Store.(statusWalker); ok {
err = w.WalkStatusRefs(ctx, func(ref string) error {
if _, ok := ingestSeen[ref]; !ok {
if err := cs.Store.Abort(ctx, ref); err != nil {
return err
}
log.G(ctx).WithField("ref", ref).Debug("cleanup aborting ingest")
}
return nil
})
} else {
var statuses []content.Status
statuses, err = cs.Store.ListStatuses(ctx)
if err != nil {
return 0, err
}
for _, status := range statuses {
if _, ok := ingestSeen[status.Ref]; !ok {
if err = cs.Store.Abort(ctx, status.Ref); err != nil {
return
}
log.G(ctx).WithField("ref", status.Ref).Debug("cleanup aborting ingest")
}
}
}
return
}

View File

@@ -0,0 +1,236 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metadata
import (
"bytes"
"context"
"errors"
"fmt"
"path/filepath"
"sync/atomic"
"testing"
"github.com/containerd/containerd/v2/core/content"
"github.com/containerd/containerd/v2/core/content/testsuite"
"github.com/containerd/containerd/v2/core/leases"
"github.com/containerd/containerd/v2/errdefs"
"github.com/containerd/containerd/v2/labels"
"github.com/containerd/containerd/v2/namespaces"
"github.com/containerd/containerd/v2/plugins/content/local"
"github.com/opencontainers/go-digest"
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
bolt "go.etcd.io/bbolt"
)
// createContentStore builds a metadata-backed content store rooted at root
// for use by the content test suite. It opens a local content store and a
// bolt database ("metadata.db") under root, and installs a context wrapper
// that gives every wrapped context a fresh, uniquely numbered namespace.
//
// When the suite asks for a shared namespace, the wrapper labels that
// namespace with labels.LabelSharedNamespace="true"; a failure to do so is
// returned from the wrapper instead of being silently dropped.
//
// The returned cleanup function closes the bolt database.
func createContentStore(ctx context.Context, root string, opts ...DBOpt) (context.Context, content.Store, func() error, error) {
	// TODO: Use mocked or in-memory store
	cs, err := local.NewStore(root)
	if err != nil {
		return nil, nil, nil, err
	}

	db, err := bolt.Open(filepath.Join(root, "metadata.db"), 0660, nil)
	if err != nil {
		return nil, nil, nil, err
	}

	var (
		count uint64
		name  = testsuite.Name(ctx)
	)
	wrap := func(ctx context.Context, sharedNS bool) (context.Context, func(context.Context) error, error) {
		n := atomic.AddUint64(&count, 1)
		ctx2 := namespaces.WithNamespace(ctx, fmt.Sprintf("%s-n%d", name, n))
		if sharedNS {
			// Propagate both the namespace lookup error and the transaction
			// error; previously the former was shadowed and the latter ignored.
			if err := db.Update(func(tx *bolt.Tx) error {
				ns, err := namespaces.NamespaceRequired(ctx2)
				if err != nil {
					return err
				}
				return NewNamespaceStore(tx).SetLabel(ctx2, ns, labels.LabelSharedNamespace, "true")
			}); err != nil {
				return nil, nil, fmt.Errorf("marking namespace as shared: %w", err)
			}
		}
		return ctx2, func(context.Context) error {
			return nil
		}, nil
	}
	ctx = testsuite.SetContextWrapper(ctx, wrap)

	return ctx, NewDB(db, cs, nil, opts...).ContentStore(), func() error {
		return db.Close()
	}, nil
}
// createContentStoreWithPolicy returns a testsuite.StoreInitFn that calls
// createContentStore with the given database options bound in.
func createContentStoreWithPolicy(opts ...DBOpt) testsuite.StoreInitFn {
return func(ctx context.Context, root string) (context.Context, content.Store, func() error, error) {
return createContentStore(ctx, root, opts...)
}
}
// TestContent runs the shared content test suites against the metadata
// content store: the general and cross-namespace-shared suites with default
// options, and the two isolation suites with WithPolicyIsolated.
func TestContent(t *testing.T) {
	testsuite.ContentSuite(t, "metadata", createContentStoreWithPolicy())
	testsuite.ContentCrossNSSharedSuite(t, "metadata", createContentStoreWithPolicy())

	testsuite.ContentCrossNSIsolatedSuite(t, "metadata", createContentStoreWithPolicy(WithPolicyIsolated))
	testsuite.ContentSharedNSIsolatedSuite(t, "metadata", createContentStoreWithPolicy(WithPolicyIsolated))
}
// TestContentLeased checks lease bookkeeping for committed content: after a
// blob is written under a lease the content is leased and its ingest is not;
// opening a writer for the same content under a second lease fails with
// already-exists yet still leaves the content leased, and no ingest lease, on
// that second lease.
func TestContentLeased(t *testing.T) {
	ctx, db := testDB(t)
	cs := db.ContentStore()

	blob := []byte("any content")
	expected := digest.FromBytes(blob)
	desc := ocispec.Descriptor{Size: int64(len(blob)), Digest: expected}

	// First lease: write the blob completely.
	lctx, _, err := createLease(ctx, db, "lease-1")
	if err != nil {
		t.Fatal(err)
	}
	err = content.WriteBlob(lctx, cs, "test-1", bytes.NewReader(blob), desc)
	if err != nil {
		t.Fatal(err)
	}
	if err := checkContentLeased(lctx, db, expected); err != nil {
		t.Fatal("lease checked failed:", err)
	}
	switch err := checkIngestLeased(lctx, db, "test-1"); {
	case err == nil:
		t.Fatal("test-1 should not be leased after write")
	case !errdefs.IsNotFound(err):
		t.Fatal("lease checked failed:", err)
	}

	// Second lease: the content already exists, so no writer is handed out.
	lctx, _, err = createLease(ctx, db, "lease-2")
	if err != nil {
		t.Fatal(err)
	}
	_, err = cs.Writer(lctx, content.WithRef("test-2"), content.WithDescriptor(desc))
	switch {
	case err == nil:
		t.Fatal("expected already exist error")
	case !errdefs.IsAlreadyExists(err):
		t.Fatal(err)
	}
	if err := checkContentLeased(lctx, db, expected); err != nil {
		t.Fatal("lease checked failed:", err)
	}
	switch err := checkIngestLeased(lctx, db, "test-2"); {
	case err == nil:
		t.Fatal("test-2 should not be leased")
	case !errdefs.IsNotFound(err):
		t.Fatal("lease checked failed:", err)
	}
}
// TestIngestLeased checks lease bookkeeping for in-progress ingests: opening a
// writer under a lease records the ingest on that lease, and aborting the
// ingest removes it again.
func TestIngestLeased(t *testing.T) {
	ctx, db := testDB(t)
	cs := db.ContentStore()

	blob := []byte("any content")
	expected := digest.FromBytes(blob)
	desc := ocispec.Descriptor{Size: int64(len(blob)), Digest: expected}

	lctx, _, err := createLease(ctx, db, "lease-1")
	if err != nil {
		t.Fatal(err)
	}

	w, err := cs.Writer(lctx, content.WithRef("test-1"), content.WithDescriptor(desc))
	if err != nil {
		t.Fatal(err)
	}

	// Check the lease while the writer is open, then close the writer before
	// failing the test.
	leaseErr := checkIngestLeased(lctx, db, "test-1")
	w.Close()
	if leaseErr != nil {
		t.Fatal("lease checked failed:", leaseErr)
	}

	if err := cs.Abort(lctx, "test-1"); err != nil {
		t.Fatal(err)
	}

	switch err := checkIngestLeased(lctx, db, "test-1"); {
	case err == nil:
		t.Fatal("test-1 should not be leased after write")
	case !errdefs.IsNotFound(err):
		t.Fatal("lease checked failed:", err)
	}
}
// createLease creates a lease with ID name in db and returns a context
// carrying that lease, together with a function that deletes the lease.
func createLease(ctx context.Context, db *DB, name string) (context.Context, func() error, error) {
	manager := NewLeaseManager(db)

	_, err := manager.Create(ctx, leases.WithID(name))
	if err != nil {
		return nil, nil, err
	}

	release := func() error {
		return manager.Delete(ctx, leases.Lease{ID: name})
	}
	return leases.WithLease(ctx, name), release, nil
}
// checkContentLeased returns nil when dgst is recorded in the content bucket
// of the lease carried by ctx, within the namespace carried by ctx. A missing
// lease bucket or a missing entry is reported with an error wrapping
// errdefs.ErrNotFound; a context without namespace or lease is a plain error.
func checkContentLeased(ctx context.Context, db *DB, dgst digest.Digest) error {
	ns, hasNS := namespaces.Namespace(ctx)
	if !hasNS {
		return errors.New("no namespace in context")
	}
	lease, hasLease := leases.FromContext(ctx)
	if !hasLease {
		return errors.New("no lease in context")
	}

	lookup := func(tx *bolt.Tx) error {
		bkt := getBucket(tx, bucketKeyVersion, []byte(ns), bucketKeyObjectLeases, []byte(lease), bucketKeyObjectContent)
		if bkt == nil {
			return fmt.Errorf("bucket not found %s: %w", lease, errdefs.ErrNotFound)
		}
		if bkt.Get([]byte(dgst.String())) == nil {
			return fmt.Errorf("object not leased: %w", errdefs.ErrNotFound)
		}
		return nil
	}
	return db.View(lookup)
}
// checkIngestLeased returns nil when ref is recorded in the ingests bucket of
// the lease carried by ctx, within the namespace carried by ctx. A missing
// lease bucket or a missing entry is reported with an error wrapping
// errdefs.ErrNotFound; a context without namespace or lease is a plain error.
func checkIngestLeased(ctx context.Context, db *DB, ref string) error {
	ns, hasNS := namespaces.Namespace(ctx)
	if !hasNS {
		return errors.New("no namespace in context")
	}
	lease, hasLease := leases.FromContext(ctx)
	if !hasLease {
		return errors.New("no lease in context")
	}

	lookup := func(tx *bolt.Tx) error {
		bkt := getBucket(tx, bucketKeyVersion, []byte(ns), bucketKeyObjectLeases, []byte(lease), bucketKeyObjectIngests)
		if bkt == nil {
			return fmt.Errorf("bucket not found %s: %w", lease, errdefs.ErrNotFound)
		}
		if bkt.Get([]byte(ref)) == nil {
			return fmt.Errorf("object not leased: %w", errdefs.ErrNotFound)
		}
		return nil
	}
	return db.View(lookup)
}

544
core/metadata/db.go Normal file
View File

@@ -0,0 +1,544 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metadata
import (
"context"
"encoding/binary"
"errors"
"fmt"
"strings"
"sync"
"sync/atomic"
"time"
eventstypes "github.com/containerd/containerd/v2/api/events"
"github.com/containerd/containerd/v2/core/content"
"github.com/containerd/containerd/v2/events"
"github.com/containerd/containerd/v2/gc"
"github.com/containerd/containerd/v2/namespaces"
"github.com/containerd/containerd/v2/pkg/cleanup"
"github.com/containerd/containerd/v2/snapshots"
"github.com/containerd/log"
bolt "go.etcd.io/bbolt"
)
const (
// schemaVersion represents the schema version of
// the database. This schema version represents the
// structure of the data in the database. The schema
// can evolve at any time but any backwards
// incompatible changes or structural changes require
// bumping the schema version.
schemaVersion = "v1"
// dbVersion represents updates to the schema
// version which are additions and compatible with
// prior version of the same schema.
dbVersion = 3
)
// DBOpt configures how we set up the DB. Options are applied by NewDB, in
// the order given, to the database's option set.
type DBOpt func(*dbOptions)
// WithPolicyIsolated isolates contents between namespaces by clearing the
// shared flag, which NewDB otherwise sets to true.
func WithPolicyIsolated(o *dbOptions) {
o.shared = false
}
// WithEventsPublisher adds an events publisher to the
// metadata db to directly publish events. The publisher is used to emit
// events for resources removed during garbage collection.
func WithEventsPublisher(p events.Publisher) DBOpt {
return func(o *dbOptions) {
o.publisher = p
}
}
// dbOptions configure db options.
type dbOptions struct {
// shared is passed to the content store on construction; NewDB defaults
// it to true and WithPolicyIsolated sets it to false.
shared bool
// publisher, when non-nil, receives events queued during garbage
// collection.
publisher events.Publisher
}
// DB represents a metadata database backed by a bolt
// database. The database is fully namespaced and stores
// image, container, namespace, snapshot, and content data
// while proxying data shared across namespaces to backend
// datastores for content and snapshots.
type DB struct {
// db is the underlying bolt database.
db *bolt.DB
// ss maps snapshotter names to their metadata-wrapped snapshotters.
ss map[string]*snapshotter
// cs is the metadata-wrapped content store.
cs *contentStore
// wlock is used to protect access to the data structures during garbage
// collection. While the wlock is held no writable transactions can be
// opened, preventing changes from occurring between the mark and
// sweep phases without preventing read transactions.
wlock sync.RWMutex
// dirty flag indicates that references have been removed which require
// a garbage collection to ensure the database is clean. This tracks
// the number of dirty operations. This should be updated and read
// atomically if outside of wlock.Lock.
dirty uint32
// dirtySS and dirtyCS flags keeps track of datastores which have had
// deletions since the last garbage collection. These datastores will
// be garbage collected during the next garbage collection. These
// should only be updated inside of a write transaction or wlock.Lock.
dirtySS map[string]struct{}
dirtyCS bool
// mutationCallbacks are called after each mutation with the flag
// set indicating whether any dirty flags are set
mutationCallbacks []func(bool)
// collectible resources
collectors map[gc.ResourceType]Collector
// dbopts holds the options the database was created with.
dbopts dbOptions
}
// NewDB creates a new metadata database using the provided
// bolt database, content store, and snapshotters. Content is shared between
// namespaces unless an option changes that. The content store and every
// snapshotter are wrapped so that they are accessed through the metadata
// database.
func NewDB(db *bolt.DB, cs content.Store, ss map[string]snapshots.Snapshotter, opts ...DBOpt) *DB {
	m := &DB{
		db:      db,
		ss:      make(map[string]*snapshotter, len(ss)),
		dirtySS: make(map[string]struct{}),
	}

	// Start from the defaults, then let the caller's options override them.
	m.dbopts.shared = true
	for _, apply := range opts {
		apply(&m.dbopts)
	}

	// Initialize data stores
	m.cs = newContentStore(m, m.dbopts.shared, cs)
	for name := range ss {
		m.ss[name] = newSnapshotter(m, name, ss[name])
	}

	return m
}
// Init ensures the database is at the correct version
// and performs any needed migrations.
//
// The registered migrations are scanned from newest to oldest to find the
// schema and version currently stored in the database. Every migration newer
// than that version is then run in order inside a single write transaction,
// after which the current dbVersion is written to the version bucket. A
// database with no known schema bucket skips the migrations and only has the
// version written.
func (m *DB) Init(ctx context.Context) error {
// errSkip is used when no migration or version needs to be written
// to the database and the transaction can be immediately rolled
// back rather than performing a much slower and unnecessary commit.
var errSkip = errors.New("skip update")
err := m.db.Update(func(tx *bolt.Tx) error {
var (
// current schema and version
schema = "v0"
version = 0
)
// i represents the index of the first migration
// which must be run to get the database up to date.
// The migration's version will be checked in reverse
// order, decrementing i for each migration which
// represents a version newer than the current
// database version
i := len(migrations)
for ; i > 0; i-- {
migration := migrations[i-1]
bkt := tx.Bucket([]byte(migration.schema))
if bkt == nil {
// Hasn't encountered another schema, go to next migration
if schema == "v0" {
continue
}
break
}
// The first schema bucket found determines the stored schema and version.
if schema == "v0" {
schema = migration.schema
vb := bkt.Get(bucketKeyDBVersion)
if vb != nil {
v, _ := binary.Varint(vb)
version = int(v)
}
}
if version >= migration.version {
break
}
}
// Previous version of database found
if schema != "v0" {
updates := migrations[i:]
// No migration updates, return immediately
if len(updates) == 0 {
return errSkip
}
// NOTE: m below is the migration and shadows the receiver.
for _, m := range updates {
t0 := time.Now()
if err := m.migrate(tx); err != nil {
return fmt.Errorf("failed to migrate to %s.%d: %w", m.schema, m.version, err)
}
log.G(ctx).WithField("d", time.Since(t0)).Debugf("finished database migration to %s.%d", m.schema, m.version)
}
}
bkt, err := tx.CreateBucketIfNotExists(bucketKeyVersion)
if err != nil {
return err
}
versionEncoded, err := encodeInt(dbVersion)
if err != nil {
return err
}
return bkt.Put(bucketKeyDBVersion, versionEncoded)
})
// errSkip only signals that the transaction should be rolled back.
if err == errSkip {
err = nil
}
return err
}
// ContentStore returns a namespaced content store
// proxied to a content store.
func (m *DB) ContentStore() content.Store {
// m.cs is a concrete pointer; return an untyped nil explicitly so callers
// comparing the interface against nil get the expected result.
if m.cs == nil {
return nil
}
return m.cs
}
// Snapshotter returns the metadata-proxied snapshotter registered under name,
// or nil when no snapshotter with that name exists.
func (m *DB) Snapshotter(name string) snapshots.Snapshotter {
	if sn, ok := m.ss[name]; ok {
		return sn
	}
	return nil
}
// Snapshotters returns all available snapshotters, keyed by name, in a newly
// allocated map.
func (m *DB) Snapshotters() map[string]snapshots.Snapshotter {
	all := make(map[string]snapshots.Snapshotter, len(m.ss))
	for name := range m.ss {
		all[name] = m.ss[name]
	}
	return all
}
// View runs a readonly transaction on the metadata store. Unlike Update, it
// does not take wlock.
func (m *DB) View(fn func(*bolt.Tx) error) error {
return m.db.View(fn)
}
// Update runs a writable transaction on the metadata store while holding
// wlock for reading. When the transaction succeeds, every registered mutation
// callback is invoked with a flag telling whether the dirty counter is
// non-zero; when it fails, the error is returned and no callback runs.
func (m *DB) Update(fn func(*bolt.Tx) error) error {
	m.wlock.RLock()
	defer m.wlock.RUnlock()

	if err := m.db.Update(fn); err != nil {
		return err
	}

	dirty := atomic.LoadUint32(&m.dirty) > 0
	for _, notify := range m.mutationCallbacks {
		notify(dirty)
	}
	return nil
}
// RegisterMutationCallback registers a function to be called after a metadata
// mutation has been performed.
//
// The callback receives a flag indicating whether a deletion has occurred
// since the last garbage collection.
func (m *DB) RegisterMutationCallback(fn func(bool)) {
	m.wlock.Lock()
	defer m.wlock.Unlock()

	m.mutationCallbacks = append(m.mutationCallbacks, fn)
}
// RegisterCollectibleResource registers a resource type which can be
// referenced by metadata resources and garbage collected.
// Collectible Resources are useful for ephemeral resources which need to
// be tracked but go away after reboot or process restart.
//
// A few limitations to consider:
//   - Collectible Resources cannot reference other resources.
//   - A failure to complete collection will not fail the garbage collection,
//     however, the resources can be collected in a later run.
//   - Collectible Resources must track whether the resource is active and/or
//     lease membership.
//
// It panics if t collides with a built-in metadata resource type, is not
// below gc.ResourceMax, or has already been registered.
func (m *DB) RegisterCollectibleResource(t gc.ResourceType, c Collector) {
	switch {
	case t < resourceEnd:
		panic("cannot re-register metadata resource")
	case t >= gc.ResourceMax:
		panic("resource type greater than max")
	}

	m.wlock.Lock()
	defer m.wlock.Unlock()

	// The collector map is created lazily on first registration.
	if m.collectors == nil {
		m.collectors = make(map[gc.ResourceType]Collector)
	}
	if _, registered := m.collectors[t]; registered {
		panic("cannot register collectible type twice")
	}
	m.collectors[t] = c
}
// namespacedEvent is used to handle any event for a namespace
type namespacedEvent struct {
// namespace is the namespace the event is published under.
namespace string
// event is the event payload handed to the publisher.
event interface{}
}
// publishEvents publishes each queued event under its own namespace using the
// configured publisher; it does nothing when no publisher is configured.
// Event types without a known topic are logged and skipped, and publish
// failures are logged rather than returned.
func (m *DB) publishEvents(events []namespacedEvent) {
	publisher := m.dbopts.publisher
	if publisher == nil {
		return
	}

	base := context.Background()
	for _, ne := range events {
		ctx := namespaces.WithNamespace(base, ne.namespace)

		var topic string
		switch ne.event.(type) {
		case *eventstypes.ImageDelete:
			topic = "/images/delete"
		case *eventstypes.SnapshotRemove:
			topic = "/snapshot/remove"
		default:
			log.G(ctx).WithField("event", ne.event).Debug("unhandled event type from garbage collection removal")
			continue
		}

		err := publisher.Publish(ctx, topic, ne.event)
		if err != nil {
			log.G(ctx).WithError(err).WithField("topic", topic).Debug("publish event failed")
		}
	}
}
// GCStats holds the duration for the different phases of the garbage collector
type GCStats struct {
// MetaD is the time spent on the metadata phase, measured while the
// database write lock is held.
MetaD time.Duration
// ContentD is the time spent cleaning up the content store.
ContentD time.Duration
// SnapshotD is the time spent cleaning up each snapshotter, keyed by
// snapshotter name.
SnapshotD map[string]time.Duration
}
// Elapsed returns the duration which elapsed during a collection. Only the
// metadata phase (MetaD) is reported; content and snapshot cleanup durations
// are not included.
func (s GCStats) Elapsed() time.Duration {
return s.MetaD
}
// GarbageCollect removes resources (snapshots, contents, ...) that are no longer used.
//
// While holding wlock exclusively it marks every reachable resource and then,
// in a single write transaction, removes every scanned resource that was not
// marked. After the transaction commits, events for removed resources are
// published and the backing snapshotters and content store that had removals
// are cleaned up, all in background goroutines. The lock is released before
// waiting on those goroutines, and the call returns once they have finished.
func (m *DB) GarbageCollect(ctx context.Context) (gc.Stats, error) {
m.wlock.Lock()
t1 := time.Now()
c := startGCContext(ctx, m.collectors)
marked, err := m.getMarked(ctx, c) // Pass in gc context
if err != nil {
m.wlock.Unlock()
c.cancel(ctx)
return nil, err
}
events := []namespacedEvent{}
if err := m.db.Update(func(tx *bolt.Tx) error {
ctx, cancel := context.WithCancel(ctx)
defer cancel()
// rm removes n unless it was marked as reachable, recording which
// backing stores will need a cleanup afterwards.
rm := func(ctx context.Context, n gc.Node) error {
if _, ok := marked[n]; ok {
return nil
}
if n.Type == ResourceSnapshot {
// snapshot keys are prefixed with the snapshotter name
if idx := strings.IndexRune(n.Key, '/'); idx > 0 {
m.dirtySS[n.Key[:idx]] = struct{}{}
}
} else if n.Type == ResourceContent || n.Type == ResourceIngest {
m.dirtyCS = true
}
// queue event to publish after successful commit
event, err := c.remove(ctx, tx, n)
if event != nil && err == nil {
events = append(events,
namespacedEvent{
namespace: n.Namespace,
event: event,
})
}
return err
}
if err := c.scanAll(ctx, tx, rm); err != nil { // From gc context
return fmt.Errorf("failed to scan and remove: %w", err)
}
return nil
}); err != nil {
m.wlock.Unlock()
c.cancel(ctx)
return nil, err
}
var stats GCStats
var wg sync.WaitGroup
// Flush events asynchronously after commit
wg.Add(1)
go func() {
m.publishEvents(events)
wg.Done()
}()
// reset dirty, no need for atomic inside of wlock.Lock
m.dirty = 0
if len(m.dirtySS) > 0 {
// sl guards concurrent writes to stats.SnapshotD
var sl sync.Mutex
stats.SnapshotD = map[string]time.Duration{}
wg.Add(len(m.dirtySS))
for snapshotterName := range m.dirtySS {
log.G(ctx).WithField("snapshotter", snapshotterName).Debug("schedule snapshotter cleanup")
go func(snapshotterName string) {
st1 := time.Now()
m.cleanupSnapshotter(ctx, snapshotterName)
sl.Lock()
stats.SnapshotD[snapshotterName] = time.Since(st1)
sl.Unlock()
wg.Done()
}(snapshotterName)
}
m.dirtySS = map[string]struct{}{}
}
if m.dirtyCS {
wg.Add(1)
log.G(ctx).Debug("schedule content cleanup")
go func() {
ct1 := time.Now()
m.cleanupContent(ctx)
stats.ContentD = time.Since(ct1)
wg.Done()
}()
m.dirtyCS = false
}
// MetaD covers only the time spent under wlock; the scheduled cleanups are
// waited for after the lock has been released.
stats.MetaD = time.Since(t1)
m.wlock.Unlock()
c.finish(ctx)
wg.Wait()
return stats, err
}
// getMarked returns all resources that are used.
//
// Inside a read-only transaction it gathers the root nodes reported by the
// gc context c, then runs the tricolor marking algorithm over them, using
// c.references to discover the nodes each node refers to. The result is the
// set of reachable nodes.
func (m *DB) getMarked(ctx context.Context, c *gcContext) (map[gc.Node]struct{}, error) {
	var marked map[gc.Node]struct{}
	if err := m.db.View(func(tx *bolt.Tx) error {
		ctx, cancel := context.WithCancel(ctx)
		defer cancel()

		var (
			nodes []gc.Node
			wg    sync.WaitGroup
			roots = make(chan gc.Node)
		)
		// Collect roots in a separate goroutine, since the scan sends on an
		// unbuffered channel.
		wg.Add(1)
		go func() {
			defer wg.Done()
			for n := range roots {
				nodes = append(nodes, n)
			}
		}()

		// Call roots. The channel is closed and the collector joined whether
		// or not the scan succeeded; otherwise a failed scan would leave the
		// collector goroutine blocked on the channel forever.
		scanErr := c.scanRoots(ctx, tx, roots) // From gc context
		close(roots)
		wg.Wait()
		if scanErr != nil {
			return scanErr
		}

		refs := func(n gc.Node) ([]gc.Node, error) {
			var sn []gc.Node
			if err := c.references(ctx, tx, n, func(nn gc.Node) { // From gc context
				sn = append(sn, nn)
			}); err != nil {
				return nil, err
			}
			return sn, nil
		}

		reachable, err := gc.Tricolor(nodes, refs)
		if err != nil {
			return err
		}
		marked = reachable
		return nil
	}); err != nil {
		return nil, err
	}
	return marked, nil
}
// cleanupSnapshotter garbage collects the snapshotter registered under name,
// using a context derived via cleanup.Background. It returns the duration
// and error reported by the snapshotter, logging a warning on failure; an
// unknown name yields (0, nil).
func (m *DB) cleanupSnapshotter(ctx context.Context, name string) (time.Duration, error) {
	ctx = cleanup.Background(ctx)

	sn, found := m.ss[name]
	if !found {
		return 0, nil
	}

	d, err := sn.garbageCollect(ctx)
	logger := log.G(ctx).WithField("snapshotter", name)
	if err == nil {
		logger.WithField("d", d).Tracef("snapshot garbage collected")
	} else {
		logger.WithError(err).Warn("snapshot garbage collection failed")
	}
	return d, err
}
// cleanupContent garbage collects the content store, using a context derived
// via cleanup.Background. It returns the duration and error reported by the
// content store, logging a warning on failure; without a content store it
// yields (0, nil).
func (m *DB) cleanupContent(ctx context.Context) (time.Duration, error) {
	ctx = cleanup.Background(ctx)

	if m.cs == nil {
		return 0, nil
	}

	d, err := m.cs.garbageCollect(ctx)
	if err == nil {
		log.G(ctx).WithField("d", d).Tracef("content garbage collected")
	} else {
		log.G(ctx).WithError(err).Warn("content garbage collection failed")
	}
	return d, err
}

814
core/metadata/db_test.go Normal file
View File

@@ -0,0 +1,814 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metadata
import (
"context"
"encoding/binary"
"errors"
"fmt"
"io"
"math/rand"
"path/filepath"
"runtime/pprof"
"strings"
"testing"
"time"
"github.com/containerd/containerd/v2/core/containers"
"github.com/containerd/containerd/v2/core/content"
"github.com/containerd/containerd/v2/core/images"
"github.com/containerd/containerd/v2/core/leases"
"github.com/containerd/containerd/v2/errdefs"
"github.com/containerd/containerd/v2/gc"
"github.com/containerd/containerd/v2/namespaces"
"github.com/containerd/containerd/v2/plugins/content/local"
"github.com/containerd/containerd/v2/protobuf/types"
"github.com/containerd/containerd/v2/snapshots"
"github.com/containerd/containerd/v2/snapshots/native"
"github.com/containerd/log/logtest"
"github.com/opencontainers/go-digest"
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
bolt "go.etcd.io/bbolt"
)
// testOptions collects the optional settings used by testDB.
type testOptions struct {
// extraSnapshots maps a snapshotter name to a constructor that is called
// with a directory for that snapshotter.
extraSnapshots map[string]func(string) (snapshots.Snapshotter, error)
}
// testOpt mutates the testOptions used by testDB.
type testOpt func(*testOptions)
// withSnapshotter returns a test option that registers an additional
// snapshotter constructor under name, replacing any constructor previously
// registered under the same name.
func withSnapshotter(name string, fn func(string) (snapshots.Snapshotter, error)) testOpt {
	register := func(to *testOptions) {
		// The map is allocated on first use.
		if to.extraSnapshots == nil {
			to.extraSnapshots = make(map[string]func(string) (snapshots.Snapshotter, error))
		}
		to.extraSnapshots[name] = fn
	}
	return register
}
// testDB creates an initialized metadata DB for a test, backed by a bolt
// database, a local content store and a native snapshotter (plus any
// snapshotters requested through opt), all under a per-test temporary
// directory. The returned context is cancellable, uses the "testing"
// namespace and logs through t. On test cleanup the bolt database is closed
// and the context cancelled.
func testDB(t *testing.T, opt ...testOpt) (context.Context, *DB) {
	base, cancel := context.WithCancel(context.Background())
	ctx := logtest.WithT(namespaces.WithNamespace(base, "testing"), t)

	var topts testOptions
	for _, apply := range opt {
		apply(&topts)
	}

	dirname := t.TempDir()

	nativeSn, err := native.NewSnapshotter(filepath.Join(dirname, "native"))
	require.NoError(t, err)

	snapshotters := map[string]snapshots.Snapshotter{
		"native": nativeSn,
	}
	for name, newSnapshotter := range topts.extraSnapshots {
		extra, err := newSnapshotter(filepath.Join(dirname, name))
		if err != nil {
			t.Fatal(err)
		}
		snapshotters[name] = extra
	}

	cs, err := local.NewStore(filepath.Join(dirname, "content"))
	require.NoError(t, err)

	bdb, err := bolt.Open(filepath.Join(dirname, "metadata.db"), 0644, nil)
	require.NoError(t, err)

	db := NewDB(bdb, cs, snapshotters)
	require.NoError(t, db.Init(ctx))

	t.Cleanup(func() {
		assert.NoError(t, bdb.Close())
		cancel()
	})

	return ctx, db
}
// TestInit verifies that initializing a fresh database stores the current
// dbVersion in the version bucket.
func TestInit(t *testing.T) {
	ctx, db := testEnv(t)

	require.NoError(t, NewDB(db, nil, nil).Init(ctx))

	version, err := readDBVersion(db, bucketKeyVersion)
	require.NoError(t, err)
	// testify expects (expected, actual); passing them in that order keeps
	// the failure report's "expected"/"actual" labels correct.
	assert.EqualValues(t, dbVersion, version, "unexpected version %d, expected %d", version, dbVersion)
}
// TestMigrations runs every registered migration in isolation. Each test
// case seeds a database through init, applies the migration at the same index
// in the migrations list, and then validates the result through check. The
// number of test cases must match the number of migrations.
func TestMigrations(t *testing.T) {
// testRefs are the ingest ref to backend ref pairs used by the
// IngestUpdate case, including long keys and an empty backend ref.
testRefs := []struct {
ref string
bref string
}{
{
ref: "k1",
bref: "bk1",
},
{
ref: strings.Repeat("longerkey", 30), // 270 characters
bref: "short",
},
{
ref: "short",
bref: strings.Repeat("longerkey", 30), // 270 characters
},
{
ref: "emptykey",
bref: "",
},
}
migrationTests := []struct {
name string
init func(*bolt.Tx) error
check func(*bolt.Tx) error
}{
{
// Seeds snapshots that only record their parent and expects each
// snapshot to list its children after the migration.
name: "ChildrenKey",
init: func(tx *bolt.Tx) error {
bkt, err := createSnapshotterBucket(tx, "testing", "testing")
if err != nil {
return err
}
snapshots := []struct {
key string
parent string
}{
{
key: "k1",
parent: "",
},
{
key: "k2",
parent: "k1",
},
{
key: "k2a",
parent: "k1",
},
{
key: "a1",
parent: "k2",
},
}
for _, s := range snapshots {
sbkt, err := bkt.CreateBucket([]byte(s.key))
if err != nil {
return err
}
if err := sbkt.Put(bucketKeyParent, []byte(s.parent)); err != nil {
return err
}
}
return nil
},
check: func(tx *bolt.Tx) error {
bkt := getSnapshotterBucket(tx, "testing", "testing")
if bkt == nil {
return fmt.Errorf("snapshots bucket not found: %w", errdefs.ErrNotFound)
}
snapshots := []struct {
key string
children []string
}{
{
key: "k1",
children: []string{"k2", "k2a"},
},
{
key: "k2",
children: []string{"a1"},
},
{
key: "k2a",
children: []string{},
},
{
key: "a1",
children: []string{},
},
}
for _, s := range snapshots {
sbkt := bkt.Bucket([]byte(s.key))
if sbkt == nil {
return fmt.Errorf("key does not exist: %w", errdefs.ErrNotFound)
}
cbkt := sbkt.Bucket(bucketKeyChildren)
// a missing children bucket counts as zero children
var cn int
if cbkt != nil {
cn = cbkt.Stats().KeyN
}
if cn != len(s.children) {
return fmt.Errorf("unexpected number of children %d, expected %d", cn, len(s.children))
}
for _, ch := range s.children {
if v := cbkt.Get([]byte(ch)); v == nil {
return fmt.Errorf("missing child record for %s", ch)
}
}
}
return nil
},
},
{
// Seeds the deprecated flat ingest bucket and expects each ref to
// become its own bucket holding the backend ref, with the deprecated
// bucket removed.
name: "IngestUpdate",
init: func(tx *bolt.Tx) error {
bkt, err := createBucketIfNotExists(tx, bucketKeyVersion, []byte("testing"), bucketKeyObjectContent, deprecatedBucketKeyObjectIngest)
if err != nil {
return err
}
for _, s := range testRefs {
if err := bkt.Put([]byte(s.ref), []byte(s.bref)); err != nil {
return err
}
}
return nil
},
check: func(tx *bolt.Tx) error {
bkt := getIngestsBucket(tx, "testing")
if bkt == nil {
return fmt.Errorf("ingests bucket not found: %w", errdefs.ErrNotFound)
}
for _, s := range testRefs {
sbkt := bkt.Bucket([]byte(s.ref))
if sbkt == nil {
return fmt.Errorf("ref does not exist: %w", errdefs.ErrNotFound)
}
bref := string(sbkt.Get(bucketKeyRef))
if bref != s.bref {
return fmt.Errorf("unexpected reference key %q, expected %q", bref, s.bref)
}
}
dbkt := getBucket(tx, bucketKeyVersion, []byte("testing"), bucketKeyObjectContent, deprecatedBucketKeyObjectIngest)
if dbkt != nil {
return errors.New("deprecated ingest bucket still exists")
}
return nil
},
},
{
// Nothing is seeded or verified; the migration only has to run
// without error.
name: "NoOp",
init: func(tx *bolt.Tx) error {
return nil
},
check: func(tx *bolt.Tx) error {
return nil
},
},
}
if len(migrationTests) != len(migrations) {
t.Fatal("Each migration must have a test case")
}
for i, mt := range migrationTests {
t.Run(mt.name, runMigrationTest(i, mt.init, mt.check))
}
}
// runMigrationTest returns a subtest that seeds a database obtained from
// testEnv with init, applies the i-th entry of migrations in its own write
// transaction, and finally validates the result with check in a read-only
// transaction. The first failing step aborts the subtest.
func runMigrationTest(i int, init, check func(*bolt.Tx) error) func(t *testing.T) {
	return func(t *testing.T) {
		_, db := testEnv(t)

		err := db.Update(init)
		if err == nil {
			err = db.Update(migrations[i].migrate)
		}
		if err == nil {
			err = db.View(check)
		}
		if err != nil {
			t.Fatal(err)
		}
	}
}
// readDBVersion reads the varint-encoded database version stored under
// bucketKeyDBVersion in the top-level schema bucket. It returns an error
// wrapping errdefs.ErrNotFound when either the bucket or the version value
// is missing.
func readDBVersion(db *bolt.DB, schema []byte) (int, error) {
	var version int

	err := db.View(func(tx *bolt.Tx) error {
		schemaBkt := tx.Bucket(schema)
		if schemaBkt == nil {
			return fmt.Errorf("no version bucket: %w", errdefs.ErrNotFound)
		}

		raw := schemaBkt.Get(bucketKeyDBVersion)
		if raw == nil {
			return fmt.Errorf("no version value: %w", errdefs.ErrNotFound)
		}

		// The byte count returned by Varint is intentionally discarded.
		decoded, _ := binary.Varint(raw)
		version = int(decoded)
		return nil
	})
	if err != nil {
		return 0, err
	}

	return version, nil
}
// TestMetadataCollector creates a mix of content, snapshots, containers,
// images and leases (plus nodes owned by a registered test collector), runs
// a garbage collection, and checks that exactly the expected nodes remain.
//
// The boolean passed to blob, newSnapshot and lease marks the object as
// "removed": create returns no node for it, so it is not added to the list
// of expected survivors.
func TestMetadataCollector(t *testing.T) {
	mdb, cs, sn, cleanup := newStores(t)
	defer cleanup()
	var (
		ctx = logtest.WithT(context.Background(), t)
		objects = []object{
			blob(bytesFor(1), true),
			blob(bytesFor(2), false),
			blob(bytesFor(3), true),
			blob(bytesFor(4), false, "containerd.io/gc.root", time.Now().String()),
			newSnapshot("1", "", false, false),
			newSnapshot("2", "1", false, false),
			newSnapshot("3", "2", false, false),
			newSnapshot("4", "3", false, false),
			newSnapshot("5", "3", false, true),
			container("1", "4"),
			image("image-1", digestFor(2)),
			// Test lease preservation
			blob(bytesFor(5), false, "containerd.io/gc.ref.content.0", digestFor(6).String()),
			blob(bytesFor(6), false),
			blob(bytesFor(7), false),
			newSnapshot("6", "", false, false, "containerd.io/gc.ref.content.0", digestFor(7).String()),
			lease("lease-1", []leases.Resource{
				{
					ID: digestFor(5).String(),
					Type: "content",
				},
				{
					ID: "6",
					Type: "snapshots/native",
				},
			}, false),
			// Test flat lease
			blob(bytesFor(8), false, "containerd.io/gc.ref.content.0", digestFor(9).String()),
			blob(bytesFor(9), true),
			blob(bytesFor(10), true),
			newSnapshot("7", "", false, false, "containerd.io/gc.ref.content.0", digestFor(10).String()),
			newSnapshot("8", "7", false, false),
			newSnapshot("9", "8", false, false),
			lease("lease-2", []leases.Resource{
				{
					ID: digestFor(8).String(),
					Type: "content",
				},
				{
					ID: "9",
					Type: "snapshots/native",
				},
			}, false, "containerd.io/gc.flat", time.Now().String()),
			// Test Collectible Resource
			blob(bytesFor(11), false, "containerd.io/gc.ref.test", "test1"),
			blob(bytesFor(12), true, "containerd.io/gc.ref.test", "test2"),
			lease("lease-3", []leases.Resource{
				{
					ID: digestFor(11).String(),
					Type: "content",
				},
			}, false),
		}
		// testResource is the resource type registered for the test collector.
		testResource = gc.ResourceType(0x10)
		// remaining starts with the collector nodes expected to survive; the
		// nodes returned by create are appended below.
		remaining = []gc.Node{
			gcnode(testResource, "test", "test1"),
			gcnode(testResource, "test", "test3"),
			gcnode(testResource, "test", "test4"),
		}
		collector = &testCollector{
			all: []gc.Node{
				gcnode(testResource, "random", "test1"),
				gcnode(testResource, "test", "test1"),
				gcnode(testResource, "test", "test2"),
				gcnode(testResource, "test", "test3"),
				gcnode(testResource, "test", "test4"),
			},
			active: []gc.Node{
				gcnode(testResource, "test", "test4"),
			},
			leased: map[string][]gc.Node{
				"lease-3": {
					gcnode(testResource, "test", "test3"),
				},
			},
		}
	)
	mdb.RegisterCollectibleResource(testResource, collector)
	// Create every object in a single transaction, collecting the nodes that
	// are expected to survive collection.
	if err := mdb.Update(func(tx *bolt.Tx) error {
		for _, obj := range objects {
			node, err := create(obj, tx, mdb, cs, sn)
			if err != nil {
				return err
			}
			if node != nil {
				remaining = append(remaining, *node)
			}
		}
		return nil
	}); err != nil {
		t.Fatalf("Creation failed: %+v", err)
	}
	if _, err := mdb.GarbageCollect(ctx); err != nil {
		t.Fatal(err)
	}
	// Scan everything that is left after collection and compare it with the
	// expected set.
	var actual []gc.Node
	if err := mdb.View(func(tx *bolt.Tx) error {
		scanFn := func(ctx context.Context, node gc.Node) error {
			actual = append(actual, node)
			return nil
		}
		cc := startGCContext(ctx, mdb.collectors)
		return cc.scanAll(ctx, tx, scanFn)
	}); err != nil {
		t.Fatal(err)
	}
	checkNodesEqual(t, actual, remaining)
}
// BenchmarkGarbageCollect measures garbage collection over databases holding
// 10, 100, 1000 and 10000 object sets, one sub-benchmark per size.
func BenchmarkGarbageCollect(b *testing.B) {
	for _, sets := range []int{10, 100, 1000, 10000} {
		b.Run(fmt.Sprintf("%d-Sets", sets), benchmarkTrigger(sets))
	}
}
// benchmarkTrigger returns a benchmark that populates fresh stores with n
// object sets and then times repeated calls to GarbageCollect.
//
// Each set consists of one blob, one image targeting the digest for the same
// seed, a chain of lastSnapshot+1 committed snapshots, and one container
// using the last snapshot of the chain.
func benchmarkTrigger(n int) func(b *testing.B) {
	const lastSnapshot = 6

	snapshotKey := func(set, idx int) string {
		return fmt.Sprintf("snapshot-%d-%d", set, idx)
	}

	return func(b *testing.B) {
		mdb, cs, sn, cleanup := newStores(b)
		defer cleanup()

		// TODO: Allow max to be configurable
		objects := []object{}
		for set := 0; set < n; set++ {
			seed := int64(set)
			objects = append(objects,
				blob(bytesFor(seed), false),
				image(fmt.Sprintf("image-%d", set), digestFor(seed)),
			)

			// Every snapshot is parented on the previous one in the chain;
			// the first one has no parent.
			parent := ""
			for idx := 0; idx <= lastSnapshot; idx++ {
				key := snapshotKey(set, idx)
				objects = append(objects, newSnapshot(key, parent, false, false))
				parent = key
			}

			objects = append(objects, container(fmt.Sprintf("container-%d", set), snapshotKey(set, lastSnapshot)))
		}

		// TODO: Create set of objects for removal

		ctx := context.Background()
		var remaining []gc.Node

		err := mdb.Update(func(tx *bolt.Tx) error {
			for _, obj := range objects {
				node, err := create(obj, tx, mdb, cs, sn)
				if err != nil {
					return err
				}
				if node != nil {
					remaining = append(remaining, *node)
				}
			}
			return nil
		})
		if err != nil {
			b.Fatalf("Creation failed: %+v", err)
		}

		// Exclude the setup above from the measurement.
		b.ResetTimer()

		pprof.Do(ctx, pprof.Labels("worker", "trigger"), func(ctx context.Context) {
			for i := 0; i < b.N; i++ {
				// TODO: Add removal objects, and verify after each run (with
				// the timer stopped) that the scanned nodes equal remaining.
				if _, err := mdb.GarbageCollect(ctx); err != nil {
					b.Fatal(err)
				}
			}
		})
	}
}
// bytesFor returns 256 pseudo-random bytes generated from a math/rand source
// seeded with i, so the same i always yields the same bytes.
func bytesFor(i int64) []byte {
	buf := make([]byte, 256)
	if _, err := rand.New(rand.NewSource(i)).Read(buf); err != nil {
		panic(err)
	}
	return buf
}
// digestFor returns the SHA256 digest of the first 256 bytes produced by a
// math/rand source seeded with i.
func digestFor(i int64) digest.Digest {
	src := io.LimitReader(rand.New(rand.NewSource(i)), 256)
	dgst, err := digest.SHA256.FromReader(src)
	if err != nil {
		panic(err)
	}
	return dgst
}
// object describes a single resource for create to insert into the stores.
type object struct {
	// data selects the kind of resource; create handles testContent,
	// testSnapshot, testImage, testContainer and testLease values.
	data interface{}
	// removed, when true, keeps create from returning a gc node for the
	// object, so callers do not list it among the expected survivors.
	removed bool
	// labels are attached to the created resource.
	labels map[string]string
}
// create inserts obj into the given stores using the transaction tx and the
// fixed namespace "test".
//
// It returns the gc node identifying the created resource, or nil when
// obj.removed is set. Containers never produce a node, and an obj.data of an
// unhandled type is silently ignored (nil node, nil error).
func create(obj object, tx *bolt.Tx, db *DB, cs content.Store, sn snapshots.Snapshotter) (*gc.Node, error) {
	var (
		node *gc.Node
		namespace = "test"
		// All store operations below share the caller's transaction.
		ctx = WithTransactionContext(namespaces.WithNamespace(context.Background(), namespace), tx)
	)
	switch v := obj.data.(type) {
	case testContent:
		// Write and commit the blob under its computed digest.
		expected := digest.FromBytes(v.data)
		w, err := cs.Writer(ctx,
			content.WithRef("test-ref"),
			content.WithDescriptor(ocispec.Descriptor{Size: int64(len(v.data)), Digest: expected}))
		if err != nil {
			return nil, fmt.Errorf("failed to create writer: %w", err)
		}
		if _, err := w.Write(v.data); err != nil {
			return nil, fmt.Errorf("write blob failed: %w", err)
		}
		if err := w.Commit(ctx, int64(len(v.data)), expected, content.WithLabels(obj.labels)); err != nil {
			return nil, fmt.Errorf("failed to commit blob: %w", err)
		}
		if !obj.removed {
			node = &gc.Node{
				Type: ResourceContent,
				Namespace: namespace,
				Key: expected.String(),
			}
		}
	case testSnapshot:
		if v.active {
			// Active snapshots are prepared directly under their key.
			_, err := sn.Prepare(ctx, v.key, v.parent, snapshots.WithLabels(obj.labels))
			if err != nil {
				return nil, err
			}
		} else {
			// Committed snapshots are prepared under a temporary
			// "<key>-active" name and then committed to the final key; the
			// labels are applied at commit time.
			akey := fmt.Sprintf("%s-active", v.key)
			_, err := sn.Prepare(ctx, akey, v.parent)
			if err != nil {
				return nil, err
			}
			if err := sn.Commit(ctx, v.key, akey, snapshots.WithLabels(obj.labels)); err != nil {
				return nil, err
			}
		}
		if !obj.removed {
			// Snapshot gc keys have the form "<snapshotter>/<key>".
			node = &gc.Node{
				Type: ResourceSnapshot,
				Namespace: namespace,
				Key: fmt.Sprintf("native/%s", v.key),
			}
		}
	case testImage:
		image := images.Image{
			Name: v.name,
			Target: v.target,
			Labels: obj.labels,
		}
		_, err := NewImageStore(db).Create(ctx, image)
		if err != nil {
			return nil, fmt.Errorf("failed to create image: %w", err)
		}
		if !obj.removed {
			node = &gc.Node{
				Type: ResourceImage,
				Namespace: namespace,
				Key: image.Name,
			}
		}
	case testContainer:
		// Containers are created for their references only; no node is
		// returned for them.
		container := containers.Container{
			ID: v.id,
			SnapshotKey: v.snapshot,
			Snapshotter: "native",
			Labels: obj.labels,
			Runtime: containers.RuntimeInfo{
				Name: "testruntime",
			},
			Spec: &types.Any{},
		}
		_, err := NewContainerStore(db).Create(ctx, container)
		if err != nil {
			return nil, err
		}
	case testLease:
		// Create the lease first, then attach each referenced resource.
		lm := NewLeaseManager(db)
		l, err := lm.Create(ctx, leases.WithID(v.id), leases.WithLabels(obj.labels))
		if err != nil {
			return nil, err
		}
		for _, ref := range v.refs {
			if err := lm.AddResource(ctx, l, ref); err != nil {
				return nil, err
			}
		}
		if !obj.removed {
			node = &gc.Node{
				Type: ResourceLease,
				Namespace: namespace,
				Key: v.id,
			}
		}
	}
	return node, nil
}
// blob builds a content object holding b. r marks the object as removed and
// l lists label key/value arguments that are passed on to labelmap.
func blob(b []byte, r bool, l ...string) object {
	payload := testContent{data: b}
	return object{data: payload, removed: r, labels: labelmap(l...)}
}
// image builds an image object named n whose target descriptor has digest d,
// a fixed size of 256 and a placeholder media type. Image objects are never
// marked as removed. l lists label key/value arguments passed to labelmap.
func image(n string, d digest.Digest, l ...string) object {
	target := ocispec.Descriptor{
		MediaType: "irrelevant",
		Digest:    d,
		Size:      256,
	}
	return object{
		data:    testImage{name: n, target: target},
		removed: false,
		labels:  labelmap(l...),
	}
}
// newSnapshot builds a snapshot object with the given key and parent. active
// selects between an active and a committed snapshot, r marks the object as
// removed, and l lists label key/value arguments passed to labelmap.
func newSnapshot(key, parent string, active, r bool, l ...string) object {
	snap := testSnapshot{key: key, parent: parent, active: active}
	return object{data: snap, removed: r, labels: labelmap(l...)}
}
// container builds a container object with the given id that uses snapshot s.
// Container objects are never marked as removed. l lists label key/value
// arguments passed to labelmap.
func container(id, s string, l ...string) object {
	ctr := testContainer{id: id, snapshot: s}
	return object{data: ctr, removed: false, labels: labelmap(l...)}
}
// lease builds a lease object with the given id holding refs. r marks the
// object as removed and l lists label key/value arguments passed to labelmap.
func lease(id string, refs []leases.Resource, r bool, l ...string) object {
	held := testLease{id: id, refs: refs}
	return object{data: held, removed: r, labels: labelmap(l...)}
}
// testContent is the object payload for a content blob; create stores data
// under its computed digest.
type testContent struct {
	data []byte
}
// testSnapshot is the object payload for a snapshot.
type testSnapshot struct {
	// key is the snapshot key.
	key string
	// parent is the parent snapshot key, empty for none.
	parent string
	// active selects an active snapshot; otherwise create commits it.
	active bool
}
// testImage is the object payload for an image with the given name and
// target descriptor.
type testImage struct {
	name string
	target ocispec.Descriptor
}
// testContainer is the object payload for a container; snapshot is used as
// the container's snapshot key.
type testContainer struct {
	id string
	snapshot string
}
// testLease is the object payload for a lease; refs are the resources added
// to the lease after it is created.
type testLease struct {
	id string
	refs []leases.Resource
}
// newStores creates a bolt database, a native snapshotter and a local content
// store inside a per-test temporary directory and wraps them in a metadata DB.
//
// It returns the metadata DB, its content store, its "native" snapshotter and
// a cleanup function that closes the snapshotter and the bolt database.
func newStores(t testing.TB) (*DB, content.Store, snapshots.Snapshotter, func()) {
	root := t.TempDir()

	boltDB, err := bolt.Open(filepath.Join(root, "meta.db"), 0644, nil)
	if err != nil {
		t.Fatal(err)
	}

	nsn, err := native.NewSnapshotter(filepath.Join(root, "snapshots"))
	if err != nil {
		t.Fatal(err)
	}

	lcs, err := local.NewStore(filepath.Join(root, "content"))
	if err != nil {
		t.Fatal(err)
	}

	snapshotters := map[string]snapshots.Snapshotter{"native": nsn}
	mdb := NewDB(boltDB, lcs, snapshotters)

	cleanup := func() {
		nsn.Close()
		boltDB.Close()
	}
	return mdb, mdb.ContentStore(), mdb.Snapshotter("native"), cleanup
}

793
core/metadata/gc.go Normal file
View File

@@ -0,0 +1,793 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metadata
import (
"bytes"
"context"
"fmt"
"sort"
"strings"
"time"
eventstypes "github.com/containerd/containerd/v2/api/events"
"github.com/containerd/containerd/v2/gc"
"github.com/containerd/log"
bolt "go.etcd.io/bbolt"
)
// Resource types handled by the metadata garbage collector. The values are
// assigned sequentially with iota starting at ResourceUnknown, so the
// declaration order below determines each constant's value.
const (
	// ResourceUnknown specifies an unknown resource
	ResourceUnknown gc.ResourceType = iota
	// ResourceContent specifies a content resource
	ResourceContent
	// ResourceSnapshot specifies a snapshot resource
	ResourceSnapshot
	// ResourceContainer specifies a container resource
	ResourceContainer
	// ResourceTask specifies a task resource
	ResourceTask
	// ResourceImage specifies an image
	ResourceImage
	// ResourceLease specifies a lease
	ResourceLease
	// ResourceIngest specifies a content ingest
	ResourceIngest
	// resourceEnd is the end of specified resource types
	resourceEnd
	// ResourceStream specifies a stream. It is declared after resourceEnd,
	// so its value is one greater than resourceEnd.
	ResourceStream
)
// Flat variants of the content, snapshot and image resource types, formed by
// setting bit 0x20 on the base type. They are emitted by scanRoots for
// resources held by a lease labeled with labelGCFlat; references does not
// follow labeled references for flat snapshot and image nodes and has no
// case for flat content nodes.
const (
	resourceContentFlat = ResourceContent | 0x20
	resourceSnapshotFlat = ResourceSnapshot | 0x20
	resourceImageFlat = ResourceImage | 0x20
)
// Label keys (and key prefixes) interpreted by the garbage collector.
var (
	// labelGCRoot marks content and snapshots as roots; isRootRef only
	// checks for the presence of the label, not its value.
	labelGCRoot = []byte("containerd.io/gc.root")
	// labelGCRef is the prefix to which a Collector's ReferenceLabel is
	// appended to form that collector's reference label key.
	labelGCRef = []byte("containerd.io/gc.ref.")
	// labelGCSnapRef is the prefix of snapshot reference labels. The text
	// following the prefix, up to an optional '/', names the snapshotter and
	// the label value is the snapshot key.
	labelGCSnapRef = []byte("containerd.io/gc.ref.snapshot.")
	// labelGCContentRef is the key of content reference labels, optionally
	// followed by a name separated by '.' or '/'. The label value is the
	// referenced content key.
	labelGCContentRef = []byte("containerd.io/gc.ref.content")
	// labelGCImageRef is the key of image reference labels, optionally
	// followed by a name separated by '.' or '/'. The label value is the
	// referenced image name.
	labelGCImageRef = []byte("containerd.io/gc.ref.image")
	// labelGCExpire indicates that an object is collectible after the
	// provided time. For image objects, this makes them available to
	// garbage collect when expired, when not provided, image objects
	// are root objects that never expire. For non-root objects such
	// as content or snapshots, these objects will be treated like
	// root objects before their expiration.
	// Expected format is RFC 3339
	labelGCExpire = []byte("containerd.io/gc.expire")
	// labelGCFlat indicates that a lease is flat and only intends to
	// lease the referenced objects, not their references. This can be
	// used to avoid leasing an entire tree of objects when only the root
	// object is needed.
	labelGCFlat = []byte("containerd.io/gc.flat")
)
// CollectionContext manages a resource collection during a single run of
// the garbage collector. The context is responsible for managing access to
// resources as well as tracking removal.
// Implementations should defer any longer running operations to the Finish
// function and optimize other functions for running fast during garbage
// collection write locks.
type CollectionContext interface {
	// All sends all known resources to the given function.
	All(func(gc.Node))
	// Active sends all active resources in the given namespace to fn.
	// Leased resources may be excluded since lease ownership should take
	// precedence over active status.
	Active(namespace string, fn func(gc.Node))
	// Leased sends all resources associated with the given lease in the
	// given namespace to fn.
	Leased(namespace, lease string, fn func(gc.Node))
	// Remove marks the given resource as removed
	Remove(gc.Node)
	// Cancel is called to cleanup a context after a failed collection
	Cancel() error
	// Finish is called to cleanup a context after a successful collection
	Finish() error
}
// Collector is an interface to manage resource collection for any collectible
// resource registered for garbage collection.
type Collector interface {
	// StartCollection begins a collection run and returns the context used
	// for it. When it returns an error, startGCContext skips the resource
	// type for that run.
	StartCollection(context.Context) (CollectionContext, error)
	// ReferenceLabel returns the name appended to "containerd.io/gc.ref." to
	// form the label key that references this collector's resources. An
	// empty string registers no reference label.
	ReferenceLabel() string
}
// gcContext holds the state built by startGCContext for one collection run.
type gcContext struct {
	// labelHandlers translate reference labels into gc nodes; see
	// sendLabelRefs.
	labelHandlers []referenceLabelHandler
	// contexts maps each registered resource type to the collection context
	// started for it. It is nil when no collectors are registered.
	contexts map[gc.ResourceType]CollectionContext
}
// referenceLabelHandler handles all labels whose key starts with key.
type referenceLabelHandler struct {
	// key is the label key prefix this handler is responsible for.
	key []byte
	// fn is called with the namespace, the full label key, the label value
	// and the callback that receives any referenced node.
	fn func(string, []byte, []byte, func(gc.Node))
}
// startGCContext builds the gcContext for a single collection run.
//
// It installs the built-in handlers for content, snapshot and image
// reference labels and starts a collection on every registered collector. A
// collector whose StartCollection fails is skipped for this run. A collector
// with a non-empty ReferenceLabel additionally gets a handler for the label
// key "containerd.io/gc.ref.<label>".
func startGCContext(ctx context.Context, collectors map[gc.ResourceType]Collector) *gcContext {
	var contexts map[gc.ResourceType]CollectionContext

	// namedRef reports whether label key k is either exactly prefix or
	// prefix followed by a name separated by '.' or '/'. Any other key
	// sharing the prefix belongs to a different label and is ignored.
	namedRef := func(k, prefix []byte) bool {
		if bytes.Equal(k, prefix) {
			return true
		}
		return len(k) > len(prefix) && (k[len(prefix)] == '.' || k[len(prefix)] == '/')
	}

	labelHandlers := []referenceLabelHandler{
		{
			key: labelGCContentRef,
			fn: func(ns string, k, v []byte, fn func(gc.Node)) {
				if namedRef(k, labelGCContentRef) {
					fn(gcnode(ResourceContent, ns, string(v)))
				}
			},
		},
		{
			key: labelGCSnapRef,
			fn: func(ns string, k, v []byte, fn func(gc.Node)) {
				// The snapshotter name follows the prefix and ends at the
				// first '/', if any.
				snapshotter := k[len(labelGCSnapRef):]
				if i := bytes.IndexByte(snapshotter, '/'); i >= 0 {
					snapshotter = snapshotter[:i]
				}
				fn(gcnode(ResourceSnapshot, ns, fmt.Sprintf("%s/%s", snapshotter, v)))
			},
		},
		{
			key: labelGCImageRef,
			fn: func(ns string, k, v []byte, fn func(gc.Node)) {
				if namedRef(k, labelGCImageRef) {
					fn(gcnode(ResourceImage, ns, string(v)))
				}
			},
		},
	}

	if len(collectors) > 0 {
		contexts = map[gc.ResourceType]CollectionContext{}
		for rt, collector := range collectors {
			rt := rt
			c, err := collector.StartCollection(ctx)
			if err != nil {
				// Only skipping this resource this round
				continue
			}

			if reflabel := collector.ReferenceLabel(); reflabel != "" {
				// Build the key in a fresh slice. Appending directly to the
				// package-level labelGCRef could make the keys of different
				// collectors share (and overwrite) one backing array if that
				// slice has spare capacity.
				key := make([]byte, 0, len(labelGCRef)+len(reflabel))
				key = append(key, labelGCRef...)
				key = append(key, reflabel...)

				labelHandlers = append(labelHandlers, referenceLabelHandler{
					key: key,
					fn: func(ns string, k, v []byte, fn func(gc.Node)) {
						if namedRef(k, key) {
							fn(gcnode(rt, ns, string(v)))
						}
					},
				})
			}
			contexts[rt] = c
		}

		// Sort labelHandlers to ensure key seeking is always forward
		sort.Slice(labelHandlers, func(i, j int) bool {
			return bytes.Compare(labelHandlers[i].key, labelHandlers[j].key) < 0
		})
	}

	return &gcContext{
		labelHandlers: labelHandlers,
		contexts:      contexts,
	}
}
// all passes fn to the All method of every collection context.
func (c *gcContext) all(fn func(gc.Node)) {
	for _, cc := range c.contexts {
		cc.All(fn)
	}
}
// active asks every collection context to send its active resources in
// namespace to fn.
func (c *gcContext) active(namespace string, fn func(gc.Node)) {
	for _, cc := range c.contexts {
		cc.Active(namespace, fn)
	}
}
// leased asks every collection context to send the resources held by the
// given lease in namespace to fn.
func (c *gcContext) leased(namespace, lease string, fn func(gc.Node)) {
	for _, cc := range c.contexts {
		cc.Leased(namespace, lease, fn)
	}
}
// cancel cancels every collection context. Failures are logged and do not
// stop the remaining contexts from being cancelled.
func (c *gcContext) cancel(ctx context.Context) {
	for _, cc := range c.contexts {
		err := cc.Cancel()
		if err == nil {
			continue
		}
		log.G(ctx).WithError(err).Error("failed to cancel collection context")
	}
}
// finish finishes every collection context. Failures are logged and do not
// stop the remaining contexts from being finished.
func (c *gcContext) finish(ctx context.Context) {
	for _, cc := range c.contexts {
		err := cc.Finish()
		if err == nil {
			continue
		}
		log.G(ctx).WithError(err).Error("failed to finish collection context")
	}
}
// scanRoots sends "root" resources, i.e. resources that are certainly in
// use, to the given channel. The caller can then follow the references of
// those resources to find every resource that is in use.
//
// For each namespace the roots are: unexpired leases together with the
// content, snapshots, ingests and images they hold; unexpired images;
// ingests with an expiration that has not passed yet; content and snapshots
// carrying the gc root label; the snapshot and labeled references of every
// container; the labeled references of every sandbox; and whatever the
// registered collectors report as leased or active.
//
// If ctx is done while sending, scanning continues but the affected nodes
// are dropped and ctx.Err() is returned at the end.
func (c *gcContext) scanRoots(ctx context.Context, tx *bolt.Tx, nc chan<- gc.Node) error {
	v1bkt := tx.Bucket(bucketKeyVersion)
	if v1bkt == nil {
		return nil
	}
	// Expirations are compared against a single timestamp taken up front.
	expThreshold := time.Now()
	// iterate through each namespace
	v1c := v1bkt.Cursor()
	// cerr indicates the scan did not successfully send all
	// the roots. The scan does not need to be cancelled but
	// must return error at the end.
	var cerr error
	fn := func(n gc.Node) {
		select {
		case nc <- n:
		case <-ctx.Done():
			cerr = ctx.Err()
		}
	}
	for k, v := v1c.First(); k != nil; k, v = v1c.Next() {
		// Entries with a value are plain keys, not namespace buckets.
		if v != nil {
			continue
		}
		nbkt := v1bkt.Bucket(k)
		ns := string(k)
		// Leases and everything they hold.
		lbkt := nbkt.Bucket(bucketKeyObjectLeases)
		if lbkt != nil {
			if err := lbkt.ForEach(func(k, v []byte) error {
				if v != nil {
					return nil
				}
				libkt := lbkt.Bucket(k)
				var flat bool
				if lblbkt := libkt.Bucket(bucketKeyObjectLabels); lblbkt != nil {
					if expV := lblbkt.Get(labelGCExpire); expV != nil {
						exp, err := time.Parse(time.RFC3339, string(expV))
						if err != nil {
							// label not used, log and continue to use lease
							log.G(ctx).WithError(err).WithField("lease", string(k)).Infof("ignoring invalid expiration value %q", string(expV))
						} else if expThreshold.After(exp) {
							// lease has expired, skip
							log.G(ctx).WithField("lease", string(k)).Debug("expired lease")
							return nil
						}
					}
					// Only the presence of the flat label matters.
					if flatV := lblbkt.Get(labelGCFlat); flatV != nil {
						flat = true
					}
				}
				fn(gcnode(ResourceLease, ns, string(k)))
				// Emit content and snapshots as roots instead of implementing
				// in references. Since leases cannot be referenced there is
				// no need to allow the lookup to be recursive, handling here
				// therefore reduces the number of database seeks.
				ctype := ResourceContent
				if flat {
					ctype = resourceContentFlat
				}
				cbkt := libkt.Bucket(bucketKeyObjectContent)
				if cbkt != nil {
					if err := cbkt.ForEach(func(k, v []byte) error {
						fn(gcnode(ctype, ns, string(k)))
						return nil
					}); err != nil {
						return err
					}
				}
				stype := ResourceSnapshot
				if flat {
					stype = resourceSnapshotFlat
				}
				// Leased snapshots are grouped in one bucket per snapshotter.
				sbkt := libkt.Bucket(bucketKeyObjectSnapshots)
				if sbkt != nil {
					if err := sbkt.ForEach(func(sk, sv []byte) error {
						if sv != nil {
							return nil
						}
						snbkt := sbkt.Bucket(sk)
						return snbkt.ForEach(func(k, v []byte) error {
							fn(gcnode(stype, ns, fmt.Sprintf("%s/%s", sk, k)))
							return nil
						})
					}); err != nil {
						return err
					}
				}
				ibkt := libkt.Bucket(bucketKeyObjectIngests)
				if ibkt != nil {
					if err := ibkt.ForEach(func(k, v []byte) error {
						fn(gcnode(ResourceIngest, ns, string(k)))
						return nil
					}); err != nil {
						return err
					}
				}
				itype := ResourceImage
				if flat {
					itype = resourceImageFlat
				}
				ibkt = libkt.Bucket(bucketKeyObjectImages)
				if ibkt != nil {
					if err := ibkt.ForEach(func(k, v []byte) error {
						fn(gcnode(itype, ns, string(k)))
						return nil
					}); err != nil {
						return err
					}
				}
				// Resources of registered collectors held by this lease.
				c.leased(ns, string(k), fn)
				return nil
			}); err != nil {
				return err
			}
		}
		// Images are roots unless they carry an expiration that has passed.
		ibkt := nbkt.Bucket(bucketKeyObjectImages)
		if ibkt != nil {
			if err := ibkt.ForEach(func(k, v []byte) error {
				if v != nil {
					return nil
				}
				if !isExpiredImage(ctx, k, ibkt.Bucket(k), expThreshold) {
					fn(gcnode(ResourceImage, ns, string(k)))
				}
				return nil
			}); err != nil {
				return err
			}
		}
		cbkt := nbkt.Bucket(bucketKeyObjectContent)
		if cbkt != nil {
			// Ingests are roots only while they have an expiration that has
			// not passed; ingests without one are not roots.
			ibkt := cbkt.Bucket(bucketKeyObjectIngests)
			if ibkt != nil {
				if err := ibkt.ForEach(func(k, v []byte) error {
					if v != nil {
						return nil
					}
					ea, err := readExpireAt(ibkt.Bucket(k))
					if err != nil {
						return err
					}
					if ea == nil || expThreshold.After(*ea) {
						return nil
					}
					fn(gcnode(ResourceIngest, ns, string(k)))
					return nil
				}); err != nil {
					return err
				}
			}
			// Content blobs labeled as roots.
			cbkt = cbkt.Bucket(bucketKeyObjectBlob)
			if cbkt != nil {
				if err := cbkt.ForEach(func(k, v []byte) error {
					if v != nil {
						return nil
					}
					if isRootRef(cbkt.Bucket(k)) {
						fn(gcnode(ResourceContent, ns, string(k)))
					}
					return nil
				}); err != nil {
					return err
				}
			}
		}
		// Containers are not emitted themselves; their snapshot and labeled
		// references are.
		cbkt = nbkt.Bucket(bucketKeyObjectContainers)
		if cbkt != nil {
			if err := cbkt.ForEach(func(k, v []byte) error {
				if v != nil {
					return nil
				}
				cibkt := cbkt.Bucket(k)
				snapshotter := string(cibkt.Get(bucketKeySnapshotter))
				if snapshotter != "" {
					ss := string(cibkt.Get(bucketKeySnapshotKey))
					fn(gcnode(ResourceSnapshot, ns, fmt.Sprintf("%s/%s", snapshotter, ss)))
				}
				return c.sendLabelRefs(ns, cibkt, fn)
			}); err != nil {
				return err
			}
		}
		// Snapshots labeled as roots, per snapshotter.
		sbkt := nbkt.Bucket(bucketKeyObjectSnapshots)
		if sbkt != nil {
			if err := sbkt.ForEach(func(sk, sv []byte) error {
				if sv != nil {
					return nil
				}
				snbkt := sbkt.Bucket(sk)
				return snbkt.ForEach(func(k, v []byte) error {
					if v != nil {
						return nil
					}
					if isRootRef(snbkt.Bucket(k)) {
						fn(gcnode(ResourceSnapshot, ns, fmt.Sprintf("%s/%s", sk, k)))
					}
					return nil
				})
			}); err != nil {
				return err
			}
		}
		// Sandboxes contribute only their labeled references.
		bbkt := nbkt.Bucket(bucketKeyObjectSandboxes)
		if bbkt != nil {
			if err := bbkt.ForEach(func(k, v []byte) error {
				if v != nil {
					return nil
				}
				sbbkt := bbkt.Bucket(k)
				return c.sendLabelRefs(ns, sbbkt, fn)
			}); err != nil {
				return err
			}
		}
		// Active resources of registered collectors in this namespace.
		c.active(ns, fn)
	}
	return cerr
}
// references finds the resources that are directly reachable from the given
// node and passes each of them to fn.
//
//   - Content: the labeled references of the blob.
//   - Snapshot: the parent snapshot (with the same, possibly flat, type) and,
//     for non-flat snapshots only, the labeled references.
//   - Image: the target content (flat content for flat images) and, for
//     non-flat images only, the labeled references.
//   - Ingest: the expected content digest, if one is recorded.
//
// Any other node type, including flat content, has no references. A node
// whose bucket does not exist is treated as having no references. An error
// is returned only for a snapshot key that lacks the "<snapshotter>/<name>"
// form.
func (c *gcContext) references(ctx context.Context, tx *bolt.Tx, node gc.Node, fn func(gc.Node)) error {
	switch node.Type {
	case ResourceContent:
		bkt := getBucket(tx, bucketKeyVersion, []byte(node.Namespace), bucketKeyObjectContent, bucketKeyObjectBlob, []byte(node.Key))
		if bkt == nil {
			// Node may be created from dead edge
			return nil
		}
		return c.sendLabelRefs(node.Namespace, bkt, fn)
	case ResourceSnapshot, resourceSnapshotFlat:
		ss, name, ok := strings.Cut(node.Key, "/")
		if !ok {
			return fmt.Errorf("invalid snapshot gc key %s", node.Key)
		}
		bkt := getBucket(tx, bucketKeyVersion, []byte(node.Namespace), bucketKeyObjectSnapshots, []byte(ss), []byte(name))
		if bkt == nil {
			// Node may be created from dead edge
			return nil
		}
		// The parent inherits node.Type, so a flat snapshot keeps its whole
		// parent chain as flat nodes.
		if pv := bkt.Get(bucketKeyParent); len(pv) > 0 {
			fn(gcnode(node.Type, node.Namespace, fmt.Sprintf("%s/%s", ss, pv)))
		}
		// Do not send labeled references for flat snapshot refs
		if node.Type == resourceSnapshotFlat {
			return nil
		}
		return c.sendLabelRefs(node.Namespace, bkt, fn)
	case ResourceImage, resourceImageFlat:
		bkt := getBucket(tx, bucketKeyVersion, []byte(node.Namespace), bucketKeyObjectImages, []byte(node.Key))
		if bkt == nil {
			// Node may be created from dead edge
			return nil
		}
		target := bkt.Bucket(bucketKeyTarget)
		if target != nil {
			ctype := ResourceContent
			if node.Type == resourceImageFlat {
				// For flat leases, keep the target content only
				ctype = resourceContentFlat
			}
			contentKey := string(target.Get(bucketKeyDigest))
			fn(gcnode(ctype, node.Namespace, contentKey))
		}
		// Do not send labeled references for flat image refs
		if node.Type == resourceImageFlat {
			return nil
		}
		return c.sendLabelRefs(node.Namespace, bkt, fn)
	case ResourceIngest:
		// Send expected value
		bkt := getBucket(tx, bucketKeyVersion, []byte(node.Namespace), bucketKeyObjectContent, bucketKeyObjectIngests, []byte(node.Key))
		if bkt == nil {
			// Node may be created from dead edge
			return nil
		}
		// Load expected
		expected := bkt.Get(bucketKeyExpected)
		if len(expected) > 0 {
			fn(gcnode(ResourceContent, node.Namespace, string(expected)))
		}
		return nil
	}
	return nil
}
// scanAll finds all resources regardless of whether they are in use or not
// and passes each of them to fn.
//
// For every namespace it reports leases, snapshots (per snapshotter),
// content ingests, content blobs and images, followed by every node known to
// the registered collectors. The first error returned by fn stops the scan
// and is returned.
func (c *gcContext) scanAll(ctx context.Context, tx *bolt.Tx, fn func(ctx context.Context, n gc.Node) error) error {
	v1bkt := tx.Bucket(bucketKeyVersion)
	if v1bkt == nil {
		return nil
	}

	// iterate through each namespace
	v1c := v1bkt.Cursor()

	for k, v := v1c.First(); k != nil; k, v = v1c.Next() {
		// Entries with a value are plain keys, not namespace buckets.
		if v != nil {
			continue
		}
		nbkt := v1bkt.Bucket(k)
		ns := string(k)

		lbkt := nbkt.Bucket(bucketKeyObjectLeases)
		if lbkt != nil {
			if err := lbkt.ForEach(func(k, v []byte) error {
				if v != nil {
					return nil
				}
				return fn(ctx, gcnode(ResourceLease, ns, string(k)))
			}); err != nil {
				return err
			}
		}

		sbkt := nbkt.Bucket(bucketKeyObjectSnapshots)
		if sbkt != nil {
			if err := sbkt.ForEach(func(sk, sv []byte) error {
				if sv != nil {
					return nil
				}
				snbkt := sbkt.Bucket(sk)
				return snbkt.ForEach(func(k, v []byte) error {
					if v != nil {
						return nil
					}
					node := gcnode(ResourceSnapshot, ns, fmt.Sprintf("%s/%s", sk, k))
					return fn(ctx, node)
				})
			}); err != nil {
				return err
			}
		}

		cbkt := nbkt.Bucket(bucketKeyObjectContent)
		if cbkt != nil {
			ibkt := cbkt.Bucket(bucketKeyObjectIngests)
			if ibkt != nil {
				if err := ibkt.ForEach(func(k, v []byte) error {
					if v != nil {
						return nil
					}
					node := gcnode(ResourceIngest, ns, string(k))
					return fn(ctx, node)
				}); err != nil {
					return err
				}
			}

			cbkt = cbkt.Bucket(bucketKeyObjectBlob)
			if cbkt != nil {
				if err := cbkt.ForEach(func(k, v []byte) error {
					if v != nil {
						return nil
					}
					node := gcnode(ResourceContent, ns, string(k))
					return fn(ctx, node)
				}); err != nil {
					return err
				}
			}
		}

		ibkt := nbkt.Bucket(bucketKeyObjectImages)
		if ibkt != nil {
			if err := ibkt.ForEach(func(k, v []byte) error {
				if v != nil {
					return nil
				}
				node := gcnode(ResourceImage, ns, string(k))
				return fn(ctx, node)
			}); err != nil {
				return err
			}
		}
	}

	// Collection contexts report nodes through a callback that cannot return
	// an error, so remember the first failure, stop forwarding further nodes
	// and return it, matching how the bucket scans above behave.
	var cerr error
	c.all(func(n gc.Node) {
		if cerr != nil {
			return
		}
		cerr = fn(ctx, n)
	})

	return cerr
}
// remove deletes the bucket backing the given node.
//
// For snapshots and images it also returns the event describing the removal
// (eventstypes.SnapshotRemove or eventstypes.ImageDelete); every other type
// returns a nil event. Node types without a case below are handed to the
// collection context registered for that type, if any, and logged otherwise.
// A missing parent bucket results in a nil event and nil error.
func (c *gcContext) remove(ctx context.Context, tx *bolt.Tx, node gc.Node) (interface{}, error) {
	v1bkt := tx.Bucket(bucketKeyVersion)
	if v1bkt == nil {
		return nil, nil
	}
	nsbkt := v1bkt.Bucket([]byte(node.Namespace))
	if nsbkt == nil {
		// Still remove object if referenced outside the db
		if cc, ok := c.contexts[node.Type]; ok {
			cc.Remove(node)
		}
		return nil, nil
	}
	switch node.Type {
	case ResourceContent:
		cbkt := nsbkt.Bucket(bucketKeyObjectContent)
		if cbkt != nil {
			cbkt = cbkt.Bucket(bucketKeyObjectBlob)
		}
		if cbkt != nil {
			log.G(ctx).WithField("key", node.Key).Debug("remove content")
			return nil, cbkt.DeleteBucket([]byte(node.Key))
		}
	case ResourceSnapshot:
		sbkt := nsbkt.Bucket(bucketKeyObjectSnapshots)
		if sbkt != nil {
			// Snapshot keys have the form "<snapshotter>/<key>".
			ss, key, ok := strings.Cut(node.Key, "/")
			if !ok {
				return nil, fmt.Errorf("invalid snapshot gc key %s", node.Key)
			}
			ssbkt := sbkt.Bucket([]byte(ss))
			if ssbkt != nil {
				log.G(ctx).WithField("key", key).WithField("snapshotter", ss).Debug("remove snapshot")
				return &eventstypes.SnapshotRemove{
					Key: key,
					Snapshotter: ss,
				}, ssbkt.DeleteBucket([]byte(key))
			}
		}
	case ResourceImage:
		ibkt := nsbkt.Bucket(bucketKeyObjectImages)
		if ibkt != nil {
			return &eventstypes.ImageDelete{
				Name: node.Key,
			}, ibkt.DeleteBucket([]byte(node.Key))
		}
	case ResourceLease:
		lbkt := nsbkt.Bucket(bucketKeyObjectLeases)
		if lbkt != nil {
			return nil, lbkt.DeleteBucket([]byte(node.Key))
		}
	case ResourceIngest:
		ibkt := nsbkt.Bucket(bucketKeyObjectContent)
		if ibkt != nil {
			ibkt = ibkt.Bucket(bucketKeyObjectIngests)
		}
		if ibkt != nil {
			log.G(ctx).WithField("ref", node.Key).Debug("remove ingest")
			return nil, ibkt.DeleteBucket([]byte(node.Key))
		}
	default:
		// Not stored in the metadata database: delegate to the collection
		// context registered for this type.
		cc, ok := c.contexts[node.Type]
		if ok {
			cc.Remove(node)
		} else {
			log.G(ctx).WithField("ref", node.Key).WithField("type", node.Type).Info("no remove defined for resource")
		}
	}
	return nil, nil
}
// sendLabelRefs passes to fn every resource referenced by the labels stored
// in bkt. For each registered label handler it seeks to the handler's key
// and feeds the handler every consecutive label that has the key as a
// prefix. A bucket without labels yields nothing. The returned error is
// always nil.
func (c *gcContext) sendLabelRefs(ns string, bkt *bolt.Bucket, fn func(gc.Node)) error {
	labels := bkt.Bucket(bucketKeyObjectLabels)
	if labels == nil {
		return nil
	}

	cursor := labels.Cursor()
	for _, handler := range c.labelHandlers {
		prefix := handler.key
		for k, v := cursor.Seek(prefix); k != nil && bytes.HasPrefix(k, prefix); k, v = cursor.Next() {
			handler.fn(ns, k, v, fn)
		}
	}
	return nil
}
// isRootRef reports whether the object stored in bkt carries the gc root
// label. Only the presence of the label is checked, not its value.
func isRootRef(bkt *bolt.Bucket) bool {
	labels := bkt.Bucket(bucketKeyObjectLabels)
	if labels == nil {
		return false
	}
	// TODO: interpret the label value as a timestamp and skip if expired
	return labels.Get(labelGCRoot) != nil
}
// isExpiredImage reports whether the image stored in bkt carries an
// expiration label whose RFC 3339 time lies before expTheshold. Images
// without labels or without the expiration label never expire. An
// unparsable value is logged, using k as the image name, and treated as not
// expired.
func isExpiredImage(ctx context.Context, k []byte, bkt *bolt.Bucket, expTheshold time.Time) bool {
	labels := bkt.Bucket(bucketKeyObjectLabels)
	if labels == nil {
		return false
	}

	raw := labels.Get(labelGCExpire)
	if raw == nil {
		return false
	}

	expiry, err := time.Parse(time.RFC3339, string(raw))
	if err != nil {
		log.G(ctx).WithError(err).WithField("image", string(k)).Infof("ignoring invalid expiration value %q", string(raw))
		return false
	}

	return expTheshold.After(expiry)
}
// gcnode builds a gc.Node from a resource type, a namespace and a key.
func gcnode(t gc.ResourceType, ns, key string) gc.Node {
	var n gc.Node
	n.Type = t
	n.Namespace = ns
	n.Key = key
	return n
}

901
core/metadata/gc_test.go Normal file
View File

@@ -0,0 +1,901 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metadata
import (
"bytes"
"context"
"fmt"
"io"
"math/rand"
"path/filepath"
"sort"
"testing"
"text/tabwriter"
"time"
"github.com/containerd/containerd/v2/core/metadata/boltutil"
"github.com/containerd/containerd/v2/gc"
"github.com/opencontainers/go-digest"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
bolt "go.etcd.io/bbolt"
)
func TestResourceMax(t *testing.T) {
if ResourceContent != resourceContentFlat&gc.ResourceMax {
t.Fatalf("Invalid flat content type: %d (max %d)", resourceContentFlat, gc.ResourceMax)
}
if ResourceSnapshot != resourceSnapshotFlat&gc.ResourceMax {
t.Fatalf("Invalid flat snapshot type: %d (max %d)", resourceSnapshotFlat, gc.ResourceMax)
}
}
// TestGCRoots populates a fresh database with images, containers, content,
// ingests, snapshots, leases and sandboxes spread over several namespaces and
// verifies that scanRoots emits exactly the expected set of root nodes.
func TestGCRoots(t *testing.T) {
db, err := newDatabase(t)
require.NoError(t, err)
// alters is the fixture: each entry writes one object into the database.
alters := []alterFunc{
addImage("ns1", "image1", dgst(1), nil),
addImage("ns1", "image2", dgst(2), labelmap(string(labelGCSnapRef)+"overlay", "sn2")),
addImage("ns2", "image3", dgst(10), labelmap(
string(labelGCContentRef), dgst(11).String(),
string(labelGCImageRef), "image4",
)),
addImage("ns2", "image4", dgst(12), labelmap(string(labelGCExpire), time.Now().Format(time.RFC3339))),
addImage("ns2", "image5", dgst(13), labelmap(string(labelGCExpire), time.Now().Format(time.RFC3339))),
addContainer("ns1", "container1", "overlay", "sn4", nil),
addContainer("ns1", "container2", "overlay", "sn5", labelmap(string(labelGCSnapRef)+"overlay", "sn6")),
addContainer("ns1", "container3", "overlay", "sn7", labelmap(
string(labelGCSnapRef)+"overlay/anything-1", "sn8",
string(labelGCSnapRef)+"overlay/anything-2", "sn9",
string(labelGCContentRef), dgst(7).String())),
addContainer("ns1", "container4", "", "", labelmap(
string(labelGCContentRef)+".0", dgst(8).String(),
string(labelGCContentRef)+".1", dgst(9).String())),
addContent("ns1", dgst(1), nil),
addContent("ns1", dgst(2), nil),
addContent("ns1", dgst(3), nil),
addContent("ns2", dgst(1), nil),
addContent("ns2", dgst(2), labelmap(string(labelGCRoot), "always")),
addContent("ns2", dgst(8), nil),
addContent("ns2", dgst(9), nil),
addIngest("ns1", "ingest-1", "", nil), // will be seen as expired
addIngest("ns1", "ingest-2", "", timeIn(0)), // expired
addIngest("ns1", "ingest-3", "", timeIn(time.Hour)),
addIngest("ns2", "ingest-4", "", nil),
addIngest("ns2", "ingest-5", dgst(8), nil),
addIngest("ns2", "ingest-6", "", nil), // added to expired lease
addIngest("ns2", "ingest-7", dgst(9), nil), // added to expired lease
addSnapshot("ns1", "overlay", "sn1", "", nil),
addSnapshot("ns1", "overlay", "sn2", "", nil),
addSnapshot("ns1", "overlay", "sn3", "", labelmap(string(labelGCRoot), "always")),
addSnapshot("ns1", "overlay", "sn4", "", nil),
addSnapshot("ns1", "overlay", "sn5", "", nil),
addSnapshot("ns1", "overlay", "sn6", "", nil),
addSnapshot("ns1", "overlay", "sn7", "", nil),
addSnapshot("ns1", "overlay", "sn8", "", nil),
addSnapshot("ns1", "overlay", "sn9", "", nil),
// Leases l1 and l2 are created implicitly by attaching resources to them.
addLeaseSnapshot("ns2", "l1", "overlay", "sn5"),
addLeaseSnapshot("ns2", "l2", "overlay", "sn6"),
addLeaseContent("ns2", "l1", dgst(4)),
addLeaseContent("ns2", "l2", dgst(5)),
// l3 carries an expiration label one hour in the future.
addLease("ns2", "l3", labelmap(string(labelGCExpire), time.Now().Add(time.Hour).Format(time.RFC3339))),
addLeaseContent("ns2", "l3", dgst(6)),
addLeaseSnapshot("ns2", "l3", "overlay", "sn7"),
addLeaseImage("ns2", "l3", "image5"),
addLeaseIngest("ns2", "l3", "ingest-4"),
addLeaseIngest("ns2", "l3", "ingest-5"),
// l4 carries an expiration label set to the time the fixture is built.
addLease("ns2", "l4", labelmap(string(labelGCExpire), time.Now().Format(time.RFC3339))),
addLeaseContent("ns2", "l4", dgst(7)),
addLeaseSnapshot("ns2", "l4", "overlay", "sn8"),
addLeaseImage("ns2", "l4", "image4"),
addLeaseIngest("ns2", "l4", "ingest-6"),
addLeaseIngest("ns2", "l4", "ingest-7"),
// The ns3 lease carries the labelGCFlat label; its resources appear as the flat types in expected.
addLease("ns3", "l1", labelmap(string(labelGCFlat), time.Now().Add(time.Hour).Format(time.RFC3339))),
addLeaseContent("ns3", "l1", dgst(1)),
addLeaseSnapshot("ns3", "l1", "overlay", "sn1"),
addLeaseImage("ns3", "l1", "image1"),
addLeaseIngest("ns3", "l1", "ingest-1"),
addSandbox("ns3", "sandbox1", nil),
addSandbox("ns4", "sandbox1", labelmap(string(labelGCSnapRef)+"overlay", "sn1")),
}
// expected lists the nodes scanRoots must report. Order is irrelevant:
// checkNodeC hands both lists to checkNodesEqual, which sorts them first.
expected := []gc.Node{
gcnode(ResourceContent, "ns1", dgst(7).String()),
gcnode(ResourceContent, "ns1", dgst(8).String()),
gcnode(ResourceContent, "ns1", dgst(9).String()),
gcnode(ResourceContent, "ns2", dgst(2).String()),
gcnode(ResourceContent, "ns2", dgst(4).String()),
gcnode(ResourceContent, "ns2", dgst(5).String()),
gcnode(ResourceContent, "ns2", dgst(6).String()),
gcnode(ResourceSnapshot, "ns1", "overlay/sn3"),
gcnode(ResourceSnapshot, "ns1", "overlay/sn4"),
gcnode(ResourceSnapshot, "ns1", "overlay/sn5"),
gcnode(ResourceSnapshot, "ns1", "overlay/sn6"),
gcnode(ResourceSnapshot, "ns1", "overlay/sn7"),
gcnode(ResourceSnapshot, "ns1", "overlay/sn8"),
gcnode(ResourceSnapshot, "ns1", "overlay/sn9"),
gcnode(ResourceSnapshot, "ns2", "overlay/sn5"),
gcnode(ResourceSnapshot, "ns2", "overlay/sn6"),
gcnode(ResourceSnapshot, "ns2", "overlay/sn7"),
gcnode(ResourceSnapshot, "ns4", "overlay/sn1"),
gcnode(ResourceImage, "ns1", "image1"),
gcnode(ResourceImage, "ns1", "image2"),
gcnode(ResourceImage, "ns2", "image3"),
gcnode(ResourceImage, "ns2", "image5"),
gcnode(ResourceLease, "ns2", "l1"),
gcnode(ResourceLease, "ns2", "l2"),
gcnode(ResourceLease, "ns2", "l3"),
gcnode(ResourceIngest, "ns1", "ingest-3"),
gcnode(ResourceIngest, "ns2", "ingest-4"),
gcnode(ResourceIngest, "ns2", "ingest-5"),
gcnode(ResourceLease, "ns3", "l1"),
gcnode(ResourceIngest, "ns3", "ingest-1"),
gcnode(resourceContentFlat, "ns3", dgst(1).String()),
gcnode(resourceSnapshotFlat, "ns3", "overlay/sn1"),
gcnode(resourceImageFlat, "ns3", "image1"),
}
// Apply every fixture mutation inside a single write transaction.
if err := db.Update(func(tx *bolt.Tx) error {
v1bkt, err := tx.CreateBucketIfNotExists(bucketKeyVersion)
if err != nil {
return err
}
for _, alter := range alters {
if err := alter(v1bkt); err != nil {
return err
}
}
return nil
}); err != nil {
t.Fatalf("Update failed: %+v", err)
}
ctx := context.Background()
// Scan roots with a GC context that has no additional collectors (nil).
checkNodeC(ctx, t, db, expected, func(ctx context.Context, tx *bolt.Tx, nc chan<- gc.Node) error {
return startGCContext(ctx, nil).scanRoots(ctx, tx, nc)
})
}
// TestGCRemove writes a fixture into a fresh database, checks that scanAll
// reports every node in it, removes every other node through the GC context,
// and then checks that scanAll reports only the nodes that were not removed.
func TestGCRemove(t *testing.T) {
db, err := newDatabase(t)
require.NoError(t, err)
// alters is the fixture: each entry writes one object into the database.
alters := []alterFunc{
addImage("ns1", "image1", dgst(1), nil),
addImage("ns1", "image2", dgst(2), labelmap(string(labelGCSnapRef)+"overlay", "sn2")),
addContainer("ns1", "container1", "overlay", "sn4", nil),
addContent("ns1", dgst(1), nil),
addContent("ns1", dgst(2), nil),
addContent("ns1", dgst(3), nil),
addContent("ns2", dgst(1), nil),
addContent("ns2", dgst(2), labelmap(string(labelGCRoot), "always")),
addIngest("ns1", "ingest-1", "", nil),
addIngest("ns2", "ingest-2", "", timeIn(0)),
addSnapshot("ns1", "overlay", "sn1", "", nil),
addSnapshot("ns1", "overlay", "sn2", "", nil),
addSnapshot("ns1", "overlay", "sn3", "", labelmap(string(labelGCRoot), "always")),
addSnapshot("ns1", "overlay", "sn4", "", nil),
addSnapshot("ns2", "overlay", "sn1", "", nil),
addLease("ns1", "l1", labelmap(string(labelGCExpire), time.Now().Add(time.Hour).Format(time.RFC3339))),
addLease("ns2", "l2", labelmap(string(labelGCExpire), time.Now().Format(time.RFC3339))),
}
// all is the full set of nodes scanAll is expected to report for the fixture.
all := []gc.Node{
gcnode(ResourceContent, "ns1", dgst(1).String()),
gcnode(ResourceContent, "ns1", dgst(2).String()),
gcnode(ResourceContent, "ns1", dgst(3).String()),
gcnode(ResourceContent, "ns2", dgst(1).String()),
gcnode(ResourceContent, "ns2", dgst(2).String()),
gcnode(ResourceSnapshot, "ns1", "overlay/sn1"),
gcnode(ResourceSnapshot, "ns1", "overlay/sn2"),
gcnode(ResourceSnapshot, "ns1", "overlay/sn3"),
gcnode(ResourceSnapshot, "ns1", "overlay/sn4"),
gcnode(ResourceSnapshot, "ns2", "overlay/sn1"),
gcnode(ResourceImage, "ns1", "image1"),
gcnode(ResourceImage, "ns1", "image2"),
gcnode(ResourceLease, "ns1", "l1"),
gcnode(ResourceLease, "ns2", "l2"),
gcnode(ResourceIngest, "ns1", "ingest-1"),
gcnode(ResourceIngest, "ns2", "ingest-2"),
}
// Split all by index parity: even positions get removed, odd positions must survive.
var deleted, remaining []gc.Node
for i, n := range all {
if i%2 == 0 {
deleted = append(deleted, n)
} else {
remaining = append(remaining, n)
}
}
// Apply every fixture mutation inside a single write transaction.
if err := db.Update(func(tx *bolt.Tx) error {
v1bkt, err := tx.CreateBucketIfNotExists(bucketKeyVersion)
if err != nil {
return err
}
for _, alter := range alters {
if err := alter(v1bkt); err != nil {
return err
}
}
return nil
}); err != nil {
t.Fatalf("Update failed: %+v", err)
}
ctx := context.Background()
c := startGCContext(ctx, nil)
// Before any removal, scanAll must report every node.
checkNodes(ctx, t, db, all, func(ctx context.Context, tx *bolt.Tx, fn func(context.Context, gc.Node) error) error {
return c.scanAll(ctx, tx, fn)
})
// Stop here if the initial scan already mismatched; the removal checks would only add noise.
if t.Failed() {
t.Fatal("Scan all failed")
}
// Remove the selected nodes in one write transaction.
if err := db.Update(func(tx *bolt.Tx) error {
for _, n := range deleted {
if _, err := c.remove(ctx, tx, n); err != nil {
return err
}
}
return nil
}); err != nil {
t.Fatalf("Update failed: %+v", err)
}
// After removal, scanAll must report exactly the nodes that were kept.
checkNodes(ctx, t, db, remaining, func(ctx context.Context, tx *bolt.Tx, fn func(context.Context, gc.Node) error) error {
return c.scanAll(ctx, tx, fn)
})
}
// TestGCRefs writes content, images, ingests and snapshots with various
// reference labels and parents into a fresh database, then verifies for each
// node listed in refs that the GC context's references method emits exactly
// the expected referenced nodes.
func TestGCRefs(t *testing.T) {
db, err := newDatabase(t)
require.NoError(t, err)
// alters is the fixture: each entry writes one object into the database.
alters := []alterFunc{
addContent("ns1", dgst(1), nil),
addContent("ns1", dgst(2), nil),
addContent("ns1", dgst(3), nil),
addContent("ns1", dgst(4), labelmap(string(labelGCContentRef), dgst(1).String())),
addContent("ns1", dgst(5), labelmap(string(labelGCContentRef)+".anything-1", dgst(2).String(), string(labelGCContentRef)+".anything-2", dgst(3).String())),
addContent("ns1", dgst(6), labelmap(string(labelGCContentRef)+"bad", dgst(1).String())),
addContent("ns1", dgst(7), labelmap(string(labelGCContentRef)+"/anything-1", dgst(2).String(), string(labelGCContentRef)+"/anything-2", dgst(3).String())),
addContent("ns2", dgst(1), nil),
addContent("ns2", dgst(2), nil),
addImage("ns1", "image1", dgst(3), nil),
addImage("ns1", "image2", dgst(4), labelmap(
string(labelGCImageRef)+".anything", "image1",
string(labelGCContentRef)+".anotherimage", dgst(5).String())),
addIngest("ns1", "ingest-1", "", nil),
addIngest("ns2", "ingest-2", dgst(8), nil),
addSnapshot("ns1", "overlay", "sn1", "", nil),
addSnapshot("ns1", "overlay", "sn2", "sn1", nil),
addSnapshot("ns1", "overlay", "sn3", "sn2", nil),
addSnapshot("ns1", "overlay", "sn4", "", labelmap(string(labelGCSnapRef)+"btrfs", "sn1", string(labelGCSnapRef)+"overlay", "sn1")),
addSnapshot("ns1", "overlay", "sn5", "", labelmap(string(labelGCSnapRef)+"overlay/anything-1", "sn1", string(labelGCSnapRef)+"overlay/anything-2", "sn2")),
addSnapshot("ns1", "btrfs", "sn1", "", nil),
addSnapshot("ns2", "overlay", "sn1", "", nil),
addSnapshot("ns2", "overlay", "sn2", "sn1", nil),
addSnapshot("ns2", "overlay", "sn3", "", labelmap(
string(labelGCContentRef), dgst(1).String(),
string(labelGCContentRef)+".keep-me", dgst(6).String())),
// Test flat references don't follow label references
addContent("ns3", dgst(1), nil),
addContent("ns3", dgst(2), labelmap(string(labelGCContentRef)+".0", dgst(1).String())),
addSnapshot("ns3", "overlay", "sn1", "", nil),
addSnapshot("ns3", "overlay", "sn2", "sn1", nil),
addSnapshot("ns3", "overlay", "sn3", "", labelmap(string(labelGCSnapRef)+"btrfs", "sn1", string(labelGCSnapRef)+"overlay", "sn1")),
}
// refs maps each node under test to the nodes references is expected to
// emit for it; a nil value means no references are expected.
refs := map[gc.Node][]gc.Node{
gcnode(ResourceContent, "ns1", dgst(1).String()): nil,
gcnode(ResourceContent, "ns1", dgst(2).String()): nil,
gcnode(ResourceContent, "ns1", dgst(3).String()): nil,
gcnode(ResourceContent, "ns1", dgst(4).String()): {
gcnode(ResourceContent, "ns1", dgst(1).String()),
},
gcnode(ResourceContent, "ns1", dgst(5).String()): {
gcnode(ResourceContent, "ns1", dgst(2).String()),
gcnode(ResourceContent, "ns1", dgst(3).String()),
},
// dgst(6) is labelled with the content-ref key plus a bare "bad" suffix; no reference is expected.
gcnode(ResourceContent, "ns1", dgst(6).String()): nil,
gcnode(ResourceContent, "ns1", dgst(7).String()): {
gcnode(ResourceContent, "ns1", dgst(2).String()),
gcnode(ResourceContent, "ns1", dgst(3).String()),
},
gcnode(ResourceContent, "ns2", dgst(1).String()): nil,
gcnode(ResourceContent, "ns2", dgst(2).String()): nil,
gcnode(ResourceSnapshot, "ns1", "overlay/sn1"): nil,
gcnode(ResourceSnapshot, "ns1", "overlay/sn2"): {
gcnode(ResourceSnapshot, "ns1", "overlay/sn1"),
},
gcnode(ResourceSnapshot, "ns1", "overlay/sn3"): {
gcnode(ResourceSnapshot, "ns1", "overlay/sn2"),
},
gcnode(ResourceSnapshot, "ns1", "overlay/sn4"): {
gcnode(ResourceSnapshot, "ns1", "btrfs/sn1"),
gcnode(ResourceSnapshot, "ns1", "overlay/sn1"),
},
gcnode(ResourceSnapshot, "ns1", "overlay/sn5"): {
gcnode(ResourceSnapshot, "ns1", "overlay/sn1"),
gcnode(ResourceSnapshot, "ns1", "overlay/sn2"),
},
gcnode(ResourceSnapshot, "ns1", "btrfs/sn1"): nil,
gcnode(ResourceSnapshot, "ns2", "overlay/sn1"): nil,
gcnode(ResourceSnapshot, "ns2", "overlay/sn2"): {
gcnode(ResourceSnapshot, "ns2", "overlay/sn1"),
},
gcnode(ResourceSnapshot, "ns2", "overlay/sn3"): {
gcnode(ResourceContent, "ns2", dgst(1).String()),
gcnode(ResourceContent, "ns2", dgst(6).String()),
},
gcnode(ResourceImage, "ns1", "image1"): {
gcnode(ResourceContent, "ns1", dgst(3).String()),
},
gcnode(ResourceImage, "ns1", "image2"): {
gcnode(ResourceContent, "ns1", dgst(4).String()),
gcnode(ResourceContent, "ns1", dgst(5).String()),
gcnode(ResourceImage, "ns1", "image1"),
},
gcnode(ResourceIngest, "ns1", "ingest-1"): nil,
gcnode(ResourceIngest, "ns2", "ingest-2"): {
gcnode(ResourceContent, "ns2", dgst(8).String()),
},
// Flat node types: only the parent/target is expected, never label references.
gcnode(resourceSnapshotFlat, "ns3", "overlay/sn2"): {
gcnode(resourceSnapshotFlat, "ns3", "overlay/sn1"),
},
gcnode(ResourceSnapshot, "ns3", "overlay/sn2"): {
gcnode(ResourceSnapshot, "ns3", "overlay/sn1"),
},
gcnode(resourceSnapshotFlat, "ns3", "overlay/sn1"): nil,
gcnode(resourceSnapshotFlat, "ns3", "overlay/sn3"): nil,
gcnode(ResourceSnapshot, "ns3", "overlay/sn3"): {
gcnode(ResourceSnapshot, "ns3", "btrfs/sn1"),
gcnode(ResourceSnapshot, "ns3", "overlay/sn1"),
},
gcnode(resourceImageFlat, "ns1", "image1"): {
gcnode(resourceContentFlat, "ns1", dgst(3).String()),
},
gcnode(resourceImageFlat, "ns1", "image2"): {
gcnode(resourceContentFlat, "ns1", dgst(4).String()),
},
}
// Apply every fixture mutation inside a single write transaction.
if err := db.Update(func(tx *bolt.Tx) error {
v1bkt, err := tx.CreateBucketIfNotExists(bucketKeyVersion)
if err != nil {
return err
}
for _, alter := range alters {
if err := alter(v1bkt); err != nil {
return err
}
}
return nil
}); err != nil {
t.Fatalf("Update failed: %+v", err)
}
ctx := context.Background()
c := startGCContext(ctx, nil)
for n, nodes := range refs {
checkNodeC(ctx, t, db, nodes, func(ctx context.Context, tx *bolt.Tx, nc chan<- gc.Node) error {
// Forward each reference to the collector channel, giving up if ctx is cancelled.
return c.references(ctx, tx, n, func(n gc.Node) {
select {
case nc <- n:
case <-ctx.Done():
}
})
})
// Abort on the first failing node so the message names it.
if t.Failed() {
t.Fatalf("Failure scanning %v", n)
}
}
}
// TestCollectibleResources registers a testCollector for a custom resource
// type with the GC context and verifies that the collector's nodes take part
// in references, scanAll, scanRoots and remove alongside the objects stored
// in the database.
func TestCollectibleResources(t *testing.T) {
db, err := newDatabase(t)
require.NoError(t, err)
// Resource type handled by the test collector registered below.
testResource := gc.ResourceType(0x10)
alters := []alterFunc{
addContent("ns1", dgst(1), nil),
addImage("ns1", "image1", dgst(1), nil),
// The label suffix "test" is the value returned by testCollector.ReferenceLabel.
addContent("ns1", dgst(2), map[string]string{
"containerd.io/gc.ref.test": "test2",
}),
addImage("ns1", "image2", dgst(2), nil),
// lease1 expires one hour from now; lease2 expired one hour ago.
addLease("ns1", "lease1", labelmap(string(labelGCExpire), time.Now().Add(time.Hour).Format(time.RFC3339))),
addLease("ns1", "lease2", labelmap(string(labelGCExpire), time.Now().Add(-1*time.Hour).Format(time.RFC3339))),
}
// refs maps each node under test to the nodes references is expected to emit for it.
refs := map[gc.Node][]gc.Node{
gcnode(ResourceContent, "ns1", dgst(1).String()): nil,
gcnode(ResourceContent, "ns1", dgst(2).String()): {
gcnode(testResource, "ns1", "test2"),
},
}
// all is what scanAll is expected to report: database objects plus the collector's nodes.
all := []gc.Node{
gcnode(ResourceContent, "ns1", dgst(1).String()),
gcnode(ResourceContent, "ns1", dgst(2).String()),
gcnode(ResourceImage, "ns1", "image1"),
gcnode(ResourceImage, "ns1", "image2"),
gcnode(ResourceLease, "ns1", "lease1"),
gcnode(ResourceLease, "ns1", "lease2"),
gcnode(testResource, "ns1", "test1"),
gcnode(testResource, "ns1", "test2"), // 7: Will be removed
gcnode(testResource, "ns1", "test3"),
gcnode(testResource, "ns1", "test4"),
}
// Index into all of the node removed near the end of the test.
removeIndex := 7
// roots is what scanRoots is expected to report.
roots := []gc.Node{
gcnode(ResourceImage, "ns1", "image1"),
gcnode(ResourceImage, "ns1", "image2"),
gcnode(ResourceLease, "ns1", "lease1"),
gcnode(testResource, "ns1", "test1"),
gcnode(testResource, "ns1", "test3"),
}
collector := &testCollector{
all: []gc.Node{
gcnode(testResource, "ns1", "test1"),
gcnode(testResource, "ns1", "test2"),
gcnode(testResource, "ns1", "test3"),
gcnode(testResource, "ns1", "test4"),
},
active: []gc.Node{
gcnode(testResource, "ns1", "test1"),
},
leased: map[string][]gc.Node{
"lease1": {
gcnode(testResource, "ns1", "test3"),
},
"lease2": {
gcnode(testResource, "ns1", "test4"),
},
},
}
// Apply every fixture mutation inside a single write transaction.
if err := db.Update(func(tx *bolt.Tx) error {
v1bkt, err := tx.CreateBucketIfNotExists(bucketKeyVersion)
if err != nil {
return err
}
for _, alter := range alters {
if err := alter(v1bkt); err != nil {
return err
}
}
return nil
}); err != nil {
t.Fatalf("Update failed: %+v", err)
}
ctx := context.Background()
// Register the collector for the custom resource type.
c := startGCContext(ctx, map[gc.ResourceType]Collector{
testResource: collector,
})
for n, nodes := range refs {
checkNodeC(ctx, t, db, nodes, func(ctx context.Context, tx *bolt.Tx, nc chan<- gc.Node) error {
// Forward each reference to the collector channel, giving up if ctx is cancelled.
return c.references(ctx, tx, n, func(n gc.Node) {
select {
case nc <- n:
case <-ctx.Done():
}
})
})
if t.Failed() {
t.Fatalf("Failure scanning %v", n)
}
}
checkNodes(ctx, t, db, all, func(ctx context.Context, tx *bolt.Tx, fn func(context.Context, gc.Node) error) error {
return c.scanAll(ctx, tx, fn)
})
checkNodeC(ctx, t, db, roots, func(ctx context.Context, tx *bolt.Tx, nc chan<- gc.Node) error {
return c.scanRoots(ctx, tx, nc)
})
// Remove the selected collector node through the GC context.
if err := db.Update(func(tx *bolt.Tx) error {
if _, err := c.remove(ctx, tx, all[removeIndex]); err != nil {
return err
}
return nil
}); err != nil {
t.Fatalf("Update failed: %+v", err)
}
// Drop the removed node from the expectation and scan again.
all = append(all[:removeIndex], all[removeIndex+1:]...)
checkNodes(ctx, t, db, all, func(ctx context.Context, tx *bolt.Tx, fn func(context.Context, gc.Node) error) error {
return c.scanAll(ctx, tx, fn)
})
}
// testCollector is an in-memory collector used by the tests. It serves the
// node lists it was constructed with and doubles as its own collection
// context (see StartCollection).
type testCollector struct {
	all    []gc.Node            // every node known to the collector
	active []gc.Node            // nodes reported by Active
	leased map[string][]gc.Node // nodes reported by Leased, keyed by lease ID
}

// StartCollection returns the collector itself as the collection context and
// never fails.
func (tc *testCollector) StartCollection(context.Context) (CollectionContext, error) {
	var cc CollectionContext = tc
	return cc, nil
}

// ReferenceLabel returns the fixed label name "test".
func (tc *testCollector) ReferenceLabel() string {
	const label = "test"
	return label
}

// All calls fn once for every node known to the collector.
func (tc *testCollector) All(fn func(gc.Node)) {
	nodes := tc.all
	for i := range nodes {
		fn(nodes[i])
	}
}

// Active calls fn for every active node that belongs to namespace.
func (tc *testCollector) Active(namespace string, fn func(gc.Node)) {
	for _, node := range tc.active {
		if node.Namespace != namespace {
			continue
		}
		fn(node)
	}
}

// Leased calls fn for every node held by lease that belongs to namespace.
func (tc *testCollector) Leased(namespace, lease string, fn func(gc.Node)) {
	held := tc.leased[lease]
	for _, node := range held {
		if node.Namespace != namespace {
			continue
		}
		fn(node)
	}
}

// Remove deletes the first node equal to n from the list of all nodes; it
// does nothing when n is not present.
func (tc *testCollector) Remove(n gc.Node) {
	for i, candidate := range tc.all {
		if candidate != n {
			continue
		}
		tc.all = append(tc.all[:i], tc.all[i+1:]...)
		return
	}
}

// Cancel is a no-op for the test collector and always returns nil.
func (tc *testCollector) Cancel() error {
	var err error
	return err
}

// Finish is a no-op for the test collector and always returns nil.
func (tc *testCollector) Finish() error {
	var err error
	return err
}
// newDatabase opens a bolt database named "test.db" inside a temporary
// directory owned by t and registers a cleanup that closes it, asserting
// that the close succeeds.
func newDatabase(t testing.TB) (*bolt.DB, error) {
	path := filepath.Join(t.TempDir(), "test.db")

	db, err := bolt.Open(path, 0777, nil)
	if err != nil {
		return nil, err
	}

	closeDB := func() {
		assert.NoError(t, db.Close())
	}
	t.Cleanup(closeDB)

	return db, nil
}
// checkNodeC runs fn inside a db.View transaction, gathers every node fn
// sends on the channel it is handed, and compares the gathered nodes with
// expected via checkNodesEqual. The test is failed immediately if the
// transaction returns an error.
func checkNodeC(ctx context.Context, t *testing.T, db *bolt.DB, expected []gc.Node, fn func(context.Context, *bolt.Tx, chan<- gc.Node) error) {
t.Helper()
var actual []gc.Node
nc := make(chan gc.Node)
done := make(chan struct{})
// Drain nc in the background so that sends from fn on the unbuffered channel can proceed.
go func() {
defer close(done)
for n := range nc {
actual = append(actual, n)
}
}()
if err := db.View(func(tx *bolt.Tx) error {
// Closing nc once fn returns ends the collector goroutine's range loop.
defer close(nc)
return fn(ctx, tx, nc)
}); err != nil {
t.Fatal(err)
}
// Wait for the collector goroutine to finish before reading actual.
<-done
checkNodesEqual(t, actual, expected)
}
// checkNodes runs fn inside a db.View transaction, recording every node fn
// passes to the supplied callback, and then compares the recorded nodes with
// expected via checkNodesEqual. The test is failed immediately if the
// transaction returns an error.
func checkNodes(ctx context.Context, t *testing.T, db *bolt.DB, expected []gc.Node, fn func(context.Context, *bolt.Tx, func(context.Context, gc.Node) error) error) {
	t.Helper()

	var seen []gc.Node
	record := func(_ context.Context, node gc.Node) error {
		seen = append(seen, node)
		return nil
	}

	err := db.View(func(tx *bolt.Tx) error {
		return fn(ctx, tx, record)
	})
	if err != nil {
		t.Fatal(err)
	}

	checkNodesEqual(t, seen, expected)
}
// checkNodesEqual sorts both node lists in place and compares them. Callers
// in this file pass the actual nodes as n1 and the expected nodes as n2.
//
// When the lengths differ the test is aborted with a two-column table of
// expected versus actual nodes; otherwise every differing position is
// reported with t.Errorf.
func checkNodesEqual(t *testing.T, n1, n2 []gc.Node) {
	t.Helper()
	sort.Sort(nodeList(n1))
	sort.Sort(nodeList(n2))

	if len(n1) == len(n2) {
		for i := range n1 {
			if n1[i] == n2[i] {
				continue
			}
			t.Errorf("[%d] root does not match expected: expected %v, got %v", i, printNode(n2[i]), printNode(n1[i]))
		}
		return
	}

	// Length mismatch: render both lists side by side and stop the test.
	var buf bytes.Buffer
	tw := tabwriter.NewWriter(&buf, 8, 4, 1, ' ', 0)

	rows := len(n1)
	if rows < len(n2) {
		rows = len(n2)
	}

	fmt.Fprintln(tw, "Expected:\tActual:")
	for row := 0; row < rows; row++ {
		var expectedCol, actualCol string
		if row < len(n2) {
			expectedCol = printNode(n2[row])
		}
		if row < len(n1) {
			actualCol = printNode(n1[row])
		}
		fmt.Fprintln(tw, expectedCol+"\t"+actualCol)
	}
	tw.Flush()

	t.Fatal("Nodes do not match\n" + buf.String())
}
// printNode renders n as "<type>(<namespace>/<key>)" using a readable name
// for the resource types known to this package. Nodes of any other type are
// rendered with the default %v formatting.
func printNode(n gc.Node) string {
	typeNames := map[gc.ResourceType]string{
		ResourceContent:      "content",
		ResourceSnapshot:     "snapshot",
		ResourceContainer:    "container",
		ResourceTask:         "task",
		ResourceImage:        "image",
		ResourceLease:        "lease",
		ResourceIngest:       "ingest",
		resourceContentFlat:  "content-flat",
		resourceSnapshotFlat: "snapshot-flat",
		resourceImageFlat:    "image-flat",
	}

	name, known := typeNames[n.Type]
	if !known {
		return fmt.Sprintf("%v", n)
	}
	return fmt.Sprintf("%s(%s/%s)", name, n.Namespace, n.Key)
}
// nodeList implements sort.Interface over gc.Node values, ordering them by
// type, then namespace, then key.
type nodeList []gc.Node

// Len returns the number of nodes in the list.
func (nodes nodeList) Len() int {
	count := len(nodes)
	return count
}

// Less reports whether node i sorts before node j: first by Type, then by
// Namespace, and finally by Key.
func (nodes nodeList) Less(i, j int) bool {
	a, b := nodes[i], nodes[j]
	switch {
	case a.Type != b.Type:
		return a.Type < b.Type
	case a.Namespace != b.Namespace:
		return a.Namespace < b.Namespace
	default:
		return a.Key < b.Key
	}
}

// Swap exchanges the nodes at positions i and j.
func (nodes nodeList) Swap(i, j int) {
	held := nodes[i]
	nodes[i] = nodes[j]
	nodes[j] = held
}
// alterFunc applies one fixture mutation to the database. The tests in this
// file invoke each alterFunc with the bucket created for bucketKeyVersion.
type alterFunc func(bkt *bolt.Bucket) error
// addImage returns an alterFunc that creates the bucket for image name in
// namespace ns, stores dgst as its target digest and writes labels.
func addImage(ns, name string, dgst digest.Digest, labels map[string]string) alterFunc {
	return func(bkt *bolt.Bucket) error {
		imageBkt, err := createBuckets(bkt, ns, string(bucketKeyObjectImages), name)
		if err != nil {
			return err
		}

		targetBkt, err := imageBkt.CreateBucket(bucketKeyTarget)
		if err != nil {
			return err
		}

		err = targetBkt.Put(bucketKeyDigest, []byte(dgst.String()))
		if err != nil {
			return err
		}

		return boltutil.WriteLabels(imageBkt, labels)
	}
}
// addSnapshot returns an alterFunc that creates the bucket for snapshot name
// under snapshotter in namespace ns, records parent when it is non-empty and
// writes labels.
func addSnapshot(ns, snapshotter, name, parent string, labels map[string]string) alterFunc {
	return func(bkt *bolt.Bucket) error {
		snapBkt, err := createBuckets(bkt, ns, string(bucketKeyObjectSnapshots), snapshotter, name)
		if err != nil {
			return err
		}

		if len(parent) > 0 {
			if err = snapBkt.Put(bucketKeyParent, []byte(parent)); err != nil {
				return err
			}
		}

		return boltutil.WriteLabels(snapBkt, labels)
	}
}
// addContent returns an alterFunc that creates the blob bucket for dgst in
// namespace ns and writes labels into it.
func addContent(ns string, dgst digest.Digest, labels map[string]string) alterFunc {
	alter := func(bkt *bolt.Bucket) error {
		blobBkt, err := createBuckets(bkt, ns, string(bucketKeyObjectContent), string(bucketKeyObjectBlob), dgst.String())
		if err == nil {
			err = boltutil.WriteLabels(blobBkt, labels)
		}
		return err
	}
	return alter
}
// addIngest returns an alterFunc that creates the ingest bucket for ref in
// namespace ns. The expected digest is stored only when it is non-empty, and
// the expiration is written only when expires is non-nil.
func addIngest(ns, ref string, expected digest.Digest, expires *time.Time) alterFunc {
	return func(bkt *bolt.Bucket) error {
		ingestBkt, err := createBuckets(bkt, ns, string(bucketKeyObjectContent), string(bucketKeyObjectIngests), ref)
		if err != nil {
			return err
		}

		if expected != "" {
			if err = ingestBkt.Put(bucketKeyExpected, []byte(expected)); err != nil {
				return err
			}
		}

		if expires == nil {
			return nil
		}
		return writeExpireAt(*expires, ingestBkt)
	}
}
// addLease returns an alterFunc that creates the bucket for lease lid in
// namespace ns and writes labels into it.
func addLease(ns, lid string, labels map[string]string) alterFunc {
	alter := func(bkt *bolt.Bucket) error {
		leaseBkt, err := createBuckets(bkt, ns, string(bucketKeyObjectLeases), lid)
		if err == nil {
			err = boltutil.WriteLabels(leaseBkt, labels)
		}
		return err
	}
	return alter
}
// addLeaseSnapshot returns an alterFunc that records snapshot name of
// snapshotter under lease lid in namespace ns, creating the intermediate
// buckets as needed.
func addLeaseSnapshot(ns, lid, snapshotter, name string) alterFunc {
	return func(bkt *bolt.Bucket) error {
		path := []string{ns, string(bucketKeyObjectLeases), lid, string(bucketKeyObjectSnapshots), snapshotter}
		leasedBkt, err := createBuckets(bkt, path...)
		if err != nil {
			return err
		}
		// The snapshot name is stored as a key with no value.
		return leasedBkt.Put([]byte(name), nil)
	}
}
// addLeaseContent returns an alterFunc that records content dgst under lease
// lid in namespace ns, creating the intermediate buckets as needed.
func addLeaseContent(ns, lid string, dgst digest.Digest) alterFunc {
	return func(bkt *bolt.Bucket) error {
		path := []string{ns, string(bucketKeyObjectLeases), lid, string(bucketKeyObjectContent)}
		leasedBkt, err := createBuckets(bkt, path...)
		if err != nil {
			return err
		}
		// The digest string is stored as a key with no value.
		return leasedBkt.Put([]byte(dgst.String()), nil)
	}
}
// addLeaseImage returns an alterFunc that records image under lease lid in
// namespace ns, creating the intermediate buckets as needed.
func addLeaseImage(ns, lid, image string) alterFunc {
	return func(bkt *bolt.Bucket) error {
		path := []string{ns, string(bucketKeyObjectLeases), lid, string(bucketKeyObjectImages)}
		leasedBkt, err := createBuckets(bkt, path...)
		if err != nil {
			return err
		}
		// The image name is stored as a key with no value.
		return leasedBkt.Put([]byte(image), nil)
	}
}
// addLeaseIngest returns an alterFunc that records ingest ref under lease lid
// in namespace ns, creating the intermediate buckets as needed.
func addLeaseIngest(ns, lid, ref string) alterFunc {
	return func(bkt *bolt.Bucket) error {
		path := []string{ns, string(bucketKeyObjectLeases), lid, string(bucketKeyObjectIngests)}
		leasedBkt, err := createBuckets(bkt, path...)
		if err != nil {
			return err
		}
		// The ingest reference is stored as a key with no value.
		return leasedBkt.Put([]byte(ref), nil)
	}
}
// addContainer returns an alterFunc that creates the bucket for container
// name in namespace ns, stores its snapshotter and snapshot key (in that
// order) and writes labels.
func addContainer(ns, name, snapshotter, snapshot string, labels map[string]string) alterFunc {
	return func(bkt *bolt.Bucket) error {
		containerBkt, err := createBuckets(bkt, ns, string(bucketKeyObjectContainers), name)
		if err != nil {
			return err
		}

		fields := []struct {
			key   []byte
			value string
		}{
			{bucketKeySnapshotter, snapshotter},
			{bucketKeySnapshotKey, snapshot},
		}
		for _, field := range fields {
			if err := containerBkt.Put(field.key, []byte(field.value)); err != nil {
				return err
			}
		}

		return boltutil.WriteLabels(containerBkt, labels)
	}
}
// addSandbox returns an alterFunc that creates the bucket for sandbox name in
// namespace ns and writes labels into it.
func addSandbox(ns, name string, labels map[string]string) alterFunc {
	alter := func(bkt *bolt.Bucket) error {
		sandboxBkt, err := createBuckets(bkt, ns, string(bucketKeyObjectSandboxes), name)
		if err == nil {
			err = boltutil.WriteLabels(sandboxBkt, labels)
		}
		return err
	}
	return alter
}
// createBuckets walks names starting at bkt, creating each nested bucket
// that does not exist yet, and returns the innermost one. With no names it
// returns bkt itself. The first creation error aborts the walk.
func createBuckets(bkt *bolt.Bucket, names ...string) (*bolt.Bucket, error) {
	current := bkt
	for i := 0; i < len(names); i++ {
		child, err := current.CreateBucketIfNotExists([]byte(names[i]))
		if err != nil {
			return nil, err
		}
		current = child
	}
	return current, nil
}
// labelmap builds a label map from alternating key/value arguments. A later
// occurrence of a key overrides an earlier one. It panics when given an odd
// number of arguments, and returns an empty (non-nil) map when given none.
func labelmap(kv ...string) map[string]string {
	if len(kv)&1 == 1 {
		panic("bad labels argument")
	}

	labels := make(map[string]string)
	// Consume the arguments two at a time: key first, then its value.
	for rest := kv; len(rest) > 0; rest = rest[2:] {
		labels[rest[0]] = rest[1]
	}
	return labels
}
// dgst returns a deterministic SHA-256 digest for seed i, computed over the
// first 256 bytes produced by a math/rand source seeded with i. It panics if
// feeding the hash fails.
func dgst(i int64) digest.Digest {
	digester := digest.SHA256.Digester()
	source := rand.New(rand.NewSource(i))

	_, err := io.CopyN(digester.Hash(), source, 256)
	if err != nil {
		panic(err)
	}
	return digester.Digest()
}
// timeIn returns a pointer to the current UTC time shifted by d.
func timeIn(d time.Duration) *time.Time {
	at := new(time.Time)
	*at = time.Now().UTC().Add(d)
	return at
}

435
core/metadata/images.go Normal file
View File

@@ -0,0 +1,435 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metadata
import (
"context"
"encoding/binary"
"errors"
"fmt"
"strings"
"sync/atomic"
"time"
"github.com/containerd/containerd/v2/core/images"
"github.com/containerd/containerd/v2/core/metadata/boltutil"
"github.com/containerd/containerd/v2/errdefs"
"github.com/containerd/containerd/v2/filters"
"github.com/containerd/containerd/v2/labels"
"github.com/containerd/containerd/v2/namespaces"
"github.com/containerd/containerd/v2/pkg/epoch"
digest "github.com/opencontainers/go-digest"
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
bolt "go.etcd.io/bbolt"
)
// imageStore is the images.Store implementation returned by NewImageStore;
// it keeps image records in the metadata database.
type imageStore struct {
	db *DB
}

// NewImageStore returns an images.Store whose records live in the given
// bolt-backed metadata DB.
func NewImageStore(db *DB) images.Store {
	store := new(imageStore)
	store.db = db
	return store
}
// Get loads the image called name from the namespace carried by ctx.
//
// It returns an error wrapping errdefs.ErrNotFound when either the
// namespace's images bucket or the image's own bucket does not exist. On any
// error the returned image is the zero value.
func (s *imageStore) Get(ctx context.Context, name string) (images.Image, error) {
	namespace, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return images.Image{}, err
	}

	var found images.Image
	err = view(ctx, s.db, func(tx *bolt.Tx) error {
		// Resolve the image's bucket; a missing parent or a missing image
		// bucket are both reported as "not found".
		var ibkt *bolt.Bucket
		if bkt := getImagesBucket(tx, namespace); bkt != nil {
			ibkt = bkt.Bucket([]byte(name))
		}
		if ibkt == nil {
			return fmt.Errorf("image %q: %w", name, errdefs.ErrNotFound)
		}

		found.Name = name
		if err := readImage(&found, ibkt); err != nil {
			return fmt.Errorf("image %q: %w", name, err)
		}
		return nil
	})
	if err != nil {
		return images.Image{}, err
	}

	return found, nil
}
// List returns the images in the namespace carried by ctx that match the
// filter expressions fs (as parsed by filters.ParseAll).
//
// A filter that fails to parse yields an error wrapping
// errdefs.ErrInvalidArgument. A namespace without an images bucket yields a
// nil slice and no error.
func (s *imageStore) List(ctx context.Context, fs ...string) ([]images.Image, error) {
	namespace, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return nil, err
	}

	filter, err := filters.ParseAll(fs...)
	if err != nil {
		return nil, fmt.Errorf("%s: %w", err.Error(), errdefs.ErrInvalidArgument)
	}

	var matched []images.Image
	err = view(ctx, s.db, func(tx *bolt.Tx) error {
		bkt := getImagesBucket(tx, namespace)
		if bkt == nil {
			return nil // empty store
		}

		return bkt.ForEach(func(k, _ []byte) error {
			img := images.Image{Name: string(k)}
			if err := readImage(&img, bkt.Bucket(k)); err != nil {
				return err
			}

			if !filter.Match(adaptImage(img)) {
				return nil
			}
			matched = append(matched, img)
			return nil
		})
	})
	if err != nil {
		return nil, err
	}

	return matched, nil
}
// Create stores a new image record in the namespace carried by ctx and
// returns the record as written.
//
// Inside a single update transaction the image is validated, addImageLease
// is called with its name and labels, and a new bucket named after the image
// is created and filled. CreatedAt and UpdatedAt are both set to the epoch
// found in ctx (epoch.FromContext) when there is one, otherwise to the
// current UTC time; any CreatedAt supplied by the caller is overwritten.
//
// It returns an error wrapping errdefs.ErrAlreadyExists when a bucket for the
// image name already exists. On any error the returned image is the zero
// value.
func (s *imageStore) Create(ctx context.Context, image images.Image) (images.Image, error) {
	namespace, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return images.Image{}, err
	}

	if err := update(ctx, s.db, func(tx *bolt.Tx) error {
		if err := validateImage(&image); err != nil {
			return err
		}

		bkt, err := createImagesBucket(tx, namespace)
		if err != nil {
			return err
		}

		if err := addImageLease(ctx, tx, image.Name, image.Labels); err != nil {
			return err
		}

		ibkt, err := bkt.CreateBucket([]byte(image.Name))
		if err != nil {
			// errors.Is keeps this check working even if the sentinel is
			// ever returned wrapped.
			if !errors.Is(err, bolt.ErrBucketExists) {
				return err
			}

			return fmt.Errorf("image %q: %w", image.Name, errdefs.ErrAlreadyExists)
		}

		// The value of `image.CreatedAt` passed from the caller is discarded here.
		// NOTE: the zero `time.Time` is January 1, year 1, 00:00:00 UTC (not the
		// Unix epoch) and can be detected with `IsZero`, so a caller-supplied
		// value could be rejected. It is overwritten instead to keep the
		// existing behavior of this method unchanged.
		if tm := epoch.FromContext(ctx); tm != nil {
			image.CreatedAt = tm.UTC()
		} else {
			image.CreatedAt = time.Now().UTC()
		}
		image.UpdatedAt = image.CreatedAt
		return writeImage(ibkt, &image)
	}); err != nil {
		return images.Image{}, err
	}

	return image, nil
}
// Update modifies the stored image named image.Name in the namespace carried
// by ctx and returns the record as written.
//
// With no fieldpaths the stored record is replaced by image. Otherwise only
// the listed paths are copied from image onto the stored record: "labels",
// "labels.<key>", "annotations", "annotations.<key>" and "target". Any other
// path yields an error wrapping errdefs.ErrInvalidArgument. The stored
// CreatedAt is always preserved, and UpdatedAt is set to the epoch found in
// ctx when there is one, otherwise to the current UTC time.
//
// An empty image.Name yields errdefs.ErrInvalidArgument; a missing image
// yields an error wrapping errdefs.ErrNotFound.
func (s *imageStore) Update(ctx context.Context, image images.Image, fieldpaths ...string) (images.Image, error) {
namespace, err := namespaces.NamespaceRequired(ctx)
if err != nil {
return images.Image{}, err
}
if image.Name == "" {
return images.Image{}, fmt.Errorf("image name is required for update: %w", errdefs.ErrInvalidArgument)
}
var updated images.Image
if err := update(ctx, s.db, func(tx *bolt.Tx) error {
bkt, err := createImagesBucket(tx, namespace)
if err != nil {
return err
}
ibkt := bkt.Bucket([]byte(image.Name))
if ibkt == nil {
return fmt.Errorf("image %q: %w", image.Name, errdefs.ErrNotFound)
}
// Start from the stored record so that unlisted fields keep their values.
if err := readImage(&updated, ibkt); err != nil {
return fmt.Errorf("image %q: %w", image.Name, err)
}
// Remember the stored creation time; it is restored after the merge below.
createdat := updated.CreatedAt
updated.Name = image.Name
if len(fieldpaths) > 0 {
for _, path := range fieldpaths {
// Single-key paths ("labels.<key>", "annotations.<key>") copy just that entry.
if strings.HasPrefix(path, "labels.") {
if updated.Labels == nil {
updated.Labels = map[string]string{}
}
key := strings.TrimPrefix(path, "labels.")
updated.Labels[key] = image.Labels[key]
continue
} else if strings.HasPrefix(path, "annotations.") {
if updated.Target.Annotations == nil {
updated.Target.Annotations = map[string]string{}
}
key := strings.TrimPrefix(path, "annotations.")
updated.Target.Annotations[key] = image.Target.Annotations[key]
continue
}
// Whole-field paths replace the field as a unit.
switch path {
case "labels":
updated.Labels = image.Labels
case "target":
// NOTE(stevvooe): While we allow setting individual labels, we
// only support replacing the target as a unit, since that is
// commonly pulled as a unit from other sources. It often doesn't
// make sense to modify the size or digest without touching the
// mediatype, as well, for example.
updated.Target = image.Target
case "annotations":
updated.Target.Annotations = image.Target.Annotations
default:
return fmt.Errorf("cannot update %q field on image %q: %w", path, image.Name, errdefs.ErrInvalidArgument)
}
}
} else {
// No field paths: the supplied image replaces the stored record wholesale.
updated = image
}
if err := validateImage(&updated); err != nil {
return err
}
// Collectible label may be added, if so add to lease
if err := addImageLease(ctx, tx, updated.Name, updated.Labels); err != nil {
return err
}
// Restore the stored creation time, overriding whatever the merge produced.
updated.CreatedAt = createdat
if tm := epoch.FromContext(ctx); tm != nil {
updated.UpdatedAt = tm.UTC()
} else {
updated.UpdatedAt = time.Now().UTC()
}
return writeImage(ibkt, &updated)
}); err != nil {
return images.Image{}, err
}
return updated, nil
}
// Delete removes the image record called name from the namespace carried by
// ctx.
//
// All work happens inside a single update call:
//   - the image lease for name is released via removeImageLease;
//   - if the delete options carry a Target with a non-empty digest, the
//     stored image is read back and must have the same target digest,
//     otherwise an ErrNotFound-wrapped error is returned before the image
//     bucket is deleted;
//   - the image bucket is deleted and the database dirty counter is bumped.
//
// It returns an error wrapping errdefs.ErrNotFound when the namespace has no
// images bucket or no bucket for name. Errors from the namespace lookup and
// from any delete option are returned unchanged.
func (s *imageStore) Delete(ctx context.Context, name string, opts ...images.DeleteOpt) error {
	namespace, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return err
	}

	var options images.DeleteOptions
	for _, opt := range opts {
		if err := opt(ctx, &options); err != nil {
			return err
		}
	}

	return update(ctx, s.db, func(tx *bolt.Tx) error {
		bkt := getImagesBucket(tx, namespace)
		if bkt == nil {
			return fmt.Errorf("image %q: %w", name, errdefs.ErrNotFound)
		}

		if err := removeImageLease(ctx, tx, name); err != nil {
			return err
		}

		// Conditional delete: only proceed when the stored target digest
		// matches the one the caller expects.
		if options.Target != nil && options.Target.Digest != "" {
			ibkt := bkt.Bucket([]byte(name))
			if ibkt == nil {
				return fmt.Errorf("image %q: %w", name, errdefs.ErrNotFound)
			}

			var check images.Image
			if err := readImage(&check, ibkt); err != nil {
				return fmt.Errorf("image %q: %w", name, err)
			}

			if check.Target.Digest != options.Target.Digest {
				return fmt.Errorf("image %q has target %v, not %v: %w", name, check.Target.Digest, options.Target.Digest, errdefs.ErrNotFound)
			}
		}

		// Use a closure-local error (rather than assigning to the captured
		// outer err) and errors.Is so a wrapped sentinel is still recognized.
		if err := bkt.DeleteBucket([]byte(name)); err != nil {
			if errors.Is(err, bolt.ErrBucketNotFound) {
				return fmt.Errorf("image %q: %w", name, errdefs.ErrNotFound)
			}
			return err
		}

		atomic.AddUint32(&s.db.dirty, 1)

		return nil
	})
}
// validateImage reports an ErrInvalidArgument-wrapped error when the image
// name is empty, returns a wrapped error for the first label rejected by
// labels.Validate, and otherwise returns the result of validateTarget on the
// image's target descriptor.
func validateImage(image *images.Image) error {
	if len(image.Name) == 0 {
		return fmt.Errorf("image name must not be empty: %w", errdefs.ErrInvalidArgument)
	}

	for key, value := range image.Labels {
		err := labels.Validate(key, value)
		if err != nil {
			return fmt.Errorf("image.Labels: %w", err)
		}
	}

	return validateTarget(&image.Target)
}
// validateTarget checks the descriptor fields that are persisted for an
// image target: the digest must pass Digest.Validate, the size must be
// greater than zero and the media type must be non-empty. Checks run in that
// order and every failure wraps errdefs.ErrInvalidArgument.
func validateTarget(target *ocispec.Descriptor) error {
	// NOTE(stevvooe): Only validate fields we actually store.
	if verr := target.Digest.Validate(); verr != nil {
		return fmt.Errorf("Target.Digest %q invalid: %v: %w", target.Digest, verr, errdefs.ErrInvalidArgument)
	}

	switch {
	case target.Size <= 0:
		return fmt.Errorf("Target.Size must be greater than zero: %w", errdefs.ErrInvalidArgument)
	case target.MediaType == "":
		return fmt.Errorf("Target.MediaType must be set: %w", errdefs.ErrInvalidArgument)
	}

	return nil
}
// readImage populates image from the image bucket bkt: timestamps, labels
// and target annotations are read through boltutil, then the digest, media
// type and size are read from the nested target bucket.
//
// It returns an error if any boltutil read fails or if the target bucket is
// missing. Keys in the target bucket other than digest, media type and size
// are ignored.
func readImage(image *images.Image, bkt *bolt.Bucket) error {
	err := boltutil.ReadTimestamps(bkt, &image.CreatedAt, &image.UpdatedAt)
	if err != nil {
		return err
	}

	// Only assign the labels once they were read successfully.
	lbls, err := boltutil.ReadLabels(bkt)
	if err != nil {
		return err
	}
	image.Labels = lbls

	image.Target.Annotations, err = boltutil.ReadAnnotations(bkt)
	if err != nil {
		return err
	}

	target := bkt.Bucket(bucketKeyTarget)
	if target == nil {
		return errors.New("unable to read target bucket")
	}

	return target.ForEach(func(key, value []byte) error {
		if value == nil {
			// A nil value is not a plain key (possibly a nested bucket); skip it.
			return nil
		}

		// TODO(stevvooe): This is why we need to use byte values for
		// keys, rather than full arrays.
		switch field := string(key); field {
		case string(bucketKeyDigest):
			image.Target.Digest = digest.Digest(value)
		case string(bucketKeyMediaType):
			image.Target.MediaType = string(value)
		case string(bucketKeySize):
			// The byte count returned by Varint is deliberately discarded,
			// so a malformed value is not reported as an error here.
			image.Target.Size, _ = binary.Varint(value)
		}

		return nil
	})
}
// writeImage stores image into the image bucket bkt: timestamps, labels and
// target annotations are written through boltutil, then the target digest,
// media type and varint-encoded size are put into the nested target bucket,
// which is created if it does not exist yet.
//
// The size is encoded before anything is put into the target bucket, so an
// encoding failure leaves that bucket's keys untouched.
func writeImage(bkt *bolt.Bucket, image *images.Image) error {
	if err := boltutil.WriteTimestamps(bkt, image.CreatedAt, image.UpdatedAt); err != nil {
		return err
	}

	if err := boltutil.WriteLabels(bkt, image.Labels); err != nil {
		return fmt.Errorf("writing labels for image %v: %w", image.Name, err)
	}

	if err := boltutil.WriteAnnotations(bkt, image.Target.Annotations); err != nil {
		return fmt.Errorf("writing Annotations for image %v: %w", image.Name, err)
	}

	// write the target bucket
	target, err := bkt.CreateBucketIfNotExists(bucketKeyTarget)
	if err != nil {
		return err
	}

	size, err := encodeInt(image.Target.Size)
	if err != nil {
		return err
	}

	if err := target.Put(bucketKeyDigest, []byte(image.Target.Digest)); err != nil {
		return err
	}
	if err := target.Put(bucketKeyMediaType, []byte(image.Target.MediaType)); err != nil {
		return err
	}
	return target.Put(bucketKeySize, size)
}
// encodeInt returns the signed varint encoding of i as produced by
// binary.PutVarint. The returned slice has exactly the encoded length. An
// error is returned if the encoder reports that no bytes were written.
func encodeInt(i int64) ([]byte, error) {
	scratch := make([]byte, binary.MaxVarintLen64)

	written := binary.PutVarint(scratch, i)
	if written == 0 {
		return nil, fmt.Errorf("failed encoding integer = %v", i)
	}

	return scratch[:written], nil
}

View File

@@ -0,0 +1,657 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metadata
import (
"errors"
"fmt"
"reflect"
"testing"
"time"
"github.com/containerd/containerd/v2/core/images"
"github.com/containerd/containerd/v2/errdefs"
"github.com/containerd/containerd/v2/filters"
"github.com/opencontainers/go-digest"
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
)
// TestImagesList creates four images, verifies that List returns exactly the
// images selected by each filter set (computed independently with
// filters.ParseAll and adaptImage), and finally deletes every image twice,
// expecting NotFound on the second attempt.
func TestImagesList(t *testing.T) {
	ctx, db := testEnv(t)
	store := NewImageStore(NewDB(db, nil, nil))

	const imageCount = 4
	all := make(map[string]*images.Image, imageCount)
	for i := 0; i < imageCount; i++ {
		name := "image-" + fmt.Sprint(i)
		isEven := i%2 == 0

		img := &images.Image{
			Name: name,
			Labels: map[string]string{
				"namelabel": name,
				"even":      fmt.Sprint(isEven),
				"odd":       fmt.Sprint(!isEven),
			},
			Target: ocispec.Descriptor{
				Size:      10,
				MediaType: "application/vnd.containerd.test",
				Digest:    digest.FromString(name),
				Annotations: map[string]string{
					"foo": "bar",
				},
			},
		}
		all[name] = img

		start := time.Now()
		created, err := store.Create(ctx, *img)
		if err != nil {
			t.Fatal(err)
		}

		checkImageTimestamps(t, &created, start, true)
		// Adopt the stored timestamps so later equality checks can compare whole structs.
		img.UpdatedAt, img.CreatedAt = created.UpdatedAt, created.CreatedAt
		checkImagesEqual(t, &created, img, "ensure that containers were created as expected for list")
	}

	cases := []struct {
		name    string
		filters []string
	}{
		{
			name: "FullSet",
		},
		{
			name:    "FullSetFiltered", // full set, but because we have OR filter
			filters: []string{"labels.even==true", "labels.odd==true"},
		},
		{
			name:    "Even",
			filters: []string{"labels.even==true"},
		},
		{
			name:    "Odd",
			filters: []string{"labels.odd==true"},
		},
		{
			name:    "ByName",
			filters: []string{"name==image-0"},
		},
		{
			name:    "ByNameLabelEven",
			filters: []string{"labels.namelabel==image-0,labels.even==true"},
		},
		{
			name:    "ByMediaType",
			filters: []string{"target.mediatype~=application/vnd.*"},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// Work out which images the filters should select.
			expected := all
			if len(tc.filters) > 0 {
				filter, err := filters.ParseAll(tc.filters...)
				if err != nil {
					t.Fatal(err)
				}

				expected = make(map[string]*images.Image, len(all))
				for name, img := range all {
					if filter.Match(adaptImage(*img)) {
						expected[name] = img
					}
				}
			}

			results, err := store.List(ctx, tc.filters...)
			if err != nil {
				t.Fatal(err)
			}

			if len(results) == 0 { // all tests return a non-empty result set
				t.Fatalf("no results returned")
			}

			if len(results) != len(expected) {
				t.Fatalf("length of result does not match testset: %v != %v", len(results), len(expected))
			}

			for i := range results {
				checkImagesEqual(t, &results[i], expected[results[i].Name], "list results did not match")
			}
		})
	}

	// delete everything to test it
	for name := range all {
		if err := store.Delete(ctx, name); err != nil {
			t.Fatal(err)
		}

		// try it again, get NotFound
		if err := store.Delete(ctx, name); !errdefs.IsNotFound(err) {
			t.Fatalf("unexpected error %v", err)
		}
	}
}
// TestImagesCreateUpdateDelete drives the image store through a
// create/update/get/delete cycle for each table entry.
//
// Per case: original is created (createerr is the expected create failure),
// input is applied with Update using fieldpaths (cause is the expected update
// failure), the result is compared with expected both from Update and from a
// following Get, and finally the image is deleted conditionally on the
// original target (deleteerr is the expected delete failure, used when the
// update replaced the target digest).
//
// The case name doubles as the image name and seeds the target digests, so
// case names must be unique.
func TestImagesCreateUpdateDelete(t *testing.T) {
	ctx, db := testEnv(t)
	store := NewImageStore(NewDB(db, nil, nil))

	for _, testcase := range []struct {
		name       string
		original   images.Image
		createerr  error
		input      images.Image // Input target size determines target digest, base image uses 10
		fieldpaths []string
		expected   images.Image
		cause      error
		deleteerr  error
	}{
		{
			name:     "Touch",
			original: imageBase(),
			input: images.Image{
				Labels: map[string]string{
					"foo": "bar",
					"baz": "boo",
				},
				Target: ocispec.Descriptor{
					Size:      10,
					MediaType: "application/vnd.oci.blab",
					Annotations: map[string]string{
						"foo": "bar",
					},
				},
			},
			expected: images.Image{
				Labels: map[string]string{
					"foo": "bar",
					"baz": "boo",
				},
				Target: ocispec.Descriptor{
					Size:      10,
					MediaType: "application/vnd.oci.blab",
					Annotations: map[string]string{
						"foo": "bar",
					},
				},
			},
		},
		{
			name: "NoTarget",
			original: images.Image{
				Labels: map[string]string{
					"foo": "bar",
					"baz": "boo",
				},
				Target: ocispec.Descriptor{},
			},
			createerr: errdefs.ErrInvalidArgument,
		},
		{
			name:     "ReplaceLabels",
			original: imageBase(),
			input: images.Image{
				Labels: map[string]string{
					"for": "bar",
					"boo": "boo",
				},
				Target: ocispec.Descriptor{
					Size:      10,
					MediaType: "application/vnd.oci.blab",
					Annotations: map[string]string{
						"foo": "bar",
					},
				},
			},
			expected: images.Image{
				Labels: map[string]string{
					"for": "bar",
					"boo": "boo",
				},
				Target: ocispec.Descriptor{
					Size:      10,
					MediaType: "application/vnd.oci.blab",
					Annotations: map[string]string{
						"foo": "bar",
					},
				},
			},
		},
		{
			name:     "ReplaceLabelsFieldPath",
			original: imageBase(),
			input: images.Image{
				Labels: map[string]string{
					"for": "bar",
					"boo": "boo",
				},
				Target: ocispec.Descriptor{
					Size:      10,
					MediaType: "application/vnd.oci.blab+ignored", // make sure other stuff is ignored
					Annotations: map[string]string{
						"not": "bar",
						"new": "boo",
					},
				},
			},
			fieldpaths: []string{"labels"},
			expected: images.Image{
				Labels: map[string]string{
					"for": "bar",
					"boo": "boo",
				},
				Target: ocispec.Descriptor{
					Size:      10,
					MediaType: "application/vnd.oci.blab",
					Annotations: map[string]string{
						"foo": "bar",
						"baz": "boo",
					},
				},
			},
		},
		{
			name:     "ReplaceLabelsAnnotationsFieldPath",
			original: imageBase(),
			input: images.Image{
				Labels: map[string]string{
					"for": "bar",
					"boo": "boo",
				},
				Target: ocispec.Descriptor{
					Size:      10,
					MediaType: "application/vnd.oci.blab+ignored", // make sure other stuff is ignored
					Annotations: map[string]string{
						"foo": "boo",
					},
				},
			},
			fieldpaths: []string{"annotations", "labels"},
			expected: images.Image{
				Labels: map[string]string{
					"for": "bar",
					"boo": "boo",
				},
				Target: ocispec.Descriptor{
					Size:      10,
					MediaType: "application/vnd.oci.blab",
					Annotations: map[string]string{
						"foo": "boo",
					},
				},
			},
		},
		{
			name:     "ReplaceLabel",
			original: imageBase(),
			input: images.Image{
				Labels: map[string]string{
					"foo": "baz",
					"baz": "bunk",
				},
				Target: ocispec.Descriptor{
					Size:      10,
					MediaType: "application/vnd.oci.blab+ignored", // make sure other stuff is ignored
					Annotations: map[string]string{
						"foo": "bar",
					},
				},
			},
			fieldpaths: []string{"labels.foo"},
			expected: images.Image{
				Labels: map[string]string{
					"foo": "baz",
					"baz": "boo",
				},
				Target: ocispec.Descriptor{
					Size:      10,
					MediaType: "application/vnd.oci.blab",
					Annotations: map[string]string{
						"foo": "bar",
						"baz": "boo",
					},
				},
			},
		},
		{
			name:     "ReplaceAnnotation",
			original: imageBase(),
			input: images.Image{
				Labels: map[string]string{
					"foo": "baz",
					"baz": "bunk",
				},
				Target: ocispec.Descriptor{
					Size:      10,
					MediaType: "application/vnd.oci.blab+ignored", // make sure other stuff is ignored
					Annotations: map[string]string{
						"foo": "baz",
						"baz": "bunk",
					},
				},
			},
			fieldpaths: []string{"annotations.foo"},
			expected: images.Image{
				Labels: map[string]string{
					"foo": "bar",
					"baz": "boo",
				},
				Target: ocispec.Descriptor{
					Size:      10,
					MediaType: "application/vnd.oci.blab",
					Annotations: map[string]string{
						"foo": "baz",
						"baz": "boo",
					},
				},
			},
		},
		{
			name:     "ReplaceTargetTypeAndAnnotations", // target must be updated as a unit
			original: imageBase(),
			input: images.Image{
				Target: ocispec.Descriptor{
					Size:      10,
					MediaType: "application/vnd.oci.blab+replaced",
					Annotations: map[string]string{
						"fox": "dog",
					},
				},
			},
			fieldpaths: []string{"target"},
			expected: images.Image{
				Labels: map[string]string{
					"foo": "bar",
					"baz": "boo",
				},
				Target: ocispec.Descriptor{
					Size:      10,
					MediaType: "application/vnd.oci.blab+replaced",
					Annotations: map[string]string{
						"fox": "dog",
					},
				},
			},
		},
		{
			name:     "ReplaceTargetFieldpath", // target must be updated as a unit
			original: imageBase(),
			input: images.Image{
				Target: ocispec.Descriptor{
					Size:      20,
					MediaType: "application/vnd.oci.blab+replaced",
					Annotations: map[string]string{
						"fox": "dog",
					},
				},
			},
			fieldpaths: []string{"target"},
			expected: images.Image{
				Labels: map[string]string{ // Labels not updated
					"foo": "bar",
					"baz": "boo",
				},
				Target: ocispec.Descriptor{
					Size:      20,
					MediaType: "application/vnd.oci.blab+replaced",
					Annotations: map[string]string{
						"fox": "dog",
					},
				},
			},
			deleteerr: errdefs.ErrNotFound,
		},
		{
			name:     "ReplaceTarget", // target must be updated as a unit
			original: imageBase(),
			input: images.Image{
				Target: ocispec.Descriptor{
					Size:      20,
					MediaType: "application/vnd.oci.blab+replaced",
					Annotations: map[string]string{
						"fox": "dog",
					},
				},
			},
			expected: images.Image{
				Target: ocispec.Descriptor{
					Size:      20,
					MediaType: "application/vnd.oci.blab+replaced",
					Annotations: map[string]string{
						"fox": "dog",
					},
				},
			},
			deleteerr: errdefs.ErrNotFound,
		},
		{
			name:     "EmptySize",
			original: imageBase(),
			input: images.Image{
				Labels: map[string]string{
					"foo": "bar",
					"baz": "boo",
				},
				Target: ocispec.Descriptor{
					Size:      0,
					MediaType: "application/vnd.oci.blab",
					Annotations: map[string]string{
						"foo": "bar",
					},
				},
			},
			cause: errdefs.ErrInvalidArgument,
		},
		{
			name: "EmptySizeOnCreate",
			original: images.Image{
				Labels: map[string]string{
					"foo": "bar",
					"baz": "boo",
				},
				Target: ocispec.Descriptor{
					MediaType: "application/vnd.oci.blab",
					Annotations: map[string]string{
						"foo": "bar",
					},
				},
			},
			createerr: errdefs.ErrInvalidArgument,
		},
		{
			name:     "EmptyMediaType",
			original: imageBase(),
			input: images.Image{
				Labels: map[string]string{
					"foo": "bar",
					"baz": "boo",
				},
				Target: ocispec.Descriptor{
					Size: 10,
				},
			},
			cause: errdefs.ErrInvalidArgument,
		},
		{
			// This case has a size but no media type; it previously reused
			// the name "EmptySizeOnCreate", which duplicated the subtest name.
			name: "EmptyMediaTypeOnCreate",
			original: images.Image{
				Labels: map[string]string{
					"foo": "bar",
					"baz": "boo",
				},
				Target: ocispec.Descriptor{
					Size: 10,
				},
			},
			createerr: errdefs.ErrInvalidArgument,
		},
		{
			name: "TryUpdateNameFail",
			original: images.Image{
				Labels: map[string]string{
					"foo": "bar",
					"baz": "boo",
				},
				Target: ocispec.Descriptor{
					Size:      10,
					MediaType: "application/vnd.oci.blab",
					Annotations: map[string]string{
						"foo": "bar",
					},
				},
			},
			input: images.Image{
				Name: "test should fail",
				Labels: map[string]string{
					"foo": "bar",
					"baz": "boo",
				},
				Target: ocispec.Descriptor{
					Size:      10,
					MediaType: "application/vnd.oci.blab",
					Annotations: map[string]string{
						"foo": "bar",
					},
				},
			},
			cause: errdefs.ErrNotFound,
		},
	} {
		testcase := testcase
		t.Run(testcase.name, func(t *testing.T) {
			// The case name is used as the image name unless the input
			// explicitly supplies a different one.
			testcase.original.Name = testcase.name
			if testcase.input.Name == "" {
				testcase.input.Name = testcase.name
			}
			testcase.expected.Name = testcase.name
			// Digests are derived from the case name and target size, so a
			// changed size yields a changed target digest.
			testcase.original.Target.Digest = digest.FromString(fmt.Sprintf("%s-%d", testcase.name, testcase.original.Target.Size))
			testcase.input.Target.Digest = digest.FromString(fmt.Sprintf("%s-%d", testcase.name, testcase.input.Target.Size))
			testcase.expected.Target.Digest = testcase.input.Target.Digest

			// Create
			now := time.Now()
			created, err := store.Create(ctx, testcase.original)
			if !errors.Is(err, testcase.createerr) {
				if testcase.createerr == nil {
					t.Fatalf("unexpected error: %v", err)
				} else {
					t.Fatalf("cause of %v (cause: %v) != %v", err, errors.Unwrap(err), testcase.createerr)
				}
			} else if testcase.createerr != nil {
				return
			}

			checkImageTimestamps(t, &created, now, true)

			testcase.original.CreatedAt = created.CreatedAt
			testcase.expected.CreatedAt = created.CreatedAt
			testcase.original.UpdatedAt = created.UpdatedAt
			testcase.expected.UpdatedAt = created.UpdatedAt

			checkImagesEqual(t, &created, &testcase.original, "unexpected image on creation")

			// Update
			now = time.Now()
			updated, err := store.Update(ctx, testcase.input, testcase.fieldpaths...)
			if !errors.Is(err, testcase.cause) {
				if testcase.cause == nil {
					t.Fatalf("unexpected error: %v", err)
				} else {
					t.Fatalf("cause of %v (cause: %v) != %v", err, errors.Unwrap(err), testcase.cause)
				}
			} else if testcase.cause != nil {
				return
			}

			checkImageTimestamps(t, &updated, now, false)
			testcase.expected.UpdatedAt = updated.UpdatedAt
			checkImagesEqual(t, &updated, &testcase.expected, "updated failed to get expected result")

			// Get
			result, err := store.Get(ctx, testcase.original.Name)
			if err != nil {
				t.Fatal(err)
			}

			checkImagesEqual(t, &result, &testcase.expected, "get after failed to get expected result")

			if testcase.original.Target.Digest != testcase.expected.Target.Digest {
				t.Log("Delete should fail")
			}

			// Delete
			err = store.Delete(ctx, testcase.original.Name, images.DeleteTarget(&testcase.original.Target))
			if err != nil {
				if testcase.deleteerr == nil {
					t.Fatal(err)
				}
				if !errors.Is(err, testcase.deleteerr) {
					t.Fatal("unexpected error", err, ", expected", testcase.deleteerr)
				}
			} else if testcase.deleteerr != nil {
				t.Fatal("no error on deleted, expected", testcase.deleteerr)
			}
		})
	}
}
// imageBase returns a fresh image fixture with two labels, two target
// annotations, a target size of 10 and the "application/vnd.oci.blab" media
// type. Name and target digest are left unset. Every call builds new maps.
func imageBase() images.Image {
	var base images.Image

	base.Labels = map[string]string{
		"foo": "bar",
		"baz": "boo",
	}
	base.Target = ocispec.Descriptor{
		Size:      10,
		MediaType: "application/vnd.oci.blab",
		Annotations: map[string]string{
			"foo": "bar",
			"baz": "boo",
		},
	}

	return base
}
// checkImageTimestamps fails the test unless both timestamps of im are set
// and UpdatedAt is not before now. When oncreate is true the two timestamps
// must be equal; otherwise UpdatedAt must be strictly after CreatedAt.
func checkImageTimestamps(t *testing.T, im *images.Image, now time.Time, oncreate bool) {
	t.Helper()

	if im.UpdatedAt.IsZero() || im.CreatedAt.IsZero() {
		t.Fatalf("timestamps not set")
	}

	switch {
	case oncreate:
		if !im.CreatedAt.Equal(im.UpdatedAt) {
			t.Fatal("timestamps should be equal on create")
		}
	case !im.UpdatedAt.After(im.CreatedAt):
		// ensure that updatedat is always after createdat
		t.Fatalf("timestamp for updatedat not after createdat: %v <= %v", im.UpdatedAt, im.CreatedAt)
	}

	if im.UpdatedAt.Before(now) {
		t.Fatal("createdat time incorrect should be after the start of the operation")
	}
}
// checkImagesEqual fails the test when a and b are not deeply equal. The
// failure message prints both images followed by format rendered with args.
func checkImagesEqual(t *testing.T, a, b *images.Image, format string, args ...interface{}) {
	t.Helper()

	if reflect.DeepEqual(a, b) {
		return
	}

	// The two images fill the leading %v verbs; caller args follow.
	params := make([]interface{}, 0, len(args)+2)
	params = append(params, a, b)
	params = append(params, args...)
	t.Fatalf("images not equal \n\t%v != \n\t%v: "+format, params...)
}

563
core/metadata/leases.go Normal file
View File

@@ -0,0 +1,563 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metadata
import (
"context"
"errors"
"fmt"
"strings"
"sync/atomic"
"time"
"github.com/containerd/containerd/v2/core/leases"
"github.com/containerd/containerd/v2/core/metadata/boltutil"
"github.com/containerd/containerd/v2/errdefs"
"github.com/containerd/containerd/v2/filters"
"github.com/containerd/containerd/v2/namespaces"
digest "github.com/opencontainers/go-digest"
bolt "go.etcd.io/bbolt"
)
// leaseManager manages the create/delete lifecycle of leases
// and also returns existing leases. It additionally adds, removes and lists
// the resources referenced by a lease.
type leaseManager struct {
// db is the metadata database in which all lease buckets are read and written.
db *DB
}
// NewLeaseManager returns a leases.Manager whose leases are stored in the
// provided metadata database.
func NewLeaseManager(db *DB) leases.Manager {
	return &leaseManager{db: db}
}
// Create creates a new lease configured by opts in the namespace carried by
// ctx and returns it with CreatedAt set to the stored creation time.
//
// The options must set a lease ID; otherwise an error is returned before the
// database is touched. If a lease with the same ID already exists the
// returned error wraps errdefs.ErrAlreadyExists. Labels are stored only when
// the lease's label map is non-nil.
func (lm *leaseManager) Create(ctx context.Context, opts ...leases.Opt) (leases.Lease, error) {
	var l leases.Lease
	for _, opt := range opts {
		if err := opt(&l); err != nil {
			return leases.Lease{}, err
		}
	}
	if l.ID == "" {
		return leases.Lease{}, errors.New("lease id must be provided")
	}

	namespace, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return leases.Lease{}, err
	}

	if err := update(ctx, lm.db, func(tx *bolt.Tx) error {
		topbkt, err := createBucketIfNotExists(tx, bucketKeyVersion, []byte(namespace), bucketKeyObjectLeases)
		if err != nil {
			return err
		}

		txbkt, err := topbkt.CreateBucket([]byte(l.ID))
		if err != nil {
			// errors.Is also matches a wrapped sentinel, unlike ==.
			if errors.Is(err, bolt.ErrBucketExists) {
				err = errdefs.ErrAlreadyExists
			}
			return fmt.Errorf("lease %q: %w", l.ID, err)
		}

		t := time.Now().UTC()
		createdAt, err := t.MarshalBinary()
		if err != nil {
			return err
		}
		if err := txbkt.Put(bucketKeyCreatedAt, createdAt); err != nil {
			return err
		}

		if l.Labels != nil {
			if err := boltutil.WriteLabels(txbkt, l.Labels); err != nil {
				return err
			}
		}
		l.CreatedAt = t

		return nil
	}); err != nil {
		return leases.Lease{}, err
	}
	return l, nil
}
// Delete deletes the lease identified by lease.ID from the namespace carried
// by ctx and bumps the database dirty counter. Delete options are accepted
// but ignored.
//
// The returned error wraps errdefs.ErrNotFound when the namespace has no
// leases bucket or no bucket for the lease ID.
func (lm *leaseManager) Delete(ctx context.Context, lease leases.Lease, _ ...leases.DeleteOpt) error {
	namespace, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return err
	}

	return update(ctx, lm.db, func(tx *bolt.Tx) error {
		topbkt := getBucket(tx, bucketKeyVersion, []byte(namespace), bucketKeyObjectLeases)
		if topbkt == nil {
			return fmt.Errorf("lease %q: %w", lease.ID, errdefs.ErrNotFound)
		}
		if err := topbkt.DeleteBucket([]byte(lease.ID)); err != nil {
			// errors.Is also matches a wrapped sentinel, unlike ==.
			if errors.Is(err, bolt.ErrBucketNotFound) {
				return fmt.Errorf("lease %q: %w", lease.ID, errdefs.ErrNotFound)
			}
			return err
		}

		atomic.AddUint32(&lm.db.dirty, 1)

		return nil
	})
}
// List returns the leases stored in the namespace carried by ctx that match
// the filter expressions in fs.
//
// A filter parse failure is returned wrapped in errdefs.ErrInvalidArgument.
// A namespace without a leases bucket yields a nil slice and no error.
func (lm *leaseManager) List(ctx context.Context, fs ...string) ([]leases.Lease, error) {
	namespace, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return nil, err
	}

	filter, err := filters.ParseAll(fs...)
	if err != nil {
		return nil, fmt.Errorf("%s: %w", err.Error(), errdefs.ErrInvalidArgument)
	}

	var found []leases.Lease

	collect := func(tx *bolt.Tx) error {
		topbkt := getBucket(tx, bucketKeyVersion, []byte(namespace), bucketKeyObjectLeases)
		if topbkt == nil {
			return nil
		}

		return topbkt.ForEach(func(id, value []byte) error {
			// Each lease is a nested bucket; entries with a value are plain keys.
			if value != nil {
				return nil
			}
			leaseBkt := topbkt.Bucket(id)

			entry := leases.Lease{ID: string(id)}

			if raw := leaseBkt.Get(bucketKeyCreatedAt); raw != nil {
				if err := entry.CreatedAt.UnmarshalBinary(raw); err != nil {
					return err
				}
			}

			lbls, err := boltutil.ReadLabels(leaseBkt)
			if err != nil {
				return err
			}
			entry.Labels = lbls

			if filter.Match(adaptLease(entry)) {
				found = append(found, entry)
			}

			return nil
		})
	}

	if err := view(ctx, lm.db, collect); err != nil {
		return nil, err
	}

	return found, nil
}
// AddResource records resource r under the given lease. The resource type is
// split into bucket keys by parseLeaseResource, the matching nested buckets
// are created as needed, and the resource reference is stored as a key with a
// nil value.
//
// The returned error wraps errdefs.ErrNotFound when the lease does not exist;
// errors from parseLeaseResource are returned unchanged.
func (lm *leaseManager) AddResource(ctx context.Context, lease leases.Lease, r leases.Resource) error {
	namespace, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return err
	}

	return update(ctx, lm.db, func(tx *bolt.Tx) error {
		cur := getBucket(tx, bucketKeyVersion, []byte(namespace), bucketKeyObjectLeases, []byte(lease.ID))
		if cur == nil {
			return fmt.Errorf("lease %q: %w", lease.ID, errdefs.ErrNotFound)
		}

		path, ref, err := parseLeaseResource(r)
		if err != nil {
			return err
		}

		// Descend through the type path, creating missing buckets on the way.
		for i := range path {
			if cur, err = cur.CreateBucketIfNotExists([]byte(path[i])); err != nil {
				return err
			}
		}

		return cur.Put([]byte(ref), nil)
	})
}
// DeleteResource removes the reference to resource r from the given lease.
// If any bucket along the resource's type path is missing, nothing is deleted
// and no error is reported. The database dirty counter is bumped in either
// case.
//
// The returned error wraps errdefs.ErrNotFound when the lease does not exist;
// errors from parseLeaseResource are returned unchanged.
func (lm *leaseManager) DeleteResource(ctx context.Context, lease leases.Lease, r leases.Resource) error {
	namespace, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return err
	}

	return update(ctx, lm.db, func(tx *bolt.Tx) error {
		leaseBkt := getBucket(tx, bucketKeyVersion, []byte(namespace), bucketKeyObjectLeases, []byte(lease.ID))
		if leaseBkt == nil {
			return fmt.Errorf("lease %q: %w", lease.ID, errdefs.ErrNotFound)
		}

		keys, ref, err := parseLeaseResource(r)
		if err != nil {
			return err
		}

		// Walk down the type path, stopping as soon as a bucket is missing.
		bkt := leaseBkt
		for i := 0; i < len(keys) && bkt != nil; i++ {
			bkt = bkt.Bucket([]byte(keys[i]))
		}

		if bkt != nil {
			if err := bkt.Delete([]byte(ref)); err != nil {
				return err
			}
		}

		atomic.AddUint32(&lm.db.dirty, 1)

		return nil
	})
}
// ListResources returns every resource referenced by the given lease, in the
// order content, images, ingests, snapshots. For the first three the resource
// Type is the bucket key itself; for snapshots it is the snapshots bucket key
// and the name of the nested bucket joined by "/".
//
// The returned error wraps errdefs.ErrNotFound when the lease does not exist.
func (lm *leaseManager) ListResources(ctx context.Context, lease leases.Lease) ([]leases.Resource, error) {
	namespace, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return nil, err
	}

	var resources []leases.Resource

	gather := func(tx *bolt.Tx) error {
		leaseBkt := getBucket(tx, bucketKeyVersion, []byte(namespace), bucketKeyObjectLeases, []byte(lease.ID))
		if leaseBkt == nil {
			return fmt.Errorf("lease %q: %w", lease.ID, errdefs.ErrNotFound)
		}

		// content, images and ingest resources are stored as flat keys
		// directly under their type bucket
		for _, typeKey := range [][]byte{
			bucketKeyObjectContent,
			bucketKeyObjectImages,
			bucketKeyObjectIngests,
		} {
			typeBkt := leaseBkt.Bucket(typeKey)
			if typeBkt == nil {
				continue
			}
			typeName := string(typeKey)
			if err := typeBkt.ForEach(func(k, _ []byte) error {
				resources = append(resources, leases.Resource{
					ID:   string(k),
					Type: typeName,
				})
				return nil
			}); err != nil {
				return err
			}
		}

		// snapshot resources are nested one bucket deeper
		snapshotsBkt := leaseBkt.Bucket(bucketKeyObjectSnapshots)
		if snapshotsBkt == nil {
			return nil
		}
		return snapshotsBkt.ForEach(func(sk, sv []byte) error {
			if sv != nil {
				// Not a nested bucket.
				return nil
			}

			return snapshotsBkt.Bucket(sk).ForEach(func(k, _ []byte) error {
				resources = append(resources, leases.Resource{
					ID:   string(k),
					Type: fmt.Sprintf("%s/%s", bucketKeyObjectSnapshots, sk),
				})
				return nil
			})
		})
	}

	if err := view(ctx, lm.db, gather); err != nil {
		return nil, err
	}

	return resources, nil
}
// addSnapshotLease records the snapshot key for the given snapshotter under
// the lease carried by ctx, creating the snapshots and snapshotter buckets as
// needed. It does nothing when ctx carries no lease and returns an
// ErrNotFound-wrapped error when the lease bucket does not exist.
//
// It panics if ctx has no namespace.
func addSnapshotLease(ctx context.Context, tx *bolt.Tx, snapshotter, key string) error {
	leaseID, leased := leases.FromContext(ctx)
	if !leased {
		return nil
	}

	ns, hasNamespace := namespaces.Namespace(ctx)
	if !hasNamespace {
		panic("namespace must already be checked")
	}

	leaseBkt := getBucket(tx, bucketKeyVersion, []byte(ns), bucketKeyObjectLeases, []byte(leaseID))
	if leaseBkt == nil {
		return fmt.Errorf("lease does not exist: %w", errdefs.ErrNotFound)
	}

	snapshotsBkt, err := leaseBkt.CreateBucketIfNotExists(bucketKeyObjectSnapshots)
	if err != nil {
		return err
	}

	snapshotterBkt, err := snapshotsBkt.CreateBucketIfNotExists([]byte(snapshotter))
	if err != nil {
		return err
	}

	return snapshotterBkt.Put([]byte(key), nil)
}
// removeSnapshotLease deletes the snapshot key for the given snapshotter from
// the lease carried by ctx. It does nothing when ctx carries no lease or when
// the lease has no bucket for that snapshotter.
//
// It panics if ctx has no namespace.
func removeSnapshotLease(ctx context.Context, tx *bolt.Tx, snapshotter, key string) error {
	leaseID, leased := leases.FromContext(ctx)
	if !leased {
		return nil
	}

	ns, hasNamespace := namespaces.Namespace(ctx)
	if !hasNamespace {
		panic("namespace must already be checked")
	}

	if bkt := getBucket(tx, bucketKeyVersion, []byte(ns), bucketKeyObjectLeases, []byte(leaseID), bucketKeyObjectSnapshots, []byte(snapshotter)); bkt != nil {
		return bkt.Delete([]byte(key))
	}

	// Key does not exist so we return nil
	return nil
}
// addContentLease records the content digest under the lease carried by ctx,
// creating the content bucket as needed. It does nothing when ctx carries no
// lease and returns an ErrNotFound-wrapped error when the lease bucket does
// not exist.
//
// It panics if ctx has no namespace.
func addContentLease(ctx context.Context, tx *bolt.Tx, dgst digest.Digest) error {
	leaseID, leased := leases.FromContext(ctx)
	if !leased {
		return nil
	}

	ns, hasNamespace := namespaces.Namespace(ctx)
	if !hasNamespace {
		panic("namespace must already be required")
	}

	leaseBkt := getBucket(tx, bucketKeyVersion, []byte(ns), bucketKeyObjectLeases, []byte(leaseID))
	if leaseBkt == nil {
		return fmt.Errorf("lease does not exist: %w", errdefs.ErrNotFound)
	}

	contentBkt, err := leaseBkt.CreateBucketIfNotExists(bucketKeyObjectContent)
	if err != nil {
		return err
	}

	return contentBkt.Put([]byte(dgst.String()), nil)
}
// removeContentLease deletes the content digest from the lease carried by
// ctx. It does nothing when ctx carries no lease or when the lease has no
// content bucket.
//
// It panics if ctx has no namespace.
func removeContentLease(ctx context.Context, tx *bolt.Tx, dgst digest.Digest) error {
	leaseID, leased := leases.FromContext(ctx)
	if !leased {
		return nil
	}

	ns, hasNamespace := namespaces.Namespace(ctx)
	if !hasNamespace {
		panic("namespace must already be checked")
	}

	if bkt := getBucket(tx, bucketKeyVersion, []byte(ns), bucketKeyObjectLeases, []byte(leaseID), bucketKeyObjectContent); bkt != nil {
		return bkt.Delete([]byte(dgst.String()))
	}

	// Key does not exist so we return nil
	return nil
}
// addIngestLease records the ingest reference under the lease carried by ctx,
// creating the ingests bucket as needed. The boolean result is true only when
// the reference was stored; it is false with a nil error when ctx carries no
// lease. An ErrNotFound-wrapped error is returned when the lease bucket does
// not exist.
//
// It panics if ctx has no namespace.
func addIngestLease(ctx context.Context, tx *bolt.Tx, ref string) (bool, error) {
	leaseID, leased := leases.FromContext(ctx)
	if !leased {
		return false, nil
	}

	ns, hasNamespace := namespaces.Namespace(ctx)
	if !hasNamespace {
		panic("namespace must already be required")
	}

	leaseBkt := getBucket(tx, bucketKeyVersion, []byte(ns), bucketKeyObjectLeases, []byte(leaseID))
	if leaseBkt == nil {
		return false, fmt.Errorf("lease does not exist: %w", errdefs.ErrNotFound)
	}

	ingestsBkt, err := leaseBkt.CreateBucketIfNotExists(bucketKeyObjectIngests)
	if err != nil {
		return false, err
	}

	err = ingestsBkt.Put([]byte(ref), nil)
	if err != nil {
		return false, err
	}

	return true, nil
}
// removeIngestLease deletes the ingest reference from the lease carried by
// ctx. It does nothing when ctx carries no lease or when the lease has no
// ingests bucket.
//
// It panics if ctx has no namespace.
func removeIngestLease(ctx context.Context, tx *bolt.Tx, ref string) error {
	leaseID, leased := leases.FromContext(ctx)
	if !leased {
		return nil
	}

	ns, hasNamespace := namespaces.Namespace(ctx)
	if !hasNamespace {
		panic("namespace must already be checked")
	}

	if bkt := getBucket(tx, bucketKeyVersion, []byte(ns), bucketKeyObjectLeases, []byte(leaseID), bucketKeyObjectIngests); bkt != nil {
		return bkt.Delete([]byte(ref))
	}

	// Key does not exist so we return nil
	return nil
}
// addImageLease records the image reference under the lease carried by ctx,
// creating the images bucket as needed. It does nothing when ctx carries no
// lease or when labels has no labelGCExpire entry. An ErrNotFound-wrapped
// error is returned when the lease bucket does not exist.
//
// It panics if ctx has no namespace (checked only after the lease and label
// checks have passed).
func addImageLease(ctx context.Context, tx *bolt.Tx, ref string, labels map[string]string) error {
	leaseID, leased := leases.FromContext(ctx)
	if !leased {
		return nil
	}

	// If image doesn't have expiration, it does not need to be leased
	_, expires := labels[string(labelGCExpire)]
	if !expires {
		return nil
	}

	ns, hasNamespace := namespaces.Namespace(ctx)
	if !hasNamespace {
		panic("namespace must already be required")
	}

	leaseBkt := getBucket(tx, bucketKeyVersion, []byte(ns), bucketKeyObjectLeases, []byte(leaseID))
	if leaseBkt == nil {
		return fmt.Errorf("lease does not exist: %w", errdefs.ErrNotFound)
	}

	imagesBkt, err := leaseBkt.CreateBucketIfNotExists(bucketKeyObjectImages)
	if err != nil {
		return err
	}

	return imagesBkt.Put([]byte(ref), nil)
}
// removeImageLease deletes the image reference from the lease carried by ctx.
// It does nothing when ctx carries no lease or when the lease has no images
// bucket.
//
// It panics if ctx has no namespace.
func removeImageLease(ctx context.Context, tx *bolt.Tx, ref string) error {
	leaseID, leased := leases.FromContext(ctx)
	if !leased {
		return nil
	}

	ns, hasNamespace := namespaces.Namespace(ctx)
	if !hasNamespace {
		panic("namespace must already be checked")
	}

	if bkt := getBucket(tx, bucketKeyVersion, []byte(ns), bucketKeyObjectLeases, []byte(leaseID), bucketKeyObjectImages); bkt != nil {
		return bkt.Delete([]byte(ref))
	}

	// Key does not exist so we return nil
	return nil
}
// parseLeaseResource splits the resource type on "/" into bucket keys and
// returns them together with the reference to store.
//
// Content, ingest and image types must consist of a single key; for content
// the resource ID must parse as a digest and the parsed digest's string form
// is returned as the reference. Snapshot types must consist of exactly two
// keys. Violations return an error wrapping errdefs.ErrInvalidArgument; any
// other leading key returns an error wrapping errdefs.ErrNotImplemented.
func parseLeaseResource(r leases.Resource) ([]string, string, error) {
	ref := r.ID
	typ := r.Type
	keys := strings.Split(typ, "/")

	switch keys[0] {
	case string(bucketKeyObjectContent):
		if len(keys) != 1 {
			return nil, "", fmt.Errorf("invalid resource type %s: %w", typ, errdefs.ErrInvalidArgument)
		}

		dgst, err := digest.Parse(ref)
		if err != nil {
			return nil, "", fmt.Errorf("invalid content resource id %s: %v: %w", ref, err, errdefs.ErrInvalidArgument)
		}
		ref = dgst.String()
	case string(bucketKeyObjectIngests), string(bucketKeyObjectImages):
		if len(keys) != 1 {
			return nil, "", fmt.Errorf("invalid resource type %s: %w", typ, errdefs.ErrInvalidArgument)
		}
	case string(bucketKeyObjectSnapshots):
		if len(keys) != 2 {
			return nil, "", fmt.Errorf("invalid snapshot resource type %s: %w", typ, errdefs.ErrInvalidArgument)
		}
	default:
		return nil, "", fmt.Errorf("resource type %s not supported yet: %w", typ, errdefs.ErrNotImplemented)
	}

	return keys, ref, nil
}

View File

@@ -0,0 +1,416 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metadata
import (
_ "crypto/sha256"
"errors"
"fmt"
"testing"
"github.com/containerd/containerd/v2/core/leases"
"github.com/containerd/containerd/v2/errdefs"
bolt "go.etcd.io/bbolt"
)
// TestLeases creates leases (including a duplicate ID), checks that List
// returns exactly the successfully created ones with matching IDs and
// creation times, deletes every test case's lease while checking the expected
// delete outcome, and finally verifies that no leases remain.
func TestLeases(t *testing.T) {
	ctx, db := testEnv(t)

	lm := NewLeaseManager(NewDB(db, nil, nil))

	testCases := []struct {
		ID        string
		CreateErr error
		DeleteErr error
	}{
		{
			ID: "tx1",
		},
		{
			ID:        "tx1",
			CreateErr: errdefs.ErrAlreadyExists,
			DeleteErr: errdefs.ErrNotFound,
		},
		{
			ID: "tx2",
		},
	}

	var ll []leases.Lease

	for _, tc := range testCases {
		if err := db.Update(func(tx *bolt.Tx) error {
			lease, err := lm.Create(WithTransactionContext(ctx, tx), leases.WithID(tc.ID))
			if err != nil {
				if tc.CreateErr != nil && errors.Is(err, tc.CreateErr) {
					return nil
				}
				return err
			}
			ll = append(ll, lease)
			return nil
		}); err != nil {
			t.Fatal(err)
		}
	}

	listed, err := lm.List(ctx)
	if err != nil {
		t.Fatal(err)
	}

	if len(listed) != len(ll) {
		t.Fatalf("Expected %d lease, got %d", len(ll), len(listed))
	}
	for i := range listed {
		if listed[i].ID != ll[i].ID {
			t.Fatalf("Expected lease ID %s, got %s", ll[i].ID, listed[i].ID)
		}
		// Compare instants with Equal rather than struct equality.
		if !listed[i].CreatedAt.Equal(ll[i].CreatedAt) {
			t.Fatalf("Expected lease created at time %s, got %s", ll[i].CreatedAt, listed[i].CreatedAt)
		}
	}

	for _, tc := range testCases {
		err := lm.Delete(ctx, leases.Lease{
			ID: tc.ID,
		})
		switch {
		case err == nil && tc.DeleteErr != nil:
			// An expected failure that did not happen is a test failure too.
			t.Fatalf("Expected error %v deleting lease %q, got nil", tc.DeleteErr, tc.ID)
		case err != nil && !errors.Is(err, tc.DeleteErr):
			// Covers both "no error expected" (errors.Is against nil is
			// false for a non-nil err) and "a different error expected".
			t.Fatal(err)
		}
	}

	listed, err = lm.List(ctx)
	if err != nil {
		t.Fatal(err)
	}

	if len(listed) > 0 {
		t.Fatalf("Expected no leases, found %d: %v", len(listed), listed)
	}
}
// TestLeasesList stores three labelled leases, checks which of them each
// filter set returns, and finally deletes every lease twice to confirm that
// the second deletion reports a not-found error.
func TestLeasesList(t *testing.T) {
	ctx, db := testEnv(t)

	lm := NewLeaseManager(NewDB(db, nil, nil))

	fixtures := [][]leases.Opt{
		{
			leases.WithID("lease1"),
			leases.WithLabels(map[string]string{
				"label1": "value1",
				"label3": "other",
			}),
		},
		{
			leases.WithID("lease2"),
			leases.WithLabels(map[string]string{
				"label1": "value1",
				"label2": "",
				"label3": "other",
			}),
		},
		{
			leases.WithID("lease3"),
			leases.WithLabels(map[string]string{
				"label1": "value2",
				"label2": "something",
			}),
		},
	}

	// Create every fixture lease inside a single transaction.
	insertAll := func(tx *bolt.Tx) error {
		for _, leaseOpts := range fixtures {
			if _, err := lm.Create(WithTransactionContext(ctx, tx), leaseOpts...); err != nil {
				return err
			}
		}
		return nil
	}
	if err := db.Update(insertAll); err != nil {
		t.Fatal(err)
	}

	type listCase struct {
		name     string
		filters  []string
		expected []string
	}

	cases := []listCase{
		{
			name:     "All",
			filters:  []string{},
			expected: []string{"lease1", "lease2", "lease3"},
		},
		{
			name:     "ID",
			filters:  []string{"id==lease1"},
			expected: []string{"lease1"},
		},
		{
			name:     "IDx2",
			filters:  []string{"id==lease1", "id==lease2"},
			expected: []string{"lease1", "lease2"},
		},
		{
			name:     "Label1",
			filters:  []string{"labels.label1"},
			expected: []string{"lease1", "lease2", "lease3"},
		},
		{
			name:     "Label1value1",
			filters:  []string{"labels.label1==value1"},
			expected: []string{"lease1", "lease2"},
		},
		{
			name:     "Label1value2",
			filters:  []string{"labels.label1==value2"},
			expected: []string{"lease3"},
		},
		{
			name:     "Label2",
			filters:  []string{"labels.label2"},
			expected: []string{"lease3"},
		},
		{
			name:     "Label3",
			filters:  []string{"labels.label2", "labels.label3"},
			expected: []string{"lease1", "lease2", "lease3"},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got, err := lm.List(ctx, tc.filters...)
			if err != nil {
				t.Fatal(err)
			}

			if len(got) != len(tc.expected) {
				t.Errorf("length of result does not match expected: %v != %v", len(got), len(tc.expected))
			}

			// pending holds the expected IDs that have not been seen yet.
			pending := map[string]struct{}{}
			for _, id := range tc.expected {
				pending[id] = struct{}{}
			}

			for _, found := range got {
				if _, wanted := pending[found.ID]; !wanted {
					t.Errorf("unexpected match: %v", found.ID)
					continue
				}
				delete(pending, found.ID)
			}

			for id := range pending {
				t.Errorf("missing match: %v", id)
			}
		})
	}

	// delete everything to test it
	for _, leaseOpts := range fixtures {
		// Rebuild the lease value by applying the same options used at creation.
		var target leases.Lease
		for _, apply := range leaseOpts {
			if err := apply(&target); err != nil {
				t.Fatal(err)
			}
		}

		if err := lm.Delete(ctx, target); err != nil {
			t.Fatal(err)
		}

		// A second deletion of the same lease must report not found.
		switch err := lm.Delete(ctx, target); {
		case err == nil:
			t.Fatalf("expected error deleting non-existent lease")
		case !errdefs.IsNotFound(err):
			t.Fatalf("unexpected error: %s", err)
		}
	}
}
// TestLeaseResource adds resources of various types to a lease and checks the
// error returned for each, then verifies that listing returns every
// successfully added resource exactly once and that deleting one resource
// shrinks the listing by one.
func TestLeaseResource(t *testing.T) {
	ctx, db := testEnv(t)

	lm := NewLeaseManager(NewDB(db, nil, nil))

	const (
		leaseID        = "l1"
		snapshotterKey = "RstMI3X8vguKoPFkmIStZ5fQFI7F1L0o"
	)
	lease := leases.Lease{ID: leaseID}

	// prepare lease
	if _, err := lm.Create(ctx, leases.WithID(leaseID)); err != nil {
		t.Fatal(err)
	}

	type resourceCase struct {
		lease    leases.Lease
		resource leases.Resource
		err      error
	}

	cases := []resourceCase{
		{
			lease: lease,
			resource: leases.Resource{
				ID:   "sha256:29f5d56d12684887bdfa50dcd29fc31eea4aaf4ad3bec43daf19026a7ce69912",
				Type: "content",
			},
		},
		{
			lease: lease,
			resource: leases.Resource{
				ID:   "d2UdcINOwrBTQG9kS8rySAM3eMNBSojH",
				Type: "ingests",
			},
		},
		{
			// allow to add resource which exists
			lease: lease,
			resource: leases.Resource{
				ID:   "d2UdcINOwrBTQG9kS8rySAM3eMNBSojH",
				Type: "ingests",
			},
		},
		{
			// not allow to reference to lease
			lease: lease,
			resource: leases.Resource{
				ID:   "xCAV3F6PddlXitbtby0Vo23Qof6RTWpG",
				Type: "leases",
			},
			err: errdefs.ErrNotImplemented,
		},
		{
			// not allow to reference to container
			lease: lease,
			resource: leases.Resource{
				ID:   "05O9ljptPu5Qq9kZGOacEfymBwQFM8ZH",
				Type: "containers",
			},
			err: errdefs.ErrNotImplemented,
		},
		{
			lease: lease,
			resource: leases.Resource{
				ID:   "qBUHpWBn03YaCt9cL3PPGKWoxBqTlLfu",
				Type: "images",
			},
		},
		{
			lease: lease,
			resource: leases.Resource{
				ID:   "HMemOhlygombYhkhHhAZj5aRbDy2a3z2",
				Type: "snapshots",
			},
			err: errdefs.ErrInvalidArgument,
		},
		{
			lease: lease,
			resource: leases.Resource{
				ID:   snapshotterKey,
				Type: "snapshots/overlayfs",
			},
		},
		{
			lease: lease,
			resource: leases.Resource{
				ID:   "HMemOhlygombYhkhHhAZj5aRbDy2a3z2",
				Type: "snapshots/overlayfs/type1",
			},
			err: errdefs.ErrInvalidArgument,
		},
		{
			lease: leases.Lease{
				ID: "non-found",
			},
			resource: leases.Resource{
				ID:   "HMemOhlygombYhkhHhAZj5aRbDy2a3z2",
				Type: "snapshots/overlayfs",
			},
			err: errdefs.ErrNotFound,
		},
	}

	// seen maps each successfully added resource to whether the listing
	// below has already returned it.
	seen := make(map[leases.Resource]bool)
	for idx, tc := range cases {
		addOne := func(tx *bolt.Tx) error {
			addErr := lm.AddResource(WithTransactionContext(ctx, tx), tc.lease, tc.resource)
			if !errors.Is(addErr, tc.err) {
				return fmt.Errorf("expect error (%v), but got (%v)", tc.err, addErr)
			}

			if addErr == nil {
				// not visited yet
				seen[tc.resource] = false
			}
			return nil
		}
		if err := db.Update(addOne); err != nil {
			t.Fatalf("failed to run case %d with resource: %v", idx, err)
		}
	}

	// check list function
	listed, err := lm.ListResources(ctx, lease)
	if err != nil {
		t.Fatal(err)
	}

	if len(listed) != len(seen) {
		t.Fatalf("expected (%d) resources, but got (%d)", len(seen), len(listed))
	}

	for _, res := range listed {
		already, known := seen[res]
		switch {
		case !known:
			t.Fatalf("unexpected resource(%v)", res)
		case already:
			t.Fatalf("duplicate resource(%v)", res)
		}
		seen[res] = true
	}

	// remove snapshots
	snapshotResource := leases.Resource{
		ID:   snapshotterKey,
		Type: "snapshots/overlayfs",
	}
	if err := lm.DeleteResource(ctx, lease, snapshotResource); err != nil {
		t.Fatal(err)
	}

	// check list number
	listed, err = lm.ListResources(ctx, lease)
	if err != nil {
		t.Fatal(err)
	}

	if len(listed)+1 != len(seen) {
		t.Fatalf("expected (%d) resources, but got (%d)", len(seen)-1, len(listed))
	}
}

168
core/metadata/migrations.go Normal file
View File

@@ -0,0 +1,168 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metadata
import bolt "go.etcd.io/bbolt"
// migration is one entry of the migrations list: a schema/version pair and
// the function that performs the migration inside a bolt transaction.
type migration struct {
	// schema is the schema name of the entry (every entry in migrations uses "v1").
	schema string
	// version is the database version of the entry; migrations must be
	// ordered by it from least to greatest.
	version int
	// migrate performs the migration using the given transaction.
	migrate func(*bolt.Tx) error
}
// migrations stores the list of database migrations
// for each update to the database schema. The migrations
// array MUST be ordered by version from least to greatest.
// The last entry in the array should correspond to the
// schemaVersion and dbVersion constants.
//
// A migration test MUST be added for each migration in
// the array.
//
// The migrate function can safely assume the version
// of the data it is migrating from is the previous version
// of the database.
var migrations = []migration{
	{
		schema:  "v1",
		version: 1,
		migrate: addChildLinks,
	},
	{
		schema:  "v1",
		version: 2,
		migrate: migrateIngests,
	},
	{
		// Version 3 carries no data change; see noOpMigration.
		schema:  "v1",
		version: 3,
		migrate: noOpMigration,
	},
}
// addChildLinks adds a children bucket to parent snapshots so that snapshot
// entries which have children cannot be removed.
//
// It walks every namespace bucket below the version bucket, every snapshotter
// bucket below the namespace's snapshots bucket, and every snapshot bucket
// below that. For each snapshot that records a parent, the snapshot's key is
// stored (with a nil value) in the parent's children bucket, which is created
// on demand. A missing version bucket is not an error; the function returns
// the first bolt error encountered, if any.
func addChildLinks(tx *bolt.Tx) error {
	v1bkt := tx.Bucket(bucketKeyVersion)
	if v1bkt == nil {
		// Nothing has been stored yet, so there is nothing to migrate.
		return nil
	}

	// iterate through each namespace
	v1c := v1bkt.Cursor()

	for k, v := v1c.First(); k != nil; k, v = v1c.Next() {
		// A non-nil value means a plain key rather than a nested bucket.
		if v != nil {
			continue
		}
		nbkt := v1bkt.Bucket(k)

		sbkt := nbkt.Bucket(bucketKeyObjectSnapshots)
		if sbkt != nil {
			// Iterate through each snapshotter
			if err := sbkt.ForEach(func(sk, sv []byte) error {
				// Skip plain keys; only nested buckets are snapshotters.
				if sv != nil {
					return nil
				}
				snbkt := sbkt.Bucket(sk)

				// Iterate through each snapshot
				return snbkt.ForEach(func(k, v []byte) error {
					// Skip plain keys; only nested buckets are snapshots.
					if v != nil {
						return nil
					}
					parent := snbkt.Bucket(k).Get(bucketKeyParent)
					if len(parent) > 0 {
						pbkt := snbkt.Bucket(parent)
						if pbkt == nil {
							// Not enforcing consistency during migration, skip
							return nil
						}
						cbkt, err := pbkt.CreateBucketIfNotExists(bucketKeyChildren)
						if err != nil {
							return err
						}
						// Only the key matters; the child link has no value.
						if err := cbkt.Put(k, nil); err != nil {
							return err
						}
					}

					return nil
				})
			}); err != nil {
				return err
			}
		}
	}

	return nil
}
// migrateIngests converts the deprecated key/value ingest bucket of every
// namespace into the structured ingests bucket, which can hold additional
// state about an ingest.
//
// For each namespace that has a content bucket containing the deprecated
// ingest bucket, every entry is copied into a per-ref bucket of the new
// ingests bucket (its value stored under the ref key) and the deprecated
// bucket is then deleted. Namespaces without those buckets are left alone.
func migrateIngests(tx *bolt.Tx) error {
	root := tx.Bucket(bucketKeyVersion)
	if root == nil {
		return nil
	}

	// Walk the namespaces stored below the version bucket.
	cursor := root.Cursor()
	for nsKey, nsVal := cursor.First(); nsKey != nil; nsKey, nsVal = cursor.Next() {
		// A value means a plain key, not a namespace bucket.
		if nsVal != nil {
			continue
		}

		contentBkt := root.Bucket(nsKey).Bucket(bucketKeyObjectContent)
		if contentBkt == nil {
			continue
		}

		legacyBkt := contentBkt.Bucket(deprecatedBucketKeyObjectIngest)
		if legacyBkt == nil {
			continue
		}

		// Destination bucket for the structured ingest records.
		ingestsBkt, err := contentBkt.CreateBucketIfNotExists(bucketKeyObjectIngests)
		if err != nil {
			return err
		}

		moveEntry := func(ref, bref []byte) error {
			refBkt, err := ingestsBkt.CreateBucketIfNotExists(ref)
			if err != nil {
				return err
			}
			return refBkt.Put(bucketKeyRef, bref)
		}
		if err := legacyBkt.ForEach(moveEntry); err != nil {
			return err
		}

		if err := contentBkt.DeleteBucket(deprecatedBucketKeyObjectIngest); err != nil {
			return err
		}
	}

	return nil
}
// noOpMigration was for a database change from boltdb/bolt which is no
// longer being supported, to go.etcd.io/bbolt which is the currently
// maintained repo for boltdb.
//
// It does not touch the transaction and always returns nil.
func noOpMigration(tx *bolt.Tx) error {
	return nil
}

206
core/metadata/namespaces.go Normal file
View File

@@ -0,0 +1,206 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metadata
import (
"context"
"fmt"
"strings"
"github.com/containerd/containerd/v2/errdefs"
"github.com/containerd/containerd/v2/identifiers"
l "github.com/containerd/containerd/v2/labels"
"github.com/containerd/containerd/v2/namespaces"
bolt "go.etcd.io/bbolt"
)
// namespaceStore implements namespace storage on top of a single bolt
// transaction; every method operates through tx.
type namespaceStore struct {
	// tx is the transaction supplied to NewNamespaceStore.
	tx *bolt.Tx
}
// NewNamespaceStore returns a namespaces.Store whose operations all run
// against the given bolt transaction.
func NewNamespaceStore(tx *bolt.Tx) namespaces.Store {
	store := new(namespaceStore)
	store.tx = tx
	return store
}
// Create adds a new namespace bucket and stores the given labels in it.
//
// The namespace name and every label are validated before the namespace
// bucket is created. If the namespace already exists the returned error wraps
// errdefs.ErrAlreadyExists.
func (s *namespaceStore) Create(ctx context.Context, namespace string, labels map[string]string) error {
	root, err := createBucketIfNotExists(s.tx, bucketKeyVersion)
	if err != nil {
		return err
	}

	if err := identifiers.Validate(namespace); err != nil {
		return err
	}

	for name, value := range labels {
		if err := l.Validate(name, value); err != nil {
			return fmt.Errorf("namespace.Labels: %w", err)
		}
	}

	// CreateBucket (rather than CreateBucketIfNotExists) is what surfaces
	// the already-exists condition.
	nsBkt, err := root.CreateBucket([]byte(namespace))
	switch {
	case err == bolt.ErrBucketExists:
		return fmt.Errorf("namespace %q: %w", namespace, errdefs.ErrAlreadyExists)
	case err != nil:
		return err
	}

	labelsBkt, err := nsBkt.CreateBucketIfNotExists(bucketKeyObjectLabels)
	if err != nil {
		return err
	}

	for name, value := range labels {
		if err := labelsBkt.Put([]byte(name), []byte(value)); err != nil {
			return err
		}
	}

	return nil
}
// Labels returns the labels stored for the namespace. When the namespace has
// no labels bucket an empty, non-nil map is returned.
func (s *namespaceStore) Labels(ctx context.Context, namespace string) (map[string]string, error) {
	result := map[string]string{}

	labelsBkt := getNamespaceLabelsBucket(s.tx, namespace)
	if labelsBkt == nil {
		return result, nil
	}

	collect := func(name, value []byte) error {
		result[string(name)] = string(value)
		return nil
	}
	if err := labelsBkt.ForEach(collect); err != nil {
		return nil, err
	}

	return result, nil
}
// SetLabel validates the key/value pair and stores it in the namespace's
// labels bucket. An empty value removes the label instead of storing it.
func (s *namespaceStore) SetLabel(ctx context.Context, namespace, key, value string) error {
	if err := l.Validate(key, value); err != nil {
		return fmt.Errorf("namespace.Labels: %w", err)
	}

	apply := func(bkt *bolt.Bucket) error {
		name := []byte(key)
		if value != "" {
			return bkt.Put(name, []byte(value))
		}
		// Empty value: drop the label.
		return bkt.Delete(name)
	}

	return withNamespacesLabelsBucket(s.tx, namespace, apply)
}
// List returns the names of all namespaces, i.e. the keys of every nested
// bucket found directly below the version bucket. It returns nil when the
// version bucket does not exist.
func (s *namespaceStore) List(ctx context.Context) ([]string, error) {
	root := getBucket(s.tx, bucketKeyVersion)
	if root == nil {
		return nil, nil // no namespaces!
	}

	var names []string
	err := root.ForEach(func(key, value []byte) error {
		// Only keys without a value are buckets, and only buckets are namespaces.
		if value == nil {
			names = append(names, string(key))
		}
		return nil
	})
	if err != nil {
		return nil, err
	}

	return names, nil
}
// Delete removes the namespace bucket.
//
// Every option in opts is applied to a namespaces.DeleteInfo for the
// namespace first, and the first option error aborts the deletion. The
// namespace must be empty: if listNs reports remaining objects the returned
// error wraps errdefs.ErrFailedPrecondition and names the object types. If the
// namespace does not exist, including the case where no namespace has ever
// been created, the returned error wraps errdefs.ErrNotFound.
func (s *namespaceStore) Delete(ctx context.Context, namespace string, opts ...namespaces.DeleteOpts) error {
	i := &namespaces.DeleteInfo{
		Name: namespace,
	}
	for _, o := range opts {
		if err := o(ctx, i); err != nil {
			return err
		}
	}

	bkt := getBucket(s.tx, bucketKeyVersion)
	if bkt == nil {
		// Without the version bucket no namespace exists at all. Calling
		// DeleteBucket on a nil bucket would panic, so report not found.
		return fmt.Errorf("namespace %q: %w", namespace, errdefs.ErrNotFound)
	}

	types, err := s.listNs(namespace)
	if err != nil {
		return err
	}

	if len(types) > 0 {
		return fmt.Errorf(
			"namespace %q must be empty, but it still has %s: %w",
			namespace, strings.Join(types, ", "),
			errdefs.ErrFailedPrecondition,
		)
	}

	if err := bkt.DeleteBucket([]byte(namespace)); err != nil {
		if err == bolt.ErrBucketNotFound {
			return fmt.Errorf("namespace %q: %w", namespace, errdefs.ErrNotFound)
		}

		return err
	}

	return nil
}
// listNs reports which kinds of objects are still present in the namespace.
// Only the kinds are returned, never the objects themselves, for performance
// reasons. Images, blobs and containers are checked first, followed by one
// entry per snapshotter that still holds snapshots.
func (s *namespaceStore) listNs(namespace string) ([]string, error) {
	var remaining []string

	fixedKinds := []struct {
		label string
		bkt   *bolt.Bucket
	}{
		{label: "images", bkt: getImagesBucket(s.tx, namespace)},
		{label: "blobs", bkt: getBlobsBucket(s.tx, namespace)},
		{label: "containers", bkt: getContainersBucket(s.tx, namespace)},
	}
	for _, kind := range fixedKinds {
		if !isBucketEmpty(kind.bkt) {
			remaining = append(remaining, kind.label)
		}
	}

	snapshotters := getSnapshottersBucket(s.tx, namespace)
	if snapshotters == nil {
		return remaining, nil
	}

	err := snapshotters.ForEach(func(name, value []byte) error {
		// Plain keys and empty snapshotter buckets contribute nothing.
		if value != nil || isBucketEmpty(snapshotters.Bucket(name)) {
			return nil
		}
		remaining = append(remaining, fmt.Sprintf("snapshots on %q snapshotter", name))
		return nil
	})
	if err != nil {
		return nil, err
	}

	return remaining, nil
}
// isBucketEmpty reports whether the bucket is nil or contains no keys.
func isBucketEmpty(bkt *bolt.Bucket) bool {
	if bkt != nil {
		// A non-nil first key means the bucket holds at least one entry.
		if first, _ := bkt.Cursor().First(); first != nil {
			return false
		}
	}
	return true
}

View File

@@ -0,0 +1,96 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metadata
import (
"context"
"testing"
"github.com/containerd/containerd/v2/core/containers"
"github.com/containerd/containerd/v2/namespaces"
"github.com/containerd/containerd/v2/protobuf/types"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.etcd.io/bbolt"
)
// TestCreateDelete creates a namespace per subtest, optionally populates it,
// and checks the result of deleting it: an empty namespace must delete
// cleanly, while one that still holds a container and a snapshotter entry must
// be rejected with an error naming what remains.
func TestCreateDelete(t *testing.T) {
	ctx, db := testDB(t)

	subtests := []struct {
		name     string
		create   func(t *testing.T, ctx context.Context)
		validate func(t *testing.T, err error)
	}{
		{
			name:   "empty",
			create: func(t *testing.T, ctx context.Context) {},
			validate: func(t *testing.T, err error) {
				require.NoError(t, err)
			},
		},
		{
			name: "not-empty",
			create: func(t *testing.T, ctx context.Context) {
				store := NewContainerStore(db)
				_, err := store.Create(ctx, containers.Container{
					ID:      "c1",
					Runtime: containers.RuntimeInfo{Name: "rt"},
					Spec:    &types.Any{},
				})
				require.NoError(t, err)

				// A failed fixture write must fail the test here rather
				// than surface later as a confusing validation mismatch.
				err = db.Update(func(tx *bbolt.Tx) error {
					ns, err := namespaces.NamespaceRequired(ctx)
					if err != nil {
						return err
					}
					bucket, err := createSnapshotterBucket(tx, ns, "testss")
					if err != nil {
						return err
					}
					return bucket.Put([]byte("key"), []byte("value"))
				})
				require.NoError(t, err)
			},
			validate: func(t *testing.T, err error) {
				require.Error(t, err)
				assert.Contains(t, err.Error(), `still has containers, snapshots on "testss" snapshotter`)
			},
		},
	}

	for _, subtest := range subtests {
		// Each subtest runs in a namespace named after the subtest.
		ns := subtest.name
		ctx = namespaces.WithNamespace(ctx, ns)

		t.Run(subtest.name, func(t *testing.T) {
			err := db.Update(func(tx *bbolt.Tx) error {
				store := NewNamespaceStore(tx)
				return store.Create(ctx, ns, nil)
			})
			require.NoError(t, err)

			subtest.create(t, ctx)

			err = db.Update(func(tx *bbolt.Tx) error {
				store := NewNamespaceStore(tx)
				return store.Delete(ctx, ns)
			})
			subtest.validate(t, err)
		})
	}
}

384
core/metadata/sandbox.go Normal file
View File

@@ -0,0 +1,384 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metadata
import (
"context"
"errors"
"fmt"
"strings"
"time"
"github.com/containerd/containerd/v2/core/metadata/boltutil"
"github.com/containerd/containerd/v2/errdefs"
"github.com/containerd/containerd/v2/filters"
"github.com/containerd/containerd/v2/identifiers"
"github.com/containerd/containerd/v2/namespaces"
api "github.com/containerd/containerd/v2/sandbox"
"github.com/containerd/typeurl/v2"
"go.etcd.io/bbolt"
)
// sandboxStore stores sandbox records in the metadata database.
type sandboxStore struct {
	// db is the metadata database supplied to NewSandboxStore.
	db *DB
}

// Compile-time check that *sandboxStore implements api.Store.
var _ api.Store = (*sandboxStore)(nil)
// NewSandboxStore creates a database client for sandboxes backed by db.
func NewSandboxStore(db *DB) api.Store {
	store := new(sandboxStore)
	store.db = db
	return store
}
// Create stores a new sandbox record in the namespace carried by ctx.
//
// CreatedAt and UpdatedAt are both set to the current UTC time before the
// record is validated and written. The stored sandbox, including those
// timestamps, is returned.
func (s *sandboxStore) Create(ctx context.Context, sandbox api.Sandbox) (api.Sandbox, error) {
	ns, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return api.Sandbox{}, err
	}

	now := time.Now().UTC()
	sandbox.CreatedAt, sandbox.UpdatedAt = now, now

	if err := s.validate(&sandbox); err != nil {
		return api.Sandbox{}, fmt.Errorf("failed to validate sandbox: %w", err)
	}

	persist := func(tx *bbolt.Tx) error {
		parent, err := createSandboxBucket(tx, ns)
		if err != nil {
			return fmt.Errorf("create error: %w", err)
		}

		// overwrite=false: an existing record with the same ID is an error.
		if err := s.write(parent, &sandbox, false); err != nil {
			return fmt.Errorf("write error: %w", err)
		}
		return nil
	}
	if err := update(ctx, s.db, persist); err != nil {
		return api.Sandbox{}, err
	}

	return sandbox, nil
}
// Update applies the selected fields of sandbox to the stored record with the
// same ID and returns the updated record.
//
// Each field path is one of "labels", "extensions", "runtime" or "spec", or a
// single map entry written as "labels.<key>" / "extensions.<key>". With no
// field paths, labels, extensions, spec and runtime are all replaced, and the
// runtime name must match the stored one. Any other path is rejected with
// errdefs.ErrInvalidArgument. UpdatedAt is set to the current UTC time.
func (s *sandboxStore) Update(ctx context.Context, sandbox api.Sandbox, fieldpaths ...string) (api.Sandbox, error) {
	ns, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return api.Sandbox{}, err
	}

	var result api.Sandbox
	apply := func(tx *bbolt.Tx) error {
		parent := getSandboxBucket(tx, ns)
		if parent == nil {
			return fmt.Errorf("no sandbox buckets: %w", errdefs.ErrNotFound)
		}

		current, err := s.read(parent, []byte(sandbox.ID))
		if err != nil {
			return err
		}

		if len(fieldpaths) == 0 {
			fieldpaths = []string{"labels", "extensions", "spec", "runtime"}

			if current.Runtime.Name != sandbox.Runtime.Name {
				return fmt.Errorf("sandbox.Runtime.Name field is immutable: %w", errdefs.ErrInvalidArgument)
			}
		}

		for _, path := range fieldpaths {
			switch {
			case strings.HasPrefix(path, "labels."):
				// Single label entry: make sure there is a map to write into.
				if current.Labels == nil {
					current.Labels = map[string]string{}
				}
				name := strings.TrimPrefix(path, "labels.")
				current.Labels[name] = sandbox.Labels[name]
			case strings.HasPrefix(path, "extensions."):
				// Single extension entry: make sure there is a map to write into.
				if current.Extensions == nil {
					current.Extensions = map[string]typeurl.Any{}
				}
				name := strings.TrimPrefix(path, "extensions.")
				current.Extensions[name] = sandbox.Extensions[name]
			case path == "labels":
				current.Labels = sandbox.Labels
			case path == "extensions":
				current.Extensions = sandbox.Extensions
			case path == "runtime":
				current.Runtime = sandbox.Runtime
			case path == "spec":
				current.Spec = sandbox.Spec
			default:
				return fmt.Errorf("cannot update %q field on sandbox %q: %w", path, sandbox.ID, errdefs.ErrInvalidArgument)
			}
		}

		current.UpdatedAt = time.Now().UTC()
		if err := s.write(parent, &current, true); err != nil {
			return err
		}

		result = current
		return nil
	}
	if err := update(ctx, s.db, apply); err != nil {
		return api.Sandbox{}, err
	}

	return result, nil
}
// Get returns the sandbox record with the given id from the namespace carried
// by ctx. A missing sandbox bucket is reported as errdefs.ErrNotFound.
func (s *sandboxStore) Get(ctx context.Context, id string) (api.Sandbox, error) {
	ns, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return api.Sandbox{}, err
	}

	var found api.Sandbox
	lookup := func(tx *bbolt.Tx) error {
		parent := getSandboxBucket(tx, ns)
		if parent == nil {
			return fmt.Errorf("no sandbox buckets: %w", errdefs.ErrNotFound)
		}

		record, err := s.read(parent, []byte(id))
		if err != nil {
			return err
		}

		found = record
		return nil
	}
	if err := view(ctx, s.db, lookup); err != nil {
		return api.Sandbox{}, err
	}

	return found, nil
}
// List returns the sandboxes in the namespace carried by ctx that match one
// or more of the provided filters. A filter that fails to parse is reported as
// errdefs.ErrInvalidArgument. When no sandbox has been created yet the result
// is empty.
func (s *sandboxStore) List(ctx context.Context, fields ...string) ([]api.Sandbox, error) {
	ns, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return nil, err
	}

	filter, err := filters.ParseAll(fields...)
	if err != nil {
		return nil, fmt.Errorf("%s: %w", err.Error(), errdefs.ErrInvalidArgument)
	}

	var matched []api.Sandbox
	scan := func(tx *bbolt.Tx) error {
		parent := getSandboxBucket(tx, ns)
		if parent == nil {
			// We haven't created any sandboxes yet, just return empty list
			return nil
		}

		return parent.ForEach(func(k, v []byte) error {
			info, err := s.read(parent, k)
			if err != nil {
				return fmt.Errorf("failed to read bucket %q: %w", string(k), err)
			}

			if filter.Match(adaptSandbox(&info)) {
				matched = append(matched, info)
			}
			return nil
		})
	}
	if err := view(ctx, s.db, scan); err != nil {
		return nil, err
	}

	return matched, nil
}
// Delete removes the sandbox record with the given id from the namespace
// carried by ctx. A missing sandbox bucket or a missing record is reported as
// errdefs.ErrNotFound.
func (s *sandboxStore) Delete(ctx context.Context, id string) error {
	ns, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return err
	}

	return update(ctx, s.db, func(tx *bbolt.Tx) error {
		parent := getSandboxBucket(tx, ns)
		if parent == nil {
			return fmt.Errorf("no sandbox buckets: %w", errdefs.ErrNotFound)
		}

		delErr := parent.DeleteBucket([]byte(id))
		if delErr == nil {
			return nil
		}

		// Translate bolt's missing-bucket error into the errdefs equivalent.
		if delErr == bbolt.ErrBucketNotFound {
			delErr = errdefs.ErrNotFound
		}
		return fmt.Errorf("failed to delete sandbox %q: %w", id, delErr)
	})
}
// write validates instance and stores it in a bucket named after its ID below
// parent.
//
// With overwrite set, an existing bucket is reused; otherwise the bucket must
// not exist yet and a duplicate is reported as errdefs.ErrAlreadyExists.
// Timestamps, labels, extensions, spec, sandboxer and the runtime name and
// options are written in that order.
func (s *sandboxStore) write(parent *bbolt.Bucket, instance *api.Sandbox, overwrite bool) error {
	if err := s.validate(instance); err != nil {
		return err
	}

	key := []byte(instance.ID)

	var record *bbolt.Bucket
	if overwrite {
		bkt, err := parent.CreateBucketIfNotExists(key)
		if err != nil {
			return err
		}
		record = bkt
	} else {
		bkt, err := parent.CreateBucket(key)
		if err != nil {
			if err == bbolt.ErrBucketExists {
				return fmt.Errorf("sandbox bucket %q already exists: %w", instance.ID, errdefs.ErrAlreadyExists)
			}
			return err
		}
		record = bkt
	}

	if err := boltutil.WriteTimestamps(record, instance.CreatedAt, instance.UpdatedAt); err != nil {
		return err
	}

	if err := boltutil.WriteLabels(record, instance.Labels); err != nil {
		return err
	}

	if err := boltutil.WriteExtensions(record, instance.Extensions); err != nil {
		return err
	}

	if err := boltutil.WriteAny(record, bucketKeySpec, instance.Spec); err != nil {
		return err
	}

	if err := record.Put(bucketKeySandboxer, []byte(instance.Sandboxer)); err != nil {
		return err
	}

	// Runtime name and options live in their own nested bucket.
	runtimeBkt, err := record.CreateBucketIfNotExists(bucketKeyRuntime)
	if err != nil {
		return err
	}

	if err := runtimeBkt.Put(bucketKeyName, []byte(instance.Runtime.Name)); err != nil {
		return err
	}

	return boltutil.WriteAny(runtimeBkt, bucketKeyOptions, instance.Runtime.Options)
}
// read loads the sandbox stored in the bucket named id below parent.
//
// A missing bucket is reported as errdefs.ErrNotFound, and a record without a
// runtime bucket is an error. A record without a sandboxer key yields an empty
// Sandboxer.
func (s *sandboxStore) read(parent *bbolt.Bucket, id []byte) (api.Sandbox, error) {
	record := parent.Bucket(id)
	if record == nil {
		return api.Sandbox{}, fmt.Errorf("bucket %q not found: %w", id, errdefs.ErrNotFound)
	}

	out := api.Sandbox{ID: string(id)}

	labels, err := boltutil.ReadLabels(record)
	if err != nil {
		return api.Sandbox{}, err
	}
	out.Labels = labels

	if err := boltutil.ReadTimestamps(record, &out.CreatedAt, &out.UpdatedAt); err != nil {
		return api.Sandbox{}, err
	}

	spec, err := boltutil.ReadAny(record, bucketKeySpec)
	if err != nil {
		return api.Sandbox{}, err
	}
	out.Spec = spec

	runtimeBkt := record.Bucket(bucketKeyRuntime)
	if runtimeBkt == nil {
		return api.Sandbox{}, errors.New("no runtime bucket")
	}

	out.Runtime.Name = string(runtimeBkt.Get(bucketKeyName))

	options, err := boltutil.ReadAny(runtimeBkt, bucketKeyOptions)
	if err != nil {
		return api.Sandbox{}, err
	}
	out.Runtime.Options = options

	extensions, err := boltutil.ReadExtensions(record)
	if err != nil {
		return api.Sandbox{}, err
	}
	out.Extensions = extensions

	// Converting a nil byte slice yields "", so a missing key needs no special case.
	out.Sandboxer = string(record.Get(bucketKeySandboxer))

	return out, nil
}
// validate checks that the sandbox has a valid ID and non-zero creation and
// update timestamps. Zero timestamps are reported as
// errdefs.ErrInvalidArgument.
func (s *sandboxStore) validate(new *api.Sandbox) error {
	err := identifiers.Validate(new.ID)
	switch {
	case err != nil:
		return fmt.Errorf("invalid sandbox ID: %w", err)
	case new.CreatedAt.IsZero():
		return fmt.Errorf("creation date must not be zero: %w", errdefs.ErrInvalidArgument)
	case new.UpdatedAt.IsZero():
		return fmt.Errorf("updated date must not be zero: %w", errdefs.ErrInvalidArgument)
	}

	return nil
}

View File

@@ -0,0 +1,266 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metadata
import (
"testing"
"github.com/containerd/containerd/v2/errdefs"
"github.com/containerd/containerd/v2/protobuf/types"
api "github.com/containerd/containerd/v2/sandbox"
"github.com/containerd/typeurl/v2"
"github.com/google/go-cmp/cmp"
)
// TestSandboxCreate stores a fully populated sandbox and checks that reading
// it back yields an equal record.
func TestSandboxCreate(t *testing.T) {
	ctx, db := testDB(t)

	store := NewSandboxStore(db)

	want := api.Sandbox{
		ID:     "1",
		Labels: map[string]string{"a": "1", "b": "2"},
		Spec:   &types.Any{TypeUrl: "1", Value: []byte{1, 2, 3}},
		Extensions: map[string]typeurl.Any{
			"ext1": &types.Any{TypeUrl: "url/1", Value: []byte{1, 2, 3}},
			"ext2": &types.Any{TypeUrl: "url/2", Value: []byte{3, 2, 1}},
		},
		Runtime: api.RuntimeOpts{
			Name:    "test",
			Options: &types.Any{TypeUrl: "url/3", Value: []byte{4, 5, 6}},
		},
		Sandboxer: "test-sandboxer",
	}

	if _, err := store.Create(ctx, want); err != nil {
		t.Fatal(err)
	}

	got, err := store.Get(ctx, "1")
	if err != nil {
		t.Fatal(err)
	}

	assertEqualInstances(t, want, got)
}
// TestSandboxCreateDup checks that creating the same sandbox twice fails with
// an already-exists error on the second attempt.
func TestSandboxCreateDup(t *testing.T) {
	ctx, db := testDB(t)

	store := NewSandboxStore(db)

	sb := api.Sandbox{
		ID:      "1",
		Spec:    &types.Any{TypeUrl: "1", Value: []byte{1, 2, 3}},
		Runtime: api.RuntimeOpts{Name: "test"},
	}

	if _, err := store.Create(ctx, sb); err != nil {
		t.Fatal(err)
	}

	// The second creation must be rejected as a duplicate.
	if _, err := store.Create(ctx, sb); !errdefs.IsAlreadyExists(err) {
		t.Fatalf("expected %+v, got %+v", errdefs.ErrAlreadyExists, err)
	}
}
// TestSandboxUpdate updates one label, one extension and the spec of a stored
// sandbox through field paths and checks that only those parts change while
// the remaining extension, the runtime and the sandboxer are kept.
func TestSandboxUpdate(t *testing.T) {
	ctx, db := testDB(t)

	store := NewSandboxStore(db)

	initial := api.Sandbox{
		ID:     "2",
		Labels: map[string]string{"lbl1": "existing"},
		Spec:   &types.Any{TypeUrl: "1", Value: []byte{1}}, // will replace
		Extensions: map[string]typeurl.Any{
			"ext2": &types.Any{TypeUrl: "url2", Value: []byte{4, 5, 6}}, // will append `ext1`
		},
		Runtime:   api.RuntimeOpts{Name: "test"}, // no change
		Sandboxer: "test-sandboxer",              // no change
	}
	if _, err := store.Create(ctx, initial); err != nil {
		t.Fatal(err)
	}

	newSpec := types.Any{TypeUrl: "2", Value: []byte{3, 2, 1}}

	changes := api.Sandbox{
		ID:     "2",
		Labels: map[string]string{"lbl1": "new"},
		Spec:   &newSpec,
		Extensions: map[string]typeurl.Any{
			"ext1": &types.Any{TypeUrl: "url1", Value: []byte{1, 2}},
		},
	}
	got, err := store.Update(ctx, changes, "labels.lbl1", "extensions.ext1", "spec")
	if err != nil {
		t.Fatal(err)
	}

	want := api.Sandbox{
		ID:   "2",
		Spec: &newSpec,
		Labels: map[string]string{
			"lbl1": "new",
		},
		Extensions: map[string]typeurl.Any{
			"ext1": &types.Any{TypeUrl: "url1", Value: []byte{1, 2}},
			"ext2": &types.Any{TypeUrl: "url2", Value: []byte{4, 5, 6}},
		},
		Runtime:   api.RuntimeOpts{Name: "test"},
		Sandboxer: "test-sandboxer",
	}

	assertEqualInstances(t, got, want)
}
// TestSandboxGetInvalid checks that fetching an unknown sandbox ID fails with
// a not-found error.
func TestSandboxGetInvalid(t *testing.T) {
	ctx, db := testDB(t)

	store := NewSandboxStore(db)

	switch _, err := store.Get(ctx, "invalid_id"); {
	case err == nil:
		t.Fatalf("expected %+v error for invalid ID", errdefs.ErrNotFound)
	case !errdefs.IsNotFound(err):
		t.Fatalf("unexpected error %T type", err)
	}
}
// TestSandboxList stores two sandboxes and checks that an unfiltered listing
// returns both of them, in the order they were created.
func TestSandboxList(t *testing.T) {
	ctx, db := testDB(t)

	store := NewSandboxStore(db)

	fixtures := []api.Sandbox{
		{
			ID:         "1",
			Labels:     map[string]string{"test": "1"},
			Spec:       &types.Any{TypeUrl: "1", Value: []byte{1, 2, 3}},
			Extensions: map[string]typeurl.Any{"ext": &types.Any{}},
			Runtime:    api.RuntimeOpts{Name: "test"},
		},
		{
			ID:     "2",
			Labels: map[string]string{"test": "2"},
			Spec:   &types.Any{TypeUrl: "2", Value: []byte{3, 2, 1}},
			Extensions: map[string]typeurl.Any{"ext": &types.Any{
				TypeUrl: "test",
				Value:   []byte{9},
			}},
			Runtime:   api.RuntimeOpts{Name: "test"},
			Sandboxer: "test-sandboxer",
		},
	}

	for _, sb := range fixtures {
		if _, err := store.Create(ctx, sb); err != nil {
			t.Fatal(err)
		}
	}

	listed, err := store.List(ctx)
	if err != nil {
		t.Fatal(err)
	}

	if len(fixtures) != len(listed) {
		t.Fatalf("expected list size: %d != %d", len(fixtures), len(listed))
	}

	for idx, got := range listed {
		assertEqualInstances(t, got, fixtures[idx])
	}
}
// TestSandboxListWithFilter stores two sandboxes and checks that listing with
// an ID filter returns only the matching one.
func TestSandboxListWithFilter(t *testing.T) {
	ctx, db := testDB(t)

	store := NewSandboxStore(db)

	fixtures := []api.Sandbox{
		{
			ID:         "1",
			Labels:     map[string]string{"test": "1"},
			Spec:       &types.Any{TypeUrl: "1", Value: []byte{1, 2, 3}},
			Extensions: map[string]typeurl.Any{"ext": &types.Any{}},
			Runtime:    api.RuntimeOpts{Name: "test"},
		},
		{
			ID:     "2",
			Labels: map[string]string{"test": "2"},
			Spec:   &types.Any{TypeUrl: "2", Value: []byte{3, 2, 1}},
			Extensions: map[string]typeurl.Any{"ext": &types.Any{
				TypeUrl: "test",
				Value:   []byte{9},
			}},
			Runtime:   api.RuntimeOpts{Name: "test"},
			Sandboxer: "test-sandboxer",
		},
	}

	for _, sb := range fixtures {
		if _, err := store.Create(ctx, sb); err != nil {
			t.Fatal(err)
		}
	}

	listed, err := store.List(ctx, "id==1")
	if err != nil {
		t.Fatal(err)
	}

	if count := len(listed); count != 1 {
		t.Fatalf("expected list to contain 1 element, got %d", count)
	}

	assertEqualInstances(t, listed[0], fixtures[0])
}
// TestSandboxDelete stores a sandbox, deletes it, and checks that a
// subsequent lookup reports not found.
func TestSandboxDelete(t *testing.T) {
	ctx, db := testDB(t)

	store := NewSandboxStore(db)

	sb := api.Sandbox{
		ID:      "2",
		Spec:    &types.Any{TypeUrl: "1", Value: []byte{1, 2, 3}},
		Runtime: api.RuntimeOpts{Name: "test"},
	}

	if _, err := store.Create(ctx, sb); err != nil {
		t.Fatal(err)
	}

	if err := store.Delete(ctx, "2"); err != nil {
		t.Fatalf("deleted failed %+v", err)
	}

	// The record must be gone after deletion.
	if _, err := store.Get(ctx, "2"); !errdefs.IsNotFound(err) {
		t.Fatalf("unexpected err result: %+v != %+v", err, errdefs.ErrNotFound)
	}
}
// assertEqualInstances fails the test immediately when sandboxes x and y
// differ under the compareNil, compareAny and ignoreTime comparison options.
//
// It is marked as a test helper so that the failure is attributed to the
// calling test line rather than to this function.
func assertEqualInstances(t *testing.T, x, y api.Sandbox) {
	t.Helper()

	if diff := cmp.Diff(x, y, compareNil, compareAny, ignoreTime); diff != "" {
		t.Fatalf("x and y are different: %s", diff)
	}
}

976
core/metadata/snapshot.go Normal file
View File

@@ -0,0 +1,976 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metadata
import (
"context"
"fmt"
"strings"
"sync"
"sync/atomic"
"time"
eventstypes "github.com/containerd/containerd/v2/api/events"
"github.com/containerd/containerd/v2/core/metadata/boltutil"
"github.com/containerd/containerd/v2/errdefs"
"github.com/containerd/containerd/v2/filters"
"github.com/containerd/containerd/v2/labels"
"github.com/containerd/containerd/v2/mount"
"github.com/containerd/containerd/v2/namespaces"
"github.com/containerd/containerd/v2/snapshots"
"github.com/containerd/log"
bolt "go.etcd.io/bbolt"
)
const (
	// inheritedLabelsPrefix is the label key prefix "containerd.io/snapshot/".
	// The tests in this package expect snapshots.FilterInheritedLabels to keep
	// only labels whose key carries this prefix.
	inheritedLabelsPrefix = "containerd.io/snapshot/"
	// labelSnapshotRef is the label key whose value createSnapshot treats as
	// the target reference of the snapshot being prepared.
	labelSnapshotRef = "containerd.io/snapshot.ref"
)
// snapshotter wraps a backend snapshots.Snapshotter and keeps per-namespace
// snapshot metadata (name mapping, parent links, labels, timestamps) in the
// metadata database.
type snapshotter struct {
	// Snapshotter is the wrapped backend; it is addressed with backend keys
	// produced by createKey rather than with the caller-visible names.
	snapshots.Snapshotter
	// name identifies this snapshotter in bucket paths, leases and log fields.
	name string
	// db is the metadata database holding the snapshot records.
	db *DB
	// l is read-locked by Update, createSnapshot, Commit and Remove, and
	// write-locked by garbageCollect.
	l sync.RWMutex
}
// newSnapshotter wraps the backend snapshotter sn so that its snapshots are
// tracked per namespace in db under the given snapshotter name.
func newSnapshotter(db *DB, name string, sn snapshots.Snapshotter) *snapshotter {
	wrapped := new(snapshotter)
	wrapped.Snapshotter = sn
	wrapped.name = name
	wrapped.db = db
	return wrapped
}
// createKey builds the backend snapshot key "<namespace>/<id>/<key>", where
// id is rendered in decimal.
func createKey(id uint64, namespace, key string) string {
	const layout = "%s/%d/%s"
	return fmt.Sprintf(layout, namespace, id, key)
}
// getKey looks up the backend key stored under bucketKeyName for the
// snapshot key of snapshotter name in namespace ns. It returns the empty
// string when the snapshotter bucket, the snapshot bucket, or the stored
// value is missing.
func getKey(tx *bolt.Tx, ns, name, key string) string {
	if root := getSnapshotterBucket(tx, ns, name); root != nil {
		if entry := root.Bucket([]byte(key)); entry != nil {
			// Converting a nil or empty value yields "", the "missing" result.
			return string(entry.Get(bucketKeyName))
		}
	}
	return ""
}
// resolveKey translates the namespaced snapshot key into the backend key
// recorded in the metadata database. It returns an error wrapping
// errdefs.ErrNotFound when no backend key is stored for key, and requires a
// namespace to be set on ctx.
func (s *snapshotter) resolveKey(ctx context.Context, key string) (string, error) {
	ns, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return "", err
	}

	var bkey string
	lookup := func(tx *bolt.Tx) error {
		if bkey = getKey(tx, ns, s.name, key); bkey == "" {
			return fmt.Errorf("snapshot %v does not exist: %w", key, errdefs.ErrNotFound)
		}
		return nil
	}
	if err := view(ctx, s.db, lookup); err != nil {
		return "", err
	}

	return bkey, nil
}
// Stat returns the info for the snapshot key in the namespace carried by ctx.
//
// Labels, timestamps and the parent name are read from the metadata
// database, the remaining fields come from the backend's Stat on the stored
// backend key, and the two are merged with overlayInfo. An error wrapping
// errdefs.ErrNotFound is returned when the snapshot has no metadata record.
func (s *snapshotter) Stat(ctx context.Context, key string) (snapshots.Info, error) {
	ns, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return snapshots.Info{}, err
	}

	var bkey string
	local := snapshots.Info{Name: key}

	read := func(tx *bolt.Tx) error {
		var sbkt *bolt.Bucket
		if bkt := getSnapshotterBucket(tx, ns, s.name); bkt != nil {
			sbkt = bkt.Bucket([]byte(key))
		}
		if sbkt == nil {
			return fmt.Errorf("snapshot %v does not exist: %w", key, errdefs.ErrNotFound)
		}

		var rerr error
		if local.Labels, rerr = boltutil.ReadLabels(sbkt); rerr != nil {
			return fmt.Errorf("failed to read labels: %w", rerr)
		}
		if rerr = boltutil.ReadTimestamps(sbkt, &local.Created, &local.Updated); rerr != nil {
			return fmt.Errorf("failed to read timestamps: %w", rerr)
		}

		bkey = string(sbkt.Get(bucketKeyName))
		local.Parent = string(sbkt.Get(bucketKeyParent))
		return nil
	}
	if err := view(ctx, s.db, read); err != nil {
		return snapshots.Info{}, err
	}

	// The backend is queried outside of the database transaction.
	info, err := s.Snapshotter.Stat(ctx, bkey)
	if err != nil {
		return snapshots.Info{}, err
	}

	return overlayInfo(info, local), nil
}
// Update changes the labels of the snapshot named info.Name in the namespace
// carried by ctx and returns the resulting info.
//
// Field paths select what is updated:
//   - no fieldpaths: all labels are replaced by info.Labels;
//   - "labels": all labels are replaced by info.Labels;
//   - "labels.<key>": only <key> is assigned, taking info.Labels[<key>]
//     (the empty string when info.Labels has no such key);
//   - anything else: an error wrapping errdefs.ErrInvalidArgument.
//
// errdefs.ErrInvalidArgument is returned for an empty info.Name, and an error
// wrapping errdefs.ErrNotFound when the snapshot has no metadata record. The
// backend snapshotter is updated inside the database transaction with the
// inherited subset of the labels; if the transaction fails after the backend
// was updated, the inconsistency is logged.
func (s *snapshotter) Update(ctx context.Context, info snapshots.Info, fieldpaths ...string) (snapshots.Info, error) {
	s.l.RLock()
	defer s.l.RUnlock()

	ns, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return snapshots.Info{}, err
	}

	if info.Name == "" {
		return snapshots.Info{}, errdefs.ErrInvalidArgument
	}

	var (
		bkey  string
		local = snapshots.Info{
			Name: info.Name,
		}
		// updated records that the backend accepted the change, so a later
		// transaction failure can be reported as an out-of-sync condition.
		updated bool
	)
	if err := update(ctx, s.db, func(tx *bolt.Tx) error {
		bkt := getSnapshotterBucket(tx, ns, s.name)
		if bkt == nil {
			return fmt.Errorf("snapshot %v does not exist: %w", info.Name, errdefs.ErrNotFound)
		}
		sbkt := bkt.Bucket([]byte(info.Name))
		if sbkt == nil {
			return fmt.Errorf("snapshot %v does not exist: %w", info.Name, errdefs.ErrNotFound)
		}

		local.Labels, err = boltutil.ReadLabels(sbkt)
		if err != nil {
			return fmt.Errorf("failed to read labels: %w", err)
		}
		if err := boltutil.ReadTimestamps(sbkt, &local.Created, &local.Updated); err != nil {
			return fmt.Errorf("failed to read timestamps: %w", err)
		}

		// Handle field updates
		if len(fieldpaths) > 0 {
			for _, path := range fieldpaths {
				if strings.HasPrefix(path, "labels.") {
					if local.Labels == nil {
						local.Labels = map[string]string{}
					}

					key := strings.TrimPrefix(path, "labels.")
					local.Labels[key] = info.Labels[key]
					continue
				}

				switch path {
				case "labels":
					local.Labels = info.Labels
				default:
					return fmt.Errorf("cannot update %q field on snapshot %q: %w", path, info.Name, errdefs.ErrInvalidArgument)
				}
			}
		} else {
			local.Labels = info.Labels
		}
		if err := validateSnapshot(&local); err != nil {
			return err
		}
		local.Updated = time.Now().UTC()

		// These are write failures; report them as such.
		if err := boltutil.WriteTimestamps(sbkt, local.Created, local.Updated); err != nil {
			return fmt.Errorf("failed to write timestamps: %w", err)
		}
		if err := boltutil.WriteLabels(sbkt, local.Labels); err != nil {
			return fmt.Errorf("failed to write labels: %w", err)
		}

		bkey = string(sbkt.Get(bucketKeyName))
		local.Parent = string(sbkt.Get(bucketKeyParent))

		inner := snapshots.Info{
			Name:   bkey,
			Labels: snapshots.FilterInheritedLabels(local.Labels),
		}

		// NOTE: Perform this inside the transaction to reduce the
		// chances of out of sync data. The backend snapshotters
		// should perform the Update as fast as possible.
		if info, err = s.Snapshotter.Update(ctx, inner, fieldpaths...); err != nil {
			return err
		}
		updated = true

		return nil
	}); err != nil {
		if updated {
			log.G(ctx).WithField("snapshotter", s.name).WithField("key", local.Name).WithError(err).Error("transaction failed after updating snapshot backend")
		}
		return snapshots.Info{}, err
	}

	return overlayInfo(info, local), nil
}
// overlayInfo returns info with Name, Created, Updated and Parent taken from
// overlay and with overlay's labels layered on top of info's labels.
//
// When info has no label map, overlay's map is used as is; otherwise
// overlay's entries are written into info's existing map, overriding
// entries with the same key.
func overlayInfo(info, overlay snapshots.Info) snapshots.Info {
	merged := info
	merged.Name = overlay.Name
	merged.Created = overlay.Created
	merged.Updated = overlay.Updated
	merged.Parent = overlay.Parent

	if merged.Labels == nil {
		merged.Labels = overlay.Labels
		return merged
	}
	for key, value := range overlay.Labels {
		merged.Labels[key] = value
	}
	return merged
}
// Usage resolves key to its backend key and returns the backend's usage for
// that snapshot. Resolution errors are returned with a zero Usage.
func (s *snapshotter) Usage(ctx context.Context, key string) (snapshots.Usage, error) {
	backendKey, err := s.resolveKey(ctx, key)
	if err == nil {
		return s.Snapshotter.Usage(ctx, backendKey)
	}
	return snapshots.Usage{}, err
}
// Mounts resolves key to its backend key and returns the backend's mounts
// for that snapshot. Resolution errors are returned with nil mounts.
func (s *snapshotter) Mounts(ctx context.Context, key string) ([]mount.Mount, error) {
	backendKey, err := s.resolveKey(ctx, key)
	if err == nil {
		return s.Snapshotter.Mounts(ctx, backendKey)
	}
	return nil, err
}
// Prepare creates a writable snapshot key on top of parent and returns its
// mounts. When the database has a publisher configured, a
// "/snapshot/prepare" event is published afterwards; a publish failure is
// returned as the error of Prepare.
func (s *snapshotter) Prepare(ctx context.Context, key, parent string, opts ...snapshots.Opt) ([]mount.Mount, error) {
	mounts, err := s.createSnapshot(ctx, key, parent, false, opts)
	if err != nil {
		return nil, err
	}

	publisher := s.db.dbopts.publisher
	if publisher == nil {
		return mounts, nil
	}

	event := &eventstypes.SnapshotPrepare{
		Key:         key,
		Parent:      parent,
		Snapshotter: s.name,
	}
	if err := publisher.Publish(ctx, "/snapshot/prepare", event); err != nil {
		return nil, err
	}

	return mounts, nil
}
// View creates a read-only snapshot key on top of parent and returns its
// mounts. No event is published.
func (s *snapshotter) View(ctx context.Context, key, parent string, opts ...snapshots.Opt) ([]mount.Mount, error) {
	const readonly = true
	return s.createSnapshot(ctx, key, parent, readonly, opts)
}
// createSnapshot implements Prepare (readonly == false) and View
// (readonly == true) for the namespace carried by ctx.
//
// It runs in three steps:
//  1. a first transaction validates the request (target and key must not
//     exist yet, the parent must exist) and reserves a backend key;
//  2. the backend snapshotter creates the snapshot outside any transaction;
//  3. a second transaction records the snapshot (lease, parent/child links,
//     timestamps, labels, backend key) in the metadata database.
//
// If the backend reports "already exists" and a target reference label was
// supplied, the matching committed backend snapshot is recorded under the
// target name instead and an error wrapping errdefs.ErrAlreadyExists is
// returned. If the final result is an error and the backend snapshot was
// newly created, a best-effort removal of that backend snapshot is attempted.
func (s *snapshotter) createSnapshot(ctx context.Context, key, parent string, readonly bool, opts []snapshots.Opt) ([]mount.Mount, error) {
	s.l.RLock()
	defer s.l.RUnlock()
	ns, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return nil, err
	}
	var base snapshots.Info
	for _, opt := range opts {
		if err := opt(&base); err != nil {
			return nil, err
		}
	}
	if err := validateSnapshot(&base); err != nil {
		return nil, err
	}
	var (
		// target is the requested snapshot reference, empty when none was given.
		target = base.Labels[labelSnapshotRef]
		// bparent and bkey are the backend keys of the parent and of the new snapshot.
		bparent string
		bkey string
		// Only the inherited subset of the labels is passed to the backend.
		bopts = []snapshots.Opt{
			snapshots.WithLabels(snapshots.FilterInheritedLabels(base.Labels)),
		}
	)
	// First transaction: validate the request and reserve a backend key.
	if err := update(ctx, s.db, func(tx *bolt.Tx) error {
		bkt, err := createSnapshotterBucket(tx, ns, s.name)
		if err != nil {
			return err
		}
		// Check if target exists, if so, return already exists
		if target != "" {
			if tbkt := bkt.Bucket([]byte(target)); tbkt != nil {
				return fmt.Errorf("target snapshot %q: %w", target, errdefs.ErrAlreadyExists)
			}
		}
		if bbkt := bkt.Bucket([]byte(key)); bbkt != nil {
			return fmt.Errorf("snapshot %q: %w", key, errdefs.ErrAlreadyExists)
		}
		if parent != "" {
			pbkt := bkt.Bucket([]byte(parent))
			if pbkt == nil {
				return fmt.Errorf("parent snapshot %v does not exist: %w", parent, errdefs.ErrNotFound)
			}
			bparent = string(pbkt.Get(bucketKeyName))
		}
		sid, err := bkt.NextSequence()
		if err != nil {
			return err
		}
		bkey = createKey(sid, ns, key)
		// err is nil here: the NextSequence error was handled above.
		return err
	}); err != nil {
		return nil, err
	}
	var (
		m []mount.Mount
		// created holds the backend key when this call created a new backend
		// snapshot, so it can be cleaned up if recording it fails.
		created string
		// rerr is the error to return after the final transaction has run.
		rerr error
	)
	if readonly {
		m, err = s.Snapshotter.View(ctx, bkey, bparent, bopts...)
	} else {
		m, err = s.Snapshotter.Prepare(ctx, bkey, bparent, bopts...)
	}
	// An already exists error should indicate the backend found a snapshot
	// matching a provided target reference.
	if errdefs.IsAlreadyExists(err) {
		if target != "" {
			var tinfo *snapshots.Info
			// NOTE(review): target is interpolated unquoted (%s) while bparent is
			// quoted (%q) — confirm target values can never contain filter syntax.
			filter := fmt.Sprintf(`labels."containerd.io/snapshot.ref"==%s,parent==%q`, target, bparent)
			if err := s.Snapshotter.Walk(ctx, func(ctx context.Context, i snapshots.Info) error {
				// Keep the first committed snapshot that matches; the label and
				// parent are re-checked in case the backend ignores the filter.
				if tinfo == nil && i.Kind == snapshots.KindCommitted {
					if i.Labels["containerd.io/snapshot.ref"] != target {
						// Walk did not respect filter
						return nil
					}
					if i.Parent != bparent {
						// Walk did not respect filter
						return nil
					}
					tinfo = &i
				}
				return nil
			}, filter); err != nil {
				return nil, fmt.Errorf("failed walking backend snapshots: %w", err)
			}
			if tinfo == nil {
				return nil, fmt.Errorf("target snapshot %q in backend: %w", target, errdefs.ErrNotFound)
			}
			// Record the existing backend snapshot under the target name.
			key = target
			bkey = tinfo.Name
			bparent = tinfo.Parent
			base.Created = tinfo.Created
			base.Updated = tinfo.Updated
			if base.Labels == nil {
				base.Labels = tinfo.Labels
			} else {
				// Labels supplied by the caller take precedence over backend labels.
				for k, v := range tinfo.Labels {
					if _, ok := base.Labels[k]; !ok {
						base.Labels[k] = v
					}
				}
			}
			// Propagate this error after the final update
			rerr = fmt.Errorf("target snapshot %q from snapshotter: %w", target, errdefs.ErrAlreadyExists)
		} else {
			// This condition is unexpected as the key provided is expected
			// to be new and unique, return as unknown response from backend
			// to avoid confusing callers handling already exists.
			return nil, fmt.Errorf("unexpected error from snapshotter: %v: %w", err, errdefs.ErrUnknown)
		}
	} else if err != nil {
		return nil, err
	} else {
		ts := time.Now().UTC()
		base.Created = ts
		base.Updated = ts
		created = bkey
	}
	// Second transaction: record the snapshot in the metadata database.
	if txerr := update(ctx, s.db, func(tx *bolt.Tx) error {
		bkt := getSnapshotterBucket(tx, ns, s.name)
		if bkt == nil {
			return fmt.Errorf("can not find snapshotter %q: %w", s.name, errdefs.ErrNotFound)
		}
		if err := addSnapshotLease(ctx, tx, s.name, key); err != nil {
			return err
		}
		bbkt, err := bkt.CreateBucket([]byte(key))
		if err != nil {
			if err != bolt.ErrBucketExists {
				return err
			}
			// The record already exists: let the transaction commit (keeping the
			// lease added above) and report already exists to the caller.
			if rerr == nil {
				rerr = fmt.Errorf("snapshot %q: %w", key, errdefs.ErrAlreadyExists)
			}
			return nil
		}
		if parent != "" {
			pbkt := bkt.Bucket([]byte(parent))
			if pbkt == nil {
				return fmt.Errorf("parent snapshot %v does not exist: %w", parent, errdefs.ErrNotFound)
			}
			// Ensure the backend's parent matches the metadata store's parent
			// If it is mismatched, then a target was provided for a snapshotter
			// which has a different parent then requested.
			// NOTE: The backend snapshotter is responsible for enforcing the
			// uniqueness of the reference relationships, the metadata store
			// can only error out to prevent inconsistent data.
			if bparent != string(pbkt.Get(bucketKeyName)) {
				return fmt.Errorf("mismatched parent %s from target %s: %w", parent, target, errdefs.ErrInvalidArgument)
			}
			cbkt, err := pbkt.CreateBucketIfNotExists(bucketKeyChildren)
			if err != nil {
				return err
			}
			if err := cbkt.Put([]byte(key), nil); err != nil {
				return err
			}
			if err := bbkt.Put(bucketKeyParent, []byte(parent)); err != nil {
				return err
			}
		}
		if err := boltutil.WriteTimestamps(bbkt, base.Created, base.Updated); err != nil {
			return err
		}
		if err := boltutil.WriteLabels(bbkt, base.Labels); err != nil {
			return err
		}
		return bbkt.Put(bucketKeyName, []byte(bkey))
	}); txerr != nil {
		// A transaction failure overrides any pending already-exists error.
		rerr = txerr
	}
	if rerr != nil {
		// If the created reference is not stored, attempt clean up
		if created != "" {
			if err := s.Snapshotter.Remove(ctx, created); err != nil {
				log.G(ctx).WithField("snapshotter", s.name).WithField("key", created).WithError(err).Error("failed to cleanup unreferenced snapshot")
			}
		}
		return nil, rerr
	}
	return m, nil
}
// Commit records the active snapshot key as the committed snapshot name in
// the namespace carried by ctx.
//
// Within a single transaction it creates the metadata record for name
// (with a newly allocated backend key, the parent of key, fresh timestamps
// and the supplied labels), moves the parent's child link and the lease from
// key to name, deletes the record for key, and finally commits the backend
// snapshot. When the database has a publisher configured, a
// "/snapshot/commit" event is published after the transaction succeeds.
//
// An error wrapping errdefs.ErrAlreadyExists is returned when name is
// already recorded, and one wrapping errdefs.ErrNotFound when the
// snapshotter bucket, key, or key's parent is missing.
func (s *snapshotter) Commit(ctx context.Context, name, key string, opts ...snapshots.Opt) error {
	s.l.RLock()
	defer s.l.RUnlock()
	ns, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return err
	}
	var base snapshots.Info
	for _, opt := range opts {
		if err := opt(&base); err != nil {
			return err
		}
	}
	if err := validateSnapshot(&base); err != nil {
		return err
	}
	// bname is set to the new backend key once the backend commit succeeded,
	// so that a later transaction failure can be reported as out of sync.
	var bname string
	if err := update(ctx, s.db, func(tx *bolt.Tx) error {
		bkt := getSnapshotterBucket(tx, ns, s.name)
		if bkt == nil {
			return fmt.Errorf("can not find snapshotter %q: %w",
				s.name, errdefs.ErrNotFound)
		}
		bbkt, err := bkt.CreateBucket([]byte(name))
		if err != nil {
			if err == bolt.ErrBucketExists {
				err = fmt.Errorf("snapshot %q: %w", name, errdefs.ErrAlreadyExists)
			}
			return err
		}
		if err := addSnapshotLease(ctx, tx, s.name, name); err != nil {
			return err
		}
		obkt := bkt.Bucket([]byte(key))
		if obkt == nil {
			return fmt.Errorf("snapshot %v does not exist: %w", key, errdefs.ErrNotFound)
		}
		// bkey is the backend key of the active snapshot being committed.
		bkey := string(obkt.Get(bucketKeyName))
		sid, err := bkt.NextSequence()
		if err != nil {
			return err
		}
		// nameKey is the backend key under which the committed snapshot is stored.
		nameKey := createKey(sid, ns, name)
		if err := bbkt.Put(bucketKeyName, []byte(nameKey)); err != nil {
			return err
		}
		parent := obkt.Get(bucketKeyParent)
		if len(parent) > 0 {
			pbkt := bkt.Bucket(parent)
			if pbkt == nil {
				return fmt.Errorf("parent snapshot %v does not exist: %w", string(parent), errdefs.ErrNotFound)
			}
			cbkt, err := pbkt.CreateBucketIfNotExists(bucketKeyChildren)
			if err != nil {
				return err
			}
			// Replace the parent's child link for key with one for name.
			if err := cbkt.Delete([]byte(key)); err != nil {
				return err
			}
			if err := cbkt.Put([]byte(name), nil); err != nil {
				return err
			}
			if err := bbkt.Put(bucketKeyParent, parent); err != nil {
				return err
			}
		}
		ts := time.Now().UTC()
		if err := boltutil.WriteTimestamps(bbkt, ts, ts); err != nil {
			return err
		}
		if err := boltutil.WriteLabels(bbkt, base.Labels); err != nil {
			return err
		}
		// Drop the metadata record and lease of the active snapshot.
		if err := bkt.DeleteBucket([]byte(key)); err != nil {
			return err
		}
		if err := removeSnapshotLease(ctx, tx, s.name, key); err != nil {
			return err
		}
		inheritedOpt := snapshots.WithLabels(snapshots.FilterInheritedLabels(base.Labels))
		// NOTE: Backend snapshotters should commit fast and reliably to
		// prevent metadata store locking and minimizing rollbacks.
		// This operation should be done in the transaction to minimize the
		// risk of the committed keys becoming out of sync. If this operation
		// succeed and the overall transaction fails then the risk of out of
		// sync data is higher and may require manual cleanup.
		if err := s.Snapshotter.Commit(ctx, nameKey, bkey, inheritedOpt); err != nil {
			if errdefs.IsNotFound(err) {
				log.G(ctx).WithField("snapshotter", s.name).WithField("key", key).WithError(err).Error("uncommittable snapshot: missing in backend, snapshot should be removed")
			}
			// NOTE: Consider handling already exists here from the backend. Currently
			// already exists from the backend may be confusing to the client since it
			// may require the client to re-attempt from prepare. However, if handling
			// here it is not clear what happened with the existing backend key and
			// whether the already prepared snapshot would still be used or must be
			// discarded. It is best that all implementations of the snapshotter
			// interface behave the same, in which case the backend should handle the
			// mapping of duplicates and not error.
			return err
		}
		bname = nameKey
		return nil
	}); err != nil {
		if bname != "" {
			log.G(ctx).WithField("snapshotter", s.name).WithField("key", key).WithField("bname", bname).WithError(err).Error("uncommittable snapshot: transaction failed after commit, snapshot should be removed")
		}
		return err
	}
	if s.db.dbopts.publisher != nil {
		if err := s.db.dbopts.publisher.Publish(ctx, "/snapshot/commit", &eventstypes.SnapshotCommit{
			Key: key,
			Name: name,
			Snapshotter: s.name,
		}); err != nil {
			return err
		}
	}
	return nil
}
// Remove deletes the metadata record of snapshot key in the namespace
// carried by ctx.
//
// The backend snapshot itself is not removed here; the snapshotter is only
// marked dirty so that garbage collection is triggered. Removal fails with
// an error wrapping errdefs.ErrFailedPrecondition when the snapshot still
// has children, and with one wrapping errdefs.ErrNotFound when the snapshot
// or its recorded parent is missing. When the database has a publisher
// configured, a "/snapshot/remove" event is published on success.
func (s *snapshotter) Remove(ctx context.Context, key string) error {
	s.l.RLock()
	defer s.l.RUnlock()

	ns, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return err
	}

	unlink := func(tx *bolt.Tx) error {
		root := getSnapshotterBucket(tx, ns, s.name)
		if root == nil {
			return fmt.Errorf("snapshot %v does not exist: %w", key, errdefs.ErrNotFound)
		}
		entry := root.Bucket([]byte(key))
		if entry == nil {
			return fmt.Errorf("snapshot %v does not exist: %w", key, errdefs.ErrNotFound)
		}

		// Refuse to remove a snapshot that still has at least one child.
		if children := entry.Bucket(bucketKeyChildren); children != nil {
			if first, _ := children.Cursor().First(); first != nil {
				return fmt.Errorf("cannot remove snapshot with child: %w", errdefs.ErrFailedPrecondition)
			}
		}

		// Detach the snapshot from its parent's list of children.
		if parent := entry.Get(bucketKeyParent); len(parent) > 0 {
			parentBkt := root.Bucket(parent)
			if parentBkt == nil {
				return fmt.Errorf("parent snapshot %v does not exist: %w", string(parent), errdefs.ErrNotFound)
			}
			if siblings := parentBkt.Bucket(bucketKeyChildren); siblings != nil {
				if err := siblings.Delete([]byte(key)); err != nil {
					return fmt.Errorf("failed to remove child link: %w", err)
				}
			}
		}

		if err := root.DeleteBucket([]byte(key)); err != nil {
			return err
		}
		if err := removeSnapshotLease(ctx, tx, s.name, key); err != nil {
			return err
		}

		// Mark snapshotter as dirty for triggering garbage collection
		atomic.AddUint32(&s.db.dirty, 1)
		s.db.dirtySS[s.name] = struct{}{}

		return nil
	}
	if err := update(ctx, s.db, unlink); err != nil {
		return err
	}

	publisher := s.db.dbopts.publisher
	if publisher == nil {
		return nil
	}
	return publisher.Publish(ctx, "/snapshot/remove", &eventstypes.SnapshotRemove{
		Key:         key,
		Snapshotter: s.name,
	})
}
// infoPair couples the metadata-store view of a snapshot with the backend
// key needed to stat it; Walk collects these inside a read transaction and
// resolves them against the backend afterwards.
type infoPair struct {
	// bkey is the backend key stored under bucketKeyName.
	bkey string
	// info holds the name, parent, timestamps and labels read from the database.
	info snapshots.Info
}
// Walk calls fn for every snapshot in the namespace carried by ctx that
// matches the filters fs.
//
// Snapshot records are read from the metadata database in batches of up to
// batchSize per read transaction; the backend Stat, the filter match and fn
// all run outside of the transaction. Snapshots whose backend Stat reports
// not found are skipped. The walk stops at the first error returned by the
// database, the backend, or fn.
func (s *snapshotter) Walk(ctx context.Context, fn snapshots.WalkFunc, fs ...string) error {
	ns, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return err
	}
	var (
		batchSize = 100
		pairs = []infoPair{}
		// lastKey is the key at which the next batch resumes; it is empty
		// before the first batch and once the cursor has been exhausted.
		lastKey string
	)
	filter, err := filters.ParseAll(fs...)
	if err != nil {
		return err
	}
	for {
		if err := view(ctx, s.db, func(tx *bolt.Tx) error {
			bkt := getSnapshotterBucket(tx, ns, s.name)
			if bkt == nil {
				// No snapshots recorded: lastKey stays empty and the loop ends.
				return nil
			}
			c := bkt.Cursor()
			var k, v []byte
			if lastKey == "" {
				k, v = c.First()
			} else {
				k, v = c.Seek([]byte(lastKey))
			}
			for k != nil {
				// Only entries with a nil value are treated as snapshot buckets.
				if v == nil {
					if len(pairs) >= batchSize {
						// Batch is full; k is left unprocessed and becomes lastKey.
						break
					}
					sbkt := bkt.Bucket(k)
					pair := infoPair{
						bkey: string(sbkt.Get(bucketKeyName)),
						info: snapshots.Info{
							Name: string(k),
							Parent: string(sbkt.Get(bucketKeyParent)),
						},
					}
					err := boltutil.ReadTimestamps(sbkt, &pair.info.Created, &pair.info.Updated)
					if err != nil {
						return err
					}
					pair.info.Labels, err = boltutil.ReadLabels(sbkt)
					if err != nil {
						return err
					}
					pairs = append(pairs, pair)
				}
				k, v = c.Next()
			}
			lastKey = string(k)
			return nil
		}); err != nil {
			return err
		}
		for _, pair := range pairs {
			info, err := s.Snapshotter.Stat(ctx, pair.bkey)
			if err != nil {
				if errdefs.IsNotFound(err) {
					continue
				}
				return err
			}
			info = overlayInfo(info, pair.info)
			if filter.Match(adaptSnapshot(info)) {
				if err := fn(ctx, info); err != nil {
					return err
				}
			}
		}
		if lastKey == "" {
			break
		}
		// Reuse the batch slice for the next round.
		pairs = pairs[:0]
	}
	return nil
}
// validateSnapshot checks every label of info with labels.Validate and
// returns the first failure wrapped with an "info.Labels" prefix, or nil
// when all labels are valid.
func validateSnapshot(info *snapshots.Info) error {
	for name, value := range info.Labels {
		err := labels.Validate(name, value)
		if err == nil {
			continue
		}
		return fmt.Errorf("info.Labels: %w", err)
	}
	return nil
}
// garbageCollect removes all snapshots that are no longer used.
//
// While holding the write lock it collects the backend keys referenced by
// this snapshotter's records in every namespace, walks the backend's
// snapshot tree, and removes the backend snapshots that are not referenced.
// After the lock is released, and only if no error occurred, the backend's
// Cleanup is invoked when it implements snapshots.Cleaner; a "not
// implemented" result from Cleanup is ignored.
//
// On success d is the time elapsed since the lock was acquired (including
// Cleanup); on failure d is zero and err describes the failure.
func (s *snapshotter) garbageCollect(ctx context.Context) (d time.Duration, err error) {
	s.l.Lock()
	t1 := time.Now()
	defer func() {
		s.l.Unlock()
		if err == nil {
			if c, ok := s.Snapshotter.(snapshots.Cleaner); ok {
				err = c.Cleanup(ctx)
				if errdefs.IsNotImplemented(err) {
					err = nil
				}
			}
		}
		if err == nil {
			d = time.Since(t1)
		}
	}()
	// seen is the set of backend keys still referenced by some namespace.
	seen := map[string]struct{}{}
	if err := s.db.View(func(tx *bolt.Tx) error {
		v1bkt := tx.Bucket(bucketKeyVersion)
		if v1bkt == nil {
			return nil
		}
		// iterate through each namespace
		v1c := v1bkt.Cursor()
		for k, v := v1c.First(); k != nil; k, v = v1c.Next() {
			// Entries with a value are skipped; only nested buckets are visited.
			if v != nil {
				continue
			}
			sbkt := v1bkt.Bucket(k).Bucket(bucketKeyObjectSnapshots)
			if sbkt == nil {
				continue
			}
			// Load specific snapshotter
			ssbkt := sbkt.Bucket([]byte(s.name))
			if ssbkt == nil {
				continue
			}
			if err := ssbkt.ForEach(func(sk, sv []byte) error {
				if sv == nil {
					bkey := ssbkt.Bucket(sk).Get(bucketKeyName)
					if len(bkey) > 0 {
						seen[string(bkey)] = struct{}{}
					}
				}
				return nil
			}); err != nil {
				return err
			}
		}
		return nil
	}); err != nil {
		return 0, err
	}
	roots, err := s.walkTree(ctx, seen)
	if err != nil {
		return 0, err
	}
	// TODO: Unlock before removal (once nodes are fully unavailable).
	// This could be achieved through doing prune inside the lock
	// and having a cleanup method which actually performs the
	// deletions on the snapshotters which support it.
	for _, node := range roots {
		if err := s.pruneBranch(ctx, node); err != nil {
			return 0, err
		}
	}
	// The deferred function fills in d (and may set err) before returning.
	return
}
// treeNode is one backend snapshot in the parent/child tree built by
// walkTree and pruned by pruneBranch.
type treeNode struct {
	// info is the backend's info for the snapshot; it stays zero for a node
	// created as a parent placeholder until the walk visits that snapshot.
	info snapshots.Info
	// remove is true when the snapshot's backend key is not referenced by
	// the metadata store.
	remove bool
	// children are the snapshots whose parent is this node.
	children []*treeNode
}
// walkTree walks all backend snapshots and arranges them into trees by
// parent. It returns the nodes that have no parent; every node is flagged
// for removal unless its name is present in seen.
func (s *snapshotter) walkTree(ctx context.Context, seen map[string]struct{}) ([]*treeNode, error) {
	var (
		roots = []*treeNode{}
		index = map[string]*treeNode{}
	)

	// lookup returns the node registered for name, registering an empty one
	// first when the snapshot has not been encountered yet.
	lookup := func(name string) *treeNode {
		n, known := index[name]
		if !known {
			n = &treeNode{}
			index[name] = n
		}
		return n
	}

	visit := func(ctx context.Context, info snapshots.Info) error {
		_, referenced := seen[info.Name]

		node := lookup(info.Name)
		node.remove = !referenced
		node.info = info

		if info.Parent == "" {
			roots = append(roots, node)
			return nil
		}

		// The parent may be visited later; lookup creates a placeholder.
		parent := lookup(info.Parent)
		parent.children = append(parent.children, node)
		return nil
	}

	if err := s.Snapshotter.Walk(ctx, visit); err != nil {
		return nil, err
	}
	return roots, nil
}
// pruneBranch removes, children first, every backend snapshot below and
// including node that is flagged for removal. A failed-precondition error
// from the backend is logged as a warning and tolerated; any other removal
// error aborts the prune and is returned.
func (s *snapshotter) pruneBranch(ctx context.Context, node *treeNode) error {
	for i := range node.children {
		if err := s.pruneBranch(ctx, node.children[i]); err != nil {
			return err
		}
	}

	if !node.remove {
		return nil
	}

	logger := log.G(ctx).WithField("snapshotter", s.name)
	err := s.Snapshotter.Remove(ctx, node.info.Name)
	switch {
	case err == nil:
		logger.WithField("key", node.info.Name).Debug("removed snapshot")
	case errdefs.IsFailedPrecondition(err):
		logger.WithError(err).WithField("key", node.info.Name).Warnf("failed to remove snapshot")
	default:
		return err
	}
	return nil
}
// Close shuts down the wrapped backend snapshotter and returns its error.
// The metadata database is left open.
func (s *snapshotter) Close() error {
	backend := s.Snapshotter
	return backend.Close()
}

View File

@@ -0,0 +1,432 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metadata
import (
"context"
"fmt"
"os"
"path/filepath"
"reflect"
"runtime"
"sync"
"testing"
"time"
"github.com/containerd/containerd/v2/errdefs"
"github.com/containerd/containerd/v2/filters"
"github.com/containerd/containerd/v2/mount"
"github.com/containerd/containerd/v2/namespaces"
"github.com/containerd/containerd/v2/pkg/testutil"
"github.com/containerd/containerd/v2/snapshots"
"github.com/containerd/containerd/v2/snapshots/native"
"github.com/containerd/containerd/v2/snapshots/testsuite"
bolt "go.etcd.io/bbolt"
)
// newTestSnapshotter builds a metadata-backed snapshotter for the snapshot
// test suite: a native snapshotter rooted at <root>/native wrapped by a
// metadata DB stored in <root>/metadata.db.
//
// It returns the snapshotter and a cleanup function. The cleanup function
// closes both the snapshotter and the bolt database, even if closing the
// snapshotter fails, and returns the first error encountered. ctx is unused.
func newTestSnapshotter(ctx context.Context, root string) (snapshots.Snapshotter, func() error, error) {
	nativeRoot := filepath.Join(root, "native")
	if err := os.Mkdir(nativeRoot, 0770); err != nil {
		return nil, nil, err
	}
	snapshotter, err := native.NewSnapshotter(nativeRoot)
	if err != nil {
		return nil, nil, err
	}

	db, err := bolt.Open(filepath.Join(root, "metadata.db"), 0660, nil)
	if err != nil {
		// Do not leak the already created snapshotter. Its close error is
		// dropped on purpose: the open failure is the one worth reporting.
		_ = snapshotter.Close()
		return nil, nil, err
	}

	sn := NewDB(db, nil, map[string]snapshots.Snapshotter{"native": snapshotter}).Snapshotter("native")

	return sn, func() error {
		closeErr := sn.Close()
		// Always close the database so the file is released even when the
		// snapshotter failed to close; keep the first error.
		if err := db.Close(); closeErr == nil {
			closeErr = err
		}
		return closeErr
	}, nil
}
// TestMetadata runs the shared snapshotter test suite against the
// metadata-backed snapshotter. It is skipped on Windows and requires root
// because the suite mounts snapshots.
func TestMetadata(t *testing.T) {
	if goos := runtime.GOOS; goos == "windows" {
		t.Skip("snapshotter not implemented on windows")
	}

	// Snapshot tests require mounting, still requires root
	testutil.RequiresRoot(t)

	testsuite.SnapshotterSuite(t, "Metadata", newTestSnapshotter)
}
// TestSnapshotterWithRef exercises the snapshot reference label
// (labelSnapshotRef) across namespaces using the in-memory tmpSnapshotter
// backend: preparing a snapshot whose target reference already exists in the
// backend must report already exists and make the target visible in the
// requesting namespace.
func TestSnapshotterWithRef(t *testing.T) {
	ctx, db := testDB(t, withSnapshotter("tmp", func(string) (snapshots.Snapshotter, error) {
		return NewTmpSnapshotter(), nil
	}))
	sn := db.Snapshotter("tmp")

	test1opt := snapshots.WithLabels(
		map[string]string{
			labelSnapshotRef: "test1",
		},
	)

	_, err := sn.Prepare(ctx, "test1-tmp", "", test1opt)
	if err != nil {
		t.Fatal(err)
	}

	err = sn.Commit(ctx, "test1", "test1-tmp", test1opt)
	if err != nil {
		t.Fatal(err)
	}

	ctx2 := namespaces.WithNamespace(ctx, "testing2")

	_, err = sn.Prepare(ctx2, "test1-tmp", "", test1opt)
	if err == nil {
		t.Fatal("expected already exists error")
	} else if !errdefs.IsAlreadyExists(err) {
		t.Fatal(err)
	}

	// test1 should now be in the namespace
	_, err = sn.Stat(ctx2, "test1")
	if err != nil {
		t.Fatal(err)
	}

	test2opt := snapshots.WithLabels(
		map[string]string{
			labelSnapshotRef: "test2",
		},
	)

	_, err = sn.Prepare(ctx2, "test2-tmp", "test1", test2opt)
	if err != nil {
		t.Fatal(err)
	}

	// In original namespace, but not committed
	_, err = sn.Prepare(ctx, "test2-tmp", "test1", test2opt)
	if err != nil {
		t.Fatal(err)
	}

	err = sn.Commit(ctx2, "test2", "test2-tmp", test2opt)
	if err != nil {
		t.Fatal(err)
	}

	// See note in Commit function for why
	// this does not return ErrAlreadyExists
	err = sn.Commit(ctx, "test2", "test2-tmp", test2opt)
	if err != nil {
		t.Fatal(err)
	}

	// This should error out, already exists in namespace
	// despite mismatched parent
	_, err = sn.Prepare(ctx2, "test2-tmp-again", "", test2opt)
	if err == nil {
		t.Fatal("expected already exists error")
	} else if !errdefs.IsAlreadyExists(err) {
		t.Fatal(err)
	}

	// In original namespace, but already exists
	_, err = sn.Prepare(ctx, "test2-tmp-again", "test1", test2opt)
	if err == nil {
		t.Fatal("expected already exists error")
	} else if !errdefs.IsAlreadyExists(err) {
		t.Fatal(err)
	}

	// Now try a third namespace
	ctx3 := namespaces.WithNamespace(ctx, "testing3")

	// This should succeed: the requested parent is empty, so it does not
	// match the parent of the committed "test2" target and a new active
	// snapshot is prepared instead.
	_, err = sn.Prepare(ctx3, "test2-tmp", "", test2opt)
	if err != nil {
		t.Fatal(err)
	}

	// Remove, not going to use yet
	err = sn.Remove(ctx3, "test2-tmp")
	if err != nil {
		t.Fatal(err)
	}

	// The parent "test1" is not yet known in the third namespace.
	_, err = sn.Prepare(ctx3, "test2-tmp", "test1", test2opt)
	if err == nil {
		t.Fatal("expected not found error")
	} else if !errdefs.IsNotFound(err) {
		t.Fatal(err)
	}

	// Preparing with the "test1" reference brings "test1" into the namespace.
	_, err = sn.Prepare(ctx3, "test1-tmp", "", test1opt)
	if err == nil {
		t.Fatal("expected already exists error")
	} else if !errdefs.IsAlreadyExists(err) {
		t.Fatal(err)
	}

	// With the parent available, the existing "test2" target is now found.
	_, err = sn.Prepare(ctx3, "test2-tmp", "test1", test2opt)
	if err == nil {
		t.Fatal("expected already exists error")
	} else if !errdefs.IsAlreadyExists(err) {
		t.Fatal(err)
	}
}
// TestFilterInheritedLabels verifies that snapshots.FilterInheritedLabels
// keeps only the labels whose key starts with inheritedLabelsPrefix, and
// that nil input yields nil output.
func TestFilterInheritedLabels(t *testing.T) {
	inherited := inheritedLabelsPrefix + "foo"

	cases := []struct {
		in   map[string]string
		want map[string]string
	}{
		{in: nil, want: nil},
		{in: map[string]string{}, want: map[string]string{}},
		{in: map[string]string{"": ""}, want: map[string]string{}},
		{in: map[string]string{"foo": "bar"}, want: map[string]string{}},
		{
			in:   map[string]string{inherited: "bar"},
			want: map[string]string{inherited: "bar"},
		},
		{
			in:   map[string]string{inherited: "bar", "qux": "qaz"},
			want: map[string]string{inherited: "bar"},
		},
	}

	for i := range cases {
		got := snapshots.FilterInheritedLabels(cases[i].in)
		if reflect.DeepEqual(got, cases[i].want) {
			continue
		}
		t.Fatalf("expected %v but got %v", cases[i].want, got)
	}
}
// tmpSnapshotter is an in-memory snapshots.Snapshotter used by the tests in
// this file; it tracks snapshot info only and never produces real mounts.
type tmpSnapshotter struct {
	// l is held by every method that touches the maps below.
	l sync.Mutex
	// snapshots holds the info of each snapshot, keyed by snapshot name.
	snapshots map[string]snapshots.Info
	// targets maps a snapshot reference label value to the names of the
	// committed snapshots carrying that reference.
	targets map[string][]string
}
// NewTmpSnapshotter returns an empty in-memory snapshotter for tests.
func NewTmpSnapshotter() snapshots.Snapshotter {
	sn := new(tmpSnapshotter)
	sn.snapshots = make(map[string]snapshots.Info)
	sn.targets = make(map[string][]string)
	return sn
}
// Stat returns the stored info for key, or errdefs.ErrNotFound when no such
// snapshot exists.
func (s *tmpSnapshotter) Stat(ctx context.Context, key string) (snapshots.Info, error) {
	s.l.Lock()
	defer s.l.Unlock()

	if info, found := s.snapshots[key]; found {
		return info, nil
	}
	return snapshots.Info{}, errdefs.ErrNotFound
}
// Update merges info.Labels into the labels of the stored snapshot
// info.Name and returns the updated info. fieldpaths is ignored.
// errdefs.ErrNotFound is returned when the snapshot does not exist.
func (s *tmpSnapshotter) Update(ctx context.Context, info snapshots.Info, fieldpaths ...string) (snapshots.Info, error) {
	s.l.Lock()
	defer s.l.Unlock()

	i, ok := s.snapshots[info.Name]
	if !ok {
		return snapshots.Info{}, errdefs.ErrNotFound
	}

	// A snapshot created without labels has a nil label map; assigning
	// into it would panic, so allocate one before merging.
	if i.Labels == nil && len(info.Labels) > 0 {
		i.Labels = make(map[string]string, len(info.Labels))
	}
	for k, v := range info.Labels {
		i.Labels[k] = v
	}

	s.snapshots[i.Name] = i

	return i, nil
}
// Usage returns a zero Usage for an existing snapshot and
// errdefs.ErrNotFound otherwise.
func (s *tmpSnapshotter) Usage(ctx context.Context, key string) (snapshots.Usage, error) {
	s.l.Lock()
	defer s.l.Unlock()

	if _, found := s.snapshots[key]; found {
		return snapshots.Usage{}, nil
	}
	return snapshots.Usage{}, errdefs.ErrNotFound
}
// Mounts returns an empty mount list for an existing snapshot and
// errdefs.ErrNotFound otherwise.
func (s *tmpSnapshotter) Mounts(ctx context.Context, key string) ([]mount.Mount, error) {
	s.l.Lock()
	defer s.l.Unlock()

	if _, found := s.snapshots[key]; found {
		return []mount.Mount{}, nil
	}
	return nil, errdefs.ErrNotFound
}
// Prepare records a new snapshot of kind snapshots.KindActive under key.
func (s *tmpSnapshotter) Prepare(ctx context.Context, key, parent string, opts ...snapshots.Opt) ([]mount.Mount, error) {
	const kind = snapshots.KindActive
	return s.create(ctx, key, parent, kind, opts...)
}
// View records a new snapshot of kind snapshots.KindView under key.
func (s *tmpSnapshotter) View(ctx context.Context, key, parent string, opts ...snapshots.Opt) ([]mount.Mount, error) {
	const kind = snapshots.KindView
	return s.create(ctx, key, parent, kind, opts...)
}
// create records a new snapshot named key with the given kind and parent.
//
// If opts carry a snapshot reference label and a committed snapshot with
// that reference and the same parent is already tracked, an error wrapping
// errdefs.ErrAlreadyExists is returned. A non-empty parent that is unknown
// yields errdefs.ErrNotFound. On success an empty mount list is returned.
func (s *tmpSnapshotter) create(ctx context.Context, key, parent string, kind snapshots.Kind, opts ...snapshots.Opt) ([]mount.Mount, error) {
	s.l.Lock()
	defer s.l.Unlock()

	info := snapshots.Info{}
	for i := range opts {
		if err := opts[i](&info); err != nil {
			return nil, err
		}
	}
	info.Name, info.Kind = key, kind

	// Report an existing committed snapshot for the same reference and parent.
	if ref := info.Labels[labelSnapshotRef]; ref != "" {
		for _, committed := range s.targets[ref] {
			if s.snapshots[committed].Parent == parent {
				return nil, fmt.Errorf("found target: %w", errdefs.ErrAlreadyExists)
			}
		}
	}

	if parent != "" {
		if _, known := s.snapshots[parent]; !known {
			return nil, errdefs.ErrNotFound
		}
		info.Parent = parent
	}

	now := time.Now().UTC()
	info.Created, info.Updated = now, now
	s.snapshots[info.Name] = info

	return []mount.Mount{}, nil
}
// Commit replaces the snapshot key with a committed snapshot called name
// that keeps key's parent.
//
// It returns an error wrapping errdefs.ErrAlreadyExists when name is taken,
// errdefs.ErrNotFound when key is unknown, and errdefs.ErrInvalidArgument
// when key is already committed. If opts carry a snapshot reference label,
// name is registered as a target for that reference.
func (s *tmpSnapshotter) Commit(ctx context.Context, name, key string, opts ...snapshots.Opt) error {
	s.l.Lock()
	defer s.l.Unlock()

	var committed snapshots.Info
	for _, apply := range opts {
		if err := apply(&committed); err != nil {
			return err
		}
	}
	committed.Name = name
	committed.Kind = snapshots.KindCommitted

	if _, taken := s.snapshots[name]; taken {
		return fmt.Errorf("found name: %w", errdefs.ErrAlreadyExists)
	}

	active, found := s.snapshots[key]
	switch {
	case !found:
		return errdefs.ErrNotFound
	case active.Kind == snapshots.KindCommitted:
		return errdefs.ErrInvalidArgument
	}
	committed.Parent = active.Parent

	now := time.Now().UTC()
	committed.Created, committed.Updated = now, now

	s.snapshots[name] = committed
	delete(s.snapshots, key)

	if ref := committed.Labels[labelSnapshotRef]; ref != "" {
		s.targets[ref] = append(s.targets[ref], name)
	}

	return nil
}
// Remove deletes the snapshot key and drops its name from every target
// list it appears in. errdefs.ErrNotFound is returned when key is unknown.
func (s *tmpSnapshotter) Remove(ctx context.Context, key string) error {
	s.l.Lock()
	defer s.l.Unlock()

	removed, found := s.snapshots[key]
	if !found {
		return errdefs.ErrNotFound
	}
	delete(s.snapshots, key)

	// scan and remove all instances of name as a target
	for ref, names := range s.targets {
		for idx, name := range names {
			if name != removed.Name {
				continue
			}
			if len(names) == 1 {
				// Last entry for this reference: drop the reference itself.
				delete(s.targets, ref)
			} else {
				copy(names[idx:], names[idx+1:])
				s.targets[ref] = names[:len(names)-1]
			}
			// At most one entry per reference is removed.
			break
		}
	}

	return nil
}
// Walk calls fn for every tracked snapshot that matches the filters fs,
// stopping at the first error returned by fn. The lock is held for the
// whole walk, and the iteration follows map order.
func (s *tmpSnapshotter) Walk(ctx context.Context, fn snapshots.WalkFunc, fs ...string) error {
	s.l.Lock()
	defer s.l.Unlock()

	filter, err := filters.ParseAll(fs...)
	if err != nil {
		return err
	}

	for _, info := range s.snapshots {
		if !filter.Match(adaptSnapshot(info)) {
			continue
		}
		if err := fn(ctx, info); err != nil {
			return err
		}
	}
	return nil
}
// Close is a no-op: the in-memory snapshotter holds no resources to release.
func (s *tmpSnapshotter) Close() (err error) {
	return err
}