From 4247f2684d14f812f0493ec9414cb4f5ec43c53f Mon Sep 17 00:00:00 2001 From: Eric Lin Date: Tue, 18 Dec 2018 15:45:39 +0800 Subject: [PATCH] metadata: define content sharing policy This changeset modifies the metadata store to allow one to set a "content sharing policy" that defines how blobs are shared between namespaces in the content store. The default mode "shared" will make blobs available in all namespaces once it is pulled into any namespace. The blob will be pulled into the namespace if a writer is opened with the "Expected" digest that is already present in the backend. The alternative mode, "isolated" requires that clients prove they have access to the content by providing all of the content to the ingest before the blob is added to the namespace. Both modes share backing data, while "shared" will reduce total bandwidth across namespaces, at the cost of allowing access to any blob just by knowing its digest. Note: Most functional codes and changelog of this commit originate from Stephen J Day , see https://github.com/containerd/containerd/pull/1709/commits/40455aade85f91967b22e7999dbfbfea58c63e26 Fixes #1713 Fixes #2865 Signed-off-by: Eric Lin --- content/testsuite/testsuite.go | 75 +++++++++++++++++++++++++++++++- docs/ops.md | 18 ++++++++ metadata/content.go | 41 ++++++++++++----- metadata/content_test.go | 17 ++++++-- metadata/db.go | 26 ++++++++++- services/server/config/config.go | 38 ++++++++++++++++ services/server/server.go | 26 ++++++++++- 7 files changed, 221 insertions(+), 20 deletions(-) diff --git a/content/testsuite/testsuite.go b/content/testsuite/testsuite.go index 13392f6d1..bab30cb25 100644 --- a/content/testsuite/testsuite.go +++ b/content/testsuite/testsuite.go @@ -38,8 +38,16 @@ import ( "gotest.tools/assert" ) +const ( + emptyDigest = "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" +) + +// StoreInitFn initializes content store with given root and returns a function for +// destroying the content store +type 
StoreInitFn func(ctx context.Context, root string) (context.Context, content.Store, func() error, error) + // ContentSuite runs a test suite on the content store given a factory function. -func ContentSuite(t *testing.T, name string, storeFn func(ctx context.Context, root string) (context.Context, content.Store, func() error, error)) { +func ContentSuite(t *testing.T, name string, storeFn StoreInitFn) { t.Run("Writer", makeTest(t, name, storeFn, checkContentStoreWriter)) t.Run("UpdateStatus", makeTest(t, name, storeFn, checkUpdateStatus)) t.Run("CommitExists", makeTest(t, name, storeFn, checkCommitExists)) @@ -52,10 +60,18 @@ func ContentSuite(t *testing.T, name string, storeFn func(ctx context.Context, r t.Run("SmallBlob", makeTest(t, name, storeFn, checkSmallBlob)) t.Run("Labels", makeTest(t, name, storeFn, checkLabels)) + t.Run("CommitErrorState", makeTest(t, name, storeFn, checkCommitErrorState)) +} + +// ContentCrossNSSharedSuite runs a test suite under shared content policy +func ContentCrossNSSharedSuite(t *testing.T, name string, storeFn StoreInitFn) { t.Run("CrossNamespaceAppend", makeTest(t, name, storeFn, checkCrossNSAppend)) t.Run("CrossNamespaceShare", makeTest(t, name, storeFn, checkCrossNSShare)) +} - t.Run("CommitErrorState", makeTest(t, name, storeFn, checkCommitErrorState)) +// ContentCrossNSIsolatedSuite runs a test suite under isolated content policy +func ContentCrossNSIsolatedSuite(t *testing.T, name string, storeFn StoreInitFn) { + t.Run("CrossNamespaceIsolate", makeTest(t, name, storeFn, checkCrossNSIsolate)) } // ContextWrapper is used to decorate new context used inside the test @@ -890,6 +906,38 @@ func checkCrossNSAppend(ctx context.Context, t *testing.T, cs content.Store) { } +func checkCrossNSIsolate(ctx context.Context, t *testing.T, cs content.Store) { + wrap, ok := ctx.Value(wrapperKey{}).(ContextWrapper) + if !ok { + t.Skip("multiple contexts not supported") + } + + var size int64 = 1000 + b, d := createContent(size) + ref := 
fmt.Sprintf("ref-%d", size) + t1 := time.Now() + + if err := content.WriteBlob(ctx, cs, ref, bytes.NewReader(b), ocispec.Descriptor{Size: size, Digest: d}); err != nil { + t.Fatal(err) + } + t2 := time.Now() + + ctx2, done, err := wrap(context.Background()) + if err != nil { + t.Fatal(err) + } + defer done(ctx2) + + t3 := time.Now() + w, err := cs.Writer(ctx2, content.WithRef(ref), content.WithDescriptor(ocispec.Descriptor{Size: size, Digest: d})) + if err != nil { + t.Fatal(err) + } + t4 := time.Now() + + checkNewlyCreated(t, w, t1, t2, t3, t4) +} + func checkStatus(t *testing.T, w content.Writer, expected content.Status, d digest.Digest, preStart, postStart, preUpdate, postUpdate time.Time) { t.Helper() st, err := w.Status() @@ -934,6 +982,29 @@ func checkStatus(t *testing.T, w content.Writer, expected content.Status, d dige } } +func checkNewlyCreated(t *testing.T, w content.Writer, preStart, postStart, preUpdate, postUpdate time.Time) { + t.Helper() + st, err := w.Status() + if err != nil { + t.Fatalf("failed to get status: %v", err) + } + + wd := w.Digest() + if wd != emptyDigest { + t.Fatalf("unexpected digest %v, expected %v", wd, emptyDigest) + } + + if st.Offset != 0 { + t.Fatalf("unexpected offset %v", st.Offset) + } + + if runtime.GOOS != "windows" { + if st.StartedAt.After(postUpdate) || st.StartedAt.Before(postStart) { + t.Fatalf("unexpected started at time %s, expected between %s and %s", st.StartedAt, postStart, postUpdate) + } + } +} + func checkInfo(ctx context.Context, cs content.Store, d digest.Digest, expected content.Info, c1, c2, u1, u2 time.Time) error { info, err := cs.Info(ctx, d) if err != nil { diff --git a/docs/ops.md b/docs/ops.md index 45c628655..aed8ac8c1 100644 --- a/docs/ops.md +++ b/docs/ops.md @@ -220,3 +220,21 @@ The linux runtime allows a few options to be set to configure the shim and the r # (this only need to be set on kernel < 3.18) shim_no_newns = true ``` + +### Bolt Metadata Plugin + +The bolt metadata plugin allows 
configuration of the content sharing policy between namespaces. + +The default mode "shared" will make a blob available in all namespaces once it is pulled into any namespace. +The blob will be pulled into the namespace if a writer is opened with the "Expected" digest that is already present in the backend. + +The alternative mode, "isolated" requires that clients prove they have access to the content by providing all of the content to the ingest before the blob is added to the namespace. + +Both modes share backing data, while "shared" will reduce total bandwidth across namespaces, at the cost of allowing access to any blob just by knowing its digest. + +The default is "shared". While this is largely the most desired policy, one can change to "isolated" mode with the following configuration: + +```toml +[plugins.bolt] + content_sharing_policy = "isolated" +``` diff --git a/metadata/content.go b/metadata/content.go index 8ee0f2e20..5b28f2e42 100644 --- a/metadata/content.go +++ b/metadata/content.go @@ -38,16 +38,31 @@ import ( type contentStore struct { content.Store - db *DB - l sync.RWMutex + db *DB + shared bool + l sync.RWMutex } // newContentStore returns a namespaced content store using an existing // content store interface. -func newContentStore(db *DB, cs content.Store) *contentStore { +// policy defines the sharing behavior for content between namespaces. Both +// modes will result in shared storage in the backend for committed content. Choose +// "shared" to prevent separate namespaces from having to pull the same content +// twice. Choose "isolated" if the content must not be shared between +// namespaces. +// +// If the policy is "shared", writes will try to resolve the "expected" digest +// against the backend, allowing imports of content from other namespaces. In +// "isolated" mode, the client must prove they have the content by providing +// the entire blob before the content can be added to another namespace. 
+// +// Since we have only two policies right now, it's simpler using bool to +// represent it internally. +func newContentStore(db *DB, shared bool, cs content.Store) *contentStore { return &contentStore{ - Store: cs, - db: db, + Store: cs, + db: db, + shared: shared, } } @@ -383,13 +398,15 @@ func (cs *contentStore) Writer(ctx context.Context, opts ...content.WriterOpt) ( return nil } - if st, err := cs.Store.Info(ctx, wOpts.Desc.Digest); err == nil { - // Ensure the expected size is the same, it is likely - // an error if the size is mismatched but the caller - // must resolve this on commit - if wOpts.Desc.Size == 0 || wOpts.Desc.Size == st.Size { - shared = true - wOpts.Desc.Size = st.Size + if cs.shared { + if st, err := cs.Store.Info(ctx, wOpts.Desc.Digest); err == nil { + // Ensure the expected size is the same, it is likely + // an error if the size is mismatched but the caller + // must resolve this on commit + if wOpts.Desc.Size == 0 || wOpts.Desc.Size == st.Size { + shared = true + wOpts.Desc.Size = st.Size + } } } } diff --git a/metadata/content_test.go b/metadata/content_test.go index ac42c9132..3b35b10a3 100644 --- a/metadata/content_test.go +++ b/metadata/content_test.go @@ -36,7 +36,7 @@ import ( bolt "go.etcd.io/bbolt" ) -func createContentStore(ctx context.Context, root string) (context.Context, content.Store, func() error, error) { +func createContentStore(ctx context.Context, root string, opts ...DBOpt) (context.Context, content.Store, func() error, error) { // TODO: Use mocked or in-memory store cs, err := local.NewStore(root) if err != nil { @@ -60,13 +60,24 @@ func createContentStore(ctx context.Context, root string) (context.Context, cont } ctx = testsuite.SetContextWrapper(ctx, wrap) - return ctx, NewDB(db, cs, nil).ContentStore(), func() error { + return ctx, NewDB(db, cs, nil, opts...).ContentStore(), func() error { return db.Close() }, nil } +func createContentStoreWithPolicy(opts ...DBOpt) testsuite.StoreInitFn { + return func(ctx 
context.Context, root string) (context.Context, content.Store, func() error, error) { + return createContentStore(ctx, root, opts...) + } +} + func TestContent(t *testing.T) { - testsuite.ContentSuite(t, "metadata", createContentStore) + testsuite.ContentSuite(t, "metadata", createContentStoreWithPolicy()) + testsuite.ContentCrossNSSharedSuite(t, "metadata", createContentStoreWithPolicy()) + testsuite.ContentCrossNSIsolatedSuite( + t, "metadata", createContentStoreWithPolicy([]DBOpt{ + WithPolicyIsolated, + }...)) } func TestContentLeased(t *testing.T) { diff --git a/metadata/db.go b/metadata/db.go index 507d6d22d..8d8287529 100644 --- a/metadata/db.go +++ b/metadata/db.go @@ -46,6 +46,19 @@ const ( dbVersion = 3 ) +// DBOpt configures how we set up the DB +type DBOpt func(*dbOptions) + +// WithPolicyIsolated isolates contents between namespaces +func WithPolicyIsolated(o *dbOptions) { + o.shared = false +} + +// dbOptions configure db options. +type dbOptions struct { + shared bool +} + // DB represents a metadata database backed by a bolt // database. The database is fully namespaced and stores // image, container, namespace, snapshot, and content data @@ -72,19 +85,28 @@ type DB struct { // mutationCallbacks are called after each mutation with the flag // set indicating whether any dirty flags are set mutationCallbacks []func(bool) + + dbopts dbOptions } // NewDB creates a new metadata database using the provided // bolt database, content store, and snapshotters. 
-func NewDB(db *bolt.DB, cs content.Store, ss map[string]snapshots.Snapshotter) *DB { +func NewDB(db *bolt.DB, cs content.Store, ss map[string]snapshots.Snapshotter, opts ...DBOpt) *DB { m := &DB{ db: db, ss: make(map[string]*snapshotter, len(ss)), dirtySS: map[string]struct{}{}, + dbopts: dbOptions{ + shared: true, + }, + } + + for _, opt := range opts { + opt(&m.dbopts) } // Initialize data stores - m.cs = newContentStore(m, cs) + m.cs = newContentStore(m, m.dbopts.shared, cs) for name, sn := range ss { m.ss[name] = newSnapshotter(m, name, sn) } diff --git a/services/server/config/config.go b/services/server/config/config.go index 2124d8671..27cebf579 100644 --- a/services/server/config/config.go +++ b/services/server/config/config.go @@ -83,6 +83,44 @@ type ProxyPlugin struct { Address string `toml:"address"` } +// BoltConfig defines the configuration values for the bolt plugin, which is +// loaded here, rather than back registered in the metadata package. +type BoltConfig struct { + // ContentSharingPolicy sets the sharing policy for content between + // namespaces. + // + // The default mode "shared" will make a blob available in all + // namespaces once it is pulled into any namespace. The blob will be pulled + // into the namespace if a writer is opened with the "Expected" digest that + // is already present in the backend. + // + // The alternative mode, "isolated" requires that clients prove they have + // access to the content by providing all of the content to the ingest + // before the blob is added to the namespace. + // + // Both modes share backing data, while "shared" will reduce total + // bandwidth across namespaces, at the cost of allowing access to any blob + // just by knowing its digest. 
+ ContentSharingPolicy string `toml:"content_sharing_policy"` +} + +const ( + // SharingPolicyShared represents the "shared" sharing policy + SharingPolicyShared = "shared" + // SharingPolicyIsolated represents the "isolated" sharing policy + SharingPolicyIsolated = "isolated" +) + +// Validate validates if BoltConfig is valid +func (bc *BoltConfig) Validate() error { + switch bc.ContentSharingPolicy { + case SharingPolicyShared, SharingPolicyIsolated: + return nil + default: + return errors.Wrapf(errdefs.ErrInvalidArgument, "unknown policy: %s", bc.ContentSharingPolicy) + } +} + // Decode unmarshals a plugin specific configuration by plugin id func (c *Config) Decode(id string, v interface{}) (interface{}, error) { data, ok := c.Plugins[id] diff --git a/services/server/server.go b/services/server/server.go index 514bbdf69..6ed429146 100644 --- a/services/server/server.go +++ b/services/server/server.go @@ -238,6 +238,9 @@ func LoadPlugins(ctx context.Context, config *srvconfig.Config) ([]*plugin.Regis plugin.ContentPlugin, plugin.SnapshotPlugin, }, + Config: &srvconfig.BoltConfig{ + ContentSharingPolicy: srvconfig.SharingPolicyShared, + }, InitFn: func(ic *plugin.InitContext) (interface{}, error) { if err := os.MkdirAll(ic.Root, 0711); err != nil { return nil, err @@ -265,6 +268,22 @@ func LoadPlugins(ctx context.Context, config *srvconfig.Config) ([]*plugin.Regis snapshotters[name] = sn.(snapshots.Snapshotter) } + shared := true + ic.Meta.Exports["policy"] = srvconfig.SharingPolicyShared + if cfg, ok := ic.Config.(*srvconfig.BoltConfig); ok { + if cfg.ContentSharingPolicy != "" { + if err := cfg.Validate(); err != nil { + return nil, err + } + if cfg.ContentSharingPolicy == srvconfig.SharingPolicyIsolated { + ic.Meta.Exports["policy"] = srvconfig.SharingPolicyIsolated + shared = false + } + + log.L.WithField("policy", cfg.ContentSharingPolicy).Info("metadata content store policy set") + } + } + path := filepath.Join(ic.Root, "meta.db") ic.Meta.Exports["path"] = 
path @@ -272,7 +291,12 @@ func LoadPlugins(ctx context.Context, config *srvconfig.Config) ([]*plugin.Regis if err != nil { return nil, err } - mdb := metadata.NewDB(db, cs.(content.Store), snapshotters) + + var dbopts []metadata.DBOpt + if !shared { + dbopts = append(dbopts, metadata.WithPolicyIsolated) + } + mdb := metadata.NewDB(db, cs.(content.Store), snapshotters, dbopts...) if err := mdb.Init(ic.Context); err != nil { return nil, err }