CRI: use (snapshotter_id, snapshot_key) to uniquely identify snapshots

Before snapshotter per runtime, CRI only supports a global snapshotter.
So a snapshot can be uniquely identified by `snapshot_key`. With snapshotter
per runtime enabled, there may be multiple snapshotters used by CRI. So only
(snapshotter_id, snapshot_key) can uniquely identify a snapshot.
Also extends CRI/store/snapshot/Store to support multiple snapshotters.

Signed-off-by: Jiang Liu <gerry@linux.alibaba.com>
This commit is contained in:
Jiang Liu 2023-10-15 21:06:39 +08:00
parent a7333657af
commit 5ad6f34329
11 changed files with 174 additions and 70 deletions

View File

@ -81,11 +81,21 @@ func (c *criService) getMetricsHandler(ctx context.Context, sandboxID string) (m
return nil, err
}
ociRuntime, err := c.getSandboxRuntime(sandbox.Config, sandbox.RuntimeHandler)
if err != nil {
return nil, fmt.Errorf("failed to get runtimeHandler %q: %w", sandbox.RuntimeHandler, err)
}
snapshotter := c.RuntimeSnapshotter(ctx, ociRuntime)
switch p.OS {
case "windows":
return c.windowsContainerMetrics, nil
return func(meta containerstore.Metadata, stats *types.Metric) (*runtime.ContainerStats, error) {
return c.windowsContainerMetrics(meta, stats, snapshotter)
}, nil
case "linux":
return c.linuxContainerMetrics, nil
return func(meta containerstore.Metadata, stats *types.Metric) (*runtime.ContainerStats, error) {
return c.linuxContainerMetrics(meta, stats, snapshotter)
}, nil
default:
return nil, fmt.Errorf("container metrics for platform %+v: %w", p, errdefs.ErrNotImplemented)
}
@ -267,10 +277,11 @@ func matchLabelSelector(selector, labels map[string]string) bool {
func (c *criService) windowsContainerMetrics(
meta containerstore.Metadata,
stats *types.Metric,
snapshotter string,
) (*runtime.ContainerStats, error) {
var cs runtime.ContainerStats
var usedBytes, inodesUsed uint64
sn, err := c.GetSnapshot(meta.ID)
sn, err := c.GetSnapshot(meta.ID, snapshotter)
// If snapshotstore doesn't have cached snapshot information
// set WritableLayer usage to zero
if err == nil {
@ -322,10 +333,11 @@ func (c *criService) windowsContainerMetrics(
func (c *criService) linuxContainerMetrics(
meta containerstore.Metadata,
stats *types.Metric,
snapshotter string,
) (*runtime.ContainerStats, error) {
var cs runtime.ContainerStats
var usedBytes, inodesUsed uint64
sn, err := c.GetSnapshot(meta.ID)
sn, err := c.GetSnapshot(meta.ID, snapshotter)
// If snapshotstore doesn't have cached snapshot information
// set WritableLayer usage to zero
if err == nil {

View File

@ -278,7 +278,7 @@ func (s *fakeImageService) UpdateImage(ctx context.Context, r string) error { re
func (s *fakeImageService) GetImage(id string) (imagestore.Image, error) { return s.imageStore.Get(id) }
func (s *fakeImageService) GetSnapshot(key string) (snapshotstore.Snapshot, error) {
func (s *fakeImageService) GetSnapshot(key, snapshotter string) (snapshotstore.Snapshot, error) {
return snapshotstore.Snapshot{}, errors.New("not implemented")
}

View File

@ -32,21 +32,30 @@ func TestImageFsInfo(t *testing.T) {
c := newTestCRIService()
snapshots := []snapshotstore.Snapshot{
{
Key: "key1",
Key: snapshotstore.Key{
Key: "key1",
Snapshotter: "snapshotter1",
},
Kind: snapshot.KindActive,
Size: 10,
Inodes: 100,
Timestamp: 234567,
},
{
Key: "key2",
Key: snapshotstore.Key{
Key: "key2",
Snapshotter: "snapshotter1",
},
Kind: snapshot.KindCommitted,
Size: 20,
Inodes: 200,
Timestamp: 123456,
},
{
Key: "key3",
Key: snapshotstore.Key{
Key: "key3",
Snapshotter: "snapshotter1",
},
Kind: snapshot.KindView,
Size: 0,
Inodes: 0,

View File

@ -25,8 +25,10 @@ import (
criconfig "github.com/containerd/containerd/pkg/cri/config"
imagestore "github.com/containerd/containerd/pkg/cri/store/image"
snapshotstore "github.com/containerd/containerd/pkg/cri/store/snapshot"
ctrdutil "github.com/containerd/containerd/pkg/cri/util"
"github.com/containerd/containerd/pkg/kmutex"
"github.com/containerd/containerd/platforms"
snapshot "github.com/containerd/containerd/snapshots"
"github.com/containerd/log"
docker "github.com/distribution/reference"
imagedigest "github.com/opencontainers/go-digest"
@ -59,15 +61,32 @@ func NewService(config criconfig.Config, imageFSPath string, client *containerd.
unpackDuplicationSuppressor: kmutex.New(),
}
if client.SnapshotService(svc.config.ContainerdConfig.Snapshotter) == nil {
return nil, fmt.Errorf("failed to find snapshotter %q", svc.config.ContainerdConfig.Snapshotter)
snapshotters := map[string]snapshot.Snapshotter{}
ctx := ctrdutil.NamespacedContext()
// Add runtime specific snapshotters
for _, runtime := range config.ContainerdConfig.Runtimes {
snapshotterName := svc.RuntimeSnapshotter(ctx, runtime)
if snapshotter := svc.client.SnapshotService(snapshotterName); snapshotter != nil {
snapshotters[snapshotterName] = snapshotter
} else {
return nil, fmt.Errorf("failed to find snapshotter %q", snapshotterName)
}
}
// Add default snapshotter
snapshotterName := svc.config.ContainerdConfig.Snapshotter
if snapshotter := svc.client.SnapshotService(snapshotterName); snapshotter != nil {
snapshotters[snapshotterName] = snapshotter
} else {
return nil, fmt.Errorf("failed to find snapshotter %q", snapshotterName)
}
// Start snapshot stats syncer, it doesn't need to be stopped.
log.L.Info("Start snapshots syncer")
snapshotsSyncer := newSnapshotsSyncer(
svc.snapshotStore,
svc.client.SnapshotService(svc.config.ContainerdConfig.Snapshotter),
snapshotters,
time.Duration(svc.config.StatsCollectPeriod)*time.Second,
)
snapshotsSyncer.start()
@ -122,6 +141,10 @@ func (c *CRIImageService) GetImage(id string) (imagestore.Image, error) {
}
// GetSnapshot returns the snapshot with specified key.
func (c *CRIImageService) GetSnapshot(key string) (snapshotstore.Snapshot, error) {
return c.snapshotStore.Get(key)
func (c *CRIImageService) GetSnapshot(key, snapshotter string) (snapshotstore.Snapshot, error) {
snapshotKey := snapshotstore.Key{
Key: key,
Snapshotter: snapshotter,
}
return c.snapshotStore.Get(snapshotKey)
}

View File

@ -33,18 +33,18 @@ import (
// TODO(random-liu): Benchmark with high workload. We may need a statsSyncer instead if
// benchmark result shows that container cpu/memory stats also need to be cached.
type snapshotsSyncer struct {
store *snapshotstore.Store
snapshotter snapshot.Snapshotter
syncPeriod time.Duration
store *snapshotstore.Store
snapshotters map[string]snapshot.Snapshotter
syncPeriod time.Duration
}
// newSnapshotsSyncer creates a snapshot syncer.
func newSnapshotsSyncer(store *snapshotstore.Store, snapshotter snapshot.Snapshotter,
func newSnapshotsSyncer(store *snapshotstore.Store, snapshotters map[string]snapshot.Snapshotter,
period time.Duration) *snapshotsSyncer {
return &snapshotsSyncer{
store: store,
snapshotter: snapshotter,
syncPeriod: period,
store: store,
snapshotters: snapshotters,
syncPeriod: period,
}
}
@ -70,44 +70,55 @@ func (s *snapshotsSyncer) start() {
func (s *snapshotsSyncer) sync() error {
ctx := ctrdutil.NamespacedContext()
start := time.Now().UnixNano()
var snapshots []snapshot.Info
// Do not call `Usage` directly in collect function, because
// `Usage` takes time, we don't want `Walk` to hold read lock
// of snapshot metadata store for too long time.
// TODO(random-liu): Set timeout for the following 2 contexts.
if err := s.snapshotter.Walk(ctx, func(ctx context.Context, info snapshot.Info) error {
snapshots = append(snapshots, info)
return nil
}); err != nil {
return fmt.Errorf("walk all snapshots failed: %w", err)
}
for _, info := range snapshots {
sn, err := s.store.Get(info.Name)
if err == nil {
// Only update timestamp for non-active snapshot.
if sn.Kind == info.Kind && sn.Kind != snapshot.KindActive {
sn.Timestamp = time.Now().UnixNano()
s.store.Add(sn)
for key, snapshotter := range s.snapshotters {
var snapshots []snapshot.Info
// Do not call `Usage` directly in collect function, because
// `Usage` takes time, we don't want `Walk` to hold read lock
// of snapshot metadata store for too long time.
// TODO(random-liu): Set timeout for the following 2 contexts.
if err := snapshotter.Walk(ctx, func(ctx context.Context, info snapshot.Info) error {
snapshots = append(snapshots, info)
return nil
}); err != nil {
return fmt.Errorf("walk all snapshots for %q failed: %w", key, err)
}
for _, info := range snapshots {
snapshotKey := snapshotstore.Key{
Key: info.Name,
Snapshotter: key,
}
sn, err := s.store.Get(snapshotKey)
if err == nil {
// Only update timestamp for non-active snapshot.
if sn.Kind == info.Kind && sn.Kind != snapshot.KindActive {
sn.Timestamp = time.Now().UnixNano()
s.store.Add(sn)
continue
}
}
// Get newest stats if the snapshot is new or active.
sn = snapshotstore.Snapshot{
Key: snapshotstore.Key{
Key: info.Name,
Snapshotter: key,
},
Kind: info.Kind,
Timestamp: time.Now().UnixNano(),
}
usage, err := snapshotter.Usage(ctx, info.Name)
if err != nil {
if !errdefs.IsNotFound(err) {
log.L.WithError(err).Errorf("Failed to get usage for snapshot %q", info.Name)
}
continue
}
sn.Size = uint64(usage.Size)
sn.Inodes = uint64(usage.Inodes)
s.store.Add(sn)
}
// Get newest stats if the snapshot is new or active.
sn = snapshotstore.Snapshot{
Key: info.Name,
Kind: info.Kind,
Timestamp: time.Now().UnixNano(),
}
usage, err := s.snapshotter.Usage(ctx, info.Name)
if err != nil {
if !errdefs.IsNotFound(err) {
log.L.WithError(err).Errorf("Failed to get usage for snapshot %q", info.Name)
}
continue
}
sn.Size = uint64(usage.Size)
sn.Inodes = uint64(usage.Inodes)
s.store.Add(sn)
}
for _, sn := range s.store.List() {
if sn.Timestamp >= start {
continue

View File

@ -30,6 +30,7 @@ import (
containerstore "github.com/containerd/containerd/pkg/cri/store/container"
sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox"
"github.com/containerd/containerd/pkg/cri/store/stats"
ctrdutil "github.com/containerd/containerd/pkg/cri/util"
"github.com/containerd/containerd/protobuf"
"github.com/containerd/log"
"github.com/containerd/typeurl/v2"
@ -120,6 +121,12 @@ func (c *criService) toPodSandboxStats(sandbox sandboxstore.Sandbox, statsMap ma
return nil, nil, fmt.Errorf("failed to find container metric for pod with id %s", sandbox.ID)
}
ociRuntime, err := c.getSandboxRuntime(sandbox.Config, sandbox.RuntimeHandler)
if err != nil {
return nil, nil, fmt.Errorf("failed to get runtimeHandler %q: %w", sandbox.RuntimeHandler, err)
}
snapshotter := c.RuntimeSnapshotter(ctrdutil.NamespacedContext(), ociRuntime)
podRuntimeStats, err := c.convertToCRIStats(podMetric)
if err != nil {
return nil, nil, fmt.Errorf("failed to covert container metrics for sandbox with id %s: %w", sandbox.ID, err)
@ -159,7 +166,7 @@ func (c *criService) toPodSandboxStats(sandbox sandboxstore.Sandbox, statsMap ma
// If snapshotstore doesn't have cached snapshot information
// set WritableLayer usage to zero
var usedBytes uint64
sn, err := c.GetSnapshot(cntr.ID)
sn, err := c.GetSnapshot(cntr.ID, snapshotter)
if err == nil {
usedBytes = sn.Size
}

View File

@ -392,7 +392,8 @@ func Test_criService_podSandboxStats(t *testing.T) {
func sandboxPod(id string, timestamp time.Time, cachedCPU uint64) sandboxstore.Sandbox {
return sandboxstore.Sandbox{
Metadata: sandboxstore.Metadata{ID: id}, Stats: &stats.ContainerStats{
Metadata: sandboxstore.Metadata{ID: id, RuntimeHandler: "runc"},
Stats: &stats.ContainerStats{
Timestamp: timestamp,
UsageCoreNanoSeconds: cachedCPU,
}}

View File

@ -77,7 +77,7 @@ type imageService interface {
UpdateImage(ctx context.Context, r string) error
GetImage(id string) (imagestore.Image, error)
GetSnapshot(key string) (snapshotstore.Snapshot, error)
GetSnapshot(key, snapshotter string) (snapshotstore.Snapshot, error)
LocalResolve(refOrID string) (imagestore.Image, error)
}

View File

@ -33,5 +33,15 @@ var testConfig = criconfig.Config{
PluginConfig: criconfig.PluginConfig{
SandboxImage: testSandboxImage,
TolerateMissingHugetlbController: true,
ContainerdConfig: criconfig.ContainerdConfig{
Snapshotter: "overlayfs",
DefaultRuntimeName: "runc",
Runtimes: map[string]criconfig.Runtime{
"runc": {
Type: "runc",
Snapshotter: "overlayfs",
},
},
},
},
}

View File

@ -23,10 +23,17 @@ import (
snapshot "github.com/containerd/containerd/snapshots"
)
type Key struct {
// Key is the key of the snapshot
Key string
// Snapshotter is the name of the snapshotter managing the snapshot
Snapshotter string
}
// Snapshot contains the information about the snapshot.
type Snapshot struct {
// Key is the key of the snapshot
Key string
Key Key
// Kind is the kind of the snapshot (active, committed, view)
Kind snapshot.Kind
// Size is the size of the snapshot in bytes.
@ -41,12 +48,12 @@ type Snapshot struct {
// Store stores all snapshots.
type Store struct {
lock sync.RWMutex
snapshots map[string]Snapshot
snapshots map[Key]Snapshot
}
// NewStore creates a snapshot store.
func NewStore() *Store {
return &Store{snapshots: make(map[string]Snapshot)}
return &Store{snapshots: make(map[Key]Snapshot)}
}
// Add a snapshot into the store.
@ -58,7 +65,7 @@ func (s *Store) Add(snapshot Snapshot) {
// Get returns the snapshot with specified key. Returns errdefs.ErrNotFound if the
// snapshot doesn't exist.
func (s *Store) Get(key string) (Snapshot, error) {
func (s *Store) Get(key Key) (Snapshot, error) {
s.lock.RLock()
defer s.lock.RUnlock()
if sn, ok := s.snapshots[key]; ok {
@ -79,7 +86,7 @@ func (s *Store) List() []Snapshot {
}
// Delete deletes the snapshot with specified key.
func (s *Store) Delete(key string) {
func (s *Store) Delete(key Key) {
s.lock.Lock()
defer s.lock.Unlock()
delete(s.snapshots, key)

View File

@ -27,23 +27,35 @@ import (
)
func TestSnapshotStore(t *testing.T) {
snapshots := map[string]Snapshot{
"key1": {
Key: "key1",
key1 := Key{
Key: "key1",
Snapshotter: "snapshotter1",
}
key2 := Key{
Key: "key2",
Snapshotter: "snapshotter1",
}
key3 := Key{
Key: "key1",
Snapshotter: "snapshotter2",
}
snapshots := map[Key]Snapshot{
key1: {
Key: key1,
Kind: snapshot.KindActive,
Size: 10,
Inodes: 100,
Timestamp: time.Now().UnixNano(),
},
"key2": {
Key: "key2",
key2: {
Key: key2,
Kind: snapshot.KindCommitted,
Size: 20,
Inodes: 200,
Timestamp: time.Now().UnixNano(),
},
"key3": {
Key: "key3",
key3: {
Key: key3,
Kind: snapshot.KindView,
Size: 0,
Inodes: 0,
@ -70,7 +82,19 @@ func TestSnapshotStore(t *testing.T) {
sns := s.List()
assert.Len(sns, 3)
testKey := "key2"
invalidKey := Key{
Key: "key2",
Snapshotter: "snapshotter2",
}
t.Logf("should not delete snapshot with invalid key")
s.Delete(invalidKey)
sns = s.List()
assert.Len(sns, 3)
testKey := Key{
Key: "key2",
Snapshotter: "snapshotter1",
}
t.Logf("should be able to delete snapshot")
s.Delete(testKey)