Implement CRI container and pod stats

See https://kep.k8s.io/2371

* Implement new CRI RPCs - `ListPodSandboxStats` and `PodSandboxStats`
  * `ListPodSandboxStats` and `PodSandboxStats` return stats about a
    pod sandbox. To obtain pod sandbox stats, the underlying metrics are
    read from the pod sandbox cgroup parent.
  * Process info is obtained by calling into the underlying task
  * Network stats are taken by looking up network metrics based on the
    pod sandbox network namespace path
* Return more detailed CPU and memory stats for the existing container
  stats. These stats are derived from the underlying task's metrics.

Signed-off-by: David Porter <porterdavid@google.com>
This commit is contained in:
David Porter
2021-10-05 13:28:36 -07:00
parent b69bbe25ac
commit 2e6d5709e3
14 changed files with 1090 additions and 33 deletions

View File

@@ -24,6 +24,7 @@ import (
cio "github.com/containerd/containerd/pkg/cri/io"
"github.com/containerd/containerd/pkg/cri/store"
"github.com/containerd/containerd/pkg/cri/store/label"
"github.com/containerd/containerd/pkg/cri/store/stats"
"github.com/containerd/containerd/pkg/cri/store/truncindex"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
@@ -46,6 +47,8 @@ type Container struct {
// IsStopSignaledWithTimeout the default is 0, and it is set to 1 after sending
// the signal once to avoid repeated sending of the signal.
IsStopSignaledWithTimeout *uint32
// Stats contains (mutable) stats for the container
Stats *stats.ContainerStats
}
// Opts sets specific information to newly created Container.
@@ -166,6 +169,27 @@ func (s *Store) List() []Container {
return containers
}
// UpdateContainerStats replaces the stats recorded for the container
// identified by id with newContainerStats. The id is first resolved through
// the store's ID index.
//
// It returns errdefs.ErrNotFound when the index reports
// truncindex.ErrNotExist or when the resolved ID has no entry in the store;
// any other index error is returned unchanged.
func (s *Store) UpdateContainerStats(id string, newContainerStats *stats.ContainerStats) error {
	// The containers map is written below, so the exclusive lock is required:
	// a read lock would allow concurrent callers to write the map at the same
	// time, which is a data race.
	s.lock.Lock()
	defer s.lock.Unlock()

	id, err := s.idIndex.Get(id)
	if err != nil {
		if err == truncindex.ErrNotExist {
			err = errdefs.ErrNotFound
		}
		return err
	}

	c, ok := s.containers[id]
	if !ok {
		return errdefs.ErrNotFound
	}
	// Update the retrieved entry and store it back under the same ID.
	c.Stats = newContainerStats
	s.containers[id] = c
	return nil
}
// Delete deletes the container from store with specified id.
func (s *Store) Delete(id string) {
s.lock.Lock()

View File

@@ -24,6 +24,7 @@ import (
"github.com/containerd/containerd/errdefs"
cio "github.com/containerd/containerd/pkg/cri/io"
"github.com/containerd/containerd/pkg/cri/store/label"
"github.com/containerd/containerd/pkg/cri/store/stats"
"github.com/opencontainers/selinux/go-selinux"
assertlib "github.com/stretchr/testify/assert"
@@ -132,6 +133,25 @@ func TestContainerStore(t *testing.T) {
Removing: true,
},
}
stats := map[string]*stats.ContainerStats{
"1": {
Timestamp: time.Now(),
UsageCoreNanoSeconds: 1,
},
"2abcd": {
Timestamp: time.Now(),
UsageCoreNanoSeconds: 2,
},
"4a333": {
Timestamp: time.Now(),
UsageCoreNanoSeconds: 3,
},
"4abcd": {
Timestamp: time.Now(),
UsageCoreNanoSeconds: 4,
},
}
assert := assertlib.New(t)
containers := map[string]Container{}
for id := range metadatas {
@@ -163,12 +183,26 @@ func TestContainerStore(t *testing.T) {
got, err := s.Get(genTruncIndex(id))
assert.NoError(err)
assert.Equal(c, got)
assert.Nil(c.Stats)
}
t.Logf("should be able to list containers")
cs := s.List()
assert.Len(cs, len(containers))
t.Logf("should be able to update stats on container")
for id := range containers {
err := s.UpdateContainerStats(id, stats[id])
assert.NoError(err)
}
// Validate stats were updated
cs = s.List()
assert.Len(cs, len(containers))
for _, c := range cs {
assert.Equal(stats[c.ID], c.Stats)
}
if selinux.GetEnabled() {
t.Logf("should have reserved labels (requires -tag selinux)")
assert.Equal(map[string]bool{