Implement CRI container and pods stats
See https://kep.k8s.io/2371

* Implement new CRI RPCs - `ListPodSandboxStats` and `PodSandboxStats`
* `ListPodSandboxStats` and `PodSandboxStats` return stats about the pod sandbox. To obtain pod sandbox stats, underlying metrics are read from the pod sandbox cgroup parent.
* Process info is obtained by calling into the underlying task
* Network stats are taken by looking up network metrics based on the pod sandbox network namespace path
* Return more detailed stats for cpu and memory for existing container stats. These metrics use the underlying task's metrics to obtain stats.

Signed-off-by: David Porter <porterdavid@google.com>
This commit is contained in:
@@ -24,6 +24,7 @@ import (
|
||||
cio "github.com/containerd/containerd/pkg/cri/io"
|
||||
"github.com/containerd/containerd/pkg/cri/store"
|
||||
"github.com/containerd/containerd/pkg/cri/store/label"
|
||||
"github.com/containerd/containerd/pkg/cri/store/stats"
|
||||
"github.com/containerd/containerd/pkg/cri/store/truncindex"
|
||||
|
||||
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
@@ -46,6 +47,8 @@ type Container struct {
|
||||
// IsStopSignaledWithTimeout the default is 0, and it is set to 1 after sending
|
||||
// the signal once to avoid repeated sending of the signal.
|
||||
IsStopSignaledWithTimeout *uint32
|
||||
// Stats contains (mutable) stats for the container
|
||||
Stats *stats.ContainerStats
|
||||
}
|
||||
|
||||
// Opts sets specific information to newly created Container.
|
||||
@@ -166,6 +169,27 @@ func (s *Store) List() []Container {
|
||||
return containers
|
||||
}
|
||||
|
||||
func (s *Store) UpdateContainerStats(id string, newContainerStats *stats.ContainerStats) error {
|
||||
s.lock.RLock()
|
||||
defer s.lock.RUnlock()
|
||||
id, err := s.idIndex.Get(id)
|
||||
if err != nil {
|
||||
if err == truncindex.ErrNotExist {
|
||||
err = errdefs.ErrNotFound
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
if _, ok := s.containers[id]; !ok {
|
||||
return errdefs.ErrNotFound
|
||||
}
|
||||
|
||||
c := s.containers[id]
|
||||
c.Stats = newContainerStats
|
||||
s.containers[id] = c
|
||||
return nil
|
||||
}
|
||||
|
||||
// Delete deletes the container from store with specified id.
|
||||
func (s *Store) Delete(id string) {
|
||||
s.lock.Lock()
|
||||
|
||||
@@ -24,6 +24,7 @@ import (
|
||||
"github.com/containerd/containerd/errdefs"
|
||||
cio "github.com/containerd/containerd/pkg/cri/io"
|
||||
"github.com/containerd/containerd/pkg/cri/store/label"
|
||||
"github.com/containerd/containerd/pkg/cri/store/stats"
|
||||
|
||||
"github.com/opencontainers/selinux/go-selinux"
|
||||
assertlib "github.com/stretchr/testify/assert"
|
||||
@@ -132,6 +133,25 @@ func TestContainerStore(t *testing.T) {
|
||||
Removing: true,
|
||||
},
|
||||
}
|
||||
|
||||
stats := map[string]*stats.ContainerStats{
|
||||
"1": {
|
||||
Timestamp: time.Now(),
|
||||
UsageCoreNanoSeconds: 1,
|
||||
},
|
||||
"2abcd": {
|
||||
Timestamp: time.Now(),
|
||||
UsageCoreNanoSeconds: 2,
|
||||
},
|
||||
"4a333": {
|
||||
Timestamp: time.Now(),
|
||||
UsageCoreNanoSeconds: 3,
|
||||
},
|
||||
"4abcd": {
|
||||
Timestamp: time.Now(),
|
||||
UsageCoreNanoSeconds: 4,
|
||||
},
|
||||
}
|
||||
assert := assertlib.New(t)
|
||||
containers := map[string]Container{}
|
||||
for id := range metadatas {
|
||||
@@ -163,12 +183,26 @@ func TestContainerStore(t *testing.T) {
|
||||
got, err := s.Get(genTruncIndex(id))
|
||||
assert.NoError(err)
|
||||
assert.Equal(c, got)
|
||||
assert.Nil(c.Stats)
|
||||
}
|
||||
|
||||
t.Logf("should be able to list containers")
|
||||
cs := s.List()
|
||||
assert.Len(cs, len(containers))
|
||||
|
||||
t.Logf("should be able to update stats on container")
|
||||
for id := range containers {
|
||||
err := s.UpdateContainerStats(id, stats[id])
|
||||
assert.NoError(err)
|
||||
}
|
||||
|
||||
// Validate stats were updated
|
||||
cs = s.List()
|
||||
assert.Len(cs, len(containers))
|
||||
for _, c := range cs {
|
||||
assert.Equal(stats[c.ID], c.Stats)
|
||||
}
|
||||
|
||||
if selinux.GetEnabled() {
|
||||
t.Logf("should have reserved labels (requires -tag selinux)")
|
||||
assert.Equal(map[string]bool{
|
||||
|
||||
@@ -23,6 +23,7 @@ import (
|
||||
"github.com/containerd/containerd/errdefs"
|
||||
"github.com/containerd/containerd/pkg/cri/store"
|
||||
"github.com/containerd/containerd/pkg/cri/store/label"
|
||||
"github.com/containerd/containerd/pkg/cri/store/stats"
|
||||
"github.com/containerd/containerd/pkg/cri/store/truncindex"
|
||||
"github.com/containerd/containerd/pkg/netns"
|
||||
)
|
||||
@@ -42,6 +43,8 @@ type Sandbox struct {
|
||||
NetNS *netns.NetNS
|
||||
// StopCh is used to propagate the stop information of the sandbox.
|
||||
*store.StopCh
|
||||
// Stats contains (mutable) stats for the (pause) sandbox container
|
||||
Stats *stats.ContainerStats
|
||||
}
|
||||
|
||||
// NewSandbox creates an internally used sandbox type. This function reminds
|
||||
@@ -121,6 +124,27 @@ func (s *Store) List() []Sandbox {
|
||||
return sandboxes
|
||||
}
|
||||
|
||||
func (s *Store) UpdateContainerStats(id string, newContainerStats *stats.ContainerStats) error {
|
||||
s.lock.RLock()
|
||||
defer s.lock.RUnlock()
|
||||
id, err := s.idIndex.Get(id)
|
||||
if err != nil {
|
||||
if err == truncindex.ErrNotExist {
|
||||
err = errdefs.ErrNotFound
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
if _, ok := s.sandboxes[id]; !ok {
|
||||
return errdefs.ErrNotFound
|
||||
}
|
||||
|
||||
c := s.sandboxes[id]
|
||||
c.Stats = newContainerStats
|
||||
s.sandboxes[id] = c
|
||||
return nil
|
||||
}
|
||||
|
||||
// Delete deletes the sandbox with specified id.
|
||||
func (s *Store) Delete(id string) {
|
||||
s.lock.Lock()
|
||||
|
||||
@@ -18,9 +18,11 @@ package sandbox
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/containerd/containerd/errdefs"
|
||||
"github.com/containerd/containerd/pkg/cri/store/label"
|
||||
"github.com/containerd/containerd/pkg/cri/store/stats"
|
||||
|
||||
assertlib "github.com/stretchr/testify/assert"
|
||||
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
@@ -109,6 +111,24 @@ func TestSandboxStore(t *testing.T) {
|
||||
},
|
||||
Status{State: StateUnknown},
|
||||
)
|
||||
stats := map[string]*stats.ContainerStats{
|
||||
"1": {
|
||||
Timestamp: time.Now(),
|
||||
UsageCoreNanoSeconds: 1,
|
||||
},
|
||||
"2abcd": {
|
||||
Timestamp: time.Now(),
|
||||
UsageCoreNanoSeconds: 2,
|
||||
},
|
||||
"4a333": {
|
||||
Timestamp: time.Now(),
|
||||
UsageCoreNanoSeconds: 3,
|
||||
},
|
||||
"4abcd": {
|
||||
Timestamp: time.Now(),
|
||||
UsageCoreNanoSeconds: 4,
|
||||
},
|
||||
}
|
||||
assert := assertlib.New(t)
|
||||
s := NewStore(label.NewStore())
|
||||
|
||||
@@ -136,6 +156,19 @@ func TestSandboxStore(t *testing.T) {
|
||||
sbs := s.List()
|
||||
assert.Len(sbs, sbNum)
|
||||
|
||||
t.Logf("should be able to update stats on container")
|
||||
for id := range sandboxes {
|
||||
err := s.UpdateContainerStats(id, stats[id])
|
||||
assert.NoError(err)
|
||||
}
|
||||
|
||||
// Validate stats were updated
|
||||
sbs = s.List()
|
||||
assert.Len(sbs, sbNum)
|
||||
for _, sb := range sbs {
|
||||
assert.Equal(stats[sb.ID], sb.Stats)
|
||||
}
|
||||
|
||||
for testID, v := range sandboxes {
|
||||
truncID := genTruncIndex(testID)
|
||||
|
||||
|
||||
27
pkg/cri/store/stats/stats.go
Normal file
27
pkg/cri/store/stats/stats.go
Normal file
@@ -0,0 +1,27 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package stats
|
||||
|
||||
import "time"
|
||||
|
||||
// ContainerStats holds a collection timestamp together with a cumulative CPU
// usage value for a container.
|
||||
type ContainerStats struct {
|
||||
// Timestamp is the time at which the stats were collected.
|
||||
Timestamp time.Time
|
||||
// UsageCoreNanoSeconds is the cumulative CPU usage (sum across all cores)
// since object creation.
// NOTE(review): presumably expressed in nanoseconds, as the field name
// suggests — confirm against the code that populates it.
|
||||
UsageCoreNanoSeconds uint64
|
||||
}
|
||||
Reference in New Issue
Block a user