Add a check to skip stats for containers that are not running

When a container is just created, exited state the container will not have stats. A common case for this in k8s is the init containers for a pod. The will be present in the listed containers but will not have a running task and there for no stats.

Signed-off-by: James Sturtevant <jstur@microsoft.com>
This commit is contained in:
James Sturtevant 2023-06-02 14:43:42 -07:00
parent 8a6c8a96c0
commit 28a5199ff6
No known key found for this signature in database
4 changed files with 282 additions and 47 deletions

View File

@ -129,6 +129,11 @@ func (c *criService) toPodSandboxStats(sandbox sandboxstore.Sandbox, statsMap ma
for _, cntr := range containers {
containerMetric := statsMap[cntr.ID]
if cntr.Status.Get().State() != runtime.ContainerState_CONTAINER_RUNNING {
// containers that are just created, in a failed state or exited (init containers) will not have stats
continue
}
if containerMetric == nil {
return nil, nil, fmt.Errorf("failed to find metrics for container with id %s: %w", cntr.ID, err)
}

View File

@ -118,7 +118,102 @@ func Test_criService_podSandboxStats(t *testing.T) {
},
sandbox: sandboxstore.Sandbox{Metadata: sandboxstore.Metadata{ID: "s1"}},
containers: []containerstore.Container{
{Metadata: containerstore.Metadata{ID: "c1"}},
newContainer("c1", running, nil),
},
expectedPodStats: expectedStats{
UsageCoreNanoSeconds: 400,
UsageNanoCores: 0,
WorkingSetBytes: 40,
},
expectedContainerStats: []expectedStats{
{
UsageCoreNanoSeconds: 200,
UsageNanoCores: 0,
WorkingSetBytes: 20,
},
},
expectError: false,
},
{
desc: "pod stats will include the init container stats",
metrics: map[string]*wstats.Statistics{
"c1": {
Container: windowsStat(currentStatsTimestamp, 200, 20),
},
"s1": {
Container: windowsStat(currentStatsTimestamp, 200, 20),
},
"i1": {
Container: windowsStat(currentStatsTimestamp, 200, 20),
},
},
sandbox: sandboxstore.Sandbox{Metadata: sandboxstore.Metadata{ID: "s1"}},
containers: []containerstore.Container{
newContainer("c1", running, nil),
newContainer("i1", running, nil),
},
expectedPodStats: expectedStats{
UsageCoreNanoSeconds: 600,
UsageNanoCores: 0,
WorkingSetBytes: 60,
},
expectedContainerStats: []expectedStats{
{
UsageCoreNanoSeconds: 200,
UsageNanoCores: 0,
WorkingSetBytes: 20,
},
{
UsageCoreNanoSeconds: 200,
UsageNanoCores: 0,
WorkingSetBytes: 20,
},
},
expectError: false,
},
{
desc: "pod stats will not include the init container stats if it is stopped",
metrics: map[string]*wstats.Statistics{
"c1": {
Container: windowsStat(currentStatsTimestamp, 200, 20),
},
"s1": {
Container: windowsStat(currentStatsTimestamp, 200, 20),
},
},
sandbox: sandboxstore.Sandbox{Metadata: sandboxstore.Metadata{ID: "s1"}},
containers: []containerstore.Container{
newContainer("c1", running, nil),
newContainer("i1", exitedValid, nil),
},
expectedPodStats: expectedStats{
UsageCoreNanoSeconds: 400,
UsageNanoCores: 0,
WorkingSetBytes: 40,
},
expectedContainerStats: []expectedStats{
{
UsageCoreNanoSeconds: 200,
UsageNanoCores: 0,
WorkingSetBytes: 20,
},
},
expectError: false,
},
{
desc: "pod stats will not include the init container stats if it is stopped in failed state",
metrics: map[string]*wstats.Statistics{
"c1": {
Container: windowsStat(currentStatsTimestamp, 200, 20),
},
"s1": {
Container: windowsStat(currentStatsTimestamp, 200, 20),
},
},
sandbox: sandboxstore.Sandbox{Metadata: sandboxstore.Metadata{ID: "s1"}},
containers: []containerstore.Container{
newContainer("c1", running, nil),
newContainer("i1", exitedInvalid, nil),
},
expectedPodStats: expectedStats{
UsageCoreNanoSeconds: 400,
@ -146,13 +241,10 @@ func Test_criService_podSandboxStats(t *testing.T) {
},
sandbox: sandboxPod("s1", initialStatsTimestamp, 400),
containers: []containerstore.Container{
{
Metadata: containerstore.Metadata{ID: "c1"},
Stats: &stats.ContainerStats{
newContainer("c1", running, &stats.ContainerStats{
Timestamp: initialStatsTimestamp,
UsageCoreNanoSeconds: 200,
},
},
}),
},
expectedPodStats: expectedStats{
UsageCoreNanoSeconds: 800,
@ -178,13 +270,10 @@ func Test_criService_podSandboxStats(t *testing.T) {
},
sandbox: sandboxPod("s1", initialStatsTimestamp, 200),
containers: []containerstore.Container{
{
Metadata: containerstore.Metadata{ID: "c1"},
Stats: &stats.ContainerStats{
newContainer("c1", running, &stats.ContainerStats{
Timestamp: initialStatsTimestamp,
UsageCoreNanoSeconds: 200,
},
},
}),
},
expectedPodStats: expectedStats{
UsageCoreNanoSeconds: 400,
@ -210,13 +299,10 @@ func Test_criService_podSandboxStats(t *testing.T) {
},
sandbox: sandboxPod("s1", initialStatsTimestamp, 200),
containers: []containerstore.Container{
{
Metadata: containerstore.Metadata{ID: "c1"},
Stats: &stats.ContainerStats{
newContainer("c1", running, &stats.ContainerStats{
Timestamp: initialStatsTimestamp,
UsageCoreNanoSeconds: 200,
},
},
}),
},
expectedPodStats: expectedStats{
UsageCoreNanoSeconds: 400,
@ -240,7 +326,7 @@ func Test_criService_podSandboxStats(t *testing.T) {
assert.NotNil(t, err)
return
}
assert.Nil(t, err)
assert.Equal(t, test.expectedPodStats.UsageCoreNanoSeconds, actualPodStats.Cpu.UsageCoreNanoSeconds.Value)
assert.Equal(t, test.expectedPodStats.UsageNanoCores, actualPodStats.Cpu.UsageNanoCores.Value)
@ -276,6 +362,33 @@ func windowsStat(timestamp time.Time, cpu uint64, memory uint64) *wstats.Statist
}
}
func newContainer(id string, status containerstore.Status, stats *stats.ContainerStats) containerstore.Container {
cntr, err := containerstore.NewContainer(containerstore.Metadata{ID: id}, containerstore.WithFakeStatus(status))
if err != nil {
panic(err)
}
if stats != nil {
cntr.Stats = stats
}
return cntr
}
var exitedValid = containerstore.Status{
StartedAt: time.Now().UnixNano(),
FinishedAt: time.Now().UnixNano(),
ExitCode: 0,
}
var exitedInvalid = containerstore.Status{
StartedAt: time.Now().UnixNano(),
FinishedAt: time.Now().UnixNano(),
ExitCode: 1,
}
var running = containerstore.Status{
StartedAt: time.Now().UnixNano(),
}
func Test_criService_saveSandBoxMetrics(t *testing.T) {
timestamp := time.Now()

View File

@ -71,7 +71,6 @@ func (c *criService) podSandboxStats(
podSandboxStats.Windows.Cpu = podCPU.Cpu
podSandboxStats.Windows.Memory = podCPU.Memory
podSandboxStats.Windows.Containers = containerStats
podSandboxStats.Windows.Network = windowsNetworkUsage(ctx, sandbox, timestamp)
pidCount, err := c.getSandboxPidCount(ctx, sandbox)
@ -128,6 +127,11 @@ func (c *criService) toPodSandboxStats(sandbox sandboxstore.Sandbox, statsMap ma
for _, cntr := range containers {
containerMetric := statsMap[cntr.ID]
if cntr.Status.Get().State() != runtime.ContainerState_CONTAINER_RUNNING {
// containers that are just created, in a failed state or exited (init containers) will not have stats
continue
}
if containerMetric == nil {
return nil, nil, fmt.Errorf("failed to find metrics for container with id %s: %w", cntr.ID, err)
}

View File

@ -118,7 +118,102 @@ func Test_criService_podSandboxStats(t *testing.T) {
},
sandbox: sandboxstore.Sandbox{Metadata: sandboxstore.Metadata{ID: "s1"}},
containers: []containerstore.Container{
{Metadata: containerstore.Metadata{ID: "c1"}},
newContainer("c1", running, nil),
},
expectedPodStats: expectedStats{
UsageCoreNanoSeconds: 400,
UsageNanoCores: 0,
WorkingSetBytes: 40,
},
expectedContainerStats: []expectedStats{
{
UsageCoreNanoSeconds: 200,
UsageNanoCores: 0,
WorkingSetBytes: 20,
},
},
expectError: false,
},
{
desc: "pod stats will include the init container stats",
metrics: map[string]*wstats.Statistics{
"c1": {
Container: windowsStat(currentStatsTimestamp, 200, 20),
},
"s1": {
Container: windowsStat(currentStatsTimestamp, 200, 20),
},
"i1": {
Container: windowsStat(currentStatsTimestamp, 200, 20),
},
},
sandbox: sandboxstore.Sandbox{Metadata: sandboxstore.Metadata{ID: "s1"}},
containers: []containerstore.Container{
newContainer("c1", running, nil),
newContainer("i1", running, nil),
},
expectedPodStats: expectedStats{
UsageCoreNanoSeconds: 600,
UsageNanoCores: 0,
WorkingSetBytes: 60,
},
expectedContainerStats: []expectedStats{
{
UsageCoreNanoSeconds: 200,
UsageNanoCores: 0,
WorkingSetBytes: 20,
},
{
UsageCoreNanoSeconds: 200,
UsageNanoCores: 0,
WorkingSetBytes: 20,
},
},
expectError: false,
},
{
desc: "pod stats will not include the init container stats if it is stopped",
metrics: map[string]*wstats.Statistics{
"c1": {
Container: windowsStat(currentStatsTimestamp, 200, 20),
},
"s1": {
Container: windowsStat(currentStatsTimestamp, 200, 20),
},
},
sandbox: sandboxstore.Sandbox{Metadata: sandboxstore.Metadata{ID: "s1"}},
containers: []containerstore.Container{
newContainer("c1", running, nil),
newContainer("i1", exitedValid, nil),
},
expectedPodStats: expectedStats{
UsageCoreNanoSeconds: 400,
UsageNanoCores: 0,
WorkingSetBytes: 40,
},
expectedContainerStats: []expectedStats{
{
UsageCoreNanoSeconds: 200,
UsageNanoCores: 0,
WorkingSetBytes: 20,
},
},
expectError: false,
},
{
desc: "pod stats will not include the init container stats if it is stopped in failed state",
metrics: map[string]*wstats.Statistics{
"c1": {
Container: windowsStat(currentStatsTimestamp, 200, 20),
},
"s1": {
Container: windowsStat(currentStatsTimestamp, 200, 20),
},
},
sandbox: sandboxstore.Sandbox{Metadata: sandboxstore.Metadata{ID: "s1"}},
containers: []containerstore.Container{
newContainer("c1", running, nil),
newContainer("i1", exitedInvalid, nil),
},
expectedPodStats: expectedStats{
UsageCoreNanoSeconds: 400,
@ -146,13 +241,10 @@ func Test_criService_podSandboxStats(t *testing.T) {
},
sandbox: sandboxPod("s1", initialStatsTimestamp, 400),
containers: []containerstore.Container{
{
Metadata: containerstore.Metadata{ID: "c1"},
Stats: &stats.ContainerStats{
newContainer("c1", running, &stats.ContainerStats{
Timestamp: initialStatsTimestamp,
UsageCoreNanoSeconds: 200,
},
},
}),
},
expectedPodStats: expectedStats{
UsageCoreNanoSeconds: 800,
@ -178,13 +270,10 @@ func Test_criService_podSandboxStats(t *testing.T) {
},
sandbox: sandboxPod("s1", initialStatsTimestamp, 200),
containers: []containerstore.Container{
{
Metadata: containerstore.Metadata{ID: "c1"},
Stats: &stats.ContainerStats{
newContainer("c1", running, &stats.ContainerStats{
Timestamp: initialStatsTimestamp,
UsageCoreNanoSeconds: 200,
},
},
}),
},
expectedPodStats: expectedStats{
UsageCoreNanoSeconds: 400,
@ -210,13 +299,10 @@ func Test_criService_podSandboxStats(t *testing.T) {
},
sandbox: sandboxPod("s1", initialStatsTimestamp, 200),
containers: []containerstore.Container{
{
Metadata: containerstore.Metadata{ID: "c1"},
Stats: &stats.ContainerStats{
newContainer("c1", running, &stats.ContainerStats{
Timestamp: initialStatsTimestamp,
UsageCoreNanoSeconds: 200,
},
},
}),
},
expectedPodStats: expectedStats{
UsageCoreNanoSeconds: 400,
@ -240,7 +326,7 @@ func Test_criService_podSandboxStats(t *testing.T) {
assert.NotNil(t, err)
return
}
assert.Nil(t, err)
assert.Equal(t, test.expectedPodStats.UsageCoreNanoSeconds, actualPodStats.Cpu.UsageCoreNanoSeconds.Value)
assert.Equal(t, test.expectedPodStats.UsageNanoCores, actualPodStats.Cpu.UsageNanoCores.Value)
@ -276,6 +362,33 @@ func windowsStat(timestamp time.Time, cpu uint64, memory uint64) *wstats.Statist
}
}
func newContainer(id string, status containerstore.Status, stats *stats.ContainerStats) containerstore.Container {
cntr, err := containerstore.NewContainer(containerstore.Metadata{ID: id}, containerstore.WithFakeStatus(status))
if err != nil {
panic(err)
}
if stats != nil {
cntr.Stats = stats
}
return cntr
}
var exitedValid = containerstore.Status{
StartedAt: time.Now().UnixNano(),
FinishedAt: time.Now().UnixNano(),
ExitCode: 0,
}
var exitedInvalid = containerstore.Status{
StartedAt: time.Now().UnixNano(),
FinishedAt: time.Now().UnixNano(),
ExitCode: 1,
}
var running = containerstore.Status{
StartedAt: time.Now().UnixNano(),
}
func Test_criService_saveSandBoxMetrics(t *testing.T) {
timestamp := time.Now()