Merge pull request #8671 from jsturtevant/fix-windows-edge-cases
[cri] Handle pod transition states gracefully while listing pod stats
This commit is contained in:
commit
3ed1bc108f
@ -24,6 +24,7 @@ import (
|
|||||||
"github.com/containerd/cgroups/v3"
|
"github.com/containerd/cgroups/v3"
|
||||||
"github.com/containerd/cgroups/v3/cgroup1"
|
"github.com/containerd/cgroups/v3/cgroup1"
|
||||||
cgroupsv2 "github.com/containerd/cgroups/v3/cgroup2"
|
cgroupsv2 "github.com/containerd/cgroups/v3/cgroup2"
|
||||||
|
"github.com/containerd/containerd/errdefs"
|
||||||
"github.com/containerd/containerd/log"
|
"github.com/containerd/containerd/log"
|
||||||
sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox"
|
sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox"
|
||||||
"github.com/containernetworking/plugins/pkg/ns"
|
"github.com/containernetworking/plugins/pkg/ns"
|
||||||
@ -37,7 +38,7 @@ func (c *criService) podSandboxStats(
|
|||||||
meta := sandbox.Metadata
|
meta := sandbox.Metadata
|
||||||
|
|
||||||
if sandbox.Status.Get().State != sandboxstore.StateReady {
|
if sandbox.Status.Get().State != sandboxstore.StateReady {
|
||||||
return nil, fmt.Errorf("failed to get pod sandbox stats since sandbox container %q is not in ready state", meta.ID)
|
return nil, fmt.Errorf("failed to get pod sandbox stats since sandbox container %q is not in ready state: %w", meta.ID, errdefs.ErrUnavailable)
|
||||||
}
|
}
|
||||||
|
|
||||||
stats, err := metricsForSandbox(sandbox)
|
stats, err := metricsForSandbox(sandbox)
|
||||||
|
@ -20,7 +20,10 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/containerd/containerd/errdefs"
|
||||||
|
"github.com/containerd/containerd/log"
|
||||||
sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox"
|
sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox"
|
||||||
|
"github.com/hashicorp/go-multierror"
|
||||||
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -31,16 +34,21 @@ func (c *criService) ListPodSandboxStats(
|
|||||||
) (*runtime.ListPodSandboxStatsResponse, error) {
|
) (*runtime.ListPodSandboxStatsResponse, error) {
|
||||||
sandboxes := c.sandboxesForListPodSandboxStatsRequest(r)
|
sandboxes := c.sandboxesForListPodSandboxStatsRequest(r)
|
||||||
|
|
||||||
|
var errs *multierror.Error
|
||||||
podSandboxStats := new(runtime.ListPodSandboxStatsResponse)
|
podSandboxStats := new(runtime.ListPodSandboxStatsResponse)
|
||||||
for _, sandbox := range sandboxes {
|
for _, sandbox := range sandboxes {
|
||||||
sandboxStats, err := c.podSandboxStats(ctx, sandbox)
|
sandboxStats, err := c.podSandboxStats(ctx, sandbox)
|
||||||
if err != nil {
|
switch {
|
||||||
return nil, fmt.Errorf("failed to decode sandbox container metrics for sandbox %q: %w", sandbox.ID, err)
|
case errdefs.IsUnavailable(err):
|
||||||
|
log.G(ctx).WithField("podsandboxid", sandbox.ID).Debugf("failed to get pod sandbox stats, this is likely a transient error: %v", err)
|
||||||
|
case err != nil:
|
||||||
|
errs = multierror.Append(errs, fmt.Errorf("failed to decode sandbox container metrics for sandbox %q: %w", sandbox.ID, err))
|
||||||
|
default:
|
||||||
|
podSandboxStats.Stats = append(podSandboxStats.Stats, sandboxStats)
|
||||||
}
|
}
|
||||||
podSandboxStats.Stats = append(podSandboxStats.Stats, sandboxStats)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return podSandboxStats, nil
|
return podSandboxStats, errs.ErrorOrNil()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *criService) sandboxesForListPodSandboxStatsRequest(r *runtime.ListPodSandboxStatsRequest) []sandboxstore.Sandbox {
|
func (c *criService) sandboxesForListPodSandboxStatsRequest(r *runtime.ListPodSandboxStatsRequest) []sandboxstore.Sandbox {
|
||||||
|
@ -26,6 +26,7 @@ import (
|
|||||||
"github.com/Microsoft/hcsshim/hcn"
|
"github.com/Microsoft/hcsshim/hcn"
|
||||||
"github.com/containerd/containerd/api/services/tasks/v1"
|
"github.com/containerd/containerd/api/services/tasks/v1"
|
||||||
"github.com/containerd/containerd/api/types"
|
"github.com/containerd/containerd/api/types"
|
||||||
|
"github.com/containerd/containerd/errdefs"
|
||||||
"github.com/containerd/containerd/log"
|
"github.com/containerd/containerd/log"
|
||||||
containerstore "github.com/containerd/containerd/pkg/cri/store/container"
|
containerstore "github.com/containerd/containerd/pkg/cri/store/container"
|
||||||
sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox"
|
sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox"
|
||||||
@ -36,12 +37,11 @@ import (
|
|||||||
|
|
||||||
func (c *criService) podSandboxStats(
|
func (c *criService) podSandboxStats(
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
sandbox sandboxstore.Sandbox,
|
sandbox sandboxstore.Sandbox) (*runtime.PodSandboxStats, error) {
|
||||||
) (*runtime.PodSandboxStats, error) {
|
|
||||||
meta := sandbox.Metadata
|
meta := sandbox.Metadata
|
||||||
|
|
||||||
if sandbox.Status.Get().State != sandboxstore.StateReady {
|
if sandbox.Status.Get().State != sandboxstore.StateReady {
|
||||||
return nil, fmt.Errorf("failed to get pod sandbox stats since sandbox container %q is not in ready state", meta.ID)
|
return nil, fmt.Errorf("failed to get pod sandbox stats since sandbox container %q is not in ready state: %w", meta.ID, errdefs.ErrUnavailable)
|
||||||
}
|
}
|
||||||
|
|
||||||
timestamp := time.Now()
|
timestamp := time.Now()
|
||||||
@ -62,7 +62,7 @@ func (c *criService) podSandboxStats(
|
|||||||
|
|
||||||
statsMap, err := convertMetricsToWindowsStats(metrics, sandbox)
|
statsMap, err := convertMetricsToWindowsStats(metrics, sandbox)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, fmt.Errorf("failed to convert stats: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
podCPU, containerStats, err := c.toPodSandboxStats(sandbox, statsMap, containers, timestamp)
|
podCPU, containerStats, err := c.toPodSandboxStats(sandbox, statsMap, containers, timestamp)
|
||||||
@ -72,12 +72,11 @@ func (c *criService) podSandboxStats(
|
|||||||
podSandboxStats.Windows.Cpu = podCPU.Cpu
|
podSandboxStats.Windows.Cpu = podCPU.Cpu
|
||||||
podSandboxStats.Windows.Memory = podCPU.Memory
|
podSandboxStats.Windows.Memory = podCPU.Memory
|
||||||
podSandboxStats.Windows.Containers = containerStats
|
podSandboxStats.Windows.Containers = containerStats
|
||||||
|
|
||||||
podSandboxStats.Windows.Network = windowsNetworkUsage(ctx, sandbox, timestamp)
|
podSandboxStats.Windows.Network = windowsNetworkUsage(ctx, sandbox, timestamp)
|
||||||
|
|
||||||
pidCount, err := c.getSandboxPidCount(ctx, sandbox)
|
pidCount, err := c.getSandboxPidCount(ctx, sandbox)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, fmt.Errorf("failed to get pid count: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
podSandboxStats.Windows.Process = &runtime.WindowsProcessUsage{
|
podSandboxStats.Windows.Process = &runtime.WindowsProcessUsage{
|
||||||
@ -131,11 +130,13 @@ func (c *criService) toPodSandboxStats(sandbox sandboxstore.Sandbox, statsMap ma
|
|||||||
|
|
||||||
if cntr.Status.Get().State() != runtime.ContainerState_CONTAINER_RUNNING {
|
if cntr.Status.Get().State() != runtime.ContainerState_CONTAINER_RUNNING {
|
||||||
// containers that are just created, in a failed state or exited (init containers) will not have stats
|
// containers that are just created, in a failed state or exited (init containers) will not have stats
|
||||||
|
log.L.Warnf("failed to get container stats since container %q is not in running state", cntr.ID)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if containerMetric == nil {
|
if containerMetric == nil {
|
||||||
return nil, nil, fmt.Errorf("failed to find metrics for container with id %s: %w", cntr.ID, err)
|
log.L.Warnf("no metrics found for container %q", cntr.ID)
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
containerStats, err := c.convertToCRIStats(containerMetric)
|
containerStats, err := c.convertToCRIStats(containerMetric)
|
||||||
@ -144,7 +145,7 @@ func (c *criService) toPodSandboxStats(sandbox sandboxstore.Sandbox, statsMap ma
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Calculate NanoCores for container
|
// Calculate NanoCores for container
|
||||||
if containerStats.Cpu.UsageCoreNanoSeconds != nil {
|
if containerStats.Cpu != nil && containerStats.Cpu.UsageCoreNanoSeconds != nil {
|
||||||
nanoCoreUsage := getUsageNanoCores(containerStats.Cpu.UsageCoreNanoSeconds.Value, cntr.Stats, containerStats.Cpu.Timestamp)
|
nanoCoreUsage := getUsageNanoCores(containerStats.Cpu.UsageCoreNanoSeconds.Value, cntr.Stats, containerStats.Cpu.Timestamp)
|
||||||
containerStats.Cpu.UsageNanoCores = &runtime.UInt64Value{Value: nanoCoreUsage}
|
containerStats.Cpu.UsageNanoCores = &runtime.UInt64Value{Value: nanoCoreUsage}
|
||||||
}
|
}
|
||||||
@ -180,7 +181,7 @@ func (c *criService) toPodSandboxStats(sandbox sandboxstore.Sandbox, statsMap ma
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Calculate NanoCores for pod after adding containers cpu including the pods cpu
|
// Calculate NanoCores for pod after adding containers cpu including the pods cpu
|
||||||
if podRuntimeStats.Cpu.UsageCoreNanoSeconds != nil {
|
if podRuntimeStats.Cpu != nil && podRuntimeStats.Cpu.UsageCoreNanoSeconds != nil {
|
||||||
nanoCoreUsage := getUsageNanoCores(podRuntimeStats.Cpu.UsageCoreNanoSeconds.Value, sandbox.Stats, podRuntimeStats.Cpu.Timestamp)
|
nanoCoreUsage := getUsageNanoCores(podRuntimeStats.Cpu.UsageCoreNanoSeconds.Value, sandbox.Stats, podRuntimeStats.Cpu.Timestamp)
|
||||||
podRuntimeStats.Cpu.UsageNanoCores = &runtime.UInt64Value{Value: nanoCoreUsage}
|
podRuntimeStats.Cpu.UsageNanoCores = &runtime.UInt64Value{Value: nanoCoreUsage}
|
||||||
}
|
}
|
||||||
@ -391,10 +392,16 @@ func (c *criService) getSandboxPidCount(ctx context.Context, sandbox sandboxstor
|
|||||||
// get process count inside PodSandbox for Windows
|
// get process count inside PodSandbox for Windows
|
||||||
task, err := sandbox.Container.Task(ctx, nil)
|
task, err := sandbox.Container.Task(ctx, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
if errdefs.IsNotFound(err) {
|
||||||
|
return 0, nil
|
||||||
|
}
|
||||||
return 0, err
|
return 0, err
|
||||||
}
|
}
|
||||||
processes, err := task.Pids(ctx)
|
processes, err := task.Pids(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
if errdefs.IsNotFound(err) {
|
||||||
|
return 0, nil
|
||||||
|
}
|
||||||
return 0, err
|
return 0, err
|
||||||
}
|
}
|
||||||
pidCount += uint64(len(processes))
|
pidCount += uint64(len(processes))
|
||||||
@ -416,6 +423,9 @@ func (c *criService) getSandboxPidCount(ctx context.Context, sandbox sandboxstor
|
|||||||
|
|
||||||
processes, err := task.Pids(ctx)
|
processes, err := task.Pids(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
if errdefs.IsNotFound(err) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
return 0, err
|
return 0, err
|
||||||
}
|
}
|
||||||
pidCount += uint64(len(processes))
|
pidCount += uint64(len(processes))
|
||||||
|
@ -93,7 +93,7 @@ func Test_criService_podSandboxStats(t *testing.T) {
|
|||||||
metrics map[string]*wstats.Statistics
|
metrics map[string]*wstats.Statistics
|
||||||
sandbox sandboxstore.Sandbox
|
sandbox sandboxstore.Sandbox
|
||||||
containers []containerstore.Container
|
containers []containerstore.Container
|
||||||
expectedPodStats expectedStats
|
expectedPodStats *expectedStats
|
||||||
expectedContainerStats []expectedStats
|
expectedContainerStats []expectedStats
|
||||||
expectError bool
|
expectError bool
|
||||||
}{
|
}{
|
||||||
@ -102,7 +102,7 @@ func Test_criService_podSandboxStats(t *testing.T) {
|
|||||||
metrics: map[string]*wstats.Statistics{},
|
metrics: map[string]*wstats.Statistics{},
|
||||||
sandbox: sandboxstore.Sandbox{},
|
sandbox: sandboxstore.Sandbox{},
|
||||||
containers: []containerstore.Container{},
|
containers: []containerstore.Container{},
|
||||||
expectedPodStats: expectedStats{},
|
expectedPodStats: &expectedStats{},
|
||||||
expectedContainerStats: []expectedStats{},
|
expectedContainerStats: []expectedStats{},
|
||||||
expectError: true,
|
expectError: true,
|
||||||
},
|
},
|
||||||
@ -120,7 +120,7 @@ func Test_criService_podSandboxStats(t *testing.T) {
|
|||||||
containers: []containerstore.Container{
|
containers: []containerstore.Container{
|
||||||
newContainer("c1", running, nil),
|
newContainer("c1", running, nil),
|
||||||
},
|
},
|
||||||
expectedPodStats: expectedStats{
|
expectedPodStats: &expectedStats{
|
||||||
UsageCoreNanoSeconds: 400,
|
UsageCoreNanoSeconds: 400,
|
||||||
UsageNanoCores: 0,
|
UsageNanoCores: 0,
|
||||||
WorkingSetBytes: 40,
|
WorkingSetBytes: 40,
|
||||||
@ -152,7 +152,7 @@ func Test_criService_podSandboxStats(t *testing.T) {
|
|||||||
newContainer("c1", running, nil),
|
newContainer("c1", running, nil),
|
||||||
newContainer("i1", running, nil),
|
newContainer("i1", running, nil),
|
||||||
},
|
},
|
||||||
expectedPodStats: expectedStats{
|
expectedPodStats: &expectedStats{
|
||||||
UsageCoreNanoSeconds: 600,
|
UsageCoreNanoSeconds: 600,
|
||||||
UsageNanoCores: 0,
|
UsageNanoCores: 0,
|
||||||
WorkingSetBytes: 60,
|
WorkingSetBytes: 60,
|
||||||
@ -186,7 +186,7 @@ func Test_criService_podSandboxStats(t *testing.T) {
|
|||||||
newContainer("c1", running, nil),
|
newContainer("c1", running, nil),
|
||||||
newContainer("i1", exitedValid, nil),
|
newContainer("i1", exitedValid, nil),
|
||||||
},
|
},
|
||||||
expectedPodStats: expectedStats{
|
expectedPodStats: &expectedStats{
|
||||||
UsageCoreNanoSeconds: 400,
|
UsageCoreNanoSeconds: 400,
|
||||||
UsageNanoCores: 0,
|
UsageNanoCores: 0,
|
||||||
WorkingSetBytes: 40,
|
WorkingSetBytes: 40,
|
||||||
@ -215,7 +215,7 @@ func Test_criService_podSandboxStats(t *testing.T) {
|
|||||||
newContainer("c1", running, nil),
|
newContainer("c1", running, nil),
|
||||||
newContainer("i1", exitedInvalid, nil),
|
newContainer("i1", exitedInvalid, nil),
|
||||||
},
|
},
|
||||||
expectedPodStats: expectedStats{
|
expectedPodStats: &expectedStats{
|
||||||
UsageCoreNanoSeconds: 400,
|
UsageCoreNanoSeconds: 400,
|
||||||
UsageNanoCores: 0,
|
UsageNanoCores: 0,
|
||||||
WorkingSetBytes: 40,
|
WorkingSetBytes: 40,
|
||||||
@ -246,7 +246,7 @@ func Test_criService_podSandboxStats(t *testing.T) {
|
|||||||
UsageCoreNanoSeconds: 200,
|
UsageCoreNanoSeconds: 200,
|
||||||
}),
|
}),
|
||||||
},
|
},
|
||||||
expectedPodStats: expectedStats{
|
expectedPodStats: &expectedStats{
|
||||||
UsageCoreNanoSeconds: 800,
|
UsageCoreNanoSeconds: 800,
|
||||||
UsageNanoCores: 400,
|
UsageNanoCores: 400,
|
||||||
WorkingSetBytes: 40,
|
WorkingSetBytes: 40,
|
||||||
@ -275,7 +275,7 @@ func Test_criService_podSandboxStats(t *testing.T) {
|
|||||||
UsageCoreNanoSeconds: 200,
|
UsageCoreNanoSeconds: 200,
|
||||||
}),
|
}),
|
||||||
},
|
},
|
||||||
expectedPodStats: expectedStats{
|
expectedPodStats: &expectedStats{
|
||||||
UsageCoreNanoSeconds: 400,
|
UsageCoreNanoSeconds: 400,
|
||||||
UsageNanoCores: 200,
|
UsageNanoCores: 200,
|
||||||
WorkingSetBytes: 20,
|
WorkingSetBytes: 20,
|
||||||
@ -304,7 +304,7 @@ func Test_criService_podSandboxStats(t *testing.T) {
|
|||||||
UsageCoreNanoSeconds: 200,
|
UsageCoreNanoSeconds: 200,
|
||||||
}),
|
}),
|
||||||
},
|
},
|
||||||
expectedPodStats: expectedStats{
|
expectedPodStats: &expectedStats{
|
||||||
UsageCoreNanoSeconds: 400,
|
UsageCoreNanoSeconds: 400,
|
||||||
UsageNanoCores: 200,
|
UsageNanoCores: 200,
|
||||||
WorkingSetBytes: 20,
|
WorkingSetBytes: 20,
|
||||||
@ -318,6 +318,32 @@ func Test_criService_podSandboxStats(t *testing.T) {
|
|||||||
},
|
},
|
||||||
expectError: false,
|
expectError: false,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
desc: "pod sandbox with a container that has no cpu shouldn't error",
|
||||||
|
metrics: map[string]*wstats.Statistics{
|
||||||
|
"c1": {},
|
||||||
|
"s1": {},
|
||||||
|
},
|
||||||
|
sandbox: sandboxPod("s1", initialStatsTimestamp, 200),
|
||||||
|
containers: []containerstore.Container{
|
||||||
|
newContainer("c1", running, &stats.ContainerStats{
|
||||||
|
Timestamp: initialStatsTimestamp,
|
||||||
|
UsageCoreNanoSeconds: 200,
|
||||||
|
}),
|
||||||
|
},
|
||||||
|
expectedPodStats: nil,
|
||||||
|
expectedContainerStats: []expectedStats{},
|
||||||
|
expectError: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
desc: "pod sandbox with no stats in metric mapp will fail",
|
||||||
|
metrics: map[string]*wstats.Statistics{},
|
||||||
|
sandbox: sandboxPod("s1", initialStatsTimestamp, 200),
|
||||||
|
containers: []containerstore.Container{},
|
||||||
|
expectedPodStats: nil,
|
||||||
|
expectedContainerStats: []expectedStats{},
|
||||||
|
expectError: true,
|
||||||
|
},
|
||||||
} {
|
} {
|
||||||
test := test
|
test := test
|
||||||
t.Run(test.desc, func(t *testing.T) {
|
t.Run(test.desc, func(t *testing.T) {
|
||||||
@ -327,6 +353,13 @@ func Test_criService_podSandboxStats(t *testing.T) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
assert.Nil(t, err)
|
assert.Nil(t, err)
|
||||||
|
|
||||||
|
if test.expectedPodStats == nil {
|
||||||
|
assert.Nil(t, actualPodStats.Cpu)
|
||||||
|
assert.Nil(t, actualPodStats.Memory)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
assert.Equal(t, test.expectedPodStats.UsageCoreNanoSeconds, actualPodStats.Cpu.UsageCoreNanoSeconds.Value)
|
assert.Equal(t, test.expectedPodStats.UsageCoreNanoSeconds, actualPodStats.Cpu.UsageCoreNanoSeconds.Value)
|
||||||
assert.Equal(t, test.expectedPodStats.UsageNanoCores, actualPodStats.Cpu.UsageNanoCores.Value)
|
assert.Equal(t, test.expectedPodStats.UsageNanoCores, actualPodStats.Cpu.UsageNanoCores.Value)
|
||||||
|
|
||||||
|
@ -24,6 +24,7 @@ import (
|
|||||||
"github.com/containerd/cgroups/v3"
|
"github.com/containerd/cgroups/v3"
|
||||||
"github.com/containerd/cgroups/v3/cgroup1"
|
"github.com/containerd/cgroups/v3/cgroup1"
|
||||||
cgroupsv2 "github.com/containerd/cgroups/v3/cgroup2"
|
cgroupsv2 "github.com/containerd/cgroups/v3/cgroup2"
|
||||||
|
"github.com/containerd/containerd/errdefs"
|
||||||
"github.com/containerd/containerd/log"
|
"github.com/containerd/containerd/log"
|
||||||
sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox"
|
sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox"
|
||||||
"github.com/containernetworking/plugins/pkg/ns"
|
"github.com/containernetworking/plugins/pkg/ns"
|
||||||
@ -37,7 +38,7 @@ func (c *criService) podSandboxStats(
|
|||||||
meta := sandbox.Metadata
|
meta := sandbox.Metadata
|
||||||
|
|
||||||
if sandbox.Status.Get().State != sandboxstore.StateReady {
|
if sandbox.Status.Get().State != sandboxstore.StateReady {
|
||||||
return nil, fmt.Errorf("failed to get pod sandbox stats since sandbox container %q is not in ready state", meta.ID)
|
return nil, fmt.Errorf("failed to get pod sandbox stats since sandbox container %q is not in ready state: %w", meta.ID, errdefs.ErrUnavailable)
|
||||||
}
|
}
|
||||||
|
|
||||||
stats, err := metricsForSandbox(sandbox)
|
stats, err := metricsForSandbox(sandbox)
|
||||||
|
@ -20,8 +20,12 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/containerd/containerd/errdefs"
|
||||||
|
"github.com/containerd/containerd/log"
|
||||||
sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox"
|
sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox"
|
||||||
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||||
|
|
||||||
|
"github.com/hashicorp/go-multierror"
|
||||||
)
|
)
|
||||||
|
|
||||||
// ListPodSandboxStats returns stats of all ready sandboxes.
|
// ListPodSandboxStats returns stats of all ready sandboxes.
|
||||||
@ -31,16 +35,21 @@ func (c *criService) ListPodSandboxStats(
|
|||||||
) (*runtime.ListPodSandboxStatsResponse, error) {
|
) (*runtime.ListPodSandboxStatsResponse, error) {
|
||||||
sandboxes := c.sandboxesForListPodSandboxStatsRequest(r)
|
sandboxes := c.sandboxesForListPodSandboxStatsRequest(r)
|
||||||
|
|
||||||
|
var errs *multierror.Error
|
||||||
podSandboxStats := new(runtime.ListPodSandboxStatsResponse)
|
podSandboxStats := new(runtime.ListPodSandboxStatsResponse)
|
||||||
for _, sandbox := range sandboxes {
|
for _, sandbox := range sandboxes {
|
||||||
sandboxStats, err := c.podSandboxStats(ctx, sandbox)
|
sandboxStats, err := c.podSandboxStats(ctx, sandbox)
|
||||||
if err != nil {
|
switch {
|
||||||
return nil, fmt.Errorf("failed to decode sandbox container metrics for sandbox %q: %w", sandbox.ID, err)
|
case errdefs.IsUnavailable(err):
|
||||||
|
log.G(ctx).WithField("podsandboxid", sandbox.ID).Debugf("failed to get pod sandbox stats, this is likely a transient error: %v", err)
|
||||||
|
case err != nil:
|
||||||
|
errs = multierror.Append(errs, fmt.Errorf("failed to decode sandbox container metrics for sandbox %q: %w", sandbox.ID, err))
|
||||||
|
default:
|
||||||
|
podSandboxStats.Stats = append(podSandboxStats.Stats, sandboxStats)
|
||||||
}
|
}
|
||||||
podSandboxStats.Stats = append(podSandboxStats.Stats, sandboxStats)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return podSandboxStats, nil
|
return podSandboxStats, errs.ErrorOrNil()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *criService) sandboxesForListPodSandboxStatsRequest(r *runtime.ListPodSandboxStatsRequest) []sandboxstore.Sandbox {
|
func (c *criService) sandboxesForListPodSandboxStatsRequest(r *runtime.ListPodSandboxStatsRequest) []sandboxstore.Sandbox {
|
||||||
|
@ -26,6 +26,7 @@ import (
|
|||||||
"github.com/Microsoft/hcsshim/hcn"
|
"github.com/Microsoft/hcsshim/hcn"
|
||||||
"github.com/containerd/containerd/api/services/tasks/v1"
|
"github.com/containerd/containerd/api/services/tasks/v1"
|
||||||
"github.com/containerd/containerd/api/types"
|
"github.com/containerd/containerd/api/types"
|
||||||
|
"github.com/containerd/containerd/errdefs"
|
||||||
"github.com/containerd/containerd/log"
|
"github.com/containerd/containerd/log"
|
||||||
containerstore "github.com/containerd/containerd/pkg/cri/store/container"
|
containerstore "github.com/containerd/containerd/pkg/cri/store/container"
|
||||||
sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox"
|
sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox"
|
||||||
@ -40,7 +41,7 @@ func (c *criService) podSandboxStats(
|
|||||||
meta := sandbox.Metadata
|
meta := sandbox.Metadata
|
||||||
|
|
||||||
if sandbox.Status.Get().State != sandboxstore.StateReady {
|
if sandbox.Status.Get().State != sandboxstore.StateReady {
|
||||||
return nil, fmt.Errorf("failed to get pod sandbox stats since sandbox container %q is not in ready state", meta.ID)
|
return nil, fmt.Errorf("failed to get pod sandbox stats since sandbox container %q is not in ready state: %w", meta.ID, errdefs.ErrUnavailable)
|
||||||
}
|
}
|
||||||
|
|
||||||
timestamp := time.Now()
|
timestamp := time.Now()
|
||||||
@ -61,7 +62,7 @@ func (c *criService) podSandboxStats(
|
|||||||
|
|
||||||
statsMap, err := convertMetricsToWindowsStats(metrics, sandbox)
|
statsMap, err := convertMetricsToWindowsStats(metrics, sandbox)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, fmt.Errorf("failed to convert stats: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
podCPU, containerStats, err := c.toPodSandboxStats(sandbox, statsMap, containers, timestamp)
|
podCPU, containerStats, err := c.toPodSandboxStats(sandbox, statsMap, containers, timestamp)
|
||||||
@ -75,7 +76,7 @@ func (c *criService) podSandboxStats(
|
|||||||
|
|
||||||
pidCount, err := c.getSandboxPidCount(ctx, sandbox)
|
pidCount, err := c.getSandboxPidCount(ctx, sandbox)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, fmt.Errorf("failed to get pid count: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
podSandboxStats.Windows.Process = &runtime.WindowsProcessUsage{
|
podSandboxStats.Windows.Process = &runtime.WindowsProcessUsage{
|
||||||
@ -129,11 +130,13 @@ func (c *criService) toPodSandboxStats(sandbox sandboxstore.Sandbox, statsMap ma
|
|||||||
|
|
||||||
if cntr.Status.Get().State() != runtime.ContainerState_CONTAINER_RUNNING {
|
if cntr.Status.Get().State() != runtime.ContainerState_CONTAINER_RUNNING {
|
||||||
// containers that are just created, in a failed state or exited (init containers) will not have stats
|
// containers that are just created, in a failed state or exited (init containers) will not have stats
|
||||||
|
log.L.Warnf("failed to get container stats since container %q is not in running state", cntr.ID)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if containerMetric == nil {
|
if containerMetric == nil {
|
||||||
return nil, nil, fmt.Errorf("failed to find metrics for container with id %s: %w", cntr.ID, err)
|
log.L.Warnf("no metrics found for container %q", cntr.ID)
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
containerStats, err := c.convertToCRIStats(containerMetric)
|
containerStats, err := c.convertToCRIStats(containerMetric)
|
||||||
@ -142,7 +145,7 @@ func (c *criService) toPodSandboxStats(sandbox sandboxstore.Sandbox, statsMap ma
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Calculate NanoCores for container
|
// Calculate NanoCores for container
|
||||||
if containerStats.Cpu.UsageCoreNanoSeconds != nil {
|
if containerStats.Cpu != nil && containerStats.Cpu.UsageCoreNanoSeconds != nil {
|
||||||
nanoCoreUsage := getUsageNanoCores(containerStats.Cpu.UsageCoreNanoSeconds.Value, cntr.Stats, containerStats.Cpu.Timestamp)
|
nanoCoreUsage := getUsageNanoCores(containerStats.Cpu.UsageCoreNanoSeconds.Value, cntr.Stats, containerStats.Cpu.Timestamp)
|
||||||
containerStats.Cpu.UsageNanoCores = &runtime.UInt64Value{Value: nanoCoreUsage}
|
containerStats.Cpu.UsageNanoCores = &runtime.UInt64Value{Value: nanoCoreUsage}
|
||||||
}
|
}
|
||||||
@ -178,7 +181,7 @@ func (c *criService) toPodSandboxStats(sandbox sandboxstore.Sandbox, statsMap ma
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Calculate NanoCores for pod after adding containers cpu including the pods cpu
|
// Calculate NanoCores for pod after adding containers cpu including the pods cpu
|
||||||
if podRuntimeStats.Cpu.UsageCoreNanoSeconds != nil {
|
if podRuntimeStats.Cpu != nil && podRuntimeStats.Cpu.UsageCoreNanoSeconds != nil {
|
||||||
nanoCoreUsage := getUsageNanoCores(podRuntimeStats.Cpu.UsageCoreNanoSeconds.Value, sandbox.Stats, podRuntimeStats.Cpu.Timestamp)
|
nanoCoreUsage := getUsageNanoCores(podRuntimeStats.Cpu.UsageCoreNanoSeconds.Value, sandbox.Stats, podRuntimeStats.Cpu.Timestamp)
|
||||||
podRuntimeStats.Cpu.UsageNanoCores = &runtime.UInt64Value{Value: nanoCoreUsage}
|
podRuntimeStats.Cpu.UsageNanoCores = &runtime.UInt64Value{Value: nanoCoreUsage}
|
||||||
}
|
}
|
||||||
@ -389,10 +392,16 @@ func (c *criService) getSandboxPidCount(ctx context.Context, sandbox sandboxstor
|
|||||||
// get process count inside PodSandbox for Windows
|
// get process count inside PodSandbox for Windows
|
||||||
task, err := sandbox.Container.Task(ctx, nil)
|
task, err := sandbox.Container.Task(ctx, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
if errdefs.IsNotFound(err) {
|
||||||
|
return 0, nil
|
||||||
|
}
|
||||||
return 0, err
|
return 0, err
|
||||||
}
|
}
|
||||||
processes, err := task.Pids(ctx)
|
processes, err := task.Pids(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
if errdefs.IsNotFound(err) {
|
||||||
|
return 0, nil
|
||||||
|
}
|
||||||
return 0, err
|
return 0, err
|
||||||
}
|
}
|
||||||
pidCount += uint64(len(processes))
|
pidCount += uint64(len(processes))
|
||||||
@ -414,6 +423,9 @@ func (c *criService) getSandboxPidCount(ctx context.Context, sandbox sandboxstor
|
|||||||
|
|
||||||
processes, err := task.Pids(ctx)
|
processes, err := task.Pids(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
if errdefs.IsNotFound(err) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
return 0, err
|
return 0, err
|
||||||
}
|
}
|
||||||
pidCount += uint64(len(processes))
|
pidCount += uint64(len(processes))
|
||||||
|
@ -93,7 +93,7 @@ func Test_criService_podSandboxStats(t *testing.T) {
|
|||||||
metrics map[string]*wstats.Statistics
|
metrics map[string]*wstats.Statistics
|
||||||
sandbox sandboxstore.Sandbox
|
sandbox sandboxstore.Sandbox
|
||||||
containers []containerstore.Container
|
containers []containerstore.Container
|
||||||
expectedPodStats expectedStats
|
expectedPodStats *expectedStats
|
||||||
expectedContainerStats []expectedStats
|
expectedContainerStats []expectedStats
|
||||||
expectError bool
|
expectError bool
|
||||||
}{
|
}{
|
||||||
@ -102,7 +102,7 @@ func Test_criService_podSandboxStats(t *testing.T) {
|
|||||||
metrics: map[string]*wstats.Statistics{},
|
metrics: map[string]*wstats.Statistics{},
|
||||||
sandbox: sandboxstore.Sandbox{},
|
sandbox: sandboxstore.Sandbox{},
|
||||||
containers: []containerstore.Container{},
|
containers: []containerstore.Container{},
|
||||||
expectedPodStats: expectedStats{},
|
expectedPodStats: &expectedStats{},
|
||||||
expectedContainerStats: []expectedStats{},
|
expectedContainerStats: []expectedStats{},
|
||||||
expectError: true,
|
expectError: true,
|
||||||
},
|
},
|
||||||
@ -120,7 +120,7 @@ func Test_criService_podSandboxStats(t *testing.T) {
|
|||||||
containers: []containerstore.Container{
|
containers: []containerstore.Container{
|
||||||
newContainer("c1", running, nil),
|
newContainer("c1", running, nil),
|
||||||
},
|
},
|
||||||
expectedPodStats: expectedStats{
|
expectedPodStats: &expectedStats{
|
||||||
UsageCoreNanoSeconds: 400,
|
UsageCoreNanoSeconds: 400,
|
||||||
UsageNanoCores: 0,
|
UsageNanoCores: 0,
|
||||||
WorkingSetBytes: 40,
|
WorkingSetBytes: 40,
|
||||||
@ -152,7 +152,7 @@ func Test_criService_podSandboxStats(t *testing.T) {
|
|||||||
newContainer("c1", running, nil),
|
newContainer("c1", running, nil),
|
||||||
newContainer("i1", running, nil),
|
newContainer("i1", running, nil),
|
||||||
},
|
},
|
||||||
expectedPodStats: expectedStats{
|
expectedPodStats: &expectedStats{
|
||||||
UsageCoreNanoSeconds: 600,
|
UsageCoreNanoSeconds: 600,
|
||||||
UsageNanoCores: 0,
|
UsageNanoCores: 0,
|
||||||
WorkingSetBytes: 60,
|
WorkingSetBytes: 60,
|
||||||
@ -186,7 +186,7 @@ func Test_criService_podSandboxStats(t *testing.T) {
|
|||||||
newContainer("c1", running, nil),
|
newContainer("c1", running, nil),
|
||||||
newContainer("i1", exitedValid, nil),
|
newContainer("i1", exitedValid, nil),
|
||||||
},
|
},
|
||||||
expectedPodStats: expectedStats{
|
expectedPodStats: &expectedStats{
|
||||||
UsageCoreNanoSeconds: 400,
|
UsageCoreNanoSeconds: 400,
|
||||||
UsageNanoCores: 0,
|
UsageNanoCores: 0,
|
||||||
WorkingSetBytes: 40,
|
WorkingSetBytes: 40,
|
||||||
@ -215,7 +215,7 @@ func Test_criService_podSandboxStats(t *testing.T) {
|
|||||||
newContainer("c1", running, nil),
|
newContainer("c1", running, nil),
|
||||||
newContainer("i1", exitedInvalid, nil),
|
newContainer("i1", exitedInvalid, nil),
|
||||||
},
|
},
|
||||||
expectedPodStats: expectedStats{
|
expectedPodStats: &expectedStats{
|
||||||
UsageCoreNanoSeconds: 400,
|
UsageCoreNanoSeconds: 400,
|
||||||
UsageNanoCores: 0,
|
UsageNanoCores: 0,
|
||||||
WorkingSetBytes: 40,
|
WorkingSetBytes: 40,
|
||||||
@ -246,7 +246,7 @@ func Test_criService_podSandboxStats(t *testing.T) {
|
|||||||
UsageCoreNanoSeconds: 200,
|
UsageCoreNanoSeconds: 200,
|
||||||
}),
|
}),
|
||||||
},
|
},
|
||||||
expectedPodStats: expectedStats{
|
expectedPodStats: &expectedStats{
|
||||||
UsageCoreNanoSeconds: 800,
|
UsageCoreNanoSeconds: 800,
|
||||||
UsageNanoCores: 400,
|
UsageNanoCores: 400,
|
||||||
WorkingSetBytes: 40,
|
WorkingSetBytes: 40,
|
||||||
@ -275,7 +275,7 @@ func Test_criService_podSandboxStats(t *testing.T) {
|
|||||||
UsageCoreNanoSeconds: 200,
|
UsageCoreNanoSeconds: 200,
|
||||||
}),
|
}),
|
||||||
},
|
},
|
||||||
expectedPodStats: expectedStats{
|
expectedPodStats: &expectedStats{
|
||||||
UsageCoreNanoSeconds: 400,
|
UsageCoreNanoSeconds: 400,
|
||||||
UsageNanoCores: 200,
|
UsageNanoCores: 200,
|
||||||
WorkingSetBytes: 20,
|
WorkingSetBytes: 20,
|
||||||
@ -304,7 +304,7 @@ func Test_criService_podSandboxStats(t *testing.T) {
|
|||||||
UsageCoreNanoSeconds: 200,
|
UsageCoreNanoSeconds: 200,
|
||||||
}),
|
}),
|
||||||
},
|
},
|
||||||
expectedPodStats: expectedStats{
|
expectedPodStats: &expectedStats{
|
||||||
UsageCoreNanoSeconds: 400,
|
UsageCoreNanoSeconds: 400,
|
||||||
UsageNanoCores: 200,
|
UsageNanoCores: 200,
|
||||||
WorkingSetBytes: 20,
|
WorkingSetBytes: 20,
|
||||||
@ -318,6 +318,32 @@ func Test_criService_podSandboxStats(t *testing.T) {
|
|||||||
},
|
},
|
||||||
expectError: false,
|
expectError: false,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
desc: "pod sandbox with a container that has no cpu shouldn't error",
|
||||||
|
metrics: map[string]*wstats.Statistics{
|
||||||
|
"c1": {},
|
||||||
|
"s1": {},
|
||||||
|
},
|
||||||
|
sandbox: sandboxPod("s1", initialStatsTimestamp, 200),
|
||||||
|
containers: []containerstore.Container{
|
||||||
|
newContainer("c1", running, &stats.ContainerStats{
|
||||||
|
Timestamp: initialStatsTimestamp,
|
||||||
|
UsageCoreNanoSeconds: 200,
|
||||||
|
}),
|
||||||
|
},
|
||||||
|
expectedPodStats: nil,
|
||||||
|
expectedContainerStats: []expectedStats{},
|
||||||
|
expectError: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
desc: "pod sandbox with no stats in metric mapp will fail",
|
||||||
|
metrics: map[string]*wstats.Statistics{},
|
||||||
|
sandbox: sandboxPod("s1", initialStatsTimestamp, 200),
|
||||||
|
containers: []containerstore.Container{},
|
||||||
|
expectedPodStats: nil,
|
||||||
|
expectedContainerStats: []expectedStats{},
|
||||||
|
expectError: true,
|
||||||
|
},
|
||||||
} {
|
} {
|
||||||
test := test
|
test := test
|
||||||
t.Run(test.desc, func(t *testing.T) {
|
t.Run(test.desc, func(t *testing.T) {
|
||||||
@ -327,6 +353,13 @@ func Test_criService_podSandboxStats(t *testing.T) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
assert.Nil(t, err)
|
assert.Nil(t, err)
|
||||||
|
|
||||||
|
if test.expectedPodStats == nil {
|
||||||
|
assert.Nil(t, actualPodStats.Cpu)
|
||||||
|
assert.Nil(t, actualPodStats.Memory)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
assert.Equal(t, test.expectedPodStats.UsageCoreNanoSeconds, actualPodStats.Cpu.UsageCoreNanoSeconds.Value)
|
assert.Equal(t, test.expectedPodStats.UsageCoreNanoSeconds, actualPodStats.Cpu.UsageCoreNanoSeconds.Value)
|
||||||
assert.Equal(t, test.expectedPodStats.UsageNanoCores, actualPodStats.Cpu.UsageNanoCores.Value)
|
assert.Equal(t, test.expectedPodStats.UsageNanoCores, actualPodStats.Cpu.UsageNanoCores.Value)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user