cri: get pid count from container metrics
This reduces the latency of calling ListPodSandboxStats() by avoiding calls to the shim API Task().

Signed-off-by: Eric Lin <linxiulei@gmail.com>
This commit is contained in:
parent
741c4bde51
commit
f6e731c809
@ -49,5 +49,5 @@ func (c *criService) ContainerStats(ctx context.Context, in *runtime.ContainerSt
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to decode container metrics: %w", err)
|
return nil, fmt.Errorf("failed to decode container metrics: %w", err)
|
||||||
}
|
}
|
||||||
return &runtime.ContainerStatsResponse{Stats: cs}, nil
|
return &runtime.ContainerStatsResponse{Stats: cs.stats}, nil
|
||||||
}
|
}
|
||||||
|
@ -43,6 +43,17 @@ func (c *criService) ListContainerStats(
|
|||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
in *runtime.ListContainerStatsRequest,
|
in *runtime.ListContainerStatsRequest,
|
||||||
) (*runtime.ListContainerStatsResponse, error) {
|
) (*runtime.ListContainerStatsResponse, error) {
|
||||||
|
css, err := c.listContainerStats(ctx, in)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to fetch containers and stats: %w", err)
|
||||||
|
}
|
||||||
|
return c.toCRIContainerStats(css), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *criService) listContainerStats(
|
||||||
|
ctx context.Context,
|
||||||
|
in *runtime.ListContainerStatsRequest,
|
||||||
|
) ([]containerStats, error) {
|
||||||
request, containers, err := c.buildTaskMetricsRequest(in)
|
request, containers, err := c.buildTaskMetricsRequest(in)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to build metrics request: %w", err)
|
return nil, fmt.Errorf("failed to build metrics request: %w", err)
|
||||||
@ -51,14 +62,20 @@ func (c *criService) ListContainerStats(
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to fetch metrics for tasks: %w", err)
|
return nil, fmt.Errorf("failed to fetch metrics for tasks: %w", err)
|
||||||
}
|
}
|
||||||
criStats, err := c.toCRIContainerStats(ctx, resp.Metrics, containers)
|
css, err := c.toContainerStats(ctx, resp.Metrics, containers)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to convert to cri containerd stats format: %w", err)
|
return nil, fmt.Errorf("failed to convert to cri containerd stats format: %w", err)
|
||||||
}
|
}
|
||||||
return criStats, nil
|
return css, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
type metricsHandler func(containerstore.Metadata, *types.Metric) (*runtime.ContainerStats, error)
|
type containerStats struct {
|
||||||
|
stats *runtime.ContainerStats
|
||||||
|
// pids is only valid on the Linux platform
|
||||||
|
pids uint64
|
||||||
|
}
|
||||||
|
|
||||||
|
type metricsHandler func(containerstore.Metadata, *types.Metric) (containerStats, error)
|
||||||
|
|
||||||
// Returns a function to be used for transforming container metrics into the right format.
|
// Returns a function to be used for transforming container metrics into the right format.
|
||||||
// Uses the platform the given sandbox advertises to implement its logic. If the platform is
|
// Uses the platform the given sandbox advertises to implement its logic. If the platform is
|
||||||
@ -86,11 +103,11 @@ func (c *criService) getMetricsHandler(ctx context.Context, sandboxID string) (m
|
|||||||
|
|
||||||
switch p.OS {
|
switch p.OS {
|
||||||
case "windows":
|
case "windows":
|
||||||
return func(meta containerstore.Metadata, stats *types.Metric) (*runtime.ContainerStats, error) {
|
return func(meta containerstore.Metadata, stats *types.Metric) (containerStats, error) {
|
||||||
return c.windowsContainerMetrics(meta, stats, snapshotter)
|
return c.windowsContainerMetrics(meta, stats, snapshotter)
|
||||||
}, nil
|
}, nil
|
||||||
case "linux":
|
case "linux":
|
||||||
return func(meta containerstore.Metadata, stats *types.Metric) (*runtime.ContainerStats, error) {
|
return func(meta containerstore.Metadata, stats *types.Metric) (containerStats, error) {
|
||||||
return c.linuxContainerMetrics(meta, stats, snapshotter)
|
return c.linuxContainerMetrics(meta, stats, snapshotter)
|
||||||
}, nil
|
}, nil
|
||||||
default:
|
default:
|
||||||
@ -98,16 +115,16 @@ func (c *criService) getMetricsHandler(ctx context.Context, sandboxID string) (m
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *criService) toCRIContainerStats(
|
func (c *criService) toContainerStats(
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
stats []*types.Metric,
|
stats []*types.Metric,
|
||||||
containers []containerstore.Container,
|
containers []containerstore.Container,
|
||||||
) (*runtime.ListContainerStatsResponse, error) {
|
) ([]containerStats, error) {
|
||||||
statsMap := make(map[string]*types.Metric)
|
statsMap := make(map[string]*types.Metric)
|
||||||
for _, stat := range stats {
|
for _, stat := range stats {
|
||||||
statsMap[stat.ID] = stat
|
statsMap[stat.ID] = stat
|
||||||
}
|
}
|
||||||
containerStats := new(runtime.ListContainerStatsResponse)
|
css := []containerStats{}
|
||||||
|
|
||||||
// Unfortunately if no filter was passed we're asking for every container's stats which
|
// Unfortunately if no filter was passed we're asking for every container's stats which
|
||||||
// generally belong to multiple different pods, who all might have different platforms.
|
// generally belong to multiple different pods, who all might have different platforms.
|
||||||
@ -143,17 +160,25 @@ func (c *criService) toCRIContainerStats(
|
|||||||
return nil, fmt.Errorf("failed to decode container metrics for %q: %w", cntr.ID, err)
|
return nil, fmt.Errorf("failed to decode container metrics for %q: %w", cntr.ID, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if cs.Cpu != nil && cs.Cpu.UsageCoreNanoSeconds != nil {
|
if cs.stats.Cpu != nil && cs.stats.Cpu.UsageCoreNanoSeconds != nil {
|
||||||
// this is a calculated value and should be computed for all OSes
|
// this is a calculated value and should be computed for all OSes
|
||||||
nanoUsage, err := c.getUsageNanoCores(cntr.Metadata.ID, false, cs.Cpu.UsageCoreNanoSeconds.Value, time.Unix(0, cs.Cpu.Timestamp))
|
nanoUsage, err := c.getUsageNanoCores(cntr.Metadata.ID, false, cs.stats.Cpu.UsageCoreNanoSeconds.Value, time.Unix(0, cs.stats.Cpu.Timestamp))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to get usage nano cores, containerID: %s: %w", cntr.Metadata.ID, err)
|
return nil, fmt.Errorf("failed to get usage nano cores, containerID: %s: %w", cntr.Metadata.ID, err)
|
||||||
}
|
}
|
||||||
cs.Cpu.UsageNanoCores = &runtime.UInt64Value{Value: nanoUsage}
|
cs.stats.Cpu.UsageNanoCores = &runtime.UInt64Value{Value: nanoUsage}
|
||||||
}
|
}
|
||||||
containerStats.Stats = append(containerStats.Stats, cs)
|
css = append(css, cs)
|
||||||
}
|
}
|
||||||
return containerStats, nil
|
return css, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *criService) toCRIContainerStats(css []containerStats) *runtime.ListContainerStatsResponse {
|
||||||
|
containerStats := new(runtime.ListContainerStatsResponse)
|
||||||
|
for _, cs := range css {
|
||||||
|
containerStats.Stats = append(containerStats.Stats, cs.stats)
|
||||||
|
}
|
||||||
|
return containerStats
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *criService) getUsageNanoCores(containerID string, isSandbox bool, currentUsageCoreNanoSeconds uint64, currentTimestamp time.Time) (uint64, error) {
|
func (c *criService) getUsageNanoCores(containerID string, isSandbox bool, currentUsageCoreNanoSeconds uint64, currentTimestamp time.Time) (uint64, error) {
|
||||||
@ -275,7 +300,7 @@ func (c *criService) windowsContainerMetrics(
|
|||||||
meta containerstore.Metadata,
|
meta containerstore.Metadata,
|
||||||
stats *types.Metric,
|
stats *types.Metric,
|
||||||
snapshotter string,
|
snapshotter string,
|
||||||
) (*runtime.ContainerStats, error) {
|
) (containerStats, error) {
|
||||||
var cs runtime.ContainerStats
|
var cs runtime.ContainerStats
|
||||||
var usedBytes, inodesUsed uint64
|
var usedBytes, inodesUsed uint64
|
||||||
sn, err := c.GetSnapshot(meta.ID, snapshotter)
|
sn, err := c.GetSnapshot(meta.ID, snapshotter)
|
||||||
@ -303,11 +328,11 @@ func (c *criService) windowsContainerMetrics(
|
|||||||
if stats != nil {
|
if stats != nil {
|
||||||
s, err := typeurl.UnmarshalAny(stats.Data)
|
s, err := typeurl.UnmarshalAny(stats.Data)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to extract container metrics: %w", err)
|
return containerStats{}, fmt.Errorf("failed to extract container metrics: %w", err)
|
||||||
}
|
}
|
||||||
wstats := s.(*wstats.Statistics).GetWindows()
|
wstats := s.(*wstats.Statistics).GetWindows()
|
||||||
if wstats == nil {
|
if wstats == nil {
|
||||||
return nil, errors.New("windows stats is empty")
|
return containerStats{}, errors.New("windows stats is empty")
|
||||||
}
|
}
|
||||||
if wstats.Processor != nil {
|
if wstats.Processor != nil {
|
||||||
cs.Cpu = &runtime.CpuUsage{
|
cs.Cpu = &runtime.CpuUsage{
|
||||||
@ -324,16 +349,16 @@ func (c *criService) windowsContainerMetrics(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return &cs, nil
|
return containerStats{&cs, 0}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *criService) linuxContainerMetrics(
|
func (c *criService) linuxContainerMetrics(
|
||||||
meta containerstore.Metadata,
|
meta containerstore.Metadata,
|
||||||
stats *types.Metric,
|
stats *types.Metric,
|
||||||
snapshotter string,
|
snapshotter string,
|
||||||
) (*runtime.ContainerStats, error) {
|
) (containerStats, error) {
|
||||||
var cs runtime.ContainerStats
|
var cs runtime.ContainerStats
|
||||||
var usedBytes, inodesUsed uint64
|
var usedBytes, inodesUsed, pids uint64
|
||||||
sn, err := c.GetSnapshot(meta.ID, snapshotter)
|
sn, err := c.GetSnapshot(meta.ID, snapshotter)
|
||||||
// If snapshotstore doesn't have cached snapshot information
|
// If snapshotstore doesn't have cached snapshot information
|
||||||
// set WritableLayer usage to zero
|
// set WritableLayer usage to zero
|
||||||
@ -361,32 +386,37 @@ func (c *criService) linuxContainerMetrics(
|
|||||||
switch {
|
switch {
|
||||||
case typeurl.Is(stats.Data, (*cg1.Metrics)(nil)):
|
case typeurl.Is(stats.Data, (*cg1.Metrics)(nil)):
|
||||||
data = &cg1.Metrics{}
|
data = &cg1.Metrics{}
|
||||||
|
if err := typeurl.UnmarshalTo(stats.Data, data); err != nil {
|
||||||
|
return containerStats{}, fmt.Errorf("failed to extract container metrics: %w", err)
|
||||||
|
}
|
||||||
|
pids = data.(*cg1.Metrics).GetPids().GetCurrent()
|
||||||
case typeurl.Is(stats.Data, (*cg2.Metrics)(nil)):
|
case typeurl.Is(stats.Data, (*cg2.Metrics)(nil)):
|
||||||
data = &cg2.Metrics{}
|
data = &cg2.Metrics{}
|
||||||
case typeurl.Is(stats.Data, (*wstats.Statistics)(nil)):
|
if err := typeurl.UnmarshalTo(stats.Data, data); err != nil {
|
||||||
data = &wstats.Statistics{}
|
return containerStats{}, fmt.Errorf("failed to extract container metrics: %w", err)
|
||||||
|
}
|
||||||
|
pids = data.(*cg2.Metrics).GetPids().GetCurrent()
|
||||||
default:
|
default:
|
||||||
return nil, errors.New("cannot convert metric data to cgroups.Metrics or windows.Statistics")
|
return containerStats{}, errors.New("cannot convert metric data to cgroups.Metrics")
|
||||||
}
|
|
||||||
|
|
||||||
if err := typeurl.UnmarshalTo(stats.Data, data); err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to extract container metrics: %w", err)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
cpuStats, err := c.cpuContainerStats(meta.ID, false /* isSandbox */, data, protobuf.FromTimestamp(stats.Timestamp))
|
cpuStats, err := c.cpuContainerStats(meta.ID, false /* isSandbox */, data, protobuf.FromTimestamp(stats.Timestamp))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to obtain cpu stats: %w", err)
|
return containerStats{}, fmt.Errorf("failed to obtain cpu stats: %w", err)
|
||||||
}
|
}
|
||||||
cs.Cpu = cpuStats
|
cs.Cpu = cpuStats
|
||||||
|
|
||||||
memoryStats, err := c.memoryContainerStats(meta.ID, data, protobuf.FromTimestamp(stats.Timestamp))
|
memoryStats, err := c.memoryContainerStats(meta.ID, data, protobuf.FromTimestamp(stats.Timestamp))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to obtain memory stats: %w", err)
|
return containerStats{}, fmt.Errorf("failed to obtain memory stats: %w", err)
|
||||||
}
|
}
|
||||||
cs.Memory = memoryStats
|
cs.Memory = memoryStats
|
||||||
|
if err != nil {
|
||||||
|
return containerStats{}, fmt.Errorf("failed to obtain pid count: %w", err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return &cs, nil
|
return containerStats{&cs, pids}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// getWorkingSet calculates workingset memory from cgroup memory stats.
|
// getWorkingSet calculates workingset memory from cgroup memory stats.
|
||||||
|
@ -420,7 +420,7 @@ func TestListContainerStats(t *testing.T) {
|
|||||||
if tt.before != nil {
|
if tt.before != nil {
|
||||||
tt.before()
|
tt.before()
|
||||||
}
|
}
|
||||||
got, err := c.toCRIContainerStats(tt.args.ctx, tt.args.stats, tt.args.containers)
|
css, err := c.toContainerStats(tt.args.ctx, tt.args.stats, tt.args.containers)
|
||||||
if tt.after != nil {
|
if tt.after != nil {
|
||||||
tt.after()
|
tt.after()
|
||||||
}
|
}
|
||||||
@ -428,6 +428,10 @@ func TestListContainerStats(t *testing.T) {
|
|||||||
t.Errorf("ListContainerStats() error = %v, wantErr %v", err, tt.wantErr)
|
t.Errorf("ListContainerStats() error = %v, wantErr %v", err, tt.wantErr)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
var got *runtime.ListContainerStatsResponse
|
||||||
|
if err == nil {
|
||||||
|
got = c.toCRIContainerStats(css)
|
||||||
|
}
|
||||||
if !reflect.DeepEqual(got, tt.want) {
|
if !reflect.DeepEqual(got, tt.want) {
|
||||||
t.Errorf("ListContainerStats() = %v, want %v", got, tt.want)
|
t.Errorf("ListContainerStats() = %v, want %v", got, tt.want)
|
||||||
}
|
}
|
||||||
|
@ -84,40 +84,20 @@ func (c *criService) podSandboxStats(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
listContainerStatsRequest := &runtime.ListContainerStatsRequest{Filter: &runtime.ContainerStatsFilter{PodSandboxId: meta.ID}}
|
||||||
|
css, err := c.listContainerStats(ctx, listContainerStatsRequest)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to obtain container stats during podSandboxStats call: %w", err)
|
||||||
|
}
|
||||||
var pidCount uint64
|
var pidCount uint64
|
||||||
for _, cntr := range c.containerStore.List() {
|
for _, cs := range css {
|
||||||
if cntr.SandboxID != sandbox.ID {
|
pidCount += cs.pids
|
||||||
continue
|
podSandboxStats.Linux.Containers = append(podSandboxStats.Linux.Containers, cs.stats)
|
||||||
}
|
|
||||||
|
|
||||||
state := cntr.Status.Get().State()
|
|
||||||
if state != runtime.ContainerState_CONTAINER_RUNNING {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
task, err := cntr.Container.Task(ctx, nil)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
processes, err := task.Pids(ctx)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
pidCount += uint64(len(processes))
|
|
||||||
|
|
||||||
}
|
}
|
||||||
podSandboxStats.Linux.Process = &runtime.ProcessUsage{
|
podSandboxStats.Linux.Process = &runtime.ProcessUsage{
|
||||||
Timestamp: timestamp.UnixNano(),
|
Timestamp: timestamp.UnixNano(),
|
||||||
ProcessCount: &runtime.UInt64Value{Value: pidCount},
|
ProcessCount: &runtime.UInt64Value{Value: pidCount},
|
||||||
}
|
}
|
||||||
|
|
||||||
listContainerStatsRequest := &runtime.ListContainerStatsRequest{Filter: &runtime.ContainerStatsFilter{PodSandboxId: meta.ID}}
|
|
||||||
resp, err := c.ListContainerStats(ctx, listContainerStatsRequest)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to obtain container stats during podSandboxStats call: %w", err)
|
|
||||||
}
|
|
||||||
podSandboxStats.Linux.Containers = resp.GetStats()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return podSandboxStats, nil
|
return podSandboxStats, nil
|
||||||
|
Loading…
Reference in New Issue
Block a user