resource-metrics: add pod/sandbox metrics to endpoint
Pod metrics may not be the same as the sum of their container metrics, since the pod sandbox itself also consumes resources. Add support for pod-specific metrics to allow for more accurate accounting of resources.

Signed-off-by: Eric Ernst <eric_ernst@apple.com>
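To illustrate the discrepancy the message describes, here is a minimal, self-contained Go sketch with hypothetical types (not the kubelet's summary API): the pod-level cgroup also covers the sandbox ("pause") container, so summing per-container usage undercounts the pod.

package main

import "fmt"

// containerStats and podStats are hypothetical stand-ins for illustration
// only; the kubelet's real types live in the stats summary API.
type containerStats struct {
	name                 string
	usageCoreNanoSeconds uint64
}

type podStats struct {
	// Read from the pod-level cgroup, which also covers the sandbox
	// ("pause") container and other pod-level overhead.
	usageCoreNanoSeconds uint64
	containers           []containerStats
}

func main() {
	pod := podStats{
		usageCoreNanoSeconds: 12500000000, // illustrative value
		containers: []containerStats{
			{name: "app", usageCoreNanoSeconds: 9000000000},
			{name: "sidecar", usageCoreNanoSeconds: 2500000000},
		},
	}

	var sum uint64
	for _, c := range pod.containers {
		sum += c.usageCoreNanoSeconds
	}

	// Prints: containers=11500000000 pod=12500000000 unaccounted=1000000000
	fmt.Printf("containers=%d pod=%d unaccounted=%d\n",
		sum, pod.usageCoreNanoSeconds, pod.usageCoreNanoSeconds-sum)
}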
@@ -54,7 +54,21 @@ var (
 		metrics.ALPHA,
 		"")
 
-	resouceScrapeResultDesc = metrics.NewDesc("scrape_error",
+	podCPUUsageDesc = metrics.NewDesc("pod_cpu_usage_seconds_total",
+		"Cumulative cpu time consumed by the pod in core-seconds",
+		[]string{"pod", "namespace"},
+		nil,
+		metrics.ALPHA,
+		"")
+
+	podMemoryUsageDesc = metrics.NewDesc("pod_memory_working_set_bytes",
+		"Current working set of the pod in bytes",
+		[]string{"pod", "namespace"},
+		nil,
+		metrics.ALPHA,
+		"")
+
+	resourceScrapeResultDesc = metrics.NewDesc("scrape_error",
 		"1 if there was an error while getting container metrics, 0 otherwise",
 		nil,
 		nil,
@@ -84,7 +98,9 @@ func (rc *resourceMetricsCollector) DescribeWithStability(ch chan<- *metrics.Des
 	ch <- nodeMemoryUsageDesc
 	ch <- containerCPUUsageDesc
 	ch <- containerMemoryUsageDesc
-	ch <- resouceScrapeResultDesc
+	ch <- podCPUUsageDesc
+	ch <- podMemoryUsageDesc
+	ch <- resourceScrapeResultDesc
 }
 
 // CollectWithStability implements metrics.StableCollector
@@ -94,7 +110,7 @@ func (rc *resourceMetricsCollector) DescribeWithStability(ch chan<- *metrics.Des
 func (rc *resourceMetricsCollector) CollectWithStability(ch chan<- metrics.Metric) {
 	var errorCount float64
 	defer func() {
-		ch <- metrics.NewLazyConstMetric(resouceScrapeResultDesc, metrics.GaugeValue, errorCount)
+		ch <- metrics.NewLazyConstMetric(resourceScrapeResultDesc, metrics.GaugeValue, errorCount)
 	}()
 	statsSummary, err := rc.provider.GetCPUAndMemoryStats()
 	if err != nil {
@@ -111,6 +127,8 @@ func (rc *resourceMetricsCollector) CollectWithStability(ch chan<- metrics.Metri
 			rc.collectContainerCPUMetrics(ch, pod, container)
 			rc.collectContainerMemoryMetrics(ch, pod, container)
 		}
+		rc.collectPodCPUMetrics(ch, pod)
+		rc.collectPodMemoryMetrics(ch, pod)
 	}
 }
 
@@ -151,3 +169,23 @@ func (rc *resourceMetricsCollector) collectContainerMemoryMetrics(ch chan<- metr
 		metrics.NewLazyConstMetric(containerMemoryUsageDesc, metrics.GaugeValue,
 			float64(*s.Memory.WorkingSetBytes), s.Name, pod.PodRef.Name, pod.PodRef.Namespace))
 }
+
+func (rc *resourceMetricsCollector) collectPodCPUMetrics(ch chan<- metrics.Metric, pod summary.PodStats) {
+	if pod.CPU == nil {
+		return
+	}
+
+	ch <- metrics.NewLazyMetricWithTimestamp(pod.CPU.Time.Time,
+		metrics.NewLazyConstMetric(podCPUUsageDesc, metrics.CounterValue,
+			float64(*pod.CPU.UsageCoreNanoSeconds)/float64(time.Second), pod.PodRef.Name, pod.PodRef.Namespace))
+}
+
+func (rc *resourceMetricsCollector) collectPodMemoryMetrics(ch chan<- metrics.Metric, pod summary.PodStats) {
+	if pod.Memory == nil {
+		return
+	}
+
+	ch <- metrics.NewLazyMetricWithTimestamp(pod.Memory.Time.Time,
+		metrics.NewLazyConstMetric(podMemoryUsageDesc, metrics.GaugeValue,
+			float64(*pod.Memory.WorkingSetBytes), pod.PodRef.Name, pod.PodRef.Namespace))
+}
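The unit conversion in the two new collectors is worth spelling out: UsageCoreNanoSeconds is cumulative core-nanoseconds, and dividing by float64(time.Second) (1e9 nanoseconds) yields the core-seconds the counter reports. A standalone sketch, using the same fixture value as the test hunk below:

package main

import (
	"fmt"
	"time"
)

func main() {
	// Same fixture value the test below feeds in.
	var usageCoreNanoSeconds uint64 = 10000000000

	// time.Duration counts nanoseconds, so float64(time.Second) == 1e9.
	coreSeconds := float64(usageCoreNanoSeconds) / float64(time.Second)

	// Prints 10, matching the expected pod_cpu_usage_seconds_total sample.
	fmt.Println(coreSeconds)
}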
@@ -51,6 +51,8 @@ func TestCollectResourceMetrics(t *testing.T) {
 		"node_memory_working_set_bytes",
 		"container_cpu_usage_seconds_total",
 		"container_memory_working_set_bytes",
+		"pod_cpu_usage_seconds_total",
+		"pod_memory_working_set_bytes",
 	}
 
 	tests := []struct {
@@ -168,6 +170,39 @@ func TestCollectResourceMetrics(t *testing.T) {
 			container_memory_working_set_bytes{container="container_b",namespace="namespace_a",pod="pod_a"} 1000 2000
 			`,
 		},
+		{
+			name: "arbitrary pod metrics",
+			summary: &statsapi.Summary{
+				Pods: []statsapi.PodStats{
+					{
+						PodRef: statsapi.PodReference{
+							Name:      "pod_a",
+							Namespace: "namespace_a",
+						},
+						CPU: &statsapi.CPUStats{
+							Time:                 testTime,
+							UsageCoreNanoSeconds: uint64Ptr(10000000000),
+						},
+						Memory: &statsapi.MemoryStats{
+							Time:            testTime,
+							WorkingSetBytes: uint64Ptr(1000),
+						},
+					},
+				},
+			},
+			summaryErr: nil,
+			expectedMetrics: `
+			# HELP scrape_error [ALPHA] 1 if there was an error while getting container metrics, 0 otherwise
+			# TYPE scrape_error gauge
+			scrape_error 0
+			# HELP pod_cpu_usage_seconds_total [ALPHA] Cumulative cpu time consumed by the pod in core-seconds
+			# TYPE pod_cpu_usage_seconds_total counter
+			pod_cpu_usage_seconds_total{namespace="namespace_a",pod="pod_a"} 10 2000
+			# HELP pod_memory_working_set_bytes [ALPHA] Current working set of the pod in bytes
+			# TYPE pod_memory_working_set_bytes gauge
+			pod_memory_working_set_bytes{namespace="namespace_a",pod="pod_a"} 1000 2000
+			`,
+		},
 	}
 
 	for _, test := range tests {
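For completeness, a rough sketch of scraping the new pod series once a kubelet serves them. The endpoint path (/metrics/resource/v1alpha1) and a plaintext local connection are assumptions for illustration only; a real kubelet typically serves HTTPS on port 10250 and requires authentication.

package main

import (
	"bufio"
	"fmt"
	"log"
	"net/http"
	"strings"
)

func main() {
	// Assumed local read-only endpoint, for illustration only.
	resp, err := http.Get("http://localhost:10255/metrics/resource/v1alpha1")
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	scanner := bufio.NewScanner(resp.Body)
	for scanner.Scan() {
		line := scanner.Text()
		// Keep only the pod-level series added by this change.
		if strings.HasPrefix(line, "pod_cpu_usage_seconds_total") ||
			strings.HasPrefix(line, "pod_memory_working_set_bytes") {
			fmt.Println(line)
		}
	}
	if err := scanner.Err(); err != nil {
		log.Fatal(err)
	}
}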