From 9c38abd482d7f47c0521ab951a1dc16a9ef0fac2 Mon Sep 17 00:00:00 2001
From: Rohit Agarwal
Date: Mon, 6 Nov 2017 14:05:45 -0800
Subject: [PATCH] Expose accelerator metrics in the summary API.

---
 pkg/kubelet/apis/stats/v1alpha1/types.go | 26 ++++++++++++++++++++++++
 pkg/kubelet/server/stats/summary_test.go |  3 +++
 pkg/kubelet/stats/helper.go              | 11 ++++++++++
 test/e2e_node/summary_test.go            |  2 ++
 4 files changed, 42 insertions(+)

diff --git a/pkg/kubelet/apis/stats/v1alpha1/types.go b/pkg/kubelet/apis/stats/v1alpha1/types.go
index e08128658d2..ebe65861d82 100644
--- a/pkg/kubelet/apis/stats/v1alpha1/types.go
+++ b/pkg/kubelet/apis/stats/v1alpha1/types.go
@@ -109,6 +109,8 @@ type ContainerStats struct {
 	// Stats pertaining to memory (RAM) resources.
 	// +optional
 	Memory *MemoryStats `json:"memory,omitempty"`
+	// Metrics for Accelerators. Each Accelerator corresponds to one element in the array.
+	Accelerators []AcceleratorStats `json:"accelerators,omitempty"`
 	// Stats pertaining to container rootfs usage of filesystem resources.
 	// Rootfs.UsedBytes is the number of bytes used for the container write layer.
 	// +optional
@@ -188,6 +190,30 @@ type MemoryStats struct {
 	MajorPageFaults *uint64 `json:"majorPageFaults,omitempty"`
 }
 
+// AcceleratorStats contains stats for accelerators attached to the container.
+type AcceleratorStats struct {
+	// Make of the accelerator (nvidia, amd, google etc.)
+	Make string `json:"make"`
+
+	// Model of the accelerator (tesla-p100, tesla-k80 etc.)
+	Model string `json:"model"`
+
+	// ID of the accelerator.
+	ID string `json:"id"`
+
+	// Total accelerator memory.
+	// unit: bytes
+	MemoryTotal uint64 `json:"memory_total"`
+
+	// Total accelerator memory allocated.
+	// unit: bytes
+	MemoryUsed uint64 `json:"memory_used"`
+
+	// Percent of time over the past sample period (10s) during which
+	// the accelerator was actively processing.
+	DutyCycle uint64 `json:"duty_cycle"`
+}
+
 // VolumeStats contains data about Volume filesystem usage.
 type VolumeStats struct {
 	// Embedded FsStats
diff --git a/pkg/kubelet/server/stats/summary_test.go b/pkg/kubelet/server/stats/summary_test.go
index 13e4ae0b76d..422688446f9 100644
--- a/pkg/kubelet/server/stats/summary_test.go
+++ b/pkg/kubelet/server/stats/summary_test.go
@@ -92,6 +92,7 @@ func TestSummaryProvider(t *testing.T) {
 		StartTime:          cgroupStatsMap["/kubelet"].cs.StartTime,
 		CPU:                cgroupStatsMap["/kubelet"].cs.CPU,
 		Memory:             cgroupStatsMap["/kubelet"].cs.Memory,
+		Accelerators:       cgroupStatsMap["/kubelet"].cs.Accelerators,
 		UserDefinedMetrics: cgroupStatsMap["/kubelet"].cs.UserDefinedMetrics,
 	})
 	assert.Contains(summary.Node.SystemContainers, statsapi.ContainerStats{
@@ -99,6 +100,7 @@ func TestSummaryProvider(t *testing.T) {
 		StartTime:          cgroupStatsMap["/misc"].cs.StartTime,
 		CPU:                cgroupStatsMap["/misc"].cs.CPU,
 		Memory:             cgroupStatsMap["/misc"].cs.Memory,
+		Accelerators:       cgroupStatsMap["/misc"].cs.Accelerators,
 		UserDefinedMetrics: cgroupStatsMap["/misc"].cs.UserDefinedMetrics,
 	})
 	assert.Contains(summary.Node.SystemContainers, statsapi.ContainerStats{
@@ -106,6 +108,7 @@ func TestSummaryProvider(t *testing.T) {
 		StartTime:          cgroupStatsMap["/runtime"].cs.StartTime,
 		CPU:                cgroupStatsMap["/runtime"].cs.CPU,
 		Memory:             cgroupStatsMap["/runtime"].cs.Memory,
+		Accelerators:       cgroupStatsMap["/runtime"].cs.Accelerators,
 		UserDefinedMetrics: cgroupStatsMap["/runtime"].cs.UserDefinedMetrics,
 	})
 	assert.Equal(summary.Pods, podStats)
diff --git a/pkg/kubelet/stats/helper.go b/pkg/kubelet/stats/helper.go
index ae19faa78a0..9b416d7e39f 100644
--- a/pkg/kubelet/stats/helper.go
+++ b/pkg/kubelet/stats/helper.go
@@ -119,6 +119,17 @@ func cadvisorInfoToContainerStats(name string, info *cadvisorapiv2.ContainerInfo
 		}
 	}
 
+	for _, acc := range cstat.Accelerators {
+		result.Accelerators = append(result.Accelerators, statsapi.AcceleratorStats{
+			Make:        acc.Make,
+			Model:       acc.Model,
+			ID:          acc.ID,
+			MemoryTotal: acc.MemoryTotal,
+			MemoryUsed:  acc.MemoryUsed,
+			DutyCycle:   acc.DutyCycle,
+		})
+	}
+
 	result.UserDefinedMetrics = cadvisorInfoToUserDefinedMetrics(info)
 
 	return result
diff --git a/test/e2e_node/summary_test.go b/test/e2e_node/summary_test.go
index e657abb0a07..fe7654ca243 100644
--- a/test/e2e_node/summary_test.go
+++ b/test/e2e_node/summary_test.go
@@ -97,6 +97,7 @@ var _ = framework.KubeDescribe("Summary API", func() {
 					"PageFaults":      bounded(1000, 1E9),
 					"MajorPageFaults": bounded(0, 100000),
 				}),
+				"Accelerators":       BeEmpty(),
 				"Rootfs":             BeNil(),
 				"Logs":               BeNil(),
 				"UserDefinedMetrics": BeEmpty(),
@@ -145,6 +146,7 @@ var _ = framework.KubeDescribe("Summary API", func() {
 							"PageFaults":      bounded(100, 1000000),
 							"MajorPageFaults": bounded(0, 10),
 						}),
+						"Accelerators": BeEmpty(),
 						"Rootfs": ptrMatchAllFields(gstruct.Fields{
 							"Time":           recent(maxStatsAge),
 							"AvailableBytes": fsCapacityBounds,