Updating dependency github.com/google/cadvisor to version 6a8d614

Signed-off-by: Davanum Srinivas <davanum@gmail.com>
This commit is contained in:
Davanum Srinivas
2020-05-14 17:29:52 -04:00
parent 449810c785
commit 082578c22f
109 changed files with 3417 additions and 1312 deletions

View File

@@ -2,7 +2,12 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library")
go_library(
name = "go_default_library",
srcs = ["prometheus.go"],
srcs = [
"metrics.go",
"prometheus.go",
"prometheus_fake.go",
"prometheus_machine.go",
],
importmap = "k8s.io/kubernetes/vendor/github.com/google/cadvisor/metrics",
importpath = "github.com/google/cadvisor/metrics",
visibility = ["//visibility:public"],
@@ -10,7 +15,8 @@ go_library(
"//vendor/github.com/google/cadvisor/container:go_default_library",
"//vendor/github.com/google/cadvisor/info/v1:go_default_library",
"//vendor/github.com/prometheus/client_golang/prometheus:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
"//vendor/k8s.io/utils/clock:go_default_library",
],
)

42
vendor/github.com/google/cadvisor/metrics/metrics.go generated vendored Normal file
View File

@@ -0,0 +1,42 @@
// Copyright 2020 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package metrics
import (
"time"
info "github.com/google/cadvisor/info/v1"
)
// metricValue describes a single metric value for a given set of label values
// within a parent metric definition (a containerMetric or a machineMetric,
// both of which return metricValues from their getValues callback).
type metricValue struct {
// value is the sample value that is exported.
value float64
// labels holds the values for the parent metric's extra labels; the
// collectors append them after the base label values.
labels []string
// timestamp is attached to the exported sample; the machine collector
// emits the sample without an explicit timestamp when this is the zero time.
timestamp time.Time
}
// metricValues is the list of samples produced by one getValues callback.
type metricValues []metricValue
// infoProvider will usually be manager.Manager, but can be swapped out for testing.
// It is the only data source the collectors in this package read from.
type infoProvider interface {
// SubcontainersInfo provides information about all subcontainers of the
// specified container including itself.
SubcontainersInfo(containerName string, query *info.ContainerInfoRequest) ([]*info.ContainerInfo, error)
// GetVersionInfo provides information about the version.
GetVersionInfo() (*info.VersionInfo, error)
// GetMachineInfo provides information about the machine.
GetMachineInfo() (*info.MachineInfo, error)
}

View File

@@ -17,35 +17,17 @@ package metrics
import (
"fmt"
"regexp"
"strconv"
"time"
"github.com/google/cadvisor/container"
info "github.com/google/cadvisor/info/v1"
"github.com/prometheus/client_golang/prometheus"
"k8s.io/klog"
"k8s.io/klog/v2"
"k8s.io/utils/clock"
)
// infoProvider will usually be manager.Manager, but can be swapped out for testing.
type infoProvider interface {
// SubcontainersInfo provides information about all subcontainers of the
// specified container including itself.
SubcontainersInfo(containerName string, query *info.ContainerInfoRequest) ([]*info.ContainerInfo, error)
// GetVersionInfo provides information about the version.
GetVersionInfo() (*info.VersionInfo, error)
// GetMachineInfo provides information about the machine.
GetMachineInfo() (*info.MachineInfo, error)
}
// metricValue describes a single metric value for a given set of label values
// within a parent containerMetric.
type metricValue struct {
value float64
labels []string
timestamp time.Time
}
type metricValues []metricValue
// asFloat64 returns v as a float64. Values above 2^53 may lose precision,
// as with any uint64-to-float64 conversion.
func asFloat64(v uint64) float64 {
	converted := float64(v)
	return converted
}
@@ -121,7 +103,7 @@ type PrometheusCollector struct {
// ContainerLabelsFunc specifies which base labels will be attached to all
// exported metrics. If left to nil, the DefaultContainerLabels function
// will be used instead.
func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetrics container.MetricSet) *PrometheusCollector {
func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetrics container.MetricSet, now clock.Clock) *PrometheusCollector {
if f == nil {
f = DefaultContainerLabels
}
@@ -140,8 +122,8 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetri
valueType: prometheus.GaugeValue,
getValues: func(s *info.ContainerStats) metricValues {
return metricValues{{
value: float64(time.Now().Unix()),
timestamp: time.Now(),
value: float64(now.Now().Unix()),
timestamp: now.Now(),
}}
},
},
@@ -1562,16 +1544,66 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetri
},
},
}...)
}
if c.includedMetrics.Has(container.PerfMetrics) {
c.containerMetrics = append(c.containerMetrics, []containerMetric{
{
name: "container_perf_metric",
help: "Perf event metric",
valueType: prometheus.CounterValue,
extraLabels: []string{"cpu", "event"},
getValues: func(s *info.ContainerStats) metricValues {
values := make(metricValues, 0, len(s.PerfStats))
for _, metric := range s.PerfStats {
values = append(values, metricValue{
value: float64(metric.Value),
labels: []string{strconv.Itoa(metric.Cpu), metric.Name},
timestamp: s.Timestamp,
})
}
return values
},
},
{
name: "container_perf_metric_scaling_ratio",
help: "Perf event metric scaling ratio",
valueType: prometheus.GaugeValue,
extraLabels: []string{"cpu", "event"},
getValues: func(s *info.ContainerStats) metricValues {
values := make(metricValues, 0, len(s.PerfStats))
for _, metric := range s.PerfStats {
values = append(values, metricValue{
value: metric.ScalingRatio,
labels: []string{strconv.Itoa(metric.Cpu), metric.Name},
timestamp: s.Timestamp,
})
}
return values
},
},
}...)
}
if includedMetrics.Has(container.ReferencedMemoryMetrics) {
c.containerMetrics = append(c.containerMetrics, []containerMetric{
{
name: "container_referenced_bytes",
help: "Container referenced bytes during last measurements cycle",
valueType: prometheus.GaugeValue,
getValues: func(s *info.ContainerStats) metricValues {
return metricValues{{value: float64(s.ReferencedMemory), timestamp: s.Timestamp}}
},
},
}...)
}
return c
}
var (
versionInfoDesc = prometheus.NewDesc("cadvisor_version_info", "A metric with a constant '1' value labeled by kernel version, OS version, docker version, cadvisor version & cadvisor revision.", []string{"kernelVersion", "osVersion", "dockerVersion", "cadvisorVersion", "cadvisorRevision"}, nil)
machineInfoCoresDesc = prometheus.NewDesc("machine_cpu_cores", "Number of CPU cores on the machine.", nil, nil)
machineInfoMemoryDesc = prometheus.NewDesc("machine_memory_bytes", "Amount of memory installed on the machine.", nil, nil)
versionInfoDesc = prometheus.NewDesc("cadvisor_version_info", "A metric with a constant '1' value labeled by kernel version, OS version, docker version, cadvisor version & cadvisor revision.", []string{"kernelVersion", "osVersion", "dockerVersion", "cadvisorVersion", "cadvisorRevision"}, nil)
startTimeDesc = prometheus.NewDesc("container_start_time_seconds", "Start time of the container since unix epoch in seconds.", nil, nil)
cpuPeriodDesc = prometheus.NewDesc("container_spec_cpu_period", "CPU period of the container.", nil, nil)
cpuQuotaDesc = prometheus.NewDesc("container_spec_cpu_quota", "CPU quota of the container.", nil, nil)
cpuSharesDesc = prometheus.NewDesc("container_spec_cpu_shares", "CPU share of the container.", nil, nil)
)
// Describe describes all the metrics ever exported by cadvisor. It
@@ -1581,16 +1613,17 @@ func (c *PrometheusCollector) Describe(ch chan<- *prometheus.Desc) {
for _, cm := range c.containerMetrics {
ch <- cm.desc([]string{})
}
ch <- startTimeDesc
ch <- cpuPeriodDesc
ch <- cpuQuotaDesc
ch <- cpuSharesDesc
ch <- versionInfoDesc
ch <- machineInfoCoresDesc
ch <- machineInfoMemoryDesc
}
// Collect fetches the stats from all containers and delivers them as
// Prometheus metrics. It implements prometheus.Collector.
func (c *PrometheusCollector) Collect(ch chan<- prometheus.Metric) {
c.errors.Set(0)
c.collectMachineInfo(ch)
c.collectVersionInfo(ch)
c.collectContainersInfo(ch)
c.errors.Collect(ch)
@@ -1745,7 +1778,6 @@ func (c *PrometheusCollector) collectContainersInfo(ch chan<- prometheus.Metric)
}
}
}
}
func (c *PrometheusCollector) collectVersionInfo(ch chan<- prometheus.Metric) {
@@ -1758,17 +1790,6 @@ func (c *PrometheusCollector) collectVersionInfo(ch chan<- prometheus.Metric) {
ch <- prometheus.MustNewConstMetric(versionInfoDesc, prometheus.GaugeValue, 1, []string{versionInfo.KernelVersion, versionInfo.ContainerOsVersion, versionInfo.DockerVersion, versionInfo.CadvisorVersion, versionInfo.CadvisorRevision}...)
}
func (c *PrometheusCollector) collectMachineInfo(ch chan<- prometheus.Metric) {
machineInfo, err := c.infoProvider.GetMachineInfo()
if err != nil {
c.errors.Set(1)
klog.Warningf("Couldn't get machine info: %s", err)
return
}
ch <- prometheus.MustNewConstMetric(machineInfoCoresDesc, prometheus.GaugeValue, float64(machineInfo.NumCores))
ch <- prometheus.MustNewConstMetric(machineInfoMemoryDesc, prometheus.GaugeValue, float64(machineInfo.MemoryCapacity))
}
// Size after which we consider memory to be "unlimited". This is not
// MaxInt64 due to rounding by the kernel.
const maxMemorySize = uint64(1 << 62)
@@ -1780,10 +1801,10 @@ func specMemoryValue(v uint64) float64 {
return float64(v)
}
var invalidLabelCharRE = regexp.MustCompile(`[^a-zA-Z0-9_]`)
var invalidNameCharRE = regexp.MustCompile(`[^a-zA-Z0-9_]`)
// sanitizeLabelName replaces anything that doesn't match
// client_label.LabelNameRE with an underscore.
func sanitizeLabelName(name string) string {
return invalidLabelCharRE.ReplaceAllString(name, "_")
return invalidNameCharRE.ReplaceAllString(name, "_")
}

View File

@@ -0,0 +1,683 @@
// Copyright 2020 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package metrics
import (
"errors"
"time"
info "github.com/google/cadvisor/info/v1"
)
// testSubcontainersInfoProvider is a stateless infoProvider whose methods
// return hard-coded fixture data and never return an error.
type testSubcontainersInfoProvider struct{}
// GetVersionInfo returns a fixed set of version strings; the error is always nil.
func (p testSubcontainersInfoProvider) GetVersionInfo() (*info.VersionInfo, error) {
	version := new(info.VersionInfo)
	version.KernelVersion = "4.1.6-200.fc22.x86_64"
	version.ContainerOsVersion = "Fedora 22 (Twenty Two)"
	version.DockerVersion = "1.8.1"
	version.CadvisorVersion = "0.16.0"
	version.CadvisorRevision = "abcdef"
	return version, nil
}
// GetMachineInfo returns a fixed machine description; the error is always nil.
// The fixture has two topology nodes (ids 0 and 1), each holding four cores
// with two threads and three caches per core; only node 1 additionally carries
// a node-level level-3 cache.
func (p testSubcontainersInfoProvider) GetMachineInfo() (*info.MachineInfo, error) {
return &info.MachineInfo{
Timestamp: time.Unix(1395066363, 0),
NumCores: 4,
NumPhysicalCores: 1,
NumSockets: 1,
MemoryCapacity: 1024,
MemoryByType: map[string]*info.MemoryInfo{
"Non-volatile-RAM": {Capacity: 2168421613568, DimmCount: 8},
"Unbuffered-DDR4": {Capacity: 412316860416, DimmCount: 12},
},
NVMInfo: info.NVMInfo{
MemoryModeCapacity: 429496729600,
AppDirectModeCapacity: 1735166787584,
},
MachineID: "machine-id-test",
SystemUUID: "system-uuid-test",
BootID: "boot-id-test",
Topology: []info.Node{
{
Id: 0,
Memory: 33604804608,
HugePages: []info.HugePagesInfo{
{
PageSize: uint64(1048576),
NumPages: uint64(0),
},
{
PageSize: uint64(2048),
NumPages: uint64(0),
},
},
Cores: []info.Core{
{
Id: 0,
Threads: []int{0, 1},
Caches: []info.Cache{
{
Size: 32768,
Type: "Data",
Level: 1,
},
{
Size: 32768,
Type: "Instruction",
Level: 1,
},
{
Size: 262144,
Type: "Unified",
Level: 2,
},
},
},
{
Id: 1,
Threads: []int{2, 3},
Caches: []info.Cache{
{
Size: 32764,
Type: "Data",
Level: 1,
},
{
Size: 32764,
Type: "Instruction",
Level: 1,
},
{
Size: 262148,
Type: "Unified",
Level: 2,
},
},
},
{
Id: 2,
Threads: []int{4, 5},
Caches: []info.Cache{
{
Size: 32768,
Type: "Data",
Level: 1,
},
{
Size: 32768,
Type: "Instruction",
Level: 1,
},
{
Size: 262144,
Type: "Unified",
Level: 2,
},
},
},
{
Id: 3,
Threads: []int{6, 7},
Caches: []info.Cache{
{
Size: 32764,
Type: "Data",
Level: 1,
},
{
Size: 32764,
Type: "Instruction",
Level: 1,
},
{
Size: 262148,
Type: "Unified",
Level: 2,
},
},
},
},
},
{
Id: 1,
Memory: 33604804606,
HugePages: []info.HugePagesInfo{
{
PageSize: uint64(1048576),
NumPages: uint64(2),
},
{
PageSize: uint64(2048),
NumPages: uint64(4),
},
},
Cores: []info.Core{
{
Id: 4,
Threads: []int{8, 9},
Caches: []info.Cache{
{
Size: 32768,
Type: "Data",
Level: 1,
},
{
Size: 32768,
Type: "Instruction",
Level: 1,
},
{
Size: 262144,
Type: "Unified",
Level: 2,
},
},
},
{
Id: 5,
Threads: []int{10, 11},
Caches: []info.Cache{
{
Size: 32764,
Type: "Data",
Level: 1,
},
{
Size: 32764,
Type: "Instruction",
Level: 1,
},
{
Size: 262148,
Type: "Unified",
Level: 2,
},
},
},
{
Id: 6,
Threads: []int{12, 13},
Caches: []info.Cache{
{
Size: 32768,
Type: "Data",
Level: 1,
},
{
Size: 32768,
Type: "Instruction",
Level: 1,
},
{
Size: 262144,
Type: "Unified",
Level: 2,
},
},
},
{
Id: 7,
Threads: []int{14, 15},
Caches: []info.Cache{
{
Size: 32764,
Type: "Data",
Level: 1,
},
{
Size: 32764,
Type: "Instruction",
Level: 1,
},
{
Size: 262148,
Type: "Unified",
Level: 2,
},
},
},
},
// Node-level cache: present on this node only.
Caches: []info.Cache{
{
Size: 8388608,
Type: "Unified",
Level: 3,
},
},
},
},
}, nil
}
// SubcontainersInfo ignores both arguments and returns a single fixture
// container named "testcontainer" with one stats sample; the error is always
// nil. Everything is constant except the CustomMetrics timestamps, which are
// taken from time.Now() on every call.
func (p testSubcontainersInfoProvider) SubcontainersInfo(string, *info.ContainerInfoRequest) ([]*info.ContainerInfo, error) {
return []*info.ContainerInfo{
{
ContainerReference: info.ContainerReference{
Name: "testcontainer",
Aliases: []string{"testcontaineralias"},
},
Spec: info.ContainerSpec{
Image: "test",
HasCpu: true,
Cpu: info.CpuSpec{
Limit: 1000,
Period: 100000,
Quota: 10000,
},
Memory: info.MemorySpec{
Limit: 2048,
Reservation: 1024,
SwapLimit: 4096,
},
HasHugetlb: true,
HasProcesses: true,
Processes: info.ProcessSpec{
Limit: 100,
},
CreationTime: time.Unix(1257894000, 0),
Labels: map[string]string{
"foo.label": "bar",
},
Envs: map[string]string{
"foo+env": "prod",
},
},
Stats: []*info.ContainerStats{
{
Timestamp: time.Unix(1395066363, 0),
Cpu: info.CpuStats{
Usage: info.CpuUsage{
Total: 1,
PerCpu: []uint64{2, 3, 4, 5},
User: 6,
System: 7,
},
CFS: info.CpuCFS{
Periods: 723,
ThrottledPeriods: 18,
ThrottledTime: 1724314000,
},
Schedstat: info.CpuSchedstat{
RunTime: 53643567,
RunqueueTime: 479424566378,
RunPeriods: 984285,
},
LoadAverage: 2,
},
Memory: info.MemoryStats{
Usage: 8,
MaxUsage: 8,
WorkingSet: 9,
ContainerData: info.MemoryStatsMemoryData{
Pgfault: 10,
Pgmajfault: 11,
},
HierarchicalData: info.MemoryStatsMemoryData{
Pgfault: 12,
Pgmajfault: 13,
},
Cache: 14,
RSS: 15,
MappedFile: 16,
Swap: 8192,
},
Hugetlb: map[string]info.HugetlbStats{
"2Mi": {
Usage: 4,
MaxUsage: 10,
Failcnt: 1,
},
"1Gi": {
Usage: 0,
MaxUsage: 0,
Failcnt: 0,
},
},
Network: info.NetworkStats{
InterfaceStats: info.InterfaceStats{
Name: "eth0",
RxBytes: 14,
RxPackets: 15,
RxErrors: 16,
RxDropped: 17,
TxBytes: 18,
TxPackets: 19,
TxErrors: 20,
TxDropped: 21,
},
Interfaces: []info.InterfaceStats{
{
Name: "eth0",
RxBytes: 14,
RxPackets: 15,
RxErrors: 16,
RxDropped: 17,
TxBytes: 18,
TxPackets: 19,
TxErrors: 20,
TxDropped: 21,
},
},
Tcp: info.TcpStat{
Established: 13,
SynSent: 0,
SynRecv: 0,
FinWait1: 0,
FinWait2: 0,
TimeWait: 0,
Close: 0,
CloseWait: 0,
LastAck: 0,
Listen: 3,
Closing: 0,
},
Tcp6: info.TcpStat{
Established: 11,
SynSent: 0,
SynRecv: 0,
FinWait1: 0,
FinWait2: 0,
TimeWait: 0,
Close: 0,
CloseWait: 0,
LastAck: 0,
Listen: 3,
Closing: 0,
},
TcpAdvanced: info.TcpAdvancedStat{
TCPFullUndo: 2361,
TCPMD5NotFound: 0,
TCPDSACKRecv: 83680,
TCPSackShifted: 2,
TCPSackShiftFallback: 298,
PFMemallocDrop: 0,
EstabResets: 37,
InSegs: 140370590,
TCPPureAcks: 24251339,
TCPDSACKOldSent: 15633,
IPReversePathFilter: 0,
TCPFastOpenPassiveFail: 0,
InCsumErrors: 0,
TCPRenoFailures: 43414,
TCPMemoryPressuresChrono: 0,
TCPDeferAcceptDrop: 0,
TW: 10436427,
TCPSpuriousRTOs: 0,
TCPDSACKIgnoredNoUndo: 71885,
RtoMax: 120000,
ActiveOpens: 11038621,
EmbryonicRsts: 0,
RcvPruned: 0,
TCPLossProbeRecovery: 401,
TCPHPHits: 56096478,
TCPPartialUndo: 3,
TCPAbortOnMemory: 0,
AttemptFails: 48997,
RetransSegs: 462961,
SyncookiesFailed: 0,
OfoPruned: 0,
TCPAbortOnLinger: 0,
TCPAbortFailed: 0,
TCPRenoReorder: 839,
TCPRcvCollapsed: 0,
TCPDSACKIgnoredOld: 0,
TCPReqQFullDrop: 0,
OutOfWindowIcmps: 0,
TWKilled: 0,
TCPLossProbes: 88648,
TCPRenoRecoveryFail: 394,
TCPFastOpenCookieReqd: 0,
TCPHPAcks: 21490641,
TCPSACKReneging: 0,
TCPTSReorder: 3,
TCPSlowStartRetrans: 290832,
MaxConn: -1,
SyncookiesRecv: 0,
TCPSackFailures: 60,
DelayedACKLocked: 90,
TCPDSACKOfoSent: 1,
TCPSynRetrans: 988,
TCPDSACKOfoRecv: 10,
TCPSACKDiscard: 0,
TCPMD5Unexpected: 0,
TCPSackMerged: 6,
RtoMin: 200,
CurrEstab: 22,
TCPTimeWaitOverflow: 0,
ListenOverflows: 0,
DelayedACKs: 503975,
TCPLossUndo: 61374,
TCPOrigDataSent: 130698387,
TCPBacklogDrop: 0,
TCPReqQFullDoCookies: 0,
TCPFastOpenPassive: 0,
PAWSActive: 0,
OutRsts: 91699,
TCPSackRecoveryFail: 2,
DelayedACKLost: 18843,
TCPAbortOnData: 8,
TCPMinTTLDrop: 0,
PruneCalled: 0,
TWRecycled: 0,
ListenDrops: 0,
TCPAbortOnTimeout: 0,
SyncookiesSent: 0,
TCPSACKReorder: 11,
TCPDSACKUndo: 33,
TCPMD5Failure: 0,
TCPLostRetransmit: 0,
TCPAbortOnClose: 7,
TCPFastOpenListenOverflow: 0,
OutSegs: 211580512,
InErrs: 31,
TCPTimeouts: 27422,
TCPLossFailures: 729,
TCPSackRecovery: 159,
RtoAlgorithm: 1,
PassiveOpens: 59,
LockDroppedIcmps: 0,
TCPRenoRecovery: 3519,
TCPFACKReorder: 0,
TCPFastRetrans: 11794,
TCPRetransFail: 0,
TCPMemoryPressures: 0,
TCPFastOpenActive: 0,
TCPFastOpenActiveFail: 0,
PAWSEstab: 0,
},
Udp: info.UdpStat{
Listen: 0,
Dropped: 0,
RxQueued: 0,
TxQueued: 0,
},
Udp6: info.UdpStat{
Listen: 0,
Dropped: 0,
RxQueued: 0,
TxQueued: 0,
},
},
Filesystem: []info.FsStats{
{
Device: "sda1",
InodesFree: 524288,
Inodes: 2097152,
Limit: 22,
Usage: 23,
ReadsCompleted: 24,
ReadsMerged: 25,
SectorsRead: 26,
ReadTime: 27,
WritesCompleted: 28,
WritesMerged: 39,
SectorsWritten: 40,
WriteTime: 41,
IoInProgress: 42,
IoTime: 43,
WeightedIoTime: 44,
},
{
Device: "sda2",
InodesFree: 262144,
Inodes: 2097152,
Limit: 37,
Usage: 38,
ReadsCompleted: 39,
ReadsMerged: 40,
SectorsRead: 41,
ReadTime: 42,
WritesCompleted: 43,
WritesMerged: 44,
SectorsWritten: 45,
WriteTime: 46,
IoInProgress: 47,
IoTime: 48,
WeightedIoTime: 49,
},
},
Accelerators: []info.AcceleratorStats{
{
Make: "nvidia",
Model: "tesla-p100",
ID: "GPU-deadbeef-1234-5678-90ab-feedfacecafe",
MemoryTotal: 20304050607,
MemoryUsed: 2030405060,
DutyCycle: 12,
},
{
Make: "nvidia",
Model: "tesla-k80",
ID: "GPU-deadbeef-0123-4567-89ab-feedfacecafe",
MemoryTotal: 10203040506,
MemoryUsed: 1020304050,
DutyCycle: 6,
},
},
Processes: info.ProcessStats{
ProcessCount: 1,
FdCount: 5,
SocketCount: 3,
ThreadsCurrent: 5,
ThreadsMax: 100,
Ulimits: []info.UlimitSpec{
{
Name: "max_open_files",
SoftLimit: 16384,
HardLimit: 16384,
},
},
},
TaskStats: info.LoadStats{
NrSleeping: 50,
NrRunning: 51,
NrStopped: 52,
NrUninterruptible: 53,
NrIoWait: 54,
},
// Custom metric timestamps are the only non-constant values in this fixture.
CustomMetrics: map[string][]info.MetricVal{
"container_custom_app_metric_1": {
{
FloatValue: float64(1.1),
Timestamp: time.Now(),
Label: "testlabel_1_1_1",
Labels: map[string]string{"test_label": "1_1", "test_label_2": "2_1"},
},
{
FloatValue: float64(1.2),
Timestamp: time.Now(),
Label: "testlabel_1_1_2",
Labels: map[string]string{"test_label": "1_2", "test_label_2": "2_2"},
},
},
"container_custom_app_metric_2": {
{
FloatValue: float64(2),
Timestamp: time.Now(),
Label: "testlabel2",
Labels: map[string]string{"test_label": "test_value"},
},
},
"container_custom_app_metric_3": {
{
FloatValue: float64(3),
Timestamp: time.Now(),
Label: "testlabel3",
Labels: map[string]string{"test_label": "test_value"},
},
},
},
PerfStats: []info.PerfStat{
{
ScalingRatio: 1.0,
Value: 123,
Name: "instructions",
Cpu: 0,
},
{
ScalingRatio: 0.5,
Value: 456,
Name: "instructions",
Cpu: 1,
},
{
ScalingRatio: 0.66666666666,
Value: 321,
Name: "instructions_retired",
Cpu: 0,
},
{
ScalingRatio: 0.33333333333,
Value: 789,
Name: "instructions_retired",
Cpu: 1,
},
},
ReferencedMemory: 1234,
},
},
},
}, nil
}
// erroringSubcontainersInfoProvider is an infoProvider that can be switched
// between failing every call and delegating to testSubcontainersInfoProvider.
type erroringSubcontainersInfoProvider struct {
// successfulProvider supplies the results returned while shouldFail is false.
successfulProvider testSubcontainersInfoProvider
// shouldFail makes every method return an error when true.
shouldFail bool
}
// GetVersionInfo delegates to the wrapped provider unless shouldFail is set,
// in which case it returns a nil result and the error "Oops 1".
func (p *erroringSubcontainersInfoProvider) GetVersionInfo() (*info.VersionInfo, error) {
	if !p.shouldFail {
		return p.successfulProvider.GetVersionInfo()
	}
	return nil, errors.New("Oops 1")
}
// GetMachineInfo delegates to the wrapped provider unless shouldFail is set,
// in which case it returns a nil result and the error "Oops 2".
func (p *erroringSubcontainersInfoProvider) GetMachineInfo() (*info.MachineInfo, error) {
	if !p.shouldFail {
		return p.successfulProvider.GetMachineInfo()
	}
	return nil, errors.New("Oops 2")
}
// SubcontainersInfo forwards both arguments to the wrapped provider unless
// shouldFail is set, in which case it returns an empty (non-nil) slice and
// the error "Oops 3".
func (p *erroringSubcontainersInfoProvider) SubcontainersInfo(
	a string, r *info.ContainerInfoRequest) ([]*info.ContainerInfo, error) {
	if !p.shouldFail {
		return p.successfulProvider.SubcontainersInfo(a, r)
	}
	noContainers := []*info.ContainerInfo{}
	return noContainers, errors.New("Oops 3")
}

View File

@@ -0,0 +1,349 @@
// Copyright 2020 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package metrics
import (
"strconv"
"github.com/google/cadvisor/container"
info "github.com/google/cadvisor/info/v1"
"github.com/prometheus/client_golang/prometheus"
"k8s.io/klog/v2"
)
// baseLabelsNames lists the label names attached to every collected machine
// metric; the matching values are the machine's MachineID, SystemUUID and
// BootID, in that order.
var baseLabelsNames = []string{"machine_id", "system_uuid", "boot_id"}
const (
// Names of the extra labels used by the machine metrics.
prometheusModeLabelName = "mode"
prometheusTypeLabelName = "type"
prometheusLevelLabelName = "level"
prometheusNodeLabelName = "node_id"
prometheusCoreLabelName = "core_id"
prometheusThreadLabelName = "thread_id"
prometheusPageSizeLabelName = "page_size"
// Values of the "mode" label on the NVM capacity metric.
nvmMemoryMode = "memory_mode"
nvmAppDirectMode = "app_direct_mode"
// Property selectors accepted by getMemoryByType.
memoryByTypeDimmCountKey = "DimmCount"
memoryByTypeDimmCapacityKey = "Capacity"
// emptyLabelValue fills the core_id label for caches that belong to a
// node rather than to a single core.
emptyLabelValue = ""
)
// machineMetric describes a multi-dimensional metric used for exposing a
// certain type of machine statistic.
type machineMetric struct {
// name is the metric name passed to prometheus.NewDesc.
name string
// help is the help text passed to prometheus.NewDesc.
help string
// valueType is the Prometheus value type used when emitting samples.
valueType prometheus.ValueType
// extraLabels are the label names appended after the base labels.
extraLabels []string
// condition, when non-nil, must return true for the metric to be
// collected for the given machine info.
condition func(machineInfo *info.MachineInfo) bool
// getValues extracts the samples of this metric from the machine info.
getValues func(machineInfo *info.MachineInfo) metricValues
}
// desc builds the Prometheus descriptor for this metric. Its variable labels
// are baseLabels followed by the metric's extraLabels.
//
// The label names are copied into a freshly allocated slice instead of being
// appended to baseLabels directly: appending in place would write into the
// caller's backing array whenever it has spare capacity, which could corrupt
// a shared slice such as a package-level label list.
func (metric *machineMetric) desc(baseLabels []string) *prometheus.Desc {
	labelNames := make([]string, 0, len(baseLabels)+len(metric.extraLabels))
	labelNames = append(labelNames, baseLabels...)
	labelNames = append(labelNames, metric.extraLabels...)
	return prometheus.NewDesc(metric.name, metric.help, labelNames, nil)
}
// PrometheusMachineCollector implements prometheus.Collector.
type PrometheusMachineCollector struct {
// infoProvider is queried for machine information on every collection.
infoProvider infoProvider
// errors is reset to 0 at the start of each collection and set to 1 when
// the machine information cannot be retrieved.
errors prometheus.Gauge
// machineMetrics is the list of metrics this collector exports.
machineMetrics []machineMetric
}
// NewPrometheusMachineCollector returns a new PrometheusMachineCollector that
// reads machine information from i.
//
// The CPU, memory, DIMM and NVM metrics are always registered; the DIMM
// metrics are only emitted when the machine reports memory by type. The
// topology metrics (per-core caches, thread siblings, per-node memory and
// hugepages) are added only when includedMetrics contains
// container.CPUTopologyMetrics.
func NewPrometheusMachineCollector(i infoProvider, includedMetrics container.MetricSet) *PrometheusMachineCollector {
	// hasMemoryByType gates the DIMM metrics on the presence of per-type data.
	hasMemoryByType := func(machineInfo *info.MachineInfo) bool {
		return len(machineInfo.MemoryByType) != 0
	}

	c := &PrometheusMachineCollector{
		infoProvider: i,
		errors: prometheus.NewGauge(prometheus.GaugeOpts{
			Namespace: "machine",
			Name:      "scrape_error",
			Help:      "1 if there was an error while getting machine metrics, 0 otherwise.",
		}),
		machineMetrics: []machineMetric{
			{
				name:      "machine_cpu_physical_cores",
				help:      "Number of physical CPU cores.",
				valueType: prometheus.GaugeValue,
				getValues: func(machineInfo *info.MachineInfo) metricValues {
					return metricValues{{value: float64(machineInfo.NumPhysicalCores), timestamp: machineInfo.Timestamp}}
				},
			},
			{
				name:      "machine_cpu_cores",
				help:      "Number of logical CPU cores.",
				valueType: prometheus.GaugeValue,
				getValues: func(machineInfo *info.MachineInfo) metricValues {
					return metricValues{{value: float64(machineInfo.NumCores), timestamp: machineInfo.Timestamp}}
				},
			},
			{
				name:      "machine_cpu_sockets",
				help:      "Number of CPU sockets.",
				valueType: prometheus.GaugeValue,
				getValues: func(machineInfo *info.MachineInfo) metricValues {
					return metricValues{{value: float64(machineInfo.NumSockets), timestamp: machineInfo.Timestamp}}
				},
			},
			{
				name:      "machine_memory_bytes",
				help:      "Amount of memory installed on the machine.",
				valueType: prometheus.GaugeValue,
				getValues: func(machineInfo *info.MachineInfo) metricValues {
					return metricValues{{value: float64(machineInfo.MemoryCapacity), timestamp: machineInfo.Timestamp}}
				},
			},
			{
				name:        "machine_dimm_count",
				help:        "Number of RAM DIMM (all types memory modules) value labeled by dimm type.",
				valueType:   prometheus.GaugeValue,
				extraLabels: []string{prometheusTypeLabelName},
				condition:   hasMemoryByType,
				getValues: func(machineInfo *info.MachineInfo) metricValues {
					return getMemoryByType(machineInfo, memoryByTypeDimmCountKey)
				},
			},
			{
				name:        "machine_dimm_capacity_bytes",
				help:        "Total RAM DIMM capacity (all types memory modules) value labeled by dimm type.",
				valueType:   prometheus.GaugeValue,
				extraLabels: []string{prometheusTypeLabelName},
				condition:   hasMemoryByType,
				getValues: func(machineInfo *info.MachineInfo) metricValues {
					return getMemoryByType(machineInfo, memoryByTypeDimmCapacityKey)
				},
			},
			{
				name:        "machine_nvm_capacity",
				help:        "NVM capacity value labeled by NVM mode (memory mode or app direct mode).",
				valueType:   prometheus.GaugeValue,
				extraLabels: []string{prometheusModeLabelName},
				getValues: func(machineInfo *info.MachineInfo) metricValues {
					return metricValues{
						{value: float64(machineInfo.NVMInfo.MemoryModeCapacity), labels: []string{nvmMemoryMode}, timestamp: machineInfo.Timestamp},
						{value: float64(machineInfo.NVMInfo.AppDirectModeCapacity), labels: []string{nvmAppDirectMode}, timestamp: machineInfo.Timestamp},
					}
				},
			},
			{
				name:      "machine_nvm_avg_power_budget_watts",
				help:      "NVM power budget.",
				valueType: prometheus.GaugeValue,
				getValues: func(machineInfo *info.MachineInfo) metricValues {
					return metricValues{{value: float64(machineInfo.NVMInfo.AvgPowerBudget), timestamp: machineInfo.Timestamp}}
				},
			},
		},
	}

	if includedMetrics.Has(container.CPUTopologyMetrics) {
		c.machineMetrics = append(c.machineMetrics, []machineMetric{
			{
				name:        "machine_cpu_cache_capacity_bytes",
				help:        "Cache size in bytes assigned to NUMA node and CPU core.",
				valueType:   prometheus.GaugeValue,
				extraLabels: []string{prometheusNodeLabelName, prometheusCoreLabelName, prometheusTypeLabelName, prometheusLevelLabelName},
				getValues: func(machineInfo *info.MachineInfo) metricValues {
					return getCaches(machineInfo)
				},
			},
			{
				name:        "machine_thread_siblings_count",
				help:        "Number of CPU thread siblings.",
				valueType:   prometheus.GaugeValue,
				extraLabels: []string{prometheusNodeLabelName, prometheusCoreLabelName, prometheusThreadLabelName},
				getValues: func(machineInfo *info.MachineInfo) metricValues {
					return getThreadsSiblingsCount(machineInfo)
				},
			},
			{
				name:        "machine_node_memory_capacity_bytes",
				help:        "Amount of memory assigned to NUMA node.",
				valueType:   prometheus.GaugeValue,
				extraLabels: []string{prometheusNodeLabelName},
				getValues: func(machineInfo *info.MachineInfo) metricValues {
					return getNodeMemory(machineInfo)
				},
			},
			{
				name: "machine_node_hugepages_count",
				// Help text typo fixed: it previously read "Numer of hugepages".
				help:        "Number of hugepages assigned to NUMA node.",
				valueType:   prometheus.GaugeValue,
				extraLabels: []string{prometheusNodeLabelName, prometheusPageSizeLabelName},
				getValues: func(machineInfo *info.MachineInfo) metricValues {
					return getHugePagesCount(machineInfo)
				},
			},
		}...)
	}
	return c
}
// Describe sends the descriptor of the scrape-error gauge, followed by one
// descriptor for each configured machine metric. It implements
// prometheus.Collector.
func (collector *PrometheusMachineCollector) Describe(ch chan<- *prometheus.Desc) {
	collector.errors.Describe(ch)

	noBaseLabels := []string{}
	for idx := range collector.machineMetrics {
		// Descriptors are announced with the metric's extra labels only.
		ch <- collector.machineMetrics[idx].desc(noBaseLabels)
	}
}
// Collect fetches information about the machine and delivers it as
// Prometheus metrics. It implements prometheus.Collector.
func (collector *PrometheusMachineCollector) Collect(ch chan<- prometheus.Metric) {
// Reset the error gauge first; collectMachineInfo sets it to 1 on failure.
collector.errors.Set(0)
collector.collectMachineInfo(ch)
// Emit the error gauge last so it reflects the outcome of this collection.
collector.errors.Collect(ch)
}
// collectMachineInfo queries the info provider once and sends every sample of
// every applicable machine metric to ch.
//
// If the machine information cannot be retrieved, the error gauge is set to 1,
// a warning is logged and nothing is sent. A metric whose condition is non-nil
// and returns false is skipped. Each sample is labelled with the machine id,
// system UUID and boot id, followed by the sample's own label values when the
// metric declares extra labels. Samples with a non-zero timestamp are emitted
// with that timestamp attached.
func (collector *PrometheusMachineCollector) collectMachineInfo(ch chan<- prometheus.Metric) {
	machineInfo, err := collector.infoProvider.GetMachineInfo()
	if err != nil {
		collector.errors.Set(1)
		klog.Warningf("Couldn't get machine info: %s", err)
		return
	}

	baseLabelsValues := []string{machineInfo.MachineID, machineInfo.SystemUUID, machineInfo.BootID}

	for i := range collector.machineMetrics {
		metric := &collector.machineMetrics[i]
		if metric.condition != nil && !metric.condition(machineInfo) {
			continue
		}

		values := metric.getValues(machineInfo)
		if len(values) == 0 {
			continue
		}

		// The descriptor depends only on the metric, so build it once per
		// metric rather than once per sample.
		desc := metric.desc(baseLabelsNames)
		hasExtraLabels := len(metric.extraLabels) != 0

		for _, mv := range values {
			// Size the slice for base and sample labels up front so the
			// append below does not have to reallocate.
			labelValues := make([]string, 0, len(baseLabelsValues)+len(mv.labels))
			labelValues = append(labelValues, baseLabelsValues...)
			if hasExtraLabels {
				labelValues = append(labelValues, mv.labels...)
			}

			sample := prometheus.MustNewConstMetric(desc, metric.valueType, mv.value, labelValues...)
			if !mv.timestamp.IsZero() {
				sample = prometheus.NewMetricWithTimestamp(mv.timestamp, sample)
			}
			ch <- sample
		}
	}
}
// getMemoryByType returns one sample per entry of machineInfo.MemoryByType,
// labelled with the memory type and stamped with the machine info timestamp.
//
// property selects the exported field: memoryByTypeDimmCapacityKey yields the
// capacity and memoryByTypeDimmCountKey the DIMM count. Any other value logs a
// warning and returns an empty result. The property is validated once, before
// iterating, so an unknown property is reported even when the map is empty.
func getMemoryByType(machineInfo *info.MachineInfo, property string) metricValues {
	var valueOf func(memoryInfo *info.MemoryInfo) float64
	switch property {
	case memoryByTypeDimmCapacityKey:
		valueOf = func(memoryInfo *info.MemoryInfo) float64 { return float64(memoryInfo.Capacity) }
	case memoryByTypeDimmCountKey:
		valueOf = func(memoryInfo *info.MemoryInfo) float64 { return float64(memoryInfo.DimmCount) }
	default:
		klog.Warningf("Incorrect property name for MemoryByType, property %s", property)
		return metricValues{}
	}

	mValues := make(metricValues, 0, len(machineInfo.MemoryByType))
	for memoryType, memoryInfo := range machineInfo.MemoryByType {
		mValues = append(mValues, metricValue{
			value:     valueOf(memoryInfo),
			labels:    []string{memoryType},
			timestamp: machineInfo.Timestamp,
		})
	}
	return mValues
}
// getThreadsSiblingsCount returns one sample per thread in the topology. The
// value is the number of threads on the thread's core, and the labels are the
// node id, core id and thread id.
func getThreadsSiblingsCount(machineInfo *info.MachineInfo) metricValues {
	samples := make(metricValues, 0, machineInfo.NumCores)
	stamp := machineInfo.Timestamp

	for n := range machineInfo.Topology {
		node := &machineInfo.Topology[n]
		nodeLabel := strconv.Itoa(node.Id)

		for c := range node.Cores {
			core := &node.Cores[c]
			coreLabel := strconv.Itoa(core.Id)
			siblings := float64(len(core.Threads))

			for _, threadID := range core.Threads {
				sample := metricValue{
					value:     siblings,
					labels:    []string{nodeLabel, coreLabel, strconv.Itoa(threadID)},
					timestamp: stamp,
				}
				samples = append(samples, sample)
			}
		}
	}
	return samples
}
// getNodeMemory returns one sample per topology node holding the node's
// memory size, labelled with the node id.
func getNodeMemory(machineInfo *info.MachineInfo) metricValues {
	nodes := machineInfo.Topology
	samples := make(metricValues, 0, len(nodes))

	for n := range nodes {
		sample := metricValue{
			value:     float64(nodes[n].Memory),
			labels:    []string{strconv.Itoa(nodes[n].Id)},
			timestamp: machineInfo.Timestamp,
		}
		samples = append(samples, sample)
	}
	return samples
}
// getHugePagesCount returns one sample per (node, hugepage size) pair holding
// the number of pages, labelled with the node id and the page size in decimal.
func getHugePagesCount(machineInfo *info.MachineInfo) metricValues {
	samples := metricValues{}
	stamp := machineInfo.Timestamp

	for n := range machineInfo.Topology {
		node := &machineInfo.Topology[n]
		nodeLabel := strconv.Itoa(node.Id)

		for h := range node.HugePages {
			pages := node.HugePages[h]
			sizeLabel := strconv.FormatUint(pages.PageSize, 10)
			samples = append(samples, metricValue{
				value:     float64(pages.NumPages),
				labels:    []string{nodeLabel, sizeLabel},
				timestamp: stamp,
			})
		}
	}
	return samples
}
// getCaches returns one sample per cache in the topology holding the cache
// size, labelled with node id, core id, cache type and cache level. For each
// node the per-core caches come first, followed by the node-level caches,
// which carry an empty core id.
func getCaches(machineInfo *info.MachineInfo) metricValues {
	samples := metricValues{}
	stamp := machineInfo.Timestamp

	// record appends a single cache sample under the given node and core labels.
	record := func(nodeLabel, coreLabel string, cache info.Cache) {
		samples = append(samples, metricValue{
			value:     float64(cache.Size),
			labels:    []string{nodeLabel, coreLabel, cache.Type, strconv.Itoa(cache.Level)},
			timestamp: stamp,
		})
	}

	for n := range machineInfo.Topology {
		node := &machineInfo.Topology[n]
		nodeLabel := strconv.Itoa(node.Id)

		for c := range node.Cores {
			coreLabel := strconv.Itoa(node.Cores[c].Id)
			for _, cache := range node.Cores[c].Caches {
				record(nodeLabel, coreLabel, cache)
			}
		}
		for _, cache := range node.Caches {
			record(nodeLabel, emptyLabelValue, cache)
		}
	}
	return samples
}