Move cgroups prom to containerd metrics

Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
This commit is contained in:
Michael Crosby 2017-06-26 16:52:23 -07:00
parent 49f9dc494f
commit 85f61f6f51
9 changed files with 1390 additions and 5 deletions

101
metrics/cgroups/blkio.go Normal file
View File

@ -0,0 +1,101 @@
package cgroups
import (
"github.com/containerd/cgroups"
metrics "github.com/docker/go-metrics"
"github.com/prometheus/client_golang/prometheus"
)
var blkioMetrics = []*metric{
{
name: "blkio_io_merged_recursive",
help: "The blkio io merged recursive",
unit: metrics.Total,
vt: prometheus.GaugeValue,
labels: []string{"op", "device", "major", "minor"},
getValues: func(stats *cgroups.Stats) []value {
if stats.Blkio == nil {
return nil
}
return blkioValues(stats.Blkio.IoMergedRecursive)
},
},
{
name: "blkio_io_queued_recursive",
help: "The blkio io queued recursive",
unit: metrics.Total,
vt: prometheus.GaugeValue,
labels: []string{"op", "device", "major", "minor"},
getValues: func(stats *cgroups.Stats) []value {
if stats.Blkio == nil {
return nil
}
return blkioValues(stats.Blkio.IoQueuedRecursive)
},
},
{
name: "blkio_io_service_bytes_recursive",
help: "The blkio io service bytes recursive",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
labels: []string{"op", "device", "major", "minor"},
getValues: func(stats *cgroups.Stats) []value {
if stats.Blkio == nil {
return nil
}
return blkioValues(stats.Blkio.IoServiceBytesRecursive)
},
},
{
name: "blkio_io_service_time_recursive",
help: "The blkio io servie time recursive",
unit: metrics.Total,
vt: prometheus.GaugeValue,
labels: []string{"op", "device", "major", "minor"},
getValues: func(stats *cgroups.Stats) []value {
if stats.Blkio == nil {
return nil
}
return blkioValues(stats.Blkio.IoServiceTimeRecursive)
},
},
{
name: "blkio_io_serviced_recursive",
help: "The blkio io servied recursive",
unit: metrics.Total,
vt: prometheus.GaugeValue,
labels: []string{"op", "device", "major", "minor"},
getValues: func(stats *cgroups.Stats) []value {
if stats.Blkio == nil {
return nil
}
return blkioValues(stats.Blkio.IoServicedRecursive)
},
},
{
name: "blkio_io_time_recursive",
help: "The blkio io time recursive",
unit: metrics.Total,
vt: prometheus.GaugeValue,
labels: []string{"op", "device", "major", "minor"},
getValues: func(stats *cgroups.Stats) []value {
if stats.Blkio == nil {
return nil
}
return blkioValues(stats.Blkio.IoTimeRecursive)
},
},
{
name: "blkio_sectors_recursive",
help: "The blkio sectors recursive",
unit: metrics.Total,
vt: prometheus.GaugeValue,
labels: []string{"op", "device", "major", "minor"},
getValues: func(stats *cgroups.Stats) []value {
if stats.Blkio == nil {
return nil
}
return blkioValues(stats.Blkio.SectorsRecursive)
},
},
}

View File

@ -7,7 +7,6 @@ import (
"time" "time"
"github.com/containerd/cgroups" "github.com/containerd/cgroups"
"github.com/containerd/cgroups/prometheus"
"github.com/containerd/containerd/plugin" "github.com/containerd/containerd/plugin"
metrics "github.com/docker/go-metrics" metrics "github.com/docker/go-metrics"
"golang.org/x/net/context" "golang.org/x/net/context"
@ -24,9 +23,9 @@ func init() {
func New(ic *plugin.InitContext) (interface{}, error) { func New(ic *plugin.InitContext) (interface{}, error) {
var ( var (
ns = metrics.NewNamespace("container", "", nil) ns = metrics.NewNamespace("container", "", nil)
collector = prometheus.New(ns) collector = NewCollector(ns)
) )
oom, err := prometheus.NewOOMCollector(ns) oom, err := NewOOMCollector(ns)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -39,8 +38,8 @@ func New(ic *plugin.InitContext) (interface{}, error) {
} }
type cgroupsMonitor struct { type cgroupsMonitor struct {
collector *prometheus.Collector collector *Collector
oom *prometheus.OOMCollector oom *OOMCollector
context context.Context context context.Context
events chan<- *plugin.Event events chan<- *plugin.Event
} }

128
metrics/cgroups/cpu.go Normal file
View File

@ -0,0 +1,128 @@
package cgroups
import (
"strconv"
"github.com/containerd/cgroups"
metrics "github.com/docker/go-metrics"
"github.com/prometheus/client_golang/prometheus"
)
var cpuMetrics = []*metric{
{
name: "cpu_total",
help: "The total cpu time",
unit: metrics.Nanoseconds,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Cpu == nil {
return nil
}
return []value{
{
v: float64(stats.Cpu.Usage.Total),
},
}
},
},
{
name: "cpu_kernel",
help: "The total kernel cpu time",
unit: metrics.Nanoseconds,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Cpu == nil {
return nil
}
return []value{
{
v: float64(stats.Cpu.Usage.Kernel),
},
}
},
},
{
name: "cpu_user",
help: "The total user cpu time",
unit: metrics.Nanoseconds,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Cpu == nil {
return nil
}
return []value{
{
v: float64(stats.Cpu.Usage.User),
},
}
},
},
{
name: "per_cpu",
help: "The total cpu time per cpu",
unit: metrics.Nanoseconds,
vt: prometheus.GaugeValue,
labels: []string{"cpu"},
getValues: func(stats *cgroups.Stats) []value {
if stats.Cpu == nil {
return nil
}
var out []value
for i, v := range stats.Cpu.Usage.PerCpu {
out = append(out, value{
v: float64(v),
l: []string{strconv.Itoa(i)},
})
}
return out
},
},
{
name: "cpu_throttle_periods",
help: "The total cpu throttle periods",
unit: metrics.Total,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Cpu == nil {
return nil
}
return []value{
{
v: float64(stats.Cpu.Throttling.Periods),
},
}
},
},
{
name: "cpu_throttled_periods",
help: "The total cpu throttled periods",
unit: metrics.Total,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Cpu == nil {
return nil
}
return []value{
{
v: float64(stats.Cpu.Throttling.ThrottledPeriods),
},
}
},
},
{
name: "cpu_throttled_time",
help: "The total cpu throttled time",
unit: metrics.Nanoseconds,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Cpu == nil {
return nil
}
return []value{
{
v: float64(stats.Cpu.Throttling.ThrottledTime),
},
}
},
},
}

View File

@ -0,0 +1,70 @@
package cgroups
import (
"github.com/containerd/cgroups"
metrics "github.com/docker/go-metrics"
"github.com/prometheus/client_golang/prometheus"
)
// hugetlbMetrics lists the hugetlb metrics exported for every cgroup. Each
// metric emits one sample per key of stats.Hugetlb, using that key as the
// "page" label value, and emits nothing when the map is nil.
var hugetlbMetrics = []*metric{
	{
		name:   "hugetlb_usage",
		help:   "The hugetlb usage",
		unit:   metrics.Bytes,
		vt:     prometheus.GaugeValue,
		labels: []string{"page"},
		getValues: func(stats *cgroups.Stats) []value {
			if stats.Hugetlb == nil {
				return nil
			}
			var samples []value
			for size, entry := range stats.Hugetlb {
				samples = append(samples, value{l: []string{size}, v: float64(entry.Usage)})
			}
			return samples
		},
	},
	{
		name:   "hugetlb_failcnt",
		help:   "The hugetlb failcnt",
		unit:   metrics.Total,
		vt:     prometheus.GaugeValue,
		labels: []string{"page"},
		getValues: func(stats *cgroups.Stats) []value {
			if stats.Hugetlb == nil {
				return nil
			}
			var samples []value
			for size, entry := range stats.Hugetlb {
				samples = append(samples, value{l: []string{size}, v: float64(entry.Failcnt)})
			}
			return samples
		},
	},
	{
		name:   "hugetlb_max",
		help:   "The hugetlb maximum usage",
		unit:   metrics.Bytes,
		vt:     prometheus.GaugeValue,
		labels: []string{"page"},
		getValues: func(stats *cgroups.Stats) []value {
			if stats.Hugetlb == nil {
				return nil
			}
			var samples []value
			for size, entry := range stats.Hugetlb {
				samples = append(samples, value{l: []string{size}, v: float64(entry.Max)})
			}
			return samples
		},
	},
}

778
metrics/cgroups/memory.go Normal file
View File

@ -0,0 +1,778 @@
package cgroups
import (
"github.com/containerd/cgroups"
metrics "github.com/docker/go-metrics"
"github.com/prometheus/client_golang/prometheus"
)
var memoryMetrics = []*metric{
{
name: "memory_cache",
help: "The cache amount used",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.Cache),
},
}
},
},
{
name: "memory_rss",
help: "The rss amount used",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.RSS),
},
}
},
},
{
name: "memory_rss_huge",
help: "The rss_huge amount used",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.RSSHuge),
},
}
},
},
{
name: "memory_mapped_file",
help: "The mapped_file amount used",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.MappedFile),
},
}
},
},
{
name: "memory_dirty",
help: "The dirty amount",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.Dirty),
},
}
},
},
{
name: "memory_writeback",
help: "The writeback amount",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.Writeback),
},
}
},
},
{
name: "memory_pgpgin",
help: "The pgpgin amount",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.PgPgIn),
},
}
},
},
{
name: "memory_pgpgout",
help: "The pgpgout amount",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.PgPgOut),
},
}
},
},
{
name: "memory_pgfault",
help: "The pgfault amount",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.PgFault),
},
}
},
},
{
name: "memory_pgmajfault",
help: "The pgmajfault amount",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.PgMajFault),
},
}
},
},
{
name: "memory_inactive_anon",
help: "The inactive_anon amount",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.InactiveAnon),
},
}
},
},
{
name: "memory_active_anon",
help: "The active_anon amount",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.ActiveAnon),
},
}
},
},
{
name: "memory_inactive_file",
help: "The inactive_file amount",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.InactiveFile),
},
}
},
},
{
name: "memory_active_file",
help: "The active_file amount",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.ActiveFile),
},
}
},
},
{
name: "memory_unevictable",
help: "The unevictable amount",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.Unevictable),
},
}
},
},
{
name: "memory_hierarchical_memory_limit",
help: "The hierarchical_memory_limit amount",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.HierarchicalMemoryLimit),
},
}
},
},
{
name: "memory_hierarchical_memsw_limit",
help: "The hierarchical_memsw_limit amount",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.HierarchicalSwapLimit),
},
}
},
},
{
name: "memory_total_cache",
help: "The total_cache amount used",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.TotalCache),
},
}
},
},
{
name: "memory_total_rss",
help: "The total_rss amount used",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.TotalRSS),
},
}
},
},
{
name: "memory_total_rss_huge",
help: "The total_rss_huge amount used",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.TotalRSSHuge),
},
}
},
},
{
name: "memory_total_mapped_file",
help: "The total_mapped_file amount used",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.TotalMappedFile),
},
}
},
},
{
name: "memory_total_dirty",
help: "The total_dirty amount",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.TotalDirty),
},
}
},
},
{
name: "memory_total_writeback",
help: "The total_writeback amount",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.TotalWriteback),
},
}
},
},
{
name: "memory_total_pgpgin",
help: "The total_pgpgin amount",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.TotalPgPgIn),
},
}
},
},
{
name: "memory_total_pgpgout",
help: "The total_pgpgout amount",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.TotalPgPgOut),
},
}
},
},
{
name: "memory_total_pgfault",
help: "The total_pgfault amount",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.TotalPgFault),
},
}
},
},
{
name: "memory_total_pgmajfault",
help: "The total_pgmajfault amount",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.TotalPgMajFault),
},
}
},
},
{
name: "memory_total_inactive_anon",
help: "The total_inactive_anon amount",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.TotalInactiveAnon),
},
}
},
},
{
name: "memory_total_active_anon",
help: "The total_active_anon amount",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.TotalActiveAnon),
},
}
},
},
{
name: "memory_total_inactive_file",
help: "The total_inactive_file amount",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.TotalInactiveFile),
},
}
},
},
{
name: "memory_total_active_file",
help: "The total_active_file amount",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.TotalActiveFile),
},
}
},
},
{
name: "memory_total_unevictable",
help: "The total_unevictable amount",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.TotalUnevictable),
},
}
},
},
{
name: "memory_usage_failcnt",
help: "The usage failcnt",
unit: metrics.Total,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.Usage.Failcnt),
},
}
},
},
{
name: "memory_usage_limit",
help: "The memory limit",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.Usage.Limit),
},
}
},
},
{
name: "memory_usage_max",
help: "The memory maximum usage",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.Usage.Max),
},
}
},
},
{
name: "memory_usage_usage",
help: "The memory usage",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.Usage.Usage),
},
}
},
},
{
name: "memory_swap_failcnt",
help: "The swap failcnt",
unit: metrics.Total,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.Swap.Failcnt),
},
}
},
},
{
name: "memory_swap_limit",
help: "The swap limit",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.Swap.Limit),
},
}
},
},
{
name: "memory_swap_max",
help: "The swap maximum usage",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.Swap.Max),
},
}
},
},
{
name: "memory_swap_usage",
help: "The swap usage",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.Swap.Usage),
},
}
},
},
{
name: "memory_kernel_failcnt",
help: "The kernel failcnt",
unit: metrics.Total,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.Kernel.Failcnt),
},
}
},
},
{
name: "memory_kernel_limit",
help: "The kernel limit",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.Kernel.Limit),
},
}
},
},
{
name: "memory_kernel_max",
help: "The kernel maximum usage",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.Kernel.Max),
},
}
},
},
{
name: "memory_kernel_usage",
help: "The kernel usage",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.Kernel.Usage),
},
}
},
},
{
name: "memory_kerneltcp_failcnt",
help: "The kerneltcp failcnt",
unit: metrics.Total,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.KernelTCP.Failcnt),
},
}
},
},
{
name: "memory_kerneltcp_limit",
help: "The kerneltcp limit",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.KernelTCP.Limit),
},
}
},
},
{
name: "memory_kerneltcp_max",
help: "The kerneltcp maximum usage",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.KernelTCP.Max),
},
}
},
},
{
name: "memory_kerneltcp_usage",
help: "The kerneltcp usage",
unit: metrics.Bytes,
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Memory == nil {
return nil
}
return []value{
{
v: float64(stats.Memory.KernelTCP.Usage),
},
}
},
},
}

33
metrics/cgroups/metric.go Normal file
View File

@ -0,0 +1,33 @@
package cgroups
import (
"github.com/containerd/cgroups"
metrics "github.com/docker/go-metrics"
"github.com/prometheus/client_golang/prometheus"
)
// value is a single sample produced by a metric's getValues callback.
type value struct {
// v is the sample value passed to prometheus.MustNewConstMetric.
v float64
// l holds the label values for this sample; metric.collect places the
// cgroup id in front of them.
l []string
}
// metric describes one exported cgroup statistic: how its descriptor is built
// and how its samples are extracted from a stats snapshot.
type metric struct {
// name, help and unit are handed to Namespace.NewDesc when the descriptor
// is built.
name string
help string
unit metrics.Unit
// vt is the value type passed to prometheus.MustNewConstMetric.
vt prometheus.ValueType
// labels are the label names specific to this metric; desc places the
// "id" label name in front of them.
labels []string
// getValues extracts the samples (value plus label values) for this metric
// from stats. It may return nil, in which case nothing is emitted.
getValues func(stats *cgroups.Stats) []value
}
// desc builds the prometheus descriptor for m inside ns. The label names are
// "id" followed by the metric's own labels.
func (m *metric) desc(ns *metrics.Namespace) *prometheus.Desc {
	labelNames := make([]string, 0, len(m.labels)+1)
	labelNames = append(labelNames, "id")
	labelNames = append(labelNames, m.labels...)
	return ns.NewDesc(m.name, m.help, m.unit, labelNames...)
}
// collect extracts m's samples from stats and sends one const metric per
// sample on ch. Each sample's label values are the cgroup id followed by the
// sample's own label values.
func (m *metric) collect(id string, stats *cgroups.Stats, ns *metrics.Namespace, ch chan<- prometheus.Metric) {
	for _, sample := range m.getValues(stats) {
		ch <- prometheus.MustNewConstMetric(
			m.desc(ns),
			m.vt,
			sample.v,
			append([]string{id}, sample.l...)...,
		)
	}
}

118
metrics/cgroups/metrics.go Normal file
View File

@ -0,0 +1,118 @@
package cgroups
import (
"errors"
"strconv"
"sync"
"github.com/Sirupsen/logrus"
"github.com/containerd/cgroups"
metrics "github.com/docker/go-metrics"
"github.com/prometheus/client_golang/prometheus"
)
var (
// ErrAlreadyCollected is returned by Collector.Add when the id is already
// registered with the collector.
ErrAlreadyCollected = errors.New("cgroup is already being collected")
// ErrCgroupNotExists is returned by Collector.Get when the id is not
// registered with the collector.
ErrCgroupNotExists = errors.New("cgroup does not exist in the collector")
)
// Trigger is a callback invoked when an event happens on a cgroup. The
// OOMCollector calls it with the id the cgroup was added under and the cgroup
// where the event originated.
type Trigger func(string, cgroups.Cgroup)
// NewCollector creates a Collector holding the pid, cpu, memory, hugetlb and
// blkio metric sets, adds it to the provided namespace, and returns it so that
// cgroups can be added for collection.
func NewCollector(ns *metrics.Namespace) *Collector {
	collector := &Collector{
		ns:      ns,
		cgroups: make(map[string]cgroups.Cgroup),
	}
	// The order of the groups determines the order of collector.metrics.
	groups := [][]*metric{
		pidMetrics,
		cpuMetrics,
		memoryMetrics,
		hugetlbMetrics,
		blkioMetrics,
	}
	for _, group := range groups {
		collector.metrics = append(collector.metrics, group...)
	}
	ns.Add(collector)
	return collector
}
// Collector provides the ability to collect container stats and export
// them in the prometheus format
type Collector struct {
// mu guards cgroups.
mu sync.RWMutex
// cgroups maps the id given to Add to the cgroup being collected.
cgroups map[string]cgroups.Cgroup
// ns is the namespace used to build every metric descriptor.
ns *metrics.Namespace
// metrics is the list of metrics evaluated for each cgroup; it is filled
// in by NewCollector.
metrics []*metric
}
// Describe sends the descriptor of every metric known to the collector on ch.
func (c *Collector) Describe(ch chan<- *prometheus.Desc) {
	for i := range c.metrics {
		ch <- c.metrics[i].desc(c.ns)
	}
}
// Collect gathers stats for every registered cgroup and sends the resulting
// metrics on ch. One goroutine is started per cgroup while the read lock is
// held; the lock is released before waiting for them to finish.
func (c *Collector) Collect(ch chan<- prometheus.Metric) {
	var pending sync.WaitGroup
	c.mu.RLock()
	for id, cg := range c.cgroups {
		pending.Add(1)
		go c.collect(id, cg, ch, &pending)
	}
	c.mu.RUnlock()
	pending.Wait()
}
// collect stats a single cgroup and emits every metric for it on ch. A stat
// failure is logged and produces no metrics. wg is always marked done on
// return.
func (c *Collector) collect(id string, cg cgroups.Cgroup, ch chan<- prometheus.Metric, wg *sync.WaitGroup) {
	defer wg.Done()

	snapshot, err := cg.Stat(cgroups.IgnoreNotExist)
	if err != nil {
		logrus.WithError(err).Errorf("stat cgroup %s", id)
		return
	}
	for i := range c.metrics {
		c.metrics[i].collect(id, snapshot, c.ns, ch)
	}
}
// Add registers cg under id so that its metrics are collected and exported.
// It returns ErrAlreadyCollected if the id is already registered.
func (c *Collector) Add(id string, cg cgroups.Cgroup) error {
	c.mu.Lock()
	defer c.mu.Unlock()

	_, exists := c.cgroups[id]
	if exists {
		return ErrAlreadyCollected
	}
	c.cgroups[id] = cg
	return nil
}
// Get returns the cgroup that is being collected under the provided id.
// It returns ErrCgroupNotExists if the id is not being collected.
func (c *Collector) Get(id string) (cgroups.Cgroup, error) {
	// The lookup never mutates the map, so the shared read lock is enough and
	// does not serialize Get against Collect or other readers.
	c.mu.RLock()
	defer c.mu.RUnlock()
	cg, ok := c.cgroups[id]
	if !ok {
		return nil, ErrCgroupNotExists
	}
	return cg, nil
}
// Remove drops the cgroup registered under id from the collector. Removing an
// id that is not registered is a no-op.
func (c *Collector) Remove(id string) {
	c.mu.Lock()
	delete(c.cgroups, id)
	c.mu.Unlock()
}
// blkioValues converts blkio entries into samples. Each sample carries the
// entry's value and, as label values, its op, device, and the major and minor
// numbers formatted in base 10. It returns nil for an empty input.
func blkioValues(l []cgroups.BlkioEntry) []value {
	var samples []value
	for i := range l {
		entry := l[i]
		major := strconv.FormatUint(entry.Major, 10)
		minor := strconv.FormatUint(entry.Minor, 10)
		samples = append(samples, value{
			v: float64(entry.Value),
			l: []string{entry.Op, entry.Device, major, minor},
		})
	}
	return samples
}

116
metrics/cgroups/oom.go Normal file
View File

@ -0,0 +1,116 @@
package cgroups
import (
"sync"
"golang.org/x/sys/unix"
"github.com/Sirupsen/logrus"
"github.com/containerd/cgroups"
metrics "github.com/docker/go-metrics"
)
// NewOOMCollector creates an epoll instance, creates the "memory_oom" labeled
// gauge in ns, and starts a background goroutine that waits for events on the
// epoll fd. It returns the error from EpollCreate1 if the epoll instance
// cannot be created.
func NewOOMCollector(ns *metrics.Namespace) (*OOMCollector, error) {
	epfd, err := unix.EpollCreate1(unix.EPOLL_CLOEXEC)
	if err != nil {
		return nil, err
	}
	gauge := ns.NewLabeledGauge("memory_oom", "The number of times a container received an oom event", metrics.Total, "id")
	collector := &OOMCollector{
		fd:        epfd,
		memoryOOM: gauge,
		set:       make(map[uintptr]*oom),
	}
	go collector.start()
	return collector, nil
}
// OOMCollector watches cgroup OOM event fds through epoll and counts the
// events per container id in a labeled gauge.
type OOMCollector struct {
// mu guards set.
mu sync.Mutex
// memoryOOM is the "memory_oom" gauge, labeled by container id.
memoryOOM metrics.LabeledGauge
// fd is the epoll file descriptor created in NewOOMCollector.
fd int
// set maps a cgroup's OOM event fd to the cgroup it belongs to.
set map[uintptr]*oom
}
// oom is the bookkeeping entry kept for each cgroup registered with an
// OOMCollector.
type oom struct {
// id is the container id used as the gauge label and passed to triggers.
id string
// c is the cgroup the event fd belongs to.
c cgroups.Cgroup
// triggers are invoked, in order, each time an OOM event is processed.
triggers []Trigger
}
// Add starts watching cg for OOM events under the given id. The optional
// triggers are invoked, in order, each time an OOM event is processed for the
// cgroup.
//
// It returns the error from cg.OOMEventFD or from registering the event fd
// with epoll. On a registration failure nothing is recorded: the cgroup is not
// added to the set, the gauge is not initialised, and the event fd is closed.
func (o *OOMCollector) Add(id string, cg cgroups.Cgroup, triggers ...Trigger) error {
	o.mu.Lock()
	defer o.mu.Unlock()
	fd, err := cg.OOMEventFD()
	if err != nil {
		return err
	}
	event := unix.EpollEvent{
		Fd:     int32(fd),
		Events: unix.EPOLLHUP | unix.EPOLLIN | unix.EPOLLERR,
	}
	// Register with epoll before recording any state so a failure cannot
	// leave a stale entry behind. process looks the fd up under o.mu, which is
	// held until this function returns, so an event that fires right after
	// registration still finds the entry.
	if err := unix.EpollCtl(o.fd, unix.EPOLL_CTL_ADD, int(fd), &event); err != nil {
		// The fd was never handed to anyone else; close it so it does not
		// leak. The close error is ignored because the EpollCtl error is the
		// one worth reporting.
		unix.Close(int(fd))
		return err
	}
	o.set[fd] = &oom{
		id:       id,
		c:        cg,
		triggers: triggers,
	}
	// set the gauge's default value
	o.memoryOOM.WithValues(id).Set(0)
	return nil
}
// Close closes the collector's epoll file descriptor and returns the error
// from the close call.
func (o *OOMCollector) Close() error {
	epfd := o.fd
	return unix.Close(epfd)
}
// start is the event loop run in a goroutine by NewOOMCollector. It waits on
// the epoll fd without a timeout and hands every ready event to process.
// EINTR is retried; any other EpollWait error is logged with Fatal.
func (o *OOMCollector) start() {
	var ready [128]unix.EpollEvent
	for {
		count, err := unix.EpollWait(o.fd, ready[:], -1)
		if err != nil {
			if err != unix.EINTR {
				logrus.WithField("error", err).Fatal("cgroups: epoll wait")
			}
			continue
		}
		for idx := 0; idx < count; idx++ {
			ev := &ready[idx]
			o.process(uintptr(ev.Fd), ev.Events)
		}
	}
}
// process handles one epoll event for fd. The fd is always drained first. If
// the fd is not in the set the event is dropped. If the owning cgroup reports
// the Deleted state, the entry is removed and the fd closed. Otherwise the
// oom gauge for the container is incremented and its triggers are run in
// order. The event mask is currently unused.
func (o *OOMCollector) process(fd uintptr, event uint32) {
	// make sure to always flush the fd
	flush(fd)

	o.mu.Lock()
	entry, tracked := o.set[fd]
	o.mu.Unlock()
	if !tracked {
		return
	}

	// An event can also be caused by the cgroup being deleted and its fd being
	// closed; in that case close our copy and forget the container.
	if entry.c.State() == cgroups.Deleted {
		o.mu.Lock()
		delete(o.set, fd)
		o.mu.Unlock()
		unix.Close(int(fd))
		return
	}

	o.memoryOOM.WithValues(entry.id).Inc(1)
	for idx := range entry.triggers {
		entry.triggers[idx](entry.id, entry.c)
	}
}
// flush performs a single read of up to 8 bytes from fd, discarding the data,
// and returns the read error, if any.
func flush(fd uintptr) error {
	scratch := make([]byte, 8)
	if _, err := unix.Read(int(fd), scratch); err != nil {
		return err
	}
	return nil
}

42
metrics/cgroups/pids.go Normal file
View File

@ -0,0 +1,42 @@
package cgroups
import (
"github.com/containerd/cgroups"
metrics "github.com/docker/go-metrics"
"github.com/prometheus/client_golang/prometheus"
)
var pidMetrics = []*metric{
{
name: "pids",
help: "The limit to the number of pids allowed",
unit: metrics.Unit("limit"),
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Pids == nil {
return nil
}
return []value{
{
v: float64(stats.Pids.Limit),
},
}
},
},
{
name: "pids",
help: "The current number of pids",
unit: metrics.Unit("current"),
vt: prometheus.GaugeValue,
getValues: func(stats *cgroups.Stats) []value {
if stats.Pids == nil {
return nil
}
return []value{
{
v: float64(stats.Pids.Current),
},
}
},
},
}