Add namespace to container metrics

Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
This commit is contained in:
Michael Crosby
2017-06-27 10:26:02 -07:00
parent 85f61f6f51
commit 6ec84ef83c
7 changed files with 50 additions and 53 deletions

View File

@@ -3,7 +3,6 @@
package cgroups
import (
"fmt"
"time"
"github.com/containerd/cgroups"
@@ -44,12 +43,8 @@ type cgroupsMonitor struct {
events chan<- *plugin.Event
}
// getID builds a single identifier string for a task by formatting the
// Namespace and ID fields of t.Info() as "<namespace>-<id>".
// NOTE(review): the collector and oom calls that pass info.ID and
// info.Namespace as separate arguments do not use this helper — confirm
// whether it is still needed.
func getID(t plugin.Task) string {
return fmt.Sprintf("%s-%s", t.Info().Namespace, t.Info().ID)
}
func (m *cgroupsMonitor) Monitor(c plugin.Task) error {
id := getID(c)
info := c.Info()
state, err := c.State(m.context)
if err != nil {
return err
@@ -58,14 +53,15 @@ func (m *cgroupsMonitor) Monitor(c plugin.Task) error {
if err != nil {
return err
}
if err := m.collector.Add(id, cg); err != nil {
if err := m.collector.Add(info.ID, info.Namespace, cg); err != nil {
return err
}
return m.oom.Add(id, cg, m.trigger)
return m.oom.Add(info.ID, info.Namespace, cg, m.trigger)
}
// Stop removes the task's entry from the metrics collector. It always
// returns nil.
// NOTE(review): only m.collector is touched here; the m.oom.Add registration
// made in Monitor has no counterpart in this method — confirm that is
// intentional.
func (m *cgroupsMonitor) Stop(c plugin.Task) error {
// single-argument form: the entry is identified by the combined getID string
m.collector.Remove(getID(c))
// two-argument form: ID and namespace are handed to the collector separately
info := c.Info()
m.collector.Remove(info.ID, info.Namespace)
return nil
}

View File

@@ -22,12 +22,13 @@ type metric struct {
}
// desc builds the prometheus descriptor for this metric inside ns from the
// metric's name, help text and unit. The label names are the fixed leading
// labels followed by the metric's own labels (m.labels); the order must match
// the order of label values supplied in (*metric).collect.
func (m *metric) desc(ns *metrics.Namespace) *prometheus.Desc {
// form with "id" as the only fixed label
return ns.NewDesc(m.name, m.help, m.unit, append([]string{"id"}, m.labels...)...)
// the namespace label is for containerd namespaces
return ns.NewDesc(m.name, m.help, m.unit, append([]string{"id", "namespace"}, m.labels...)...)
}
// collect sends one constant prometheus metric on ch for every value that
// m.getValues extracts from stats. Each metric's label values are the fixed
// leading values (the container id, and the namespace in the two-label form)
// followed by the value's own labels (v.l).
// NOTE(review): m.desc(ns) is rebuilt for every value in the loop, and
// MustNewConstMetric follows the Must convention (presumably panics on a
// label mismatch) — confirm against the prometheus client documentation.
func (m *metric) collect(id string, stats *cgroups.Stats, ns *metrics.Namespace, ch chan<- prometheus.Metric) {
func (m *metric) collect(id, namespace string, stats *cgroups.Stats, ns *metrics.Namespace, ch chan<- prometheus.Metric) {
values := m.getValues(stats)
for _, v := range values {
ch <- prometheus.MustNewConstMetric(m.desc(ns), m.vt, v.v, append([]string{id}, v.l...)...)
// id and namespace line up with the "id" and "namespace" label names in desc
ch <- prometheus.MustNewConstMetric(m.desc(ns), m.vt, v.v, append([]string{id, namespace}, v.l...)...)
}
}

View File

@@ -26,7 +26,7 @@ func NewCollector(ns *metrics.Namespace) *Collector {
// add machine cpus and memory info
c := &Collector{
ns: ns,
cgroups: make(map[string]cgroups.Cgroup),
cgroups: make(map[string]*task),
}
c.metrics = append(c.metrics, pidMetrics...)
c.metrics = append(c.metrics, cpuMetrics...)
@@ -37,12 +37,18 @@ func NewCollector(ns *metrics.Namespace) *Collector {
return c
}
// task bundles a cgroup with the id and namespace it was registered under,
// so that both can be attached as label values when its stats are collected.
type task struct {
// id is the identifier passed to Collector.Add
id string
// namespace is the namespace passed to Collector.Add
namespace string
// cgroup is the cgroup whose stats are read during collection
cgroup cgroups.Cgroup
}
// Collector provides the ability to collect container stats and export
// them in the prometheus format.
type Collector struct {
// mu guards the cgroups map; Add, Get, Remove and Collect all take it
mu sync.RWMutex
// form that maps an id directly to its cgroup
cgroups map[string]cgroups.Cgroup
// form that maps the id+namespace key built in Add to a task record
cgroups map[string]*task
// ns is the metrics namespace handed to every metric when collecting
ns *metrics.Namespace
// metrics is the list of metric definitions evaluated for each cgroup
metrics []*metric
}
@@ -56,15 +62,15 @@ func (c *Collector) Describe(ch chan<- *prometheus.Desc) {
// Collect gathers stats for every registered cgroup and sends the resulting
// metrics on ch. One goroutine per entry is started while the read lock is
// held; the lock is then released and the call blocks until every goroutine
// has called wg.Done.
// NOTE(review): the number of goroutines equals the number of registered
// entries, with no upper bound.
func (c *Collector) Collect(ch chan<- prometheus.Metric) {
c.mu.RLock()
wg := &sync.WaitGroup{}
for id, cg := range c.cgroups {
for _, t := range c.cgroups {
wg.Add(1)
go c.collect(id, cg, ch, wg)
// id and namespace are taken from the task record, not from the map key
go c.collect(t.id, t.namespace, t.cgroup, ch, wg)
}
c.mu.RUnlock()
wg.Wait()
}
func (c *Collector) collect(id string, cg cgroups.Cgroup, ch chan<- prometheus.Metric, wg *sync.WaitGroup) {
func (c *Collector) collect(id, namespace string, cg cgroups.Cgroup, ch chan<- prometheus.Metric, wg *sync.WaitGroup) {
defer wg.Done()
stats, err := cg.Stat(cgroups.IgnoreNotExist)
if err != nil {
@@ -72,38 +78,42 @@ func (c *Collector) collect(id string, cg cgroups.Cgroup, ch chan<- prometheus.M
return
}
for _, m := range c.metrics {
m.collect(id, stats, c.ns, ch)
m.collect(id, namespace, stats, c.ns, ch)
}
}
// Add registers the provided cgroup under the given id (and namespace) so
// that its metrics are collected and exported.
// It returns ErrAlreadyCollected when an entry already exists under the same
// map key, and nil otherwise.
func (c *Collector) Add(id string, cg cgroups.Cgroup) error {
func (c *Collector) Add(id, namespace string, cg cgroups.Cgroup) error {
c.mu.Lock()
defer c.mu.Unlock()
if _, ok := c.cgroups[id]; ok {
// NOTE(review): the key is a plain concatenation with no separator, so
// distinct pairs such as ("ab", "c") and ("a", "bc") produce the same key
// and the second Add would be rejected as already collected. A struct key
// holding both fields would avoid this.
if _, ok := c.cgroups[id+namespace]; ok {
return ErrAlreadyCollected
}
c.cgroups[id] = cg
c.cgroups[id+namespace] = &task{
id: id,
namespace: namespace,
cgroup: cg,
}
return nil
}
// Get returns the cgroup that is being collected under the provided id
// (and namespace).
// It returns ErrCgroupNotExists if no entry is stored under that key.
// NOTE(review): this is a read-only lookup but takes the exclusive Lock
// although mu is a sync.RWMutex; RLock looks sufficient — confirm.
func (c *Collector) Get(id string) (cgroups.Cgroup, error) {
func (c *Collector) Get(id, namespace string) (cgroups.Cgroup, error) {
c.mu.Lock()
defer c.mu.Unlock()
cg, ok := c.cgroups[id]
// NOTE(review): id+namespace has no separator, so different (id, namespace)
// pairs can resolve to the same entry, e.g. ("ab", "c") and ("a", "bc").
t, ok := c.cgroups[id+namespace]
if !ok {
return nil, ErrCgroupNotExists
}
return cg, nil
return t.cgroup, nil
}
// Remove removes the cgroup registered under the provided id (and namespace)
// from the collector. Removing a key that is not present is a no-op, since
// the built-in delete ignores missing keys.
func (c *Collector) Remove(id string) {
func (c *Collector) Remove(id, namespace string) {
c.mu.Lock()
defer c.mu.Unlock()
delete(c.cgroups, id)
// NOTE(review): id+namespace has no separator, so this can delete the entry
// of a different (id, namespace) pair whose concatenation is identical.
delete(c.cgroups, id+namespace)
}
func blkioValues(l []cgroups.BlkioEntry) []value {

View File

@@ -17,7 +17,7 @@ func NewOOMCollector(ns *metrics.Namespace) (*OOMCollector, error) {
}
c := &OOMCollector{
fd: fd,
memoryOOM: ns.NewLabeledGauge("memory_oom", "The number of times a container received an oom event", metrics.Total, "id"),
memoryOOM: ns.NewLabeledGauge("memory_oom", "The number of times a container received an oom event", metrics.Total, "id", "namespace"),
set: make(map[uintptr]*oom),
}
go c.start()
@@ -33,12 +33,13 @@ type OOMCollector struct {
}
// oom is the record the OOMCollector keeps in its set map for one registered
// cgroup; entries are stored under the fd returned by the cgroup's
// OOMEventFD call.
type oom struct {
// three-field form, without a namespace
id string
c cgroups.Cgroup
triggers []Trigger
// four-field form: id and namespace are the label values used for the
// memory_oom gauge, c is the cgroup handed to the triggers, and triggers
// are the callbacks invoked with (id, c) when an event is processed
id string
namespace string
c cgroups.Cgroup
triggers []Trigger
}
func (o *OOMCollector) Add(id string, cg cgroups.Cgroup, triggers ...Trigger) error {
func (o *OOMCollector) Add(id, namespace string, cg cgroups.Cgroup, triggers ...Trigger) error {
o.mu.Lock()
defer o.mu.Unlock()
fd, err := cg.OOMEventFD()
@@ -46,12 +47,13 @@ func (o *OOMCollector) Add(id string, cg cgroups.Cgroup, triggers ...Trigger) er
return err
}
o.set[fd] = &oom{
id: id,
c: cg,
triggers: triggers,
id: id,
c: cg,
triggers: triggers,
namespace: namespace,
}
// set the gauge's default value
o.memoryOOM.WithValues(id).Set(0)
o.memoryOOM.WithValues(id, namespace).Set(0)
event := unix.EpollEvent{
Fd: int32(fd),
Events: unix.EPOLLHUP | unix.EPOLLIN | unix.EPOLLERR,
@@ -103,7 +105,7 @@ func (o *OOMCollector) process(fd uintptr, event uint32) {
unix.Close(int(fd))
return
}
o.memoryOOM.WithValues(info.id).Inc(1)
o.memoryOOM.WithValues(info.id, info.namespace).Inc(1)
for _, t := range info.triggers {
t(info.id, info.c)
}