containerd/metrics/cgroups/metrics.go
Michael Crosby 72bcdb8fa9 Add config for exporting container metrics to prom
This adds an option for the cgroups monitor to include container metrics
in the prometheus output.  We will have to use the plugin to emit oom
events via the events service but when the `no_prom` setting is set for
the plugin container metrics will not be included in the prom output.

Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
2017-09-07 13:40:55 -04:00

121 lines
2.8 KiB
Go

// +build linux
package cgroups
import (
"errors"
"fmt"
"sync"
"github.com/containerd/cgroups"
metrics "github.com/docker/go-metrics"
"github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"
)
var (
ErrAlreadyCollected = errors.New("cgroup is already being collected")
ErrCgroupNotExists = errors.New("cgroup does not exist in the collector")
)
// Trigger will be called when an event happens and provides the cgroup
// where the event originated from
type Trigger func(string, string, cgroups.Cgroup)
// New registers the Collector with the provided namespace and returns it so
// that cgroups can be added for collection
func NewCollector(ns *metrics.Namespace) *Collector {
if ns == nil {
return &Collector{}
}
// add machine cpus and memory info
c := &Collector{
ns: ns,
cgroups: make(map[string]*task),
}
c.metrics = append(c.metrics, pidMetrics...)
c.metrics = append(c.metrics, cpuMetrics...)
c.metrics = append(c.metrics, memoryMetrics...)
c.metrics = append(c.metrics, hugetlbMetrics...)
c.metrics = append(c.metrics, blkioMetrics...)
ns.Add(c)
return c
}
type task struct {
id string
namespace string
cgroup cgroups.Cgroup
}
func taskID(id, namespace string) string {
return fmt.Sprintf("%s-%s", id, namespace)
}
// Collector provides the ability to collect container stats and export
// them in the prometheus format
type Collector struct {
mu sync.RWMutex
cgroups map[string]*task
ns *metrics.Namespace
metrics []*metric
}
func (c *Collector) Describe(ch chan<- *prometheus.Desc) {
for _, m := range c.metrics {
ch <- m.desc(c.ns)
}
}
func (c *Collector) Collect(ch chan<- prometheus.Metric) {
c.mu.RLock()
wg := &sync.WaitGroup{}
for _, t := range c.cgroups {
wg.Add(1)
go c.collect(t.id, t.namespace, t.cgroup, ch, wg)
}
c.mu.RUnlock()
wg.Wait()
}
func (c *Collector) collect(id, namespace string, cg cgroups.Cgroup, ch chan<- prometheus.Metric, wg *sync.WaitGroup) {
defer wg.Done()
stats, err := cg.Stat(cgroups.IgnoreNotExist)
if err != nil {
logrus.WithError(err).Errorf("stat cgroup %s", id)
return
}
for _, m := range c.metrics {
m.collect(id, namespace, stats, c.ns, ch)
}
}
// Add adds the provided cgroup and id so that metrics are collected and exported
func (c *Collector) Add(id, namespace string, cg cgroups.Cgroup) error {
if c.ns == nil {
return nil
}
c.mu.Lock()
defer c.mu.Unlock()
if _, ok := c.cgroups[taskID(id, namespace)]; ok {
return ErrAlreadyCollected
}
c.cgroups[taskID(id, namespace)] = &task{
id: id,
namespace: namespace,
cgroup: cg,
}
return nil
}
// Remove removes the provided cgroup by id from the collector
func (c *Collector) Remove(id, namespace string) {
if c.ns == nil {
return
}
c.mu.Lock()
defer c.mu.Unlock()
delete(c.cgroups, taskID(id, namespace))
}