Add namespace to container metrics

Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
This commit is contained in:
Michael Crosby 2017-06-27 10:26:02 -07:00
parent 85f61f6f51
commit 6ec84ef83c
7 changed files with 50 additions and 53 deletions

View File

@ -276,7 +276,7 @@ func TestContainerExec(t *testing.T) {
"exit 6", "exit 6",
} }
process, err := task.Exec(ctx, &processSpec, empty()) process, err := task.Exec(ctx, processSpec, empty())
if err != nil { if err != nil {
t.Error(err) t.Error(err)
return return

View File

@ -400,21 +400,9 @@ func (r *Runtime) terminate(ctx context.Context, bundle *bundle, ns, id string)
if err != nil { if err != nil {
return err return err
} }
if err := rt.Kill(ctx, id, int(unix.SIGKILL), &runc.KillOpts{All: true}); err != nil { if err := rt.Delete(ctx, id, &runc.DeleteOpts{
log.G(ctx).WithError(err).Warnf("kill all processes for %s", id) Force: true,
} }); err != nil {
// it can take a while for the container to be killed so poll for the container's status
// until it is in a stopped state
status := "running"
for status != "stopped" {
c, err := rt.State(ctx, id)
if err != nil {
break
}
status = c.Status
time.Sleep(50 * time.Millisecond)
}
if err := rt.Delete(ctx, id); err != nil {
log.G(ctx).WithError(err).Warnf("delete runtime state %s", id) log.G(ctx).WithError(err).Warnf("delete runtime state %s", id)
} }
if err := unix.Unmount(filepath.Join(bundle.path, "rootfs"), 0); err != nil { if err := unix.Unmount(filepath.Join(bundle.path, "rootfs"), 0); err != nil {

View File

@ -205,7 +205,7 @@ func (p *initProcess) Delete(context context.Context) error {
} }
p.killAll(context) p.killAll(context)
p.Wait() p.Wait()
err = p.runc.Delete(context, p.id) err = p.runc.Delete(context, p.id, nil)
if p.io != nil { if p.io != nil {
for _, c := range p.closers { for _, c := range p.closers {
c.Close() c.Close()

View File

@ -3,7 +3,6 @@
package cgroups package cgroups
import ( import (
"fmt"
"time" "time"
"github.com/containerd/cgroups" "github.com/containerd/cgroups"
@ -44,12 +43,8 @@ type cgroupsMonitor struct {
events chan<- *plugin.Event events chan<- *plugin.Event
} }
func getID(t plugin.Task) string {
return fmt.Sprintf("%s-%s", t.Info().Namespace, t.Info().ID)
}
func (m *cgroupsMonitor) Monitor(c plugin.Task) error { func (m *cgroupsMonitor) Monitor(c plugin.Task) error {
id := getID(c) info := c.Info()
state, err := c.State(m.context) state, err := c.State(m.context)
if err != nil { if err != nil {
return err return err
@ -58,14 +53,15 @@ func (m *cgroupsMonitor) Monitor(c plugin.Task) error {
if err != nil { if err != nil {
return err return err
} }
if err := m.collector.Add(id, cg); err != nil { if err := m.collector.Add(info.ID, info.Namespace, cg); err != nil {
return err return err
} }
return m.oom.Add(id, cg, m.trigger) return m.oom.Add(info.ID, info.Namespace, cg, m.trigger)
} }
func (m *cgroupsMonitor) Stop(c plugin.Task) error { func (m *cgroupsMonitor) Stop(c plugin.Task) error {
m.collector.Remove(getID(c)) info := c.Info()
m.collector.Remove(info.ID, info.Namespace)
return nil return nil
} }

View File

@ -22,12 +22,13 @@ type metric struct {
} }
func (m *metric) desc(ns *metrics.Namespace) *prometheus.Desc { func (m *metric) desc(ns *metrics.Namespace) *prometheus.Desc {
return ns.NewDesc(m.name, m.help, m.unit, append([]string{"id"}, m.labels...)...) // the namespace label is for containerd namespaces
return ns.NewDesc(m.name, m.help, m.unit, append([]string{"id", "namespace"}, m.labels...)...)
} }
func (m *metric) collect(id string, stats *cgroups.Stats, ns *metrics.Namespace, ch chan<- prometheus.Metric) { func (m *metric) collect(id, namespace string, stats *cgroups.Stats, ns *metrics.Namespace, ch chan<- prometheus.Metric) {
values := m.getValues(stats) values := m.getValues(stats)
for _, v := range values { for _, v := range values {
ch <- prometheus.MustNewConstMetric(m.desc(ns), m.vt, v.v, append([]string{id}, v.l...)...) ch <- prometheus.MustNewConstMetric(m.desc(ns), m.vt, v.v, append([]string{id, namespace}, v.l...)...)
} }
} }

View File

@ -26,7 +26,7 @@ func NewCollector(ns *metrics.Namespace) *Collector {
// add machine cpus and memory info // add machine cpus and memory info
c := &Collector{ c := &Collector{
ns: ns, ns: ns,
cgroups: make(map[string]cgroups.Cgroup), cgroups: make(map[string]*task),
} }
c.metrics = append(c.metrics, pidMetrics...) c.metrics = append(c.metrics, pidMetrics...)
c.metrics = append(c.metrics, cpuMetrics...) c.metrics = append(c.metrics, cpuMetrics...)
@ -37,12 +37,18 @@ func NewCollector(ns *metrics.Namespace) *Collector {
return c return c
} }
type task struct {
id string
namespace string
cgroup cgroups.Cgroup
}
// Collector provides the ability to collect container stats and export // Collector provides the ability to collect container stats and export
// them in the prometheus format // them in the prometheus format
type Collector struct { type Collector struct {
mu sync.RWMutex mu sync.RWMutex
cgroups map[string]cgroups.Cgroup cgroups map[string]*task
ns *metrics.Namespace ns *metrics.Namespace
metrics []*metric metrics []*metric
} }
@ -56,15 +62,15 @@ func (c *Collector) Describe(ch chan<- *prometheus.Desc) {
func (c *Collector) Collect(ch chan<- prometheus.Metric) { func (c *Collector) Collect(ch chan<- prometheus.Metric) {
c.mu.RLock() c.mu.RLock()
wg := &sync.WaitGroup{} wg := &sync.WaitGroup{}
for id, cg := range c.cgroups { for _, t := range c.cgroups {
wg.Add(1) wg.Add(1)
go c.collect(id, cg, ch, wg) go c.collect(t.id, t.namespace, t.cgroup, ch, wg)
} }
c.mu.RUnlock() c.mu.RUnlock()
wg.Wait() wg.Wait()
} }
func (c *Collector) collect(id string, cg cgroups.Cgroup, ch chan<- prometheus.Metric, wg *sync.WaitGroup) { func (c *Collector) collect(id, namespace string, cg cgroups.Cgroup, ch chan<- prometheus.Metric, wg *sync.WaitGroup) {
defer wg.Done() defer wg.Done()
stats, err := cg.Stat(cgroups.IgnoreNotExist) stats, err := cg.Stat(cgroups.IgnoreNotExist)
if err != nil { if err != nil {
@ -72,38 +78,42 @@ func (c *Collector) collect(id string, cg cgroups.Cgroup, ch chan<- prometheus.M
return return
} }
for _, m := range c.metrics { for _, m := range c.metrics {
m.collect(id, stats, c.ns, ch) m.collect(id, namespace, stats, c.ns, ch)
} }
} }
// Add adds the provided cgroup and id so that metrics are collected and exported // Add adds the provided cgroup and id so that metrics are collected and exported
func (c *Collector) Add(id string, cg cgroups.Cgroup) error { func (c *Collector) Add(id, namespace string, cg cgroups.Cgroup) error {
c.mu.Lock() c.mu.Lock()
defer c.mu.Unlock() defer c.mu.Unlock()
if _, ok := c.cgroups[id]; ok { if _, ok := c.cgroups[id+namespace]; ok {
return ErrAlreadyCollected return ErrAlreadyCollected
} }
c.cgroups[id] = cg c.cgroups[id+namespace] = &task{
id: id,
namespace: namespace,
cgroup: cg,
}
return nil return nil
} }
// Get returns the cgroup that is being collected under the provided id // Get returns the cgroup that is being collected under the provided id
// returns ErrCgroupNotExists if the id is not being collected // returns ErrCgroupNotExists if the id is not being collected
func (c *Collector) Get(id string) (cgroups.Cgroup, error) { func (c *Collector) Get(id, namespace string) (cgroups.Cgroup, error) {
c.mu.Lock() c.mu.Lock()
defer c.mu.Unlock() defer c.mu.Unlock()
cg, ok := c.cgroups[id] t, ok := c.cgroups[id+namespace]
if !ok { if !ok {
return nil, ErrCgroupNotExists return nil, ErrCgroupNotExists
} }
return cg, nil return t.cgroup, nil
} }
// Remove removes the provided cgroup by id from the collector // Remove removes the provided cgroup by id from the collector
func (c *Collector) Remove(id string) { func (c *Collector) Remove(id, namespace string) {
c.mu.Lock() c.mu.Lock()
defer c.mu.Unlock() defer c.mu.Unlock()
delete(c.cgroups, id) delete(c.cgroups, id+namespace)
} }
func blkioValues(l []cgroups.BlkioEntry) []value { func blkioValues(l []cgroups.BlkioEntry) []value {

View File

@ -17,7 +17,7 @@ func NewOOMCollector(ns *metrics.Namespace) (*OOMCollector, error) {
} }
c := &OOMCollector{ c := &OOMCollector{
fd: fd, fd: fd,
memoryOOM: ns.NewLabeledGauge("memory_oom", "The number of times a container received an oom event", metrics.Total, "id"), memoryOOM: ns.NewLabeledGauge("memory_oom", "The number of times a container received an oom event", metrics.Total, "id", "namespace"),
set: make(map[uintptr]*oom), set: make(map[uintptr]*oom),
} }
go c.start() go c.start()
@ -34,11 +34,12 @@ type OOMCollector struct {
type oom struct { type oom struct {
id string id string
namespace string
c cgroups.Cgroup c cgroups.Cgroup
triggers []Trigger triggers []Trigger
} }
func (o *OOMCollector) Add(id string, cg cgroups.Cgroup, triggers ...Trigger) error { func (o *OOMCollector) Add(id, namespace string, cg cgroups.Cgroup, triggers ...Trigger) error {
o.mu.Lock() o.mu.Lock()
defer o.mu.Unlock() defer o.mu.Unlock()
fd, err := cg.OOMEventFD() fd, err := cg.OOMEventFD()
@ -49,9 +50,10 @@ func (o *OOMCollector) Add(id string, cg cgroups.Cgroup, triggers ...Trigger) er
id: id, id: id,
c: cg, c: cg,
triggers: triggers, triggers: triggers,
namespace: namespace,
} }
// set the gauge's default value // set the gauge's default value
o.memoryOOM.WithValues(id).Set(0) o.memoryOOM.WithValues(id, namespace).Set(0)
event := unix.EpollEvent{ event := unix.EpollEvent{
Fd: int32(fd), Fd: int32(fd),
Events: unix.EPOLLHUP | unix.EPOLLIN | unix.EPOLLERR, Events: unix.EPOLLHUP | unix.EPOLLIN | unix.EPOLLERR,
@ -103,7 +105,7 @@ func (o *OOMCollector) process(fd uintptr, event uint32) {
unix.Close(int(fd)) unix.Close(int(fd))
return return
} }
o.memoryOOM.WithValues(info.id).Inc(1) o.memoryOOM.WithValues(info.id, info.namespace).Inc(1)
for _, t := range info.triggers { for _, t := range info.triggers {
t(info.id, info.c) t(info.id, info.c)
} }