Merge pull request #1586 from mathieui/postmortem-stats

Collect cgroup stats one last time before exit
This commit is contained in:
Michael Crosby 2017-10-06 15:44:28 -04:00 committed by GitHub
commit 6f351f0a85
3 changed files with 34 additions and 9 deletions

View File

@ -70,6 +70,8 @@ func (m *cgroupsMonitor) Monitor(c runtime.Task) error {
func (m *cgroupsMonitor) Stop(c runtime.Task) error { func (m *cgroupsMonitor) Stop(c runtime.Task) error {
info := c.Info() info := c.Info()
t := c.(*linux.Task)
m.collector.collect(info.ID, info.Namespace, t.Cgroup(), m.collector.storedMetrics, false, nil)
m.collector.Remove(info.ID, info.Namespace) m.collector.Remove(info.ID, info.Namespace)
return nil return nil
} }

View File

@ -28,9 +28,18 @@ func (m *metric) desc(ns *metrics.Namespace) *prometheus.Desc {
return ns.NewDesc(m.name, m.help, m.unit, append([]string{"container_id", "namespace"}, m.labels...)...) return ns.NewDesc(m.name, m.help, m.unit, append([]string{"container_id", "namespace"}, m.labels...)...)
} }
func (m *metric) collect(id, namespace string, stats *cgroups.Metrics, ns *metrics.Namespace, ch chan<- prometheus.Metric) { func (m *metric) collect(id, namespace string, stats *cgroups.Metrics, ns *metrics.Namespace, ch chan<- prometheus.Metric, block bool) {
values := m.getValues(stats) values := m.getValues(stats)
for _, v := range values { for _, v := range values {
// block signals to block on the sending the metrics so none are missed
if block {
ch <- prometheus.MustNewConstMetric(m.desc(ns), m.vt, v.v, append([]string{id, namespace}, v.l...)...) ch <- prometheus.MustNewConstMetric(m.desc(ns), m.vt, v.v, append([]string{id, namespace}, v.l...)...)
continue
}
// non-blocking metrics can be dropped if the chan is full
select {
case ch <- prometheus.MustNewConstMetric(m.desc(ns), m.vt, v.v, append([]string{id, namespace}, v.l...)...):
default:
}
} }
} }

View File

@ -40,6 +40,7 @@ func newCollector(ns *metrics.Namespace) *collector {
c.metrics = append(c.metrics, memoryMetrics...) c.metrics = append(c.metrics, memoryMetrics...)
c.metrics = append(c.metrics, hugetlbMetrics...) c.metrics = append(c.metrics, hugetlbMetrics...)
c.metrics = append(c.metrics, blkioMetrics...) c.metrics = append(c.metrics, blkioMetrics...)
c.storedMetrics = make(chan prometheus.Metric, 100*len(c.metrics))
ns.Add(c) ns.Add(c)
return c return c
} }
@ -62,6 +63,7 @@ type collector struct {
cgroups map[string]*task cgroups map[string]*task
ns *metrics.Namespace ns *metrics.Namespace
metrics []*metric metrics []*metric
storedMetrics chan prometheus.Metric
} }
func (c *collector) Describe(ch chan<- *prometheus.Desc) { func (c *collector) Describe(ch chan<- *prometheus.Desc) {
@ -75,21 +77,33 @@ func (c *collector) Collect(ch chan<- prometheus.Metric) {
wg := &sync.WaitGroup{} wg := &sync.WaitGroup{}
for _, t := range c.cgroups { for _, t := range c.cgroups {
wg.Add(1) wg.Add(1)
go c.collect(t.id, t.namespace, t.cgroup, ch, wg) go c.collect(t.id, t.namespace, t.cgroup, ch, true, wg)
}
storedLoop:
for {
// read stored metrics until the channel is flushed
select {
case m := <-c.storedMetrics:
ch <- m
default:
break storedLoop
}
} }
c.mu.RUnlock() c.mu.RUnlock()
wg.Wait() wg.Wait()
} }
func (c *collector) collect(id, namespace string, cg cgroups.Cgroup, ch chan<- prometheus.Metric, wg *sync.WaitGroup) { func (c *collector) collect(id, namespace string, cg cgroups.Cgroup, ch chan<- prometheus.Metric, block bool, wg *sync.WaitGroup) {
if wg != nil {
defer wg.Done() defer wg.Done()
}
stats, err := cg.Stat(cgroups.IgnoreNotExist) stats, err := cg.Stat(cgroups.IgnoreNotExist)
if err != nil { if err != nil {
logrus.WithError(err).Errorf("stat cgroup %s", id) logrus.WithError(err).Errorf("stat cgroup %s", id)
return return
} }
for _, m := range c.metrics { for _, m := range c.metrics {
m.collect(id, namespace, stats, c.ns, ch) m.collect(id, namespace, stats, c.ns, ch, block)
} }
} }