Merge pull request #1586 from mathieui/postmortem-stats

Collect cgroup stats one last time before exit
This commit is contained in:
Michael Crosby 2017-10-06 15:44:28 -04:00 committed by GitHub
commit 6f351f0a85
3 changed files with 34 additions and 9 deletions

View File

@ -70,6 +70,8 @@ func (m *cgroupsMonitor) Monitor(c runtime.Task) error {
// Stop takes one last sample of the task's cgroup and then drops the task
// from the collector.
//
// The final sample is written to the collector's storedMetrics channel in
// non-blocking mode, so individual samples are discarded if that channel is
// full. The type assertion is unchecked: c must be a *linux.Task or this
// panics. Stop always returns nil.
func (m *cgroupsMonitor) Stop(c runtime.Task) error {
	meta := c.Info()
	task := c.(*linux.Task)
	col := m.collector
	// No WaitGroup is involved here: the collection runs synchronously.
	col.collect(meta.ID, meta.Namespace, task.Cgroup(), col.storedMetrics, false, nil)
	col.Remove(meta.ID, meta.Namespace)
	return nil
}

View File

@ -28,9 +28,18 @@ func (m *metric) desc(ns *metrics.Namespace) *prometheus.Desc {
return ns.NewDesc(m.name, m.help, m.unit, append([]string{"container_id", "namespace"}, m.labels...)...)
}
func (m *metric) collect(id, namespace string, stats *cgroups.Metrics, ns *metrics.Namespace, ch chan<- prometheus.Metric) {
func (m *metric) collect(id, namespace string, stats *cgroups.Metrics, ns *metrics.Namespace, ch chan<- prometheus.Metric, block bool) {
values := m.getValues(stats)
for _, v := range values {
// block signals to block on the sending the metrics so none are missed
if block {
ch <- prometheus.MustNewConstMetric(m.desc(ns), m.vt, v.v, append([]string{id, namespace}, v.l...)...)
continue
}
// non-blocking metrics can be dropped if the chan is full
select {
case ch <- prometheus.MustNewConstMetric(m.desc(ns), m.vt, v.v, append([]string{id, namespace}, v.l...)...):
default:
}
}
}

View File

@ -40,6 +40,7 @@ func newCollector(ns *metrics.Namespace) *collector {
c.metrics = append(c.metrics, memoryMetrics...)
c.metrics = append(c.metrics, hugetlbMetrics...)
c.metrics = append(c.metrics, blkioMetrics...)
c.storedMetrics = make(chan prometheus.Metric, 100*len(c.metrics))
ns.Add(c)
return c
}
@ -62,6 +63,7 @@ type collector struct {
cgroups map[string]*task
ns *metrics.Namespace
metrics []*metric
storedMetrics chan prometheus.Metric
}
func (c *collector) Describe(ch chan<- *prometheus.Desc) {
@ -75,21 +77,33 @@ func (c *collector) Collect(ch chan<- prometheus.Metric) {
wg := &sync.WaitGroup{}
for _, t := range c.cgroups {
wg.Add(1)
go c.collect(t.id, t.namespace, t.cgroup, ch, wg)
go c.collect(t.id, t.namespace, t.cgroup, ch, true, wg)
}
storedLoop:
for {
// read stored metrics until the channel is flushed
select {
case m := <-c.storedMetrics:
ch <- m
default:
break storedLoop
}
}
c.mu.RUnlock()
wg.Wait()
}
func (c *collector) collect(id, namespace string, cg cgroups.Cgroup, ch chan<- prometheus.Metric, wg *sync.WaitGroup) {
func (c *collector) collect(id, namespace string, cg cgroups.Cgroup, ch chan<- prometheus.Metric, block bool, wg *sync.WaitGroup) {
if wg != nil {
defer wg.Done()
}
stats, err := cg.Stat(cgroups.IgnoreNotExist)
if err != nil {
logrus.WithError(err).Errorf("stat cgroup %s", id)
return
}
for _, m := range c.metrics {
m.collect(id, namespace, stats, c.ns, ch)
m.collect(id, namespace, stats, c.ns, ch, block)
}
}