Change oom metric to const

This removes the metric vec that was holding onto all task ID and
namespace combinations forever, until containerd was restarted. This
was causing a memory leak with many tasks.

This also removes the shim cmd, whose `Args` is quite large, from the
reaper after the shim has been started, cutting down on another leak.

This is the first pass through the reaper but more code is required to
fix all the issues when commands are added.

Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
This commit is contained in:
Michael Crosby 2017-08-17 16:23:20 -04:00
parent 89daacfe3f
commit e800f08f9f
4 changed files with 39 additions and 11 deletions

View File

@ -49,6 +49,7 @@ func WithStart(binary, address string, debug bool) ClientOpt {
if err != nil {
terminate(cmd)
}
reaper.Default.Delete(cmd.Process.Pid)
}()
log.G(ctx).WithFields(logrus.Fields{
"pid": cmd.Process.Pid,

View File

@ -65,6 +65,7 @@ func (m *cgroupsMonitor) Monitor(c runtime.Task) error {
// Stop unregisters the task from both collectors owned by the monitor: the
// metrics collector and the OOM collector. The task is identified by the ID
// and Namespace reported by c.Info(). It always returns nil.
func (m *cgroupsMonitor) Stop(c runtime.Task) error {
	taskInfo := c.Info()
	id, ns := taskInfo.ID, taskInfo.Namespace
	m.collector.Remove(id, ns)
	m.oom.Remove(id, ns)
	return nil
}

View File

@ -9,6 +9,7 @@ import (
"github.com/containerd/cgroups"
metrics "github.com/docker/go-metrics"
"github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"
)
@ -19,19 +20,20 @@ func NewOOMCollector(ns *metrics.Namespace) (*OOMCollector, error) {
}
c := &OOMCollector{
fd: fd,
memoryOOM: ns.NewLabeledGauge("memory_oom", "The number of times a container received an oom event", metrics.Total, "container_id", "namespace"),
set: make(map[uintptr]*oom),
desc: ns.NewDesc("memory_oom", "The number of times a container has received an oom event", metrics.Total, "container_id", "namespace"),
}
go c.start()
ns.Add(c)
return c, nil
}
// OOMCollector keeps one oom entry per registered file descriptor and exposes
// those entries through its Describe and Collect methods. NewOOMCollector
// adds the collector to the metrics namespace and starts its event loop.
type OOMCollector struct {
// mu is held by Remove, Describe and Collect for their whole body.
mu sync.Mutex
// memoryOOM is the labeled "memory_oom" gauge created in NewOOMCollector.
// NOTE(review): this view shows diff lines without +/- markers; memoryOOM
// looks like the pre-change field that desc (plus the per-entry count)
// replaces — confirm against the real file before relying on it.
memoryOOM metrics.LabeledGauge
// fd is the epoll file descriptor; Close closes it.
fd int
// set maps a registered fd to its oom entry; Add inserts, Remove deletes.
set map[uintptr]*oom
// desc is the "memory_oom" descriptor: sent by Describe and used by Collect
// to build one const metric per entry.
desc *prometheus.Desc
}
type oom struct {
@ -39,6 +41,7 @@ type oom struct {
namespace string
c cgroups.Cgroup
triggers []Trigger
count int
}
func (o *OOMCollector) Add(id, namespace string, cg cgroups.Cgroup, triggers ...Trigger) error {
@ -50,12 +53,10 @@ func (o *OOMCollector) Add(id, namespace string, cg cgroups.Cgroup, triggers ...
}
o.set[fd] = &oom{
id: id,
namespace: namespace,
c: cg,
triggers: triggers,
namespace: namespace,
}
// set the gauge's default value
o.memoryOOM.WithValues(id, namespace).Set(0)
event := unix.EpollEvent{
Fd: int32(fd),
Events: unix.EPOLLHUP | unix.EPOLLIN | unix.EPOLLERR,
@ -66,11 +67,37 @@ func (o *OOMCollector) Add(id, namespace string, cg cgroups.Cgroup, triggers ...
return nil
}
// Remove stops tracking the entry registered under the given task id and
// namespace. The entry's fd is closed and the entry is dropped from the set.
// At most one entry is removed; a pair that is not registered is a no-op.
func (o *OOMCollector) Remove(id, namespace string) {
	o.mu.Lock()
	defer o.mu.Unlock()
	for key, entry := range o.set {
		if entry.id != id || entry.namespace != namespace {
			continue
		}
		// The result of Close is not checked here.
		unix.Close(int(key))
		delete(o.set, key)
		return
	}
}
// Close closes the collector's epoll file descriptor and returns the result
// of that close call.
func (o *OOMCollector) Close() error {
	epollFD := int(o.fd)
	return unix.Close(epollFD)
}
// Describe sends the collector's single metric descriptor on ch. The
// collector's mutex is held for the duration of the send.
func (o *OOMCollector) Describe(ch chan<- *prometheus.Desc) {
	o.mu.Lock()
	defer o.mu.Unlock()
	descriptor := o.desc
	ch <- descriptor
}
// Collect sends one gauge-valued const metric per tracked entry on ch. Each
// metric carries the entry's count as its value and the entry's id and
// namespace as label values. The collector's mutex is held while the set is
// walked and the metrics are sent.
func (o *OOMCollector) Collect(ch chan<- prometheus.Metric) {
	o.mu.Lock()
	defer o.mu.Unlock()
	for _, entry := range o.set {
		value := float64(entry.count)
		ch <- prometheus.MustNewConstMetric(o.desc, prometheus.GaugeValue, value, entry.id, entry.namespace)
	}
}
func (o *OOMCollector) start() {
var events [128]unix.EpollEvent
for {
@ -107,7 +134,7 @@ func (o *OOMCollector) process(fd uintptr, event uint32) {
unix.Close(int(fd))
return
}
o.memoryOOM.WithValues(info.id, info.namespace).Inc(1)
info.count++
for _, t := range info.triggers {
t(info.id, info.c)
}

View File

@ -9,7 +9,6 @@ import (
// TaskInfo groups descriptive fields for a task.
// NOTE(review): the hunk header above (-9,7 +9,6) says this commit removes one
// line from this region, and the view has no +/- markers; Spec is the likely
// removed field — confirm against the real file.
type TaskInfo struct {
// ID identifies the task; cgroupsMonitor.Stop passes an ID field, together
// with Namespace, to the collectors' Remove methods — presumably this one;
// verify that Task.Info() returns TaskInfo.
ID string
Runtime string
// Spec is raw bytes; its encoding is not shown here.
Spec []byte
// Namespace is paired with ID when a task is removed from the collectors.
Namespace string
}