diff --git a/linux/shim/client.go b/linux/shim/client.go index 2b5016fc5..f34403ceb 100644 --- a/linux/shim/client.go +++ b/linux/shim/client.go @@ -49,6 +49,7 @@ func WithStart(binary, address string, debug bool) ClientOpt { if err != nil { terminate(cmd) } + reaper.Default.Delete(cmd.Process.Pid) }() log.G(ctx).WithFields(logrus.Fields{ "pid": cmd.Process.Pid, diff --git a/metrics/cgroups/cgroups.go b/metrics/cgroups/cgroups.go index 5ef16c0c3..31d2f2148 100644 --- a/metrics/cgroups/cgroups.go +++ b/metrics/cgroups/cgroups.go @@ -65,6 +65,7 @@ func (m *cgroupsMonitor) Monitor(c runtime.Task) error { func (m *cgroupsMonitor) Stop(c runtime.Task) error { info := c.Info() m.collector.Remove(info.ID, info.Namespace) + m.oom.Remove(info.ID, info.Namespace) return nil } diff --git a/metrics/cgroups/oom.go b/metrics/cgroups/oom.go index 47d979f58..fd1845c1f 100644 --- a/metrics/cgroups/oom.go +++ b/metrics/cgroups/oom.go @@ -9,6 +9,7 @@ import ( "github.com/containerd/cgroups" metrics "github.com/docker/go-metrics" + "github.com/prometheus/client_golang/prometheus" "github.com/sirupsen/logrus" ) @@ -18,20 +19,21 @@ func NewOOMCollector(ns *metrics.Namespace) (*OOMCollector, error) { return nil, err } c := &OOMCollector{ - fd: fd, - memoryOOM: ns.NewLabeledGauge("memory_oom", "The number of times a container received an oom event", metrics.Total, "container_id", "namespace"), - set: make(map[uintptr]*oom), + fd: fd, + set: make(map[uintptr]*oom), + desc: ns.NewDesc("memory_oom", "The number of times a container has received an oom event", metrics.Total, "container_id", "namespace"), } go c.start() + ns.Add(c) return c, nil } type OOMCollector struct { mu sync.Mutex - memoryOOM metrics.LabeledGauge - fd int - set map[uintptr]*oom + fd int + set map[uintptr]*oom + desc *prometheus.Desc } type oom struct { @@ -39,6 +41,7 @@ type oom struct { namespace string c cgroups.Cgroup triggers []Trigger + count int } func (o *OOMCollector) Add(id, namespace string, cg cgroups.Cgroup, triggers ...Trigger) error { @@ -50,12 +53,10 @@ func (o *OOMCollector) Add(id, namespace string, cg cgroups.Cgroup, triggers ... } o.set[fd] = &oom{ id: id, + namespace: namespace, c: cg, triggers: triggers, - namespace: namespace, } - // set the gauge's default value - o.memoryOOM.WithValues(id, namespace).Set(0) event := unix.EpollEvent{ Fd: int32(fd), Events: unix.EPOLLHUP | unix.EPOLLIN | unix.EPOLLERR, @@ -66,11 +67,37 @@ func (o *OOMCollector) Add(id, namespace string, cg cgroups.Cgroup, triggers ... return nil } +func (o *OOMCollector) Remove(id, namespace string) { + o.mu.Lock() + defer o.mu.Unlock() + for fd, t := range o.set { + if t.id == id && t.namespace == namespace { + unix.Close(int(fd)) + delete(o.set, fd) + return + } + } +} + // Close closes the epoll fd func (o *OOMCollector) Close() error { return unix.Close(int(o.fd)) } +func (o *OOMCollector) Describe(ch chan<- *prometheus.Desc) { + o.mu.Lock() + defer o.mu.Unlock() + ch <- o.desc +} + +func (o *OOMCollector) Collect(ch chan<- prometheus.Metric) { + o.mu.Lock() + defer o.mu.Unlock() + for _, t := range o.set { + ch <- prometheus.MustNewConstMetric(o.desc, prometheus.GaugeValue, float64(t.count), t.id, t.namespace) + } +} + func (o *OOMCollector) start() { var events [128]unix.EpollEvent for { @@ -107,7 +134,7 @@ func (o *OOMCollector) process(fd uintptr, event uint32) { unix.Close(int(fd)) return } - o.memoryOOM.WithValues(info.id, info.namespace).Inc(1) + info.count++ for _, t := range info.triggers { t(info.id, info.c) } diff --git a/runtime/task.go b/runtime/task.go index 4a5685e10..62fa171e6 100644 --- a/runtime/task.go +++ b/runtime/task.go @@ -9,7 +9,6 @@ import ( type TaskInfo struct { ID string Runtime string - Spec []byte Namespace string }