Change oom metric to const
This removes the metric vec that was holding onto all task ID and namespace combinations forever, until containerd was restarted. This was causing a memory leak when there were many tasks. This also removes the shim cmd, whose `Args` are quite large, from the reaper after the shim has been started, cutting down on another leak. This is the first pass through the reaper, but more code is required to fix all the issues when commands are added. Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
This commit is contained in:
parent
89daacfe3f
commit
e800f08f9f
@ -49,6 +49,7 @@ func WithStart(binary, address string, debug bool) ClientOpt {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
terminate(cmd)
|
terminate(cmd)
|
||||||
}
|
}
|
||||||
|
reaper.Default.Delete(cmd.Process.Pid)
|
||||||
}()
|
}()
|
||||||
log.G(ctx).WithFields(logrus.Fields{
|
log.G(ctx).WithFields(logrus.Fields{
|
||||||
"pid": cmd.Process.Pid,
|
"pid": cmd.Process.Pid,
|
||||||
|
@ -65,6 +65,7 @@ func (m *cgroupsMonitor) Monitor(c runtime.Task) error {
|
|||||||
func (m *cgroupsMonitor) Stop(c runtime.Task) error {
|
func (m *cgroupsMonitor) Stop(c runtime.Task) error {
|
||||||
info := c.Info()
|
info := c.Info()
|
||||||
m.collector.Remove(info.ID, info.Namespace)
|
m.collector.Remove(info.ID, info.Namespace)
|
||||||
|
m.oom.Remove(info.ID, info.Namespace)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -9,6 +9,7 @@ import (
|
|||||||
|
|
||||||
"github.com/containerd/cgroups"
|
"github.com/containerd/cgroups"
|
||||||
metrics "github.com/docker/go-metrics"
|
metrics "github.com/docker/go-metrics"
|
||||||
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -18,20 +19,21 @@ func NewOOMCollector(ns *metrics.Namespace) (*OOMCollector, error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
c := &OOMCollector{
|
c := &OOMCollector{
|
||||||
fd: fd,
|
fd: fd,
|
||||||
memoryOOM: ns.NewLabeledGauge("memory_oom", "The number of times a container received an oom event", metrics.Total, "container_id", "namespace"),
|
set: make(map[uintptr]*oom),
|
||||||
set: make(map[uintptr]*oom),
|
desc: ns.NewDesc("memory_oom", "The number of times a container has received an oom event", metrics.Total, "container_id", "namespace"),
|
||||||
}
|
}
|
||||||
go c.start()
|
go c.start()
|
||||||
|
ns.Add(c)
|
||||||
return c, nil
|
return c, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
type OOMCollector struct {
|
type OOMCollector struct {
|
||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
|
|
||||||
memoryOOM metrics.LabeledGauge
|
fd int
|
||||||
fd int
|
set map[uintptr]*oom
|
||||||
set map[uintptr]*oom
|
desc *prometheus.Desc
|
||||||
}
|
}
|
||||||
|
|
||||||
type oom struct {
|
type oom struct {
|
||||||
@ -39,6 +41,7 @@ type oom struct {
|
|||||||
namespace string
|
namespace string
|
||||||
c cgroups.Cgroup
|
c cgroups.Cgroup
|
||||||
triggers []Trigger
|
triggers []Trigger
|
||||||
|
count int
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o *OOMCollector) Add(id, namespace string, cg cgroups.Cgroup, triggers ...Trigger) error {
|
func (o *OOMCollector) Add(id, namespace string, cg cgroups.Cgroup, triggers ...Trigger) error {
|
||||||
@ -50,12 +53,10 @@ func (o *OOMCollector) Add(id, namespace string, cg cgroups.Cgroup, triggers ...
|
|||||||
}
|
}
|
||||||
o.set[fd] = &oom{
|
o.set[fd] = &oom{
|
||||||
id: id,
|
id: id,
|
||||||
|
namespace: namespace,
|
||||||
c: cg,
|
c: cg,
|
||||||
triggers: triggers,
|
triggers: triggers,
|
||||||
namespace: namespace,
|
|
||||||
}
|
}
|
||||||
// set the gauge's default value
|
|
||||||
o.memoryOOM.WithValues(id, namespace).Set(0)
|
|
||||||
event := unix.EpollEvent{
|
event := unix.EpollEvent{
|
||||||
Fd: int32(fd),
|
Fd: int32(fd),
|
||||||
Events: unix.EPOLLHUP | unix.EPOLLIN | unix.EPOLLERR,
|
Events: unix.EPOLLHUP | unix.EPOLLIN | unix.EPOLLERR,
|
||||||
@ -66,11 +67,37 @@ func (o *OOMCollector) Add(id, namespace string, cg cgroups.Cgroup, triggers ...
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (o *OOMCollector) Remove(id, namespace string) {
|
||||||
|
o.mu.Lock()
|
||||||
|
defer o.mu.Unlock()
|
||||||
|
for fd, t := range o.set {
|
||||||
|
if t.id == id && t.namespace == namespace {
|
||||||
|
unix.Close(int(fd))
|
||||||
|
delete(o.set, fd)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Close closes the epoll fd
|
// Close closes the epoll fd
|
||||||
func (o *OOMCollector) Close() error {
|
func (o *OOMCollector) Close() error {
|
||||||
return unix.Close(int(o.fd))
|
return unix.Close(int(o.fd))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (o *OOMCollector) Describe(ch chan<- *prometheus.Desc) {
|
||||||
|
o.mu.Lock()
|
||||||
|
defer o.mu.Unlock()
|
||||||
|
ch <- o.desc
|
||||||
|
}
|
||||||
|
|
||||||
|
func (o *OOMCollector) Collect(ch chan<- prometheus.Metric) {
|
||||||
|
o.mu.Lock()
|
||||||
|
defer o.mu.Unlock()
|
||||||
|
for _, t := range o.set {
|
||||||
|
ch <- prometheus.MustNewConstMetric(o.desc, prometheus.GaugeValue, float64(t.count), t.id, t.namespace)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (o *OOMCollector) start() {
|
func (o *OOMCollector) start() {
|
||||||
var events [128]unix.EpollEvent
|
var events [128]unix.EpollEvent
|
||||||
for {
|
for {
|
||||||
@ -107,7 +134,7 @@ func (o *OOMCollector) process(fd uintptr, event uint32) {
|
|||||||
unix.Close(int(fd))
|
unix.Close(int(fd))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
o.memoryOOM.WithValues(info.id, info.namespace).Inc(1)
|
info.count++
|
||||||
for _, t := range info.triggers {
|
for _, t := range info.triggers {
|
||||||
t(info.id, info.c)
|
t(info.id, info.c)
|
||||||
}
|
}
|
||||||
|
@ -9,7 +9,6 @@ import (
|
|||||||
type TaskInfo struct {
|
type TaskInfo struct {
|
||||||
ID string
|
ID string
|
||||||
Runtime string
|
Runtime string
|
||||||
Spec []byte
|
|
||||||
Namespace string
|
Namespace string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user