Convert OOM Metric to Const
This converts the OOM metric to a const metric that is generated at collection time from the set of currently monitored tasks, so that label values for deleted tasks no longer accumulate in the metric.

Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
This commit is contained in:
parent
4291fb4803
commit
b04e408a4b
@ -251,7 +251,9 @@ func (r *Runtime) Create(ctx context.Context, id string, opts runtime.CreateOpts
|
|||||||
}
|
}
|
||||||
// after the task is created, add it to the monitor
|
// after the task is created, add it to the monitor
|
||||||
if err = r.monitor.Monitor(t); err != nil {
|
if err = r.monitor.Monitor(t); err != nil {
|
||||||
r.tasks.Delete(ctx, t)
|
if _, err := r.Delete(ctx, t); err != nil {
|
||||||
|
log.G(ctx).WithError(err).Error("deleting task after failed monitor")
|
||||||
|
}
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
return t, nil
|
return t, nil
|
||||||
@ -269,6 +271,7 @@ func (r *Runtime) Delete(ctx context.Context, c runtime.Task) (*runtime.Exit, er
|
|||||||
if err := r.monitor.Stop(lc); err != nil {
|
if err := r.monitor.Stop(lc); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
rsp, err := lc.shim.Delete(ctx, empty)
|
rsp, err := lc.shim.Delete(ctx, empty)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, errdefs.FromGRPC(err)
|
return nil, errdefs.FromGRPC(err)
|
||||||
|
@ -7,9 +7,11 @@ import (
|
|||||||
eventsapi "github.com/containerd/containerd/api/services/events/v1"
|
eventsapi "github.com/containerd/containerd/api/services/events/v1"
|
||||||
"github.com/containerd/containerd/events"
|
"github.com/containerd/containerd/events"
|
||||||
"github.com/containerd/containerd/log"
|
"github.com/containerd/containerd/log"
|
||||||
|
"github.com/containerd/containerd/namespaces"
|
||||||
"github.com/containerd/containerd/plugin"
|
"github.com/containerd/containerd/plugin"
|
||||||
"github.com/containerd/containerd/runtime"
|
"github.com/containerd/containerd/runtime"
|
||||||
metrics "github.com/docker/go-metrics"
|
metrics "github.com/docker/go-metrics"
|
||||||
|
"github.com/pkg/errors"
|
||||||
"golang.org/x/net/context"
|
"golang.org/x/net/context"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -54,7 +56,7 @@ func (m *cgroupsMonitor) Monitor(c runtime.Task) error {
|
|||||||
}
|
}
|
||||||
cg, err := cgroups.Load(cgroups.V1, cgroups.PidPath(int(state.Pid)))
|
cg, err := cgroups.Load(cgroups.V1, cgroups.PidPath(int(state.Pid)))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return errors.Wrapf(err, "load cgroup for %d", state.Pid)
|
||||||
}
|
}
|
||||||
if err := m.collector.Add(info.ID, info.Namespace, cg); err != nil {
|
if err := m.collector.Add(info.ID, info.Namespace, cg); err != nil {
|
||||||
return err
|
return err
|
||||||
@ -68,8 +70,9 @@ func (m *cgroupsMonitor) Stop(c runtime.Task) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *cgroupsMonitor) trigger(id string, cg cgroups.Cgroup) {
|
func (m *cgroupsMonitor) trigger(id, namespace string, cg cgroups.Cgroup) {
|
||||||
if err := m.publisher.Publish(m.context, runtime.TaskOOMEventTopic, &eventsapi.TaskOOM{
|
ctx := namespaces.WithNamespace(m.context, namespace)
|
||||||
|
if err := m.publisher.Publish(ctx, runtime.TaskOOMEventTopic, &eventsapi.TaskOOM{
|
||||||
ContainerID: id,
|
ContainerID: id,
|
||||||
}); err != nil {
|
}); err != nil {
|
||||||
log.G(m.context).WithError(err).Error("post OOM event")
|
log.G(m.context).WithError(err).Error("post OOM event")
|
||||||
|
@ -21,7 +21,7 @@ var (
|
|||||||
|
|
||||||
// Trigger will be called when an event happens and provides the cgroup
|
// Trigger will be called when an event happens and provides the cgroup
|
||||||
// where the event originated from
|
// where the event originated from
|
||||||
type Trigger func(string, cgroups.Cgroup)
|
type Trigger func(string, string, cgroups.Cgroup)
|
||||||
|
|
||||||
// New registers the Collector with the provided namespace and returns it so
|
// New registers the Collector with the provided namespace and returns it so
|
||||||
// that cgroups can be added for collection
|
// that cgroups can be added for collection
|
||||||
|
@ -4,11 +4,13 @@ package cgroups
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"sync"
|
"sync"
|
||||||
|
"sync/atomic"
|
||||||
|
|
||||||
"golang.org/x/sys/unix"
|
"golang.org/x/sys/unix"
|
||||||
|
|
||||||
"github.com/containerd/cgroups"
|
"github.com/containerd/cgroups"
|
||||||
metrics "github.com/docker/go-metrics"
|
metrics "github.com/docker/go-metrics"
|
||||||
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -18,10 +20,11 @@ func NewOOMCollector(ns *metrics.Namespace) (*OOMCollector, error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
c := &OOMCollector{
|
c := &OOMCollector{
|
||||||
fd: fd,
|
fd: fd,
|
||||||
memoryOOM: ns.NewLabeledGauge("memory_oom", "The number of times a container received an oom event", metrics.Total, "container_id", "namespace"),
|
desc: ns.NewDesc("memory_oom", "The number of times a container has received an oom event", metrics.Total, "container_id", "namespace"),
|
||||||
set: make(map[uintptr]*oom),
|
set: make(map[uintptr]*oom),
|
||||||
}
|
}
|
||||||
|
ns.Add(c)
|
||||||
go c.start()
|
go c.start()
|
||||||
return c, nil
|
return c, nil
|
||||||
}
|
}
|
||||||
@ -29,9 +32,9 @@ func NewOOMCollector(ns *metrics.Namespace) (*OOMCollector, error) {
|
|||||||
type OOMCollector struct {
|
type OOMCollector struct {
|
||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
|
|
||||||
memoryOOM metrics.LabeledGauge
|
desc *prometheus.Desc
|
||||||
fd int
|
fd int
|
||||||
set map[uintptr]*oom
|
set map[uintptr]*oom
|
||||||
}
|
}
|
||||||
|
|
||||||
type oom struct {
|
type oom struct {
|
||||||
@ -39,6 +42,7 @@ type oom struct {
|
|||||||
namespace string
|
namespace string
|
||||||
c cgroups.Cgroup
|
c cgroups.Cgroup
|
||||||
triggers []Trigger
|
triggers []Trigger
|
||||||
|
count int64
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o *OOMCollector) Add(id, namespace string, cg cgroups.Cgroup, triggers ...Trigger) error {
|
func (o *OOMCollector) Add(id, namespace string, cg cgroups.Cgroup, triggers ...Trigger) error {
|
||||||
@ -54,16 +58,24 @@ func (o *OOMCollector) Add(id, namespace string, cg cgroups.Cgroup, triggers ...
|
|||||||
triggers: triggers,
|
triggers: triggers,
|
||||||
namespace: namespace,
|
namespace: namespace,
|
||||||
}
|
}
|
||||||
// set the gauge's default value
|
|
||||||
o.memoryOOM.WithValues(id, namespace).Set(0)
|
|
||||||
event := unix.EpollEvent{
|
event := unix.EpollEvent{
|
||||||
Fd: int32(fd),
|
Fd: int32(fd),
|
||||||
Events: unix.EPOLLHUP | unix.EPOLLIN | unix.EPOLLERR,
|
Events: unix.EPOLLHUP | unix.EPOLLIN | unix.EPOLLERR,
|
||||||
}
|
}
|
||||||
if err := unix.EpollCtl(o.fd, unix.EPOLL_CTL_ADD, int(fd), &event); err != nil {
|
return unix.EpollCtl(o.fd, unix.EPOLL_CTL_ADD, int(fd), &event)
|
||||||
return err
|
}
|
||||||
|
|
||||||
|
func (o *OOMCollector) Describe(ch chan<- *prometheus.Desc) {
|
||||||
|
ch <- o.desc
|
||||||
|
}
|
||||||
|
|
||||||
|
func (o *OOMCollector) Collect(ch chan<- prometheus.Metric) {
|
||||||
|
o.mu.Lock()
|
||||||
|
defer o.mu.Unlock()
|
||||||
|
for _, t := range o.set {
|
||||||
|
c := atomic.LoadInt64(&t.count)
|
||||||
|
ch <- prometheus.MustNewConstMetric(o.desc, prometheus.CounterValue, float64(c), t.id, t.namespace)
|
||||||
}
|
}
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Close closes the epoll fd
|
// Close closes the epoll fd
|
||||||
@ -107,14 +119,14 @@ func (o *OOMCollector) process(fd uintptr, event uint32) {
|
|||||||
unix.Close(int(fd))
|
unix.Close(int(fd))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
o.memoryOOM.WithValues(info.id, info.namespace).Inc(1)
|
atomic.AddInt64(&info.count, 1)
|
||||||
for _, t := range info.triggers {
|
for _, t := range info.triggers {
|
||||||
t(info.id, info.c)
|
t(info.id, info.namespace, info.c)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func flush(fd uintptr) error {
|
func flush(fd uintptr) error {
|
||||||
buf := make([]byte, 8)
|
var buf [8]byte
|
||||||
_, err := unix.Read(int(fd), buf)
|
_, err := unix.Read(int(fd), buf[:])
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user