Add prometheus container level metrics
Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
This commit is contained in:
110
vendor/github.com/crosbymichael/cgroups/prometheus/oom.go
generated
vendored
Normal file
110
vendor/github.com/crosbymichael/cgroups/prometheus/oom.go
generated
vendored
Normal file
@@ -0,0 +1,110 @@
|
||||
package prometheus
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"syscall"
|
||||
|
||||
"github.com/Sirupsen/logrus"
|
||||
"github.com/crosbymichael/cgroups"
|
||||
metrics "github.com/docker/go-metrics"
|
||||
)
|
||||
|
||||
func NewOOMCollector(ns *metrics.Namespace) (*OOMCollector, error) {
|
||||
fd, err := syscall.EpollCreate1(syscall.EPOLL_CLOEXEC)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
c := &OOMCollector{
|
||||
fd: fd,
|
||||
memoryOOM: ns.NewLabeledGauge("memory_oom", "The number of times a container received an oom event", metrics.Total, "id"),
|
||||
set: make(map[uintptr]*oom),
|
||||
}
|
||||
go c.start()
|
||||
return c, nil
|
||||
}
|
||||
|
||||
type OOMCollector struct {
|
||||
mu sync.Mutex
|
||||
|
||||
memoryOOM metrics.LabeledGauge
|
||||
fd int
|
||||
set map[uintptr]*oom
|
||||
}
|
||||
|
||||
type oom struct {
|
||||
id string
|
||||
c cgroups.Cgroup
|
||||
}
|
||||
|
||||
func (o *OOMCollector) Add(id string, cg cgroups.Cgroup) error {
|
||||
o.mu.Lock()
|
||||
defer o.mu.Unlock()
|
||||
fd, err := cg.OOMEventFD()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
o.set[fd] = &oom{
|
||||
id: id,
|
||||
c: cg,
|
||||
}
|
||||
// set the gauge's default value
|
||||
o.memoryOOM.WithValues(id).Set(0)
|
||||
event := syscall.EpollEvent{
|
||||
Fd: int32(fd),
|
||||
Events: syscall.EPOLLHUP | syscall.EPOLLIN | syscall.EPOLLERR,
|
||||
}
|
||||
if err := syscall.EpollCtl(o.fd, syscall.EPOLL_CTL_ADD, int(fd), &event); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Close closes the epoll fd
|
||||
func (o *OOMCollector) Close() error {
|
||||
return syscall.Close(int(o.fd))
|
||||
}
|
||||
|
||||
func (o *OOMCollector) start() {
|
||||
var events [128]syscall.EpollEvent
|
||||
for {
|
||||
n, err := syscall.EpollWait(o.fd, events[:], -1)
|
||||
if err != nil {
|
||||
if err == syscall.EINTR {
|
||||
continue
|
||||
}
|
||||
logrus.WithField("error", err).Fatal("cgroups: epoll wait")
|
||||
}
|
||||
for i := 0; i < n; i++ {
|
||||
o.process(uintptr(events[i].Fd), events[i].Events)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (o *OOMCollector) process(fd uintptr, event uint32) {
|
||||
// make sure to always flush the fd
|
||||
flush(fd)
|
||||
|
||||
o.mu.Lock()
|
||||
info, ok := o.set[fd]
|
||||
if !ok {
|
||||
o.mu.Unlock()
|
||||
return
|
||||
}
|
||||
o.mu.Unlock()
|
||||
// if we received an event but it was caused by the cgroup being deleted and the fd
|
||||
// being closed make sure we close our copy and remove the container from the set
|
||||
if info.c.State() == cgroups.Deleted {
|
||||
o.mu.Lock()
|
||||
delete(o.set, fd)
|
||||
o.mu.Unlock()
|
||||
syscall.Close(int(fd))
|
||||
return
|
||||
}
|
||||
o.memoryOOM.WithValues(info.id).Inc(1)
|
||||
}
|
||||
|
||||
func flush(fd uintptr) error {
|
||||
buf := make([]byte, 8)
|
||||
_, err := syscall.Read(int(fd), buf)
|
||||
return err
|
||||
}
|
||||
Reference in New Issue
Block a user