Add namespace to container metrics
Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
This commit is contained in:
parent
85f61f6f51
commit
6ec84ef83c
@ -276,7 +276,7 @@ func TestContainerExec(t *testing.T) {
|
|||||||
"exit 6",
|
"exit 6",
|
||||||
}
|
}
|
||||||
|
|
||||||
process, err := task.Exec(ctx, &processSpec, empty())
|
process, err := task.Exec(ctx, processSpec, empty())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Error(err)
|
t.Error(err)
|
||||||
return
|
return
|
||||||
|
@ -400,21 +400,9 @@ func (r *Runtime) terminate(ctx context.Context, bundle *bundle, ns, id string)
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if err := rt.Kill(ctx, id, int(unix.SIGKILL), &runc.KillOpts{All: true}); err != nil {
|
if err := rt.Delete(ctx, id, &runc.DeleteOpts{
|
||||||
log.G(ctx).WithError(err).Warnf("kill all processes for %s", id)
|
Force: true,
|
||||||
}
|
}); err != nil {
|
||||||
// it can take a while for the container to be killed so poll for the container's status
|
|
||||||
// until it is in a stopped state
|
|
||||||
status := "running"
|
|
||||||
for status != "stopped" {
|
|
||||||
c, err := rt.State(ctx, id)
|
|
||||||
if err != nil {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
status = c.Status
|
|
||||||
time.Sleep(50 * time.Millisecond)
|
|
||||||
}
|
|
||||||
if err := rt.Delete(ctx, id); err != nil {
|
|
||||||
log.G(ctx).WithError(err).Warnf("delete runtime state %s", id)
|
log.G(ctx).WithError(err).Warnf("delete runtime state %s", id)
|
||||||
}
|
}
|
||||||
if err := unix.Unmount(filepath.Join(bundle.path, "rootfs"), 0); err != nil {
|
if err := unix.Unmount(filepath.Join(bundle.path, "rootfs"), 0); err != nil {
|
||||||
|
@ -205,7 +205,7 @@ func (p *initProcess) Delete(context context.Context) error {
|
|||||||
}
|
}
|
||||||
p.killAll(context)
|
p.killAll(context)
|
||||||
p.Wait()
|
p.Wait()
|
||||||
err = p.runc.Delete(context, p.id)
|
err = p.runc.Delete(context, p.id, nil)
|
||||||
if p.io != nil {
|
if p.io != nil {
|
||||||
for _, c := range p.closers {
|
for _, c := range p.closers {
|
||||||
c.Close()
|
c.Close()
|
||||||
|
@ -3,7 +3,6 @@
|
|||||||
package cgroups
|
package cgroups
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/containerd/cgroups"
|
"github.com/containerd/cgroups"
|
||||||
@ -44,12 +43,8 @@ type cgroupsMonitor struct {
|
|||||||
events chan<- *plugin.Event
|
events chan<- *plugin.Event
|
||||||
}
|
}
|
||||||
|
|
||||||
func getID(t plugin.Task) string {
|
|
||||||
return fmt.Sprintf("%s-%s", t.Info().Namespace, t.Info().ID)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *cgroupsMonitor) Monitor(c plugin.Task) error {
|
func (m *cgroupsMonitor) Monitor(c plugin.Task) error {
|
||||||
id := getID(c)
|
info := c.Info()
|
||||||
state, err := c.State(m.context)
|
state, err := c.State(m.context)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
@ -58,14 +53,15 @@ func (m *cgroupsMonitor) Monitor(c plugin.Task) error {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if err := m.collector.Add(id, cg); err != nil {
|
if err := m.collector.Add(info.ID, info.Namespace, cg); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
return m.oom.Add(id, cg, m.trigger)
|
return m.oom.Add(info.ID, info.Namespace, cg, m.trigger)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *cgroupsMonitor) Stop(c plugin.Task) error {
|
func (m *cgroupsMonitor) Stop(c plugin.Task) error {
|
||||||
m.collector.Remove(getID(c))
|
info := c.Info()
|
||||||
|
m.collector.Remove(info.ID, info.Namespace)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -22,12 +22,13 @@ type metric struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (m *metric) desc(ns *metrics.Namespace) *prometheus.Desc {
|
func (m *metric) desc(ns *metrics.Namespace) *prometheus.Desc {
|
||||||
return ns.NewDesc(m.name, m.help, m.unit, append([]string{"id"}, m.labels...)...)
|
// the namespace label is for containerd namespaces
|
||||||
|
return ns.NewDesc(m.name, m.help, m.unit, append([]string{"id", "namespace"}, m.labels...)...)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *metric) collect(id string, stats *cgroups.Stats, ns *metrics.Namespace, ch chan<- prometheus.Metric) {
|
func (m *metric) collect(id, namespace string, stats *cgroups.Stats, ns *metrics.Namespace, ch chan<- prometheus.Metric) {
|
||||||
values := m.getValues(stats)
|
values := m.getValues(stats)
|
||||||
for _, v := range values {
|
for _, v := range values {
|
||||||
ch <- prometheus.MustNewConstMetric(m.desc(ns), m.vt, v.v, append([]string{id}, v.l...)...)
|
ch <- prometheus.MustNewConstMetric(m.desc(ns), m.vt, v.v, append([]string{id, namespace}, v.l...)...)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -26,7 +26,7 @@ func NewCollector(ns *metrics.Namespace) *Collector {
|
|||||||
// add machine cpus and memory info
|
// add machine cpus and memory info
|
||||||
c := &Collector{
|
c := &Collector{
|
||||||
ns: ns,
|
ns: ns,
|
||||||
cgroups: make(map[string]cgroups.Cgroup),
|
cgroups: make(map[string]*task),
|
||||||
}
|
}
|
||||||
c.metrics = append(c.metrics, pidMetrics...)
|
c.metrics = append(c.metrics, pidMetrics...)
|
||||||
c.metrics = append(c.metrics, cpuMetrics...)
|
c.metrics = append(c.metrics, cpuMetrics...)
|
||||||
@ -37,12 +37,18 @@ func NewCollector(ns *metrics.Namespace) *Collector {
|
|||||||
return c
|
return c
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type task struct {
|
||||||
|
id string
|
||||||
|
namespace string
|
||||||
|
cgroup cgroups.Cgroup
|
||||||
|
}
|
||||||
|
|
||||||
// Collector provides the ability to collect container stats and export
|
// Collector provides the ability to collect container stats and export
|
||||||
// them in the prometheus format
|
// them in the prometheus format
|
||||||
type Collector struct {
|
type Collector struct {
|
||||||
mu sync.RWMutex
|
mu sync.RWMutex
|
||||||
|
|
||||||
cgroups map[string]cgroups.Cgroup
|
cgroups map[string]*task
|
||||||
ns *metrics.Namespace
|
ns *metrics.Namespace
|
||||||
metrics []*metric
|
metrics []*metric
|
||||||
}
|
}
|
||||||
@ -56,15 +62,15 @@ func (c *Collector) Describe(ch chan<- *prometheus.Desc) {
|
|||||||
func (c *Collector) Collect(ch chan<- prometheus.Metric) {
|
func (c *Collector) Collect(ch chan<- prometheus.Metric) {
|
||||||
c.mu.RLock()
|
c.mu.RLock()
|
||||||
wg := &sync.WaitGroup{}
|
wg := &sync.WaitGroup{}
|
||||||
for id, cg := range c.cgroups {
|
for _, t := range c.cgroups {
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
go c.collect(id, cg, ch, wg)
|
go c.collect(t.id, t.namespace, t.cgroup, ch, wg)
|
||||||
}
|
}
|
||||||
c.mu.RUnlock()
|
c.mu.RUnlock()
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Collector) collect(id string, cg cgroups.Cgroup, ch chan<- prometheus.Metric, wg *sync.WaitGroup) {
|
func (c *Collector) collect(id, namespace string, cg cgroups.Cgroup, ch chan<- prometheus.Metric, wg *sync.WaitGroup) {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
stats, err := cg.Stat(cgroups.IgnoreNotExist)
|
stats, err := cg.Stat(cgroups.IgnoreNotExist)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -72,38 +78,42 @@ func (c *Collector) collect(id string, cg cgroups.Cgroup, ch chan<- prometheus.M
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
for _, m := range c.metrics {
|
for _, m := range c.metrics {
|
||||||
m.collect(id, stats, c.ns, ch)
|
m.collect(id, namespace, stats, c.ns, ch)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add adds the provided cgroup and id so that metrics are collected and exported
|
// Add adds the provided cgroup and id so that metrics are collected and exported
|
||||||
func (c *Collector) Add(id string, cg cgroups.Cgroup) error {
|
func (c *Collector) Add(id, namespace string, cg cgroups.Cgroup) error {
|
||||||
c.mu.Lock()
|
c.mu.Lock()
|
||||||
defer c.mu.Unlock()
|
defer c.mu.Unlock()
|
||||||
if _, ok := c.cgroups[id]; ok {
|
if _, ok := c.cgroups[id+namespace]; ok {
|
||||||
return ErrAlreadyCollected
|
return ErrAlreadyCollected
|
||||||
}
|
}
|
||||||
c.cgroups[id] = cg
|
c.cgroups[id+namespace] = &task{
|
||||||
|
id: id,
|
||||||
|
namespace: namespace,
|
||||||
|
cgroup: cg,
|
||||||
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get returns the cgroup that is being collected under the provided id
|
// Get returns the cgroup that is being collected under the provided id
|
||||||
// returns ErrCgroupNotExists if the id is not being collected
|
// returns ErrCgroupNotExists if the id is not being collected
|
||||||
func (c *Collector) Get(id string) (cgroups.Cgroup, error) {
|
func (c *Collector) Get(id, namespace string) (cgroups.Cgroup, error) {
|
||||||
c.mu.Lock()
|
c.mu.Lock()
|
||||||
defer c.mu.Unlock()
|
defer c.mu.Unlock()
|
||||||
cg, ok := c.cgroups[id]
|
t, ok := c.cgroups[id+namespace]
|
||||||
if !ok {
|
if !ok {
|
||||||
return nil, ErrCgroupNotExists
|
return nil, ErrCgroupNotExists
|
||||||
}
|
}
|
||||||
return cg, nil
|
return t.cgroup, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remove removes the provided cgroup by id from the collector
|
// Remove removes the provided cgroup by id from the collector
|
||||||
func (c *Collector) Remove(id string) {
|
func (c *Collector) Remove(id, namespace string) {
|
||||||
c.mu.Lock()
|
c.mu.Lock()
|
||||||
defer c.mu.Unlock()
|
defer c.mu.Unlock()
|
||||||
delete(c.cgroups, id)
|
delete(c.cgroups, id+namespace)
|
||||||
}
|
}
|
||||||
|
|
||||||
func blkioValues(l []cgroups.BlkioEntry) []value {
|
func blkioValues(l []cgroups.BlkioEntry) []value {
|
||||||
|
@ -17,7 +17,7 @@ func NewOOMCollector(ns *metrics.Namespace) (*OOMCollector, error) {
|
|||||||
}
|
}
|
||||||
c := &OOMCollector{
|
c := &OOMCollector{
|
||||||
fd: fd,
|
fd: fd,
|
||||||
memoryOOM: ns.NewLabeledGauge("memory_oom", "The number of times a container received an oom event", metrics.Total, "id"),
|
memoryOOM: ns.NewLabeledGauge("memory_oom", "The number of times a container received an oom event", metrics.Total, "id", "namespace"),
|
||||||
set: make(map[uintptr]*oom),
|
set: make(map[uintptr]*oom),
|
||||||
}
|
}
|
||||||
go c.start()
|
go c.start()
|
||||||
@ -34,11 +34,12 @@ type OOMCollector struct {
|
|||||||
|
|
||||||
type oom struct {
|
type oom struct {
|
||||||
id string
|
id string
|
||||||
|
namespace string
|
||||||
c cgroups.Cgroup
|
c cgroups.Cgroup
|
||||||
triggers []Trigger
|
triggers []Trigger
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o *OOMCollector) Add(id string, cg cgroups.Cgroup, triggers ...Trigger) error {
|
func (o *OOMCollector) Add(id, namespace string, cg cgroups.Cgroup, triggers ...Trigger) error {
|
||||||
o.mu.Lock()
|
o.mu.Lock()
|
||||||
defer o.mu.Unlock()
|
defer o.mu.Unlock()
|
||||||
fd, err := cg.OOMEventFD()
|
fd, err := cg.OOMEventFD()
|
||||||
@ -49,9 +50,10 @@ func (o *OOMCollector) Add(id string, cg cgroups.Cgroup, triggers ...Trigger) er
|
|||||||
id: id,
|
id: id,
|
||||||
c: cg,
|
c: cg,
|
||||||
triggers: triggers,
|
triggers: triggers,
|
||||||
|
namespace: namespace,
|
||||||
}
|
}
|
||||||
// set the gauge's default value
|
// set the gauge's default value
|
||||||
o.memoryOOM.WithValues(id).Set(0)
|
o.memoryOOM.WithValues(id, namespace).Set(0)
|
||||||
event := unix.EpollEvent{
|
event := unix.EpollEvent{
|
||||||
Fd: int32(fd),
|
Fd: int32(fd),
|
||||||
Events: unix.EPOLLHUP | unix.EPOLLIN | unix.EPOLLERR,
|
Events: unix.EPOLLHUP | unix.EPOLLIN | unix.EPOLLERR,
|
||||||
@ -103,7 +105,7 @@ func (o *OOMCollector) process(fd uintptr, event uint32) {
|
|||||||
unix.Close(int(fd))
|
unix.Close(int(fd))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
o.memoryOOM.WithValues(info.id).Inc(1)
|
o.memoryOOM.WithValues(info.id, info.namespace).Inc(1)
|
||||||
for _, t := range info.triggers {
|
for _, t := range info.triggers {
|
||||||
t(info.id, info.c)
|
t(info.id, info.c)
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user