Move metrics to core/metrics
Signed-off-by: Derek McGowan <derek@mcg.dev>
This commit is contained in:
81
core/metrics/cgroups/cgroups.go
Normal file
81
core/metrics/cgroups/cgroups.go
Normal file
@@ -0,0 +1,81 @@
|
||||
//go:build linux
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cgroups
|
||||
|
||||
import (
|
||||
"github.com/containerd/cgroups/v3"
|
||||
v1 "github.com/containerd/containerd/v2/core/metrics/cgroups/v1"
|
||||
v2 "github.com/containerd/containerd/v2/core/metrics/cgroups/v2"
|
||||
"github.com/containerd/containerd/v2/pkg/events"
|
||||
"github.com/containerd/containerd/v2/platforms"
|
||||
"github.com/containerd/containerd/v2/plugins"
|
||||
"github.com/containerd/containerd/v2/runtime"
|
||||
"github.com/containerd/plugin"
|
||||
"github.com/containerd/plugin/registry"
|
||||
metrics "github.com/docker/go-metrics"
|
||||
)
|
||||
|
||||
// Config holds the settings of the cgroups task monitor plugin.
type Config struct {
	// NoPrometheus, when true, makes New skip creating and registering the
	// "container" metrics namespace. It is read from the `no_prometheus`
	// TOML key.
	NoPrometheus bool `toml:"no_prometheus"`
}
|
||||
|
||||
func init() {
|
||||
registry.Register(&plugin.Registration{
|
||||
Type: plugins.TaskMonitorPlugin,
|
||||
ID: "cgroups",
|
||||
InitFn: New,
|
||||
Requires: []plugin.Type{
|
||||
plugins.EventPlugin,
|
||||
},
|
||||
Config: &Config{},
|
||||
})
|
||||
}
|
||||
|
||||
// New returns a new cgroups monitor
|
||||
func New(ic *plugin.InitContext) (interface{}, error) {
|
||||
var ns *metrics.Namespace
|
||||
config := ic.Config.(*Config)
|
||||
if !config.NoPrometheus {
|
||||
ns = metrics.NewNamespace("container", "", nil)
|
||||
}
|
||||
var (
|
||||
tm runtime.TaskMonitor
|
||||
err error
|
||||
)
|
||||
|
||||
ep, err := ic.GetSingle(plugins.EventPlugin)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if cgroups.Mode() == cgroups.Unified {
|
||||
tm, err = v2.NewTaskMonitor(ic.Context, ep.(events.Publisher), ns)
|
||||
} else {
|
||||
tm, err = v1.NewTaskMonitor(ic.Context, ep.(events.Publisher), ns)
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if ns != nil {
|
||||
metrics.Register(ns)
|
||||
}
|
||||
ic.Meta.Platforms = append(ic.Meta.Platforms, platforms.DefaultSpec())
|
||||
return tm, nil
|
||||
}
|
||||
32
core/metrics/cgroups/common/type.go
Normal file
32
core/metrics/cgroups/common/type.go
Normal file
@@ -0,0 +1,32 @@
|
||||
//go:build linux
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package common
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/containerd/containerd/v2/protobuf/types"
|
||||
)
|
||||
|
||||
// Statable type that returns cgroup metrics
|
||||
type Statable interface {
|
||||
ID() string
|
||||
Namespace() string
|
||||
Stats(context.Context) (*types.Any, error)
|
||||
}
|
||||
157
core/metrics/cgroups/metrics_test.go
Normal file
157
core/metrics/cgroups/metrics_test.go
Normal file
@@ -0,0 +1,157 @@
|
||||
//go:build linux
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package cgroups
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strconv"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/containerd/cgroups/v3"
|
||||
"github.com/containerd/containerd/v2/core/metrics/cgroups/common"
|
||||
v1 "github.com/containerd/containerd/v2/core/metrics/cgroups/v1"
|
||||
v2 "github.com/containerd/containerd/v2/core/metrics/cgroups/v2"
|
||||
v1types "github.com/containerd/containerd/v2/core/metrics/types/v1"
|
||||
v2types "github.com/containerd/containerd/v2/core/metrics/types/v2"
|
||||
"github.com/containerd/containerd/v2/protobuf"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
|
||||
"github.com/containerd/containerd/v2/protobuf/types"
|
||||
metrics "github.com/docker/go-metrics"
|
||||
)
|
||||
|
||||
// TestRegressionIssue6772 should not have dead-lock when Collect and Add run
|
||||
// in the same time.
|
||||
//
|
||||
// Issue: https://github.com/containerd/containerd/issues/6772.
|
||||
func TestRegressionIssue6772(t *testing.T) {
|
||||
ns := metrics.NewNamespace("test-container", "", nil)
|
||||
isV1 := true
|
||||
|
||||
var collecter Collecter
|
||||
if cgroups.Mode() == cgroups.Unified {
|
||||
isV1 = false
|
||||
collecter = v2.NewCollector(ns)
|
||||
} else {
|
||||
collecter = v1.NewCollector(ns)
|
||||
}
|
||||
|
||||
doneCh := make(chan struct{})
|
||||
defer close(doneCh)
|
||||
|
||||
maxItem := 100
|
||||
startCh := make(chan struct{})
|
||||
|
||||
metricCh := make(chan prometheus.Metric, maxItem)
|
||||
|
||||
go func() {
|
||||
for {
|
||||
select {
|
||||
case <-doneCh:
|
||||
return
|
||||
case <-metricCh:
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
go func() {
|
||||
// pulling the metrics to trigger dead-lock
|
||||
ns.Collect(metricCh)
|
||||
close(startCh)
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-doneCh:
|
||||
return
|
||||
default:
|
||||
}
|
||||
|
||||
ns.Collect(metricCh)
|
||||
}
|
||||
}()
|
||||
<-startCh
|
||||
|
||||
labels := map[string]string{"issue": "6772"}
|
||||
errCh := make(chan error, 1)
|
||||
|
||||
var wg sync.WaitGroup
|
||||
for i := 0; i < maxItem; i++ {
|
||||
id := i
|
||||
wg.Add(1)
|
||||
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
|
||||
err := collecter.Add(
|
||||
&mockStatT{
|
||||
id: strconv.Itoa(id),
|
||||
namespace: "issue6772",
|
||||
isV1: isV1,
|
||||
},
|
||||
labels,
|
||||
)
|
||||
if err != nil {
|
||||
errCh <- err
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
finishedCh := make(chan struct{})
|
||||
go func() {
|
||||
defer close(finishedCh)
|
||||
|
||||
wg.Wait()
|
||||
}()
|
||||
|
||||
select {
|
||||
case err := <-errCh:
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
case <-finishedCh:
|
||||
case <-time.After(30 * time.Second):
|
||||
t.Fatal("should finish the Add in time")
|
||||
}
|
||||
}
|
||||
|
||||
type Collecter interface {
|
||||
Collect(ch chan<- prometheus.Metric)
|
||||
|
||||
Add(t common.Statable, labels map[string]string) error
|
||||
}
|
||||
|
||||
type mockStatT struct {
|
||||
id, namespace string
|
||||
isV1 bool
|
||||
}
|
||||
|
||||
func (t *mockStatT) ID() string {
|
||||
return t.id
|
||||
}
|
||||
|
||||
func (t *mockStatT) Namespace() string {
|
||||
return t.namespace
|
||||
}
|
||||
|
||||
func (t *mockStatT) Stats(context.Context) (*types.Any, error) {
|
||||
if t.isV1 {
|
||||
return protobuf.MarshalAnyToProto(&v1types.Metrics{})
|
||||
}
|
||||
return protobuf.MarshalAnyToProto(&v2types.Metrics{})
|
||||
}
|
||||
132
core/metrics/cgroups/v1/blkio.go
Normal file
132
core/metrics/cgroups/v1/blkio.go
Normal file
@@ -0,0 +1,132 @@
|
||||
//go:build linux
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v1
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
|
||||
v1 "github.com/containerd/containerd/v2/core/metrics/types/v1"
|
||||
metrics "github.com/docker/go-metrics"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
var blkioMetrics = []*metric{
|
||||
{
|
||||
name: "blkio_io_merged_recursive",
|
||||
help: "The blkio io merged recursive",
|
||||
unit: metrics.Total,
|
||||
vt: prometheus.GaugeValue,
|
||||
labels: []string{"op", "device", "major", "minor"},
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Blkio == nil {
|
||||
return nil
|
||||
}
|
||||
return blkioValues(stats.Blkio.IoMergedRecursive)
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "blkio_io_queued_recursive",
|
||||
help: "The blkio io queued recursive",
|
||||
unit: metrics.Total,
|
||||
vt: prometheus.GaugeValue,
|
||||
labels: []string{"op", "device", "major", "minor"},
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Blkio == nil {
|
||||
return nil
|
||||
}
|
||||
return blkioValues(stats.Blkio.IoQueuedRecursive)
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "blkio_io_service_bytes_recursive",
|
||||
help: "The blkio io service bytes recursive",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
labels: []string{"op", "device", "major", "minor"},
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Blkio == nil {
|
||||
return nil
|
||||
}
|
||||
return blkioValues(stats.Blkio.IoServiceBytesRecursive)
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "blkio_io_service_time_recursive",
|
||||
help: "The blkio io service time recursive",
|
||||
unit: metrics.Total,
|
||||
vt: prometheus.GaugeValue,
|
||||
labels: []string{"op", "device", "major", "minor"},
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Blkio == nil {
|
||||
return nil
|
||||
}
|
||||
return blkioValues(stats.Blkio.IoServiceTimeRecursive)
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "blkio_io_serviced_recursive",
|
||||
help: "The blkio io serviced recursive",
|
||||
unit: metrics.Total,
|
||||
vt: prometheus.GaugeValue,
|
||||
labels: []string{"op", "device", "major", "minor"},
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Blkio == nil {
|
||||
return nil
|
||||
}
|
||||
return blkioValues(stats.Blkio.IoServicedRecursive)
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "blkio_io_time_recursive",
|
||||
help: "The blkio io time recursive",
|
||||
unit: metrics.Total,
|
||||
vt: prometheus.GaugeValue,
|
||||
labels: []string{"op", "device", "major", "minor"},
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Blkio == nil {
|
||||
return nil
|
||||
}
|
||||
return blkioValues(stats.Blkio.IoTimeRecursive)
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "blkio_sectors_recursive",
|
||||
help: "The blkio sectors recursive",
|
||||
unit: metrics.Total,
|
||||
vt: prometheus.GaugeValue,
|
||||
labels: []string{"op", "device", "major", "minor"},
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Blkio == nil {
|
||||
return nil
|
||||
}
|
||||
return blkioValues(stats.Blkio.SectorsRecursive)
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
func blkioValues(l []*v1.BlkIOEntry) []value {
|
||||
var out []value
|
||||
for _, e := range l {
|
||||
out = append(out, value{
|
||||
v: float64(e.Value),
|
||||
l: []string{e.Op, e.Device, strconv.FormatUint(e.Major, 10), strconv.FormatUint(e.Minor, 10)},
|
||||
})
|
||||
}
|
||||
return out
|
||||
}
|
||||
95
core/metrics/cgroups/v1/cgroups.go
Normal file
95
core/metrics/cgroups/v1/cgroups.go
Normal file
@@ -0,0 +1,95 @@
|
||||
//go:build linux
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v1
|
||||
|
||||
import (
	"context"
	"errors"

	cgroups "github.com/containerd/cgroups/v3/cgroup1"
	eventstypes "github.com/containerd/containerd/v2/api/events"
	"github.com/containerd/containerd/v2/pkg/errdefs"
	"github.com/containerd/containerd/v2/pkg/events"
	"github.com/containerd/containerd/v2/pkg/namespaces"
	"github.com/containerd/containerd/v2/runtime"
	"github.com/containerd/log"
	"github.com/docker/go-metrics"
)
|
||||
|
||||
// NewTaskMonitor returns a new cgroups monitor
|
||||
func NewTaskMonitor(ctx context.Context, publisher events.Publisher, ns *metrics.Namespace) (runtime.TaskMonitor, error) {
|
||||
collector := NewCollector(ns)
|
||||
oom, err := newOOMCollector(ns)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &cgroupsMonitor{
|
||||
collector: collector,
|
||||
oom: oom,
|
||||
context: ctx,
|
||||
publisher: publisher,
|
||||
}, nil
|
||||
}
|
||||
|
||||
type cgroupsMonitor struct {
|
||||
collector *Collector
|
||||
oom *oomCollector
|
||||
context context.Context
|
||||
publisher events.Publisher
|
||||
}
|
||||
|
||||
type cgroupTask interface {
|
||||
Cgroup() (cgroups.Cgroup, error)
|
||||
}
|
||||
|
||||
func (m *cgroupsMonitor) Monitor(c runtime.Task, labels map[string]string) error {
|
||||
if err := m.collector.Add(c, labels); err != nil {
|
||||
return err
|
||||
}
|
||||
t, ok := c.(cgroupTask)
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
cg, err := t.Cgroup()
|
||||
if err != nil {
|
||||
if errdefs.IsNotFound(err) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
err = m.oom.Add(c.ID(), c.Namespace(), cg, m.trigger)
|
||||
if err == cgroups.ErrMemoryNotSupported {
|
||||
log.L.WithError(err).Warn("OOM monitoring failed")
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (m *cgroupsMonitor) Stop(c runtime.Task) error {
|
||||
m.collector.Remove(c)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *cgroupsMonitor) trigger(id, namespace string, cg cgroups.Cgroup) {
|
||||
ctx := namespaces.WithNamespace(m.context, namespace)
|
||||
if err := m.publisher.Publish(ctx, runtime.TaskOOMEventTopic, &eventstypes.TaskOOM{
|
||||
ContainerID: id,
|
||||
}); err != nil {
|
||||
log.G(m.context).WithError(err).Error("post OOM event")
|
||||
}
|
||||
}
|
||||
146
core/metrics/cgroups/v1/cpu.go
Normal file
146
core/metrics/cgroups/v1/cpu.go
Normal file
@@ -0,0 +1,146 @@
|
||||
//go:build linux
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v1
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
|
||||
v1 "github.com/containerd/containerd/v2/core/metrics/types/v1"
|
||||
metrics "github.com/docker/go-metrics"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
var cpuMetrics = []*metric{
|
||||
{
|
||||
name: "cpu_total",
|
||||
help: "The total cpu time",
|
||||
unit: metrics.Nanoseconds,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.CPU == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.CPU.Usage.Total),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "cpu_kernel",
|
||||
help: "The total kernel cpu time",
|
||||
unit: metrics.Nanoseconds,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.CPU == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.CPU.Usage.Kernel),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "cpu_user",
|
||||
help: "The total user cpu time",
|
||||
unit: metrics.Nanoseconds,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.CPU == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.CPU.Usage.User),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "per_cpu",
|
||||
help: "The total cpu time per cpu",
|
||||
unit: metrics.Nanoseconds,
|
||||
vt: prometheus.GaugeValue,
|
||||
labels: []string{"cpu"},
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.CPU == nil {
|
||||
return nil
|
||||
}
|
||||
var out []value
|
||||
for i, v := range stats.CPU.Usage.PerCPU {
|
||||
out = append(out, value{
|
||||
v: float64(v),
|
||||
l: []string{strconv.Itoa(i)},
|
||||
})
|
||||
}
|
||||
return out
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "cpu_throttle_periods",
|
||||
help: "The total cpu throttle periods",
|
||||
unit: metrics.Total,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.CPU == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.CPU.Throttling.Periods),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "cpu_throttled_periods",
|
||||
help: "The total cpu throttled periods",
|
||||
unit: metrics.Total,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.CPU == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.CPU.Throttling.ThrottledPeriods),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "cpu_throttled_time",
|
||||
help: "The total cpu throttled time",
|
||||
unit: metrics.Nanoseconds,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.CPU == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.CPU.Throttling.ThrottledTime),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
}
|
||||
88
core/metrics/cgroups/v1/hugetlb.go
Normal file
88
core/metrics/cgroups/v1/hugetlb.go
Normal file
@@ -0,0 +1,88 @@
|
||||
//go:build linux
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v1
|
||||
|
||||
import (
|
||||
v1 "github.com/containerd/containerd/v2/core/metrics/types/v1"
|
||||
metrics "github.com/docker/go-metrics"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
var hugetlbMetrics = []*metric{
|
||||
{
|
||||
name: "hugetlb_usage",
|
||||
help: "The hugetlb usage",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
labels: []string{"page"},
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Hugetlb == nil {
|
||||
return nil
|
||||
}
|
||||
var out []value
|
||||
for _, v := range stats.Hugetlb {
|
||||
out = append(out, value{
|
||||
v: float64(v.Usage),
|
||||
l: []string{v.Pagesize},
|
||||
})
|
||||
}
|
||||
return out
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "hugetlb_failcnt",
|
||||
help: "The hugetlb failcnt",
|
||||
unit: metrics.Total,
|
||||
vt: prometheus.GaugeValue,
|
||||
labels: []string{"page"},
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Hugetlb == nil {
|
||||
return nil
|
||||
}
|
||||
var out []value
|
||||
for _, v := range stats.Hugetlb {
|
||||
out = append(out, value{
|
||||
v: float64(v.Failcnt),
|
||||
l: []string{v.Pagesize},
|
||||
})
|
||||
}
|
||||
return out
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "hugetlb_max",
|
||||
help: "The hugetlb maximum usage",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
labels: []string{"page"},
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Hugetlb == nil {
|
||||
return nil
|
||||
}
|
||||
var out []value
|
||||
for _, v := range stats.Hugetlb {
|
||||
out = append(out, value{
|
||||
v: float64(v.Max),
|
||||
l: []string{v.Pagesize},
|
||||
})
|
||||
}
|
||||
return out
|
||||
},
|
||||
},
|
||||
}
|
||||
796
core/metrics/cgroups/v1/memory.go
Normal file
796
core/metrics/cgroups/v1/memory.go
Normal file
@@ -0,0 +1,796 @@
|
||||
//go:build linux
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v1
|
||||
|
||||
import (
|
||||
v1 "github.com/containerd/containerd/v2/core/metrics/types/v1"
|
||||
metrics "github.com/docker/go-metrics"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
var memoryMetrics = []*metric{
|
||||
{
|
||||
name: "memory_cache",
|
||||
help: "The cache amount used",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.Cache),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_rss",
|
||||
help: "The rss amount used",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.RSS),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_rss_huge",
|
||||
help: "The rss_huge amount used",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.RSSHuge),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_mapped_file",
|
||||
help: "The mapped_file amount used",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.MappedFile),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_dirty",
|
||||
help: "The dirty amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.Dirty),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_writeback",
|
||||
help: "The writeback amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.Writeback),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_pgpgin",
|
||||
help: "The pgpgin amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.PgPgIn),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_pgpgout",
|
||||
help: "The pgpgout amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.PgPgOut),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_pgfault",
|
||||
help: "The pgfault amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.PgFault),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_pgmajfault",
|
||||
help: "The pgmajfault amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.PgMajFault),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_inactive_anon",
|
||||
help: "The inactive_anon amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.InactiveAnon),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_active_anon",
|
||||
help: "The active_anon amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.ActiveAnon),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_inactive_file",
|
||||
help: "The inactive_file amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.InactiveFile),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_active_file",
|
||||
help: "The active_file amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.ActiveFile),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_unevictable",
|
||||
help: "The unevictable amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.Unevictable),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_hierarchical_memory_limit",
|
||||
help: "The hierarchical_memory_limit amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.HierarchicalMemoryLimit),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_hierarchical_memsw_limit",
|
||||
help: "The hierarchical_memsw_limit amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.HierarchicalSwapLimit),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_total_cache",
|
||||
help: "The total_cache amount used",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.TotalCache),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_total_rss",
|
||||
help: "The total_rss amount used",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.TotalRSS),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_total_rss_huge",
|
||||
help: "The total_rss_huge amount used",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.TotalRSSHuge),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_total_mapped_file",
|
||||
help: "The total_mapped_file amount used",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.TotalMappedFile),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_total_dirty",
|
||||
help: "The total_dirty amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.TotalDirty),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_total_writeback",
|
||||
help: "The total_writeback amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.TotalWriteback),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_total_pgpgin",
|
||||
help: "The total_pgpgin amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.TotalPgPgIn),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_total_pgpgout",
|
||||
help: "The total_pgpgout amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.TotalPgPgOut),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_total_pgfault",
|
||||
help: "The total_pgfault amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.TotalPgFault),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_total_pgmajfault",
|
||||
help: "The total_pgmajfault amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.TotalPgMajFault),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_total_inactive_anon",
|
||||
help: "The total_inactive_anon amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.TotalInactiveAnon),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_total_active_anon",
|
||||
help: "The total_active_anon amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.TotalActiveAnon),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_total_inactive_file",
|
||||
help: "The total_inactive_file amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.TotalInactiveFile),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_total_active_file",
|
||||
help: "The total_active_file amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.TotalActiveFile),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_total_unevictable",
|
||||
help: "The total_unevictable amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.TotalUnevictable),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_usage_failcnt",
|
||||
help: "The usage failcnt",
|
||||
unit: metrics.Total,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.GetMemory().GetUsage() == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.Usage.Failcnt),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_usage_limit",
|
||||
help: "The memory limit",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.GetMemory().GetUsage() == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.Usage.Limit),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_usage_max",
|
||||
help: "The memory maximum usage",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.GetMemory().GetUsage() == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.Usage.Max),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_usage_usage",
|
||||
help: "The memory usage",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.GetMemory().GetUsage() == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.Usage.Usage),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_swap_failcnt",
|
||||
help: "The swap failcnt",
|
||||
unit: metrics.Total,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.GetMemory().GetSwap() == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.Swap.Failcnt),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_swap_limit",
|
||||
help: "The swap limit",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.GetMemory().GetSwap() == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.Swap.Limit),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_swap_max",
|
||||
help: "The swap maximum usage",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.GetMemory().GetSwap() == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.Swap.Max),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_swap_usage",
|
||||
help: "The swap usage",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.GetMemory().GetSwap() == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.Swap.Usage),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_kernel_failcnt",
|
||||
help: "The kernel failcnt",
|
||||
unit: metrics.Total,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.GetMemory().GetKernel() == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.Kernel.Failcnt),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_kernel_limit",
|
||||
help: "The kernel limit",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.GetMemory().GetKernel() == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.Kernel.Limit),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_kernel_max",
|
||||
help: "The kernel maximum usage",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.GetMemory().GetKernel() == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.Kernel.Max),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_kernel_usage",
|
||||
help: "The kernel usage",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.GetMemory().GetKernel() == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.Kernel.Usage),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_kerneltcp_failcnt",
|
||||
help: "The kerneltcp failcnt",
|
||||
unit: metrics.Total,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.GetMemory().GetKernelTCP() == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.KernelTCP.Failcnt),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_kerneltcp_limit",
|
||||
help: "The kerneltcp limit",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.GetMemory().GetKernelTCP() == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.KernelTCP.Limit),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_kerneltcp_max",
|
||||
help: "The kerneltcp maximum usage",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.GetMemory().GetKernelTCP() == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.KernelTCP.Max),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_kerneltcp_usage",
|
||||
help: "The kerneltcp usage",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.GetMemory().GetKernelTCP() == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.KernelTCP.Usage),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
}
|
||||
64
core/metrics/cgroups/v1/metric.go
Normal file
64
core/metrics/cgroups/v1/metric.go
Normal file
@@ -0,0 +1,64 @@
|
||||
//go:build linux
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v1
|
||||
|
||||
import (
|
||||
v1 "github.com/containerd/containerd/v2/core/metrics/types/v1"
|
||||
metrics "github.com/docker/go-metrics"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
// IDName is the label name under which every metric in this package reports
// the identifier of the container it was collected from.
var IDName = "container_id"
|
||||
|
||||
// value is a single sample returned by a metric's getValues function.
type value struct {
	// v is the numeric sample.
	v float64
	// l holds the label values that are appended after the id and namespace
	// label values when the sample is emitted.
	l []string
}
|
||||
|
||||
type metric struct {
|
||||
name string
|
||||
help string
|
||||
unit metrics.Unit
|
||||
vt prometheus.ValueType
|
||||
labels []string
|
||||
// getValues returns the value and labels for the data
|
||||
getValues func(stats *v1.Metrics) []value
|
||||
}
|
||||
|
||||
func (m *metric) desc(ns *metrics.Namespace) *prometheus.Desc {
|
||||
// the namespace label is for containerd namespaces
|
||||
return ns.NewDesc(m.name, m.help, m.unit, append([]string{IDName, "namespace"}, m.labels...)...)
|
||||
}
|
||||
|
||||
func (m *metric) collect(id, namespace string, stats *v1.Metrics, ns *metrics.Namespace, ch chan<- prometheus.Metric, block bool) {
|
||||
values := m.getValues(stats)
|
||||
for _, v := range values {
|
||||
// block signals to block on the sending the metrics so none are missed
|
||||
if block {
|
||||
ch <- prometheus.MustNewConstMetric(m.desc(ns), m.vt, v.v, append([]string{id, namespace}, v.l...)...)
|
||||
continue
|
||||
}
|
||||
// non-blocking metrics can be dropped if the chan is full
|
||||
select {
|
||||
case ch <- prometheus.MustNewConstMetric(m.desc(ns), m.vt, v.v, append([]string{id, namespace}, v.l...)...):
|
||||
default:
|
||||
}
|
||||
}
|
||||
}
|
||||
212
core/metrics/cgroups/v1/metrics.go
Normal file
212
core/metrics/cgroups/v1/metrics.go
Normal file
@@ -0,0 +1,212 @@
|
||||
//go:build linux
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v1
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
cgroups "github.com/containerd/cgroups/v3/cgroup1"
|
||||
cmetrics "github.com/containerd/containerd/v2/core/metrics"
|
||||
"github.com/containerd/containerd/v2/core/metrics/cgroups/common"
|
||||
v1 "github.com/containerd/containerd/v2/core/metrics/types/v1"
|
||||
"github.com/containerd/containerd/v2/pkg/namespaces"
|
||||
"github.com/containerd/containerd/v2/pkg/timeout"
|
||||
"github.com/containerd/log"
|
||||
"github.com/containerd/typeurl/v2"
|
||||
"github.com/docker/go-metrics"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
// Trigger will be called when an event happens and provides the cgroup
|
||||
// where the event originated from
|
||||
type Trigger func(string, string, cgroups.Cgroup)
|
||||
|
||||
// NewCollector registers the collector with the provided namespace and returns it so
|
||||
// that cgroups can be added for collection
|
||||
func NewCollector(ns *metrics.Namespace) *Collector {
|
||||
if ns == nil {
|
||||
return &Collector{}
|
||||
}
|
||||
// add machine cpus and memory info
|
||||
c := &Collector{
|
||||
ns: ns,
|
||||
tasks: make(map[string]entry),
|
||||
}
|
||||
c.metrics = append(c.metrics, pidMetrics...)
|
||||
c.metrics = append(c.metrics, cpuMetrics...)
|
||||
c.metrics = append(c.metrics, memoryMetrics...)
|
||||
c.metrics = append(c.metrics, hugetlbMetrics...)
|
||||
c.metrics = append(c.metrics, blkioMetrics...)
|
||||
c.storedMetrics = make(chan prometheus.Metric, 100*len(c.metrics))
|
||||
ns.Add(c)
|
||||
return c
|
||||
}
|
||||
|
||||
// taskID builds the key under which a task is tracked by the collector:
// the task id and its namespace joined by a dash.
func taskID(id, namespace string) string {
	const layout = "%s-%s"
	return fmt.Sprintf(layout, id, namespace)
}
|
||||
|
||||
type entry struct {
|
||||
task common.Statable
|
||||
// ns is an optional child namespace that contains additional to parent labels.
|
||||
// This can be used to append task specific labels to be able to differentiate the different containerd metrics.
|
||||
ns *metrics.Namespace
|
||||
}
|
||||
|
||||
// Collector provides the ability to collect container stats and export
|
||||
// them in the prometheus format
|
||||
type Collector struct {
|
||||
ns *metrics.Namespace
|
||||
storedMetrics chan prometheus.Metric
|
||||
|
||||
// TODO(fuweid):
|
||||
//
|
||||
// The Collector.Collect will be the field ns'Collect's callback,
|
||||
// which be invoked periodically with internal lock. And Collector.Add
|
||||
// might also invoke ns.Lock if the labels is not nil, which is easy to
|
||||
// cause dead-lock.
|
||||
//
|
||||
// Goroutine X:
|
||||
//
|
||||
// ns.Collect
|
||||
// ns.Lock
|
||||
// Collector.Collect
|
||||
// Collector.RLock
|
||||
//
|
||||
//
|
||||
// Goroutine Y:
|
||||
//
|
||||
// Collector.Add
|
||||
// ...(RLock/Lock)
|
||||
// ns.Lock
|
||||
//
|
||||
// I think we should seek the way to decouple ns from Collector.
|
||||
mu sync.RWMutex
|
||||
tasks map[string]entry
|
||||
metrics []*metric
|
||||
}
|
||||
|
||||
// Describe prometheus metrics
|
||||
func (c *Collector) Describe(ch chan<- *prometheus.Desc) {
|
||||
for _, m := range c.metrics {
|
||||
ch <- m.desc(c.ns)
|
||||
}
|
||||
}
|
||||
|
||||
// Collect prometheus metrics
|
||||
func (c *Collector) Collect(ch chan<- prometheus.Metric) {
|
||||
c.mu.RLock()
|
||||
wg := &sync.WaitGroup{}
|
||||
for _, t := range c.tasks {
|
||||
wg.Add(1)
|
||||
go c.collect(t, ch, true, wg)
|
||||
}
|
||||
storedLoop:
|
||||
for {
|
||||
// read stored metrics until the channel is flushed
|
||||
select {
|
||||
case m := <-c.storedMetrics:
|
||||
ch <- m
|
||||
default:
|
||||
break storedLoop
|
||||
}
|
||||
}
|
||||
c.mu.RUnlock()
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
func (c *Collector) collect(entry entry, ch chan<- prometheus.Metric, block bool, wg *sync.WaitGroup) {
|
||||
if wg != nil {
|
||||
defer wg.Done()
|
||||
}
|
||||
|
||||
t := entry.task
|
||||
ctx, cancel := timeout.WithContext(context.Background(), cmetrics.ShimStatsRequestTimeout)
|
||||
stats, err := t.Stats(namespaces.WithNamespace(ctx, t.Namespace()))
|
||||
cancel()
|
||||
|
||||
if err != nil {
|
||||
log.L.WithError(err).Errorf("stat task %s", t.ID())
|
||||
return
|
||||
}
|
||||
|
||||
data, err := typeurl.UnmarshalAny(stats)
|
||||
if err != nil {
|
||||
log.L.WithError(err).Errorf("unmarshal stats for %s", t.ID())
|
||||
return
|
||||
}
|
||||
s, ok := data.(*v1.Metrics)
|
||||
if !ok {
|
||||
log.L.WithError(err).Errorf("invalid metric type for %s", t.ID())
|
||||
return
|
||||
}
|
||||
ns := entry.ns
|
||||
if ns == nil {
|
||||
ns = c.ns
|
||||
}
|
||||
for _, m := range c.metrics {
|
||||
m.collect(t.ID(), t.Namespace(), s, ns, ch, block)
|
||||
}
|
||||
}
|
||||
|
||||
// Add adds the provided cgroup and id so that metrics are collected and exported
|
||||
func (c *Collector) Add(t common.Statable, labels map[string]string) error {
|
||||
if c.ns == nil {
|
||||
return nil
|
||||
}
|
||||
c.mu.RLock()
|
||||
id := taskID(t.ID(), t.Namespace())
|
||||
_, ok := c.tasks[id]
|
||||
c.mu.RUnlock()
|
||||
if ok {
|
||||
return nil // requests to collect metrics should be idempotent
|
||||
}
|
||||
|
||||
entry := entry{task: t}
|
||||
if labels != nil {
|
||||
entry.ns = c.ns.WithConstLabels(labels)
|
||||
}
|
||||
|
||||
c.mu.Lock()
|
||||
c.tasks[id] = entry
|
||||
c.mu.Unlock()
|
||||
return nil
|
||||
}
|
||||
|
||||
// Remove removes the provided cgroup by id from the collector
|
||||
func (c *Collector) Remove(t common.Statable) {
|
||||
if c.ns == nil {
|
||||
return
|
||||
}
|
||||
c.mu.Lock()
|
||||
delete(c.tasks, taskID(t.ID(), t.Namespace()))
|
||||
c.mu.Unlock()
|
||||
}
|
||||
|
||||
// RemoveAll statable items from the collector
|
||||
func (c *Collector) RemoveAll() {
|
||||
if c.ns == nil {
|
||||
return
|
||||
}
|
||||
c.mu.Lock()
|
||||
c.tasks = make(map[string]entry)
|
||||
c.mu.Unlock()
|
||||
}
|
||||
161
core/metrics/cgroups/v1/oom.go
Normal file
161
core/metrics/cgroups/v1/oom.go
Normal file
@@ -0,0 +1,161 @@
|
||||
//go:build linux
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v1
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
cgroups "github.com/containerd/cgroups/v3/cgroup1"
|
||||
"github.com/containerd/log"
|
||||
metrics "github.com/docker/go-metrics"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
func newOOMCollector(ns *metrics.Namespace) (*oomCollector, error) {
|
||||
fd, err := unix.EpollCreate1(unix.EPOLL_CLOEXEC)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var desc *prometheus.Desc
|
||||
if ns != nil {
|
||||
desc = ns.NewDesc("memory_oom", "The number of times a container has received an oom event", metrics.Total, "container_id", "namespace")
|
||||
}
|
||||
c := &oomCollector{
|
||||
fd: fd,
|
||||
desc: desc,
|
||||
set: make(map[uintptr]*oom),
|
||||
}
|
||||
if ns != nil {
|
||||
ns.Add(c)
|
||||
}
|
||||
go c.start()
|
||||
return c, nil
|
||||
}
|
||||
|
||||
type oomCollector struct {
|
||||
mu sync.Mutex
|
||||
|
||||
desc *prometheus.Desc
|
||||
fd int
|
||||
set map[uintptr]*oom
|
||||
}
|
||||
|
||||
type oom struct {
|
||||
// count needs to stay the first member of this struct to ensure 64bits
|
||||
// alignment on a 32bits machine (e.g. arm32). This is necessary as we use
|
||||
// the sync/atomic operations on this field.
|
||||
count int64
|
||||
id string
|
||||
namespace string
|
||||
c cgroups.Cgroup
|
||||
triggers []Trigger
|
||||
}
|
||||
|
||||
func (o *oomCollector) Add(id, namespace string, cg cgroups.Cgroup, triggers ...Trigger) error {
|
||||
o.mu.Lock()
|
||||
defer o.mu.Unlock()
|
||||
fd, err := cg.OOMEventFD()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
o.set[fd] = &oom{
|
||||
id: id,
|
||||
c: cg,
|
||||
triggers: triggers,
|
||||
namespace: namespace,
|
||||
}
|
||||
event := unix.EpollEvent{
|
||||
Fd: int32(fd),
|
||||
Events: unix.EPOLLHUP | unix.EPOLLIN | unix.EPOLLERR,
|
||||
}
|
||||
return unix.EpollCtl(o.fd, unix.EPOLL_CTL_ADD, int(fd), &event)
|
||||
}
|
||||
|
||||
func (o *oomCollector) Describe(ch chan<- *prometheus.Desc) {
|
||||
ch <- o.desc
|
||||
}
|
||||
|
||||
func (o *oomCollector) Collect(ch chan<- prometheus.Metric) {
|
||||
o.mu.Lock()
|
||||
defer o.mu.Unlock()
|
||||
for _, t := range o.set {
|
||||
t := t
|
||||
c := atomic.LoadInt64(&t.count)
|
||||
ch <- prometheus.MustNewConstMetric(o.desc, prometheus.CounterValue, float64(c), t.id, t.namespace)
|
||||
}
|
||||
}
|
||||
|
||||
// Close closes the epoll fd
|
||||
func (o *oomCollector) Close() error {
|
||||
return unix.Close(o.fd)
|
||||
}
|
||||
|
||||
func (o *oomCollector) start() {
|
||||
var events [128]unix.EpollEvent
|
||||
for {
|
||||
n, err := unix.EpollWait(o.fd, events[:], -1)
|
||||
if err != nil {
|
||||
if err == unix.EINTR {
|
||||
continue
|
||||
}
|
||||
log.L.WithError(err).Error("cgroups: epoll wait failed, OOM notifications disabled")
|
||||
return
|
||||
}
|
||||
for i := 0; i < n; i++ {
|
||||
o.process(uintptr(events[i].Fd))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (o *oomCollector) process(fd uintptr) {
|
||||
// make sure to always flush the eventfd
|
||||
flushEventfd(fd)
|
||||
|
||||
o.mu.Lock()
|
||||
info, ok := o.set[fd]
|
||||
if !ok {
|
||||
o.mu.Unlock()
|
||||
return
|
||||
}
|
||||
o.mu.Unlock()
|
||||
// if we received an event but it was caused by the cgroup being deleted and the fd
|
||||
// being closed make sure we close our copy and remove the container from the set
|
||||
if info.c.State() == cgroups.Deleted {
|
||||
o.mu.Lock()
|
||||
delete(o.set, fd)
|
||||
o.mu.Unlock()
|
||||
unix.Close(int(fd))
|
||||
return
|
||||
}
|
||||
atomic.AddInt64(&info.count, 1)
|
||||
for _, t := range info.triggers {
|
||||
t(info.id, info.namespace, info.c)
|
||||
}
|
||||
}
|
||||
|
||||
func flushEventfd(efd uintptr) error {
|
||||
// Buffer must be >= 8 bytes for eventfd reads
|
||||
// https://man7.org/linux/man-pages/man2/eventfd.2.html
|
||||
var buf [8]byte
|
||||
_, err := unix.Read(int(efd), buf[:])
|
||||
return err
|
||||
}
|
||||
60
core/metrics/cgroups/v1/pids.go
Normal file
60
core/metrics/cgroups/v1/pids.go
Normal file
@@ -0,0 +1,60 @@
|
||||
//go:build linux
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v1
|
||||
|
||||
import (
|
||||
v1 "github.com/containerd/containerd/v2/core/metrics/types/v1"
|
||||
metrics "github.com/docker/go-metrics"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
var pidMetrics = []*metric{
|
||||
{
|
||||
name: "pids",
|
||||
help: "The limit to the number of pids allowed",
|
||||
unit: metrics.Unit("limit"),
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Pids == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Pids.Limit),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "pids",
|
||||
help: "The current number of pids",
|
||||
unit: metrics.Unit("current"),
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v1.Metrics) []value {
|
||||
if stats.Pids == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Pids.Current),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
}
|
||||
55
core/metrics/cgroups/v2/cgroups.go
Normal file
55
core/metrics/cgroups/v2/cgroups.go
Normal file
@@ -0,0 +1,55 @@
|
||||
//go:build linux
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v2
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/containerd/containerd/v2/pkg/events"
|
||||
"github.com/containerd/containerd/v2/runtime"
|
||||
"github.com/docker/go-metrics"
|
||||
)
|
||||
|
||||
// NewTaskMonitor returns a new cgroups monitor
|
||||
func NewTaskMonitor(ctx context.Context, publisher events.Publisher, ns *metrics.Namespace) (runtime.TaskMonitor, error) {
|
||||
collector := NewCollector(ns)
|
||||
return &cgroupsMonitor{
|
||||
collector: collector,
|
||||
context: ctx,
|
||||
publisher: publisher,
|
||||
}, nil
|
||||
}
|
||||
|
||||
type cgroupsMonitor struct {
|
||||
collector *Collector
|
||||
context context.Context
|
||||
publisher events.Publisher
|
||||
}
|
||||
|
||||
func (m *cgroupsMonitor) Monitor(c runtime.Task, labels map[string]string) error {
|
||||
if err := m.collector.Add(c, labels); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *cgroupsMonitor) Stop(c runtime.Task) error {
|
||||
m.collector.Remove(c)
|
||||
return nil
|
||||
}
|
||||
124
core/metrics/cgroups/v2/cpu.go
Normal file
124
core/metrics/cgroups/v2/cpu.go
Normal file
@@ -0,0 +1,124 @@
|
||||
//go:build linux
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v2
|
||||
|
||||
import (
|
||||
v2 "github.com/containerd/containerd/v2/core/metrics/types/v2"
|
||||
metrics "github.com/docker/go-metrics"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
var cpuMetrics = []*metric{
|
||||
{
|
||||
name: "cpu_usage_usec",
|
||||
help: "Total cpu usage (cgroup v2)",
|
||||
unit: metrics.Unit("microseconds"),
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.CPU == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.CPU.UsageUsec),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "cpu_user_usec",
|
||||
help: "Current cpu usage in user space (cgroup v2)",
|
||||
unit: metrics.Unit("microseconds"),
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.CPU == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.CPU.UserUsec),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "cpu_system_usec",
|
||||
help: "Current cpu usage in kernel space (cgroup v2)",
|
||||
unit: metrics.Unit("microseconds"),
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.CPU == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.CPU.SystemUsec),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "cpu_nr_periods",
|
||||
help: "Current cpu number of periods (only if controller is enabled)",
|
||||
unit: metrics.Total,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.CPU == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.CPU.NrPeriods),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "cpu_nr_throttled",
|
||||
help: "Total number of times tasks have been throttled (only if controller is enabled)",
|
||||
unit: metrics.Total,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.CPU == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.CPU.NrThrottled),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "cpu_throttled_usec",
|
||||
help: "Total time duration for which tasks have been throttled. (only if controller is enabled)",
|
||||
unit: metrics.Unit("microseconds"),
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.CPU == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.CPU.ThrottledUsec),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
}
|
||||
110
core/metrics/cgroups/v2/io.go
Normal file
110
core/metrics/cgroups/v2/io.go
Normal file
@@ -0,0 +1,110 @@
|
||||
//go:build linux
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v2
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
|
||||
v2 "github.com/containerd/containerd/v2/core/metrics/types/v2"
|
||||
metrics "github.com/docker/go-metrics"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
var ioMetrics = []*metric{
|
||||
{
|
||||
name: "io_rbytes",
|
||||
help: "IO bytes read",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
labels: []string{"major", "minor"},
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Io == nil {
|
||||
return nil
|
||||
}
|
||||
var out []value
|
||||
for _, e := range stats.Io.Usage {
|
||||
out = append(out, value{
|
||||
v: float64(e.Rbytes),
|
||||
l: []string{strconv.FormatUint(e.Major, 10), strconv.FormatUint(e.Minor, 10)},
|
||||
})
|
||||
}
|
||||
return out
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "io_wbytes",
|
||||
help: "IO bytes written",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
labels: []string{"major", "minor"},
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Io == nil {
|
||||
return nil
|
||||
}
|
||||
var out []value
|
||||
for _, e := range stats.Io.Usage {
|
||||
out = append(out, value{
|
||||
v: float64(e.Wbytes),
|
||||
l: []string{strconv.FormatUint(e.Major, 10), strconv.FormatUint(e.Minor, 10)},
|
||||
})
|
||||
}
|
||||
return out
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "io_rios",
|
||||
help: "Number of read IOs",
|
||||
unit: metrics.Total,
|
||||
vt: prometheus.GaugeValue,
|
||||
labels: []string{"major", "minor"},
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Io == nil {
|
||||
return nil
|
||||
}
|
||||
var out []value
|
||||
for _, e := range stats.Io.Usage {
|
||||
out = append(out, value{
|
||||
v: float64(e.Rios),
|
||||
l: []string{strconv.FormatUint(e.Major, 10), strconv.FormatUint(e.Minor, 10)},
|
||||
})
|
||||
}
|
||||
return out
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "io_wios",
|
||||
help: "Number of write IOs",
|
||||
unit: metrics.Total,
|
||||
vt: prometheus.GaugeValue,
|
||||
labels: []string{"major", "minor"},
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Io == nil {
|
||||
return nil
|
||||
}
|
||||
var out []value
|
||||
for _, e := range stats.Io.Usage {
|
||||
out = append(out, value{
|
||||
v: float64(e.Wios),
|
||||
l: []string{strconv.FormatUint(e.Major, 10), strconv.FormatUint(e.Minor, 10)},
|
||||
})
|
||||
}
|
||||
return out
|
||||
},
|
||||
},
|
||||
}
|
||||
605
core/metrics/cgroups/v2/memory.go
Normal file
605
core/metrics/cgroups/v2/memory.go
Normal file
@@ -0,0 +1,605 @@
|
||||
//go:build linux
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v2
|
||||
|
||||
import (
|
||||
v2 "github.com/containerd/containerd/v2/core/metrics/types/v2"
|
||||
metrics "github.com/docker/go-metrics"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
var memoryMetrics = []*metric{
|
||||
{
|
||||
name: "memory_usage",
|
||||
help: "Current memory usage (cgroup v2)",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.Usage),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_usage_limit",
|
||||
help: "Current memory usage limit (cgroup v2)",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.UsageLimit),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_swap_usage",
|
||||
help: "Current swap usage (cgroup v2)",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.SwapUsage),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_swap_limit",
|
||||
help: "Current swap usage limit (cgroup v2)",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.SwapLimit),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
|
||||
{
|
||||
name: "memory_file_mapped",
|
||||
help: "The file_mapped amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.FileMapped),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_file_dirty",
|
||||
help: "The file_dirty amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.FileDirty),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_file_writeback",
|
||||
help: "The file_writeback amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.FileWriteback),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_pgactivate",
|
||||
help: "The pgactivate amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.Pgactivate),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_pgdeactivate",
|
||||
help: "The pgdeactivate amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.Pgdeactivate),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_pgfault",
|
||||
help: "The pgfault amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.Pgfault),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_pgmajfault",
|
||||
help: "The pgmajfault amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.Pgmajfault),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_pglazyfree",
|
||||
help: "The pglazyfree amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.Pglazyfree),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_pgrefill",
|
||||
help: "The pgrefill amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.Pgrefill),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_pglazyfreed",
|
||||
help: "The pglazyfreed amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.Pglazyfreed),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_pgscan",
|
||||
help: "The pgscan amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.Pgscan),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_pgsteal",
|
||||
help: "The pgsteal amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.Pgsteal),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_inactive_anon",
|
||||
help: "The inactive_anon amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.InactiveAnon),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_active_anon",
|
||||
help: "The active_anon amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.ActiveAnon),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_inactive_file",
|
||||
help: "The inactive_file amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.InactiveFile),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_active_file",
|
||||
help: "The active_file amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.ActiveFile),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_unevictable",
|
||||
help: "The unevictable amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.Unevictable),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_anon",
|
||||
help: "The anon amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.Anon),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_file",
|
||||
help: "The file amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.File),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_kernel_stack",
|
||||
help: "The kernel_stack amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.KernelStack),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_slab",
|
||||
help: "The slab amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.Slab),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_sock",
|
||||
help: "The sock amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.Sock),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_shmem",
|
||||
help: "The shmem amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.Shmem),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_anon_thp",
|
||||
help: "The anon_thp amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.AnonThp),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_slab_reclaimable",
|
||||
help: "The slab_reclaimable amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.SlabReclaimable),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_slab_unreclaimable",
|
||||
help: "The slab_unreclaimable amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.SlabUnreclaimable),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_workingset_refault",
|
||||
help: "The workingset_refault amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.WorkingsetRefault),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_workingset_activate",
|
||||
help: "The workingset_activate amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.WorkingsetActivate),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_workingset_nodereclaim",
|
||||
help: "The workingset_nodereclaim amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.WorkingsetNodereclaim),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_thp_fault_alloc",
|
||||
help: "The thp_fault_alloc amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.ThpFaultAlloc),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_thp_collapse_alloc",
|
||||
help: "The thp_collapse_alloc amount",
|
||||
unit: metrics.Bytes,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Memory == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Memory.ThpCollapseAlloc),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "memory_oom",
|
||||
help: "The number of times a container has received an oom event",
|
||||
unit: metrics.Total,
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.MemoryEvents == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.MemoryEvents.Oom),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
}
|
||||
64
core/metrics/cgroups/v2/metric.go
Normal file
64
core/metrics/cgroups/v2/metric.go
Normal file
@@ -0,0 +1,64 @@
|
||||
//go:build linux
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v2
|
||||
|
||||
import (
|
||||
v2 "github.com/containerd/containerd/v2/core/metrics/types/v2"
|
||||
metrics "github.com/docker/go-metrics"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
// IDName is the label name under which the ID of the collected task is reported on every metric
|
||||
var IDName = "container_id"
|
||||
|
||||
// value is a single sample produced by a metric's getValues function.
type value struct {
	// v is the sample's numeric value.
	v float64
	// l holds the label values that are appended after the id and
	// namespace label values when the sample is emitted.
	l []string
}
|
||||
|
||||
type metric struct {
|
||||
name string
|
||||
help string
|
||||
unit metrics.Unit
|
||||
vt prometheus.ValueType
|
||||
labels []string
|
||||
// getValues returns the value and labels for the data
|
||||
getValues func(stats *v2.Metrics) []value
|
||||
}
|
||||
|
||||
func (m *metric) desc(ns *metrics.Namespace) *prometheus.Desc {
|
||||
// the namespace label is for containerd namespaces
|
||||
return ns.NewDesc(m.name, m.help, m.unit, append([]string{IDName, "namespace"}, m.labels...)...)
|
||||
}
|
||||
|
||||
func (m *metric) collect(id, namespace string, stats *v2.Metrics, ns *metrics.Namespace, ch chan<- prometheus.Metric, block bool) {
|
||||
values := m.getValues(stats)
|
||||
for _, v := range values {
|
||||
// block signals to block on the sending the metrics so none are missed
|
||||
if block {
|
||||
ch <- prometheus.MustNewConstMetric(m.desc(ns), m.vt, v.v, append([]string{id, namespace}, v.l...)...)
|
||||
continue
|
||||
}
|
||||
// non-blocking metrics can be dropped if the chan is full
|
||||
select {
|
||||
case ch <- prometheus.MustNewConstMetric(m.desc(ns), m.vt, v.v, append([]string{id, namespace}, v.l...)...):
|
||||
default:
|
||||
}
|
||||
}
|
||||
}
|
||||
203
core/metrics/cgroups/v2/metrics.go
Normal file
203
core/metrics/cgroups/v2/metrics.go
Normal file
@@ -0,0 +1,203 @@
|
||||
//go:build linux
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v2
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
cmetrics "github.com/containerd/containerd/v2/core/metrics"
|
||||
"github.com/containerd/containerd/v2/core/metrics/cgroups/common"
|
||||
v2 "github.com/containerd/containerd/v2/core/metrics/types/v2"
|
||||
"github.com/containerd/containerd/v2/pkg/namespaces"
|
||||
"github.com/containerd/containerd/v2/pkg/timeout"
|
||||
"github.com/containerd/log"
|
||||
"github.com/containerd/typeurl/v2"
|
||||
"github.com/docker/go-metrics"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
// NewCollector registers the collector with the provided namespace and returns it so
|
||||
// that cgroups can be added for collection
|
||||
func NewCollector(ns *metrics.Namespace) *Collector {
|
||||
if ns == nil {
|
||||
return &Collector{}
|
||||
}
|
||||
c := &Collector{
|
||||
ns: ns,
|
||||
tasks: make(map[string]entry),
|
||||
}
|
||||
c.metrics = append(c.metrics, pidMetrics...)
|
||||
c.metrics = append(c.metrics, cpuMetrics...)
|
||||
c.metrics = append(c.metrics, memoryMetrics...)
|
||||
c.metrics = append(c.metrics, ioMetrics...)
|
||||
c.storedMetrics = make(chan prometheus.Metric, 100*len(c.metrics))
|
||||
ns.Add(c)
|
||||
return c
|
||||
}
|
||||
|
||||
// taskID returns the key under which a task is tracked by the collector:
// the task id and its namespace joined by a dash.
func taskID(id, namespace string) string {
	// All operands are strings, so Sprint concatenates them without
	// inserting spaces.
	key := fmt.Sprint(id, "-", namespace)
	return key
}
|
||||
|
||||
type entry struct {
|
||||
task common.Statable
|
||||
// ns is an optional child namespace that contains additional to parent labels.
|
||||
// This can be used to append task specific labels to be able to differentiate the different containerd metrics.
|
||||
ns *metrics.Namespace
|
||||
}
|
||||
|
||||
// Collector provides the ability to collect container stats and export
|
||||
// them in the prometheus format
|
||||
type Collector struct {
|
||||
ns *metrics.Namespace
|
||||
storedMetrics chan prometheus.Metric
|
||||
|
||||
// TODO(fuweid):
|
||||
//
|
||||
// The Collector.Collect will be the field ns'Collect's callback,
|
||||
// which be invoked periodically with internal lock. And Collector.Add
|
||||
// might also invoke ns.Lock if the labels is not nil, which is easy to
|
||||
// cause dead-lock.
|
||||
//
|
||||
// Goroutine X:
|
||||
//
|
||||
// ns.Collect
|
||||
// ns.Lock
|
||||
// Collector.Collect
|
||||
// Collector.RLock
|
||||
//
|
||||
//
|
||||
// Goroutine Y:
|
||||
//
|
||||
// Collector.Add
|
||||
// ...(RLock/Lock)
|
||||
// ns.Lock
|
||||
//
|
||||
// I think we should seek the way to decouple ns from Collector.
|
||||
mu sync.RWMutex
|
||||
tasks map[string]entry
|
||||
metrics []*metric
|
||||
}
|
||||
|
||||
// Describe prometheus metrics
|
||||
func (c *Collector) Describe(ch chan<- *prometheus.Desc) {
|
||||
for _, m := range c.metrics {
|
||||
ch <- m.desc(c.ns)
|
||||
}
|
||||
}
|
||||
|
||||
// Collect prometheus metrics
|
||||
func (c *Collector) Collect(ch chan<- prometheus.Metric) {
|
||||
c.mu.RLock()
|
||||
wg := &sync.WaitGroup{}
|
||||
for _, t := range c.tasks {
|
||||
wg.Add(1)
|
||||
go c.collect(t, ch, true, wg)
|
||||
}
|
||||
storedLoop:
|
||||
for {
|
||||
// read stored metrics until the channel is flushed
|
||||
select {
|
||||
case m := <-c.storedMetrics:
|
||||
ch <- m
|
||||
default:
|
||||
break storedLoop
|
||||
}
|
||||
}
|
||||
c.mu.RUnlock()
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
func (c *Collector) collect(entry entry, ch chan<- prometheus.Metric, block bool, wg *sync.WaitGroup) {
|
||||
if wg != nil {
|
||||
defer wg.Done()
|
||||
}
|
||||
|
||||
t := entry.task
|
||||
ctx, cancel := timeout.WithContext(context.Background(), cmetrics.ShimStatsRequestTimeout)
|
||||
stats, err := t.Stats(namespaces.WithNamespace(ctx, t.Namespace()))
|
||||
cancel()
|
||||
|
||||
if err != nil {
|
||||
log.L.WithError(err).Errorf("stat task %s", t.ID())
|
||||
return
|
||||
}
|
||||
|
||||
data, err := typeurl.UnmarshalAny(stats)
|
||||
if err != nil {
|
||||
log.L.WithError(err).Errorf("unmarshal stats for %s", t.ID())
|
||||
return
|
||||
}
|
||||
s, ok := data.(*v2.Metrics)
|
||||
if !ok {
|
||||
log.L.WithError(err).Errorf("invalid metric type for %s", t.ID())
|
||||
return
|
||||
}
|
||||
ns := entry.ns
|
||||
if ns == nil {
|
||||
ns = c.ns
|
||||
}
|
||||
for _, m := range c.metrics {
|
||||
m.collect(t.ID(), t.Namespace(), s, ns, ch, block)
|
||||
}
|
||||
}
|
||||
|
||||
// Add adds the provided cgroup and id so that metrics are collected and exported
|
||||
func (c *Collector) Add(t common.Statable, labels map[string]string) error {
|
||||
if c.ns == nil {
|
||||
return nil
|
||||
}
|
||||
c.mu.RLock()
|
||||
id := taskID(t.ID(), t.Namespace())
|
||||
_, ok := c.tasks[id]
|
||||
c.mu.RUnlock()
|
||||
if ok {
|
||||
return nil // requests to collect metrics should be idempotent
|
||||
}
|
||||
entry := entry{task: t}
|
||||
if labels != nil {
|
||||
entry.ns = c.ns.WithConstLabels(labels)
|
||||
}
|
||||
c.mu.Lock()
|
||||
c.tasks[id] = entry
|
||||
c.mu.Unlock()
|
||||
return nil
|
||||
}
|
||||
|
||||
// Remove removes the provided cgroup by id from the collector
|
||||
func (c *Collector) Remove(t common.Statable) {
|
||||
if c.ns == nil {
|
||||
return
|
||||
}
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
delete(c.tasks, taskID(t.ID(), t.Namespace()))
|
||||
}
|
||||
|
||||
// RemoveAll statable items from the collector
|
||||
func (c *Collector) RemoveAll() {
|
||||
if c.ns == nil {
|
||||
return
|
||||
}
|
||||
c.mu.Lock()
|
||||
c.tasks = make(map[string]entry)
|
||||
c.mu.Unlock()
|
||||
}
|
||||
60
core/metrics/cgroups/v2/pids.go
Normal file
60
core/metrics/cgroups/v2/pids.go
Normal file
@@ -0,0 +1,60 @@
|
||||
//go:build linux
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v2
|
||||
|
||||
import (
|
||||
v2 "github.com/containerd/containerd/v2/core/metrics/types/v2"
|
||||
metrics "github.com/docker/go-metrics"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
var pidMetrics = []*metric{
|
||||
{
|
||||
name: "pids",
|
||||
help: "The limit to the number of pids allowed (cgroup v2)",
|
||||
unit: metrics.Unit("limit"),
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Pids == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Pids.Limit),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "pids",
|
||||
help: "The current number of pids (cgroup v2)",
|
||||
unit: metrics.Unit("current"),
|
||||
vt: prometheus.GaugeValue,
|
||||
getValues: func(stats *v2.Metrics) []value {
|
||||
if stats.Pids == nil {
|
||||
return nil
|
||||
}
|
||||
return []value{
|
||||
{
|
||||
v: float64(stats.Pids.Current),
|
||||
},
|
||||
}
|
||||
},
|
||||
},
|
||||
}
|
||||
37
core/metrics/metrics.go
Normal file
37
core/metrics/metrics.go
Normal file
@@ -0,0 +1,37 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/containerd/containerd/v2/pkg/timeout"
|
||||
"github.com/containerd/containerd/v2/version"
|
||||
goMetrics "github.com/docker/go-metrics"
|
||||
)
|
||||
|
||||
// ShimStatsRequestTimeout is the key under which the timeout for shim stats
// requests is registered with the timeout package.
const ShimStatsRequestTimeout = "io.containerd.timeout.metrics.shimstats"
|
||||
|
||||
func init() {
|
||||
ns := goMetrics.NewNamespace("containerd", "", nil)
|
||||
c := ns.NewLabeledCounter("build_info", "containerd build information", "version", "revision")
|
||||
c.WithValues(version.Version, version.Revision).Inc()
|
||||
goMetrics.Register(ns)
|
||||
timeout.Set(ShimStatsRequestTimeout, 2*time.Second)
|
||||
}
|
||||
46
core/metrics/types/v1/types.go
Normal file
46
core/metrics/types/v1/types.go
Normal file
@@ -0,0 +1,46 @@
|
||||
//go:build linux
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v1
|
||||
|
||||
import (
|
||||
v1 "github.com/containerd/cgroups/v3/cgroup1/stats"
|
||||
)
|
||||
|
||||
type (
|
||||
// Metrics alias
|
||||
Metrics = v1.Metrics
|
||||
// BlkIOEntry alias
|
||||
BlkIOEntry = v1.BlkIOEntry
|
||||
// MemoryStat alias
|
||||
MemoryStat = v1.MemoryStat
|
||||
// CPUStat alias
|
||||
CPUStat = v1.CPUStat
|
||||
// CPUUsage alias
|
||||
CPUUsage = v1.CPUUsage
|
||||
// BlkIOStat alias
|
||||
BlkIOStat = v1.BlkIOStat
|
||||
// PidsStat alias
|
||||
PidsStat = v1.PidsStat
|
||||
// RdmaStat alias
|
||||
RdmaStat = v1.RdmaStat
|
||||
// RdmaEntry alias
|
||||
RdmaEntry = v1.RdmaEntry
|
||||
// HugetlbStat alias
|
||||
HugetlbStat = v1.HugetlbStat
|
||||
)
|
||||
36
core/metrics/types/v2/types.go
Normal file
36
core/metrics/types/v2/types.go
Normal file
@@ -0,0 +1,36 @@
|
||||
//go:build linux
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package v2
|
||||
|
||||
import (
|
||||
v2 "github.com/containerd/cgroups/v3/cgroup2/stats"
|
||||
)
|
||||
|
||||
type (
|
||||
// Metrics alias
|
||||
Metrics = v2.Metrics
|
||||
// MemoryStat alias
|
||||
MemoryStat = v2.MemoryStat
|
||||
// CPUStat alias
|
||||
CPUStat = v2.CPUStat
|
||||
// PidsStat alias
|
||||
PidsStat = v2.PidsStat
|
||||
// IOStat alias
|
||||
IOStat = v2.IOStat
|
||||
)
|
||||
Reference in New Issue
Block a user