add network plugin metrics

Add network plugin metrics.

The metrics are the same ones that were used in dockershim/kubelet until
it was deprecated in Kubernetes 1.23.

https://github.com/kubernetes/kubernetes/blob/release-1.23/pkg/kubelet/dockershim/network/metrics/metrics.go

Signed-off-by: Antonio Ojea <aojea@google.com>
This commit is contained in:
Antonio Ojea 2022-12-22 19:27:20 +00:00
parent 6c8c427166
commit ba0a7185f0
8 changed files with 77 additions and 6 deletions

View File

@ -36,6 +36,10 @@ var (
containerStopTimer metrics.LabeledTimer containerStopTimer metrics.LabeledTimer
containerStartTimer metrics.LabeledTimer containerStartTimer metrics.LabeledTimer
networkPluginOperations metrics.LabeledCounter
networkPluginOperationsErrors metrics.LabeledCounter
networkPluginOperationsLatency metrics.LabeledTimer
imagePulls metrics.LabeledCounter imagePulls metrics.LabeledCounter
inProgressImagePulls metrics.Gauge inProgressImagePulls metrics.Gauge
// pull duration / (image size / 1MBi) // pull duration / (image size / 1MBi)
@ -60,6 +64,10 @@ func init() {
containerStopTimer = ns.NewLabeledTimer("container_stop", "time to stop a container", "runtime") containerStopTimer = ns.NewLabeledTimer("container_stop", "time to stop a container", "runtime")
containerStartTimer = ns.NewLabeledTimer("container_start", "time to start a container", "runtime") containerStartTimer = ns.NewLabeledTimer("container_start", "time to start a container", "runtime")
networkPluginOperations = ns.NewLabeledCounter("network_plugin_operations_total", "cumulative number of network plugin operations by operation type", "operation_type")
networkPluginOperationsErrors = ns.NewLabeledCounter("network_plugin_operations_errors_total", "cumulative number of network plugin operations by operation type", "operation_type")
networkPluginOperationsLatency = ns.NewLabeledTimer("network_plugin_operations_duration_seconds", "latency in seconds of network plugin operations. Broken down by operation type", "operation_type")
imagePulls = ns.NewLabeledCounter("image_pulls", "succeeded and failed counters", "status") imagePulls = ns.NewLabeledCounter("image_pulls", "succeeded and failed counters", "status")
inProgressImagePulls = ns.NewGauge("in_progress_image_pulls", "in progress pulls", metrics.Total) inProgressImagePulls = ns.NewGauge("in_progress_image_pulls", "in progress pulls", metrics.Total)
imagePullThroughput = prom.NewHistogram( imagePullThroughput = prom.NewHistogram(
@ -72,3 +80,11 @@ func init() {
metrics.Register(ns) metrics.Register(ns)
} }
// Values passed as the "operation_type" label of the network plugin metrics
// (networkPluginOperations, networkPluginOperationsErrors and
// networkPluginOperationsLatency).
//
// The strings are kept for backwards compatibility with kubelet/dockershim metrics
// https://github.com/containerd/containerd/issues/7801
const (
// networkStatusOp labels measurements taken around netPlugin.Status().
networkStatusOp = "get_pod_network_status"
// networkSetUpOp labels measurements taken around the netPlugin setup call.
networkSetUpOp = "set_up_pod"
// networkTearDownOp labels measurements taken around netPlugin.Remove().
networkTearDownOp = "tear_down_pod"
)

View File

@ -340,8 +340,12 @@ func (c *criService) setupPodNetwork(ctx context.Context, sandbox *sandboxstore.
return fmt.Errorf("get cni namespace options: %w", err) return fmt.Errorf("get cni namespace options: %w", err)
} }
log.G(ctx).WithField("podsandboxid", id).Debugf("begin cni setup") log.G(ctx).WithField("podsandboxid", id).Debugf("begin cni setup")
netStart := time.Now()
result, err := netPlugin.Setup(ctx, id, path, opts...) result, err := netPlugin.Setup(ctx, id, path, opts...)
networkPluginOperations.WithValues(networkSetUpOp).Inc()
networkPluginOperationsLatency.WithValues(networkSetUpOp).UpdateSince(netStart)
if err != nil { if err != nil {
networkPluginOperationsErrors.WithValues(networkSetUpOp).Inc()
return err return err
} }
logDebugCNIResult(ctx, id, result) logDebugCNIResult(ctx, id, result)

View File

@ -132,5 +132,13 @@ func (c *criService) teardownPodNetwork(ctx context.Context, sandbox sandboxstor
return fmt.Errorf("get cni namespace options: %w", err) return fmt.Errorf("get cni namespace options: %w", err)
} }
return netPlugin.Remove(ctx, id, path, opts...) netStart := time.Now()
err = netPlugin.Remove(ctx, id, path, opts...)
networkPluginOperations.WithValues(networkTearDownOp).Inc()
networkPluginOperationsLatency.WithValues(networkTearDownOp).UpdateSince(netStart)
if err != nil {
networkPluginOperationsErrors.WithValues(networkTearDownOp).Inc()
return err
}
return nil
} }

View File

@ -24,6 +24,7 @@ import (
"path/filepath" "path/filepath"
"strings" "strings"
"text/template" "text/template"
"time"
"github.com/containerd/containerd/log" "github.com/containerd/containerd/log"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1" runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
@ -74,10 +75,16 @@ func (c *criService) UpdateRuntimeConfig(ctx context.Context, r *runtime.UpdateR
log.G(ctx).Infof("Network plugin is ready, skip generating cni config from template %q", confTemplate) log.G(ctx).Infof("Network plugin is ready, skip generating cni config from template %q", confTemplate)
return &runtime.UpdateRuntimeConfigResponse{}, nil return &runtime.UpdateRuntimeConfigResponse{}, nil
} }
if err := netPlugin.Status(); err == nil { netStart := time.Now()
err = netPlugin.Status()
networkPluginOperations.WithValues(networkStatusOp).Inc()
networkPluginOperationsLatency.WithValues(networkStatusOp).UpdateSince(netStart)
if err == nil {
log.G(ctx).Infof("Network plugin is ready, skip generating cni config from template %q", confTemplate) log.G(ctx).Infof("Network plugin is ready, skip generating cni config from template %q", confTemplate)
return &runtime.UpdateRuntimeConfigResponse{}, nil return &runtime.UpdateRuntimeConfigResponse{}, nil
} else if err := netPlugin.Load(c.cniLoadOptions()...); err == nil { }
networkPluginOperationsErrors.WithValues(networkStatusOp).Inc()
if err := netPlugin.Load(c.cniLoadOptions()...); err == nil {
log.G(ctx).Infof("CNI config is successfully loaded, skip generating cni config from template %q", confTemplate) log.G(ctx).Infof("CNI config is successfully loaded, skip generating cni config from template %q", confTemplate)
return &runtime.UpdateRuntimeConfigResponse{}, nil return &runtime.UpdateRuntimeConfigResponse{}, nil
} }

View File

@ -36,6 +36,10 @@ var (
containerStopTimer metrics.LabeledTimer containerStopTimer metrics.LabeledTimer
containerStartTimer metrics.LabeledTimer containerStartTimer metrics.LabeledTimer
networkPluginOperations metrics.LabeledCounter
networkPluginOperationsErrors metrics.LabeledCounter
networkPluginOperationsLatency metrics.LabeledTimer
imagePulls metrics.LabeledCounter imagePulls metrics.LabeledCounter
inProgressImagePulls metrics.Gauge inProgressImagePulls metrics.Gauge
// pull duration / (image size / 1MBi) // pull duration / (image size / 1MBi)
@ -60,6 +64,10 @@ func init() {
containerStopTimer = ns.NewLabeledTimer("container_stop", "time to stop a container", "runtime") containerStopTimer = ns.NewLabeledTimer("container_stop", "time to stop a container", "runtime")
containerStartTimer = ns.NewLabeledTimer("container_start", "time to start a container", "runtime") containerStartTimer = ns.NewLabeledTimer("container_start", "time to start a container", "runtime")
networkPluginOperations = ns.NewLabeledCounter("network_plugin_operations_total", "cumulative number of network plugin operations by operation type", "operation_type")
networkPluginOperationsErrors = ns.NewLabeledCounter("network_plugin_operations_errors_total", "cumulative number of network plugin operations by operation type", "operation_type")
networkPluginOperationsLatency = ns.NewLabeledTimer("network_plugin_operations_duration_seconds", "latency in seconds of network plugin operations. Broken down by operation type", "operation_type")
imagePulls = ns.NewLabeledCounter("image_pulls", "succeeded and failed counters", "status") imagePulls = ns.NewLabeledCounter("image_pulls", "succeeded and failed counters", "status")
inProgressImagePulls = ns.NewGauge("in_progress_image_pulls", "in progress pulls", metrics.Total) inProgressImagePulls = ns.NewGauge("in_progress_image_pulls", "in progress pulls", metrics.Total)
imagePullThroughput = prom.NewHistogram( imagePullThroughput = prom.NewHistogram(
@ -72,3 +80,11 @@ func init() {
metrics.Register(ns) metrics.Register(ns)
} }
// Values passed as the "operation_type" label of the network plugin metrics
// (networkPluginOperations, networkPluginOperationsErrors and
// networkPluginOperationsLatency).
//
// The strings are kept for backwards compatibility with kubelet/dockershim metrics
// https://github.com/containerd/containerd/issues/7801
const (
// networkStatusOp labels measurements taken around netPlugin.Status().
networkStatusOp = "get_pod_network_status"
// networkSetUpOp labels measurements taken around the netPlugin setup call
// (Setup or SetupSerially).
networkSetUpOp = "set_up_pod"
// networkTearDownOp labels measurements taken around netPlugin.Remove().
networkTearDownOp = "tear_down_pod"
)

View File

@ -440,12 +440,16 @@ func (c *criService) setupPodNetwork(ctx context.Context, sandbox *sandboxstore.
return fmt.Errorf("get cni namespace options: %w", err) return fmt.Errorf("get cni namespace options: %w", err)
} }
log.G(ctx).WithField("podsandboxid", id).Debugf("begin cni setup") log.G(ctx).WithField("podsandboxid", id).Debugf("begin cni setup")
netStart := time.Now()
if c.config.CniConfig.NetworkPluginSetupSerially { if c.config.CniConfig.NetworkPluginSetupSerially {
result, err = netPlugin.SetupSerially(ctx, id, path, opts...) result, err = netPlugin.SetupSerially(ctx, id, path, opts...)
} else { } else {
result, err = netPlugin.Setup(ctx, id, path, opts...) result, err = netPlugin.Setup(ctx, id, path, opts...)
} }
networkPluginOperations.WithValues(networkSetUpOp).Inc()
networkPluginOperationsLatency.WithValues(networkSetUpOp).UpdateSince(netStart)
if err != nil { if err != nil {
networkPluginOperationsErrors.WithValues(networkSetUpOp).Inc()
return err return err
} }
logDebugCNIResult(ctx, id, result) logDebugCNIResult(ctx, id, result)

View File

@ -193,7 +193,15 @@ func (c *criService) teardownPodNetwork(ctx context.Context, sandbox sandboxstor
return fmt.Errorf("get cni namespace options: %w", err) return fmt.Errorf("get cni namespace options: %w", err)
} }
return netPlugin.Remove(ctx, id, path, opts...) netStart := time.Now()
err = netPlugin.Remove(ctx, id, path, opts...)
networkPluginOperations.WithValues(networkTearDownOp).Inc()
networkPluginOperationsLatency.WithValues(networkTearDownOp).UpdateSince(netStart)
if err != nil {
networkPluginOperationsErrors.WithValues(networkTearDownOp).Inc()
return err
}
return nil
} }
// cleanupUnknownSandbox cleanup stopped sandbox in unknown state. // cleanupUnknownSandbox cleanup stopped sandbox in unknown state.

View File

@ -24,6 +24,7 @@ import (
"path/filepath" "path/filepath"
"strings" "strings"
"text/template" "text/template"
"time"
"github.com/containerd/containerd/log" "github.com/containerd/containerd/log"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1" runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
@ -74,10 +75,17 @@ func (c *criService) UpdateRuntimeConfig(ctx context.Context, r *runtime.UpdateR
log.G(ctx).Infof("Network plugin is ready, skip generating cni config from template %q", confTemplate) log.G(ctx).Infof("Network plugin is ready, skip generating cni config from template %q", confTemplate)
return &runtime.UpdateRuntimeConfigResponse{}, nil return &runtime.UpdateRuntimeConfigResponse{}, nil
} }
if err := netPlugin.Status(); err == nil {
netStart := time.Now()
err = netPlugin.Status()
networkPluginOperations.WithValues(networkStatusOp).Inc()
networkPluginOperationsLatency.WithValues(networkStatusOp).UpdateSince(netStart)
if err == nil {
log.G(ctx).Infof("Network plugin is ready, skip generating cni config from template %q", confTemplate) log.G(ctx).Infof("Network plugin is ready, skip generating cni config from template %q", confTemplate)
return &runtime.UpdateRuntimeConfigResponse{}, nil return &runtime.UpdateRuntimeConfigResponse{}, nil
} else if err := netPlugin.Load(c.cniLoadOptions()...); err == nil { }
networkPluginOperationsErrors.WithValues(networkStatusOp).Inc()
if err := netPlugin.Load(c.cniLoadOptions()...); err == nil {
log.G(ctx).Infof("CNI config is successfully loaded, skip generating cni config from template %q", confTemplate) log.G(ctx).Infof("CNI config is successfully loaded, skip generating cni config from template %q", confTemplate)
return &runtime.UpdateRuntimeConfigResponse{}, nil return &runtime.UpdateRuntimeConfigResponse{}, nil
} }