Merge pull request #7858 from aojea/network_metrics
This commit is contained in:
		| @@ -36,6 +36,10 @@ var ( | |||||||
| 	containerStopTimer   metrics.LabeledTimer | 	containerStopTimer   metrics.LabeledTimer | ||||||
| 	containerStartTimer  metrics.LabeledTimer | 	containerStartTimer  metrics.LabeledTimer | ||||||
|  |  | ||||||
|  | 	networkPluginOperations        metrics.LabeledCounter | ||||||
|  | 	networkPluginOperationsErrors  metrics.LabeledCounter | ||||||
|  | 	networkPluginOperationsLatency metrics.LabeledTimer | ||||||
|  |  | ||||||
| 	imagePulls           metrics.LabeledCounter | 	imagePulls           metrics.LabeledCounter | ||||||
| 	inProgressImagePulls metrics.Gauge | 	inProgressImagePulls metrics.Gauge | ||||||
| 	//  pull duration / (image size / 1MiB) | 	//  pull duration / (image size / 1MiB) | ||||||
| @@ -60,6 +64,10 @@ func init() { | |||||||
| 	containerStopTimer = ns.NewLabeledTimer("container_stop", "time to stop a container", "runtime") | 	containerStopTimer = ns.NewLabeledTimer("container_stop", "time to stop a container", "runtime") | ||||||
| 	containerStartTimer = ns.NewLabeledTimer("container_start", "time to start a container", "runtime") | 	containerStartTimer = ns.NewLabeledTimer("container_start", "time to start a container", "runtime") | ||||||
|  |  | ||||||
|  | 	networkPluginOperations = ns.NewLabeledCounter("network_plugin_operations_total", "cumulative number of network plugin operations by operation type", "operation_type") | ||||||
|  | 	networkPluginOperationsErrors = ns.NewLabeledCounter("network_plugin_operations_errors_total", "cumulative number of network plugin operations by operation type", "operation_type") | ||||||
|  | 	networkPluginOperationsLatency = ns.NewLabeledTimer("network_plugin_operations_duration_seconds", "latency in seconds of network plugin operations. Broken down by operation type", "operation_type") | ||||||
|  |  | ||||||
| 	imagePulls = ns.NewLabeledCounter("image_pulls", "succeeded and failed counters", "status") | 	imagePulls = ns.NewLabeledCounter("image_pulls", "succeeded and failed counters", "status") | ||||||
| 	inProgressImagePulls = ns.NewGauge("in_progress_image_pulls", "in progress pulls", metrics.Total) | 	inProgressImagePulls = ns.NewGauge("in_progress_image_pulls", "in progress pulls", metrics.Total) | ||||||
| 	imagePullThroughput = prom.NewHistogram( | 	imagePullThroughput = prom.NewHistogram( | ||||||
| @@ -72,3 +80,11 @@ func init() { | |||||||
|  |  | ||||||
| 	metrics.Register(ns) | 	metrics.Register(ns) | ||||||
| } | } | ||||||
|  |  | ||||||
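|  | // Values for the "operation_type" label of the network plugin metrics. | ||||||
|  | // The names are kept for backwards compatibility with kubelet/dockershim metrics, | ||||||
|  | // see https://github.com/containerd/containerd/issues/7801 | ||||||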
|  | const ( | ||||||
|  | 	networkStatusOp   = "get_pod_network_status" | ||||||
|  | 	networkSetUpOp    = "set_up_pod" | ||||||
|  | 	networkTearDownOp = "tear_down_pod" | ||||||
|  | ) | ||||||
|   | |||||||
| @@ -340,8 +340,12 @@ func (c *criService) setupPodNetwork(ctx context.Context, sandbox *sandboxstore. | |||||||
| 		return fmt.Errorf("get cni namespace options: %w", err) | 		return fmt.Errorf("get cni namespace options: %w", err) | ||||||
| 	} | 	} | ||||||
| 	log.G(ctx).WithField("podsandboxid", id).Debugf("begin cni setup") | 	log.G(ctx).WithField("podsandboxid", id).Debugf("begin cni setup") | ||||||
|  | 	netStart := time.Now() | ||||||
| 	result, err := netPlugin.Setup(ctx, id, path, opts...) | 	result, err := netPlugin.Setup(ctx, id, path, opts...) | ||||||
|  | 	networkPluginOperations.WithValues(networkSetUpOp).Inc() | ||||||
|  | 	networkPluginOperationsLatency.WithValues(networkSetUpOp).UpdateSince(netStart) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
|  | 		networkPluginOperationsErrors.WithValues(networkSetUpOp).Inc() | ||||||
| 		return err | 		return err | ||||||
| 	} | 	} | ||||||
| 	logDebugCNIResult(ctx, id, result) | 	logDebugCNIResult(ctx, id, result) | ||||||
|   | |||||||
| @@ -132,5 +132,13 @@ func (c *criService) teardownPodNetwork(ctx context.Context, sandbox sandboxstor | |||||||
| 		return fmt.Errorf("get cni namespace options: %w", err) | 		return fmt.Errorf("get cni namespace options: %w", err) | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	return netPlugin.Remove(ctx, id, path, opts...) | 	netStart := time.Now() | ||||||
|  | 	err = netPlugin.Remove(ctx, id, path, opts...) | ||||||
|  | 	networkPluginOperations.WithValues(networkTearDownOp).Inc() | ||||||
|  | 	networkPluginOperationsLatency.WithValues(networkTearDownOp).UpdateSince(netStart) | ||||||
|  | 	if err != nil { | ||||||
|  | 		networkPluginOperationsErrors.WithValues(networkTearDownOp).Inc() | ||||||
|  | 		return err | ||||||
|  | 	} | ||||||
|  | 	return nil | ||||||
| } | } | ||||||
|   | |||||||
| @@ -24,6 +24,7 @@ import ( | |||||||
| 	"path/filepath" | 	"path/filepath" | ||||||
| 	"strings" | 	"strings" | ||||||
| 	"text/template" | 	"text/template" | ||||||
|  | 	"time" | ||||||
|  |  | ||||||
| 	"github.com/containerd/containerd/log" | 	"github.com/containerd/containerd/log" | ||||||
| 	runtime "k8s.io/cri-api/pkg/apis/runtime/v1" | 	runtime "k8s.io/cri-api/pkg/apis/runtime/v1" | ||||||
| @@ -74,10 +75,16 @@ func (c *criService) UpdateRuntimeConfig(ctx context.Context, r *runtime.UpdateR | |||||||
| 		log.G(ctx).Infof("Network plugin is ready, skip generating cni config from template %q", confTemplate) | 		log.G(ctx).Infof("Network plugin is ready, skip generating cni config from template %q", confTemplate) | ||||||
| 		return &runtime.UpdateRuntimeConfigResponse{}, nil | 		return &runtime.UpdateRuntimeConfigResponse{}, nil | ||||||
| 	} | 	} | ||||||
| 	if err := netPlugin.Status(); err == nil { | 	netStart := time.Now() | ||||||
|  | 	err = netPlugin.Status() | ||||||
|  | 	networkPluginOperations.WithValues(networkStatusOp).Inc() | ||||||
|  | 	networkPluginOperationsLatency.WithValues(networkStatusOp).UpdateSince(netStart) | ||||||
|  | 	if err == nil { | ||||||
| 		log.G(ctx).Infof("Network plugin is ready, skip generating cni config from template %q", confTemplate) | 		log.G(ctx).Infof("Network plugin is ready, skip generating cni config from template %q", confTemplate) | ||||||
| 		return &runtime.UpdateRuntimeConfigResponse{}, nil | 		return &runtime.UpdateRuntimeConfigResponse{}, nil | ||||||
| 	} else if err := netPlugin.Load(c.cniLoadOptions()...); err == nil { | 	} | ||||||
|  | 	networkPluginOperationsErrors.WithValues(networkStatusOp).Inc() | ||||||
|  | 	if err := netPlugin.Load(c.cniLoadOptions()...); err == nil { | ||||||
| 		log.G(ctx).Infof("CNI config is successfully loaded, skip generating cni config from template %q", confTemplate) | 		log.G(ctx).Infof("CNI config is successfully loaded, skip generating cni config from template %q", confTemplate) | ||||||
| 		return &runtime.UpdateRuntimeConfigResponse{}, nil | 		return &runtime.UpdateRuntimeConfigResponse{}, nil | ||||||
| 	} | 	} | ||||||
|   | |||||||
| @@ -36,6 +36,10 @@ var ( | |||||||
| 	containerStopTimer   metrics.LabeledTimer | 	containerStopTimer   metrics.LabeledTimer | ||||||
| 	containerStartTimer  metrics.LabeledTimer | 	containerStartTimer  metrics.LabeledTimer | ||||||
|  |  | ||||||
|  | 	networkPluginOperations        metrics.LabeledCounter | ||||||
|  | 	networkPluginOperationsErrors  metrics.LabeledCounter | ||||||
|  | 	networkPluginOperationsLatency metrics.LabeledTimer | ||||||
|  |  | ||||||
| 	imagePulls           metrics.LabeledCounter | 	imagePulls           metrics.LabeledCounter | ||||||
| 	inProgressImagePulls metrics.Gauge | 	inProgressImagePulls metrics.Gauge | ||||||
| 	//  pull duration / (image size / 1MiB) | 	//  pull duration / (image size / 1MiB) | ||||||
| @@ -60,6 +64,10 @@ func init() { | |||||||
| 	containerStopTimer = ns.NewLabeledTimer("container_stop", "time to stop a container", "runtime") | 	containerStopTimer = ns.NewLabeledTimer("container_stop", "time to stop a container", "runtime") | ||||||
| 	containerStartTimer = ns.NewLabeledTimer("container_start", "time to start a container", "runtime") | 	containerStartTimer = ns.NewLabeledTimer("container_start", "time to start a container", "runtime") | ||||||
|  |  | ||||||
|  | 	networkPluginOperations = ns.NewLabeledCounter("network_plugin_operations_total", "cumulative number of network plugin operations by operation type", "operation_type") | ||||||
|  | 	networkPluginOperationsErrors = ns.NewLabeledCounter("network_plugin_operations_errors_total", "cumulative number of network plugin operations by operation type", "operation_type") | ||||||
|  | 	networkPluginOperationsLatency = ns.NewLabeledTimer("network_plugin_operations_duration_seconds", "latency in seconds of network plugin operations. Broken down by operation type", "operation_type") | ||||||
|  |  | ||||||
| 	imagePulls = ns.NewLabeledCounter("image_pulls", "succeeded and failed counters", "status") | 	imagePulls = ns.NewLabeledCounter("image_pulls", "succeeded and failed counters", "status") | ||||||
| 	inProgressImagePulls = ns.NewGauge("in_progress_image_pulls", "in progress pulls", metrics.Total) | 	inProgressImagePulls = ns.NewGauge("in_progress_image_pulls", "in progress pulls", metrics.Total) | ||||||
| 	imagePullThroughput = prom.NewHistogram( | 	imagePullThroughput = prom.NewHistogram( | ||||||
| @@ -72,3 +80,11 @@ func init() { | |||||||
|  |  | ||||||
| 	metrics.Register(ns) | 	metrics.Register(ns) | ||||||
| } | } | ||||||
|  |  | ||||||
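|  | // Values for the "operation_type" label of the network plugin metrics. | ||||||
|  | // The names are kept for backwards compatibility with kubelet/dockershim metrics, | ||||||
|  | // see https://github.com/containerd/containerd/issues/7801 | ||||||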
|  | const ( | ||||||
|  | 	networkStatusOp   = "get_pod_network_status" | ||||||
|  | 	networkSetUpOp    = "set_up_pod" | ||||||
|  | 	networkTearDownOp = "tear_down_pod" | ||||||
|  | ) | ||||||
|   | |||||||
| @@ -440,12 +440,16 @@ func (c *criService) setupPodNetwork(ctx context.Context, sandbox *sandboxstore. | |||||||
| 		return fmt.Errorf("get cni namespace options: %w", err) | 		return fmt.Errorf("get cni namespace options: %w", err) | ||||||
| 	} | 	} | ||||||
| 	log.G(ctx).WithField("podsandboxid", id).Debugf("begin cni setup") | 	log.G(ctx).WithField("podsandboxid", id).Debugf("begin cni setup") | ||||||
|  | 	netStart := time.Now() | ||||||
| 	if c.config.CniConfig.NetworkPluginSetupSerially { | 	if c.config.CniConfig.NetworkPluginSetupSerially { | ||||||
| 		result, err = netPlugin.SetupSerially(ctx, id, path, opts...) | 		result, err = netPlugin.SetupSerially(ctx, id, path, opts...) | ||||||
| 	} else { | 	} else { | ||||||
| 		result, err = netPlugin.Setup(ctx, id, path, opts...) | 		result, err = netPlugin.Setup(ctx, id, path, opts...) | ||||||
| 	} | 	} | ||||||
|  | 	networkPluginOperations.WithValues(networkSetUpOp).Inc() | ||||||
|  | 	networkPluginOperationsLatency.WithValues(networkSetUpOp).UpdateSince(netStart) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
|  | 		networkPluginOperationsErrors.WithValues(networkSetUpOp).Inc() | ||||||
| 		return err | 		return err | ||||||
| 	} | 	} | ||||||
| 	logDebugCNIResult(ctx, id, result) | 	logDebugCNIResult(ctx, id, result) | ||||||
|   | |||||||
| @@ -193,7 +193,15 @@ func (c *criService) teardownPodNetwork(ctx context.Context, sandbox sandboxstor | |||||||
| 		return fmt.Errorf("get cni namespace options: %w", err) | 		return fmt.Errorf("get cni namespace options: %w", err) | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	return netPlugin.Remove(ctx, id, path, opts...) | 	netStart := time.Now() | ||||||
|  | 	err = netPlugin.Remove(ctx, id, path, opts...) | ||||||
|  | 	networkPluginOperations.WithValues(networkTearDownOp).Inc() | ||||||
|  | 	networkPluginOperationsLatency.WithValues(networkTearDownOp).UpdateSince(netStart) | ||||||
|  | 	if err != nil { | ||||||
|  | 		networkPluginOperationsErrors.WithValues(networkTearDownOp).Inc() | ||||||
|  | 		return err | ||||||
|  | 	} | ||||||
|  | 	return nil | ||||||
| } | } | ||||||
|  |  | ||||||
| // cleanupUnknownSandbox cleanup stopped sandbox in unknown state. | // cleanupUnknownSandbox cleanup stopped sandbox in unknown state. | ||||||
|   | |||||||
| @@ -24,6 +24,7 @@ import ( | |||||||
| 	"path/filepath" | 	"path/filepath" | ||||||
| 	"strings" | 	"strings" | ||||||
| 	"text/template" | 	"text/template" | ||||||
|  | 	"time" | ||||||
|  |  | ||||||
| 	"github.com/containerd/containerd/log" | 	"github.com/containerd/containerd/log" | ||||||
| 	runtime "k8s.io/cri-api/pkg/apis/runtime/v1" | 	runtime "k8s.io/cri-api/pkg/apis/runtime/v1" | ||||||
| @@ -74,10 +75,17 @@ func (c *criService) UpdateRuntimeConfig(ctx context.Context, r *runtime.UpdateR | |||||||
| 		log.G(ctx).Infof("Network plugin is ready, skip generating cni config from template %q", confTemplate) | 		log.G(ctx).Infof("Network plugin is ready, skip generating cni config from template %q", confTemplate) | ||||||
| 		return &runtime.UpdateRuntimeConfigResponse{}, nil | 		return &runtime.UpdateRuntimeConfigResponse{}, nil | ||||||
| 	} | 	} | ||||||
| 	if err := netPlugin.Status(); err == nil { |  | ||||||
|  | 	netStart := time.Now() | ||||||
|  | 	err = netPlugin.Status() | ||||||
|  | 	networkPluginOperations.WithValues(networkStatusOp).Inc() | ||||||
|  | 	networkPluginOperationsLatency.WithValues(networkStatusOp).UpdateSince(netStart) | ||||||
|  | 	if err == nil { | ||||||
| 		log.G(ctx).Infof("Network plugin is ready, skip generating cni config from template %q", confTemplate) | 		log.G(ctx).Infof("Network plugin is ready, skip generating cni config from template %q", confTemplate) | ||||||
| 		return &runtime.UpdateRuntimeConfigResponse{}, nil | 		return &runtime.UpdateRuntimeConfigResponse{}, nil | ||||||
| 	} else if err := netPlugin.Load(c.cniLoadOptions()...); err == nil { | 	} | ||||||
|  | 	networkPluginOperationsErrors.WithValues(networkStatusOp).Inc() | ||||||
|  | 	if err := netPlugin.Load(c.cniLoadOptions()...); err == nil { | ||||||
| 		log.G(ctx).Infof("CNI config is successfully loaded, skip generating cni config from template %q", confTemplate) | 		log.G(ctx).Infof("CNI config is successfully loaded, skip generating cni config from template %q", confTemplate) | ||||||
| 		return &runtime.UpdateRuntimeConfigResponse{}, nil | 		return &runtime.UpdateRuntimeConfigResponse{}, nil | ||||||
| 	} | 	} | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Fu Wei
					Fu Wei