diff --git a/pkg/cri/sbserver/helpers.go b/pkg/cri/sbserver/helpers.go index c4d39c401..5adcbc28d 100644 --- a/pkg/cri/sbserver/helpers.go +++ b/pkg/cri/sbserver/helpers.go @@ -457,6 +457,7 @@ func (c *criService) generateAndSendContainerEvent(ctx context.Context, containe select { case c.containerEventsChan <- event: default: + containerEventsDroppedCount.Inc() log.G(ctx).Debugf("containerEventsChan is full, discarding event %+v", event) } } diff --git a/pkg/cri/sbserver/metrics.go b/pkg/cri/sbserver/metrics.go index 1a1d3d2fb..b5ed72b10 100644 --- a/pkg/cri/sbserver/metrics.go +++ b/pkg/cri/sbserver/metrics.go @@ -29,11 +29,12 @@ var ( sandboxRuntimeStopTimer metrics.LabeledTimer sandboxRemoveTimer metrics.LabeledTimer - containerListTimer metrics.Timer - containerRemoveTimer metrics.LabeledTimer - containerCreateTimer metrics.LabeledTimer - containerStopTimer metrics.LabeledTimer - containerStartTimer metrics.LabeledTimer + containerListTimer metrics.Timer + containerRemoveTimer metrics.LabeledTimer + containerCreateTimer metrics.LabeledTimer + containerStopTimer metrics.LabeledTimer + containerStartTimer metrics.LabeledTimer + containerEventsDroppedCount metrics.Counter networkPluginOperations metrics.LabeledCounter networkPluginOperationsErrors metrics.LabeledCounter @@ -57,6 +58,7 @@ func init() { containerCreateTimer = ns.NewLabeledTimer("container_create", "time to create a container", "runtime") containerStopTimer = ns.NewLabeledTimer("container_stop", "time to stop a container", "runtime") containerStartTimer = ns.NewLabeledTimer("container_start", "time to start a container", "runtime") + containerEventsDroppedCount = ns.NewCounter("container_events_dropped", "count container discarding event total from server start") networkPluginOperations = ns.NewLabeledCounter("network_plugin_operations_total", "cumulative number of network plugin operations by operation type", "operation_type") networkPluginOperationsErrors = ns.NewLabeledCounter("network_plugin_operations_errors_total", "cumulative number of network plugin operations by operation type", "operation_type") diff --git a/pkg/cri/server/helpers.go b/pkg/cri/server/helpers.go index 2b42538d4..1f5e5cd53 100644 --- a/pkg/cri/server/helpers.go +++ b/pkg/cri/server/helpers.go @@ -534,6 +534,7 @@ func (c *criService) generateAndSendContainerEvent(ctx context.Context, containe select { case c.containerEventsChan <- event: default: + containerEventsDroppedCount.Inc() logrus.Debugf("containerEventsChan is full, discarding event %+v", event) } } diff --git a/pkg/cri/server/metrics.go b/pkg/cri/server/metrics.go index db03ab087..f27674705 100644 --- a/pkg/cri/server/metrics.go +++ b/pkg/cri/server/metrics.go @@ -30,11 +30,12 @@ var ( sandboxRuntimeStopTimer metrics.LabeledTimer sandboxRemoveTimer metrics.LabeledTimer - containerListTimer metrics.Timer - containerRemoveTimer metrics.LabeledTimer - containerCreateTimer metrics.LabeledTimer - containerStopTimer metrics.LabeledTimer - containerStartTimer metrics.LabeledTimer + containerListTimer metrics.Timer + containerRemoveTimer metrics.LabeledTimer + containerCreateTimer metrics.LabeledTimer + containerStopTimer metrics.LabeledTimer + containerStartTimer metrics.LabeledTimer + containerEventsDroppedCount metrics.Counter networkPluginOperations metrics.LabeledCounter networkPluginOperationsErrors metrics.LabeledCounter @@ -68,6 +69,7 @@ func init() { containerCreateTimer = ns.NewLabeledTimer("container_create", "time to create a container", "runtime") containerStopTimer = ns.NewLabeledTimer("container_stop", "time to stop a container", "runtime") containerStartTimer = ns.NewLabeledTimer("container_start", "time to start a container", "runtime") + containerEventsDroppedCount = ns.NewCounter("container_events_dropped", "count container discarding event total from server start") networkPluginOperations = ns.NewLabeledCounter("network_plugin_operations_total", "cumulative number of network plugin operations by operation type", "operation_type") networkPluginOperationsErrors = ns.NewLabeledCounter("network_plugin_operations_errors_total", "cumulative number of network plugin operations by operation type", "operation_type")