Enable grpc timing histograms

This allows the grpc timing histograms to be enabled via a config option.
They are off by default because they are metrics of high cardinality.

This is useful for perf testing and debugging, but it should not be
enabled on production systems unless it is needed.

```toml
[metrics]
	grpc_histogram = true

```

Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
This commit is contained in:
Michael Crosby 2018-01-05 15:03:15 -05:00
parent 002c0e2901
commit fd29dbe4c8
2 changed files with 8 additions and 1 deletions

View File

@ -50,7 +50,8 @@ type Debug struct {
// MetricsConfig provides metrics configuration // MetricsConfig provides metrics configuration
type MetricsConfig struct { type MetricsConfig struct {
Address string `toml:"address"` Address string `toml:"address"`
GRPCHistogram bool `toml:"grpc_histogram"`
} }
// CgroupConfig provides cgroup configuration // CgroupConfig provides cgroup configuration

View File

@ -70,6 +70,7 @@ func New(ctx context.Context, config *Config) (*Server, error) {
s = &Server{ s = &Server{
rpc: rpc, rpc: rpc,
events: exchange.NewExchange(), events: exchange.NewExchange(),
config: config,
} }
initialized = plugin.NewPluginSet() initialized = plugin.NewPluginSet()
) )
@ -127,10 +128,15 @@ func New(ctx context.Context, config *Config) (*Server, error) {
type Server struct { type Server struct {
rpc *grpc.Server rpc *grpc.Server
events *exchange.Exchange events *exchange.Exchange
config *Config
} }
// ServeGRPC provides the containerd grpc APIs on the provided listener // ServeGRPC provides the containerd grpc APIs on the provided listener
func (s *Server) ServeGRPC(l net.Listener) error { func (s *Server) ServeGRPC(l net.Listener) error {
if s.config.Metrics.GRPCHistogram {
// enable grpc time histograms to measure rpc latencies
grpc_prometheus.EnableHandlingTimeHistogram()
}
// before we start serving the grpc API register the grpc_prometheus metrics // before we start serving the grpc API register the grpc_prometheus metrics
// handler. This needs to be the last service registered so that it can collect // handler. This needs to be the last service registered so that it can collect
// metrics for every other service // metrics for every other service