From 012d68ff90f6890dab86d5084a073ff15bf99ff2 Mon Sep 17 00:00:00 2001 From: Paco Xu Date: Mon, 6 Jun 2022 18:21:49 +0800 Subject: [PATCH] add gc scheduler metrics: collection count Signed-off-by: Paco Xu --- gc/scheduler/metrics.go | 34 ++++++++++++++++++++++++++++++++++ gc/scheduler/scheduler.go | 14 ++++++++------ 2 files changed, 42 insertions(+), 6 deletions(-) create mode 100644 gc/scheduler/metrics.go diff --git a/gc/scheduler/metrics.go b/gc/scheduler/metrics.go new file mode 100644 index 000000000..3caa8cbac --- /dev/null +++ b/gc/scheduler/metrics.go @@ -0,0 +1,34 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package scheduler + +import "github.com/docker/go-metrics" + +var ( + // collectionCounter counts gc scheduler collections, labeled by status ("success" or "fail"). + collectionCounter metrics.LabeledCounter + + // gcTimeHist is a timer recording the duration of gc scheduler collections. 
+ gcTimeHist metrics.Timer +) + +func init() { + ns := metrics.NewNamespace("containerd", "gc", nil) + collectionCounter = ns.NewLabeledCounter("collections", "counter of gc scheduler collections", "status") + gcTimeHist = ns.NewTimer("gc", "duration of gc scheduler collections") + metrics.Register(ns) +} diff --git a/gc/scheduler/scheduler.go b/gc/scheduler/scheduler.go index f6999249d..4a72906a4 100644 --- a/gc/scheduler/scheduler.go +++ b/gc/scheduler/scheduler.go @@ -253,9 +253,8 @@ func (s *gcScheduler) run(ctx context.Context) { nextCollection *time.Time interval = time.Second - gcTime time.Duration + gcTimeSum time.Duration collections int - // TODO(dmcg): expose collection stats as metrics triggered bool deletions int @@ -311,6 +310,7 @@ func (s *gcScheduler) run(ctx context.Context) { last := time.Now() if err != nil { log.G(ctx).WithError(err).Error("garbage collection failed") + collectionCounter.WithValues("fail").Inc() // Reschedule garbage collection for same duration + 1 second schedC, nextCollection = schedule(nextCollection.Sub(*lastCollection) + time.Second) @@ -326,10 +326,12 @@ func (s *gcScheduler) run(ctx context.Context) { continue } - log.G(ctx).WithField("d", stats.Elapsed()).Debug("garbage collected") - - gcTime += stats.Elapsed() + gcTime := stats.Elapsed() + gcTimeHist.Update(gcTime) + log.G(ctx).WithField("d", gcTime).Debug("garbage collected") + gcTimeSum += gcTime collections++ + collectionCounter.WithValues("success").Inc() triggered = false deletions = 0 mutations = 0 @@ -340,7 +342,7 @@ func (s *gcScheduler) run(ctx context.Context) { // This algorithm ensures that a gc is scheduled to allow enough // runtime in between gc to reach the pause threshold. // Pause threshold is always 0.0 < threshold <= 0.5 - avg := float64(gcTime) / float64(collections) + avg := float64(gcTimeSum) / float64(collections) interval = time.Duration(avg/s.pauseThreshold - avg) }