From c59f1635f0b64eed4238e656e20744c40245fa3c Mon Sep 17 00:00:00 2001 From: Paco Xu Date: Sun, 21 Aug 2022 19:06:38 +0800 Subject: [PATCH] add metrics for image pulling: success/failure count; in progress count; thoughput Signed-off-by: Paco Xu --- pkg/cri/sbserver/image_pull.go | 25 ++++++++++++++++++++++--- pkg/cri/sbserver/metrics.go | 16 ++++++++++++++++ pkg/cri/server/image_pull.go | 25 ++++++++++++++++++++++--- pkg/cri/server/metrics.go | 16 ++++++++++++++++ 4 files changed, 76 insertions(+), 6 deletions(-) diff --git a/pkg/cri/sbserver/image_pull.go b/pkg/cri/sbserver/image_pull.go index 96c8c8dbf..29e2cd403 100644 --- a/pkg/cri/sbserver/image_pull.go +++ b/pkg/cri/sbserver/image_pull.go @@ -28,6 +28,7 @@ import ( "net/url" "os" "path/filepath" + "strconv" "strings" "sync" "sync/atomic" @@ -93,8 +94,22 @@ import ( // contents are missing but snapshots are ready, is the image still "READY"? // PullImage pulls an image with authentication config. -func (c *criService) PullImage(ctx context.Context, r *runtime.PullImageRequest) (*runtime.PullImageResponse, error) { +func (c *criService) PullImage(ctx context.Context, r *runtime.PullImageRequest) (_ *runtime.PullImageResponse, err error) { span := tracing.SpanFromContext(ctx) + defer func() { + // TODO: add domain label for imagePulls metrics, and we may need to provide a mechanism + // for the user to configure the set of registries that they are interested in. + if err != nil { + imagePulls.WithValues("failure").Inc() + } else { + imagePulls.WithValues("success").Inc() + } + }() + + inProgressImagePulls.Inc() + defer inProgressImagePulls.Dec() + startTime := time.Now() + imageRef := r.GetImage().GetImage() namedRef, err := distribution.ParseDockerRef(imageRef) if err != nil { @@ -195,8 +210,12 @@ func (c *criService) PullImage(ctx context.Context, r *runtime.PullImageRequest) } } - log.G(ctx).Debugf("Pulled image %q with image id %q, repo tag %q, repo digest %q", imageRef, imageID, - repoTag, repoDigest) + size, _ := image.Size(ctx) + imagePullingSpeed := float64(size) / time.Since(startTime).Seconds() + imagePullThroughput.Observe(imagePullingSpeed) + + log.G(ctx).Infof("Pulled image %q with image id %q, repo tag %q, repo digest %q, size %q in %s", imageRef, imageID, + repoTag, repoDigest, strconv.FormatInt(size, 10), time.Since(startTime)) // NOTE(random-liu): the actual state in containerd is the source of truth, even we maintain // in-memory image store, it's only for in-memory indexing. The image could be removed // by someone else anytime, before/during/after we create the metadata. We should always diff --git a/pkg/cri/sbserver/metrics.go b/pkg/cri/sbserver/metrics.go index ddf71af35..455927819 100644 --- a/pkg/cri/sbserver/metrics.go +++ b/pkg/cri/sbserver/metrics.go @@ -18,6 +18,7 @@ package sbserver import ( "github.com/docker/go-metrics" + prom "github.com/prometheus/client_golang/prometheus" ) var ( @@ -34,6 +35,11 @@ var ( containerCreateTimer metrics.LabeledTimer containerStopTimer metrics.LabeledTimer containerStartTimer metrics.LabeledTimer + + imagePulls metrics.LabeledCounter + inProgressImagePulls metrics.Gauge + // pull duration / (image size / 1MBi) + imagePullThroughput prom.Histogram ) func init() { @@ -54,5 +60,15 @@ func init() { containerStopTimer = ns.NewLabeledTimer("container_stop", "time to stop a container", "runtime") containerStartTimer = ns.NewLabeledTimer("container_start", "time to start a container", "runtime") + imagePulls = ns.NewLabeledCounter("image_pulls", "succeeded and failed counters", "status") + inProgressImagePulls = ns.NewGauge("in_progress_image_pulls", "in progress pulls", metrics.Total) + imagePullThroughput = prom.NewHistogram( + prom.HistogramOpts{ + Name: "image_pulling_throughput", + Help: "image pull throughput", + Buckets: prom.DefBuckets, + }, + ) + metrics.Register(ns) } diff --git a/pkg/cri/server/image_pull.go b/pkg/cri/server/image_pull.go index f07631e50..b1b55ec74 100644 --- a/pkg/cri/server/image_pull.go +++ b/pkg/cri/server/image_pull.go @@ -28,6 +28,7 @@ import ( "net/url" "os" "path/filepath" + "strconv" "strings" "sync" "sync/atomic" @@ -93,8 +94,22 @@ import ( // contents are missing but snapshots are ready, is the image still "READY"? // PullImage pulls an image with authentication config. -func (c *criService) PullImage(ctx context.Context, r *runtime.PullImageRequest) (*runtime.PullImageResponse, error) { +func (c *criService) PullImage(ctx context.Context, r *runtime.PullImageRequest) (_ *runtime.PullImageResponse, err error) { span := tracing.SpanFromContext(ctx) + defer func() { + // TODO: add domain label for imagePulls metrics, and we may need to provide a mechanism + // for the user to configure the set of registries that they are interested in. + if err != nil { + imagePulls.WithValues("failure").Inc() + } else { + imagePulls.WithValues("success").Inc() + } + }() + + inProgressImagePulls.Inc() + defer inProgressImagePulls.Dec() + startTime := time.Now() + imageRef := r.GetImage().GetImage() namedRef, err := distribution.ParseDockerRef(imageRef) if err != nil { @@ -194,8 +209,12 @@ func (c *criService) PullImage(ctx context.Context, r *runtime.PullImageRequest) } } - log.G(ctx).Debugf("Pulled image %q with image id %q, repo tag %q, repo digest %q", imageRef, imageID, - repoTag, repoDigest) + size, _ := image.Size(ctx) + imagePullingSpeed := float64(size) / time.Since(startTime).Seconds() + imagePullThroughput.Observe(imagePullingSpeed) + + log.G(ctx).Infof("Pulled image %q with image id %q, repo tag %q, repo digest %q, size %q in %s", imageRef, imageID, + repoTag, repoDigest, strconv.FormatInt(size, 10), time.Since(startTime)) // NOTE(random-liu): the actual state in containerd is the source of truth, even we maintain // in-memory image store, it's only for in-memory indexing. The image could be removed // by someone else anytime, before/during/after we create the metadata. We should always diff --git a/pkg/cri/server/metrics.go b/pkg/cri/server/metrics.go index 9712bb112..49e2a7477 100644 --- a/pkg/cri/server/metrics.go +++ b/pkg/cri/server/metrics.go @@ -18,6 +18,7 @@ package server import ( metrics "github.com/docker/go-metrics" + prom "github.com/prometheus/client_golang/prometheus" ) var ( @@ -34,6 +35,11 @@ var ( containerCreateTimer metrics.LabeledTimer containerStopTimer metrics.LabeledTimer containerStartTimer metrics.LabeledTimer + + imagePulls metrics.LabeledCounter + inProgressImagePulls metrics.Gauge + // pull duration / (image size / 1MBi) + imagePullThroughput prom.Histogram ) func init() { @@ -54,5 +60,15 @@ func init() { containerStopTimer = ns.NewLabeledTimer("container_stop", "time to stop a container", "runtime") containerStartTimer = ns.NewLabeledTimer("container_start", "time to start a container", "runtime") + imagePulls = ns.NewLabeledCounter("image_pulls", "succeeded and failed counters", "status") + inProgressImagePulls = ns.NewGauge("in_progress_image_pulls", "in progress pulls", metrics.Total) + imagePullThroughput = prom.NewHistogram( + prom.HistogramOpts{ + Name: "image_pulling_throughput", + Help: "image pull throughput", + Buckets: prom.DefBuckets, + }, + ) + metrics.Register(ns) }