add metrics for image pulling: success/failure count, in-progress count, and throughput

Signed-off-by: Paco Xu <paco.xu@daocloud.io>
Paco Xu 2022-08-21 19:06:38 +08:00
parent 0fa51f54df
commit c59f1635f0
4 changed files with 76 additions and 6 deletions


@@ -28,6 +28,7 @@ import (
     "net/url"
     "os"
     "path/filepath"
+    "strconv"
     "strings"
     "sync"
     "sync/atomic"
@@ -93,8 +94,22 @@ import (
 // contents are missing but snapshots are ready, is the image still "READY"?

 // PullImage pulls an image with authentication config.
-func (c *criService) PullImage(ctx context.Context, r *runtime.PullImageRequest) (*runtime.PullImageResponse, error) {
+func (c *criService) PullImage(ctx context.Context, r *runtime.PullImageRequest) (_ *runtime.PullImageResponse, err error) {
     span := tracing.SpanFromContext(ctx)
+    defer func() {
+        // TODO: add domain label for imagePulls metrics, and we may need to provide a mechanism
+        // for the user to configure the set of registries that they are interested in.
+        if err != nil {
+            imagePulls.WithValues("failure").Inc()
+        } else {
+            imagePulls.WithValues("success").Inc()
+        }
+    }()
+
+    inProgressImagePulls.Inc()
+    defer inProgressImagePulls.Dec()
+
+    startTime := time.Now()
     imageRef := r.GetImage().GetImage()
     namedRef, err := distribution.ParseDockerRef(imageRef)
     if err != nil {
@@ -195,8 +210,12 @@ func (c *criService) PullImage(ctx context.Context, r *runtime.PullImageRequest)
         }
     }

-    log.G(ctx).Debugf("Pulled image %q with image id %q, repo tag %q, repo digest %q", imageRef, imageID,
-        repoTag, repoDigest)
+    size, _ := image.Size(ctx)
+    imagePullingSpeed := float64(size) / time.Since(startTime).Seconds()
+    imagePullThroughput.Observe(imagePullingSpeed)
+
+    log.G(ctx).Infof("Pulled image %q with image id %q, repo tag %q, repo digest %q, size %q in %s", imageRef, imageID,
+        repoTag, repoDigest, strconv.FormatInt(size, 10), time.Since(startTime))
     // NOTE(random-liu): the actual state in containerd is the source of truth, even we maintain
     // in-memory image store, it's only for in-memory indexing. The image could be removed
     // by someone else anytime, before/during/after we create the metadata. We should always
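
The signature change above, from (*runtime.PullImageResponse, error) to (_ *runtime.PullImageResponse, err error), is what lets the deferred closure see the error that PullImage ultimately returns and increment the matching counter. Below is a minimal standalone sketch of that named-return pattern; the pull helper and the result map are hypothetical stand-ins, not part of this commit:

package main

import (
    "errors"
    "fmt"
)

// result counts outcomes by status; in the commit this role is played by the
// imagePulls labeled counter.
var result = map[string]int{}

// pull is a hypothetical stand-in for PullImage. Because err is a named return
// value, the deferred closure observes whatever error value is finally returned.
func pull(ref string) (err error) {
    defer func() {
        if err != nil {
            result["failure"]++
        } else {
            result["success"]++
        }
    }()
    if ref == "" {
        return errors.New("image reference is empty")
    }
    return nil
}

func main() {
    _ = pull("docker.io/library/busybox:latest")
    _ = pull("")
    fmt.Println(result) // map[failure:1 success:1]
}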


@@ -18,6 +18,7 @@ package sbserver

 import (
     "github.com/docker/go-metrics"
+    prom "github.com/prometheus/client_golang/prometheus"
 )

 var (
@@ -34,6 +35,11 @@ var (
     containerCreateTimer metrics.LabeledTimer
     containerStopTimer   metrics.LabeledTimer
     containerStartTimer  metrics.LabeledTimer
+
+    imagePulls           metrics.LabeledCounter
+    inProgressImagePulls metrics.Gauge
+    // image pull throughput: image size in bytes / pull duration in seconds
+    imagePullThroughput prom.Histogram
 )

 func init() {
@@ -54,5 +60,15 @@ func init() {
     containerStopTimer = ns.NewLabeledTimer("container_stop", "time to stop a container", "runtime")
     containerStartTimer = ns.NewLabeledTimer("container_start", "time to start a container", "runtime")

+    imagePulls = ns.NewLabeledCounter("image_pulls", "succeeded and failed counters", "status")
+    inProgressImagePulls = ns.NewGauge("in_progress_image_pulls", "in progress pulls", metrics.Total)
+    imagePullThroughput = prom.NewHistogram(
+        prom.HistogramOpts{
+            Name:    "image_pulling_throughput",
+            Help:    "image pull throughput",
+            Buckets: prom.DefBuckets,
+        },
+    )
+
     metrics.Register(ns)
 }
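
The value observed into imagePullThroughput in image_pull.go above is the image size in bytes divided by the pull duration in seconds. A small worked example of the number that lands in the histogram, using made-up figures:

package main

import (
    "fmt"
    "time"
)

func main() {
    // Hypothetical pull: a 25 MiB image fetched in 2 seconds.
    size := int64(25 * 1024 * 1024) // bytes, as returned by image.Size(ctx)
    elapsed := 2 * time.Second      // stands in for time.Since(startTime)

    speed := float64(size) / elapsed.Seconds() // bytes per second
    fmt.Printf("observed throughput: %.0f B/s (~%.1f MiB/s)\n", speed, speed/(1024*1024))
    // observed throughput: 13107200 B/s (~12.5 MiB/s)
}

Note that prom.DefBuckets (0.005 up to 10) is tuned for request latencies measured in seconds, so bytes-per-second values of this magnitude will all fall into the histogram's +Inf bucket unless custom buckets are supplied.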


@@ -28,6 +28,7 @@ import (
     "net/url"
     "os"
     "path/filepath"
+    "strconv"
     "strings"
     "sync"
     "sync/atomic"
@@ -93,8 +94,22 @@ import (
 // contents are missing but snapshots are ready, is the image still "READY"?

 // PullImage pulls an image with authentication config.
-func (c *criService) PullImage(ctx context.Context, r *runtime.PullImageRequest) (*runtime.PullImageResponse, error) {
+func (c *criService) PullImage(ctx context.Context, r *runtime.PullImageRequest) (_ *runtime.PullImageResponse, err error) {
     span := tracing.SpanFromContext(ctx)
+    defer func() {
+        // TODO: add domain label for imagePulls metrics, and we may need to provide a mechanism
+        // for the user to configure the set of registries that they are interested in.
+        if err != nil {
+            imagePulls.WithValues("failure").Inc()
+        } else {
+            imagePulls.WithValues("success").Inc()
+        }
+    }()
+
+    inProgressImagePulls.Inc()
+    defer inProgressImagePulls.Dec()
+
+    startTime := time.Now()
     imageRef := r.GetImage().GetImage()
     namedRef, err := distribution.ParseDockerRef(imageRef)
     if err != nil {
@@ -194,8 +209,12 @@ func (c *criService) PullImage(ctx context.Context, r *runtime.PullImageRequest)
         }
     }

-    log.G(ctx).Debugf("Pulled image %q with image id %q, repo tag %q, repo digest %q", imageRef, imageID,
-        repoTag, repoDigest)
+    size, _ := image.Size(ctx)
+    imagePullingSpeed := float64(size) / time.Since(startTime).Seconds()
+    imagePullThroughput.Observe(imagePullingSpeed)
+
+    log.G(ctx).Infof("Pulled image %q with image id %q, repo tag %q, repo digest %q, size %q in %s", imageRef, imageID,
+        repoTag, repoDigest, strconv.FormatInt(size, 10), time.Since(startTime))
     // NOTE(random-liu): the actual state in containerd is the source of truth, even we maintain
     // in-memory image store, it's only for in-memory indexing. The image could be removed
     // by someone else anytime, before/during/after we create the metadata. We should always


@@ -18,6 +18,7 @@ package server

 import (
     metrics "github.com/docker/go-metrics"
+    prom "github.com/prometheus/client_golang/prometheus"
 )

 var (
@@ -34,6 +35,11 @@ var (
     containerCreateTimer metrics.LabeledTimer
     containerStopTimer   metrics.LabeledTimer
     containerStartTimer  metrics.LabeledTimer
+
+    imagePulls           metrics.LabeledCounter
+    inProgressImagePulls metrics.Gauge
+    // image pull throughput: image size in bytes / pull duration in seconds
+    imagePullThroughput prom.Histogram
 )

 func init() {
@@ -54,5 +60,15 @@ func init() {
     containerStopTimer = ns.NewLabeledTimer("container_stop", "time to stop a container", "runtime")
     containerStartTimer = ns.NewLabeledTimer("container_start", "time to start a container", "runtime")

+    imagePulls = ns.NewLabeledCounter("image_pulls", "succeeded and failed counters", "status")
+    inProgressImagePulls = ns.NewGauge("in_progress_image_pulls", "in progress pulls", metrics.Total)
+    imagePullThroughput = prom.NewHistogram(
+        prom.HistogramOpts{
+            Name:    "image_pulling_throughput",
+            Help:    "image pull throughput",
+            Buckets: prom.DefBuckets,
+        },
+    )
+
     metrics.Register(ns)
 }
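
For manual verification, metrics registered through a docker/go-metrics namespace can be exposed over HTTP with the library's handler. A rough, self-contained sketch follows; the namespace name is illustrative only, and the actual containerd metric names depend on the namespace created elsewhere in metrics.go:

package main

import (
    "net/http"

    metrics "github.com/docker/go-metrics"
)

func main() {
    // Illustrative namespace; the real one is defined elsewhere in metrics.go.
    ns := metrics.NewNamespace("example", "cri", nil)
    imagePulls := ns.NewLabeledCounter("image_pulls", "succeeded and failed counters", "status")
    metrics.Register(ns)

    // Record one successful pull so the counter shows up with a non-zero value.
    imagePulls.WithValues("success").Inc()

    // Visit http://localhost:9090/metrics to inspect the registered counters.
    http.Handle("/metrics", metrics.Handler())
    _ = http.ListenAndServe(":9090", nil)
}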