Merge pull request #6111 from crosbymichael/latency-metrics
[cri] add sandbox and container latency metrics
This commit is contained in:
commit
9afc778b73
@ -102,6 +102,7 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta
|
||||
return nil, errors.Wrapf(err, "failed to get image from containerd %q", image.ID)
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
// Run container using the same runtime with sandbox.
|
||||
sandboxInfo, err := sandbox.Container.Info(ctx)
|
||||
if err != nil {
|
||||
@ -278,6 +279,8 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta
|
||||
return nil, errors.Wrapf(err, "failed to add container %q into store", id)
|
||||
}
|
||||
|
||||
containerCreateTimer.WithValues(ociRuntime.Type).UpdateSince(start)
|
||||
|
||||
return &runtime.CreateContainerResponse{ContainerId: id}, nil
|
||||
}
|
||||
|
||||
|
@ -17,6 +17,8 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"golang.org/x/net/context"
|
||||
|
||||
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
@ -26,6 +28,7 @@ import (
|
||||
|
||||
// ListContainers lists all containers matching the filter.
|
||||
func (c *criService) ListContainers(ctx context.Context, r *runtime.ListContainersRequest) (*runtime.ListContainersResponse, error) {
|
||||
start := time.Now()
|
||||
// List all containers from store.
|
||||
containersInStore := c.containerStore.List()
|
||||
|
||||
@ -35,6 +38,8 @@ func (c *criService) ListContainers(ctx context.Context, r *runtime.ListContaine
|
||||
}
|
||||
|
||||
containers = c.filterCRIContainers(containers, r.GetFilter())
|
||||
|
||||
containerListTimer.UpdateSince(start)
|
||||
return &runtime.ListContainersResponse{Containers: containers}, nil
|
||||
}
|
||||
|
||||
|
@ -17,6 +17,8 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/containerd/containerd"
|
||||
"github.com/containerd/containerd/errdefs"
|
||||
"github.com/containerd/containerd/log"
|
||||
@ -30,6 +32,7 @@ import (
|
||||
|
||||
// RemoveContainer removes the container.
|
||||
func (c *criService) RemoveContainer(ctx context.Context, r *runtime.RemoveContainerRequest) (_ *runtime.RemoveContainerResponse, retErr error) {
|
||||
start := time.Now()
|
||||
container, err := c.containerStore.Get(r.GetContainerId())
|
||||
if err != nil {
|
||||
if !errdefs.IsNotFound(err) {
|
||||
@ -40,6 +43,10 @@ func (c *criService) RemoveContainer(ctx context.Context, r *runtime.RemoveConta
|
||||
return &runtime.RemoveContainerResponse{}, nil
|
||||
}
|
||||
id := container.ID
|
||||
i, err := container.Container.Info(ctx)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "get container info")
|
||||
}
|
||||
|
||||
// Forcibly stop the containers if they are in running or unknown state
|
||||
state := container.Status.Get().State()
|
||||
@ -99,6 +106,8 @@ func (c *criService) RemoveContainer(ctx context.Context, r *runtime.RemoveConta
|
||||
|
||||
c.containerNameIndex.ReleaseByKey(id)
|
||||
|
||||
containerRemoveTimer.WithValues(i.Runtime.Name).UpdateSince(start)
|
||||
|
||||
return &runtime.RemoveContainerResponse{}, nil
|
||||
}
|
||||
|
||||
|
@ -40,11 +40,17 @@ import (
|
||||
|
||||
// StartContainer starts the container.
|
||||
func (c *criService) StartContainer(ctx context.Context, r *runtime.StartContainerRequest) (retRes *runtime.StartContainerResponse, retErr error) {
|
||||
start := time.Now()
|
||||
cntr, err := c.containerStore.Get(r.GetContainerId())
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "an error occurred when try to find container %q", r.GetContainerId())
|
||||
}
|
||||
|
||||
info, err := cntr.Container.Info(ctx)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "get container info")
|
||||
}
|
||||
|
||||
id := cntr.ID
|
||||
meta := cntr.Metadata
|
||||
container := cntr.Container
|
||||
@ -170,6 +176,8 @@ func (c *criService) StartContainer(ctx context.Context, r *runtime.StartContain
|
||||
// It handles the TaskExit event and update container state after this.
|
||||
c.eventMonitor.startContainerExitMonitor(context.Background(), id, task.Pid(), exitCh)
|
||||
|
||||
containerStartTimer.WithValues(info.Runtime.Name).UpdateSince(start)
|
||||
|
||||
return &runtime.StartContainerResponse{}, nil
|
||||
}
|
||||
|
||||
|
@ -35,6 +35,7 @@ import (
|
||||
|
||||
// StopContainer stops a running container with a grace period (i.e., timeout).
|
||||
func (c *criService) StopContainer(ctx context.Context, r *runtime.StopContainerRequest) (*runtime.StopContainerResponse, error) {
|
||||
start := time.Now()
|
||||
// Get container config from container store.
|
||||
container, err := c.containerStore.Get(r.GetContainerId())
|
||||
if err != nil {
|
||||
@ -45,6 +46,13 @@ func (c *criService) StopContainer(ctx context.Context, r *runtime.StopContainer
|
||||
return nil, err
|
||||
}
|
||||
|
||||
i, err := container.Container.Info(ctx)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "get container info")
|
||||
}
|
||||
|
||||
containerStopTimer.WithValues(i.Runtime.Name).UpdateSince(start)
|
||||
|
||||
return &runtime.StopContainerResponse{}, nil
|
||||
}
|
||||
|
||||
|
58
pkg/cri/server/metrics.go
Normal file
58
pkg/cri/server/metrics.go
Normal file
@ -0,0 +1,58 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package server
|
||||
|
||||
import (
|
||||
metrics "github.com/docker/go-metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
sandboxListTimer metrics.Timer
|
||||
sandboxCreateNetworkTimer metrics.Timer
|
||||
sandboxDeleteNetwork metrics.Timer
|
||||
|
||||
sandboxRuntimeCreateTimer metrics.LabeledTimer
|
||||
sandboxRuntimeStopTimer metrics.LabeledTimer
|
||||
sandboxRemoveTimer metrics.LabeledTimer
|
||||
|
||||
containerListTimer metrics.Timer
|
||||
containerRemoveTimer metrics.LabeledTimer
|
||||
containerCreateTimer metrics.LabeledTimer
|
||||
containerStopTimer metrics.LabeledTimer
|
||||
containerStartTimer metrics.LabeledTimer
|
||||
)
|
||||
|
||||
func init() {
|
||||
// these CRI metrics record latencies for successful operations around a sandbox and container's lifecycle.
|
||||
ns := metrics.NewNamespace("containerd", "cri", nil)
|
||||
|
||||
sandboxListTimer = ns.NewTimer("sandbox_list", "time to list sandboxes")
|
||||
sandboxCreateNetworkTimer = ns.NewTimer("sandbox_create_network", "time to create the network for a sandbox")
|
||||
sandboxDeleteNetwork = ns.NewTimer("sandbox_delete_network", "time to delete a sandbox's network")
|
||||
|
||||
sandboxRuntimeCreateTimer = ns.NewLabeledTimer("sandbox_runtime_create", "time to create a sandbox in the runtime", "runtime")
|
||||
sandboxRuntimeStopTimer = ns.NewLabeledTimer("sandbox_runtime_stop", "time to stop a sandbox", "runtime")
|
||||
sandboxRemoveTimer = ns.NewLabeledTimer("sandbox_remove", "time to remove a sandbox", "runtime")
|
||||
|
||||
containerListTimer = ns.NewTimer("container_list", "time to list containers")
|
||||
containerRemoveTimer = ns.NewLabeledTimer("container_remove", "time to remove a container", "runtime")
|
||||
containerCreateTimer = ns.NewLabeledTimer("container_create", "time to create a container", "runtime")
|
||||
containerStopTimer = ns.NewLabeledTimer("container_stop", "time to stop a container", "runtime")
|
||||
containerStartTimer = ns.NewLabeledTimer("container_start", "time to start a container", "runtime")
|
||||
|
||||
metrics.Register(ns)
|
||||
}
|
@ -17,6 +17,8 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"golang.org/x/net/context"
|
||||
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
|
||||
@ -25,6 +27,7 @@ import (
|
||||
|
||||
// ListPodSandbox returns a list of Sandbox.
|
||||
func (c *criService) ListPodSandbox(ctx context.Context, r *runtime.ListPodSandboxRequest) (*runtime.ListPodSandboxResponse, error) {
|
||||
start := time.Now()
|
||||
// List all sandboxes from store.
|
||||
sandboxesInStore := c.sandboxStore.List()
|
||||
var sandboxes []*runtime.PodSandbox
|
||||
@ -36,6 +39,8 @@ func (c *criService) ListPodSandbox(ctx context.Context, r *runtime.ListPodSandb
|
||||
}
|
||||
|
||||
sandboxes = c.filterCRISandboxes(sandboxes, r.GetFilter())
|
||||
|
||||
sandboxListTimer.UpdateSince(start)
|
||||
return &runtime.ListPodSandboxResponse{Items: sandboxes}, nil
|
||||
}
|
||||
|
||||
|
@ -17,6 +17,8 @@
|
||||
package server
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/containerd/containerd"
|
||||
"github.com/containerd/containerd/errdefs"
|
||||
"github.com/containerd/containerd/log"
|
||||
@ -30,6 +32,7 @@ import (
|
||||
// RemovePodSandbox removes the sandbox. If there are running containers in the
|
||||
// sandbox, they should be forcibly removed.
|
||||
func (c *criService) RemovePodSandbox(ctx context.Context, r *runtime.RemovePodSandboxRequest) (*runtime.RemovePodSandboxResponse, error) {
|
||||
start := time.Now()
|
||||
sandbox, err := c.sandboxStore.Get(r.GetPodSandboxId())
|
||||
if err != nil {
|
||||
if !errdefs.IsNotFound(err) {
|
||||
@ -108,5 +111,7 @@ func (c *criService) RemovePodSandbox(ctx context.Context, r *runtime.RemovePodS
|
||||
// Release the sandbox name reserved for the sandbox.
|
||||
c.sandboxNameIndex.ReleaseByKey(id)
|
||||
|
||||
sandboxRemoveTimer.WithValues(sandbox.RuntimeHandler).UpdateSince(start)
|
||||
|
||||
return &runtime.RemovePodSandboxResponse{}, nil
|
||||
}
|
||||
|
@ -22,6 +22,7 @@ import (
|
||||
"path/filepath"
|
||||
goruntime "runtime"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/containerd/containerd"
|
||||
containerdio "github.com/containerd/containerd/cio"
|
||||
@ -123,6 +124,7 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
|
||||
}
|
||||
|
||||
if podNetwork {
|
||||
netStart := time.Now()
|
||||
// If it is not in host network namespace then create a namespace and set the sandbox
|
||||
// handle. NetNSPath in sandbox metadata and NetNS is non empty only for non host network
|
||||
// namespaces. If the pod is in host network namespace then both are empty and should not
|
||||
@ -163,8 +165,10 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
|
||||
if err := c.setupPodNetwork(ctx, &sandbox); err != nil {
|
||||
return nil, errors.Wrapf(err, "failed to setup network for sandbox %q", id)
|
||||
}
|
||||
sandboxCreateNetworkTimer.UpdateSince(netStart)
|
||||
}
|
||||
|
||||
runtimeStart := time.Now()
|
||||
// Create sandbox container.
|
||||
// NOTE: sandboxContainerSpec SHOULD NOT have side
|
||||
// effect, e.g. accessing/creating files, so that we can test
|
||||
@ -348,6 +352,8 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
|
||||
// but we don't care about sandbox TaskOOM right now, so it is fine.
|
||||
c.eventMonitor.startSandboxExitMonitor(context.Background(), id, task.Pid(), exitCh)
|
||||
|
||||
sandboxRuntimeCreateTimer.WithValues(ociRuntime.Type).UpdateSince(runtimeStart)
|
||||
|
||||
return &runtime.RunPodSandboxResponse{PodSandboxId: id}, nil
|
||||
}
|
||||
|
||||
|
@ -54,6 +54,7 @@ func (c *criService) stopPodSandbox(ctx context.Context, sandbox sandboxstore.Sa
|
||||
// Stop all containers inside the sandbox. This terminates the container forcibly,
|
||||
// and container may still be created, so production should not rely on this behavior.
|
||||
// TODO(random-liu): Introduce a state in sandbox to avoid future container creation.
|
||||
stop := time.Now()
|
||||
containers := c.containerStore.List()
|
||||
for _, container := range containers {
|
||||
if container.SandboxID != id {
|
||||
@ -77,9 +78,11 @@ func (c *criService) stopPodSandbox(ctx context.Context, sandbox sandboxstore.Sa
|
||||
return errors.Wrapf(err, "failed to stop sandbox container %q in %q state", id, state)
|
||||
}
|
||||
}
|
||||
sandboxRuntimeStopTimer.WithValues(sandbox.RuntimeHandler).UpdateSince(stop)
|
||||
|
||||
// Teardown network for sandbox.
|
||||
if sandbox.NetNS != nil {
|
||||
netStop := time.Now()
|
||||
// Use empty netns path if netns is not available. This is defined in:
|
||||
// https://github.com/containernetworking/cni/blob/v0.7.0-alpha1/SPEC.md
|
||||
if closed, err := sandbox.NetNS.Closed(); err != nil {
|
||||
@ -93,6 +96,7 @@ func (c *criService) stopPodSandbox(ctx context.Context, sandbox sandboxstore.Sa
|
||||
if err := sandbox.NetNS.Remove(); err != nil {
|
||||
return errors.Wrapf(err, "failed to remove network namespace for sandbox %q", id)
|
||||
}
|
||||
sandboxDeleteNetwork.UpdateSince(netStop)
|
||||
}
|
||||
|
||||
log.G(ctx).Infof("TearDown network for sandbox %q successfully", id)
|
||||
|
Loading…
Reference in New Issue
Block a user