[cri] add sandbox and container latency metrics
These are simple latency metrics that give users a finer-grained view of internal CRI operations. Signed-off-by: Michael Crosby <michael@thepasture.io>
This commit is contained in:
parent
432ddecaae
commit
91bbaf6799
@ -102,6 +102,7 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta
|
|||||||
return nil, errors.Wrapf(err, "failed to get image from containerd %q", image.ID)
|
return nil, errors.Wrapf(err, "failed to get image from containerd %q", image.ID)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
start := time.Now()
|
||||||
// Run container using the same runtime with sandbox.
|
// Run container using the same runtime with sandbox.
|
||||||
sandboxInfo, err := sandbox.Container.Info(ctx)
|
sandboxInfo, err := sandbox.Container.Info(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -278,6 +279,8 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta
|
|||||||
return nil, errors.Wrapf(err, "failed to add container %q into store", id)
|
return nil, errors.Wrapf(err, "failed to add container %q into store", id)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
containerCreateTimer.WithValues(ociRuntime.Type).UpdateSince(start)
|
||||||
|
|
||||||
return &runtime.CreateContainerResponse{ContainerId: id}, nil
|
return &runtime.CreateContainerResponse{ContainerId: id}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -17,6 +17,8 @@
|
|||||||
package server
|
package server
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"time"
|
||||||
|
|
||||||
"golang.org/x/net/context"
|
"golang.org/x/net/context"
|
||||||
|
|
||||||
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||||
@ -26,6 +28,7 @@ import (
|
|||||||
|
|
||||||
// ListContainers lists all containers matching the filter.
|
// ListContainers lists all containers matching the filter.
|
||||||
func (c *criService) ListContainers(ctx context.Context, r *runtime.ListContainersRequest) (*runtime.ListContainersResponse, error) {
|
func (c *criService) ListContainers(ctx context.Context, r *runtime.ListContainersRequest) (*runtime.ListContainersResponse, error) {
|
||||||
|
start := time.Now()
|
||||||
// List all containers from store.
|
// List all containers from store.
|
||||||
containersInStore := c.containerStore.List()
|
containersInStore := c.containerStore.List()
|
||||||
|
|
||||||
@ -35,6 +38,8 @@ func (c *criService) ListContainers(ctx context.Context, r *runtime.ListContaine
|
|||||||
}
|
}
|
||||||
|
|
||||||
containers = c.filterCRIContainers(containers, r.GetFilter())
|
containers = c.filterCRIContainers(containers, r.GetFilter())
|
||||||
|
|
||||||
|
containerListTimer.UpdateSince(start)
|
||||||
return &runtime.ListContainersResponse{Containers: containers}, nil
|
return &runtime.ListContainersResponse{Containers: containers}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -17,6 +17,8 @@
|
|||||||
package server
|
package server
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/containerd/containerd"
|
"github.com/containerd/containerd"
|
||||||
"github.com/containerd/containerd/errdefs"
|
"github.com/containerd/containerd/errdefs"
|
||||||
"github.com/containerd/containerd/log"
|
"github.com/containerd/containerd/log"
|
||||||
@ -30,6 +32,7 @@ import (
|
|||||||
|
|
||||||
// RemoveContainer removes the container.
|
// RemoveContainer removes the container.
|
||||||
func (c *criService) RemoveContainer(ctx context.Context, r *runtime.RemoveContainerRequest) (_ *runtime.RemoveContainerResponse, retErr error) {
|
func (c *criService) RemoveContainer(ctx context.Context, r *runtime.RemoveContainerRequest) (_ *runtime.RemoveContainerResponse, retErr error) {
|
||||||
|
start := time.Now()
|
||||||
container, err := c.containerStore.Get(r.GetContainerId())
|
container, err := c.containerStore.Get(r.GetContainerId())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if !errdefs.IsNotFound(err) {
|
if !errdefs.IsNotFound(err) {
|
||||||
@ -40,6 +43,10 @@ func (c *criService) RemoveContainer(ctx context.Context, r *runtime.RemoveConta
|
|||||||
return &runtime.RemoveContainerResponse{}, nil
|
return &runtime.RemoveContainerResponse{}, nil
|
||||||
}
|
}
|
||||||
id := container.ID
|
id := container.ID
|
||||||
|
i, err := container.Container.Info(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return nil, errors.Wrap(err, "get container info")
|
||||||
|
}
|
||||||
|
|
||||||
// Forcibly stop the containers if they are in running or unknown state
|
// Forcibly stop the containers if they are in running or unknown state
|
||||||
state := container.Status.Get().State()
|
state := container.Status.Get().State()
|
||||||
@ -99,6 +106,8 @@ func (c *criService) RemoveContainer(ctx context.Context, r *runtime.RemoveConta
|
|||||||
|
|
||||||
c.containerNameIndex.ReleaseByKey(id)
|
c.containerNameIndex.ReleaseByKey(id)
|
||||||
|
|
||||||
|
containerRemoveTimer.WithValues(i.Runtime.Name).UpdateSince(start)
|
||||||
|
|
||||||
return &runtime.RemoveContainerResponse{}, nil
|
return &runtime.RemoveContainerResponse{}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -40,11 +40,17 @@ import (
|
|||||||
|
|
||||||
// StartContainer starts the container.
|
// StartContainer starts the container.
|
||||||
func (c *criService) StartContainer(ctx context.Context, r *runtime.StartContainerRequest) (retRes *runtime.StartContainerResponse, retErr error) {
|
func (c *criService) StartContainer(ctx context.Context, r *runtime.StartContainerRequest) (retRes *runtime.StartContainerResponse, retErr error) {
|
||||||
|
start := time.Now()
|
||||||
cntr, err := c.containerStore.Get(r.GetContainerId())
|
cntr, err := c.containerStore.Get(r.GetContainerId())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, errors.Wrapf(err, "an error occurred when try to find container %q", r.GetContainerId())
|
return nil, errors.Wrapf(err, "an error occurred when try to find container %q", r.GetContainerId())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
info, err := cntr.Container.Info(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return nil, errors.Wrap(err, "get container info")
|
||||||
|
}
|
||||||
|
|
||||||
id := cntr.ID
|
id := cntr.ID
|
||||||
meta := cntr.Metadata
|
meta := cntr.Metadata
|
||||||
container := cntr.Container
|
container := cntr.Container
|
||||||
@ -162,6 +168,8 @@ func (c *criService) StartContainer(ctx context.Context, r *runtime.StartContain
|
|||||||
// It handles the TaskExit event and update container state after this.
|
// It handles the TaskExit event and update container state after this.
|
||||||
c.eventMonitor.startContainerExitMonitor(context.Background(), id, task.Pid(), exitCh)
|
c.eventMonitor.startContainerExitMonitor(context.Background(), id, task.Pid(), exitCh)
|
||||||
|
|
||||||
|
containerStartTimer.WithValues(info.Runtime.Name).UpdateSince(start)
|
||||||
|
|
||||||
return &runtime.StartContainerResponse{}, nil
|
return &runtime.StartContainerResponse{}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -35,6 +35,7 @@ import (
|
|||||||
|
|
||||||
// StopContainer stops a running container with a grace period (i.e., timeout).
|
// StopContainer stops a running container with a grace period (i.e., timeout).
|
||||||
func (c *criService) StopContainer(ctx context.Context, r *runtime.StopContainerRequest) (*runtime.StopContainerResponse, error) {
|
func (c *criService) StopContainer(ctx context.Context, r *runtime.StopContainerRequest) (*runtime.StopContainerResponse, error) {
|
||||||
|
start := time.Now()
|
||||||
// Get container config from container store.
|
// Get container config from container store.
|
||||||
container, err := c.containerStore.Get(r.GetContainerId())
|
container, err := c.containerStore.Get(r.GetContainerId())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -45,6 +46,13 @@ func (c *criService) StopContainer(ctx context.Context, r *runtime.StopContainer
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
i, err := container.Container.Info(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return nil, errors.Wrap(err, "get container info")
|
||||||
|
}
|
||||||
|
|
||||||
|
containerStopTimer.WithValues(i.Runtime.Name).UpdateSince(start)
|
||||||
|
|
||||||
return &runtime.StopContainerResponse{}, nil
|
return &runtime.StopContainerResponse{}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
58
pkg/cri/server/metrics.go
Normal file
58
pkg/cri/server/metrics.go
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
/*
|
||||||
|
Copyright The containerd Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package server
|
||||||
|
|
||||||
|
import (
|
||||||
|
metrics "github.com/docker/go-metrics"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
sandboxListTimer metrics.Timer
|
||||||
|
sandboxCreateNetworkTimer metrics.Timer
|
||||||
|
sandboxDeleteNetwork metrics.Timer
|
||||||
|
|
||||||
|
sandboxRuntimeCreateTimer metrics.LabeledTimer
|
||||||
|
sandboxRuntimeStopTimer metrics.LabeledTimer
|
||||||
|
sandboxRemoveTimer metrics.LabeledTimer
|
||||||
|
|
||||||
|
containerListTimer metrics.Timer
|
||||||
|
containerRemoveTimer metrics.LabeledTimer
|
||||||
|
containerCreateTimer metrics.LabeledTimer
|
||||||
|
containerStopTimer metrics.LabeledTimer
|
||||||
|
containerStartTimer metrics.LabeledTimer
|
||||||
|
)
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
// these CRI metrics record latencies for successful operations around a sandbox and container's lifecycle.
|
||||||
|
ns := metrics.NewNamespace("containerd", "cri", nil)
|
||||||
|
|
||||||
|
sandboxListTimer = ns.NewTimer("sandbox_list", "time to list sandboxes")
|
||||||
|
sandboxCreateNetworkTimer = ns.NewTimer("sandbox_create_network", "time to create the network for a sandbox")
|
||||||
|
sandboxDeleteNetwork = ns.NewTimer("sandbox_delete_network", "time to delete a sandbox's network")
|
||||||
|
|
||||||
|
sandboxRuntimeCreateTimer = ns.NewLabeledTimer("sandbox_runtime_create", "time to create a sandbox in the runtime", "runtime")
|
||||||
|
sandboxRuntimeStopTimer = ns.NewLabeledTimer("sandbox_runtime_stop", "time to stop a sandbox", "runtime")
|
||||||
|
sandboxRemoveTimer = ns.NewLabeledTimer("sandbox_remove", "time to remove a sandbox", "runtime")
|
||||||
|
|
||||||
|
containerListTimer = ns.NewTimer("container_list", "time to list containers")
|
||||||
|
containerRemoveTimer = ns.NewLabeledTimer("container_remove", "time to remove a container", "runtime")
|
||||||
|
containerCreateTimer = ns.NewLabeledTimer("container_create", "time to create a container", "runtime")
|
||||||
|
containerStopTimer = ns.NewLabeledTimer("container_stop", "time to stop a container", "runtime")
|
||||||
|
containerStartTimer = ns.NewLabeledTimer("container_start", "time to start a container", "runtime")
|
||||||
|
|
||||||
|
metrics.Register(ns)
|
||||||
|
}
|
@ -17,6 +17,8 @@
|
|||||||
package server
|
package server
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"time"
|
||||||
|
|
||||||
"golang.org/x/net/context"
|
"golang.org/x/net/context"
|
||||||
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||||
|
|
||||||
@ -25,6 +27,7 @@ import (
|
|||||||
|
|
||||||
// ListPodSandbox returns a list of Sandbox.
|
// ListPodSandbox returns a list of Sandbox.
|
||||||
func (c *criService) ListPodSandbox(ctx context.Context, r *runtime.ListPodSandboxRequest) (*runtime.ListPodSandboxResponse, error) {
|
func (c *criService) ListPodSandbox(ctx context.Context, r *runtime.ListPodSandboxRequest) (*runtime.ListPodSandboxResponse, error) {
|
||||||
|
start := time.Now()
|
||||||
// List all sandboxes from store.
|
// List all sandboxes from store.
|
||||||
sandboxesInStore := c.sandboxStore.List()
|
sandboxesInStore := c.sandboxStore.List()
|
||||||
var sandboxes []*runtime.PodSandbox
|
var sandboxes []*runtime.PodSandbox
|
||||||
@ -36,6 +39,8 @@ func (c *criService) ListPodSandbox(ctx context.Context, r *runtime.ListPodSandb
|
|||||||
}
|
}
|
||||||
|
|
||||||
sandboxes = c.filterCRISandboxes(sandboxes, r.GetFilter())
|
sandboxes = c.filterCRISandboxes(sandboxes, r.GetFilter())
|
||||||
|
|
||||||
|
sandboxListTimer.UpdateSince(start)
|
||||||
return &runtime.ListPodSandboxResponse{Items: sandboxes}, nil
|
return &runtime.ListPodSandboxResponse{Items: sandboxes}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -17,6 +17,8 @@
|
|||||||
package server
|
package server
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/containerd/containerd"
|
"github.com/containerd/containerd"
|
||||||
"github.com/containerd/containerd/errdefs"
|
"github.com/containerd/containerd/errdefs"
|
||||||
"github.com/containerd/containerd/log"
|
"github.com/containerd/containerd/log"
|
||||||
@ -30,6 +32,7 @@ import (
|
|||||||
// RemovePodSandbox removes the sandbox. If there are running containers in the
|
// RemovePodSandbox removes the sandbox. If there are running containers in the
|
||||||
// sandbox, they should be forcibly removed.
|
// sandbox, they should be forcibly removed.
|
||||||
func (c *criService) RemovePodSandbox(ctx context.Context, r *runtime.RemovePodSandboxRequest) (*runtime.RemovePodSandboxResponse, error) {
|
func (c *criService) RemovePodSandbox(ctx context.Context, r *runtime.RemovePodSandboxRequest) (*runtime.RemovePodSandboxResponse, error) {
|
||||||
|
start := time.Now()
|
||||||
sandbox, err := c.sandboxStore.Get(r.GetPodSandboxId())
|
sandbox, err := c.sandboxStore.Get(r.GetPodSandboxId())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if !errdefs.IsNotFound(err) {
|
if !errdefs.IsNotFound(err) {
|
||||||
@ -108,5 +111,7 @@ func (c *criService) RemovePodSandbox(ctx context.Context, r *runtime.RemovePodS
|
|||||||
// Release the sandbox name reserved for the sandbox.
|
// Release the sandbox name reserved for the sandbox.
|
||||||
c.sandboxNameIndex.ReleaseByKey(id)
|
c.sandboxNameIndex.ReleaseByKey(id)
|
||||||
|
|
||||||
|
sandboxRemoveTimer.WithValues(sandbox.RuntimeHandler).UpdateSince(start)
|
||||||
|
|
||||||
return &runtime.RemovePodSandboxResponse{}, nil
|
return &runtime.RemovePodSandboxResponse{}, nil
|
||||||
}
|
}
|
||||||
|
@ -22,6 +22,7 @@ import (
|
|||||||
"path/filepath"
|
"path/filepath"
|
||||||
goruntime "runtime"
|
goruntime "runtime"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/containerd/containerd"
|
"github.com/containerd/containerd"
|
||||||
containerdio "github.com/containerd/containerd/cio"
|
containerdio "github.com/containerd/containerd/cio"
|
||||||
@ -123,6 +124,7 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
|
|||||||
}
|
}
|
||||||
|
|
||||||
if podNetwork {
|
if podNetwork {
|
||||||
|
netStart := time.Now()
|
||||||
// If it is not in host network namespace then create a namespace and set the sandbox
|
// If it is not in host network namespace then create a namespace and set the sandbox
|
||||||
// handle. NetNSPath in sandbox metadata and NetNS is non empty only for non host network
|
// handle. NetNSPath in sandbox metadata and NetNS is non empty only for non host network
|
||||||
// namespaces. If the pod is in host network namespace then both are empty and should not
|
// namespaces. If the pod is in host network namespace then both are empty and should not
|
||||||
@ -163,8 +165,10 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
|
|||||||
if err := c.setupPodNetwork(ctx, &sandbox); err != nil {
|
if err := c.setupPodNetwork(ctx, &sandbox); err != nil {
|
||||||
return nil, errors.Wrapf(err, "failed to setup network for sandbox %q", id)
|
return nil, errors.Wrapf(err, "failed to setup network for sandbox %q", id)
|
||||||
}
|
}
|
||||||
|
sandboxCreateNetworkTimer.UpdateSince(netStart)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
runtimeStart := time.Now()
|
||||||
// Create sandbox container.
|
// Create sandbox container.
|
||||||
// NOTE: sandboxContainerSpec SHOULD NOT have side
|
// NOTE: sandboxContainerSpec SHOULD NOT have side
|
||||||
// effect, e.g. accessing/creating files, so that we can test
|
// effect, e.g. accessing/creating files, so that we can test
|
||||||
@ -345,6 +349,8 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
|
|||||||
// but we don't care about sandbox TaskOOM right now, so it is fine.
|
// but we don't care about sandbox TaskOOM right now, so it is fine.
|
||||||
c.eventMonitor.startSandboxExitMonitor(context.Background(), id, task.Pid(), exitCh)
|
c.eventMonitor.startSandboxExitMonitor(context.Background(), id, task.Pid(), exitCh)
|
||||||
|
|
||||||
|
sandboxRuntimeCreateTimer.WithValues(ociRuntime.Type).UpdateSince(runtimeStart)
|
||||||
|
|
||||||
return &runtime.RunPodSandboxResponse{PodSandboxId: id}, nil
|
return &runtime.RunPodSandboxResponse{PodSandboxId: id}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -54,6 +54,7 @@ func (c *criService) stopPodSandbox(ctx context.Context, sandbox sandboxstore.Sa
|
|||||||
// Stop all containers inside the sandbox. This terminates the container forcibly,
|
// Stop all containers inside the sandbox. This terminates the container forcibly,
|
||||||
// and container may still be created, so production should not rely on this behavior.
|
// and container may still be created, so production should not rely on this behavior.
|
||||||
// TODO(random-liu): Introduce a state in sandbox to avoid future container creation.
|
// TODO(random-liu): Introduce a state in sandbox to avoid future container creation.
|
||||||
|
stop := time.Now()
|
||||||
containers := c.containerStore.List()
|
containers := c.containerStore.List()
|
||||||
for _, container := range containers {
|
for _, container := range containers {
|
||||||
if container.SandboxID != id {
|
if container.SandboxID != id {
|
||||||
@ -77,9 +78,11 @@ func (c *criService) stopPodSandbox(ctx context.Context, sandbox sandboxstore.Sa
|
|||||||
return errors.Wrapf(err, "failed to stop sandbox container %q in %q state", id, state)
|
return errors.Wrapf(err, "failed to stop sandbox container %q in %q state", id, state)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
sandboxRuntimeStopTimer.WithValues(sandbox.RuntimeHandler).UpdateSince(stop)
|
||||||
|
|
||||||
// Teardown network for sandbox.
|
// Teardown network for sandbox.
|
||||||
if sandbox.NetNS != nil {
|
if sandbox.NetNS != nil {
|
||||||
|
netStop := time.Now()
|
||||||
// Use empty netns path if netns is not available. This is defined in:
|
// Use empty netns path if netns is not available. This is defined in:
|
||||||
// https://github.com/containernetworking/cni/blob/v0.7.0-alpha1/SPEC.md
|
// https://github.com/containernetworking/cni/blob/v0.7.0-alpha1/SPEC.md
|
||||||
if closed, err := sandbox.NetNS.Closed(); err != nil {
|
if closed, err := sandbox.NetNS.Closed(); err != nil {
|
||||||
@ -93,6 +96,7 @@ func (c *criService) stopPodSandbox(ctx context.Context, sandbox sandboxstore.Sa
|
|||||||
if err := sandbox.NetNS.Remove(); err != nil {
|
if err := sandbox.NetNS.Remove(); err != nil {
|
||||||
return errors.Wrapf(err, "failed to remove network namespace for sandbox %q", id)
|
return errors.Wrapf(err, "failed to remove network namespace for sandbox %q", id)
|
||||||
}
|
}
|
||||||
|
sandboxDeleteNetwork.UpdateSince(netStop)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.G(ctx).Infof("TearDown network for sandbox %q successfully", id)
|
log.G(ctx).Infof("TearDown network for sandbox %q successfully", id)
|
||||||
|
Loading…
Reference in New Issue
Block a user