diff --git a/pkg/cri/server/container_create.go b/pkg/cri/server/container_create.go index 5b158d80d..39ac33a9e 100644 --- a/pkg/cri/server/container_create.go +++ b/pkg/cri/server/container_create.go @@ -102,6 +102,7 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta return nil, errors.Wrapf(err, "failed to get image from containerd %q", image.ID) } + start := time.Now() // Run container using the same runtime with sandbox. sandboxInfo, err := sandbox.Container.Info(ctx) if err != nil { @@ -278,6 +279,8 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta return nil, errors.Wrapf(err, "failed to add container %q into store", id) } + containerCreateTimer.WithValues(ociRuntime.Type).UpdateSince(start) + return &runtime.CreateContainerResponse{ContainerId: id}, nil } diff --git a/pkg/cri/server/container_list.go b/pkg/cri/server/container_list.go index 355f158e3..6ac9efdc1 100644 --- a/pkg/cri/server/container_list.go +++ b/pkg/cri/server/container_list.go @@ -17,6 +17,8 @@ package server import ( + "time" + "golang.org/x/net/context" runtime "k8s.io/cri-api/pkg/apis/runtime/v1" @@ -26,6 +28,7 @@ import ( // ListContainers lists all containers matching the filter. func (c *criService) ListContainers(ctx context.Context, r *runtime.ListContainersRequest) (*runtime.ListContainersResponse, error) { + start := time.Now() // List all containers from store. containersInStore := c.containerStore.List() @@ -35,6 +38,8 @@ func (c *criService) ListContainers(ctx context.Context, r *runtime.ListContaine } containers = c.filterCRIContainers(containers, r.GetFilter()) + + containerListTimer.UpdateSince(start) return &runtime.ListContainersResponse{Containers: containers}, nil } diff --git a/pkg/cri/server/container_remove.go b/pkg/cri/server/container_remove.go index 3cdc501f4..f9d38ab44 100644 --- a/pkg/cri/server/container_remove.go +++ b/pkg/cri/server/container_remove.go @@ -17,6 +17,8 @@ package server import ( + "time" + "github.com/containerd/containerd" "github.com/containerd/containerd/errdefs" "github.com/containerd/containerd/log" @@ -30,6 +32,7 @@ import ( // RemoveContainer removes the container. func (c *criService) RemoveContainer(ctx context.Context, r *runtime.RemoveContainerRequest) (_ *runtime.RemoveContainerResponse, retErr error) { + start := time.Now() container, err := c.containerStore.Get(r.GetContainerId()) if err != nil { if !errdefs.IsNotFound(err) { @@ -40,6 +43,10 @@ func (c *criService) RemoveContainer(ctx context.Context, r *runtime.RemoveConta return &runtime.RemoveContainerResponse{}, nil } id := container.ID + i, err := container.Container.Info(ctx) + if err != nil { + return nil, errors.Wrap(err, "get container info") + } // Forcibly stop the containers if they are in running or unknown state state := container.Status.Get().State() @@ -99,6 +106,8 @@ func (c *criService) RemoveContainer(ctx context.Context, r *runtime.RemoveConta c.containerNameIndex.ReleaseByKey(id) + containerRemoveTimer.WithValues(i.Runtime.Name).UpdateSince(start) + return &runtime.RemoveContainerResponse{}, nil } diff --git a/pkg/cri/server/container_start.go b/pkg/cri/server/container_start.go index 09f2d2f97..be5532a64 100644 --- a/pkg/cri/server/container_start.go +++ b/pkg/cri/server/container_start.go @@ -40,11 +40,17 @@ import ( // StartContainer starts the container. func (c *criService) StartContainer(ctx context.Context, r *runtime.StartContainerRequest) (retRes *runtime.StartContainerResponse, retErr error) { + start := time.Now() cntr, err := c.containerStore.Get(r.GetContainerId()) if err != nil { return nil, errors.Wrapf(err, "an error occurred when try to find container %q", r.GetContainerId()) } + info, err := cntr.Container.Info(ctx) + if err != nil { + return nil, errors.Wrap(err, "get container info") + } + id := cntr.ID meta := cntr.Metadata container := cntr.Container @@ -170,6 +176,8 @@ func (c *criService) StartContainer(ctx context.Context, r *runtime.StartContain // It handles the TaskExit event and update container state after this. c.eventMonitor.startContainerExitMonitor(context.Background(), id, task.Pid(), exitCh) + containerStartTimer.WithValues(info.Runtime.Name).UpdateSince(start) + return &runtime.StartContainerResponse{}, nil } diff --git a/pkg/cri/server/container_stop.go b/pkg/cri/server/container_stop.go index 50db5f596..88460f606 100644 --- a/pkg/cri/server/container_stop.go +++ b/pkg/cri/server/container_stop.go @@ -35,6 +35,7 @@ import ( // StopContainer stops a running container with a grace period (i.e., timeout). func (c *criService) StopContainer(ctx context.Context, r *runtime.StopContainerRequest) (*runtime.StopContainerResponse, error) { + start := time.Now() // Get container config from container store. container, err := c.containerStore.Get(r.GetContainerId()) if err != nil { @@ -45,6 +46,13 @@ func (c *criService) StopContainer(ctx context.Context, r *runtime.StopContainer return nil, err } + i, err := container.Container.Info(ctx) + if err != nil { + return nil, errors.Wrap(err, "get container info") + } + + containerStopTimer.WithValues(i.Runtime.Name).UpdateSince(start) + return &runtime.StopContainerResponse{}, nil } diff --git a/pkg/cri/server/metrics.go b/pkg/cri/server/metrics.go new file mode 100644 index 000000000..9712bb112 --- /dev/null +++ b/pkg/cri/server/metrics.go @@ -0,0 +1,58 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package server + +import ( + metrics "github.com/docker/go-metrics" +) + +var ( + sandboxListTimer metrics.Timer + sandboxCreateNetworkTimer metrics.Timer + sandboxDeleteNetwork metrics.Timer + + sandboxRuntimeCreateTimer metrics.LabeledTimer + sandboxRuntimeStopTimer metrics.LabeledTimer + sandboxRemoveTimer metrics.LabeledTimer + + containerListTimer metrics.Timer + containerRemoveTimer metrics.LabeledTimer + containerCreateTimer metrics.LabeledTimer + containerStopTimer metrics.LabeledTimer + containerStartTimer metrics.LabeledTimer +) + +func init() { + // these CRI metrics record latencies for successful operations around a sandbox and container's lifecycle. + ns := metrics.NewNamespace("containerd", "cri", nil) + + sandboxListTimer = ns.NewTimer("sandbox_list", "time to list sandboxes") + sandboxCreateNetworkTimer = ns.NewTimer("sandbox_create_network", "time to create the network for a sandbox") + sandboxDeleteNetwork = ns.NewTimer("sandbox_delete_network", "time to delete a sandbox's network") + + sandboxRuntimeCreateTimer = ns.NewLabeledTimer("sandbox_runtime_create", "time to create a sandbox in the runtime", "runtime") + sandboxRuntimeStopTimer = ns.NewLabeledTimer("sandbox_runtime_stop", "time to stop a sandbox", "runtime") + sandboxRemoveTimer = ns.NewLabeledTimer("sandbox_remove", "time to remove a sandbox", "runtime") + + containerListTimer = ns.NewTimer("container_list", "time to list containers") + containerRemoveTimer = ns.NewLabeledTimer("container_remove", "time to remove a container", "runtime") + containerCreateTimer = ns.NewLabeledTimer("container_create", "time to create a container", "runtime") + containerStopTimer = ns.NewLabeledTimer("container_stop", "time to stop a container", "runtime") + containerStartTimer = ns.NewLabeledTimer("container_start", "time to start a container", "runtime") + + metrics.Register(ns) +} diff --git a/pkg/cri/server/sandbox_list.go b/pkg/cri/server/sandbox_list.go index a236cfe68..b4f07221e 100644 --- a/pkg/cri/server/sandbox_list.go +++ b/pkg/cri/server/sandbox_list.go @@ -17,6 +17,8 @@ package server import ( + "time" + "golang.org/x/net/context" runtime "k8s.io/cri-api/pkg/apis/runtime/v1" @@ -25,6 +27,7 @@ import ( // ListPodSandbox returns a list of Sandbox. func (c *criService) ListPodSandbox(ctx context.Context, r *runtime.ListPodSandboxRequest) (*runtime.ListPodSandboxResponse, error) { + start := time.Now() // List all sandboxes from store. sandboxesInStore := c.sandboxStore.List() var sandboxes []*runtime.PodSandbox @@ -36,6 +39,8 @@ func (c *criService) ListPodSandbox(ctx context.Context, r *runtime.ListPodSandb } sandboxes = c.filterCRISandboxes(sandboxes, r.GetFilter()) + + sandboxListTimer.UpdateSince(start) return &runtime.ListPodSandboxResponse{Items: sandboxes}, nil } diff --git a/pkg/cri/server/sandbox_remove.go b/pkg/cri/server/sandbox_remove.go index 6dcf5be48..0d4a1344e 100644 --- a/pkg/cri/server/sandbox_remove.go +++ b/pkg/cri/server/sandbox_remove.go @@ -17,6 +17,8 @@ package server import ( + "time" + "github.com/containerd/containerd" "github.com/containerd/containerd/errdefs" "github.com/containerd/containerd/log" @@ -30,6 +32,7 @@ import ( // RemovePodSandbox removes the sandbox. If there are running containers in the // sandbox, they should be forcibly removed. func (c *criService) RemovePodSandbox(ctx context.Context, r *runtime.RemovePodSandboxRequest) (*runtime.RemovePodSandboxResponse, error) { + start := time.Now() sandbox, err := c.sandboxStore.Get(r.GetPodSandboxId()) if err != nil { if !errdefs.IsNotFound(err) { @@ -108,5 +111,7 @@ func (c *criService) RemovePodSandbox(ctx context.Context, r *runtime.RemovePodS // Release the sandbox name reserved for the sandbox. c.sandboxNameIndex.ReleaseByKey(id) + sandboxRemoveTimer.WithValues(sandbox.RuntimeHandler).UpdateSince(start) + return &runtime.RemovePodSandboxResponse{}, nil } diff --git a/pkg/cri/server/sandbox_run.go b/pkg/cri/server/sandbox_run.go index bd5e02b63..891578ce5 100644 --- a/pkg/cri/server/sandbox_run.go +++ b/pkg/cri/server/sandbox_run.go @@ -22,6 +22,7 @@ import ( "path/filepath" goruntime "runtime" "strings" + "time" "github.com/containerd/containerd" containerdio "github.com/containerd/containerd/cio" @@ -123,6 +124,7 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox } if podNetwork { + netStart := time.Now() // If it is not in host network namespace then create a namespace and set the sandbox // handle. NetNSPath in sandbox metadata and NetNS is non empty only for non host network // namespaces. If the pod is in host network namespace then both are empty and should not @@ -163,8 +165,10 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox if err := c.setupPodNetwork(ctx, &sandbox); err != nil { return nil, errors.Wrapf(err, "failed to setup network for sandbox %q", id) } + sandboxCreateNetworkTimer.UpdateSince(netStart) } + runtimeStart := time.Now() // Create sandbox container. // NOTE: sandboxContainerSpec SHOULD NOT have side // effect, e.g. accessing/creating files, so that we can test @@ -348,6 +352,8 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox // but we don't care about sandbox TaskOOM right now, so it is fine. c.eventMonitor.startSandboxExitMonitor(context.Background(), id, task.Pid(), exitCh) + sandboxRuntimeCreateTimer.WithValues(ociRuntime.Type).UpdateSince(runtimeStart) + return &runtime.RunPodSandboxResponse{PodSandboxId: id}, nil } diff --git a/pkg/cri/server/sandbox_stop.go b/pkg/cri/server/sandbox_stop.go index 6ef7a987f..c64398e97 100644 --- a/pkg/cri/server/sandbox_stop.go +++ b/pkg/cri/server/sandbox_stop.go @@ -54,6 +54,7 @@ func (c *criService) stopPodSandbox(ctx context.Context, sandbox sandboxstore.Sa // Stop all containers inside the sandbox. This terminates the container forcibly, // and container may still be created, so production should not rely on this behavior. // TODO(random-liu): Introduce a state in sandbox to avoid future container creation. + stop := time.Now() containers := c.containerStore.List() for _, container := range containers { if container.SandboxID != id { @@ -77,9 +78,11 @@ func (c *criService) stopPodSandbox(ctx context.Context, sandbox sandboxstore.Sa return errors.Wrapf(err, "failed to stop sandbox container %q in %q state", id, state) } } + sandboxRuntimeStopTimer.WithValues(sandbox.RuntimeHandler).UpdateSince(stop) // Teardown network for sandbox. if sandbox.NetNS != nil { + netStop := time.Now() // Use empty netns path if netns is not available. This is defined in: // https://github.com/containernetworking/cni/blob/v0.7.0-alpha1/SPEC.md if closed, err := sandbox.NetNS.Closed(); err != nil { @@ -93,6 +96,7 @@ func (c *criService) stopPodSandbox(ctx context.Context, sandbox sandboxstore.Sa if err := sandbox.NetNS.Remove(); err != nil { return errors.Wrapf(err, "failed to remove network namespace for sandbox %q", id) } + sandboxDeleteNetwork.UpdateSince(netStop) } log.G(ctx).Infof("TearDown network for sandbox %q successfully", id)