Wire through CRI ContainerCheckpoint RPC
This connects the new CRI ContainerCheckpoint RPC to the existing internal checkpoint functions. With this commit it is possible to checkpoint a container in Kubernetes using the Forensic Container Checkpointing KEP (#2008): # curl X POST "https://localhost:10250/checkpoint/namespace/podId/container" Which will result in containerd creating a checkpoint in the location specified by Kubernetes (usually /var/lib/kubelet/checkpoints). This is a Linux only feature because CRIU only exists on Linux. Rewritten with the help of Phil Estes. Signed-off-by: Phil Estes <estesp@gmail.com> Signed-off-by: Adrian Reber <areber@redhat.com>
This commit is contained in:
@@ -565,7 +565,7 @@ func (in *instrumentedService) CheckpointContainer(ctx context.Context, r *runti
|
||||
}
|
||||
}()
|
||||
|
||||
res, err = in.c.CheckpointContainer(ctx, r)
|
||||
res, err = in.c.CheckpointContainer(ctrdutil.WithNamespace(ctx), r)
|
||||
return res, errdefs.ToGRPC(err)
|
||||
}
|
||||
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
//go:build !linux
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
@@ -18,6 +20,7 @@ package server
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
"google.golang.org/grpc/codes"
|
||||
"google.golang.org/grpc/status"
|
||||
@@ -25,5 +28,7 @@ import (
|
||||
)
|
||||
|
||||
func (c *criService) CheckpointContainer(ctx context.Context, r *runtime.CheckpointContainerRequest) (res *runtime.CheckpointContainerResponse, err error) {
|
||||
// The next line is just needed to make the linter happy.
|
||||
containerCheckpointTimer.WithValues("no-runtime").UpdateSince(time.Now())
|
||||
return nil, status.Errorf(codes.Unimplemented, "method CheckpointContainer not implemented")
|
||||
}
|
||||
|
||||
306
internal/cri/server/container_checkpoint_linux.go
Normal file
306
internal/cri/server/container_checkpoint_linux.go
Normal file
@@ -0,0 +1,306 @@
|
||||
//go:build linux
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package server
|
||||
|
||||
import (
|
||||
"archive/tar"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
crmetadata "github.com/checkpoint-restore/checkpointctl/lib"
|
||||
"github.com/checkpoint-restore/go-criu/v7"
|
||||
"github.com/containerd/containerd/v2/core/content"
|
||||
"github.com/containerd/containerd/v2/core/images"
|
||||
"github.com/containerd/containerd/v2/core/runtime/v2/runc/options"
|
||||
"github.com/containerd/containerd/v2/pkg/archive"
|
||||
"github.com/containerd/containerd/v2/plugins"
|
||||
"github.com/containerd/containerd/v2/protobuf/proto"
|
||||
ptypes "github.com/containerd/containerd/v2/protobuf/types"
|
||||
"github.com/containerd/log"
|
||||
v1 "github.com/opencontainers/image-spec/specs-go/v1"
|
||||
"google.golang.org/grpc/codes"
|
||||
"google.golang.org/grpc/status"
|
||||
|
||||
"github.com/containerd/containerd/v2/client"
|
||||
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
)
|
||||
|
||||
// PodCriuVersion is the version of CRIU needed for
|
||||
// checkpointing and restoring containers out of and into Pods.
|
||||
const podCriuVersion = 31600
|
||||
|
||||
// CheckForCriu uses CRIU's go bindings to check if the CRIU
|
||||
// binary exists and if it at least the version Podman needs.
|
||||
func checkForCriu(version int) error {
|
||||
c := criu.MakeCriu()
|
||||
criuVersion, err := c.GetCriuVersion()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to check for criu version: %w", err)
|
||||
}
|
||||
|
||||
if criuVersion >= version {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("checkpoint/restore requires at least CRIU %d, current version is %d", version, criuVersion)
|
||||
}
|
||||
|
||||
func (c *criService) CheckpointContainer(ctx context.Context, r *runtime.CheckpointContainerRequest) (*runtime.CheckpointContainerResponse, error) {
|
||||
start := time.Now()
|
||||
if err := checkForCriu(podCriuVersion); err != nil {
|
||||
// This is the wrong error message and needs to be adapted once
|
||||
// Kubernetes (the e2e_node/checkpoint) test has been changed to
|
||||
// handle too old or missing CRIU error messages.
|
||||
log.G(ctx).WithError(err).Errorf("Failed to checkpoint container %q", r.GetContainerId())
|
||||
return nil, status.Errorf(codes.Unimplemented, "method CheckpointContainer not implemented")
|
||||
}
|
||||
|
||||
container, err := c.containerStore.Get(r.GetContainerId())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("an error occurred when try to find container %q: %w", r.GetContainerId(), err)
|
||||
}
|
||||
|
||||
state := container.Status.Get().State()
|
||||
if state != runtime.ContainerState_CONTAINER_RUNNING {
|
||||
return nil, fmt.Errorf(
|
||||
"container %q is in %s state. only %s containers can be checkpointed",
|
||||
r.GetContainerId(),
|
||||
criContainerStateToString(state),
|
||||
criContainerStateToString(runtime.ContainerState_CONTAINER_RUNNING),
|
||||
)
|
||||
}
|
||||
|
||||
imageRef := container.ImageRef
|
||||
image, err := c.GetImage(imageRef)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("getting container image failed: %w", err)
|
||||
}
|
||||
|
||||
i, err := container.Container.Info(ctx)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("get container info: %w", err)
|
||||
}
|
||||
|
||||
configJSON, err := json.Marshal(&crmetadata.ContainerConfig{
|
||||
ID: container.ID,
|
||||
Name: container.Name,
|
||||
RootfsImageName: func() string {
|
||||
if len(image.References) > 0 {
|
||||
return image.References[0]
|
||||
}
|
||||
return ""
|
||||
}(),
|
||||
RootfsImageRef: imageRef,
|
||||
OCIRuntime: i.Runtime.Name,
|
||||
RootfsImage: container.Config.GetImage().UserSpecifiedImage,
|
||||
CheckpointedAt: time.Now(),
|
||||
CreatedTime: i.CreatedAt,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("generating container config JSON failed: %w", err)
|
||||
}
|
||||
|
||||
task, err := container.Container.Task(ctx, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get task for container %q: %w", r.GetContainerId(), err)
|
||||
}
|
||||
img, err := task.Checkpoint(ctx, []client.CheckpointTaskOpts{withCheckpointOpts(i.Runtime.Name, c.getContainerRootDir(r.GetContainerId()))}...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("checkpointing container %q failed: %w", r.GetContainerId(), err)
|
||||
}
|
||||
|
||||
// the checkpoint image has been provided as an index with manifests representing the tar of criu data, the rw layer, and the config
|
||||
var (
|
||||
index v1.Index
|
||||
rawIndex []byte
|
||||
targetDesc = img.Target()
|
||||
contentStore = img.ContentStore()
|
||||
)
|
||||
|
||||
rawIndex, err = content.ReadBlob(ctx, contentStore, targetDesc)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to retrieve checkpoint index blob from content store: %w", err)
|
||||
}
|
||||
if err = json.Unmarshal(rawIndex, &index); err != nil {
|
||||
return nil, fmt.Errorf("failed to unmarshall blob into checkpoint data OCI index: %w", err)
|
||||
}
|
||||
|
||||
cpPath := filepath.Join(c.getContainerRootDir(r.GetContainerId()), "ctrd-checkpoint")
|
||||
if err := os.MkdirAll(cpPath, 0o700); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer os.RemoveAll(cpPath)
|
||||
|
||||
if err := os.WriteFile(filepath.Join(cpPath, crmetadata.ConfigDumpFile), configJSON, 0o600); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// walk the manifests and pull out the blobs that we need to save in the checkpoint tarball:
|
||||
// - the checkpoint criu data
|
||||
// - the rw diff tarball
|
||||
// - the spec blob
|
||||
for _, manifest := range index.Manifests {
|
||||
switch manifest.MediaType {
|
||||
case images.MediaTypeContainerd1Checkpoint:
|
||||
if err := writeCriuCheckpointData(ctx, contentStore, manifest, cpPath); err != nil {
|
||||
return nil, fmt.Errorf("failed to copy CRIU checkpoint blob to checkpoint dir: %w", err)
|
||||
}
|
||||
case v1.MediaTypeImageLayerGzip:
|
||||
if err := writeRootFsDiffTar(ctx, contentStore, manifest, cpPath); err != nil {
|
||||
return nil, fmt.Errorf("failed to copy rw filesystem layer blob to checkpoint dir: %w", err)
|
||||
}
|
||||
case images.MediaTypeContainerd1CheckpointConfig:
|
||||
if err := writeSpecDumpFile(ctx, contentStore, manifest, cpPath); err != nil {
|
||||
return nil, fmt.Errorf("failed to copy container spec blob to checkpoint dir: %w", err)
|
||||
}
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
// write final tarball of all content
|
||||
tar := archive.Diff(ctx, "", cpPath)
|
||||
|
||||
outFile, err := os.OpenFile(r.Location, os.O_RDWR|os.O_CREATE, 0600)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer outFile.Close()
|
||||
_, err = io.Copy(outFile, tar)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := tar.Close(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
containerCheckpointTimer.WithValues(i.Runtime.Name).UpdateSince(start)
|
||||
|
||||
return &runtime.CheckpointContainerResponse{}, nil
|
||||
}
|
||||
|
||||
func withCheckpointOpts(rt, rootDir string) client.CheckpointTaskOpts {
|
||||
return func(r *client.CheckpointTaskInfo) error {
|
||||
// Kubernetes currently supports checkpointing of container
|
||||
// as part of the Forensic Container Checkpointing KEP.
|
||||
// This implies that the container is never stopped
|
||||
leaveRunning := true
|
||||
|
||||
switch rt {
|
||||
case plugins.RuntimeRuncV2:
|
||||
if r.Options == nil {
|
||||
r.Options = &options.CheckpointOptions{}
|
||||
}
|
||||
opts, _ := r.Options.(*options.CheckpointOptions)
|
||||
|
||||
opts.Exit = !leaveRunning
|
||||
opts.WorkPath = rootDir
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
func writeCriuCheckpointData(ctx context.Context, store content.Store, desc v1.Descriptor, cpPath string) error {
|
||||
ra, err := store.ReaderAt(ctx, desc)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer ra.Close()
|
||||
|
||||
checkpointDirectory := filepath.Join(cpPath, crmetadata.CheckpointDirectory)
|
||||
// This is the criu data tarball. Let's unpack it
|
||||
// and put it into the crmetadata.CheckpointDirectory directory.
|
||||
if err := os.MkdirAll(checkpointDirectory, 0o700); err != nil {
|
||||
return err
|
||||
}
|
||||
tr := tar.NewReader(content.NewReader(ra))
|
||||
for {
|
||||
header, err := tr.Next()
|
||||
if err != nil {
|
||||
if errors.Is(err, io.EOF) {
|
||||
break
|
||||
}
|
||||
return err
|
||||
}
|
||||
if strings.Contains(header.Name, "..") {
|
||||
return fmt.Errorf("found illegal string '..' in checkpoint archive")
|
||||
}
|
||||
destFile, err := os.Create(filepath.Join(checkpointDirectory, header.Name))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer destFile.Close()
|
||||
|
||||
_, err = io.CopyN(destFile, tr, header.Size)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func writeRootFsDiffTar(ctx context.Context, store content.Store, desc v1.Descriptor, cpPath string) error {
|
||||
ra, err := store.ReaderAt(ctx, desc)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer ra.Close()
|
||||
|
||||
// the rw layer tarball
|
||||
f, err := os.Create(filepath.Join(cpPath, crmetadata.RootFsDiffTar))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
_, err = io.Copy(f, content.NewReader(ra))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func writeSpecDumpFile(ctx context.Context, store content.Store, desc v1.Descriptor, cpPath string) error {
|
||||
// this is the container spec
|
||||
f, err := os.Create(filepath.Join(cpPath, crmetadata.SpecDumpFile))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
data, err := content.ReadBlob(ctx, store, desc)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var any ptypes.Any
|
||||
if err := proto.Unmarshal(data, &any); err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = f.Write(any.Value)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -35,6 +35,7 @@ var (
|
||||
containerStopTimer metrics.LabeledTimer
|
||||
containerStartTimer metrics.LabeledTimer
|
||||
containerEventsDroppedCount metrics.Counter
|
||||
containerCheckpointTimer metrics.LabeledTimer
|
||||
|
||||
networkPluginOperations metrics.LabeledCounter
|
||||
networkPluginOperationsErrors metrics.LabeledCounter
|
||||
@@ -59,6 +60,7 @@ func init() {
|
||||
containerStopTimer = ns.NewLabeledTimer("container_stop", "time to stop a container", "runtime")
|
||||
containerStartTimer = ns.NewLabeledTimer("container_start", "time to start a container", "runtime")
|
||||
containerEventsDroppedCount = ns.NewCounter("container_events_dropped", "count container discarding event total from server start")
|
||||
containerCheckpointTimer = ns.NewLabeledTimer("container_checkpoint", "time to checkpoint a container", "runtime")
|
||||
|
||||
networkPluginOperations = ns.NewLabeledCounter("network_plugin_operations_total", "cumulative number of network plugin operations by operation type", "operation_type")
|
||||
networkPluginOperationsErrors = ns.NewLabeledCounter("network_plugin_operations_errors_total", "cumulative number of network plugin operations by operation type", "operation_type")
|
||||
|
||||
Reference in New Issue
Block a user