314 lines
9.1 KiB
Go
314 lines
9.1 KiB
Go
//go:build linux
|
|
|
|
/*
|
|
Copyright The containerd Authors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package server
|
|
|
|
import (
|
|
"archive/tar"
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"time"
|
|
|
|
crmetadata "github.com/checkpoint-restore/checkpointctl/lib"
|
|
"github.com/checkpoint-restore/go-criu/v7"
|
|
"github.com/containerd/containerd/api/types/runc/options"
|
|
"github.com/containerd/containerd/v2/core/content"
|
|
"github.com/containerd/containerd/v2/core/images"
|
|
"github.com/containerd/containerd/v2/pkg/archive"
|
|
"github.com/containerd/containerd/v2/pkg/protobuf/proto"
|
|
ptypes "github.com/containerd/containerd/v2/pkg/protobuf/types"
|
|
"github.com/containerd/containerd/v2/plugins"
|
|
"github.com/containerd/log"
|
|
v1 "github.com/opencontainers/image-spec/specs-go/v1"
|
|
|
|
"github.com/containerd/containerd/v2/client"
|
|
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
|
)
|
|
|
|
// podCriuVersion is the minimum version of CRIU needed for
|
|
// checkpointing and restoring containers out of and into Pods.
|
|
// NOTE(review): 31600 presumably encodes CRIU 3.16.0 in the integer form
// returned by go-criu's GetCriuVersion — confirm against the go-criu docs.
const podCriuVersion = 31600
|
|
|
|
// CheckForCriu uses CRIU's go bindings to check if the CRIU
|
|
// binary exists and if it at least the version Podman needs.
|
|
func checkForCriu(version int) error {
|
|
c := criu.MakeCriu()
|
|
criuVersion, err := c.GetCriuVersion()
|
|
if err != nil {
|
|
return fmt.Errorf("failed to check for criu version: %w", err)
|
|
}
|
|
|
|
if criuVersion >= version {
|
|
return nil
|
|
}
|
|
return fmt.Errorf("checkpoint/restore requires at least CRIU %d, current version is %d", version, criuVersion)
|
|
}
|
|
|
|
func (c *criService) CheckpointContainer(ctx context.Context, r *runtime.CheckpointContainerRequest) (*runtime.CheckpointContainerResponse, error) {
|
|
start := time.Now()
|
|
if err := checkForCriu(podCriuVersion); err != nil {
|
|
// This is the wrong error message and needs to be adapted once
|
|
// Kubernetes (the e2e_node/checkpoint) test has been changed to
|
|
// handle too old or missing CRIU error messages.
|
|
errorMessage := fmt.Sprintf(
|
|
"CRIU binary not found or too old (<%d). Failed to checkpoint container %q",
|
|
podCriuVersion,
|
|
r.GetContainerId(),
|
|
)
|
|
log.G(ctx).WithError(err).Errorf(errorMessage)
|
|
return nil, fmt.Errorf(
|
|
"%s: %w",
|
|
errorMessage,
|
|
err,
|
|
)
|
|
}
|
|
|
|
container, err := c.containerStore.Get(r.GetContainerId())
|
|
if err != nil {
|
|
return nil, fmt.Errorf("an error occurred when try to find container %q: %w", r.GetContainerId(), err)
|
|
}
|
|
|
|
state := container.Status.Get().State()
|
|
if state != runtime.ContainerState_CONTAINER_RUNNING {
|
|
return nil, fmt.Errorf(
|
|
"container %q is in %s state. only %s containers can be checkpointed",
|
|
r.GetContainerId(),
|
|
criContainerStateToString(state),
|
|
criContainerStateToString(runtime.ContainerState_CONTAINER_RUNNING),
|
|
)
|
|
}
|
|
|
|
imageRef := container.ImageRef
|
|
image, err := c.GetImage(imageRef)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("getting container image failed: %w", err)
|
|
}
|
|
|
|
i, err := container.Container.Info(ctx)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("get container info: %w", err)
|
|
}
|
|
|
|
configJSON, err := json.Marshal(&crmetadata.ContainerConfig{
|
|
ID: container.ID,
|
|
Name: container.Name,
|
|
RootfsImageName: func() string {
|
|
if len(image.References) > 0 {
|
|
return image.References[0]
|
|
}
|
|
return ""
|
|
}(),
|
|
RootfsImageRef: imageRef,
|
|
OCIRuntime: i.Runtime.Name,
|
|
RootfsImage: container.Config.GetImage().UserSpecifiedImage,
|
|
CheckpointedAt: time.Now(),
|
|
CreatedTime: i.CreatedAt,
|
|
})
|
|
if err != nil {
|
|
return nil, fmt.Errorf("generating container config JSON failed: %w", err)
|
|
}
|
|
|
|
task, err := container.Container.Task(ctx, nil)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to get task for container %q: %w", r.GetContainerId(), err)
|
|
}
|
|
img, err := task.Checkpoint(ctx, []client.CheckpointTaskOpts{withCheckpointOpts(i.Runtime.Name, c.getContainerRootDir(r.GetContainerId()))}...)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("checkpointing container %q failed: %w", r.GetContainerId(), err)
|
|
}
|
|
|
|
// the checkpoint image has been provided as an index with manifests representing the tar of criu data, the rw layer, and the config
|
|
var (
|
|
index v1.Index
|
|
rawIndex []byte
|
|
targetDesc = img.Target()
|
|
contentStore = img.ContentStore()
|
|
)
|
|
|
|
rawIndex, err = content.ReadBlob(ctx, contentStore, targetDesc)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to retrieve checkpoint index blob from content store: %w", err)
|
|
}
|
|
if err = json.Unmarshal(rawIndex, &index); err != nil {
|
|
return nil, fmt.Errorf("failed to unmarshall blob into checkpoint data OCI index: %w", err)
|
|
}
|
|
|
|
cpPath := filepath.Join(c.getContainerRootDir(r.GetContainerId()), "ctrd-checkpoint")
|
|
if err := os.MkdirAll(cpPath, 0o700); err != nil {
|
|
return nil, err
|
|
}
|
|
defer os.RemoveAll(cpPath)
|
|
|
|
if err := os.WriteFile(filepath.Join(cpPath, crmetadata.ConfigDumpFile), configJSON, 0o600); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// walk the manifests and pull out the blobs that we need to save in the checkpoint tarball:
|
|
// - the checkpoint criu data
|
|
// - the rw diff tarball
|
|
// - the spec blob
|
|
for _, manifest := range index.Manifests {
|
|
switch manifest.MediaType {
|
|
case images.MediaTypeContainerd1Checkpoint:
|
|
if err := writeCriuCheckpointData(ctx, contentStore, manifest, cpPath); err != nil {
|
|
return nil, fmt.Errorf("failed to copy CRIU checkpoint blob to checkpoint dir: %w", err)
|
|
}
|
|
case v1.MediaTypeImageLayerGzip:
|
|
if err := writeRootFsDiffTar(ctx, contentStore, manifest, cpPath); err != nil {
|
|
return nil, fmt.Errorf("failed to copy rw filesystem layer blob to checkpoint dir: %w", err)
|
|
}
|
|
case images.MediaTypeContainerd1CheckpointConfig:
|
|
if err := writeSpecDumpFile(ctx, contentStore, manifest, cpPath); err != nil {
|
|
return nil, fmt.Errorf("failed to copy container spec blob to checkpoint dir: %w", err)
|
|
}
|
|
default:
|
|
}
|
|
}
|
|
|
|
// write final tarball of all content
|
|
tar := archive.Diff(ctx, "", cpPath)
|
|
|
|
outFile, err := os.OpenFile(r.Location, os.O_RDWR|os.O_CREATE, 0600)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer outFile.Close()
|
|
_, err = io.Copy(outFile, tar)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if err := tar.Close(); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
containerCheckpointTimer.WithValues(i.Runtime.Name).UpdateSince(start)
|
|
|
|
return &runtime.CheckpointContainerResponse{}, nil
|
|
}
|
|
|
|
func withCheckpointOpts(rt, rootDir string) client.CheckpointTaskOpts {
|
|
return func(r *client.CheckpointTaskInfo) error {
|
|
// Kubernetes currently supports checkpointing of container
|
|
// as part of the Forensic Container Checkpointing KEP.
|
|
// This implies that the container is never stopped
|
|
leaveRunning := true
|
|
|
|
switch rt {
|
|
case plugins.RuntimeRuncV2:
|
|
if r.Options == nil {
|
|
r.Options = &options.CheckpointOptions{}
|
|
}
|
|
opts, _ := r.Options.(*options.CheckpointOptions)
|
|
|
|
opts.Exit = !leaveRunning
|
|
opts.WorkPath = rootDir
|
|
}
|
|
return nil
|
|
}
|
|
}
|
|
|
|
func writeCriuCheckpointData(ctx context.Context, store content.Store, desc v1.Descriptor, cpPath string) error {
|
|
ra, err := store.ReaderAt(ctx, desc)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer ra.Close()
|
|
|
|
checkpointDirectory := filepath.Join(cpPath, crmetadata.CheckpointDirectory)
|
|
// This is the criu data tarball. Let's unpack it
|
|
// and put it into the crmetadata.CheckpointDirectory directory.
|
|
if err := os.MkdirAll(checkpointDirectory, 0o700); err != nil {
|
|
return err
|
|
}
|
|
tr := tar.NewReader(content.NewReader(ra))
|
|
for {
|
|
header, err := tr.Next()
|
|
if err != nil {
|
|
if errors.Is(err, io.EOF) {
|
|
break
|
|
}
|
|
return err
|
|
}
|
|
if strings.Contains(header.Name, "..") {
|
|
return fmt.Errorf("found illegal string '..' in checkpoint archive")
|
|
}
|
|
destFile, err := os.Create(filepath.Join(checkpointDirectory, header.Name))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer destFile.Close()
|
|
|
|
_, err = io.CopyN(destFile, tr, header.Size)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func writeRootFsDiffTar(ctx context.Context, store content.Store, desc v1.Descriptor, cpPath string) error {
|
|
ra, err := store.ReaderAt(ctx, desc)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer ra.Close()
|
|
|
|
// the rw layer tarball
|
|
f, err := os.Create(filepath.Join(cpPath, crmetadata.RootFsDiffTar))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer f.Close()
|
|
|
|
_, err = io.Copy(f, content.NewReader(ra))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func writeSpecDumpFile(ctx context.Context, store content.Store, desc v1.Descriptor, cpPath string) error {
|
|
// this is the container spec
|
|
f, err := os.Create(filepath.Join(cpPath, crmetadata.SpecDumpFile))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer f.Close()
|
|
data, err := content.ReadBlob(ctx, store, desc)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
var any ptypes.Any
|
|
if err := proto.Unmarshal(data, &any); err != nil {
|
|
return err
|
|
}
|
|
_, err = f.Write(any.Value)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|