Add checkpoint and restore
Signed-off-by: Michael Crosby <crosbymichael@gmail.com> Update go-runc to 49b2a02ec1ed3e4ae52d30b54a291b75 Signed-off-by: Michael Crosby <crosbymichael@gmail.com> Add shim to restore creation Signed-off-by: Michael Crosby <crosbymichael@gmail.com> Keep checkpoint path in service Signed-off-by: Michael Crosby <crosbymichael@gmail.com> Add C/R to non-shim build Signed-off-by: Michael Crosby <crosbymichael@gmail.com> Checkpoint rw and image Signed-off-by: Michael Crosby <crosbymichael@gmail.com> Pause container on bind checkpoints Signed-off-by: Michael Crosby <crosbymichael@gmail.com> Return dump.log in error on checkpoint failure Signed-off-by: Michael Crosby <crosbymichael@gmail.com> Pause container for checkpoint Signed-off-by: Michael Crosby <crosbymichael@gmail.com> Update runc to 639454475cb9c8b861cc599f8bcd5c8c790ae402 For checkpoint to work you need runc version 639454475cb9c8b861cc599f8bcd5c8c790ae402 + and criu 3.0 as this is what I have been testing with. Signed-off-by: Michael Crosby <crosbymichael@gmail.com> Move restore behind create calls This removes the restore RPCs in favor of providing the checkpoint information to the `Create` calls of a container. If provided, the container will be created/restored from the checkpoint instead of an existing container. Signed-off-by: Michael Crosby <crosbymichael@gmail.com> Regen protos after rebase Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
This commit is contained in:
@@ -25,15 +25,17 @@ func (s State) Status() plugin.Status {
|
||||
return s.status
|
||||
}
|
||||
|
||||
func newContainer(id string, shim shim.ShimClient) *Container {
|
||||
func newContainer(id string, spec []byte, shim shim.ShimClient) *Container {
|
||||
return &Container{
|
||||
id: id,
|
||||
shim: shim,
|
||||
spec: spec,
|
||||
}
|
||||
}
|
||||
|
||||
type Container struct {
|
||||
id string
|
||||
id string
|
||||
spec []byte
|
||||
|
||||
shim shim.ShimClient
|
||||
}
|
||||
@@ -42,6 +44,7 @@ func (c *Container) Info() plugin.ContainerInfo {
|
||||
return plugin.ContainerInfo{
|
||||
ID: c.id,
|
||||
Runtime: runtimeName,
|
||||
Spec: c.spec,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -148,6 +151,19 @@ func (c *Container) CloseStdin(ctx context.Context, pid uint32) error {
|
||||
return err
|
||||
}
|
||||
|
||||
func (c *Container) Checkpoint(ctx context.Context, opts plugin.CheckpointOpts) error {
|
||||
_, err := c.shim.Checkpoint(ctx, &shim.CheckpointRequest{
|
||||
Exit: opts.Exit,
|
||||
AllowTcp: opts.AllowTCP,
|
||||
AllowUnixSockets: opts.AllowUnixSockets,
|
||||
AllowTerminal: opts.AllowTerminal,
|
||||
FileLocks: opts.FileLocks,
|
||||
EmptyNamespaces: opts.EmptyNamespaces,
|
||||
Image: opts.Path,
|
||||
})
|
||||
return err
|
||||
}
|
||||
|
||||
type Process struct {
|
||||
pid int
|
||||
c *Container
|
||||
|
||||
@@ -13,7 +13,6 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/containerd/containerd"
|
||||
"github.com/containerd/containerd/api/services/shim"
|
||||
"github.com/containerd/containerd/api/types/container"
|
||||
"github.com/containerd/containerd/api/types/mount"
|
||||
@@ -65,7 +64,7 @@ func New(ic *plugin.InitContext) (interface{}, error) {
|
||||
remote: !cfg.NoShim,
|
||||
shim: cfg.Shim,
|
||||
runtime: cfg.Runtime,
|
||||
events: make(chan *containerd.Event, 2048),
|
||||
events: make(chan *plugin.Event, 2048),
|
||||
eventsContext: c,
|
||||
eventsCancel: cancel,
|
||||
monitor: ic.Monitor,
|
||||
@@ -81,7 +80,7 @@ type Runtime struct {
|
||||
runtime string
|
||||
remote bool
|
||||
|
||||
events chan *containerd.Event
|
||||
events chan *plugin.Event
|
||||
eventsContext context.Context
|
||||
eventsCancel func()
|
||||
monitor plugin.ContainerMonitor
|
||||
@@ -108,13 +107,14 @@ func (r *Runtime) Create(ctx context.Context, id string, opts plugin.CreateOpts)
|
||||
return nil, err
|
||||
}
|
||||
sopts := &shim.CreateRequest{
|
||||
ID: id,
|
||||
Bundle: path,
|
||||
Runtime: r.runtime,
|
||||
Stdin: opts.IO.Stdin,
|
||||
Stdout: opts.IO.Stdout,
|
||||
Stderr: opts.IO.Stderr,
|
||||
Terminal: opts.IO.Terminal,
|
||||
ID: id,
|
||||
Bundle: path,
|
||||
Runtime: r.runtime,
|
||||
Stdin: opts.IO.Stdin,
|
||||
Stdout: opts.IO.Stdout,
|
||||
Stderr: opts.IO.Stderr,
|
||||
Terminal: opts.IO.Terminal,
|
||||
Checkpoint: opts.Checkpoint,
|
||||
}
|
||||
for _, m := range opts.Rootfs {
|
||||
sopts.Rootfs = append(sopts.Rootfs, &mount.Mount{
|
||||
@@ -127,8 +127,8 @@ func (r *Runtime) Create(ctx context.Context, id string, opts plugin.CreateOpts)
|
||||
os.RemoveAll(path)
|
||||
return nil, err
|
||||
}
|
||||
c := newContainer(id, s)
|
||||
// after the container is create add it to the monitor
|
||||
c := newContainer(id, opts.Spec, s)
|
||||
// after the container is created, add it to the monitor
|
||||
if err = r.monitor.Monitor(c); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -182,7 +182,7 @@ func (r *Runtime) Containers(ctx context.Context) ([]plugin.Container, error) {
|
||||
return o, nil
|
||||
}
|
||||
|
||||
func (r *Runtime) Events(ctx context.Context) <-chan *containerd.Event {
|
||||
func (r *Runtime) Events(ctx context.Context) <-chan *plugin.Event {
|
||||
return r.events
|
||||
}
|
||||
|
||||
@@ -204,20 +204,20 @@ func (r *Runtime) forward(events shim.Shim_EventsClient) {
|
||||
}
|
||||
return
|
||||
}
|
||||
var et containerd.EventType
|
||||
var et plugin.EventType
|
||||
switch e.Type {
|
||||
case container.Event_CREATE:
|
||||
et = containerd.CreateEvent
|
||||
et = plugin.CreateEvent
|
||||
case container.Event_EXEC_ADDED:
|
||||
et = containerd.ExecAddEvent
|
||||
et = plugin.ExecAddEvent
|
||||
case container.Event_EXIT:
|
||||
et = containerd.ExitEvent
|
||||
et = plugin.ExitEvent
|
||||
case container.Event_OOM:
|
||||
et = containerd.OOMEvent
|
||||
et = plugin.OOMEvent
|
||||
case container.Event_START:
|
||||
et = containerd.StartEvent
|
||||
et = plugin.StartEvent
|
||||
}
|
||||
r.events <- &containerd.Event{
|
||||
r.events <- &plugin.Event{
|
||||
Timestamp: time.Now(),
|
||||
Runtime: runtimeName,
|
||||
Type: et,
|
||||
@@ -256,9 +256,14 @@ func (r *Runtime) loadContainer(path string) (*Container, error) {
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
data, err := ioutil.ReadFile(filepath.Join(path, configFilename))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &Container{
|
||||
id: id,
|
||||
shim: s,
|
||||
spec: data,
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -99,6 +99,10 @@ func (c *client) CloseStdin(ctx context.Context, in *shimapi.CloseStdinRequest,
|
||||
return c.s.CloseStdin(ctx, in)
|
||||
}
|
||||
|
||||
func (c *client) Checkpoint(ctx context.Context, in *shimapi.CheckpointRequest, opts ...grpc.CallOption) (*google_protobuf.Empty, error) {
|
||||
return c.s.Checkpoint(ctx, in)
|
||||
}
|
||||
|
||||
type events struct {
|
||||
c chan *container.Event
|
||||
ctx context.Context
|
||||
|
||||
@@ -4,9 +4,11 @@ package shim
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
@@ -16,6 +18,7 @@ import (
|
||||
"github.com/containerd/console"
|
||||
"github.com/containerd/containerd"
|
||||
shimapi "github.com/containerd/containerd/api/services/shim"
|
||||
"github.com/containerd/containerd/log"
|
||||
"github.com/containerd/fifo"
|
||||
runc "github.com/containerd/go-runc"
|
||||
)
|
||||
@@ -74,16 +77,35 @@ func newInitProcess(context context.Context, path string, r *shimapi.CreateReque
|
||||
}
|
||||
p.io = io
|
||||
}
|
||||
opts := &runc.CreateOpts{
|
||||
PidFile: filepath.Join(path, "init.pid"),
|
||||
IO: io,
|
||||
NoPivot: r.NoPivot,
|
||||
}
|
||||
if socket != nil {
|
||||
opts.ConsoleSocket = socket
|
||||
}
|
||||
if err := p.runc.Create(context, r.ID, r.Bundle, opts); err != nil {
|
||||
return nil, err
|
||||
pidFile := filepath.Join(path, "init.pid")
|
||||
if r.Checkpoint != "" {
|
||||
opts := &runc.RestoreOpts{
|
||||
CheckpointOpts: runc.CheckpointOpts{
|
||||
ImagePath: r.Checkpoint,
|
||||
WorkDir: filepath.Join(r.Bundle, "work"),
|
||||
ParentPath: r.ParentCheckpoint,
|
||||
},
|
||||
PidFile: pidFile,
|
||||
IO: io,
|
||||
NoPivot: r.NoPivot,
|
||||
Detach: true,
|
||||
NoSubreaper: true,
|
||||
}
|
||||
if _, err := p.runc.Restore(context, r.ID, r.Bundle, opts); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
} else {
|
||||
opts := &runc.CreateOpts{
|
||||
PidFile: pidFile,
|
||||
IO: io,
|
||||
NoPivot: r.NoPivot,
|
||||
}
|
||||
if socket != nil {
|
||||
opts.ConsoleSocket = socket
|
||||
}
|
||||
if err := p.runc.Create(context, r.ID, r.Bundle, opts); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
if r.Stdin != "" {
|
||||
sc, err := fifo.OpenFifo(context, r.Stdin, syscall.O_WRONLY|syscall.O_NONBLOCK, 0)
|
||||
@@ -109,7 +131,7 @@ func newInitProcess(context context.Context, path string, r *shimapi.CreateReque
|
||||
}
|
||||
}
|
||||
copyWaitGroup.Wait()
|
||||
pid, err := runc.ReadPidFile(opts.PidFile)
|
||||
pid, err := runc.ReadPidFile(pidFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -194,3 +216,50 @@ func (p *initProcess) Signal(sig int) error {
|
||||
func (p *initProcess) Stdin() io.Closer {
|
||||
return p.stdin
|
||||
}
|
||||
|
||||
func (p *initProcess) Checkpoint(context context.Context, r *shimapi.CheckpointRequest) error {
|
||||
var actions []runc.CheckpointAction
|
||||
if !r.Exit {
|
||||
actions = append(actions, runc.LeaveRunning)
|
||||
}
|
||||
work := filepath.Join(p.bundle, "work")
|
||||
defer os.RemoveAll(work)
|
||||
if err := p.runc.Checkpoint(context, p.id, &runc.CheckpointOpts{
|
||||
WorkDir: work,
|
||||
ImagePath: r.Image,
|
||||
AllowOpenTCP: r.AllowTcp,
|
||||
AllowExternalUnixSockets: r.AllowUnixSockets,
|
||||
AllowTerminal: r.AllowTerminal,
|
||||
FileLocks: r.FileLocks,
|
||||
EmptyNamespaces: r.EmptyNamespaces,
|
||||
}, actions...); err != nil {
|
||||
dumpLog := filepath.Join(p.bundle, "criu-dump.log")
|
||||
if cerr := copyFile(dumpLog, filepath.Join(work, "dump.log")); cerr != nil {
|
||||
log.G(context).Error(err)
|
||||
}
|
||||
return fmt.Errorf("%s path= %s", criuError(err), dumpLog)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// criuError returns only the first line of err's message, i.e. everything
// before the first newline, or the whole message when it has no newline.
// Callers use it to keep a multi-line error short before appending their
// own context. A nil err yields the empty string.
func criuError(err error) string {
	if err == nil {
		return ""
	}
	msg := err.Error()
	if i := strings.IndexByte(msg, '\n'); i >= 0 {
		return msg[:i]
	}
	return msg
}
|
||||
|
||||
// copyFile copies the contents of the file at from into the file at to.
// The source is opened first, so a missing source never creates the
// destination. The destination is created with os.Create, which truncates
// it if it already exists.
//
// It returns the first error met while opening, creating or copying. If the
// copy succeeded, an error from closing the destination is returned instead
// of being dropped, because a failed close can mean the data was not fully
// written.
func copyFile(to, from string) (err error) {
	src, err := os.Open(from)
	if err != nil {
		return err
	}
	defer src.Close()

	dst, err := os.Create(to)
	if err != nil {
		return err
	}
	defer func() {
		// Do not mask an earlier copy error with the close error.
		if cerr := dst.Close(); err == nil {
			err = cerr
		}
	}()

	_, err = io.Copy(dst, src)
	return err
}
|
||||
|
||||
@@ -86,9 +86,8 @@ func (s *Service) Delete(ctx context.Context, r *shimapi.DeleteRequest) (*shimap
|
||||
if !ok {
|
||||
p = s.initProcess
|
||||
}
|
||||
if err := p.Delete(ctx); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// TODO: how to handle errors here
|
||||
p.Delete(ctx)
|
||||
s.mu.Lock()
|
||||
delete(s.processes, p.Pid())
|
||||
s.mu.Unlock()
|
||||
@@ -281,6 +280,13 @@ func (s *Service) CloseStdin(ctx context.Context, r *shimapi.CloseStdinRequest)
|
||||
return empty, nil
|
||||
}
|
||||
|
||||
func (s *Service) Checkpoint(ctx context.Context, r *shimapi.CheckpointRequest) (*google_protobuf.Empty, error) {
|
||||
if err := s.initProcess.Checkpoint(ctx, r); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return empty, nil
|
||||
}
|
||||
|
||||
func (s *Service) waitExit(p process, pid int, cmd *reaper.Cmd) {
|
||||
status := <-cmd.ExitCh
|
||||
p.Exited(status)
|
||||
|
||||
Reference in New Issue
Block a user