mv pkg/process cmd/containerd-shim-runc-v2/process

The package is quite specific to runc and only imported by
containerd-shim-runc-v2

Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
This commit is contained in:
Akihiro Suda
2023-11-30 21:50:04 +09:00
parent 58af8cda54
commit 8e567aa581
15 changed files with 4 additions and 4 deletions

View File

@@ -30,11 +30,11 @@ import (
"github.com/containerd/cgroups/v3"
"github.com/containerd/cgroups/v3/cgroup1"
cgroupsv2 "github.com/containerd/cgroups/v3/cgroup2"
"github.com/containerd/containerd/v2/cmd/containerd-shim-runc-v2/process"
"github.com/containerd/containerd/v2/cmd/containerd-shim-runc-v2/runc"
"github.com/containerd/containerd/v2/mount"
"github.com/containerd/containerd/v2/namespaces"
"github.com/containerd/containerd/v2/oci"
"github.com/containerd/containerd/v2/pkg/process"
"github.com/containerd/containerd/v2/pkg/schedcore"
"github.com/containerd/containerd/v2/runtime/v2/runc/options"
"github.com/containerd/containerd/v2/runtime/v2/shim"

View File

@@ -0,0 +1,76 @@
//go:build !windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package process
import (
"context"
"errors"
"fmt"
"github.com/containerd/console"
"github.com/containerd/containerd/v2/errdefs"
google_protobuf "github.com/containerd/containerd/v2/protobuf/types"
)
type deletedState struct {
}
func (s *deletedState) Pause(ctx context.Context) error {
return errors.New("cannot pause a deleted process")
}
func (s *deletedState) Resume(ctx context.Context) error {
return errors.New("cannot resume a deleted process")
}
func (s *deletedState) Update(context context.Context, r *google_protobuf.Any) error {
return errors.New("cannot update a deleted process")
}
func (s *deletedState) Checkpoint(ctx context.Context, r *CheckpointConfig) error {
return errors.New("cannot checkpoint a deleted process")
}
func (s *deletedState) Resize(ws console.WinSize) error {
return errors.New("cannot resize a deleted process")
}
func (s *deletedState) Start(ctx context.Context) error {
return errors.New("cannot start a deleted process")
}
func (s *deletedState) Delete(ctx context.Context) error {
return fmt.Errorf("cannot delete a deleted process: %w", errdefs.ErrNotFound)
}
func (s *deletedState) Kill(ctx context.Context, sig uint32, all bool) error {
return fmt.Errorf("cannot kill a deleted process: %w", errdefs.ErrNotFound)
}
func (s *deletedState) SetExited(status int) {
// no op
}
func (s *deletedState) Exec(ctx context.Context, path string, r *ExecConfig) (Process, error) {
return nil, errors.New("cannot exec in a deleted state")
}
func (s *deletedState) Status(ctx context.Context) (string, error) {
return "stopped", nil
}

View File

@@ -0,0 +1,263 @@
//go:build !windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package process
import (
"context"
"fmt"
"io"
"os"
"path/filepath"
"sync"
"syscall"
"time"
"golang.org/x/sys/unix"
"github.com/containerd/console"
"github.com/containerd/containerd/v2/errdefs"
"github.com/containerd/containerd/v2/pkg/stdio"
"github.com/containerd/fifo"
runc "github.com/containerd/go-runc"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
type execProcess struct {
wg sync.WaitGroup
execState execState
mu sync.Mutex
id string
console console.Console
io *processIO
status int
exited time.Time
pid safePid
closers []io.Closer
stdin io.Closer
stdio stdio.Stdio
path string
spec specs.Process
parent *Init
waitBlock chan struct{}
}
func (e *execProcess) Wait() {
<-e.waitBlock
}
func (e *execProcess) ID() string {
return e.id
}
func (e *execProcess) Pid() int {
return e.pid.get()
}
func (e *execProcess) ExitStatus() int {
e.mu.Lock()
defer e.mu.Unlock()
return e.status
}
func (e *execProcess) ExitedAt() time.Time {
e.mu.Lock()
defer e.mu.Unlock()
return e.exited
}
func (e *execProcess) SetExited(status int) {
e.mu.Lock()
defer e.mu.Unlock()
e.execState.SetExited(status)
}
func (e *execProcess) setExited(status int) {
e.status = status
e.exited = time.Now()
e.parent.Platform.ShutdownConsole(context.Background(), e.console)
close(e.waitBlock)
}
func (e *execProcess) Delete(ctx context.Context) error {
e.mu.Lock()
defer e.mu.Unlock()
return e.execState.Delete(ctx)
}
func (e *execProcess) delete(ctx context.Context) error {
waitTimeout(ctx, &e.wg, 2*time.Second)
if e.io != nil {
for _, c := range e.closers {
c.Close()
}
e.io.Close()
}
pidfile := filepath.Join(e.path, fmt.Sprintf("%s.pid", e.id))
// silently ignore error
os.Remove(pidfile)
return nil
}
func (e *execProcess) Resize(ws console.WinSize) error {
e.mu.Lock()
defer e.mu.Unlock()
return e.execState.Resize(ws)
}
func (e *execProcess) resize(ws console.WinSize) error {
if e.console == nil {
return nil
}
return e.console.Resize(ws)
}
func (e *execProcess) Kill(ctx context.Context, sig uint32, _ bool) error {
e.mu.Lock()
defer e.mu.Unlock()
return e.execState.Kill(ctx, sig, false)
}
func (e *execProcess) kill(ctx context.Context, sig uint32, _ bool) error {
pid := e.pid.get()
switch {
case pid == 0:
return fmt.Errorf("process not created: %w", errdefs.ErrFailedPrecondition)
case !e.exited.IsZero():
return fmt.Errorf("process already finished: %w", errdefs.ErrNotFound)
default:
if err := unix.Kill(pid, syscall.Signal(sig)); err != nil {
return fmt.Errorf("exec kill error: %w", checkKillError(err))
}
}
return nil
}
func (e *execProcess) Stdin() io.Closer {
return e.stdin
}
func (e *execProcess) Stdio() stdio.Stdio {
return e.stdio
}
func (e *execProcess) Start(ctx context.Context) error {
e.mu.Lock()
defer e.mu.Unlock()
return e.execState.Start(ctx)
}
func (e *execProcess) start(ctx context.Context) (err error) {
// The reaper may receive exit signal right after
// the container is started, before the e.pid is updated.
// In that case, we want to block the signal handler to
// access e.pid until it is updated.
e.pid.Lock()
defer e.pid.Unlock()
var (
socket *runc.Socket
pio *processIO
pidFile = newExecPidFile(e.path, e.id)
)
if e.stdio.Terminal {
if socket, err = runc.NewTempConsoleSocket(); err != nil {
return fmt.Errorf("failed to create runc console socket: %w", err)
}
defer socket.Close()
} else {
if pio, err = createIO(ctx, e.id, e.parent.IoUID, e.parent.IoGID, e.stdio); err != nil {
return fmt.Errorf("failed to create init process I/O: %w", err)
}
e.io = pio
}
opts := &runc.ExecOpts{
PidFile: pidFile.Path(),
Detach: true,
}
if pio != nil {
opts.IO = pio.IO()
}
if socket != nil {
opts.ConsoleSocket = socket
}
if err := e.parent.runtime.Exec(ctx, e.parent.id, e.spec, opts); err != nil {
close(e.waitBlock)
return e.parent.runtimeError(err, "OCI runtime exec failed")
}
if e.stdio.Stdin != "" {
if err := e.openStdin(e.stdio.Stdin); err != nil {
return err
}
}
ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
defer cancel()
if socket != nil {
console, err := socket.ReceiveMaster()
if err != nil {
return fmt.Errorf("failed to retrieve console master: %w", err)
}
if e.console, err = e.parent.Platform.CopyConsole(ctx, console, e.id, e.stdio.Stdin, e.stdio.Stdout, e.stdio.Stderr, &e.wg); err != nil {
return fmt.Errorf("failed to start console copy: %w", err)
}
} else {
if err := pio.Copy(ctx, &e.wg); err != nil {
return fmt.Errorf("failed to start io pipe copy: %w", err)
}
}
pid, err := pidFile.Read()
if err != nil {
return fmt.Errorf("failed to retrieve OCI runtime exec pi: %wd", err)
}
e.pid.pid = pid
return nil
}
func (e *execProcess) openStdin(path string) error {
sc, err := fifo.OpenFifo(context.Background(), path, syscall.O_WRONLY|syscall.O_NONBLOCK, 0)
if err != nil {
return fmt.Errorf("failed to open stdin fifo %s: %w", path, err)
}
e.stdin = sc
e.closers = append(e.closers, sc)
return nil
}
func (e *execProcess) Status(ctx context.Context) (string, error) {
s, err := e.parent.Status(ctx)
if err != nil {
return "", err
}
// if the container as a whole is in the pausing/paused state, so are all
// other processes inside the container, use container state here
switch s {
case "paused", "pausing":
return s, nil
}
e.mu.Lock()
defer e.mu.Unlock()
return e.execState.Status(ctx)
}

View File

@@ -0,0 +1,173 @@
//go:build !windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package process
import (
"context"
"errors"
"fmt"
"github.com/containerd/console"
)
type execState interface {
Resize(console.WinSize) error
Start(context.Context) error
Delete(context.Context) error
Kill(context.Context, uint32, bool) error
SetExited(int)
Status(context.Context) (string, error)
}
type execCreatedState struct {
p *execProcess
}
func (s *execCreatedState) transition(name string) error {
switch name {
case "running":
s.p.execState = &execRunningState{p: s.p}
case "stopped":
s.p.execState = &execStoppedState{p: s.p}
case "deleted":
s.p.execState = &deletedState{}
default:
return fmt.Errorf("invalid state transition %q to %q", stateName(s), name)
}
return nil
}
func (s *execCreatedState) Resize(ws console.WinSize) error {
return s.p.resize(ws)
}
func (s *execCreatedState) Start(ctx context.Context) error {
if err := s.p.start(ctx); err != nil {
return err
}
return s.transition("running")
}
func (s *execCreatedState) Delete(ctx context.Context) error {
if err := s.p.delete(ctx); err != nil {
return err
}
return s.transition("deleted")
}
func (s *execCreatedState) Kill(ctx context.Context, sig uint32, all bool) error {
return s.p.kill(ctx, sig, all)
}
func (s *execCreatedState) SetExited(status int) {
s.p.setExited(status)
if err := s.transition("stopped"); err != nil {
panic(err)
}
}
func (s *execCreatedState) Status(ctx context.Context) (string, error) {
return "created", nil
}
type execRunningState struct {
p *execProcess
}
func (s *execRunningState) transition(name string) error {
switch name {
case "stopped":
s.p.execState = &execStoppedState{p: s.p}
default:
return fmt.Errorf("invalid state transition %q to %q", stateName(s), name)
}
return nil
}
func (s *execRunningState) Resize(ws console.WinSize) error {
return s.p.resize(ws)
}
func (s *execRunningState) Start(ctx context.Context) error {
return errors.New("cannot start a running process")
}
func (s *execRunningState) Delete(ctx context.Context) error {
return errors.New("cannot delete a running process")
}
func (s *execRunningState) Kill(ctx context.Context, sig uint32, all bool) error {
return s.p.kill(ctx, sig, all)
}
func (s *execRunningState) SetExited(status int) {
s.p.setExited(status)
if err := s.transition("stopped"); err != nil {
panic(err)
}
}
func (s *execRunningState) Status(ctx context.Context) (string, error) {
return "running", nil
}
type execStoppedState struct {
p *execProcess
}
func (s *execStoppedState) transition(name string) error {
switch name {
case "deleted":
s.p.execState = &deletedState{}
default:
return fmt.Errorf("invalid state transition %q to %q", stateName(s), name)
}
return nil
}
func (s *execStoppedState) Resize(ws console.WinSize) error {
return errors.New("cannot resize a stopped container")
}
func (s *execStoppedState) Start(ctx context.Context) error {
return errors.New("cannot start a stopped process")
}
func (s *execStoppedState) Delete(ctx context.Context) error {
if err := s.p.delete(ctx); err != nil {
return err
}
return s.transition("deleted")
}
func (s *execStoppedState) Kill(ctx context.Context, sig uint32, all bool) error {
return s.p.kill(ctx, sig, all)
}
func (s *execStoppedState) SetExited(status int) {
// no op
}
func (s *execStoppedState) Status(ctx context.Context) (string, error) {
return "stopped", nil
}

View File

@@ -0,0 +1,554 @@
//go:build !windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package process
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/containerd/console"
"github.com/containerd/containerd/v2/mount"
"github.com/containerd/containerd/v2/pkg/stdio"
google_protobuf "github.com/containerd/containerd/v2/protobuf/types"
"github.com/containerd/fifo"
runc "github.com/containerd/go-runc"
"github.com/containerd/log"
specs "github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/sys/unix"
)
// Init represents an initial process for a container
type Init struct {
wg sync.WaitGroup
initState initState
// mu is used to ensure that `Start()` and `Exited()` calls return in
// the right order when invoked in separate goroutines.
// This is the case within the shim implementation as it makes use of
// the reaper interface.
mu sync.Mutex
waitBlock chan struct{}
WorkDir string
id string
Bundle string
console console.Console
Platform stdio.Platform
io *processIO
runtime *runc.Runc
// pausing preserves the pausing state.
pausing atomic.Bool
status int
exited time.Time
pid int
closers []io.Closer
stdin io.Closer
stdio stdio.Stdio
Rootfs string
IoUID int
IoGID int
NoPivotRoot bool
NoNewKeyring bool
CriuWorkPath string
}
// NewRunc returns a new runc instance for a process
func NewRunc(root, path, namespace, runtime string, systemd bool) *runc.Runc {
if root == "" {
root = RuncRoot
}
return &runc.Runc{
Command: runtime,
Log: filepath.Join(path, "log.json"),
LogFormat: runc.JSON,
PdeathSignal: unix.SIGKILL,
Root: filepath.Join(root, namespace),
SystemdCgroup: systemd,
}
}
// New returns a new process
func New(id string, runtime *runc.Runc, stdio stdio.Stdio) *Init {
p := &Init{
id: id,
runtime: runtime,
stdio: stdio,
status: 0,
waitBlock: make(chan struct{}),
}
p.initState = &createdState{p: p}
return p
}
// Create the process with the provided config
func (p *Init) Create(ctx context.Context, r *CreateConfig) error {
var (
err error
socket *runc.Socket
pio *processIO
pidFile = newPidFile(p.Bundle)
)
if r.Terminal {
if socket, err = runc.NewTempConsoleSocket(); err != nil {
return fmt.Errorf("failed to create OCI runtime console socket: %w", err)
}
defer socket.Close()
} else {
if pio, err = createIO(ctx, p.id, p.IoUID, p.IoGID, p.stdio); err != nil {
return fmt.Errorf("failed to create init process I/O: %w", err)
}
p.io = pio
}
if r.Checkpoint != "" {
return p.createCheckpointedState(r, pidFile)
}
opts := &runc.CreateOpts{
PidFile: pidFile.Path(),
NoPivot: p.NoPivotRoot,
NoNewKeyring: p.NoNewKeyring,
}
if p.io != nil {
opts.IO = p.io.IO()
}
if socket != nil {
opts.ConsoleSocket = socket
}
// runc ignores silently features it doesn't know about, so for things that this is
// problematic let's check if this runc version supports them.
if err := p.validateRuncFeatures(ctx, r.Bundle); err != nil {
return fmt.Errorf("failed to detect OCI runtime features: %w", err)
}
if err := p.runtime.Create(ctx, r.ID, r.Bundle, opts); err != nil {
return p.runtimeError(err, "OCI runtime create failed")
}
if r.Stdin != "" {
if err := p.openStdin(r.Stdin); err != nil {
return err
}
}
ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
defer cancel()
if socket != nil {
console, err := socket.ReceiveMaster()
if err != nil {
return fmt.Errorf("failed to retrieve console master: %w", err)
}
console, err = p.Platform.CopyConsole(ctx, console, p.id, r.Stdin, r.Stdout, r.Stderr, &p.wg)
if err != nil {
return fmt.Errorf("failed to start console copy: %w", err)
}
p.console = console
} else {
if err := pio.Copy(ctx, &p.wg); err != nil {
return fmt.Errorf("failed to start io pipe copy: %w", err)
}
}
pid, err := pidFile.Read()
if err != nil {
return fmt.Errorf("failed to retrieve OCI runtime container pid: %w", err)
}
p.pid = pid
return nil
}
func (p *Init) validateRuncFeatures(ctx context.Context, bundle string) error {
// TODO: We should remove the logic from here and rebase on #8509.
// This way we can avoid the call to readConfig() here and the call to p.runtime.Features()
// in validateIDMapMounts().
// But that PR is not yet merged nor it is clear if it will be refactored.
// Do this contained hack for now.
spec, err := readConfig(bundle)
if err != nil {
return fmt.Errorf("failed to read config: %w", err)
}
if err := p.validateIDMapMounts(ctx, spec); err != nil {
return fmt.Errorf("OCI runtime doesn't support idmap mounts: %w", err)
}
return nil
}
func (p *Init) validateIDMapMounts(ctx context.Context, spec *specs.Spec) error {
var used bool
for _, m := range spec.Mounts {
if m.UIDMappings != nil || m.GIDMappings != nil {
used = true
break
}
}
if !used {
return nil
}
// From here onwards, we require idmap mounts. So if we fail to check, we return an error.
features, err := p.runtime.Features(ctx)
if err != nil {
// If the features command is not implemented, then runc is too old.
return fmt.Errorf("features command failed: %w", err)
}
if features.Linux.MountExtensions == nil || features.Linux.MountExtensions.IDMap == nil {
return errors.New("missing `mountExtensions.idmap` entry in `features` command")
}
if enabled := features.Linux.MountExtensions.IDMap.Enabled; enabled == nil || !*enabled {
return errors.New("idmap mounts not supported")
}
return nil
}
func (p *Init) openStdin(path string) error {
sc, err := fifo.OpenFifo(context.Background(), path, unix.O_WRONLY|unix.O_NONBLOCK, 0)
if err != nil {
return fmt.Errorf("failed to open stdin fifo %s: %w", path, err)
}
p.stdin = sc
p.closers = append(p.closers, sc)
return nil
}
func (p *Init) createCheckpointedState(r *CreateConfig, pidFile *pidFile) error {
opts := &runc.RestoreOpts{
CheckpointOpts: runc.CheckpointOpts{
ImagePath: r.Checkpoint,
WorkDir: p.CriuWorkPath,
ParentPath: r.ParentCheckpoint,
},
PidFile: pidFile.Path(),
NoPivot: p.NoPivotRoot,
Detach: true,
NoSubreaper: true,
}
if p.io != nil {
opts.IO = p.io.IO()
}
p.initState = &createdCheckpointState{
p: p,
opts: opts,
}
return nil
}
// Wait for the process to exit
func (p *Init) Wait() {
<-p.waitBlock
}
// ID of the process
func (p *Init) ID() string {
return p.id
}
// Pid of the process
func (p *Init) Pid() int {
return p.pid
}
// ExitStatus of the process
func (p *Init) ExitStatus() int {
p.mu.Lock()
defer p.mu.Unlock()
return p.status
}
// ExitedAt at time when the process exited
func (p *Init) ExitedAt() time.Time {
p.mu.Lock()
defer p.mu.Unlock()
return p.exited
}
// Status of the process
func (p *Init) Status(ctx context.Context) (string, error) {
if p.pausing.Load() {
return "pausing", nil
}
p.mu.Lock()
defer p.mu.Unlock()
return p.initState.Status(ctx)
}
// Start the init process
func (p *Init) Start(ctx context.Context) error {
p.mu.Lock()
defer p.mu.Unlock()
return p.initState.Start(ctx)
}
func (p *Init) start(ctx context.Context) error {
err := p.runtime.Start(ctx, p.id)
return p.runtimeError(err, "OCI runtime start failed")
}
// SetExited of the init process with the next status
func (p *Init) SetExited(status int) {
p.mu.Lock()
defer p.mu.Unlock()
p.initState.SetExited(status)
}
func (p *Init) setExited(status int) {
p.exited = time.Now()
p.status = status
p.Platform.ShutdownConsole(context.Background(), p.console)
close(p.waitBlock)
}
// Delete the init process
func (p *Init) Delete(ctx context.Context) error {
p.mu.Lock()
defer p.mu.Unlock()
return p.initState.Delete(ctx)
}
func (p *Init) delete(ctx context.Context) error {
waitTimeout(ctx, &p.wg, 2*time.Second)
err := p.runtime.Delete(ctx, p.id, nil)
// ignore errors if a runtime has already deleted the process
// but we still hold metadata and pipes
//
// this is common during a checkpoint, runc will delete the container state
// after a checkpoint and the container will no longer exist within runc
if err != nil {
if strings.Contains(err.Error(), "does not exist") {
err = nil
} else {
err = p.runtimeError(err, "failed to delete task")
}
}
if p.io != nil {
for _, c := range p.closers {
c.Close()
}
p.io.Close()
}
if err2 := mount.UnmountRecursive(p.Rootfs, 0); err2 != nil {
log.G(ctx).WithError(err2).Warn("failed to cleanup rootfs mount")
if err == nil {
err = fmt.Errorf("failed rootfs umount: %w", err2)
}
}
return err
}
// Resize the init processes console
func (p *Init) Resize(ws console.WinSize) error {
p.mu.Lock()
defer p.mu.Unlock()
if p.console == nil {
return nil
}
return p.console.Resize(ws)
}
// Pause the init process and all its child processes
func (p *Init) Pause(ctx context.Context) error {
p.mu.Lock()
defer p.mu.Unlock()
return p.initState.Pause(ctx)
}
// Resume the init process and all its child processes
func (p *Init) Resume(ctx context.Context) error {
p.mu.Lock()
defer p.mu.Unlock()
return p.initState.Resume(ctx)
}
// Kill the init process
func (p *Init) Kill(ctx context.Context, signal uint32, all bool) error {
p.mu.Lock()
defer p.mu.Unlock()
return p.initState.Kill(ctx, signal, all)
}
func (p *Init) kill(ctx context.Context, signal uint32, all bool) error {
err := p.runtime.Kill(ctx, p.id, int(signal), &runc.KillOpts{
All: all,
})
return checkKillError(err)
}
// KillAll processes belonging to the init process
func (p *Init) KillAll(ctx context.Context) error {
p.mu.Lock()
defer p.mu.Unlock()
err := p.runtime.Kill(ctx, p.id, int(unix.SIGKILL), &runc.KillOpts{
All: true,
})
return p.runtimeError(err, "OCI runtime killall failed")
}
// Stdin of the process
func (p *Init) Stdin() io.Closer {
return p.stdin
}
// Runtime returns the OCI runtime configured for the init process
func (p *Init) Runtime() *runc.Runc {
return p.runtime
}
// Exec returns a new child process
func (p *Init) Exec(ctx context.Context, path string, r *ExecConfig) (Process, error) {
p.mu.Lock()
defer p.mu.Unlock()
return p.initState.Exec(ctx, path, r)
}
// exec returns a new exec'd process
func (p *Init) exec(ctx context.Context, path string, r *ExecConfig) (Process, error) {
// process exec request
var spec specs.Process
if err := json.Unmarshal(r.Spec.Value, &spec); err != nil {
return nil, err
}
spec.Terminal = r.Terminal
e := &execProcess{
id: r.ID,
path: path,
parent: p,
spec: spec,
stdio: stdio.Stdio{
Stdin: r.Stdin,
Stdout: r.Stdout,
Stderr: r.Stderr,
Terminal: r.Terminal,
},
waitBlock: make(chan struct{}),
}
e.execState = &execCreatedState{p: e}
return e, nil
}
// Checkpoint the init process
func (p *Init) Checkpoint(ctx context.Context, r *CheckpointConfig) error {
p.mu.Lock()
defer p.mu.Unlock()
return p.initState.Checkpoint(ctx, r)
}
func (p *Init) checkpoint(ctx context.Context, r *CheckpointConfig) error {
var actions []runc.CheckpointAction
if !r.Exit {
actions = append(actions, runc.LeaveRunning)
}
// keep criu work directory if criu work dir is set
work := r.WorkDir
if work == "" {
work = filepath.Join(p.WorkDir, "criu-work")
defer os.RemoveAll(work)
}
if err := p.runtime.Checkpoint(ctx, p.id, &runc.CheckpointOpts{
WorkDir: work,
ImagePath: r.Path,
AllowOpenTCP: r.AllowOpenTCP,
AllowExternalUnixSockets: r.AllowExternalUnixSockets,
AllowTerminal: r.AllowTerminal,
FileLocks: r.FileLocks,
EmptyNamespaces: r.EmptyNamespaces,
}, actions...); err != nil {
dumpLog := filepath.Join(p.Bundle, "criu-dump.log")
if cerr := copyFile(dumpLog, filepath.Join(work, "dump.log")); cerr != nil {
log.G(ctx).WithError(cerr).Error("failed to copy dump.log to criu-dump.log")
}
return fmt.Errorf("%s path= %s", criuError(err), dumpLog)
}
return nil
}
// Update the processes resource configuration
func (p *Init) Update(ctx context.Context, r *google_protobuf.Any) error {
p.mu.Lock()
defer p.mu.Unlock()
return p.initState.Update(ctx, r)
}
func (p *Init) update(ctx context.Context, r *google_protobuf.Any) error {
var resources specs.LinuxResources
if err := json.Unmarshal(r.Value, &resources); err != nil {
return err
}
return p.runtime.Update(ctx, p.id, &resources)
}
// Stdio of the process
func (p *Init) Stdio() stdio.Stdio {
return p.stdio
}
func (p *Init) runtimeError(rErr error, msg string) error {
if rErr == nil {
return nil
}
rMsg, err := getLastRuntimeError(p.runtime)
switch {
case err != nil:
return fmt.Errorf("%s: %s (%s): %w", msg, "unable to retrieve OCI runtime error", err.Error(), rErr)
case rMsg == "":
return fmt.Errorf("%s: %w", msg, rErr)
default:
return fmt.Errorf("%s: %s", msg, rMsg)
}
}
func withConditionalIO(c stdio.Stdio) runc.IOOpt {
return func(o *runc.IOOption) {
o.OpenStdin = c.Stdin != ""
o.OpenStdout = c.Stdout != ""
o.OpenStderr = c.Stderr != ""
}
}

View File

@@ -0,0 +1,415 @@
//go:build !windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package process
import (
"context"
"errors"
"fmt"
google_protobuf "github.com/containerd/containerd/v2/protobuf/types"
runc "github.com/containerd/go-runc"
"github.com/containerd/log"
)
type initState interface {
Start(context.Context) error
Delete(context.Context) error
Pause(context.Context) error
Resume(context.Context) error
Update(context.Context, *google_protobuf.Any) error
Checkpoint(context.Context, *CheckpointConfig) error
Exec(context.Context, string, *ExecConfig) (Process, error)
Kill(context.Context, uint32, bool) error
SetExited(int)
Status(context.Context) (string, error)
}
type createdState struct {
p *Init
}
func (s *createdState) transition(name string) error {
switch name {
case "running":
s.p.initState = &runningState{p: s.p}
case "stopped":
s.p.initState = &stoppedState{p: s.p}
case "deleted":
s.p.initState = &deletedState{}
default:
return fmt.Errorf("invalid state transition %q to %q", stateName(s), name)
}
return nil
}
func (s *createdState) Pause(ctx context.Context) error {
return errors.New("cannot pause task in created state")
}
func (s *createdState) Resume(ctx context.Context) error {
return errors.New("cannot resume task in created state")
}
func (s *createdState) Update(ctx context.Context, r *google_protobuf.Any) error {
return s.p.update(ctx, r)
}
func (s *createdState) Checkpoint(ctx context.Context, r *CheckpointConfig) error {
return errors.New("cannot checkpoint a task in created state")
}
func (s *createdState) Start(ctx context.Context) error {
if err := s.p.start(ctx); err != nil {
return err
}
return s.transition("running")
}
func (s *createdState) Delete(ctx context.Context) error {
if err := s.p.delete(ctx); err != nil {
return err
}
return s.transition("deleted")
}
func (s *createdState) Kill(ctx context.Context, sig uint32, all bool) error {
return s.p.kill(ctx, sig, all)
}
func (s *createdState) SetExited(status int) {
s.p.setExited(status)
if err := s.transition("stopped"); err != nil {
panic(err)
}
}
func (s *createdState) Exec(ctx context.Context, path string, r *ExecConfig) (Process, error) {
return s.p.exec(ctx, path, r)
}
func (s *createdState) Status(ctx context.Context) (string, error) {
return "created", nil
}
type createdCheckpointState struct {
p *Init
opts *runc.RestoreOpts
}
func (s *createdCheckpointState) transition(name string) error {
switch name {
case "running":
s.p.initState = &runningState{p: s.p}
case "stopped":
s.p.initState = &stoppedState{p: s.p}
case "deleted":
s.p.initState = &deletedState{}
default:
return fmt.Errorf("invalid state transition %q to %q", stateName(s), name)
}
return nil
}
func (s *createdCheckpointState) Pause(ctx context.Context) error {
return errors.New("cannot pause task in created state")
}
func (s *createdCheckpointState) Resume(ctx context.Context) error {
return errors.New("cannot resume task in created state")
}
func (s *createdCheckpointState) Update(ctx context.Context, r *google_protobuf.Any) error {
return s.p.update(ctx, r)
}
func (s *createdCheckpointState) Checkpoint(ctx context.Context, r *CheckpointConfig) error {
return errors.New("cannot checkpoint a task in created state")
}
func (s *createdCheckpointState) Start(ctx context.Context) error {
p := s.p
sio := p.stdio
var (
err error
socket *runc.Socket
)
if sio.Terminal {
if socket, err = runc.NewTempConsoleSocket(); err != nil {
return fmt.Errorf("failed to create OCI runtime console socket: %w", err)
}
defer socket.Close()
s.opts.ConsoleSocket = socket
}
if _, err := s.p.runtime.Restore(ctx, p.id, p.Bundle, s.opts); err != nil {
return p.runtimeError(err, "OCI runtime restore failed")
}
if sio.Stdin != "" {
if err := p.openStdin(sio.Stdin); err != nil {
return fmt.Errorf("failed to open stdin fifo %s: %w", sio.Stdin, err)
}
}
if socket != nil {
console, err := socket.ReceiveMaster()
if err != nil {
return fmt.Errorf("failed to retrieve console master: %w", err)
}
console, err = p.Platform.CopyConsole(ctx, console, p.id, sio.Stdin, sio.Stdout, sio.Stderr, &p.wg)
if err != nil {
return fmt.Errorf("failed to start console copy: %w", err)
}
p.console = console
} else {
if err := p.io.Copy(ctx, &p.wg); err != nil {
return fmt.Errorf("failed to start io pipe copy: %w", err)
}
}
pid, err := runc.ReadPidFile(s.opts.PidFile)
if err != nil {
return fmt.Errorf("failed to retrieve OCI runtime container pid: %w", err)
}
p.pid = pid
return s.transition("running")
}
func (s *createdCheckpointState) Delete(ctx context.Context) error {
if err := s.p.delete(ctx); err != nil {
return err
}
return s.transition("deleted")
}
func (s *createdCheckpointState) Kill(ctx context.Context, sig uint32, all bool) error {
return s.p.kill(ctx, sig, all)
}
func (s *createdCheckpointState) SetExited(status int) {
s.p.setExited(status)
if err := s.transition("stopped"); err != nil {
panic(err)
}
}
func (s *createdCheckpointState) Exec(ctx context.Context, path string, r *ExecConfig) (Process, error) {
return nil, errors.New("cannot exec in a created state")
}
func (s *createdCheckpointState) Status(ctx context.Context) (string, error) {
return "created", nil
}
type runningState struct {
p *Init
}
func (s *runningState) transition(name string) error {
switch name {
case "stopped":
s.p.initState = &stoppedState{p: s.p}
case "paused":
s.p.initState = &pausedState{p: s.p}
default:
return fmt.Errorf("invalid state transition %q to %q", stateName(s), name)
}
return nil
}
func (s *runningState) Pause(ctx context.Context) error {
s.p.pausing.Store(true)
// NOTE "pausing" will be returned in the short window
// after `transition("paused")`, before `pausing` is reset
// to false. That doesn't break the state machine, just
// delays the "paused" state a little bit.
defer s.p.pausing.Store(false)
if err := s.p.runtime.Pause(ctx, s.p.id); err != nil {
return s.p.runtimeError(err, "OCI runtime pause failed")
}
return s.transition("paused")
}
func (s *runningState) Resume(ctx context.Context) error {
return errors.New("cannot resume a running process")
}
func (s *runningState) Update(ctx context.Context, r *google_protobuf.Any) error {
return s.p.update(ctx, r)
}
func (s *runningState) Checkpoint(ctx context.Context, r *CheckpointConfig) error {
return s.p.checkpoint(ctx, r)
}
func (s *runningState) Start(ctx context.Context) error {
return errors.New("cannot start a running process")
}
func (s *runningState) Delete(ctx context.Context) error {
return errors.New("cannot delete a running process")
}
func (s *runningState) Kill(ctx context.Context, sig uint32, all bool) error {
return s.p.kill(ctx, sig, all)
}
func (s *runningState) SetExited(status int) {
s.p.setExited(status)
if err := s.transition("stopped"); err != nil {
panic(err)
}
}
func (s *runningState) Exec(ctx context.Context, path string, r *ExecConfig) (Process, error) {
return s.p.exec(ctx, path, r)
}
func (s *runningState) Status(ctx context.Context) (string, error) {
return "running", nil
}
type pausedState struct {
p *Init
}
func (s *pausedState) transition(name string) error {
switch name {
case "running":
s.p.initState = &runningState{p: s.p}
case "stopped":
s.p.initState = &stoppedState{p: s.p}
default:
return fmt.Errorf("invalid state transition %q to %q", stateName(s), name)
}
return nil
}
func (s *pausedState) Pause(ctx context.Context) error {
return errors.New("cannot pause a paused container")
}
func (s *pausedState) Resume(ctx context.Context) error {
if err := s.p.runtime.Resume(ctx, s.p.id); err != nil {
return s.p.runtimeError(err, "OCI runtime resume failed")
}
return s.transition("running")
}
func (s *pausedState) Update(ctx context.Context, r *google_protobuf.Any) error {
return s.p.update(ctx, r)
}
func (s *pausedState) Checkpoint(ctx context.Context, r *CheckpointConfig) error {
return s.p.checkpoint(ctx, r)
}
func (s *pausedState) Start(ctx context.Context) error {
return errors.New("cannot start a paused process")
}
func (s *pausedState) Delete(ctx context.Context) error {
return errors.New("cannot delete a paused process")
}
func (s *pausedState) Kill(ctx context.Context, sig uint32, all bool) error {
return s.p.kill(ctx, sig, all)
}
func (s *pausedState) SetExited(status int) {
s.p.setExited(status)
if err := s.p.runtime.Resume(context.Background(), s.p.id); err != nil {
log.L.WithError(err).Error("resuming exited container from paused state")
}
if err := s.transition("stopped"); err != nil {
panic(err)
}
}
func (s *pausedState) Exec(ctx context.Context, path string, r *ExecConfig) (Process, error) {
return nil, errors.New("cannot exec in a paused state")
}
func (s *pausedState) Status(ctx context.Context) (string, error) {
return "paused", nil
}
type stoppedState struct {
p *Init
}
func (s *stoppedState) transition(name string) error {
switch name {
case "deleted":
s.p.initState = &deletedState{}
default:
return fmt.Errorf("invalid state transition %q to %q", stateName(s), name)
}
return nil
}
func (s *stoppedState) Pause(ctx context.Context) error {
return errors.New("cannot pause a stopped container")
}
func (s *stoppedState) Resume(ctx context.Context) error {
return errors.New("cannot resume a stopped container")
}
func (s *stoppedState) Update(ctx context.Context, r *google_protobuf.Any) error {
return errors.New("cannot update a stopped container")
}
func (s *stoppedState) Checkpoint(ctx context.Context, r *CheckpointConfig) error {
return errors.New("cannot checkpoint a stopped container")
}
func (s *stoppedState) Start(ctx context.Context) error {
return errors.New("cannot start a stopped process")
}
func (s *stoppedState) Delete(ctx context.Context) error {
if err := s.p.delete(ctx); err != nil {
return err
}
return s.transition("deleted")
}
func (s *stoppedState) Kill(ctx context.Context, sig uint32, all bool) error {
return s.p.kill(ctx, sig, all)
}
func (s *stoppedState) SetExited(status int) {
// no op
}
func (s *stoppedState) Exec(ctx context.Context, path string, r *ExecConfig) (Process, error) {
return nil, errors.New("cannot exec in a stopped state")
}
func (s *stoppedState) Status(ctx context.Context) (string, error) {
return "stopped", nil
}

View File

@@ -0,0 +1,434 @@
//go:build !windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package process
import (
"context"
"errors"
"fmt"
"io"
"net/url"
"os"
"os/exec"
"path/filepath"
"sync"
"sync/atomic"
"syscall"
"time"
"github.com/containerd/containerd/v2/namespaces"
"github.com/containerd/containerd/v2/pkg/stdio"
"github.com/containerd/fifo"
runc "github.com/containerd/go-runc"
"github.com/containerd/log"
)
const binaryIOProcTermTimeout = 12 * time.Second // Give logger process solid 10 seconds for cleanup
var bufPool = sync.Pool{
New: func() interface{} {
// setting to 4096 to align with PIPE_BUF
// http://man7.org/linux/man-pages/man7/pipe.7.html
buffer := make([]byte, 4096)
return &buffer
},
}
type processIO struct {
io runc.IO
uri *url.URL
copy bool
stdio stdio.Stdio
}
func (p *processIO) Close() error {
if p.io != nil {
return p.io.Close()
}
return nil
}
func (p *processIO) IO() runc.IO {
return p.io
}
func (p *processIO) Copy(ctx context.Context, wg *sync.WaitGroup) error {
if !p.copy {
return nil
}
var cwg sync.WaitGroup
if err := copyPipes(ctx, p.IO(), p.stdio.Stdin, p.stdio.Stdout, p.stdio.Stderr, wg, &cwg); err != nil {
return fmt.Errorf("unable to copy pipes: %w", err)
}
cwg.Wait()
return nil
}
func createIO(ctx context.Context, id string, ioUID, ioGID int, stdio stdio.Stdio) (*processIO, error) {
pio := &processIO{
stdio: stdio,
}
if stdio.IsNull() {
i, err := runc.NewNullIO()
if err != nil {
return nil, err
}
pio.io = i
return pio, nil
}
u, err := url.Parse(stdio.Stdout)
if err != nil {
return nil, fmt.Errorf("unable to parse stdout uri: %w", err)
}
if u.Scheme == "" {
u.Scheme = "fifo"
}
pio.uri = u
switch u.Scheme {
case "fifo":
pio.copy = true
pio.io, err = runc.NewPipeIO(ioUID, ioGID, withConditionalIO(stdio))
case "binary":
pio.io, err = NewBinaryIO(ctx, id, u)
case "file":
filePath := u.Path
if err := os.MkdirAll(filepath.Dir(filePath), 0755); err != nil {
return nil, err
}
var f *os.File
f, err = os.OpenFile(filePath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
if err != nil {
return nil, err
}
f.Close()
pio.stdio.Stdout = filePath
pio.stdio.Stderr = filePath
pio.copy = true
pio.io, err = runc.NewPipeIO(ioUID, ioGID, withConditionalIO(stdio))
default:
return nil, fmt.Errorf("unknown STDIO scheme %s", u.Scheme)
}
if err != nil {
return nil, err
}
return pio, nil
}
func copyPipes(ctx context.Context, rio runc.IO, stdin, stdout, stderr string, wg, cwg *sync.WaitGroup) error {
var sameFile *countingWriteCloser
for _, i := range []struct {
name string
dest func(wc io.WriteCloser, rc io.Closer)
}{
{
name: stdout,
dest: func(wc io.WriteCloser, rc io.Closer) {
wg.Add(1)
cwg.Add(1)
go func() {
cwg.Done()
p := bufPool.Get().(*[]byte)
defer bufPool.Put(p)
if _, err := io.CopyBuffer(wc, rio.Stdout(), *p); err != nil {
log.G(ctx).Warn("error copying stdout")
}
wg.Done()
wc.Close()
if rc != nil {
rc.Close()
}
}()
},
}, {
name: stderr,
dest: func(wc io.WriteCloser, rc io.Closer) {
wg.Add(1)
cwg.Add(1)
go func() {
cwg.Done()
p := bufPool.Get().(*[]byte)
defer bufPool.Put(p)
if _, err := io.CopyBuffer(wc, rio.Stderr(), *p); err != nil {
log.G(ctx).Warn("error copying stderr")
}
wg.Done()
wc.Close()
if rc != nil {
rc.Close()
}
}()
},
},
} {
ok, err := fifo.IsFifo(i.name)
if err != nil {
return err
}
var (
fw io.WriteCloser
fr io.Closer
)
if ok {
if fw, err = fifo.OpenFifo(ctx, i.name, syscall.O_WRONLY, 0); err != nil {
return fmt.Errorf("containerd-shim: opening w/o fifo %q failed: %w", i.name, err)
}
if fr, err = fifo.OpenFifo(ctx, i.name, syscall.O_RDONLY, 0); err != nil {
return fmt.Errorf("containerd-shim: opening r/o fifo %q failed: %w", i.name, err)
}
} else {
if sameFile != nil {
sameFile.count++
i.dest(sameFile, nil)
continue
}
if fw, err = os.OpenFile(i.name, syscall.O_WRONLY|syscall.O_APPEND, 0); err != nil {
return fmt.Errorf("containerd-shim: opening file %q failed: %w", i.name, err)
}
if stdout == stderr {
sameFile = &countingWriteCloser{
WriteCloser: fw,
count: 1,
}
}
}
i.dest(fw, fr)
}
if stdin == "" {
return nil
}
f, err := fifo.OpenFifo(context.Background(), stdin, syscall.O_RDONLY|syscall.O_NONBLOCK, 0)
if err != nil {
return fmt.Errorf("containerd-shim: opening %s failed: %s", stdin, err)
}
cwg.Add(1)
go func() {
cwg.Done()
p := bufPool.Get().(*[]byte)
defer bufPool.Put(p)
io.CopyBuffer(rio.Stdin(), f, *p)
rio.Stdin().Close()
f.Close()
}()
return nil
}
// countingWriteCloser masks io.Closer() until close has been invoked a certain number of times.
type countingWriteCloser struct {
io.WriteCloser
count int64
}
func (c *countingWriteCloser) Close() error {
if atomic.AddInt64(&c.count, -1) > 0 {
return nil
}
return c.WriteCloser.Close()
}
// NewBinaryIO runs a custom binary process for pluggable shim logging
func NewBinaryIO(ctx context.Context, id string, uri *url.URL) (_ runc.IO, err error) {
ns, err := namespaces.NamespaceRequired(ctx)
if err != nil {
return nil, err
}
var closers []func() error
defer func() {
if err == nil {
return
}
result := []error{err}
for _, fn := range closers {
result = append(result, fn())
}
err = errors.Join(result...)
}()
out, err := newPipe()
if err != nil {
return nil, fmt.Errorf("failed to create stdout pipes: %w", err)
}
closers = append(closers, out.Close)
serr, err := newPipe()
if err != nil {
return nil, fmt.Errorf("failed to create stderr pipes: %w", err)
}
closers = append(closers, serr.Close)
r, w, err := os.Pipe()
if err != nil {
return nil, err
}
closers = append(closers, r.Close, w.Close)
cmd := NewBinaryCmd(uri, id, ns)
cmd.ExtraFiles = append(cmd.ExtraFiles, out.r, serr.r, w)
// don't need to register this with the reaper or wait when
// running inside a shim
if err := cmd.Start(); err != nil {
return nil, fmt.Errorf("failed to start binary process: %w", err)
}
closers = append(closers, func() error { return cmd.Process.Kill() })
// close our side of the pipe after start
if err := w.Close(); err != nil {
return nil, fmt.Errorf("failed to close write pipe after start: %w", err)
}
// wait for the logging binary to be ready
b := make([]byte, 1)
if _, err := r.Read(b); err != nil && err != io.EOF {
return nil, fmt.Errorf("failed to read from logging binary: %w", err)
}
return &binaryIO{
cmd: cmd,
out: out,
err: serr,
}, nil
}
type binaryIO struct {
cmd *exec.Cmd
out, err *pipe
}
func (b *binaryIO) CloseAfterStart() error {
var result []error
for _, v := range []*pipe{b.out, b.err} {
if v != nil {
if err := v.r.Close(); err != nil {
result = append(result, err)
}
}
}
return errors.Join(result...)
}
func (b *binaryIO) Close() error {
var result []error
for _, v := range []*pipe{b.out, b.err} {
if v != nil {
if err := v.Close(); err != nil {
result = append(result, err)
}
}
}
if err := b.cancel(); err != nil {
result = append(result, err)
}
return errors.Join(result...)
}
func (b *binaryIO) cancel() error {
if b.cmd == nil || b.cmd.Process == nil {
return nil
}
// Send SIGTERM first, so logger process has a chance to flush and exit properly
if err := b.cmd.Process.Signal(syscall.SIGTERM); err != nil {
result := []error{fmt.Errorf("failed to send SIGTERM: %w", err)}
log.L.WithError(err).Warn("failed to send SIGTERM signal, killing logging shim")
if err := b.cmd.Process.Kill(); err != nil {
result = append(result, fmt.Errorf("failed to kill process after faulty SIGTERM: %w", err))
}
return errors.Join(result...)
}
done := make(chan error, 1)
go func() {
done <- b.cmd.Wait()
}()
select {
case err := <-done:
return err
case <-time.After(binaryIOProcTermTimeout):
log.L.Warn("failed to wait for shim logger process to exit, killing")
err := b.cmd.Process.Kill()
if err != nil {
return fmt.Errorf("failed to kill shim logger process: %w", err)
}
return nil
}
}
func (b *binaryIO) Stdin() io.WriteCloser {
return nil
}
func (b *binaryIO) Stdout() io.ReadCloser {
return nil
}
func (b *binaryIO) Stderr() io.ReadCloser {
return nil
}
func (b *binaryIO) Set(cmd *exec.Cmd) {
if b.out != nil {
cmd.Stdout = b.out.w
}
if b.err != nil {
cmd.Stderr = b.err.w
}
}
func newPipe() (*pipe, error) {
r, w, err := os.Pipe()
if err != nil {
return nil, err
}
return &pipe{
r: r,
w: w,
}, nil
}
type pipe struct {
r *os.File
w *os.File
}
func (p *pipe) Close() error {
var result []error
if err := p.w.Close(); err != nil {
result = append(result, fmt.Errorf("pipe: failed to close write pipe: %w", err))
}
if err := p.r.Close(); err != nil {
result = append(result, fmt.Errorf("pipe: failed to close read pipe: %w", err))
}
return errors.Join(result...)
}

View File

@@ -0,0 +1,72 @@
//go:build linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package process
import (
"context"
"net/url"
"os"
"testing"
"github.com/containerd/containerd/v2/namespaces"
)
func TestNewBinaryIO(t *testing.T) {
ctx := namespaces.WithNamespace(context.Background(), "test")
uri, _ := url.Parse("binary:///bin/echo?test")
before := descriptorCount(t)
io, err := NewBinaryIO(ctx, "1", uri)
if err != nil {
t.Fatal(err)
}
err = io.Close()
if err != nil {
t.Fatal(err)
}
after := descriptorCount(t)
if before != after-1 { // one descriptor must be closed from shim logger side
t.Fatalf("some descriptors weren't closed (%d != %d -1)", before, after)
}
}
func TestNewBinaryIOCleanup(t *testing.T) {
ctx := namespaces.WithNamespace(context.Background(), "test")
uri, _ := url.Parse("binary:///not/existing")
before := descriptorCount(t)
_, err := NewBinaryIO(ctx, "2", uri)
if err == nil {
t.Fatal("error expected for invalid binary")
}
after := descriptorCount(t)
if before != after {
t.Fatalf("some descriptors weren't closed (%d != %d)", before, after)
}
}
func descriptorCount(t *testing.T) int {
t.Helper()
files, _ := os.ReadDir("/proc/self/fd")
return len(files)
}

View File

@@ -0,0 +1,53 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package process
import (
"net/url"
"os"
"os/exec"
)
// NewBinaryCmd returns a Cmd to be used to start a logging binary.
// The Cmd is generated from the provided uri, and the container ID and
// namespace are appended to the Cmd environment.
func NewBinaryCmd(binaryURI *url.URL, id, ns string) *exec.Cmd {
var args []string
for k, vs := range binaryURI.Query() {
args = append(args, k)
if len(vs) > 0 {
args = append(args, vs[0])
}
}
cmd := exec.Command(binaryURI.Path, args...)
cmd.Env = append(cmd.Env,
"CONTAINER_ID="+id,
"CONTAINER_NAMESPACE="+ns,
)
return cmd
}
// CloseFiles closes any files passed in.
// It it used for cleanup in the event of unexpected errors.
func CloseFiles(files ...*os.File) {
for _, file := range files {
file.Close()
}
}

View File

@@ -0,0 +1,56 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package process
import (
"context"
"io"
"time"
"github.com/containerd/console"
"github.com/containerd/containerd/v2/pkg/stdio"
)
// Process on a system
type Process interface {
// ID returns the id for the process
ID() string
// Pid returns the pid for the process
Pid() int
// ExitStatus returns the exit status
ExitStatus() int
// ExitedAt is the time the process exited
ExitedAt() time.Time
// Stdin returns the process STDIN
Stdin() io.Closer
// Stdio returns io information for the container
Stdio() stdio.Stdio
// Status returns the process status
Status(context.Context) (string, error)
// Wait blocks until the process has exited
Wait()
// Resize resizes the process console
Resize(ws console.WinSize) error
// Start execution of the process
Start(context.Context) error
// Delete deletes the process and its resourcess
Delete(context.Context) error
// Kill kills the process
Kill(context.Context, uint32, bool) error
// SetExited sets the exit status for the process
SetExited(status int)
}

View File

@@ -0,0 +1,66 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package process
import (
google_protobuf "github.com/containerd/containerd/v2/protobuf/types"
)
// Mount holds filesystem mount configuration
type Mount struct {
Type string
Source string
Target string
Options []string
}
// CreateConfig hold task creation configuration
type CreateConfig struct {
ID string
Bundle string
Runtime string
Rootfs []Mount
Terminal bool
Stdin string
Stdout string
Stderr string
Checkpoint string
ParentCheckpoint string
Options *google_protobuf.Any
}
// ExecConfig holds exec creation configuration
type ExecConfig struct {
ID string
Terminal bool
Stdin string
Stdout string
Stderr string
Spec *google_protobuf.Any
}
// CheckpointConfig holds task checkpoint configuration
type CheckpointConfig struct {
WorkDir string
Path string
Exit bool
AllowOpenTCP bool
AllowExternalUnixSockets bool
AllowTerminal bool
FileLocks bool
EmptyNamespaces []string
}

View File

@@ -0,0 +1,210 @@
//go:build !windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package process
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"sync"
"time"
"github.com/containerd/containerd/v2/errdefs"
runc "github.com/containerd/go-runc"
specs "github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/sys/unix"
)
const (
// RuncRoot is the path to the root runc state directory
RuncRoot = "/run/containerd/runc"
// InitPidFile name of the file that contains the init pid
InitPidFile = "init.pid"
// configFile is the name of the runc config file
configFile = "config.json"
)
// safePid is a thread safe wrapper for pid.
type safePid struct {
sync.Mutex
pid int
}
func (s *safePid) get() int {
s.Lock()
defer s.Unlock()
return s.pid
}
// TODO(mlaventure): move to runc package?
func getLastRuntimeError(r *runc.Runc) (string, error) {
if r.Log == "" {
return "", nil
}
f, err := os.OpenFile(r.Log, os.O_RDONLY, 0400)
if err != nil {
return "", err
}
defer f.Close()
var (
errMsg string
log struct {
Level string
Msg string
Time time.Time
}
)
dec := json.NewDecoder(f)
for err = nil; err == nil; {
if err = dec.Decode(&log); err != nil && err != io.EOF {
return "", err
}
if log.Level == "error" {
errMsg = strings.TrimSpace(log.Msg)
}
}
return errMsg, nil
}
// criuError returns only the first line of the error message from criu
// it tries to add an invalid dump log location when returning the message
func criuError(err error) string {
parts := strings.Split(err.Error(), "\n")
return parts[0]
}
func copyFile(to, from string) error {
ff, err := os.Open(from)
if err != nil {
return err
}
defer ff.Close()
tt, err := os.Create(to)
if err != nil {
return err
}
defer tt.Close()
p := bufPool.Get().(*[]byte)
defer bufPool.Put(p)
_, err = io.CopyBuffer(tt, ff, *p)
return err
}
func checkKillError(err error) error {
if err == nil {
return nil
}
if strings.Contains(err.Error(), "os: process already finished") ||
strings.Contains(err.Error(), "container not running") ||
strings.Contains(strings.ToLower(err.Error()), "no such process") ||
err == unix.ESRCH {
return fmt.Errorf("process already finished: %w", errdefs.ErrNotFound)
} else if strings.Contains(err.Error(), "does not exist") {
return fmt.Errorf("no such container: %w", errdefs.ErrNotFound)
}
return fmt.Errorf("unknown error after kill: %w", err)
}
func newPidFile(bundle string) *pidFile {
return &pidFile{
path: filepath.Join(bundle, InitPidFile),
}
}
func newExecPidFile(bundle, id string) *pidFile {
return &pidFile{
path: filepath.Join(bundle, fmt.Sprintf("%s.pid", id)),
}
}
type pidFile struct {
path string
}
func (p *pidFile) Path() string {
return p.path
}
func (p *pidFile) Read() (int, error) {
return runc.ReadPidFile(p.path)
}
// waitTimeout handles waiting on a waitgroup with a specified timeout.
// this is commonly used for waiting on IO to finish after a process has exited
func waitTimeout(ctx context.Context, wg *sync.WaitGroup, timeout time.Duration) error {
ctx, cancel := context.WithTimeout(ctx, timeout)
defer cancel()
done := make(chan struct{})
go func() {
wg.Wait()
close(done)
}()
select {
case <-done:
return nil
case <-ctx.Done():
return ctx.Err()
}
}
func stateName(v interface{}) string {
switch v.(type) {
case *runningState, *execRunningState:
return "running"
case *createdState, *execCreatedState, *createdCheckpointState:
return "created"
case *pausedState:
return "paused"
case *deletedState:
return "deleted"
case *stoppedState:
return "stopped"
}
panic(fmt.Errorf("invalid state %v", v))
}
func readConfig(path string) (spec *specs.Spec, err error) {
cfg := filepath.Join(path, configFile)
f, err := os.Open(cfg)
if err != nil {
if os.IsNotExist(err) {
return nil, fmt.Errorf("JSON specification file %s not found", cfg)
}
return nil, err
}
defer f.Close()
if err = json.NewDecoder(f).Decode(&spec); err != nil {
return nil, fmt.Errorf("failed to parse config: %w", err)
}
if spec == nil {
return nil, errors.New("config cannot be null")
}
return spec, nil
}

View File

@@ -31,10 +31,10 @@ import (
cgroupsv2 "github.com/containerd/cgroups/v3/cgroup2"
"github.com/containerd/console"
"github.com/containerd/containerd/v2/api/runtime/task/v3"
"github.com/containerd/containerd/v2/cmd/containerd-shim-runc-v2/process"
"github.com/containerd/containerd/v2/errdefs"
"github.com/containerd/containerd/v2/mount"
"github.com/containerd/containerd/v2/namespaces"
"github.com/containerd/containerd/v2/pkg/process"
"github.com/containerd/containerd/v2/pkg/stdio"
"github.com/containerd/containerd/v2/runtime/v2/runc/options"
"github.com/containerd/log"

View File

@@ -29,8 +29,8 @@ import (
"syscall"
"github.com/containerd/console"
"github.com/containerd/containerd/v2/cmd/containerd-shim-runc-v2/process"
"github.com/containerd/containerd/v2/namespaces"
"github.com/containerd/containerd/v2/pkg/process"
"github.com/containerd/containerd/v2/pkg/stdio"
"github.com/containerd/fifo"
)

View File

@@ -30,13 +30,13 @@ import (
eventstypes "github.com/containerd/containerd/v2/api/events"
taskAPI "github.com/containerd/containerd/v2/api/runtime/task/v3"
"github.com/containerd/containerd/v2/api/types/task"
"github.com/containerd/containerd/v2/cmd/containerd-shim-runc-v2/process"
"github.com/containerd/containerd/v2/cmd/containerd-shim-runc-v2/runc"
"github.com/containerd/containerd/v2/errdefs"
"github.com/containerd/containerd/v2/namespaces"
"github.com/containerd/containerd/v2/pkg/oom"
oomv1 "github.com/containerd/containerd/v2/pkg/oom/v1"
oomv2 "github.com/containerd/containerd/v2/pkg/oom/v2"
"github.com/containerd/containerd/v2/pkg/process"
"github.com/containerd/containerd/v2/pkg/shutdown"
"github.com/containerd/containerd/v2/pkg/stdio"
"github.com/containerd/containerd/v2/pkg/userns"