mv pkg/process cmd/containerd-shim-runc-v2/process
The package is quite specific to runc and only imported by containerd-shim-runc-v2 Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
This commit is contained in:
@@ -30,11 +30,11 @@ import (
|
||||
"github.com/containerd/cgroups/v3"
|
||||
"github.com/containerd/cgroups/v3/cgroup1"
|
||||
cgroupsv2 "github.com/containerd/cgroups/v3/cgroup2"
|
||||
"github.com/containerd/containerd/v2/cmd/containerd-shim-runc-v2/process"
|
||||
"github.com/containerd/containerd/v2/cmd/containerd-shim-runc-v2/runc"
|
||||
"github.com/containerd/containerd/v2/mount"
|
||||
"github.com/containerd/containerd/v2/namespaces"
|
||||
"github.com/containerd/containerd/v2/oci"
|
||||
"github.com/containerd/containerd/v2/pkg/process"
|
||||
"github.com/containerd/containerd/v2/pkg/schedcore"
|
||||
"github.com/containerd/containerd/v2/runtime/v2/runc/options"
|
||||
"github.com/containerd/containerd/v2/runtime/v2/shim"
|
||||
|
||||
76
cmd/containerd-shim-runc-v2/process/deleted_state.go
Normal file
76
cmd/containerd-shim-runc-v2/process/deleted_state.go
Normal file
@@ -0,0 +1,76 @@
|
||||
//go:build !windows
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package process
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
"github.com/containerd/console"
|
||||
"github.com/containerd/containerd/v2/errdefs"
|
||||
google_protobuf "github.com/containerd/containerd/v2/protobuf/types"
|
||||
)
|
||||
|
||||
type deletedState struct {
|
||||
}
|
||||
|
||||
func (s *deletedState) Pause(ctx context.Context) error {
|
||||
return errors.New("cannot pause a deleted process")
|
||||
}
|
||||
|
||||
func (s *deletedState) Resume(ctx context.Context) error {
|
||||
return errors.New("cannot resume a deleted process")
|
||||
}
|
||||
|
||||
func (s *deletedState) Update(context context.Context, r *google_protobuf.Any) error {
|
||||
return errors.New("cannot update a deleted process")
|
||||
}
|
||||
|
||||
func (s *deletedState) Checkpoint(ctx context.Context, r *CheckpointConfig) error {
|
||||
return errors.New("cannot checkpoint a deleted process")
|
||||
}
|
||||
|
||||
func (s *deletedState) Resize(ws console.WinSize) error {
|
||||
return errors.New("cannot resize a deleted process")
|
||||
}
|
||||
|
||||
func (s *deletedState) Start(ctx context.Context) error {
|
||||
return errors.New("cannot start a deleted process")
|
||||
}
|
||||
|
||||
func (s *deletedState) Delete(ctx context.Context) error {
|
||||
return fmt.Errorf("cannot delete a deleted process: %w", errdefs.ErrNotFound)
|
||||
}
|
||||
|
||||
func (s *deletedState) Kill(ctx context.Context, sig uint32, all bool) error {
|
||||
return fmt.Errorf("cannot kill a deleted process: %w", errdefs.ErrNotFound)
|
||||
}
|
||||
|
||||
func (s *deletedState) SetExited(status int) {
|
||||
// no op
|
||||
}
|
||||
|
||||
func (s *deletedState) Exec(ctx context.Context, path string, r *ExecConfig) (Process, error) {
|
||||
return nil, errors.New("cannot exec in a deleted state")
|
||||
}
|
||||
|
||||
func (s *deletedState) Status(ctx context.Context) (string, error) {
|
||||
return "stopped", nil
|
||||
}
|
||||
263
cmd/containerd-shim-runc-v2/process/exec.go
Normal file
263
cmd/containerd-shim-runc-v2/process/exec.go
Normal file
@@ -0,0 +1,263 @@
|
||||
//go:build !windows
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package process
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/containerd/console"
|
||||
"github.com/containerd/containerd/v2/errdefs"
|
||||
"github.com/containerd/containerd/v2/pkg/stdio"
|
||||
"github.com/containerd/fifo"
|
||||
runc "github.com/containerd/go-runc"
|
||||
specs "github.com/opencontainers/runtime-spec/specs-go"
|
||||
)
|
||||
|
||||
type execProcess struct {
|
||||
wg sync.WaitGroup
|
||||
|
||||
execState execState
|
||||
|
||||
mu sync.Mutex
|
||||
id string
|
||||
console console.Console
|
||||
io *processIO
|
||||
status int
|
||||
exited time.Time
|
||||
pid safePid
|
||||
closers []io.Closer
|
||||
stdin io.Closer
|
||||
stdio stdio.Stdio
|
||||
path string
|
||||
spec specs.Process
|
||||
|
||||
parent *Init
|
||||
waitBlock chan struct{}
|
||||
}
|
||||
|
||||
func (e *execProcess) Wait() {
|
||||
<-e.waitBlock
|
||||
}
|
||||
|
||||
func (e *execProcess) ID() string {
|
||||
return e.id
|
||||
}
|
||||
|
||||
func (e *execProcess) Pid() int {
|
||||
return e.pid.get()
|
||||
}
|
||||
|
||||
func (e *execProcess) ExitStatus() int {
|
||||
e.mu.Lock()
|
||||
defer e.mu.Unlock()
|
||||
return e.status
|
||||
}
|
||||
|
||||
func (e *execProcess) ExitedAt() time.Time {
|
||||
e.mu.Lock()
|
||||
defer e.mu.Unlock()
|
||||
return e.exited
|
||||
}
|
||||
|
||||
func (e *execProcess) SetExited(status int) {
|
||||
e.mu.Lock()
|
||||
defer e.mu.Unlock()
|
||||
|
||||
e.execState.SetExited(status)
|
||||
}
|
||||
|
||||
func (e *execProcess) setExited(status int) {
|
||||
e.status = status
|
||||
e.exited = time.Now()
|
||||
e.parent.Platform.ShutdownConsole(context.Background(), e.console)
|
||||
close(e.waitBlock)
|
||||
}
|
||||
|
||||
func (e *execProcess) Delete(ctx context.Context) error {
|
||||
e.mu.Lock()
|
||||
defer e.mu.Unlock()
|
||||
|
||||
return e.execState.Delete(ctx)
|
||||
}
|
||||
|
||||
func (e *execProcess) delete(ctx context.Context) error {
|
||||
waitTimeout(ctx, &e.wg, 2*time.Second)
|
||||
if e.io != nil {
|
||||
for _, c := range e.closers {
|
||||
c.Close()
|
||||
}
|
||||
e.io.Close()
|
||||
}
|
||||
pidfile := filepath.Join(e.path, fmt.Sprintf("%s.pid", e.id))
|
||||
// silently ignore error
|
||||
os.Remove(pidfile)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (e *execProcess) Resize(ws console.WinSize) error {
|
||||
e.mu.Lock()
|
||||
defer e.mu.Unlock()
|
||||
|
||||
return e.execState.Resize(ws)
|
||||
}
|
||||
|
||||
func (e *execProcess) resize(ws console.WinSize) error {
|
||||
if e.console == nil {
|
||||
return nil
|
||||
}
|
||||
return e.console.Resize(ws)
|
||||
}
|
||||
|
||||
func (e *execProcess) Kill(ctx context.Context, sig uint32, _ bool) error {
|
||||
e.mu.Lock()
|
||||
defer e.mu.Unlock()
|
||||
|
||||
return e.execState.Kill(ctx, sig, false)
|
||||
}
|
||||
|
||||
func (e *execProcess) kill(ctx context.Context, sig uint32, _ bool) error {
|
||||
pid := e.pid.get()
|
||||
switch {
|
||||
case pid == 0:
|
||||
return fmt.Errorf("process not created: %w", errdefs.ErrFailedPrecondition)
|
||||
case !e.exited.IsZero():
|
||||
return fmt.Errorf("process already finished: %w", errdefs.ErrNotFound)
|
||||
default:
|
||||
if err := unix.Kill(pid, syscall.Signal(sig)); err != nil {
|
||||
return fmt.Errorf("exec kill error: %w", checkKillError(err))
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (e *execProcess) Stdin() io.Closer {
|
||||
return e.stdin
|
||||
}
|
||||
|
||||
func (e *execProcess) Stdio() stdio.Stdio {
|
||||
return e.stdio
|
||||
}
|
||||
|
||||
func (e *execProcess) Start(ctx context.Context) error {
|
||||
e.mu.Lock()
|
||||
defer e.mu.Unlock()
|
||||
|
||||
return e.execState.Start(ctx)
|
||||
}
|
||||
|
||||
func (e *execProcess) start(ctx context.Context) (err error) {
|
||||
// The reaper may receive exit signal right after
|
||||
// the container is started, before the e.pid is updated.
|
||||
// In that case, we want to block the signal handler to
|
||||
// access e.pid until it is updated.
|
||||
e.pid.Lock()
|
||||
defer e.pid.Unlock()
|
||||
|
||||
var (
|
||||
socket *runc.Socket
|
||||
pio *processIO
|
||||
pidFile = newExecPidFile(e.path, e.id)
|
||||
)
|
||||
if e.stdio.Terminal {
|
||||
if socket, err = runc.NewTempConsoleSocket(); err != nil {
|
||||
return fmt.Errorf("failed to create runc console socket: %w", err)
|
||||
}
|
||||
defer socket.Close()
|
||||
} else {
|
||||
if pio, err = createIO(ctx, e.id, e.parent.IoUID, e.parent.IoGID, e.stdio); err != nil {
|
||||
return fmt.Errorf("failed to create init process I/O: %w", err)
|
||||
}
|
||||
e.io = pio
|
||||
}
|
||||
opts := &runc.ExecOpts{
|
||||
PidFile: pidFile.Path(),
|
||||
Detach: true,
|
||||
}
|
||||
if pio != nil {
|
||||
opts.IO = pio.IO()
|
||||
}
|
||||
if socket != nil {
|
||||
opts.ConsoleSocket = socket
|
||||
}
|
||||
if err := e.parent.runtime.Exec(ctx, e.parent.id, e.spec, opts); err != nil {
|
||||
close(e.waitBlock)
|
||||
return e.parent.runtimeError(err, "OCI runtime exec failed")
|
||||
}
|
||||
if e.stdio.Stdin != "" {
|
||||
if err := e.openStdin(e.stdio.Stdin); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
|
||||
defer cancel()
|
||||
if socket != nil {
|
||||
console, err := socket.ReceiveMaster()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to retrieve console master: %w", err)
|
||||
}
|
||||
if e.console, err = e.parent.Platform.CopyConsole(ctx, console, e.id, e.stdio.Stdin, e.stdio.Stdout, e.stdio.Stderr, &e.wg); err != nil {
|
||||
return fmt.Errorf("failed to start console copy: %w", err)
|
||||
}
|
||||
} else {
|
||||
if err := pio.Copy(ctx, &e.wg); err != nil {
|
||||
return fmt.Errorf("failed to start io pipe copy: %w", err)
|
||||
}
|
||||
}
|
||||
pid, err := pidFile.Read()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to retrieve OCI runtime exec pi: %wd", err)
|
||||
}
|
||||
e.pid.pid = pid
|
||||
return nil
|
||||
}
|
||||
|
||||
func (e *execProcess) openStdin(path string) error {
|
||||
sc, err := fifo.OpenFifo(context.Background(), path, syscall.O_WRONLY|syscall.O_NONBLOCK, 0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open stdin fifo %s: %w", path, err)
|
||||
}
|
||||
e.stdin = sc
|
||||
e.closers = append(e.closers, sc)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (e *execProcess) Status(ctx context.Context) (string, error) {
|
||||
s, err := e.parent.Status(ctx)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
// if the container as a whole is in the pausing/paused state, so are all
|
||||
// other processes inside the container, use container state here
|
||||
switch s {
|
||||
case "paused", "pausing":
|
||||
return s, nil
|
||||
}
|
||||
e.mu.Lock()
|
||||
defer e.mu.Unlock()
|
||||
return e.execState.Status(ctx)
|
||||
}
|
||||
173
cmd/containerd-shim-runc-v2/process/exec_state.go
Normal file
173
cmd/containerd-shim-runc-v2/process/exec_state.go
Normal file
@@ -0,0 +1,173 @@
|
||||
//go:build !windows
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package process
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
"github.com/containerd/console"
|
||||
)
|
||||
|
||||
type execState interface {
|
||||
Resize(console.WinSize) error
|
||||
Start(context.Context) error
|
||||
Delete(context.Context) error
|
||||
Kill(context.Context, uint32, bool) error
|
||||
SetExited(int)
|
||||
Status(context.Context) (string, error)
|
||||
}
|
||||
|
||||
type execCreatedState struct {
|
||||
p *execProcess
|
||||
}
|
||||
|
||||
func (s *execCreatedState) transition(name string) error {
|
||||
switch name {
|
||||
case "running":
|
||||
s.p.execState = &execRunningState{p: s.p}
|
||||
case "stopped":
|
||||
s.p.execState = &execStoppedState{p: s.p}
|
||||
case "deleted":
|
||||
s.p.execState = &deletedState{}
|
||||
default:
|
||||
return fmt.Errorf("invalid state transition %q to %q", stateName(s), name)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *execCreatedState) Resize(ws console.WinSize) error {
|
||||
return s.p.resize(ws)
|
||||
}
|
||||
|
||||
func (s *execCreatedState) Start(ctx context.Context) error {
|
||||
if err := s.p.start(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
return s.transition("running")
|
||||
}
|
||||
|
||||
func (s *execCreatedState) Delete(ctx context.Context) error {
|
||||
if err := s.p.delete(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return s.transition("deleted")
|
||||
}
|
||||
|
||||
func (s *execCreatedState) Kill(ctx context.Context, sig uint32, all bool) error {
|
||||
return s.p.kill(ctx, sig, all)
|
||||
}
|
||||
|
||||
func (s *execCreatedState) SetExited(status int) {
|
||||
s.p.setExited(status)
|
||||
|
||||
if err := s.transition("stopped"); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *execCreatedState) Status(ctx context.Context) (string, error) {
|
||||
return "created", nil
|
||||
}
|
||||
|
||||
type execRunningState struct {
|
||||
p *execProcess
|
||||
}
|
||||
|
||||
func (s *execRunningState) transition(name string) error {
|
||||
switch name {
|
||||
case "stopped":
|
||||
s.p.execState = &execStoppedState{p: s.p}
|
||||
default:
|
||||
return fmt.Errorf("invalid state transition %q to %q", stateName(s), name)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *execRunningState) Resize(ws console.WinSize) error {
|
||||
return s.p.resize(ws)
|
||||
}
|
||||
|
||||
func (s *execRunningState) Start(ctx context.Context) error {
|
||||
return errors.New("cannot start a running process")
|
||||
}
|
||||
|
||||
func (s *execRunningState) Delete(ctx context.Context) error {
|
||||
return errors.New("cannot delete a running process")
|
||||
}
|
||||
|
||||
func (s *execRunningState) Kill(ctx context.Context, sig uint32, all bool) error {
|
||||
return s.p.kill(ctx, sig, all)
|
||||
}
|
||||
|
||||
func (s *execRunningState) SetExited(status int) {
|
||||
s.p.setExited(status)
|
||||
|
||||
if err := s.transition("stopped"); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *execRunningState) Status(ctx context.Context) (string, error) {
|
||||
return "running", nil
|
||||
}
|
||||
|
||||
type execStoppedState struct {
|
||||
p *execProcess
|
||||
}
|
||||
|
||||
func (s *execStoppedState) transition(name string) error {
|
||||
switch name {
|
||||
case "deleted":
|
||||
s.p.execState = &deletedState{}
|
||||
default:
|
||||
return fmt.Errorf("invalid state transition %q to %q", stateName(s), name)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *execStoppedState) Resize(ws console.WinSize) error {
|
||||
return errors.New("cannot resize a stopped container")
|
||||
}
|
||||
|
||||
func (s *execStoppedState) Start(ctx context.Context) error {
|
||||
return errors.New("cannot start a stopped process")
|
||||
}
|
||||
|
||||
func (s *execStoppedState) Delete(ctx context.Context) error {
|
||||
if err := s.p.delete(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return s.transition("deleted")
|
||||
}
|
||||
|
||||
func (s *execStoppedState) Kill(ctx context.Context, sig uint32, all bool) error {
|
||||
return s.p.kill(ctx, sig, all)
|
||||
}
|
||||
|
||||
func (s *execStoppedState) SetExited(status int) {
|
||||
// no op
|
||||
}
|
||||
|
||||
func (s *execStoppedState) Status(ctx context.Context) (string, error) {
|
||||
return "stopped", nil
|
||||
}
|
||||
554
cmd/containerd-shim-runc-v2/process/init.go
Normal file
554
cmd/containerd-shim-runc-v2/process/init.go
Normal file
@@ -0,0 +1,554 @@
|
||||
//go:build !windows
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package process
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/containerd/console"
|
||||
"github.com/containerd/containerd/v2/mount"
|
||||
"github.com/containerd/containerd/v2/pkg/stdio"
|
||||
google_protobuf "github.com/containerd/containerd/v2/protobuf/types"
|
||||
"github.com/containerd/fifo"
|
||||
runc "github.com/containerd/go-runc"
|
||||
"github.com/containerd/log"
|
||||
specs "github.com/opencontainers/runtime-spec/specs-go"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// Init represents an initial process for a container
|
||||
type Init struct {
|
||||
wg sync.WaitGroup
|
||||
initState initState
|
||||
|
||||
// mu is used to ensure that `Start()` and `Exited()` calls return in
|
||||
// the right order when invoked in separate goroutines.
|
||||
// This is the case within the shim implementation as it makes use of
|
||||
// the reaper interface.
|
||||
mu sync.Mutex
|
||||
|
||||
waitBlock chan struct{}
|
||||
|
||||
WorkDir string
|
||||
|
||||
id string
|
||||
Bundle string
|
||||
console console.Console
|
||||
Platform stdio.Platform
|
||||
io *processIO
|
||||
runtime *runc.Runc
|
||||
// pausing preserves the pausing state.
|
||||
pausing atomic.Bool
|
||||
status int
|
||||
exited time.Time
|
||||
pid int
|
||||
closers []io.Closer
|
||||
stdin io.Closer
|
||||
stdio stdio.Stdio
|
||||
Rootfs string
|
||||
IoUID int
|
||||
IoGID int
|
||||
NoPivotRoot bool
|
||||
NoNewKeyring bool
|
||||
CriuWorkPath string
|
||||
}
|
||||
|
||||
// NewRunc returns a new runc instance for a process
|
||||
func NewRunc(root, path, namespace, runtime string, systemd bool) *runc.Runc {
|
||||
if root == "" {
|
||||
root = RuncRoot
|
||||
}
|
||||
return &runc.Runc{
|
||||
Command: runtime,
|
||||
Log: filepath.Join(path, "log.json"),
|
||||
LogFormat: runc.JSON,
|
||||
PdeathSignal: unix.SIGKILL,
|
||||
Root: filepath.Join(root, namespace),
|
||||
SystemdCgroup: systemd,
|
||||
}
|
||||
}
|
||||
|
||||
// New returns a new process
|
||||
func New(id string, runtime *runc.Runc, stdio stdio.Stdio) *Init {
|
||||
p := &Init{
|
||||
id: id,
|
||||
runtime: runtime,
|
||||
stdio: stdio,
|
||||
status: 0,
|
||||
waitBlock: make(chan struct{}),
|
||||
}
|
||||
p.initState = &createdState{p: p}
|
||||
return p
|
||||
}
|
||||
|
||||
// Create the process with the provided config
|
||||
func (p *Init) Create(ctx context.Context, r *CreateConfig) error {
|
||||
var (
|
||||
err error
|
||||
socket *runc.Socket
|
||||
pio *processIO
|
||||
pidFile = newPidFile(p.Bundle)
|
||||
)
|
||||
|
||||
if r.Terminal {
|
||||
if socket, err = runc.NewTempConsoleSocket(); err != nil {
|
||||
return fmt.Errorf("failed to create OCI runtime console socket: %w", err)
|
||||
}
|
||||
defer socket.Close()
|
||||
} else {
|
||||
if pio, err = createIO(ctx, p.id, p.IoUID, p.IoGID, p.stdio); err != nil {
|
||||
return fmt.Errorf("failed to create init process I/O: %w", err)
|
||||
}
|
||||
p.io = pio
|
||||
}
|
||||
if r.Checkpoint != "" {
|
||||
return p.createCheckpointedState(r, pidFile)
|
||||
}
|
||||
opts := &runc.CreateOpts{
|
||||
PidFile: pidFile.Path(),
|
||||
NoPivot: p.NoPivotRoot,
|
||||
NoNewKeyring: p.NoNewKeyring,
|
||||
}
|
||||
if p.io != nil {
|
||||
opts.IO = p.io.IO()
|
||||
}
|
||||
if socket != nil {
|
||||
opts.ConsoleSocket = socket
|
||||
}
|
||||
|
||||
// runc ignores silently features it doesn't know about, so for things that this is
|
||||
// problematic let's check if this runc version supports them.
|
||||
if err := p.validateRuncFeatures(ctx, r.Bundle); err != nil {
|
||||
return fmt.Errorf("failed to detect OCI runtime features: %w", err)
|
||||
}
|
||||
|
||||
if err := p.runtime.Create(ctx, r.ID, r.Bundle, opts); err != nil {
|
||||
return p.runtimeError(err, "OCI runtime create failed")
|
||||
}
|
||||
if r.Stdin != "" {
|
||||
if err := p.openStdin(r.Stdin); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
|
||||
defer cancel()
|
||||
if socket != nil {
|
||||
console, err := socket.ReceiveMaster()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to retrieve console master: %w", err)
|
||||
}
|
||||
console, err = p.Platform.CopyConsole(ctx, console, p.id, r.Stdin, r.Stdout, r.Stderr, &p.wg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to start console copy: %w", err)
|
||||
}
|
||||
p.console = console
|
||||
} else {
|
||||
if err := pio.Copy(ctx, &p.wg); err != nil {
|
||||
return fmt.Errorf("failed to start io pipe copy: %w", err)
|
||||
}
|
||||
}
|
||||
pid, err := pidFile.Read()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to retrieve OCI runtime container pid: %w", err)
|
||||
}
|
||||
p.pid = pid
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *Init) validateRuncFeatures(ctx context.Context, bundle string) error {
|
||||
// TODO: We should remove the logic from here and rebase on #8509.
|
||||
// This way we can avoid the call to readConfig() here and the call to p.runtime.Features()
|
||||
// in validateIDMapMounts().
|
||||
// But that PR is not yet merged nor it is clear if it will be refactored.
|
||||
// Do this contained hack for now.
|
||||
spec, err := readConfig(bundle)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read config: %w", err)
|
||||
}
|
||||
|
||||
if err := p.validateIDMapMounts(ctx, spec); err != nil {
|
||||
return fmt.Errorf("OCI runtime doesn't support idmap mounts: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *Init) validateIDMapMounts(ctx context.Context, spec *specs.Spec) error {
|
||||
var used bool
|
||||
for _, m := range spec.Mounts {
|
||||
if m.UIDMappings != nil || m.GIDMappings != nil {
|
||||
used = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if !used {
|
||||
return nil
|
||||
}
|
||||
|
||||
// From here onwards, we require idmap mounts. So if we fail to check, we return an error.
|
||||
features, err := p.runtime.Features(ctx)
|
||||
if err != nil {
|
||||
// If the features command is not implemented, then runc is too old.
|
||||
return fmt.Errorf("features command failed: %w", err)
|
||||
|
||||
}
|
||||
|
||||
if features.Linux.MountExtensions == nil || features.Linux.MountExtensions.IDMap == nil {
|
||||
return errors.New("missing `mountExtensions.idmap` entry in `features` command")
|
||||
}
|
||||
|
||||
if enabled := features.Linux.MountExtensions.IDMap.Enabled; enabled == nil || !*enabled {
|
||||
return errors.New("idmap mounts not supported")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *Init) openStdin(path string) error {
|
||||
sc, err := fifo.OpenFifo(context.Background(), path, unix.O_WRONLY|unix.O_NONBLOCK, 0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open stdin fifo %s: %w", path, err)
|
||||
}
|
||||
p.stdin = sc
|
||||
p.closers = append(p.closers, sc)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *Init) createCheckpointedState(r *CreateConfig, pidFile *pidFile) error {
|
||||
opts := &runc.RestoreOpts{
|
||||
CheckpointOpts: runc.CheckpointOpts{
|
||||
ImagePath: r.Checkpoint,
|
||||
WorkDir: p.CriuWorkPath,
|
||||
ParentPath: r.ParentCheckpoint,
|
||||
},
|
||||
PidFile: pidFile.Path(),
|
||||
NoPivot: p.NoPivotRoot,
|
||||
Detach: true,
|
||||
NoSubreaper: true,
|
||||
}
|
||||
|
||||
if p.io != nil {
|
||||
opts.IO = p.io.IO()
|
||||
}
|
||||
|
||||
p.initState = &createdCheckpointState{
|
||||
p: p,
|
||||
opts: opts,
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Wait for the process to exit
|
||||
func (p *Init) Wait() {
|
||||
<-p.waitBlock
|
||||
}
|
||||
|
||||
// ID of the process
|
||||
func (p *Init) ID() string {
|
||||
return p.id
|
||||
}
|
||||
|
||||
// Pid of the process
|
||||
func (p *Init) Pid() int {
|
||||
return p.pid
|
||||
}
|
||||
|
||||
// ExitStatus of the process
|
||||
func (p *Init) ExitStatus() int {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
|
||||
return p.status
|
||||
}
|
||||
|
||||
// ExitedAt at time when the process exited
|
||||
func (p *Init) ExitedAt() time.Time {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
|
||||
return p.exited
|
||||
}
|
||||
|
||||
// Status of the process
|
||||
func (p *Init) Status(ctx context.Context) (string, error) {
|
||||
if p.pausing.Load() {
|
||||
return "pausing", nil
|
||||
}
|
||||
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
|
||||
return p.initState.Status(ctx)
|
||||
}
|
||||
|
||||
// Start the init process
|
||||
func (p *Init) Start(ctx context.Context) error {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
|
||||
return p.initState.Start(ctx)
|
||||
}
|
||||
|
||||
func (p *Init) start(ctx context.Context) error {
|
||||
err := p.runtime.Start(ctx, p.id)
|
||||
return p.runtimeError(err, "OCI runtime start failed")
|
||||
}
|
||||
|
||||
// SetExited of the init process with the next status
|
||||
func (p *Init) SetExited(status int) {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
|
||||
p.initState.SetExited(status)
|
||||
}
|
||||
|
||||
func (p *Init) setExited(status int) {
|
||||
p.exited = time.Now()
|
||||
p.status = status
|
||||
p.Platform.ShutdownConsole(context.Background(), p.console)
|
||||
close(p.waitBlock)
|
||||
}
|
||||
|
||||
// Delete the init process
|
||||
func (p *Init) Delete(ctx context.Context) error {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
|
||||
return p.initState.Delete(ctx)
|
||||
}
|
||||
|
||||
func (p *Init) delete(ctx context.Context) error {
|
||||
waitTimeout(ctx, &p.wg, 2*time.Second)
|
||||
err := p.runtime.Delete(ctx, p.id, nil)
|
||||
// ignore errors if a runtime has already deleted the process
|
||||
// but we still hold metadata and pipes
|
||||
//
|
||||
// this is common during a checkpoint, runc will delete the container state
|
||||
// after a checkpoint and the container will no longer exist within runc
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "does not exist") {
|
||||
err = nil
|
||||
} else {
|
||||
err = p.runtimeError(err, "failed to delete task")
|
||||
}
|
||||
}
|
||||
if p.io != nil {
|
||||
for _, c := range p.closers {
|
||||
c.Close()
|
||||
}
|
||||
p.io.Close()
|
||||
}
|
||||
if err2 := mount.UnmountRecursive(p.Rootfs, 0); err2 != nil {
|
||||
log.G(ctx).WithError(err2).Warn("failed to cleanup rootfs mount")
|
||||
if err == nil {
|
||||
err = fmt.Errorf("failed rootfs umount: %w", err2)
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// Resize the init processes console
|
||||
func (p *Init) Resize(ws console.WinSize) error {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
|
||||
if p.console == nil {
|
||||
return nil
|
||||
}
|
||||
return p.console.Resize(ws)
|
||||
}
|
||||
|
||||
// Pause the init process and all its child processes
|
||||
func (p *Init) Pause(ctx context.Context) error {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
|
||||
return p.initState.Pause(ctx)
|
||||
}
|
||||
|
||||
// Resume the init process and all its child processes
|
||||
func (p *Init) Resume(ctx context.Context) error {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
|
||||
return p.initState.Resume(ctx)
|
||||
}
|
||||
|
||||
// Kill the init process
|
||||
func (p *Init) Kill(ctx context.Context, signal uint32, all bool) error {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
|
||||
return p.initState.Kill(ctx, signal, all)
|
||||
}
|
||||
|
||||
func (p *Init) kill(ctx context.Context, signal uint32, all bool) error {
|
||||
err := p.runtime.Kill(ctx, p.id, int(signal), &runc.KillOpts{
|
||||
All: all,
|
||||
})
|
||||
return checkKillError(err)
|
||||
}
|
||||
|
||||
// KillAll processes belonging to the init process
|
||||
func (p *Init) KillAll(ctx context.Context) error {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
|
||||
err := p.runtime.Kill(ctx, p.id, int(unix.SIGKILL), &runc.KillOpts{
|
||||
All: true,
|
||||
})
|
||||
return p.runtimeError(err, "OCI runtime killall failed")
|
||||
}
|
||||
|
||||
// Stdin of the process
|
||||
func (p *Init) Stdin() io.Closer {
|
||||
return p.stdin
|
||||
}
|
||||
|
||||
// Runtime returns the OCI runtime configured for the init process
|
||||
func (p *Init) Runtime() *runc.Runc {
|
||||
return p.runtime
|
||||
}
|
||||
|
||||
// Exec returns a new child process
|
||||
func (p *Init) Exec(ctx context.Context, path string, r *ExecConfig) (Process, error) {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
|
||||
return p.initState.Exec(ctx, path, r)
|
||||
}
|
||||
|
||||
// exec returns a new exec'd process
|
||||
func (p *Init) exec(ctx context.Context, path string, r *ExecConfig) (Process, error) {
|
||||
// process exec request
|
||||
var spec specs.Process
|
||||
if err := json.Unmarshal(r.Spec.Value, &spec); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
spec.Terminal = r.Terminal
|
||||
|
||||
e := &execProcess{
|
||||
id: r.ID,
|
||||
path: path,
|
||||
parent: p,
|
||||
spec: spec,
|
||||
stdio: stdio.Stdio{
|
||||
Stdin: r.Stdin,
|
||||
Stdout: r.Stdout,
|
||||
Stderr: r.Stderr,
|
||||
Terminal: r.Terminal,
|
||||
},
|
||||
waitBlock: make(chan struct{}),
|
||||
}
|
||||
e.execState = &execCreatedState{p: e}
|
||||
return e, nil
|
||||
}
|
||||
|
||||
// Checkpoint the init process
|
||||
func (p *Init) Checkpoint(ctx context.Context, r *CheckpointConfig) error {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
|
||||
return p.initState.Checkpoint(ctx, r)
|
||||
}
|
||||
|
||||
func (p *Init) checkpoint(ctx context.Context, r *CheckpointConfig) error {
|
||||
var actions []runc.CheckpointAction
|
||||
if !r.Exit {
|
||||
actions = append(actions, runc.LeaveRunning)
|
||||
}
|
||||
// keep criu work directory if criu work dir is set
|
||||
work := r.WorkDir
|
||||
if work == "" {
|
||||
work = filepath.Join(p.WorkDir, "criu-work")
|
||||
defer os.RemoveAll(work)
|
||||
}
|
||||
if err := p.runtime.Checkpoint(ctx, p.id, &runc.CheckpointOpts{
|
||||
WorkDir: work,
|
||||
ImagePath: r.Path,
|
||||
AllowOpenTCP: r.AllowOpenTCP,
|
||||
AllowExternalUnixSockets: r.AllowExternalUnixSockets,
|
||||
AllowTerminal: r.AllowTerminal,
|
||||
FileLocks: r.FileLocks,
|
||||
EmptyNamespaces: r.EmptyNamespaces,
|
||||
}, actions...); err != nil {
|
||||
dumpLog := filepath.Join(p.Bundle, "criu-dump.log")
|
||||
if cerr := copyFile(dumpLog, filepath.Join(work, "dump.log")); cerr != nil {
|
||||
log.G(ctx).WithError(cerr).Error("failed to copy dump.log to criu-dump.log")
|
||||
}
|
||||
return fmt.Errorf("%s path= %s", criuError(err), dumpLog)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Update the processes resource configuration
|
||||
func (p *Init) Update(ctx context.Context, r *google_protobuf.Any) error {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
|
||||
return p.initState.Update(ctx, r)
|
||||
}
|
||||
|
||||
func (p *Init) update(ctx context.Context, r *google_protobuf.Any) error {
|
||||
var resources specs.LinuxResources
|
||||
if err := json.Unmarshal(r.Value, &resources); err != nil {
|
||||
return err
|
||||
}
|
||||
return p.runtime.Update(ctx, p.id, &resources)
|
||||
}
|
||||
|
||||
// Stdio of the process
|
||||
func (p *Init) Stdio() stdio.Stdio {
|
||||
return p.stdio
|
||||
}
|
||||
|
||||
func (p *Init) runtimeError(rErr error, msg string) error {
|
||||
if rErr == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
rMsg, err := getLastRuntimeError(p.runtime)
|
||||
switch {
|
||||
case err != nil:
|
||||
return fmt.Errorf("%s: %s (%s): %w", msg, "unable to retrieve OCI runtime error", err.Error(), rErr)
|
||||
case rMsg == "":
|
||||
return fmt.Errorf("%s: %w", msg, rErr)
|
||||
default:
|
||||
return fmt.Errorf("%s: %s", msg, rMsg)
|
||||
}
|
||||
}
|
||||
|
||||
func withConditionalIO(c stdio.Stdio) runc.IOOpt {
|
||||
return func(o *runc.IOOption) {
|
||||
o.OpenStdin = c.Stdin != ""
|
||||
o.OpenStdout = c.Stdout != ""
|
||||
o.OpenStderr = c.Stderr != ""
|
||||
}
|
||||
}
|
||||
415
cmd/containerd-shim-runc-v2/process/init_state.go
Normal file
415
cmd/containerd-shim-runc-v2/process/init_state.go
Normal file
@@ -0,0 +1,415 @@
|
||||
//go:build !windows
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package process
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
google_protobuf "github.com/containerd/containerd/v2/protobuf/types"
|
||||
runc "github.com/containerd/go-runc"
|
||||
"github.com/containerd/log"
|
||||
)
|
||||
|
||||
type initState interface {
|
||||
Start(context.Context) error
|
||||
Delete(context.Context) error
|
||||
Pause(context.Context) error
|
||||
Resume(context.Context) error
|
||||
Update(context.Context, *google_protobuf.Any) error
|
||||
Checkpoint(context.Context, *CheckpointConfig) error
|
||||
Exec(context.Context, string, *ExecConfig) (Process, error)
|
||||
Kill(context.Context, uint32, bool) error
|
||||
SetExited(int)
|
||||
Status(context.Context) (string, error)
|
||||
}
|
||||
|
||||
type createdState struct {
|
||||
p *Init
|
||||
}
|
||||
|
||||
func (s *createdState) transition(name string) error {
|
||||
switch name {
|
||||
case "running":
|
||||
s.p.initState = &runningState{p: s.p}
|
||||
case "stopped":
|
||||
s.p.initState = &stoppedState{p: s.p}
|
||||
case "deleted":
|
||||
s.p.initState = &deletedState{}
|
||||
default:
|
||||
return fmt.Errorf("invalid state transition %q to %q", stateName(s), name)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *createdState) Pause(ctx context.Context) error {
|
||||
return errors.New("cannot pause task in created state")
|
||||
}
|
||||
|
||||
func (s *createdState) Resume(ctx context.Context) error {
|
||||
return errors.New("cannot resume task in created state")
|
||||
}
|
||||
|
||||
func (s *createdState) Update(ctx context.Context, r *google_protobuf.Any) error {
|
||||
return s.p.update(ctx, r)
|
||||
}
|
||||
|
||||
func (s *createdState) Checkpoint(ctx context.Context, r *CheckpointConfig) error {
|
||||
return errors.New("cannot checkpoint a task in created state")
|
||||
}
|
||||
|
||||
func (s *createdState) Start(ctx context.Context) error {
|
||||
if err := s.p.start(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
return s.transition("running")
|
||||
}
|
||||
|
||||
func (s *createdState) Delete(ctx context.Context) error {
|
||||
if err := s.p.delete(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
return s.transition("deleted")
|
||||
}
|
||||
|
||||
func (s *createdState) Kill(ctx context.Context, sig uint32, all bool) error {
|
||||
return s.p.kill(ctx, sig, all)
|
||||
}
|
||||
|
||||
func (s *createdState) SetExited(status int) {
|
||||
s.p.setExited(status)
|
||||
|
||||
if err := s.transition("stopped"); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *createdState) Exec(ctx context.Context, path string, r *ExecConfig) (Process, error) {
|
||||
return s.p.exec(ctx, path, r)
|
||||
}
|
||||
|
||||
func (s *createdState) Status(ctx context.Context) (string, error) {
|
||||
return "created", nil
|
||||
}
|
||||
|
||||
type createdCheckpointState struct {
|
||||
p *Init
|
||||
opts *runc.RestoreOpts
|
||||
}
|
||||
|
||||
func (s *createdCheckpointState) transition(name string) error {
|
||||
switch name {
|
||||
case "running":
|
||||
s.p.initState = &runningState{p: s.p}
|
||||
case "stopped":
|
||||
s.p.initState = &stoppedState{p: s.p}
|
||||
case "deleted":
|
||||
s.p.initState = &deletedState{}
|
||||
default:
|
||||
return fmt.Errorf("invalid state transition %q to %q", stateName(s), name)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *createdCheckpointState) Pause(ctx context.Context) error {
|
||||
return errors.New("cannot pause task in created state")
|
||||
}
|
||||
|
||||
func (s *createdCheckpointState) Resume(ctx context.Context) error {
|
||||
return errors.New("cannot resume task in created state")
|
||||
}
|
||||
|
||||
func (s *createdCheckpointState) Update(ctx context.Context, r *google_protobuf.Any) error {
|
||||
return s.p.update(ctx, r)
|
||||
}
|
||||
|
||||
func (s *createdCheckpointState) Checkpoint(ctx context.Context, r *CheckpointConfig) error {
|
||||
return errors.New("cannot checkpoint a task in created state")
|
||||
}
|
||||
|
||||
func (s *createdCheckpointState) Start(ctx context.Context) error {
|
||||
p := s.p
|
||||
sio := p.stdio
|
||||
|
||||
var (
|
||||
err error
|
||||
socket *runc.Socket
|
||||
)
|
||||
if sio.Terminal {
|
||||
if socket, err = runc.NewTempConsoleSocket(); err != nil {
|
||||
return fmt.Errorf("failed to create OCI runtime console socket: %w", err)
|
||||
}
|
||||
defer socket.Close()
|
||||
s.opts.ConsoleSocket = socket
|
||||
}
|
||||
|
||||
if _, err := s.p.runtime.Restore(ctx, p.id, p.Bundle, s.opts); err != nil {
|
||||
return p.runtimeError(err, "OCI runtime restore failed")
|
||||
}
|
||||
if sio.Stdin != "" {
|
||||
if err := p.openStdin(sio.Stdin); err != nil {
|
||||
return fmt.Errorf("failed to open stdin fifo %s: %w", sio.Stdin, err)
|
||||
}
|
||||
}
|
||||
if socket != nil {
|
||||
console, err := socket.ReceiveMaster()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to retrieve console master: %w", err)
|
||||
}
|
||||
console, err = p.Platform.CopyConsole(ctx, console, p.id, sio.Stdin, sio.Stdout, sio.Stderr, &p.wg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to start console copy: %w", err)
|
||||
}
|
||||
p.console = console
|
||||
} else {
|
||||
if err := p.io.Copy(ctx, &p.wg); err != nil {
|
||||
return fmt.Errorf("failed to start io pipe copy: %w", err)
|
||||
}
|
||||
}
|
||||
pid, err := runc.ReadPidFile(s.opts.PidFile)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to retrieve OCI runtime container pid: %w", err)
|
||||
}
|
||||
p.pid = pid
|
||||
return s.transition("running")
|
||||
}
|
||||
|
||||
func (s *createdCheckpointState) Delete(ctx context.Context) error {
|
||||
if err := s.p.delete(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
return s.transition("deleted")
|
||||
}
|
||||
|
||||
func (s *createdCheckpointState) Kill(ctx context.Context, sig uint32, all bool) error {
|
||||
return s.p.kill(ctx, sig, all)
|
||||
}
|
||||
|
||||
func (s *createdCheckpointState) SetExited(status int) {
|
||||
s.p.setExited(status)
|
||||
|
||||
if err := s.transition("stopped"); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *createdCheckpointState) Exec(ctx context.Context, path string, r *ExecConfig) (Process, error) {
|
||||
return nil, errors.New("cannot exec in a created state")
|
||||
}
|
||||
|
||||
func (s *createdCheckpointState) Status(ctx context.Context) (string, error) {
|
||||
return "created", nil
|
||||
}
|
||||
|
||||
type runningState struct {
|
||||
p *Init
|
||||
}
|
||||
|
||||
func (s *runningState) transition(name string) error {
|
||||
switch name {
|
||||
case "stopped":
|
||||
s.p.initState = &stoppedState{p: s.p}
|
||||
case "paused":
|
||||
s.p.initState = &pausedState{p: s.p}
|
||||
default:
|
||||
return fmt.Errorf("invalid state transition %q to %q", stateName(s), name)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *runningState) Pause(ctx context.Context) error {
|
||||
s.p.pausing.Store(true)
|
||||
// NOTE "pausing" will be returned in the short window
|
||||
// after `transition("paused")`, before `pausing` is reset
|
||||
// to false. That doesn't break the state machine, just
|
||||
// delays the "paused" state a little bit.
|
||||
defer s.p.pausing.Store(false)
|
||||
|
||||
if err := s.p.runtime.Pause(ctx, s.p.id); err != nil {
|
||||
return s.p.runtimeError(err, "OCI runtime pause failed")
|
||||
}
|
||||
|
||||
return s.transition("paused")
|
||||
}
|
||||
|
||||
func (s *runningState) Resume(ctx context.Context) error {
|
||||
return errors.New("cannot resume a running process")
|
||||
}
|
||||
|
||||
func (s *runningState) Update(ctx context.Context, r *google_protobuf.Any) error {
|
||||
return s.p.update(ctx, r)
|
||||
}
|
||||
|
||||
func (s *runningState) Checkpoint(ctx context.Context, r *CheckpointConfig) error {
|
||||
return s.p.checkpoint(ctx, r)
|
||||
}
|
||||
|
||||
func (s *runningState) Start(ctx context.Context) error {
|
||||
return errors.New("cannot start a running process")
|
||||
}
|
||||
|
||||
func (s *runningState) Delete(ctx context.Context) error {
|
||||
return errors.New("cannot delete a running process")
|
||||
}
|
||||
|
||||
func (s *runningState) Kill(ctx context.Context, sig uint32, all bool) error {
|
||||
return s.p.kill(ctx, sig, all)
|
||||
}
|
||||
|
||||
func (s *runningState) SetExited(status int) {
|
||||
s.p.setExited(status)
|
||||
|
||||
if err := s.transition("stopped"); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *runningState) Exec(ctx context.Context, path string, r *ExecConfig) (Process, error) {
|
||||
return s.p.exec(ctx, path, r)
|
||||
}
|
||||
|
||||
func (s *runningState) Status(ctx context.Context) (string, error) {
|
||||
return "running", nil
|
||||
}
|
||||
|
||||
type pausedState struct {
|
||||
p *Init
|
||||
}
|
||||
|
||||
func (s *pausedState) transition(name string) error {
|
||||
switch name {
|
||||
case "running":
|
||||
s.p.initState = &runningState{p: s.p}
|
||||
case "stopped":
|
||||
s.p.initState = &stoppedState{p: s.p}
|
||||
default:
|
||||
return fmt.Errorf("invalid state transition %q to %q", stateName(s), name)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *pausedState) Pause(ctx context.Context) error {
|
||||
return errors.New("cannot pause a paused container")
|
||||
}
|
||||
|
||||
func (s *pausedState) Resume(ctx context.Context) error {
|
||||
if err := s.p.runtime.Resume(ctx, s.p.id); err != nil {
|
||||
return s.p.runtimeError(err, "OCI runtime resume failed")
|
||||
}
|
||||
|
||||
return s.transition("running")
|
||||
}
|
||||
|
||||
func (s *pausedState) Update(ctx context.Context, r *google_protobuf.Any) error {
|
||||
return s.p.update(ctx, r)
|
||||
}
|
||||
|
||||
func (s *pausedState) Checkpoint(ctx context.Context, r *CheckpointConfig) error {
|
||||
return s.p.checkpoint(ctx, r)
|
||||
}
|
||||
|
||||
func (s *pausedState) Start(ctx context.Context) error {
|
||||
return errors.New("cannot start a paused process")
|
||||
}
|
||||
|
||||
func (s *pausedState) Delete(ctx context.Context) error {
|
||||
return errors.New("cannot delete a paused process")
|
||||
}
|
||||
|
||||
func (s *pausedState) Kill(ctx context.Context, sig uint32, all bool) error {
|
||||
return s.p.kill(ctx, sig, all)
|
||||
}
|
||||
|
||||
func (s *pausedState) SetExited(status int) {
|
||||
s.p.setExited(status)
|
||||
|
||||
if err := s.p.runtime.Resume(context.Background(), s.p.id); err != nil {
|
||||
log.L.WithError(err).Error("resuming exited container from paused state")
|
||||
}
|
||||
|
||||
if err := s.transition("stopped"); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *pausedState) Exec(ctx context.Context, path string, r *ExecConfig) (Process, error) {
|
||||
return nil, errors.New("cannot exec in a paused state")
|
||||
}
|
||||
|
||||
func (s *pausedState) Status(ctx context.Context) (string, error) {
|
||||
return "paused", nil
|
||||
}
|
||||
|
||||
type stoppedState struct {
|
||||
p *Init
|
||||
}
|
||||
|
||||
func (s *stoppedState) transition(name string) error {
|
||||
switch name {
|
||||
case "deleted":
|
||||
s.p.initState = &deletedState{}
|
||||
default:
|
||||
return fmt.Errorf("invalid state transition %q to %q", stateName(s), name)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *stoppedState) Pause(ctx context.Context) error {
|
||||
return errors.New("cannot pause a stopped container")
|
||||
}
|
||||
|
||||
func (s *stoppedState) Resume(ctx context.Context) error {
|
||||
return errors.New("cannot resume a stopped container")
|
||||
}
|
||||
|
||||
func (s *stoppedState) Update(ctx context.Context, r *google_protobuf.Any) error {
|
||||
return errors.New("cannot update a stopped container")
|
||||
}
|
||||
|
||||
func (s *stoppedState) Checkpoint(ctx context.Context, r *CheckpointConfig) error {
|
||||
return errors.New("cannot checkpoint a stopped container")
|
||||
}
|
||||
|
||||
func (s *stoppedState) Start(ctx context.Context) error {
|
||||
return errors.New("cannot start a stopped process")
|
||||
}
|
||||
|
||||
func (s *stoppedState) Delete(ctx context.Context) error {
|
||||
if err := s.p.delete(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
return s.transition("deleted")
|
||||
}
|
||||
|
||||
func (s *stoppedState) Kill(ctx context.Context, sig uint32, all bool) error {
|
||||
return s.p.kill(ctx, sig, all)
|
||||
}
|
||||
|
||||
func (s *stoppedState) SetExited(status int) {
|
||||
// no op
|
||||
}
|
||||
|
||||
func (s *stoppedState) Exec(ctx context.Context, path string, r *ExecConfig) (Process, error) {
|
||||
return nil, errors.New("cannot exec in a stopped state")
|
||||
}
|
||||
|
||||
func (s *stoppedState) Status(ctx context.Context) (string, error) {
|
||||
return "stopped", nil
|
||||
}
|
||||
434
cmd/containerd-shim-runc-v2/process/io.go
Normal file
434
cmd/containerd-shim-runc-v2/process/io.go
Normal file
@@ -0,0 +1,434 @@
|
||||
//go:build !windows
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package process
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/url"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/containerd/containerd/v2/namespaces"
|
||||
"github.com/containerd/containerd/v2/pkg/stdio"
|
||||
"github.com/containerd/fifo"
|
||||
runc "github.com/containerd/go-runc"
|
||||
"github.com/containerd/log"
|
||||
)
|
||||
|
||||
const binaryIOProcTermTimeout = 12 * time.Second // Give logger process solid 10 seconds for cleanup
|
||||
|
||||
var bufPool = sync.Pool{
|
||||
New: func() interface{} {
|
||||
// setting to 4096 to align with PIPE_BUF
|
||||
// http://man7.org/linux/man-pages/man7/pipe.7.html
|
||||
buffer := make([]byte, 4096)
|
||||
return &buffer
|
||||
},
|
||||
}
|
||||
|
||||
type processIO struct {
|
||||
io runc.IO
|
||||
|
||||
uri *url.URL
|
||||
copy bool
|
||||
stdio stdio.Stdio
|
||||
}
|
||||
|
||||
func (p *processIO) Close() error {
|
||||
if p.io != nil {
|
||||
return p.io.Close()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *processIO) IO() runc.IO {
|
||||
return p.io
|
||||
}
|
||||
|
||||
func (p *processIO) Copy(ctx context.Context, wg *sync.WaitGroup) error {
|
||||
if !p.copy {
|
||||
return nil
|
||||
}
|
||||
var cwg sync.WaitGroup
|
||||
if err := copyPipes(ctx, p.IO(), p.stdio.Stdin, p.stdio.Stdout, p.stdio.Stderr, wg, &cwg); err != nil {
|
||||
return fmt.Errorf("unable to copy pipes: %w", err)
|
||||
}
|
||||
cwg.Wait()
|
||||
return nil
|
||||
}
|
||||
|
||||
func createIO(ctx context.Context, id string, ioUID, ioGID int, stdio stdio.Stdio) (*processIO, error) {
|
||||
pio := &processIO{
|
||||
stdio: stdio,
|
||||
}
|
||||
if stdio.IsNull() {
|
||||
i, err := runc.NewNullIO()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
pio.io = i
|
||||
return pio, nil
|
||||
}
|
||||
u, err := url.Parse(stdio.Stdout)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to parse stdout uri: %w", err)
|
||||
}
|
||||
if u.Scheme == "" {
|
||||
u.Scheme = "fifo"
|
||||
}
|
||||
pio.uri = u
|
||||
switch u.Scheme {
|
||||
case "fifo":
|
||||
pio.copy = true
|
||||
pio.io, err = runc.NewPipeIO(ioUID, ioGID, withConditionalIO(stdio))
|
||||
case "binary":
|
||||
pio.io, err = NewBinaryIO(ctx, id, u)
|
||||
case "file":
|
||||
filePath := u.Path
|
||||
if err := os.MkdirAll(filepath.Dir(filePath), 0755); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var f *os.File
|
||||
f, err = os.OpenFile(filePath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
f.Close()
|
||||
pio.stdio.Stdout = filePath
|
||||
pio.stdio.Stderr = filePath
|
||||
pio.copy = true
|
||||
pio.io, err = runc.NewPipeIO(ioUID, ioGID, withConditionalIO(stdio))
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown STDIO scheme %s", u.Scheme)
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return pio, nil
|
||||
}
|
||||
|
||||
func copyPipes(ctx context.Context, rio runc.IO, stdin, stdout, stderr string, wg, cwg *sync.WaitGroup) error {
|
||||
var sameFile *countingWriteCloser
|
||||
for _, i := range []struct {
|
||||
name string
|
||||
dest func(wc io.WriteCloser, rc io.Closer)
|
||||
}{
|
||||
{
|
||||
name: stdout,
|
||||
dest: func(wc io.WriteCloser, rc io.Closer) {
|
||||
wg.Add(1)
|
||||
cwg.Add(1)
|
||||
go func() {
|
||||
cwg.Done()
|
||||
p := bufPool.Get().(*[]byte)
|
||||
defer bufPool.Put(p)
|
||||
if _, err := io.CopyBuffer(wc, rio.Stdout(), *p); err != nil {
|
||||
log.G(ctx).Warn("error copying stdout")
|
||||
}
|
||||
wg.Done()
|
||||
wc.Close()
|
||||
if rc != nil {
|
||||
rc.Close()
|
||||
}
|
||||
}()
|
||||
},
|
||||
}, {
|
||||
name: stderr,
|
||||
dest: func(wc io.WriteCloser, rc io.Closer) {
|
||||
wg.Add(1)
|
||||
cwg.Add(1)
|
||||
go func() {
|
||||
cwg.Done()
|
||||
p := bufPool.Get().(*[]byte)
|
||||
defer bufPool.Put(p)
|
||||
if _, err := io.CopyBuffer(wc, rio.Stderr(), *p); err != nil {
|
||||
log.G(ctx).Warn("error copying stderr")
|
||||
}
|
||||
wg.Done()
|
||||
wc.Close()
|
||||
if rc != nil {
|
||||
rc.Close()
|
||||
}
|
||||
}()
|
||||
},
|
||||
},
|
||||
} {
|
||||
ok, err := fifo.IsFifo(i.name)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var (
|
||||
fw io.WriteCloser
|
||||
fr io.Closer
|
||||
)
|
||||
if ok {
|
||||
if fw, err = fifo.OpenFifo(ctx, i.name, syscall.O_WRONLY, 0); err != nil {
|
||||
return fmt.Errorf("containerd-shim: opening w/o fifo %q failed: %w", i.name, err)
|
||||
}
|
||||
if fr, err = fifo.OpenFifo(ctx, i.name, syscall.O_RDONLY, 0); err != nil {
|
||||
return fmt.Errorf("containerd-shim: opening r/o fifo %q failed: %w", i.name, err)
|
||||
}
|
||||
} else {
|
||||
if sameFile != nil {
|
||||
sameFile.count++
|
||||
i.dest(sameFile, nil)
|
||||
continue
|
||||
}
|
||||
if fw, err = os.OpenFile(i.name, syscall.O_WRONLY|syscall.O_APPEND, 0); err != nil {
|
||||
return fmt.Errorf("containerd-shim: opening file %q failed: %w", i.name, err)
|
||||
}
|
||||
if stdout == stderr {
|
||||
sameFile = &countingWriteCloser{
|
||||
WriteCloser: fw,
|
||||
count: 1,
|
||||
}
|
||||
}
|
||||
}
|
||||
i.dest(fw, fr)
|
||||
}
|
||||
if stdin == "" {
|
||||
return nil
|
||||
}
|
||||
f, err := fifo.OpenFifo(context.Background(), stdin, syscall.O_RDONLY|syscall.O_NONBLOCK, 0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("containerd-shim: opening %s failed: %s", stdin, err)
|
||||
}
|
||||
cwg.Add(1)
|
||||
go func() {
|
||||
cwg.Done()
|
||||
p := bufPool.Get().(*[]byte)
|
||||
defer bufPool.Put(p)
|
||||
|
||||
io.CopyBuffer(rio.Stdin(), f, *p)
|
||||
rio.Stdin().Close()
|
||||
f.Close()
|
||||
}()
|
||||
return nil
|
||||
}
|
||||
|
||||
// countingWriteCloser masks io.Closer() until close has been invoked a certain number of times.
|
||||
type countingWriteCloser struct {
|
||||
io.WriteCloser
|
||||
count int64
|
||||
}
|
||||
|
||||
func (c *countingWriteCloser) Close() error {
|
||||
if atomic.AddInt64(&c.count, -1) > 0 {
|
||||
return nil
|
||||
}
|
||||
return c.WriteCloser.Close()
|
||||
}
|
||||
|
||||
// NewBinaryIO runs a custom binary process for pluggable shim logging
|
||||
func NewBinaryIO(ctx context.Context, id string, uri *url.URL) (_ runc.IO, err error) {
|
||||
ns, err := namespaces.NamespaceRequired(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var closers []func() error
|
||||
defer func() {
|
||||
if err == nil {
|
||||
return
|
||||
}
|
||||
result := []error{err}
|
||||
for _, fn := range closers {
|
||||
result = append(result, fn())
|
||||
}
|
||||
err = errors.Join(result...)
|
||||
}()
|
||||
|
||||
out, err := newPipe()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create stdout pipes: %w", err)
|
||||
}
|
||||
closers = append(closers, out.Close)
|
||||
|
||||
serr, err := newPipe()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create stderr pipes: %w", err)
|
||||
}
|
||||
closers = append(closers, serr.Close)
|
||||
|
||||
r, w, err := os.Pipe()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
closers = append(closers, r.Close, w.Close)
|
||||
|
||||
cmd := NewBinaryCmd(uri, id, ns)
|
||||
cmd.ExtraFiles = append(cmd.ExtraFiles, out.r, serr.r, w)
|
||||
// don't need to register this with the reaper or wait when
|
||||
// running inside a shim
|
||||
if err := cmd.Start(); err != nil {
|
||||
return nil, fmt.Errorf("failed to start binary process: %w", err)
|
||||
}
|
||||
closers = append(closers, func() error { return cmd.Process.Kill() })
|
||||
|
||||
// close our side of the pipe after start
|
||||
if err := w.Close(); err != nil {
|
||||
return nil, fmt.Errorf("failed to close write pipe after start: %w", err)
|
||||
}
|
||||
|
||||
// wait for the logging binary to be ready
|
||||
b := make([]byte, 1)
|
||||
if _, err := r.Read(b); err != nil && err != io.EOF {
|
||||
return nil, fmt.Errorf("failed to read from logging binary: %w", err)
|
||||
}
|
||||
|
||||
return &binaryIO{
|
||||
cmd: cmd,
|
||||
out: out,
|
||||
err: serr,
|
||||
}, nil
|
||||
}
|
||||
|
||||
type binaryIO struct {
|
||||
cmd *exec.Cmd
|
||||
out, err *pipe
|
||||
}
|
||||
|
||||
func (b *binaryIO) CloseAfterStart() error {
|
||||
var result []error
|
||||
|
||||
for _, v := range []*pipe{b.out, b.err} {
|
||||
if v != nil {
|
||||
if err := v.r.Close(); err != nil {
|
||||
result = append(result, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return errors.Join(result...)
|
||||
}
|
||||
|
||||
func (b *binaryIO) Close() error {
|
||||
var result []error
|
||||
|
||||
for _, v := range []*pipe{b.out, b.err} {
|
||||
if v != nil {
|
||||
if err := v.Close(); err != nil {
|
||||
result = append(result, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if err := b.cancel(); err != nil {
|
||||
result = append(result, err)
|
||||
}
|
||||
|
||||
return errors.Join(result...)
|
||||
}
|
||||
|
||||
func (b *binaryIO) cancel() error {
|
||||
if b.cmd == nil || b.cmd.Process == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Send SIGTERM first, so logger process has a chance to flush and exit properly
|
||||
if err := b.cmd.Process.Signal(syscall.SIGTERM); err != nil {
|
||||
result := []error{fmt.Errorf("failed to send SIGTERM: %w", err)}
|
||||
|
||||
log.L.WithError(err).Warn("failed to send SIGTERM signal, killing logging shim")
|
||||
|
||||
if err := b.cmd.Process.Kill(); err != nil {
|
||||
result = append(result, fmt.Errorf("failed to kill process after faulty SIGTERM: %w", err))
|
||||
}
|
||||
|
||||
return errors.Join(result...)
|
||||
}
|
||||
|
||||
done := make(chan error, 1)
|
||||
go func() {
|
||||
done <- b.cmd.Wait()
|
||||
}()
|
||||
|
||||
select {
|
||||
case err := <-done:
|
||||
return err
|
||||
case <-time.After(binaryIOProcTermTimeout):
|
||||
log.L.Warn("failed to wait for shim logger process to exit, killing")
|
||||
|
||||
err := b.cmd.Process.Kill()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to kill shim logger process: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
func (b *binaryIO) Stdin() io.WriteCloser {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (b *binaryIO) Stdout() io.ReadCloser {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (b *binaryIO) Stderr() io.ReadCloser {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (b *binaryIO) Set(cmd *exec.Cmd) {
|
||||
if b.out != nil {
|
||||
cmd.Stdout = b.out.w
|
||||
}
|
||||
if b.err != nil {
|
||||
cmd.Stderr = b.err.w
|
||||
}
|
||||
}
|
||||
|
||||
func newPipe() (*pipe, error) {
|
||||
r, w, err := os.Pipe()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &pipe{
|
||||
r: r,
|
||||
w: w,
|
||||
}, nil
|
||||
}
|
||||
|
||||
type pipe struct {
|
||||
r *os.File
|
||||
w *os.File
|
||||
}
|
||||
|
||||
func (p *pipe) Close() error {
|
||||
var result []error
|
||||
|
||||
if err := p.w.Close(); err != nil {
|
||||
result = append(result, fmt.Errorf("pipe: failed to close write pipe: %w", err))
|
||||
}
|
||||
|
||||
if err := p.r.Close(); err != nil {
|
||||
result = append(result, fmt.Errorf("pipe: failed to close read pipe: %w", err))
|
||||
}
|
||||
|
||||
return errors.Join(result...)
|
||||
}
|
||||
72
cmd/containerd-shim-runc-v2/process/io_test.go
Normal file
72
cmd/containerd-shim-runc-v2/process/io_test.go
Normal file
@@ -0,0 +1,72 @@
|
||||
//go:build linux
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package process
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/url"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/containerd/containerd/v2/namespaces"
|
||||
)
|
||||
|
||||
func TestNewBinaryIO(t *testing.T) {
|
||||
ctx := namespaces.WithNamespace(context.Background(), "test")
|
||||
uri, _ := url.Parse("binary:///bin/echo?test")
|
||||
|
||||
before := descriptorCount(t)
|
||||
|
||||
io, err := NewBinaryIO(ctx, "1", uri)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
err = io.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
after := descriptorCount(t)
|
||||
if before != after-1 { // one descriptor must be closed from shim logger side
|
||||
t.Fatalf("some descriptors weren't closed (%d != %d -1)", before, after)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewBinaryIOCleanup(t *testing.T) {
|
||||
ctx := namespaces.WithNamespace(context.Background(), "test")
|
||||
uri, _ := url.Parse("binary:///not/existing")
|
||||
|
||||
before := descriptorCount(t)
|
||||
_, err := NewBinaryIO(ctx, "2", uri)
|
||||
if err == nil {
|
||||
t.Fatal("error expected for invalid binary")
|
||||
}
|
||||
|
||||
after := descriptorCount(t)
|
||||
if before != after {
|
||||
t.Fatalf("some descriptors weren't closed (%d != %d)", before, after)
|
||||
}
|
||||
}
|
||||
|
||||
func descriptorCount(t *testing.T) int {
|
||||
t.Helper()
|
||||
files, _ := os.ReadDir("/proc/self/fd")
|
||||
return len(files)
|
||||
}
|
||||
53
cmd/containerd-shim-runc-v2/process/io_util.go
Normal file
53
cmd/containerd-shim-runc-v2/process/io_util.go
Normal file
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package process
|
||||
|
||||
import (
|
||||
"net/url"
|
||||
"os"
|
||||
"os/exec"
|
||||
)
|
||||
|
||||
// NewBinaryCmd returns a Cmd to be used to start a logging binary.
|
||||
// The Cmd is generated from the provided uri, and the container ID and
|
||||
// namespace are appended to the Cmd environment.
|
||||
func NewBinaryCmd(binaryURI *url.URL, id, ns string) *exec.Cmd {
|
||||
var args []string
|
||||
for k, vs := range binaryURI.Query() {
|
||||
args = append(args, k)
|
||||
if len(vs) > 0 {
|
||||
args = append(args, vs[0])
|
||||
}
|
||||
}
|
||||
|
||||
cmd := exec.Command(binaryURI.Path, args...)
|
||||
|
||||
cmd.Env = append(cmd.Env,
|
||||
"CONTAINER_ID="+id,
|
||||
"CONTAINER_NAMESPACE="+ns,
|
||||
)
|
||||
|
||||
return cmd
|
||||
}
|
||||
|
||||
// CloseFiles closes any files passed in.
|
||||
// It it used for cleanup in the event of unexpected errors.
|
||||
func CloseFiles(files ...*os.File) {
|
||||
for _, file := range files {
|
||||
file.Close()
|
||||
}
|
||||
}
|
||||
56
cmd/containerd-shim-runc-v2/process/process.go
Normal file
56
cmd/containerd-shim-runc-v2/process/process.go
Normal file
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package process
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"time"
|
||||
|
||||
"github.com/containerd/console"
|
||||
"github.com/containerd/containerd/v2/pkg/stdio"
|
||||
)
|
||||
|
||||
// Process on a system
|
||||
type Process interface {
|
||||
// ID returns the id for the process
|
||||
ID() string
|
||||
// Pid returns the pid for the process
|
||||
Pid() int
|
||||
// ExitStatus returns the exit status
|
||||
ExitStatus() int
|
||||
// ExitedAt is the time the process exited
|
||||
ExitedAt() time.Time
|
||||
// Stdin returns the process STDIN
|
||||
Stdin() io.Closer
|
||||
// Stdio returns io information for the container
|
||||
Stdio() stdio.Stdio
|
||||
// Status returns the process status
|
||||
Status(context.Context) (string, error)
|
||||
// Wait blocks until the process has exited
|
||||
Wait()
|
||||
// Resize resizes the process console
|
||||
Resize(ws console.WinSize) error
|
||||
// Start execution of the process
|
||||
Start(context.Context) error
|
||||
// Delete deletes the process and its resourcess
|
||||
Delete(context.Context) error
|
||||
// Kill kills the process
|
||||
Kill(context.Context, uint32, bool) error
|
||||
// SetExited sets the exit status for the process
|
||||
SetExited(status int)
|
||||
}
|
||||
66
cmd/containerd-shim-runc-v2/process/types.go
Normal file
66
cmd/containerd-shim-runc-v2/process/types.go
Normal file
@@ -0,0 +1,66 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package process
|
||||
|
||||
import (
|
||||
google_protobuf "github.com/containerd/containerd/v2/protobuf/types"
|
||||
)
|
||||
|
||||
// Mount holds filesystem mount configuration
|
||||
type Mount struct {
|
||||
Type string
|
||||
Source string
|
||||
Target string
|
||||
Options []string
|
||||
}
|
||||
|
||||
// CreateConfig hold task creation configuration
|
||||
type CreateConfig struct {
|
||||
ID string
|
||||
Bundle string
|
||||
Runtime string
|
||||
Rootfs []Mount
|
||||
Terminal bool
|
||||
Stdin string
|
||||
Stdout string
|
||||
Stderr string
|
||||
Checkpoint string
|
||||
ParentCheckpoint string
|
||||
Options *google_protobuf.Any
|
||||
}
|
||||
|
||||
// ExecConfig holds exec creation configuration
|
||||
type ExecConfig struct {
|
||||
ID string
|
||||
Terminal bool
|
||||
Stdin string
|
||||
Stdout string
|
||||
Stderr string
|
||||
Spec *google_protobuf.Any
|
||||
}
|
||||
|
||||
// CheckpointConfig holds task checkpoint configuration
|
||||
type CheckpointConfig struct {
|
||||
WorkDir string
|
||||
Path string
|
||||
Exit bool
|
||||
AllowOpenTCP bool
|
||||
AllowExternalUnixSockets bool
|
||||
AllowTerminal bool
|
||||
FileLocks bool
|
||||
EmptyNamespaces []string
|
||||
}
|
||||
210
cmd/containerd-shim-runc-v2/process/utils.go
Normal file
210
cmd/containerd-shim-runc-v2/process/utils.go
Normal file
@@ -0,0 +1,210 @@
|
||||
//go:build !windows
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package process
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/containerd/containerd/v2/errdefs"
|
||||
runc "github.com/containerd/go-runc"
|
||||
specs "github.com/opencontainers/runtime-spec/specs-go"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const (
|
||||
// RuncRoot is the path to the root runc state directory
|
||||
RuncRoot = "/run/containerd/runc"
|
||||
// InitPidFile name of the file that contains the init pid
|
||||
InitPidFile = "init.pid"
|
||||
// configFile is the name of the runc config file
|
||||
configFile = "config.json"
|
||||
)
|
||||
|
||||
// safePid is a thread safe wrapper for pid.
|
||||
type safePid struct {
|
||||
sync.Mutex
|
||||
pid int
|
||||
}
|
||||
|
||||
func (s *safePid) get() int {
|
||||
s.Lock()
|
||||
defer s.Unlock()
|
||||
return s.pid
|
||||
}
|
||||
|
||||
// TODO(mlaventure): move to runc package?
|
||||
func getLastRuntimeError(r *runc.Runc) (string, error) {
|
||||
if r.Log == "" {
|
||||
return "", nil
|
||||
}
|
||||
|
||||
f, err := os.OpenFile(r.Log, os.O_RDONLY, 0400)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var (
|
||||
errMsg string
|
||||
log struct {
|
||||
Level string
|
||||
Msg string
|
||||
Time time.Time
|
||||
}
|
||||
)
|
||||
|
||||
dec := json.NewDecoder(f)
|
||||
for err = nil; err == nil; {
|
||||
if err = dec.Decode(&log); err != nil && err != io.EOF {
|
||||
return "", err
|
||||
}
|
||||
if log.Level == "error" {
|
||||
errMsg = strings.TrimSpace(log.Msg)
|
||||
}
|
||||
}
|
||||
|
||||
return errMsg, nil
|
||||
}
|
||||
|
||||
// criuError returns only the first line of the error message from criu
|
||||
// it tries to add an invalid dump log location when returning the message
|
||||
func criuError(err error) string {
|
||||
parts := strings.Split(err.Error(), "\n")
|
||||
return parts[0]
|
||||
}
|
||||
|
||||
func copyFile(to, from string) error {
|
||||
ff, err := os.Open(from)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer ff.Close()
|
||||
tt, err := os.Create(to)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer tt.Close()
|
||||
|
||||
p := bufPool.Get().(*[]byte)
|
||||
defer bufPool.Put(p)
|
||||
_, err = io.CopyBuffer(tt, ff, *p)
|
||||
return err
|
||||
}
|
||||
|
||||
func checkKillError(err error) error {
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
if strings.Contains(err.Error(), "os: process already finished") ||
|
||||
strings.Contains(err.Error(), "container not running") ||
|
||||
strings.Contains(strings.ToLower(err.Error()), "no such process") ||
|
||||
err == unix.ESRCH {
|
||||
return fmt.Errorf("process already finished: %w", errdefs.ErrNotFound)
|
||||
} else if strings.Contains(err.Error(), "does not exist") {
|
||||
return fmt.Errorf("no such container: %w", errdefs.ErrNotFound)
|
||||
}
|
||||
return fmt.Errorf("unknown error after kill: %w", err)
|
||||
}
|
||||
|
||||
func newPidFile(bundle string) *pidFile {
|
||||
return &pidFile{
|
||||
path: filepath.Join(bundle, InitPidFile),
|
||||
}
|
||||
}
|
||||
|
||||
func newExecPidFile(bundle, id string) *pidFile {
|
||||
return &pidFile{
|
||||
path: filepath.Join(bundle, fmt.Sprintf("%s.pid", id)),
|
||||
}
|
||||
}
|
||||
|
||||
type pidFile struct {
|
||||
path string
|
||||
}
|
||||
|
||||
func (p *pidFile) Path() string {
|
||||
return p.path
|
||||
}
|
||||
|
||||
func (p *pidFile) Read() (int, error) {
|
||||
return runc.ReadPidFile(p.path)
|
||||
}
|
||||
|
||||
// waitTimeout handles waiting on a waitgroup with a specified timeout.
|
||||
// this is commonly used for waiting on IO to finish after a process has exited
|
||||
func waitTimeout(ctx context.Context, wg *sync.WaitGroup, timeout time.Duration) error {
|
||||
ctx, cancel := context.WithTimeout(ctx, timeout)
|
||||
defer cancel()
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
wg.Wait()
|
||||
close(done)
|
||||
}()
|
||||
select {
|
||||
case <-done:
|
||||
return nil
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
}
|
||||
}
|
||||
|
||||
func stateName(v interface{}) string {
|
||||
switch v.(type) {
|
||||
case *runningState, *execRunningState:
|
||||
return "running"
|
||||
case *createdState, *execCreatedState, *createdCheckpointState:
|
||||
return "created"
|
||||
case *pausedState:
|
||||
return "paused"
|
||||
case *deletedState:
|
||||
return "deleted"
|
||||
case *stoppedState:
|
||||
return "stopped"
|
||||
}
|
||||
panic(fmt.Errorf("invalid state %v", v))
|
||||
}
|
||||
|
||||
func readConfig(path string) (spec *specs.Spec, err error) {
|
||||
cfg := filepath.Join(path, configFile)
|
||||
f, err := os.Open(cfg)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil, fmt.Errorf("JSON specification file %s not found", cfg)
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
if err = json.NewDecoder(f).Decode(&spec); err != nil {
|
||||
return nil, fmt.Errorf("failed to parse config: %w", err)
|
||||
}
|
||||
if spec == nil {
|
||||
return nil, errors.New("config cannot be null")
|
||||
}
|
||||
return spec, nil
|
||||
}
|
||||
@@ -31,10 +31,10 @@ import (
|
||||
cgroupsv2 "github.com/containerd/cgroups/v3/cgroup2"
|
||||
"github.com/containerd/console"
|
||||
"github.com/containerd/containerd/v2/api/runtime/task/v3"
|
||||
"github.com/containerd/containerd/v2/cmd/containerd-shim-runc-v2/process"
|
||||
"github.com/containerd/containerd/v2/errdefs"
|
||||
"github.com/containerd/containerd/v2/mount"
|
||||
"github.com/containerd/containerd/v2/namespaces"
|
||||
"github.com/containerd/containerd/v2/pkg/process"
|
||||
"github.com/containerd/containerd/v2/pkg/stdio"
|
||||
"github.com/containerd/containerd/v2/runtime/v2/runc/options"
|
||||
"github.com/containerd/log"
|
||||
|
||||
@@ -29,8 +29,8 @@ import (
|
||||
"syscall"
|
||||
|
||||
"github.com/containerd/console"
|
||||
"github.com/containerd/containerd/v2/cmd/containerd-shim-runc-v2/process"
|
||||
"github.com/containerd/containerd/v2/namespaces"
|
||||
"github.com/containerd/containerd/v2/pkg/process"
|
||||
"github.com/containerd/containerd/v2/pkg/stdio"
|
||||
"github.com/containerd/fifo"
|
||||
)
|
||||
|
||||
@@ -30,13 +30,13 @@ import (
|
||||
eventstypes "github.com/containerd/containerd/v2/api/events"
|
||||
taskAPI "github.com/containerd/containerd/v2/api/runtime/task/v3"
|
||||
"github.com/containerd/containerd/v2/api/types/task"
|
||||
"github.com/containerd/containerd/v2/cmd/containerd-shim-runc-v2/process"
|
||||
"github.com/containerd/containerd/v2/cmd/containerd-shim-runc-v2/runc"
|
||||
"github.com/containerd/containerd/v2/errdefs"
|
||||
"github.com/containerd/containerd/v2/namespaces"
|
||||
"github.com/containerd/containerd/v2/pkg/oom"
|
||||
oomv1 "github.com/containerd/containerd/v2/pkg/oom/v1"
|
||||
oomv2 "github.com/containerd/containerd/v2/pkg/oom/v2"
|
||||
"github.com/containerd/containerd/v2/pkg/process"
|
||||
"github.com/containerd/containerd/v2/pkg/shutdown"
|
||||
"github.com/containerd/containerd/v2/pkg/stdio"
|
||||
"github.com/containerd/containerd/v2/pkg/userns"
|
||||
|
||||
Reference in New Issue
Block a user