
Changes: https://github.com/containerd/typeurl/compare/7f6e6d160d67...v2.1.0 Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
514 lines
13 KiB
Go
514 lines
13 KiB
Go
//go:build linux
|
|
|
|
/*
|
|
Copyright The containerd Authors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package runc
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"sync"
|
|
|
|
"github.com/containerd/cgroups/v3"
|
|
"github.com/containerd/cgroups/v3/cgroup1"
|
|
cgroupsv2 "github.com/containerd/cgroups/v3/cgroup2"
|
|
"github.com/containerd/console"
|
|
"github.com/containerd/containerd/api/runtime/task/v2"
|
|
"github.com/containerd/containerd/errdefs"
|
|
"github.com/containerd/containerd/mount"
|
|
"github.com/containerd/containerd/namespaces"
|
|
"github.com/containerd/containerd/pkg/process"
|
|
"github.com/containerd/containerd/pkg/stdio"
|
|
"github.com/containerd/containerd/runtime/v2/runc/options"
|
|
"github.com/containerd/typeurl/v2"
|
|
"github.com/sirupsen/logrus"
|
|
)
|
|
|
|
// NewContainer returns a new runc container
|
|
func NewContainer(ctx context.Context, platform stdio.Platform, r *task.CreateTaskRequest) (_ *Container, retErr error) {
|
|
ns, err := namespaces.NamespaceRequired(ctx)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("create namespace: %w", err)
|
|
}
|
|
|
|
opts := &options.Options{}
|
|
if r.Options.GetValue() != nil {
|
|
v, err := typeurl.UnmarshalAny(r.Options)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if v != nil {
|
|
opts = v.(*options.Options)
|
|
}
|
|
}
|
|
|
|
var pmounts []process.Mount
|
|
for _, m := range r.Rootfs {
|
|
pmounts = append(pmounts, process.Mount{
|
|
Type: m.Type,
|
|
Source: m.Source,
|
|
Target: m.Target,
|
|
Options: m.Options,
|
|
})
|
|
}
|
|
|
|
rootfs := ""
|
|
if len(pmounts) > 0 {
|
|
rootfs = filepath.Join(r.Bundle, "rootfs")
|
|
if err := os.Mkdir(rootfs, 0711); err != nil && !os.IsExist(err) {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
config := &process.CreateConfig{
|
|
ID: r.ID,
|
|
Bundle: r.Bundle,
|
|
Runtime: opts.BinaryName,
|
|
Rootfs: pmounts,
|
|
Terminal: r.Terminal,
|
|
Stdin: r.Stdin,
|
|
Stdout: r.Stdout,
|
|
Stderr: r.Stderr,
|
|
Checkpoint: r.Checkpoint,
|
|
ParentCheckpoint: r.ParentCheckpoint,
|
|
Options: r.Options,
|
|
}
|
|
|
|
if err := WriteOptions(r.Bundle, opts); err != nil {
|
|
return nil, err
|
|
}
|
|
// For historical reason, we write opts.BinaryName as well as the entire opts
|
|
if err := WriteRuntime(r.Bundle, opts.BinaryName); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var mounts []mount.Mount
|
|
for _, pm := range pmounts {
|
|
mounts = append(mounts, mount.Mount{
|
|
Type: pm.Type,
|
|
Source: pm.Source,
|
|
Target: pm.Target,
|
|
Options: pm.Options,
|
|
})
|
|
}
|
|
defer func() {
|
|
if retErr != nil {
|
|
if err := mount.UnmountMounts(mounts, rootfs, 0); err != nil {
|
|
logrus.WithError(err).Warn("failed to cleanup rootfs mount")
|
|
}
|
|
}
|
|
}()
|
|
if err := mount.All(mounts, rootfs); err != nil {
|
|
return nil, fmt.Errorf("failed to mount rootfs component: %w", err)
|
|
}
|
|
|
|
p, err := newInit(
|
|
ctx,
|
|
r.Bundle,
|
|
filepath.Join(r.Bundle, "work"),
|
|
ns,
|
|
platform,
|
|
config,
|
|
opts,
|
|
rootfs,
|
|
)
|
|
if err != nil {
|
|
return nil, errdefs.ToGRPC(err)
|
|
}
|
|
if err := p.Create(ctx, config); err != nil {
|
|
return nil, errdefs.ToGRPC(err)
|
|
}
|
|
container := &Container{
|
|
ID: r.ID,
|
|
Bundle: r.Bundle,
|
|
process: p,
|
|
processes: make(map[string]process.Process),
|
|
reservedProcess: make(map[string]struct{}),
|
|
}
|
|
pid := p.Pid()
|
|
if pid > 0 {
|
|
var cg interface{}
|
|
if cgroups.Mode() == cgroups.Unified {
|
|
g, err := cgroupsv2.PidGroupPath(pid)
|
|
if err != nil {
|
|
logrus.WithError(err).Errorf("loading cgroup2 for %d", pid)
|
|
return container, nil
|
|
}
|
|
cg, err = cgroupsv2.Load(g)
|
|
if err != nil {
|
|
logrus.WithError(err).Errorf("loading cgroup2 for %d", pid)
|
|
}
|
|
} else {
|
|
cg, err = cgroup1.Load(cgroup1.PidPath(pid))
|
|
if err != nil {
|
|
logrus.WithError(err).Errorf("loading cgroup for %d", pid)
|
|
}
|
|
}
|
|
container.cgroup = cg
|
|
}
|
|
return container, nil
|
|
}
|
|
|
|
// optionsFilename is the name of the file, relative to the directory passed to
// ReadOptions and WriteOptions, that holds the JSON-encoded runtime options.
const optionsFilename = "options.json"
|
|
|
|
// ReadOptions reads the option information from the path.
|
|
// When the file does not exist, ReadOptions returns nil without an error.
|
|
func ReadOptions(path string) (*options.Options, error) {
|
|
filePath := filepath.Join(path, optionsFilename)
|
|
if _, err := os.Stat(filePath); err != nil {
|
|
if os.IsNotExist(err) {
|
|
return nil, nil
|
|
}
|
|
return nil, err
|
|
}
|
|
|
|
data, err := os.ReadFile(filePath)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
var opts options.Options
|
|
if err := json.Unmarshal(data, &opts); err != nil {
|
|
return nil, err
|
|
}
|
|
return &opts, nil
|
|
}
|
|
|
|
// WriteOptions writes the options information into the path
|
|
func WriteOptions(path string, opts *options.Options) error {
|
|
data, err := json.Marshal(opts)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return os.WriteFile(filepath.Join(path, optionsFilename), data, 0600)
|
|
}
|
|
|
|
// ReadRuntime returns the contents of the "runtime" file located under path.
// On a read failure it returns an empty string together with the error.
func ReadRuntime(path string) (string, error) {
	raw, err := os.ReadFile(filepath.Join(path, "runtime"))
	if err == nil {
		return string(raw), nil
	}
	return "", err
}
|
|
|
|
// WriteRuntime stores the runtime name in the "runtime" file under path,
// creating or truncating it with mode 0600.
func WriteRuntime(path, runtime string) error {
	target := filepath.Join(path, "runtime")
	payload := []byte(runtime)
	return os.WriteFile(target, payload, 0600)
}
|
|
|
|
func newInit(ctx context.Context, path, workDir, namespace string, platform stdio.Platform,
|
|
r *process.CreateConfig, options *options.Options, rootfs string) (*process.Init, error) {
|
|
runtime := process.NewRunc(options.Root, path, namespace, options.BinaryName, options.SystemdCgroup)
|
|
p := process.New(r.ID, runtime, stdio.Stdio{
|
|
Stdin: r.Stdin,
|
|
Stdout: r.Stdout,
|
|
Stderr: r.Stderr,
|
|
Terminal: r.Terminal,
|
|
})
|
|
p.Bundle = r.Bundle
|
|
p.Platform = platform
|
|
p.Rootfs = rootfs
|
|
p.WorkDir = workDir
|
|
p.IoUID = int(options.IoUid)
|
|
p.IoGID = int(options.IoGid)
|
|
p.NoPivotRoot = options.NoPivotRoot
|
|
p.NoNewKeyring = options.NoNewKeyring
|
|
p.CriuWorkPath = options.CriuWorkPath
|
|
if p.CriuWorkPath == "" {
|
|
// if criu work path not set, use container WorkDir
|
|
p.CriuWorkPath = p.WorkDir
|
|
}
|
|
return p, nil
|
|
}
|
|
|
|
// Container for operating on a runc container and its processes
|
|
type Container struct {
|
|
mu sync.Mutex
|
|
|
|
// ID of the container
|
|
ID string
|
|
// Bundle path
|
|
Bundle string
|
|
|
|
// cgroup is either cgroups.Cgroup or *cgroupsv2.Manager
|
|
cgroup interface{}
|
|
process process.Process
|
|
processes map[string]process.Process
|
|
reservedProcess map[string]struct{}
|
|
}
|
|
|
|
// All processes in the container
|
|
func (c *Container) All() (o []process.Process) {
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
|
|
for _, p := range c.processes {
|
|
o = append(o, p)
|
|
}
|
|
if c.process != nil {
|
|
o = append(o, c.process)
|
|
}
|
|
return o
|
|
}
|
|
|
|
// ExecdProcesses added to the container
|
|
func (c *Container) ExecdProcesses() (o []process.Process) {
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
for _, p := range c.processes {
|
|
o = append(o, p)
|
|
}
|
|
return o
|
|
}
|
|
|
|
// Pid of the main process of a container
|
|
func (c *Container) Pid() int {
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
return c.process.Pid()
|
|
}
|
|
|
|
// Cgroup of the container
|
|
func (c *Container) Cgroup() interface{} {
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
return c.cgroup
|
|
}
|
|
|
|
// CgroupSet sets the cgroup to the container
|
|
func (c *Container) CgroupSet(cg interface{}) {
|
|
c.mu.Lock()
|
|
c.cgroup = cg
|
|
c.mu.Unlock()
|
|
}
|
|
|
|
// Process returns the process by id
|
|
func (c *Container) Process(id string) (process.Process, error) {
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
if id == "" {
|
|
if c.process == nil {
|
|
return nil, fmt.Errorf("container must be created: %w", errdefs.ErrFailedPrecondition)
|
|
}
|
|
return c.process, nil
|
|
}
|
|
p, ok := c.processes[id]
|
|
if !ok {
|
|
return nil, fmt.Errorf("process does not exist %s: %w", id, errdefs.ErrNotFound)
|
|
}
|
|
return p, nil
|
|
}
|
|
|
|
// ReserveProcess checks for the existence of an id and atomically
|
|
// reserves the process id if it does not already exist
|
|
//
|
|
// Returns true if the process id was successfully reserved and a
|
|
// cancel func to release the reservation
|
|
func (c *Container) ReserveProcess(id string) (bool, func()) {
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
|
|
if _, ok := c.processes[id]; ok {
|
|
return false, nil
|
|
}
|
|
if _, ok := c.reservedProcess[id]; ok {
|
|
return false, nil
|
|
}
|
|
c.reservedProcess[id] = struct{}{}
|
|
return true, func() {
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
delete(c.reservedProcess, id)
|
|
}
|
|
}
|
|
|
|
// ProcessAdd adds a new process to the container
|
|
func (c *Container) ProcessAdd(process process.Process) {
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
|
|
delete(c.reservedProcess, process.ID())
|
|
c.processes[process.ID()] = process
|
|
}
|
|
|
|
// ProcessRemove removes the process by id from the container
|
|
func (c *Container) ProcessRemove(id string) {
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
delete(c.processes, id)
|
|
}
|
|
|
|
// Start a container process
|
|
func (c *Container) Start(ctx context.Context, r *task.StartRequest) (process.Process, error) {
|
|
p, err := c.Process(r.ExecID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if err := p.Start(ctx); err != nil {
|
|
return nil, err
|
|
}
|
|
if c.Cgroup() == nil && p.Pid() > 0 {
|
|
var cg interface{}
|
|
if cgroups.Mode() == cgroups.Unified {
|
|
g, err := cgroupsv2.PidGroupPath(p.Pid())
|
|
if err != nil {
|
|
logrus.WithError(err).Errorf("loading cgroup2 for %d", p.Pid())
|
|
}
|
|
cg, err = cgroupsv2.Load(g)
|
|
if err != nil {
|
|
logrus.WithError(err).Errorf("loading cgroup2 for %d", p.Pid())
|
|
}
|
|
} else {
|
|
cg, err = cgroup1.Load(cgroup1.PidPath(p.Pid()))
|
|
if err != nil {
|
|
logrus.WithError(err).Errorf("loading cgroup for %d", p.Pid())
|
|
}
|
|
}
|
|
c.cgroup = cg
|
|
}
|
|
return p, nil
|
|
}
|
|
|
|
// Delete the container or a process by id
|
|
func (c *Container) Delete(ctx context.Context, r *task.DeleteRequest) (process.Process, error) {
|
|
p, err := c.Process(r.ExecID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if err := p.Delete(ctx); err != nil {
|
|
return nil, err
|
|
}
|
|
if r.ExecID != "" {
|
|
c.ProcessRemove(r.ExecID)
|
|
}
|
|
return p, nil
|
|
}
|
|
|
|
// Exec an additional process
|
|
func (c *Container) Exec(ctx context.Context, r *task.ExecProcessRequest) (process.Process, error) {
|
|
process, err := c.process.(*process.Init).Exec(ctx, c.Bundle, &process.ExecConfig{
|
|
ID: r.ExecID,
|
|
Terminal: r.Terminal,
|
|
Stdin: r.Stdin,
|
|
Stdout: r.Stdout,
|
|
Stderr: r.Stderr,
|
|
Spec: r.Spec,
|
|
})
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
c.ProcessAdd(process)
|
|
return process, nil
|
|
}
|
|
|
|
// Pause the container
|
|
func (c *Container) Pause(ctx context.Context) error {
|
|
return c.process.(*process.Init).Pause(ctx)
|
|
}
|
|
|
|
// Resume the container
|
|
func (c *Container) Resume(ctx context.Context) error {
|
|
return c.process.(*process.Init).Resume(ctx)
|
|
}
|
|
|
|
// ResizePty of a process
|
|
func (c *Container) ResizePty(ctx context.Context, r *task.ResizePtyRequest) error {
|
|
p, err := c.Process(r.ExecID)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
ws := console.WinSize{
|
|
Width: uint16(r.Width),
|
|
Height: uint16(r.Height),
|
|
}
|
|
return p.Resize(ws)
|
|
}
|
|
|
|
// Kill a process
|
|
func (c *Container) Kill(ctx context.Context, r *task.KillRequest) error {
|
|
p, err := c.Process(r.ExecID)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return p.Kill(ctx, r.Signal, r.All)
|
|
}
|
|
|
|
// CloseIO of a process
|
|
func (c *Container) CloseIO(ctx context.Context, r *task.CloseIORequest) error {
|
|
p, err := c.Process(r.ExecID)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if stdin := p.Stdin(); stdin != nil {
|
|
if err := stdin.Close(); err != nil {
|
|
return fmt.Errorf("close stdin: %w", err)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Checkpoint the container
|
|
func (c *Container) Checkpoint(ctx context.Context, r *task.CheckpointTaskRequest) error {
|
|
p, err := c.Process("")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
var opts *options.CheckpointOptions
|
|
if r.Options != nil {
|
|
v, err := typeurl.UnmarshalAny(r.Options)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
opts = v.(*options.CheckpointOptions)
|
|
}
|
|
return p.(*process.Init).Checkpoint(ctx, &process.CheckpointConfig{
|
|
Path: r.Path,
|
|
Exit: opts.Exit,
|
|
AllowOpenTCP: opts.OpenTcp,
|
|
AllowExternalUnixSockets: opts.ExternalUnixSockets,
|
|
AllowTerminal: opts.Terminal,
|
|
FileLocks: opts.FileLocks,
|
|
EmptyNamespaces: opts.EmptyNamespaces,
|
|
WorkDir: opts.WorkPath,
|
|
})
|
|
}
|
|
|
|
// Update the resource information of a running container
|
|
func (c *Container) Update(ctx context.Context, r *task.UpdateTaskRequest) error {
|
|
p, err := c.Process("")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return p.(*process.Init).Update(ctx, r.Resources)
|
|
}
|
|
|
|
// HasPid returns true if the container owns a specific pid
|
|
func (c *Container) HasPid(pid int) bool {
|
|
if c.Pid() == pid {
|
|
return true
|
|
}
|
|
for _, p := range c.All() {
|
|
if p.Pid() == pid {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|