
/*
   Copyright The containerd Authors.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
*/

package manager

import (
	"context"
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
	goruntime "runtime"
	"syscall"
	"time"

	"github.com/containerd/cgroups/v3"
	"github.com/containerd/cgroups/v3/cgroup1"
	cgroupsv2 "github.com/containerd/cgroups/v3/cgroup2"
	"github.com/containerd/containerd/log"
	"github.com/containerd/containerd/mount"
	"github.com/containerd/containerd/namespaces"
	"github.com/containerd/containerd/pkg/process"
	"github.com/containerd/containerd/pkg/schedcore"
	"github.com/containerd/containerd/runtime/v2/runc"
	"github.com/containerd/containerd/runtime/v2/runc/options"
	"github.com/containerd/containerd/runtime/v2/shim"
	runcC "github.com/containerd/go-runc"
	exec "golang.org/x/sys/execabs"
	"golang.org/x/sys/unix"
)

// NewShimManager returns an implementation of the shim manager
// using runc
func NewShimManager(name string) shim.Manager {
	return &manager{
		name: name,
	}
}

// groupLabels specifies how the shim groups services.
// Currently it supports a runc.v2-specific .group label and the
// standard k8s pod label. Order matters in this list.
var groupLabels = []string{
	"io.containerd.runc.v2.group",
	"io.kubernetes.cri.sandbox-id",
}

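// spec is a minimal view of the OCI runtime spec in the bundle's
// config.json; only the annotations are needed to pick a shim group.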
type spec struct {
	Annotations map[string]string `json:"annotations,omitempty"`
}

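// manager is the runc shim.Manager implementation returned by NewShimManager.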
type manager struct {
	name string
}

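// newCommand builds the command used to re-exec the current shim binary
// for the given container ID, running it in its own process group.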
func newCommand(ctx context.Context, id, containerdAddress, containerdTTRPCAddress string, debug bool) (*exec.Cmd, error) {
	ns, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return nil, err
	}
	self, err := os.Executable()
	if err != nil {
		return nil, err
	}
	cwd, err := os.Getwd()
	if err != nil {
		return nil, err
	}
	args := []string{
		"-namespace", ns,
		"-id", id,
		"-address", containerdAddress,
	}
	if debug {
		args = append(args, "-debug")
	}
	cmd := exec.Command(self, args...)
	cmd.Dir = cwd
	cmd.Env = append(os.Environ(), "GOMAXPROCS=4")
	cmd.SysProcAttr = &syscall.SysProcAttr{
		Setpgid: true,
	}
	return cmd, nil
}

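// readSpec decodes the annotations from the bundle's config.json in the
// current working directory.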
func readSpec() (*spec, error) {
	f, err := os.Open("config.json")
	if err != nil {
		return nil, err
	}
	defer f.Close()
	var s spec
	if err := json.NewDecoder(f).Decode(&s); err != nil {
		return nil, err
	}
	return &s, nil
}

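// Name returns the name of the shim manager.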
func (m manager) Name() string {
	return m.name
}

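// Start launches a new shim process for the container, or reuses the
// socket of an existing shim when the container belongs to an already
// running group, and returns the shim's socket address.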
func (manager) Start(ctx context.Context, id string, opts shim.StartOpts) (_ string, retErr error) {
	cmd, err := newCommand(ctx, id, opts.Address, opts.TTRPCAddress, opts.Debug)
	if err != nil {
		return "", err
	}
	grouping := id
	spec, err := readSpec()
	if err != nil {
		return "", err
	}
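	// Prefer a group ID from the spec annotations so that containers in the
	// same group (for example a Kubernetes pod) share a single shim.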
	for _, group := range groupLabels {
		if groupID, ok := spec.Annotations[group]; ok {
			grouping = groupID
			break
		}
	}
	address, err := shim.SocketAddress(ctx, opts.Address, grouping)
	if err != nil {
		return "", err
	}

	socket, err := shim.NewSocket(address)
	if err != nil {
		// the only time where this would happen is if there is a bug and the socket
		// was not cleaned up in the cleanup method of the shim or we are using the
		// grouping functionality where the new process should be run with the same
		// shim as an existing container
		if !shim.SocketEaddrinuse(err) {
			return "", fmt.Errorf("create new shim socket: %w", err)
		}
		if shim.CanConnect(address) {
			if err := shim.WriteAddress("address", address); err != nil {
				return "", fmt.Errorf("write existing socket for shim: %w", err)
			}
			return address, nil
		}
		if err := shim.RemoveSocket(address); err != nil {
			return "", fmt.Errorf("remove pre-existing socket: %w", err)
		}
		if socket, err = shim.NewSocket(address); err != nil {
			return "", fmt.Errorf("try create new shim socket 2x: %w", err)
		}
	}
	defer func() {
		if retErr != nil {
			socket.Close()
			_ = shim.RemoveSocket(address)
		}
	}()

	// make sure that the re-exec'd shim-v2 binary can use this address if needed
	if err := shim.WriteAddress("address", address); err != nil {
		return "", err
	}

	f, err := socket.File()
	if err != nil {
		return "", err
	}

	cmd.ExtraFiles = append(cmd.ExtraFiles, f)

	goruntime.LockOSThread()
	if os.Getenv("SCHED_CORE") != "" {
		if err := schedcore.Create(schedcore.ProcessGroup); err != nil {
			return "", fmt.Errorf("enable sched core support: %w", err)
		}
	}

	if err := cmd.Start(); err != nil {
		f.Close()
		return "", err
	}

	goruntime.UnlockOSThread()

	defer func() {
		if retErr != nil {
			cmd.Process.Kill()
		}
	}()
	// make sure to wait after start
	go cmd.Wait()

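	// If the runtime options specify a shim cgroup, move the shim process into
	// it, using the cgroup v2 or v1 API depending on the host's cgroup mode.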
	if opts, err := shim.ReadRuntimeOptions[*options.Options](os.Stdin); err == nil {
		if opts.ShimCgroup != "" {
			if cgroups.Mode() == cgroups.Unified {
				cg, err := cgroupsv2.Load(opts.ShimCgroup)
				if err != nil {
					return "", fmt.Errorf("failed to load cgroup %s: %w", opts.ShimCgroup, err)
				}
				if err := cg.AddProc(uint64(cmd.Process.Pid)); err != nil {
					return "", fmt.Errorf("failed to join cgroup %s: %w", opts.ShimCgroup, err)
				}
			} else {
				cg, err := cgroup1.Load(cgroup1.StaticPath(opts.ShimCgroup))
				if err != nil {
					return "", fmt.Errorf("failed to load cgroup %s: %w", opts.ShimCgroup, err)
				}
				if err := cg.AddProc(uint64(cmd.Process.Pid)); err != nil {
					return "", fmt.Errorf("failed to join cgroup %s: %w", opts.ShimCgroup, err)
				}
			}
		}
	}

	if err := shim.AdjustOOMScore(cmd.Process.Pid); err != nil {
		return "", fmt.Errorf("failed to adjust OOM score for shim: %w", err)
	}
	return address, nil
}

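// Stop force-deletes the container with runc, recursively unmounts its
// rootfs, and reports a SIGKILL stop status along with the recorded init pid.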
func (manager) Stop(ctx context.Context, id string) (shim.StopStatus, error) {
	cwd, err := os.Getwd()
	if err != nil {
		return shim.StopStatus{}, err
	}

	path := filepath.Join(filepath.Dir(cwd), id)
	ns, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return shim.StopStatus{}, err
	}
	runtime, err := runc.ReadRuntime(path)
	if err != nil {
		return shim.StopStatus{}, err
	}
	opts, err := runc.ReadOptions(path)
	if err != nil {
		return shim.StopStatus{}, err
	}
	root := process.RuncRoot
	if opts != nil && opts.Root != "" {
		root = opts.Root
	}

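	// Best-effort cleanup: force-delete the runc container, unmount the
	// rootfs (deepest mounts first), and read the recorded init pid.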
	r := process.NewRunc(root, path, ns, runtime, false)
	if err := r.Delete(ctx, id, &runcC.DeleteOpts{
		Force: true,
	}); err != nil {
		log.G(ctx).WithError(err).Warn("failed to remove runc container")
	}
	if err := mount.UnmountRecursive(filepath.Join(path, "rootfs"), 0); err != nil {
		log.G(ctx).WithError(err).Warn("failed to cleanup rootfs mount")
	}
	pid, err := runcC.ReadPidFile(filepath.Join(path, process.InitPidFile))
	if err != nil {
		log.G(ctx).WithError(err).Warn("failed to read init pid file")
	}
	return shim.StopStatus{
		ExitedAt:   time.Now(),
		ExitStatus: 128 + int(unix.SIGKILL),
		Pid:        pid,
	}, nil
}