387 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			387 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
/*
 | 
						|
   Copyright The containerd Authors.
 | 
						|
 | 
						|
   Licensed under the Apache License, Version 2.0 (the "License");
 | 
						|
   you may not use this file except in compliance with the License.
 | 
						|
   You may obtain a copy of the License at
 | 
						|
 | 
						|
       http://www.apache.org/licenses/LICENSE-2.0
 | 
						|
 | 
						|
   Unless required by applicable law or agreed to in writing, software
 | 
						|
   distributed under the License is distributed on an "AS IS" BASIS,
 | 
						|
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
						|
   See the License for the specific language governing permissions and
 | 
						|
   limitations under the License.
 | 
						|
*/
 | 
						|
 | 
						|
package manager
 | 
						|
 | 
						|
import (
 | 
						|
	"bytes"
 | 
						|
	"context"
 | 
						|
	"encoding/json"
 | 
						|
	"errors"
 | 
						|
	"fmt"
 | 
						|
	"io"
 | 
						|
	"net"
 | 
						|
	"os"
 | 
						|
	"os/exec"
 | 
						|
	"path/filepath"
 | 
						|
	goruntime "runtime"
 | 
						|
	"syscall"
 | 
						|
	"time"
 | 
						|
 | 
						|
	"github.com/containerd/cgroups/v3"
 | 
						|
	"github.com/containerd/cgroups/v3/cgroup1"
 | 
						|
	cgroupsv2 "github.com/containerd/cgroups/v3/cgroup2"
 | 
						|
	"github.com/containerd/containerd/api/types"
 | 
						|
	"github.com/containerd/containerd/api/types/runc/options"
 | 
						|
	"github.com/containerd/containerd/v2/cmd/containerd-shim-runc-v2/process"
 | 
						|
	"github.com/containerd/containerd/v2/cmd/containerd-shim-runc-v2/runc"
 | 
						|
	"github.com/containerd/containerd/v2/core/mount"
 | 
						|
	"github.com/containerd/containerd/v2/pkg/namespaces"
 | 
						|
	"github.com/containerd/containerd/v2/pkg/oci"
 | 
						|
	"github.com/containerd/containerd/v2/pkg/protobuf"
 | 
						|
	"github.com/containerd/containerd/v2/pkg/schedcore"
 | 
						|
	"github.com/containerd/containerd/v2/pkg/shim"
 | 
						|
	"github.com/containerd/containerd/v2/version"
 | 
						|
	"github.com/containerd/errdefs"
 | 
						|
	runcC "github.com/containerd/go-runc"
 | 
						|
	"github.com/containerd/log"
 | 
						|
	"github.com/opencontainers/runtime-spec/specs-go/features"
 | 
						|
	"golang.org/x/sys/unix"
 | 
						|
)
 | 
						|
 | 
						|
// NewShimManager returns an implementation of the shim manager
 | 
						|
// using runc
 | 
						|
func NewShimManager(name string) shim.Manager {
 | 
						|
	return &manager{
 | 
						|
		name: name,
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// group labels specifies how the shim groups services.
 | 
						|
// currently supports a runc.v2 specific .group label and the
 | 
						|
// standard k8s pod label.  Order matters in this list
 | 
						|
var groupLabels = []string{
 | 
						|
	"io.containerd.runc.v2.group",
 | 
						|
	"io.kubernetes.cri.sandbox-id",
 | 
						|
}
 | 
						|
 | 
						|
// spec is a shallow version of [oci.Spec] containing only the
 | 
						|
// fields we need for the hook. We use a shallow struct to reduce
 | 
						|
// the overhead of unmarshaling.
 | 
						|
type spec struct {
 | 
						|
	// Annotations contains arbitrary metadata for the container.
 | 
						|
	Annotations map[string]string `json:"annotations,omitempty"`
 | 
						|
}
 | 
						|
 | 
						|
type manager struct {
 | 
						|
	name string
 | 
						|
}
 | 
						|
 | 
						|
func newCommand(ctx context.Context, id, containerdAddress, containerdTTRPCAddress string, debug bool) (*exec.Cmd, error) {
 | 
						|
	ns, err := namespaces.NamespaceRequired(ctx)
 | 
						|
	if err != nil {
 | 
						|
		return nil, err
 | 
						|
	}
 | 
						|
	self, err := os.Executable()
 | 
						|
	if err != nil {
 | 
						|
		return nil, err
 | 
						|
	}
 | 
						|
	cwd, err := os.Getwd()
 | 
						|
	if err != nil {
 | 
						|
		return nil, err
 | 
						|
	}
 | 
						|
	args := []string{
 | 
						|
		"-namespace", ns,
 | 
						|
		"-id", id,
 | 
						|
		"-address", containerdAddress,
 | 
						|
	}
 | 
						|
	if debug {
 | 
						|
		args = append(args, "-debug")
 | 
						|
	}
 | 
						|
	cmd := exec.Command(self, args...)
 | 
						|
	cmd.Dir = cwd
 | 
						|
	cmd.Env = append(os.Environ(), "GOMAXPROCS=4")
 | 
						|
	cmd.SysProcAttr = &syscall.SysProcAttr{
 | 
						|
		Setpgid: true,
 | 
						|
	}
 | 
						|
	return cmd, nil
 | 
						|
}
 | 
						|
 | 
						|
func readSpec() (*spec, error) {
 | 
						|
	f, err := os.Open(oci.ConfigFilename)
 | 
						|
	if err != nil {
 | 
						|
		return nil, err
 | 
						|
	}
 | 
						|
	defer f.Close()
 | 
						|
	var s spec
 | 
						|
	if err := json.NewDecoder(f).Decode(&s); err != nil {
 | 
						|
		return nil, err
 | 
						|
	}
 | 
						|
	return &s, nil
 | 
						|
}
 | 
						|
 | 
						|
func (m manager) Name() string {
 | 
						|
	return m.name
 | 
						|
}
 | 
						|
 | 
						|
type shimSocket struct {
 | 
						|
	addr string
 | 
						|
	s    *net.UnixListener
 | 
						|
	f    *os.File
 | 
						|
}
 | 
						|
 | 
						|
func (s *shimSocket) Close() {
 | 
						|
	if s.s != nil {
 | 
						|
		s.s.Close()
 | 
						|
	}
 | 
						|
	if s.f != nil {
 | 
						|
		s.f.Close()
 | 
						|
	}
 | 
						|
	_ = shim.RemoveSocket(s.addr)
 | 
						|
}
 | 
						|
 | 
						|
func newShimSocket(ctx context.Context, path, id string, debug bool) (*shimSocket, error) {
 | 
						|
	address, err := shim.SocketAddress(ctx, path, id, debug)
 | 
						|
	if err != nil {
 | 
						|
		return nil, err
 | 
						|
	}
 | 
						|
	socket, err := shim.NewSocket(address)
 | 
						|
	if err != nil {
 | 
						|
		// the only time where this would happen is if there is a bug and the socket
 | 
						|
		// was not cleaned up in the cleanup method of the shim or we are using the
 | 
						|
		// grouping functionality where the new process should be run with the same
 | 
						|
		// shim as an existing container
 | 
						|
		if !shim.SocketEaddrinuse(err) {
 | 
						|
			return nil, fmt.Errorf("create new shim socket: %w", err)
 | 
						|
		}
 | 
						|
		if !debug && shim.CanConnect(address) {
 | 
						|
			return &shimSocket{addr: address}, errdefs.ErrAlreadyExists
 | 
						|
		}
 | 
						|
		if err := shim.RemoveSocket(address); err != nil {
 | 
						|
			return nil, fmt.Errorf("remove pre-existing socket: %w", err)
 | 
						|
		}
 | 
						|
		if socket, err = shim.NewSocket(address); err != nil {
 | 
						|
			return nil, fmt.Errorf("try create new shim socket 2x: %w", err)
 | 
						|
		}
 | 
						|
	}
 | 
						|
	s := &shimSocket{
 | 
						|
		addr: address,
 | 
						|
		s:    socket,
 | 
						|
	}
 | 
						|
	f, err := socket.File()
 | 
						|
	if err != nil {
 | 
						|
		s.Close()
 | 
						|
		return nil, err
 | 
						|
	}
 | 
						|
	s.f = f
 | 
						|
	return s, nil
 | 
						|
}
 | 
						|
 | 
						|
func (manager) Start(ctx context.Context, id string, opts shim.StartOpts) (_ shim.BootstrapParams, retErr error) {
 | 
						|
	var params shim.BootstrapParams
 | 
						|
	params.Version = 3
 | 
						|
	params.Protocol = "ttrpc"
 | 
						|
 | 
						|
	cmd, err := newCommand(ctx, id, opts.Address, opts.TTRPCAddress, opts.Debug)
 | 
						|
	if err != nil {
 | 
						|
		return params, err
 | 
						|
	}
 | 
						|
	grouping := id
 | 
						|
	spec, err := readSpec()
 | 
						|
	if err != nil {
 | 
						|
		return params, err
 | 
						|
	}
 | 
						|
	for _, group := range groupLabels {
 | 
						|
		if groupID, ok := spec.Annotations[group]; ok {
 | 
						|
			grouping = groupID
 | 
						|
			break
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	var sockets []*shimSocket
 | 
						|
	defer func() {
 | 
						|
		if retErr != nil {
 | 
						|
			for _, s := range sockets {
 | 
						|
				s.Close()
 | 
						|
			}
 | 
						|
		}
 | 
						|
	}()
 | 
						|
 | 
						|
	s, err := newShimSocket(ctx, opts.Address, grouping, false)
 | 
						|
	if err != nil {
 | 
						|
		if errdefs.IsAlreadyExists(err) {
 | 
						|
			params.Address = s.addr
 | 
						|
			return params, nil
 | 
						|
		}
 | 
						|
		return params, err
 | 
						|
	}
 | 
						|
	sockets = append(sockets, s)
 | 
						|
	cmd.ExtraFiles = append(cmd.ExtraFiles, s.f)
 | 
						|
 | 
						|
	if opts.Debug {
 | 
						|
		s, err = newShimSocket(ctx, opts.Address, grouping, true)
 | 
						|
		if err != nil {
 | 
						|
			return params, err
 | 
						|
		}
 | 
						|
		sockets = append(sockets, s)
 | 
						|
		cmd.ExtraFiles = append(cmd.ExtraFiles, s.f)
 | 
						|
	}
 | 
						|
 | 
						|
	goruntime.LockOSThread()
 | 
						|
	if os.Getenv("SCHED_CORE") != "" {
 | 
						|
		if err := schedcore.Create(schedcore.ProcessGroup); err != nil {
 | 
						|
			return params, fmt.Errorf("enable sched core support: %w", err)
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	if err := cmd.Start(); err != nil {
 | 
						|
		return params, err
 | 
						|
	}
 | 
						|
 | 
						|
	goruntime.UnlockOSThread()
 | 
						|
 | 
						|
	defer func() {
 | 
						|
		if retErr != nil {
 | 
						|
			cmd.Process.Kill()
 | 
						|
		}
 | 
						|
	}()
 | 
						|
	// make sure to wait after start
 | 
						|
	go cmd.Wait()
 | 
						|
 | 
						|
	if opts, err := shim.ReadRuntimeOptions[*options.Options](os.Stdin); err == nil {
 | 
						|
		if opts.ShimCgroup != "" {
 | 
						|
			if cgroups.Mode() == cgroups.Unified {
 | 
						|
				cg, err := cgroupsv2.Load(opts.ShimCgroup)
 | 
						|
				if err != nil {
 | 
						|
					return params, fmt.Errorf("failed to load cgroup %s: %w", opts.ShimCgroup, err)
 | 
						|
				}
 | 
						|
				if err := cg.AddProc(uint64(cmd.Process.Pid)); err != nil {
 | 
						|
					return params, fmt.Errorf("failed to join cgroup %s: %w", opts.ShimCgroup, err)
 | 
						|
				}
 | 
						|
			} else {
 | 
						|
				cg, err := cgroup1.Load(cgroup1.StaticPath(opts.ShimCgroup))
 | 
						|
				if err != nil {
 | 
						|
					return params, fmt.Errorf("failed to load cgroup %s: %w", opts.ShimCgroup, err)
 | 
						|
				}
 | 
						|
				if err := cg.AddProc(uint64(cmd.Process.Pid)); err != nil {
 | 
						|
					return params, fmt.Errorf("failed to join cgroup %s: %w", opts.ShimCgroup, err)
 | 
						|
				}
 | 
						|
			}
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	if err := shim.AdjustOOMScore(cmd.Process.Pid); err != nil {
 | 
						|
		return params, fmt.Errorf("failed to adjust OOM score for shim: %w", err)
 | 
						|
	}
 | 
						|
 | 
						|
	params.Address = sockets[0].addr
 | 
						|
	return params, nil
 | 
						|
}
 | 
						|
 | 
						|
func (manager) Stop(ctx context.Context, id string) (shim.StopStatus, error) {
 | 
						|
	cwd, err := os.Getwd()
 | 
						|
	if err != nil {
 | 
						|
		return shim.StopStatus{}, err
 | 
						|
	}
 | 
						|
 | 
						|
	path := filepath.Join(filepath.Dir(cwd), id)
 | 
						|
	ns, err := namespaces.NamespaceRequired(ctx)
 | 
						|
	if err != nil {
 | 
						|
		return shim.StopStatus{}, err
 | 
						|
	}
 | 
						|
	runtime, err := runc.ReadRuntime(path)
 | 
						|
	if err != nil {
 | 
						|
		return shim.StopStatus{}, err
 | 
						|
	}
 | 
						|
	opts, err := runc.ReadOptions(path)
 | 
						|
	if err != nil {
 | 
						|
		return shim.StopStatus{}, err
 | 
						|
	}
 | 
						|
	root := process.RuncRoot
 | 
						|
	if opts != nil && opts.Root != "" {
 | 
						|
		root = opts.Root
 | 
						|
	}
 | 
						|
 | 
						|
	r := process.NewRunc(root, path, ns, runtime, false)
 | 
						|
	if err := r.Delete(ctx, id, &runcC.DeleteOpts{
 | 
						|
		Force: true,
 | 
						|
	}); err != nil {
 | 
						|
		log.G(ctx).WithError(err).Warn("failed to remove runc container")
 | 
						|
	}
 | 
						|
	if err := mount.UnmountRecursive(filepath.Join(path, "rootfs"), 0); err != nil {
 | 
						|
		log.G(ctx).WithError(err).Warn("failed to cleanup rootfs mount")
 | 
						|
	}
 | 
						|
	pid, err := runcC.ReadPidFile(filepath.Join(path, process.InitPidFile))
 | 
						|
	if err != nil {
 | 
						|
		log.G(ctx).WithError(err).Warn("failed to read init pid file")
 | 
						|
	}
 | 
						|
	return shim.StopStatus{
 | 
						|
		ExitedAt:   time.Now(),
 | 
						|
		ExitStatus: 128 + int(unix.SIGKILL),
 | 
						|
		Pid:        pid,
 | 
						|
	}, nil
 | 
						|
}
 | 
						|
 | 
						|
func (m manager) Info(ctx context.Context, optionsR io.Reader) (*types.RuntimeInfo, error) {
 | 
						|
	info := &types.RuntimeInfo{
 | 
						|
		Name: m.name,
 | 
						|
		Version: &types.RuntimeVersion{
 | 
						|
			Version:  version.Version,
 | 
						|
			Revision: version.Revision,
 | 
						|
		},
 | 
						|
		Annotations: nil,
 | 
						|
	}
 | 
						|
	binaryName := runcC.DefaultCommand
 | 
						|
	opts, err := shim.ReadRuntimeOptions[*options.Options](optionsR)
 | 
						|
	if err != nil {
 | 
						|
		if !errors.Is(err, errdefs.ErrNotFound) {
 | 
						|
			return nil, fmt.Errorf("failed to read runtime options (*options.Options): %w", err)
 | 
						|
		}
 | 
						|
	}
 | 
						|
	if opts != nil {
 | 
						|
		info.Options, err = protobuf.MarshalAnyToProto(opts)
 | 
						|
		if err != nil {
 | 
						|
			return nil, fmt.Errorf("failed to marshal %T: %w", opts, err)
 | 
						|
		}
 | 
						|
		if opts.BinaryName != "" {
 | 
						|
			binaryName = opts.BinaryName
 | 
						|
		}
 | 
						|
 | 
						|
	}
 | 
						|
	absBinary, err := exec.LookPath(binaryName)
 | 
						|
	if err != nil {
 | 
						|
		return nil, fmt.Errorf("failed to look up the path of %q: %w", binaryName, err)
 | 
						|
	}
 | 
						|
	features, err := m.features(ctx, absBinary, opts)
 | 
						|
	if err != nil {
 | 
						|
		// youki does not implement `runc features` yet, at the time of writing this (Sep 2023)
 | 
						|
		// https://github.com/containers/youki/issues/815
 | 
						|
		log.G(ctx).WithError(err).Debug("Failed to get the runtime features. The runc binary does not implement `runc features` command?")
 | 
						|
	}
 | 
						|
	if features != nil {
 | 
						|
		info.Features, err = protobuf.MarshalAnyToProto(features)
 | 
						|
		if err != nil {
 | 
						|
			return nil, fmt.Errorf("failed to marshal %T: %w", features, err)
 | 
						|
		}
 | 
						|
	}
 | 
						|
	return info, nil
 | 
						|
}
 | 
						|
 | 
						|
func (m manager) features(ctx context.Context, absBinary string, opts *options.Options) (*features.Features, error) {
 | 
						|
	var stderr bytes.Buffer
 | 
						|
	cmd := exec.CommandContext(ctx, absBinary, "features")
 | 
						|
	cmd.Stderr = &stderr
 | 
						|
	stdout, err := cmd.Output()
 | 
						|
	if err != nil {
 | 
						|
		return nil, fmt.Errorf("failed to execute %v: %w (stderr: %q)", cmd.Args, err, stderr.String())
 | 
						|
	}
 | 
						|
	var feat features.Features
 | 
						|
	if err := json.Unmarshal(stdout, &feat); err != nil {
 | 
						|
		return nil, err
 | 
						|
	}
 | 
						|
	return &feat, nil
 | 
						|
}
 |