 d2013d2c99
			
		
	
	d2013d2c99
	
	
	
		
			
			runc option --criu is now ignored (with a warning), and the option will be removed entirely in a future release. Users who need a non-standard criu binary should rely on the standard way of looking up binaries in $PATH. Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
		
			
				
	
	
		
			283 lines
		
	
	
		
			7.3 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			283 lines
		
	
	
		
			7.3 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| /*
 | |
|    Copyright The containerd Authors.
 | |
| 
 | |
|    Licensed under the Apache License, Version 2.0 (the "License");
 | |
|    you may not use this file except in compliance with the License.
 | |
|    You may obtain a copy of the License at
 | |
| 
 | |
|        http://www.apache.org/licenses/LICENSE-2.0
 | |
| 
 | |
|    Unless required by applicable law or agreed to in writing, software
 | |
|    distributed under the License is distributed on an "AS IS" BASIS,
 | |
|    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
|    See the License for the specific language governing permissions and
 | |
|    limitations under the License.
 | |
| */
 | |
| 
 | |
| package manager
 | |
| 
 | |
| import (
 | |
| 	"context"
 | |
| 	"encoding/json"
 | |
| 	"fmt"
 | |
| 	"io"
 | |
| 	"os"
 | |
| 	"path/filepath"
 | |
| 	goruntime "runtime"
 | |
| 	"syscall"
 | |
| 	"time"
 | |
| 
 | |
| 	"github.com/containerd/cgroups"
 | |
| 	cgroupsv2 "github.com/containerd/cgroups/v2"
 | |
| 	"github.com/containerd/containerd/log"
 | |
| 	"github.com/containerd/containerd/mount"
 | |
| 	"github.com/containerd/containerd/namespaces"
 | |
| 	"github.com/containerd/containerd/pkg/process"
 | |
| 	"github.com/containerd/containerd/pkg/schedcore"
 | |
| 	"github.com/containerd/containerd/runtime/v2/runc"
 | |
| 	"github.com/containerd/containerd/runtime/v2/runc/options"
 | |
| 	"github.com/containerd/containerd/runtime/v2/shim"
 | |
| 	runcC "github.com/containerd/go-runc"
 | |
| 	"github.com/containerd/typeurl"
 | |
| 	"github.com/gogo/protobuf/proto"
 | |
| 	ptypes "github.com/gogo/protobuf/types"
 | |
| 	exec "golang.org/x/sys/execabs"
 | |
| 	"golang.org/x/sys/unix"
 | |
| )
 | |
| 
 | |
| // NewShimManager returns an implementation of the shim manager
 | |
| // using runc
 | |
| func NewShimManager(name string) shim.Manager {
 | |
| 	return &manager{
 | |
| 		name: name,
 | |
| 	}
 | |
| }
 | |
| 
 | |
// groupLabels lists the annotation keys that decide how the shim groups
// services. It currently holds a runc.v2 specific group label and the
// standard Kubernetes sandbox (pod) label. Order matters: the list is
// searched front to back and the first key found in the annotations wins.
var groupLabels = []string{
	"io.containerd.runc.v2.group",
	"io.kubernetes.cri.sandbox-id",
}
 | |
| 
 | |
// spec is the subset of config.json that the manager decodes. Only the
// annotations are captured; all other fields of the document are ignored.
type spec struct {
	// Annotations holds the "annotations" object of the config. It is
	// consulted for the keys listed in groupLabels.
	Annotations map[string]string `json:"annotations,omitempty"`
}
 | |
| 
 | |
// manager is the runc-based shim manager returned by NewShimManager.
type manager struct {
	// name is the identifier handed to NewShimManager and reported by Name.
	name string
}
 | |
| 
 | |
| func newCommand(ctx context.Context, id, containerdBinary, containerdAddress, containerdTTRPCAddress string) (*exec.Cmd, error) {
 | |
| 	ns, err := namespaces.NamespaceRequired(ctx)
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 	self, err := os.Executable()
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 	cwd, err := os.Getwd()
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 	args := []string{
 | |
| 		"-namespace", ns,
 | |
| 		"-id", id,
 | |
| 		"-address", containerdAddress,
 | |
| 	}
 | |
| 	cmd := exec.Command(self, args...)
 | |
| 	cmd.Dir = cwd
 | |
| 	cmd.Env = append(os.Environ(), "GOMAXPROCS=4")
 | |
| 	cmd.SysProcAttr = &syscall.SysProcAttr{
 | |
| 		Setpgid: true,
 | |
| 	}
 | |
| 	return cmd, nil
 | |
| }
 | |
| 
 | |
| func readSpec() (*spec, error) {
 | |
| 	f, err := os.Open("config.json")
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 	defer f.Close()
 | |
| 	var s spec
 | |
| 	if err := json.NewDecoder(f).Decode(&s); err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 	return &s, nil
 | |
| }
 | |
| 
 | |
| func (m manager) Name() string {
 | |
| 	return m.name
 | |
| }
 | |
| 
 | |
| func (manager) Start(ctx context.Context, id string, opts shim.StartOpts) (_ string, retErr error) {
 | |
| 	cmd, err := newCommand(ctx, id, opts.ContainerdBinary, opts.Address, opts.TTRPCAddress)
 | |
| 	if err != nil {
 | |
| 		return "", err
 | |
| 	}
 | |
| 	grouping := id
 | |
| 	spec, err := readSpec()
 | |
| 	if err != nil {
 | |
| 		return "", err
 | |
| 	}
 | |
| 	for _, group := range groupLabels {
 | |
| 		if groupID, ok := spec.Annotations[group]; ok {
 | |
| 			grouping = groupID
 | |
| 			break
 | |
| 		}
 | |
| 	}
 | |
| 	address, err := shim.SocketAddress(ctx, opts.Address, grouping)
 | |
| 	if err != nil {
 | |
| 		return "", err
 | |
| 	}
 | |
| 
 | |
| 	socket, err := shim.NewSocket(address)
 | |
| 	if err != nil {
 | |
| 		// the only time where this would happen is if there is a bug and the socket
 | |
| 		// was not cleaned up in the cleanup method of the shim or we are using the
 | |
| 		// grouping functionality where the new process should be run with the same
 | |
| 		// shim as an existing container
 | |
| 		if !shim.SocketEaddrinuse(err) {
 | |
| 			return "", fmt.Errorf("create new shim socket: %w", err)
 | |
| 		}
 | |
| 		if shim.CanConnect(address) {
 | |
| 			if err := shim.WriteAddress("address", address); err != nil {
 | |
| 				return "", fmt.Errorf("write existing socket for shim: %w", err)
 | |
| 			}
 | |
| 			return address, nil
 | |
| 		}
 | |
| 		if err := shim.RemoveSocket(address); err != nil {
 | |
| 			return "", fmt.Errorf("remove pre-existing socket: %w", err)
 | |
| 		}
 | |
| 		if socket, err = shim.NewSocket(address); err != nil {
 | |
| 			return "", fmt.Errorf("try create new shim socket 2x: %w", err)
 | |
| 		}
 | |
| 	}
 | |
| 	defer func() {
 | |
| 		if retErr != nil {
 | |
| 			socket.Close()
 | |
| 			_ = shim.RemoveSocket(address)
 | |
| 		}
 | |
| 	}()
 | |
| 
 | |
| 	// make sure that reexec shim-v2 binary use the value if need
 | |
| 	if err := shim.WriteAddress("address", address); err != nil {
 | |
| 		return "", err
 | |
| 	}
 | |
| 
 | |
| 	f, err := socket.File()
 | |
| 	if err != nil {
 | |
| 		return "", err
 | |
| 	}
 | |
| 
 | |
| 	cmd.ExtraFiles = append(cmd.ExtraFiles, f)
 | |
| 
 | |
| 	goruntime.LockOSThread()
 | |
| 	if os.Getenv("SCHED_CORE") != "" {
 | |
| 		if err := schedcore.Create(schedcore.ProcessGroup); err != nil {
 | |
| 			return "", fmt.Errorf("enable sched core support: %w", err)
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if err := cmd.Start(); err != nil {
 | |
| 		f.Close()
 | |
| 		return "", err
 | |
| 	}
 | |
| 
 | |
| 	goruntime.UnlockOSThread()
 | |
| 
 | |
| 	defer func() {
 | |
| 		if retErr != nil {
 | |
| 			cmd.Process.Kill()
 | |
| 		}
 | |
| 	}()
 | |
| 	// make sure to wait after start
 | |
| 	go cmd.Wait()
 | |
| 	if data, err := io.ReadAll(os.Stdin); err == nil {
 | |
| 		if len(data) > 0 {
 | |
| 			var any ptypes.Any
 | |
| 			if err := proto.Unmarshal(data, &any); err != nil {
 | |
| 				return "", err
 | |
| 			}
 | |
| 			v, err := typeurl.UnmarshalAny(&any)
 | |
| 			if err != nil {
 | |
| 				return "", err
 | |
| 			}
 | |
| 			if opts, ok := v.(*options.Options); ok {
 | |
| 				if opts.ShimCgroup != "" {
 | |
| 					if cgroups.Mode() == cgroups.Unified {
 | |
| 						cg, err := cgroupsv2.LoadManager("/sys/fs/cgroup", opts.ShimCgroup)
 | |
| 						if err != nil {
 | |
| 							return "", fmt.Errorf("failed to load cgroup %s: %w", opts.ShimCgroup, err)
 | |
| 						}
 | |
| 						if err := cg.AddProc(uint64(cmd.Process.Pid)); err != nil {
 | |
| 							return "", fmt.Errorf("failed to join cgroup %s: %w", opts.ShimCgroup, err)
 | |
| 						}
 | |
| 					} else {
 | |
| 						cg, err := cgroups.Load(cgroups.V1, cgroups.StaticPath(opts.ShimCgroup))
 | |
| 						if err != nil {
 | |
| 							return "", fmt.Errorf("failed to load cgroup %s: %w", opts.ShimCgroup, err)
 | |
| 						}
 | |
| 						if err := cg.Add(cgroups.Process{
 | |
| 							Pid: cmd.Process.Pid,
 | |
| 						}); err != nil {
 | |
| 							return "", fmt.Errorf("failed to join cgroup %s: %w", opts.ShimCgroup, err)
 | |
| 						}
 | |
| 					}
 | |
| 				}
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 	if err := shim.AdjustOOMScore(cmd.Process.Pid); err != nil {
 | |
| 		return "", fmt.Errorf("failed to adjust OOM score for shim: %w", err)
 | |
| 	}
 | |
| 	return address, nil
 | |
| }
 | |
| 
 | |
| func (manager) Stop(ctx context.Context, id string) (shim.StopStatus, error) {
 | |
| 	cwd, err := os.Getwd()
 | |
| 	if err != nil {
 | |
| 		return shim.StopStatus{}, err
 | |
| 	}
 | |
| 
 | |
| 	path := filepath.Join(filepath.Dir(cwd), id)
 | |
| 	ns, err := namespaces.NamespaceRequired(ctx)
 | |
| 	if err != nil {
 | |
| 		return shim.StopStatus{}, err
 | |
| 	}
 | |
| 	runtime, err := runc.ReadRuntime(path)
 | |
| 	if err != nil {
 | |
| 		return shim.StopStatus{}, err
 | |
| 	}
 | |
| 	opts, err := runc.ReadOptions(path)
 | |
| 	if err != nil {
 | |
| 		return shim.StopStatus{}, err
 | |
| 	}
 | |
| 	root := process.RuncRoot
 | |
| 	if opts != nil && opts.Root != "" {
 | |
| 		root = opts.Root
 | |
| 	}
 | |
| 
 | |
| 	r := process.NewRunc(root, path, ns, runtime, false)
 | |
| 	if err := r.Delete(ctx, id, &runcC.DeleteOpts{
 | |
| 		Force: true,
 | |
| 	}); err != nil {
 | |
| 		log.G(ctx).WithError(err).Warn("failed to remove runc container")
 | |
| 	}
 | |
| 	if err := mount.UnmountAll(filepath.Join(path, "rootfs"), 0); err != nil {
 | |
| 		log.G(ctx).WithError(err).Warn("failed to cleanup rootfs mount")
 | |
| 	}
 | |
| 	pid, err := runcC.ReadPidFile(filepath.Join(path, process.InitPidFile))
 | |
| 	if err != nil {
 | |
| 		log.G(ctx).WithError(err).Warn("failed to read init pid file")
 | |
| 	}
 | |
| 	return shim.StopStatus{
 | |
| 		ExitedAt:   time.Now(),
 | |
| 		ExitStatus: 128 + int(unix.SIGKILL),
 | |
| 		Pid:        pid,
 | |
| 	}, nil
 | |
| }
 |