302 lines
		
	
	
		
			7.9 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			302 lines
		
	
	
		
			7.9 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
//go:build !windows
 | 
						|
 | 
						|
/*
   Copyright The containerd Authors.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
*/
 | 
						|
 | 
						|
package shim
 | 
						|
 | 
						|
import (
 | 
						|
	"bufio"
 | 
						|
	"context"
 | 
						|
	"crypto/sha256"
 | 
						|
	"errors"
 | 
						|
	"fmt"
 | 
						|
	"io"
 | 
						|
	"math"
 | 
						|
	"net"
 | 
						|
	"os"
 | 
						|
	"path/filepath"
 | 
						|
	"runtime"
 | 
						|
	"strconv"
 | 
						|
	"strings"
 | 
						|
	"syscall"
 | 
						|
	"time"
 | 
						|
 | 
						|
	"github.com/containerd/log"
 | 
						|
	"github.com/mdlayher/vsock"
 | 
						|
 | 
						|
	"github.com/containerd/containerd/v2/defaults"
 | 
						|
	"github.com/containerd/containerd/v2/pkg/namespaces"
 | 
						|
	"github.com/containerd/containerd/v2/pkg/sys"
 | 
						|
)
 | 
						|
 | 
						|
const (
	// Address schemes (the part before "://") recognised by AnonDialer.
	protoUnix        = "unix"
	protoVsock       = "vsock"
	protoHybridVsock = "hvsock"

	// shimBinaryFormat is a fmt pattern with two %s verbs that yields a
	// shim binary name of the form "containerd-shim-<a>-<b>".
	shimBinaryFormat = "containerd-shim-%s-%s"

	// socketPathLimit is not referenced in this part of the file.
	// NOTE(review): presumably the maximum usable length of a unix socket
	// path — confirm against its callers.
	socketPathLimit = 106
)
 | 
						|
 | 
						|
// getSysProcAttr returns a fresh SysProcAttr whose only non-zero field is
// Setpgid, which asks for the process to be placed in its own process group.
func getSysProcAttr() *syscall.SysProcAttr {
	attr := new(syscall.SysProcAttr)
	attr.Setpgid = true
	return attr
}
 | 
						|
 | 
						|
// AdjustOOMScore sets the OOM score for the process to the parents OOM score +1
 | 
						|
// to ensure that they parent has a lower* score than the shim
 | 
						|
// if not already at the maximum OOM Score
 | 
						|
func AdjustOOMScore(pid int) error {
 | 
						|
	parent := os.Getppid()
 | 
						|
	score, err := sys.GetOOMScoreAdj(parent)
 | 
						|
	if err != nil {
 | 
						|
		return fmt.Errorf("get parent OOM score: %w", err)
 | 
						|
	}
 | 
						|
	shimScore := score + 1
 | 
						|
	if err := sys.AdjustOOMScore(pid, shimScore); err != nil {
 | 
						|
		return fmt.Errorf("set shim OOM score: %w", err)
 | 
						|
	}
 | 
						|
	return nil
 | 
						|
}
 | 
						|
 | 
						|
// socketRoot is the base directory for the sockets named by SocketAddress,
// which places them in its "s" subdirectory. It is set to
// defaults.DefaultStateDir.
const socketRoot = defaults.DefaultStateDir
 | 
						|
 | 
						|
// SocketAddress returns a socket address
 | 
						|
func SocketAddress(ctx context.Context, socketPath, id string, debug bool) (string, error) {
 | 
						|
	ns, err := namespaces.NamespaceRequired(ctx)
 | 
						|
	if err != nil {
 | 
						|
		return "", err
 | 
						|
	}
 | 
						|
	path := filepath.Join(socketPath, ns, id)
 | 
						|
	if debug {
 | 
						|
		path = filepath.Join(path, "debug")
 | 
						|
	}
 | 
						|
	d := sha256.Sum256([]byte(path))
 | 
						|
	return fmt.Sprintf("unix://%s/%x", filepath.Join(socketRoot, "s"), d), nil
 | 
						|
}
 | 
						|
 | 
						|
// AnonDialer returns a dialer for a socket
 | 
						|
func AnonDialer(address string, timeout time.Duration) (net.Conn, error) {
 | 
						|
	proto, addr, ok := strings.Cut(address, "://")
 | 
						|
	if !ok {
 | 
						|
		return net.DialTimeout("unix", socket(address).path(), timeout)
 | 
						|
	}
 | 
						|
	switch proto {
 | 
						|
	case protoVsock:
 | 
						|
		// vsock dialer can not set timeout
 | 
						|
		return dialVsock(addr)
 | 
						|
	case protoHybridVsock:
 | 
						|
		return dialHybridVsock(addr, timeout)
 | 
						|
	case protoUnix:
 | 
						|
		return net.DialTimeout("unix", socket(address).path(), timeout)
 | 
						|
	default:
 | 
						|
		return nil, fmt.Errorf("unsupported protocol: %s", proto)
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// AnonReconnectDialer returns a dialer for an existing socket on reconnection
 | 
						|
func AnonReconnectDialer(address string, timeout time.Duration) (net.Conn, error) {
 | 
						|
	return AnonDialer(address, timeout)
 | 
						|
}
 | 
						|
 | 
						|
// NewSocket returns a new socket
 | 
						|
func NewSocket(address string) (*net.UnixListener, error) {
 | 
						|
	var (
 | 
						|
		sock       = socket(address)
 | 
						|
		path       = sock.path()
 | 
						|
		isAbstract = sock.isAbstract()
 | 
						|
		perm       = os.FileMode(0600)
 | 
						|
	)
 | 
						|
 | 
						|
	// Darwin needs +x to access socket, otherwise it'll fail with "bind: permission denied" when running as non-root.
 | 
						|
	if runtime.GOOS == "darwin" {
 | 
						|
		perm = 0700
 | 
						|
	}
 | 
						|
 | 
						|
	if !isAbstract {
 | 
						|
		if err := os.MkdirAll(filepath.Dir(path), perm); err != nil {
 | 
						|
			return nil, fmt.Errorf("mkdir failed for %s: %w", path, err)
 | 
						|
		}
 | 
						|
	}
 | 
						|
	l, err := net.Listen("unix", path)
 | 
						|
	if err != nil {
 | 
						|
		return nil, err
 | 
						|
	}
 | 
						|
 | 
						|
	if !isAbstract {
 | 
						|
		if err := os.Chmod(path, perm); err != nil {
 | 
						|
			os.Remove(sock.path())
 | 
						|
			l.Close()
 | 
						|
			return nil, fmt.Errorf("chmod failed for %s: %w", path, err)
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	return l.(*net.UnixListener), nil
 | 
						|
}
 | 
						|
 | 
						|
// abstractSocketPrefix is the NUL byte prepended to a name to address an
// abstract unix socket.
const abstractSocketPrefix = "\x00"

// socket is a shim socket address. A value starting with "unix://" names a
// socket file on disk; anything else is treated as an abstract socket name.
type socket string

// isAbstract reports whether s lacks the "unix://" prefix.
func (s socket) isAbstract() bool {
	return !strings.HasPrefix(string(s), "unix://")
}

// path returns the string to hand to the dialer or listener: the address
// without its "unix://" prefix, or, for an abstract socket, the whole address
// prefixed with a NUL byte.
func (s socket) path() string {
	if s.isAbstract() {
		return abstractSocketPrefix + string(s)
	}
	return strings.TrimPrefix(string(s), "unix://")
}
 | 
						|
 | 
						|
// RemoveSocket removes the socket at the specified address if
 | 
						|
// it exists on the filesystem
 | 
						|
func RemoveSocket(address string) error {
 | 
						|
	sock := socket(address)
 | 
						|
	if !sock.isAbstract() {
 | 
						|
		return os.Remove(sock.path())
 | 
						|
	}
 | 
						|
	return nil
 | 
						|
}
 | 
						|
 | 
						|
// SocketEaddrinuse reports whether err comes from a "listen" operation that
// failed with EADDRINUSE.
//
// err must contain a *net.OpError whose Op is "listen", and EADDRINUSE must
// appear in err's chain; otherwise false is returned.
func SocketEaddrinuse(err error) bool {
	var opErr *net.OpError
	if !errors.As(err, &opErr) || opErr.Op != "listen" {
		return false
	}
	return errors.Is(err, syscall.EADDRINUSE)
}
 | 
						|
 | 
						|
// CanConnect returns true if the socket provided at the address
 | 
						|
// is accepting new connections
 | 
						|
func CanConnect(address string) bool {
 | 
						|
	conn, err := AnonDialer(address, 100*time.Millisecond)
 | 
						|
	if err != nil {
 | 
						|
		return false
 | 
						|
	}
 | 
						|
	conn.Close()
 | 
						|
	return true
 | 
						|
}
 | 
						|
 | 
						|
func hybridVsockDialer(addr string, port uint64, timeout time.Duration) (net.Conn, error) {
 | 
						|
	timeoutCh := time.After(timeout)
 | 
						|
	// Do 10 retries before timeout
 | 
						|
	retryInterval := timeout / 10
 | 
						|
	for {
 | 
						|
		conn, err := net.DialTimeout("unix", addr, timeout)
 | 
						|
		if err != nil {
 | 
						|
			return nil, err
 | 
						|
		}
 | 
						|
		if _, err = conn.Write([]byte(fmt.Sprintf("CONNECT %d\n", port))); err != nil {
 | 
						|
			conn.Close()
 | 
						|
			return nil, err
 | 
						|
		}
 | 
						|
		errChan := make(chan error, 1)
 | 
						|
		go func() {
 | 
						|
			reader := bufio.NewReader(conn)
 | 
						|
			response, err := reader.ReadString('\n')
 | 
						|
			if err != nil {
 | 
						|
				errChan <- err
 | 
						|
				return
 | 
						|
			}
 | 
						|
			if strings.Contains(response, "OK") {
 | 
						|
				errChan <- nil
 | 
						|
			} else {
 | 
						|
				errChan <- fmt.Errorf("hybrid vsock handshake response error: %s", response)
 | 
						|
			}
 | 
						|
		}()
 | 
						|
		select {
 | 
						|
		case err = <-errChan:
 | 
						|
			if err != nil {
 | 
						|
				conn.Close()
 | 
						|
				// When it is EOF, maybe the server side is not ready.
 | 
						|
				if err == io.EOF {
 | 
						|
					log.G(context.Background()).Warnf("Read hybrid vsock got EOF, server may not ready")
 | 
						|
					time.Sleep(retryInterval)
 | 
						|
					continue
 | 
						|
				}
 | 
						|
				return nil, err
 | 
						|
			}
 | 
						|
			return conn, nil
 | 
						|
		case <-timeoutCh:
 | 
						|
			conn.Close()
 | 
						|
			return nil, fmt.Errorf("timeout waiting for hybrid vsocket handshake of %s:%d", addr, port)
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
}
 | 
						|
 | 
						|
func dialVsock(address string) (net.Conn, error) {
 | 
						|
	contextIDString, portString, ok := strings.Cut(address, ":")
 | 
						|
	if !ok {
 | 
						|
		return nil, fmt.Errorf("invalid vsock address %s", address)
 | 
						|
	}
 | 
						|
	contextID, err := strconv.ParseUint(contextIDString, 10, 0)
 | 
						|
	if err != nil {
 | 
						|
		return nil, fmt.Errorf("failed to parse vsock context id %s, %v", contextIDString, err)
 | 
						|
	}
 | 
						|
	if contextID > math.MaxUint32 {
 | 
						|
		return nil, fmt.Errorf("vsock context id %d is invalid", contextID)
 | 
						|
	}
 | 
						|
	port, err := strconv.ParseUint(portString, 10, 0)
 | 
						|
	if err != nil {
 | 
						|
		return nil, fmt.Errorf("failed to parse vsock port %s, %v", portString, err)
 | 
						|
	}
 | 
						|
	if port > math.MaxUint32 {
 | 
						|
		return nil, fmt.Errorf("vsock port %d is invalid", port)
 | 
						|
	}
 | 
						|
	return vsock.Dial(uint32(contextID), uint32(port), &vsock.Config{})
 | 
						|
}
 | 
						|
 | 
						|
func dialHybridVsock(address string, timeout time.Duration) (net.Conn, error) {
 | 
						|
	addr, portString, ok := strings.Cut(address, ":")
 | 
						|
	if !ok {
 | 
						|
		return nil, fmt.Errorf("invalid hybrid vsock address %s", address)
 | 
						|
	}
 | 
						|
	port, err := strconv.ParseUint(portString, 10, 0)
 | 
						|
	if err != nil {
 | 
						|
		return nil, fmt.Errorf("failed to parse hybrid vsock port %s, %v", portString, err)
 | 
						|
	}
 | 
						|
	if port > math.MaxUint32 {
 | 
						|
		return nil, fmt.Errorf("hybrid vsock port %d is invalid", port)
 | 
						|
	}
 | 
						|
	return hybridVsockDialer(addr, port, timeout)
 | 
						|
}
 | 
						|
 | 
						|
func cleanupSockets(ctx context.Context) {
 | 
						|
	if address, err := ReadAddress("address"); err == nil {
 | 
						|
		_ = RemoveSocket(address)
 | 
						|
	}
 | 
						|
	if len(socketFlag) > 0 {
 | 
						|
		_ = RemoveSocket("unix://" + socketFlag)
 | 
						|
	} else if address, err := SocketAddress(ctx, addressFlag, id, false); err == nil {
 | 
						|
		_ = RemoveSocket(address)
 | 
						|
	}
 | 
						|
	if len(debugSocketFlag) > 0 {
 | 
						|
		_ = RemoveSocket("unix://" + debugSocketFlag)
 | 
						|
	} else if address, err := SocketAddress(ctx, addressFlag, id, true); err == nil {
 | 
						|
		_ = RemoveSocket(address)
 | 
						|
	}
 | 
						|
}
 |