Move shim package to pkg
Signed-off-by: Maksym Pavlenko <pavlenko.maksym@gmail.com>
This commit is contained in:
		
							
								
								
									
										288
									
								
								pkg/shim/util_unix.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										288
									
								
								pkg/shim/util_unix.go
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,288 @@
 | 
			
		||||
//go:build !windows
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
   Copyright The containerd Authors.
 | 
			
		||||
 | 
			
		||||
   Licensed under the Apache License, Version 2.0 (the "License");
 | 
			
		||||
   you may not use this file except in compliance with the License.
 | 
			
		||||
   You may obtain a copy of the License at
 | 
			
		||||
 | 
			
		||||
       http://www.apache.org/licenses/LICENSE-2.0
 | 
			
		||||
 | 
			
		||||
   Unless required by applicable law or agreed to in writing, software
 | 
			
		||||
   distributed under the License is distributed on an "AS IS" BASIS,
 | 
			
		||||
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
			
		||||
   See the License for the specific language governing permissions and
 | 
			
		||||
   limitations under the License.
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
package shim
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"bufio"
 | 
			
		||||
	"context"
 | 
			
		||||
	"crypto/sha256"
 | 
			
		||||
	"fmt"
 | 
			
		||||
	"io"
 | 
			
		||||
	"math"
 | 
			
		||||
	"net"
 | 
			
		||||
	"os"
 | 
			
		||||
	"path/filepath"
 | 
			
		||||
	"runtime"
 | 
			
		||||
	"strconv"
 | 
			
		||||
	"strings"
 | 
			
		||||
	"syscall"
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	"github.com/containerd/log"
 | 
			
		||||
	"github.com/mdlayher/vsock"
 | 
			
		||||
 | 
			
		||||
	"github.com/containerd/containerd/v2/defaults"
 | 
			
		||||
	"github.com/containerd/containerd/v2/pkg/namespaces"
 | 
			
		||||
	"github.com/containerd/containerd/v2/pkg/sys"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
const (
 | 
			
		||||
	shimBinaryFormat = "containerd-shim-%s-%s"
 | 
			
		||||
	socketPathLimit  = 106
 | 
			
		||||
	protoVsock       = "vsock"
 | 
			
		||||
	protoHybridVsock = "hvsock"
 | 
			
		||||
	protoUnix        = "unix"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
func getSysProcAttr() *syscall.SysProcAttr {
 | 
			
		||||
	return &syscall.SysProcAttr{
 | 
			
		||||
		Setpgid: true,
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// AdjustOOMScore sets the OOM score for the process to the parents OOM score +1
 | 
			
		||||
// to ensure that they parent has a lower* score than the shim
 | 
			
		||||
// if not already at the maximum OOM Score
 | 
			
		||||
func AdjustOOMScore(pid int) error {
 | 
			
		||||
	parent := os.Getppid()
 | 
			
		||||
	score, err := sys.GetOOMScoreAdj(parent)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return fmt.Errorf("get parent OOM score: %w", err)
 | 
			
		||||
	}
 | 
			
		||||
	shimScore := score + 1
 | 
			
		||||
	if err := sys.AdjustOOMScore(pid, shimScore); err != nil {
 | 
			
		||||
		return fmt.Errorf("set shim OOM score: %w", err)
 | 
			
		||||
	}
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
const socketRoot = defaults.DefaultStateDir
 | 
			
		||||
 | 
			
		||||
// SocketAddress returns a socket address
 | 
			
		||||
func SocketAddress(ctx context.Context, socketPath, id string) (string, error) {
 | 
			
		||||
	ns, err := namespaces.NamespaceRequired(ctx)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return "", err
 | 
			
		||||
	}
 | 
			
		||||
	d := sha256.Sum256([]byte(filepath.Join(socketPath, ns, id)))
 | 
			
		||||
	return fmt.Sprintf("unix://%s/%x", filepath.Join(socketRoot, "s"), d), nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// AnonDialer returns a dialer for a socket
 | 
			
		||||
func AnonDialer(address string, timeout time.Duration) (net.Conn, error) {
 | 
			
		||||
	proto, addr, ok := strings.Cut(address, "://")
 | 
			
		||||
	if !ok {
 | 
			
		||||
		return net.DialTimeout("unix", socket(address).path(), timeout)
 | 
			
		||||
	}
 | 
			
		||||
	switch proto {
 | 
			
		||||
	case protoVsock:
 | 
			
		||||
		// vsock dialer can not set timeout
 | 
			
		||||
		return dialVsock(addr)
 | 
			
		||||
	case protoHybridVsock:
 | 
			
		||||
		return dialHybridVsock(addr, timeout)
 | 
			
		||||
	case protoUnix:
 | 
			
		||||
		return net.DialTimeout("unix", socket(address).path(), timeout)
 | 
			
		||||
	default:
 | 
			
		||||
		return nil, fmt.Errorf("unsupported protocol: %s", proto)
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// AnonReconnectDialer returns a dialer for an existing socket on reconnection
 | 
			
		||||
func AnonReconnectDialer(address string, timeout time.Duration) (net.Conn, error) {
 | 
			
		||||
	return AnonDialer(address, timeout)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// NewSocket returns a new socket
 | 
			
		||||
func NewSocket(address string) (*net.UnixListener, error) {
 | 
			
		||||
	var (
 | 
			
		||||
		sock       = socket(address)
 | 
			
		||||
		path       = sock.path()
 | 
			
		||||
		isAbstract = sock.isAbstract()
 | 
			
		||||
		perm       = os.FileMode(0600)
 | 
			
		||||
	)
 | 
			
		||||
 | 
			
		||||
	// Darwin needs +x to access socket, otherwise it'll fail with "bind: permission denied" when running as non-root.
 | 
			
		||||
	if runtime.GOOS == "darwin" {
 | 
			
		||||
		perm = 0700
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if !isAbstract {
 | 
			
		||||
		if err := os.MkdirAll(filepath.Dir(path), perm); err != nil {
 | 
			
		||||
			return nil, fmt.Errorf("mkdir failed for %s: %w", path, err)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	l, err := net.Listen("unix", path)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil, err
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if !isAbstract {
 | 
			
		||||
		if err := os.Chmod(path, perm); err != nil {
 | 
			
		||||
			os.Remove(sock.path())
 | 
			
		||||
			l.Close()
 | 
			
		||||
			return nil, fmt.Errorf("chmod failed for %s: %w", path, err)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return l.(*net.UnixListener), nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
const abstractSocketPrefix = "\x00"
 | 
			
		||||
 | 
			
		||||
type socket string
 | 
			
		||||
 | 
			
		||||
func (s socket) isAbstract() bool {
 | 
			
		||||
	return !strings.HasPrefix(string(s), "unix://")
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (s socket) path() string {
 | 
			
		||||
	path := strings.TrimPrefix(string(s), "unix://")
 | 
			
		||||
	// if there was no trim performed, we assume an abstract socket
 | 
			
		||||
	if len(path) == len(s) {
 | 
			
		||||
		path = abstractSocketPrefix + path
 | 
			
		||||
	}
 | 
			
		||||
	return path
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// RemoveSocket removes the socket at the specified address if
 | 
			
		||||
// it exists on the filesystem
 | 
			
		||||
func RemoveSocket(address string) error {
 | 
			
		||||
	sock := socket(address)
 | 
			
		||||
	if !sock.isAbstract() {
 | 
			
		||||
		return os.Remove(sock.path())
 | 
			
		||||
	}
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// SocketEaddrinuse returns true if the provided error is caused by the
 | 
			
		||||
// EADDRINUSE error number
 | 
			
		||||
func SocketEaddrinuse(err error) bool {
 | 
			
		||||
	netErr, ok := err.(*net.OpError)
 | 
			
		||||
	if !ok {
 | 
			
		||||
		return false
 | 
			
		||||
	}
 | 
			
		||||
	if netErr.Op != "listen" {
 | 
			
		||||
		return false
 | 
			
		||||
	}
 | 
			
		||||
	syscallErr, ok := netErr.Err.(*os.SyscallError)
 | 
			
		||||
	if !ok {
 | 
			
		||||
		return false
 | 
			
		||||
	}
 | 
			
		||||
	errno, ok := syscallErr.Err.(syscall.Errno)
 | 
			
		||||
	if !ok {
 | 
			
		||||
		return false
 | 
			
		||||
	}
 | 
			
		||||
	return errno == syscall.EADDRINUSE
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// CanConnect returns true if the socket provided at the address
 | 
			
		||||
// is accepting new connections
 | 
			
		||||
func CanConnect(address string) bool {
 | 
			
		||||
	conn, err := AnonDialer(address, 100*time.Millisecond)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return false
 | 
			
		||||
	}
 | 
			
		||||
	conn.Close()
 | 
			
		||||
	return true
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func hybridVsockDialer(addr string, port uint64, timeout time.Duration) (net.Conn, error) {
 | 
			
		||||
	timeoutCh := time.After(timeout)
 | 
			
		||||
	// Do 10 retries before timeout
 | 
			
		||||
	retryInterval := timeout / 10
 | 
			
		||||
	for {
 | 
			
		||||
		conn, err := net.DialTimeout("unix", addr, timeout)
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			return nil, err
 | 
			
		||||
		}
 | 
			
		||||
		if _, err = conn.Write([]byte(fmt.Sprintf("CONNECT %d\n", port))); err != nil {
 | 
			
		||||
			conn.Close()
 | 
			
		||||
			return nil, err
 | 
			
		||||
		}
 | 
			
		||||
		errChan := make(chan error, 1)
 | 
			
		||||
		go func() {
 | 
			
		||||
			reader := bufio.NewReader(conn)
 | 
			
		||||
			response, err := reader.ReadString('\n')
 | 
			
		||||
			if err != nil {
 | 
			
		||||
				errChan <- err
 | 
			
		||||
				return
 | 
			
		||||
			}
 | 
			
		||||
			if strings.Contains(response, "OK") {
 | 
			
		||||
				errChan <- nil
 | 
			
		||||
			} else {
 | 
			
		||||
				errChan <- fmt.Errorf("hybrid vsock handshake response error: %s", response)
 | 
			
		||||
			}
 | 
			
		||||
		}()
 | 
			
		||||
		select {
 | 
			
		||||
		case err = <-errChan:
 | 
			
		||||
			if err != nil {
 | 
			
		||||
				conn.Close()
 | 
			
		||||
				// When it is EOF, maybe the server side is not ready.
 | 
			
		||||
				if err == io.EOF {
 | 
			
		||||
					log.G(context.Background()).Warnf("Read hybrid vsock got EOF, server may not ready")
 | 
			
		||||
					time.Sleep(retryInterval)
 | 
			
		||||
					continue
 | 
			
		||||
				}
 | 
			
		||||
				return nil, err
 | 
			
		||||
			}
 | 
			
		||||
			return conn, nil
 | 
			
		||||
		case <-timeoutCh:
 | 
			
		||||
			conn.Close()
 | 
			
		||||
			return nil, fmt.Errorf("timeout waiting for hybrid vsocket handshake of %s:%d", addr, port)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func dialVsock(address string) (net.Conn, error) {
 | 
			
		||||
	contextIDString, portString, ok := strings.Cut(address, ":")
 | 
			
		||||
	if !ok {
 | 
			
		||||
		return nil, fmt.Errorf("invalid vsock address %s", address)
 | 
			
		||||
	}
 | 
			
		||||
	contextID, err := strconv.ParseUint(contextIDString, 10, 0)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil, fmt.Errorf("failed to parse vsock context id %s, %v", contextIDString, err)
 | 
			
		||||
	}
 | 
			
		||||
	if contextID > math.MaxUint32 {
 | 
			
		||||
		return nil, fmt.Errorf("vsock context id %d is invalid", contextID)
 | 
			
		||||
	}
 | 
			
		||||
	port, err := strconv.ParseUint(portString, 10, 0)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil, fmt.Errorf("failed to parse vsock port %s, %v", portString, err)
 | 
			
		||||
	}
 | 
			
		||||
	if port > math.MaxUint32 {
 | 
			
		||||
		return nil, fmt.Errorf("vsock port %d is invalid", port)
 | 
			
		||||
	}
 | 
			
		||||
	return vsock.Dial(uint32(contextID), uint32(port), &vsock.Config{})
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func dialHybridVsock(address string, timeout time.Duration) (net.Conn, error) {
 | 
			
		||||
	addr, portString, ok := strings.Cut(address, ":")
 | 
			
		||||
	if !ok {
 | 
			
		||||
		return nil, fmt.Errorf("invalid hybrid vsock address %s", address)
 | 
			
		||||
	}
 | 
			
		||||
	port, err := strconv.ParseUint(portString, 10, 0)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil, fmt.Errorf("failed to parse hybrid vsock port %s, %v", portString, err)
 | 
			
		||||
	}
 | 
			
		||||
	if port > math.MaxUint32 {
 | 
			
		||||
		return nil, fmt.Errorf("hybrid vsock port %d is invalid", port)
 | 
			
		||||
	}
 | 
			
		||||
	return hybridVsockDialer(addr, port, timeout)
 | 
			
		||||
}
 | 
			
		||||
		Reference in New Issue
	
	Block a user