
In Linux 5.14 and hopefully some backports, core scheduling allows processes to be co-scheduled within the same domain on SMT-enabled systems. The containerd implementation sets the core sched domain when launching a shim. This gives each shim (container/pod) a clean way to be in its own domain, and any additional containers (v2 pods), as well as any exec'd process added to the container, can be launched within the same domain. kernel docs: https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/core-scheduling.html Signed-off-by: Michael Crosby <michael@thepasture.io>
217 lines
5.5 KiB
Go
217 lines
5.5 KiB
Go
/*
|
|
Copyright The containerd Authors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package shim
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"fmt"
|
|
"net"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/containerd/containerd/namespaces"
|
|
"github.com/gogo/protobuf/proto"
|
|
"github.com/gogo/protobuf/types"
|
|
"github.com/pkg/errors"
|
|
exec "golang.org/x/sys/execabs"
|
|
)
|
|
|
|
// runtimePaths caches resolved shim binary locations. Command stores entries
// keyed by the shim binary name (string) with the absolute binary path
// (string) as the value, so the filesystem/PATH lookup is not repeated for
// every shim launch.
var runtimePaths sync.Map
|
|
|
|
type CommandConfig struct {
|
|
Runtime string
|
|
Address string
|
|
TTRPCAddress string
|
|
Path string
|
|
SchedCore bool
|
|
Args []string
|
|
Opts *types.Any
|
|
}
|
|
|
|
// Command returns the shim command with the provided args and configuration
|
|
func Command(ctx context.Context, config *CommandConfig) (*exec.Cmd, error) {
|
|
ns, err := namespaces.NamespaceRequired(ctx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
self, err := os.Executable()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
args := []string{
|
|
"-namespace", ns,
|
|
"-address", config.Address,
|
|
"-publish-binary", self,
|
|
}
|
|
args = append(args, config.Args...)
|
|
name := BinaryName(config.Runtime)
|
|
if name == "" {
|
|
return nil, fmt.Errorf("invalid runtime name %s, correct runtime name should format like io.containerd.runc.v1", config.Runtime)
|
|
}
|
|
|
|
var cmdPath string
|
|
cmdPathI, cmdPathFound := runtimePaths.Load(name)
|
|
if cmdPathFound {
|
|
cmdPath = cmdPathI.(string)
|
|
} else {
|
|
var lerr error
|
|
binaryPath := BinaryPath(config.Runtime)
|
|
if _, serr := os.Stat(binaryPath); serr == nil {
|
|
cmdPath = binaryPath
|
|
}
|
|
|
|
if cmdPath == "" {
|
|
if cmdPath, lerr = exec.LookPath(name); lerr != nil {
|
|
if eerr, ok := lerr.(*exec.Error); ok {
|
|
if eerr.Err == exec.ErrNotFound {
|
|
// Match the calling binaries (containerd) path and see
|
|
// if they are side by side. If so, execute the shim
|
|
// found there.
|
|
testPath := filepath.Join(filepath.Dir(self), name)
|
|
if _, serr := os.Stat(testPath); serr == nil {
|
|
cmdPath = testPath
|
|
}
|
|
if cmdPath == "" {
|
|
return nil, errors.Wrapf(os.ErrNotExist, "runtime %q binary not installed %q", config.Runtime, name)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
cmdPath, err = filepath.Abs(cmdPath)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if cmdPathI, cmdPathFound = runtimePaths.LoadOrStore(name, cmdPath); cmdPathFound {
|
|
// We didn't store cmdPath we loaded an already cached value. Use it.
|
|
cmdPath = cmdPathI.(string)
|
|
}
|
|
}
|
|
|
|
cmd := exec.CommandContext(ctx, cmdPath, args...)
|
|
cmd.Dir = config.Path
|
|
cmd.Env = append(
|
|
os.Environ(),
|
|
"GOMAXPROCS=2",
|
|
fmt.Sprintf("%s=%s", ttrpcAddressEnv, config.TTRPCAddress),
|
|
)
|
|
if config.SchedCore {
|
|
cmd.Env = append(cmd.Env, "SCHED_CORE=1")
|
|
}
|
|
cmd.SysProcAttr = getSysProcAttr()
|
|
if config.Opts != nil {
|
|
d, err := proto.Marshal(config.Opts)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
cmd.Stdin = bytes.NewReader(d)
|
|
}
|
|
return cmd, nil
|
|
}
|
|
|
|
// BinaryName returns the shim binary name from the runtime name,
|
|
// empty string returns means runtime name is invalid
|
|
func BinaryName(runtime string) string {
|
|
// runtime name should format like $prefix.name.version
|
|
parts := strings.Split(runtime, ".")
|
|
if len(parts) < 2 {
|
|
return ""
|
|
}
|
|
|
|
return fmt.Sprintf(shimBinaryFormat, parts[len(parts)-2], parts[len(parts)-1])
|
|
}
|
|
|
|
// BinaryPath returns the full path for the shim binary from the runtime name,
|
|
// empty string returns means runtime name is invalid
|
|
func BinaryPath(runtime string) string {
|
|
dir := filepath.Dir(runtime)
|
|
binary := BinaryName(runtime)
|
|
|
|
path, err := filepath.Abs(filepath.Join(dir, binary))
|
|
if err != nil {
|
|
return ""
|
|
}
|
|
|
|
return path
|
|
}
|
|
|
|
// Connect dials address using the supplied dial function d, giving it a
// fixed timeout of 100 seconds, and returns whatever d returns.
func Connect(address string, d func(string, time.Duration) (net.Conn, error)) (net.Conn, error) {
	const dialTimeout = 100 * time.Second
	conn, err := d(address, dialTimeout)
	return conn, err
}
|
|
|
|
// WritePidFile writes a pid file atomically.
//
// The pid is written in decimal to a temporary file named ".<base>" in the
// same directory as path, and that file is then renamed onto path. The
// temporary file is created exclusively, so the call fails if it already
// exists.
//
// Any error from creating, writing, closing or renaming the temporary file is
// returned. On failure after creation the temporary file is removed so that a
// later call for the same path is not blocked by a stale leftover.
func WritePidFile(path string, pid int) error {
	path, err := filepath.Abs(path)
	if err != nil {
		return err
	}
	tempPath := filepath.Join(filepath.Dir(path), fmt.Sprintf(".%s", filepath.Base(path)))
	f, err := os.OpenFile(tempPath, os.O_RDWR|os.O_CREATE|os.O_EXCL|os.O_SYNC, 0666)
	if err != nil {
		return err
	}
	_, err = fmt.Fprintf(f, "%d", pid)
	// Always close the file, but keep the write error if there was one.
	if cerr := f.Close(); err == nil {
		err = cerr
	}
	if err == nil {
		err = os.Rename(tempPath, path)
	}
	if err != nil {
		// Best-effort cleanup; the original failure is the error worth
		// reporting, so a failed removal is deliberately ignored.
		_ = os.Remove(tempPath)
		return err
	}
	return nil
}
|
|
|
|
// WriteAddress writes an address file atomically.
//
// The address is written verbatim to a temporary file named ".<base>" in the
// same directory as path, and that file is then renamed onto path. The
// temporary file is created exclusively, so the call fails if it already
// exists.
//
// Any error from creating, writing, closing or renaming the temporary file is
// returned. On failure after creation the temporary file is removed so that a
// later call for the same path is not blocked by a stale leftover.
func WriteAddress(path, address string) error {
	path, err := filepath.Abs(path)
	if err != nil {
		return err
	}
	tempPath := filepath.Join(filepath.Dir(path), fmt.Sprintf(".%s", filepath.Base(path)))
	f, err := os.OpenFile(tempPath, os.O_RDWR|os.O_CREATE|os.O_EXCL|os.O_SYNC, 0666)
	if err != nil {
		return err
	}
	_, err = f.WriteString(address)
	// Always close the file, but keep the write error if there was one.
	if cerr := f.Close(); err == nil {
		err = cerr
	}
	if err == nil {
		err = os.Rename(tempPath, path)
	}
	if err != nil {
		// Best-effort cleanup; the original failure is the error worth
		// reporting, so a failed removal is deliberately ignored.
		_ = os.Remove(tempPath)
		return err
	}
	return nil
}
|
|
|
|
// ErrNoAddress is the sentinel reported by ReadAddress when the address file
// exists but is empty.
var ErrNoAddress = errors.New("no shim address")

// ReadAddress reads the shim's socket address from the file at path and
// returns the file's full contents as a string. Errors from resolving or
// reading the path are returned as-is; an empty file yields ErrNoAddress.
func ReadAddress(path string) (string, error) {
	absPath, err := filepath.Abs(path)
	if err != nil {
		return "", err
	}
	contents, err := os.ReadFile(absPath)
	switch {
	case err != nil:
		return "", err
	case len(contents) == 0:
		return "", ErrNoAddress
	}
	return string(contents), nil
}
|