containerd/runtime/v2/shim/util.go
Michael Crosby e48bbe8394 add runc shim support for sched core
In linux 5.14 and hopefully some backports, core scheduling allows processes to
be co scheduled within the same domain on SMT enabled systems.

The containerd impl sets the core sched domain when launching a shim. This
allows a clean way for each shim (container/pod) to be in its own domain, and for any
additional containers (v2 pods) to be launched with the same domain, as well as
any exec'd process added to the container.

kernel docs: https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/core-scheduling.html

Signed-off-by: Michael Crosby <michael@thepasture.io>
2021-10-08 16:18:09 +00:00

217 lines
5.5 KiB
Go

/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package shim
import (
"bytes"
"context"
"fmt"
"net"
"os"
"path/filepath"
"strings"
"sync"
"time"
"github.com/containerd/containerd/namespaces"
"github.com/gogo/protobuf/proto"
"github.com/gogo/protobuf/types"
"github.com/pkg/errors"
exec "golang.org/x/sys/execabs"
)
// runtimePaths caches the resolved absolute path of each shim binary, keyed by
// the shim binary name, so that later calls to Command can skip the
// filesystem lookup.
var runtimePaths sync.Map
// CommandConfig holds the settings Command uses to build the shim command.
type CommandConfig struct {
	// Runtime is the runtime name (for example io.containerd.runc.v1) from
	// which the shim binary name and path are derived.
	Runtime string
	// Address is passed to the shim through the -address flag.
	Address string
	// TTRPCAddress is exported to the shim through the environment variable
	// named by ttrpcAddressEnv.
	TTRPCAddress string
	// Path is used as the working directory of the shim command.
	Path string
	// SchedCore, when true, adds SCHED_CORE=1 to the shim's environment.
	SchedCore bool
	// Args are extra arguments appended after the standard shim flags.
	Args []string
	// Opts, when non-nil, is protobuf-marshaled and supplied to the shim on
	// stdin.
	Opts *types.Any
}
// Command returns the shim command with the provided args and configuration.
//
// The namespace is read from ctx and is required. The shim binary path is
// cached per binary name in runtimePaths; on a cache miss it is resolved by
// trying, in order:
//  1. the path derived from the runtime name by BinaryPath,
//  2. a lookup of the binary name through exec.LookPath,
//  3. a binary of that name located next to the current executable.
//
// If none of these yields a binary, an error wrapping os.ErrNotExist is
// returned. Any other lookup failure is returned as an error as well, rather
// than being silently turned into an empty path.
//
// The returned command is not started. Its working directory is config.Path,
// its environment is the current environment plus GOMAXPROCS=2, the TTRPC
// address and (when config.SchedCore is set) SCHED_CORE=1, and config.Opts,
// when non-nil, is marshaled and attached as stdin.
func Command(ctx context.Context, config *CommandConfig) (*exec.Cmd, error) {
	ns, err := namespaces.NamespaceRequired(ctx)
	if err != nil {
		return nil, err
	}
	self, err := os.Executable()
	if err != nil {
		return nil, err
	}
	args := []string{
		"-namespace", ns,
		"-address", config.Address,
		"-publish-binary", self,
	}
	args = append(args, config.Args...)
	name := BinaryName(config.Runtime)
	if name == "" {
		return nil, fmt.Errorf("invalid runtime name %s, correct runtime name should format like io.containerd.runc.v1", config.Runtime)
	}

	var cmdPath string
	cmdPathI, cmdPathFound := runtimePaths.Load(name)
	if cmdPathFound {
		cmdPath = cmdPathI.(string)
	} else {
		binaryPath := BinaryPath(config.Runtime)
		if _, serr := os.Stat(binaryPath); serr == nil {
			cmdPath = binaryPath
		}
		if cmdPath == "" {
			var lerr error
			if cmdPath, lerr = exec.LookPath(name); lerr != nil {
				eerr, ok := lerr.(*exec.Error)
				if !ok || eerr.Err != exec.ErrNotFound {
					// LookPath failed for a reason other than "not found"
					// and returned no usable path. Continuing would resolve
					// the empty path to the working directory and cache it
					// as the shim binary, so surface the error instead.
					return nil, errors.Wrapf(lerr, "looking up runtime %q binary %q", config.Runtime, name)
				}
				// Match the calling binaries (containerd) path and see
				// if they are side by side. If so, execute the shim
				// found there.
				testPath := filepath.Join(filepath.Dir(self), name)
				if _, serr := os.Stat(testPath); serr != nil {
					return nil, errors.Wrapf(os.ErrNotExist, "runtime %q binary not installed %q", config.Runtime, name)
				}
				cmdPath = testPath
			}
		}
		cmdPath, err = filepath.Abs(cmdPath)
		if err != nil {
			return nil, err
		}
		if cmdPathI, cmdPathFound = runtimePaths.LoadOrStore(name, cmdPath); cmdPathFound {
			// We didn't store cmdPath we loaded an already cached value. Use it.
			cmdPath = cmdPathI.(string)
		}
	}

	cmd := exec.CommandContext(ctx, cmdPath, args...)
	cmd.Dir = config.Path
	cmd.Env = append(
		os.Environ(),
		"GOMAXPROCS=2",
		fmt.Sprintf("%s=%s", ttrpcAddressEnv, config.TTRPCAddress),
	)
	if config.SchedCore {
		cmd.Env = append(cmd.Env, "SCHED_CORE=1")
	}
	cmd.SysProcAttr = getSysProcAttr()
	if config.Opts != nil {
		d, err := proto.Marshal(config.Opts)
		if err != nil {
			return nil, err
		}
		cmd.Stdin = bytes.NewReader(d)
	}
	return cmd, nil
}
// BinaryName derives the shim binary name from a runtime name.
//
// The runtime name is expected to look like $prefix.name.version; the last
// two dot-separated components are substituted into shimBinaryFormat. An
// empty string is returned when the runtime name has fewer than two
// components, which signals an invalid runtime name.
func BinaryName(runtime string) string {
	segments := strings.Split(runtime, ".")
	n := len(segments)
	if n >= 2 {
		shimName, shimVersion := segments[n-2], segments[n-1]
		return fmt.Sprintf(shimBinaryFormat, shimName, shimVersion)
	}
	return ""
}
// BinaryPath returns the full path for the shim binary from the runtime name.
//
// The binary is placed in the directory component of the runtime name and the
// result is made absolute. An empty string is returned when the runtime name
// is invalid (BinaryName yields no binary name) or when the path cannot be
// made absolute.
func BinaryPath(runtime string) string {
	binary := BinaryName(runtime)
	if binary == "" {
		// Joining an empty binary name would yield just the directory, which
		// would then be reported as if it were a valid shim path.
		return ""
	}
	path, err := filepath.Abs(filepath.Join(filepath.Dir(runtime), binary))
	if err != nil {
		return ""
	}
	return path
}
// Connect dials the provided address using the dialer d, passing it a fixed
// timeout of 100 seconds, and returns whatever the dialer returns.
func Connect(address string, d func(string, time.Duration) (net.Conn, error)) (net.Conn, error) {
	const dialTimeout = 100 * time.Second
	conn, err := d(address, dialTimeout)
	return conn, err
}
// WritePidFile writes a pid file atomically.
//
// The pid is written in decimal to a hidden temporary file (".<name>") in the
// same directory as path, which is then renamed over path. The temporary file
// is created exclusively, so the call fails if it already exists.
//
// If writing, closing or renaming fails, the temporary file is removed so a
// failed attempt does not block later calls, and the error is returned.
func WritePidFile(path string, pid int) error {
	path, err := filepath.Abs(path)
	if err != nil {
		return err
	}
	tempPath := filepath.Join(filepath.Dir(path), fmt.Sprintf(".%s", filepath.Base(path)))
	f, err := os.OpenFile(tempPath, os.O_RDWR|os.O_CREATE|os.O_EXCL|os.O_SYNC, 0666)
	if err != nil {
		// Do not remove tempPath here: we did not create it.
		return err
	}
	_, err = fmt.Fprintf(f, "%d", pid)
	// Always close; report the close error only if the write itself succeeded.
	if cerr := f.Close(); err == nil {
		err = cerr
	}
	if err == nil {
		err = os.Rename(tempPath, path)
	}
	if err != nil {
		// Best-effort cleanup; the original failure is what the caller needs.
		_ = os.Remove(tempPath)
		return err
	}
	return nil
}
// WriteAddress writes an address file atomically.
//
// The address is written to a hidden temporary file (".<name>") in the same
// directory as path, which is then renamed over path. The temporary file is
// created exclusively, so the call fails if it already exists.
//
// If writing, closing or renaming fails, the temporary file is removed so a
// failed attempt does not block later calls, and the error is returned.
func WriteAddress(path, address string) error {
	path, err := filepath.Abs(path)
	if err != nil {
		return err
	}
	tempPath := filepath.Join(filepath.Dir(path), fmt.Sprintf(".%s", filepath.Base(path)))
	f, err := os.OpenFile(tempPath, os.O_RDWR|os.O_CREATE|os.O_EXCL|os.O_SYNC, 0666)
	if err != nil {
		// Do not remove tempPath here: we did not create it.
		return err
	}
	_, err = f.WriteString(address)
	// Always close; report the close error only if the write itself succeeded.
	if cerr := f.Close(); err == nil {
		err = cerr
	}
	if err == nil {
		err = os.Rename(tempPath, path)
	}
	if err != nil {
		// Best-effort cleanup; the original failure is what the caller needs.
		_ = os.Remove(tempPath)
		return err
	}
	return nil
}
// ErrNoAddress is returned when the address file has no content
var ErrNoAddress = errors.New("no shim address")

// ReadAddress reads the shim's socket address from the file at path.
//
// The path is made absolute before reading. The file content is returned
// verbatim as a string; an existing but empty file yields ErrNoAddress, and
// any error from resolving or reading the path is returned unchanged.
func ReadAddress(path string) (string, error) {
	absPath, err := filepath.Abs(path)
	if err != nil {
		return "", err
	}
	contents, err := os.ReadFile(absPath)
	switch {
	case err != nil:
		return "", err
	case len(contents) == 0:
		return "", ErrNoAddress
	default:
		return string(contents), nil
	}
}