add runc shim support for sched core

In Linux 5.14 and hopefully some backports, core scheduling allows processes to
be co-scheduled within the same domain on SMT-enabled systems.

The containerd implementation sets the core sched domain when launching a shim.
This provides a clean way for each shim (container/pod) to be in its own domain,
and for any additional containers (v2 pods) to be launched with the same domain,
as well as any exec'd process added to the container.

kernel docs: https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/core-scheduling.html

Signed-off-by: Michael Crosby <michael@thepasture.io>
This commit is contained in:
Michael Crosby
2021-09-15 17:49:38 +00:00
parent 88e1cf5fb5
commit e48bbe8394
99 changed files with 4329 additions and 3611 deletions

View File

@@ -35,12 +35,20 @@ import (
"github.com/sirupsen/logrus"
)
func shimBinary(bundle *Bundle, runtime, containerdAddress string, containerdTTRPCAddress string) *binary {
// shimBinaryConfig bundles the settings that shimBinary copies into a binary.
type shimBinaryConfig struct {
// runtime is the runtime name handed to the shim command.
runtime string
// address is stored as the binary's containerdAddress.
address string
// ttrpcAddress is stored as the binary's containerdTTRPCAddress.
ttrpcAddress string
// schedCore is forwarded as SchedCore when the shim is started.
schedCore bool
}
func shimBinary(bundle *Bundle, config shimBinaryConfig) *binary {
return &binary{
bundle: bundle,
runtime: runtime,
containerdAddress: containerdAddress,
containerdTTRPCAddress: containerdTTRPCAddress,
runtime: config.runtime,
containerdAddress: config.address,
containerdTTRPCAddress: config.ttrpcAddress,
schedCore: config.schedCore,
}
}
@@ -48,6 +56,7 @@ type binary struct {
runtime string
containerdAddress string
containerdTTRPCAddress string
schedCore bool
bundle *Bundle
}
@@ -61,13 +70,15 @@ func (b *binary) Start(ctx context.Context, opts *types.Any, onClose func()) (_
cmd, err := client.Command(
ctx,
b.runtime,
b.containerdAddress,
b.containerdTTRPCAddress,
b.bundle.Path,
opts,
args...,
)
&client.CommandConfig{
Runtime: b.runtime,
Address: b.containerdAddress,
TTRPCAddress: b.containerdTTRPCAddress,
Path: b.bundle.Path,
Opts: opts,
Args: args,
SchedCore: b.schedCore,
})
if err != nil {
return nil, err
}
@@ -138,14 +149,19 @@ func (b *binary) Delete(ctx context.Context) (*runtime.Exit, error) {
}
cmd, err := client.Command(ctx,
b.runtime,
b.containerdAddress,
b.containerdTTRPCAddress,
bundlePath,
nil,
"-id", b.bundle.ID,
"-bundle", b.bundle.Path,
"delete")
&client.CommandConfig{
Runtime: b.runtime,
Address: b.containerdAddress,
TTRPCAddress: b.containerdTTRPCAddress,
Path: bundlePath,
Opts: nil,
Args: []string{
"-id", b.bundle.ID,
"-bundle", b.bundle.Path,
"delete",
},
})
if err != nil {
return nil, err
}

View File

@@ -41,6 +41,8 @@ import (
type Config struct {
// Platforms lists the supported platforms; it is parsed with parsePlatforms
// when the plugin is initialized.
Platforms []string `toml:"platforms"`
// SchedCore enables Linux core scheduling for launched shims.
SchedCore bool `toml:"sched_core"`
}
func init() {
@@ -55,7 +57,8 @@ func init() {
Platforms: defaultPlatforms(),
},
InitFn: func(ic *plugin.InitContext) (interface{}, error) {
supportedPlatforms, err := parsePlatforms(ic.Config.(*Config).Platforms)
config := ic.Config.(*Config)
supportedPlatforms, err := parsePlatforms(config.Platforms)
if err != nil {
return nil, err
}
@@ -78,26 +81,45 @@ func init() {
cs := metadata.NewContainerStore(m.(*metadata.DB))
events := ep.(*exchange.Exchange)
return New(ic.Context, ic.Root, ic.State, ic.Address, ic.TTRPCAddress, events, cs)
return New(ic.Context, &ManagerConfig{
Root: ic.Root,
State: ic.State,
Address: ic.Address,
TTRPCAddress: ic.TTRPCAddress,
Events: events,
Store: cs,
SchedCore: config.SchedCore,
})
},
})
}
// ManagerConfig carries the settings New uses to build a TaskManager.
type ManagerConfig struct {
// Root and State are directories that New creates (mode 0711) if missing.
Root string
State string
// Store becomes the TaskManager's container store.
Store containers.Store
// Events becomes the TaskManager's event exchange.
Events *exchange.Exchange
// Address and TTRPCAddress are the containerd addresses passed on to shims.
Address string
TTRPCAddress string
// SchedCore enables Linux core scheduling for shims started by the manager.
SchedCore bool
}
// New task manager for v2 shims
func New(ctx context.Context, root, state, containerdAddress, containerdTTRPCAddress string, events *exchange.Exchange, cs containers.Store) (*TaskManager, error) {
for _, d := range []string{root, state} {
func New(ctx context.Context, config *ManagerConfig) (*TaskManager, error) {
for _, d := range []string{config.Root, config.State} {
if err := os.MkdirAll(d, 0711); err != nil {
return nil, err
}
}
m := &TaskManager{
root: root,
state: state,
containerdAddress: containerdAddress,
containerdTTRPCAddress: containerdTTRPCAddress,
root: config.Root,
state: config.State,
containerdAddress: config.Address,
containerdTTRPCAddress: config.TTRPCAddress,
schedCore: config.SchedCore,
tasks: runtime.NewTaskList(),
events: events,
containers: cs,
events: config.Events,
containers: config.Store,
}
if err := m.loadExistingTasks(ctx); err != nil {
return nil, err
@@ -111,6 +133,7 @@ type TaskManager struct {
state string
containerdAddress string
containerdTTRPCAddress string
schedCore bool
tasks *runtime.TaskList
events *exchange.Exchange
@@ -167,7 +190,12 @@ func (m *TaskManager) startShim(ctx context.Context, bundle *Bundle, id string,
topts = opts.RuntimeOptions
}
b := shimBinary(bundle, opts.Runtime, m.containerdAddress, m.containerdTTRPCAddress)
b := shimBinary(bundle, shimBinaryConfig{
runtime: opts.Runtime,
address: m.containerdAddress,
ttrpcAddress: m.containerdTTRPCAddress,
schedCore: m.schedCore,
})
shim, err := b.Start(ctx, topts, func() {
log.G(ctx).WithField("id", id).Info("shim disconnected")
@@ -303,7 +331,13 @@ func (m *TaskManager) loadTasks(ctx context.Context) error {
bundle.Delete()
continue
}
binaryCall := shimBinary(bundle, container.Runtime.Name, m.containerdAddress, m.containerdTTRPCAddress)
binaryCall := shimBinary(bundle,
shimBinaryConfig{
runtime: container.Runtime.Name,
address: m.containerdAddress,
ttrpcAddress: m.containerdTTRPCAddress,
schedCore: m.schedCore,
})
shim, err := loadShim(ctx, bundle, func() {
log.G(ctx).WithField("id", id).Info("shim disconnected")

View File

@@ -24,6 +24,7 @@ import (
"io"
"os"
"path/filepath"
goruntime "runtime"
"sync"
"syscall"
"time"
@@ -37,6 +38,7 @@ import (
"github.com/containerd/containerd/pkg/oom"
oomv1 "github.com/containerd/containerd/pkg/oom/v1"
"github.com/containerd/containerd/pkg/process"
"github.com/containerd/containerd/pkg/schedcore"
"github.com/containerd/containerd/pkg/stdio"
"github.com/containerd/containerd/runtime/v2/runc"
"github.com/containerd/containerd/runtime/v2/runc/options"
@@ -166,10 +168,19 @@ func (s *service) StartShim(ctx context.Context, opts shim.StartOpts) (_ string,
cmd.ExtraFiles = append(cmd.ExtraFiles, f)
goruntime.LockOSThread()
if os.Getenv("SCHED_CORE") != "" {
if err := schedcore.Create(schedcore.ProcessGroup); err != nil {
return "", errors.Wrap(err, "enable sched core support")
}
}
if err := cmd.Start(); err != nil {
f.Close()
return "", err
}
goruntime.UnlockOSThread()
defer func() {
if retErr != nil {
cmd.Process.Kill()

View File

@@ -25,6 +25,7 @@ import (
"io"
"os"
"path/filepath"
goruntime "runtime"
"sync"
"syscall"
"time"
@@ -40,6 +41,7 @@ import (
oomv1 "github.com/containerd/containerd/pkg/oom/v1"
oomv2 "github.com/containerd/containerd/pkg/oom/v2"
"github.com/containerd/containerd/pkg/process"
"github.com/containerd/containerd/pkg/schedcore"
"github.com/containerd/containerd/pkg/stdio"
"github.com/containerd/containerd/pkg/userns"
"github.com/containerd/containerd/runtime/v2/runc"
@@ -234,10 +236,20 @@ func (s *service) StartShim(ctx context.Context, opts shim.StartOpts) (_ string,
cmd.ExtraFiles = append(cmd.ExtraFiles, f)
goruntime.LockOSThread()
if os.Getenv("SCHED_CORE") != "" {
if err := schedcore.Create(schedcore.ProcessGroup); err != nil {
return "", errors.Wrap(err, "enable sched core support")
}
}
if err := cmd.Start(); err != nil {
f.Close()
return "", err
}
goruntime.UnlockOSThread()
defer func() {
if retErr != nil {
cmd.Process.Kill()

View File

@@ -36,8 +36,18 @@ import (
var runtimePaths sync.Map
// CommandConfig describes how Command builds the shim command.
type CommandConfig struct {
// Runtime is the runtime name (e.g. io.containerd.runc.v1) used to resolve
// the shim binary.
Runtime string
// Address is passed to the shim via the -address flag.
Address string
// TTRPCAddress is exported to the shim through the ttrpcAddressEnv
// environment variable.
TTRPCAddress string
// Path is the working directory of the shim command.
Path string
// SchedCore, when true, adds SCHED_CORE=1 to the shim's environment.
SchedCore bool
// Args are appended after the namespace, address and publish-binary flags.
Args []string
// Opts, when non-nil, is marshaled with proto.Marshal; nil skips that step.
Opts *types.Any
}
// Command returns the shim command with the provided args and configuration
func Command(ctx context.Context, runtime, containerdAddress, containerdTTRPCAddress, path string, opts *types.Any, cmdArgs ...string) (*exec.Cmd, error) {
func Command(ctx context.Context, config *CommandConfig) (*exec.Cmd, error) {
ns, err := namespaces.NamespaceRequired(ctx)
if err != nil {
return nil, err
@@ -48,13 +58,13 @@ func Command(ctx context.Context, runtime, containerdAddress, containerdTTRPCAdd
}
args := []string{
"-namespace", ns,
"-address", containerdAddress,
"-address", config.Address,
"-publish-binary", self,
}
args = append(args, cmdArgs...)
name := BinaryName(runtime)
args = append(args, config.Args...)
name := BinaryName(config.Runtime)
if name == "" {
return nil, fmt.Errorf("invalid runtime name %s, correct runtime name should format like io.containerd.runc.v1", runtime)
return nil, fmt.Errorf("invalid runtime name %s, correct runtime name should format like io.containerd.runc.v1", config.Runtime)
}
var cmdPath string
@@ -63,7 +73,7 @@ func Command(ctx context.Context, runtime, containerdAddress, containerdTTRPCAdd
cmdPath = cmdPathI.(string)
} else {
var lerr error
binaryPath := BinaryPath(runtime)
binaryPath := BinaryPath(config.Runtime)
if _, serr := os.Stat(binaryPath); serr == nil {
cmdPath = binaryPath
}
@@ -80,7 +90,7 @@ func Command(ctx context.Context, runtime, containerdAddress, containerdTTRPCAdd
cmdPath = testPath
}
if cmdPath == "" {
return nil, errors.Wrapf(os.ErrNotExist, "runtime %q binary not installed %q", runtime, name)
return nil, errors.Wrapf(os.ErrNotExist, "runtime %q binary not installed %q", config.Runtime, name)
}
}
}
@@ -97,15 +107,18 @@ func Command(ctx context.Context, runtime, containerdAddress, containerdTTRPCAdd
}
cmd := exec.CommandContext(ctx, cmdPath, args...)
cmd.Dir = path
cmd.Dir = config.Path
cmd.Env = append(
os.Environ(),
"GOMAXPROCS=2",
fmt.Sprintf("%s=%s", ttrpcAddressEnv, containerdTTRPCAddress),
fmt.Sprintf("%s=%s", ttrpcAddressEnv, config.TTRPCAddress),
)
if config.SchedCore {
cmd.Env = append(cmd.Env, "SCHED_CORE=1")
}
cmd.SysProcAttr = getSysProcAttr()
if opts != nil {
d, err := proto.Marshal(opts)
if config.Opts != nil {
d, err := proto.Marshal(config.Opts)
if err != nil {
return nil, err
}