add runc shim support for sched core
In Linux 5.14, and hopefully some backports, core scheduling allows processes to be co-scheduled within the same domain on SMT-enabled systems. The containerd implementation sets the core sched domain when launching a shim. This gives a clean way for each shim (container/pod) to be in its own domain, and for any additional containers (v2 pods) to be launched with the same domain, as well as any exec'd process added to the container. Kernel docs: https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/core-scheduling.html Signed-off-by: Michael Crosby <michael@thepasture.io>
This commit is contained in:
@@ -35,12 +35,20 @@ import (
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
func shimBinary(bundle *Bundle, runtime, containerdAddress string, containerdTTRPCAddress string) *binary {
|
||||
// shimBinaryConfig groups the settings that shimBinary copies into the binary it returns.
type shimBinaryConfig struct {
|
||||
// runtime is stored as binary.runtime.
runtime string
|
||||
// address is stored as binary.containerdAddress.
address string
|
||||
// ttrpcAddress is stored as binary.containerdTTRPCAddress.
ttrpcAddress string
|
||||
// schedCore is stored as binary.schedCore, which Start passes on as CommandConfig.SchedCore.
schedCore bool
|
||||
}
|
||||
|
||||
func shimBinary(bundle *Bundle, config shimBinaryConfig) *binary {
|
||||
return &binary{
|
||||
bundle: bundle,
|
||||
runtime: runtime,
|
||||
containerdAddress: containerdAddress,
|
||||
containerdTTRPCAddress: containerdTTRPCAddress,
|
||||
runtime: config.runtime,
|
||||
containerdAddress: config.address,
|
||||
containerdTTRPCAddress: config.ttrpcAddress,
|
||||
schedCore: config.schedCore,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -48,6 +56,7 @@ type binary struct {
|
||||
runtime string
|
||||
containerdAddress string
|
||||
containerdTTRPCAddress string
|
||||
schedCore bool
|
||||
bundle *Bundle
|
||||
}
|
||||
|
||||
@@ -61,13 +70,15 @@ func (b *binary) Start(ctx context.Context, opts *types.Any, onClose func()) (_
|
||||
|
||||
cmd, err := client.Command(
|
||||
ctx,
|
||||
b.runtime,
|
||||
b.containerdAddress,
|
||||
b.containerdTTRPCAddress,
|
||||
b.bundle.Path,
|
||||
opts,
|
||||
args...,
|
||||
)
|
||||
&client.CommandConfig{
|
||||
Runtime: b.runtime,
|
||||
Address: b.containerdAddress,
|
||||
TTRPCAddress: b.containerdTTRPCAddress,
|
||||
Path: b.bundle.Path,
|
||||
Opts: opts,
|
||||
Args: args,
|
||||
SchedCore: b.schedCore,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -138,14 +149,19 @@ func (b *binary) Delete(ctx context.Context) (*runtime.Exit, error) {
|
||||
}
|
||||
|
||||
cmd, err := client.Command(ctx,
|
||||
b.runtime,
|
||||
b.containerdAddress,
|
||||
b.containerdTTRPCAddress,
|
||||
bundlePath,
|
||||
nil,
|
||||
"-id", b.bundle.ID,
|
||||
"-bundle", b.bundle.Path,
|
||||
"delete")
|
||||
&client.CommandConfig{
|
||||
Runtime: b.runtime,
|
||||
Address: b.containerdAddress,
|
||||
TTRPCAddress: b.containerdTTRPCAddress,
|
||||
Path: bundlePath,
|
||||
Opts: nil,
|
||||
Args: []string{
|
||||
"-id", b.bundle.ID,
|
||||
"-bundle", b.bundle.Path,
|
||||
"delete",
|
||||
},
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -41,6 +41,8 @@ import (
|
||||
type Config struct {
|
||||
// Supported platforms
|
||||
Platforms []string `toml:"platforms"`
|
||||
// SchedCore enables Linux core scheduling
|
||||
SchedCore bool `toml:"sched_core"`
|
||||
}
|
||||
|
||||
func init() {
|
||||
@@ -55,7 +57,8 @@ func init() {
|
||||
Platforms: defaultPlatforms(),
|
||||
},
|
||||
InitFn: func(ic *plugin.InitContext) (interface{}, error) {
|
||||
supportedPlatforms, err := parsePlatforms(ic.Config.(*Config).Platforms)
|
||||
config := ic.Config.(*Config)
|
||||
supportedPlatforms, err := parsePlatforms(config.Platforms)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -78,26 +81,45 @@ func init() {
|
||||
cs := metadata.NewContainerStore(m.(*metadata.DB))
|
||||
events := ep.(*exchange.Exchange)
|
||||
|
||||
return New(ic.Context, ic.Root, ic.State, ic.Address, ic.TTRPCAddress, events, cs)
|
||||
return New(ic.Context, &ManagerConfig{
|
||||
Root: ic.Root,
|
||||
State: ic.State,
|
||||
Address: ic.Address,
|
||||
TTRPCAddress: ic.TTRPCAddress,
|
||||
Events: events,
|
||||
Store: cs,
|
||||
SchedCore: config.SchedCore,
|
||||
})
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
// ManagerConfig holds the parameters New uses to build a TaskManager.
type ManagerConfig struct {
|
||||
// Root is a directory New creates with os.MkdirAll (mode 0711); stored as TaskManager.root.
Root string
|
||||
// State is a directory New creates with os.MkdirAll (mode 0711); stored as TaskManager.state.
State string
|
||||
// Store is stored as TaskManager.containers.
Store containers.Store
|
||||
// Events is stored as TaskManager.events.
Events *exchange.Exchange
|
||||
// Address is stored as TaskManager.containerdAddress.
Address string
|
||||
// TTRPCAddress is stored as TaskManager.containerdTTRPCAddress.
TTRPCAddress string
|
||||
// SchedCore is stored as TaskManager.schedCore; the plugin InitFn fills it from Config.SchedCore.
SchedCore bool
|
||||
}
|
||||
|
||||
// New task manager for v2 shims
|
||||
func New(ctx context.Context, root, state, containerdAddress, containerdTTRPCAddress string, events *exchange.Exchange, cs containers.Store) (*TaskManager, error) {
|
||||
for _, d := range []string{root, state} {
|
||||
func New(ctx context.Context, config *ManagerConfig) (*TaskManager, error) {
|
||||
for _, d := range []string{config.Root, config.State} {
|
||||
if err := os.MkdirAll(d, 0711); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
m := &TaskManager{
|
||||
root: root,
|
||||
state: state,
|
||||
containerdAddress: containerdAddress,
|
||||
containerdTTRPCAddress: containerdTTRPCAddress,
|
||||
root: config.Root,
|
||||
state: config.State,
|
||||
containerdAddress: config.Address,
|
||||
containerdTTRPCAddress: config.TTRPCAddress,
|
||||
schedCore: config.SchedCore,
|
||||
tasks: runtime.NewTaskList(),
|
||||
events: events,
|
||||
containers: cs,
|
||||
events: config.Events,
|
||||
containers: config.Store,
|
||||
}
|
||||
if err := m.loadExistingTasks(ctx); err != nil {
|
||||
return nil, err
|
||||
@@ -111,6 +133,7 @@ type TaskManager struct {
|
||||
state string
|
||||
containerdAddress string
|
||||
containerdTTRPCAddress string
|
||||
schedCore bool
|
||||
|
||||
tasks *runtime.TaskList
|
||||
events *exchange.Exchange
|
||||
@@ -167,7 +190,12 @@ func (m *TaskManager) startShim(ctx context.Context, bundle *Bundle, id string,
|
||||
topts = opts.RuntimeOptions
|
||||
}
|
||||
|
||||
b := shimBinary(bundle, opts.Runtime, m.containerdAddress, m.containerdTTRPCAddress)
|
||||
b := shimBinary(bundle, shimBinaryConfig{
|
||||
runtime: opts.Runtime,
|
||||
address: m.containerdAddress,
|
||||
ttrpcAddress: m.containerdTTRPCAddress,
|
||||
schedCore: m.schedCore,
|
||||
})
|
||||
shim, err := b.Start(ctx, topts, func() {
|
||||
log.G(ctx).WithField("id", id).Info("shim disconnected")
|
||||
|
||||
@@ -303,7 +331,13 @@ func (m *TaskManager) loadTasks(ctx context.Context) error {
|
||||
bundle.Delete()
|
||||
continue
|
||||
}
|
||||
binaryCall := shimBinary(bundle, container.Runtime.Name, m.containerdAddress, m.containerdTTRPCAddress)
|
||||
binaryCall := shimBinary(bundle,
|
||||
shimBinaryConfig{
|
||||
runtime: container.Runtime.Name,
|
||||
address: m.containerdAddress,
|
||||
ttrpcAddress: m.containerdTTRPCAddress,
|
||||
schedCore: m.schedCore,
|
||||
})
|
||||
shim, err := loadShim(ctx, bundle, func() {
|
||||
log.G(ctx).WithField("id", id).Info("shim disconnected")
|
||||
|
||||
|
||||
@@ -24,6 +24,7 @@ import (
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
goruntime "runtime"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
@@ -37,6 +38,7 @@ import (
|
||||
"github.com/containerd/containerd/pkg/oom"
|
||||
oomv1 "github.com/containerd/containerd/pkg/oom/v1"
|
||||
"github.com/containerd/containerd/pkg/process"
|
||||
"github.com/containerd/containerd/pkg/schedcore"
|
||||
"github.com/containerd/containerd/pkg/stdio"
|
||||
"github.com/containerd/containerd/runtime/v2/runc"
|
||||
"github.com/containerd/containerd/runtime/v2/runc/options"
|
||||
@@ -166,10 +168,19 @@ func (s *service) StartShim(ctx context.Context, opts shim.StartOpts) (_ string,
|
||||
|
||||
cmd.ExtraFiles = append(cmd.ExtraFiles, f)
|
||||
|
||||
goruntime.LockOSThread()
|
||||
if os.Getenv("SCHED_CORE") != "" {
|
||||
if err := schedcore.Create(schedcore.ProcessGroup); err != nil {
|
||||
return "", errors.Wrap(err, "enable sched core support")
|
||||
}
|
||||
}
|
||||
|
||||
if err := cmd.Start(); err != nil {
|
||||
f.Close()
|
||||
return "", err
|
||||
}
|
||||
goruntime.UnlockOSThread()
|
||||
|
||||
defer func() {
|
||||
if retErr != nil {
|
||||
cmd.Process.Kill()
|
||||
|
||||
@@ -25,6 +25,7 @@ import (
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
goruntime "runtime"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
@@ -40,6 +41,7 @@ import (
|
||||
oomv1 "github.com/containerd/containerd/pkg/oom/v1"
|
||||
oomv2 "github.com/containerd/containerd/pkg/oom/v2"
|
||||
"github.com/containerd/containerd/pkg/process"
|
||||
"github.com/containerd/containerd/pkg/schedcore"
|
||||
"github.com/containerd/containerd/pkg/stdio"
|
||||
"github.com/containerd/containerd/pkg/userns"
|
||||
"github.com/containerd/containerd/runtime/v2/runc"
|
||||
@@ -234,10 +236,20 @@ func (s *service) StartShim(ctx context.Context, opts shim.StartOpts) (_ string,
|
||||
|
||||
cmd.ExtraFiles = append(cmd.ExtraFiles, f)
|
||||
|
||||
goruntime.LockOSThread()
|
||||
if os.Getenv("SCHED_CORE") != "" {
|
||||
if err := schedcore.Create(schedcore.ProcessGroup); err != nil {
|
||||
return "", errors.Wrap(err, "enable sched core support")
|
||||
}
|
||||
}
|
||||
|
||||
if err := cmd.Start(); err != nil {
|
||||
f.Close()
|
||||
return "", err
|
||||
}
|
||||
|
||||
goruntime.UnlockOSThread()
|
||||
|
||||
defer func() {
|
||||
if retErr != nil {
|
||||
cmd.Process.Kill()
|
||||
|
||||
@@ -36,8 +36,18 @@ import (
|
||||
|
||||
var runtimePaths sync.Map
|
||||
|
||||
// CommandConfig holds the inputs Command uses to build the shim *exec.Cmd.
type CommandConfig struct {
|
||||
// Runtime is the runtime name (for example io.containerd.runc.v1) that Command
// resolves to a shim binary through BinaryName and BinaryPath.
Runtime string
|
||||
// Address is passed to the shim as the value of the -address argument.
Address string
|
||||
// TTRPCAddress is exported in the command environment under ttrpcAddressEnv.
TTRPCAddress string
|
||||
// Path is used as the working directory of the command (cmd.Dir).
Path string
|
||||
// SchedCore, when true, makes Command add SCHED_CORE=1 to the command environment.
SchedCore bool
|
||||
// Args are appended after the -namespace, -address and -publish-binary arguments.
Args []string
|
||||
// Opts, when non-nil, is serialized by Command with proto.Marshal.
Opts *types.Any
|
||||
}
|
||||
|
||||
// Command returns the shim command with the provided args and configuration
|
||||
func Command(ctx context.Context, runtime, containerdAddress, containerdTTRPCAddress, path string, opts *types.Any, cmdArgs ...string) (*exec.Cmd, error) {
|
||||
func Command(ctx context.Context, config *CommandConfig) (*exec.Cmd, error) {
|
||||
ns, err := namespaces.NamespaceRequired(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -48,13 +58,13 @@ func Command(ctx context.Context, runtime, containerdAddress, containerdTTRPCAdd
|
||||
}
|
||||
args := []string{
|
||||
"-namespace", ns,
|
||||
"-address", containerdAddress,
|
||||
"-address", config.Address,
|
||||
"-publish-binary", self,
|
||||
}
|
||||
args = append(args, cmdArgs...)
|
||||
name := BinaryName(runtime)
|
||||
args = append(args, config.Args...)
|
||||
name := BinaryName(config.Runtime)
|
||||
if name == "" {
|
||||
return nil, fmt.Errorf("invalid runtime name %s, correct runtime name should format like io.containerd.runc.v1", runtime)
|
||||
return nil, fmt.Errorf("invalid runtime name %s, correct runtime name should format like io.containerd.runc.v1", config.Runtime)
|
||||
}
|
||||
|
||||
var cmdPath string
|
||||
@@ -63,7 +73,7 @@ func Command(ctx context.Context, runtime, containerdAddress, containerdTTRPCAdd
|
||||
cmdPath = cmdPathI.(string)
|
||||
} else {
|
||||
var lerr error
|
||||
binaryPath := BinaryPath(runtime)
|
||||
binaryPath := BinaryPath(config.Runtime)
|
||||
if _, serr := os.Stat(binaryPath); serr == nil {
|
||||
cmdPath = binaryPath
|
||||
}
|
||||
@@ -80,7 +90,7 @@ func Command(ctx context.Context, runtime, containerdAddress, containerdTTRPCAdd
|
||||
cmdPath = testPath
|
||||
}
|
||||
if cmdPath == "" {
|
||||
return nil, errors.Wrapf(os.ErrNotExist, "runtime %q binary not installed %q", runtime, name)
|
||||
return nil, errors.Wrapf(os.ErrNotExist, "runtime %q binary not installed %q", config.Runtime, name)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -97,15 +107,18 @@ func Command(ctx context.Context, runtime, containerdAddress, containerdTTRPCAdd
|
||||
}
|
||||
|
||||
cmd := exec.CommandContext(ctx, cmdPath, args...)
|
||||
cmd.Dir = path
|
||||
cmd.Dir = config.Path
|
||||
cmd.Env = append(
|
||||
os.Environ(),
|
||||
"GOMAXPROCS=2",
|
||||
fmt.Sprintf("%s=%s", ttrpcAddressEnv, containerdTTRPCAddress),
|
||||
fmt.Sprintf("%s=%s", ttrpcAddressEnv, config.TTRPCAddress),
|
||||
)
|
||||
if config.SchedCore {
|
||||
cmd.Env = append(cmd.Env, "SCHED_CORE=1")
|
||||
}
|
||||
cmd.SysProcAttr = getSysProcAttr()
|
||||
if opts != nil {
|
||||
d, err := proto.Marshal(opts)
|
||||
if config.Opts != nil {
|
||||
d, err := proto.Marshal(config.Opts)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user