
In Linux 5.14 (and hopefully some backports), core scheduling allows processes to be co-scheduled within the same domain on SMT-enabled systems. The containerd implementation sets the core sched domain when launching a shim. This gives a clean way for each shim (container/pod) to be in its own domain, and for any additional containers (v2 pods), as well as any exec'd process added to the container, to be launched with the same domain. Kernel docs: https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/core-scheduling.html Signed-off-by: Michael Crosby <michael@thepasture.io>
195 lines
5.1 KiB
Go
195 lines
5.1 KiB
Go
/*
   Copyright The containerd Authors.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
*/
|
|
|
|
package v2
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"io"
|
|
"os"
|
|
gruntime "runtime"
|
|
"strings"
|
|
|
|
"github.com/containerd/containerd/log"
|
|
"github.com/containerd/containerd/namespaces"
|
|
"github.com/containerd/containerd/runtime"
|
|
client "github.com/containerd/containerd/runtime/v2/shim"
|
|
"github.com/containerd/containerd/runtime/v2/task"
|
|
"github.com/containerd/ttrpc"
|
|
"github.com/gogo/protobuf/types"
|
|
"github.com/pkg/errors"
|
|
"github.com/sirupsen/logrus"
|
|
)
|
|
|
|
// shimBinaryConfig groups the settings that shimBinary copies into a binary.
type shimBinaryConfig struct {
	// runtime is copied to binary.runtime.
	runtime string
	// address is copied to binary.containerdAddress.
	address string
	// ttrpcAddress is copied to binary.containerdTTRPCAddress.
	ttrpcAddress string
	// schedCore is copied to binary.schedCore.
	schedCore bool
}
|
|
|
|
func shimBinary(bundle *Bundle, config shimBinaryConfig) *binary {
|
|
return &binary{
|
|
bundle: bundle,
|
|
runtime: config.runtime,
|
|
containerdAddress: config.address,
|
|
containerdTTRPCAddress: config.ttrpcAddress,
|
|
schedCore: config.schedCore,
|
|
}
|
|
}
|
|
|
|
type binary struct {
|
|
runtime string
|
|
containerdAddress string
|
|
containerdTTRPCAddress string
|
|
schedCore bool
|
|
bundle *Bundle
|
|
}
|
|
|
|
func (b *binary) Start(ctx context.Context, opts *types.Any, onClose func()) (_ *shim, err error) {
|
|
args := []string{"-id", b.bundle.ID}
|
|
switch logrus.GetLevel() {
|
|
case logrus.DebugLevel, logrus.TraceLevel:
|
|
args = append(args, "-debug")
|
|
}
|
|
args = append(args, "start")
|
|
|
|
cmd, err := client.Command(
|
|
ctx,
|
|
&client.CommandConfig{
|
|
Runtime: b.runtime,
|
|
Address: b.containerdAddress,
|
|
TTRPCAddress: b.containerdTTRPCAddress,
|
|
Path: b.bundle.Path,
|
|
Opts: opts,
|
|
Args: args,
|
|
SchedCore: b.schedCore,
|
|
})
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
// Windows needs a namespace when openShimLog
|
|
ns, _ := namespaces.Namespace(ctx)
|
|
shimCtx, cancelShimLog := context.WithCancel(namespaces.WithNamespace(context.Background(), ns))
|
|
defer func() {
|
|
if err != nil {
|
|
cancelShimLog()
|
|
}
|
|
}()
|
|
f, err := openShimLog(shimCtx, b.bundle, client.AnonDialer)
|
|
if err != nil {
|
|
return nil, errors.Wrap(err, "open shim log pipe")
|
|
}
|
|
defer func() {
|
|
if err != nil {
|
|
f.Close()
|
|
}
|
|
}()
|
|
// open the log pipe and block until the writer is ready
|
|
// this helps with synchronization of the shim
|
|
// copy the shim's logs to containerd's output
|
|
go func() {
|
|
defer f.Close()
|
|
_, err := io.Copy(os.Stderr, f)
|
|
// To prevent flood of error messages, the expected error
|
|
// should be reset, like os.ErrClosed or os.ErrNotExist, which
|
|
// depends on platform.
|
|
err = checkCopyShimLogError(ctx, err)
|
|
if err != nil {
|
|
log.G(ctx).WithError(err).Error("copy shim log")
|
|
}
|
|
}()
|
|
out, err := cmd.CombinedOutput()
|
|
if err != nil {
|
|
return nil, errors.Wrapf(err, "%s", out)
|
|
}
|
|
address := strings.TrimSpace(string(out))
|
|
conn, err := client.Connect(address, client.AnonDialer)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
onCloseWithShimLog := func() {
|
|
onClose()
|
|
cancelShimLog()
|
|
f.Close()
|
|
}
|
|
client := ttrpc.NewClient(conn, ttrpc.WithOnClose(onCloseWithShimLog))
|
|
return &shim{
|
|
bundle: b.bundle,
|
|
client: client,
|
|
task: task.NewTaskClient(client),
|
|
}, nil
|
|
}
|
|
|
|
func (b *binary) Delete(ctx context.Context) (*runtime.Exit, error) {
|
|
log.G(ctx).Info("cleaning up dead shim")
|
|
|
|
// On Windows and FreeBSD, the current working directory of the shim should
|
|
// not be the bundle path during the delete operation. Instead, we invoke
|
|
// with the default work dir and forward the bundle path on the cmdline.
|
|
// Windows cannot delete the current working directory while an executable
|
|
// is in use with it. On FreeBSD, fork/exec can fail.
|
|
var bundlePath string
|
|
if gruntime.GOOS != "windows" && gruntime.GOOS != "freebsd" {
|
|
bundlePath = b.bundle.Path
|
|
}
|
|
|
|
cmd, err := client.Command(ctx,
|
|
&client.CommandConfig{
|
|
Runtime: b.runtime,
|
|
Address: b.containerdAddress,
|
|
TTRPCAddress: b.containerdTTRPCAddress,
|
|
Path: bundlePath,
|
|
Opts: nil,
|
|
Args: []string{
|
|
"-id", b.bundle.ID,
|
|
"-bundle", b.bundle.Path,
|
|
"delete",
|
|
},
|
|
})
|
|
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
var (
|
|
out = bytes.NewBuffer(nil)
|
|
errb = bytes.NewBuffer(nil)
|
|
)
|
|
cmd.Stdout = out
|
|
cmd.Stderr = errb
|
|
if err := cmd.Run(); err != nil {
|
|
log.G(ctx).WithField("cmd", cmd).WithError(err).Error("failed to delete")
|
|
return nil, errors.Wrapf(err, "%s", errb.String())
|
|
}
|
|
s := errb.String()
|
|
if s != "" {
|
|
log.G(ctx).Warnf("cleanup warnings %s", s)
|
|
}
|
|
var response task.DeleteResponse
|
|
if err := response.Unmarshal(out.Bytes()); err != nil {
|
|
return nil, err
|
|
}
|
|
if err := b.bundle.Delete(); err != nil {
|
|
return nil, err
|
|
}
|
|
return &runtime.Exit{
|
|
Status: response.ExitStatus,
|
|
Timestamp: response.ExitedAt,
|
|
Pid: response.Pid,
|
|
}, nil
|
|
}
|