
In all of the examples, it's recommended to call `Wait()` before starting a process/task. Since `Wait()` is a blocking call, it must be called from a goroutine like so:

```go
statusC := make(chan uint32)
go func() {
	status, err := task.Wait(ctx)
	if err != nil {
		// handle async err
	}
	statusC <- status
}()

task.Start(ctx)

<-statusC
```

This leaves a race: there is no guarantee when the goroutine will be scheduled, and the window is a bit wider still because the goroutine has to make an RPC call. In addition, this code is messy and is a common pattern for any caller using Wait+Start.

Instead, this changes `Wait()` to use an async model, having `Wait()` return a channel instead of the exit code itself. This ensures that when `Wait()` returns, the client already has a handle on the event stream (the RPC request has been made), and it reduces any remaining race to how the stream is handled by gRPC, since we can't guarantee that a goroutine is running and blocked on `Recv()`.

Making `Wait()` async also cleans up the caller's code drastically:

```go
statusC, err := task.Wait(ctx)
if err != nil {
	return err
}

task.Start(ctx)

status := <-statusC
if status.Err != nil {
	return status.Err
}
```

No more spinning up goroutines, and more natural error handling for the caller.

Signed-off-by: Brian Goff <cpuguy83@gmail.com>
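Since `Wait()` now hands back a channel, callers can also compose it with `select` to bound how long they wait. The sketch below is illustrative only and not part of this change: it assumes the status value carries the `Err` field used in the stress tool further down, and the 30-second timeout and `SIGKILL` fallback are arbitrary choices for the example.

```go
statusC, err := task.Wait(ctx)
if err != nil {
	return err
}
if err := task.Start(ctx); err != nil {
	return err
}

select {
case status := <-statusC:
	// the task exited on its own
	if status.Err != nil {
		return status.Err
	}
case <-time.After(30 * time.Second):
	// example-only timeout: give up, kill the task, then drain the channel
	if err := task.Kill(ctx, syscall.SIGKILL); err != nil {
		return err
	}
	<-statusC
}
return nil
```

Because the result is just a channel, exit handling plugs into whatever select loop the caller already runs. The full source of the `containerd-stress` tool, which exercises the new API, follows.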
```go
package main

import (
	"context"
	"fmt"
	"os"
	"os/signal"
	"runtime"
	"strings"
	"sync"
	"syscall"
	"time"

	"github.com/containerd/containerd"
	"github.com/containerd/containerd/namespaces"
	specs "github.com/opencontainers/runtime-spec/specs-go"
	"github.com/sirupsen/logrus"
	"github.com/urfave/cli"
)

const imageName = "docker.io/library/alpine:latest"

func main() {
	// more power!
	runtime.GOMAXPROCS(runtime.NumCPU())

	app := cli.NewApp()
	app.Name = "containerd-stress"
	app.Description = "stress test a containerd daemon"
	app.Flags = []cli.Flag{
		cli.BoolFlag{
			Name:  "debug",
			Usage: "set debug output in the logs",
		},
		cli.StringFlag{
			Name:  "address,a",
			Value: "/run/containerd/containerd.sock",
			Usage: "path to the containerd socket",
		},
		cli.IntFlag{
			Name:  "concurrent,c",
			Value: 1,
			Usage: "set the concurrency of the stress test",
		},
		cli.DurationFlag{
			Name:  "duration,d",
			Value: 1 * time.Minute,
			Usage: "set the duration of the stress test",
		},
	}
	app.Before = func(context *cli.Context) error {
		if context.GlobalBool("debug") {
			logrus.SetLevel(logrus.DebugLevel)
		}
		return nil
	}
	app.Action = func(context *cli.Context) error {
		config := config{
			Address:     context.GlobalString("address"),
			Duration:    context.GlobalDuration("duration"),
			Concurrency: context.GlobalInt("concurrent"),
		}
		return test(config)
	}
	if err := app.Run(os.Args); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

type config struct {
	Concurrency int
	Duration    time.Duration
	Address     string
}

func (c config) newClient() (*containerd.Client, error) {
	return containerd.New(c.Address)
}

// test pulls the alpine image and runs c.Concurrency workers that create,
// start, and delete containers until c.Duration has elapsed.
func test(c config) error {
	var (
		wg  sync.WaitGroup
		ctx = namespaces.WithNamespace(context.Background(), "stress")
	)

	client, err := c.newClient()
	if err != nil {
		return err
	}
	defer client.Close()
	if err := cleanup(ctx, client); err != nil {
		return err
	}
	logrus.Infof("pulling %s", imageName)
	image, err := client.Pull(ctx, imageName, containerd.WithPullUnpack)
	if err != nil {
		return err
	}
	logrus.Info("generating spec from image")
	spec, err := containerd.GenerateSpec(containerd.WithImageConfig(ctx, image), containerd.WithProcessArgs("true"))
	if err != nil {
		return err
	}
	tctx, cancel := context.WithTimeout(ctx, c.Duration)
	go func() {
		s := make(chan os.Signal, 1)
		signal.Notify(s, syscall.SIGTERM, syscall.SIGINT)
		<-s
		cancel()
	}()

	var (
		workers []*worker
		start   = time.Now()
	)
	logrus.Info("starting stress test run...")
	for i := 0; i < c.Concurrency; i++ {
		wg.Add(1)
		w := &worker{
			id:     i,
			wg:     &wg,
			spec:   spec,
			image:  image,
			client: client,
		}
		workers = append(workers, w)
		go w.run(ctx, tctx)
	}
	wg.Wait()

	var (
		total    int
		failures int
		end      = time.Now().Sub(start).Seconds()
	)
	logrus.Infof("ending test run in %0.3f seconds", end)
	for _, w := range workers {
		total += w.count
		failures += w.failures
	}
	logrus.WithField("failures", failures).Infof(
		"create/start/delete %d containers in %0.3f seconds (%0.3f c/sec) or (%0.3f sec/c)",
		total,
		end,
		float64(total)/end,
		end/float64(total),
	)
	return nil
}

// worker tracks the containers a single goroutine creates, starts, and
// deletes, along with how many of those runs failed.
type worker struct {
	id          int
	wg          *sync.WaitGroup
	count       int
	failures    int
	waitContext context.Context

	client *containerd.Client
	image  containerd.Image
	spec   *specs.Spec
}

func (w *worker) run(ctx, tctx context.Context) {
	defer func() {
		w.wg.Done()
		logrus.Infof("worker %d finished", w.id)
	}()
	wctx, cancel := context.WithCancel(ctx)
	w.waitContext = wctx
	go func() {
		<-tctx.Done()
		cancel()
	}()
	for {
		select {
		case <-tctx.Done():
			return
		default:
		}

		w.count++
		id := w.getID()
		logrus.Debugf("starting container %s", id)
		if err := w.runContainer(ctx, id); err != nil {
			// deadline-exceeded errors are expected when the test duration
			// ends mid-run, so do not count them as failures
			if err != context.DeadlineExceeded &&
				!strings.Contains(err.Error(), context.DeadlineExceeded.Error()) {
				w.failures++
				logrus.WithError(err).Errorf("running container %s", id)
			}
		}
	}
}

// runContainer creates, starts, and waits on a single container with the provided id.
func (w *worker) runContainer(ctx context.Context, id string) error {
	c, err := w.client.NewContainer(ctx, id,
		containerd.WithSpec(w.spec),
		containerd.WithNewSnapshot(id, w.image),
	)
	if err != nil {
		return err
	}
	defer c.Delete(ctx, containerd.WithSnapshotCleanup)

	task, err := c.NewTask(ctx, containerd.NullIO)
	if err != nil {
		return err
	}
	defer task.Delete(ctx, containerd.WithProcessKill)

	// Wait is called before Start so the exit event cannot be missed
	statusC, err := task.Wait(ctx)
	if err != nil {
		return err
	}

	if err := task.Start(ctx); err != nil {
		return err
	}
	status := <-statusC
	if status.Err != nil {
		if status.Err == context.DeadlineExceeded || status.Err == context.Canceled {
			return nil
		}
		w.failures++
	}
	return nil
}

func (w *worker) getID() string {
	return fmt.Sprintf("%d-%d", w.id, w.count)
}

func (w *worker) cleanup(ctx context.Context, c containerd.Container) {
	if err := c.Delete(ctx, containerd.WithSnapshotCleanup); err != nil {
		if err == context.DeadlineExceeded {
			return
		}
		w.failures++
		logrus.WithError(err).Errorf("delete container %s", c.ID())
	}
}

// cleanup cleans up any containers in the "stress" namespace before the test run
func cleanup(ctx context.Context, client *containerd.Client) error {
	containers, err := client.Containers(ctx)
	if err != nil {
		return err
	}
	for _, c := range containers {
		task, err := c.Task(ctx, nil)
		if err == nil {
			task.Delete(ctx, containerd.WithProcessKill)
		}
		if err := c.Delete(ctx, containerd.WithSnapshotCleanup); err != nil {
			if derr := c.Delete(ctx); derr == nil {
				continue
			}
			return err
		}
	}
	return nil
}
```