containerd/cmd/containerd-stress/main.go
Brian Goff 026896ac4c Make Wait() async
In all of the examples, it's recommended to call `Wait()` before starting
a process/task.
Since `Wait()` is a blocking call, this means it must be called from a
goroutine like so:

```go
statusC := make(chan uint32)
go func() {
  status, err := task.Wait(ctx)
  if err != nil {
    // handle async err
  }

  statusC <- status
}()

task.Start(ctx)
<-statusC
```

This means there is a race here where there is no guarantee when the
goroutine is going to be scheduled, and the window is even wider since this
requires an RPC call to be made.
In addition, this code is very messy and a common pattern for any caller
using Wait+Start.

Instead, this changes `Wait()` to use an async model having `Wait()`
return a channel instead of the exit code itself.
This ensures that when `Wait()` returns that the client has a handle on
the event stream (already made the RPC request) before returning and
reduces any sort of race to how the stream is handled by grpc since we
can't guarantee that we have a goroutine running and blocked on
`Recv()`.

Making `Wait()` async also cleans up the code in the caller drastically:

```go
statusC, err := task.Wait(ctx)
if err != nil {
  return err
}

task.Start(ctx)

status := <-statusC
if status.Err != nil {
  return status.Err
}
```

No more spinning up goroutines and more natural error
handling for the caller.

Signed-off-by: Brian Goff <cpuguy83@gmail.com>
2017-08-22 09:33:07 -04:00

264 lines
5.5 KiB
Go

package main
import (
"context"
"fmt"
"os"
"os/signal"
"runtime"
"strings"
"sync"
"syscall"
"time"
"github.com/containerd/containerd"
"github.com/containerd/containerd/namespaces"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
"github.com/urfave/cli"
)
// imageName is the image reference that test pulls before the run; the pulled
// image is then used for spec generation and for each container's snapshot.
const imageName = "docker.io/library/alpine:latest"
// main builds the containerd-stress command line application, wires its flags
// into a config and runs the stress test. Any error returned by the run is
// printed to stderr and the process exits with status 1.
func main() {
	// use every available CPU
	runtime.GOMAXPROCS(runtime.NumCPU())

	flags := []cli.Flag{
		cli.BoolFlag{
			Name:  "debug",
			Usage: "set debug output in the logs",
		},
		cli.StringFlag{
			Name:  "address,a",
			Value: "/run/containerd/containerd.sock",
			Usage: "path to the containerd socket",
		},
		cli.IntFlag{
			Name:  "concurrent,c",
			Value: 1,
			Usage: "set the concurrency of the stress test",
		},
		cli.DurationFlag{
			Name:  "duration,d",
			Value: 1 * time.Minute,
			Usage: "set the duration of the stress test",
		},
	}

	app := cli.NewApp()
	app.Name = "containerd-stress"
	app.Description = "stress test a containerd daemon"
	app.Flags = flags

	// Before switches logrus to debug level when --debug is set.
	app.Before = func(clictx *cli.Context) error {
		if !clictx.GlobalBool("debug") {
			return nil
		}
		logrus.SetLevel(logrus.DebugLevel)
		return nil
	}

	// Action translates the parsed flags into a config and runs the test.
	app.Action = func(clictx *cli.Context) error {
		return test(config{
			Address:     clictx.GlobalString("address"),
			Duration:    clictx.GlobalDuration("duration"),
			Concurrency: clictx.GlobalInt("concurrent"),
		})
	}

	err := app.Run(os.Args)
	if err == nil {
		return
	}
	fmt.Fprintln(os.Stderr, err)
	os.Exit(1)
}
// config holds the settings for a single stress test run. It is filled in
// from the command line flags by main.
type config struct {
	// Address is the path to the containerd socket the client connects to.
	Address string
	// Concurrency is the number of workers started by test.
	Concurrency int
	// Duration is the timeout applied to the test run.
	Duration time.Duration
}
// newClient creates a containerd client for the socket at c.Address and
// returns whatever containerd.New returns, unmodified.
func (c config) newClient() (*containerd.Client, error) {
	socket := c.Address
	return containerd.New(socket)
}
// test runs one stress test as described by c and logs a summary.
//
// It connects to containerd, removes leftover containers from the "stress"
// namespace, pulls imageName, generates a spec from it and then starts
// c.Concurrency workers that create/start/delete containers until c.Duration
// elapses or a SIGTERM/SIGINT is received.
//
// An error is returned only when setup fails (client creation, pre-run
// cleanup, image pull or spec generation). Failures inside workers are
// counted and reported in the summary, not returned.
func test(c config) error {
	var (
		wg  sync.WaitGroup
		ctx = namespaces.WithNamespace(context.Background(), "stress")
	)
	client, err := c.newClient()
	if err != nil {
		return err
	}
	defer client.Close()
	if err := cleanup(ctx, client); err != nil {
		return err
	}
	logrus.Infof("pulling %s", imageName)
	image, err := client.Pull(ctx, imageName, containerd.WithPullUnpack)
	if err != nil {
		return err
	}
	logrus.Info("generating spec from image")
	spec, err := containerd.GenerateSpec(containerd.WithImageConfig(ctx, image), containerd.WithProcessArgs("true"))
	if err != nil {
		return err
	}

	tctx, cancel := context.WithTimeout(ctx, c.Duration)
	// Always release the timeout context, not only when a signal arrives;
	// calling cancel more than once is harmless.
	defer cancel()
	go func() {
		// A termination signal ends the run early by cancelling tctx.
		s := make(chan os.Signal, 1)
		signal.Notify(s, syscall.SIGTERM, syscall.SIGINT)
		<-s
		cancel()
	}()

	var (
		workers []*worker
		start   = time.Now()
	)
	logrus.Info("starting stress test run...")
	for i := 0; i < c.Concurrency; i++ {
		wg.Add(1)
		w := &worker{
			id:     i,
			wg:     &wg,
			spec:   spec,
			image:  image,
			client: client,
		}
		workers = append(workers, w)
		// Workers do their container operations on ctx and only use tctx
		// to decide when to stop looping.
		go w.run(ctx, tctx)
	}
	wg.Wait()

	var (
		total    int
		failures int
		end      = time.Since(start).Seconds()
	)
	logrus.Infof("ending test run in %0.3f seconds", end)
	// Safe to read the counters without locking: every worker has called
	// wg.Done by now.
	for _, w := range workers {
		total += w.count
		failures += w.failures
	}
	// Avoid reporting +Inf seconds per container when nothing was run.
	var secPerContainer float64
	if total > 0 {
		secPerContainer = end / float64(total)
	}
	logrus.WithField("failures", failures).Infof(
		"create/start/delete %d containers in %0.3f seconds (%0.3f c/sec) or (%0.3f sec/c)",
		total,
		end,
		float64(total)/end,
		secPerContainer,
	)
	return nil
}
// worker is one concurrent unit of the stress test. Each worker repeatedly
// runs containers and keeps its own counters, which test sums up after all
// workers have finished.
type worker struct {
	// id distinguishes this worker; it is the first part of every container ID.
	id int
	// wg is signalled with Done when run returns.
	wg *sync.WaitGroup

	// client, image and spec are shared by all workers and set by test.
	client *containerd.Client
	image  containerd.Image
	spec   *specs.Spec

	// count is the number of container runs attempted so far.
	count int
	// failures is the number of runs or waits that ended in an error.
	failures int

	// waitContext is assigned in run and cancelled once the test context is
	// done. It is not read anywhere else in this file.
	waitContext context.Context
}
// run is the worker's main loop. It keeps running containers until tctx is
// done, then signals w.wg and logs that the worker finished.
//
// ctx is the context handed to the container operations; tctx only controls
// when the loop stops, so a container that is already running when tctx ends
// is still completed and cleaned up on ctx.
//
// Every attempt increments w.count. An attempt that returns an error
// increments w.failures and is logged, unless the error is a deadline
// expiry, which is ignored.
func (w *worker) run(ctx, tctx context.Context) {
	defer func() {
		w.wg.Done()
		logrus.Infof("worker %d finished", w.id)
	}()
	wctx, cancel := context.WithCancel(ctx)
	w.waitContext = wctx
	// Cancel the wait context as soon as the test context ends. The loop
	// below only returns once tctx is done, so this goroutine always exits.
	go func() {
		<-tctx.Done()
		cancel()
	}()
	for {
		// Non-blocking check: stop before starting another container.
		select {
		case <-tctx.Done():
			return
		default:
		}
		w.count++
		id := w.getID()
		logrus.Debugf("starting container %s", id)
		err := w.runContainer(ctx, id)
		if err == nil {
			continue
		}
		// A deadline expiry is not a failure. Match on the message as well
		// as on the sentinel value, because an error that is not the
		// sentinel itself may still carry its text (presumably errors
		// returned over the containerd RPC connection; confirm).
		// The original joined these two tests with "||", which made the
		// message test ineffective and counted such errors as failures.
		if err == context.DeadlineExceeded ||
			strings.Contains(err.Error(), context.DeadlineExceeded.Error()) {
			continue
		}
		w.failures++
		logrus.WithError(err).Errorf("running container %s", id)
	}
}
// runContainer performs one full container lifecycle: create the container
// with a fresh snapshot named id, create its task, start it, wait for it to
// exit, then delete the task and the container.
//
// Errors from creating the container or task, from setting up the wait, or
// from starting the task are returned. An error delivered on the wait channel
// is not returned: it increments w.failures unless it is
// context.DeadlineExceeded or context.Canceled. Errors from the deferred
// deletes are ignored.
func (w *worker) runContainer(ctx context.Context, id string) error {
	container, err := w.client.NewContainer(
		ctx,
		id,
		containerd.WithSpec(w.spec),
		containerd.WithNewSnapshot(id, w.image),
	)
	if err != nil {
		return err
	}
	// Deferred calls run in reverse order, so the task delete registered
	// below runs before this container delete.
	defer container.Delete(ctx, containerd.WithSnapshotCleanup)

	t, err := container.NewTask(ctx, containerd.NullIO)
	if err != nil {
		return err
	}
	defer t.Delete(ctx, containerd.WithProcessKill)

	// Set up the wait before starting the task.
	exitC, err := t.Wait(ctx)
	if err != nil {
		return err
	}
	if err = t.Start(ctx); err != nil {
		return err
	}

	switch result := <-exitC; result.Err {
	case nil, context.DeadlineExceeded, context.Canceled:
		// clean exit, or the run was cut short: not a failure
	default:
		w.failures++
	}
	return nil
}
// getID returns the container ID for the worker's current iteration, in the
// form "<worker id>-<count>".
func (w *worker) getID() string {
	workerID, iteration := w.id, w.count
	return fmt.Sprintf("%d-%d", workerID, iteration)
}
// cleanup deletes container c together with its snapshot. A delete that
// fails with anything other than context.DeadlineExceeded increments
// w.failures and is logged. Nothing in this file calls this method.
func (w *worker) cleanup(ctx context.Context, c containerd.Container) {
	err := c.Delete(ctx, containerd.WithSnapshotCleanup)
	if err == nil || err == context.DeadlineExceeded {
		return
	}
	w.failures++
	logrus.WithError(err).Errorf("delete container %s", c.ID())
}
// cleanup removes every container that client lists for the namespace
// carried by ctx (test passes the "stress" namespace) before a test run.
//
// For each container, its task is deleted first if it can be loaded; errors
// from that delete are ignored. The container is then deleted with snapshot
// cleanup. If that fails, a plain delete is attempted as a fallback, and only
// if the fallback also fails is the error from the first delete returned.
func cleanup(ctx context.Context, client *containerd.Client) error {
	existing, err := client.Containers(ctx)
	if err != nil {
		return err
	}
	for i := range existing {
		ctr := existing[i]
		if t, terr := ctr.Task(ctx, nil); terr == nil {
			t.Delete(ctx, containerd.WithProcessKill)
		}
		err := ctr.Delete(ctx, containerd.WithSnapshotCleanup)
		if err == nil {
			continue
		}
		// Fall back to deleting the container without snapshot cleanup.
		if ctr.Delete(ctx) != nil {
			return err
		}
	}
	return nil
}