From 94602aea63257a8add2431caa9f80613da8cb350 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Fri, 19 Jan 2018 13:44:54 -0500 Subject: [PATCH] Add execs to stress tests This improves the exec support so that they can run along with the normal stress tests. You don't have to pick exec stres or container stress. They both run at the same time and report the different values. Signed-off-by: Michael Crosby --- cmd/containerd-stress/exec_worker.go | 110 +++++++++++++++++++++++++++ cmd/containerd-stress/main.go | 43 +++++++++-- cmd/containerd-stress/worker.go | 33 -------- 3 files changed, 146 insertions(+), 40 deletions(-) create mode 100644 cmd/containerd-stress/exec_worker.go diff --git a/cmd/containerd-stress/exec_worker.go b/cmd/containerd-stress/exec_worker.go new file mode 100644 index 000000000..f175ce84e --- /dev/null +++ b/cmd/containerd-stress/exec_worker.go @@ -0,0 +1,110 @@ +package main + +import ( + "context" + "path/filepath" + "strings" + "syscall" + "time" + + "github.com/containerd/containerd" + "github.com/containerd/containerd/cio" + "github.com/containerd/containerd/oci" + specs "github.com/opencontainers/runtime-spec/specs-go" + "github.com/sirupsen/logrus" +) + +type execWorker struct { + worker +} + +func (w *execWorker) exec(ctx, tctx context.Context) { + defer func() { + w.wg.Done() + logrus.Infof("worker %d finished", w.id) + }() + // create and start the exec container + w.spec.Linux.CgroupsPath = filepath.Join("/", "stress", "exec-container") + w.spec.Process.Args = []string{ + "sleep", "30d", + } + c, err := w.client.NewContainer(ctx, "exec-container", + containerd.WithNewSnapshot("exec-container", w.image), + containerd.WithSpec(w.spec, oci.WithUsername("games")), + ) + if err != nil { + logrus.WithError(err).Error("create exec container") + return + } + defer c.Delete(ctx, containerd.WithSnapshotCleanup) + + task, err := c.NewTask(ctx, cio.NullIO) + if err != nil { + logrus.WithError(err).Error("create exec container's task") + return + } + defer task.Delete(ctx, containerd.WithProcessKill) + + statusC, err := task.Wait(ctx) + if err != nil { + logrus.WithError(err).Error("wait exec container's task") + return + } + + pspec := w.spec.Process + pspec.Args = []string{"true"} + + for { + select { + case <-tctx.Done(): + if err := task.Kill(ctx, syscall.SIGKILL); err != nil { + logrus.WithError(err).Error("kill exec container's task") + } + <-statusC + return + default: + } + + w.count++ + id := w.getID() + logrus.Debugf("starting exec %s", id) + start := time.Now() + + if err := w.runExec(ctx, task, id, pspec); err != nil { + if err != context.DeadlineExceeded || + !strings.Contains(err.Error(), context.DeadlineExceeded.Error()) { + w.failures++ + logrus.WithError(err).Errorf("running exec %s", id) + errCounter.WithValues(err.Error()).Inc() + } + continue + } + // only log times are success so we don't scew the results from failures that go really fast + execTimer.WithValues(w.commit).UpdateSince(start) + } +} + +func (w *execWorker) runExec(ctx context.Context, task containerd.Task, id string, spec *specs.Process) error { + process, err := task.Exec(ctx, id, spec, cio.NullIO) + if err != nil { + return err + } + defer process.Delete(ctx, containerd.WithProcessKill) + + statusC, err := process.Wait(ctx) + if err != nil { + return err + } + if err := process.Start(ctx); err != nil { + return err + } + status := <-statusC + _, _, err = status.Result() + if err != nil { + if err == context.DeadlineExceeded || err == context.Canceled { + return nil + } + w.failures++ + } + return nil +} diff --git a/cmd/containerd-stress/main.go b/cmd/containerd-stress/main.go index 612b4430a..bf4ef7518 100644 --- a/cmd/containerd-stress/main.go +++ b/cmd/containerd-stress/main.go @@ -25,6 +25,7 @@ const imageName = "docker.io/library/alpine:latest" var ( ct metrics.LabeledTimer + execTimer metrics.LabeledTimer errCounter metrics.LabeledCounter binarySizeGauge metrics.LabeledGauge ) @@ -34,6 +35,7 @@ func init() { // if you want more fine grained metrics then you can drill down with the metrics in prom that // containerd is outputing ct = ns.NewLabeledTimer("run", "Run time of a full container during the test", "commit") + execTimer = ns.NewLabeledTimer("exec", "Run time of an exec process during the test", "commit") binarySizeGauge = ns.NewLabeledGauge("binary_size", "Binary size of compiled binaries", metrics.Bytes, "name") errCounter = ns.NewLabeledCounter("errors", "Errors encountered running the stress tests", "err") metrics.Register(ns) @@ -75,9 +77,12 @@ func (r *run) gather(workers []*worker) *result { type result struct { Total int `json:"total"` + Failures int `json:"failures"` Seconds float64 `json:"seconds"` ContainersPerSecond float64 `json:"containersPerSecond"` SecondsPerContainer float64 `json:"secondsPerContainer"` + ExecTotal int `json:"execTotal"` + ExecFailures int `json:"execFailures"` } func main() { @@ -121,12 +126,12 @@ func main() { }, } app.Before = func(context *cli.Context) error { - if context.GlobalBool("debug") { - logrus.SetLevel(logrus.DebugLevel) - } if context.GlobalBool("json") { logrus.SetLevel(logrus.WarnLevel) } + if context.GlobalBool("debug") { + logrus.SetLevel(logrus.DebugLevel) + } return nil } app.Action = func(context *cli.Context) error { @@ -206,9 +211,6 @@ func test(c config) error { ) logrus.Info("starting stress test run...") args := oci.WithProcessArgs("true") - if c.Exec { - args = oci.WithProcessArgs("sleep", "10") - } v, err := client.Version(ctx) if err != nil { return err @@ -230,11 +232,34 @@ func test(c config) error { spec: spec, image: image, client: client, - doExec: c.Exec, commit: v.Revision, } workers = append(workers, w) } + var exec *execWorker + if c.Exec { + wg.Add(1) + spec, err := oci.GenerateSpec(ctx, client, + &containers.Container{}, + oci.WithImageConfig(image), + args, + ) + if err != nil { + return err + } + exec = &execWorker{ + worker: worker{ + id: c.Concurrency, + wg: &wg, + spec: spec, + image: image, + client: client, + commit: v.Revision, + }, + } + go exec.exec(ctx, tctx) + } + // start the timer and run the worker r.start() for _, w := range workers { @@ -245,6 +270,10 @@ func test(c config) error { r.end() results := r.gather(workers) + if c.Exec { + results.ExecTotal = exec.count + results.ExecFailures = exec.failures + } logrus.Infof("ending test run in %0.3f seconds", results.Seconds) logrus.WithField("failures", r.failures).Infof( diff --git a/cmd/containerd-stress/worker.go b/cmd/containerd-stress/worker.go index 19bf8552f..bf0dea835 100644 --- a/cmd/containerd-stress/worker.go +++ b/cmd/containerd-stress/worker.go @@ -4,10 +4,8 @@ import ( "context" "fmt" "path/filepath" - "strconv" "strings" "sync" - "syscall" "time" "github.com/containerd/containerd" @@ -26,7 +24,6 @@ type worker struct { client *containerd.Client image containerd.Image spec *specs.Spec - doExec bool commit string } @@ -86,17 +83,6 @@ func (w *worker) runContainer(ctx context.Context, id string) error { if err := task.Start(ctx); err != nil { return err } - if w.doExec { - for i := 0; i < 256; i++ { - if err := w.exec(ctx, i, task); err != nil { - w.failures++ - logrus.WithError(err).Error("exec failure") - } - } - if err := task.Kill(ctx, syscall.SIGKILL); err != nil { - return err - } - } status := <-statusC _, _, err = status.Result() if err != nil { @@ -108,25 +94,6 @@ func (w *worker) runContainer(ctx context.Context, id string) error { return nil } -func (w *worker) exec(ctx context.Context, i int, t containerd.Task) error { - pSpec := *w.spec.Process - pSpec.Args = []string{"true"} - process, err := t.Exec(ctx, strconv.Itoa(i), &pSpec, cio.NullIO) - if err != nil { - return err - } - defer process.Delete(ctx) - status, err := process.Wait(ctx) - if err != nil { - return err - } - if err := process.Start(ctx); err != nil { - return err - } - <-status - return nil -} - func (w *worker) getID() string { return fmt.Sprintf("%d-%d", w.id, w.count) }