From 9fcca96771e570eb451ff914d5a99acfeb6d31ff Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 6 Dec 2017 16:07:36 -0500 Subject: [PATCH 1/5] Add json output to stress test tool Signed-off-by: Michael Crosby --- cmd/containerd-stress/main.go | 88 ++++++++++++++++++++++++++++------- 1 file changed, 72 insertions(+), 16 deletions(-) diff --git a/cmd/containerd-stress/main.go b/cmd/containerd-stress/main.go index ab48f8a19..53057d020 100644 --- a/cmd/containerd-stress/main.go +++ b/cmd/containerd-stress/main.go @@ -2,6 +2,7 @@ package main import ( "context" + "encoding/json" "fmt" "os" "os/signal" @@ -25,6 +26,47 @@ import ( const imageName = "docker.io/library/alpine:latest" +type run struct { + total int + failures int + + started time.Time + ended time.Time +} + +func (r *run) start() { + r.started = time.Now() +} + +func (r *run) end() { + r.ended = time.Now() +} + +func (r *run) seconds() float64 { + return r.ended.Sub(r.started).Seconds() +} + +func (r *run) gather(workers []*worker) *result { + for _, w := range workers { + r.total += w.count + r.failures += w.failures + } + sec := r.seconds() + return &result{ + Total: r.total, + Seconds: sec, + ContainersPerSecond: float64(r.total) / sec, + SecondsPerContainer: sec / float64(r.total), + } +} + +type result struct { + Total int `json:"total"` + Seconds float64 `json:"seconds"` + ContainersPerSecond float64 `json:"containersPerSecond"` + SecondsPerContainer float64 `json:"secondsPerContainer"` +} + func main() { // morr power! runtime.GOMAXPROCS(runtime.NumCPU()) @@ -56,11 +98,18 @@ func main() { Name: "exec", Usage: "add execs to the stress tests", }, + cli.BoolFlag{ + Name: "json,j", + Usage: "output results in json format", + }, } app.Before = func(context *cli.Context) error { if context.GlobalBool("debug") { logrus.SetLevel(logrus.DebugLevel) } + if context.GlobalBool("json") { + logrus.SetLevel(logrus.WarnLevel) + } return nil } app.Action = func(context *cli.Context) error { @@ -69,6 +118,7 @@ func main() { Duration: context.GlobalDuration("duration"), Concurrency: context.GlobalInt("concurrent"), Exec: context.GlobalBool("exec"), + Json: context.GlobalBool("json"), } return test(config) } @@ -83,6 +133,7 @@ type config struct { Duration time.Duration Address string Exec bool + Json bool } func (c config) newClient() (*containerd.Client, error) { @@ -119,13 +170,14 @@ func test(c config) error { var ( workers []*worker - start = time.Now() + r = &run{} ) logrus.Info("starting stress test run...") args := oci.WithProcessArgs("true") if c.Exec { args = oci.WithProcessArgs("sleep", "10") } + // create the workers along with their spec for i := 0; i < c.Concurrency; i++ { wg.Add(1) spec, err := oci.GenerateSpec(ctx, client, @@ -145,27 +197,31 @@ func test(c config) error { doExec: c.Exec, } workers = append(workers, w) + } + // start the timer and run the worker + r.start() + for _, w := range workers { go w.run(ctx, tctx) } + // wait and end the timer wg.Wait() + r.end() - var ( - total int - failures int - end = time.Now().Sub(start).Seconds() - ) - logrus.Infof("ending test run in %0.3f seconds", end) - for _, w := range workers { - total += w.count - failures += w.failures - } - logrus.WithField("failures", failures).Infof( + results := r.gather(workers) + logrus.Infof("ending test run in %0.3f seconds", results.Seconds) + + logrus.WithField("failures", r.failures).Infof( "create/start/delete %d containers in %0.3f seconds (%0.3f c/sec) or (%0.3f sec/c)", - total, - end, - float64(total)/end, - end/float64(total), + results.Total, + results.Seconds, + results.ContainersPerSecond, + results.SecondsPerContainer, ) + if c.Json { + if err := json.NewEncoder(os.Stdout).Encode(results); err != nil { + return err + } + } return nil } From ca5f16c33e8cbf72329a21b260f7fee9abad9779 Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Mon, 11 Dec 2017 10:36:19 -0500 Subject: [PATCH 2/5] Move stress worker to new file Signed-off-by: Michael Crosby --- cmd/containerd-stress/main.go | 113 ----------------------------- cmd/containerd-stress/worker.go | 125 ++++++++++++++++++++++++++++++++ 2 files changed, 125 insertions(+), 113 deletions(-) create mode 100644 cmd/containerd-stress/worker.go diff --git a/cmd/containerd-stress/main.go b/cmd/containerd-stress/main.go index 53057d020..0a0d7aa9d 100644 --- a/cmd/containerd-stress/main.go +++ b/cmd/containerd-stress/main.go @@ -6,20 +6,15 @@ import ( "fmt" "os" "os/signal" - "path/filepath" "runtime" - "strconv" - "strings" "sync" "syscall" "time" "github.com/containerd/containerd" - "github.com/containerd/containerd/cio" "github.com/containerd/containerd/containers" "github.com/containerd/containerd/namespaces" "github.com/containerd/containerd/oci" - specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/sirupsen/logrus" "github.com/urfave/cli" ) @@ -225,114 +220,6 @@ func test(c config) error { return nil } -type worker struct { - id int - wg *sync.WaitGroup - count int - failures int - - client *containerd.Client - image containerd.Image - spec *specs.Spec - doExec bool -} - -func (w *worker) run(ctx, tctx context.Context) { - defer func() { - w.wg.Done() - logrus.Infof("worker %d finished", w.id) - }() - for { - select { - case <-tctx.Done(): - return - default: - } - - w.count++ - id := w.getID() - logrus.Debugf("starting container %s", id) - if err := w.runContainer(ctx, id); err != nil { - if err != context.DeadlineExceeded || - !strings.Contains(err.Error(), context.DeadlineExceeded.Error()) { - w.failures++ - logrus.WithError(err).Errorf("running container %s", id) - - } - } - } -} - -func (w *worker) runContainer(ctx context.Context, id string) error { - // fix up cgroups path for a default config - w.spec.Linux.CgroupsPath = filepath.Join("/", "stress", id) - c, err := w.client.NewContainer(ctx, id, - containerd.WithNewSnapshot(id, w.image), - containerd.WithSpec(w.spec, oci.WithUsername("games")), - ) - if err != nil { - return err - } - defer c.Delete(ctx, containerd.WithSnapshotCleanup) - - task, err := c.NewTask(ctx, cio.NullIO) - if err != nil { - return err - } - defer task.Delete(ctx, containerd.WithProcessKill) - - statusC, err := task.Wait(ctx) - if err != nil { - return err - } - if err := task.Start(ctx); err != nil { - return err - } - if w.doExec { - for i := 0; i < 256; i++ { - if err := w.exec(ctx, i, task); err != nil { - w.failures++ - logrus.WithError(err).Error("exec failure") - } - } - if err := task.Kill(ctx, syscall.SIGKILL); err != nil { - return err - } - } - status := <-statusC - _, _, err = status.Result() - if err != nil { - if err == context.DeadlineExceeded || err == context.Canceled { - return nil - } - w.failures++ - } - return nil -} - -func (w *worker) exec(ctx context.Context, i int, t containerd.Task) error { - pSpec := *w.spec.Process - pSpec.Args = []string{"true"} - process, err := t.Exec(ctx, strconv.Itoa(i), &pSpec, cio.NullIO) - if err != nil { - return err - } - defer process.Delete(ctx) - status, err := process.Wait(ctx) - if err != nil { - return err - } - if err := process.Start(ctx); err != nil { - return err - } - <-status - return nil -} - -func (w *worker) getID() string { - return fmt.Sprintf("%d-%d", w.id, w.count) -} - // cleanup cleans up any containers in the "stress" namespace before the test run func cleanup(ctx context.Context, client *containerd.Client) error { containers, err := client.Containers(ctx) diff --git a/cmd/containerd-stress/worker.go b/cmd/containerd-stress/worker.go new file mode 100644 index 000000000..b89dfd113 --- /dev/null +++ b/cmd/containerd-stress/worker.go @@ -0,0 +1,125 @@ +package main + +import ( + "context" + "fmt" + "path/filepath" + "strconv" + "strings" + "sync" + "syscall" + + "github.com/Sirupsen/logrus" + "github.com/containerd/containerd" + "github.com/containerd/containerd/cio" + "github.com/containerd/containerd/oci" + specs "github.com/opencontainers/runtime-spec/specs-go" +) + +type worker struct { + id int + wg *sync.WaitGroup + count int + failures int + + client *containerd.Client + image containerd.Image + spec *specs.Spec + doExec bool +} + +func (w *worker) run(ctx, tctx context.Context) { + defer func() { + w.wg.Done() + logrus.Infof("worker %d finished", w.id) + }() + for { + select { + case <-tctx.Done(): + return + default: + } + + w.count++ + id := w.getID() + logrus.Debugf("starting container %s", id) + if err := w.runContainer(ctx, id); err != nil { + if err != context.DeadlineExceeded || + !strings.Contains(err.Error(), context.DeadlineExceeded.Error()) { + w.failures++ + logrus.WithError(err).Errorf("running container %s", id) + + } + } + } +} + +func (w *worker) runContainer(ctx context.Context, id string) error { + // fix up cgroups path for a default config + w.spec.Linux.CgroupsPath = filepath.Join("/", "stress", id) + c, err := w.client.NewContainer(ctx, id, + containerd.WithNewSnapshot(id, w.image), + containerd.WithSpec(w.spec, oci.WithUsername("games")), + ) + if err != nil { + return err + } + defer c.Delete(ctx, containerd.WithSnapshotCleanup) + + task, err := c.NewTask(ctx, cio.NullIO) + if err != nil { + return err + } + defer task.Delete(ctx, containerd.WithProcessKill) + + statusC, err := task.Wait(ctx) + if err != nil { + return err + } + if err := task.Start(ctx); err != nil { + return err + } + if w.doExec { + for i := 0; i < 256; i++ { + if err := w.exec(ctx, i, task); err != nil { + w.failures++ + logrus.WithError(err).Error("exec failure") + } + } + if err := task.Kill(ctx, syscall.SIGKILL); err != nil { + return err + } + } + status := <-statusC + _, _, err = status.Result() + if err != nil { + if err == context.DeadlineExceeded || err == context.Canceled { + return nil + } + w.failures++ + } + return nil +} + +func (w *worker) exec(ctx context.Context, i int, t containerd.Task) error { + pSpec := *w.spec.Process + pSpec.Args = []string{"true"} + process, err := t.Exec(ctx, strconv.Itoa(i), &pSpec, cio.NullIO) + if err != nil { + return err + } + defer process.Delete(ctx) + status, err := process.Wait(ctx) + if err != nil { + return err + } + if err := process.Start(ctx); err != nil { + return err + } + <-status + return nil +} + +func (w *worker) getID() string { + return fmt.Sprintf("%d-%d", w.id, w.count) +} From 4d55298aabedaa728c7e0aa849061b62bf3a4e1d Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Tue, 12 Dec 2017 13:10:30 -0500 Subject: [PATCH 3/5] Add prom timer to stress Signed-off-by: Michael Crosby --- cmd/containerd-stress/main.go | 26 +++++++++++++++++++++++--- cmd/containerd-stress/worker.go | 18 +++++++++++++++++- 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/cmd/containerd-stress/main.go b/cmd/containerd-stress/main.go index 0a0d7aa9d..3b2b030fb 100644 --- a/cmd/containerd-stress/main.go +++ b/cmd/containerd-stress/main.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "fmt" + "net/http" "os" "os/signal" "runtime" @@ -15,6 +16,7 @@ import ( "github.com/containerd/containerd/containers" "github.com/containerd/containerd/namespaces" "github.com/containerd/containerd/oci" + metrics "github.com/docker/go-metrics" "github.com/sirupsen/logrus" "github.com/urfave/cli" ) @@ -97,6 +99,10 @@ func main() { Name: "json,j", Usage: "output results in json format", }, + cli.StringFlag{ + Name: "metrics,m", + Usage: "address to serve the metrics API", + }, } app.Before = func(context *cli.Context) error { if context.GlobalBool("debug") { @@ -113,7 +119,11 @@ func main() { Duration: context.GlobalDuration("duration"), Concurrency: context.GlobalInt("concurrent"), Exec: context.GlobalBool("exec"), - Json: context.GlobalBool("json"), + JSON: context.GlobalBool("json"), + Metrics: context.GlobalString("metrics"), + } + if config.Metrics != "" { + return serve(config) } return test(config) } @@ -128,13 +138,23 @@ type config struct { Duration time.Duration Address string Exec bool - Json bool + JSON bool + Metrics string } func (c config) newClient() (*containerd.Client, error) { return containerd.New(c.Address) } +func serve(c config) error { + go func() { + if err := http.ListenAndServe(c.Metrics, metrics.Handler()); err != nil { + logrus.WithError(err).Error("listen and serve") + } + }() + return test(c) +} + func test(c config) error { var ( wg sync.WaitGroup @@ -212,7 +232,7 @@ func test(c config) error { results.ContainersPerSecond, results.SecondsPerContainer, ) - if c.Json { + if c.JSON { if err := json.NewEncoder(os.Stdout).Encode(results); err != nil { return err } diff --git a/cmd/containerd-stress/worker.go b/cmd/containerd-stress/worker.go index b89dfd113..a66148220 100644 --- a/cmd/containerd-stress/worker.go +++ b/cmd/containerd-stress/worker.go @@ -8,14 +8,26 @@ import ( "strings" "sync" "syscall" + "time" - "github.com/Sirupsen/logrus" "github.com/containerd/containerd" "github.com/containerd/containerd/cio" "github.com/containerd/containerd/oci" + metrics "github.com/docker/go-metrics" specs "github.com/opencontainers/runtime-spec/specs-go" + "github.com/sirupsen/logrus" ) +var ct metrics.Timer + +func init() { + ns := metrics.NewNamespace("stress", "", nil) + // if you want more fine grained metrics then you can drill down with the metrics in prom that + // containerd is outputing + ct = ns.NewTimer("run", "Run time of a full container during the test") + metrics.Register(ns) +} + type worker struct { id int wg *sync.WaitGroup @@ -43,6 +55,7 @@ func (w *worker) run(ctx, tctx context.Context) { w.count++ id := w.getID() logrus.Debugf("starting container %s", id) + start := time.Now() if err := w.runContainer(ctx, id); err != nil { if err != context.DeadlineExceeded || !strings.Contains(err.Error(), context.DeadlineExceeded.Error()) { @@ -50,7 +63,10 @@ func (w *worker) run(ctx, tctx context.Context) { logrus.WithError(err).Errorf("running container %s", id) } + continue } + // only log times are success so we don't scew the results from failures that go really fast + ct.UpdateSince(start) } } From 652e07807828b8df2a1743eb358ace7d56791c9e Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Tue, 12 Dec 2017 14:14:39 -0500 Subject: [PATCH 4/5] Add commit to stress metric Signed-off-by: Michael Crosby --- cmd/containerd-stress/main.go | 5 +++++ cmd/containerd-stress/worker.go | 7 ++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/cmd/containerd-stress/main.go b/cmd/containerd-stress/main.go index 3b2b030fb..a53ca6e77 100644 --- a/cmd/containerd-stress/main.go +++ b/cmd/containerd-stress/main.go @@ -192,6 +192,10 @@ func test(c config) error { if c.Exec { args = oci.WithProcessArgs("sleep", "10") } + v, err := client.Version(ctx) + if err != nil { + return err + } // create the workers along with their spec for i := 0; i < c.Concurrency; i++ { wg.Add(1) @@ -210,6 +214,7 @@ func test(c config) error { image: image, client: client, doExec: c.Exec, + commit: v.Revision, } workers = append(workers, w) } diff --git a/cmd/containerd-stress/worker.go b/cmd/containerd-stress/worker.go index a66148220..0386b3daa 100644 --- a/cmd/containerd-stress/worker.go +++ b/cmd/containerd-stress/worker.go @@ -18,13 +18,13 @@ import ( "github.com/sirupsen/logrus" ) -var ct metrics.Timer +var ct metrics.LabeledTimer func init() { ns := metrics.NewNamespace("stress", "", nil) // if you want more fine grained metrics then you can drill down with the metrics in prom that // containerd is outputing - ct = ns.NewTimer("run", "Run time of a full container during the test") + ct = ns.NewLabeledTimer("run", "Run time of a full container during the test", "commit") metrics.Register(ns) } @@ -38,6 +38,7 @@ type worker struct { image containerd.Image spec *specs.Spec doExec bool + commit string } func (w *worker) run(ctx, tctx context.Context) { @@ -66,7 +67,7 @@ func (w *worker) run(ctx, tctx context.Context) { continue } // only log times are success so we don't scew the results from failures that go really fast - ct.UpdateSince(start) + ct.WithValues(w.commit).UpdateSince(start) } } From 6ae0f5f7e2ab0e9762c3fdaa4aa176002258fdbb Mon Sep 17 00:00:00 2001 From: Michael Crosby Date: Wed, 13 Dec 2017 13:15:07 -0500 Subject: [PATCH 5/5] Add error metric for stress tests Signed-off-by: Michael Crosby --- cmd/containerd-stress/worker.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cmd/containerd-stress/worker.go b/cmd/containerd-stress/worker.go index 0386b3daa..2738555ab 100644 --- a/cmd/containerd-stress/worker.go +++ b/cmd/containerd-stress/worker.go @@ -18,13 +18,17 @@ import ( "github.com/sirupsen/logrus" ) -var ct metrics.LabeledTimer +var ( + ct metrics.LabeledTimer + errCounter metrics.LabeledCounter +) func init() { ns := metrics.NewNamespace("stress", "", nil) // if you want more fine grained metrics then you can drill down with the metrics in prom that // containerd is outputing ct = ns.NewLabeledTimer("run", "Run time of a full container during the test", "commit") + errCounter = ns.NewLabeledCounter("errors", "Errors encountered running the stress tests", "err") metrics.Register(ns) } @@ -62,6 +66,7 @@ func (w *worker) run(ctx, tctx context.Context) { !strings.Contains(err.Error(), context.DeadlineExceeded.Error()) { w.failures++ logrus.WithError(err).Errorf("running container %s", id) + errCounter.WithValues(err.Error()).Inc() } continue