diff --git a/cmd/containerd-stress/exec_worker.go b/cmd/containerd-stress/exec_worker.go
new file mode 100644
index 000000000..f175ce84e
--- /dev/null
+++ b/cmd/containerd-stress/exec_worker.go
@@ -0,0 +1,110 @@
+package main
+
+import (
+	"context"
+	"path/filepath"
+	"strings"
+	"syscall"
+	"time"
+
+	"github.com/containerd/containerd"
+	"github.com/containerd/containerd/cio"
+	"github.com/containerd/containerd/oci"
+	specs "github.com/opencontainers/runtime-spec/specs-go"
+	"github.com/sirupsen/logrus"
+)
+
+type execWorker struct {
+	worker
+}
+
+func (w *execWorker) exec(ctx, tctx context.Context) {
+	defer func() {
+		w.wg.Done()
+		logrus.Infof("worker %d finished", w.id)
+	}()
+	// create and start the exec container
+	w.spec.Linux.CgroupsPath = filepath.Join("/", "stress", "exec-container")
+	w.spec.Process.Args = []string{
+		"sleep", "30d",
+	}
+	c, err := w.client.NewContainer(ctx, "exec-container",
+		containerd.WithNewSnapshot("exec-container", w.image),
+		containerd.WithSpec(w.spec, oci.WithUsername("games")),
+	)
+	if err != nil {
+		logrus.WithError(err).Error("create exec container")
+		return
+	}
+	defer c.Delete(ctx, containerd.WithSnapshotCleanup)
+
+	task, err := c.NewTask(ctx, cio.NullIO)
+	if err != nil {
+		logrus.WithError(err).Error("create exec container's task")
+		return
+	}
+	defer task.Delete(ctx, containerd.WithProcessKill)
+
+	statusC, err := task.Wait(ctx)
+	if err != nil {
+		logrus.WithError(err).Error("wait exec container's task")
+		return
+	}
+
+	pspec := w.spec.Process
+	pspec.Args = []string{"true"}
+
+	for {
+		select {
+		case <-tctx.Done():
+			if err := task.Kill(ctx, syscall.SIGKILL); err != nil {
+				logrus.WithError(err).Error("kill exec container's task")
+			}
+			<-statusC
+			return
+		default:
+		}
+
+		w.count++
+		id := w.getID()
+		logrus.Debugf("starting exec %s", id)
+		start := time.Now()
+
+		if err := w.runExec(ctx, task, id, pspec); err != nil {
+			if err != context.DeadlineExceeded &&
+				!strings.Contains(err.Error(), context.DeadlineExceeded.Error()) {
+				w.failures++
+				logrus.WithError(err).Errorf("running exec %s", id)
+				errCounter.WithValues(err.Error()).Inc()
+			}
+			continue
+		}
+		// only log times on success so we don't skew the results with failures that return quickly
+		execTimer.WithValues(w.commit).UpdateSince(start)
+	}
+}
+
+func (w *execWorker) runExec(ctx context.Context, task containerd.Task, id string, spec *specs.Process) error {
+	process, err := task.Exec(ctx, id, spec, cio.NullIO)
+	if err != nil {
+		return err
+	}
+	defer process.Delete(ctx, containerd.WithProcessKill)
+
+	statusC, err := process.Wait(ctx)
+	if err != nil {
+		return err
+	}
+	if err := process.Start(ctx); err != nil {
+		return err
+	}
+	status := <-statusC
+	_, _, err = status.Result()
+	if err != nil {
+		if err == context.DeadlineExceeded || err == context.Canceled {
+			return nil
+		}
+		w.failures++
+	}
+	return nil
+}
diff --git a/cmd/containerd-stress/main.go b/cmd/containerd-stress/main.go
index 612b4430a..bf4ef7518 100644
--- a/cmd/containerd-stress/main.go
+++ b/cmd/containerd-stress/main.go
@@ -25,6 +25,7 @@ const imageName = "docker.io/library/alpine:latest"
 var (
 	ct              metrics.LabeledTimer
+	execTimer       metrics.LabeledTimer
 	errCounter      metrics.LabeledCounter
 	binarySizeGauge metrics.LabeledGauge
 )
 
@@ -34,6 +35,7 @@ func init() {
 	// if you want more fine grained metrics then you can drill down with the metrics in prom that
 	// containerd is outputing
 	ct = ns.NewLabeledTimer("run", "Run time of a full container during the test", "commit")
+	execTimer = ns.NewLabeledTimer("exec", "Run time of an exec process during the test", "commit")
 	binarySizeGauge = ns.NewLabeledGauge("binary_size", "Binary size of compiled binaries", metrics.Bytes, "name")
 	errCounter = ns.NewLabeledCounter("errors", "Errors encountered running the stress tests", "err")
 	metrics.Register(ns)
@@ -75,9 +77,12 @@ func (r *run) gather(workers []*worker) *result {
 
 type result struct {
 	Total               int     `json:"total"`
+	Failures            int     `json:"failures"`
 	Seconds             float64 `json:"seconds"`
 	ContainersPerSecond float64 `json:"containersPerSecond"`
 	SecondsPerContainer float64 `json:"secondsPerContainer"`
+	ExecTotal           int     `json:"execTotal"`
+	ExecFailures        int     `json:"execFailures"`
 }
 
 func main() {
@@ -121,12 +126,12 @@ func main() {
 		},
 	}
 	app.Before = func(context *cli.Context) error {
-		if context.GlobalBool("debug") {
-			logrus.SetLevel(logrus.DebugLevel)
-		}
 		if context.GlobalBool("json") {
 			logrus.SetLevel(logrus.WarnLevel)
 		}
+		if context.GlobalBool("debug") {
+			logrus.SetLevel(logrus.DebugLevel)
+		}
 		return nil
 	}
 	app.Action = func(context *cli.Context) error {
@@ -206,9 +211,6 @@ func test(c config) error {
 	)
 	logrus.Info("starting stress test run...")
 	args := oci.WithProcessArgs("true")
-	if c.Exec {
-		args = oci.WithProcessArgs("sleep", "10")
-	}
 	v, err := client.Version(ctx)
 	if err != nil {
 		return err
@@ -230,11 +232,34 @@ func test(c config) error {
 			spec:   spec,
 			image:  image,
 			client: client,
-			doExec: c.Exec,
 			commit: v.Revision,
 		}
 		workers = append(workers, w)
 	}
+	var exec *execWorker
+	if c.Exec {
+		wg.Add(1)
+		spec, err := oci.GenerateSpec(ctx, client,
+			&containers.Container{},
+			oci.WithImageConfig(image),
+			args,
+		)
+		if err != nil {
+			return err
+		}
+		exec = &execWorker{
+			worker: worker{
+				id:     c.Concurrency,
+				wg:     &wg,
+				spec:   spec,
+				image:  image,
+				client: client,
+				commit: v.Revision,
+			},
+		}
+		go exec.exec(ctx, tctx)
+	}
+
 	// start the timer and run the worker
 	r.start()
 	for _, w := range workers {
@@ -245,6 +270,10 @@ func test(c config) error {
 	r.end()
 
 	results := r.gather(workers)
+	if c.Exec {
+		results.ExecTotal = exec.count
+		results.ExecFailures = exec.failures
+	}
 	logrus.Infof("ending test run in %0.3f seconds", results.Seconds)
 
 	logrus.WithField("failures", r.failures).Infof(
diff --git a/cmd/containerd-stress/worker.go b/cmd/containerd-stress/worker.go
index 19bf8552f..bf0dea835 100644
--- a/cmd/containerd-stress/worker.go
+++ b/cmd/containerd-stress/worker.go
@@ -4,10 +4,8 @@ import (
 	"context"
 	"fmt"
 	"path/filepath"
-	"strconv"
 	"strings"
 	"sync"
-	"syscall"
 	"time"
 
 	"github.com/containerd/containerd"
@@ -26,7 +24,6 @@ type worker struct {
 	client *containerd.Client
 	image  containerd.Image
 	spec   *specs.Spec
-	doExec bool
 	commit string
 }
 
@@ -86,17 +83,6 @@ func (w *worker) runContainer(ctx context.Context, id string) error {
 	if err := task.Start(ctx); err != nil {
 		return err
 	}
-	if w.doExec {
-		for i := 0; i < 256; i++ {
-			if err := w.exec(ctx, i, task); err != nil {
-				w.failures++
-				logrus.WithError(err).Error("exec failure")
-			}
-		}
-		if err := task.Kill(ctx, syscall.SIGKILL); err != nil {
-			return err
-		}
-	}
 	status := <-statusC
 	_, _, err = status.Result()
 	if err != nil {
@@ -108,25 +94,6 @@ func (w *worker) runContainer(ctx context.Context, id string) error {
 	return nil
 }
 
-func (w *worker) exec(ctx context.Context, i int, t containerd.Task) error {
-	pSpec := *w.spec.Process
-	pSpec.Args = []string{"true"}
-	process, err := t.Exec(ctx, strconv.Itoa(i), &pSpec, cio.NullIO)
-	if err != nil {
-		return err
-	}
-	defer process.Delete(ctx)
-	status, err := process.Wait(ctx)
-	if err != nil {
-		return err
-	}
-	if err := process.Start(ctx); err != nil {
-		return err
-	}
-	<-status
-	return nil
-}
-
 func (w *worker) getID() string {
 	return fmt.Sprintf("%d-%d", w.id, w.count)
 }
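
A quick way to exercise the new worker end to end, assuming the stress tool's existing --exec and --duration flags that populate config (their definitions sit outside this diff; only the "json" and "debug" lookups are visible above):

    containerd-stress --exec --duration 2m --json

With --json, the summary emitted at the end now carries the added result fields; roughly, with invented values for illustration:

    {"total":2000,"failures":0,"seconds":120.0,"containersPerSecond":16.7,"secondsPerContainer":0.06,"execTotal":4800,"execFailures":2}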