Add execs to stress tests

This improves the exec support so that execs run alongside the normal
stress tests.  You no longer have to choose between exec stress and
container stress: both run at the same time and report their results
separately.
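
For example, a combined run would look something like this (flag names are
assumed from the config fields and are not shown in this diff):

containerd-stress --exec --concurrent 10 --duration 2m --json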

Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
Michael Crosby 2018-01-19 13:44:54 -05:00
parent 823d3398ea
commit 94602aea63
3 changed files with 146 additions and 40 deletions

View File

@@ -0,0 +1,110 @@
package main
import (
"context"
"path/filepath"
"strings"
"syscall"
"time"
"github.com/containerd/containerd"
"github.com/containerd/containerd/cio"
"github.com/containerd/containerd/oci"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
)
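// execWorker reuses the base worker but, instead of churning whole containers,
// keeps a single long-running container alive and repeatedly execs short-lived
// processes inside it, reporting its own counts and timings.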
type execWorker struct {
worker
}
func (w *execWorker) exec(ctx, tctx context.Context) {
defer func() {
w.wg.Done()
logrus.Infof("worker %d finished", w.id)
}()
// create and start the exec container
w.spec.Linux.CgroupsPath = filepath.Join("/", "stress", "exec-container")
w.spec.Process.Args = []string{
"sleep", "30d",
}
c, err := w.client.NewContainer(ctx, "exec-container",
containerd.WithNewSnapshot("exec-container", w.image),
containerd.WithSpec(w.spec, oci.WithUsername("games")),
)
if err != nil {
logrus.WithError(err).Error("create exec container")
return
}
defer c.Delete(ctx, containerd.WithSnapshotCleanup)
task, err := c.NewTask(ctx, cio.NullIO)
if err != nil {
logrus.WithError(err).Error("create exec container's task")
return
}
defer task.Delete(ctx, containerd.WithProcessKill)
statusC, err := task.Wait(ctx)
if err != nil {
logrus.WithError(err).Error("wait exec container's task")
return
}
// the exec container's task must be running before execs can be started against it
if err := task.Start(ctx); err != nil {
logrus.WithError(err).Error("start exec container's task")
return
}
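// reuse the container's process spec for each exec, swapping the args for a short-lived command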
pspec := w.spec.Process
pspec.Args = []string{"true"}
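// keep starting execs until the test deadline (tctx) is reached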
for {
select {
case <-tctx.Done():
if err := task.Kill(ctx, syscall.SIGKILL); err != nil {
logrus.WithError(err).Error("kill exec container's task")
}
<-statusC
return
default:
}
w.count++
id := w.getID()
logrus.Debugf("starting exec %s", id)
start := time.Now()
if err := w.runExec(ctx, task, id, pspec); err != nil {
if err != context.DeadlineExceeded ||
!strings.Contains(err.Error(), context.DeadlineExceeded.Error()) {
w.failures++
logrus.WithError(err).Errorf("running exec %s", id)
errCounter.WithValues(err.Error()).Inc()
}
continue
}
// only log times for successful runs so we don't skew the results with failures that return quickly
execTimer.WithValues(w.commit).UpdateSince(start)
}
}
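// runExec starts a single exec process inside the long-running task and waits for it to exit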
func (w *execWorker) runExec(ctx context.Context, task containerd.Task, id string, spec *specs.Process) error {
process, err := task.Exec(ctx, id, spec, cio.NullIO)
if err != nil {
return err
}
defer process.Delete(ctx, containerd.WithProcessKill)
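// register the wait before starting the process so its exit event cannot be missed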
statusC, err := process.Wait(ctx)
if err != nil {
return err
}
if err := process.Start(ctx); err != nil {
return err
}
status := <-statusC
_, _, err = status.Result()
if err != nil {
if err == context.DeadlineExceeded || err == context.Canceled {
return nil
}
w.failures++
}
return nil
}

View File

@@ -25,6 +25,7 @@ const imageName = "docker.io/library/alpine:latest"
var (
ct metrics.LabeledTimer
execTimer metrics.LabeledTimer
errCounter metrics.LabeledCounter
binarySizeGauge metrics.LabeledGauge
)
@@ -34,6 +35,7 @@ func init() {
// if you want more fine-grained metrics then you can drill down with the metrics in prom that
// containerd is outputting
ct = ns.NewLabeledTimer("run", "Run time of a full container during the test", "commit")
execTimer = ns.NewLabeledTimer("exec", "Run time of an exec process during the test", "commit")
binarySizeGauge = ns.NewLabeledGauge("binary_size", "Binary size of compiled binaries", metrics.Bytes, "name")
errCounter = ns.NewLabeledCounter("errors", "Errors encountered running the stress tests", "err")
metrics.Register(ns)
@@ -75,9 +77,12 @@ func (r *run) gather(workers []*worker) *result {
type result struct {
Total int `json:"total"`
Failures int `json:"failures"`
Seconds float64 `json:"seconds"`
ContainersPerSecond float64 `json:"containersPerSecond"`
SecondsPerContainer float64 `json:"secondsPerContainer"`
ExecTotal int `json:"execTotal"`
ExecFailures int `json:"execFailures"`
}
func main() {
@@ -121,12 +126,12 @@ func main() {
},
}
app.Before = func(context *cli.Context) error {
if context.GlobalBool("debug") {
logrus.SetLevel(logrus.DebugLevel)
}
if context.GlobalBool("json") {
logrus.SetLevel(logrus.WarnLevel)
}
if context.GlobalBool("debug") {
logrus.SetLevel(logrus.DebugLevel)
}
return nil
}
app.Action = func(context *cli.Context) error {
@@ -206,9 +211,6 @@ func test(c config) error {
)
logrus.Info("starting stress test run...")
args := oci.WithProcessArgs("true")
if c.Exec {
args = oci.WithProcessArgs("sleep", "10")
}
v, err := client.Version(ctx)
if err != nil {
return err
@@ -230,11 +232,34 @@ func test(c config) error {
spec: spec,
image: image,
client: client,
doExec: c.Exec,
commit: v.Revision,
}
workers = append(workers, w)
}
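// when exec stress is enabled, run one dedicated exec worker concurrently with the container workers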
var exec *execWorker
if c.Exec {
wg.Add(1)
spec, err := oci.GenerateSpec(ctx, client,
&containers.Container{},
oci.WithImageConfig(image),
args,
)
if err != nil {
return err
}
exec = &execWorker{
worker: worker{
id: c.Concurrency,
wg: &wg,
spec: spec,
image: image,
client: client,
commit: v.Revision,
},
}
go exec.exec(ctx, tctx)
}
// start the timer and run the worker
r.start()
for _, w := range workers {
@@ -245,6 +270,10 @@ func test(c config) error {
r.end()
results := r.gather(workers)
if c.Exec {
results.ExecTotal = exec.count
results.ExecFailures = exec.failures
}
logrus.Infof("ending test run in %0.3f seconds", results.Seconds)
logrus.WithField("failures", r.failures).Infof(

View File

@@ -4,10 +4,8 @@ import (
"context"
"fmt"
"path/filepath"
"strconv"
"strings"
"sync"
"syscall"
"time"
"github.com/containerd/containerd"
@@ -26,7 +24,6 @@ type worker struct {
client *containerd.Client
image containerd.Image
spec *specs.Spec
doExec bool
commit string
}
@@ -86,17 +83,6 @@ func (w *worker) runContainer(ctx context.Context, id string) error {
if err := task.Start(ctx); err != nil {
return err
}
if w.doExec {
for i := 0; i < 256; i++ {
if err := w.exec(ctx, i, task); err != nil {
w.failures++
logrus.WithError(err).Error("exec failure")
}
}
if err := task.Kill(ctx, syscall.SIGKILL); err != nil {
return err
}
}
status := <-statusC
_, _, err = status.Result()
if err != nil {
@@ -108,25 +94,6 @@ func (w *worker) runContainer(ctx context.Context, id string) error {
return nil
}
func (w *worker) exec(ctx context.Context, i int, t containerd.Task) error {
pSpec := *w.spec.Process
pSpec.Args = []string{"true"}
process, err := t.Exec(ctx, strconv.Itoa(i), &pSpec, cio.NullIO)
if err != nil {
return err
}
defer process.Delete(ctx)
status, err := process.Wait(ctx)
if err != nil {
return err
}
if err := process.Start(ctx); err != nil {
return err
}
<-status
return nil
}
func (w *worker) getID() string {
return fmt.Sprintf("%d-%d", w.id, w.count)
}