containerd/task.go
Stephen J Day af2d7f0e55
events: initial support for filters
This change further plumbs the components required for implementing
event filters. Specifically, we now have the ability to filter on the
`topic` and `namespace`.

In the course of implementing this functionality, it was found that
there were mismatches in the events API that created extra serialization
round trips. A modification to `typeurl.MarshalAny` and a clear
separation between publishing and forwarding allow us to avoid these
serialization issues.

Unfortunately, this has required a few tweaks to the GRPC API, so this
is a breaking change. `Publish` and `Forward` have been clearly separated in
the GRPC API. `Publish` honors the contextual namespace and performs
timestamping while `Forward` simply validates and forwards. The behavior
of `Subscribe` is to propagate events for all namespaces unless
specifically filtered (and hence the relation to this particular change.

The following is an example of using filters to monitor the task events
generated while running the [bucketbench tool](https://github.com/estesp/bucketbench):

```
$ ctr events 'topic~=/tasks/.+,namespace==bb'
...
2017-07-28 22:19:51.78944874 +0000 UTC   bb        /tasks/start   {"container_id":"bb-ctr-6-8","pid":25889}
2017-07-28 22:19:51.791893688 +0000 UTC   bb        /tasks/start   {"container_id":"bb-ctr-4-8","pid":25882}
2017-07-28 22:19:51.792608389 +0000 UTC   bb        /tasks/start   {"container_id":"bb-ctr-2-9","pid":25860}
2017-07-28 22:19:51.793035217 +0000 UTC   bb        /tasks/start   {"container_id":"bb-ctr-5-6","pid":25869}
2017-07-28 22:19:51.802659622 +0000 UTC   bb        /tasks/start   {"container_id":"bb-ctr-0-7","pid":25877}
2017-07-28 22:19:51.805192898 +0000 UTC   bb        /tasks/start   {"container_id":"bb-ctr-3-6","pid":25856}
2017-07-28 22:19:51.832374931 +0000 UTC   bb        /tasks/exit   {"container_id":"bb-ctr-8-6","id":"bb-ctr-8-6","pid":25864,"exited_at":"2017-07-28T22:19:51.832013043Z"}
2017-07-28 22:19:51.84001249 +0000 UTC   bb        /tasks/exit   {"container_id":"bb-ctr-2-9","id":"bb-ctr-2-9","pid":25860,"exited_at":"2017-07-28T22:19:51.839717714Z"}
2017-07-28 22:19:51.840272635 +0000 UTC   bb        /tasks/exit   {"container_id":"bb-ctr-7-6","id":"bb-ctr-7-6","pid":25855,"exited_at":"2017-07-28T22:19:51.839796335Z"}
...
```

In addition to the events changes, we now display the namespace origin
of the event in the cli tool.

This will be followed by a PR to add individual field filtering for the
events API for each event type.

Signed-off-by: Stephen J Day <stephen.day@docker.com>
2017-07-31 12:53:18 -07:00

409 lines
9.9 KiB
Go

package containerd
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
goruntime "runtime"
"strings"
"syscall"
eventsapi "github.com/containerd/containerd/api/services/events/v1"
"github.com/containerd/containerd/api/services/tasks/v1"
"github.com/containerd/containerd/api/types"
"github.com/containerd/containerd/content"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/linux/runcopts"
"github.com/containerd/containerd/mount"
"github.com/containerd/containerd/rootfs"
"github.com/containerd/containerd/runtime"
"github.com/containerd/containerd/typeurl"
digest "github.com/opencontainers/go-digest"
"github.com/opencontainers/image-spec/specs-go/v1"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
)
const UnknownExitStatus = 255
type TaskStatus string
const (
Running TaskStatus = "running"
Created TaskStatus = "created"
Stopped TaskStatus = "stopped"
Paused TaskStatus = "paused"
Pausing TaskStatus = "pausing"
)
type IOCloseInfo struct {
Stdin bool
}
type IOCloserOpts func(*IOCloseInfo)
func WithStdinCloser(r *IOCloseInfo) {
r.Stdin = true
}
type CheckpointTaskInfo struct {
ParentCheckpoint digest.Digest
Options interface{}
}
type CheckpointTaskOpts func(*CheckpointTaskInfo) error
type TaskInfo struct {
Checkpoint *types.Descriptor
RootFS []mount.Mount
Options interface{}
}
type Task interface {
Pid() uint32
Delete(context.Context) (uint32, error)
Kill(context.Context, syscall.Signal) error
Pause(context.Context) error
Resume(context.Context) error
Start(context.Context) error
Status(context.Context) (TaskStatus, error)
Wait(context.Context) (uint32, error)
Exec(context.Context, string, *specs.Process, IOCreation) (Process, error)
Pids(context.Context) ([]uint32, error)
CloseIO(context.Context, ...IOCloserOpts) error
Resize(ctx context.Context, w, h uint32) error
IO() *IO
Checkpoint(context.Context, ...CheckpointTaskOpts) (v1.Descriptor, error)
Update(context.Context, ...UpdateTaskOpts) error
}
type Process interface {
Pid() uint32
Start(context.Context) error
Delete(context.Context) (uint32, error)
Kill(context.Context, syscall.Signal) error
Wait(context.Context) (uint32, error)
CloseIO(context.Context, ...IOCloserOpts) error
Resize(ctx context.Context, w, h uint32) error
IO() *IO
}
var _ = (Task)(&task{})
type task struct {
client *Client
io *IO
id string
pid uint32
deferred *tasks.CreateTaskRequest
}
// Pid returns the pid or process id for the task
func (t *task) Pid() uint32 {
return t.pid
}
func (t *task) Start(ctx context.Context) error {
if t.deferred != nil {
response, err := t.client.TaskService().Create(ctx, t.deferred)
t.deferred = nil
if err != nil {
t.io.closer.Close()
return err
}
t.pid = response.Pid
return nil
}
_, err := t.client.TaskService().Start(ctx, &tasks.StartTaskRequest{
ContainerID: t.id,
})
if err != nil {
t.io.closer.Close()
}
return err
}
func (t *task) Kill(ctx context.Context, s syscall.Signal) error {
_, err := t.client.TaskService().Kill(ctx, &tasks.KillRequest{
Signal: uint32(s),
ContainerID: t.id,
})
if err != nil {
return errdefs.FromGRPC(err)
}
return nil
}
func (t *task) Pause(ctx context.Context) error {
_, err := t.client.TaskService().Pause(ctx, &tasks.PauseTaskRequest{
ContainerID: t.id,
})
return errdefs.FromGRPC(err)
}
func (t *task) Resume(ctx context.Context) error {
_, err := t.client.TaskService().Resume(ctx, &tasks.ResumeTaskRequest{
ContainerID: t.id,
})
return errdefs.FromGRPC(err)
}
func (t *task) Status(ctx context.Context) (TaskStatus, error) {
r, err := t.client.TaskService().Get(ctx, &tasks.GetTaskRequest{
ContainerID: t.id,
})
if err != nil {
return "", errdefs.FromGRPC(err)
}
return TaskStatus(strings.ToLower(r.Task.Status.String())), nil
}
// Wait is a blocking call that will wait for the task to exit and return the exit status
func (t *task) Wait(ctx context.Context) (uint32, error) {
eventstream, err := t.client.EventService().Subscribe(ctx, &eventsapi.SubscribeRequest{
Filters: []string{"topic==" + runtime.TaskExitEventTopic},
})
if err != nil {
return UnknownExitStatus, errdefs.FromGRPC(err)
}
for {
evt, err := eventstream.Recv()
if err != nil {
return UnknownExitStatus, err
}
if typeurl.Is(evt.Event, &eventsapi.TaskExit{}) {
v, err := typeurl.UnmarshalAny(evt.Event)
if err != nil {
return UnknownExitStatus, err
}
e := v.(*eventsapi.TaskExit)
if e.ContainerID == t.id && e.Pid == t.pid {
return e.ExitStatus, nil
}
}
}
}
// Delete deletes the task and its runtime state
// it returns the exit status of the task and any errors that were encountered
// during cleanup
func (t *task) Delete(ctx context.Context) (uint32, error) {
if t.io != nil {
t.io.Cancel()
t.io.Wait()
t.io.Close()
}
r, err := t.client.TaskService().Delete(ctx, &tasks.DeleteTaskRequest{
ContainerID: t.id,
})
if err != nil {
return UnknownExitStatus, err
}
return r.ExitStatus, nil
}
func (t *task) Exec(ctx context.Context, id string, spec *specs.Process, ioCreate IOCreation) (Process, error) {
if id == "" {
return nil, errors.Wrapf(errdefs.ErrInvalidArgument, "exec id must not be empty")
}
i, err := ioCreate(id)
if err != nil {
return nil, err
}
return &process{
id: id,
task: t,
io: i,
spec: spec,
}, nil
}
func (t *task) Pids(ctx context.Context) ([]uint32, error) {
response, err := t.client.TaskService().ListPids(ctx, &tasks.ListPidsRequest{
ContainerID: t.id,
})
if err != nil {
return nil, err
}
return response.Pids, nil
}
func (t *task) CloseIO(ctx context.Context, opts ...IOCloserOpts) error {
r := &tasks.CloseIORequest{
ContainerID: t.id,
}
var i IOCloseInfo
for _, o := range opts {
o(&i)
}
r.Stdin = i.Stdin
_, err := t.client.TaskService().CloseIO(ctx, r)
return err
}
func (t *task) IO() *IO {
return t.io
}
func (t *task) Resize(ctx context.Context, w, h uint32) error {
_, err := t.client.TaskService().ResizePty(ctx, &tasks.ResizePtyRequest{
ContainerID: t.id,
Width: w,
Height: h,
})
return err
}
func (t *task) Checkpoint(ctx context.Context, opts ...CheckpointTaskOpts) (d v1.Descriptor, err error) {
request := &tasks.CheckpointTaskRequest{
ContainerID: t.id,
}
var i CheckpointTaskInfo
for _, o := range opts {
if err := o(&i); err != nil {
return d, err
}
}
request.ParentCheckpoint = i.ParentCheckpoint
if i.Options != nil {
any, err := typeurl.MarshalAny(i.Options)
if err != nil {
return d, err
}
request.Options = any
}
// make sure we pause it and resume after all other filesystem operations are completed
if err := t.Pause(ctx); err != nil {
return d, err
}
defer t.Resume(ctx)
cr, err := t.client.ContainerService().Get(ctx, t.id)
if err != nil {
return d, err
}
var index v1.Index
if err := t.checkpointTask(ctx, &index, request); err != nil {
return d, err
}
if err := t.checkpointImage(ctx, &index, cr.Image); err != nil {
return d, err
}
if err := t.checkpointRWSnapshot(ctx, &index, cr.Snapshotter, cr.RootFS); err != nil {
return d, err
}
index.Annotations = make(map[string]string)
index.Annotations["image.name"] = cr.Image
return t.writeIndex(ctx, &index)
}
type UpdateTaskInfo struct {
Resources interface{}
}
type UpdateTaskOpts func(context.Context, *Client, *UpdateTaskInfo) error
func (t *task) Update(ctx context.Context, opts ...UpdateTaskOpts) error {
request := &tasks.UpdateTaskRequest{
ContainerID: t.id,
}
var i UpdateTaskInfo
for _, o := range opts {
if err := o(ctx, t.client, &i); err != nil {
return err
}
}
if i.Resources != nil {
any, err := typeurl.MarshalAny(i.Resources)
if err != nil {
return err
}
request.Resources = any
}
_, err := t.client.TaskService().Update(ctx, request)
return err
}
func (t *task) checkpointTask(ctx context.Context, index *v1.Index, request *tasks.CheckpointTaskRequest) error {
response, err := t.client.TaskService().Checkpoint(ctx, request)
if err != nil {
return err
}
// add the checkpoint descriptors to the index
for _, d := range response.Descriptors {
index.Manifests = append(index.Manifests, v1.Descriptor{
MediaType: d.MediaType,
Size: d.Size_,
Digest: d.Digest,
Platform: &v1.Platform{
OS: goruntime.GOOS,
Architecture: goruntime.GOARCH,
},
})
}
return nil
}
func (t *task) checkpointRWSnapshot(ctx context.Context, index *v1.Index, snapshotterName string, id string) error {
rw, err := rootfs.Diff(ctx, id, fmt.Sprintf("checkpoint-rw-%s", id), t.client.SnapshotService(snapshotterName), t.client.DiffService())
if err != nil {
return err
}
rw.Platform = &v1.Platform{
OS: goruntime.GOOS,
Architecture: goruntime.GOARCH,
}
index.Manifests = append(index.Manifests, rw)
return nil
}
func (t *task) checkpointImage(ctx context.Context, index *v1.Index, image string) error {
if image == "" {
return fmt.Errorf("cannot checkpoint image with empty name")
}
ir, err := t.client.ImageService().Get(ctx, image)
if err != nil {
return err
}
index.Manifests = append(index.Manifests, ir.Target)
return nil
}
func (t *task) writeIndex(ctx context.Context, index *v1.Index) (v1.Descriptor, error) {
buf := bytes.NewBuffer(nil)
if err := json.NewEncoder(buf).Encode(index); err != nil {
return v1.Descriptor{}, err
}
return writeContent(ctx, t.client.ContentStore(), v1.MediaTypeImageIndex, t.id, buf)
}
func writeContent(ctx context.Context, store content.Store, mediaType, ref string, r io.Reader) (d v1.Descriptor, err error) {
writer, err := store.Writer(ctx, ref, 0, "")
if err != nil {
return d, err
}
defer writer.Close()
size, err := io.Copy(writer, r)
if err != nil {
return d, err
}
if err := writer.Commit(size, ""); err != nil {
return d, err
}
return v1.Descriptor{
MediaType: mediaType,
Digest: writer.Digest(),
Size: size,
}, nil
}
func WithExit(r *CheckpointTaskInfo) error {
r.Options = &runcopts.CheckpointOptions{
Exit: true,
}
return nil
}