containerd/windows/task.go
Stephen J Day a615a6fe5d
events: refactor event distribution
In the course of setting out to add filters and address some cleanup, it
was found that we had a few problems in the events subsystem that needed
addressing before moving forward.

The biggest change was to move to the more standard terminology of
publish and subscribe. We make this terminology change across the Go
interface and the GRPC API, making the behavior more familiar. The
previous system was very context-oriented, which is no longer required.

With this, we've removed a large amount of dead and unneeded code. Event
transactions, context storage and the concept of `Poster` is gone. This
has been replaced in most places with a `Publisher`, which matches the
actual usage throughout the codebase, removing the need for helpers.
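
For reference, the publish side now reduces to a single-method
contract. A minimal sketch (the `Event` type below is an assumption,
but the call shape matches the `t.publisher.Publish(ctx, topic,
event)` usage throughout this file):

    // Event is any message carried on a topic; sketch only, the
    // concrete type in the events package may differ.
    type Event interface{}

    // Publisher is the publish-side contract consumed here.
    type Publisher interface {
        Publish(ctx context.Context, topic string, event Event) error
    }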

There are still some questions around the way events are handled in the
shim. Right now, we've preserved some of the existing bugs which may
require more extensive changes to resolve correctly.

Signed-off-by: Stephen J Day <stephen.day@docker.com>
2017-07-25 15:08:09 -07:00

447 lines
9.4 KiB
Go

// +build windows

package windows

import (
	"context"
	"io"
	"sync"
	"syscall"
	"time"

	"github.com/Microsoft/hcsshim"
	eventsapi "github.com/containerd/containerd/api/services/events/v1"
	"github.com/containerd/containerd/errdefs"
	"github.com/containerd/containerd/events"
	"github.com/containerd/containerd/log"
	"github.com/containerd/containerd/runtime"
	"github.com/containerd/containerd/typeurl"
	"github.com/gogo/protobuf/types"
	specs "github.com/opencontainers/runtime-spec/specs-go"
	"github.com/pkg/errors"
	"github.com/sirupsen/logrus"
)

type task struct {
	sync.Mutex

	id                string
	namespace         string
	pid               uint32
	io                *pipeSet
	status            runtime.Status
	spec              *specs.Spec
	processes         map[string]*process
	hyperV            bool
	publisher         events.Publisher
	rwLayer           string
	pidPool           *pidPool
	hcsContainer      hcsshim.Container
	terminateDuration time.Duration
	servicing         bool
}

func (t *task) ID() string {
	return t.id
}

func (t *task) State(ctx context.Context) (runtime.State, error) {
	var status runtime.Status
	if p := t.getProcess(t.id); p != nil {
		status = p.Status()
	} else {
		status = t.getStatus()
	}
	return runtime.State{
		Status:   status,
		Pid:      t.pid,
		Stdin:    t.io.src.Stdin,
		Stdout:   t.io.src.Stdout,
		Stderr:   t.io.src.Stderr,
		Terminal: t.io.src.Terminal,
	}, nil
}

func (t *task) Kill(ctx context.Context, signal uint32, all bool) error {
	p := t.getProcess(t.id)
	if p == nil {
		return errors.Wrapf(errdefs.ErrFailedPrecondition, "task is not running")
	}
	if p.Status() == runtime.StoppedStatus {
		return errors.Wrapf(errdefs.ErrNotFound, "process is stopped")
	}
	return p.Kill(ctx, signal, all)
}

func (t *task) ResizePty(ctx context.Context, size runtime.ConsoleSize) error {
	p := t.getProcess(t.id)
	if p == nil {
		return errors.Wrap(errdefs.ErrFailedPrecondition, "task not started")
	}
	return p.ResizePty(ctx, size)
}

func (t *task) CloseIO(ctx context.Context) error {
	p := t.getProcess(t.id)
	if p == nil {
		return errors.Wrap(errdefs.ErrFailedPrecondition, "task not started")
	}
	return p.hcs.CloseStdin()
}

func (t *task) Info() runtime.TaskInfo {
	return runtime.TaskInfo{
		ID:        t.id,
		Runtime:   pluginID,
		Namespace: t.namespace,
		// TODO(mlaventure): what about Spec? I think this could be removed from
		// the info, the id is enough since it matches the one from the container
	}
}

func (t *task) Start(ctx context.Context) error {
	conf := newProcessConfig(t.spec.Process, t.io)
	if _, err := t.newProcess(ctx, t.id, conf, t.io); err != nil {
		return err
	}
	t.publisher.Publish(ctx,
		runtime.TaskStartEventTopic,
		&eventsapi.TaskStart{
			ContainerID: t.id,
			Pid:         t.pid,
		})
	return nil
}

func (t *task) Pause(ctx context.Context) error {
	if t.hyperV {
		err := t.hcsContainer.Pause()
		if err == nil {
			t.Lock()
			t.status = runtime.PausedStatus
			t.Unlock()
			t.publisher.Publish(ctx,
				runtime.TaskPausedEventTopic,
				&eventsapi.TaskPaused{
					ContainerID: t.id,
				})
		}
		return errors.Wrap(err, "hcsshim failed to pause task")
	}
	return errors.Wrap(errdefs.ErrFailedPrecondition, "not a hyperV task")
}

func (t *task) Resume(ctx context.Context) error {
	if t.hyperV {
		err := t.hcsContainer.Resume()
		if err == nil {
			t.Lock()
			t.status = runtime.RunningStatus
			t.Unlock()
			t.publisher.Publish(ctx,
				runtime.TaskResumedEventTopic,
				&eventsapi.TaskResumed{
					ContainerID: t.id,
				})
		}
		return errors.Wrap(err, "hcsshim failed to resume task")
	}
	return errors.Wrap(errdefs.ErrFailedPrecondition, "not a hyperV task")
}

func (t *task) Exec(ctx context.Context, id string, opts runtime.ExecOpts) (runtime.Process, error) {
	if p := t.getProcess(t.id); p == nil {
		return nil, errors.Wrap(errdefs.ErrFailedPrecondition, "task not started")
	}
	if p := t.getProcess(id); p != nil {
		return nil, errors.Wrap(errdefs.ErrAlreadyExists, "id already in use")
	}
	s, err := typeurl.UnmarshalAny(opts.Spec)
	if err != nil {
		return nil, err
	}
	spec := s.(*specs.Process)
	if spec.Cwd == "" {
		spec.Cwd = t.spec.Process.Cwd
	}
	var pset *pipeSet
	if pset, err = newPipeSet(ctx, opts.IO); err != nil {
		return nil, err
	}
	defer func() {
		if err != nil {
			pset.Close()
		}
	}()
	conf := newProcessConfig(spec, pset)
	p, err := t.newProcess(ctx, id, conf, pset)
	if err != nil {
		return nil, err
	}
	t.publisher.Publish(ctx,
		runtime.TaskExecAddedEventTopic,
		&eventsapi.TaskExecAdded{
			ContainerID: t.id,
			ExecID:      id,
			Pid:         p.Pid(),
		})
	return p, nil
}

func (t *task) Pids(ctx context.Context) ([]uint32, error) {
	t.Lock()
	defer t.Unlock()
	var (
		pids = make([]uint32, len(t.processes))
		idx  = 0
	)
	for _, p := range t.processes {
		pids[idx] = p.Pid()
		idx++
	}
	return pids, nil
}

func (t *task) Checkpoint(_ context.Context, _ string, _ *types.Any) error {
	return errors.Wrap(errdefs.ErrUnavailable, "not supported")
}

func (t *task) DeleteProcess(ctx context.Context, id string) (*runtime.Exit, error) {
	if id == t.id {
		return nil, errors.Wrapf(errdefs.ErrInvalidArgument,
			"cannot delete init process")
	}
	if p := t.getProcess(id); p != nil {
		ec, ea, err := p.ExitCode()
		if err != nil {
			return nil, err
		}
		t.removeProcess(id)
		return &runtime.Exit{
			Pid:       p.pid,
			Status:    ec,
			Timestamp: ea,
		}, nil
	}
	return nil, errors.Wrapf(errdefs.ErrNotFound, "no such process %s", id)
}

func (t *task) Update(ctx context.Context, resources *types.Any) error {
	return errors.Wrap(errdefs.ErrUnavailable, "not supported")
}

func (t *task) Process(ctx context.Context, id string) (runtime.Process, error) {
	// Check the concrete *process for nil before converting it to the
	// runtime.Process interface; a typed-nil pointer stored in the
	// interface would otherwise never compare equal to nil.
	proc := t.getProcess(id)
	if proc == nil {
		return nil, errors.Wrapf(errdefs.ErrNotFound, "no such process %s", id)
	}
	return proc, nil
}
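
// newProcess creates a new process in the container, wires its stdio to
// pset, and registers it in t.processes. It also starts a goroutine that
// waits for the process to exit, records the exit code and time, and
// publishes a TaskExit event.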
func (t *task) newProcess(ctx context.Context, id string, conf *hcsshim.ProcessConfig, pset *pipeSet) (*process, error) {
	var (
		err error
		pid uint32
	)

	// If we fail, close the io right now
	defer func() {
		if err != nil {
			pset.Close()
		}
	}()

	t.Lock()
	if len(t.processes) == 0 {
		pid = t.pid
	} else {
		if pid, err = t.pidPool.Get(); err != nil {
			t.Unlock()
			return nil, err
		}
		defer func() {
			if err != nil {
				t.pidPool.Put(pid)
			}
		}()
	}
	t.Unlock()

	var p hcsshim.Process
	if p, err = t.hcsContainer.CreateProcess(conf); err != nil {
		return nil, errors.Wrapf(err, "failed to create process")
	}

	stdin, stdout, stderr, err := p.Stdio()
	if err != nil {
		p.Kill()
		return nil, errors.Wrapf(err, "failed to retrieve process stdio")
	}

	ioCopy := func(name string, dst io.WriteCloser, src io.ReadCloser) {
		log.G(ctx).WithFields(logrus.Fields{"id": id, "pid": pid}).
			Debugf("%s: copy started", name)
		io.Copy(dst, src)
		log.G(ctx).WithFields(logrus.Fields{"id": id, "pid": pid}).
			Debugf("%s: copy done", name)
		dst.Close()
		src.Close()
	}

	if pset.stdin != nil {
		go ioCopy("stdin", stdin, pset.stdin)
	}
	if pset.stdout != nil {
		go ioCopy("stdout", pset.stdout, stdout)
	}
	if pset.stderr != nil {
		go ioCopy("stderr", pset.stderr, stderr)
	}

	t.Lock()
	wp := &process{
		id:     id,
		pid:    pid,
		io:     pset,
		status: runtime.RunningStatus,
		task:   t,
		hcs:    p,
		exitCh: make(chan struct{}),
	}
	t.processes[id] = wp
	t.Unlock()

	// Wait for the process to exit to get the exit status
	go func() {
		if err := p.Wait(); err != nil {
			herr, ok := err.(*hcsshim.ProcessError)
			if ok && herr.Err != syscall.ERROR_BROKEN_PIPE {
				log.G(ctx).
					WithError(err).
					WithFields(logrus.Fields{"id": id, "pid": pid}).
					Warnf("hcsshim wait failed (process may have been killed)")
			}
			// Try to get the exit code nonetheless
		}
		wp.exitTime = time.Now()
		ec, err := p.ExitCode()
		if err != nil {
			log.G(ctx).
				WithError(err).
				WithFields(logrus.Fields{"id": id, "pid": pid}).
				Warnf("hcsshim could not retrieve exit code")
			// Use the unknown exit code
			ec = 255
		}
		wp.exitCode = uint32(ec)
		t.publisher.Publish(ctx,
			runtime.TaskExitEventTopic,
			&eventsapi.TaskExit{
				ContainerID: t.id,
				ID:          id,
				Pid:         pid,
				ExitStatus:  wp.exitCode,
				ExitedAt:    wp.exitTime,
			})
		close(wp.exitCh)
		// Ensure io's are closed
		pset.Close()
		// Cleanup HCS resources
		p.Close()
	}()

	return wp, nil
}

func (t *task) getProcess(id string) *process {
	t.Lock()
	p := t.processes[id]
	t.Unlock()
	return p
}
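
// removeProcessNL removes the process from the map, closes its io, and
// returns its pid to the pool. The caller must hold t's lock ("NL": no
// lock is taken here).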
func (t *task) removeProcessNL(id string) {
	if p, ok := t.processes[id]; ok {
		if p.io != nil {
			p.io.Close()
		}
		t.pidPool.Put(p.pid)
		delete(t.processes, id)
	}
}

func (t *task) removeProcess(id string) {
	t.Lock()
	t.removeProcessNL(id)
	t.Unlock()
}

func (t *task) getStatus() runtime.Status {
	t.Lock()
	status := t.status
	t.Unlock()
	return status
}

// stop tries to shut down the task.
// It will do so by first calling Shutdown on the hcsshim.Container and, if
// that fails, by resorting to calling Terminate.
func (t *task) stop(ctx context.Context) error {
	if err := t.hcsStop(ctx, t.hcsContainer.Shutdown); err != nil {
		return t.hcsStop(ctx, t.hcsContainer.Terminate)
	}
	t.hcsContainer.Close()
	return nil
}
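
// hcsStop invokes the given stop function and normalizes the result: a
// pending operation is waited on for up to t.terminateDuration, and an
// already-stopped container is treated as success.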
func (t *task) hcsStop(ctx context.Context, stop func() error) error {
	err := stop()
	switch {
	case hcsshim.IsPending(err):
		err = t.hcsContainer.WaitTimeout(t.terminateDuration)
	case hcsshim.IsAlreadyStopped(err):
		err = nil
	}
	return err
}
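
// cleanup releases all remaining processes and removes the task's
// read-write layer.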
func (t *task) cleanup() {
	t.Lock()
	for _, p := range t.processes {
		t.removeProcessNL(p.id)
	}
	removeLayer(context.Background(), t.rwLayer)
	t.Unlock()
}