Clean up dead v2 shim.

Signed-off-by: Lantao Liu <lantaol@google.com>
This commit is contained in:
Lantao Liu 2019-04-11 11:06:23 -07:00
parent a17c809571
commit 5c9811ded0
3 changed files with 103 additions and 41 deletions

View File

@ -52,7 +52,7 @@ type binary struct {
rtTasks *runtime.TaskList
}
func (b *binary) Start(ctx context.Context) (_ *shim, err error) {
func (b *binary) Start(ctx context.Context, onClose func()) (_ *shim, err error) {
args := []string{"-id", b.bundle.ID}
if logrus.GetLevel() == logrus.DebugLevel {
args = append(args, "-debug")
@ -96,7 +96,7 @@ func (b *binary) Start(ctx context.Context) (_ *shim, err error) {
if err != nil {
return nil, err
}
client := ttrpc.NewClient(conn, ttrpc.WithOnClose(func() { _ = conn.Close() }))
client := ttrpc.NewClient(conn, ttrpc.WithOnClose(onClose))
return &shim{
bundle: b.bundle,
client: client,
@ -147,9 +147,6 @@ func (b *binary) Delete(ctx context.Context) (*runtime.Exit, error) {
if err := b.bundle.Delete(); err != nil {
return nil, err
}
// remove self from the runtime task list
// this seems dirty but it cleans up the API across runtimes, tasks, and the service
b.rtTasks.Delete(ctx, b.bundle.ID)
return &runtime.Exit{
Status: response.ExitStatus,
Timestamp: response.ExitedAt,

View File

@ -113,6 +113,10 @@ func (m *TaskManager) ID() string {
// Create a new task
func (m *TaskManager) Create(ctx context.Context, id string, opts runtime.CreateOpts) (_ runtime.Task, err error) {
ns, err := namespaces.NamespaceRequired(ctx)
if err != nil {
return nil, err
}
bundle, err := NewBundle(ctx, m.root, m.state, id, opts.Spec.Value)
if err != nil {
return nil, err
@ -123,7 +127,15 @@ func (m *TaskManager) Create(ctx context.Context, id string, opts runtime.Create
}
}()
b := shimBinary(ctx, bundle, opts.Runtime, m.containerdAddress, m.events, m.tasks)
shim, err := b.Start(ctx)
shim, err := b.Start(ctx, func() {
log.G(ctx).WithField("id", id).Info("shim disconnected")
_, err := m.tasks.Get(ctx, id)
if err != nil {
// Task was never started or was already successfully deleted
return
}
cleanupAfterDeadShim(context.Background(), id, ns, m.events, b)
})
if err != nil {
return nil, err
}
@ -219,12 +231,9 @@ func (m *TaskManager) loadTasks(ctx context.Context) error {
bundle.Delete()
continue
}
shim, err := loadShim(ctx, bundle, m.events, m.tasks)
if err != nil {
log.G(ctx).WithError(err).Errorf("cleanup dead shim %s", id)
container, err := m.container(ctx, id)
if err != nil {
log.G(ctx).WithError(err).Errorf("loading dead container %s", id)
log.G(ctx).WithError(err).Errorf("loading container %s", id)
if err := mount.UnmountAll(filepath.Join(bundle.Path, "rootfs"), 0); err != nil {
log.G(ctx).WithError(err).Errorf("forceful unmount of rootfs %s", id)
}
@ -232,10 +241,17 @@ func (m *TaskManager) loadTasks(ctx context.Context) error {
continue
}
binaryCall := shimBinary(ctx, bundle, container.Runtime.Name, m.containerdAddress, m.events, m.tasks)
if _, err := binaryCall.Delete(ctx); err != nil {
log.G(ctx).WithError(err).Errorf("binary call to delete for %s", id)
continue
shim, err := loadShim(ctx, bundle, m.events, m.tasks, func() {
log.G(ctx).WithField("id", id).Info("shim disconnected")
_, err := m.tasks.Get(ctx, id)
if err != nil {
// Task was never started or was already successfully deleted
return
}
cleanupAfterDeadShim(context.Background(), id, ns, m.events, binaryCall)
})
if err != nil {
cleanupAfterDeadShim(ctx, id, ns, m.events, binaryCall)
continue
}
m.tasks.Add(ctx, shim)

View File

@ -24,18 +24,21 @@ import (
"path/filepath"
"time"
eventstypes "github.com/containerd/containerd/api/events"
"github.com/containerd/containerd/api/types"
tasktypes "github.com/containerd/containerd/api/types/task"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/events/exchange"
"github.com/containerd/containerd/identifiers"
"github.com/containerd/containerd/log"
"github.com/containerd/containerd/namespaces"
"github.com/containerd/containerd/runtime"
client "github.com/containerd/containerd/runtime/v2/shim"
"github.com/containerd/containerd/runtime/v2/task"
"github.com/containerd/ttrpc"
ptypes "github.com/gogo/protobuf/types"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
func loadAddress(path string) (string, error) {
@ -46,7 +49,7 @@ func loadAddress(path string) (string, error) {
return string(data), nil
}
func loadShim(ctx context.Context, bundle *Bundle, events *exchange.Exchange, rt *runtime.TaskList) (_ *shim, err error) {
func loadShim(ctx context.Context, bundle *Bundle, events *exchange.Exchange, rt *runtime.TaskList, onClose func()) (_ *shim, err error) {
address, err := loadAddress(filepath.Join(bundle.Path, "address"))
if err != nil {
return nil, err
@ -55,6 +58,11 @@ func loadShim(ctx context.Context, bundle *Bundle, events *exchange.Exchange, rt
if err != nil {
return nil, err
}
defer func() {
if err != nil {
conn.Close()
}
}()
f, err := openShimLog(ctx, bundle)
if err != nil {
return nil, errors.Wrap(err, "open shim log pipe")
@ -74,7 +82,12 @@ func loadShim(ctx context.Context, bundle *Bundle, events *exchange.Exchange, rt
}
}()
client := ttrpc.NewClient(conn, ttrpc.WithOnClose(func() { _ = conn.Close() }))
client := ttrpc.NewClient(conn, ttrpc.WithOnClose(onClose))
defer func() {
if err != nil {
client.Close()
}
}()
s := &shim{
client: client,
task: task.NewTaskClient(client),
@ -88,6 +101,52 @@ func loadShim(ctx context.Context, bundle *Bundle, events *exchange.Exchange, rt
return s, nil
}
// cleanupAfterDeadShim cleans up a task whose shim is no longer reachable and
// publishes the corresponding exit and delete events.
//
// It attaches ns to ctx, bounds the whole cleanup with a 5 second timeout, and
// runs binaryCall.Delete. A failed Delete is logged but does not stop the
// function: the events are still published, using exit status 255 and the
// current time when Delete returned no response.
//
// Parameters:
//   - ctx: parent context; the namespace and the timeout are layered on top of it.
//   - id: task ID, also used as the container ID in the published events.
//   - ns: namespace attached to ctx before any call is made.
//   - events: exchange on which the TaskExit and TaskDelete events are published.
//   - binaryCall: shim binary whose Delete performs the actual cleanup.
func cleanupAfterDeadShim(ctx context.Context, id, ns string, events *exchange.Exchange, binaryCall *binary) {
	ctx = namespaces.WithNamespace(ctx, ns)
	ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
	defer cancel()

	// Every message from this function carries the same identifying fields.
	logger := log.G(ctx).WithFields(logrus.Fields{
		"id":        id,
		"namespace": ns,
	})
	logger.Warn("cleaning up after shim disconnected")

	response, err := binaryCall.Delete(ctx)
	if err != nil {
		// Best effort: keep going so the exit/delete events are still emitted.
		logger.WithError(err).Warn("failed to clean up after shim disconnected")
	}

	var (
		pid        uint32
		exitStatus uint32
		exitedAt   time.Time
	)
	if response != nil {
		pid = response.Pid
		exitStatus = response.Status
		exitedAt = response.Timestamp
	} else {
		// No exit information is available; report a generic failure as of now.
		exitStatus = 255
		exitedAt = time.Now()
	}

	// Publish failures are logged rather than dropped silently; there is no
	// caller to return them to.
	if err := events.Publish(ctx, runtime.TaskExitEventTopic, &eventstypes.TaskExit{
		ContainerID: id,
		ID:          id,
		Pid:         pid,
		ExitStatus:  exitStatus,
		ExitedAt:    exitedAt,
	}); err != nil {
		logger.WithError(err).Warn("failed to publish task exit event")
	}

	if err := events.Publish(ctx, runtime.TaskDeleteEventTopic, &eventstypes.TaskDelete{
		ContainerID: id,
		Pid:         pid,
		ExitStatus:  exitStatus,
		ExitedAt:    exitedAt,
	}); err != nil {
		logger.WithError(err).Warn("failed to publish task delete event")
	}
}
type shim struct {
bundle *Bundle
client *ttrpc.Client
@ -119,19 +178,9 @@ func (s *shim) Shutdown(ctx context.Context) error {
}
// waitShutdown calls s.Shutdown and limits how long the caller waits for it
// to 3 seconds.
//
// NOTE(review): this span comes from a rendered diff without +/- markers, so it
// holds two bodies back to back. Presumably the goroutine/select version is the
// removed implementation and the context.WithTimeout version is the added one;
// confirm against the commit. Read literally, both select cases return, so the
// statements after the select would never run.
func (s *shim) waitShutdown(ctx context.Context) error {
// Run Shutdown in the background so the select below can give up after 3 seconds.
// The goroutine is not cancelled on timeout; it ends only when Shutdown returns.
dead := make(chan struct{})
go func() {
if err := s.Shutdown(ctx); err != nil {
log.G(ctx).WithError(err).Error("shim shutdown error")
}
close(dead)
}()
select {
case <-time.After(3 * time.Second):
return errors.New("failed to shutdown shim in time")
case <-dead:
// Shutdown returned; any error was only logged above, so nil is reported here.
return nil
}
// Bound Shutdown with a 3 second context deadline and return its error directly.
ctx, cancel := context.WithTimeout(ctx, 3*time.Second)
defer cancel()
return s.Shutdown(ctx)
}
// ID of the shim/task
@ -154,15 +203,15 @@ func (s *shim) Delete(ctx context.Context) (*runtime.Exit, error) {
if err != nil {
return nil, errdefs.FromGRPC(err)
}
if err := s.waitShutdown(ctx); err != nil {
return nil, err
}
if err := s.bundle.Delete(); err != nil {
return nil, err
}
// remove self from the runtime task list
// this seems dirty but it cleans up the API across runtimes, tasks, and the service
s.rtTasks.Delete(ctx, s.ID())
if err := s.waitShutdown(ctx); err != nil {
log.G(ctx).WithError(err).Error("failed to shutdown shim")
}
if err := s.bundle.Delete(); err != nil {
log.G(ctx).WithError(err).Error("failed to delete bundle")
}
return &runtime.Exit{
Status: response.ExitStatus,
Timestamp: response.ExitedAt,