From d7b9cb00198600574ea8ac6db62ea8bdd076e466 Mon Sep 17 00:00:00 2001 From: Brian Goff Date: Mon, 20 Jul 2020 13:04:07 -0700 Subject: [PATCH] shim: move event context timeout to publsher Before this change, if an event fails to send on the first attempt, subsequent attempts will fail with context.Cancelled because the the caller of publish passes a cancellable timeout, which the publisher uses to send the event. The publisher returns immediately if the send fails, but adds the event to an async queue to try again. Meanwhile the caller will return cancelling the context. Additionally, subsequent attempts may fail to send because the timeout was expected to be for a single request but the queue sleeps for `attempt*time.Second`. In the shim service, the timeout was set to 5s, which means the send will fail with context.DeadlineExceeded before it reaches `maxRequeue` (which is currently 5). This change moves the timeout to the publisher so each send attempt gets its own timeout. Signed-off-by: Brian Goff --- runtime/v2/runc/v2/service.go | 2 -- runtime/v2/shim/publisher.go | 11 +++++++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/runtime/v2/runc/v2/service.go b/runtime/v2/runc/v2/service.go index 39970b313..d3ea1e8ff 100644 --- a/runtime/v2/runc/v2/service.go +++ b/runtime/v2/runc/v2/service.go @@ -769,9 +769,7 @@ func (s *service) forward(ctx context.Context, publisher shim.Publisher) { ns, _ := namespaces.Namespace(ctx) ctx = namespaces.WithNamespace(context.Background(), ns) for e := range s.events { - ctx, cancel := context.WithTimeout(ctx, 5*time.Second) err := publisher.Publish(ctx, runc.GetTopic(e), e) - cancel() if err != nil { logrus.WithError(err).Error("post event") } diff --git a/runtime/v2/shim/publisher.go b/runtime/v2/shim/publisher.go index c5c4ecc15..cf27a6bc7 100644 --- a/runtime/v2/shim/publisher.go +++ b/runtime/v2/shim/publisher.go @@ -130,7 +130,9 @@ func (l *RemoteEventsPublisher) Publish(ctx context.Context, topic string, event func (l *RemoteEventsPublisher) forwardRequest(ctx context.Context, req *v1.ForwardRequest) error { service, err := l.client.EventsService() if err == nil { - _, err = service.Forward(ctx, req) + fCtx, cancel := context.WithTimeout(ctx, 5*time.Second) + _, err = service.Forward(fCtx, req) + cancel() if err == nil { return nil } @@ -149,7 +151,12 @@ func (l *RemoteEventsPublisher) forwardRequest(ctx context.Context, req *v1.Forw if err != nil { return err } - if _, err = service.Forward(ctx, req); err != nil { + + // try again with a fresh context, otherwise we may get a context timeout unexpectedly. + fCtx, cancel := context.WithTimeout(ctx, 5*time.Second) + _, err = service.Forward(fCtx, req) + cancel() + if err != nil { return err }