sandbox: retry Wait calls to the remote sandbox controller
The remote sandbox controller may restart, so the Wait call should be retried if it fails with a gRPC disconnection error. Signed-off-by: Abel Feng <fshb1988@gmail.com>
This commit is contained in:
parent
b168147ca8
commit
58be881890
@ -18,6 +18,7 @@ package proxy
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
api "github.com/containerd/containerd/api/services/sandbox/v1"
|
||||
"github.com/containerd/containerd/api/types"
|
||||
@ -119,9 +120,31 @@ func (s *remoteSandboxController) Shutdown(ctx context.Context, sandboxID string
|
||||
}
|
||||
|
||||
func (s *remoteSandboxController) Wait(ctx context.Context, sandboxID string) (sandbox.ExitStatus, error) {
|
||||
resp, err := s.client.Wait(ctx, &api.ControllerWaitRequest{SandboxID: sandboxID})
|
||||
// For remote sandbox controllers, the controller process may restart,
|
||||
// so we have to retry if the error indicates a gRPC disconnection.
|
||||
var (
|
||||
resp *api.ControllerWaitResponse
|
||||
err error
|
||||
retryInterval time.Duration = 128
|
||||
)
|
||||
for {
|
||||
resp, err = s.client.Wait(ctx, &api.ControllerWaitRequest{SandboxID: sandboxID})
|
||||
if err != nil {
|
||||
return sandbox.ExitStatus{}, errdefs.FromGRPC(err)
|
||||
grpcErr := errdefs.FromGRPC(err)
|
||||
if !errdefs.IsUnavailable(grpcErr) {
|
||||
return sandbox.ExitStatus{}, grpcErr
|
||||
}
|
||||
select {
|
||||
case <-time.After(retryInterval * time.Millisecond):
|
||||
if retryInterval < 4096 {
|
||||
retryInterval = retryInterval << 1
|
||||
}
|
||||
continue
|
||||
case <-ctx.Done():
|
||||
return sandbox.ExitStatus{}, grpcErr
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
return sandbox.ExitStatus{
|
||||
|
@ -53,7 +53,7 @@ func (c *criService) startSandboxExitMonitor(ctx context.Context, id string, exi
|
||||
case exitRes := <-exitCh:
|
||||
exitStatus, exitedAt, err := exitRes.Result()
|
||||
if err != nil {
|
||||
log.L.WithError(err).Errorf("failed to get task exit status for %q", id)
|
||||
log.L.WithError(err).Errorf("failed to get sandbox status for %q", id)
|
||||
exitStatus = unknownExitCode
|
||||
exitedAt = time.Now()
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user