sandbox: retry Wait calls to the remote sandbox controller
The remote sandbox controller may restart, so the Wait call should be retried if it fails with a gRPC disconnection error. Signed-off-by: Abel Feng <fshb1988@gmail.com>
This commit is contained in:
parent
b168147ca8
commit
58be881890
@ -18,6 +18,7 @@ package proxy
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"time"
|
||||||
|
|
||||||
api "github.com/containerd/containerd/api/services/sandbox/v1"
|
api "github.com/containerd/containerd/api/services/sandbox/v1"
|
||||||
"github.com/containerd/containerd/api/types"
|
"github.com/containerd/containerd/api/types"
|
||||||
@ -119,9 +120,31 @@ func (s *remoteSandboxController) Shutdown(ctx context.Context, sandboxID string
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *remoteSandboxController) Wait(ctx context.Context, sandboxID string) (sandbox.ExitStatus, error) {
|
func (s *remoteSandboxController) Wait(ctx context.Context, sandboxID string) (sandbox.ExitStatus, error) {
|
||||||
resp, err := s.client.Wait(ctx, &api.ControllerWaitRequest{SandboxID: sandboxID})
|
// For remote sandbox controllers, the controller process may restart,
|
||||||
if err != nil {
|
// so we have to retry when the error indicates a gRPC disconnection.
|
||||||
return sandbox.ExitStatus{}, errdefs.FromGRPC(err)
|
var (
|
||||||
|
resp *api.ControllerWaitResponse
|
||||||
|
err error
|
||||||
|
retryInterval time.Duration = 128
|
||||||
|
)
|
||||||
|
for {
|
||||||
|
resp, err = s.client.Wait(ctx, &api.ControllerWaitRequest{SandboxID: sandboxID})
|
||||||
|
if err != nil {
|
||||||
|
grpcErr := errdefs.FromGRPC(err)
|
||||||
|
if !errdefs.IsUnavailable(grpcErr) {
|
||||||
|
return sandbox.ExitStatus{}, grpcErr
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case <-time.After(retryInterval * time.Millisecond):
|
||||||
|
if retryInterval < 4096 {
|
||||||
|
retryInterval = retryInterval << 1
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
case <-ctx.Done():
|
||||||
|
return sandbox.ExitStatus{}, grpcErr
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
return sandbox.ExitStatus{
|
return sandbox.ExitStatus{
|
||||||
|
@ -53,7 +53,7 @@ func (c *criService) startSandboxExitMonitor(ctx context.Context, id string, exi
|
|||||||
case exitRes := <-exitCh:
|
case exitRes := <-exitCh:
|
||||||
exitStatus, exitedAt, err := exitRes.Result()
|
exitStatus, exitedAt, err := exitRes.Result()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.L.WithError(err).Errorf("failed to get task exit status for %q", id)
|
log.L.WithError(err).Errorf("failed to get sandbox status for %q", id)
|
||||||
exitStatus = unknownExitCode
|
exitStatus = unknownExitCode
|
||||||
exitedAt = time.Now()
|
exitedAt = time.Now()
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user