Merge pull request #9447 from dcantah/dial-grpc-shim-socket

runtime/v2: net.Dial gRPC shim sockets before trying grpc
This commit is contained in:
Maksym Pavlenko 2023-12-01 22:32:41 +00:00 committed by GitHub
commit 6f405e89f8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -22,12 +22,14 @@ import (
"errors" "errors"
"fmt" "fmt"
"io" "io"
"net"
"os" "os"
"path/filepath" "path/filepath"
"strings" "strings"
"time" "time"
"github.com/containerd/containerd/v2/pkg/atomicfile" "github.com/containerd/containerd/v2/pkg/atomicfile"
"github.com/containerd/containerd/v2/pkg/dialer"
"github.com/containerd/ttrpc" "github.com/containerd/ttrpc"
"google.golang.org/grpc" "google.golang.org/grpc"
"google.golang.org/grpc/connectivity" "google.golang.org/grpc/connectivity"
@ -39,7 +41,6 @@ import (
"github.com/containerd/containerd/v2/errdefs" "github.com/containerd/containerd/v2/errdefs"
"github.com/containerd/containerd/v2/events/exchange" "github.com/containerd/containerd/v2/events/exchange"
"github.com/containerd/containerd/v2/identifiers" "github.com/containerd/containerd/v2/identifiers"
"github.com/containerd/containerd/v2/pkg/dialer"
"github.com/containerd/containerd/v2/pkg/timeout" "github.com/containerd/containerd/v2/pkg/timeout"
"github.com/containerd/containerd/v2/protobuf" "github.com/containerd/containerd/v2/protobuf"
ptypes "github.com/containerd/containerd/v2/protobuf/types" ptypes "github.com/containerd/containerd/v2/protobuf/types"
@ -275,7 +276,7 @@ func makeConnection(ctx context.Context, id string, params client.BootstrapParam
grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithTransportCredentials(insecure.NewCredentials()),
grpc.WithBlock(), grpc.WithBlock(),
} }
return grpcDialContext(ctx, dialer.DialAddress(params.Address), onClose, gopts...) return grpcDialContext(ctx, params.Address, onClose, gopts...)
default: default:
return nil, fmt.Errorf("unexpected protocol: %q", params.Protocol) return nil, fmt.Errorf("unexpected protocol: %q", params.Protocol)
} }
@ -286,10 +287,29 @@ func makeConnection(ctx context.Context, id string, params client.BootstrapParam
// a callback run when the connection is severed or explicitly closed. // a callback run when the connection is severed or explicitly closed.
func grpcDialContext( func grpcDialContext(
ctx context.Context, ctx context.Context,
target string, address string,
onClose func(), onClose func(),
gopts ...grpc.DialOption, gopts ...grpc.DialOption,
) (*grpcConn, error) { ) (*grpcConn, error) {
// If grpc.WithBlock is specified in gopts, Dial blocks waiting for
// a connection regardless of whether the socket exists or has a listener when Dial begins. This
// specific behavior of WithBlock is mostly undesirable for shims, as if the socket isn't
// there when we go to load/connect there's likely an issue. However, getting rid of WithBlock is
// also undesirable as we don't want the background connection behavior, we want to ensure
// a connection before moving on. To bring this in line with the ttrpc connection behavior,
// let's do an initial dial to ensure the shim's socket is actually available. A stat wouldn't suffice
// here: if the shim exited unexpectedly its socket may still be on the filesystem, but dialing it would
// return ECONNREFUSED, which grpc.DialContext will happily trudge along through for the full timeout.
//
// This is especially helpful on restart of containerd: if the shim died while containerd
// was down, we would otherwise end up waiting the full timeout.
conn, err := net.DialTimeout("unix", address, time.Second*10)
if err != nil {
return nil, err
}
conn.Close()
target := dialer.DialAddress(address)
client, err := grpc.DialContext(ctx, target, gopts...) client, err := grpc.DialContext(ctx, target, gopts...)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create GRPC connection: %w", err) return nil, fmt.Errorf("failed to create GRPC connection: %w", err)