kubernetes/pkg/kubelet/cm/dra/plugin/client.go
Kevin Klues 0449cef8fd Increase timeout for DRA kubelet plugin client
The 10 second timeout was too low. Given that the retry loop for the
kubelet itself is 90s, increasing the timeout to half of this seems
reasonable. Ideally we would pull in the variable that sets the retry
timeout to 90s and then just set our local timeout to half of that.
Unfortunately, this is not exported, so we settle (for now with just
explicitly setting it to 45s.

Signed-off-by: Kevin Klues <kklues@nvidia.com>
2023-07-18 22:45:01 +01:00

207 lines
6.0 KiB
Go

/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package plugin
import (
"context"
"errors"
"fmt"
"io"
"net"
"time"
"google.golang.org/grpc"
grpccodes "google.golang.org/grpc/codes"
"google.golang.org/grpc/credentials/insecure"
grpcstatus "google.golang.org/grpc/status"
"k8s.io/klog/v2"
drapbv1alpha2 "k8s.io/kubelet/pkg/apis/dra/v1alpha2"
drapb "k8s.io/kubelet/pkg/apis/dra/v1alpha3"
)
const PluginClientTimeout = 45 * time.Second
// Strongly typed address.
type draAddr string
// draPluginClient encapsulates all dra plugin methods.
type draPluginClient struct {
pluginName string
addr draAddr
nodeClientCreator nodeClientCreator
}
var _ drapb.NodeClient = &draPluginClient{}
type nodeClientCreator func(addr draAddr) (
nodeClient drapb.NodeClient,
nodeClientOld drapbv1alpha2.NodeClient,
closer io.Closer,
err error,
)
// newNodeClient creates a new NodeClient with the internally used gRPC
// connection set up. It also returns a closer which must be called to close
// the gRPC connection when the NodeClient is not used anymore.
// This is the default implementation for the nodeClientCreator, used in
// newDRAPluginClient.
func newNodeClient(addr draAddr) (nodeClient drapb.NodeClient, nodeClientOld drapbv1alpha2.NodeClient, closer io.Closer, err error) {
var conn *grpc.ClientConn
conn, err = newGrpcConn(addr)
if err != nil {
return nil, nil, nil, err
}
return drapb.NewNodeClient(conn), drapbv1alpha2.NewNodeClient(conn), conn, nil
}
func NewDRAPluginClient(pluginName string) (drapb.NodeClient, error) {
if pluginName == "" {
return nil, fmt.Errorf("plugin name is empty")
}
existingPlugin := draPlugins.Get(pluginName)
if existingPlugin == nil {
return nil, fmt.Errorf("plugin name %s not found in the list of registered DRA plugins", pluginName)
}
return &draPluginClient{
pluginName: pluginName,
addr: draAddr(existingPlugin.endpoint),
nodeClientCreator: newNodeClient,
}, nil
}
func (r *draPluginClient) NodePrepareResources(
ctx context.Context,
req *drapb.NodePrepareResourcesRequest,
opts ...grpc.CallOption,
) (resp *drapb.NodePrepareResourcesResponse, err error) {
logger := klog.FromContext(ctx)
logger.V(4).Info(log("calling NodePrepareResources rpc"), "request", req)
defer logger.V(4).Info(log("done calling NodePrepareResources rpc"), "response", resp, "err", err)
if r.nodeClientCreator == nil {
return nil, errors.New("failed to call NodePrepareResources. nodeClientCreator is nil")
}
nodeClient, nodeClientOld, closer, err := r.nodeClientCreator(r.addr)
if err != nil {
return nil, err
}
defer closer.Close()
ctx, cancel := context.WithTimeout(ctx, PluginClientTimeout)
defer cancel()
resp, err = nodeClient.NodePrepareResources(ctx, req)
if err != nil {
status, _ := grpcstatus.FromError(err)
if status.Code() == grpccodes.Unimplemented {
// Fall back to the older gRPC API.
resp = &drapb.NodePrepareResourcesResponse{
Claims: make(map[string]*drapb.NodePrepareResourceResponse),
}
err = nil
for _, claim := range req.Claims {
respOld, errOld := nodeClientOld.NodePrepareResource(ctx,
&drapbv1alpha2.NodePrepareResourceRequest{
Namespace: claim.Namespace,
ClaimUid: claim.Uid,
ClaimName: claim.Name,
ResourceHandle: claim.ResourceHandle,
})
result := &drapb.NodePrepareResourceResponse{}
if errOld != nil {
result.Error = errOld.Error()
} else {
result.CDIDevices = respOld.CdiDevices
}
resp.Claims[claim.Uid] = result
}
}
}
return
}
func (r *draPluginClient) NodeUnprepareResources(
ctx context.Context,
req *drapb.NodeUnprepareResourcesRequest,
opts ...grpc.CallOption,
) (resp *drapb.NodeUnprepareResourcesResponse, err error) {
logger := klog.FromContext(ctx)
logger.V(4).Info(log("calling NodeUnprepareResource rpc"), "request", req)
defer logger.V(4).Info(log("done calling NodeUnprepareResources rpc"), "response", resp, "err", err)
if r.nodeClientCreator == nil {
return nil, errors.New("failed to call NodeUnprepareResources. nodeClientCreator is nil")
}
nodeClient, nodeClientOld, closer, err := r.nodeClientCreator(r.addr)
if err != nil {
return nil, err
}
defer closer.Close()
ctx, cancel := context.WithTimeout(ctx, PluginClientTimeout)
defer cancel()
resp, err = nodeClient.NodeUnprepareResources(ctx, req)
if err != nil {
status, _ := grpcstatus.FromError(err)
if status.Code() == grpccodes.Unimplemented {
// Fall back to the older gRPC API.
resp = &drapb.NodeUnprepareResourcesResponse{
Claims: make(map[string]*drapb.NodeUnprepareResourceResponse),
}
err = nil
for _, claim := range req.Claims {
_, errOld := nodeClientOld.NodeUnprepareResource(ctx,
&drapbv1alpha2.NodeUnprepareResourceRequest{
Namespace: claim.Namespace,
ClaimUid: claim.Uid,
ClaimName: claim.Name,
ResourceHandle: claim.ResourceHandle,
})
result := &drapb.NodeUnprepareResourceResponse{}
if errOld != nil {
result.Error = errOld.Error()
}
resp.Claims[claim.Uid] = result
}
}
}
return
}
func newGrpcConn(addr draAddr) (*grpc.ClientConn, error) {
network := "unix"
klog.V(4).InfoS(log("creating new gRPC connection"), "protocol", network, "endpoint", addr)
return grpc.Dial(
string(addr),
grpc.WithTransportCredentials(insecure.NewCredentials()),
grpc.WithContextDialer(func(ctx context.Context, target string) (net.Conn, error) {
return (&net.Dialer{}).DialContext(ctx, network, target)
}),
)
}