Some of the E2E node tests were flaky. Their timeout apparently was chosen
under the assumption that kubelet would retry immediately after a failed gRPC
call, with a factor of 2 as safety margin. But according to
0449cef8fd,
kubelet has a different, higher retry period of 90 seconds, which was exactly
the test timeout. The test timeout has to be higher than that.
As the tests don't use the gRPC call timeout anymore, it can be made
private. While at it, the name and documentation gets updated.
147 lines
4.2 KiB
Go
147 lines
4.2 KiB
Go
/*
|
|
Copyright 2022 The Kubernetes Authors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package plugin
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"net"
|
|
"sync"
|
|
"time"
|
|
|
|
"google.golang.org/grpc"
|
|
"google.golang.org/grpc/connectivity"
|
|
"google.golang.org/grpc/credentials/insecure"
|
|
|
|
utilversion "k8s.io/apimachinery/pkg/util/version"
|
|
"k8s.io/klog/v2"
|
|
drapb "k8s.io/kubelet/pkg/apis/dra/v1alpha4"
|
|
)
|
|
|
|
// NewDRAPluginClient returns a wrapper around those gRPC methods of a DRA
|
|
// driver kubelet plugin which need to be called by kubelet. The wrapper
|
|
// handles gRPC connection management and logging. Connections are reused
|
|
// across different NewDRAPluginClient calls.
|
|
func NewDRAPluginClient(pluginName string) (*Plugin, error) {
|
|
if pluginName == "" {
|
|
return nil, fmt.Errorf("plugin name is empty")
|
|
}
|
|
|
|
existingPlugin := draPlugins.get(pluginName)
|
|
if existingPlugin == nil {
|
|
return nil, fmt.Errorf("plugin name %s not found in the list of registered DRA plugins", pluginName)
|
|
}
|
|
|
|
return existingPlugin, nil
|
|
}
|
|
|
|
type Plugin struct {
|
|
backgroundCtx context.Context
|
|
cancel func(cause error)
|
|
|
|
mutex sync.Mutex
|
|
conn *grpc.ClientConn
|
|
endpoint string
|
|
highestSupportedVersion *utilversion.Version
|
|
clientCallTimeout time.Duration
|
|
}
|
|
|
|
func (p *Plugin) getOrCreateGRPCConn() (*grpc.ClientConn, error) {
|
|
p.mutex.Lock()
|
|
defer p.mutex.Unlock()
|
|
|
|
if p.conn != nil {
|
|
return p.conn, nil
|
|
}
|
|
|
|
ctx := p.backgroundCtx
|
|
logger := klog.FromContext(ctx)
|
|
|
|
network := "unix"
|
|
logger.V(4).Info("Creating new gRPC connection", "protocol", network, "endpoint", p.endpoint)
|
|
// grpc.Dial is deprecated. grpc.NewClient should be used instead.
|
|
// For now this gets ignored because this function is meant to establish
|
|
// the connection, with the one second timeout below. Perhaps that
|
|
// approach should be reconsidered?
|
|
//nolint:staticcheck
|
|
conn, err := grpc.Dial(
|
|
p.endpoint,
|
|
grpc.WithTransportCredentials(insecure.NewCredentials()),
|
|
grpc.WithContextDialer(func(ctx context.Context, target string) (net.Conn, error) {
|
|
return (&net.Dialer{}).DialContext(ctx, network, target)
|
|
}),
|
|
)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
|
|
defer cancel()
|
|
|
|
if ok := conn.WaitForStateChange(ctx, connectivity.Connecting); !ok {
|
|
return nil, errors.New("timed out waiting for gRPC connection to be ready")
|
|
}
|
|
|
|
p.conn = conn
|
|
return p.conn, nil
|
|
}
|
|
|
|
func (p *Plugin) NodePrepareResources(
|
|
ctx context.Context,
|
|
req *drapb.NodePrepareResourcesRequest,
|
|
opts ...grpc.CallOption,
|
|
) (*drapb.NodePrepareResourcesResponse, error) {
|
|
logger := klog.FromContext(ctx)
|
|
logger.V(4).Info("Calling NodePrepareResources rpc", "request", req)
|
|
|
|
conn, err := p.getOrCreateGRPCConn()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
ctx, cancel := context.WithTimeout(ctx, p.clientCallTimeout)
|
|
defer cancel()
|
|
|
|
nodeClient := drapb.NewNodeClient(conn)
|
|
response, err := nodeClient.NodePrepareResources(ctx, req)
|
|
logger.V(4).Info("Done calling NodePrepareResources rpc", "response", response, "err", err)
|
|
return response, err
|
|
}
|
|
|
|
func (p *Plugin) NodeUnprepareResources(
|
|
ctx context.Context,
|
|
req *drapb.NodeUnprepareResourcesRequest,
|
|
opts ...grpc.CallOption,
|
|
) (*drapb.NodeUnprepareResourcesResponse, error) {
|
|
logger := klog.FromContext(ctx)
|
|
logger.V(4).Info("Calling NodeUnprepareResource rpc", "request", req)
|
|
|
|
conn, err := p.getOrCreateGRPCConn()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
ctx, cancel := context.WithTimeout(ctx, p.clientCallTimeout)
|
|
defer cancel()
|
|
|
|
nodeClient := drapb.NewNodeClient(conn)
|
|
response, err := nodeClient.NodeUnprepareResources(ctx, req)
|
|
logger.V(4).Info("Done calling NodeUnprepareResources rpc", "response", response, "err", err)
|
|
return response, err
|
|
}
|