/*
Copyright 2023 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package dra

import (
	"context"
	"fmt"
	"net"
	"os"
	"path/filepath"
	"sync"
	"sync/atomic"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"google.golang.org/grpc"

	v1 "k8s.io/api/core/v1"
	resourceapi "k8s.io/api/resource/v1alpha3"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/client-go/kubernetes/fake"
	"k8s.io/dynamic-resource-allocation/resourceclaim"
	drapb "k8s.io/kubelet/pkg/apis/dra/v1alpha4"
	"k8s.io/kubernetes/pkg/kubelet/cm/dra/plugin"
	"k8s.io/kubernetes/pkg/kubelet/cm/dra/state"
)

const (
	driverClassName = "test"
	podName         = "test-pod"
	containerName   = "test-container"
)
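
// fakeDRADriverGRPCServer is a stub DRA driver gRPC server for tests. It
// counts NodePrepareResources/NodeUnprepareResources calls and serves either
// the canned responses configured for it or a default response.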
type fakeDRADriverGRPCServer struct {
	drapb.UnimplementedNodeServer
	driverName                 string
	timeout                    *time.Duration
	prepareResourceCalls       atomic.Uint32
	unprepareResourceCalls     atomic.Uint32
	prepareResourcesResponse   *drapb.NodePrepareResourcesResponse
	unprepareResourcesResponse *drapb.NodeUnprepareResourcesResponse
}
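
// NodePrepareResources optionally sleeps to simulate a slow driver, then
// returns the configured response, or a default single-device response whose
// CDI ID has the form "<driver>/<class>=claim-<claim UID>" built from the
// test constants used in this package.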
func (s *fakeDRADriverGRPCServer) NodePrepareResources(ctx context.Context, req *drapb.NodePrepareResourcesRequest) (*drapb.NodePrepareResourcesResponse, error) {
	s.prepareResourceCalls.Add(1)

	if s.timeout != nil {
		time.Sleep(*s.timeout)
	}

	if s.prepareResourcesResponse == nil {
		cdiDeviceName := "claim-" + req.Claims[0].UID
		cdiID := s.driverName + "/" + driverClassName + "=" + cdiDeviceName
		return &drapb.NodePrepareResourcesResponse{
			Claims: map[string]*drapb.NodePrepareResourceResponse{
				req.Claims[0].UID: {
					Devices: []*drapb.Device{
						{
							PoolName:     poolName,
							DeviceName:   deviceName,
							RequestNames: []string{req.Claims[0].Name},
							CDIDeviceIDs: []string{cdiID},
						},
					},
				},
			},
		}, nil
	}

	return s.prepareResourcesResponse, nil
}
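
// NodeUnprepareResources mirrors NodePrepareResources: it optionally sleeps,
// then returns the configured response or a default one for the first claim.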
func (s *fakeDRADriverGRPCServer) NodeUnprepareResources(ctx context.Context, req *drapb.NodeUnprepareResourcesRequest) (*drapb.NodeUnprepareResourcesResponse, error) {
	s.unprepareResourceCalls.Add(1)

	if s.timeout != nil {
		time.Sleep(*s.timeout)
	}

	if s.unprepareResourcesResponse == nil {
		return &drapb.NodeUnprepareResourcesResponse{
			Claims: map[string]*drapb.NodeUnprepareResourceResponse{
				req.Claims[0].UID: {},
			},
		}, nil
	}

	return s.unprepareResourcesResponse, nil
}

type tearDown func()

type fakeDRAServerInfo struct {
	// fake DRA server
	server *fakeDRADriverGRPCServer
	// fake DRA plugin socket name
	socketName string
	// teardownFn stops fake gRPC server
	teardownFn tearDown
}
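
// setupFakeDRADriverGRPCServer starts the fake driver on a Unix socket in a
// temporary directory. When shouldTimeout is true, the server sleeps for twice
// pluginClientTimeout so that client calls exceed their deadline. Callers must
// invoke the returned teardownFn, e.g.:
//
//	info, err := setupFakeDRADriverGRPCServer(false, nil, nil, nil)
//	if err != nil {
//		t.Fatal(err)
//	}
//	defer info.teardownFn()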
func setupFakeDRADriverGRPCServer(shouldTimeout bool, pluginClientTimeout *time.Duration, prepareResourcesResponse *drapb.NodePrepareResourcesResponse, unprepareResourcesResponse *drapb.NodeUnprepareResourcesResponse) (fakeDRAServerInfo, error) {
	socketDir, err := os.MkdirTemp("", "dra")
	if err != nil {
		return fakeDRAServerInfo{
			server:     nil,
			socketName: "",
			teardownFn: nil,
		}, err
	}

	socketName := filepath.Join(socketDir, "server.sock")
	stopCh := make(chan struct{})

	teardown := func() {
		close(stopCh)
		os.RemoveAll(socketName)
	}

	l, err := net.Listen("unix", socketName)
	if err != nil {
		teardown()
		return fakeDRAServerInfo{
			server:     nil,
			socketName: "",
			teardownFn: nil,
		}, err
	}

	s := grpc.NewServer()
	fakeDRADriverGRPCServer := &fakeDRADriverGRPCServer{
		driverName:                 driverName,
		prepareResourcesResponse:   prepareResourcesResponse,
		unprepareResourcesResponse: unprepareResourcesResponse,
	}
	if shouldTimeout {
		timeout := *pluginClientTimeout * 2
		fakeDRADriverGRPCServer.timeout = &timeout
	}

	drapb.RegisterNodeServer(s, fakeDRADriverGRPCServer)

	go func() {
		go s.Serve(l)
		<-stopCh
		s.GracefulStop()
	}()

	return fakeDRAServerInfo{
		server:     fakeDRADriverGRPCServer,
		socketName: socketName,
		teardownFn: teardown,
	}, nil
}
func TestNewManagerImpl(t *testing.T) {
	kubeClient := fake.NewSimpleClientset()
	for _, test := range []struct {
		description        string
		stateFileDirectory string
		wantErr            bool
	}{
		{
			description:        "invalid directory path",
			stateFileDirectory: "",
			wantErr:            true,
		},
		{
			description:        "valid directory path",
			stateFileDirectory: t.TempDir(),
		},
	} {
		t.Run(test.description, func(t *testing.T) {
			manager, err := NewManagerImpl(kubeClient, test.stateFileDirectory, "worker")
			if test.wantErr {
				assert.Error(t, err)
				return
			}

			assert.NoError(t, err)
			assert.NotNil(t, manager.cache)
			assert.NotNil(t, manager.kubeClient)
		})
	}
}

// genTestPod generates a pod object with a single container referencing the
// test resource claim.
func genTestPod() *v1.Pod {
	claimName := claimName // local copy of the package-level constant so it can be taken by address
	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name:      podName,
			Namespace: namespace,
			UID:       podUID,
		},
		Spec: v1.PodSpec{
			ResourceClaims: []v1.PodResourceClaim{
				{
					Name:              claimName,
					ResourceClaimName: &claimName,
				},
			},
			Containers: []v1.Container{
				{
					Resources: v1.ResourceRequirements{
						Claims: []v1.ResourceClaim{
							{
								Name: claimName,
							},
						},
					},
				},
			},
		},
	}
}

// genTestClaim generates a resource claim object allocated to the given pod UID.
func genTestClaim(name, driver, device, podUID string) *resourceapi.ResourceClaim {
	return &resourceapi.ResourceClaim{
		ObjectMeta: metav1.ObjectMeta{
			Name:      name,
			Namespace: namespace,
			UID:       types.UID(fmt.Sprintf("%s-uid", name)),
		},
		Spec: resourceapi.ResourceClaimSpec{
			Devices: resourceapi.DeviceClaim{
				Requests: []resourceapi.DeviceRequest{
					{
						Name:            requestName,
						DeviceClassName: className,
					},
				},
			},
		},
		Status: resourceapi.ResourceClaimStatus{
			Allocation: &resourceapi.AllocationResult{
				Devices: resourceapi.DeviceAllocationResult{
					Results: []resourceapi.DeviceRequestAllocationResult{
						{
							Request: requestName,
							Pool:    poolName,
							Device:  device,
							Driver:  driver,
						},
					},
				},
			},
			ReservedFor: []resourceapi.ResourceClaimConsumerReference{
				{UID: types.UID(podUID)},
			},
		},
	}
}

// genTestClaimInfo generates a claim info object for the given pod UIDs.
func genTestClaimInfo(podUIDs []string, prepared bool) *ClaimInfo {
	return &ClaimInfo{
		ClaimInfoState: state.ClaimInfoState{
			ClaimUID:  claimUID,
			ClaimName: claimName,
			Namespace: namespace,
			PodUIDs:   sets.New[string](podUIDs...),
			DriverState: map[string]state.DriverState{
				driverName: {
					Devices: []state.Device{{
						PoolName:     poolName,
						DeviceName:   deviceName,
						RequestNames: []string{requestName},
						CDIDeviceIDs: []string{cdiID},
					}},
				},
			},
		},
		prepared: prepared,
	}
}

// genClaimInfoState generates a claim info state object, attaching a device
// entry when cdiDeviceID is non-empty.
func genClaimInfoState(cdiDeviceID string) state.ClaimInfoState {
	s := state.ClaimInfoState{
		ClaimUID:  claimUID,
		ClaimName: claimName,
		Namespace: namespace,
		PodUIDs:   sets.New[string](podUID),
		DriverState: map[string]state.DriverState{
			driverName: {},
		},
	}
	if cdiDeviceID != "" {
		s.DriverState[driverName] = state.DriverState{
			Devices: []state.Device{{
				PoolName:     poolName,
				DeviceName:   deviceName,
				RequestNames: []string{requestName},
				CDIDeviceIDs: []string{cdiDeviceID},
			}},
		}
	}
	return s
}
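
// TestGetResources verifies that GetResources returns the CDI devices recorded
// in the claim info cache and fails when no claim info is cached.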
func TestGetResources(t *testing.T) {
	kubeClient := fake.NewSimpleClientset()
	for _, test := range []struct {
		description string
		container   *v1.Container
		pod         *v1.Pod
		claimInfo   *ClaimInfo
		wantErr     bool
	}{
		{
			description: "claim info with devices",
			container: &v1.Container{
				Name: containerName,
				Resources: v1.ResourceRequirements{
					Claims: []v1.ResourceClaim{
						{
							Name: claimName,
						},
					},
				},
			},
			pod:       genTestPod(),
			claimInfo: genTestClaimInfo(nil, false),
		},
		{
			description: "nil claiminfo",
			container: &v1.Container{
				Name: containerName,
				Resources: v1.ResourceRequirements{
					Claims: []v1.ResourceClaim{
						{
							Name: claimName,
						},
					},
				},
			},
			pod:     genTestPod(),
			wantErr: true,
		},
	} {
		t.Run(test.description, func(t *testing.T) {
			manager, err := NewManagerImpl(kubeClient, t.TempDir(), "worker")
			assert.NoError(t, err)

			if test.claimInfo != nil {
				manager.cache.add(test.claimInfo)
			}

			containerInfo, err := manager.GetResources(test.pod, test.container)
			if test.wantErr {
				assert.Error(t, err)
			} else {
				require.NoError(t, err)
				assert.Equal(t, test.claimInfo.DriverState[driverName].Devices[0].CDIDeviceIDs[0], containerInfo.CDIDevices[0].Name)
			}
		})
	}
}
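
// getFakeNode returns a fixed "worker" node; it stands in for the node getter
// expected by plugin.NewRegistrationHandler.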
func getFakeNode() (*v1.Node, error) {
	return &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "worker"}}, nil
}
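
// TestPrepareResources exercises PrepareResources against the fake driver:
// missing claims, unregistered drivers, driver timeouts, empty or nil driver
// responses, and claims that are already prepared or not yet in the cache.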
func TestPrepareResources(t *testing.T) {
	claimName := claimName
	fakeKubeClient := fake.NewSimpleClientset()

	for _, test := range []struct {
		description            string
		driverName             string
		pod                    *v1.Pod
		claimInfo              *ClaimInfo
		claim                  *resourceapi.ResourceClaim
		resp                   *drapb.NodePrepareResourcesResponse
		wantTimeout            bool
		wantResourceSkipped    bool
		expectedErrMsg         string
		expectedClaimInfoState state.ClaimInfoState
		expectedPrepareCalls   uint32
	}{
		{
			description:    "claim doesn't exist",
			driverName:     driverName,
			pod:            genTestPod(),
			expectedErrMsg: "failed to fetch ResourceClaim ",
		},
		{
			description:    "unknown driver",
			pod:            genTestPod(),
			claim:          genTestClaim(claimName, "unknown driver", deviceName, podUID),
			expectedErrMsg: "plugin name unknown driver not found in the list of registered DRA plugins",
		},
		{
			description:            "should prepare resources, driver returns nil value",
			driverName:             driverName,
			pod:                    genTestPod(),
			claim:                  genTestClaim(claimName, driverName, deviceName, podUID),
			resp:                   &drapb.NodePrepareResourcesResponse{Claims: map[string]*drapb.NodePrepareResourceResponse{string(claimUID): nil}},
			expectedClaimInfoState: genClaimInfoState(""),
			expectedPrepareCalls:   1,
		},
		{
			description:          "driver returns empty result",
			driverName:           driverName,
			pod:                  genTestPod(),
			claim:                genTestClaim(claimName, driverName, deviceName, podUID),
			resp:                 &drapb.NodePrepareResourcesResponse{Claims: map[string]*drapb.NodePrepareResourceResponse{}},
			expectedPrepareCalls: 1,
			expectedErrMsg:       "NodePrepareResources left out 1 claims",
		},
		{
			description:    "pod is not allowed to use resource claim",
			driverName:     driverName,
			pod:            genTestPod(),
			claim:          genTestClaim(claimName, driverName, deviceName, ""),
			expectedErrMsg: "is not allowed to use resource claim ",
		},
		{
			description: "no container uses the claim",
			driverName:  driverName,
			pod: &v1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name:      podName,
					Namespace: namespace,
					UID:       podUID,
				},
				Spec: v1.PodSpec{
					ResourceClaims: []v1.PodResourceClaim{
						{
							Name:              claimName,
							ResourceClaimName: &claimName,
						},
					},
					Containers: []v1.Container{
						{},
					},
				},
			},
			claim:                  genTestClaim(claimName, driverName, deviceName, podUID),
			expectedPrepareCalls:   1,
			expectedClaimInfoState: genClaimInfoState(cdiID),
			resp: &drapb.NodePrepareResourcesResponse{Claims: map[string]*drapb.NodePrepareResourceResponse{
				string(claimUID): {
					Devices: []*drapb.Device{
						{
							PoolName:     poolName,
							DeviceName:   deviceName,
							RequestNames: []string{requestName},
							CDIDeviceIDs: []string{cdiID},
						},
					},
				},
			}},
		},
		{
			description:            "resource already prepared",
			driverName:             driverName,
			pod:                    genTestPod(),
			claim:                  genTestClaim(claimName, driverName, deviceName, podUID),
			claimInfo:              genTestClaimInfo([]string{podUID}, true),
			expectedClaimInfoState: genClaimInfoState(cdiID),
			resp: &drapb.NodePrepareResourcesResponse{Claims: map[string]*drapb.NodePrepareResourceResponse{
				string(claimUID): {
					Devices: []*drapb.Device{
						{
							PoolName:     poolName,
							DeviceName:   deviceName,
							RequestNames: []string{requestName},
							CDIDeviceIDs: []string{cdiID},
						},
					},
				},
			}},
		},
		{
			description:          "should timeout",
			driverName:           driverName,
			pod:                  genTestPod(),
			claim:                genTestClaim(claimName, driverName, deviceName, podUID),
			wantTimeout:          true,
			expectedPrepareCalls: 1,
			expectedErrMsg:       "NodePrepareResources failed: rpc error: code = DeadlineExceeded desc = context deadline exceeded",
		},
		{
			description:            "should prepare resource, claim not in cache",
			driverName:             driverName,
			pod:                    genTestPod(),
			claim:                  genTestClaim(claimName, driverName, deviceName, podUID),
			expectedClaimInfoState: genClaimInfoState(cdiID),
			resp: &drapb.NodePrepareResourcesResponse{Claims: map[string]*drapb.NodePrepareResourceResponse{
				string(claimUID): {
					Devices: []*drapb.Device{
						{
							PoolName:     poolName,
							DeviceName:   deviceName,
							RequestNames: []string{requestName},
							CDIDeviceIDs: []string{cdiID},
						},
					},
				},
			}},
			expectedPrepareCalls: 1,
		},
	} {
		t.Run(test.description, func(t *testing.T) {
			cache, err := newClaimInfoCache(t.TempDir(), draManagerStateFileName)
			if err != nil {
				t.Fatalf("failed to create claimInfoCache: %v", err)
			}
			manager := &ManagerImpl{
				kubeClient: fakeKubeClient,
				cache:      cache,
			}

			if test.claim != nil {
				if _, err := fakeKubeClient.ResourceV1alpha3().ResourceClaims(test.pod.Namespace).Create(context.Background(), test.claim, metav1.CreateOptions{}); err != nil {
					t.Fatalf("failed to create ResourceClaim %s: %+v", test.claim.Name, err)
				}
				defer func() {
					require.NoError(t, fakeKubeClient.ResourceV1alpha3().ResourceClaims(test.pod.Namespace).Delete(context.Background(), test.claim.Name, metav1.DeleteOptions{}))
				}()
			}

			var pluginClientTimeout *time.Duration
			if test.wantTimeout {
				timeout := time.Millisecond * 20
				pluginClientTimeout = &timeout
			}

			draServerInfo, err := setupFakeDRADriverGRPCServer(test.wantTimeout, pluginClientTimeout, test.resp, nil)
			if err != nil {
				t.Fatal(err)
			}
			defer draServerInfo.teardownFn()

			plg := plugin.NewRegistrationHandler(nil, getFakeNode)
			if err := plg.RegisterPlugin(test.driverName, draServerInfo.socketName, []string{"1.27"}, pluginClientTimeout); err != nil {
				t.Fatalf("failed to register plugin %s, err: %v", test.driverName, err)
			}
			defer plg.DeRegisterPlugin(test.driverName) // deregister so the next test case starts clean

			if test.claimInfo != nil {
				manager.cache.add(test.claimInfo)
			}

			err = manager.PrepareResources(test.pod)

			assert.Equal(t, test.expectedPrepareCalls, draServerInfo.server.prepareResourceCalls.Load())

			if test.expectedErrMsg != "" {
				assert.Error(t, err)
				if err != nil {
					assert.Contains(t, err.Error(), test.expectedErrMsg)
				}
				return // PrepareResources returned an error, so stop the test case here
			}

			assert.NoError(t, err)
			if test.wantResourceSkipped {
				return // resource skipped, so no need to continue
			}

			// check that the cache contains the expected claim info
			claimName, _, err := resourceclaim.Name(test.pod, &test.pod.Spec.ResourceClaims[0])
			if err != nil {
				t.Fatal(err)
			}
			claimInfo, ok := manager.cache.get(*claimName, test.pod.Namespace)
			if !ok {
				t.Fatalf("claimInfo not found in cache for claim %s", *claimName)
			}
			if len(claimInfo.PodUIDs) != 1 || !claimInfo.PodUIDs.Has(string(test.pod.UID)) {
				t.Fatalf("podUIDs mismatch: expected [%s], got %v", test.pod.UID, claimInfo.PodUIDs)
			}
			assert.Equal(t, test.expectedClaimInfoState, claimInfo.ClaimInfoState)
		})
	}
}
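
// TestUnprepareResources exercises UnprepareResources: unregistered drivers,
// claims still referenced by other pods, driver timeouts, empty or nil driver
// responses, and successful unpreparation, which removes the claim info from
// the cache.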
func TestUnprepareResources(t *testing.T) {
	fakeKubeClient := fake.NewSimpleClientset()
	for _, test := range []struct {
		description            string
		driverName             string
		pod                    *v1.Pod
		claimInfo              *ClaimInfo
		claim                  *resourceapi.ResourceClaim
		resp                   *drapb.NodeUnprepareResourcesResponse
		wantTimeout            bool
		wantResourceSkipped    bool
		expectedUnprepareCalls uint32
		expectedErrMsg         string
	}{
		{
			description:    "unknown driver",
			pod:            genTestPod(),
			claim:          genTestClaim(claimName, "unknown driver", deviceName, podUID),
			claimInfo:      genTestClaimInfo([]string{podUID}, true),
			expectedErrMsg: "plugin name test-driver not found in the list of registered DRA plugins",
		},
		{
			description:         "resource claim referenced by other pod(s)",
			driverName:          driverName,
			pod:                 genTestPod(),
			claimInfo:           genTestClaimInfo([]string{podUID, "another-pod-uid"}, true),
			wantResourceSkipped: true,
		},
		{
			description:            "should timeout",
			driverName:             driverName,
			pod:                    genTestPod(),
			claimInfo:              genTestClaimInfo([]string{podUID}, true),
			wantTimeout:            true,
			expectedUnprepareCalls: 1,
			expectedErrMsg:         "context deadline exceeded",
		},
		{
			description:            "should fail when driver returns empty response",
			driverName:             driverName,
			pod:                    genTestPod(),
			claimInfo:              genTestClaimInfo([]string{podUID}, true),
			resp:                   &drapb.NodeUnprepareResourcesResponse{Claims: map[string]*drapb.NodeUnprepareResourceResponse{}},
			expectedUnprepareCalls: 1,
			expectedErrMsg:         "NodeUnprepareResources left out 1 claims",
		},
		{
			description:            "should unprepare resource",
			driverName:             driverName,
			pod:                    genTestPod(),
			claim:                  genTestClaim(claimName, driverName, deviceName, podUID),
			claimInfo:              genTestClaimInfo([]string{podUID}, false),
			expectedUnprepareCalls: 1,
		},
		{
			description:            "should unprepare already prepared resource",
			driverName:             driverName,
			pod:                    genTestPod(),
			claim:                  genTestClaim(claimName, driverName, deviceName, podUID),
			claimInfo:              genTestClaimInfo([]string{podUID}, true),
			expectedUnprepareCalls: 1,
		},
		{
			description:            "should unprepare resource when driver returns nil value",
			driverName:             driverName,
			pod:                    genTestPod(),
			claimInfo:              genTestClaimInfo([]string{podUID}, true),
			resp:                   &drapb.NodeUnprepareResourcesResponse{Claims: map[string]*drapb.NodeUnprepareResourceResponse{string(claimUID): nil}},
			expectedUnprepareCalls: 1,
		},
	} {
		t.Run(test.description, func(t *testing.T) {
			cache, err := newClaimInfoCache(t.TempDir(), draManagerStateFileName)
			if err != nil {
				t.Fatalf("failed to create a new instance of the claimInfoCache, err: %v", err)
			}

			var pluginClientTimeout *time.Duration
			if test.wantTimeout {
				timeout := time.Millisecond * 20
				pluginClientTimeout = &timeout
			}

			draServerInfo, err := setupFakeDRADriverGRPCServer(test.wantTimeout, pluginClientTimeout, nil, test.resp)
			if err != nil {
				t.Fatal(err)
			}
			defer draServerInfo.teardownFn()

			plg := plugin.NewRegistrationHandler(nil, getFakeNode)
			if err := plg.RegisterPlugin(test.driverName, draServerInfo.socketName, []string{"1.27"}, pluginClientTimeout); err != nil {
				t.Fatalf("failed to register plugin %s, err: %v", test.driverName, err)
			}
			defer plg.DeRegisterPlugin(test.driverName) // deregister so the next test case starts clean

			manager := &ManagerImpl{
				kubeClient: fakeKubeClient,
				cache:      cache,
			}

			if test.claimInfo != nil {
				manager.cache.add(test.claimInfo)
			}

			err = manager.UnprepareResources(test.pod)

			assert.Equal(t, test.expectedUnprepareCalls, draServerInfo.server.unprepareResourceCalls.Load())

			if test.expectedErrMsg != "" {
				assert.Error(t, err)
				if err != nil {
					assert.Contains(t, err.Error(), test.expectedErrMsg)
				}
				return // UnprepareResources returned an error, so stop the test case here
			}

			assert.NoError(t, err)
			if test.wantResourceSkipped {
				return // resource skipped, so no need to continue
			}

			// Check that the cache has been updated correctly
			claimName, _, err := resourceclaim.Name(test.pod, &test.pod.Spec.ResourceClaims[0])
			if err != nil {
				t.Fatal(err)
			}
			if manager.cache.contains(*claimName, test.pod.Namespace) {
				t.Fatalf("claimInfo still found in cache after calling UnprepareResources")
			}
		})
	}
}
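
// TestPodMightNeedToUnprepareResources verifies that a pod whose UID is
// recorded in a cached claim info is reported as possibly needing
// unpreparation.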
func TestPodMightNeedToUnprepareResources(t *testing.T) {
	fakeKubeClient := fake.NewSimpleClientset()

	cache, err := newClaimInfoCache(t.TempDir(), draManagerStateFileName)
	if err != nil {
		t.Fatalf("failed to create claimInfoCache: %v", err)
	}

	manager := &ManagerImpl{
		kubeClient: fakeKubeClient,
		cache:      cache,
	}

	claimInfo := &ClaimInfo{
		ClaimInfoState: state.ClaimInfoState{PodUIDs: sets.New(podUID), ClaimName: claimName, Namespace: namespace},
	}
	manager.cache.add(claimInfo)
	if !manager.cache.contains(claimName, namespace) {
		t.Fatalf("failed to get claimInfo from cache for claim name %s, namespace %s", claimName, namespace)
	}

	claimInfo.addPodReference(types.UID(podUID))
	needsUnprepare := manager.PodMightNeedToUnprepareResources(types.UID(podUID))
	assert.True(t, needsUnprepare)
}
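
// TestGetContainerClaimInfos verifies that GetContainerClaimInfos resolves the
// claim infos for a container's resource claims and fails for missing cache
// entries or pod claims with no supported source field set.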
func TestGetContainerClaimInfos(t *testing.T) {
	for _, test := range []struct {
		description       string
		pod               *v1.Pod
		claimInfo         *ClaimInfo
		expectedClaimName string
		expectedErrMsg    string
	}{
		{
			description:       "should get claim info",
			expectedClaimName: claimName,
			pod:               genTestPod(),
			claimInfo:         genTestClaimInfo([]string{podUID}, false),
		},
		{
			description:    "should fail when claim info not found",
			pod:            genTestPod(),
			claimInfo:      &ClaimInfo{},
			expectedErrMsg: "unable to get claim info for claim ",
		},
		{
			description: "should fail when none of the supported fields are set",
			pod: &v1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name:      podName,
					Namespace: namespace,
					UID:       podUID,
				},
				Spec: v1.PodSpec{
					ResourceClaims: []v1.PodResourceClaim{
						{
							Name: claimName,
							// missing ResourceClaimName or ResourceClaimTemplateName
						},
					},
					Containers: []v1.Container{
						{
							Resources: v1.ResourceRequirements{
								Claims: []v1.ResourceClaim{
									{
										Name: claimName,
									},
								},
							},
						},
					},
				},
			},
			claimInfo:      genTestClaimInfo([]string{podUID}, false),
			expectedErrMsg: "none of the supported fields are set",
		},
		{
			description:    "should fail when claim info is not cached",
			pod:            genTestPod(),
			expectedErrMsg: "unable to get claim info for claim ",
		},
	} {
		t.Run(test.description, func(t *testing.T) {
			cache, err := newClaimInfoCache(t.TempDir(), draManagerStateFileName)
			if err != nil {
				t.Fatalf("failed to create claimInfoCache: %v", err)
			}
			manager := &ManagerImpl{
				cache: cache,
			}

			if test.claimInfo != nil {
				manager.cache.add(test.claimInfo)
			}

			claimInfos, err := manager.GetContainerClaimInfos(test.pod, &test.pod.Spec.Containers[0])
			if test.expectedErrMsg != "" {
				assert.Error(t, err)
				if err != nil {
					assert.Contains(t, err.Error(), test.expectedErrMsg)
				}
				return
			}

			assert.NoError(t, err)
			assert.Len(t, claimInfos, 1)
			assert.Equal(t, test.expectedClaimName, claimInfos[0].ClaimInfoState.ClaimName)
		})
	}
}

// TestParallelPrepareUnprepareResources calls the PrepareResources and
// UnprepareResources APIs in parallel to detect possible data races.
func TestParallelPrepareUnprepareResources(t *testing.T) {
	// Setup and register fake DRA driver
	draServerInfo, err := setupFakeDRADriverGRPCServer(false, nil, nil, nil)
	if err != nil {
		t.Fatal(err)
	}
	defer draServerInfo.teardownFn()

	plg := plugin.NewRegistrationHandler(nil, getFakeNode)
	if err := plg.RegisterPlugin(driverName, draServerInfo.socketName, []string{"1.27"}, nil); err != nil {
		t.Fatalf("failed to register plugin %s, err: %v", driverName, err)
	}
	defer plg.DeRegisterPlugin(driverName)

	// Create ClaimInfo cache
	cache, err := newClaimInfoCache(t.TempDir(), draManagerStateFileName)
	if err != nil {
		t.Errorf("failed to create claimInfoCache: %+v", err)
		return
	}

	// Create fake Kube client and DRA manager
	fakeKubeClient := fake.NewSimpleClientset()
	manager := &ManagerImpl{kubeClient: fakeKubeClient, cache: cache}

	// Call PrepareResources in parallel
	var wgSync, wgStart sync.WaitGroup // groups to sync goroutines
	numGoroutines := 30
	wgSync.Add(numGoroutines)
	wgStart.Add(1)
	for i := 0; i < numGoroutines; i++ {
		go func(t *testing.T, goRoutineNum int) {
			defer wgSync.Done()
			wgStart.Wait() // Wait to start all goroutines at the same time

			var err error
			claimName := fmt.Sprintf("test-pod-claim-%d", goRoutineNum)
			podUID := types.UID(fmt.Sprintf("test-pod-uid-%d", goRoutineNum))
			pod := &v1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name:      fmt.Sprintf("test-pod-%d", goRoutineNum),
					Namespace: namespace,
					UID:       podUID,
				},
				Spec: v1.PodSpec{
					ResourceClaims: []v1.PodResourceClaim{
						{
							Name: claimName,
							ResourceClaimName: func() *string {
								s := claimName
								return &s
							}(),
						},
					},
					Containers: []v1.Container{
						{
							Resources: v1.ResourceRequirements{
								Claims: []v1.ResourceClaim{
									{
										Name: claimName,
									},
								},
							},
						},
					},
				},
			}
			claim := genTestClaim(claimName, driverName, deviceName, string(podUID))

			if _, err = fakeKubeClient.ResourceV1alpha3().ResourceClaims(pod.Namespace).Create(context.Background(), claim, metav1.CreateOptions{}); err != nil {
				t.Errorf("failed to create ResourceClaim %s: %+v", claim.Name, err)
				return
			}

			if err = manager.PrepareResources(pod); err != nil {
				t.Errorf("pod: %s: PrepareResources failed: %+v", pod.Name, err)
				return
			}

			if err = manager.UnprepareResources(pod); err != nil {
				t.Errorf("pod: %s: UnprepareResources failed: %+v", pod.Name, err)
				return
			}
		}(t, i)
	}
	wgStart.Done() // Start executing goroutines
	wgSync.Wait()  // Wait for all goroutines to finish
}