Merge pull request #119012 from pohly/dra-batch-node-prepare

kubelet: support batched prepare/unprepare in v1alpha3 DRA plugin API
This commit is contained in:
Kubernetes Prow Robot
2023-07-12 10:57:37 -07:00
committed by GitHub
11 changed files with 2604 additions and 171 deletions

View File

@@ -48,8 +48,10 @@ import (
)
const (
NodePrepareResourceMethod = "/v1alpha2.Node/NodePrepareResource"
NodeUnprepareResourceMethod = "/v1alpha2.Node/NodeUnprepareResource"
NodePrepareResourceMethod = "/v1alpha2.Node/NodePrepareResource"
NodePrepareResourcesMethod = "/v1alpha3.Node/NodePrepareResources"
NodeUnprepareResourceMethod = "/v1alpha2.Node/NodeUnprepareResource"
NodeUnprepareResourcesMethod = "/v1alpha3.Node/NodeUnprepareResources"
)
type Nodes struct {
@@ -87,9 +89,11 @@ func NewNodes(f *framework.Framework, minNodes, maxNodes int) *Nodes {
// up after the test.
func NewDriver(f *framework.Framework, nodes *Nodes, configureResources func() app.Resources) *Driver {
d := &Driver{
f: f,
fail: map[MethodInstance]bool{},
callCounts: map[MethodInstance]int64{},
f: f,
fail: map[MethodInstance]bool{},
callCounts: map[MethodInstance]int64{},
NodeV1alpha2: true,
NodeV1alpha3: true,
}
ginkgo.BeforeEach(func() {
@@ -121,6 +125,8 @@ type Driver struct {
Name string
Nodes map[string]*app.ExamplePlugin
NodeV1alpha2, NodeV1alpha3 bool
mutex sync.Mutex
fail map[MethodInstance]bool
callCounts map[MethodInstance]int64
@@ -229,6 +235,8 @@ func (d *Driver) SetUp(nodes *Nodes, resources app.Resources) {
kubeletplugin.PluginListener(listen(ctx, d.f, pod.Name, "plugin", 9001)),
kubeletplugin.RegistrarListener(listen(ctx, d.f, pod.Name, "registrar", 9000)),
kubeletplugin.KubeletPluginSocketPath(draAddr),
kubeletplugin.NodeV1alpha2(d.NodeV1alpha2),
kubeletplugin.NodeV1alpha3(d.NodeV1alpha3),
)
framework.ExpectNoError(err, "start kubelet plugin for node %s", pod.Spec.NodeName)
d.cleanup = append(d.cleanup, func() {

View File

@@ -67,9 +67,9 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation]", fu
ginkgo.By("the driver is running")
})
ginkgo.It("must retry NodePrepareResource", func(ctx context.Context) {
ginkgo.It("must retry NodePrepareResources", func(ctx context.Context) {
// We have exactly one host.
m := MethodInstance{driver.Nodenames()[0], NodePrepareResourceMethod}
m := MethodInstance{driver.Nodenames()[0], NodePrepareResourcesMethod}
driver.Fail(m, true)
@@ -79,10 +79,10 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation]", fu
b.create(ctx, parameters, pod, template)
ginkgo.By("wait for NodePrepareResource call")
ginkgo.By("wait for NodePrepareResources call")
gomega.Eventually(ctx, func(ctx context.Context) error {
if driver.CallCount(m) == 0 {
return errors.New("NodePrepareResource not called yet")
return errors.New("NodePrepareResources not called yet")
}
return nil
}).WithTimeout(podStartTimeout).Should(gomega.Succeed())
@@ -93,7 +93,7 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation]", fu
err := e2epod.WaitForPodNameRunningInNamespace(ctx, f.ClientSet, pod.Name, pod.Namespace)
framework.ExpectNoError(err, "start pod with inline resource claim")
if driver.CallCount(m) == callCount {
framework.Fail("NodePrepareResource should have been called again")
framework.Fail("NodePrepareResources should have been called again")
}
})
@@ -593,44 +593,64 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation]", fu
})
})
ginkgo.Context("multiple drivers", func() {
multipleDrivers := func(nodeV1alpha2, nodeV1alpha3 bool) {
nodes := NewNodes(f, 1, 4)
driver1 := NewDriver(f, nodes, func() app.Resources {
return app.Resources{
NodeLocal: true,
MaxAllocations: 1,
MaxAllocations: 2,
Nodes: nodes.NodeNames,
}
})
driver1.NodeV1alpha2 = nodeV1alpha2
driver1.NodeV1alpha3 = nodeV1alpha3
b1 := newBuilder(f, driver1)
driver2 := NewDriver(f, nodes, func() app.Resources {
return app.Resources{
NodeLocal: true,
MaxAllocations: 1,
MaxAllocations: 2,
Nodes: nodes.NodeNames,
}
})
driver2.NameSuffix = "-other"
driver2.NodeV1alpha2 = nodeV1alpha2
driver2.NodeV1alpha3 = nodeV1alpha3
b2 := newBuilder(f, driver2)
ginkgo.It("work", func(ctx context.Context) {
parameters1 := b1.parameters()
parameters2 := b2.parameters()
claim1 := b1.externalClaim(resourcev1alpha2.AllocationModeWaitForFirstConsumer)
claim1b := b1.externalClaim(resourcev1alpha2.AllocationModeWaitForFirstConsumer)
claim2 := b2.externalClaim(resourcev1alpha2.AllocationModeWaitForFirstConsumer)
claim2b := b2.externalClaim(resourcev1alpha2.AllocationModeWaitForFirstConsumer)
pod := b1.podExternal()
pod.Spec.ResourceClaims = append(pod.Spec.ResourceClaims,
v1.PodResourceClaim{
Name: "claim2",
Source: v1.ClaimSource{
ResourceClaimName: &claim2.Name,
for i, claim := range []*resourcev1alpha2.ResourceClaim{claim1b, claim2, claim2b} {
claim := claim
pod.Spec.ResourceClaims = append(pod.Spec.ResourceClaims,
v1.PodResourceClaim{
Name: fmt.Sprintf("claim%d", i+1),
Source: v1.ClaimSource{
ResourceClaimName: &claim.Name,
},
},
},
)
b1.create(ctx, parameters1, parameters2, claim1, claim2, pod)
)
}
b1.create(ctx, parameters1, parameters2, claim1, claim1b, claim2, claim2b, pod)
b1.testPod(ctx, f.ClientSet, pod)
})
}
multipleDriversContext := func(prefix string, nodeV1alpha2, nodeV1alpha3 bool) {
ginkgo.Context(prefix, func() {
multipleDrivers(nodeV1alpha2, nodeV1alpha3)
})
}
ginkgo.Context("multiple drivers", func() {
multipleDriversContext("using only drapbv1alpha2", true, false)
multipleDriversContext("using only drapbv1alpha3", false, true)
multipleDriversContext("using both drapbv1alpha2 and drapbv1alpha3", true, true)
})
})

View File

@@ -42,3 +42,13 @@ var NodePrepareResourceCalled = gcustom.MakeMatcher(func(actualCalls []GRPCCall)
}
return false, nil
}).WithMessage("contain NodePrepareResource call")
// NodePrepareResourcesCalled is a matcher over a slice of recorded gRPC calls.
// It succeeds when at least one call whose full method name ends in
// "/NodePrepareResources" completed without an error. Calls that returned an
// error are ignored. The matcher itself never returns an error.
var NodePrepareResourcesCalled = gcustom.MakeMatcher(func(actualCalls []GRPCCall) (bool, error) {
	for i := range actualCalls {
		// Failed calls do not count as "called".
		if actualCalls[i].Err != nil {
			continue
		}
		if strings.HasSuffix(actualCalls[i].FullMethod, "/NodePrepareResources") {
			return true, nil
		}
	}
	return false, nil
}).WithMessage("contain NodePrepareResources call")

View File

@@ -28,7 +28,8 @@ import (
"k8s.io/dynamic-resource-allocation/kubeletplugin"
"k8s.io/klog/v2"
drapbv1 "k8s.io/kubelet/pkg/apis/dra/v1alpha2"
drapbv1alpha2 "k8s.io/kubelet/pkg/apis/dra/v1alpha2"
drapbv1alpha3 "k8s.io/kubelet/pkg/apis/dra/v1alpha3"
)
type ExamplePlugin struct {
@@ -69,7 +70,7 @@ type ClaimID struct {
UID string
}
var _ drapbv1.NodeServer = &ExamplePlugin{}
var _ drapbv1alpha2.NodeServer = &ExamplePlugin{}
// getJSONFilePath returns the absolute path where CDI file is/should be.
func (ex *ExamplePlugin) getJSONFilePath(claimUID string) string {
@@ -147,7 +148,7 @@ func (ex *ExamplePlugin) Block() {
// a deterministic name to simplify NodeUnprepareResource (no need to remember
// or discover the name) and idempotency (when called again, the file simply
// gets written again).
func (ex *ExamplePlugin) NodePrepareResource(ctx context.Context, req *drapbv1.NodePrepareResourceRequest) (*drapbv1.NodePrepareResourceResponse, error) {
func (ex *ExamplePlugin) NodePrepareResource(ctx context.Context, req *drapbv1alpha2.NodePrepareResourceRequest) (*drapbv1alpha2.NodePrepareResourceResponse, error) {
logger := klog.FromContext(ctx)
// Block to emulate plugin stuckness or slowness.
@@ -201,7 +202,7 @@ func (ex *ExamplePlugin) NodePrepareResource(ctx context.Context, req *drapbv1.N
}
dev := vendor + "/" + class + "=" + deviceName
resp := &drapbv1.NodePrepareResourceResponse{CdiDevices: []string{dev}}
resp := &drapbv1alpha2.NodePrepareResourceResponse{CdiDevices: []string{dev}}
ex.mutex.Lock()
defer ex.mutex.Unlock()
@@ -211,10 +212,34 @@ func (ex *ExamplePlugin) NodePrepareResource(ctx context.Context, req *drapbv1.N
return resp, nil
}
// NodePrepareResources handles a batched v1alpha3 prepare request. Each claim
// in req.Claims is converted into a v1alpha2 request and passed to
// NodePrepareResource, one claim at a time and in request order.
//
// The response holds one entry per claim, keyed by claim UID. When preparing a
// claim fails, the error text is stored in that entry's Error field;
// otherwise the entry carries the CDI devices returned for the claim. A
// failure for one claim does not stop processing of the remaining claims, and
// the returned error is always nil.
func (ex *ExamplePlugin) NodePrepareResources(ctx context.Context, req *drapbv1alpha3.NodePrepareResourcesRequest) (*drapbv1alpha3.NodePrepareResourcesResponse, error) {
	results := make(map[string]*drapbv1alpha3.NodePrepareResourceResponse)
	for _, claim := range req.Claims {
		single := &drapbv1alpha2.NodePrepareResourceRequest{
			Namespace:      claim.Namespace,
			ClaimName:      claim.Name,
			ClaimUid:       claim.Uid,
			ResourceHandle: claim.ResourceHandle,
		}
		result := &drapbv1alpha3.NodePrepareResourceResponse{}
		if prepared, err := ex.NodePrepareResource(ctx, single); err != nil {
			// Per-claim failures are reported inside the response.
			result.Error = err.Error()
		} else {
			result.CDIDevices = prepared.CdiDevices
		}
		results[claim.Uid] = result
	}
	return &drapbv1alpha3.NodePrepareResourcesResponse{Claims: results}, nil
}
// NodeUnprepareResource removes the CDI file created by
// NodePrepareResource. It's idempotent, therefore it is not an error when that
// file is already gone.
func (ex *ExamplePlugin) NodeUnprepareResource(ctx context.Context, req *drapbv1.NodeUnprepareResourceRequest) (*drapbv1.NodeUnprepareResourceResponse, error) {
func (ex *ExamplePlugin) NodeUnprepareResource(ctx context.Context, req *drapbv1alpha2.NodeUnprepareResourceRequest) (*drapbv1alpha2.NodeUnprepareResourceResponse, error) {
logger := klog.FromContext(ctx)
// Block to emulate plugin stuckness or slowness.
@@ -234,7 +259,29 @@ func (ex *ExamplePlugin) NodeUnprepareResource(ctx context.Context, req *drapbv1
defer ex.mutex.Unlock()
delete(ex.prepared, ClaimID{Name: req.ClaimName, UID: req.ClaimUid})
return &drapbv1.NodeUnprepareResourceResponse{}, nil
return &drapbv1alpha2.NodeUnprepareResourceResponse{}, nil
}
// NodeUnprepareResources handles a batched v1alpha3 unprepare request. Each
// claim in req.Claims is converted into a v1alpha2 request and passed to
// NodeUnprepareResource, one claim at a time and in request order.
//
// The response holds one entry per claim, keyed by claim UID. When
// unpreparing a claim fails, the error text is stored in that entry's Error
// field; otherwise the entry is empty. A failure for one claim does not stop
// processing of the remaining claims, and the returned error is always nil.
func (ex *ExamplePlugin) NodeUnprepareResources(ctx context.Context, req *drapbv1alpha3.NodeUnprepareResourcesRequest) (*drapbv1alpha3.NodeUnprepareResourcesResponse, error) {
	results := make(map[string]*drapbv1alpha3.NodeUnprepareResourceResponse)
	for _, claim := range req.Claims {
		single := &drapbv1alpha2.NodeUnprepareResourceRequest{
			Namespace:      claim.Namespace,
			ClaimName:      claim.Name,
			ClaimUid:       claim.Uid,
			ResourceHandle: claim.ResourceHandle,
		}
		result := &drapbv1alpha3.NodeUnprepareResourceResponse{}
		// The v1alpha2 response carries no data that is used here, only the error matters.
		if _, err := ex.NodeUnprepareResource(ctx, single); err != nil {
			result.Error = err.Error()
		}
		results[claim.Uid] = result
	}
	return &drapbv1alpha3.NodeUnprepareResourcesResponse{Claims: results}, nil
}
func (ex *ExamplePlugin) GetPreparedResources() []ClaimID {