Merge pull request #119012 from pohly/dra-batch-node-prepare
kubelet: support batched prepare/unprepare in v1alpha3 DRA plugin API
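The core of the change: v1alpha2 exposed one gRPC call per claim (NodePrepareResource, NodeUnprepareResource), while v1alpha3 batches all claims into a single NodePrepareResources or NodeUnprepareResources call whose response carries a per-claim result keyed by claim UID. Below is a minimal caller-side sketch of the batched API; the socket path, claim names, and UIDs are invented for illustration, and NewNodeClient and Claim are the names the generated v1alpha3 stubs are expected to provide, while the request, response, and field names match the diff that follows.

// Sketch only: invoke the batched v1alpha3 NodePrepareResources call
// against a DRA plugin listening on a (hypothetical) unix socket.
package main

import (
    "context"
    "fmt"

    "google.golang.org/grpc"
    "google.golang.org/grpc/credentials/insecure"

    drapbv1alpha3 "k8s.io/kubelet/pkg/apis/dra/v1alpha3"
)

func main() {
    // Connect to the plugin's socket (path is made up for this example).
    conn, err := grpc.Dial("unix:///var/lib/kubelet/plugins/example/dra.sock",
        grpc.WithTransportCredentials(insecure.NewCredentials()))
    if err != nil {
        panic(err)
    }
    defer conn.Close()
    client := drapbv1alpha3.NewNodeClient(conn)

    // All claims travel in one request; the response reports success or
    // failure per claim UID instead of failing the whole gRPC call.
    resp, err := client.NodePrepareResources(context.Background(),
        &drapbv1alpha3.NodePrepareResourcesRequest{
            Claims: []*drapbv1alpha3.Claim{
                {Namespace: "default", Name: "claim-a", Uid: "uid-a"},
                {Namespace: "default", Name: "claim-b", Uid: "uid-b"},
            },
        })
    if err != nil {
        panic(err) // transport-level failure only
    }
    for uid, result := range resp.Claims {
        if result.Error != "" {
            fmt.Printf("claim %s failed: %s\n", uid, result.Error)
            continue
        }
        fmt.Printf("claim %s prepared: CDI devices %v\n", uid, result.CDIDevices)
    }
}

A transport error still fails the whole call; per-claim failures are reported inside resp.Claims, which lets the caller handle partial failure without aborting the batch.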
@@ -48,8 +48,10 @@ import (
 )
 
 const (
-    NodePrepareResourceMethod   = "/v1alpha2.Node/NodePrepareResource"
-    NodeUnprepareResourceMethod = "/v1alpha2.Node/NodeUnprepareResource"
+    NodePrepareResourceMethod    = "/v1alpha2.Node/NodePrepareResource"
+    NodePrepareResourcesMethod   = "/v1alpha3.Node/NodePrepareResources"
+    NodeUnprepareResourceMethod  = "/v1alpha2.Node/NodeUnprepareResource"
+    NodeUnprepareResourcesMethod = "/v1alpha3.Node/NodeUnprepareResources"
 )
 
 type Nodes struct {
@@ -87,9 +89,11 @@ func NewNodes(f *framework.Framework, minNodes, maxNodes int) *Nodes {
 // up after the test.
 func NewDriver(f *framework.Framework, nodes *Nodes, configureResources func() app.Resources) *Driver {
     d := &Driver{
-        f:          f,
-        fail:       map[MethodInstance]bool{},
-        callCounts: map[MethodInstance]int64{},
+        f:            f,
+        fail:         map[MethodInstance]bool{},
+        callCounts:   map[MethodInstance]int64{},
+        NodeV1alpha2: true,
+        NodeV1alpha3: true,
     }
 
     ginkgo.BeforeEach(func() {
@@ -121,6 +125,8 @@ type Driver struct {
     Name  string
     Nodes map[string]*app.ExamplePlugin
 
+    NodeV1alpha2, NodeV1alpha3 bool
+
     mutex      sync.Mutex
     fail       map[MethodInstance]bool
     callCounts map[MethodInstance]int64
@@ -229,6 +235,8 @@ func (d *Driver) SetUp(nodes *Nodes, resources app.Resources) {
         kubeletplugin.PluginListener(listen(ctx, d.f, pod.Name, "plugin", 9001)),
         kubeletplugin.RegistrarListener(listen(ctx, d.f, pod.Name, "registrar", 9000)),
         kubeletplugin.KubeletPluginSocketPath(draAddr),
+        kubeletplugin.NodeV1alpha2(d.NodeV1alpha2),
+        kubeletplugin.NodeV1alpha3(d.NodeV1alpha3),
     )
     framework.ExpectNoError(err, "start kubelet plugin for node %s", pod.Spec.NodeName)
     d.cleanup = append(d.cleanup, func() {
@@ -67,9 +67,9 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation]", fu
         ginkgo.By("the driver is running")
     })
 
-    ginkgo.It("must retry NodePrepareResource", func(ctx context.Context) {
+    ginkgo.It("must retry NodePrepareResources", func(ctx context.Context) {
         // We have exactly one host.
-        m := MethodInstance{driver.Nodenames()[0], NodePrepareResourceMethod}
+        m := MethodInstance{driver.Nodenames()[0], NodePrepareResourcesMethod}
 
         driver.Fail(m, true)
 
@@ -79,10 +79,10 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation]", fu
 
         b.create(ctx, parameters, pod, template)
 
-        ginkgo.By("wait for NodePrepareResource call")
+        ginkgo.By("wait for NodePrepareResources call")
         gomega.Eventually(ctx, func(ctx context.Context) error {
             if driver.CallCount(m) == 0 {
-                return errors.New("NodePrepareResource not called yet")
+                return errors.New("NodePrepareResources not called yet")
             }
             return nil
         }).WithTimeout(podStartTimeout).Should(gomega.Succeed())
@@ -93,7 +93,7 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation]", fu
         err := e2epod.WaitForPodNameRunningInNamespace(ctx, f.ClientSet, pod.Name, pod.Namespace)
         framework.ExpectNoError(err, "start pod with inline resource claim")
         if driver.CallCount(m) == callCount {
-            framework.Fail("NodePrepareResource should have been called again")
+            framework.Fail("NodePrepareResources should have been called again")
         }
     })
 
@@ -593,44 +593,64 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation]", fu
         })
     })
 
-    ginkgo.Context("multiple drivers", func() {
+    multipleDrivers := func(nodeV1alpha2, nodeV1alpha3 bool) {
         nodes := NewNodes(f, 1, 4)
         driver1 := NewDriver(f, nodes, func() app.Resources {
             return app.Resources{
                 NodeLocal:      true,
-                MaxAllocations: 1,
+                MaxAllocations: 2,
                 Nodes:          nodes.NodeNames,
             }
         })
+        driver1.NodeV1alpha2 = nodeV1alpha2
+        driver1.NodeV1alpha3 = nodeV1alpha3
         b1 := newBuilder(f, driver1)
 
         driver2 := NewDriver(f, nodes, func() app.Resources {
             return app.Resources{
                 NodeLocal:      true,
-                MaxAllocations: 1,
+                MaxAllocations: 2,
                 Nodes:          nodes.NodeNames,
             }
         })
         driver2.NameSuffix = "-other"
+        driver2.NodeV1alpha2 = nodeV1alpha2
+        driver2.NodeV1alpha3 = nodeV1alpha3
         b2 := newBuilder(f, driver2)
 
         ginkgo.It("work", func(ctx context.Context) {
             parameters1 := b1.parameters()
             parameters2 := b2.parameters()
             claim1 := b1.externalClaim(resourcev1alpha2.AllocationModeWaitForFirstConsumer)
+            claim1b := b1.externalClaim(resourcev1alpha2.AllocationModeWaitForFirstConsumer)
             claim2 := b2.externalClaim(resourcev1alpha2.AllocationModeWaitForFirstConsumer)
+            claim2b := b2.externalClaim(resourcev1alpha2.AllocationModeWaitForFirstConsumer)
             pod := b1.podExternal()
-            pod.Spec.ResourceClaims = append(pod.Spec.ResourceClaims,
-                v1.PodResourceClaim{
-                    Name: "claim2",
-                    Source: v1.ClaimSource{
-                        ResourceClaimName: &claim2.Name,
+            for i, claim := range []*resourcev1alpha2.ResourceClaim{claim1b, claim2, claim2b} {
+                claim := claim
+                pod.Spec.ResourceClaims = append(pod.Spec.ResourceClaims,
+                    v1.PodResourceClaim{
+                        Name: fmt.Sprintf("claim%d", i+1),
+                        Source: v1.ClaimSource{
+                            ResourceClaimName: &claim.Name,
+                        },
                     },
-                },
-            )
-            b1.create(ctx, parameters1, parameters2, claim1, claim2, pod)
+                )
+            }
+            b1.create(ctx, parameters1, parameters2, claim1, claim1b, claim2, claim2b, pod)
             b1.testPod(ctx, f.ClientSet, pod)
         })
-    })
+    }
+    multipleDriversContext := func(prefix string, nodeV1alpha2, nodeV1alpha3 bool) {
+        ginkgo.Context(prefix, func() {
+            multipleDrivers(nodeV1alpha2, nodeV1alpha3)
+        })
+    }
+
+    ginkgo.Context("multiple drivers", func() {
+        multipleDriversContext("using only drapbv1alpha2", true, false)
+        multipleDriversContext("using only drapbv1alpha3", false, true)
+        multipleDriversContext("using both drapbv1alpha2 and drapbv1alpha3", true, true)
+    })
 })
 
@@ -42,3 +42,13 @@ var NodePrepareResourceCalled = gcustom.MakeMatcher(func(actualCalls []GRPCCall)
     }
     return false, nil
 }).WithMessage("contain NodePrepareResource call")
+
+// NodePrepareResourcesCalled checks that the NodePrepareResources API has been called.
+var NodePrepareResourcesCalled = gcustom.MakeMatcher(func(actualCalls []GRPCCall) (bool, error) {
+    for _, call := range actualCalls {
+        if strings.HasSuffix(call.FullMethod, "/NodePrepareResources") && call.Err == nil {
+            return true, nil
+        }
+    }
+    return false, nil
+}).WithMessage("contain NodePrepareResources call")
@@ -28,7 +28,8 @@ import (
 
     "k8s.io/dynamic-resource-allocation/kubeletplugin"
     "k8s.io/klog/v2"
-    drapbv1 "k8s.io/kubelet/pkg/apis/dra/v1alpha2"
+    drapbv1alpha2 "k8s.io/kubelet/pkg/apis/dra/v1alpha2"
+    drapbv1alpha3 "k8s.io/kubelet/pkg/apis/dra/v1alpha3"
 )
 
 type ExamplePlugin struct {
@@ -69,7 +70,7 @@ type ClaimID struct {
     UID  string
 }
 
-var _ drapbv1.NodeServer = &ExamplePlugin{}
+var _ drapbv1alpha2.NodeServer = &ExamplePlugin{}
 
 // getJSONFilePath returns the absolute path where CDI file is/should be.
 func (ex *ExamplePlugin) getJSONFilePath(claimUID string) string {
@@ -147,7 +148,7 @@ func (ex *ExamplePlugin) Block() {
 // a deterministic name to simplify NodeUnprepareResource (no need to remember
 // or discover the name) and idempotency (when called again, the file simply
 // gets written again).
-func (ex *ExamplePlugin) NodePrepareResource(ctx context.Context, req *drapbv1.NodePrepareResourceRequest) (*drapbv1.NodePrepareResourceResponse, error) {
+func (ex *ExamplePlugin) NodePrepareResource(ctx context.Context, req *drapbv1alpha2.NodePrepareResourceRequest) (*drapbv1alpha2.NodePrepareResourceResponse, error) {
     logger := klog.FromContext(ctx)
 
     // Block to emulate plugin stuckness or slowness.
@@ -201,7 +202,7 @@ func (ex *ExamplePlugin) NodePrepareResource(ctx context.Context, req *drapbv1.N
     }
 
     dev := vendor + "/" + class + "=" + deviceName
-    resp := &drapbv1.NodePrepareResourceResponse{CdiDevices: []string{dev}}
+    resp := &drapbv1alpha2.NodePrepareResourceResponse{CdiDevices: []string{dev}}
 
     ex.mutex.Lock()
     defer ex.mutex.Unlock()
@@ -211,10 +212,34 @@ func (ex *ExamplePlugin) NodePrepareResource(ctx context.Context, req *drapbv1.N
     return resp, nil
 }
 
+func (ex *ExamplePlugin) NodePrepareResources(ctx context.Context, req *drapbv1alpha3.NodePrepareResourcesRequest) (*drapbv1alpha3.NodePrepareResourcesResponse, error) {
+    resp := &drapbv1alpha3.NodePrepareResourcesResponse{
+        Claims: make(map[string]*drapbv1alpha3.NodePrepareResourceResponse),
+    }
+    for _, claimReq := range req.Claims {
+        claimResp, err := ex.NodePrepareResource(ctx, &drapbv1alpha2.NodePrepareResourceRequest{
+            Namespace:      claimReq.Namespace,
+            ClaimName:      claimReq.Name,
+            ClaimUid:       claimReq.Uid,
+            ResourceHandle: claimReq.ResourceHandle,
+        })
+        if err != nil {
+            resp.Claims[claimReq.Uid] = &drapbv1alpha3.NodePrepareResourceResponse{
+                Error: err.Error(),
+            }
+        } else {
+            resp.Claims[claimReq.Uid] = &drapbv1alpha3.NodePrepareResourceResponse{
+                CDIDevices: claimResp.CdiDevices,
+            }
+        }
+    }
+    return resp, nil
+}
+
 // NodeUnprepareResource removes the CDI file created by
 // NodePrepareResource. It's idempotent, therefore it is not an error when that
 // file is already gone.
-func (ex *ExamplePlugin) NodeUnprepareResource(ctx context.Context, req *drapbv1.NodeUnprepareResourceRequest) (*drapbv1.NodeUnprepareResourceResponse, error) {
+func (ex *ExamplePlugin) NodeUnprepareResource(ctx context.Context, req *drapbv1alpha2.NodeUnprepareResourceRequest) (*drapbv1alpha2.NodeUnprepareResourceResponse, error) {
     logger := klog.FromContext(ctx)
 
     // Block to emulate plugin stuckness or slowness.
@@ -234,7 +259,29 @@ func (ex *ExamplePlugin) NodeUnprepareResource(ctx context.Context, req *drapbv1
     defer ex.mutex.Unlock()
     delete(ex.prepared, ClaimID{Name: req.ClaimName, UID: req.ClaimUid})
 
-    return &drapbv1.NodeUnprepareResourceResponse{}, nil
+    return &drapbv1alpha2.NodeUnprepareResourceResponse{}, nil
 }
 
+func (ex *ExamplePlugin) NodeUnprepareResources(ctx context.Context, req *drapbv1alpha3.NodeUnprepareResourcesRequest) (*drapbv1alpha3.NodeUnprepareResourcesResponse, error) {
+    resp := &drapbv1alpha3.NodeUnprepareResourcesResponse{
+        Claims: make(map[string]*drapbv1alpha3.NodeUnprepareResourceResponse),
+    }
+    for _, claimReq := range req.Claims {
+        _, err := ex.NodeUnprepareResource(ctx, &drapbv1alpha2.NodeUnprepareResourceRequest{
+            Namespace:      claimReq.Namespace,
+            ClaimName:      claimReq.Name,
+            ClaimUid:       claimReq.Uid,
+            ResourceHandle: claimReq.ResourceHandle,
+        })
+        if err != nil {
+            resp.Claims[claimReq.Uid] = &drapbv1alpha3.NodeUnprepareResourceResponse{
+                Error: err.Error(),
+            }
+        } else {
+            resp.Claims[claimReq.Uid] = &drapbv1alpha3.NodeUnprepareResourceResponse{}
+        }
+    }
+    return resp, nil
+}
+
 func (ex *ExamplePlugin) GetPreparedResources() []ClaimID {
@@ -109,8 +109,8 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation][Node
         framework.ExpectNoError(err)
     })
 
-    ginkgo.It("must keep pod in pending state if NodePrepareResource times out", func(ctx context.Context) {
-        ginkgo.By("set delay for the NodePrepareResource call")
+    ginkgo.It("must keep pod in pending state if NodePrepareResources times out", func(ctx context.Context) {
+        ginkgo.By("set delay for the NodePrepareResources call")
         kubeletPlugin.Block()
         pod := createTestObjects(ctx, f.ClientSet, getNodeName(ctx, f), f.Namespace.Name, "draclass", "external-claim", "drapod")
 
@@ -120,8 +120,8 @@ var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation][Node
         })
         framework.ExpectNoError(err)
 
-        ginkgo.By("wait for NodePrepareResource call")
-        gomega.Eventually(kubeletPlugin.GetGRPCCalls).WithTimeout(dra.PluginClientTimeout * 2).Should(testdriver.NodePrepareResourceCalled)
+        ginkgo.By("wait for NodePrepareResources call")
+        gomega.Eventually(kubeletPlugin.GetGRPCCalls).WithTimeout(dra.PluginClientTimeout * 2).Should(testdriver.NodePrepareResourcesCalled)
 
         // TODO: Check condition or event when implemented
         // see https://github.com/kubernetes/kubernetes/issues/118468 for details