DRA: add node tests
- Set up overall test structure
- Tested Kubelet plugin re-registration on plugin and Kubelet restarts
- Tested pod processing on Kubelet start
test/e2e_node/dra_test.go (new file, 230 lines added)
@@ -0,0 +1,230 @@
/*
Copyright 2023 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

/*
E2E Node test for DRA (Dynamic Resource Allocation)
This test covers node-specific aspects of DRA
The test can be run locally on Linux this way:
  make test-e2e-node FOCUS='\[NodeFeature:DynamicResourceAllocation\]' SKIP='\[Flaky\]' PARALLELISM=1 \
       TEST_ARGS='--feature-gates="DynamicResourceAllocation=true" --service-feature-gates="DynamicResourceAllocation=true" --runtime-config=api/all=true'
*/

package e2enode

import (
	"context"
	"os"
	"path"
	"path/filepath"
	"time"

	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega"

	v1 "k8s.io/api/core/v1"
	resourcev1alpha2 "k8s.io/api/resource/v1alpha2"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/klog/v2"
	admissionapi "k8s.io/pod-security-admission/api"

	"k8s.io/kubernetes/test/e2e/framework"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"

	"k8s.io/dynamic-resource-allocation/kubeletplugin"
	testdriver "k8s.io/kubernetes/test/e2e/dra/test-driver/app"
)

const (
	driverName                = "test-driver.cdi.k8s.io"
	cdiDir                    = "/var/run/cdi"
	endpoint                  = "/var/lib/kubelet/plugins/test-driver/dra.sock"
	pluginRegistrationPath    = "/var/lib/kubelet/plugins_registry"
	draAddress                = "/var/lib/kubelet/plugins/test-driver/dra.sock"
	pluginRegistrationTimeout = time.Second * 60 // how long to wait for a node plugin to be registered
)

var _ = ginkgo.Describe("[sig-node] DRA [Feature:DynamicResourceAllocation][NodeFeature:DynamicResourceAllocation]", func() {
	f := framework.NewDefaultFramework("dra-node")
	f.NamespacePodSecurityEnforceLevel = admissionapi.LevelBaseline

	var kubeletPlugin *testdriver.ExamplePlugin

	ginkgo.Context("Resource Kubelet Plugin [Serial]", func() {
		ginkgo.BeforeEach(func(ctx context.Context) {
			kubeletPlugin = newKubeletPlugin(getNodeName(ctx, f))
		})

		ginkgo.It("must register after Kubelet restart", func(ctx context.Context) {
			oldCalls := kubeletPlugin.GetGRPCCalls()
			getNewCalls := func() []testdriver.GRPCCall {
				calls := kubeletPlugin.GetGRPCCalls()
				return calls[len(oldCalls):]
			}

			ginkgo.By("restarting Kubelet")
			restartKubelet(true)

			ginkgo.By("wait for Kubelet plugin re-registration")
			gomega.Eventually(getNewCalls).WithTimeout(pluginRegistrationTimeout).Should(testdriver.BeRegistered)
		})

		ginkgo.It("must register after plugin restart", func(ctx context.Context) {
			ginkgo.By("restart Kubelet Plugin")
			kubeletPlugin.Stop()
			kubeletPlugin = newKubeletPlugin(getNodeName(ctx, f))

			ginkgo.By("wait for Kubelet plugin re-registration")
			gomega.Eventually(kubeletPlugin.GetGRPCCalls).WithTimeout(pluginRegistrationTimeout).Should(testdriver.BeRegistered)
		})

		ginkgo.It("must process pod created when kubelet is not running", func(ctx context.Context) {
			// Stop Kubelet
			startKubelet := stopKubelet()
			pod := createTestObjects(ctx, f.ClientSet, getNodeName(ctx, f), f.Namespace.Name, "draclass", "external-claim", "drapod")
			// Pod must be in pending state
			err := e2epod.WaitForPodCondition(ctx, f.ClientSet, f.Namespace.Name, pod.Name, "Pending", framework.PodStartShortTimeout, func(pod *v1.Pod) (bool, error) {
				return pod.Status.Phase == v1.PodPending, nil
			})
			framework.ExpectNoError(err)
			// Start Kubelet
			startKubelet()
			// Pod should succeed
			err = e2epod.WaitForPodSuccessInNamespaceTimeout(ctx, f.ClientSet, pod.Name, f.Namespace.Name, framework.PodStartShortTimeout)
			framework.ExpectNoError(err)
		})
	})
})

// Run Kubelet plugin and wait until it's registered
func newKubeletPlugin(nodeName string) *testdriver.ExamplePlugin {
	ginkgo.By("start Kubelet plugin")
	logger := klog.LoggerWithValues(klog.LoggerWithName(klog.Background(), "kubelet plugin"), "node", nodeName)

	// Ensure that directories exist, creating them if necessary. We want
	// to know early if there is a setup problem that would prevent
	// creating those directories.
	err := os.MkdirAll(cdiDir, os.FileMode(0750))
	framework.ExpectNoError(err, "create CDI directory")
	err = os.MkdirAll(filepath.Dir(endpoint), 0750)
	framework.ExpectNoError(err, "create socket directory")

	plugin, err := testdriver.StartPlugin(
		logger,
		cdiDir,
		driverName,
		"",
		testdriver.FileOperations{},
		kubeletplugin.PluginSocketPath(endpoint),
		kubeletplugin.RegistrarSocketPath(path.Join(pluginRegistrationPath, driverName+"-reg.sock")),
		kubeletplugin.KubeletPluginSocketPath(draAddress),
	)
	framework.ExpectNoError(err)

	gomega.Eventually(plugin.GetGRPCCalls).WithTimeout(pluginRegistrationTimeout).Should(testdriver.BeRegistered)

	ginkgo.DeferCleanup(plugin.Stop)

	return plugin
}

// createTestObjects creates objects required by the test
// NOTE: as scheduler and controller manager are not running by the Node e2e,
// the objects must contain all required data to be processed correctly by the API server
// and placed on the node without involving the scheduler and the DRA controller
func createTestObjects(ctx context.Context, clientSet kubernetes.Interface, nodename, namespace, className, claimName, podName string) *v1.Pod {
	// ResourceClass
	class := &resourcev1alpha2.ResourceClass{
		ObjectMeta: metav1.ObjectMeta{
			Name: className,
		},
		DriverName: driverName,
	}
	_, err := clientSet.ResourceV1alpha2().ResourceClasses().Create(ctx, class, metav1.CreateOptions{})
	framework.ExpectNoError(err)

	ginkgo.DeferCleanup(clientSet.ResourceV1alpha2().ResourceClasses().Delete, className, metav1.DeleteOptions{})

	// ResourceClaim
	podClaimName := "resource-claim"
	claim := &resourcev1alpha2.ResourceClaim{
		ObjectMeta: metav1.ObjectMeta{
			Name: claimName,
		},
		Spec: resourcev1alpha2.ResourceClaimSpec{
			ResourceClassName: className,
		},
	}
	createdClaim, err := clientSet.ResourceV1alpha2().ResourceClaims(namespace).Create(ctx, claim, metav1.CreateOptions{})
	framework.ExpectNoError(err)

	ginkgo.DeferCleanup(clientSet.ResourceV1alpha2().ResourceClaims(namespace).Delete, claimName, metav1.DeleteOptions{})

	// Pod
	containerName := "testcontainer"
	pod := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name: podName,
		},
		Spec: v1.PodSpec{
			NodeName: nodename, // Assign the node as the scheduler is not running
			ResourceClaims: []v1.PodResourceClaim{
				{
					Name: podClaimName,
					Source: v1.ClaimSource{
						ResourceClaimName: &claimName,
					},
				},
			},
			Containers: []v1.Container{
				{
					Name:  containerName,
					Image: e2epod.GetDefaultTestImage(),
					Resources: v1.ResourceRequirements{
						Claims: []v1.ResourceClaim{{Name: podClaimName}},
					},
					Command: []string{"/bin/sh", "-c", "env | grep DRA_PARAM1=PARAM1_VALUE"},
				},
			},
			RestartPolicy: v1.RestartPolicyNever,
		},
	}
	createdPod, err := clientSet.CoreV1().Pods(namespace).Create(ctx, pod, metav1.CreateOptions{})
	framework.ExpectNoError(err)

	ginkgo.DeferCleanup(clientSet.CoreV1().Pods(namespace).Delete, podName, metav1.DeleteOptions{})

	// Update claim status: set ReservedFor and AllocationResult
	// NOTE: This is usually done by the DRA controller
	createdClaim.Status = resourcev1alpha2.ResourceClaimStatus{
		DriverName: driverName,
		ReservedFor: []resourcev1alpha2.ResourceClaimConsumerReference{
			{Resource: "pods", Name: podName, UID: createdPod.UID},
		},
		Allocation: &resourcev1alpha2.AllocationResult{
			ResourceHandles: []resourcev1alpha2.ResourceHandle{
				{
					DriverName: driverName,
					Data:       "{\"EnvVars\":{\"DRA_PARAM1\":\"PARAM1_VALUE\"},\"NodeName\":\"\"}",
				},
			},
		},
	}
	_, err = clientSet.ResourceV1alpha2().ResourceClaims(namespace).UpdateStatus(ctx, createdClaim, metav1.UpdateOptions{})
	framework.ExpectNoError(err)

	return pod
}
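Note on helpers: the tests above call restartKubelet, stopKubelet, and getNodeName, which live elsewhere in the e2e_node package and are not part of this diff. For illustration only, a minimal sketch of the stop/start pattern used by "must process pod created when kubelet is not running" could look like the snippet below; it assumes the kubelet is managed as a systemd unit named "kubelet" (the real helpers discover the service name dynamically), and the name stopKubeletSketch is hypothetical.

// stopKubeletSketch is an illustrative sketch, not the actual e2e_node helper.
// It stops the kubelet unit and returns a closure that starts it again,
// mirroring the `startKubelet := stopKubelet()` pattern used in the test.
package e2enode

import (
	"os/exec"

	"k8s.io/kubernetes/test/e2e/framework"
)

func stopKubeletSketch() func() {
	// Assumption: kubelet runs as a systemd unit named "kubelet".
	out, err := exec.Command("systemctl", "stop", "kubelet").CombinedOutput()
	framework.ExpectNoError(err, "stop kubelet: %s", string(out))
	return func() {
		out, err := exec.Command("systemctl", "start", "kubelet").CombinedOutput()
		framework.ExpectNoError(err, "start kubelet: %s", string(out))
	}
}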