From 1fdf262137804eaaf905aaf8e06de4a33b1a6627 Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Fri, 17 Jan 2020 13:30:31 +0100 Subject: [PATCH 01/13] e2e: topomgr: explicit save the kubelet config For the sake of readability, save the old Kubelet config once. Signed-off-by: Francesco Romani --- test/e2e_node/topology_manager_test.go | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/test/e2e_node/topology_manager_test.go b/test/e2e_node/topology_manager_test.go index c9f5b2df1e1..afb1a6da383 100644 --- a/test/e2e_node/topology_manager_test.go +++ b/test/e2e_node/topology_manager_test.go @@ -76,10 +76,8 @@ func makeTopologyManagerPod(podName string, tmCtnAttributes []tmCtnAttribute) *v } } -func configureTopologyManagerInKubelet(f *framework.Framework, policy string) { +func configureTopologyManagerInKubelet(f *framework.Framework, oldCfg *kubeletconfig.KubeletConfiguration, policy string) { // Configure Topology Manager in Kubelet with policy. - oldCfg, err := getCurrentKubeletConfig() - framework.ExpectNoError(err) newCfg := oldCfg.DeepCopy() if newCfg.FeatureGates == nil { newCfg.FeatureGates = make(map[string]bool) @@ -92,7 +90,6 @@ func configureTopologyManagerInKubelet(f *framework.Framework, policy string) { // Set the Topology Manager policy newCfg.TopologyManagerPolicy = policy - //newCfg.TopologyManagerPolicy = topologymanager.PolicySingleNumaNode // Set the CPU Manager policy to static. newCfg.CPUManagerPolicy = string(cpumanager.PolicyStatic) @@ -351,18 +348,19 @@ func runTopologyManagerSuiteTests(f *framework.Framework) { } func runTopologyManagerTests(f *framework.Framework) { - var oldCfg *kubeletconfig.KubeletConfiguration - ginkgo.It("run Topology Manager test suite", func() { + oldCfg, err := getCurrentKubeletConfig() + framework.ExpectNoError(err) var policies = []string{topologymanager.PolicySingleNumaNode, topologymanager.PolicyRestricted, topologymanager.PolicyBestEffort, topologymanager.PolicyNone} for _, policy := range policies { // Configure Topology Manager - ginkgo.By("by configuring Topology Manager policy to xxx") + ginkgo.By(fmt.Sprintf("by configuring Topology Manager policy to %s", policy)) framework.Logf("Configuring topology Manager policy to %s", policy) - configureTopologyManagerInKubelet(f, policy) + + configureTopologyManagerInKubelet(f, oldCfg, policy) // Run the tests runTopologyManagerSuiteTests(f) } From cd7e3d626c52fd687fd0155440d946dbaa5fd5be Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Fri, 17 Jan 2020 13:31:11 +0100 Subject: [PATCH 02/13] e2e: topomgr: add test infra This patch all the testing infra and utilities needed to run e2e topology manager tests. This include setup a guaranteed pod which needs some devices. The simplest real device available for the purpose are the SRIOV devices, hence we use them. This patch pulls the SRIOV device plugin from the official, yet external, repository. We do it as close as possible for the nvidia GPU plugin. This patch also performs minor refactoring for some test framework utilities, needed to support the new e2e tests. Finally, we add an empty e2e topology manager test, to be completed by the next patch. 
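For illustration only (not part of this patch): a condensed sketch of the kind of guaranteed pod these tests end up building, with requests equal to limits for CPU, memory and one SRIOV device so that every resource has to be NUMA-aligned. The helper name, the busybox image and the resource handling are placeholders for the example, not the exact code added below.

```go
package e2enode

import (
	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// guPodWithDevice sketches the guaranteed (Gu) pod used throughout these tests:
// requests == limits for CPU, memory and one device, so the CPU Manager, the
// Device Manager and the Topology Manager all have to agree on a NUMA placement.
func guPodWithDevice(devResource string) *v1.Pod {
	res := v1.ResourceList{
		v1.ResourceCPU:               resource.MustParse("1000m"),
		v1.ResourceMemory:            resource.MustParse("100Mi"),
		v1.ResourceName(devResource): resource.MustParse("1"),
	}
	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: "gu-pod"},
		Spec: v1.PodSpec{
			RestartPolicy: v1.RestartPolicyNever,
			Containers: []v1.Container{{
				Name:      "gu-container",
				Image:     "busybox",
				Command:   []string{"sh", "-c", "env && sleep 1d"},
				Resources: v1.ResourceRequirements{Requests: res, Limits: res},
			}},
		},
	}
}
```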
Signed-off-by: Francesco Romani --- hack/generate-bindata.sh | 1 + test/e2e/framework/util.go | 6 +- test/e2e/generated/BUILD | 1 + test/e2e_node/BUILD | 3 + test/e2e_node/image_list.go | 25 ++ test/e2e_node/testing-manifests/BUILD | 14 ++ .../testing-manifests/sriovdp-cm.yaml | 36 +++ .../testing-manifests/sriovdp-ds.yaml | 58 +++++ .../testing-manifests/sriovdp-sa.yaml | 5 + test/e2e_node/topology_manager_test.go | 218 +++++++++++++++++- test/e2e_node/util_sriov.go | 30 +++ 11 files changed, 385 insertions(+), 12 deletions(-) create mode 100644 test/e2e_node/testing-manifests/BUILD create mode 100644 test/e2e_node/testing-manifests/sriovdp-cm.yaml create mode 100644 test/e2e_node/testing-manifests/sriovdp-ds.yaml create mode 100644 test/e2e_node/testing-manifests/sriovdp-sa.yaml create mode 100644 test/e2e_node/util_sriov.go diff --git a/hack/generate-bindata.sh b/hack/generate-bindata.sh index f8cb2768ff2..25cd5d75d96 100755 --- a/hack/generate-bindata.sh +++ b/hack/generate-bindata.sh @@ -47,6 +47,7 @@ BINDATA_OUTPUT="test/e2e/generated/bindata.go" go-bindata -nometadata -o "${BINDATA_OUTPUT}.tmp" -pkg generated \ -ignore .jpg -ignore .png -ignore .md -ignore 'BUILD(\.bazel)?' \ "test/e2e/testing-manifests/..." \ + "test/e2e_node/testing-manifests/..." \ "test/images/..." \ "test/fixtures/..." diff --git a/test/e2e/framework/util.go b/test/e2e/framework/util.go index 4d839f0ebe5..233bb6da7d5 100644 --- a/test/e2e/framework/util.go +++ b/test/e2e/framework/util.go @@ -1878,7 +1878,6 @@ func DumpDebugInfo(c clientset.Interface, ns string) { // DsFromManifest reads a .json/yaml file and returns the daemonset in it. func DsFromManifest(url string) (*appsv1.DaemonSet, error) { - var ds appsv1.DaemonSet Logf("Parsing ds from %v", url) var response *http.Response @@ -1904,7 +1903,12 @@ func DsFromManifest(url string) (*appsv1.DaemonSet, error) { if err != nil { return nil, fmt.Errorf("Failed to read html response body: %v", err) } + return DsFromData(data) +} +// DsFromData reads a byte slice and returns the daemonset in it. 
+func DsFromData(data []byte) (*appsv1.DaemonSet, error) { + var ds appsv1.DaemonSet dataJSON, err := utilyaml.ToJSON(data) if err != nil { return nil, fmt.Errorf("Failed to parse data to json: %v", err) diff --git a/test/e2e/generated/BUILD b/test/e2e/generated/BUILD index c5d8f822a81..de04cfe7b5e 100644 --- a/test/e2e/generated/BUILD +++ b/test/e2e/generated/BUILD @@ -24,6 +24,7 @@ go_bindata( name = "bindata", srcs = [ "//test/e2e/testing-manifests:all-srcs", + "//test/e2e_node/testing-manifests:all-srcs", "//test/fixtures:all-srcs", "//test/images:all-srcs", ], diff --git a/test/e2e_node/BUILD b/test/e2e_node/BUILD index 29d740a2a97..a59aeeeff7f 100644 --- a/test/e2e_node/BUILD +++ b/test/e2e_node/BUILD @@ -17,6 +17,7 @@ go_library( "node_problem_detector_linux.go", "resource_collector.go", "util.go", + "util_sriov.go", "util_xfs_linux.go", "util_xfs_unsupported.go", ], @@ -49,6 +50,7 @@ go_library( "//test/e2e/framework/gpu:go_default_library", "//test/e2e/framework/metrics:go_default_library", "//test/e2e/framework/node:go_default_library", + "//test/e2e/framework/testfiles:go_default_library", "//test/utils/image:go_default_library", "//vendor/github.com/blang/semver:go_default_library", "//vendor/github.com/coreos/go-systemd/util:go_default_library", @@ -266,6 +268,7 @@ filegroup( "//test/e2e_node/runner/remote:all-srcs", "//test/e2e_node/services:all-srcs", "//test/e2e_node/system:all-srcs", + "//test/e2e_node/testing-manifests:all-srcs", ], tags = ["automanaged"], visibility = ["//visibility:public"], diff --git a/test/e2e_node/image_list.go b/test/e2e_node/image_list.go index 2aef9388a27..e896ae36522 100644 --- a/test/e2e_node/image_list.go +++ b/test/e2e_node/image_list.go @@ -31,6 +31,7 @@ import ( commontest "k8s.io/kubernetes/test/e2e/common" "k8s.io/kubernetes/test/e2e/framework" "k8s.io/kubernetes/test/e2e/framework/gpu" + "k8s.io/kubernetes/test/e2e/framework/testfiles" imageutils "k8s.io/kubernetes/test/utils/image" ) @@ -68,6 +69,7 @@ func updateImageWhiteList() { framework.ImageWhiteList = NodeImageWhiteList.Union(commontest.CommonImageWhiteList) // Images from extra envs framework.ImageWhiteList.Insert(getNodeProblemDetectorImage()) + framework.ImageWhiteList.Insert(getSRIOVDevicePluginImage()) } func getNodeProblemDetectorImage() string { @@ -184,3 +186,26 @@ func getGPUDevicePluginImage() string { } return ds.Spec.Template.Spec.Containers[0].Image } + +// getSRIOVDevicePluginImage returns the image of SRIOV device plugin. 
+func getSRIOVDevicePluginImage() string { + data, err := testfiles.Read(SRIOVDevicePluginDSYAML) + if err != nil { + klog.Errorf("Failed to read the device plugin manifest: %v", err) + return "" + } + ds, err := framework.DsFromData(data) + if err != nil { + klog.Errorf("Failed to parse the device plugin image: %v", err) + return "" + } + if ds == nil { + klog.Errorf("Failed to parse the device plugin image: the extracted DaemonSet is nil") + return "" + } + if len(ds.Spec.Template.Spec.Containers) < 1 { + klog.Errorf("Failed to parse the device plugin image: cannot extract the container from YAML") + return "" + } + return ds.Spec.Template.Spec.Containers[0].Image +} diff --git a/test/e2e_node/testing-manifests/BUILD b/test/e2e_node/testing-manifests/BUILD new file mode 100644 index 00000000000..7e76248ad95 --- /dev/null +++ b/test/e2e_node/testing-manifests/BUILD @@ -0,0 +1,14 @@ +package(default_visibility = ["//visibility:public"]) + +filegroup( + name = "package-srcs", + srcs = glob(["**"]), + tags = ["automanaged"], + visibility = ["//visibility:private"], +) + +filegroup( + name = "all-srcs", + srcs = [":package-srcs"], + tags = ["automanaged"], +) diff --git a/test/e2e_node/testing-manifests/sriovdp-cm.yaml b/test/e2e_node/testing-manifests/sriovdp-cm.yaml new file mode 100644 index 00000000000..373d759767c --- /dev/null +++ b/test/e2e_node/testing-manifests/sriovdp-cm.yaml @@ -0,0 +1,36 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: sriovdp-config + namespace: kube-system +data: + config.json: | + { + "resourceList": [{ + "resourceName": "intel_sriov_netdevice", + "selectors": { + "vendors": ["8086"], + "devices": ["154c", "10ed", "1521"], + "drivers": ["i40evf", "ixgbevf", "igb"] + } + }, + { + "resourceName": "intel_sriov_dpdk", + "selectors": { + "vendors": ["8086"], + "devices": ["154c", "10ed"], + "drivers": ["vfio-pci"], + "pfNames": ["enp0s0f0","enp2s2f1"] + } + }, + { + "resourceName": "mlnx_sriov_rdma", + "isRdma": true, + "selectors": { + "vendors": ["15b3"], + "devices": ["1018"], + "drivers": ["mlx5_ib"] + } + } + ] + } diff --git a/test/e2e_node/testing-manifests/sriovdp-ds.yaml b/test/e2e_node/testing-manifests/sriovdp-ds.yaml new file mode 100644 index 00000000000..30f76ff470b --- /dev/null +++ b/test/e2e_node/testing-manifests/sriovdp-ds.yaml @@ -0,0 +1,58 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: kube-sriov-device-plugin-amd64 + namespace: kube-system + labels: + tier: node + app: sriovdp +spec: + selector: + matchLabels: + name: sriov-device-plugin + template: + metadata: + labels: + name: sriov-device-plugin + tier: node + app: sriovdp + spec: + hostNetwork: true + hostPID: true + nodeSelector: + beta.kubernetes.io/arch: amd64 + tolerations: + - key: node-role.kubernetes.io/master + operator: Exists + effect: NoSchedule + serviceAccountName: sriov-device-plugin + containers: + - name: kube-sriovdp + image: docker.io/nfvpe/sriov-device-plugin:v3.1 + imagePullPolicy: Never + args: + - --log-dir=sriovdp + - --log-level=10 + securityContext: + privileged: true + volumeMounts: + - name: devicesock + mountPath: /var/lib/kubelet/ + readOnly: false + - name: log + mountPath: /var/log + - name: config-volume + mountPath: /etc/pcidp + volumes: + - name: devicesock + hostPath: + path: /var/lib/kubelet/ + - name: log + hostPath: + path: /var/log + - name: config-volume + configMap: + name: sriovdp-config + items: + - key: config.json + path: config.json diff --git a/test/e2e_node/testing-manifests/sriovdp-sa.yaml 
b/test/e2e_node/testing-manifests/sriovdp-sa.yaml new file mode 100644 index 00000000000..73bf1199ee2 --- /dev/null +++ b/test/e2e_node/testing-manifests/sriovdp-sa.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: sriov-device-plugin + namespace: kube-system diff --git a/test/e2e_node/topology_manager_test.go b/test/e2e_node/topology_manager_test.go index afb1a6da383..3db897f4a8b 100644 --- a/test/e2e_node/topology_manager_test.go +++ b/test/e2e_node/topology_manager_test.go @@ -17,10 +17,16 @@ limitations under the License. package e2enode import ( + "context" "fmt" + "os/exec" + "strconv" + "strings" "time" v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config" @@ -29,7 +35,9 @@ import ( "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager" "k8s.io/kubernetes/test/e2e/framework" e2enode "k8s.io/kubernetes/test/e2e/framework/node" + e2epod "k8s.io/kubernetes/test/e2e/framework/pod" e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" + "k8s.io/kubernetes/test/e2e/framework/testfiles" "github.com/onsi/ginkgo" "github.com/onsi/gomega" @@ -37,16 +45,52 @@ import ( // Helper for makeTopologyManagerPod(). type tmCtnAttribute struct { - ctnName string - cpuRequest string - cpuLimit string + ctnName string + cpuRequest string + cpuLimit string + devResource string +} + +func detectNUMANodes() int { + outData, err := exec.Command("/bin/sh", "-c", "lscpu | grep \"NUMA node(s):\" | cut -d \":\" -f 2").Output() + framework.ExpectNoError(err) + + numaNodes, err := strconv.Atoi(strings.TrimSpace(string(outData))) + framework.ExpectNoError(err) + + return numaNodes +} + +// TODO: what about HT? +func detectCoresPerSocket() int { + outData, err := exec.Command("/bin/sh", "-c", "lscpu | grep \"Core(s) per socket:\" | cut -d \":\" -f 2").Output() + framework.ExpectNoError(err) + + coreCount, err := strconv.Atoi(strings.TrimSpace(string(outData))) + framework.ExpectNoError(err) + + return coreCount +} + +func detectSRIOVDevices() int { + outData, err := exec.Command("/bin/sh", "-c", "ls /sys/bus/pci/devices/*/sriov_totalvfs | wc -w").Output() + framework.ExpectNoError(err) + + devCount, err := strconv.Atoi(strings.TrimSpace(string(outData))) + framework.ExpectNoError(err) + + return devCount } // makeTopologyMangerPod returns a pod with the provided tmCtnAttributes. 
func makeTopologyManagerPod(podName string, tmCtnAttributes []tmCtnAttribute) *v1.Pod { + cpusetCmd := "grep Cpus_allowed_list /proc/self/status | cut -f2 && sleep 1d" + return makeTopologyManagerTestPod(podName, cpusetCmd, tmCtnAttributes) +} + +func makeTopologyManagerTestPod(podName, podCmd string, tmCtnAttributes []tmCtnAttribute) *v1.Pod { var containers []v1.Container for _, ctnAttr := range tmCtnAttributes { - cpusetCmd := fmt.Sprintf("grep Cpus_allowed_list /proc/self/status | cut -f2 && sleep 1d") ctn := v1.Container{ Name: ctnAttr.ctnName, Image: busyboxImage, @@ -60,7 +104,11 @@ func makeTopologyManagerPod(podName string, tmCtnAttributes []tmCtnAttribute) *v v1.ResourceName(v1.ResourceMemory): resource.MustParse("100Mi"), }, }, - Command: []string{"sh", "-c", cpusetCmd}, + Command: []string{"sh", "-c", podCmd}, + } + if ctnAttr.devResource != "" { + ctn.Resources.Requests[v1.ResourceName(ctnAttr.devResource)] = resource.MustParse("1") + ctn.Resources.Limits[v1.ResourceName(ctnAttr.devResource)] = resource.MustParse("1") } containers = append(containers, ctn) } @@ -121,7 +169,60 @@ func configureTopologyManagerInKubelet(f *framework.Framework, oldCfg *kubeletco }, time.Minute, time.Second).Should(gomega.BeTrue()) } -func runTopologyManagerSuiteTests(f *framework.Framework) { +// getSRIOVDevicePluginPod returns the Device Plugin pod for sriov resources in e2e tests. +func getSRIOVDevicePluginPod() *v1.Pod { + ds := readDaemonSetV1OrDie(testfiles.ReadOrDie(SRIOVDevicePluginDSYAML)) + p := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: SRIOVDevicePluginName, + Namespace: metav1.NamespaceSystem, + }, + + Spec: ds.Spec.Template.Spec, + } + + return p +} + +func readConfigMapV1OrDie(objBytes []byte) *v1.ConfigMap { + v1.AddToScheme(appsScheme) + requiredObj, err := runtime.Decode(appsCodecs.UniversalDecoder(v1.SchemeGroupVersion), objBytes) + if err != nil { + panic(err) + } + return requiredObj.(*v1.ConfigMap) +} + +func readServiceAccountV1OrDie(objBytes []byte) *v1.ServiceAccount { + v1.AddToScheme(appsScheme) + requiredObj, err := runtime.Decode(appsCodecs.UniversalDecoder(v1.SchemeGroupVersion), objBytes) + if err != nil { + panic(err) + } + return requiredObj.(*v1.ServiceAccount) +} + +// numberOfResources returns the number of resources advertised by a node. 
+func numberOfResources(node *v1.Node, resourceKey string) int64 { + val, ok := node.Status.Capacity[v1.ResourceName(resourceKey)] + + if !ok { + return 0 + } + + return val.Value() +} + +func deletePodInNamespace(f *framework.Framework, namespace, name string) { + gp := int64(0) + deleteOptions := metav1.DeleteOptions{ + GracePeriodSeconds: &gp, + } + err := f.ClientSet.CoreV1().Pods(namespace).Delete(context.TODO(), name, &deleteOptions) + framework.ExpectNoError(err) +} + +func runTopologyManagerPolicySuiteTests(f *framework.Framework) { var cpuCap, cpuAlloc int64 var cpuListString, expAllowedCPUsListRegex string var cpuList []int @@ -347,9 +448,72 @@ func runTopologyManagerSuiteTests(f *framework.Framework) { waitForContainerRemoval(pod2.Spec.Containers[0].Name, pod2.Name, pod2.Namespace) } +func runTopologyManagerNodeAlignmentSuiteTests(f *framework.Framework) { + var err error + + configMap := readConfigMapV1OrDie(testfiles.ReadOrDie(SRIOVDevicePluginCMYAML)) + ginkgo.By(fmt.Sprintf("Creating configMap %v/%v", metav1.NamespaceSystem, configMap.Name)) + if _, err = f.ClientSet.CoreV1().ConfigMaps(metav1.NamespaceSystem).Create(context.TODO(), configMap, metav1.CreateOptions{}); err != nil { + framework.Failf("unable to create test configMap %s: %v", configMap.Name, err) + } + + serviceAccount := readServiceAccountV1OrDie(testfiles.ReadOrDie(SRIOVDevicePluginSAYAML)) + ginkgo.By(fmt.Sprintf("Creating serviceAccount %v/%v", metav1.NamespaceSystem, serviceAccount.Name)) + if _, err = f.ClientSet.CoreV1().ServiceAccounts(metav1.NamespaceSystem).Create(context.TODO(), serviceAccount, metav1.CreateOptions{}); err != nil { + framework.Failf("unable to create test serviceAccount %s: %v", serviceAccount.Name, err) + } + + e2enode.WaitForNodeToBeReady(f.ClientSet, framework.TestContext.NodeName, 5*time.Minute) + + dp := getSRIOVDevicePluginPod() + dp.Spec.NodeName = framework.TestContext.NodeName + + ginkgo.By("Create SRIOV device plugin pod") + dpPod, err := f.ClientSet.CoreV1().Pods(metav1.NamespaceSystem).Create(context.TODO(), dp, metav1.CreateOptions{}) + framework.ExpectNoError(err) + + ginkgo.By("Waiting for devices to become available on the local node") + gomega.Eventually(func() bool { + node := getLocalNode(f) + framework.Logf("Node status: %v", node.Status.Capacity) + return numberOfResources(node, SRIOVResourceName) > 0 + }, 5*time.Minute, framework.Poll).Should(gomega.BeTrue()) + framework.Logf("Successfully created device plugin pod") + + ginkgo.By("running a Gu pod") + ctnAttrs := []tmCtnAttribute{ + { + ctnName: "gu-container", + cpuRequest: "1000m", + cpuLimit: "1000m", + devResource: SRIOVResourceName, + }, + } + + pod := makeTopologyManagerTestPod("gu-pod", "env && sleep 1d", ctnAttrs) + pod = f.PodClient().CreateSync(pod) + + ginkgo.By("validating the Gu pod") + _, err = e2epod.GetPodLogs(f.ClientSet, f.Namespace.Name, pod.Name, pod.Spec.Containers[0].Name) + framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", + pod.Spec.Containers[0].Name, pod.Name) + + ginkgo.By("by deleting the pods and waiting for container removal") + deletePods(f, []string{pod.Name}) + waitForContainerRemoval(pod.Spec.Containers[0].Name, pod.Name, pod.Namespace) + + framework.Logf("deleting the SRIOV device plugin pod %s/%s and waiting for container %s removal", + dpPod.Namespace, dpPod.Name, dpPod.Spec.Containers[0].Name) + deletePodInNamespace(f, dpPod.Namespace, dpPod.Name) + waitForContainerRemoval(dpPod.Spec.Containers[0].Name, dpPod.Name, dpPod.Namespace) +} + 
func runTopologyManagerTests(f *framework.Framework) { - ginkgo.It("run Topology Manager test suite", func() { - oldCfg, err := getCurrentKubeletConfig() + var oldCfg *kubeletconfig.KubeletConfiguration + var err error + + ginkgo.It("run Topology Manager policy test suite", func() { + oldCfg, err = getCurrentKubeletConfig() framework.ExpectNoError(err) var policies = []string{topologymanager.PolicySingleNumaNode, topologymanager.PolicyRestricted, @@ -362,13 +526,45 @@ func runTopologyManagerTests(f *framework.Framework) { configureTopologyManagerInKubelet(f, oldCfg, policy) // Run the tests - runTopologyManagerSuiteTests(f) + runTopologyManagerPolicySuiteTests(f) } // restore kubelet config setOldKubeletConfig(f, oldCfg) - // Debug sleep to allow time to look at kubelet config - time.Sleep(5 * time.Minute) + // Delete state file to allow repeated runs + deleteStateFile() + }) + + ginkgo.It("run Topology Manager node alignment test suite", func() { + numaNodes := detectNUMANodes() + coreCount := detectCoresPerSocket() + sriovdevCount := detectSRIOVDevices() + + if numaNodes < 2 { + e2eskipper.Skipf("this test is meant to run on a multi-node NUMA system") + } + if coreCount < 4 { + e2eskipper.Skipf("this test is meant to run on a system with at least 4 cores per socket") + } + if sriovdevCount == 0 { + e2eskipper.Skipf("this test is meant to run on a system with at least one SRIOV device") + } + + oldCfg, err = getCurrentKubeletConfig() + framework.ExpectNoError(err) + + policy := topologymanager.PolicySingleNumaNode + + // Configure Topology Manager + ginkgo.By(fmt.Sprintf("by configuring Topology Manager policy to %s", policy)) + framework.Logf("Configuring topology Manager policy to %s", policy) + + configureTopologyManagerInKubelet(f, oldCfg, policy) + + runTopologyManagerNodeAlignmentSuiteTests(f) + + // restore kubelet config + setOldKubeletConfig(f, oldCfg) // Delete state file to allow repeated runs deleteStateFile() diff --git a/test/e2e_node/util_sriov.go b/test/e2e_node/util_sriov.go new file mode 100644 index 00000000000..f985b11d94a --- /dev/null +++ b/test/e2e_node/util_sriov.go @@ -0,0 +1,30 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2enode + +const ( + // SRIOVResourceName is the name of the example resource which is used in the e2e test + SRIOVResourceName = "intel.com/intel_sriov_netdevice" // TODO make it configurable + // SRIOVDevicePluginCMYAML is the path of the config map to configure the sriov device plugin. + SRIOVDevicePluginCMYAML = "test/e2e_node/testing-manifests/sriovdp-cm.yaml" + // SRIOVDevicePluginDSYAML is the path of the daemonset template of the sriov device plugin. // TODO: Parametrize it by making it a feature in TestFramework. + SRIOVDevicePluginDSYAML = "test/e2e_node/testing-manifests/sriovdp-ds.yaml" + // SRIOVDevicePluginSAYAML is the path of the service account needed by the sriov device plugin to run. 
+ SRIOVDevicePluginSAYAML = "test/e2e_node/testing-manifests/sriovdp-sa.yaml" + // SRIOVDevicePluginName is the name of the device plugin pod + SRIOVDevicePluginName = "sriov-device-plugin" +) From fa26fb6817911f04012b9aeacb473be48b0e3e07 Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Tue, 28 Jan 2020 17:43:35 +0100 Subject: [PATCH 03/13] e2e: topomgr: check pod resource alignment This patch extends and completes the previously-added empty topology manager test for single-NUMA node policy by adding reporting in the test pod and checking the resource alignment. Signed-off-by: Francesco Romani --- test/e2e_node/BUILD | 2 + test/e2e_node/numa_alignment.go | 199 +++++++++++++++++++++++++ test/e2e_node/topology_manager_test.go | 64 +++++--- 3 files changed, 242 insertions(+), 23 deletions(-) create mode 100644 test/e2e_node/numa_alignment.go diff --git a/test/e2e_node/BUILD b/test/e2e_node/BUILD index a59aeeeff7f..88e895289d3 100644 --- a/test/e2e_node/BUILD +++ b/test/e2e_node/BUILD @@ -15,6 +15,7 @@ go_library( "framework.go", "image_list.go", "node_problem_detector_linux.go", + "numa_alignment.go", "resource_collector.go", "util.go", "util_sriov.go", @@ -31,6 +32,7 @@ go_library( "//pkg/kubelet/apis/podresources/v1alpha1:go_default_library", "//pkg/kubelet/apis/stats/v1alpha1:go_default_library", "//pkg/kubelet/cm:go_default_library", + "//pkg/kubelet/cm/cpuset:go_default_library", "//pkg/kubelet/kubeletconfig/util/codec:go_default_library", "//pkg/kubelet/metrics:go_default_library", "//pkg/kubelet/remote:go_default_library", diff --git a/test/e2e_node/numa_alignment.go b/test/e2e_node/numa_alignment.go new file mode 100644 index 00000000000..d44a58036d3 --- /dev/null +++ b/test/e2e_node/numa_alignment.go @@ -0,0 +1,199 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package e2enode + +import ( + "fmt" + "sort" + "strconv" + "strings" + + v1 "k8s.io/api/core/v1" + "k8s.io/kubernetes/pkg/kubelet/cm/cpuset" + + "k8s.io/kubernetes/test/e2e/framework" +) + +type numaPodResources struct { + CPUToNUMANode map[int]int + PCIDevsToNUMANode map[string]int +} + +func (R *numaPodResources) CheckAlignment() bool { + nodeNum := -1 // not set + for _, cpuNode := range R.CPUToNUMANode { + if nodeNum == -1 { + nodeNum = cpuNode + } else if nodeNum != cpuNode { + return false + } + } + for _, devNode := range R.PCIDevsToNUMANode { + // TODO: explain -1 + if devNode != -1 && nodeNum != devNode { + return false + } + } + return true +} + +func (R *numaPodResources) String() string { + var b strings.Builder + // To store the keys in slice in sorted order + var cpuKeys []int + for ck := range R.CPUToNUMANode { + cpuKeys = append(cpuKeys, ck) + } + sort.Ints(cpuKeys) + for _, k := range cpuKeys { + nodeNum := R.CPUToNUMANode[k] + b.WriteString(fmt.Sprintf("CPU cpu#%03d=%02d\n", k, nodeNum)) + } + var pciKeys []string + for pk := range R.PCIDevsToNUMANode { + pciKeys = append(pciKeys, pk) + } + sort.Strings(pciKeys) + for _, k := range pciKeys { + nodeNum := R.PCIDevsToNUMANode[k] + b.WriteString(fmt.Sprintf("PCI %s=%02d\n", k, nodeNum)) + } + return b.String() +} + +func getCPUToNUMANodeMapFromEnv(f *framework.Framework, pod *v1.Pod, environ map[string]string, numaNodes int) (map[int]int, error) { + var cpuIDs []int + cpuListAllowedEnvVar := "CPULIST_ALLOWED" + + for name, value := range environ { + if name == cpuListAllowedEnvVar { + cpus, err := cpuset.Parse(value) + if err != nil { + return nil, err + } + cpuIDs = cpus.ToSlice() + } + } + if len(cpuIDs) == 0 { + return nil, fmt.Errorf("variable %q found in environ", cpuListAllowedEnvVar) + } + + cpusPerNUMA := make(map[int][]int) + for numaNode := 0; numaNode < numaNodes; numaNode++ { + nodeCPUList := f.ExecCommandInContainer(pod.Name, pod.Spec.Containers[0].Name, + "/bin/cat", fmt.Sprintf("/sys/devices/system/node/node%d/cpulist", numaNode)) + + cpus, err := cpuset.Parse(nodeCPUList) + if err != nil { + return nil, err + } + cpusPerNUMA[numaNode] = cpus.ToSlice() + } + + // CPU IDs -> NUMA Node ID + CPUToNUMANode := make(map[int]int) + for nodeNum, cpus := range cpusPerNUMA { + for _, cpu := range cpus { + CPUToNUMANode[cpu] = nodeNum + } + } + + // filter out only the allowed CPUs + CPUMap := make(map[int]int) + for _, cpuID := range cpuIDs { + _, ok := CPUToNUMANode[cpuID] + if !ok { + return nil, fmt.Errorf("CPU %d not found on NUMA map: %v", cpuID, CPUToNUMANode) + } + CPUMap[cpuID] = CPUToNUMANode[cpuID] + } + return CPUMap, nil +} + +func getPCIDeviceToNumaNodeMapFromEnv(f *framework.Framework, pod *v1.Pod, environ map[string]string) (map[string]int, error) { + pciDevPrefix := "PCIDEVICE_" + // at this point we don't care which plugin selected the device, + // we only need to know which devices were assigned to the POD. + // Hence, do prefix search for the variable and fetch the device(s). 
+ + NUMAPerDev := make(map[string]int) + for name, value := range environ { + if !strings.HasPrefix(name, pciDevPrefix) { + continue + } + + // a single plugin can allocate more than a single device + pciDevs := strings.Split(value, ",") + for _, pciDev := range pciDevs { + pciDevNUMANode := f.ExecCommandInContainer(pod.Name, pod.Spec.Containers[0].Name, + "/bin/cat", fmt.Sprintf("/sys/bus/pci/devices/%s/numa_node", pciDev)) + + nodeNum, err := strconv.Atoi(pciDevNUMANode) + if err != nil { + return nil, err + } + NUMAPerDev[pciDev] = nodeNum + } + } + if len(NUMAPerDev) == 0 { + return nil, fmt.Errorf("no PCI devices found in environ") + } + return NUMAPerDev, nil +} + +func makeEnvMap(logs string) (map[string]string, error) { + podEnv := strings.Split(logs, "\n") + envMap := make(map[string]string) + for _, envVar := range podEnv { + if len(envVar) == 0 { + continue + } + pair := strings.SplitN(envVar, "=", 2) + if len(pair) != 2 { + return nil, fmt.Errorf("unable to split %q", envVar) + } + envMap[pair[0]] = pair[1] + } + return envMap, nil +} + +func checkNUMAAlignment(f *framework.Framework, pod *v1.Pod, logs string, numaNodes int) (numaPodResources, error) { + podEnv, err := makeEnvMap(logs) + if err != nil { + return numaPodResources{}, err + } + + CPUToNUMANode, err := getCPUToNUMANodeMapFromEnv(f, pod, podEnv, numaNodes) + if err != nil { + return numaPodResources{}, err + } + + PCIDevsToNUMANode, err := getPCIDeviceToNumaNodeMapFromEnv(f, pod, podEnv) + if err != nil { + return numaPodResources{}, err + } + + numaRes := numaPodResources{ + CPUToNUMANode: CPUToNUMANode, + PCIDevsToNUMANode: PCIDevsToNUMANode, + } + aligned := numaRes.CheckAlignment() + if !aligned { + return numaRes, fmt.Errorf("NUMA resources not aligned") + } + return numaRes, nil +} diff --git a/test/e2e_node/topology_manager_test.go b/test/e2e_node/topology_manager_test.go index 3db897f4a8b..833002fe460 100644 --- a/test/e2e_node/topology_manager_test.go +++ b/test/e2e_node/topology_manager_test.go @@ -43,6 +43,10 @@ import ( "github.com/onsi/gomega" ) +const ( + numalignCmd = `export CPULIST_ALLOWED=$( awk -F":\t*" '/Cpus_allowed_list/ { print $2 }' /proc/self/status); env; sleep 1d` +) + // Helper for makeTopologyManagerPod(). 
type tmCtnAttribute struct { ctnName string @@ -222,6 +226,18 @@ func deletePodInNamespace(f *framework.Framework, namespace, name string) { framework.ExpectNoError(err) } +func validatePodAlignment(f *framework.Framework, pod *v1.Pod, numaNodes int) { + ginkgo.By("validating the Gu pod") + logs, err := e2epod.GetPodLogs(f.ClientSet, f.Namespace.Name, pod.Name, pod.Spec.Containers[0].Name) + framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", + pod.Spec.Containers[0].Name, pod.Name) + + framework.Logf("got pod logs: %v", logs) + numaRes, err := checkNUMAAlignment(f, pod, logs, numaNodes) + framework.ExpectNoError(err, "NUMA Alignment check failed for [%s] of pod [%s]: %s", + pod.Spec.Containers[0].Name, pod.Name, numaRes.String()) +} + func runTopologyManagerPolicySuiteTests(f *framework.Framework) { var cpuCap, cpuAlloc int64 var cpuListString, expAllowedCPUsListRegex string @@ -448,7 +464,29 @@ func runTopologyManagerPolicySuiteTests(f *framework.Framework) { waitForContainerRemoval(pod2.Spec.Containers[0].Name, pod2.Name, pod2.Namespace) } -func runTopologyManagerNodeAlignmentSuiteTests(f *framework.Framework) { +func runTopologyManagerNodeAlignmentSinglePodTest(f *framework.Framework, numaNodes int) { + ginkgo.By("allocate aligned resources for a single pod") + ctnAttrs := []tmCtnAttribute{ + { + ctnName: "gu-container", + cpuRequest: "1000m", + cpuLimit: "1000m", + devResource: SRIOVResourceName, + }, + } + + pod := makeTopologyManagerTestPod("gu-pod", numalignCmd, ctnAttrs) + pod = f.PodClient().CreateSync(pod) + + validatePodAlignment(f, pod, numaNodes) + + framework.Logf("deleting the pod %s/%s and waiting for container %s removal", + pod.Namespace, pod.Name, pod.Spec.Containers[0].Name) + deletePods(f, []string{pod.Name}) + waitForContainerRemoval(pod.Spec.Containers[0].Name, pod.Name, pod.Namespace) +} + +func runTopologyManagerNodeAlignmentSuiteTests(f *framework.Framework, numaNodes int) { var err error configMap := readConfigMapV1OrDie(testfiles.ReadOrDie(SRIOVDevicePluginCMYAML)) @@ -480,27 +518,7 @@ func runTopologyManagerNodeAlignmentSuiteTests(f *framework.Framework) { }, 5*time.Minute, framework.Poll).Should(gomega.BeTrue()) framework.Logf("Successfully created device plugin pod") - ginkgo.By("running a Gu pod") - ctnAttrs := []tmCtnAttribute{ - { - ctnName: "gu-container", - cpuRequest: "1000m", - cpuLimit: "1000m", - devResource: SRIOVResourceName, - }, - } - - pod := makeTopologyManagerTestPod("gu-pod", "env && sleep 1d", ctnAttrs) - pod = f.PodClient().CreateSync(pod) - - ginkgo.By("validating the Gu pod") - _, err = e2epod.GetPodLogs(f.ClientSet, f.Namespace.Name, pod.Name, pod.Spec.Containers[0].Name) - framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", - pod.Spec.Containers[0].Name, pod.Name) - - ginkgo.By("by deleting the pods and waiting for container removal") - deletePods(f, []string{pod.Name}) - waitForContainerRemoval(pod.Spec.Containers[0].Name, pod.Name, pod.Namespace) + runTopologyManagerNodeAlignmentSinglePodTest(f, numaNodes) framework.Logf("deleting the SRIOV device plugin pod %s/%s and waiting for container %s removal", dpPod.Namespace, dpPod.Name, dpPod.Spec.Containers[0].Name) @@ -561,7 +579,7 @@ func runTopologyManagerTests(f *framework.Framework) { configureTopologyManagerInKubelet(f, oldCfg, policy) - runTopologyManagerNodeAlignmentSuiteTests(f) + runTopologyManagerNodeAlignmentSuiteTests(f, numaNodes) // restore kubelet config setOldKubeletConfig(f, oldCfg) From 
6687fcc78c103cb7c6df4153fadfee191472d26b Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Thu, 30 Jan 2020 16:52:55 +0100 Subject: [PATCH 04/13] e2e: topomgr: autodetect SRIOV resource to use The SRIOV device plugin can create different resources depending on both the hardware present on the system and the configuration. As long as we have at least one SRIOV device, the tests don't actually care about which specific device is. Previously, the test hardcoded the most common intel SRIOV device identifier. This patch lifts the restriction and let the test autodetect and use what's available. Signed-off-by: Francesco Romani --- test/e2e_node/topology_manager_test.go | 33 ++++++++++++++++---------- test/e2e_node/util_sriov.go | 2 -- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/test/e2e_node/topology_manager_test.go b/test/e2e_node/topology_manager_test.go index 833002fe460..944156b61b6 100644 --- a/test/e2e_node/topology_manager_test.go +++ b/test/e2e_node/topology_manager_test.go @@ -20,6 +20,7 @@ import ( "context" "fmt" "os/exec" + "regexp" "strconv" "strings" "time" @@ -206,15 +207,18 @@ func readServiceAccountV1OrDie(objBytes []byte) *v1.ServiceAccount { return requiredObj.(*v1.ServiceAccount) } -// numberOfResources returns the number of resources advertised by a node. -func numberOfResources(node *v1.Node, resourceKey string) int64 { - val, ok := node.Status.Capacity[v1.ResourceName(resourceKey)] - - if !ok { - return 0 +func findSRIOVResource(node *v1.Node) (string, int64) { + re := regexp.MustCompile(`^intel.com/.*sriov.*`) + for key, val := range node.Status.Capacity { + resource := string(key) + if re.MatchString(resource) { + v := val.Value() + if v > 0 { + return resource, v + } + } } - - return val.Value() + return "", 0 } func deletePodInNamespace(f *framework.Framework, namespace, name string) { @@ -464,14 +468,14 @@ func runTopologyManagerPolicySuiteTests(f *framework.Framework) { waitForContainerRemoval(pod2.Spec.Containers[0].Name, pod2.Name, pod2.Namespace) } -func runTopologyManagerNodeAlignmentSinglePodTest(f *framework.Framework, numaNodes int) { +func runTopologyManagerNodeAlignmentSinglePodTest(f *framework.Framework, sriovResourceName string, numaNodes int) { ginkgo.By("allocate aligned resources for a single pod") ctnAttrs := []tmCtnAttribute{ { ctnName: "gu-container", cpuRequest: "1000m", cpuLimit: "1000m", - devResource: SRIOVResourceName, + devResource: sriovResourceName, }, } @@ -510,15 +514,18 @@ func runTopologyManagerNodeAlignmentSuiteTests(f *framework.Framework, numaNodes dpPod, err := f.ClientSet.CoreV1().Pods(metav1.NamespaceSystem).Create(context.TODO(), dp, metav1.CreateOptions{}) framework.ExpectNoError(err) + sriovResourceName := "" + var sriovResourceAmount int64 ginkgo.By("Waiting for devices to become available on the local node") gomega.Eventually(func() bool { node := getLocalNode(f) framework.Logf("Node status: %v", node.Status.Capacity) - return numberOfResources(node, SRIOVResourceName) > 0 + sriovResourceName, sriovResourceAmount = findSRIOVResource(node) + return sriovResourceAmount > 0 }, 5*time.Minute, framework.Poll).Should(gomega.BeTrue()) - framework.Logf("Successfully created device plugin pod") + framework.Logf("Successfully created device plugin pod, detected SRIOV device %q", sriovResourceName) - runTopologyManagerNodeAlignmentSinglePodTest(f, numaNodes) + runTopologyManagerNodeAlignmentSinglePodTest(f, sriovResourceName, numaNodes) framework.Logf("deleting the SRIOV device plugin pod %s/%s and waiting for 
container %s removal", dpPod.Namespace, dpPod.Name, dpPod.Spec.Containers[0].Name) diff --git a/test/e2e_node/util_sriov.go b/test/e2e_node/util_sriov.go index f985b11d94a..4b404332157 100644 --- a/test/e2e_node/util_sriov.go +++ b/test/e2e_node/util_sriov.go @@ -17,8 +17,6 @@ limitations under the License. package e2enode const ( - // SRIOVResourceName is the name of the example resource which is used in the e2e test - SRIOVResourceName = "intel.com/intel_sriov_netdevice" // TODO make it configurable // SRIOVDevicePluginCMYAML is the path of the config map to configure the sriov device plugin. SRIOVDevicePluginCMYAML = "test/e2e_node/testing-manifests/sriovdp-cm.yaml" // SRIOVDevicePluginDSYAML is the path of the daemonset template of the sriov device plugin. // TODO: Parametrize it by making it a feature in TestFramework. From 1b5801a08696dcedb86ac05447a0e2a13db694b1 Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Mon, 3 Feb 2020 17:03:07 +0100 Subject: [PATCH 05/13] e2e: topomgr: add option to specify the SRIOV conf We cannot anticipate all the possible configurations needed by the SRIOV device plugin: there is too much variety. Hence, we need to allow the test environment to supply a host-specific ConfigMap to properly configure the device plugin and avoid false negatives. We still provide a the default config map as fallback and reference. Signed-off-by: Francesco Romani --- test/e2e/framework/test_context.go | 3 +++ test/e2e_node/e2e_node_suite_test.go | 1 + test/e2e_node/testing-manifests/sriovdp-cm.yaml | 4 ++-- test/e2e_node/topology_manager_test.go | 15 ++++++++++++++- 4 files changed, 20 insertions(+), 3 deletions(-) diff --git a/test/e2e/framework/test_context.go b/test/e2e/framework/test_context.go index c9b8a37db4f..a9391b72052 100644 --- a/test/e2e/framework/test_context.go +++ b/test/e2e/framework/test_context.go @@ -167,6 +167,9 @@ type TestContextType struct { // ProgressReportURL is the URL which progress updates will be posted to as tests complete. If empty, no updates are sent. ProgressReportURL string + + // SriovdpConfigMapFile is the path to the ConfigMap to configure the SRIOV device plugin on this host. + SriovdpConfigMapFile string } // NodeKillerConfig describes configuration of NodeKiller -- a utility to diff --git a/test/e2e_node/e2e_node_suite_test.go b/test/e2e_node/e2e_node_suite_test.go index 33b381b0f3b..0237f7d38b9 100644 --- a/test/e2e_node/e2e_node_suite_test.go +++ b/test/e2e_node/e2e_node_suite_test.go @@ -80,6 +80,7 @@ func registerNodeFlags(flags *flag.FlagSet) { flags.StringVar(&framework.TestContext.ImageDescription, "image-description", "", "The description of the image which the test will be running on.") flags.StringVar(&framework.TestContext.SystemSpecName, "system-spec-name", "", "The name of the system spec (e.g., gke) that's used in the node e2e test. The system specs are in test/e2e_node/system/specs/. This is used by the test framework to determine which tests to run for validating the system requirements.") flags.Var(cliflag.NewMapStringString(&framework.TestContext.ExtraEnvs), "extra-envs", "The extra environment variables needed for node e2e tests. 
Format: a list of key=value pairs, e.g., env1=val1,env2=val2") + flags.StringVar(&framework.TestContext.SriovdpConfigMapFile, "sriovdp-configmap-file", "", "The name of the SRIOV device plugin Config Map to load.") } func init() { diff --git a/test/e2e_node/testing-manifests/sriovdp-cm.yaml b/test/e2e_node/testing-manifests/sriovdp-cm.yaml index 373d759767c..f37ae0c8d2b 100644 --- a/test/e2e_node/testing-manifests/sriovdp-cm.yaml +++ b/test/e2e_node/testing-manifests/sriovdp-cm.yaml @@ -10,8 +10,8 @@ data: "resourceName": "intel_sriov_netdevice", "selectors": { "vendors": ["8086"], - "devices": ["154c", "10ed", "1521"], - "drivers": ["i40evf", "ixgbevf", "igb"] + "devices": ["154c", "10ed"], + "drivers": ["i40evf", "ixgbevf"] } }, { diff --git a/test/e2e_node/topology_manager_test.go b/test/e2e_node/topology_manager_test.go index 944156b61b6..127cbe50ac5 100644 --- a/test/e2e_node/topology_manager_test.go +++ b/test/e2e_node/topology_manager_test.go @@ -19,6 +19,7 @@ package e2enode import ( "context" "fmt" + "io/ioutil" "os/exec" "regexp" "strconv" @@ -491,9 +492,21 @@ func runTopologyManagerNodeAlignmentSinglePodTest(f *framework.Framework, sriovR } func runTopologyManagerNodeAlignmentSuiteTests(f *framework.Framework, numaNodes int) { + cmData := testfiles.ReadOrDie(SRIOVDevicePluginCMYAML) var err error - configMap := readConfigMapV1OrDie(testfiles.ReadOrDie(SRIOVDevicePluginCMYAML)) + // the SRIOVDP configuration is hw-dependent, so we allow per-test-host customization. + framework.Logf("host-local SRIOV Device Plugin Config Map %q", framework.TestContext.SriovdpConfigMapFile) + if framework.TestContext.SriovdpConfigMapFile != "" { + cmData, err = ioutil.ReadFile(framework.TestContext.SriovdpConfigMapFile) + if err != nil { + framework.Failf("unable to load the SRIOV Device Plugin ConfigMap: %v", err) + } + } else { + framework.Logf("Using built-in SRIOV Device Plugin Config Map") + } + + configMap := readConfigMapV1OrDie(cmData) ginkgo.By(fmt.Sprintf("Creating configMap %v/%v", metav1.NamespaceSystem, configMap.Name)) if _, err = f.ClientSet.CoreV1().ConfigMaps(metav1.NamespaceSystem).Create(context.TODO(), configMap, metav1.CreateOptions{}); err != nil { framework.Failf("unable to create test configMap %s: %v", configMap.Name, err) From ee92b4aae0292923ed15fe4d63685e4aa530f94b Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Wed, 29 Jan 2020 16:17:28 +0100 Subject: [PATCH 06/13] e2e: topomgr: add more positive tests this patch builds on the topology manager e2e infrastructure to add more positive e2e test cases. Signed-off-by: Francesco Romani --- test/e2e_node/topology_manager_test.go | 99 ++++++++++++++++++-------- 1 file changed, 69 insertions(+), 30 deletions(-) diff --git a/test/e2e_node/topology_manager_test.go b/test/e2e_node/topology_manager_test.go index 127cbe50ac5..b19764f2e0e 100644 --- a/test/e2e_node/topology_manager_test.go +++ b/test/e2e_node/topology_manager_test.go @@ -51,10 +51,12 @@ const ( // Helper for makeTopologyManagerPod(). type tmCtnAttribute struct { - ctnName string - cpuRequest string - cpuLimit string - devResource string + ctnName string + cpuRequest string + cpuLimit string + deviceName string + deviceRequest string + deviceLimit string } func detectNUMANodes() int { @@ -67,7 +69,6 @@ func detectNUMANodes() int { return numaNodes } -// TODO: what about HT? 
func detectCoresPerSocket() int { outData, err := exec.Command("/bin/sh", "-c", "lscpu | grep \"Core(s) per socket:\" | cut -d \":\" -f 2").Output() framework.ExpectNoError(err) @@ -112,9 +113,9 @@ func makeTopologyManagerTestPod(podName, podCmd string, tmCtnAttributes []tmCtnA }, Command: []string{"sh", "-c", podCmd}, } - if ctnAttr.devResource != "" { - ctn.Resources.Requests[v1.ResourceName(ctnAttr.devResource)] = resource.MustParse("1") - ctn.Resources.Limits[v1.ResourceName(ctnAttr.devResource)] = resource.MustParse("1") + if ctnAttr.deviceName != "" { + ctn.Resources.Requests[v1.ResourceName(ctnAttr.deviceName)] = resource.MustParse(ctnAttr.deviceRequest) + ctn.Resources.Limits[v1.ResourceName(ctnAttr.deviceName)] = resource.MustParse(ctnAttr.deviceLimit) } containers = append(containers, ctn) } @@ -469,29 +470,45 @@ func runTopologyManagerPolicySuiteTests(f *framework.Framework) { waitForContainerRemoval(pod2.Spec.Containers[0].Name, pod2.Name, pod2.Namespace) } -func runTopologyManagerNodeAlignmentSinglePodTest(f *framework.Framework, sriovResourceName string, numaNodes int) { - ginkgo.By("allocate aligned resources for a single pod") - ctnAttrs := []tmCtnAttribute{ - { - ctnName: "gu-container", - cpuRequest: "1000m", - cpuLimit: "1000m", - devResource: sriovResourceName, - }, +func runTopologyManagerNodeAlignmentTest(f *framework.Framework, numaNodes, numPods int, cpuAmount, sriovResourceName, deviceAmount string) { + ginkgo.By(fmt.Sprintf("allocate aligned resources for a %d pod(s): cpuAmount=%s %s=%s", numPods, cpuAmount, sriovResourceName, deviceAmount)) + + var pods []*v1.Pod + + for podID := 0; podID < numPods; podID++ { + ctnAttrs := []tmCtnAttribute{ + { + ctnName: "gu-container", + cpuRequest: cpuAmount, + cpuLimit: cpuAmount, + deviceName: sriovResourceName, + deviceRequest: deviceAmount, + deviceLimit: deviceAmount, + }, + } + + podName := fmt.Sprintf("gu-pod-%d", podID) + framework.Logf("creating pod %s attrs %v", podName, ctnAttrs) + pod := makeTopologyManagerTestPod(podName, numalignCmd, ctnAttrs) + pod = f.PodClient().CreateSync(pod) + framework.Logf("created pod %s", podName) + pods = append(pods, pod) } - pod := makeTopologyManagerTestPod("gu-pod", numalignCmd, ctnAttrs) - pod = f.PodClient().CreateSync(pod) + for podID := 0; podID < numPods; podID++ { + validatePodAlignment(f, pods[podID], numaNodes) + } - validatePodAlignment(f, pod, numaNodes) - - framework.Logf("deleting the pod %s/%s and waiting for container %s removal", - pod.Namespace, pod.Name, pod.Spec.Containers[0].Name) - deletePods(f, []string{pod.Name}) - waitForContainerRemoval(pod.Spec.Containers[0].Name, pod.Name, pod.Namespace) + for podID := 0; podID < numPods; podID++ { + pod := pods[podID] + framework.Logf("deleting the pod %s/%s and waiting for container %s removal", + pod.Namespace, pod.Name, pod.Spec.Containers[0].Name) + deletePods(f, []string{pod.Name}) + waitForContainerRemoval(pod.Spec.Containers[0].Name, pod.Name, pod.Namespace) + } } -func runTopologyManagerNodeAlignmentSuiteTests(f *framework.Framework, numaNodes int) { +func runTopologyManagerNodeAlignmentSuiteTests(f *framework.Framework, numaNodes, coreCount int) { cmData := testfiles.ReadOrDie(SRIOVDevicePluginCMYAML) var err error @@ -536,9 +553,29 @@ func runTopologyManagerNodeAlignmentSuiteTests(f *framework.Framework, numaNodes sriovResourceName, sriovResourceAmount = findSRIOVResource(node) return sriovResourceAmount > 0 }, 5*time.Minute, framework.Poll).Should(gomega.BeTrue()) - framework.Logf("Successfully created device 
plugin pod, detected SRIOV device %q", sriovResourceName) + framework.Logf("Successfully created device plugin pod, detected %d SRIOV device %q", sriovResourceAmount, sriovResourceName) - runTopologyManagerNodeAlignmentSinglePodTest(f, sriovResourceName, numaNodes) + // could have been a loop, we unroll it to explain the testcases + + // simplest case: one guaranteed core, one device + runTopologyManagerNodeAlignmentTest(f, numaNodes, 1, "1000m", sriovResourceName, "1") + + // two guaranteed cores, one device + runTopologyManagerNodeAlignmentTest(f, numaNodes, 1, "2000m", sriovResourceName, "1") + + // TODO: test taking an entire NUMA node. + // to do a meaningful test, we need to know: + // - where are the reserved CPUs placed (which NUMA node) + // - where are the SRIOV device attacched to (which NUMA node(s)) + + if sriovResourceAmount > 1 { + // no matter how busses are connected to NUMA nodes and SRIOV devices are installed, this function + // preconditions must ensure the following can be fulfilled + runTopologyManagerNodeAlignmentTest(f, numaNodes, 2, "1000m", sriovResourceName, "1") + runTopologyManagerNodeAlignmentTest(f, numaNodes, 2, "2000m", sriovResourceName, "1") + + // testing more complex conditions require knowledge about the system cpu+bus topology + } framework.Logf("deleting the SRIOV device plugin pod %s/%s and waiting for container %s removal", dpPod.Namespace, dpPod.Name, dpPod.Spec.Containers[0].Name) @@ -574,9 +611,11 @@ func runTopologyManagerTests(f *framework.Framework) { }) ginkgo.It("run Topology Manager node alignment test suite", func() { + // this is a very rough check. We just want to rule out system that does NOT have + // any SRIOV device. A more proper check will be done in runTopologyManagerNodeAlignmentTest + sriovdevCount := detectSRIOVDevices() numaNodes := detectNUMANodes() coreCount := detectCoresPerSocket() - sriovdevCount := detectSRIOVDevices() if numaNodes < 2 { e2eskipper.Skipf("this test is meant to run on a multi-node NUMA system") @@ -599,7 +638,7 @@ func runTopologyManagerTests(f *framework.Framework) { configureTopologyManagerInKubelet(f, oldCfg, policy) - runTopologyManagerNodeAlignmentSuiteTests(f, numaNodes) + runTopologyManagerNodeAlignmentSuiteTests(f, numaNodes, coreCount) // restore kubelet config setOldKubeletConfig(f, oldCfg) From d9d652e867dce2877fb787d0b72ab28139efc571 Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Mon, 3 Feb 2020 18:15:26 +0100 Subject: [PATCH 07/13] e2e: topomgr: initial negative tests Negative tests is when we request a gu Pod we know the system cannot fullfill - hence we expect rejection from the topology manager. Unfortunately, besides the trivial case of excessive cores (request more socket than a NUMA node provides) we cannot easily test the devices, because crafting a proper pod will require detailed knowledge of the hw topology. Let's consider a hypotetical two-node NUMA system with two PCIe busses, one per NUMA node, with a SRIOV device on each bus. A proper negative test would require two SRIOV device, that the system can provide but not on the same single NUMA node. Requiring for example three devices (one more than the system provides) will lead to a different, legitimate admission error. For these reasons we bootstrap the testing infra for the negative tests, but we add just the simplest one. 
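To make the "excessive cores" case concrete, here is a sketch of the over-request the negative test builds; the figures used in the comments (16 cores per socket, hyperthreading enabled) are example values, not assumptions about any particular test host.

```go
package e2enode

import "fmt"

// overflowCPURequest sketches how the negative test overshoots a single NUMA node:
// one node offers coreCount*threadsPerCore logical CPUs, but the pod asks for
// (1+threadsPerCore)*coreCount full CPUs, which no single node can provide, so the
// single-numa-node policy has to reject the pod with a Topology Affinity Error.
func overflowCPURequest(coreCount, threadsPerCore int) string {
	return fmt.Sprintf("%dm", (1+threadsPerCore)*coreCount*1000)
}

// Example: coreCount=16, threadsPerCore=2 gives "48000m" (48 CPUs) against the
// 32 logical CPUs available on a single NUMA node.
```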
Signed-off-by: Francesco Romani --- test/e2e_node/topology_manager_test.go | 61 +++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 7 deletions(-) diff --git a/test/e2e_node/topology_manager_test.go b/test/e2e_node/topology_manager_test.go index b19764f2e0e..a59fb3ce372 100644 --- a/test/e2e_node/topology_manager_test.go +++ b/test/e2e_node/topology_manager_test.go @@ -46,7 +46,8 @@ import ( ) const ( - numalignCmd = `export CPULIST_ALLOWED=$( awk -F":\t*" '/Cpus_allowed_list/ { print $2 }' /proc/self/status); env; sleep 1d` + numalignCmd = `export CPULIST_ALLOWED=$( awk -F":\t*" '/Cpus_allowed_list/ { print $2 }' /proc/self/status); env; sleep 1d` + topologyError = "Topology Affinity Error" // XXX do we have a proper constant? ) // Helper for makeTopologyManagerPod(). @@ -470,7 +471,7 @@ func runTopologyManagerPolicySuiteTests(f *framework.Framework) { waitForContainerRemoval(pod2.Spec.Containers[0].Name, pod2.Name, pod2.Namespace) } -func runTopologyManagerNodeAlignmentTest(f *framework.Framework, numaNodes, numPods int, cpuAmount, sriovResourceName, deviceAmount string) { +func runTopologyManagerPositiveTest(f *framework.Framework, numaNodes, numPods int, cpuAmount, sriovResourceName, deviceAmount string) { ginkgo.By(fmt.Sprintf("allocate aligned resources for a %d pod(s): cpuAmount=%s %s=%s", numPods, cpuAmount, sriovResourceName, deviceAmount)) var pods []*v1.Pod @@ -508,6 +509,45 @@ func runTopologyManagerNodeAlignmentTest(f *framework.Framework, numaNodes, numP } } +func runTopologyManagerNegativeTest(f *framework.Framework, numaNodes, numPods int, cpuAmount, sriovResourceName, deviceAmount string) { + ginkgo.By(fmt.Sprintf("allocate aligned resources for a %d pod(s): cpuAmount=%s %s=%s", numPods, cpuAmount, sriovResourceName, deviceAmount)) + + ctnAttrs := []tmCtnAttribute{ + { + ctnName: "gu-container", + cpuRequest: cpuAmount, + cpuLimit: cpuAmount, + deviceName: sriovResourceName, + deviceRequest: deviceAmount, + deviceLimit: deviceAmount, + }, + } + + podName := "gu-pod" + framework.Logf("creating pod %s attrs %v", podName, ctnAttrs) + pod := makeTopologyManagerTestPod(podName, numalignCmd, ctnAttrs) + + pod = f.PodClient().Create(pod) + err := e2epod.WaitForPodCondition(f.ClientSet, f.Namespace.Name, pod.Name, "Failed", 30*time.Second, func(pod *v1.Pod) (bool, error) { + if pod.Status.Phase != v1.PodPending { + return true, nil + } + return false, nil + }) + framework.ExpectNoError(err) + pod, err = f.PodClient().Get(context.TODO(), pod.Name, metav1.GetOptions{}) + framework.ExpectNoError(err) + + if pod.Status.Phase != v1.PodFailed { + framework.Failf("pod %s not failed: %v", pod.Name, pod.Status) + } + if pod.Status.Reason != topologyError { + framework.Failf("pod %s failed for wrong reason: %v", pod.Name, pod.Status) + } + + deletePods(f, []string{pod.Name}) +} + func runTopologyManagerNodeAlignmentSuiteTests(f *framework.Framework, numaNodes, coreCount int) { cmData := testfiles.ReadOrDie(SRIOVDevicePluginCMYAML) var err error @@ -555,13 +595,17 @@ func runTopologyManagerNodeAlignmentSuiteTests(f *framework.Framework, numaNodes }, 5*time.Minute, framework.Poll).Should(gomega.BeTrue()) framework.Logf("Successfully created device plugin pod, detected %d SRIOV device %q", sriovResourceAmount, sriovResourceName) + threadsPerCore := 1 + if isHTEnabled() { + threadsPerCore = 2 + } // could have been a loop, we unroll it to explain the testcases // simplest case: one guaranteed core, one device - runTopologyManagerNodeAlignmentTest(f, numaNodes, 1, "1000m", 
sriovResourceName, "1") + runTopologyManagerPositiveTest(f, numaNodes, 1, "1000m", sriovResourceName, "1") // two guaranteed cores, one device - runTopologyManagerNodeAlignmentTest(f, numaNodes, 1, "2000m", sriovResourceName, "1") + runTopologyManagerPositiveTest(f, numaNodes, 1, "2000m", sriovResourceName, "1") // TODO: test taking an entire NUMA node. // to do a meaningful test, we need to know: @@ -571,12 +615,15 @@ func runTopologyManagerNodeAlignmentSuiteTests(f *framework.Framework, numaNodes if sriovResourceAmount > 1 { // no matter how busses are connected to NUMA nodes and SRIOV devices are installed, this function // preconditions must ensure the following can be fulfilled - runTopologyManagerNodeAlignmentTest(f, numaNodes, 2, "1000m", sriovResourceName, "1") - runTopologyManagerNodeAlignmentTest(f, numaNodes, 2, "2000m", sriovResourceName, "1") + runTopologyManagerPositiveTest(f, numaNodes, 2, "1000m", sriovResourceName, "1") + runTopologyManagerPositiveTest(f, numaNodes, 2, "2000m", sriovResourceName, "1") // testing more complex conditions require knowledge about the system cpu+bus topology } + // overflow NUMA node capacity: cores + runTopologyManagerNegativeTest(f, numaNodes, 1, fmt.Sprintf("%dm", (1+threadsPerCore)*coreCount*1000), sriovResourceName, "1") + framework.Logf("deleting the SRIOV device plugin pod %s/%s and waiting for container %s removal", dpPod.Namespace, dpPod.Name, dpPod.Spec.Containers[0].Name) deletePodInNamespace(f, dpPod.Namespace, dpPod.Name) @@ -612,7 +659,7 @@ func runTopologyManagerTests(f *framework.Framework) { ginkgo.It("run Topology Manager node alignment test suite", func() { // this is a very rough check. We just want to rule out system that does NOT have - // any SRIOV device. A more proper check will be done in runTopologyManagerNodeAlignmentTest + // any SRIOV device. A more proper check will be done in runTopologyManagerPositiveTest sriovdevCount := detectSRIOVDevices() numaNodes := detectNUMANodes() coreCount := detectCoresPerSocket() From 3b4122bd0386a5e13d351d9ef191bdd947261d56 Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Tue, 4 Feb 2020 17:20:22 +0100 Subject: [PATCH 08/13] e2e: topomgr: get and use topology hints from conf TO properly implement some e2e tests, we need to know some basic topology facts about the system running the tests. The bare minimum we need to know is how many PCI SRIOV devices are attached to which NUMA node. This way we know which core we can reserve for kube services, and which NUMA socket we can take to test full socket reservation. To let the tests know the PCI device topology, we use annotations in the SRIOV device plugin ConfigMap we need anyway. The format is ```yaml metadata: annotations: pcidevice_node0: "2" pcidevice_node1: "0" ``` with one annotation per NUMA node in the system. 
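As a reading aid, a condensed sketch of how the annotations can be consumed; the patch's actual helper is findNUMANodeWithoutSRIOVDevices (added below), this version only illustrates the lookup and skips the test-framework error handling.

```go
package e2enode

import (
	"fmt"
	"strconv"

	v1 "k8s.io/api/core/v1"
)

// numaNodeWithoutSRIOV walks the pcidevice_node<N> annotations on the SRIOV
// device plugin ConfigMap and reports the first NUMA node with no SRIOV device
// attached; the CPUs of that node are then safe to hand to the kubelet as
// reserved CPUs.
func numaNodeWithoutSRIOV(cm *v1.ConfigMap, numaNodes int) (int, bool) {
	for n := 0; n < numaNodes; n++ {
		value, ok := cm.Annotations[fmt.Sprintf("pcidevice_node%d", n)]
		if !ok {
			// topology not described for this node: give up rather than guess
			return -1, false
		}
		count, err := strconv.Atoi(value)
		if err != nil {
			return -1, false
		}
		if count == 0 {
			return n, true
		}
	}
	return -1, false
}
```

The real helper then reserves the last CPU of that node via ReservedSystemCPUs, leaving the other NUMA node free for the full-socket test cases.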
Signed-off-by: Francesco Romani --- test/e2e_node/numa_alignment.go | 13 +++++ test/e2e_node/topology_manager_test.go | 80 +++++++++++++++++++------- 2 files changed, 71 insertions(+), 22 deletions(-) diff --git a/test/e2e_node/numa_alignment.go b/test/e2e_node/numa_alignment.go index d44a58036d3..33edd964e8c 100644 --- a/test/e2e_node/numa_alignment.go +++ b/test/e2e_node/numa_alignment.go @@ -18,6 +18,7 @@ package e2enode import ( "fmt" + "io/ioutil" "sort" "strconv" "strings" @@ -75,6 +76,18 @@ func (R *numaPodResources) String() string { return b.String() } +func getCPUsPerNUMANode(nodeNum int) ([]int, error) { + nodeCPUList, err := ioutil.ReadFile(fmt.Sprintf("/sys/devices/system/node/node%d/cpulist", nodeNum)) + if err != nil { + return nil, err + } + cpus, err := cpuset.Parse(strings.TrimSpace(string(nodeCPUList))) + if err != nil { + return nil, err + } + return cpus.ToSlice(), nil +} + func getCPUToNUMANodeMapFromEnv(f *framework.Framework, pod *v1.Pod, environ map[string]string, numaNodes int) (map[int]int, error) { var cpuIDs []int cpuListAllowedEnvVar := "CPULIST_ALLOWED" diff --git a/test/e2e_node/topology_manager_test.go b/test/e2e_node/topology_manager_test.go index a59fb3ce372..bd6a8b314cb 100644 --- a/test/e2e_node/topology_manager_test.go +++ b/test/e2e_node/topology_manager_test.go @@ -132,7 +132,27 @@ func makeTopologyManagerTestPod(podName, podCmd string, tmCtnAttributes []tmCtnA } } -func configureTopologyManagerInKubelet(f *framework.Framework, oldCfg *kubeletconfig.KubeletConfiguration, policy string) { +func findNUMANodeWithoutSRIOVDevices(configMap *v1.ConfigMap, numaNodes int) (int, bool) { + for nodeNum := 0; nodeNum < numaNodes; nodeNum++ { + value, ok := configMap.Annotations[fmt.Sprintf("pcidevice_node%d", nodeNum)] + if !ok { + framework.Logf("missing pcidevice annotation for NUMA node %d", nodeNum) + return -1, false + } + v, err := strconv.Atoi(value) + if err != nil { + framework.Failf("error getting the PCI device count on NUMA node %d: %v", nodeNum, err) + } + if v == 0 { + framework.Logf("NUMA node %d has no SRIOV devices attached", nodeNum) + return nodeNum, true + } + framework.Logf("NUMA node %d has %d SRIOV devices attached", nodeNum, v) + } + return -1, false +} + +func configureTopologyManagerInKubelet(f *framework.Framework, oldCfg *kubeletconfig.KubeletConfiguration, policy string, configMap *v1.ConfigMap, numaNodes int) string { // Configure Topology Manager in Kubelet with policy. newCfg := oldCfg.DeepCopy() if newCfg.FeatureGates == nil { @@ -153,18 +173,25 @@ func configureTopologyManagerInKubelet(f *framework.Framework, oldCfg *kubeletco // Set the CPU Manager reconcile period to 1 second. newCfg.CPUManagerReconcilePeriod = metav1.Duration{Duration: 1 * time.Second} - // The Kubelet panics if either kube-reserved or system-reserved is not set - // when CPU Manager is enabled. Set cpu in kube-reserved > 0 so that - // kubelet doesn't panic. - if newCfg.KubeReserved == nil { - newCfg.KubeReserved = map[string]string{} - } + if nodeNum, ok := findNUMANodeWithoutSRIOVDevices(configMap, numaNodes); ok { + cpus, err := getCPUsPerNUMANode(nodeNum) + framework.Logf("NUMA Node %d doesn't seem to have attached SRIOV devices and has cpus=%v", nodeNum, cpus) + framework.ExpectNoError(err) + newCfg.ReservedSystemCPUs = fmt.Sprintf("%d", cpus[len(cpus)-1]) + } else { + // The Kubelet panics if either kube-reserved or system-reserved is not set + // when CPU Manager is enabled. Set cpu in kube-reserved > 0 so that + // kubelet doesn't panic. 
+ if newCfg.KubeReserved == nil { + newCfg.KubeReserved = map[string]string{} + } - if _, ok := newCfg.KubeReserved["cpu"]; !ok { - newCfg.KubeReserved["cpu"] = "200m" + if _, ok := newCfg.KubeReserved["cpu"]; !ok { + newCfg.KubeReserved["cpu"] = "200m" + } } // Dump the config -- debug - framework.Logf("New kublet config is %s", *newCfg) + framework.Logf("New kubelet config is %s", *newCfg) // Update the Kubelet configuration. framework.ExpectNoError(setKubeletConfiguration(f, newCfg)) @@ -175,6 +202,8 @@ func configureTopologyManagerInKubelet(f *framework.Framework, oldCfg *kubeletco framework.ExpectNoError(err) return nodes == 1 }, time.Minute, time.Second).Should(gomega.BeTrue()) + + return newCfg.ReservedSystemCPUs } // getSRIOVDevicePluginPod returns the Device Plugin pod for sriov resources in e2e tests. @@ -548,14 +577,14 @@ func runTopologyManagerNegativeTest(f *framework.Framework, numaNodes, numPods i deletePods(f, []string{pod.Name}) } -func runTopologyManagerNodeAlignmentSuiteTests(f *framework.Framework, numaNodes, coreCount int) { +func getSRIOVDevicePluginConfigMap(cmFile string) *v1.ConfigMap { cmData := testfiles.ReadOrDie(SRIOVDevicePluginCMYAML) var err error // the SRIOVDP configuration is hw-dependent, so we allow per-test-host customization. - framework.Logf("host-local SRIOV Device Plugin Config Map %q", framework.TestContext.SriovdpConfigMapFile) - if framework.TestContext.SriovdpConfigMapFile != "" { - cmData, err = ioutil.ReadFile(framework.TestContext.SriovdpConfigMapFile) + framework.Logf("host-local SRIOV Device Plugin Config Map %q", cmFile) + if cmFile != "" { + cmData, err = ioutil.ReadFile(cmFile) if err != nil { framework.Failf("unable to load the SRIOV Device Plugin ConfigMap: %v", err) } @@ -563,7 +592,12 @@ func runTopologyManagerNodeAlignmentSuiteTests(f *framework.Framework, numaNodes framework.Logf("Using built-in SRIOV Device Plugin Config Map") } - configMap := readConfigMapV1OrDie(cmData) + return readConfigMapV1OrDie(cmData) +} + +func runTopologyManagerNodeAlignmentSuiteTests(f *framework.Framework, configMap *v1.ConfigMap, reservedSystemCPUs string, numaNodes, coreCount int) { + var err error + ginkgo.By(fmt.Sprintf("Creating configMap %v/%v", metav1.NamespaceSystem, configMap.Name)) if _, err = f.ClientSet.CoreV1().ConfigMaps(metav1.NamespaceSystem).Create(context.TODO(), configMap, metav1.CreateOptions{}); err != nil { framework.Failf("unable to create test configMap %s: %v", configMap.Name, err) @@ -607,10 +641,10 @@ func runTopologyManagerNodeAlignmentSuiteTests(f *framework.Framework, numaNodes // two guaranteed cores, one device runTopologyManagerPositiveTest(f, numaNodes, 1, "2000m", sriovResourceName, "1") - // TODO: test taking an entire NUMA node. - // to do a meaningful test, we need to know: - // - where are the reserved CPUs placed (which NUMA node) - // - where are the SRIOV device attacched to (which NUMA node(s)) + // take an entire socket; but for that, we need to have reserved CPUs on another one. 
+ if reservedSystemCPUs != "" { + runTopologyManagerPositiveTest(f, numaNodes, 1, fmt.Sprintf("%dm", threadsPerCore*coreCount*1000), sriovResourceName, "1") + } if sriovResourceAmount > 1 { // no matter how busses are connected to NUMA nodes and SRIOV devices are installed, this function @@ -646,7 +680,7 @@ func runTopologyManagerTests(f *framework.Framework) { ginkgo.By(fmt.Sprintf("by configuring Topology Manager policy to %s", policy)) framework.Logf("Configuring topology Manager policy to %s", policy) - configureTopologyManagerInKubelet(f, oldCfg, policy) + configureTopologyManagerInKubelet(f, oldCfg, policy, nil, 0) // Run the tests runTopologyManagerPolicySuiteTests(f) } @@ -674,6 +708,8 @@ func runTopologyManagerTests(f *framework.Framework) { e2eskipper.Skipf("this test is meant to run on a system with at least one SRIOV device") } + configMap := getSRIOVDevicePluginConfigMap(framework.TestContext.SriovdpConfigMapFile) + oldCfg, err = getCurrentKubeletConfig() framework.ExpectNoError(err) @@ -683,9 +719,9 @@ func runTopologyManagerTests(f *framework.Framework) { ginkgo.By(fmt.Sprintf("by configuring Topology Manager policy to %s", policy)) framework.Logf("Configuring topology Manager policy to %s", policy) - configureTopologyManagerInKubelet(f, oldCfg, policy) + reservedSystemCPUs := configureTopologyManagerInKubelet(f, oldCfg, policy, configMap, numaNodes) - runTopologyManagerNodeAlignmentSuiteTests(f, numaNodes, coreCount) + runTopologyManagerNodeAlignmentSuiteTests(f, configMap, reservedSystemCPUs, numaNodes, coreCount) // restore kubelet config setOldKubeletConfig(f, oldCfg) From 512a4e8a3e2e0762537fd2be8a2df48492fb1b2e Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Mon, 10 Feb 2020 16:35:14 +0100 Subject: [PATCH 09/13] e2e: topomgr: reduce node readiness timeout Five minutes was initially used only to be overcautious. From my experiments, the node is ready in usually less than a minute. Double it to give some buffer space. Signed-off-by: Francesco Romani --- test/e2e_node/topology_manager_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e_node/topology_manager_test.go b/test/e2e_node/topology_manager_test.go index bd6a8b314cb..6d5882ce86e 100644 --- a/test/e2e_node/topology_manager_test.go +++ b/test/e2e_node/topology_manager_test.go @@ -626,7 +626,7 @@ func runTopologyManagerNodeAlignmentSuiteTests(f *framework.Framework, configMap framework.Logf("Node status: %v", node.Status.Capacity) sriovResourceName, sriovResourceAmount = findSRIOVResource(node) return sriovResourceAmount > 0 - }, 5*time.Minute, framework.Poll).Should(gomega.BeTrue()) + }, 2*time.Minute, framework.Poll).Should(gomega.BeTrue()) framework.Logf("Successfully created device plugin pod, detected %d SRIOV device %q", sriovResourceAmount, sriovResourceName) threadsPerCore := 1 From 83c344647f563fb3b1d29d7182d9ccc6d73ec287 Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Mon, 10 Feb 2020 18:12:08 +0100 Subject: [PATCH 10/13] e2e: topomgr: better check for AffinityError Add a helper function to check if a Pod failed admission for Topology Affinity Error. So far we only check the Status.Reason. 
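The helper this patch adds matches Status.Reason with a loose regular expression rather than a strict string comparison, so both a spaced and a CamelCase spelling of the reason pass the check. A minimal standalone sketch of that matching approach follows; the candidate reason strings are assumptions chosen purely for illustration.

```go
package main

import (
	"fmt"
	"regexp"
)

// Loose pattern: tolerates whitespace or CamelCase between the three words.
var topologyAffinityErr = regexp.MustCompile(`Topology.*Affinity.*Error`)

func main() {
	// Hypothetical admission failure reasons, for illustration only.
	for _, reason := range []string{
		"Topology Affinity Error",
		"TopologyAffinityError",
		"UnexpectedAdmissionError",
	} {
		fmt.Printf("%-26s matches=%v\n", reason, topologyAffinityErr.MatchString(reason))
	}
}
```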
Signed-off-by: Francesco Romani --- test/e2e_node/topology_manager_test.go | 12 ++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/test/e2e_node/topology_manager_test.go b/test/e2e_node/topology_manager_test.go index 6d5882ce86e..38c10dae602 100644 --- a/test/e2e_node/topology_manager_test.go +++ b/test/e2e_node/topology_manager_test.go @@ -46,8 +46,7 @@ import ( ) const ( - numalignCmd = `export CPULIST_ALLOWED=$( awk -F":\t*" '/Cpus_allowed_list/ { print $2 }' /proc/self/status); env; sleep 1d` - topologyError = "Topology Affinity Error" // XXX do we have a proper constant? + numalignCmd = `export CPULIST_ALLOWED=$( awk -F":\t*" '/Cpus_allowed_list/ { print $2 }' /proc/self/status); env; sleep 1d` ) // Helper for makeTopologyManagerPod(). @@ -570,13 +569,18 @@ func runTopologyManagerNegativeTest(f *framework.Framework, numaNodes, numPods i if pod.Status.Phase != v1.PodFailed { framework.Failf("pod %s not failed: %v", pod.Name, pod.Status) } - if pod.Status.Reason != topologyError { - framework.Failf("pod %s failed for wrong reason: %v", pod.Name, pod.Status) + if !isTopologyAffinityError(pod) { + framework.Failf("pod %s failed for wrong reason: %q", pod.Name, pod.Status.Reason) } deletePods(f, []string{pod.Name}) } +func isTopologyAffinityError(pod *v1.Pod) bool { + re := regexp.MustCompile(`Topology.*Affinity.*Error`) + return re.MatchString(pod.Status.Reason) +} + func getSRIOVDevicePluginConfigMap(cmFile string) *v1.ConfigMap { cmData := testfiles.ReadOrDie(SRIOVDevicePluginCMYAML) var err error From fee1dba054218873da853df7e8683e4d2dfb266d Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Mon, 10 Feb 2020 16:34:23 +0100 Subject: [PATCH 11/13] e2e: topomgr: improve the test logs Clarify which test is doing what, to make the test output easier to understand.
Signed-off-by: Francesco Romani --- test/e2e_node/topology_manager_test.go | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/test/e2e_node/topology_manager_test.go b/test/e2e_node/topology_manager_test.go index 38c10dae602..0e952c694bf 100644 --- a/test/e2e_node/topology_manager_test.go +++ b/test/e2e_node/topology_manager_test.go @@ -500,8 +500,6 @@ func runTopologyManagerPolicySuiteTests(f *framework.Framework) { } func runTopologyManagerPositiveTest(f *framework.Framework, numaNodes, numPods int, cpuAmount, sriovResourceName, deviceAmount string) { - ginkgo.By(fmt.Sprintf("allocate aligned resources for a %d pod(s): cpuAmount=%s %s=%s", numPods, cpuAmount, sriovResourceName, deviceAmount)) - var pods []*v1.Pod for podID := 0; podID < numPods; podID++ { @@ -538,8 +536,6 @@ func runTopologyManagerPositiveTest(f *framework.Framework, numaNodes, numPods i } func runTopologyManagerNegativeTest(f *framework.Framework, numaNodes, numPods int, cpuAmount, sriovResourceName, deviceAmount string) { - ginkgo.By(fmt.Sprintf("allocate aligned resources for a %d pod(s): cpuAmount=%s %s=%s", numPods, cpuAmount, sriovResourceName, deviceAmount)) - ctnAttrs := []tmCtnAttribute{ { ctnName: "gu-container", @@ -639,28 +635,37 @@ func runTopologyManagerNodeAlignmentSuiteTests(f *framework.Framework, configMap } // could have been a loop, we unroll it to explain the testcases - // simplest case: one guaranteed core, one device + // simplest case + ginkgo.By(fmt.Sprintf("Successfully admit one guaranteed pod with 1 core, 1 %s device", sriovResourceName)) runTopologyManagerPositiveTest(f, numaNodes, 1, "1000m", sriovResourceName, "1") - // two guaranteed cores, one device + ginkgo.By(fmt.Sprintf("Successfully admit one guaranteed pod with 2 cores, 1 %s device", sriovResourceName)) runTopologyManagerPositiveTest(f, numaNodes, 1, "2000m", sriovResourceName, "1") - // take an entire socket; but for that, we need to have reserved CPUs on another one. if reservedSystemCPUs != "" { - runTopologyManagerPositiveTest(f, numaNodes, 1, fmt.Sprintf("%dm", threadsPerCore*coreCount*1000), sriovResourceName, "1") + // to avoid false negatives, we have put reserved CPUs in such a way there is at least a NUMA node + // with 1+ SRIOV devices and not reserved CPUs. 
+ numCores := threadsPerCore * coreCount + ginkgo.By(fmt.Sprintf("Successfully admit an entire socket (%d cores), 1 %s device", numCores, sriovResourceName)) + runTopologyManagerPositiveTest(f, numaNodes, 1, fmt.Sprintf("%dm", numCores*1000), sriovResourceName, "1") } if sriovResourceAmount > 1 { // no matter how busses are connected to NUMA nodes and SRIOV devices are installed, this function // preconditions must ensure the following can be fulfilled + ginkgo.By(fmt.Sprintf("Successfully admit two guaranteed pods, each with 1 core, 1 %s device", sriovResourceName)) runTopologyManagerPositiveTest(f, numaNodes, 2, "1000m", sriovResourceName, "1") + + ginkgo.By(fmt.Sprintf("Successfully admit two guaranteed pods, each with 2 cores, 1 %s device", sriovResourceName)) runTopologyManagerPositiveTest(f, numaNodes, 2, "2000m", sriovResourceName, "1") // testing more complex conditions require knowledge about the system cpu+bus topology } // overflow NUMA node capacity: cores - runTopologyManagerNegativeTest(f, numaNodes, 1, fmt.Sprintf("%dm", (1+threadsPerCore)*coreCount*1000), sriovResourceName, "1") + numCores := 1 + (threadsPerCore * coreCount) + ginkgo.By(fmt.Sprintf("Trying to admit a guaranteed pods, with %d cores, 1 %s device - and it should be rejected", numCores, sriovResourceName)) + runTopologyManagerNegativeTest(f, numaNodes, 1, fmt.Sprintf("%dm", numCores*1000), sriovResourceName, "1") framework.Logf("deleting the SRIOV device plugin pod %s/%s and waiting for container %s removal", dpPod.Namespace, dpPod.Name, dpPod.Spec.Containers[0].Name) From 2f0a6d2c76471bbe0557c6296f8fc04cc110b36c Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Mon, 10 Feb 2020 18:51:59 +0100 Subject: [PATCH 12/13] e2e: topomgr: use constants for test limits Signed-off-by: Francesco Romani --- test/e2e_node/topology_manager_test.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/test/e2e_node/topology_manager_test.go b/test/e2e_node/topology_manager_test.go index 0e952c694bf..f7a0a6711eb 100644 --- a/test/e2e_node/topology_manager_test.go +++ b/test/e2e_node/topology_manager_test.go @@ -47,6 +47,9 @@ import ( const ( numalignCmd = `export CPULIST_ALLOWED=$( awk -F":\t*" '/Cpus_allowed_list/ { print $2 }' /proc/self/status); env; sleep 1d` + + minNumaNodes = 2 + minCoreCount = 4 ) // Helper for makeTopologyManagerPod(). @@ -707,10 +710,10 @@ func runTopologyManagerTests(f *framework.Framework) { numaNodes := detectNUMANodes() coreCount := detectCoresPerSocket() - if numaNodes < 2 { + if numaNodes < minNumaNodes { e2eskipper.Skipf("this test is meant to run on a multi-node NUMA system") } - if coreCount < 4 { + if coreCount < minCoreCount { e2eskipper.Skipf("this test is meant to run on a system with at least 4 cores per socket") } if sriovdevCount == 0 { From 70cce5e3f13961bbc78eed9f3869b433cfd846d2 Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Mon, 10 Feb 2020 19:22:08 +0100 Subject: [PATCH 13/13] e2e: topomgr: introduce sriov setup/teardown funcs Reorganize the code with setup and teardown functions, to make room for the future addition of more device plugin support, and to make the code a bit tidier. 
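As a usage sketch only (a fragment, not runnable on its own: it assumes the framework objects and the setup/teardown helpers this patch introduces), a test body could also pair the setup with a deferred teardown, instead of the explicit call at the end of the suite, so the device plugin pod is removed even when an assertion aborts the suite early:

```go
// Fragment, for illustration: pair the setup with a deferred teardown so the
// SRIOV device plugin pod is cleaned up even if a check fails mid-suite.
dpPod, sriovResourceName, sriovResourceAmount := setupSRIOVConfigOrFail(f, configMap)
defer teardownSRIOVConfigOrFail(f, dpPod)

framework.Logf("running alignment tests against %d %q device(s)", sriovResourceAmount, sriovResourceName)
// ... positive and negative admission tests go here ...
```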
Signed-off-by: Francesco Romani --- test/e2e_node/topology_manager_test.go | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/test/e2e_node/topology_manager_test.go b/test/e2e_node/topology_manager_test.go index f7a0a6711eb..51cf16b4bde 100644 --- a/test/e2e_node/topology_manager_test.go +++ b/test/e2e_node/topology_manager_test.go @@ -598,7 +598,7 @@ func getSRIOVDevicePluginConfigMap(cmFile string) *v1.ConfigMap { return readConfigMapV1OrDie(cmData) } -func runTopologyManagerNodeAlignmentSuiteTests(f *framework.Framework, configMap *v1.ConfigMap, reservedSystemCPUs string, numaNodes, coreCount int) { +func setupSRIOVConfigOrFail(f *framework.Framework, configMap *v1.ConfigMap) (*v1.Pod, string, int64) { var err error ginkgo.By(fmt.Sprintf("Creating configMap %v/%v", metav1.NamespaceSystem, configMap.Name)) @@ -632,10 +632,24 @@ func runTopologyManagerNodeAlignmentSuiteTests(f *framework.Framework, configMap }, 2*time.Minute, framework.Poll).Should(gomega.BeTrue()) framework.Logf("Successfully created device plugin pod, detected %d SRIOV device %q", sriovResourceAmount, sriovResourceName) + return dpPod, sriovResourceName, sriovResourceAmount +} + +func teardownSRIOVConfigOrFail(f *framework.Framework, dpPod *v1.Pod) { + framework.Logf("deleting the SRIOV device plugin pod %s/%s and waiting for container %s removal", + dpPod.Namespace, dpPod.Name, dpPod.Spec.Containers[0].Name) + deletePodInNamespace(f, dpPod.Namespace, dpPod.Name) + waitForContainerRemoval(dpPod.Spec.Containers[0].Name, dpPod.Name, dpPod.Namespace) +} + +func runTopologyManagerNodeAlignmentSuiteTests(f *framework.Framework, configMap *v1.ConfigMap, reservedSystemCPUs string, numaNodes, coreCount int) { threadsPerCore := 1 if isHTEnabled() { threadsPerCore = 2 } + + dpPod, sriovResourceName, sriovResourceAmount := setupSRIOVConfigOrFail(f, configMap) + // could have been a loop, we unroll it to explain the testcases // simplest case @@ -670,10 +684,7 @@ func runTopologyManagerNodeAlignmentSuiteTests(f *framework.Framework, configMap ginkgo.By(fmt.Sprintf("Trying to admit a guaranteed pods, with %d cores, 1 %s device - and it should be rejected", numCores, sriovResourceName)) runTopologyManagerNegativeTest(f, numaNodes, 1, fmt.Sprintf("%dm", numCores*1000), sriovResourceName, "1") - framework.Logf("deleting the SRIOV device plugin pod %s/%s and waiting for container %s removal", - dpPod.Namespace, dpPod.Name, dpPod.Spec.Containers[0].Name) - deletePodInNamespace(f, dpPod.Namespace, dpPod.Name) - waitForContainerRemoval(dpPod.Spec.Containers[0].Name, dpPod.Name, dpPod.Namespace) + teardownSRIOVConfigOrFail(f, dpPod) } func runTopologyManagerTests(f *framework.Framework) {