e2e: TM: add option to fail instead of skip

The Topology Manager e2e tests want to run on a real multi-NUMA system
and to consume real devices supported by device plugins; SRIOV
devices happen to be the most commonly available of such devices.

CI machines are not multi-NUMA and do not expose SRIOV devices, so the biggest
portion of the tests will just skip, and we need to keep it like this until we
figure out how to enable these features.

However, some organizations can and want to run the testsuite on bare metal;
in this case, the current tests skip (rather than fail) on misconfigured
boxes, which reports a misleading result. It is much better to
fail if the test preconditions aren't met.

To satisfy both needs, we add an option, controlled by an environment
variable, to fail (not skip) if the machine on which the test runs
doesn't meet the expectations (multi-NUMA, 4+ cores per NUMA cell,
exposed SRIOV VFs).
We keep the old behaviour as default to keep being CI friendly.

Signed-off-by: Francesco Romani <fromani@redhat.com>
This commit is contained in:
Francesco Romani
2021-03-03 19:14:20 +01:00
parent dd2d12f6dc
commit 54c7d8fbb1
7 changed files with 119 additions and 54 deletions

View File

@@ -90,20 +90,6 @@ func detectCoresPerSocket() int {
return coreCount
}
// countSRIOVDevices counts the /sys/bus/pci/devices/*/physfn entries by running
// `ls ... | wc -w` through /bin/sh and parsing the printed number.
//
// It returns -1 together with the error when running the shell command fails;
// otherwise it returns whatever strconv.Atoi yields for the trimmed output.
func countSRIOVDevices() (int, error) {
	const countCmd = "ls /sys/bus/pci/devices/*/physfn | wc -w"

	raw, err := exec.Command("/bin/sh", "-c", countCmd).Output()
	if err != nil {
		return -1, err
	}

	// wc pads/terminates its output with whitespace; strip it before parsing.
	wordCount := strings.TrimSpace(string(raw))
	return strconv.Atoi(wordCount)
}
// detectSRIOVDevices returns the count reported by countSRIOVDevices, handing
// any error from it to framework.ExpectNoError.
func detectSRIOVDevices() int {
	vfCount, countErr := countSRIOVDevices()
	framework.ExpectNoError(countErr)
	return vfCount
}
func makeContainers(ctnCmd string, ctnAttributes []tmCtnAttribute) (ctns []v1.Container) {
for _, ctnAttr := range ctnAttributes {
ctn := v1.Container{
@@ -898,21 +884,7 @@ func runTopologyManagerTests(f *framework.Framework) {
})
ginkgo.It("run Topology Manager node alignment test suite", func() {
// this is a very rough check. We just want to rule out system that does NOT have
// any SRIOV device. A more proper check will be done in runTopologyManagerPositiveTest
sriovdevCount := detectSRIOVDevices()
numaNodes := detectNUMANodes()
coreCount := detectCoresPerSocket()
if numaNodes < minNumaNodes {
e2eskipper.Skipf("this test is meant to run on a multi-node NUMA system")
}
if coreCount < minCoreCount {
e2eskipper.Skipf("this test is meant to run on a system with at least 4 cores per socket")
}
if sriovdevCount == 0 {
e2eskipper.Skipf("this test is meant to run on a system with at least one configured VF from SRIOV device")
}
numaNodes, coreCount := hostPrecheck()
configMap := getSRIOVDevicePluginConfigMap(framework.TestContext.SriovdpConfigMapFile)
@@ -935,19 +907,7 @@ func runTopologyManagerTests(f *framework.Framework) {
})
ginkgo.It("run the Topology Manager pod scope alignment test suite", func() {
sriovdevCount := detectSRIOVDevices()
numaNodes := detectNUMANodes()
coreCount := detectCoresPerSocket()
if numaNodes < minNumaNodes {
e2eskipper.Skipf("this test is intended to be run on a multi-node NUMA system")
}
if coreCount < minCoreCount {
e2eskipper.Skipf("this test is intended to be run on a system with at least %d cores per socket", minCoreCount)
}
if sriovdevCount == 0 {
e2eskipper.Skipf("this test is intended to be run on a system with at least one SR-IOV VF enabled")
}
numaNodes, coreCount := hostPrecheck()
configMap := getSRIOVDevicePluginConfigMap(framework.TestContext.SriovdpConfigMapFile)
@@ -968,6 +928,25 @@ func runTopologyManagerTests(f *framework.Framework) {
})
}
// hostPrecheck runs the coarse host checks shared by the alignment test suites
// and returns the detected NUMA node count and cores-per-socket count, in that
// order.
//
// The test is skipped when fewer than minNumaNodes NUMA nodes or fewer than
// minCoreCount cores per socket are detected. requireSRIOVDevices is called
// last, before returning.
// NOTE(review): presumably requireSRIOVDevices enforces that at least one SRIOV
// device is available — confirm against its definition.
func hostPrecheck() (int, int) {
	// These checks are deliberately rough: they only rule out clearly unsuitable
	// systems. A more proper check is done in runTopologyManagerPositiveTest.
	numaCount := detectNUMANodes()
	if numaCount < minNumaNodes {
		e2eskipper.Skipf("this test is intended to be run on a multi-node NUMA system")
	}

	coresPerSocket := detectCoresPerSocket()
	if coresPerSocket < minCoreCount {
		e2eskipper.Skipf("this test is intended to be run on a system with at least %d cores per socket", minCoreCount)
	}

	requireSRIOVDevices()

	return numaCount, coresPerSocket
}
// Serial because the test updates kubelet configuration.
var _ = SIGDescribe("Topology Manager [Serial] [Feature:TopologyManager][NodeFeature:TopologyManager]", func() {
f := framework.NewDefaultFramework("topology-manager-test")