node: e2e: add test for checkpoint recovery
Add an e2e test to exercise the checkpoint recovery flow. This means we need to actually create an old (V1, pre-1.20) checkpoint; since we do that only in the e2e test itself, this is still fine.

Signed-off-by: Francesco Romani <fromani@redhat.com>
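Concretely, "create an old checkpoint" means writing a pre-1.20-shaped checkpoint file into the kubelet state directory before the kubelet restarts, so that recovery from the legacy format is actually exercised. Below is a minimal, self-contained sketch of the idea; the struct layout, file name, and path are assumptions reconstructed for illustration, not the kubelet's actual types:

	package main

	import (
		"encoding/json"
		"os"
		"path/filepath"
	)

	// podDevicesEntryV1 approximates the flat pre-1.20 entry layout
	// (assumed for illustration): device IDs are a plain list, with no
	// per-NUMA-node grouping.
	type podDevicesEntryV1 struct {
		PodUID        string
		ContainerName string
		ResourceName  string
		DeviceIDs     []string
	}

	type checkpointDataV1 struct {
		PodDeviceEntries  []podDevicesEntryV1
		RegisteredDevices map[string][]string
	}

	// writeV1Checkpoint serializes a V1-style checkpoint into stateDir so
	// a restarted kubelet would have to recover from the old format. The
	// real kubelet checkpoint also embeds a checksum, omitted here.
	func writeV1Checkpoint(stateDir string, data checkpointDataV1) error {
		if err := os.MkdirAll(stateDir, 0o755); err != nil {
			return err
		}
		blob, err := json.Marshal(data)
		if err != nil {
			return err
		}
		return os.WriteFile(filepath.Join(stateDir, "kubelet_internal_checkpoint"), blob, 0o600)
	}

	func main() {
		data := checkpointDataV1{
			PodDeviceEntries: []podDevicesEntryV1{{
				PodUID:        "uid-0000",
				ContainerName: "cnt-0",
				ResourceName:  "example.com/fakedev",
				DeviceIDs:     []string{"dev-0"},
			}},
			RegisteredDevices: map[string][]string{
				"example.com/fakedev": {"dev-0", "dev-1"},
			},
		}
		if err := writeV1Checkpoint("/tmp/kubelet-device-plugins", data); err != nil {
			panic(err)
		}
	}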
@@ -505,6 +505,15 @@ type sriovData struct {
 }
 
 func setupSRIOVConfigOrFail(f *framework.Framework, configMap *v1.ConfigMap) *sriovData {
+	sd := createSRIOVConfigOrFail(f, configMap)
+
+	e2enode.WaitForNodeToBeReady(f.ClientSet, framework.TestContext.NodeName, 5*time.Minute)
+
+	sd.pod = createSRIOVPodOrFail(f)
+	return sd
+}
+
+func createSRIOVConfigOrFail(f *framework.Framework, configMap *v1.ConfigMap) *sriovData {
 	var err error
 
 	ginkgo.By(fmt.Sprintf("Creating configMap %v/%v", metav1.NamespaceSystem, configMap.Name))
@@ -522,8 +531,13 @@ func setupSRIOVConfigOrFail(f *framework.Framework, configMap *v1.ConfigMap) *sriovData {
 		framework.Failf("unable to create test serviceAccount %s: %v", serviceAccount.Name, err)
 	}
 
-	e2enode.WaitForNodeToBeReady(f.ClientSet, framework.TestContext.NodeName, 5*time.Minute)
-
+	return &sriovData{
+		configMap:      configMap,
+		serviceAccount: serviceAccount,
+	}
+}
+
+func createSRIOVPodOrFail(f *framework.Framework) *v1.Pod {
 	dp := getSRIOVDevicePluginPod()
 	dp.Spec.NodeName = framework.TestContext.NodeName
 
@@ -536,11 +550,7 @@ func setupSRIOVConfigOrFail(f *framework.Framework, configMap *v1.ConfigMap) *sriovData {
 	}
 	framework.ExpectNoError(err)
 
-	return &sriovData{
-		configMap:      configMap,
-		serviceAccount: serviceAccount,
-		pod:            dpPod,
-	}
+	return dpPod
 }
 
 // waitForSRIOVResources waits until enough SRIOV resources are available, expecting to complete within the timeout.
@@ -560,7 +570,7 @@ func waitForSRIOVResources(f *framework.Framework, sd *sriovData) {
 	framework.Logf("Detected SRIOV allocatable devices name=%q amount=%d", sd.resourceName, sd.resourceAmount)
 }
 
-func teardownSRIOVConfigOrFail(f *framework.Framework, sd *sriovData) {
+func deleteSRIOVPodOrFail(f *framework.Framework, sd *sriovData) {
 	var err error
 	gp := int64(0)
 	deleteOptions := metav1.DeleteOptions{
@@ -571,6 +581,14 @@ func teardownSRIOVConfigOrFail(f *framework.Framework, sd *sriovData) {
 	err = f.ClientSet.CoreV1().Pods(sd.pod.Namespace).Delete(context.TODO(), sd.pod.Name, deleteOptions)
 	framework.ExpectNoError(err)
 	waitForAllContainerRemoval(sd.pod.Name, sd.pod.Namespace)
+}
+
+func removeSRIOVConfigOrFail(f *framework.Framework, sd *sriovData) {
+	var err error
+	gp := int64(0)
+	deleteOptions := metav1.DeleteOptions{
+		GracePeriodSeconds: &gp,
+	}
 
 	ginkgo.By(fmt.Sprintf("Deleting configMap %v/%v", metav1.NamespaceSystem, sd.configMap.Name))
 	err = f.ClientSet.CoreV1().ConfigMaps(metav1.NamespaceSystem).Delete(context.TODO(), sd.configMap.Name, deleteOptions)
@@ -581,6 +599,11 @@ func teardownSRIOVConfigOrFail(f *framework.Framework, sd *sriovData) {
 	framework.ExpectNoError(err)
 }
 
+func teardownSRIOVConfigOrFail(f *framework.Framework, sd *sriovData) {
+	deleteSRIOVPodOrFail(f, sd)
+	removeSRIOVConfigOrFail(f, sd)
+}
+
 func runTMScopeResourceAlignmentTestSuite(f *framework.Framework, configMap *v1.ConfigMap, reservedSystemCPUs, policy string, numaNodes, coreCount int) {
 	threadsPerCore := getSMTLevel()
 	sd := setupSRIOVConfigOrFail(f, configMap)
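The point of splitting the monolithic setup/teardown helpers into create/delete halves is that the checkpoint recovery flow needs to stop between the individual steps, for example to plant an old checkpoint and restart the kubelet while the SRIOV config exists but the device plugin pod does not. A rough sketch of the flow the split enables; writeV1CheckpointOrFail and restartKubeletOrFail are hypothetical placeholders, not helpers introduced by this diff:

	// Hypothetical recovery flow built from the split helpers above.
	func exerciseCheckpointRecovery(f *framework.Framework, configMap *v1.ConfigMap) {
		sd := createSRIOVConfigOrFail(f, configMap) // ConfigMap + ServiceAccount only
		defer removeSRIOVConfigOrFail(f, sd)

		sd.pod = createSRIOVPodOrFail(f) // device plugin pod, created separately
		waitForSRIOVResources(f, sd)

		deleteSRIOVPodOrFail(f, sd) // remove just the pod, keep the config
		writeV1CheckpointOrFail()   // plant a pre-1.20 checkpoint (hypothetical)
		restartKubeletOrFail()      // kubelet must recover from the old format (hypothetical)

		sd.pod = createSRIOVPodOrFail(f) // plugin must come back after recovery
		waitForSRIOVResources(f, sd)
		deleteSRIOVPodOrFail(f, sd)
	}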