Updated Regional PD failover test to use node taints instead of instance group deletion
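This change simulates a zonal failure by tainting every node in the pod's zone with a NoSchedule taint and deleting the StatefulSet pod, instead of deleting (and later recreating) the zone's managed instance group. The new addTaint helper applies the taint via a strategic-merge patch and precomputes a reverse patch per node, which the deferred cleanup replays to restore each node exactly as it was.

A minimal standalone sketch of that patch/reverse-patch round trip (the taint key and zone value below are illustrative placeholders, not values from a real cluster):

```go
package main

import (
	"encoding/json"
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/util/strategicpatch"
)

func main() {
	var node v1.Node
	oldData, _ := json.Marshal(node) // node state before tainting

	// Taint the node the way the test taints every node in the failed zone.
	node.Spec.Taints = append(node.Spec.Taints, v1.Taint{
		Key:    "zoneTaint_example-ns", // illustrative; the test uses taintKeyPrefix + namespace
		Value:  "us-central1-a",        // illustrative zone name
		Effect: v1.TaintEffectNoSchedule,
	})
	newData, _ := json.Marshal(node)

	// The forward patch adds the taint; the reverse patch restores the original spec.
	patch, _ := strategicpatch.CreateTwoWayMergePatch(oldData, newData, v1.Node{})
	reverse, _ := strategicpatch.CreateTwoWayMergePatch(newData, oldData, v1.Node{})
	fmt.Printf("forward: %s\nreverse: %s\n", patch, reverse)
}
```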
commit 8ee882a98a
parent 4cbeddd7b2
@@ -58,6 +58,7 @@ go_library(
         "//staging/src/k8s.io/apimachinery/pkg/util/intstr:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/util/rand:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
+        "//staging/src/k8s.io/apimachinery/pkg/util/strategicpatch:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/util/uuid:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/util/version:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library",
@@ -24,20 +24,24 @@ import (
     "strings"
     "time"
 
+    "encoding/json"
+
     appsv1 "k8s.io/api/apps/v1"
     "k8s.io/api/core/v1"
     storage "k8s.io/api/storage/v1"
     "k8s.io/apimachinery/pkg/api/resource"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     "k8s.io/apimachinery/pkg/labels"
+    "k8s.io/apimachinery/pkg/types"
     "k8s.io/apimachinery/pkg/util/sets"
+    "k8s.io/apimachinery/pkg/util/strategicpatch"
+    "k8s.io/apimachinery/pkg/util/wait"
     clientset "k8s.io/client-go/kubernetes"
     podutil "k8s.io/kubernetes/pkg/api/v1/pod"
     "k8s.io/kubernetes/pkg/kubelet/apis"
     kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
     "k8s.io/kubernetes/pkg/volume/util"
     "k8s.io/kubernetes/test/e2e/framework"
-    "k8s.io/kubernetes/test/e2e/framework/providers/gce"
     "k8s.io/kubernetes/test/e2e/storage/testsuites"
     "k8s.io/kubernetes/test/e2e/storage/utils"
     imageutils "k8s.io/kubernetes/test/utils/image"
@@ -46,6 +50,7 @@ import (
 const (
     pvDeletionTimeout       = 3 * time.Minute
     statefulSetReadyTimeout = 3 * time.Minute
+    taintKeyPrefix          = "zoneTaint_"
 )
 
 var _ = utils.SIGDescribe("Regional PD", func() {
@@ -144,9 +149,6 @@ func testVolumeProvisioning(c clientset.Interface, ns string) {
 }
 
 func testZonalFailover(c clientset.Interface, ns string) {
-    nodes := framework.GetReadySchedulableNodesOrDie(c)
-    nodeCount := len(nodes.Items)
-
     cloudZones := getTwoRandomZones(c)
     class := newRegionalStorageClass(ns, cloudZones)
     claimTemplate := newClaimTemplate(ns)
@@ -203,41 +205,40 @@ func testZonalFailover(c clientset.Interface, ns string) {
     Expect(err).ToNot(HaveOccurred())
     podZone := node.Labels[apis.LabelZoneFailureDomain]
 
-    // TODO (verult) Consider using node taints to simulate zonal failure instead.
-    By("deleting instance group belonging to pod's zone")
-
-    // Asynchronously detect a pod reschedule is triggered during/after instance group deletion.
-    waitStatus := make(chan error)
-    go func() {
-        waitStatus <- waitForStatefulSetReplicasNotReady(statefulSet.Name, ns, c)
-    }()
-
-    cloud, err := gce.GetGCECloud()
-    if err != nil {
-        Expect(err).NotTo(HaveOccurred())
-    }
-    instanceGroupName := framework.TestContext.CloudConfig.NodeInstanceGroup
-    instanceGroup, err := cloud.GetInstanceGroup(instanceGroupName, podZone)
-    Expect(err).NotTo(HaveOccurred(),
-        "Error getting instance group %s in zone %s", instanceGroupName, podZone)
-    templateName, err := framework.GetManagedInstanceGroupTemplateName(podZone)
-    Expect(err).NotTo(HaveOccurred(),
-        "Error getting instance group template in zone %s", podZone)
-    err = framework.DeleteManagedInstanceGroup(podZone)
-    Expect(err).NotTo(HaveOccurred(),
-        "Error deleting instance group in zone %s", podZone)
+    By("tainting nodes in the zone the pod is scheduled in")
+    selector := labels.SelectorFromSet(labels.Set(map[string]string{apis.LabelZoneFailureDomain: podZone}))
+    nodesInZone, err := c.CoreV1().Nodes().List(metav1.ListOptions{LabelSelector: selector.String()})
+    Expect(err).ToNot(HaveOccurred())
+    removeTaintFunc := addTaint(c, ns, nodesInZone.Items, podZone)
 
     defer func() {
-        framework.Logf("recreating instance group %s", instanceGroup.Name)
-
-        framework.ExpectNoError(framework.CreateManagedInstanceGroup(instanceGroup.Size, podZone, templateName),
-            "Error recreating instance group %s in zone %s", instanceGroup.Name, podZone)
-        framework.ExpectNoError(framework.WaitForReadyNodes(c, nodeCount, framework.RestartNodeReadyAgainTimeout),
-            "Error waiting for nodes from the new instance group to become ready.")
+        framework.Logf("removing previously added node taints")
+        removeTaintFunc()
     }()
 
-    err = <-waitStatus
-    Expect(err).ToNot(HaveOccurred(), "Error waiting for replica to be deleted during failover: %v", err)
+    By("deleting StatefulSet pod")
+    err = c.CoreV1().Pods(ns).Delete(pod.Name, &metav1.DeleteOptions{})
+
+    // Verify the pod is scheduled in the other zone.
+    By("verifying the pod is scheduled in a different zone.")
+    var otherZone string
+    if cloudZones[0] == podZone {
+        otherZone = cloudZones[1]
+    } else {
+        otherZone = cloudZones[0]
+    }
+    err = wait.PollImmediate(framework.Poll, statefulSetReadyTimeout, func() (bool, error) {
+        framework.Logf("checking whether new pod is scheduled in zone %q", otherZone)
+        pod = getPod(c, ns, regionalPDLabels)
+        nodeName = pod.Spec.NodeName
+        node, err = c.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
+        if err != nil {
+            return false, nil
+        }
+        newPodZone := node.Labels[apis.LabelZoneFailureDomain]
+        return newPodZone == otherZone, nil
+    })
+    Expect(err).NotTo(HaveOccurred(), "Error waiting for pod to be scheduled in a different zone (%q): %v", otherZone, err)
 
     err = framework.WaitForStatefulSetReplicasReady(statefulSet.Name, ns, c, 3*time.Second, framework.RestartPodReadyAgainTimeout)
     if err != nil {
@@ -252,7 +253,6 @@ func testZonalFailover(c clientset.Interface, ns string) {
         "The same PVC should be used after failover.")
 
     By("verifying the container output has 2 lines, indicating the pod has been created twice using the same regional PD.")
-    pod = getPod(c, ns, regionalPDLabels)
     logs, err := framework.GetPodLogs(c, ns, pod.Name, "")
     Expect(err).NotTo(HaveOccurred(),
         "Error getting logs from pod %s in namespace %s", pod.Name, ns)
@@ -261,21 +261,40 @@ func testZonalFailover(c clientset.Interface, ns string) {
     Expect(lineCount).To(Equal(expectedLineCount),
         "Line count of the written file should be %d.", expectedLineCount)
 
-    // Verify the pod is scheduled in the other zone.
-    By("verifying the pod is scheduled in a different zone.")
-    var otherZone string
-    if cloudZones[0] == podZone {
-        otherZone = cloudZones[1]
-    } else {
-        otherZone = cloudZones[0]
-    }
-    nodeName = pod.Spec.NodeName
-    node, err = c.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
-    Expect(err).ToNot(HaveOccurred())
-    newPodZone := node.Labels[apis.LabelZoneFailureDomain]
-    Expect(newPodZone).To(Equal(otherZone),
-        "The pod should be scheduled in zone %s after all nodes in zone %s have been deleted", otherZone, podZone)
+}
 
+func addTaint(c clientset.Interface, ns string, nodes []v1.Node, podZone string) (removeTaint func()) {
+    reversePatches := make(map[string][]byte)
+    for _, node := range nodes {
+        oldData, err := json.Marshal(node)
+        Expect(err).NotTo(HaveOccurred())
+
+        node.Spec.Taints = append(node.Spec.Taints, v1.Taint{
+            Key:    taintKeyPrefix + ns,
+            Value:  podZone,
+            Effect: v1.TaintEffectNoSchedule,
+        })
+
+        newData, err := json.Marshal(node)
+        Expect(err).NotTo(HaveOccurred())
+
+        patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldData, newData, v1.Node{})
+        Expect(err).NotTo(HaveOccurred())
+
+        reversePatchBytes, err := strategicpatch.CreateTwoWayMergePatch(newData, oldData, v1.Node{})
+        Expect(err).NotTo(HaveOccurred())
+        reversePatches[node.Name] = reversePatchBytes
+
+        _, err = c.CoreV1().Nodes().Patch(node.Name, types.StrategicMergePatchType, patchBytes)
+        Expect(err).ToNot(HaveOccurred())
+    }
+
+    return func() {
+        for nodeName, reversePatch := range reversePatches {
+            _, err := c.CoreV1().Nodes().Patch(nodeName, types.StrategicMergePatchType, reversePatch)
+            Expect(err).ToNot(HaveOccurred())
+        }
+    }
 }
 
 func testRegionalDelayedBinding(c clientset.Interface, ns string) {