Merge pull request #116724 from tallclair/gc-test-flake
Deflake GC e2e test
@@ -38,6 +38,7 @@ import (
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/apiserver/pkg/storage/names"
	clientset "k8s.io/client-go/kubernetes"
	clientv1 "k8s.io/client-go/kubernetes/typed/core/v1"
	"k8s.io/kubernetes/test/e2e/framework"
	e2edeployment "k8s.io/kubernetes/test/e2e/framework/deployment"
	e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics"
@@ -98,7 +99,9 @@ func getOrphanOptions() metav1.DeleteOptions {
var (
	zero       = int64(0)
	lablecount = int64(0)
)

const (
	// The GC controller periodically rediscovers available APIs and syncs running informers for those resources.
	// If previously available APIs are removed during that resync process, the sync process can fail and need to be retried.
	//
@@ -108,6 +111,12 @@ var (
	// This timeout covers two resync/retry periods, and should be added to wait timeouts to account for delays
	// to the GC controller caused by API changes in other tests.
	gcInformerResyncRetryTimeout = time.Minute

	// Many operations in these tests are per-replica and may require 100 mutating requests. The
	// default client QPS of a controller is 5. If the qps is saturated, it will take 20s to complete
	// 100 requests. The e2e tests are running in parallel, so a controller might be stuck
	// processing other tests.
	replicaSyncTimeout = 2 * time.Minute
)

func getPodTemplateSpec(labels map[string]string) v1.PodTemplateSpec {
@@ -380,19 +389,8 @@ var _ = SIGDescribe("Garbage collector", func() {
			framework.Failf("Failed to create replication controller: %v", err)
		}
		// wait for rc to create pods
		if err := wait.PollWithContext(ctx, 5*time.Second, 30*time.Second, func(ctx context.Context) (bool, error) {
			rc, err := rcClient.Get(ctx, rc.Name, metav1.GetOptions{})
			if err != nil {
				return false, fmt.Errorf("failed to get rc: %w", err)
			}
			if rc.Status.Replicas == *rc.Spec.Replicas {
				return true, nil
			}
			return false, nil
		}); err != nil {
			framework.Failf("failed to wait for the rc.Status.Replicas to reach rc.Spec.Replicas: %v", err)
		}
		waitForReplicas(ctx, rc, rcClient)

		ginkgo.By("delete the rc")
		deleteOptions := getOrphanOptions()
		deleteOptions.Preconditions = metav1.NewUIDPreconditions(string(rc.UID))
@@ -449,18 +447,8 @@ var _ = SIGDescribe("Garbage collector", func() {
			framework.Failf("Failed to create replication controller: %v", err)
		}
		// wait for rc to create some pods
		if err := wait.PollWithContext(ctx, 5*time.Second, 30*time.Second, func(ctx context.Context) (bool, error) {
			rc, err := rcClient.Get(ctx, rc.Name, metav1.GetOptions{})
			if err != nil {
				return false, fmt.Errorf("failed to get rc: %w", err)
			}
			if rc.Status.Replicas == *rc.Spec.Replicas {
				return true, nil
			}
			return false, nil
		}); err != nil {
			framework.Failf("failed to wait for the rc.Status.Replicas to reach rc.Spec.Replicas: %v", err)
		}
		waitForReplicas(ctx, rc, rcClient)

		ginkgo.By("delete the rc")
		deleteOptions := metav1.DeleteOptions{
			Preconditions: metav1.NewUIDPreconditions(string(rc.UID)),
@@ -660,18 +648,8 @@ var _ = SIGDescribe("Garbage collector", func() {
			framework.Failf("Failed to create replication controller: %v", err)
		}
		// wait for rc to create pods
		if err := wait.PollWithContext(ctx, 5*time.Second, 30*time.Second, func(ctx context.Context) (bool, error) {
			rc, err := rcClient.Get(ctx, rc.Name, metav1.GetOptions{})
			if err != nil {
				return false, fmt.Errorf("failed to get rc: %w", err)
			}
			if rc.Status.Replicas == *rc.Spec.Replicas {
				return true, nil
			}
			return false, nil
		}); err != nil {
			framework.Failf("failed to wait for the rc.Status.Replicas to reach rc.Spec.Replicas: %v", err)
		}
		waitForReplicas(ctx, rc, rcClient)

		ginkgo.By("delete the rc")
		deleteOptions := getForegroundOptions()
		deleteOptions.Preconditions = metav1.NewUIDPreconditions(string(rc.UID))
@@ -755,18 +733,8 @@ var _ = SIGDescribe("Garbage collector", func() {
			framework.Failf("Failed to create replication controller: %v", err)
		}
		// wait for rc1 to be stable
		if err := wait.PollWithContext(ctx, 5*time.Second, 30*time.Second, func(ctx context.Context) (bool, error) {
			rc1, err := rcClient.Get(ctx, rc1.Name, metav1.GetOptions{})
			if err != nil {
				return false, fmt.Errorf("failed to get rc: %w", err)
			}
			if rc1.Status.Replicas == *rc1.Spec.Replicas {
				return true, nil
			}
			return false, nil
		}); err != nil {
			framework.Failf("failed to wait for the rc.Status.Replicas to reach rc.Spec.Replicas: %v", err)
		}
		waitForReplicas(ctx, rc1, rcClient)

		ginkgo.By(fmt.Sprintf("set half of pods created by rc %s to have rc %s as owner as well", rc1Name, rc2Name))
		pods, err := podClient.List(ctx, metav1.ListOptions{})
		framework.ExpectNoError(err, "failed to list pods in namespace: %s", f.Namespace.Name)
@@ -1134,7 +1102,7 @@ var _ = SIGDescribe("Garbage collector", func() {

		// Wait 30s and ensure the dependent is not deleted.
		ginkgo.By("wait for 30 seconds to see if the garbage collector mistakenly deletes the dependent crd")
		if err := wait.PollWithContext(ctx, 5*time.Second, 30*time.Second, func(ctx context.Context) (bool, error) {
		if err := wait.PollWithContext(ctx, 5*time.Second, 30*time.Second+gcInformerResyncRetryTimeout, func(ctx context.Context) (bool, error) {
			_, err := resourceClient.Get(ctx, dependentName, metav1.GetOptions{})
			return false, err
		}); err != nil && err != wait.ErrWaitTimeout {
@@ -1177,3 +1145,28 @@ var _ = SIGDescribe("Garbage collector", func() {
		gatherMetrics(ctx, f)
	})
})

// TODO(106575): Migrate away from generic polling function.
func waitForReplicas(ctx context.Context, rc *v1.ReplicationController, rcClient clientv1.ReplicationControllerInterface) {
	var (
		lastObservedRC *v1.ReplicationController
		err            error
	)
	if err := wait.PollWithContext(ctx, framework.Poll, replicaSyncTimeout, func(ctx context.Context) (bool, error) {
		lastObservedRC, err = rcClient.Get(ctx, rc.Name, metav1.GetOptions{})
		if err != nil {
			return false, err
		}
		if lastObservedRC.Status.Replicas == *rc.Spec.Replicas {
			return true, nil
		}
		return false, nil
	}); err != nil {
		if lastObservedRC == nil {
			framework.Failf("Failed to get ReplicationController %q: %v", rc.Name, err)
		} else {
			framework.Failf("failed to wait for the rc.Status.Replicas (%d) to reach rc.Spec.Replicas (%d): %v",
				lastObservedRC.Status.Replicas, *lastObservedRC.Spec.Replicas, err)
		}
	}
}
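For reference, the new 2-minute replicaSyncTimeout follows from the arithmetic in the comment it carries: roughly 100 per-replica mutating requests at the default controller client QPS of 5 take about 20 seconds when the client-side rate limiter is saturated, and the larger budget absorbs contention from parallel e2e tests. The minimal Go sketch below is illustrative only (the constants mutatingRequests and clientQPS are assumptions mirroring that comment, not code from this change) and just walks through the calculation.

// Minimal sketch, assuming ~100 per-replica mutating requests and the default
// controller client QPS of 5, as described in the replicaSyncTimeout comment.
package main

import (
	"fmt"
	"time"
)

func main() {
	const (
		mutatingRequests = 100 // assumed rough upper bound on per-replica requests in these tests
		clientQPS        = 5   // default controller client QPS
	)
	// Time needed to drain the requests when the client-side rate limiter is saturated.
	saturated := time.Duration(mutatingRequests/clientQPS) * time.Second
	fmt.Println(saturated) // 20s

	// The test budget is several times that, leaving headroom for parallel tests
	// competing for the same controller.
	replicaSyncTimeout := 2 * time.Minute
	fmt.Printf("headroom: %.0fx\n", replicaSyncTimeout.Seconds()/saturated.Seconds()) // 6x
}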