e2e: DaemonSet maxSurge test should account for pods
that are terminated by the test itself
This commit is contained in:
		@@ -585,10 +585,12 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
 | 
				
			|||||||
		nodes, err := c.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
 | 
							nodes, err := c.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
 | 
				
			||||||
		framework.ExpectNoError(err)
 | 
							framework.ExpectNoError(err)
 | 
				
			||||||
		nodeCount := len(nodes.Items)
 | 
							nodeCount := len(nodes.Items)
 | 
				
			||||||
		retryTimeout := dsRetryTimeout + time.Duration(nodeCount*30)*time.Second
 | 
							// We disturb daemonset progress by randomly terminating pods.
 | 
				
			||||||
 | 
							randomPodTerminationTimeout := 5 * time.Minute
 | 
				
			||||||
 | 
							retryTimeout := dsRetryTimeout + randomPodTerminationTimeout + time.Duration(nodeCount*30)*time.Second
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		ginkgo.By("Check that daemon pods surge and invariants are preserved during that rollout")
 | 
							ginkgo.By("Check that daemon pods surge and invariants are preserved during that rollout")
 | 
				
			||||||
		ageOfOldPod := make(map[string]time.Time)
 | 
							nodeToAgeOfOldPod := make(map[string]map[string]time.Time)
 | 
				
			||||||
		deliberatelyDeletedPods := sets.NewString()
 | 
							deliberatelyDeletedPods := sets.NewString()
 | 
				
			||||||
		err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, retryTimeout, true, func(ctx context.Context) (bool, error) {
 | 
							err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, retryTimeout, true, func(ctx context.Context) (bool, error) {
 | 
				
			||||||
			podList, err := c.CoreV1().Pods(ds.Namespace).List(ctx, metav1.ListOptions{})
 | 
								podList, err := c.CoreV1().Pods(ds.Namespace).List(ctx, metav1.ListOptions{})
 | 
				
			||||||
@@ -682,17 +684,25 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
 | 
				
			|||||||
				// if this is a pod in an older version AND there is a new version of this pod, record when
 | 
									// if this is a pod in an older version AND there is a new version of this pod, record when
 | 
				
			||||||
				// we started seeing this, otherwise delete the record (perhaps the node was drained)
 | 
									// we started seeing this, otherwise delete the record (perhaps the node was drained)
 | 
				
			||||||
				if nodesToVersions[pod.Spec.NodeName][newVersion] > 0 {
 | 
									if nodesToVersions[pod.Spec.NodeName][newVersion] > 0 {
 | 
				
			||||||
					if _, ok := ageOfOldPod[string(pod.UID)]; !ok {
 | 
										if _, ok := nodeToAgeOfOldPod[pod.Spec.NodeName][string(pod.UID)]; !ok {
 | 
				
			||||||
						ageOfOldPod[string(pod.UID)] = now
 | 
											if _, ok := nodeToAgeOfOldPod[pod.Spec.NodeName]; !ok {
 | 
				
			||||||
 | 
												nodeToAgeOfOldPod[pod.Spec.NodeName] = make(map[string]time.Time)
 | 
				
			||||||
 | 
											}
 | 
				
			||||||
 | 
											nodeToAgeOfOldPod[pod.Spec.NodeName][string(pod.UID)] = now
 | 
				
			||||||
					}
 | 
										}
 | 
				
			||||||
				} else {
 | 
									} else {
 | 
				
			||||||
					delete(ageOfOldPod, string(pod.UID))
 | 
										delete(nodeToAgeOfOldPod, pod.Spec.NodeName)
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
			// purge the old pods list of any deleted pods
 | 
								// purge the old pods list of any deleted pods
 | 
				
			||||||
			for uid := range ageOfOldPod {
 | 
								for node, uidToTime := range nodeToAgeOfOldPod {
 | 
				
			||||||
 | 
									for uid := range uidToTime {
 | 
				
			||||||
					if !podUIDs.Has(uid) {
 | 
										if !podUIDs.Has(uid) {
 | 
				
			||||||
					delete(ageOfOldPod, uid)
 | 
											delete(uidToTime, uid)
 | 
				
			||||||
 | 
										}
 | 
				
			||||||
 | 
									}
 | 
				
			||||||
 | 
									if len(uidToTime) == 0 {
 | 
				
			||||||
 | 
										delete(nodeToAgeOfOldPod, node)
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
			deliberatelyDeletedPods = deliberatelyDeletedPods.Intersection(deletedPodUIDs)
 | 
								deliberatelyDeletedPods = deliberatelyDeletedPods.Intersection(deletedPodUIDs)
 | 
				
			||||||
@@ -713,9 +723,11 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
 | 
				
			|||||||
			}
 | 
								}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			// invariant: the controller must react to the new pod becoming ready within a reasonable timeframe (2x grace period)
 | 
								// invariant: the controller must react to the new pod becoming ready within a reasonable timeframe (2x grace period)
 | 
				
			||||||
			for uid, firstSeen := range ageOfOldPod {
 | 
								for node, uidToTime := range nodeToAgeOfOldPod {
 | 
				
			||||||
				if now.Sub(firstSeen) > maxSurgeOverlap {
 | 
									for uid, firstSeenSinceNewVersionPod := range uidToTime {
 | 
				
			||||||
					errs = append(errs, fmt.Sprintf("An old pod with UID %s has been running alongside a newer version for longer than %s", uid, maxSurgeOverlap))
 | 
										if now.Sub(firstSeenSinceNewVersionPod) > maxSurgeOverlap {
 | 
				
			||||||
 | 
											errs = append(errs, fmt.Sprintf("An old pod with UID %s on a node %s has been running alongside a newer version for longer than %s", uid, node, maxSurgeOverlap))
 | 
				
			||||||
 | 
										}
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -800,6 +812,9 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
 | 
				
			|||||||
							} else {
 | 
												} else {
 | 
				
			||||||
								framework.Logf("Deleted pod %s prematurely", pod.Name)
 | 
													framework.Logf("Deleted pod %s prematurely", pod.Name)
 | 
				
			||||||
								deliberatelyDeletedPods.Insert(string(pod.UID))
 | 
													deliberatelyDeletedPods.Insert(string(pod.UID))
 | 
				
			||||||
 | 
													// If it is an old version, we do not need to measure the controller reaction because the test deleted the pod itself.
 | 
				
			||||||
 | 
													// If it is a new version, we have to reset the time to start counting the time for the replacement pod to reach readiness again.
 | 
				
			||||||
 | 
													delete(nodeToAgeOfOldPod, pod.Spec.NodeName)
 | 
				
			||||||
							}
 | 
												}
 | 
				
			||||||
						}
 | 
											}
 | 
				
			||||||
					}
 | 
										}
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user