Merge pull request #24434 from gmarek/services
Automatic merge from submit-queue

Create multiple RCs in NC - prerequisite for adding services

WIP because I need to make the logs readable again.
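For orientation, here is a minimal standalone sketch (not part of the diff; names and numbers are illustrative) of the pattern the change introduces in the density test: build several RC configs and start them concurrently, waiting for all of them before measuring startup time.

// Sketch of the fan-out pattern used in the diff below; runRC stands in for
// framework.RunRC(config), i.e. "create the RC and block until all of its
// replicas are running". Names and numbers are hypothetical.
package main

import (
    "fmt"
    "sync"
    "time"
)

func runRC(name string) {
    time.Sleep(100 * time.Millisecond) // pretend work
    fmt.Println("started", name)
}

func main() {
    rcNames := []string{"density3000-0-abc", "density3000-1-abc"}
    start := time.Now()
    var wg sync.WaitGroup
    wg.Add(len(rcNames))
    for _, name := range rcNames {
        go func(name string) {
            defer wg.Done()
            runRC(name)
        }(name)
    }
    wg.Wait()
    fmt.Println("E2E startup time:", time.Since(start))
}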
@@ -84,6 +84,26 @@ func density30AddonResourceVerifier() map[string]framework.ResourceConstraint {
     return constraints
 }
 
+func logPodStartupStatus(c *client.Client, expectedPods int, ns string, observedLabels map[string]string, period time.Duration, stopCh chan struct{}) {
+    label := labels.SelectorFromSet(labels.Set(observedLabels))
+    podStore := framework.NewPodStore(c, ns, label, fields.Everything())
+    defer podStore.Stop()
+    ticker := time.NewTicker(period)
+    for {
+        select {
+        case <-ticker.C:
+            pods := podStore.List()
+            startupStatus := framework.ComputeRCStartupStatus(pods, expectedPods)
+            startupStatus.Print("Density")
+        case <-stopCh:
+            pods := podStore.List()
+            startupStatus := framework.ComputeRCStartupStatus(pods, expectedPods)
+            startupStatus.Print("Density")
+            return
+        }
+    }
+}
+
 // This test suite can take a long time to run, and can affect or be affected by other tests.
 // So by default it is added to the ginkgo.skip list (see driver.go).
 // To run this suite you must explicitly ask for it by setting the
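The helper above is the usual ticker-plus-stop-channel loop. A self-contained sketch of the same shape, with no Kubernetes client and illustrative names only (unlike the helper, this sketch also stops its ticker when it returns):

package main

import (
    "fmt"
    "time"
)

// logUntilStopped prints a status line every period until stopCh is closed,
// then prints one final line and returns.
func logUntilStopped(period time.Duration, stopCh chan struct{}) {
    ticker := time.NewTicker(period)
    defer ticker.Stop() // release the ticker once we are done
    for {
        select {
        case <-ticker.C:
            fmt.Println("periodic status")
        case <-stopCh:
            fmt.Println("final status")
            return
        }
    }
}

func main() {
    stopCh := make(chan struct{})
    go logUntilStopped(100*time.Millisecond, stopCh)
    time.Sleep(350 * time.Millisecond)
    close(stopCh) // closing the channel triggers the final status line
    time.Sleep(50 * time.Millisecond)
}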
@@ -185,7 +205,7 @@ var _ = framework.KubeDescribe("Density", func() {
         {podsPerNode: 30, runLatencyTest: true, interval: 10 * time.Second},
         {podsPerNode: 50, runLatencyTest: false, interval: 10 * time.Second},
         {podsPerNode: 95, runLatencyTest: true, interval: 10 * time.Second},
-        {podsPerNode: 100, runLatencyTest: false, interval: 1 * time.Second},
+        {podsPerNode: 100, runLatencyTest: false, interval: 10 * time.Second},
     }
 
     for _, testArg := range densityTests {
@@ -201,22 +221,29 @@ var _ = framework.KubeDescribe("Density", func() {
         }
         itArg := testArg
         It(name, func() {
-            podsPerNode := itArg.podsPerNode
-            totalPods = podsPerNode * nodeCount
-            RCName = "density" + strconv.Itoa(totalPods) + "-" + uuid
             fileHndl, err := os.Create(fmt.Sprintf(framework.TestContext.OutputDir+"/%s/pod_states.csv", uuid))
             framework.ExpectNoError(err)
             defer fileHndl.Close()
-            config := framework.RCConfig{Client: c,
-                Image:                "gcr.io/google_containers/pause:2.0",
-                Name:                 RCName,
-                Namespace:            ns,
-                PollInterval:         itArg.interval,
-                PodStatusFile:        fileHndl,
-                Replicas:             totalPods,
-                CpuRequest:           nodeCpuCapacity / 100,
-                MemRequest:           nodeMemCapacity / 100,
-                MaxContainerFailures: &MaxContainerFailures,
+            podsPerNode := itArg.podsPerNode
+            totalPods = podsPerNode * nodeCount
+            // TODO: loop to podsPerNode instead of 1 when we're ready.
+            numberOrRCs := 1
+            RCConfigs := make([]framework.RCConfig, numberOrRCs)
+            for i := 0; i < numberOrRCs; i++ {
+                RCName = "density" + strconv.Itoa(totalPods) + "-" + strconv.Itoa(i) + "-" + uuid
+                RCConfigs[i] = framework.RCConfig{Client: c,
+                    Image:                "gcr.io/google_containers/pause:2.0",
+                    Name:                 RCName,
+                    Namespace:            ns,
+                    Labels:               map[string]string{"type": "densityPod"},
+                    PollInterval:         itArg.interval,
+                    PodStatusFile:        fileHndl,
+                    Replicas:             (totalPods + numberOrRCs - 1) / numberOrRCs,
+                    CpuRequest:           nodeCpuCapacity / 100,
+                    MemRequest:           nodeMemCapacity / 100,
+                    MaxContainerFailures: &MaxContainerFailures,
+                    Silent:               true,
+                }
             }
 
             // Create a listener for events.
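To make the `Replicas` arithmetic above concrete, a small sketch with hypothetical counts; the diff itself still uses `numberOrRCs = 1`, so the single RC currently receives all `totalPods` replicas, but the ceiling division is what lets the per-RC counts always cover the total once more RCs are used:

package main

import "fmt"

func main() {
    // Hypothetical pod counts and RC counts, showing (totalPods + n - 1) / n.
    for _, tc := range []struct{ totalPods, numberOfRCs int }{{3000, 1}, {3000, 4}, {1000, 3}} {
        replicas := (tc.totalPods + tc.numberOfRCs - 1) / tc.numberOfRCs
        fmt.Printf("%d pods over %d RCs -> %d replicas each (%d in total)\n",
            tc.totalPods, tc.numberOfRCs, replicas, replicas*tc.numberOfRCs)
    }
}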
@@ -249,7 +276,7 @@ var _ = framework.KubeDescribe("Density", func() {
             // uLock is a lock protects the updateCount
             var uLock sync.Mutex
             updateCount := 0
-            label := labels.SelectorFromSet(labels.Set(map[string]string{"name": RCName}))
+            label := labels.SelectorFromSet(labels.Set(map[string]string{"type": "densityPod"}))
             _, updateController := controllerframework.NewInformer(
                 &cache.ListWatch{
                     ListFunc: func(options api.ListOptions) (runtime.Object, error) {
@@ -273,10 +300,22 @@ var _ = framework.KubeDescribe("Density", func() {
             )
             go updateController.Run(stop)
 
-            // Start the replication controller.
+            // Start all replication controllers.
             startTime := time.Now()
-            framework.ExpectNoError(framework.RunRC(config))
+            wg := sync.WaitGroup{}
+            wg.Add(len(RCConfigs))
+            for i := range RCConfigs {
+                rcConfig := RCConfigs[i]
+                go func() {
+                    framework.ExpectNoError(framework.RunRC(rcConfig))
+                    wg.Done()
+                }()
+            }
+            logStopCh := make(chan struct{})
+            go logPodStartupStatus(c, totalPods, ns, map[string]string{"type": "densityPod"}, itArg.interval, logStopCh)
+            wg.Wait()
             e2eStartupTime = time.Now().Sub(startTime)
+            close(logStopCh)
             framework.Logf("E2E startup time for %d pods: %v", totalPods, e2eStartupTime)
             framework.Logf("Throughput (pods/s) during cluster saturation phase: %v", float32(totalPods)/float32(e2eStartupTime/time.Second))
 
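The throughput figure logged above divides the pod count by whole elapsed seconds; `e2eStartupTime/time.Second` is integer division of durations, so sub-second remainders are truncated. A small sketch with hypothetical numbers:

package main

import (
    "fmt"
    "time"
)

func main() {
    totalPods := 3000
    e2eStartupTime := 10*time.Minute + 37*time.Second // 637 whole seconds
    throughput := float32(totalPods) / float32(e2eStartupTime/time.Second)
    fmt.Printf("Throughput (pods/s) during cluster saturation phase: %v\n", throughput) // ~4.71
}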
@@ -506,11 +545,14 @@ var _ = framework.KubeDescribe("Density", func() {
 
             By("Deleting ReplicationController")
             // We explicitly delete all pods to have API calls necessary for deletion accounted in metrics.
-            rc, err := c.ReplicationControllers(ns).Get(RCName)
-            if err == nil && rc.Spec.Replicas != 0 {
-                By("Cleaning up the replication controller")
-                err := framework.DeleteRC(c, ns, RCName)
-                framework.ExpectNoError(err)
+            for i := range RCConfigs {
+                rcName := RCConfigs[i].Name
+                rc, err := c.ReplicationControllers(ns).Get(rcName)
+                if err == nil && rc.Spec.Replicas != 0 {
+                    By("Cleaning up the replication controller")
+                    err := framework.DeleteRC(c, ns, rcName)
+                    framework.ExpectNoError(err)
+                }
             }
 
             By("Removing additional replication controllers if any")
|   | |||||||
@@ -242,6 +242,9 @@ type RCConfig struct {
     // Maximum allowable container failures. If exceeded, RunRC returns an error.
     // Defaults to replicas*0.1 if unspecified.
     MaxContainerFailures *int
+
+    // If set to false starting RC will print progress, otherwise only errors will be printed.
+    Silent bool
 }
 
 type DeploymentConfig struct {
@@ -1934,6 +1937,70 @@ func (config *RCConfig) applyTo(template *api.PodTemplateSpec) {
     }
 }
 
+type RCStartupStatus struct {
+    Expected              int
+    Terminating           int
+    Running               int
+    RunningButNotReady    int
+    Waiting               int
+    Pending               int
+    Unknown               int
+    Inactive              int
+    FailedContainers      int
+    Created               []*api.Pod
+    ContainerRestartNodes sets.String
+}
+
+func (s *RCStartupStatus) Print(name string) {
+    Logf("%v Pods: %d out of %d created, %d running, %d pending, %d waiting, %d inactive, %d terminating, %d unknown, %d runningButNotReady ",
+        name, len(s.Created), s.Expected, s.Running, s.Pending, s.Waiting, s.Inactive, s.Terminating, s.Unknown, s.RunningButNotReady)
+}
+
+func ComputeRCStartupStatus(pods []*api.Pod, expected int) RCStartupStatus {
+    startupStatus := RCStartupStatus{
+        Expected:              expected,
+        Created:               make([]*api.Pod, 0, expected),
+        ContainerRestartNodes: sets.NewString(),
+    }
+    for _, p := range pods {
+        if p.DeletionTimestamp != nil {
+            startupStatus.Terminating++
+            continue
+        }
+        startupStatus.Created = append(startupStatus.Created, p)
+        if p.Status.Phase == api.PodRunning {
+            ready := false
+            for _, c := range p.Status.Conditions {
+                if c.Type == api.PodReady && c.Status == api.ConditionTrue {
+                    ready = true
+                    break
+                }
+            }
+            if ready {
+                // Only count a pod is running when it is also ready.
+                startupStatus.Running++
+            } else {
+                startupStatus.RunningButNotReady++
+            }
+            for _, v := range FailedContainers(p) {
+                startupStatus.FailedContainers = startupStatus.FailedContainers + v.Restarts
+                startupStatus.ContainerRestartNodes.Insert(p.Spec.NodeName)
+            }
+        } else if p.Status.Phase == api.PodPending {
+            if p.Spec.NodeName == "" {
+                startupStatus.Waiting++
+            } else {
+                startupStatus.Pending++
+            }
+        } else if p.Status.Phase == api.PodSucceeded || p.Status.Phase == api.PodFailed {
+            startupStatus.Inactive++
+        } else if p.Status.Phase == api.PodUnknown {
+            startupStatus.Unknown++
+        }
+    }
+    return startupStatus
+}
+
 func (config *RCConfig) start() error {
     // Don't force tests to fail if they don't care about containers restarting.
     var maxContainerFailures int
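For reference, a stand-alone snippet that reproduces the message body `RCStartupStatus.Print` formats (counts are hypothetical; `Logf` adds its own prefix when the test framework logs it):

package main

import "fmt"

func main() {
    // Same format string as Print, with made-up counts for a "Density" run.
    fmt.Printf("%v Pods: %d out of %d created, %d running, %d pending, %d waiting, %d inactive, %d terminating, %d unknown, %d runningButNotReady \n",
        "Density", 120, 3000, 80, 30, 10, 0, 0, 0, 0)
}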
@@ -1962,74 +2029,28 @@ func (config *RCConfig) start() error {
     for oldRunning != config.Replicas {
         time.Sleep(interval)
 
-        terminating := 0
-
-        running := 0
-        runningButNotReady := 0
-        waiting := 0
-        pending := 0
-        unknown := 0
-        inactive := 0
-        failedContainers := 0
-        containerRestartNodes := sets.NewString()
-
         pods := PodStore.List()
-        created := []*api.Pod{}
-        for _, p := range pods {
-            if p.DeletionTimestamp != nil {
-                terminating++
-                continue
-            }
-            created = append(created, p)
-            if p.Status.Phase == api.PodRunning {
-                ready := false
-                for _, c := range p.Status.Conditions {
-                    if c.Type == api.PodReady && c.Status == api.ConditionTrue {
-                        ready = true
-                        break
-                    }
-                }
-                if ready {
-                    // Only count a pod is running when it is also ready.
-                    running++
-                } else {
-                    runningButNotReady++
-                }
-                for _, v := range FailedContainers(p) {
-                    failedContainers = failedContainers + v.Restarts
-                    containerRestartNodes.Insert(p.Spec.NodeName)
-                }
-            } else if p.Status.Phase == api.PodPending {
-                if p.Spec.NodeName == "" {
-                    waiting++
-                } else {
-                    pending++
-                }
-            } else if p.Status.Phase == api.PodSucceeded || p.Status.Phase == api.PodFailed {
-                inactive++
-            } else if p.Status.Phase == api.PodUnknown {
-                unknown++
-            }
-        }
-        pods = created
+        startupStatus := ComputeRCStartupStatus(pods, config.Replicas)
+
+        pods = startupStatus.Created
         if config.CreatedPods != nil {
             *config.CreatedPods = pods
         }
-
-        Logf("%v Pods: %d out of %d created, %d running, %d pending, %d waiting, %d inactive, %d terminating, %d unknown, %d runningButNotReady ",
-            config.Name, len(pods), config.Replicas, running, pending, waiting, inactive, terminating, unknown, runningButNotReady)
-
-        promPushRunningPending(running, pending)
-
-        if config.PodStatusFile != nil {
-            fmt.Fprintf(config.PodStatusFile, "%d, running, %d, pending, %d, waiting, %d, inactive, %d, unknown, %d, runningButNotReady\n", running, pending, waiting, inactive, unknown, runningButNotReady)
-        }
-
-        if failedContainers > maxContainerFailures {
-            DumpNodeDebugInfo(config.Client, containerRestartNodes.List())
+        if !config.Silent {
+            startupStatus.Print(config.Name)
+        }
+
+        promPushRunningPending(startupStatus.Running, startupStatus.Pending)
+
+        if config.PodStatusFile != nil {
+            fmt.Fprintf(config.PodStatusFile, "%d, running, %d, pending, %d, waiting, %d, inactive, %d, unknown, %d, runningButNotReady\n", startupStatus.Running, startupStatus.Pending, startupStatus.Waiting, startupStatus.Inactive, startupStatus.Unknown, startupStatus.RunningButNotReady)
+        }
+
+        if startupStatus.FailedContainers > maxContainerFailures {
+            DumpNodeDebugInfo(config.Client, startupStatus.ContainerRestartNodes.List())
             // Get the logs from the failed containers to help diagnose what caused them to fail
             LogFailedContainers(config.Namespace)
-            return fmt.Errorf("%d containers failed which is more than allowed %d", failedContainers, maxContainerFailures)
+            return fmt.Errorf("%d containers failed which is more than allowed %d", startupStatus.FailedContainers, maxContainerFailures)
         }
         if len(pods) < len(oldPods) || len(pods) > config.Replicas {
             // This failure mode includes:
@@ -2043,11 +2064,11 @@ func (config *RCConfig) start() error {
             return fmt.Errorf(errorStr)
         }
 
-        if len(pods) > len(oldPods) || running > oldRunning {
+        if len(pods) > len(oldPods) || startupStatus.Running > oldRunning {
             lastChange = time.Now()
         }
         oldPods = pods
-        oldRunning = running
+        oldRunning = startupStatus.Running
 
         if time.Since(lastChange) > timeout {
             dumpPodDebugInfo(config.Client, pods)
|   | |||||||