component tests as integration-tests with short option.

2016-09-12 15:42:54 -04:00
parent dbdaf2c22b
commit a1481f5a3e
7 changed files with 25 additions and 7 deletions
--- a/test/integration/framework/etcd_utils.go
+++ b/test/integration/framework/etcd_utils.go
@@ -30,9 +30,8 @@ import (
 // If you need to start an etcd instance by hand, you also need to insert a key
 // for this check to pass (*any* key will do, eg:
 //curl -L http://127.0.0.1:2379/v2/keys/message -XPUT -d value="Hello world").
-func init() {
-	RequireEtcd()
-}
+
+// testing_etcd is true only while RequireEtcd is running its connectivity
+// check: RequireEtcd sets it on entry and resets it on return, and
+// NewEtcdClient reads it to decide whether to call RequireEtcd.
+var testing_etcd = false

 func GetEtcdURLFromEnv() string {
 	url := env.GetEnvAsStringOrFallback("KUBE_INTEGRATION_ETCD_URL", "http://127.0.0.1:2379")
@@ -41,6 +40,10 @@ func GetEtcdURLFromEnv() string {
 }

 func NewEtcdClient() etcd.Client {
+	// guarded to avoid infinite recursion, check etcd.
+	if testing_etcd {
+		RequireEtcd()
+	}
 	cfg := etcd.Config{
 		Endpoints: []string{GetEtcdURLFromEnv()},
 	}
@@ -52,9 +55,14 @@ func NewEtcdClient() etcd.Client {
 }

 func RequireEtcd() {
+	testing_etcd = true
+	defer func() {
+		testing_etcd = false
+	}()
 	if _, err := etcd.NewKeysAPI(NewEtcdClient()).Get(context.TODO(), "/", nil); err != nil {
 		glog.Fatalf("unable to connect to etcd for testing: %v", err)
 	}
+
 }

 func WithEtcdKey(f func(string)) {
--- a/test/integration/scheduler_perf/README.md
+++ b/test/integration/scheduler_perf/README.md
@@ -0,0 +1,78 @@
+<!-- BEGIN MUNGE: UNVERSIONED_WARNING -->
+
+<!-- BEGIN STRIP_FOR_RELEASE -->
+
+<img src="http://kubernetes.io/kubernetes/img/warning.png" alt="WARNING"
+     width="25" height="25">
+<img src="http://kubernetes.io/kubernetes/img/warning.png" alt="WARNING"
+     width="25" height="25">
+<img src="http://kubernetes.io/kubernetes/img/warning.png" alt="WARNING"
+     width="25" height="25">
+<img src="http://kubernetes.io/kubernetes/img/warning.png" alt="WARNING"
+     width="25" height="25">
+<img src="http://kubernetes.io/kubernetes/img/warning.png" alt="WARNING"
+     width="25" height="25">
+
+<h2>PLEASE NOTE: This document applies to the HEAD of the source tree</h2>
+
+If you are using a released version of Kubernetes, you should
+refer to the docs that go with that version.
+
+<strong>
+The latest release of this document can be found
+[here](http://releases.k8s.io/release-1.1/docs/proposals/choosing-scheduler.md).
+
+Documentation for other releases can be found at
+[releases.k8s.io](http://releases.k8s.io).
+</strong>
+--
+
+<!-- END STRIP_FOR_RELEASE -->
+
+<!-- END MUNGE: UNVERSIONED_WARNING -->
+
+Scheduler Performance Test
+======
+
+Motivation
+------
+We already have a performance testing system -- Kubemark. However, Kubemark requires setting up and bootstrapping a whole cluster, which takes a lot of time.
+
+We want a standard way to reproduce scheduling latency metrics and to benchmark the scheduler as simply and quickly as possible. We have the following goals:
+
+- Save time on testing
+  - The test and benchmark can be run in a single box.
+    We only set up the components necessary for scheduling, without booting up a cluster.
+- Profile runtime metrics to find bottlenecks
+  - Write scheduler integration tests that focus on performance measurement.
+    Take advantage of go profiling tools and collect fine-grained metrics,
+    like cpu-profiling, memory-profiling and block-profiling.
+- Reproduce test result easily
+  - We want to have a known place to do the performance related test for scheduler.
+    Developers should just run one script to collect all the information they need.
+
+Currently the test suite has the following:
+
+- density test (by adding a new Go test)
+  - schedule 30k pods on 1000 (fake) nodes and 3k pods on 100 (fake) nodes
+  - print out scheduling rate every second
+  - lets you see how the rate changes with the number of scheduled pods
+- benchmark
+  - make use of `go test -bench` and report nanosecond/op.
+  - schedule b.N pods when the cluster has N nodes and P scheduled pods. Since it takes a relatively long time to finish one round, b.N is small: 10 - 100.
+
+
+How To Run
+------
+```
+cd kubernetes/test/integration/scheduler_perf
+./test-performance.sh
+```
+
+
+<!-- BEGIN MUNGE: GENERATED_ANALYTICS -->
+[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/test/component/scheduler/perf/README.md?pixel)]()
+<!-- END MUNGE: GENERATED_ANALYTICS -->
+
+
+[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/test/integration/scheduler_perf/README.md?pixel)]()
--- a/test/integration/scheduler_perf/scheduler_bench_test.go
+++ b/test/integration/scheduler_perf/scheduler_bench_test.go
@@ -0,0 +1,79 @@
+/*
+Copyright 2015 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package benchmark
+
+import (
+	"testing"
+	"time"
+)
+
+// BenchmarkScheduling100Nodes0Pods benchmarks the scheduling rate
+// when the cluster has 100 nodes and 0 scheduled pods.
+// It delegates to benchmarkScheduling(100, 0, b).
+func BenchmarkScheduling100Nodes0Pods(b *testing.B) {
+	benchmarkScheduling(100, 0, b)
+}
+
+// BenchmarkScheduling100Nodes1000Pods benchmarks the scheduling rate
+// when the cluster has 100 nodes and 1000 scheduled pods.
+// It delegates to benchmarkScheduling(100, 1000, b).
+func BenchmarkScheduling100Nodes1000Pods(b *testing.B) {
+	benchmarkScheduling(100, 1000, b)
+}
+
+// BenchmarkScheduling1000Nodes0Pods benchmarks the scheduling rate
+// when the cluster has 1000 nodes and 0 scheduled pods.
+// It delegates to benchmarkScheduling(1000, 0, b).
+func BenchmarkScheduling1000Nodes0Pods(b *testing.B) {
+	benchmarkScheduling(1000, 0, b)
+}
+
+// BenchmarkScheduling1000Nodes1000Pods benchmarks the scheduling rate
+// when the cluster has 1000 nodes and 1000 scheduled pods.
+// It delegates to benchmarkScheduling(1000, 1000, b).
+func BenchmarkScheduling1000Nodes1000Pods(b *testing.B) {
+	benchmarkScheduling(1000, 1000, b)
+}
+
+// benchmarkScheduling measures how long it takes to schedule b.N pods on a
+// cluster of numNodes nodes that already has numScheduledPods pods scheduled.
+// Setup (scheduler, nodes, and the pre-scheduled pods) happens before the
+// timer is reset, so only the b.N pods are timed. Because one operation takes
+// a relatively long time, b.N should be small: 10 - 100.
+func benchmarkScheduling(numNodes, numScheduledPods int, b *testing.B) {
+	cfgFactory, teardown := mustSetupScheduler()
+	defer teardown()
+	cli := cfgFactory.Client
+
+	makeNodes(cli, numNodes)
+	makePodsFromRC(cli, "rc1", numScheduledPods)
+	// Wait, polling once per second, until the pre-existing pods are scheduled.
+	for len(cfgFactory.ScheduledPodLister.Indexer.List()) < numScheduledPods {
+		time.Sleep(1 * time.Second)
+	}
+
+	// start benchmark
+	b.ResetTimer()
+	makePodsFromRC(cli, "rc2", b.N)
+	// This can potentially affect performance of scheduler, since List() is done under mutex.
+	// TODO: Setup watch on apiserver and wait until all pods scheduled.
+	for len(cfgFactory.ScheduledPodLister.Indexer.List()) < numScheduledPods+b.N {
+		// Note: This might introduce slight deviation in accuracy of benchmark results.
+		// Since the total amount of time is relatively large, it might not be a concern.
+		time.Sleep(100 * time.Millisecond)
+	}
+}
--- a/test/integration/scheduler_perf/scheduler_test.go
+++ b/test/integration/scheduler_perf/scheduler_test.go
@@ -0,0 +1,67 @@
+/*
+Copyright 2015 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package benchmark
+
+import (
+	"fmt"
+	"testing"
+	"time"
+)
+
+// TestSchedule100Node3KPods schedules 3k pods on 100 nodes.
+// It is skipped when tests run in short mode (testing.Short()).
+func TestSchedule100Node3KPods(t *testing.T) {
+	if testing.Short() {
+		t.Skip("Skipping because we want to run short tests")
+	}
+	schedulePods(100, 3000)
+}
+
+// TestSchedule1000Node30KPods schedules 30k pods on 1000 nodes.
+// It is skipped when tests run in short mode (testing.Short()).
+func TestSchedule1000Node30KPods(t *testing.T) {
+	if testing.Short() {
+		t.Skip("Skipping because we want to run short tests")
+	}
+	schedulePods(1000, 30000)
+}
+
+// schedulePods brings up a scheduler, creates numNodes nodes and numPods pods,
+// and then prints a progress line once per second: elapsed seconds, pods
+// scheduled since the previous line, and the running total. The output shows
+// how throughput varies with cluster size and as more pods get scheduled.
+// It returns only once at least numPods pods have been scheduled.
+func schedulePods(numNodes, numPods int) {
+	cfgFactory, teardown := mustSetupScheduler()
+	defer teardown()
+	cli := cfgFactory.Client
+
+	makeNodes(cli, numNodes)
+	makePodsFromRC(cli, "rc1", numPods)
+
+	began := time.Now()
+	for lastTotal := 0; ; time.Sleep(1 * time.Second) {
+		// This can potentially affect performance of scheduler, since List() is done under mutex.
+		// Listing 10000 pods is an expensive operation, so running it frequently may impact scheduler.
+		// TODO: Setup watch on apiserver and wait until all pods scheduled.
+		total := len(cfgFactory.ScheduledPodLister.Indexer.List())
+		fmt.Printf("%ds\trate: %d\ttotal: %d\n", time.Since(began)/time.Second, total-lastTotal, total)
+		if total >= numPods {
+			return
+		}
+		lastTotal = total
+	}
+}
--- a/test/integration/scheduler_perf/test-performance.sh
+++ b/test/integration/scheduler_perf/test-performance.sh
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+
+# Copyright 2014 The Kubernetes Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -o errexit
+set -o nounset
+set -o pipefail
+
+KUBE_ROOT=$(dirname "${BASH_SOURCE}")/../../../..
+source "${KUBE_ROOT}/hack/lib/init.sh"
+
+kube::golang::setup_env
+
+DIR_BASENAME=$(dirname "${BASH_SOURCE}")
+pushd ${DIR_BASENAME}
+
+# cleanup undoes the earlier pushd, calls kube::etcd::cleanup (presumably
+# tearing down the etcd started by kube::etcd::start -- defined in the sourced
+# hack/lib scripts), and logs that cleanup finished. It is installed as the
+# EXIT trap below.
+cleanup() {
+  # Error output from popd is discarded.
+  popd 2> /dev/null
+  kube::etcd::cleanup
+  kube::log::status "performance test cleanup complete"
+}
+
+# Run cleanup whenever the script exits.
+trap cleanup EXIT
+
+kube::etcd::start
+kube::log::status "performance test start"
+
+# The benchmark suite is used for profiling because it only schedules a few
+# pods and should therefore, in theory, show less variance.
+# The value of RUN_BENCHMARK is executed as a command (default: `false`), so
+# set RUN_BENCHMARK=true to enable this section.
+if ${RUN_BENCHMARK:-false}; then
+  go test -c -o "perf.test"
+  # -test.run=xxxx is intended to match no tests so that only benchmarks run;
+  # the CPU profile is written to prof.out.
+  "./perf.test" -test.bench=. -test.run=xxxx -test.cpuprofile=prof.out -test.short=false
+  kube::log::status "benchmark tests finished"
+fi
+# Run the density tests. This might take a long time (timeout: 60 minutes).
+go test -test.run=. -test.timeout=60m -test.short=false
+kube::log::status "density tests finished"
--- a/test/integration/scheduler_perf/util.go
+++ b/test/integration/scheduler_perf/util.go
@@ -0,0 +1,170 @@
+/*
+Copyright 2015 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package benchmark
+
+import (
+	"net/http"
+	"net/http/httptest"
+
+	"github.com/golang/glog"
+	"k8s.io/kubernetes/pkg/api"
+	"k8s.io/kubernetes/pkg/api/resource"
+	"k8s.io/kubernetes/pkg/api/testapi"
+	"k8s.io/kubernetes/pkg/client/record"
+	"k8s.io/kubernetes/pkg/client/restclient"
+	client "k8s.io/kubernetes/pkg/client/unversioned"
+	"k8s.io/kubernetes/pkg/master"
+	"k8s.io/kubernetes/pkg/util/workqueue"
+	"k8s.io/kubernetes/plugin/pkg/scheduler"
+	_ "k8s.io/kubernetes/plugin/pkg/scheduler/algorithmprovider"
+	"k8s.io/kubernetes/plugin/pkg/scheduler/factory"
+	e2e "k8s.io/kubernetes/test/e2e/framework"
+	"k8s.io/kubernetes/test/integration/framework"
+)
+
+// mustSetupScheduler starts the following components:
+// - k8s api server (a.k.a. master)
+// - scheduler
+// It returns scheduler config factory and destroyFunc which should be used to
+// remove resources after finished.
+// Notes on rate limiter:
+//   - client rate limit is set to 5000.
+func mustSetupScheduler() (schedulerConfigFactory *factory.ConfigFactory, destroyFunc func()) {
+	// framework.DeleteAllEtcdKeys()
+
+	var m *master.Master
+	masterConfig := framework.NewIntegrationTestMasterConfig()
+	m, err := master.New(masterConfig)
+	if err != nil {
+		panic("error in brining up the master: " + err.Error())
+	}
+	s := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
+		m.Handler.ServeHTTP(w, req)
+	}))
+
+	c := client.NewOrDie(&restclient.Config{
+		Host:          s.URL,
+		ContentConfig: restclient.ContentConfig{GroupVersion: testapi.Default.GroupVersion()},
+		QPS:           5000.0,
+		Burst:         5000,
+	})
+
+	schedulerConfigFactory = factory.NewConfigFactory(c, api.DefaultSchedulerName, api.DefaultHardPodAffinitySymmetricWeight, api.DefaultFailureDomains)
+	schedulerConfig, err := schedulerConfigFactory.Create()
+	if err != nil {
+		panic("Couldn't create scheduler config")
+	}
+	eventBroadcaster := record.NewBroadcaster()
+	schedulerConfig.Recorder = eventBroadcaster.NewRecorder(api.EventSource{Component: "scheduler"})
+	eventBroadcaster.StartRecordingToSink(c.Events(""))
+	scheduler.New(schedulerConfig).Run()
+
+	destroyFunc = func() {
+		glog.Infof("destroying")
+		close(schedulerConfig.StopEverything)
+		s.Close()
+		glog.Infof("destroyed")
+	}
+	return
+}
+
+// makeNodes creates nodeCount nodes through client c. All nodes are created
+// from one template (generated name prefix "scheduler-test-node-", capacity of
+// 110 pods / 4 CPU / 32Gi memory, Ready condition true). It panics on the
+// first creation error.
+func makeNodes(c client.Interface, nodeCount int) {
+	glog.Infof("making %d nodes", nodeCount)
+
+	capacity := api.ResourceList{
+		api.ResourcePods:   *resource.NewQuantity(110, resource.DecimalSI),
+		api.ResourceCPU:    resource.MustParse("4"),
+		api.ResourceMemory: resource.MustParse("32Gi"),
+	}
+	ready := api.NodeCondition{Type: api.NodeReady, Status: api.ConditionTrue}
+	nodeTemplate := &api.Node{
+		ObjectMeta: api.ObjectMeta{GenerateName: "scheduler-test-node-"},
+		Spec:       api.NodeSpec{ExternalID: "foobar"},
+		Status: api.NodeStatus{
+			Capacity:   capacity,
+			Phase:      api.NodeRunning,
+			Conditions: []api.NodeCondition{ready},
+		},
+	}
+
+	for remaining := nodeCount; remaining > 0; remaining-- {
+		_, err := c.Nodes().Create(nodeTemplate)
+		if err != nil {
+			panic("error creating node: " + err.Error())
+		}
+	}
+}
+
+// makePodSpec returns a pod spec with a single "pause" container that exposes
+// container port 80 and both requests and limits 100m CPU and 500Mi memory.
+func makePodSpec() api.PodSpec {
+	// Build a fresh list on every call so limits and requests do not share a map.
+	cpuAndMemory := func() api.ResourceList {
+		return api.ResourceList{
+			api.ResourceCPU:    resource.MustParse("100m"),
+			api.ResourceMemory: resource.MustParse("500Mi"),
+		}
+	}
+
+	pause := api.Container{
+		Name:  "pause",
+		Image: e2e.GetPauseImageNameForHostArch(),
+		Ports: []api.ContainerPort{{ContainerPort: 80}},
+		Resources: api.ResourceRequirements{
+			Limits:   cpuAndMemory(),
+			Requests: cpuAndMemory(),
+		},
+	}
+	return api.PodSpec{Containers: []api.Container{pause}}
+}
+
+// makePodsFromRC will create a ReplicationController object and
+// a given number of pods (imitating the controller).
+//
+// The controller is created in the "default" namespace under the given name
+// with podCount replicas; failure to create it is fatal (glog.Fatalf). The
+// pods carry the label name=<name>, matching the controller's selector, and
+// are created via workqueue.Parallelize(30, ...). Each pod creation is retried
+// until it succeeds; the first failure per pod is logged so that a permanent
+// error does not spin silently.
+func makePodsFromRC(c client.Interface, name string, podCount int) {
+	rc := &api.ReplicationController{
+		ObjectMeta: api.ObjectMeta{
+			Name: name,
+		},
+		Spec: api.ReplicationControllerSpec{
+			Replicas: int32(podCount),
+			Selector: map[string]string{"name": name},
+			Template: &api.PodTemplateSpec{
+				ObjectMeta: api.ObjectMeta{
+					Labels: map[string]string{"name": name},
+				},
+				Spec: makePodSpec(),
+			},
+		},
+	}
+	if _, err := c.ReplicationControllers("default").Create(rc); err != nil {
+		glog.Fatalf("unexpected error: %v", err)
+	}
+
+	basePod := &api.Pod{
+		ObjectMeta: api.ObjectMeta{
+			GenerateName: "scheduler-test-pod-",
+			Labels:       map[string]string{"name": name},
+		},
+		Spec: makePodSpec(),
+	}
+	createPod := func(i int) {
+		for attempt := 1; ; attempt++ {
+			_, err := c.Pods("default").Create(basePod)
+			if err == nil {
+				return
+			}
+			// Keep retrying as before, but report the first failure only, to
+			// avoid flooding the log while the loop spins.
+			if attempt == 1 {
+				glog.Warningf("error creating pod (will keep retrying): %v", err)
+			}
+		}
+	}
+	workqueue.Parallelize(30, podCount, createPod)
+}