diff --git a/hack/test-go.sh b/hack/test-go.sh
index 2406878eb02..0d0148ba30c 100755
--- a/hack/test-go.sh
+++ b/hack/test-go.sh
@@ -39,6 +39,7 @@ kube::test::find_dirs() {
           -o -path './test/e2e/*' \
           -o -path './test/e2e_node/*' \
           -o -path './test/integration/*' \
+          -o -path './test/component/scheduler/perf/*' \
         \) -prune \
       \) -name '*_test.go' -print0 | xargs -0n1 dirname | sed 's|^\./||' | sort -u
   )
diff --git a/test/component/scheduler/perf/scheduler_bench_test.go b/test/component/scheduler/perf/scheduler_bench_test.go
new file mode 100644
index 00000000000..202cbca58e1
--- /dev/null
+++ b/test/component/scheduler/perf/scheduler_bench_test.go
@@ -0,0 +1,79 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package benchmark
+
+import (
+	"testing"
+	"time"
+)
+
+// BenchmarkScheduling100Nodes0Pods benchmarks the scheduling rate
+// when the cluster has 100 nodes and 0 scheduled pods.
+func BenchmarkScheduling100Nodes0Pods(b *testing.B) {
+	benchmarkScheduling(100, 0, b)
+}
+
+// BenchmarkScheduling100Nodes1000Pods benchmarks the scheduling rate
+// when the cluster has 100 nodes and 1000 scheduled pods.
+func BenchmarkScheduling100Nodes1000Pods(b *testing.B) {
+	benchmarkScheduling(100, 1000, b)
+}
+
+// BenchmarkScheduling1000Nodes0Pods benchmarks the scheduling rate
+// when the cluster has 1000 nodes and 0 scheduled pods.
+func BenchmarkScheduling1000Nodes0Pods(b *testing.B) {
+	benchmarkScheduling(1000, 0, b)
+}
+
+// BenchmarkScheduling1000Nodes1000Pods benchmarks the scheduling rate
+// when the cluster has 1000 nodes and 1000 scheduled pods.
+func BenchmarkScheduling1000Nodes1000Pods(b *testing.B) {
+	benchmarkScheduling(1000, 1000, b)
+}
+
+// benchmarkScheduling benchmarks the scheduling rate with a specific number of nodes
+// and a specific number of pods already scheduled. Since a single scheduling operation
+// takes a relatively long time, b.N should be small: 10 - 100.
+func benchmarkScheduling(numNodes, numScheduledPods int, b *testing.B) {
+	schedulerConfigFactory, finalFunc := mustSetupScheduler()
+	defer finalFunc()
+	c := schedulerConfigFactory.Client
+
+	makeNodes(c, numNodes)
+	makePods(c, numScheduledPods)
+	for {
+		scheduled := schedulerConfigFactory.ScheduledPodLister.Store.List()
+		if len(scheduled) >= numScheduledPods {
+			break
+		}
+		time.Sleep(1 * time.Second)
+	}
+	// start benchmark
+	b.ResetTimer()
+	makePods(c, b.N)
+	for {
+		// This can potentially affect the performance of the scheduler, since List() is done under mutex.
+		// TODO: Setup watch on apiserver and wait until all pods scheduled.
+		scheduled := schedulerConfigFactory.ScheduledPodLister.Store.List()
+		if len(scheduled) >= numScheduledPods+b.N {
+			break
+		}
+		// Note: This might introduce slight deviation in accuracy of benchmark results.
+		// Since the total amount of time is relatively large, it might not be a concern.
+		time.Sleep(100 * time.Millisecond)
+	}
+}
diff --git a/test/component/scheduler/perf/scheduler_test.go b/test/component/scheduler/perf/scheduler_test.go
new file mode 100644
index 00000000000..5c54851a64c
--- /dev/null
+++ b/test/component/scheduler/perf/scheduler_test.go
@@ -0,0 +1,61 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package benchmark
+
+import (
+	"fmt"
+	"testing"
+	"time"
+)
+
+// TestSchedule100Node3KPods schedules 3k pods on 100 nodes.
+func TestSchedule100Node3KPods(t *testing.T) {
+	schedulePods(100, 3000)
+}
+
+// TestSchedule1000Node30KPods schedules 30k pods on 1000 nodes.
+func TestSchedule1000Node30KPods(t *testing.T) {
+	schedulePods(1000, 30000)
+}
+
+// schedulePods schedules a specific number of pods on a specific number of nodes.
+// This is used to learn the scheduling throughput on various
+// cluster sizes and how it changes as more and more pods are scheduled.
+// It won't stop until all pods are scheduled.
+func schedulePods(numNodes, numPods int) {
+	schedulerConfigFactory, destroyFunc := mustSetupScheduler()
+	defer destroyFunc()
+	c := schedulerConfigFactory.Client
+
+	makeNodes(c, numNodes)
+	makePods(c, numPods)
+
+	prev := 0
+	start := time.Now()
+	for {
+		// This can potentially affect the performance of the scheduler, since List() is done under mutex.
+		// Listing 10000 pods is an expensive operation, so running it frequently may impact the scheduler.
+		// TODO: Setup watch on apiserver and wait until all pods scheduled.
+		scheduled := schedulerConfigFactory.ScheduledPodLister.Store.List()
+		fmt.Printf("%ds\trate: %d\ttotal: %d\n", time.Since(start)/time.Second, len(scheduled)-prev, len(scheduled))
+		if len(scheduled) >= numPods {
+			return
+		}
+		prev = len(scheduled)
+		time.Sleep(1 * time.Second)
+	}
+}
diff --git a/test/component/scheduler/perf/util.go b/test/component/scheduler/perf/util.go
new file mode 100644
index 00000000000..9a262740a8d
--- /dev/null
+++ b/test/component/scheduler/perf/util.go
@@ -0,0 +1,158 @@
+/*
+Copyright 2015 The Kubernetes Authors All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package benchmark
+
+import (
+	"net/http"
+	"net/http/httptest"
+
+	"github.com/golang/glog"
+	"k8s.io/kubernetes/pkg/api"
+	"k8s.io/kubernetes/pkg/api/resource"
+	"k8s.io/kubernetes/pkg/api/testapi"
+	"k8s.io/kubernetes/pkg/client/record"
+	client "k8s.io/kubernetes/pkg/client/unversioned"
+	"k8s.io/kubernetes/pkg/master"
+	"k8s.io/kubernetes/plugin/pkg/scheduler"
+	_ "k8s.io/kubernetes/plugin/pkg/scheduler/algorithmprovider"
+	"k8s.io/kubernetes/plugin/pkg/scheduler/factory"
+	"k8s.io/kubernetes/test/integration/framework"
+)
+
+// mustSetupScheduler starts the following components:
+// - k8s api server (a.k.a. master)
+// - scheduler
+// It returns the scheduler config factory and a destroyFunc which should be used
+// to clean up resources after the test has finished.
+// Notes on rate limiter:
+// - The BindPodsRateLimiter is nil, meaning no rate limits.
+// - The client rate limit is set to 5000.
+func mustSetupScheduler() (schedulerConfigFactory *factory.ConfigFactory, destroyFunc func()) {
+	framework.DeleteAllEtcdKeys()
+
+	var m *master.Master
+	masterConfig := framework.NewIntegrationTestMasterConfig()
+	m = master.New(masterConfig)
+	s := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
+		m.Handler.ServeHTTP(w, req)
+	}))
+
+	c := client.NewOrDie(&client.Config{
+		Host:         s.URL,
+		GroupVersion: testapi.Default.GroupVersion(),
+		QPS:          5000.0,
+		Burst:        5000,
+	})
+
+	schedulerConfigFactory = factory.NewConfigFactory(c, nil)
+	schedulerConfig, err := schedulerConfigFactory.Create()
+	if err != nil {
+		panic("Couldn't create scheduler config")
+	}
+	eventBroadcaster := record.NewBroadcaster()
+	schedulerConfig.Recorder = eventBroadcaster.NewRecorder(api.EventSource{Component: "scheduler"})
+	eventBroadcaster.StartRecordingToSink(c.Events(""))
+	scheduler.New(schedulerConfig).Run()
+
+	destroyFunc = func() {
+		glog.Infof("destroying")
+		close(schedulerConfig.StopEverything)
+		s.Close()
+		glog.Infof("destroyed")
+	}
+	return
+}
+
+func makeNodes(c client.Interface, nodeCount int) {
+	glog.Infof("making %d nodes", nodeCount)
+	baseNode := &api.Node{
+		ObjectMeta: api.ObjectMeta{
+			GenerateName: "scheduler-test-node-",
+		},
+		Spec: api.NodeSpec{
+			ExternalID: "foobar",
+		},
+		Status: api.NodeStatus{
+			Capacity: api.ResourceList{
+				api.ResourcePods:   *resource.NewQuantity(32, resource.DecimalSI),
+				api.ResourceCPU:    resource.MustParse("4"),
+				api.ResourceMemory: resource.MustParse("32Gi"),
+			},
+			Phase: api.NodeRunning,
+			Conditions: []api.NodeCondition{
+				{Type: api.NodeReady, Status: api.ConditionTrue},
+			},
+		},
+	}
+	for i := 0; i < nodeCount; i++ {
+		if _, err := c.Nodes().Create(baseNode); err != nil {
+			panic("error creating node: " + err.Error())
+		}
+	}
+}
+
+// makePods will set up the specified number of scheduled pods.
+// Currently it goes through the scheduling path, so setting up a large number of pods is very slow.
+// TODO: Setup pods evenly on all nodes and quickly/non-linearly.
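+// Note: makePods returns as soon as its worker goroutines have been started; it does not
+// wait for the create requests to finish or for the pods to be scheduled. Callers poll
+// ScheduledPodLister until the expected number of pods shows up as scheduled.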
+func makePods(c client.Interface, podCount int) {
+	glog.Infof("making %d pods", podCount)
+	basePod := &api.Pod{
+		ObjectMeta: api.ObjectMeta{
+			GenerateName: "scheduler-test-pod-",
+		},
+		Spec: api.PodSpec{
+			Containers: []api.Container{{
+				Name:  "pause",
+				Image: "gcr.io/google_containers/pause:1.0",
+				Resources: api.ResourceRequirements{
+					Limits: api.ResourceList{
+						api.ResourceCPU:    resource.MustParse("100m"),
+						api.ResourceMemory: resource.MustParse("500Mi"),
+					},
+					Requests: api.ResourceList{
+						api.ResourceCPU:    resource.MustParse("100m"),
+						api.ResourceMemory: resource.MustParse("500Mi"),
+					},
+				},
+			}},
+		},
+	}
+	threads := 30
+	remaining := make(chan int, 1000)
+	go func() {
+		for i := 0; i < podCount; i++ {
+			remaining <- i
+		}
+		close(remaining)
+	}()
+	for i := 0; i < threads; i++ {
+		go func() {
+			for {
+				_, ok := <-remaining
+				if !ok {
+					return
+				}
+				for {
+					_, err := c.Pods("default").Create(basePod)
+					if err == nil {
+						break
+					}
+				}
+			}
+		}()
+	}
+}
diff --git a/test/integration/scheduler_test.go b/test/integration/scheduler_test.go
index bdbf8a3a009..d7c7b60530f 100644
--- a/test/integration/scheduler_test.go
+++ b/test/integration/scheduler_test.go
@@ -24,7 +24,6 @@ import (
 	"fmt"
 	"net/http"
 	"net/http/httptest"
-	"sync"
 	"testing"
 	"time"

@@ -274,133 +273,3 @@ func DoTestUnschedulableNodes(t *testing.T, restClient *client.Client, nodeStore
 		}
 	}
 }
-
-func BenchmarkScheduling(b *testing.B) {
-	framework.DeleteAllEtcdKeys()
-
-	var m *master.Master
-	s := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
-		m.Handler.ServeHTTP(w, req)
-	}))
-	defer s.Close()
-
-	masterConfig := framework.NewIntegrationTestMasterConfig()
-	m = master.New(masterConfig)
-
-	c := client.NewOrDie(&client.Config{
-		Host:         s.URL,
-		GroupVersion: testapi.Default.GroupVersion(),
-		QPS:          5000.0,
-		Burst:        5000,
-	})
-
-	schedulerConfigFactory := factory.NewConfigFactory(c, nil)
-	schedulerConfig, err := schedulerConfigFactory.Create()
-	if err != nil {
-		b.Fatalf("Couldn't create scheduler config: %v", err)
-	}
-	eventBroadcaster := record.NewBroadcaster()
-	schedulerConfig.Recorder = eventBroadcaster.NewRecorder(api.EventSource{Component: "scheduler"})
-	eventBroadcaster.StartRecordingToSink(c.Events(""))
-	scheduler.New(schedulerConfig).Run()
-
-	defer close(schedulerConfig.StopEverything)
-
-	makeNNodes(c, 1000)
-	N := b.N
-	b.ResetTimer()
-	makeNPods(c, N)
-	for {
-		objs := schedulerConfigFactory.ScheduledPodLister.Store.List()
-		if len(objs) >= N {
-			fmt.Printf("%v pods scheduled.\n", len(objs))
-			/* // To prove that this actually works:
-			for _, o := range objs {
-				fmt.Printf("%s\n", o.(*api.Pod).Spec.NodeName)
-			}
-			*/
-			break
-		}
-		time.Sleep(time.Millisecond)
-	}
-	b.StopTimer()
-}
-
-func makeNNodes(c client.Interface, N int) {
-	baseNode := &api.Node{
-		ObjectMeta: api.ObjectMeta{
-			GenerateName: "scheduler-test-node-",
-		},
-		Spec: api.NodeSpec{
-			ExternalID: "foobar",
-		},
-		Status: api.NodeStatus{
-			Capacity: api.ResourceList{
-				api.ResourcePods:   *resource.NewQuantity(32, resource.DecimalSI),
-				api.ResourceCPU:    resource.MustParse("4"),
-				api.ResourceMemory: resource.MustParse("32Gi"),
-			},
-			Phase: api.NodeRunning,
-			Conditions: []api.NodeCondition{
-				{Type: api.NodeReady, Status: api.ConditionTrue},
-			},
-		},
-	}
-	for i := 0; i < N; i++ {
-		if _, err := c.Nodes().Create(baseNode); err != nil {
-			panic("error creating node: " + err.Error())
-		}
-	}
-}
-
-func makeNPods(c client.Interface, N int) {
-	basePod := &api.Pod{
-		ObjectMeta: api.ObjectMeta{
-			GenerateName: "scheduler-test-pod-",
-		},
-		Spec: api.PodSpec{
-			Containers: []api.Container{{
-				Name:  "pause",
-				Image: "gcr.io/google_containers/pause:1.0",
-				Resources: api.ResourceRequirements{
-					Limits: api.ResourceList{
-						api.ResourceCPU:    resource.MustParse("100m"),
-						api.ResourceMemory: resource.MustParse("500Mi"),
-					},
-					Requests: api.ResourceList{
-						api.ResourceCPU:    resource.MustParse("100m"),
-						api.ResourceMemory: resource.MustParse("500Mi"),
-					},
-				},
-			}},
-		},
-	}
-	wg := sync.WaitGroup{}
-	threads := 30
-	wg.Add(threads)
-	remaining := make(chan int, N)
-	go func() {
-		for i := 0; i < N; i++ {
-			remaining <- i
-		}
-		close(remaining)
-	}()
-	for i := 0; i < threads; i++ {
-		go func() {
-			defer wg.Done()
-			for {
-				_, ok := <-remaining
-				if !ok {
-					return
-				}
-				for {
-					_, err := c.Pods("default").Create(basePod)
-					if err == nil {
-						break
-					}
-				}
-			}
-		}()
-	}
-	wg.Wait()
-}
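
Note on running these: the new directory is pruned from hack/test-go.sh above, so the normal unit-test run skips it. A minimal way to invoke the tests and benchmarks directly with plain go test might look like the sketch below; it assumes the usual integration-test prerequisite of a locally reachable etcd, and the exact flags are only a suggestion since this change does not add a dedicated runner script.

    # Throughput tests (print the scheduling rate once per second):
    go test ./test/component/scheduler/perf/ -run '^TestSchedule' -v -timeout 0

    # Benchmarks only (the -run pattern matches no tests, so only Benchmark* functions run):
    go test ./test/component/scheduler/perf/ -run '^$' -bench . -timeout 0

Disabling the timeout matters here because scheduling 30k pods through the full scheduling path can take well over the default 10 minutes.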