Merge pull request #7869 from bprashanth/rc_rl
Rate limit replica creation
This commit is contained in:
@@ -215,7 +215,7 @@ func startComponents(firstManifestURL, secondManifestURL, apiVersion string) (st
|
||||
// ensure the service endpoints are sync'd several times within the window that the integration tests wait
|
||||
go endpoints.Run(3, util.NeverStop)
|
||||
|
||||
controllerManager := replicationControllerPkg.NewReplicationManager(cl)
|
||||
controllerManager := replicationControllerPkg.NewReplicationManager(cl, replicationControllerPkg.BurstReplicas)
|
||||
|
||||
// TODO: Write an integration test for the replication controllers watch.
|
||||
go controllerManager.Run(3, util.NeverStop)
|
||||
|
@@ -213,7 +213,7 @@ func (s *CMServer) Run(_ []string) error {
|
||||
endpoints := service.NewEndpointController(kubeClient)
|
||||
go endpoints.Run(s.ConcurrentEndpointSyncs, util.NeverStop)
|
||||
|
||||
controllerManager := replicationControllerPkg.NewReplicationManager(kubeClient)
|
||||
controllerManager := replicationControllerPkg.NewReplicationManager(kubeClient, replicationControllerPkg.BurstReplicas)
|
||||
go controllerManager.Run(s.ConcurrentRCSyncs, util.NeverStop)
|
||||
|
||||
cloud := cloudprovider.InitCloudProvider(s.CloudProvider, s.CloudConfigFile)
|
||||
|
@@ -144,7 +144,7 @@ func runControllerManager(machineList []string, cl *client.Client, nodeMilliCPU,
|
||||
endpoints := service.NewEndpointController(cl)
|
||||
go endpoints.Run(5, util.NeverStop)
|
||||
|
||||
controllerManager := controller.NewReplicationManager(cl)
|
||||
controllerManager := controller.NewReplicationManager(cl, controller.BurstReplicas)
|
||||
go controllerManager.Run(5, util.NeverStop)
|
||||
}
|
||||
|
||||
|
@@ -524,7 +524,7 @@ func (nc *NodeController) tryUpdateNodeStatus(node *api.Node) (time.Duration, ap
|
||||
// NodeReady condition was last set longer ago than gracePeriod, so update it to Unknown
|
||||
// (regardless of its current value) in the master, without contacting kubelet.
|
||||
if readyCondition == nil {
|
||||
glog.V(2).Infof("node %v is never updated by kubelet")
|
||||
glog.V(2).Infof("node %v is never updated by kubelet", node.Name)
|
||||
node.Status.Conditions = append(node.Status.Conditions, api.NodeCondition{
|
||||
Type: api.NodeReady,
|
||||
Status: api.ConditionUnknown,
|
||||
|
@@ -84,7 +84,7 @@ func (r *RCExpectations) SatisfiedExpectations(rc *api.ReplicationController) bo
|
||||
if podExp.Fulfilled() {
|
||||
return true
|
||||
} else {
|
||||
glog.V(4).Infof("Controller %v still waiting on expectations %#v", podExp)
|
||||
glog.V(4).Infof("Controller still waiting on expectations %#v", podExp)
|
||||
return false
|
||||
}
|
||||
} else if err != nil {
|
||||
|
@@ -58,12 +58,15 @@ const (
|
||||
// of expectations, without it the RC could stay asleep forever. This should
|
||||
// be set based on the expected latency of watch events.
|
||||
//
|
||||
// TODO: Set this per expectation, based on its size.
|
||||
// Currently an rc can service (create *and* observe the watch events for said
|
||||
// creation) about 10-20 pods a second, so it takes about 3.5 min to service
|
||||
// 3000 pods. Just creation is limited to 30qps, and watching happens with
|
||||
// ~10-30s latency/pod at scale.
|
||||
ExpectationsTimeout = 6 * time.Minute
|
||||
// creation) about 10-20 pods a second, so it takes about 1 min to service
|
||||
// 500 pods. Just creation is limited to 20qps, and watching happens with ~10-30s
|
||||
// latency/pod at the scale of 3000 pods over 100 nodes.
|
||||
ExpectationsTimeout = 3 * time.Minute
|
||||
|
||||
// Realistic value of the burstReplica field for the replication manager based off
|
||||
// performance requirements for kubernetes 1.0.
|
||||
BurstReplicas = 500
|
||||
)
|
||||
|
||||
// ReplicationManager is responsible for synchronizing ReplicationController objects stored
|
||||
@@ -72,6 +75,9 @@ type ReplicationManager struct {
|
||||
kubeClient client.Interface
|
||||
podControl PodControlInterface
|
||||
|
||||
// An rc is temporarily suspended after creating/deleting this many replicas.
|
||||
// It resumes normal action after observing the watch events for them.
|
||||
burstReplicas int
|
||||
// To allow injection of syncReplicationController for testing.
|
||||
syncHandler func(rcKey string) error
|
||||
// A TTLCache of pod creates/deletes each rc expects to see
|
||||
@@ -89,7 +95,7 @@ type ReplicationManager struct {
|
||||
}
|
||||
|
||||
// NewReplicationManager creates a new ReplicationManager.
|
||||
func NewReplicationManager(kubeClient client.Interface) *ReplicationManager {
|
||||
func NewReplicationManager(kubeClient client.Interface, burstReplicas int) *ReplicationManager {
|
||||
eventBroadcaster := record.NewBroadcaster()
|
||||
eventBroadcaster.StartRecordingToSink(kubeClient.Events(""))
|
||||
|
||||
@@ -99,8 +105,9 @@ func NewReplicationManager(kubeClient client.Interface) *ReplicationManager {
|
||||
kubeClient: kubeClient,
|
||||
recorder: eventBroadcaster.NewRecorder(api.EventSource{Component: "replication-controller"}),
|
||||
},
|
||||
expectations: NewRCExpectations(),
|
||||
queue: workqueue.New(),
|
||||
burstReplicas: burstReplicas,
|
||||
expectations: NewRCExpectations(),
|
||||
queue: workqueue.New(),
|
||||
}
|
||||
|
||||
rm.controllerStore.Store, rm.rcController = framework.NewInformer(
|
||||
@@ -277,15 +284,19 @@ func (rm *ReplicationManager) manageReplicas(filteredPods []*api.Pod, controller
|
||||
diff := len(filteredPods) - controller.Spec.Replicas
|
||||
if diff < 0 {
|
||||
diff *= -1
|
||||
if diff > rm.burstReplicas {
|
||||
diff = rm.burstReplicas
|
||||
}
|
||||
rm.expectations.ExpectCreations(controller, diff)
|
||||
wait := sync.WaitGroup{}
|
||||
wait.Add(diff)
|
||||
glog.V(2).Infof("Too few %q replicas, creating %d", controller.Name, diff)
|
||||
glog.V(2).Infof("Too few %q/%q replicas, need %d, creating %d", controller.Namespace, controller.Name, controller.Spec.Replicas, diff)
|
||||
for i := 0; i < diff; i++ {
|
||||
go func() {
|
||||
defer wait.Done()
|
||||
if err := rm.podControl.createReplica(controller.Namespace, controller); err != nil {
|
||||
// Decrement the expected number of creates because the informer won't observe this pod
|
||||
glog.V(2).Infof("Failed creation, decrementing expectations for controller %q/%q", controller.Namespace, controller.Name)
|
||||
rm.expectations.CreationObserved(controller)
|
||||
util.HandleError(err)
|
||||
}
|
||||
@@ -293,8 +304,11 @@ func (rm *ReplicationManager) manageReplicas(filteredPods []*api.Pod, controller
|
||||
}
|
||||
wait.Wait()
|
||||
} else if diff > 0 {
|
||||
if diff > rm.burstReplicas {
|
||||
diff = rm.burstReplicas
|
||||
}
|
||||
rm.expectations.ExpectDeletions(controller, diff)
|
||||
glog.V(2).Infof("Too many %q replicas, deleting %d", controller.Name, diff)
|
||||
glog.V(2).Infof("Too many %q/%q replicas, need %d, deleting %d", controller.Namespace, controller.Name, controller.Spec.Replicas, diff)
|
||||
// Sort the pods in the order such that not-ready < ready, unscheduled
|
||||
// < scheduled, and pending < running. This ensures that we delete pods
|
||||
// in the earlier stages whenever possible.
|
||||
@@ -307,6 +321,7 @@ func (rm *ReplicationManager) manageReplicas(filteredPods []*api.Pod, controller
|
||||
defer wait.Done()
|
||||
if err := rm.podControl.deletePod(controller.Namespace, filteredPods[ix].Name); err != nil {
|
||||
// Decrement the expected number of deletes because the informer won't observe this deletion
|
||||
glog.V(2).Infof("Failed deletion, decrementing expectations for controller %q/%q", controller.Namespace, controller.Name)
|
||||
rm.expectations.DeletionObserved(controller)
|
||||
}
|
||||
}(i)
|
||||
|
@@ -225,7 +225,7 @@ func startManagerAndWait(manager *ReplicationManager, pods int, t *testing.T) ch
|
||||
func TestSyncReplicationControllerDoesNothing(t *testing.T) {
|
||||
client := client.NewOrDie(&client.Config{Host: "", Version: testapi.Version()})
|
||||
fakePodControl := FakePodControl{}
|
||||
manager := NewReplicationManager(client)
|
||||
manager := NewReplicationManager(client, BurstReplicas)
|
||||
|
||||
// 2 running pods, a controller with 2 replicas, sync is a no-op
|
||||
controllerSpec := newReplicationController(2)
|
||||
@@ -240,7 +240,7 @@ func TestSyncReplicationControllerDoesNothing(t *testing.T) {
|
||||
func TestSyncReplicationControllerDeletes(t *testing.T) {
|
||||
client := client.NewOrDie(&client.Config{Host: "", Version: testapi.Version()})
|
||||
fakePodControl := FakePodControl{}
|
||||
manager := NewReplicationManager(client)
|
||||
manager := NewReplicationManager(client, BurstReplicas)
|
||||
manager.podControl = &fakePodControl
|
||||
|
||||
// 2 running pods and a controller with 1 replica, one pod delete expected
|
||||
@@ -254,7 +254,7 @@ func TestSyncReplicationControllerDeletes(t *testing.T) {
|
||||
|
||||
func TestSyncReplicationControllerCreates(t *testing.T) {
|
||||
client := client.NewOrDie(&client.Config{Host: "", Version: testapi.Version()})
|
||||
manager := NewReplicationManager(client)
|
||||
manager := NewReplicationManager(client, BurstReplicas)
|
||||
|
||||
// A controller with 2 replicas and no pods in the store, 2 creates expected
|
||||
controller := newReplicationController(2)
|
||||
@@ -319,7 +319,7 @@ func TestControllerNoReplicaUpdate(t *testing.T) {
|
||||
testServer := httptest.NewServer(&fakeHandler)
|
||||
defer testServer.Close()
|
||||
client := client.NewOrDie(&client.Config{Host: testServer.URL, Version: testapi.Version()})
|
||||
manager := NewReplicationManager(client)
|
||||
manager := NewReplicationManager(client, BurstReplicas)
|
||||
|
||||
// Steady state for the replication controller, no Status.Replicas updates expected
|
||||
activePods := 5
|
||||
@@ -348,7 +348,7 @@ func TestControllerUpdateReplicas(t *testing.T) {
|
||||
defer testServer.Close()
|
||||
|
||||
client := client.NewOrDie(&client.Config{Host: testServer.URL, Version: testapi.Version()})
|
||||
manager := NewReplicationManager(client)
|
||||
manager := NewReplicationManager(client, BurstReplicas)
|
||||
|
||||
// Insufficient number of pods in the system, and Status.Replicas is wrong;
|
||||
// Status.Replicas should update to match the number of pods in the system; 1 new pod should be created.
|
||||
@@ -533,7 +533,7 @@ func TestSyncReplicationControllerDormancy(t *testing.T) {
|
||||
client := client.NewOrDie(&client.Config{Host: testServer.URL, Version: testapi.Version()})
|
||||
|
||||
fakePodControl := FakePodControl{}
|
||||
manager := NewReplicationManager(client)
|
||||
manager := NewReplicationManager(client, BurstReplicas)
|
||||
manager.podControl = &fakePodControl
|
||||
|
||||
controllerSpec := newReplicationController(2)
|
||||
@@ -572,7 +572,7 @@ func TestSyncReplicationControllerDormancy(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestPodControllerLookup(t *testing.T) {
|
||||
manager := NewReplicationManager(client.NewOrDie(&client.Config{Host: "", Version: testapi.Version()}))
|
||||
manager := NewReplicationManager(client.NewOrDie(&client.Config{Host: "", Version: testapi.Version()}), BurstReplicas)
|
||||
testCases := []struct {
|
||||
inRCs []*api.ReplicationController
|
||||
pod *api.Pod
|
||||
@@ -638,7 +638,7 @@ type FakeWatcher struct {
|
||||
func TestWatchControllers(t *testing.T) {
|
||||
fakeWatch := watch.NewFake()
|
||||
client := &testclient.Fake{Watch: fakeWatch}
|
||||
manager := NewReplicationManager(client)
|
||||
manager := NewReplicationManager(client, BurstReplicas)
|
||||
|
||||
var testControllerSpec api.ReplicationController
|
||||
received := make(chan string)
|
||||
@@ -679,7 +679,7 @@ func TestWatchControllers(t *testing.T) {
|
||||
func TestWatchPods(t *testing.T) {
|
||||
fakeWatch := watch.NewFake()
|
||||
client := &testclient.Fake{Watch: fakeWatch}
|
||||
manager := NewReplicationManager(client)
|
||||
manager := NewReplicationManager(client, BurstReplicas)
|
||||
|
||||
// Put one rc and one pod into the controller's stores
|
||||
testControllerSpec := newReplicationController(1)
|
||||
@@ -722,7 +722,7 @@ func TestWatchPods(t *testing.T) {
|
||||
func TestUpdatePods(t *testing.T) {
|
||||
fakeWatch := watch.NewFake()
|
||||
client := &testclient.Fake{Watch: fakeWatch}
|
||||
manager := NewReplicationManager(client)
|
||||
manager := NewReplicationManager(client, BurstReplicas)
|
||||
|
||||
received := make(chan string)
|
||||
|
||||
@@ -780,7 +780,7 @@ func TestControllerUpdateRequeue(t *testing.T) {
|
||||
defer testServer.Close()
|
||||
|
||||
client := client.NewOrDie(&client.Config{Host: testServer.URL, Version: testapi.Version()})
|
||||
manager := NewReplicationManager(client)
|
||||
manager := NewReplicationManager(client, BurstReplicas)
|
||||
|
||||
rc := newReplicationController(1)
|
||||
manager.controllerStore.Store.Add(rc)
|
||||
@@ -852,3 +852,104 @@ func TestControllerUpdateStatusWithFailure(t *testing.T) {
|
||||
t.Errorf("Expected 1 get and 2 updates, got %d gets %d updates", gets, updates)
|
||||
}
|
||||
}
|
||||
|
||||
func doTestControllerBurstReplicas(t *testing.T, burstReplicas, numReplicas int) {
|
||||
client := client.NewOrDie(&client.Config{Host: "", Version: testapi.Version()})
|
||||
fakePodControl := FakePodControl{}
|
||||
manager := NewReplicationManager(client, burstReplicas)
|
||||
manager.podControl = &fakePodControl
|
||||
|
||||
controllerSpec := newReplicationController(numReplicas)
|
||||
manager.controllerStore.Store.Add(controllerSpec)
|
||||
|
||||
expectedPods := 0
|
||||
pods := newPodList(nil, numReplicas, api.PodPending, controllerSpec)
|
||||
|
||||
// Size up the controller, then size it down, and confirm the expected create/delete pattern
|
||||
for _, replicas := range []int{numReplicas, 0} {
|
||||
|
||||
controllerSpec.Spec.Replicas = replicas
|
||||
manager.controllerStore.Store.Add(controllerSpec)
|
||||
|
||||
for i := 0; i < numReplicas; i += burstReplicas {
|
||||
manager.syncReplicationController(getKey(controllerSpec, t))
|
||||
|
||||
// The store accrues active pods. It's also used by the rc to determine how many
|
||||
// replicas to create.
|
||||
activePods := len(manager.podStore.Store.List())
|
||||
if replicas != 0 {
|
||||
// This is the number of pods currently "in flight". They were created by the rc manager above,
|
||||
// which then puts the rc to sleep till all of them have been observed.
|
||||
expectedPods = replicas - activePods
|
||||
if expectedPods > burstReplicas {
|
||||
expectedPods = burstReplicas
|
||||
}
|
||||
// This validates the rc manager sync actually created pods
|
||||
validateSyncReplication(t, &fakePodControl, expectedPods, 0)
|
||||
|
||||
// This simulates the watch events for all but 1 of the expected pods.
|
||||
// None of these should wake the controller because it has expectations==BurstReplicas.
|
||||
for _, pod := range pods.Items[:expectedPods-1] {
|
||||
manager.podStore.Store.Add(&pod)
|
||||
manager.addPod(&pod)
|
||||
}
|
||||
|
||||
podExp, exists, err := manager.expectations.GetExpectations(controllerSpec)
|
||||
if !exists || err != nil {
|
||||
t.Fatalf("Did not find expectations for rc.")
|
||||
}
|
||||
if add, _ := podExp.getExpectations(); add != 1 {
|
||||
t.Fatalf("Expectations are wrong %v", podExp)
|
||||
}
|
||||
} else {
|
||||
expectedPods = (replicas - activePods) * -1
|
||||
if expectedPods > burstReplicas {
|
||||
expectedPods = burstReplicas
|
||||
}
|
||||
validateSyncReplication(t, &fakePodControl, 0, expectedPods)
|
||||
for _, pod := range pods.Items[:expectedPods-1] {
|
||||
manager.podStore.Store.Delete(&pod)
|
||||
manager.deletePod(&pod)
|
||||
}
|
||||
podExp, exists, err := manager.expectations.GetExpectations(controllerSpec)
|
||||
if !exists || err != nil {
|
||||
t.Fatalf("Did not find expectations for rc.")
|
||||
}
|
||||
if _, del := podExp.getExpectations(); del != 1 {
|
||||
t.Fatalf("Expectations are wrong %v", podExp)
|
||||
}
|
||||
}
|
||||
|
||||
// Check that the rc didn't take any action for all the above pods
|
||||
fakePodControl.clear()
|
||||
manager.syncReplicationController(getKey(controllerSpec, t))
|
||||
validateSyncReplication(t, &fakePodControl, 0, 0)
|
||||
|
||||
// Create/Delete the last pod
|
||||
// The last add pod will decrease the expectation of the rc to 0,
|
||||
// which will cause it to create/delete the remaining replicas upto burstReplicas.
|
||||
if replicas != 0 {
|
||||
manager.podStore.Store.Add(&pods.Items[expectedPods-1])
|
||||
manager.addPod(&pods.Items[expectedPods-1])
|
||||
} else {
|
||||
manager.podStore.Store.Delete(&pods.Items[expectedPods-1])
|
||||
manager.deletePod(&pods.Items[expectedPods-1])
|
||||
}
|
||||
pods.Items = pods.Items[expectedPods:]
|
||||
}
|
||||
|
||||
// Confirm that we've created the right number of replicas
|
||||
activePods := len(manager.podStore.Store.List())
|
||||
if activePods != controllerSpec.Spec.Replicas {
|
||||
t.Fatalf("Unexpected number of active pods, expected %d, got %d", controllerSpec.Spec.Replicas, activePods)
|
||||
}
|
||||
// Replenish the pod list, since we cut it down sizing up
|
||||
pods = newPodList(nil, replicas, api.PodRunning, controllerSpec)
|
||||
}
|
||||
}
|
||||
|
||||
func TestControllerBurstReplicas(t *testing.T) {
|
||||
doTestControllerBurstReplicas(t, 5, 30)
|
||||
doTestControllerBurstReplicas(t, 5, 12)
|
||||
doTestControllerBurstReplicas(t, 3, 2)
|
||||
}
|
||||
|
Reference in New Issue
Block a user