Added rate limiting to pod deletion

Fixes #6228
This commit is contained in:
Piotr Szczesniak
2015-04-02 17:13:13 +02:00
parent e912d5204c
commit 8bc9f5fef7
6 changed files with 69 additions and 38 deletions

View File

@@ -71,15 +71,17 @@ var (
)
type NodeController struct {
cloud cloudprovider.Interface
matchRE string
staticResources *api.NodeResources
nodes []string
kubeClient client.Interface
kubeletClient client.KubeletClient
recorder record.EventRecorder
registerRetryCount int
podEvictionTimeout time.Duration
cloud cloudprovider.Interface
matchRE string
staticResources *api.NodeResources
nodes []string
kubeClient client.Interface
kubeletClient client.KubeletClient
recorder record.EventRecorder
registerRetryCount int
podEvictionTimeout time.Duration
deletingPodsRateLimiter util.RateLimiter
// Method for easy mocking in unittest.
lookupIP func(host string) ([]net.IP, error)
now func() util.Time
@@ -94,7 +96,8 @@ func NewNodeController(
kubeClient client.Interface,
kubeletClient client.KubeletClient,
registerRetryCount int,
podEvictionTimeout time.Duration) *NodeController {
podEvictionTimeout time.Duration,
deletingPodsRateLimiter util.RateLimiter) *NodeController {
eventBroadcaster := record.NewBroadcaster()
recorder := eventBroadcaster.NewRecorder(api.EventSource{Component: "controllermanager"})
if kubeClient != nil {
@@ -104,17 +107,18 @@ func NewNodeController(
glog.Infof("No api server defined - no events will be sent to API server.")
}
return &NodeController{
cloud: cloud,
matchRE: matchRE,
nodes: nodes,
staticResources: staticResources,
kubeClient: kubeClient,
kubeletClient: kubeletClient,
recorder: recorder,
registerRetryCount: registerRetryCount,
podEvictionTimeout: podEvictionTimeout,
lookupIP: net.LookupIP,
now: util.Now,
cloud: cloud,
matchRE: matchRE,
nodes: nodes,
staticResources: staticResources,
kubeClient: kubeClient,
kubeletClient: kubeletClient,
recorder: recorder,
registerRetryCount: registerRetryCount,
podEvictionTimeout: podEvictionTimeout,
deletingPodsRateLimiter: deletingPodsRateLimiter,
lookupIP: net.LookupIP,
now: util.Now,
}
}
@@ -558,13 +562,18 @@ func (nc *NodeController) MonitorNodeStatus() error {
if lastReadyCondition.Status == api.ConditionFalse &&
nc.now().After(lastReadyCondition.LastTransitionTime.Add(nc.podEvictionTimeout)) {
// Node stays in not ready for at least 'podEvictionTimeout' - evict all pods on the unhealthy node.
nc.deletePods(node.Name)
// Makes sure we are not removing pods from to many nodes in the same time.
if nc.deletingPodsRateLimiter.CanAccept() {
nc.deletePods(node.Name)
}
}
if lastReadyCondition.Status == api.ConditionUnknown &&
nc.now().After(lastReadyCondition.LastProbeTime.Add(nc.podEvictionTimeout-gracePeriod)) {
// Same as above. Note however, since condition unknown is posted by node controller, which means we
// need to substract monitoring grace period in order to get the real 'podEvictionTimeout'.
nc.deletePods(node.Name)
if nc.deletingPodsRateLimiter.CanAccept() {
nc.deletePods(node.Name)
}
}
}
}

View File

@@ -247,7 +247,7 @@ func TestRegisterNodes(t *testing.T) {
for _, machine := range item.machines {
nodes.Items = append(nodes.Items, *newNode(machine))
}
nodeController := NewNodeController(nil, "", item.machines, &api.NodeResources{}, item.fakeNodeHandler, nil, 10, time.Minute)
nodeController := NewNodeController(nil, "", item.machines, &api.NodeResources{}, item.fakeNodeHandler, nil, 10, time.Minute, util.NewFakeRateLimiter())
err := nodeController.RegisterNodes(&nodes, item.retryCount, time.Millisecond)
if !item.expectedFail && err != nil {
t.Errorf("unexpected error: %v", err)
@@ -332,7 +332,7 @@ func TestCreateGetStaticNodesWithSpec(t *testing.T) {
},
}
for _, item := range table {
nodeController := NewNodeController(nil, "", item.machines, &resources, nil, nil, 10, time.Minute)
nodeController := NewNodeController(nil, "", item.machines, &resources, nil, nil, 10, time.Minute, util.NewFakeRateLimiter())
nodes, err := nodeController.GetStaticNodesWithSpec()
if err != nil {
t.Errorf("unexpected error: %v", err)
@@ -393,7 +393,7 @@ func TestCreateGetCloudNodesWithSpec(t *testing.T) {
}
for _, item := range table {
nodeController := NewNodeController(item.fakeCloud, ".*", nil, &api.NodeResources{}, nil, nil, 10, time.Minute)
nodeController := NewNodeController(item.fakeCloud, ".*", nil, &api.NodeResources{}, nil, nil, 10, time.Minute, util.NewFakeRateLimiter())
nodes, err := nodeController.GetCloudNodesWithSpec()
if err != nil {
t.Errorf("unexpected error: %v", err)
@@ -499,7 +499,7 @@ func TestSyncCloudNodes(t *testing.T) {
}
for _, item := range table {
nodeController := NewNodeController(item.fakeCloud, item.matchRE, nil, &api.NodeResources{}, item.fakeNodeHandler, nil, 10, time.Minute)
nodeController := NewNodeController(item.fakeCloud, item.matchRE, nil, &api.NodeResources{}, item.fakeNodeHandler, nil, 10, time.Minute, util.NewFakeRateLimiter())
if err := nodeController.SyncCloudNodes(); err != nil {
t.Errorf("unexpected error: %v", err)
}
@@ -585,7 +585,7 @@ func TestSyncCloudNodesEvictPods(t *testing.T) {
}
for _, item := range table {
nodeController := NewNodeController(item.fakeCloud, item.matchRE, nil, &api.NodeResources{}, item.fakeNodeHandler, nil, 10, time.Minute)
nodeController := NewNodeController(item.fakeCloud, item.matchRE, nil, &api.NodeResources{}, item.fakeNodeHandler, nil, 10, time.Minute, util.NewFakeRateLimiter())
if err := nodeController.SyncCloudNodes(); err != nil {
t.Errorf("unexpected error: %v", err)
}
@@ -687,7 +687,7 @@ func TestNodeConditionsCheck(t *testing.T) {
}
for _, item := range table {
nodeController := NewNodeController(nil, "", nil, nil, nil, item.fakeKubeletClient, 10, time.Minute)
nodeController := NewNodeController(nil, "", nil, nil, nil, item.fakeKubeletClient, 10, time.Minute, util.NewFakeRateLimiter())
nodeController.now = func() util.Time { return fakeNow }
conditions := nodeController.DoCheck(item.node)
if !reflect.DeepEqual(item.expectedConditions, conditions) {
@@ -718,7 +718,7 @@ func TestPopulateNodeAddresses(t *testing.T) {
}
for _, item := range table {
nodeController := NewNodeController(item.fakeCloud, ".*", nil, nil, nil, nil, 10, time.Minute)
nodeController := NewNodeController(item.fakeCloud, ".*", nil, nil, nil, nil, 10, time.Minute, util.NewFakeRateLimiter())
result, err := nodeController.PopulateAddresses(item.nodes)
// In case of IP querying error, we should continue.
if err != nil {
@@ -821,7 +821,7 @@ func TestSyncProbedNodeStatus(t *testing.T) {
}
for _, item := range table {
nodeController := NewNodeController(item.fakeCloud, ".*", nil, nil, item.fakeNodeHandler, item.fakeKubeletClient, 10, time.Minute)
nodeController := NewNodeController(item.fakeCloud, ".*", nil, nil, item.fakeNodeHandler, item.fakeKubeletClient, 10, time.Minute, util.NewFakeRateLimiter())
nodeController.now = func() util.Time { return fakeNow }
if err := nodeController.SyncProbedNodeStatus(); err != nil {
t.Errorf("unexpected error: %v", err)
@@ -924,7 +924,7 @@ func TestSyncProbedNodeStatusTransitionTime(t *testing.T) {
}
for _, item := range table {
nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, item.fakeKubeletClient, 10, time.Minute)
nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, item.fakeKubeletClient, 10, time.Minute, util.NewFakeRateLimiter())
nodeController.lookupIP = func(host string) ([]net.IP, error) { return nil, fmt.Errorf("lookup %v: no such host", host) }
nodeController.now = func() util.Time { return fakeNow }
if err := nodeController.SyncProbedNodeStatus(); err != nil {
@@ -1077,7 +1077,7 @@ func TestSyncProbedNodeStatusEvictPods(t *testing.T) {
}
for _, item := range table {
nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, item.fakeKubeletClient, 10, 5*time.Minute)
nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, item.fakeKubeletClient, 10, 5*time.Minute, util.NewFakeRateLimiter())
nodeController.lookupIP = func(host string) ([]net.IP, error) { return nil, fmt.Errorf("lookup %v: no such host", host) }
if err := nodeController.SyncProbedNodeStatus(); err != nil {
t.Errorf("unexpected error: %v", err)
@@ -1235,7 +1235,7 @@ func TestMonitorNodeStatusEvictPods(t *testing.T) {
}
for _, item := range table {
nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, nil, 10, item.evictionTimeout)
nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, nil, 10, item.evictionTimeout, util.NewFakeRateLimiter())
nodeController.now = func() util.Time { return fakeNow }
if err := nodeController.MonitorNodeStatus(); err != nil {
t.Errorf("unexpected error: %v", err)
@@ -1417,7 +1417,7 @@ func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
}
for _, item := range table {
nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, nil, 10, 5*time.Minute)
nodeController := NewNodeController(nil, "", []string{"node0"}, nil, item.fakeNodeHandler, nil, 10, 5*time.Minute, util.NewFakeRateLimiter())
nodeController.now = func() util.Time { return fakeNow }
if err := nodeController.MonitorNodeStatus(); err != nil {
t.Errorf("unexpected error: %v", err)