Jitter the periods of some of the kubelet's sync loops:

- pod_workers: pod syncing
- prober workers: container syncing

In order to keep the current state of Kubernetes objects (e.g. pods, containers) up to date,
the kubelet runs periodic sync loops. When there are many objects to synchronize,
the loops generate bursts of communication traffic. At some point the bursts interfere,
the CPU usage curve hits the roof, and the CPU sits at 100% utilization.

To spread the traffic over time, some sync loops can jitter their period on each iteration
and help flatten the curve (a minimal sketch of the jitter semantics follows).
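
As a rough, self-contained sketch (not the upstream code), the behaviour of the wait.Jitter
helper used in the diff below can be illustrated like this: given a base duration d and a
maxFactor, it returns a duration in [d, d*(1+maxFactor)), so workers sharing the same nominal
period wake up at slightly different times:

package main

import (
	"fmt"
	"math/rand"
	"time"
)

// jitter mirrors the semantics of wait.Jitter: it returns a duration
// drawn uniformly from [d, d*(1+maxFactor)), leaving d unchanged when
// maxFactor is not positive.
func jitter(d time.Duration, maxFactor float64) time.Duration {
	if maxFactor <= 0.0 {
		return d
	}
	return d + time.Duration(rand.Float64()*maxFactor*float64(d))
}

func main() {
	// With a factor of 0.5, a one-minute resync period spreads
	// across [60s, 90s) instead of every worker firing at exactly 60s.
	for i := 0; i < 5; i++ {
		fmt.Println(jitter(time.Minute, 0.5))
	}
}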
Author: Jan Chaloupka
Date:   2016-02-05 16:27:06 +01:00
Parent: f93d9304a4
Commit: 392fc6668f

2 changed files with 18 additions and 3 deletions


@@ -28,6 +28,7 @@ import (
 	"k8s.io/kubernetes/pkg/kubelet/util/queue"
 	"k8s.io/kubernetes/pkg/types"
 	"k8s.io/kubernetes/pkg/util/runtime"
+	"k8s.io/kubernetes/pkg/util/wait"
 )
 
 // PodWorkers is an abstract interface for testability.
@@ -39,6 +40,14 @@ type PodWorkers interface {
 
 type syncPodFnType func(*api.Pod, *api.Pod, *kubecontainer.PodStatus, kubetypes.SyncPodType) error
 
+const (
+	// jitter factor for resyncInterval
+	workerResyncIntervalJitterFactor = 0.5
+
+	// jitter factor for backOffPeriod
+	workerBackOffPeriodJitterFactor = 0.5
+)
+
 type podWorkers struct {
 	// Protects all per worker fields.
 	podLock sync.Mutex
@@ -209,10 +218,10 @@ func (p *podWorkers) wrapUp(uid types.UID, syncErr error) {
 	switch {
 	case syncErr == nil:
 		// No error; requeue at the regular resync interval.
-		p.workQueue.Enqueue(uid, p.resyncInterval)
+		p.workQueue.Enqueue(uid, wait.Jitter(p.resyncInterval, workerResyncIntervalJitterFactor))
 	default:
 		// Error occurred during the sync; back off and then retry.
-		p.workQueue.Enqueue(uid, p.backOffPeriod)
+		p.workQueue.Enqueue(uid, wait.Jitter(p.backOffPeriod, workerBackOffPeriodJitterFactor))
 	}
 	p.checkForUpdates(uid)
 }
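
With both factors set to 0.5, a requeue that previously landed exactly at resyncInterval now
lands somewhere in [resyncInterval, 1.5*resyncInterval), and likewise for backOffPeriod, so
pod workers that start out synchronized drift apart over successive iterations instead of all
hitting the sync path at the same instant.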