Jitter the periods of some of the kubelet's sync loops:

- pod_workers: pod syncing
- prober workers: container syncing

In order to keep the current state of Kubernetes objects (e.g. pods, containers) up to date,
the kubelet runs periodic sync loops. When there are many objects to synchronize,
the loops generate bursts of communication traffic. At some point the bursts interfere,
the CPU usage curve hits the roof, and the CPU sits at 100% utilization.

To spread the traffic over time, some sync loops can jitter their period on each iteration
and help flatten the curve (a minimal sketch of the jitter semantics follows).
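
As a rough, self-contained sketch (not the upstream code), the behaviour of the wait.Jitter
helper used in the diff below can be illustrated like this: given a base duration d and a
maxFactor, it returns a duration in [d, d*(1+maxFactor)), so workers sharing the same nominal
period wake up at slightly different times:

package main

import (
	"fmt"
	"math/rand"
	"time"
)

// jitter mirrors the semantics of wait.Jitter: it returns a duration
// drawn uniformly from [d, d*(1+maxFactor)), leaving d unchanged when
// maxFactor is not positive.
func jitter(d time.Duration, maxFactor float64) time.Duration {
	if maxFactor <= 0.0 {
		return d
	}
	return d + time.Duration(rand.Float64()*maxFactor*float64(d))
}

func main() {
	// With a factor of 0.5, a one-minute resync period spreads
	// across [60s, 90s) instead of every worker firing at exactly 60s.
	for i := 0; i < 5; i++ {
		fmt.Println(jitter(time.Minute, 0.5))
	}
}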
Author: Jan Chaloupka
Date:   2016-02-05 16:27:06 +01:00
Parent: f93d9304a4
Commit: 392fc6668f

2 changed files with 18 additions and 3 deletions


@@ -28,6 +28,7 @@ import (
 	"k8s.io/kubernetes/pkg/kubelet/util/queue"
 	"k8s.io/kubernetes/pkg/types"
 	"k8s.io/kubernetes/pkg/util/runtime"
+	"k8s.io/kubernetes/pkg/util/wait"
 )
 
 // PodWorkers is an abstract interface for testability.
@@ -39,6 +40,14 @@ type PodWorkers interface {
 
 type syncPodFnType func(*api.Pod, *api.Pod, *kubecontainer.PodStatus, kubetypes.SyncPodType) error
 
+const (
+	// jitter factor for resyncInterval
+	workerResyncIntervalJitterFactor = 0.5
+
+	// jitter factor for backOffPeriod
+	workerBackOffPeriodJitterFactor = 0.5
+)
+
 type podWorkers struct {
 	// Protects all per worker fields.
 	podLock sync.Mutex
@@ -209,10 +218,10 @@ func (p *podWorkers) wrapUp(uid types.UID, syncErr error) {
 	switch {
 	case syncErr == nil:
 		// No error; requeue at the regular resync interval.
-		p.workQueue.Enqueue(uid, p.resyncInterval)
+		p.workQueue.Enqueue(uid, wait.Jitter(p.resyncInterval, workerResyncIntervalJitterFactor))
 	default:
 		// Error occurred during the sync; back off and then retry.
-		p.workQueue.Enqueue(uid, p.backOffPeriod)
+		p.workQueue.Enqueue(uid, wait.Jitter(p.backOffPeriod, workerBackOffPeriodJitterFactor))
 	}
 	p.checkForUpdates(uid)
 }
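
With both factors set to 0.5, a requeue that previously landed exactly at resyncInterval now
lands somewhere in [resyncInterval, 1.5*resyncInterval), and likewise for backOffPeriod, so
pod workers that start out synchronized drift apart over successive iterations instead of all
hitting the sync path at the same instant.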