back off restarts of crashlooping containers

Signed-off-by: Sam Abed <samabed@gmail.com>
This commit is contained in:
Sam Abed
2015-08-13 22:59:15 +10:00
parent c92cc62145
commit 995cb15bb6
14 changed files with 505 additions and 34 deletions

111
pkg/util/backoff.go Normal file
View File

@@ -0,0 +1,111 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"math"
"sync"
"time"
)
type backoffEntry struct {
backoff time.Duration
lastUpdate time.Time
}
type Backoff struct {
sync.Mutex
Clock Clock
defaultDuration time.Duration
maxDuration time.Duration
perItemBackoff map[string]*backoffEntry
}
func NewBackOff(initial, max time.Duration) *Backoff {
return &Backoff{
perItemBackoff: map[string]*backoffEntry{},
Clock: RealClock{},
defaultDuration: initial,
maxDuration: max,
}
}
// Get the current backoff Duration
func (p *Backoff) Get(id string) time.Duration {
p.Lock()
defer p.Unlock()
var delay time.Duration
entry, ok := p.perItemBackoff[id]
if ok {
delay = entry.backoff
}
return delay
}
// move backoff to the next mark, capping at maxDuration
func (p *Backoff) Next(id string, eventTime time.Time) {
p.Lock()
defer p.Unlock()
entry, ok := p.perItemBackoff[id]
if !ok || hasExpired(eventTime, entry.lastUpdate, p.maxDuration) {
entry = p.initEntryUnsafe(id)
} else {
delay := entry.backoff * 2 // exponential
entry.backoff = time.Duration(math.Min(float64(delay), float64(p.maxDuration)))
}
entry.lastUpdate = p.Clock.Now()
}
// Returns True if the elapsed time since eventTime is smaller than the current backoff window
func (p *Backoff) IsInBackOffSince(id string, eventTime time.Time) bool {
p.Lock()
defer p.Unlock()
entry, ok := p.perItemBackoff[id]
if !ok {
return false
}
if hasExpired(eventTime, entry.lastUpdate, p.maxDuration) {
return false
}
return p.Clock.Now().Sub(eventTime) < entry.backoff
}
// Garbage collect records that have aged past maxDuration. Backoff users are expected
// to invoke this periodically.
func (p *Backoff) GC() {
p.Lock()
defer p.Unlock()
now := p.Clock.Now()
for id, entry := range p.perItemBackoff {
if now.Sub(entry.lastUpdate) > p.maxDuration*2 {
// GC when entry has not been updated for 2*maxDuration
delete(p.perItemBackoff, id)
}
}
}
// Take a lock on *Backoff, before calling initEntryUnsafe
func (p *Backoff) initEntryUnsafe(id string) *backoffEntry {
entry := &backoffEntry{backoff: p.defaultDuration}
p.perItemBackoff[id] = entry
return entry
}
// After 2*maxDuration we restart the backoff factor to the begining
func hasExpired(eventTime time.Time, lastUpdate time.Time, maxDuration time.Duration) bool {
return eventTime.Sub(lastUpdate) > maxDuration*2 // consider stable if it's ok for twice the maxDuration
}

125
pkg/util/backoff_test.go Normal file
View File

@@ -0,0 +1,125 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"testing"
"time"
)
func NewFakeBackOff(initial, max time.Duration, tc *FakeClock) *Backoff {
return &Backoff{
perItemBackoff: map[string]*backoffEntry{},
Clock: tc,
defaultDuration: initial,
maxDuration: max,
}
}
func TestSlowBackoff(t *testing.T) {
id := "_idSlow"
tc := &FakeClock{Time: time.Now()}
step := time.Second
maxDuration := 50 * step
b := NewFakeBackOff(step, maxDuration, tc)
cases := []time.Duration{0, 1, 2, 4, 8, 16, 32, 50, 50, 50}
for ix, c := range cases {
tc.Step(step)
w := b.Get(id)
if w != c*step {
t.Errorf("input: '%d': expected %s, got %s", ix, c*step, w)
}
b.Next(id, tc.Now())
}
}
func TestBackoffReset(t *testing.T) {
id := "_idReset"
tc := &FakeClock{Time: time.Now()}
step := time.Second
maxDuration := step * 5
b := NewFakeBackOff(step, maxDuration, tc)
startTime := tc.Now()
// get to backoff = maxDuration
for i := 0; i <= int(maxDuration/step); i++ {
tc.Step(step)
b.Next(id, tc.Now())
}
// backoff should be capped at maxDuration
if !b.IsInBackOffSince(id, tc.Now()) {
t.Errorf("expected to be in Backoff got %s", b.Get(id))
}
lastUpdate := tc.Now()
tc.Step(2*maxDuration + step) // time += 11s, 11 > 2*maxDuration
if b.IsInBackOffSince(id, lastUpdate) {
t.Errorf("now=%s lastUpdate=%s (%s) expected Backoff reset got %s b.lastUpdate=%s", tc.Now(), startTime, tc.Now().Sub(lastUpdate), b.Get(id))
}
}
func TestBackoffHightWaterMark(t *testing.T) {
id := "_idHiWaterMark"
tc := &FakeClock{Time: time.Now()}
step := time.Second
maxDuration := 5 * step
b := NewFakeBackOff(step, maxDuration, tc)
// get to backoff = maxDuration
for i := 0; i <= int(maxDuration/step); i++ {
tc.Step(step)
b.Next(id, tc.Now())
}
// backoff high watermark expires after 2*maxDuration
tc.Step(maxDuration + step)
b.Next(id, tc.Now())
if b.Get(id) != maxDuration {
t.Errorf("expected Backoff to stay at high watermark %s got %s", maxDuration, b.Get(id))
}
}
func TestBackoffGC(t *testing.T) {
id := "_idGC"
tc := &FakeClock{Time: time.Now()}
step := time.Second
maxDuration := 5 * step
b := NewFakeBackOff(step, maxDuration, tc)
for i := 0; i <= int(maxDuration/step); i++ {
tc.Step(step)
b.Next(id, tc.Now())
}
lastUpdate := tc.Now()
tc.Step(maxDuration + step)
b.GC()
_, found := b.perItemBackoff[id]
if !found {
t.Errorf("expected GC to skip entry, elapsed time=%s maxDuration=%s", tc.Now().Sub(lastUpdate), maxDuration)
}
tc.Step(maxDuration + step)
b.GC()
r, found := b.perItemBackoff[id]
if found {
t.Errorf("expected GC of entry after %s got entry %v", tc.Now().Sub(lastUpdate), r)
}
}

View File

@@ -54,3 +54,8 @@ func (f *FakeClock) Now() time.Time {
func (f *FakeClock) Since(ts time.Time) time.Duration {
return f.Time.Sub(ts)
}
// Move clock by Duration
func (f *FakeClock) Step(d time.Duration) {
f.Time = f.Time.Add(d)
}

38
pkg/util/clock_test.go Normal file
View File

@@ -0,0 +1,38 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"testing"
"time"
)
func TestFakeClock(t *testing.T) {
startTime := time.Now()
tc := &FakeClock{Time: startTime}
tc.Step(time.Second)
now := tc.Now()
if now.Sub(startTime) != time.Second {
t.Errorf("input: %s now=%s gap=%s expected=%s", startTime, now, now.Sub(startTime), time.Second)
}
tt := tc.Now()
tc.Time = tt.Add(time.Hour)
if tc.Now().Sub(tt) != time.Hour {
t.Errorf("input: %s now=%s gap=%s expected=%s", tt, tc.Now(), tc.Now().Sub(tt), time.Hour)
}
}