back off restarts of crashlooping containers
Signed-off-by: Sam Abed <samabed@gmail.com>
This commit is contained in:
111
pkg/util/backoff.go
Normal file
111
pkg/util/backoff.go
Normal file
@@ -0,0 +1,111 @@
|
||||
/*
|
||||
Copyright 2015 The Kubernetes Authors All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package util
|
||||
|
||||
import (
|
||||
"math"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
type backoffEntry struct {
|
||||
backoff time.Duration
|
||||
lastUpdate time.Time
|
||||
}
|
||||
|
||||
type Backoff struct {
|
||||
sync.Mutex
|
||||
Clock Clock
|
||||
defaultDuration time.Duration
|
||||
maxDuration time.Duration
|
||||
perItemBackoff map[string]*backoffEntry
|
||||
}
|
||||
|
||||
func NewBackOff(initial, max time.Duration) *Backoff {
|
||||
return &Backoff{
|
||||
perItemBackoff: map[string]*backoffEntry{},
|
||||
Clock: RealClock{},
|
||||
defaultDuration: initial,
|
||||
maxDuration: max,
|
||||
}
|
||||
}
|
||||
|
||||
// Get the current backoff Duration
|
||||
func (p *Backoff) Get(id string) time.Duration {
|
||||
p.Lock()
|
||||
defer p.Unlock()
|
||||
var delay time.Duration
|
||||
entry, ok := p.perItemBackoff[id]
|
||||
if ok {
|
||||
delay = entry.backoff
|
||||
}
|
||||
return delay
|
||||
}
|
||||
|
||||
// move backoff to the next mark, capping at maxDuration
|
||||
func (p *Backoff) Next(id string, eventTime time.Time) {
|
||||
p.Lock()
|
||||
defer p.Unlock()
|
||||
entry, ok := p.perItemBackoff[id]
|
||||
if !ok || hasExpired(eventTime, entry.lastUpdate, p.maxDuration) {
|
||||
entry = p.initEntryUnsafe(id)
|
||||
} else {
|
||||
delay := entry.backoff * 2 // exponential
|
||||
entry.backoff = time.Duration(math.Min(float64(delay), float64(p.maxDuration)))
|
||||
}
|
||||
entry.lastUpdate = p.Clock.Now()
|
||||
}
|
||||
|
||||
// Returns True if the elapsed time since eventTime is smaller than the current backoff window
|
||||
func (p *Backoff) IsInBackOffSince(id string, eventTime time.Time) bool {
|
||||
p.Lock()
|
||||
defer p.Unlock()
|
||||
entry, ok := p.perItemBackoff[id]
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
if hasExpired(eventTime, entry.lastUpdate, p.maxDuration) {
|
||||
return false
|
||||
}
|
||||
return p.Clock.Now().Sub(eventTime) < entry.backoff
|
||||
}
|
||||
|
||||
// Garbage collect records that have aged past maxDuration. Backoff users are expected
|
||||
// to invoke this periodically.
|
||||
func (p *Backoff) GC() {
|
||||
p.Lock()
|
||||
defer p.Unlock()
|
||||
now := p.Clock.Now()
|
||||
for id, entry := range p.perItemBackoff {
|
||||
if now.Sub(entry.lastUpdate) > p.maxDuration*2 {
|
||||
// GC when entry has not been updated for 2*maxDuration
|
||||
delete(p.perItemBackoff, id)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Take a lock on *Backoff, before calling initEntryUnsafe
|
||||
func (p *Backoff) initEntryUnsafe(id string) *backoffEntry {
|
||||
entry := &backoffEntry{backoff: p.defaultDuration}
|
||||
p.perItemBackoff[id] = entry
|
||||
return entry
|
||||
}
|
||||
|
||||
// After 2*maxDuration we restart the backoff factor to the begining
|
||||
func hasExpired(eventTime time.Time, lastUpdate time.Time, maxDuration time.Duration) bool {
|
||||
return eventTime.Sub(lastUpdate) > maxDuration*2 // consider stable if it's ok for twice the maxDuration
|
||||
}
|
125
pkg/util/backoff_test.go
Normal file
125
pkg/util/backoff_test.go
Normal file
@@ -0,0 +1,125 @@
|
||||
/*
|
||||
Copyright 2015 The Kubernetes Authors All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package util
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func NewFakeBackOff(initial, max time.Duration, tc *FakeClock) *Backoff {
|
||||
return &Backoff{
|
||||
perItemBackoff: map[string]*backoffEntry{},
|
||||
Clock: tc,
|
||||
defaultDuration: initial,
|
||||
maxDuration: max,
|
||||
}
|
||||
}
|
||||
|
||||
func TestSlowBackoff(t *testing.T) {
|
||||
id := "_idSlow"
|
||||
tc := &FakeClock{Time: time.Now()}
|
||||
step := time.Second
|
||||
maxDuration := 50 * step
|
||||
|
||||
b := NewFakeBackOff(step, maxDuration, tc)
|
||||
cases := []time.Duration{0, 1, 2, 4, 8, 16, 32, 50, 50, 50}
|
||||
for ix, c := range cases {
|
||||
tc.Step(step)
|
||||
w := b.Get(id)
|
||||
if w != c*step {
|
||||
t.Errorf("input: '%d': expected %s, got %s", ix, c*step, w)
|
||||
}
|
||||
b.Next(id, tc.Now())
|
||||
}
|
||||
}
|
||||
|
||||
func TestBackoffReset(t *testing.T) {
|
||||
id := "_idReset"
|
||||
tc := &FakeClock{Time: time.Now()}
|
||||
step := time.Second
|
||||
maxDuration := step * 5
|
||||
b := NewFakeBackOff(step, maxDuration, tc)
|
||||
startTime := tc.Now()
|
||||
|
||||
// get to backoff = maxDuration
|
||||
for i := 0; i <= int(maxDuration/step); i++ {
|
||||
tc.Step(step)
|
||||
b.Next(id, tc.Now())
|
||||
}
|
||||
|
||||
// backoff should be capped at maxDuration
|
||||
if !b.IsInBackOffSince(id, tc.Now()) {
|
||||
t.Errorf("expected to be in Backoff got %s", b.Get(id))
|
||||
}
|
||||
|
||||
lastUpdate := tc.Now()
|
||||
tc.Step(2*maxDuration + step) // time += 11s, 11 > 2*maxDuration
|
||||
if b.IsInBackOffSince(id, lastUpdate) {
|
||||
t.Errorf("now=%s lastUpdate=%s (%s) expected Backoff reset got %s b.lastUpdate=%s", tc.Now(), startTime, tc.Now().Sub(lastUpdate), b.Get(id))
|
||||
}
|
||||
}
|
||||
|
||||
func TestBackoffHightWaterMark(t *testing.T) {
|
||||
id := "_idHiWaterMark"
|
||||
tc := &FakeClock{Time: time.Now()}
|
||||
step := time.Second
|
||||
maxDuration := 5 * step
|
||||
b := NewFakeBackOff(step, maxDuration, tc)
|
||||
|
||||
// get to backoff = maxDuration
|
||||
for i := 0; i <= int(maxDuration/step); i++ {
|
||||
tc.Step(step)
|
||||
b.Next(id, tc.Now())
|
||||
}
|
||||
|
||||
// backoff high watermark expires after 2*maxDuration
|
||||
tc.Step(maxDuration + step)
|
||||
b.Next(id, tc.Now())
|
||||
|
||||
if b.Get(id) != maxDuration {
|
||||
t.Errorf("expected Backoff to stay at high watermark %s got %s", maxDuration, b.Get(id))
|
||||
}
|
||||
}
|
||||
|
||||
func TestBackoffGC(t *testing.T) {
|
||||
id := "_idGC"
|
||||
tc := &FakeClock{Time: time.Now()}
|
||||
step := time.Second
|
||||
maxDuration := 5 * step
|
||||
|
||||
b := NewFakeBackOff(step, maxDuration, tc)
|
||||
|
||||
for i := 0; i <= int(maxDuration/step); i++ {
|
||||
tc.Step(step)
|
||||
b.Next(id, tc.Now())
|
||||
}
|
||||
lastUpdate := tc.Now()
|
||||
tc.Step(maxDuration + step)
|
||||
b.GC()
|
||||
_, found := b.perItemBackoff[id]
|
||||
if !found {
|
||||
t.Errorf("expected GC to skip entry, elapsed time=%s maxDuration=%s", tc.Now().Sub(lastUpdate), maxDuration)
|
||||
}
|
||||
|
||||
tc.Step(maxDuration + step)
|
||||
b.GC()
|
||||
r, found := b.perItemBackoff[id]
|
||||
if found {
|
||||
t.Errorf("expected GC of entry after %s got entry %v", tc.Now().Sub(lastUpdate), r)
|
||||
}
|
||||
}
|
@@ -54,3 +54,8 @@ func (f *FakeClock) Now() time.Time {
|
||||
func (f *FakeClock) Since(ts time.Time) time.Duration {
|
||||
return f.Time.Sub(ts)
|
||||
}
|
||||
|
||||
// Move clock by Duration
|
||||
func (f *FakeClock) Step(d time.Duration) {
|
||||
f.Time = f.Time.Add(d)
|
||||
}
|
||||
|
38
pkg/util/clock_test.go
Normal file
38
pkg/util/clock_test.go
Normal file
@@ -0,0 +1,38 @@
|
||||
/*
|
||||
Copyright 2015 The Kubernetes Authors All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package util
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestFakeClock(t *testing.T) {
|
||||
startTime := time.Now()
|
||||
tc := &FakeClock{Time: startTime}
|
||||
tc.Step(time.Second)
|
||||
now := tc.Now()
|
||||
if now.Sub(startTime) != time.Second {
|
||||
t.Errorf("input: %s now=%s gap=%s expected=%s", startTime, now, now.Sub(startTime), time.Second)
|
||||
}
|
||||
|
||||
tt := tc.Now()
|
||||
tc.Time = tt.Add(time.Hour)
|
||||
if tc.Now().Sub(tt) != time.Hour {
|
||||
t.Errorf("input: %s now=%s gap=%s expected=%s", tt, tc.Now(), tc.Now().Sub(tt), time.Hour)
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user