kubernetes/pkg/controller/job/backoff_utils.go
Sathyanarayanan Saravanamuthu c84c8add70
Decouple batch/job back-off logic from workqueues (#114768)
* batch/job: decouple backoff from workqueue

Signed-off-by: Sathyanarayanan Saravanamuthu <sathyanarays@vmware.com>

* Resolving review comments

* Resolving more review comments

* Resolving review comments

Signed-off-by: Sathyanarayanan Saravanamuthu <sathyanarays@vmware.com>

* Computing finish time to now when FinishedAt is unix epoch

* Addressing review comments

Signed-off-by: Sathyanarayanan Saravanamuthu <sathyanarays@vmware.com>

---------

Signed-off-by: Sathyanarayanan Saravanamuthu <sathyanarays@vmware.com>
2023-03-16 10:15:21 -07:00

206 lines
4.9 KiB
Go

/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package job
import (
"fmt"
"sort"
"time"
v1 "k8s.io/api/core/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/utils/clock"
)
// backoffRecord holds the back-off state tracked under a single key.
type backoffRecord struct {
// key identifies the record; backoffRecordKeyFunc returns it as the store key.
key string
// failuresAfterLastSuccess is the number of failed pods counted since the
// most recent succeeded pod. newBackoffRecord resets it to 0 whenever new
// succeeded pods are reported and then re-counts only later failures.
failuresAfterLastSuccess int32
// lastFailureTime is the finish time of the latest counted failure, or nil
// when no failure is currently counted.
lastFailureTime *time.Time
}
// backoffStore keeps back-off records in a cache.Store. newBackoffRecordStore
// builds the store with backoffRecordKeyFunc, so entries are indexed by their
// key field.
type backoffStore struct {
// store holds the records as *backoffRecord values.
store cache.Store
}
// updateBackoffRecord writes record into the store.
//
// When no entry exists under record.key, a pointer to the (copied) record is
// added. Otherwise the entry already held by the store is updated in place
// with the failure count and last failure time of record. Any error reported
// by the underlying store is returned unchanged.
func (s *backoffStore) updateBackoffRecord(record backoffRecord) error {
	existing, found, err := s.store.GetByKey(record.key)
	switch {
	case err != nil:
		return err
	case !found:
		return s.store.Add(&record)
	}

	// The store hands back the pointer it holds, so mutating it updates the
	// stored entry without a second store call.
	stored := existing.(*backoffRecord)
	stored.failuresAfterLastSuccess = record.failuresAfterLastSuccess
	stored.lastFailureTime = record.lastFailureTime
	return nil
}
// removeBackoffRecord deletes the record stored under jobId.
//
// A missing record is not an error: the call is then a no-op and returns nil.
// Errors from the underlying store lookup or delete are returned unchanged.
func (s *backoffStore) removeBackoffRecord(jobId string) error {
	existing, found, err := s.store.GetByKey(jobId)
	if err != nil {
		return err
	}
	if !found {
		return nil
	}
	return s.store.Delete(existing)
}
// newBackoffRecordStore returns an empty backoffStore whose underlying
// cache.Store derives entry keys with backoffRecordKeyFunc.
func newBackoffRecordStore() *backoffStore {
	records := cache.NewStore(backoffRecordKeyFunc)
	return &backoffStore{store: records}
}
// backoffRecordKeyFunc computes the store key of an entry: the key field of a
// *backoffRecord. Any other value (including a non-pointer backoffRecord)
// yields an empty key and an error.
var backoffRecordKeyFunc = func(obj interface{}) (string, error) {
	record, isRecord := obj.(*backoffRecord)
	if !isRecord {
		return "", fmt.Errorf("could not find key for obj %#v", obj)
	}
	return record.key, nil
}
// newBackoffRecord computes the back-off record for key after observing the
// given newly succeeded and newly failed pods. The result is returned by value
// and is NOT written to the store.
//
// It starts from a copy of the record currently stored under key (or an empty
// record when none exists) and then:
//   - with no new pods at all, returns that copy untouched;
//   - with only new failures, adds them to the failure count and records the
//     finish time of the latest one;
//   - with at least one new success, resets the count and timestamp and then
//     counts only the new failures that finished strictly after the latest
//     new success.
//
// Finish times come from getFinishedTime, using clock.Now() as the fallback.
// Both pod slices are sorted in place by finish time as a side effect.
func (backoffRecordStore *backoffStore) newBackoffRecord(clock clock.WithTicker, key string, newSucceededPods []*v1.Pod, newFailedPods []*v1.Pod) backoffRecord {
	now := clock.Now()

	// Work on a private copy so the stored record is never mutated here. The
	// lookup error is deliberately ignored; a failed lookup is treated the
	// same as a missing record.
	result := backoffRecord{key: key}
	if stored, found, _ := backoffRecordStore.store.GetByKey(key); found {
		result = *stored.(*backoffRecord)
	}

	sortByFinishedTime(newSucceededPods, now)
	sortByFinishedTime(newFailedPods, now)

	succeeded, failed := len(newSucceededPods), len(newFailedPods)
	switch {
	case succeeded == 0 && failed == 0:
		return result
	case succeeded == 0:
		// No success interrupts the streak: every new failure extends it.
		result.failuresAfterLastSuccess += int32(failed)
		latestFailure := getFinishedTime(newFailedPods[failed-1], now)
		result.lastFailureTime = &latestFailure
		return result
	}

	// A success was observed, so the previous failure streak is over.
	result.failuresAfterLastSuccess = 0
	result.lastFailureTime = nil
	if failed == 0 {
		return result
	}

	// Walk the failures from newest to oldest and stop at the first one that
	// did not finish after the latest success.
	latestSuccess := getFinishedTime(newSucceededPods[succeeded-1], now)
	for i := failed - 1; i >= 0; i-- {
		failedAt := getFinishedTime(newFailedPods[i], now)
		if !failedAt.After(latestSuccess) {
			break
		}
		if result.lastFailureTime == nil {
			// The first failure visited is the most recent one.
			result.lastFailureTime = &failedAt
		}
		result.failuresAfterLastSuccess++
	}
	return result
}
// sortByFinishedTime sorts pods in place in ascending order of the finish
// time reported by getFinishedTime, which falls back to currentTime for pods
// without a usable finish time.
func sortByFinishedTime(pods []*v1.Pod, currentTime time.Time) {
	finishedEarlier := func(i, j int) bool {
		left := getFinishedTime(pods[i], currentTime)
		right := getFinishedTime(pods[j], currentTime)
		return left.Before(right)
	}
	sort.Slice(pods, finishedEarlier)
}
// getFinishedTime returns the latest FinishedAt among the pod's container
// statuses.
//
// currentTime is returned instead when the pod has no container statuses,
// when any container has no Terminated state, or when the resulting time is
// the zero time.
func getFinishedTime(p *v1.Pod, currentTime time.Time) time.Time {
	var latest time.Time
	for i := range p.Status.ContainerStatuses {
		terminated := p.Status.ContainerStatuses[i].State.Terminated
		if terminated == nil {
			// A container that has not terminated means the pod has no
			// definite finish time yet.
			return currentTime
		}
		finishedAt := terminated.FinishedAt.Time
		// The first container always seeds the result; later ones replace it
		// only when they finished strictly later.
		if i == 0 || latest.Before(finishedAt) {
			latest = finishedAt
		}
	}

	if latest.IsZero() {
		return currentTime
	}
	return latest
}
// getRemainingTime returns how much of the back-off period is still left for
// this record, or 0 when no further waiting is needed.
//
// The back-off period is defaultBackoff for a single failure and doubles with
// each additional failure; as soon as a doubled value reaches maxBackoff it is
// set to maxBackoff. The time elapsed since lastFailureTime, measured with
// clock, is subtracted from that period. A record with no failures always
// yields 0.
//
// If the record counts failures but carries no lastFailureTime, the full
// back-off period is returned.
func (backoff backoffRecord) getRemainingTime(clock clock.WithTicker, defaultBackoff time.Duration, maxBackoff time.Duration) time.Duration {
	if backoff.failuresAfterLastSuccess == 0 {
		return 0
	}

	backoffDuration := defaultBackoff
	for i := 1; i < int(backoff.failuresAfterLastSuccess); i++ {
		backoffDuration *= 2
		if backoffDuration >= maxBackoff {
			backoffDuration = maxBackoff
			break
		}
	}

	// Without a failure timestamp there is nothing to measure elapsed time
	// against; wait the whole period rather than dereferencing a nil pointer.
	if backoff.lastFailureTime == nil {
		return backoffDuration
	}

	elapsed := clock.Since(*backoff.lastFailureTime)
	if backoffDuration < elapsed {
		return 0
	}
	return backoffDuration - elapsed
}