Modify mostRecentScheduleTime to return more detailed information about missed schedules
Initially this method was returning a number of missed schedules, but that turned out to be not reliable for some complex schedules. For example, those which are being run only during week days. The second approach was to only return a boolean indicating the too many missed information. It turns out that we need to return all three values: none missed, few missed and many missed, to let consumers know what to do, but don't leak the wrong number out of mostRecentScheduleTime.
This commit is contained in:
@@ -36,6 +36,27 @@ import (
|
||||
|
||||
// Utilities for dealing with Jobs and CronJobs and time.
|
||||
|
||||
type missedSchedulesType int
|
||||
|
||||
const (
|
||||
noneMissed missedSchedulesType = iota
|
||||
fewMissed
|
||||
manyMissed
|
||||
)
|
||||
|
||||
func (e missedSchedulesType) String() string {
|
||||
switch e {
|
||||
case noneMissed:
|
||||
return "none"
|
||||
case fewMissed:
|
||||
return "few"
|
||||
case manyMissed:
|
||||
return "many"
|
||||
default:
|
||||
return fmt.Sprintf("unknown(%d)", int(e))
|
||||
}
|
||||
}
|
||||
|
||||
// inActiveList checks if cronjob's .status.active has a job with the same UID.
|
||||
func inActiveList(cj *batchv1.CronJob, uid types.UID) bool {
|
||||
for _, j := range cj.Status.Active {
|
||||
@@ -75,11 +96,12 @@ func deleteFromActiveList(cj *batchv1.CronJob, uid types.UID) {
|
||||
// mostRecentScheduleTime returns:
|
||||
// - the last schedule time or CronJob's creation time,
|
||||
// - the most recent time a Job should be created or nil, if that's after now,
|
||||
// - boolean indicating an excessive number of missed schedules,
|
||||
// - value indicating either none missed schedules, a few missed or many missed
|
||||
// - error in an edge case where the schedule specification is grammatically correct,
|
||||
// but logically doesn't make sense (31st day for months with only 30 days, for example).
|
||||
func mostRecentScheduleTime(cj *batchv1.CronJob, now time.Time, schedule cron.Schedule, includeStartingDeadlineSeconds bool) (time.Time, *time.Time, bool, error) {
|
||||
func mostRecentScheduleTime(cj *batchv1.CronJob, now time.Time, schedule cron.Schedule, includeStartingDeadlineSeconds bool) (time.Time, *time.Time, missedSchedulesType, error) {
|
||||
earliestTime := cj.ObjectMeta.CreationTimestamp.Time
|
||||
missedSchedules := noneMissed
|
||||
if cj.Status.LastScheduleTime != nil {
|
||||
earliestTime = cj.Status.LastScheduleTime.Time
|
||||
}
|
||||
@@ -96,10 +118,10 @@ func mostRecentScheduleTime(cj *batchv1.CronJob, now time.Time, schedule cron.Sc
|
||||
t2 := schedule.Next(t1)
|
||||
|
||||
if now.Before(t1) {
|
||||
return earliestTime, nil, false, nil
|
||||
return earliestTime, nil, missedSchedules, nil
|
||||
}
|
||||
if now.Before(t2) {
|
||||
return earliestTime, &t1, false, nil
|
||||
return earliestTime, &t1, missedSchedules, nil
|
||||
}
|
||||
|
||||
// It is possible for cron.ParseStandard("59 23 31 2 *") to return an invalid schedule
|
||||
@@ -107,7 +129,7 @@ func mostRecentScheduleTime(cj *batchv1.CronJob, now time.Time, schedule cron.Sc
|
||||
// In this case the timeBetweenTwoSchedules will be 0, and we error out the invalid schedule
|
||||
timeBetweenTwoSchedules := int64(t2.Sub(t1).Round(time.Second).Seconds())
|
||||
if timeBetweenTwoSchedules < 1 {
|
||||
return earliestTime, nil, false, fmt.Errorf("time difference between two schedules is less than 1 second")
|
||||
return earliestTime, nil, missedSchedules, fmt.Errorf("time difference between two schedules is less than 1 second")
|
||||
}
|
||||
// this logic used for calculating number of missed schedules does a rough
|
||||
// approximation, by calculating a diff between two schedules (t1 and t2),
|
||||
@@ -146,12 +168,18 @@ func mostRecentScheduleTime(cj *batchv1.CronJob, now time.Time, schedule cron.Sc
|
||||
//
|
||||
// I've somewhat arbitrarily picked 100, as more than 80,
|
||||
// but less than "lots".
|
||||
tooManyMissed := numberOfMissedSchedules > 100
|
||||
switch {
|
||||
case numberOfMissedSchedules > 100:
|
||||
missedSchedules = manyMissed
|
||||
// inform about few missed, still
|
||||
case numberOfMissedSchedules > 0:
|
||||
missedSchedules = fewMissed
|
||||
}
|
||||
|
||||
if mostRecentTime.IsZero() {
|
||||
return earliestTime, nil, tooManyMissed, nil
|
||||
return earliestTime, nil, missedSchedules, nil
|
||||
}
|
||||
return earliestTime, &mostRecentTime, tooManyMissed, nil
|
||||
return earliestTime, &mostRecentTime, missedSchedules, nil
|
||||
}
|
||||
|
||||
// nextScheduleTimeDuration returns the time duration to requeue based on
|
||||
@@ -160,13 +188,18 @@ func mostRecentScheduleTime(cj *batchv1.CronJob, now time.Time, schedule cron.Sc
|
||||
// realistic cases should be around 100s, the job will still be executed without missing
|
||||
// the schedule.
|
||||
func nextScheduleTimeDuration(cj *batchv1.CronJob, now time.Time, schedule cron.Schedule) *time.Duration {
|
||||
earliestTime, mostRecentTime, _, err := mostRecentScheduleTime(cj, now, schedule, false)
|
||||
earliestTime, mostRecentTime, missedSchedules, err := mostRecentScheduleTime(cj, now, schedule, false)
|
||||
if err != nil {
|
||||
// we still have to requeue at some point, so aim for the next scheduling slot from now
|
||||
mostRecentTime = &now
|
||||
} else if mostRecentTime == nil {
|
||||
// no missed schedules since earliestTime
|
||||
mostRecentTime = &earliestTime
|
||||
if missedSchedules == noneMissed {
|
||||
// no missed schedules since earliestTime
|
||||
mostRecentTime = &earliestTime
|
||||
} else {
|
||||
// if there are missed schedules since earliestTime, always use now
|
||||
mostRecentTime = &now
|
||||
}
|
||||
}
|
||||
|
||||
t := schedule.Next(*mostRecentTime).Add(nextScheduleDelta).Sub(now)
|
||||
@@ -177,13 +210,13 @@ func nextScheduleTimeDuration(cj *batchv1.CronJob, now time.Time, schedule cron.
|
||||
// and before now, or nil if no unmet schedule times, and an error.
|
||||
// If there are too many (>100) unstarted times, it will also record a warning.
|
||||
func nextScheduleTime(logger klog.Logger, cj *batchv1.CronJob, now time.Time, schedule cron.Schedule, recorder record.EventRecorder) (*time.Time, error) {
|
||||
_, mostRecentTime, tooManyMissed, err := mostRecentScheduleTime(cj, now, schedule, true)
|
||||
_, mostRecentTime, missedSchedules, err := mostRecentScheduleTime(cj, now, schedule, true)
|
||||
|
||||
if mostRecentTime == nil || mostRecentTime.After(now) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if tooManyMissed {
|
||||
if missedSchedules == manyMissed {
|
||||
recorder.Eventf(cj, corev1.EventTypeWarning, "TooManyMissedTimes", "too many missed start times. Set or decrease .spec.startingDeadlineSeconds or check clock skew")
|
||||
logger.Info("too many missed times", "cronjob", klog.KObj(cj))
|
||||
}
|
||||
|
@@ -364,7 +364,7 @@ func TestMostRecentScheduleTime(t *testing.T) {
|
||||
now time.Time
|
||||
expectedEarliestTime time.Time
|
||||
expectedRecentTime *time.Time
|
||||
expectedTooManyMissed bool
|
||||
expectedTooManyMissed missedSchedulesType
|
||||
wantErr bool
|
||||
}{
|
||||
{
|
||||
@@ -405,9 +405,10 @@ func TestMostRecentScheduleTime(t *testing.T) {
|
||||
Schedule: "0 * * * *",
|
||||
},
|
||||
},
|
||||
now: *deltaTimeAfterTopOfTheHour(301 * time.Minute),
|
||||
expectedRecentTime: deltaTimeAfterTopOfTheHour(300 * time.Minute),
|
||||
expectedEarliestTime: *deltaTimeAfterTopOfTheHour(10 * time.Second),
|
||||
now: *deltaTimeAfterTopOfTheHour(301 * time.Minute),
|
||||
expectedRecentTime: deltaTimeAfterTopOfTheHour(300 * time.Minute),
|
||||
expectedEarliestTime: *deltaTimeAfterTopOfTheHour(10 * time.Second),
|
||||
expectedTooManyMissed: fewMissed,
|
||||
},
|
||||
{
|
||||
name: "complex schedule",
|
||||
@@ -422,9 +423,10 @@ func TestMostRecentScheduleTime(t *testing.T) {
|
||||
LastScheduleTime: &metav1HalfPastTheHour,
|
||||
},
|
||||
},
|
||||
now: *deltaTimeAfterTopOfTheHour(24*time.Hour + 31*time.Minute),
|
||||
expectedRecentTime: deltaTimeAfterTopOfTheHour(24*time.Hour + 30*time.Minute),
|
||||
expectedEarliestTime: *deltaTimeAfterTopOfTheHour(30 * time.Minute),
|
||||
now: *deltaTimeAfterTopOfTheHour(24*time.Hour + 31*time.Minute),
|
||||
expectedRecentTime: deltaTimeAfterTopOfTheHour(24*time.Hour + 30*time.Minute),
|
||||
expectedEarliestTime: *deltaTimeAfterTopOfTheHour(30 * time.Minute),
|
||||
expectedTooManyMissed: fewMissed,
|
||||
},
|
||||
{
|
||||
name: "another complex schedule",
|
||||
@@ -439,9 +441,10 @@ func TestMostRecentScheduleTime(t *testing.T) {
|
||||
LastScheduleTime: &metav1HalfPastTheHour,
|
||||
},
|
||||
},
|
||||
now: *deltaTimeAfterTopOfTheHour(30*time.Hour + 30*time.Minute),
|
||||
expectedRecentTime: nil,
|
||||
expectedEarliestTime: *deltaTimeAfterTopOfTheHour(30 * time.Minute),
|
||||
now: *deltaTimeAfterTopOfTheHour(30*time.Hour + 30*time.Minute),
|
||||
expectedRecentTime: nil,
|
||||
expectedEarliestTime: *deltaTimeAfterTopOfTheHour(30 * time.Minute),
|
||||
expectedTooManyMissed: fewMissed,
|
||||
},
|
||||
{
|
||||
name: "complex schedule with longer diff between executions",
|
||||
@@ -456,9 +459,10 @@ func TestMostRecentScheduleTime(t *testing.T) {
|
||||
LastScheduleTime: &metav1HalfPastTheHour,
|
||||
},
|
||||
},
|
||||
now: *deltaTimeAfterTopOfTheHour(96*time.Hour + 31*time.Minute),
|
||||
expectedRecentTime: deltaTimeAfterTopOfTheHour(96*time.Hour + 30*time.Minute),
|
||||
expectedEarliestTime: *deltaTimeAfterTopOfTheHour(30 * time.Minute),
|
||||
now: *deltaTimeAfterTopOfTheHour(96*time.Hour + 31*time.Minute),
|
||||
expectedRecentTime: deltaTimeAfterTopOfTheHour(96*time.Hour + 30*time.Minute),
|
||||
expectedEarliestTime: *deltaTimeAfterTopOfTheHour(30 * time.Minute),
|
||||
expectedTooManyMissed: fewMissed,
|
||||
},
|
||||
{
|
||||
name: "complex schedule with shorter diff between executions",
|
||||
@@ -470,9 +474,10 @@ func TestMostRecentScheduleTime(t *testing.T) {
|
||||
Schedule: "30 6-16/4 * * 1-5",
|
||||
},
|
||||
},
|
||||
now: *deltaTimeAfterTopOfTheHour(24*time.Hour + 31*time.Minute),
|
||||
expectedRecentTime: deltaTimeAfterTopOfTheHour(24*time.Hour + 30*time.Minute),
|
||||
expectedEarliestTime: *topOfTheHour(),
|
||||
now: *deltaTimeAfterTopOfTheHour(24*time.Hour + 31*time.Minute),
|
||||
expectedRecentTime: deltaTimeAfterTopOfTheHour(24*time.Hour + 30*time.Minute),
|
||||
expectedEarliestTime: *topOfTheHour(),
|
||||
expectedTooManyMissed: fewMissed,
|
||||
},
|
||||
{
|
||||
name: "@every schedule",
|
||||
@@ -491,7 +496,7 @@ func TestMostRecentScheduleTime(t *testing.T) {
|
||||
now: *deltaTimeAfterTopOfTheHour(7 * 24 * time.Hour),
|
||||
expectedRecentTime: deltaTimeAfterTopOfTheHour((6 * 24 * time.Hour) + 23*time.Hour + 1*time.Minute),
|
||||
expectedEarliestTime: *deltaTimeAfterTopOfTheHour(1 * time.Minute),
|
||||
expectedTooManyMissed: true,
|
||||
expectedTooManyMissed: manyMissed,
|
||||
},
|
||||
{
|
||||
name: "rogue cronjob",
|
||||
@@ -611,6 +616,63 @@ func TestMostRecentScheduleTime(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestNextScheduleTimeDuration(t *testing.T) {
|
||||
metav1TopOfTheHour := metav1.NewTime(*topOfTheHour())
|
||||
metav1HalfPastTheHour := metav1.NewTime(*deltaTimeAfterTopOfTheHour(30 * time.Minute))
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
cj *batchv1.CronJob
|
||||
now time.Time
|
||||
expectedDuration time.Duration
|
||||
}{
|
||||
{
|
||||
name: "complex schedule skipping weekend",
|
||||
cj: &batchv1.CronJob{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
CreationTimestamp: metav1TopOfTheHour,
|
||||
},
|
||||
Spec: batchv1.CronJobSpec{
|
||||
Schedule: "30 6-16/4 * * 1-5",
|
||||
},
|
||||
Status: batchv1.CronJobStatus{
|
||||
LastScheduleTime: &metav1HalfPastTheHour,
|
||||
},
|
||||
},
|
||||
now: *deltaTimeAfterTopOfTheHour(24*time.Hour + 31*time.Minute),
|
||||
expectedDuration: 3*time.Hour + 59*time.Minute + nextScheduleDelta,
|
||||
},
|
||||
{
|
||||
name: "another complex schedule skipping weekend",
|
||||
cj: &batchv1.CronJob{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
CreationTimestamp: metav1TopOfTheHour,
|
||||
},
|
||||
Spec: batchv1.CronJobSpec{
|
||||
Schedule: "30 10,11,12 * * 1-5",
|
||||
},
|
||||
Status: batchv1.CronJobStatus{
|
||||
LastScheduleTime: &metav1HalfPastTheHour,
|
||||
},
|
||||
},
|
||||
now: *deltaTimeAfterTopOfTheHour(30*time.Hour + 30*time.Minute),
|
||||
expectedDuration: 66*time.Hour + nextScheduleDelta,
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
sched, err := cron.ParseStandard(tt.cj.Spec.Schedule)
|
||||
if err != nil {
|
||||
t.Errorf("error setting up the test, %s", err)
|
||||
}
|
||||
gotScheduleTimeDuration := nextScheduleTimeDuration(tt.cj, tt.now, sched)
|
||||
if !reflect.DeepEqual(gotScheduleTimeDuration, &tt.expectedDuration) {
|
||||
t.Errorf("scheduleTimeDuration - got %s, want %s", gotScheduleTimeDuration, tt.expectedDuration)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func topOfTheHour() *time.Time {
|
||||
T1, err := time.Parse(time.RFC3339, "2016-05-19T10:00:00Z")
|
||||
if err != nil {
|
||||
|
Reference in New Issue
Block a user