Merge pull request #123343 from haircommander/image-gc-e2e-2

KEP-4210: add e2e tests and add small fix for ImageGCMaxAge
This commit is contained in:
Kubernetes Prow Robot
2024-02-20 10:48:15 -08:00
committed by GitHub
5 changed files with 149 additions and 16 deletions

View File

@@ -69,7 +69,7 @@ type StatsProvider interface {
type ImageGCManager interface {
// Applies the garbage collection policy. Errors include being unable to free
// enough space as per the garbage collection policy.
GarbageCollect(ctx context.Context) error
GarbageCollect(ctx context.Context, beganGC time.Time) error
// Start async garbage collection of images.
Start()
@@ -308,7 +308,7 @@ func (im *realImageGCManager) detectImages(ctx context.Context, detectTime time.
return imagesInUse, nil
}
func (im *realImageGCManager) GarbageCollect(ctx context.Context) error {
func (im *realImageGCManager) GarbageCollect(ctx context.Context, beganGC time.Time) error {
ctx, otelSpan := im.tracer.Start(ctx, "Images/GarbageCollect")
defer otelSpan.End()
@@ -318,7 +318,7 @@ func (im *realImageGCManager) GarbageCollect(ctx context.Context) error {
return err
}
images, err = im.freeOldImages(ctx, images, freeTime)
images, err = im.freeOldImages(ctx, images, freeTime, beganGC)
if err != nil {
return err
}
@@ -369,10 +369,16 @@ func (im *realImageGCManager) GarbageCollect(ctx context.Context) error {
return nil
}
func (im *realImageGCManager) freeOldImages(ctx context.Context, images []evictionInfo, freeTime time.Time) ([]evictionInfo, error) {
func (im *realImageGCManager) freeOldImages(ctx context.Context, images []evictionInfo, freeTime, beganGC time.Time) ([]evictionInfo, error) {
if im.policy.MaxAge == 0 {
return images, nil
}
// Wait until the MaxAge has passed since the Kubelet has started,
// or else we risk prematurely garbage collecting images.
if freeTime.Sub(beganGC) <= im.policy.MaxAge {
return images, nil
}
var deletionErrors []error
remainingImages := make([]evictionInfo, 0)
for _, image := range images {

View File

@@ -659,7 +659,7 @@ func TestGarbageCollectBelowLowThreshold(t *testing.T) {
}
mockStatsProvider.EXPECT().ImageFsStats(gomock.Any()).Return(imageStats, imageStats, nil)
assert.NoError(t, manager.GarbageCollect(ctx))
assert.NoError(t, manager.GarbageCollect(ctx, time.Now()))
}
func TestGarbageCollectCadvisorFailure(t *testing.T) {
@@ -674,7 +674,7 @@ func TestGarbageCollectCadvisorFailure(t *testing.T) {
manager, _ := newRealImageGCManager(policy, mockStatsProvider)
mockStatsProvider.EXPECT().ImageFsStats(gomock.Any()).Return(&statsapi.FsStats{}, &statsapi.FsStats{}, fmt.Errorf("error"))
assert.NotNil(t, manager.GarbageCollect(ctx))
assert.NotNil(t, manager.GarbageCollect(ctx, time.Now()))
}
func TestGarbageCollectBelowSuccess(t *testing.T) {
@@ -699,7 +699,7 @@ func TestGarbageCollectBelowSuccess(t *testing.T) {
makeImage(0, 450),
}
assert.NoError(t, manager.GarbageCollect(ctx))
assert.NoError(t, manager.GarbageCollect(ctx, time.Now()))
}
func TestGarbageCollectNotEnoughFreed(t *testing.T) {
@@ -723,7 +723,7 @@ func TestGarbageCollectNotEnoughFreed(t *testing.T) {
makeImage(0, 50),
}
assert.NotNil(t, manager.GarbageCollect(ctx))
assert.NotNil(t, manager.GarbageCollect(ctx, time.Now()))
}
func TestGarbageCollectImageNotOldEnough(t *testing.T) {
@@ -824,14 +824,15 @@ func TestGarbageCollectImageTooOld(t *testing.T) {
// First GC round should not GC remaining image, as it was used too recently.
assert := assert.New(t)
images, err = manager.freeOldImages(ctx, images, fakeClock.Now())
oldStartTime := fakeClock.Now()
images, err = manager.freeOldImages(ctx, images, oldStartTime, oldStartTime)
require.NoError(t, err)
assert.Len(images, 1)
assert.Len(fakeRuntime.ImageList, 2)
// move clock by a millisecond past maxAge duration, then 1 image will be garbage collected
fakeClock.Step(policy.MaxAge + 1)
images, err = manager.freeOldImages(ctx, images, fakeClock.Now())
images, err = manager.freeOldImages(ctx, images, fakeClock.Now(), oldStartTime)
require.NoError(t, err)
assert.Len(images, 0)
assert.Len(fakeRuntime.ImageList, 1)
@@ -879,8 +880,10 @@ func TestGarbageCollectImageMaxAgeDisabled(t *testing.T) {
require.Equal(t, len(images), 1)
assert.Len(fakeRuntime.ImageList, 2)
oldStartTime := fakeClock.Now()
// First GC round should not GC remaining image, as it was used too recently.
images, err = manager.freeOldImages(ctx, images, fakeClock.Now())
images, err = manager.freeOldImages(ctx, images, oldStartTime, oldStartTime)
require.NoError(t, err)
assert.Len(images, 1)
assert.Len(fakeRuntime.ImageList, 2)
@@ -888,7 +891,7 @@ func TestGarbageCollectImageMaxAgeDisabled(t *testing.T) {
// Move clock by a lot, and the images should continue to not be garbage colleced
// See https://stackoverflow.com/questions/25065055/what-is-the-maximum-time-time-in-go
fakeClock.SetTime(time.Unix(1<<63-62135596801, 999999999))
images, err = manager.freeOldImages(ctx, images, fakeClock.Now())
images, err = manager.freeOldImages(ctx, images, fakeClock.Now(), oldStartTime)
require.NoError(t, err)
assert.Len(images, 1)
assert.Len(fakeRuntime.ImageList, 2)

View File

@@ -1433,16 +1433,19 @@ func (kl *Kubelet) StartGarbageCollection() {
}
}, ContainerGCPeriod, wait.NeverStop)
// when the high threshold is set to 100, stub the image GC manager
if kl.kubeletConfiguration.ImageGCHighThresholdPercent == 100 {
klog.V(2).InfoS("ImageGCHighThresholdPercent is set 100, Disable image GC")
// when the high threshold is set to 100, and the max age is 0 (or the max age feature is disabled)
// stub the image GC manager
if kl.kubeletConfiguration.ImageGCHighThresholdPercent == 100 &&
(!utilfeature.DefaultFeatureGate.Enabled(features.ImageMaximumGCAge) || kl.kubeletConfiguration.ImageMaximumGCAge.Duration == 0) {
klog.V(2).InfoS("ImageGCHighThresholdPercent is set 100 and ImageMaximumGCAge is 0, Disable image GC")
return
}
prevImageGCFailed := false
beganGC := time.Now()
go wait.Until(func() {
ctx := context.Background()
if err := kl.imageManager.GarbageCollect(ctx); err != nil {
if err := kl.imageManager.GarbageCollect(ctx, beganGC); err != nil {
if prevImageGCFailed {
klog.ErrorS(err, "Image garbage collection failed multiple times in a row")
// Only create an event for repeated failures