Merge pull request #113787 from gjkim42/update-daemonset-status-despite-error
Update daemonSet status even if syncDaemonSet fails
@@ -887,6 +887,32 @@ func (dsc *DaemonSetsController) podsShouldBeOnNode(
 	return nodesNeedingDaemonPods, podsToDelete
 }
 
+func (dsc *DaemonSetsController) updateDaemonSet(ctx context.Context, ds *apps.DaemonSet, nodeList []*v1.Node, hash, key string, old []*apps.ControllerRevision) error {
+	err := dsc.manage(ctx, ds, nodeList, hash)
+	if err != nil {
+		return err
+	}
+
+	// Process rolling updates if we're ready.
+	if dsc.expectations.SatisfiedExpectations(key) {
+		switch ds.Spec.UpdateStrategy.Type {
+		case apps.OnDeleteDaemonSetStrategyType:
+		case apps.RollingUpdateDaemonSetStrategyType:
+			err = dsc.rollingUpdate(ctx, ds, nodeList, hash)
+		}
+		if err != nil {
+			return err
+		}
+	}
+
+	err = dsc.cleanupHistory(ctx, ds, old)
+	if err != nil {
+		return fmt.Errorf("failed to clean up revisions of DaemonSet: %w", err)
+	}
+
+	return nil
+}
+
 // manage manages the scheduling and running of Pods of ds on nodes.
 // After figuring out which nodes should run a Pod of ds but not yet running one and
 // which nodes should not run a Pod of ds but currently running one, it calls function
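The empty case for apps.OnDeleteDaemonSetStrategyType in the new helper is intentional: Go switch cases do not fall through, so an empty case is an explicit no-op, and OnDelete DaemonSets are only updated when their pods are deleted. A minimal standalone sketch of that idiom, using plain strings rather than the real apps constants:

package main

import "fmt"

func main() {
	for _, strategy := range []string{"OnDelete", "RollingUpdate"} {
		switch strategy {
		case "OnDelete":
			// Intentionally empty: no fallthrough in Go, so this branch
			// does nothing; pods are only replaced when they are deleted.
		case "RollingUpdate":
			fmt.Println("trigger rolling update")
		}
	}
}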
@@ -1138,7 +1164,7 @@ func (dsc *DaemonSetsController) updateDaemonSetStatus(ctx context.Context, ds *
 
 	err = storeDaemonSetStatus(ctx, dsc.kubeClient.AppsV1().DaemonSets(ds.Namespace), ds, desiredNumberScheduled, currentNumberScheduled, numberMisscheduled, numberReady, updatedNumberScheduled, numberAvailable, numberUnavailable, updateObservedGen)
 	if err != nil {
-		return fmt.Errorf("error storing status for daemon set %#v: %v", ds, err)
+		return fmt.Errorf("error storing status for daemon set %#v: %w", ds, err)
 	}
 
 	// Resync the DaemonSet after MinReadySeconds as a last line of defense to guard against clock-skew.
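The only change in this hunk is %v to %w. Wrapping with %w keeps the underlying error in the chain, so callers can match it with errors.Is or errors.As; the new expectSyncDaemonSetsWithError helper below relies on exactly that to recognize the injected status error. %v only records the error's text. A small self-contained demonstration, separate from the controller code:

package main

import (
	"errors"
	"fmt"
)

func main() {
	base := errors.New("storage conflict")

	wrapped := fmt.Errorf("error storing status: %w", base)   // keeps the error chain
	flattened := fmt.Errorf("error storing status: %v", base) // keeps only the text

	fmt.Println(errors.Is(wrapped, base))   // true
	fmt.Println(errors.Is(flattened, base)) // false
}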
@@ -1212,29 +1238,21 @@ func (dsc *DaemonSetsController) syncDaemonSet(ctx context.Context, key string)
 		return dsc.updateDaemonSetStatus(ctx, ds, nodeList, hash, false)
 	}
 
-	err = dsc.manage(ctx, ds, nodeList, hash)
-	if err != nil {
-		return err
-	}
-
-	// Process rolling updates if we're ready.
-	if dsc.expectations.SatisfiedExpectations(dsKey) {
-		switch ds.Spec.UpdateStrategy.Type {
-		case apps.OnDeleteDaemonSetStrategyType:
-		case apps.RollingUpdateDaemonSetStrategyType:
-			err = dsc.rollingUpdate(ctx, ds, nodeList, hash)
-		}
-		if err != nil {
-			return err
-		}
-	}
-
-	err = dsc.cleanupHistory(ctx, ds, old)
-	if err != nil {
-		return fmt.Errorf("failed to clean up revisions of DaemonSet: %v", err)
-	}
-
-	return dsc.updateDaemonSetStatus(ctx, ds, nodeList, hash, true)
+	err = dsc.updateDaemonSet(ctx, ds, nodeList, hash, dsKey, old)
+	statusErr := dsc.updateDaemonSetStatus(ctx, ds, nodeList, hash, true)
+	switch {
+	case err != nil && statusErr != nil:
+		// If there was an error, and we failed to update status,
+		// log it and return the original error.
+		klog.ErrorS(statusErr, "Failed to update status", "daemonSet", klog.KObj(ds))
+		return err
+	case err != nil:
+		return err
+	case statusErr != nil:
+		return statusErr
+	}
+
+	return nil
 }
 
 // NodeShouldRunDaemonPod checks a set of preconditions against a (node,daemonset) and returns a
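After this change syncDaemonSet always attempts the status update, even when updateDaemonSet failed, and then reports the sync error ahead of the status error. A simplified, self-contained sketch of that precedence logic; the function names and signatures here are illustrative stand-ins, not the controller's API:

package main

import (
	"errors"
	"fmt"
)

// syncAndReport mirrors the switch above: always run the status update,
// log a status failure that accompanies a sync failure, and return the
// sync error first.
func syncAndReport(sync, updateStatus func() error) error {
	err := sync()
	statusErr := updateStatus() // runs even if sync failed
	switch {
	case err != nil && statusErr != nil:
		fmt.Println("failed to update status:", statusErr) // logged, not returned
		return err
	case err != nil:
		return err
	case statusErr != nil:
		return statusErr
	}
	return nil
}

func main() {
	syncErr := errors.New("sync error")
	statusErr := errors.New("status error")

	fmt.Println(syncAndReport(func() error { return syncErr }, func() error { return statusErr })) // sync error
	fmt.Println(syncAndReport(func() error { return nil }, func() error { return statusErr }))     // status error
	fmt.Println(syncAndReport(func() error { return nil }, func() error { return nil }))           // <nil>
}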
@@ -18,6 +18,7 @@ package daemon
 
 import (
 	"context"
+	"errors"
 	"fmt"
 	"reflect"
 	"sort"
@@ -255,7 +256,7 @@ func (f *fakePodControl) CreatePods(ctx context.Context, namespace string, templ
 	f.Lock()
 	defer f.Unlock()
 	if err := f.FakePodControl.CreatePods(ctx, namespace, template, object, controllerRef); err != nil {
-		return fmt.Errorf("failed to create pod for DaemonSet")
+		return fmt.Errorf("failed to create pod for DaemonSet: %w", err)
 	}
 
 	pod := &v1.Pod{
@@ -387,6 +388,11 @@ func validateSyncDaemonSets(manager *daemonSetsController, fakePodControl *fakeP
 }
 
 func expectSyncDaemonSets(t *testing.T, manager *daemonSetsController, ds *apps.DaemonSet, podControl *fakePodControl, expectedCreates, expectedDeletes int, expectedEvents int) {
 	t.Helper()
+	expectSyncDaemonSetsWithError(t, manager, ds, podControl, expectedCreates, expectedDeletes, expectedEvents, nil)
+}
+
+func expectSyncDaemonSetsWithError(t *testing.T, manager *daemonSetsController, ds *apps.DaemonSet, podControl *fakePodControl, expectedCreates, expectedDeletes int, expectedEvents int, expectedError error) {
+	t.Helper()
 	key, err := controller.KeyFunc(ds)
 	if err != nil {
@@ -394,7 +400,11 @@ func expectSyncDaemonSets(t *testing.T, manager *daemonSetsController, ds *apps.
 	}
 
 	err = manager.syncHandler(context.TODO(), key)
-	if err != nil {
+	if expectedError != nil && !errors.Is(err, expectedError) {
+		t.Fatalf("Unexpected error returned from syncHandler: %v", err)
+	}
+
+	if expectedError == nil && err != nil {
 		t.Log(err)
 	}
 
@@ -771,7 +781,7 @@ func TestSimpleDaemonSetPodCreateErrors(t *testing.T) {
 	for _, strategy := range updateStrategies() {
 		ds := newDaemonSet("foo")
 		ds.Spec.UpdateStrategy = *strategy
-		manager, podControl, _, err := newTestController(ds)
+		manager, podControl, clientset, err := newTestController(ds)
 		if err != nil {
 			t.Fatalf("error creating DaemonSets controller: %v", err)
 		}
@@ -782,6 +792,17 @@ func TestSimpleDaemonSetPodCreateErrors(t *testing.T) {
 			t.Fatal(err)
 		}
 
+		var updated *apps.DaemonSet
+		clientset.PrependReactor("update", "daemonsets", func(action core.Action) (handled bool, ret runtime.Object, err error) {
+			if action.GetSubresource() != "status" {
+				return false, nil, nil
+			}
+			if u, ok := action.(core.UpdateAction); ok {
+				updated = u.GetObject().(*apps.DaemonSet)
+			}
+			return false, nil, nil
+		})
+
 		expectSyncDaemonSets(t, manager, ds, podControl, podControl.FakePodControl.CreateLimit, 0, 0)
 
 		expectedLimit := 0
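The PrependReactor added here inspects every "update" action on "daemonsets", records the object sent to the "status" subresource, and returns handled == false so the fake clientset's default object tracker still processes the update. A standalone sketch of the same technique against a bare client-go fake clientset; the variable names (captured, etc.) are illustrative, the packages are the standard client-go fake and testing packages:

package main

import (
	"context"
	"fmt"

	appsv1 "k8s.io/api/apps/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/client-go/kubernetes/fake"
	k8stesting "k8s.io/client-go/testing"
)

func main() {
	ds := &appsv1.DaemonSet{ObjectMeta: metav1.ObjectMeta{Name: "foo", Namespace: "default"}}
	client := fake.NewSimpleClientset(ds)

	var captured *appsv1.DaemonSet
	client.PrependReactor("update", "daemonsets", func(action k8stesting.Action) (bool, runtime.Object, error) {
		if action.GetSubresource() != "status" {
			return false, nil, nil // ignore non-status updates
		}
		if u, ok := action.(k8stesting.UpdateAction); ok {
			captured = u.GetObject().(*appsv1.DaemonSet) // remember what was written
		}
		return false, nil, nil // not handled: the default tracker still stores it
	})

	ds.Status.DesiredNumberScheduled = 3
	if _, err := client.AppsV1().DaemonSets("default").UpdateStatus(context.TODO(), ds, metav1.UpdateOptions{}); err != nil {
		panic(err)
	}
	fmt.Println(captured.Status.DesiredNumberScheduled) // 3
}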
@@ -791,6 +812,18 @@ func TestSimpleDaemonSetPodCreateErrors(t *testing.T) {
 		if podControl.FakePodControl.CreateCallCount > expectedLimit {
 			t.Errorf("Unexpected number of create calls. Expected <= %d, saw %d\n", podControl.FakePodControl.CreateLimit*2, podControl.FakePodControl.CreateCallCount)
 		}
+		if updated == nil {
+			t.Fatalf("Failed to get updated status")
+		}
+		if got, want := updated.Status.DesiredNumberScheduled, int32(podControl.FakePodControl.CreateLimit)*10; got != want {
+			t.Errorf("Status.DesiredNumberScheduled = %v, want %v", got, want)
+		}
+		if got, want := updated.Status.CurrentNumberScheduled, int32(podControl.FakePodControl.CreateLimit); got != want {
+			t.Errorf("Status.CurrentNumberScheduled = %v, want %v", got, want)
+		}
+		if got, want := updated.Status.UpdatedNumberScheduled, int32(podControl.FakePodControl.CreateLimit); got != want {
+			t.Errorf("Status.UpdatedNumberScheduled = %v, want %v", got, want)
+		}
 	}
 }
 
@@ -856,6 +889,74 @@ func TestSimpleDaemonSetUpdatesStatusAfterLaunchingPods(t *testing.T) {
 	}
 }
 
+func TestSimpleDaemonSetUpdatesStatusError(t *testing.T) {
+	var (
+		syncErr   = fmt.Errorf("sync error")
+		statusErr = fmt.Errorf("status error")
+	)
+
+	testCases := []struct {
+		desc string
+
+		hasSyncErr   bool
+		hasStatusErr bool
+
+		expectedErr error
+	}{
+		{
+			desc:         "sync error",
+			hasSyncErr:   true,
+			hasStatusErr: false,
+			expectedErr:  syncErr,
+		},
+		{
+			desc:         "status error",
+			hasSyncErr:   false,
+			hasStatusErr: true,
+			expectedErr:  statusErr,
+		},
+		{
+			desc:         "sync and status error",
+			hasSyncErr:   true,
+			hasStatusErr: true,
+			expectedErr:  syncErr,
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.desc, func(t *testing.T) {
+			for _, strategy := range updateStrategies() {
+				ds := newDaemonSet("foo")
+				ds.Spec.UpdateStrategy = *strategy
+				manager, podControl, clientset, err := newTestController(ds)
+				if err != nil {
+					t.Fatalf("error creating DaemonSets controller: %v", err)
+				}
+
+				if tc.hasSyncErr {
+					podControl.FakePodControl.Err = syncErr
+				}
+
+				clientset.PrependReactor("update", "daemonsets", func(action core.Action) (handled bool, ret runtime.Object, err error) {
+					if action.GetSubresource() != "status" {
+						return false, nil, nil
+					}
+
+					if tc.hasStatusErr {
+						return true, nil, statusErr
+					} else {
+						return false, nil, nil
+					}
+				})
+
+				manager.dsStore.Add(ds)
+				addNodes(manager.nodeStore, 0, 1, nil)
+				expectSyncDaemonSetsWithError(t, manager, ds, podControl, 1, 0, 0, tc.expectedErr)
+			}
+		})
+	}
+}
+
 // DaemonSets should do nothing if there aren't any nodes
 func TestNoNodesDoesNothing(t *testing.T) {
 	for _, strategy := range updateStrategies() {
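In the new test, a status failure is forced by having the reactor return handled == true together with the injected error, which the fake clientset then returns verbatim from the status update call. A compact standalone sketch of that injection technique, using the standard client-go fake packages and illustrative names:

package main

import (
	"context"
	"errors"
	"fmt"

	appsv1 "k8s.io/api/apps/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/client-go/kubernetes/fake"
	k8stesting "k8s.io/client-go/testing"
)

func main() {
	statusErr := errors.New("status error")
	ds := &appsv1.DaemonSet{ObjectMeta: metav1.ObjectMeta{Name: "foo", Namespace: "default"}}
	client := fake.NewSimpleClientset(ds)

	client.PrependReactor("update", "daemonsets", func(action k8stesting.Action) (bool, runtime.Object, error) {
		if action.GetSubresource() != "status" {
			return false, nil, nil
		}
		return true, nil, statusErr // handled: short-circuit with the injected failure
	})

	_, err := client.AppsV1().DaemonSets("default").UpdateStatus(context.TODO(), ds, metav1.UpdateOptions{})
	fmt.Println(errors.Is(err, statusErr)) // true: the sentinel comes back unchanged
}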