Merge pull request #43006 from apilloud/fix_failure_crash
Automatic merge from submit-queue Fix crash on Pods().Get() failure **What this PR does / why we need it**: Fixes a potential crash in syncPod when Pods().Get() returns an error other than NotFound. This is unlikely to occur with the standard client, but easily shows up with a stub kube client that returns Unimplemented to everything. Updates the unit test as well. **Release note**: `NONE`
This commit is contained in:
@@ -427,42 +427,45 @@ func (m *manager) syncPod(uid types.UID, status versionedPodStatus) {
|
|||||||
// RemoveOrphanedStatuses, so we just ignore the update here.
|
// RemoveOrphanedStatuses, so we just ignore the update here.
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if err == nil {
|
if err != nil {
|
||||||
translatedUID := m.podManager.TranslatePodUID(pod.UID)
|
glog.Warningf("Failed to get status for pod %q: %v", format.PodDesc(status.podName, status.podNamespace, uid), err)
|
||||||
if len(translatedUID) > 0 && translatedUID != uid {
|
return
|
||||||
glog.V(2).Infof("Pod %q was deleted and then recreated, skipping status update; old UID %q, new UID %q", format.Pod(pod), uid, translatedUID)
|
|
||||||
m.deletePodStatus(uid)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
pod.Status = status.status
|
|
||||||
if err := podutil.SetInitContainersStatusesAnnotations(pod); err != nil {
|
|
||||||
glog.Error(err)
|
|
||||||
}
|
|
||||||
// TODO: handle conflict as a retry, make that easier too.
|
|
||||||
pod, err = m.kubeClient.Core().Pods(pod.Namespace).UpdateStatus(pod)
|
|
||||||
if err == nil {
|
|
||||||
glog.V(3).Infof("Status for pod %q updated successfully: (%d, %+v)", format.Pod(pod), status.version, status.status)
|
|
||||||
m.apiStatusVersions[pod.UID] = status.version
|
|
||||||
if kubepod.IsMirrorPod(pod) {
|
|
||||||
// We don't handle graceful deletion of mirror pods.
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if !m.podDeletionSafety.OkToDeletePod(pod) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
deleteOptions := metav1.NewDeleteOptions(0)
|
|
||||||
// Use the pod UID as the precondition for deletion to prevent deleting a newly created pod with the same name and namespace.
|
|
||||||
deleteOptions.Preconditions = metav1.NewUIDPreconditions(string(pod.UID))
|
|
||||||
if err = m.kubeClient.Core().Pods(pod.Namespace).Delete(pod.Name, deleteOptions); err == nil {
|
|
||||||
glog.V(3).Infof("Pod %q fully terminated and removed from etcd", format.Pod(pod))
|
|
||||||
m.deletePodStatus(uid)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// We failed to update status, wait for periodic sync to retry.
|
translatedUID := m.podManager.TranslatePodUID(pod.UID)
|
||||||
glog.Warningf("Failed to update status for pod %q: %v", format.Pod(pod), err)
|
if len(translatedUID) > 0 && translatedUID != uid {
|
||||||
|
glog.V(2).Infof("Pod %q was deleted and then recreated, skipping status update; old UID %q, new UID %q", format.Pod(pod), uid, translatedUID)
|
||||||
|
m.deletePodStatus(uid)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
pod.Status = status.status
|
||||||
|
if err := podutil.SetInitContainersStatusesAnnotations(pod); err != nil {
|
||||||
|
glog.Error(err)
|
||||||
|
}
|
||||||
|
// TODO: handle conflict as a retry, make that easier too.
|
||||||
|
newPod, err := m.kubeClient.Core().Pods(pod.Namespace).UpdateStatus(pod)
|
||||||
|
if err != nil {
|
||||||
|
glog.Warningf("Failed to update status for pod %q: %v", format.Pod(pod), err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
pod = newPod
|
||||||
|
|
||||||
|
glog.V(3).Infof("Status for pod %q updated successfully: (%d, %+v)", format.Pod(pod), status.version, status.status)
|
||||||
|
m.apiStatusVersions[pod.UID] = status.version
|
||||||
|
|
||||||
|
// We don't handle graceful deletion of mirror pods.
|
||||||
|
if !kubepod.IsMirrorPod(pod) && m.podDeletionSafety.OkToDeletePod(pod) {
|
||||||
|
deleteOptions := metav1.NewDeleteOptions(0)
|
||||||
|
// Use the pod UID as the precondition for deletion to prevent deleting a newly created pod with the same name and namespace.
|
||||||
|
deleteOptions.Preconditions = metav1.NewUIDPreconditions(string(pod.UID))
|
||||||
|
err = m.kubeClient.Core().Pods(pod.Namespace).Delete(pod.Name, deleteOptions)
|
||||||
|
if err != nil {
|
||||||
|
glog.Warningf("Failed to delete status for pod %q: %v", format.Pod(pod), err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
glog.V(3).Infof("Pod %q fully terminated and removed from etcd", format.Pod(pod))
|
||||||
|
m.deletePodStatus(uid)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// needsUpdate returns whether the status is stale for the given pod UID.
|
// needsUpdate returns whether the status is stale for the given pod UID.
|
||||||
|
@@ -324,7 +324,7 @@ func TestSyncPodNoDeadlock(t *testing.T) {
|
|||||||
pod := getTestPod()
|
pod := getTestPod()
|
||||||
|
|
||||||
// Setup fake client.
|
// Setup fake client.
|
||||||
var ret v1.Pod
|
var ret *v1.Pod
|
||||||
var err error
|
var err error
|
||||||
client.AddReactor("*", "pods", func(action core.Action) (bool, runtime.Object, error) {
|
client.AddReactor("*", "pods", func(action core.Action) (bool, runtime.Object, error) {
|
||||||
switch action := action.(type) {
|
switch action := action.(type) {
|
||||||
@@ -335,25 +335,26 @@ func TestSyncPodNoDeadlock(t *testing.T) {
|
|||||||
default:
|
default:
|
||||||
assert.Fail(t, "Unexpected Action: %+v", action)
|
assert.Fail(t, "Unexpected Action: %+v", action)
|
||||||
}
|
}
|
||||||
return true, &ret, err
|
return true, ret, err
|
||||||
})
|
})
|
||||||
|
|
||||||
pod.Status.ContainerStatuses = []v1.ContainerStatus{{State: v1.ContainerState{Running: &v1.ContainerStateRunning{}}}}
|
pod.Status.ContainerStatuses = []v1.ContainerStatus{{State: v1.ContainerState{Running: &v1.ContainerStateRunning{}}}}
|
||||||
|
|
||||||
t.Logf("Pod not found.")
|
t.Logf("Pod not found.")
|
||||||
ret = *pod
|
ret = nil
|
||||||
err = errors.NewNotFound(api.Resource("pods"), pod.Name)
|
err = errors.NewNotFound(api.Resource("pods"), pod.Name)
|
||||||
m.SetPodStatus(pod, getRandomPodStatus())
|
m.SetPodStatus(pod, getRandomPodStatus())
|
||||||
verifyActions(t, m, []core.Action{getAction()})
|
verifyActions(t, m, []core.Action{getAction()})
|
||||||
|
|
||||||
t.Logf("Pod was recreated.")
|
t.Logf("Pod was recreated.")
|
||||||
|
ret = getTestPod()
|
||||||
ret.UID = "other_pod"
|
ret.UID = "other_pod"
|
||||||
err = nil
|
err = nil
|
||||||
m.SetPodStatus(pod, getRandomPodStatus())
|
m.SetPodStatus(pod, getRandomPodStatus())
|
||||||
verifyActions(t, m, []core.Action{getAction()})
|
verifyActions(t, m, []core.Action{getAction()})
|
||||||
|
|
||||||
t.Logf("Pod not deleted (success case).")
|
t.Logf("Pod not deleted (success case).")
|
||||||
ret = *pod
|
ret = getTestPod()
|
||||||
m.SetPodStatus(pod, getRandomPodStatus())
|
m.SetPodStatus(pod, getRandomPodStatus())
|
||||||
verifyActions(t, m, []core.Action{getAction(), updateAction()})
|
verifyActions(t, m, []core.Action{getAction(), updateAction()})
|
||||||
|
|
||||||
@@ -369,6 +370,7 @@ func TestSyncPodNoDeadlock(t *testing.T) {
|
|||||||
verifyActions(t, m, []core.Action{getAction(), updateAction()})
|
verifyActions(t, m, []core.Action{getAction(), updateAction()})
|
||||||
|
|
||||||
t.Logf("Error case.")
|
t.Logf("Error case.")
|
||||||
|
ret = nil
|
||||||
err = fmt.Errorf("intentional test error")
|
err = fmt.Errorf("intentional test error")
|
||||||
m.SetPodStatus(pod, getRandomPodStatus())
|
m.SetPodStatus(pod, getRandomPodStatus())
|
||||||
verifyActions(t, m, []core.Action{getAction()})
|
verifyActions(t, m, []core.Action{getAction()})
|
||||||
|
Reference in New Issue
Block a user