From 9bd006de40b670c6e4fb444fd042e9bf0ef916dd Mon Sep 17 00:00:00 2001 From: Jing Xu Date: Mon, 12 Mar 2018 16:11:43 -0700 Subject: [PATCH] Fix issue with race condition during pod deletion This PR fixes two issues 1. When desired_state_populator removes podvolume state, it should check whether the actual state already has the volume before deleting it to make sure actual state has a chance to add the volume into the state 2. When checking podVolume still exists, it not only checks the actual state, but also the volume disk directory because actual state might not reflect the real world when kubelet starts. --- pkg/kubelet/kubelet_getters.go | 18 ++++++++++++++++++ pkg/kubelet/kubelet_volumes.go | 12 ++++++++++++ .../desired_state_of_world_populator.go | 4 ++++ .../desired_state_of_world_populator_test.go | 3 ++- .../volumemanager/reconciler/reconciler.go | 2 +- 5 files changed, 37 insertions(+), 2 deletions(-) diff --git a/pkg/kubelet/kubelet_getters.go b/pkg/kubelet/kubelet_getters.go index 12e416ac3ba..04284f6e3ea 100644 --- a/pkg/kubelet/kubelet_getters.go +++ b/pkg/kubelet/kubelet_getters.go @@ -274,6 +274,24 @@ func (kl *Kubelet) getPodVolumePathListFromDisk(podUID types.UID) ([]string, err return volumes, nil } +func (kl *Kubelet) getMountedVolumePathListFromDisk(podUID types.UID) ([]string, error) { + mountedVolumes := []string{} + volumePaths, err := kl.getPodVolumePathListFromDisk(podUID) + if err != nil { + return mountedVolumes, err + } + for _, volumePath := range volumePaths { + isNotMount, err := kl.mounter.IsLikelyNotMountPoint(volumePath) + if err != nil { + return mountedVolumes, err + } + if !isNotMount { + mountedVolumes = append(mountedVolumes, volumePath) + } + } + return mountedVolumes, nil +} + // GetVersionInfo returns information about the version of cAdvisor in use. func (kl *Kubelet) GetVersionInfo() (*cadvisorapiv1.VersionInfo, error) { return kl.cadvisor.VersionInfo() diff --git a/pkg/kubelet/kubelet_volumes.go b/pkg/kubelet/kubelet_volumes.go index 79b0fe3ea3a..09179fec804 100644 --- a/pkg/kubelet/kubelet_volumes.go +++ b/pkg/kubelet/kubelet_volumes.go @@ -57,6 +57,18 @@ func (kl *Kubelet) podVolumesExist(podUID types.UID) bool { volumetypes.UniquePodName(podUID)); len(mountedVolumes) > 0 { return true } + // TODO: This checks pod volume paths and whether they are mounted. If checking returns error, podVolumesExist will return true + // which means we consider volumes might exist and requires further checking. + // There are some volume plugins such as flexvolume might not have mounts. See issue #61229 + volumePaths, err := kl.getMountedVolumePathListFromDisk(podUID) + if err != nil { + glog.Errorf("pod %q found, but error %v occurred during checking mounted volumes from disk", podUID, err) + return true + } + if len(volumePaths) > 0 { + glog.V(4).Infof("pod %q found, but volumes are still mounted on disk %v", podUID, volumePaths) + return true + } return false } diff --git a/pkg/kubelet/volumemanager/populator/desired_state_of_world_populator.go b/pkg/kubelet/volumemanager/populator/desired_state_of_world_populator.go index a00d605c081..cc642511a72 100644 --- a/pkg/kubelet/volumemanager/populator/desired_state_of_world_populator.go +++ b/pkg/kubelet/volumemanager/populator/desired_state_of_world_populator.go @@ -245,6 +245,10 @@ func (dswp *desiredStateOfWorldPopulator) findAndRemoveDeletedPods() { continue } + if !dswp.actualStateOfWorld.VolumeExists(volumeToMount.VolumeName) && podExists { + glog.V(4).Infof(volumeToMount.GenerateMsgDetailed("Actual state has not yet has this information skip removing volume from desired state", "")) + continue + } glog.V(4).Infof(volumeToMount.GenerateMsgDetailed("Removing volume from desired state", "")) dswp.desiredStateOfWorld.DeletePodFromVolume( diff --git a/pkg/kubelet/volumemanager/populator/desired_state_of_world_populator_test.go b/pkg/kubelet/volumemanager/populator/desired_state_of_world_populator_test.go index f2fcf19723a..90c6bf182a5 100644 --- a/pkg/kubelet/volumemanager/populator/desired_state_of_world_populator_test.go +++ b/pkg/kubelet/volumemanager/populator/desired_state_of_world_populator_test.go @@ -111,6 +111,7 @@ func TestFindAndAddNewPods_FindAndRemoveDeletedPods(t *testing.T) { } podGet.Status.Phase = v1.PodFailed + fakePodManager.DeletePod(pod) //pod is added to fakePodManager but fakeRuntime can not get the pod,so here findAndRemoveDeletedPods() will remove the pod and volumes it is mounted dswp.findAndRemoveDeletedPods() @@ -220,7 +221,7 @@ func TestFindAndAddNewPods_FindAndRemoveDeletedPods_Valid_Block_VolumeDevices(t t.Fatalf("Failed to get pod by pod name: %s and namespace: %s", pod.Name, pod.Namespace) } podGet.Status.Phase = v1.PodFailed - + fakePodManager.DeletePod(pod) //pod is added to fakePodManager but fakeRuntime can not get the pod,so here findAndRemoveDeletedPods() will remove the pod and volumes it is mounted dswp.findAndRemoveDeletedPods() diff --git a/pkg/kubelet/volumemanager/reconciler/reconciler.go b/pkg/kubelet/volumemanager/reconciler/reconciler.go index a1266a9e182..a00e11cabe6 100644 --- a/pkg/kubelet/volumemanager/reconciler/reconciler.go +++ b/pkg/kubelet/volumemanager/reconciler/reconciler.go @@ -455,7 +455,7 @@ func (rc *reconciler) reconstructVolume(volume podVolume) (*reconstructedVolume, // Check existence of mount point for filesystem volume or symbolic link for block volume isExist, checkErr := rc.operationExecutor.CheckVolumeExistenceOperation(volumeSpec, volume.mountPath, volumeSpec.Name(), rc.mounter, uniqueVolumeName, volume.podName, pod.UID, attachablePlugin) if checkErr != nil { - return nil, err + return nil, checkErr } // If mount or symlink doesn't exist, volume reconstruction should be failed if !isExist {