396 lines
14 KiB
Go
396 lines
14 KiB
Go
/*
|
|
Copyright 2017 The Kubernetes Authors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package pvcprotection
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"time"
|
|
|
|
v1 "k8s.io/api/core/v1"
|
|
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
|
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
|
|
"k8s.io/apimachinery/pkg/util/wait"
|
|
coreinformers "k8s.io/client-go/informers/core/v1"
|
|
clientset "k8s.io/client-go/kubernetes"
|
|
corelisters "k8s.io/client-go/listers/core/v1"
|
|
"k8s.io/client-go/tools/cache"
|
|
"k8s.io/client-go/util/workqueue"
|
|
"k8s.io/component-helpers/storage/ephemeral"
|
|
"k8s.io/klog/v2"
|
|
"k8s.io/kubernetes/pkg/controller/volume/common"
|
|
"k8s.io/kubernetes/pkg/controller/volume/protectionutil"
|
|
"k8s.io/kubernetes/pkg/util/slice"
|
|
volumeutil "k8s.io/kubernetes/pkg/volume/util"
|
|
)
|
|
|
|
// Controller is controller that removes PVCProtectionFinalizer
|
|
// from PVCs that are used by no pods.
|
|
type Controller struct {
|
|
client clientset.Interface
|
|
|
|
pvcLister corelisters.PersistentVolumeClaimLister
|
|
pvcListerSynced cache.InformerSynced
|
|
|
|
podLister corelisters.PodLister
|
|
podListerSynced cache.InformerSynced
|
|
podIndexer cache.Indexer
|
|
|
|
queue workqueue.RateLimitingInterface
|
|
}
|
|
|
|
// NewPVCProtectionController returns a new instance of PVCProtectionController.
|
|
func NewPVCProtectionController(logger klog.Logger, pvcInformer coreinformers.PersistentVolumeClaimInformer, podInformer coreinformers.PodInformer, cl clientset.Interface) (*Controller, error) {
|
|
e := &Controller{
|
|
client: cl,
|
|
queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "pvcprotection"),
|
|
}
|
|
|
|
e.pvcLister = pvcInformer.Lister()
|
|
e.pvcListerSynced = pvcInformer.Informer().HasSynced
|
|
pvcInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
|
|
AddFunc: func(obj interface{}) {
|
|
e.pvcAddedUpdated(logger, obj)
|
|
},
|
|
UpdateFunc: func(old, new interface{}) {
|
|
e.pvcAddedUpdated(logger, new)
|
|
},
|
|
})
|
|
|
|
e.podLister = podInformer.Lister()
|
|
e.podListerSynced = podInformer.Informer().HasSynced
|
|
e.podIndexer = podInformer.Informer().GetIndexer()
|
|
if err := common.AddIndexerIfNotPresent(e.podIndexer, common.PodPVCIndex, common.PodPVCIndexFunc()); err != nil {
|
|
return nil, fmt.Errorf("could not initialize pvc protection controller: %w", err)
|
|
}
|
|
podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
|
|
AddFunc: func(obj interface{}) {
|
|
e.podAddedDeletedUpdated(logger, nil, obj, false)
|
|
},
|
|
DeleteFunc: func(obj interface{}) {
|
|
e.podAddedDeletedUpdated(logger, nil, obj, true)
|
|
},
|
|
UpdateFunc: func(old, new interface{}) {
|
|
e.podAddedDeletedUpdated(logger, old, new, false)
|
|
},
|
|
})
|
|
|
|
return e, nil
|
|
}
|
|
|
|
// Run runs the controller goroutines.
|
|
func (c *Controller) Run(ctx context.Context, workers int) {
|
|
defer utilruntime.HandleCrash()
|
|
defer c.queue.ShutDown()
|
|
|
|
logger := klog.FromContext(ctx)
|
|
logger.Info("Starting PVC protection controller")
|
|
defer logger.Info("Shutting down PVC protection controller")
|
|
|
|
if !cache.WaitForNamedCacheSync("PVC protection", ctx.Done(), c.pvcListerSynced, c.podListerSynced) {
|
|
return
|
|
}
|
|
|
|
for i := 0; i < workers; i++ {
|
|
go wait.UntilWithContext(ctx, c.runWorker, time.Second)
|
|
}
|
|
|
|
<-ctx.Done()
|
|
}
|
|
|
|
func (c *Controller) runWorker(ctx context.Context) {
|
|
for c.processNextWorkItem(ctx) {
|
|
}
|
|
}
|
|
|
|
// processNextWorkItem deals with one pvcKey off the queue. It returns false when it's time to quit.
|
|
func (c *Controller) processNextWorkItem(ctx context.Context) bool {
|
|
pvcKey, quit := c.queue.Get()
|
|
if quit {
|
|
return false
|
|
}
|
|
defer c.queue.Done(pvcKey)
|
|
|
|
pvcNamespace, pvcName, err := cache.SplitMetaNamespaceKey(pvcKey.(string))
|
|
if err != nil {
|
|
utilruntime.HandleError(fmt.Errorf("error parsing PVC key %q: %v", pvcKey, err))
|
|
return true
|
|
}
|
|
|
|
err = c.processPVC(ctx, pvcNamespace, pvcName)
|
|
if err == nil {
|
|
c.queue.Forget(pvcKey)
|
|
return true
|
|
}
|
|
|
|
utilruntime.HandleError(fmt.Errorf("PVC %v failed with : %v", pvcKey, err))
|
|
c.queue.AddRateLimited(pvcKey)
|
|
|
|
return true
|
|
}
|
|
|
|
func (c *Controller) processPVC(ctx context.Context, pvcNamespace, pvcName string) error {
|
|
logger := klog.FromContext(ctx)
|
|
logger.V(4).Info("Processing PVC", "PVC", klog.KRef(pvcNamespace, pvcName))
|
|
startTime := time.Now()
|
|
defer func() {
|
|
logger.V(4).Info("Finished processing PVC", "PVC", klog.KRef(pvcNamespace, pvcName), "duration", time.Since(startTime))
|
|
}()
|
|
|
|
pvc, err := c.pvcLister.PersistentVolumeClaims(pvcNamespace).Get(pvcName)
|
|
if apierrors.IsNotFound(err) {
|
|
logger.V(4).Info("PVC not found, ignoring", "PVC", klog.KRef(pvcNamespace, pvcName))
|
|
return nil
|
|
}
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if protectionutil.IsDeletionCandidate(pvc, volumeutil.PVCProtectionFinalizer) {
|
|
// PVC should be deleted. Check if it's used and remove finalizer if
|
|
// it's not.
|
|
isUsed, err := c.isBeingUsed(ctx, pvc)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if !isUsed {
|
|
return c.removeFinalizer(ctx, pvc)
|
|
}
|
|
logger.V(2).Info("Keeping PVC because it is being used", "PVC", klog.KObj(pvc))
|
|
}
|
|
|
|
if protectionutil.NeedToAddFinalizer(pvc, volumeutil.PVCProtectionFinalizer) {
|
|
// PVC is not being deleted -> it should have the finalizer. The
|
|
// finalizer should be added by admission plugin, this is just to add
|
|
// the finalizer to old PVCs that were created before the admission
|
|
// plugin was enabled.
|
|
return c.addFinalizer(ctx, pvc)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (c *Controller) addFinalizer(ctx context.Context, pvc *v1.PersistentVolumeClaim) error {
|
|
claimClone := pvc.DeepCopy()
|
|
claimClone.ObjectMeta.Finalizers = append(claimClone.ObjectMeta.Finalizers, volumeutil.PVCProtectionFinalizer)
|
|
_, err := c.client.CoreV1().PersistentVolumeClaims(claimClone.Namespace).Update(ctx, claimClone, metav1.UpdateOptions{})
|
|
logger := klog.FromContext(ctx)
|
|
if err != nil {
|
|
logger.Error(err, "Error adding protection finalizer to PVC", "PVC", klog.KObj(pvc))
|
|
return err
|
|
}
|
|
logger.V(3).Info("Added protection finalizer to PVC", "PVC", klog.KObj(pvc))
|
|
return nil
|
|
}
|
|
|
|
func (c *Controller) removeFinalizer(ctx context.Context, pvc *v1.PersistentVolumeClaim) error {
|
|
claimClone := pvc.DeepCopy()
|
|
claimClone.ObjectMeta.Finalizers = slice.RemoveString(claimClone.ObjectMeta.Finalizers, volumeutil.PVCProtectionFinalizer, nil)
|
|
_, err := c.client.CoreV1().PersistentVolumeClaims(claimClone.Namespace).Update(ctx, claimClone, metav1.UpdateOptions{})
|
|
logger := klog.FromContext(ctx)
|
|
if err != nil {
|
|
logger.Error(err, "Error removing protection finalizer from PVC", "PVC", klog.KObj(pvc))
|
|
return err
|
|
}
|
|
logger.V(3).Info("Removed protection finalizer from PVC", "PVC", klog.KObj(pvc))
|
|
return nil
|
|
}
|
|
|
|
func (c *Controller) isBeingUsed(ctx context.Context, pvc *v1.PersistentVolumeClaim) (bool, error) {
|
|
// Look for a Pod using pvc in the Informer's cache. If one is found the
|
|
// correct decision to keep pvc is taken without doing an expensive live
|
|
// list.
|
|
logger := klog.FromContext(ctx)
|
|
if inUse, err := c.askInformer(logger, pvc); err != nil {
|
|
// No need to return because a live list will follow.
|
|
logger.Error(err, "")
|
|
} else if inUse {
|
|
return true, nil
|
|
}
|
|
|
|
// Even if no Pod using pvc was found in the Informer's cache it doesn't
|
|
// mean such a Pod doesn't exist: it might just not be in the cache yet. To
|
|
// be 100% confident that it is safe to delete pvc make sure no Pod is using
|
|
// it among those returned by a live list.
|
|
return c.askAPIServer(ctx, pvc)
|
|
}
|
|
|
|
func (c *Controller) askInformer(logger klog.Logger, pvc *v1.PersistentVolumeClaim) (bool, error) {
|
|
logger.V(4).Info("Looking for Pods using PVC in the Informer's cache", "PVC", klog.KObj(pvc))
|
|
|
|
// The indexer is used to find pods which might use the PVC.
|
|
objs, err := c.podIndexer.ByIndex(common.PodPVCIndex, fmt.Sprintf("%s/%s", pvc.Namespace, pvc.Name))
|
|
if err != nil {
|
|
return false, fmt.Errorf("cache-based list of pods failed while processing %s/%s: %s", pvc.Namespace, pvc.Name, err.Error())
|
|
}
|
|
for _, obj := range objs {
|
|
pod, ok := obj.(*v1.Pod)
|
|
if !ok {
|
|
continue
|
|
}
|
|
|
|
// We still need to look at each volume: that's redundant for volume.PersistentVolumeClaim,
|
|
// but for volume.Ephemeral we need to be sure that this particular PVC is the one
|
|
// created for the ephemeral volume.
|
|
if c.podUsesPVC(logger, pod, pvc) {
|
|
return true, nil
|
|
}
|
|
}
|
|
|
|
logger.V(4).Info("No Pod using PVC was found in the Informer's cache", "PVC", klog.KObj(pvc))
|
|
return false, nil
|
|
}
|
|
|
|
func (c *Controller) askAPIServer(ctx context.Context, pvc *v1.PersistentVolumeClaim) (bool, error) {
|
|
logger := klog.FromContext(ctx)
|
|
logger.V(4).Info("Looking for Pods using PVC with a live list", "PVC", klog.KObj(pvc))
|
|
|
|
podsList, err := c.client.CoreV1().Pods(pvc.Namespace).List(ctx, metav1.ListOptions{})
|
|
if err != nil {
|
|
return false, fmt.Errorf("live list of pods failed: %s", err.Error())
|
|
}
|
|
|
|
for _, pod := range podsList.Items {
|
|
if c.podUsesPVC(logger, &pod, pvc) {
|
|
return true, nil
|
|
}
|
|
}
|
|
|
|
logger.V(2).Info("PVC is unused", "PVC", klog.KObj(pvc))
|
|
return false, nil
|
|
}
|
|
|
|
func (c *Controller) podUsesPVC(logger klog.Logger, pod *v1.Pod, pvc *v1.PersistentVolumeClaim) bool {
|
|
// Check whether pvc is used by pod only if pod is scheduled, because
|
|
// kubelet sees pods after they have been scheduled and it won't allow
|
|
// starting a pod referencing a PVC with a non-nil deletionTimestamp.
|
|
if pod.Spec.NodeName != "" {
|
|
for _, volume := range pod.Spec.Volumes {
|
|
if volume.PersistentVolumeClaim != nil && volume.PersistentVolumeClaim.ClaimName == pvc.Name ||
|
|
!podIsShutDown(pod) && volume.Ephemeral != nil && ephemeral.VolumeClaimName(pod, &volume) == pvc.Name && ephemeral.VolumeIsForPod(pod, pvc) == nil {
|
|
logger.V(2).Info("Pod uses PVC", "pod", klog.KObj(pod), "PVC", klog.KObj(pvc))
|
|
return true
|
|
}
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// podIsShutDown returns true if kubelet is done with the pod or
|
|
// it was force-deleted.
|
|
func podIsShutDown(pod *v1.Pod) bool {
|
|
// A pod that has a deletionTimestamp and a zero
|
|
// deletionGracePeriodSeconds
|
|
// a) has been processed by kubelet and was set up for deletion
|
|
// by the apiserver:
|
|
// - canBeDeleted has verified that volumes were unpublished
|
|
// https://github.com/kubernetes/kubernetes/blob/5404b5a28a2114299608bab00e4292960dd864a0/pkg/kubelet/kubelet_pods.go#L980
|
|
// - deletionGracePeriodSeconds was set via a delete
|
|
// with zero GracePeriodSeconds
|
|
// https://github.com/kubernetes/kubernetes/blob/5404b5a28a2114299608bab00e4292960dd864a0/pkg/kubelet/status/status_manager.go#L580-L592
|
|
// or
|
|
// b) was force-deleted.
|
|
//
|
|
// It's now just waiting for garbage collection. We could wait
|
|
// for it to actually get removed, but that may be blocked by
|
|
// finalizers for the pod and thus get delayed.
|
|
//
|
|
// Worse, it is possible that there is a cyclic dependency
|
|
// (pod finalizer waits for PVC to get removed, PVC protection
|
|
// controller waits for pod to get removed). By considering
|
|
// the PVC unused in this case, we allow the PVC to get
|
|
// removed and break such a cycle.
|
|
//
|
|
// Therefore it is better to proceed with PVC removal,
|
|
// which is safe (case a) and/or desirable (case b).
|
|
return pod.DeletionTimestamp != nil && pod.DeletionGracePeriodSeconds != nil && *pod.DeletionGracePeriodSeconds == 0
|
|
}
|
|
|
|
// pvcAddedUpdated reacts to pvc added/updated events
|
|
func (c *Controller) pvcAddedUpdated(logger klog.Logger, obj interface{}) {
|
|
pvc, ok := obj.(*v1.PersistentVolumeClaim)
|
|
if !ok {
|
|
utilruntime.HandleError(fmt.Errorf("PVC informer returned non-PVC object: %#v", obj))
|
|
return
|
|
}
|
|
key, err := cache.MetaNamespaceKeyFunc(pvc)
|
|
if err != nil {
|
|
utilruntime.HandleError(fmt.Errorf("couldn't get key for Persistent Volume Claim %#v: %v", pvc, err))
|
|
return
|
|
}
|
|
logger.V(4).Info("Got event on PVC", "pvc", klog.KObj(pvc))
|
|
|
|
if protectionutil.NeedToAddFinalizer(pvc, volumeutil.PVCProtectionFinalizer) || protectionutil.IsDeletionCandidate(pvc, volumeutil.PVCProtectionFinalizer) {
|
|
c.queue.Add(key)
|
|
}
|
|
}
|
|
|
|
// podAddedDeletedUpdated reacts to Pod events
|
|
func (c *Controller) podAddedDeletedUpdated(logger klog.Logger, old, new interface{}, deleted bool) {
|
|
if pod := c.parsePod(new); pod != nil {
|
|
c.enqueuePVCs(logger, pod, deleted)
|
|
|
|
// An update notification might mask the deletion of a pod X and the
|
|
// following creation of a pod Y with the same namespaced name as X. If
|
|
// that's the case X needs to be processed as well to handle the case
|
|
// where it is blocking deletion of a PVC not referenced by Y, otherwise
|
|
// such PVC will never be deleted.
|
|
if oldPod := c.parsePod(old); oldPod != nil && oldPod.UID != pod.UID {
|
|
c.enqueuePVCs(logger, oldPod, true)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (*Controller) parsePod(obj interface{}) *v1.Pod {
|
|
if obj == nil {
|
|
return nil
|
|
}
|
|
pod, ok := obj.(*v1.Pod)
|
|
if !ok {
|
|
tombstone, ok := obj.(cache.DeletedFinalStateUnknown)
|
|
if !ok {
|
|
utilruntime.HandleError(fmt.Errorf("couldn't get object from tombstone %#v", obj))
|
|
return nil
|
|
}
|
|
pod, ok = tombstone.Obj.(*v1.Pod)
|
|
if !ok {
|
|
utilruntime.HandleError(fmt.Errorf("tombstone contained object that is not a Pod %#v", obj))
|
|
return nil
|
|
}
|
|
}
|
|
return pod
|
|
}
|
|
|
|
func (c *Controller) enqueuePVCs(logger klog.Logger, pod *v1.Pod, deleted bool) {
|
|
// Filter out pods that can't help us to remove a finalizer on PVC
|
|
if !deleted && !volumeutil.IsPodTerminated(pod, pod.Status) && pod.Spec.NodeName != "" {
|
|
return
|
|
}
|
|
|
|
logger.V(4).Info("Enqueuing PVCs for Pod", "pod", klog.KObj(pod), "podUID", pod.UID)
|
|
|
|
// Enqueue all PVCs that the pod uses
|
|
for _, volume := range pod.Spec.Volumes {
|
|
switch {
|
|
case volume.PersistentVolumeClaim != nil:
|
|
c.queue.Add(pod.Namespace + "/" + volume.PersistentVolumeClaim.ClaimName)
|
|
case volume.Ephemeral != nil:
|
|
c.queue.Add(pod.Namespace + "/" + ephemeral.VolumeClaimName(pod, &volume))
|
|
}
|
|
}
|
|
}
|