kube-controller-manager: add ResourceClaim controller
The controller uses the same logic as the generic ephemeral inline volume controller, just for inline ResourceClaimTemplate -> ResourceClaim. In addition, it supports removing pods from the ReservedFor field when those pods are known to no longer need the claim. At the moment, only this special case is supported: removing arbitrary objects would imply granting full read access to all types in order to determine whether a) an object is gone and b) whether the current incarnation is the one listed in ReservedFor. This may get added later.
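For readers skimming the diff below, the ReservedFor cleanup rule boils down to: keep an entry only while the pod it references still exists under the same UID, and reject anything that is not a pod. A minimal standalone sketch of that rule, not the controller code itself; the consumerRef type and the getPodUID lookup are hypothetical stand-ins for the real API types and the informer/API-server access:

package main

import "fmt"

// consumerRef mirrors the shape of a ReservedFor entry for this sketch.
type consumerRef struct {
	APIGroup string
	Resource string
	Name     string
	UID      string
}

// keepReservation decides whether a ReservedFor entry should be kept.
// getPodUID stands in for a lister/API lookup; it returns the UID of the
// named pod and false if the pod does not exist.
func keepReservation(ref consumerRef, getPodUID func(name string) (string, bool)) (bool, error) {
	if ref.APIGroup != "" || ref.Resource != "pods" {
		// Arbitrary objects would need read access to all types; unsupported.
		return false, fmt.Errorf("unsupported ReservedFor entry: %+v", ref)
	}
	uid, exists := getPodUID(ref.Name)
	if !exists {
		// The pod is gone, so the reservation can be dropped.
		return false, nil
	}
	// Same incarnation -> keep; a different UID under the same name -> drop.
	return uid == ref.UID, nil
}

func main() {
	lookup := func(name string) (string, bool) {
		if name == "my-pod" {
			return "uid-1", true
		}
		return "", false
	}
	keep, _ := keepReservation(consumerRef{Resource: "pods", Name: "my-pod", UID: "uid-1"}, lookup)
	fmt.Println("keep reservation:", keep) // true
}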
@@ -14,58 +14,69 @@ See the License for the specific language governing permissions and
limitations under the License.
*/

package ephemeral
package resourceclaim

import (
	"context"
	"fmt"
	"strings"
	"time"

	"k8s.io/klog/v2"

	v1 "k8s.io/api/core/v1"
	resourcev1alpha1 "k8s.io/api/resource/v1alpha1"
	"k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/apimachinery/pkg/util/wait"
	coreinformers "k8s.io/client-go/informers/core/v1"
	v1informers "k8s.io/client-go/informers/core/v1"
	resourcev1alpha1informers "k8s.io/client-go/informers/resource/v1alpha1"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/client-go/kubernetes/scheme"
	v1core "k8s.io/client-go/kubernetes/typed/core/v1"
	corelisters "k8s.io/client-go/listers/core/v1"
	v1listers "k8s.io/client-go/listers/core/v1"
	resourcev1alpha1listers "k8s.io/client-go/listers/resource/v1alpha1"
	"k8s.io/client-go/tools/cache"
	kcache "k8s.io/client-go/tools/cache"
	"k8s.io/client-go/tools/record"
	"k8s.io/client-go/util/workqueue"
	"k8s.io/component-helpers/storage/ephemeral"
	"k8s.io/kubernetes/pkg/controller/volume/common"
	ephemeralvolumemetrics "k8s.io/kubernetes/pkg/controller/volume/ephemeral/metrics"
	"k8s.io/kubernetes/pkg/controller/volume/events"
	"k8s.io/dynamic-resource-allocation/resourceclaim"
	"k8s.io/klog/v2"
	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
	"k8s.io/kubernetes/pkg/controller/resourceclaim/metrics"
)

// Controller creates PVCs for ephemeral inline volumes in a pod spec.
type Controller interface {
	Run(ctx context.Context, workers int)
}
const (
	// podResourceClaimIndex is the lookup name for the index function which indexes by pod ResourceClaim templates.
	podResourceClaimIndex = "pod-resource-claim-index"

type ephemeralController struct {
	// kubeClient is the kube API client used by volumehost to communicate with
	// the API server.
	maxUIDCacheEntries = 500
)

// Controller creates ResourceClaims for ResourceClaimTemplates in a pod spec.
type Controller struct {
	// kubeClient is the kube API client used to communicate with the API
	// server.
	kubeClient clientset.Interface

	// pvcLister is the shared PVC lister used to fetch and store PVC
	// claimLister is the shared ResourceClaim lister used to fetch and store ResourceClaim
	// objects from the API server. It is shared with other controllers and
	// therefore the PVC objects in its store should be treated as immutable.
	pvcLister  corelisters.PersistentVolumeClaimLister
	pvcsSynced kcache.InformerSynced
	// therefore the ResourceClaim objects in its store should be treated as immutable.
	claimLister  resourcev1alpha1listers.ResourceClaimLister
	claimsSynced cache.InformerSynced

	// podLister is the shared Pod lister used to fetch Pod
	// objects from the API server. It is shared with other controllers and
	// therefore the Pod objects in its store should be treated as immutable.
	podLister corelisters.PodLister
	podSynced kcache.InformerSynced
	podLister v1listers.PodLister
	podSynced cache.InformerSynced

	// podIndexer has the common PodPVC indexer indexer installed To
	// templateLister is the shared ResourceClaimTemplate lister used to
	// fetch template objects from the API server. It is shared with other
	// controllers and therefore the objects in its store should be treated
	// as immutable.
	templateLister  resourcev1alpha1listers.ResourceClaimTemplateLister
	templatesSynced cache.InformerSynced

	// podIndexer has the common PodResourceClaim indexer indexer installed To
	// limit iteration over pods to those of interest.
	podIndexer cache.Indexer

@@ -73,104 +84,158 @@ type ephemeralController struct {
	recorder record.EventRecorder

	queue workqueue.RateLimitingInterface

	// The deletedObjects cache keeps track of Pods for which we know that
	// they have existed and have been removed. For those we can be sure
	// that a ReservedFor entry needs to be removed.
	deletedObjects *uidCache
}

// NewController creates an ephemeral volume controller.
const (
	claimKeyPrefix = "claim:"
	podKeyPrefix   = "pod:"
)

// NewController creates a ResourceClaim controller.
func NewController(
	kubeClient clientset.Interface,
	podInformer coreinformers.PodInformer,
	pvcInformer coreinformers.PersistentVolumeClaimInformer) (Controller, error) {
	podInformer v1informers.PodInformer,
	claimInformer resourcev1alpha1informers.ResourceClaimInformer,
	templateInformer resourcev1alpha1informers.ResourceClaimTemplateInformer) (*Controller, error) {

	ec := &ephemeralController{
		kubeClient: kubeClient,
		podLister:  podInformer.Lister(),
		podIndexer: podInformer.Informer().GetIndexer(),
		podSynced:  podInformer.Informer().HasSynced,
		pvcLister:  pvcInformer.Lister(),
		pvcsSynced: pvcInformer.Informer().HasSynced,
		queue:      workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "ephemeral_volume"),
	ec := &Controller{
		kubeClient:      kubeClient,
		podLister:       podInformer.Lister(),
		podIndexer:      podInformer.Informer().GetIndexer(),
		podSynced:       podInformer.Informer().HasSynced,
		claimLister:     claimInformer.Lister(),
		claimsSynced:    claimInformer.Informer().HasSynced,
		templateLister:  templateInformer.Lister(),
		templatesSynced: templateInformer.Informer().HasSynced,
		queue:           workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "resource_claim"),
		deletedObjects:  newUIDCache(maxUIDCacheEntries),
	}

	ephemeralvolumemetrics.RegisterMetrics()
	metrics.RegisterMetrics()

	eventBroadcaster := record.NewBroadcaster()
	eventBroadcaster.StartLogging(klog.Infof)
	eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")})
	ec.recorder = eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "ephemeral_volume"})
	ec.recorder = eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "resource_claim"})

	podInformer.Informer().AddEventHandler(kcache.ResourceEventHandlerFuncs{
		AddFunc: ec.enqueuePod,
		// The pod spec is immutable. Therefore the controller can ignore pod updates
		// because there cannot be any changes that have to be copied into the generated
		// PVC.
		// Deletion of the PVC is handled through the owner reference and garbage collection.
		// Therefore pod deletions also can be ignored.
	})
	pvcInformer.Informer().AddEventHandler(kcache.ResourceEventHandlerFuncs{
		DeleteFunc: ec.onPVCDelete,
	})
	if err := common.AddPodPVCIndexerIfNotPresent(ec.podIndexer); err != nil {
		return nil, fmt.Errorf("could not initialize pvc protection controller: %w", err)
	if _, err := podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: func(obj interface{}) {
			ec.enqueuePod(obj, false)
		},
		UpdateFunc: func(old, updated interface{}) {
			ec.enqueuePod(updated, false)
		},
		DeleteFunc: func(obj interface{}) {
			ec.enqueuePod(obj, true)
		},
	}); err != nil {
		return nil, err
	}
	if _, err := claimInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: ec.onResourceClaimAddOrUpdate,
		UpdateFunc: func(old, updated interface{}) {
			ec.onResourceClaimAddOrUpdate(updated)
		},
		DeleteFunc: ec.onResourceClaimDelete,
	}); err != nil {
		return nil, err
	}
	if err := ec.podIndexer.AddIndexers(cache.Indexers{podResourceClaimIndex: podResourceClaimIndexFunc}); err != nil {
		return nil, fmt.Errorf("could not initialize ResourceClaim controller: %w", err)
	}

	return ec, nil
}

func (ec *ephemeralController) enqueuePod(obj interface{}) {
func (ec *Controller) enqueuePod(obj interface{}, deleted bool) {
	if d, ok := obj.(cache.DeletedFinalStateUnknown); ok {
		obj = d.Obj
	}
	pod, ok := obj.(*v1.Pod)
	if !ok {
		// Not a pod?!
		return
	}

	// Ignore pods which are already getting deleted.
	if pod.DeletionTimestamp != nil {
	if deleted {
		ec.deletedObjects.Add(pod.UID)
	}

	if len(pod.Spec.ResourceClaims) == 0 {
		// Nothing to do for it at all.
		return
	}

	for _, vol := range pod.Spec.Volumes {
		if vol.Ephemeral != nil {
			// It has at least one ephemeral inline volume, work on it.
			key, err := kcache.DeletionHandlingMetaNamespaceKeyFunc(pod)
			if err != nil {
				runtime.HandleError(fmt.Errorf("couldn't get key for object %#v: %v", pod, err))
				return
	// Release reservations of a deleted or completed pod?
	if deleted ||
		podutil.IsPodTerminal(pod) ||
		// Deleted and not scheduled:
		pod.DeletionTimestamp != nil && pod.Spec.NodeName == "" {
		for _, podClaim := range pod.Spec.ResourceClaims {
			claimName := resourceclaim.Name(pod, &podClaim)
			ec.queue.Add(claimKeyPrefix + pod.Namespace + "/" + claimName)
		}
	}

	// Create ResourceClaim for inline templates?
	if pod.DeletionTimestamp == nil {
		for _, podClaim := range pod.Spec.ResourceClaims {
			if podClaim.Source.ResourceClaimTemplateName != nil {
				// It has at least one inline template, work on it.
				ec.queue.Add(podKeyPrefix + pod.Namespace + "/" + pod.Name)
				break
			}
			ec.queue.Add(key)
			break
		}
	}
}

func (ec *ephemeralController) onPVCDelete(obj interface{}) {
	pvc, ok := obj.(*v1.PersistentVolumeClaim)
func (ec *Controller) onResourceClaimAddOrUpdate(obj interface{}) {
	claim, ok := obj.(*resourcev1alpha1.ResourceClaim)
	if !ok {
		return
	}

	// Someone deleted a PVC, either intentionally or
	// When starting up, we have to check all claims to find those with
	// stale pods in ReservedFor. During an update, a pod might get added
	// that already no longer exists.
	ec.queue.Add(claimKeyPrefix + claim.Namespace + "/" + claim.Name)
}

func (ec *Controller) onResourceClaimDelete(obj interface{}) {
	claim, ok := obj.(*resourcev1alpha1.ResourceClaim)
	if !ok {
		return
	}

	// Someone deleted a ResourceClaim, either intentionally or
	// accidentally. If there is a pod referencing it because of
	// an ephemeral volume, then we should re-create the PVC.
	// an inline resource, then we should re-create the ResourceClaim.
	// The common indexer does some prefiltering for us by
	// limiting the list to those pods which reference
	// the PVC.
	objs, err := ec.podIndexer.ByIndex(common.PodPVCIndex, fmt.Sprintf("%s/%s", pvc.Namespace, pvc.Name))
	// the ResourceClaim.
	objs, err := ec.podIndexer.ByIndex(podResourceClaimIndex, fmt.Sprintf("%s/%s", claim.Namespace, claim.Name))
	if err != nil {
		runtime.HandleError(fmt.Errorf("listing pods from cache: %v", err))
		return
	}
	for _, obj := range objs {
		ec.enqueuePod(obj)
		ec.enqueuePod(obj, false)
	}
}

func (ec *ephemeralController) Run(ctx context.Context, workers int) {
func (ec *Controller) Run(ctx context.Context, workers int) {
	defer runtime.HandleCrash()
	defer ec.queue.ShutDown()

	klog.Infof("Starting ephemeral volume controller")
	defer klog.Infof("Shutting down ephemeral volume controller")

	if !cache.WaitForNamedCacheSync("ephemeral", ctx.Done(), ec.podSynced, ec.pvcsSynced) {
	if !cache.WaitForNamedCacheSync("ephemeral", ctx.Done(), ec.podSynced, ec.claimsSynced) {
		return
	}

@@ -181,12 +246,12 @@ func (ec *ephemeralController) Run(ctx context.Context, workers int) {
	<-ctx.Done()
}

func (ec *ephemeralController) runWorker(ctx context.Context) {
func (ec *Controller) runWorker(ctx context.Context) {
	for ec.processNextWorkItem(ctx) {
	}
}

func (ec *ephemeralController) processNextWorkItem(ctx context.Context) bool {
func (ec *Controller) processNextWorkItem(ctx context.Context) bool {
	key, shutdown := ec.queue.Get()
	if shutdown {
		return false
@@ -205,65 +270,92 @@ func (ec *ephemeralController) processNextWorkItem(ctx context.Context) bool {
	return true
}

// syncHandler is invoked for each pod which might need to be processed.
// If an error is returned from this function, the pod will be requeued.
func (ec *ephemeralController) syncHandler(ctx context.Context, key string) error {
	namespace, name, err := kcache.SplitMetaNamespaceKey(key)
// syncHandler is invoked for each work item which might need to be processed.
// If an error is returned from this function, the item will be requeued.
func (ec *Controller) syncHandler(ctx context.Context, key string) error {
	sep := strings.Index(key, ":")
	if sep < 0 {
		return fmt.Errorf("unexpected key: %s", key)
	}
	prefix, object := key[0:sep+1], key[sep+1:]
	namespace, name, err := cache.SplitMetaNamespaceKey(object)
	if err != nil {
		return err
	}

	switch prefix {
	case podKeyPrefix:
		return ec.syncPod(ctx, namespace, name)
	case claimKeyPrefix:
		return ec.syncClaim(ctx, namespace, name)
	default:
		return fmt.Errorf("unexpected key prefix: %s", prefix)
	}

}

func (ec *Controller) syncPod(ctx context.Context, namespace, name string) error {
	logger := klog.LoggerWithValues(klog.FromContext(ctx), "pod", klog.KRef(namespace, name))
	ctx = klog.NewContext(ctx, logger)
	pod, err := ec.podLister.Pods(namespace).Get(name)
	if err != nil {
		if errors.IsNotFound(err) {
			klog.V(5).Infof("ephemeral: nothing to do for pod %s, it is gone", key)
			logger.V(5).Info("nothing to do for pod, it is gone")
			return nil
		}
		klog.V(5).Infof("Error getting pod %s/%s (uid: %q) from informer : %v", pod.Namespace, pod.Name, pod.UID, err)
		return err
	}

	// Ignore pods which are already getting deleted.
	if pod.DeletionTimestamp != nil {
		klog.V(5).Infof("ephemeral: nothing to do for pod %s, it is marked for deletion", key)
		logger.V(5).Info("nothing to do for pod, it is marked for deletion")
		return nil
	}

	for _, vol := range pod.Spec.Volumes {
		if err := ec.handleVolume(ctx, pod, vol); err != nil {
			ec.recorder.Event(pod, v1.EventTypeWarning, events.FailedBinding, fmt.Sprintf("ephemeral volume %s: %v", vol.Name, err))
			return fmt.Errorf("pod %s, ephemeral volume %s: %v", key, vol.Name, err)
	for _, podClaim := range pod.Spec.ResourceClaims {
		if err := ec.handleClaim(ctx, pod, podClaim); err != nil {
			ec.recorder.Event(pod, v1.EventTypeWarning, "FailedResourceClaimCreation", fmt.Sprintf("PodResourceClaim %s: %v", podClaim.Name, err))
			return fmt.Errorf("pod %s/%s, PodResourceClaim %s: %v", namespace, name, podClaim.Name, err)
		}
	}

	return nil
}

// handleEphemeralVolume is invoked for each volume of a pod.
func (ec *ephemeralController) handleVolume(ctx context.Context, pod *v1.Pod, vol v1.Volume) error {
	klog.V(5).Infof("ephemeral: checking volume %s", vol.Name)
	if vol.Ephemeral == nil {
// handleResourceClaim is invoked for each volume of a pod.
func (ec *Controller) handleClaim(ctx context.Context, pod *v1.Pod, podClaim v1.PodResourceClaim) error {
	logger := klog.LoggerWithValues(klog.FromContext(ctx), "podClaim", podClaim.Name)
	ctx = klog.NewContext(ctx, logger)
	logger.V(5).Info("checking", "podClaim", podClaim.Name)
	templateName := podClaim.Source.ResourceClaimTemplateName
	if templateName == nil {
		return nil
	}

	pvcName := ephemeral.VolumeClaimName(pod, &vol)
	pvc, err := ec.pvcLister.PersistentVolumeClaims(pod.Namespace).Get(pvcName)
	claimName := resourceclaim.Name(pod, &podClaim)
	claim, err := ec.claimLister.ResourceClaims(pod.Namespace).Get(claimName)
	if err != nil && !errors.IsNotFound(err) {
		return err
	}
	if pvc != nil {
		if err := ephemeral.VolumeIsForPod(pod, pvc); err != nil {
	if claim != nil {
		if err := resourceclaim.IsForPod(pod, claim); err != nil {
			return err
		}
		// Already created, nothing more to do.
		klog.V(5).Infof("ephemeral: volume %s: PVC %s already created", vol.Name, pvcName)
		logger.V(5).Info("claim already created", "podClaim", podClaim.Name, "resourceClaim", claimName)
		return nil
	}

	// Create the PVC with pod as owner.
	template, err := ec.templateLister.ResourceClaimTemplates(pod.Namespace).Get(*templateName)
	if err != nil {
		return fmt.Errorf("resource claim template %q: %v", *templateName, err)
	}

	// Create the ResourceClaim with pod as owner.
	isTrue := true
	pvc = &v1.PersistentVolumeClaim{
	claim = &resourcev1alpha1.ResourceClaim{
		ObjectMeta: metav1.ObjectMeta{
			Name: pvcName,
			Name: claimName,
			OwnerReferences: []metav1.OwnerReference{
				{
					APIVersion: "v1",
@@ -274,16 +366,114 @@ func (ec *ephemeralController) handleVolume(ctx context.Context, pod *v1.Pod, vo
					BlockOwnerDeletion: &isTrue,
				},
			},
			Annotations: vol.Ephemeral.VolumeClaimTemplate.Annotations,
			Labels:      vol.Ephemeral.VolumeClaimTemplate.Labels,
			Annotations: template.Spec.ObjectMeta.Annotations,
			Labels:      template.Spec.ObjectMeta.Labels,
		},
		Spec: vol.Ephemeral.VolumeClaimTemplate.Spec,
		Spec: template.Spec.Spec,
	}
	ephemeralvolumemetrics.EphemeralVolumeCreateAttempts.Inc()
	_, err = ec.kubeClient.CoreV1().PersistentVolumeClaims(pod.Namespace).Create(ctx, pvc, metav1.CreateOptions{})
	metrics.ResourceClaimCreateAttempts.Inc()
	_, err = ec.kubeClient.ResourceV1alpha1().ResourceClaims(pod.Namespace).Create(ctx, claim, metav1.CreateOptions{})
	if err != nil {
		ephemeralvolumemetrics.EphemeralVolumeCreateFailures.Inc()
		return fmt.Errorf("create PVC %s: %v", pvcName, err)
		metrics.ResourceClaimCreateFailures.Inc()
		return fmt.Errorf("create ResourceClaim %s: %v", claimName, err)
	}
	return nil
}

func (ec *Controller) syncClaim(ctx context.Context, namespace, name string) error {
	logger := klog.LoggerWithValues(klog.FromContext(ctx), "claim", klog.KRef(namespace, name))
	ctx = klog.NewContext(ctx, logger)
	claim, err := ec.claimLister.ResourceClaims(namespace).Get(name)
	if err != nil {
		if errors.IsNotFound(err) {
			logger.V(5).Info("nothing to do for claim, it is gone")
			return nil
		}
		return err
	}

	// Check if the ReservedFor entries are all still valid.
	valid := make([]resourcev1alpha1.ResourceClaimConsumerReference, 0, len(claim.Status.ReservedFor))
	for _, reservedFor := range claim.Status.ReservedFor {
		if reservedFor.APIGroup == "" &&
			reservedFor.Resource == "pods" {
			// A pod falls into one of three categories:
			// - we have it in our cache -> don't remove it until we are told that it got removed
			// - we don't have it in our cache anymore, but we have seen it before -> it was deleted, remove it
			// - not in our cache, not seen -> double-check with API server before removal

			keepEntry := true

			// Tracking deleted pods in the LRU cache is an
			// optimization. Without this cache, the code would
			// have to do the API call below for every deleted pod
			// to ensure that the pod really doesn't exist. With
			// the cache, most of the time the pod will be recorded
			// as deleted and the API call can be avoided.
			if ec.deletedObjects.Has(reservedFor.UID) {
				// We know that the pod was deleted. This is
				// easy to check and thus is done first.
				keepEntry = false
			} else {
				pod, err := ec.podLister.Pods(claim.Namespace).Get(reservedFor.Name)
				if err != nil && !errors.IsNotFound(err) {
					return err
				}
				if pod == nil {
					// We might not have it in our informer cache
					// yet. Removing the pod while the scheduler is
					// scheduling it would be bad. We have to be
					// absolutely sure and thus have to check with
					// the API server.
					pod, err := ec.kubeClient.CoreV1().Pods(claim.Namespace).Get(ctx, reservedFor.Name, metav1.GetOptions{})
					if err != nil && !errors.IsNotFound(err) {
						return err
					}
					if pod == nil || pod.UID != reservedFor.UID {
						keepEntry = false
					}
				} else if pod.UID != reservedFor.UID {
					// Pod exists, but is a different incarnation under the same name.
					keepEntry = false
				}
			}

			if keepEntry {
				valid = append(valid, reservedFor)
			}
			continue
		}

		// TODO: support generic object lookup
		return fmt.Errorf("unsupported ReservedFor entry: %v", reservedFor)
	}

	if len(valid) < len(claim.Status.ReservedFor) {
		// TODO (#113700): patch
		claim := claim.DeepCopy()
		claim.Status.ReservedFor = valid
		_, err := ec.kubeClient.ResourceV1alpha1().ResourceClaims(claim.Namespace).UpdateStatus(ctx, claim, metav1.UpdateOptions{})
		if err != nil {
			return err
		}
	}

	return nil
}

// podResourceClaimIndexFunc is an index function that returns ResourceClaim keys (=
// namespace/name) for ResourceClaimTemplates in a given pod.
func podResourceClaimIndexFunc(obj interface{}) ([]string, error) {
	pod, ok := obj.(*v1.Pod)
	if !ok {
		return []string{}, nil
	}
	keys := []string{}
	for _, podClaim := range pod.Spec.ResourceClaims {
		if podClaim.Source.ResourceClaimTemplateName != nil {
			claimName := resourceclaim.Name(pod, &podClaim)
			keys = append(keys, fmt.Sprintf("%s/%s", pod.Namespace, claimName))
		}
	}
	return keys, nil
}