Merge pull request #112670 from yangjunmyfm192085/delklogV0

use contextual logging(nodeipam and nodelifecycle part)
Kubernetes Prow Robot 2023-03-07 09:40:33 -08:00, committed by GitHub
commit 37326f7cea
29 changed files with 606 additions and 516 deletions

View File

@@ -62,11 +62,11 @@ func (nodeIpamController *nodeIPAMController) StartNodeIpamControllerWrapper(ini
 	nodeIpamController.nodeIPAMControllerOptions.ApplyTo(&nodeIpamController.nodeIPAMControllerConfiguration)
 	return func(ctx context.Context, controllerContext genericcontrollermanager.ControllerContext) (controller.Interface, bool, error) {
-		return startNodeIpamController(initContext, completedConfig, nodeIpamController.nodeIPAMControllerConfiguration, controllerContext, cloud)
+		return startNodeIpamController(ctx, initContext, completedConfig, nodeIpamController.nodeIPAMControllerConfiguration, controllerContext, cloud)
 	}
 }
 
-func startNodeIpamController(initContext app.ControllerInitContext, ccmConfig *cloudcontrollerconfig.CompletedConfig, nodeIPAMConfig nodeipamconfig.NodeIPAMControllerConfiguration, ctx genericcontrollermanager.ControllerContext, cloud cloudprovider.Interface) (controller.Interface, bool, error) {
+func startNodeIpamController(ctx context.Context, initContext app.ControllerInitContext, ccmConfig *cloudcontrollerconfig.CompletedConfig, nodeIPAMConfig nodeipamconfig.NodeIPAMControllerConfiguration, controllerCtx genericcontrollermanager.ControllerContext, cloud cloudprovider.Interface) (controller.Interface, bool, error) {
 	var serviceCIDR *net.IPNet
 	var secondaryServiceCIDR *net.IPNet
@@ -130,14 +130,14 @@ func startNodeIpamController(initContext app.ControllerInitContext, ccmConfig *c
 	var clusterCIDRInformer v1alpha1.ClusterCIDRInformer
 	if utilfeature.DefaultFeatureGate.Enabled(features.MultiCIDRRangeAllocator) {
-		clusterCIDRInformer = ctx.InformerFactory.Networking().V1alpha1().ClusterCIDRs()
+		clusterCIDRInformer = controllerCtx.InformerFactory.Networking().V1alpha1().ClusterCIDRs()
 	}
 
 	nodeIpamController, err := nodeipamcontroller.NewNodeIpamController(
-		ctx.InformerFactory.Core().V1().Nodes(),
+		ctx,
+		controllerCtx.InformerFactory.Core().V1().Nodes(),
 		clusterCIDRInformer,
 		cloud,
-		ctx.ClientBuilder.ClientOrDie(initContext.ClientName),
+		controllerCtx.ClientBuilder.ClientOrDie(initContext.ClientName),
 		clusterCIDRs,
 		serviceCIDR,
 		secondaryServiceCIDR,
@@ -147,7 +147,7 @@ func startNodeIpamController(initContext app.ControllerInitContext, ccmConfig *c
 	if err != nil {
 		return nil, true, err
 	}
-	go nodeIpamController.Run(ctx.Stop)
+	go nodeIpamController.Run(ctx)
 	return nil, true, nil
 }
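
The shape of this change — a context.Context as the first parameter, cancellation observed via ctx.Done() instead of a dedicated stop channel — is the pattern the whole PR applies. A self-contained sketch of that idiom (toy names, not code from this PR):

package main

import (
	"context"
	"fmt"
	"time"
)

// toyController stands in for a controller whose Run used to take a
// stop channel (stopCh <-chan struct{}) and now takes a Context.
type toyController struct{}

// Run blocks until the context is cancelled, mirroring the
// `go nodeIpamController.Run(ctx)` call above.
func (c *toyController) Run(ctx context.Context) {
	fmt.Println("controller started")
	<-ctx.Done()
	fmt.Println("controller stopped:", ctx.Err())
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
	defer cancel()
	(&toyController{}).Run(ctx)
}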

View File

@@ -154,7 +154,9 @@ func startNodeIpamController(ctx context.Context, controllerContext ControllerCo
 		clusterCIDRInformer = controllerContext.InformerFactory.Networking().V1alpha1().ClusterCIDRs()
 	}
 
+	ctx = klog.NewContext(ctx, klog.LoggerWithName(klog.FromContext(ctx), "NodeIpamController"))
 	nodeIpamController, err := nodeipamcontroller.NewNodeIpamController(
+		ctx,
 		controllerContext.InformerFactory.Core().V1().Nodes(),
 		clusterCIDRInformer,
 		controllerContext.Cloud,
@@ -168,7 +170,7 @@ func startNodeIpamController(ctx context.Context, controllerContext ControllerCo
 	if err != nil {
 		return nil, true, err
 	}
-	go nodeIpamController.RunWithMetrics(ctx.Done(), controllerContext.ControllerManagerMetrics)
+	go nodeIpamController.RunWithMetrics(ctx, controllerContext.ControllerManagerMetrics)
 	return nil, true, nil
 }
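
klog.NewContext and klog.LoggerWithName (both real k8s.io/klog/v2 APIs) are what give every log line from this controller a "NodeIpamController" name. A minimal standalone sketch of the same idiom:

package main

import (
	"context"

	"k8s.io/klog/v2"
)

func startWorker(ctx context.Context) {
	// The callee never touches a global logger; it recovers the named
	// logger that the caller attached to the context.
	logger := klog.FromContext(ctx)
	logger.Info("worker running")
}

func main() {
	ctx := context.Background()
	// Same pattern as the startNodeIpamController change above.
	ctx = klog.NewContext(ctx, klog.LoggerWithName(klog.FromContext(ctx), "NodeIpamController"))
	startWorker(ctx)
}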

View File

@@ -61,7 +61,7 @@ func newAdapter(k8s clientset.Interface, cloud *gce.Cloud) *adapter {
 	return ret
 }
 
-func (a *adapter) Run(stopCh <-chan struct{}) {
+func (a *adapter) Run(ctx context.Context) {
 	defer utilruntime.HandleCrash()
 
 	// Start event processing pipeline.
@@ -69,7 +69,7 @@ func (a *adapter) Run(stopCh <-chan struct{}) {
 	a.broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: a.k8s.CoreV1().Events("")})
 	defer a.broadcaster.Shutdown()
 
-	<-stopCh
+	<-ctx.Done()
 }
 
 func (a *adapter) Alias(ctx context.Context, node *v1.Node) (*net.IPNet, error) {
@@ -88,7 +88,7 @@ func (a *adapter) Alias(ctx context.Context, node *v1.Node) (*net.IPNet, error)
 	case 1:
 		break
 	default:
-		klog.Warningf("Node %q has more than one alias assigned (%v), defaulting to the first", node.Name, cidrs)
+		klog.FromContext(ctx).Info("Node has more than one alias assigned, defaulting to the first", "node", klog.KObj(node), "CIDRs", cidrs)
 	}
 
 	_, cidrRange, err := netutils.ParseCIDRSloppy(cidrs[0])
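
The klog.Warningf → Info conversion above follows the structured-logging rules: the message becomes a constant string, variables move into key/value pairs, and objects are identified via klog.KObj or klog.KRef. A minimal sketch (node name and CIDRs are invented; klog.KRef is used because KObj needs the API object):

package main

import (
	"context"

	"k8s.io/klog/v2"
)

func main() {
	klog.InitFlags(nil)
	ctx := context.Background()
	logger := klog.FromContext(ctx)

	cidrs := []string{"10.0.0.0/24", "10.0.1.0/24"}
	// Old printf style:
	//   klog.Warningf("Node %q has more than one alias assigned (%v), ...", name, cidrs)
	// New structured style:
	logger.Info("Node has more than one alias assigned, defaulting to the first",
		"node", klog.KRef("", "node-1"), "CIDRs", cidrs)
}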

View File

@@ -91,11 +91,11 @@ type CIDRAllocator interface {
 	// AllocateOrOccupyCIDR looks at the given node, assigns it a valid
 	// CIDR if it doesn't currently have one or mark the CIDR as used if
 	// the node already have one.
-	AllocateOrOccupyCIDR(node *v1.Node) error
+	AllocateOrOccupyCIDR(logger klog.Logger, node *v1.Node) error
 	// ReleaseCIDR releases the CIDR of the removed node.
-	ReleaseCIDR(node *v1.Node) error
+	ReleaseCIDR(logger klog.Logger, node *v1.Node) error
 	// Run starts all the working logic of the allocator.
-	Run(stopCh <-chan struct{})
+	Run(ctx context.Context)
 }
 
 // CIDRAllocatorParams is parameters that's required for creating new
@@ -119,29 +119,30 @@ type nodeReservedCIDRs struct {
 }
 
 // New creates a new CIDR range allocator.
-func New(kubeClient clientset.Interface, cloud cloudprovider.Interface, nodeInformer informers.NodeInformer, clusterCIDRInformer networkinginformers.ClusterCIDRInformer, allocatorType CIDRAllocatorType, allocatorParams CIDRAllocatorParams) (CIDRAllocator, error) {
-	nodeList, err := listNodes(kubeClient)
+func New(ctx context.Context, kubeClient clientset.Interface, cloud cloudprovider.Interface, nodeInformer informers.NodeInformer, clusterCIDRInformer networkinginformers.ClusterCIDRInformer, allocatorType CIDRAllocatorType, allocatorParams CIDRAllocatorParams) (CIDRAllocator, error) {
+	logger := klog.FromContext(ctx)
+	nodeList, err := listNodes(logger, kubeClient)
 	if err != nil {
 		return nil, err
 	}
 
 	switch allocatorType {
 	case RangeAllocatorType:
-		return NewCIDRRangeAllocator(kubeClient, nodeInformer, allocatorParams, nodeList)
+		return NewCIDRRangeAllocator(logger, kubeClient, nodeInformer, allocatorParams, nodeList)
 	case MultiCIDRRangeAllocatorType:
 		if !utilfeature.DefaultFeatureGate.Enabled(features.MultiCIDRRangeAllocator) {
 			return nil, fmt.Errorf("invalid CIDR allocator type: %v, feature gate %v must be enabled", allocatorType, features.MultiCIDRRangeAllocator)
 		}
-		return NewMultiCIDRRangeAllocator(kubeClient, nodeInformer, clusterCIDRInformer, allocatorParams, nodeList, nil)
+		return NewMultiCIDRRangeAllocator(ctx, kubeClient, nodeInformer, clusterCIDRInformer, allocatorParams, nodeList, nil)
 	case CloudAllocatorType:
-		return NewCloudCIDRAllocator(kubeClient, cloud, nodeInformer)
+		return NewCloudCIDRAllocator(logger, kubeClient, cloud, nodeInformer)
 	default:
 		return nil, fmt.Errorf("invalid CIDR allocator type: %v", allocatorType)
 	}
 }
 
-func listNodes(kubeClient clientset.Interface) (*v1.NodeList, error) {
+func listNodes(logger klog.Logger, kubeClient clientset.Interface) (*v1.NodeList, error) {
 	var nodeList *v1.NodeList
 	// We must poll because apiserver might not be up. This error causes
 	// controller manager to restart.
@@ -152,7 +153,7 @@ func listNodes(kubeClient clientset.Interface) (*v1.NodeList, error) {
 			LabelSelector: labels.Everything().String(),
 		})
 		if err != nil {
-			klog.Errorf("Failed to list all nodes: %v", err)
+			logger.Error(err, "Failed to list all nodes")
 			return false, nil
 		}
 		return true, nil
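
Every implementation must now compile against these logger/context signatures. A minimal stub satisfying the revised CIDRAllocator shape (noopAllocator is invented for illustration, not part of the PR):

package ipamsketch

import (
	"context"

	v1 "k8s.io/api/core/v1"
	"k8s.io/klog/v2"
)

// noopAllocator satisfies the reworked CIDRAllocator shape: explicit
// logger parameters on the per-node methods, a context on Run.
type noopAllocator struct{}

func (a *noopAllocator) AllocateOrOccupyCIDR(logger klog.Logger, node *v1.Node) error {
	logger.V(4).Info("would allocate CIDR", "node", klog.KObj(node))
	return nil
}

func (a *noopAllocator) ReleaseCIDR(logger klog.Logger, node *v1.Node) error {
	logger.V(4).Info("would release CIDR", "node", klog.KObj(node))
	return nil
}

func (a *noopAllocator) Run(ctx context.Context) {
	<-ctx.Done()
}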

View File

@@ -23,7 +23,6 @@ import (
 	"testing"
 
 	"k8s.io/component-base/metrics/testutil"
-	"k8s.io/klog/v2"
 	netutils "k8s.io/utils/net"
 )
 
@@ -558,34 +557,35 @@ func TestGetBitforCIDR(t *testing.T) {
 	}
 
 	for _, tc := range cases {
-		_, clusterCIDR, err := netutils.ParseCIDRSloppy(tc.clusterCIDRStr)
-		if err != nil {
-			t.Fatalf("unexpected error: %v for %v", err, tc.description)
-		}
-
-		cs, err := NewCIDRSet(clusterCIDR, tc.subNetMaskSize)
-		if err != nil {
-			t.Fatalf("Error allocating CIDRSet for %v", tc.description)
-		}
-		_, subnetCIDR, err := netutils.ParseCIDRSloppy(tc.subNetCIDRStr)
-		if err != nil {
-			t.Fatalf("unexpected error: %v for %v", err, tc.description)
-		}
-
-		got, err := cs.getIndexForCIDR(subnetCIDR)
-		if err == nil && tc.expectErr {
-			klog.Errorf("expected error but got null for %v", tc.description)
-			continue
-		}
-
-		if err != nil && !tc.expectErr {
-			klog.Errorf("unexpected error: %v for %v", err, tc.description)
-			continue
-		}
-
-		if got != tc.expectedBit {
-			klog.Errorf("expected %v, but got %v for %v", tc.expectedBit, got, tc.description)
-		}
+		t.Run(tc.description, func(t *testing.T) {
+			_, clusterCIDR, err := netutils.ParseCIDRSloppy(tc.clusterCIDRStr)
+			if err != nil {
+				t.Fatalf("unexpected error: %v", err)
+			}
+
+			cs, err := NewCIDRSet(clusterCIDR, tc.subNetMaskSize)
+			if err != nil {
+				t.Fatalf("Error allocating CIDRSet")
+			}
+			_, subnetCIDR, err := netutils.ParseCIDRSloppy(tc.subNetCIDRStr)
+			if err != nil {
+				t.Fatalf("unexpected error: %v", err)
+			}
+
+			got, err := cs.getIndexForCIDR(subnetCIDR)
+			if err == nil && tc.expectErr {
+				t.Errorf("expected error but got null")
+				return
+			}
+
+			if err != nil && !tc.expectErr {
+				t.Errorf("unexpected error: %v", err)
+				return
+			}
+
+			if got != tc.expectedBit {
+				t.Errorf("expected %v, but got %v", tc.expectedBit, got)
+			}
+		})
 	}
 }
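
This rewrite replaces klog.Errorf + continue (which never failed the test) with t.Run subtests, so each case fails independently and can be targeted with go test -run. The skeleton, reduced to its essentials (hypothetical case struct):

package ipamsketch

import "testing"

func TestSubtestSkeleton(t *testing.T) {
	cases := []struct {
		description string
		in, want    int
	}{
		{description: "identity", in: 1, want: 1},
		{description: "another case", in: 2, want: 2},
	}
	for _, tc := range cases {
		t.Run(tc.description, func(t *testing.T) {
			// t.Errorf/t.Fatalf mark only this subtest as failed; the
			// old klog.Errorf + continue reported nothing to `go test`.
			if got := tc.in; got != tc.want {
				t.Errorf("expected %v, but got %v", tc.want, got)
			}
		})
	}
}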

View File

@@ -20,6 +20,7 @@ limitations under the License.
 package ipam
 
 import (
+	"context"
 	"fmt"
 	"math/rand"
 	"net"
@@ -27,6 +28,7 @@ import (
 	"time"
 
 	"github.com/google/go-cmp/cmp"
+
 	"k8s.io/klog/v2"
 
 	v1 "k8s.io/api/core/v1"
@@ -85,9 +87,10 @@ type cloudCIDRAllocator struct {
 var _ CIDRAllocator = (*cloudCIDRAllocator)(nil)
 
 // NewCloudCIDRAllocator creates a new cloud CIDR allocator.
-func NewCloudCIDRAllocator(client clientset.Interface, cloud cloudprovider.Interface, nodeInformer informers.NodeInformer) (CIDRAllocator, error) {
+func NewCloudCIDRAllocator(logger klog.Logger, client clientset.Interface, cloud cloudprovider.Interface, nodeInformer informers.NodeInformer) (CIDRAllocator, error) {
 	if client == nil {
-		klog.Fatalf("kubeClient is nil when starting NodeController")
+		logger.Error(nil, "kubeClient is nil when starting cloud CIDR allocator")
+		klog.FlushAndExit(klog.ExitFlushTimeout, 1)
 	}
 
 	eventBroadcaster := record.NewBroadcaster()
@@ -111,74 +114,80 @@
 	}
 
 	nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
-		AddFunc: controllerutil.CreateAddNodeHandler(ca.AllocateOrOccupyCIDR),
+		AddFunc: controllerutil.CreateAddNodeHandler(
+			func(node *v1.Node) error {
+				return ca.AllocateOrOccupyCIDR(logger, node)
+			}),
 		UpdateFunc: controllerutil.CreateUpdateNodeHandler(func(_, newNode *v1.Node) error {
 			if newNode.Spec.PodCIDR == "" {
-				return ca.AllocateOrOccupyCIDR(newNode)
+				return ca.AllocateOrOccupyCIDR(logger, newNode)
 			}
 			// Even if PodCIDR is assigned, but NetworkUnavailable condition is
 			// set to true, we need to process the node to set the condition.
 			networkUnavailableTaint := &v1.Taint{Key: v1.TaintNodeNetworkUnavailable, Effect: v1.TaintEffectNoSchedule}
 			_, cond := controllerutil.GetNodeCondition(&newNode.Status, v1.NodeNetworkUnavailable)
 			if cond == nil || cond.Status != v1.ConditionFalse || utiltaints.TaintExists(newNode.Spec.Taints, networkUnavailableTaint) {
-				return ca.AllocateOrOccupyCIDR(newNode)
+				return ca.AllocateOrOccupyCIDR(logger, newNode)
 			}
 			return nil
 		}),
-		DeleteFunc: controllerutil.CreateDeleteNodeHandler(ca.ReleaseCIDR),
+		DeleteFunc: controllerutil.CreateDeleteNodeHandler(func(node *v1.Node) error {
+			return ca.ReleaseCIDR(logger, node)
+		}),
 	})
 
-	klog.Infof("Using cloud CIDR allocator (provider: %v)", cloud.ProviderName())
+	logger.Info("Using cloud CIDR allocator", "provider", cloud.ProviderName())
 	return ca, nil
 }
 
-func (ca *cloudCIDRAllocator) Run(stopCh <-chan struct{}) {
+func (ca *cloudCIDRAllocator) Run(ctx context.Context) {
 	defer utilruntime.HandleCrash()
 
 	// Start event processing pipeline.
 	ca.broadcaster.StartStructuredLogging(0)
-	klog.Infof("Sending events to api server.")
+	logger := klog.FromContext(ctx)
+	logger.Info("Sending events to api server")
 	ca.broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: ca.client.CoreV1().Events("")})
 	defer ca.broadcaster.Shutdown()
 
-	klog.Infof("Starting cloud CIDR allocator")
-	defer klog.Infof("Shutting down cloud CIDR allocator")
+	logger.Info("Starting cloud CIDR allocator")
+	defer logger.Info("Shutting down cloud CIDR allocator")
 
-	if !cache.WaitForNamedCacheSync("cidrallocator", stopCh, ca.nodesSynced) {
+	if !cache.WaitForNamedCacheSync("cidrallocator", ctx.Done(), ca.nodesSynced) {
 		return
 	}
 
 	for i := 0; i < cidrUpdateWorkers; i++ {
-		go ca.worker(stopCh)
+		go ca.worker(ctx)
 	}
 
-	<-stopCh
+	<-ctx.Done()
 }
 
-func (ca *cloudCIDRAllocator) worker(stopChan <-chan struct{}) {
+func (ca *cloudCIDRAllocator) worker(ctx context.Context) {
+	logger := klog.FromContext(ctx)
 	for {
 		select {
 		case workItem, ok := <-ca.nodeUpdateChannel:
 			if !ok {
-				klog.Warning("Channel nodeCIDRUpdateChannel was unexpectedly closed")
+				logger.Info("Channel nodeCIDRUpdateChannel was unexpectedly closed")
 				return
 			}
-			if err := ca.updateCIDRAllocation(workItem); err == nil {
-				klog.V(3).Infof("Updated CIDR for %q", workItem)
+			if err := ca.updateCIDRAllocation(logger, workItem); err == nil {
+				logger.V(3).Info("Updated CIDR", "workItem", workItem)
 			} else {
-				klog.Errorf("Error updating CIDR for %q: %v", workItem, err)
-				if canRetry, timeout := ca.retryParams(workItem); canRetry {
-					klog.V(2).Infof("Retrying update for %q after %v", workItem, timeout)
+				logger.Error(err, "Error updating CIDR", "workItem", workItem)
+				if canRetry, timeout := ca.retryParams(logger, workItem); canRetry {
+					logger.V(2).Info("Retrying update on next period", "workItem", workItem, "timeout", timeout)
 					time.AfterFunc(timeout, func() {
 						// Requeue the failed node for update again.
 						ca.nodeUpdateChannel <- workItem
 					})
 					continue
 				}
-				klog.Errorf("Exceeded retry count for %q, dropping from queue", workItem)
+				logger.Error(nil, "Exceeded retry count, dropping from queue", "workItem", workItem)
 			}
 			ca.removeNodeFromProcessing(workItem)
-		case <-stopChan:
+		case <-ctx.Done():
 			return
 		}
 	}
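
The worker rewrite keeps the channel-driven select loop but swaps stopChan for ctx.Done() and pulls a logger from the context once per goroutine. The same shape in a self-contained program (the channel and work item type are invented):

package main

import (
	"context"
	"fmt"
	"time"
)

func worker(ctx context.Context, work <-chan string) {
	for {
		select {
		case item, ok := <-work:
			if !ok {
				fmt.Println("work channel closed")
				return
			}
			fmt.Println("processing", item)
		case <-ctx.Done():
			// Replaces `case <-stopChan:` — cancellation now arrives
			// through the context instead of a dedicated channel.
			return
		}
	}
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	work := make(chan string, 1)
	work <- "node-1"
	go worker(ctx, work)
	time.Sleep(50 * time.Millisecond)
	cancel()
}
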
@@ -194,13 +203,13 @@ func (ca *cloudCIDRAllocator) insertNodeToProcessing(nodeName string) bool {
 	return true
 }
 
-func (ca *cloudCIDRAllocator) retryParams(nodeName string) (bool, time.Duration) {
+func (ca *cloudCIDRAllocator) retryParams(logger klog.Logger, nodeName string) (bool, time.Duration) {
 	ca.lock.Lock()
 	defer ca.lock.Unlock()
 
 	entry, ok := ca.nodesInProcessing[nodeName]
 	if !ok {
-		klog.Errorf("Cannot get retryParams for %q as entry does not exist", nodeName)
+		logger.Error(nil, "Cannot get retryParams for node as entry does not exist", "node", klog.KRef("", nodeName))
 		return false, 0
 	}
 
@@ -233,28 +242,28 @@ func (ca *cloudCIDRAllocator) removeNodeFromProcessing(nodeName string) {
 // WARNING: If you're adding any return calls or defer any more work from this
 // function you have to make sure to update nodesInProcessing properly with the
 // disposition of the node when the work is done.
-func (ca *cloudCIDRAllocator) AllocateOrOccupyCIDR(node *v1.Node) error {
+func (ca *cloudCIDRAllocator) AllocateOrOccupyCIDR(logger klog.Logger, node *v1.Node) error {
 	if node == nil {
 		return nil
 	}
 	if !ca.insertNodeToProcessing(node.Name) {
-		klog.V(2).InfoS("Node is already in a process of CIDR assignment", "node", klog.KObj(node))
+		logger.V(2).Info("Node is already in a process of CIDR assignment", "node", klog.KObj(node))
 		return nil
 	}
 
-	klog.V(4).Infof("Putting node %s into the work queue", node.Name)
+	logger.V(4).Info("Putting node into the work queue", "node", klog.KObj(node))
 	ca.nodeUpdateChannel <- node.Name
 	return nil
 }
 
 // updateCIDRAllocation assigns CIDR to Node and sends an update to the API server.
-func (ca *cloudCIDRAllocator) updateCIDRAllocation(nodeName string) error {
+func (ca *cloudCIDRAllocator) updateCIDRAllocation(logger klog.Logger, nodeName string) error {
 	node, err := ca.nodeLister.Get(nodeName)
 	if err != nil {
 		if errors.IsNotFound(err) {
 			return nil // node no longer available, skip processing
 		}
-		klog.ErrorS(err, "Failed while getting the node for updating Node.Spec.PodCIDR", "nodeName", nodeName)
+		logger.Error(err, "Failed while getting the node for updating Node.Spec.PodCIDR", "node", klog.KRef("", nodeName))
 		return err
 	}
 	if node.Spec.ProviderID == "" {
@@ -272,7 +281,7 @@ func (ca *cloudCIDRAllocator) updateCIDRAllocation(nodeName string) error {
 	}
 	//Can have at most 2 ips (one for v4 and one for v6)
 	if len(cidrStrings) > 2 {
-		klog.InfoS("Got more than 2 ips, truncating to 2", "cidrStrings", cidrStrings)
+		logger.Info("Got more than 2 ips, truncating to 2", "cidrStrings", cidrStrings)
 		cidrStrings = cidrStrings[:2]
 	}
 
@@ -281,13 +290,13 @@ func (ca *cloudCIDRAllocator) updateCIDRAllocation(nodeName string) error {
 		return fmt.Errorf("failed to parse strings %v as CIDRs: %v", cidrStrings, err)
 	}
 
-	needUpdate, err := needPodCIDRsUpdate(node, cidrs)
+	needUpdate, err := needPodCIDRsUpdate(logger, node, cidrs)
 	if err != nil {
 		return fmt.Errorf("err: %v, CIDRS: %v", err, cidrStrings)
 	}
 	if needUpdate {
 		if node.Spec.PodCIDR != "" {
-			klog.ErrorS(nil, "PodCIDR being reassigned!", "nodeName", node.Name, "podCIDRs", node.Spec.PodCIDRs, "cidrStrings", cidrStrings)
+			logger.Error(nil, "PodCIDR being reassigned", "node", klog.KObj(node), "podCIDRs", node.Spec.PodCIDRs, "cidrStrings", cidrStrings)
 			// We fall through and set the CIDR despite this error. This
 			// implements the same logic as implemented in the
 			// rangeAllocator.
@@ -296,14 +305,14 @@ func (ca *cloudCIDRAllocator) updateCIDRAllocation(nodeName string) error {
 		}
 		for i := 0; i < cidrUpdateRetries; i++ {
 			if err = nodeutil.PatchNodeCIDRs(ca.client, types.NodeName(node.Name), cidrStrings); err == nil {
-				klog.InfoS("Set the node PodCIDRs", "nodeName", node.Name, "cidrStrings", cidrStrings)
+				logger.Info("Set the node PodCIDRs", "node", klog.KObj(node), "cidrStrings", cidrStrings)
 				break
 			}
 		}
 	}
 	if err != nil {
 		controllerutil.RecordNodeStatusChange(ca.recorder, node, "CIDRAssignmentFailed")
-		klog.ErrorS(err, "Failed to update the node PodCIDR after multiple attempts", "nodeName", node.Name, "cidrStrings", cidrStrings)
+		logger.Error(err, "Failed to update the node PodCIDR after multiple attempts", "node", klog.KObj(node), "cidrStrings", cidrStrings)
 		return err
 	}
 
@@ -315,31 +324,31 @@ func (ca *cloudCIDRAllocator) updateCIDRAllocation(nodeName string) error {
 		LastTransitionTime: metav1.Now(),
 	})
 	if err != nil {
-		klog.ErrorS(err, "Error setting route status for the node", "nodeName", node.Name)
+		logger.Error(err, "Error setting route status for the node", "node", klog.KObj(node))
 	}
 	return err
 }
 
-func needPodCIDRsUpdate(node *v1.Node, podCIDRs []*net.IPNet) (bool, error) {
+func needPodCIDRsUpdate(logger klog.Logger, node *v1.Node, podCIDRs []*net.IPNet) (bool, error) {
 	if node.Spec.PodCIDR == "" {
 		return true, nil
 	}
 	_, nodePodCIDR, err := netutils.ParseCIDRSloppy(node.Spec.PodCIDR)
 	if err != nil {
-		klog.ErrorS(err, "Found invalid node.Spec.PodCIDR", "podCIDR", node.Spec.PodCIDR)
+		logger.Error(err, "Found invalid node.Spec.PodCIDR", "podCIDR", node.Spec.PodCIDR)
 		// We will try to overwrite with new CIDR(s)
 		return true, nil
 	}
 	nodePodCIDRs, err := netutils.ParseCIDRs(node.Spec.PodCIDRs)
 	if err != nil {
-		klog.ErrorS(err, "Found invalid node.Spec.PodCIDRs", "podCIDRs", node.Spec.PodCIDRs)
+		logger.Error(err, "Found invalid node.Spec.PodCIDRs", "podCIDRs", node.Spec.PodCIDRs)
 		// We will try to overwrite with new CIDR(s)
 		return true, nil
 	}
 
 	if len(podCIDRs) == 1 {
 		if cmp.Equal(nodePodCIDR, podCIDRs[0]) {
-			klog.V(4).InfoS("Node already has allocated CIDR. It matches the proposed one.", "nodeName", node.Name, "podCIDR", podCIDRs[0])
+			logger.V(4).Info("Node already has allocated CIDR. It matches the proposed one", "node", klog.KObj(node), "podCIDR", podCIDRs[0])
 			return false, nil
 		}
 	} else if len(nodePodCIDRs) == len(podCIDRs) {
@@ -351,15 +360,15 @@ func needPodCIDRsUpdate(node *v1.Node, podCIDRs []*net.IPNet) (bool, error) {
 			return true, nil
 		}
 	}
-	klog.V(4).InfoS("Node already has allocated CIDRs. It matches the proposed one.", "nodeName", node.Name, "podCIDRs", podCIDRs)
+	logger.V(4).Info("Node already has allocated CIDRs. It matches the proposed one", "node", klog.KObj(node), "podCIDRs", podCIDRs)
 	return false, nil
 }
 
-func (ca *cloudCIDRAllocator) ReleaseCIDR(node *v1.Node) error {
-	klog.V(2).Infof("Node %v PodCIDR (%v) will be released by external cloud provider (not managed by controller)",
-		node.Name, node.Spec.PodCIDR)
+func (ca *cloudCIDRAllocator) ReleaseCIDR(logger klog.Logger, node *v1.Node) error {
+	logger.V(2).Info("Node's PodCIDR will be released by external cloud provider (not managed by controller)",
+		"node", klog.KObj(node), "podCIDR", node.Spec.PodCIDR)
 	return nil
 }

View File

@@ -25,9 +25,10 @@ import (
 	informers "k8s.io/client-go/informers/core/v1"
 	clientset "k8s.io/client-go/kubernetes"
 	cloudprovider "k8s.io/cloud-provider"
+	"k8s.io/klog/v2"
 )
 
 // NewCloudCIDRAllocator creates a new cloud CIDR allocator.
-func NewCloudCIDRAllocator(client clientset.Interface, cloud cloudprovider.Interface, nodeInformer informers.NodeInformer) (CIDRAllocator, error) {
+func NewCloudCIDRAllocator(logger klog.Logger, client clientset.Interface, cloud cloudprovider.Interface, nodeInformer informers.NodeInformer) (CIDRAllocator, error) {
 	return nil, errors.New("legacy cloud provider support not built")
 }

View File

@@ -28,6 +28,7 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/client-go/informers"
 	"k8s.io/client-go/kubernetes/fake"
+	"k8s.io/klog/v2/ktesting"
 	netutils "k8s.io/utils/net"
 )
 
@@ -42,7 +43,6 @@ func hasNodeInProcessing(ca *cloudCIDRAllocator, name string) bool {
 func TestBoundedRetries(t *testing.T) {
 	clientSet := fake.NewSimpleClientset()
 	updateChan := make(chan string, 1) // need to buffer as we are using only on go routine
-	stopChan := make(chan struct{})
 	sharedInfomer := informers.NewSharedInformerFactory(clientSet, 1*time.Hour)
 	ca := &cloudCIDRAllocator{
 		client: clientSet,
@@ -51,9 +51,10 @@
 		nodesSynced:       sharedInfomer.Core().V1().Nodes().Informer().HasSynced,
 		nodesInProcessing: map[string]*nodeProcessingInfo{},
 	}
-	go ca.worker(stopChan)
+	logger, ctx := ktesting.NewTestContext(t)
+	go ca.worker(ctx)
 	nodeName := "testNode"
-	ca.AllocateOrOccupyCIDR(&v1.Node{
+	ca.AllocateOrOccupyCIDR(logger, &v1.Node{
 		ObjectMeta: metav1.ObjectMeta{
 			Name: nodeName,
 		},
@@ -176,9 +177,9 @@ func TestNeedPodCIDRsUpdate(t *testing.T) {
 		if err != nil {
 			t.Errorf("failed to parse %v as CIDRs: %v", tc.cidrs, err)
 		}
+		logger, _ := ktesting.NewTestContext(t)
 		t.Run(tc.desc, func(t *testing.T) {
-			got, err := needPodCIDRsUpdate(&node, netCIDRs)
+			got, err := needPodCIDRsUpdate(logger, &node, netCIDRs)
 			if tc.wantErr == (err == nil) {
 				t.Errorf("err: %v, wantErr: %v", err, tc.wantErr)
 			}
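
ktesting.NewTestContext (a real k8s.io/klog/v2/ktesting helper) returns a logger wired to the test's log output plus a context carrying that logger, which is what replaces the hand-made stopChan here. Minimal usage:

package ipamsketch

import (
	"context"
	"testing"

	"k8s.io/klog/v2"
	"k8s.io/klog/v2/ktesting"
)

func helperUnderTest(ctx context.Context) {
	// Recovers the test logger, so this output shows up in `go test -v`.
	klog.FromContext(ctx).Info("doing work")
}

func TestWithKtesting(t *testing.T) {
	logger, ctx := ktesting.NewTestContext(t)
	logger.Info("starting")
	helperUnderTest(ctx)
}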

View File

@@ -20,6 +20,7 @@ limitations under the License.
 package ipam
 
 import (
+	"context"
 	"fmt"
 	"net"
 	"sync"
@@ -113,10 +114,10 @@ func NewController(
 // Start initializes the Controller with the existing list of nodes and
 // registers the informers for node changes. This will start synchronization
 // of the node and cloud CIDR range allocations.
-func (c *Controller) Start(nodeInformer informers.NodeInformer) error {
-	klog.InfoS("Starting IPAM controller", "config", c.config)
+func (c *Controller) Start(logger klog.Logger, nodeInformer informers.NodeInformer) error {
+	logger.Info("Starting IPAM controller", "config", c.config)
 
-	nodes, err := listNodes(c.adapter.k8s)
+	nodes, err := listNodes(logger, c.adapter.k8s)
 	if err != nil {
 		return err
 	}
@@ -125,9 +126,9 @@
 		_, cidrRange, err := netutils.ParseCIDRSloppy(node.Spec.PodCIDR)
 		if err == nil {
 			c.set.Occupy(cidrRange)
-			klog.V(3).InfoS("Occupying CIDR for node", "CIDR", node.Spec.PodCIDR, "node", node.Name)
+			logger.V(3).Info("Occupying CIDR for node", "CIDR", node.Spec.PodCIDR, "node", klog.KObj(&node))
 		} else {
-			klog.ErrorS(err, "Node has an invalid CIDR", "node", node.Name, "CIDR", node.Spec.PodCIDR)
+			logger.Error(err, "Node has an invalid CIDR", "node", klog.KObj(&node), "CIDR", node.Spec.PodCIDR)
 		}
 	}
 
@@ -138,24 +139,30 @@
 			// XXX/bowei -- stagger the start of each sync cycle.
 			syncer := c.newSyncer(node.Name)
 			c.syncers[node.Name] = syncer
-			go syncer.Loop(nil)
+			go syncer.Loop(logger, nil)
 		}()
 	}
 
 	nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
-		AddFunc:    controllerutil.CreateAddNodeHandler(c.onAdd),
-		UpdateFunc: controllerutil.CreateUpdateNodeHandler(c.onUpdate),
-		DeleteFunc: controllerutil.CreateDeleteNodeHandler(c.onDelete),
+		AddFunc: controllerutil.CreateAddNodeHandler(func(node *v1.Node) error {
+			return c.onAdd(logger, node)
+		}),
+		UpdateFunc: controllerutil.CreateUpdateNodeHandler(func(_, newNode *v1.Node) error {
+			return c.onUpdate(logger, newNode)
+		}),
+		DeleteFunc: controllerutil.CreateDeleteNodeHandler(func(node *v1.Node) error {
+			return c.onDelete(logger, node)
+		}),
 	})
 
 	return nil
 }
 
-func (c *Controller) Run(stopCh <-chan struct{}) {
+func (c *Controller) Run(ctx context.Context) {
 	defer utilruntime.HandleCrash()
 
-	go c.adapter.Run(stopCh)
-	<-stopCh
+	go c.adapter.Run(ctx)
+	<-ctx.Done()
 }
 
 // occupyServiceCIDR removes the service CIDR range from the cluster CIDR if it
@@ -192,7 +199,7 @@ func (c *Controller) newSyncer(name string) *nodesync.NodeSync {
 	return nodesync.New(ns, c.adapter, c.adapter, c.config.Mode, name, c.set)
 }
 
-func (c *Controller) onAdd(node *v1.Node) error {
+func (c *Controller) onAdd(logger klog.Logger, node *v1.Node) error {
 	c.lock.Lock()
 	defer c.lock.Unlock()
 
@@ -200,30 +207,30 @@
 	if !ok {
 		syncer = c.newSyncer(node.Name)
 		c.syncers[node.Name] = syncer
-		go syncer.Loop(nil)
+		go syncer.Loop(logger, nil)
 	} else {
-		klog.InfoS("Add for node that already exists", "node", node.Name)
+		logger.Info("Add for node that already exists", "node", klog.KObj(node))
 	}
 	syncer.Update(node)
 
 	return nil
 }
 
-func (c *Controller) onUpdate(_, node *v1.Node) error {
+func (c *Controller) onUpdate(logger klog.Logger, node *v1.Node) error {
 	c.lock.Lock()
 	defer c.lock.Unlock()
 
 	if sync, ok := c.syncers[node.Name]; ok {
 		sync.Update(node)
 	} else {
-		klog.ErrorS(nil, "Received update for non-existent node", "node", node.Name)
+		logger.Error(nil, "Received update for non-existent node", "node", klog.KObj(node))
 		return fmt.Errorf("unknown node %q", node.Name)
 	}
 
 	return nil
 }
 
-func (c *Controller) onDelete(node *v1.Node) error {
+func (c *Controller) onDelete(logger klog.Logger, node *v1.Node) error {
 	c.lock.Lock()
 	defer c.lock.Unlock()
 
@@ -231,7 +238,7 @@
 		syncer.Delete(node)
 		delete(c.syncers, node.Name)
 	} else {
-		klog.InfoS("Node was already deleted", "node", node.Name)
+		logger.Info("Node was already deleted", "node", klog.KObj(node))
 	}
 
 	return nil
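
Because CreateAddNodeHandler and friends keep their logger-free callback shape, the PR injects the logger by wrapping each method in a closure. The capture pattern on its own (handler types simplified to plain funcs; names are invented):

package main

import (
	"fmt"

	"k8s.io/klog/v2"
)

// makeHandler adapts a logger-aware callback to the logger-free
// signature an event-handler registry expects, by capturing logger.
func makeHandler(logger klog.Logger, f func(klog.Logger, string) error) func(string) error {
	return func(name string) error {
		return f(logger, name)
	}
}

func onAdd(logger klog.Logger, name string) error {
	logger.Info("add", "node", klog.KRef("", name))
	return nil
}

func main() {
	logger := klog.Background()
	h := makeHandler(logger, onAdd)
	fmt.Println(h("node-1"))
}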

View File

@@ -102,6 +102,7 @@ type multiCIDRRangeAllocator struct {
 // Caller must always pass in a list of existing nodes to the new allocator.
 // NodeList is only nil in testing.
 func NewMultiCIDRRangeAllocator(
+	ctx context.Context,
 	client clientset.Interface,
 	nodeInformer informers.NodeInformer,
 	clusterCIDRInformer networkinginformers.ClusterCIDRInformer,
@@ -109,8 +110,10 @@ func NewMultiCIDRRangeAllocator(
 	nodeList *v1.NodeList,
 	testCIDRMap map[string][]*cidrset.ClusterCIDR,
 ) (CIDRAllocator, error) {
+	logger := klog.FromContext(ctx)
 	if client == nil {
-		klog.Fatalf("client is nil")
+		logger.Error(nil, "kubeClient is nil when starting multi CIDRRangeAllocator")
+		klog.FlushAndExit(klog.ExitFlushTimeout, 1)
 	}
 
 	eventBroadcaster := record.NewBroadcaster()
@@ -137,10 +140,10 @@
 	// testCIDRMap is only set for testing purposes.
 	if len(testCIDRMap) > 0 {
 		ra.cidrMap = testCIDRMap
-		klog.Warningf("testCIDRMap should only be set for testing purposes, if this is seen in production logs, it might be a misconfiguration or a bug.")
+		logger.Info("TestCIDRMap should only be set for testing purposes, if this is seen in production logs, it might be a misconfiguration or a bug")
 	}
 
-	ccList, err := listClusterCIDRs(client)
+	ccList, err := listClusterCIDRs(ctx, client)
 	if err != nil {
 		return nil, err
 	}
@@ -148,14 +151,14 @@
 	if ccList == nil {
 		ccList = &networkingv1alpha1.ClusterCIDRList{}
 	}
-	createDefaultClusterCIDR(ccList, allocatorParams)
+	createDefaultClusterCIDR(logger, ccList, allocatorParams)
 
 	// Regenerate the cidrMaps from the existing ClusterCIDRs.
 	for _, clusterCIDR := range ccList.Items {
-		klog.Infof("Regenerating existing ClusterCIDR: %v", clusterCIDR)
+		logger.Info("Regenerating existing ClusterCIDR", "clusterCIDR", clusterCIDR)
 		// Create an event for invalid ClusterCIDRs, do not crash on failures.
-		if err := ra.reconcileBootstrap(&clusterCIDR); err != nil {
-			klog.Errorf("Error while regenerating existing ClusterCIDR: %v", err)
+		if err := ra.reconcileBootstrap(ctx, &clusterCIDR); err != nil {
+			logger.Error(err, "Error while regenerating existing ClusterCIDR")
 			ra.recorder.Event(&clusterCIDR, "Warning", "InvalidClusterCIDR encountered while regenerating ClusterCIDR during bootstrap.", err.Error())
 		}
 	}
@@ -184,25 +187,25 @@
 	})
 
 	if allocatorParams.ServiceCIDR != nil {
-		ra.filterOutServiceRange(allocatorParams.ServiceCIDR)
+		ra.filterOutServiceRange(logger, allocatorParams.ServiceCIDR)
 	} else {
-		klog.Info("No Service CIDR provided. Skipping filtering out service addresses.")
+		logger.Info("No Service CIDR provided. Skipping filtering out service addresses")
 	}
 
 	if allocatorParams.SecondaryServiceCIDR != nil {
-		ra.filterOutServiceRange(allocatorParams.SecondaryServiceCIDR)
+		ra.filterOutServiceRange(logger, allocatorParams.SecondaryServiceCIDR)
 	} else {
-		klog.Info("No Secondary Service CIDR provided. Skipping filtering out secondary service addresses.")
+		logger.Info("No Secondary Service CIDR provided. Skipping filtering out secondary service addresses")
 	}
 
 	if nodeList != nil {
 		for _, node := range nodeList.Items {
 			if len(node.Spec.PodCIDRs) == 0 {
-				klog.V(4).Infof("Node %v has no CIDR, ignoring", node.Name)
+				logger.V(4).Info("Node has no CIDR, ignoring", "node", klog.KObj(&node))
 				continue
 			}
-			klog.Infof("Node %v has CIDR %s, occupying it in CIDR map", node.Name, node.Spec.PodCIDRs)
-			if err := ra.occupyCIDRs(&node); err != nil {
+			logger.Info("Node has CIDR, occupying it in CIDR map", "node", klog.KObj(&node), "podCIDRs", node.Spec.PodCIDRs)
+			if err := ra.occupyCIDRs(logger, &node); err != nil {
 				// This will happen if:
 				// 1. We find garbage in the podCIDRs field. Retrying is useless.
 				// 2. CIDR out of range: This means ClusterCIDR is not yet created
@@ -231,7 +234,7 @@
 			// we don't see the Update with DeletionTimestamp != 0.
 			// TODO: instead of executing the operation directly in the handler, build a small cache with key node.Name
 			// and value PodCIDRs use ReleaseCIDR on the reconcile loop so we can retry on `ReleaseCIDR` failures.
-			ra.ReleaseCIDR(obj.(*v1.Node))
+			ra.ReleaseCIDR(logger, obj.(*v1.Node))
 			// IndexerInformer uses a delta nodeQueue, therefore for deletes we have to use this
 			// key function.
 			key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj)
@@ -244,51 +247,52 @@
 	return ra, nil
 }
 
-func (r *multiCIDRRangeAllocator) Run(stopCh <-chan struct{}) {
+func (r *multiCIDRRangeAllocator) Run(ctx context.Context) {
 	defer utilruntime.HandleCrash()
 
 	// Start event processing pipeline.
+	logger := klog.FromContext(ctx)
 	r.broadcaster.StartStructuredLogging(0)
-	klog.Infof("Started sending events to API Server.")
+	logger.Info("Started sending events to API Server")
 	r.broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: r.client.CoreV1().Events("")})
 	defer r.broadcaster.Shutdown()
 
 	defer r.cidrQueue.ShutDown()
 	defer r.nodeQueue.ShutDown()
 
-	klog.Infof("Starting Multi CIDR Range allocator")
-	defer klog.Infof("Shutting down Multi CIDR Range allocator")
+	logger.Info("Starting Multi CIDR Range allocator")
+	defer logger.Info("Shutting down Multi CIDR Range allocator")
 
-	if !cache.WaitForNamedCacheSync("multi_cidr_range_allocator", stopCh, r.nodesSynced, r.clusterCIDRSynced) {
+	if !cache.WaitForNamedCacheSync("multi_cidr_range_allocator", ctx.Done(), r.nodesSynced, r.clusterCIDRSynced) {
 		return
 	}
 
 	for i := 0; i < cidrUpdateWorkers; i++ {
-		go wait.Until(r.runCIDRWorker, time.Second, stopCh)
-		go wait.Until(r.runNodeWorker, time.Second, stopCh)
+		go wait.UntilWithContext(ctx, r.runCIDRWorker, time.Second)
+		go wait.UntilWithContext(ctx, r.runNodeWorker, time.Second)
 	}
 
-	<-stopCh
+	<-ctx.Done()
 }
 
 // runWorker is a long-running function that will continually call the
 // processNextWorkItem function in order to read and process a message on the
 // cidrQueue.
-func (r *multiCIDRRangeAllocator) runCIDRWorker() {
-	for r.processNextCIDRWorkItem() {
+func (r *multiCIDRRangeAllocator) runCIDRWorker(ctx context.Context) {
+	for r.processNextCIDRWorkItem(ctx) {
 	}
 }
 
 // processNextWorkItem will read a single work item off the cidrQueue and
 // attempt to process it, by calling the syncHandler.
-func (r *multiCIDRRangeAllocator) processNextCIDRWorkItem() bool {
+func (r *multiCIDRRangeAllocator) processNextCIDRWorkItem(ctx context.Context) bool {
 	obj, shutdown := r.cidrQueue.Get()
 	if shutdown {
 		return false
 	}
 
 	// We wrap this block in a func so we can defer c.cidrQueue.Done.
-	err := func(obj interface{}) error {
+	err := func(ctx context.Context, obj interface{}) error {
 		// We call Done here so the cidrQueue knows we have finished
 		// processing this item. We also must remember to call Forget if we
 		// do not want this work item being re-queued. For example, we do
@@ -313,7 +317,7 @@ func (r *multiCIDRRangeAllocator) processNextCIDRWorkItem() bool {
 		}
 		// Run the syncHandler, passing it the namespace/name string of the
 		// Foo resource to be synced.
-		if err := r.syncClusterCIDR(key); err != nil {
+		if err := r.syncClusterCIDR(ctx, key); err != nil {
 			// Put the item back on the cidrQueue to handle any transient errors.
 			r.cidrQueue.AddRateLimited(key)
 			return fmt.Errorf("error syncing '%s': %s, requeuing", key, err.Error())
@@ -323,7 +327,7 @@ func (r *multiCIDRRangeAllocator) processNextCIDRWorkItem() bool {
 		r.cidrQueue.Forget(obj)
 		klog.Infof("Successfully synced '%s'", key)
 		return nil
-	}(obj)
+	}(ctx, obj)
 
 	if err != nil {
 		utilruntime.HandleError(err)
@@ -333,21 +337,21 @@ func (r *multiCIDRRangeAllocator) processNextCIDRWorkItem() bool {
 	return true
 }
 
-func (r *multiCIDRRangeAllocator) runNodeWorker() {
-	for r.processNextNodeWorkItem() {
+func (r *multiCIDRRangeAllocator) runNodeWorker(ctx context.Context) {
+	for r.processNextNodeWorkItem(ctx) {
 	}
 }
 
 // processNextWorkItem will read a single work item off the cidrQueue and
 // attempt to process it, by calling the syncHandler.
-func (r *multiCIDRRangeAllocator) processNextNodeWorkItem() bool {
+func (r *multiCIDRRangeAllocator) processNextNodeWorkItem(ctx context.Context) bool {
 	obj, shutdown := r.nodeQueue.Get()
 	if shutdown {
 		return false
 	}
 
 	// We wrap this block in a func so we can defer c.cidrQueue.Done.
-	err := func(obj interface{}) error {
+	err := func(logger klog.Logger, obj interface{}) error {
 		// We call Done here so the workNodeQueue knows we have finished
 		// processing this item. We also must remember to call Forget if we
 		// do not want this work item being re-queued. For example, we do
@@ -372,7 +376,7 @@ func (r *multiCIDRRangeAllocator) processNextNodeWorkItem() bool {
 		}
 		// Run the syncHandler, passing it the namespace/name string of the
 		// Foo resource to be synced.
-		if err := r.syncNode(key); err != nil {
+		if err := r.syncNode(logger, key); err != nil {
 			// Put the item back on the cidrQueue to handle any transient errors.
 			r.nodeQueue.AddRateLimited(key)
 			return fmt.Errorf("error syncing '%s': %s, requeuing", key, err.Error())
@@ -382,7 +386,7 @@ func (r *multiCIDRRangeAllocator) processNextNodeWorkItem() bool {
 		r.nodeQueue.Forget(obj)
 		klog.Infof("Successfully synced '%s'", key)
 		return nil
-	}(obj)
+	}(klog.FromContext(ctx), obj)
 
 	if err != nil {
 		utilruntime.HandleError(err)
@@ -392,7 +396,7 @@ func (r *multiCIDRRangeAllocator) processNextNodeWorkItem() bool {
 	return true
 }
 
-func (r *multiCIDRRangeAllocator) syncNode(key string) error {
+func (r *multiCIDRRangeAllocator) syncNode(logger klog.Logger, key string) error {
 	startTime := time.Now()
 	defer func() {
 		klog.V(4).Infof("Finished syncing Node request %q (%v)", key, time.Since(startTime))
@@ -411,9 +415,9 @@ func (r *multiCIDRRangeAllocator) syncNode(key string) error {
 	// Check the DeletionTimestamp to determine if object is under deletion.
 	if !node.DeletionTimestamp.IsZero() {
 		klog.V(3).Infof("node is being deleted: %v", key)
-		return r.ReleaseCIDR(node)
+		return r.ReleaseCIDR(logger, node)
 	}
-	return r.AllocateOrOccupyCIDR(node)
+	return r.AllocateOrOccupyCIDR(logger, node)
 }
 
 // needToAddFinalizer checks if a finalizer should be added to the object.
@@ -422,15 +426,16 @@
 		finalizer, nil)
 }
 
-func (r *multiCIDRRangeAllocator) syncClusterCIDR(key string) error {
+func (r *multiCIDRRangeAllocator) syncClusterCIDR(ctx context.Context, key string) error {
 	startTime := time.Now()
+	logger := klog.FromContext(ctx)
 	defer func() {
-		klog.V(4).Infof("Finished syncing clusterCIDR request %q (%v)", key, time.Since(startTime))
+		logger.V(4).Info("Finished syncing clusterCIDR request", "key", key, "latency", time.Since(startTime))
 	}()
 
 	clusterCIDR, err := r.clusterCIDRLister.Get(key)
 	if apierrors.IsNotFound(err) {
-		klog.V(3).Infof("clusterCIDR has been deleted: %v", key)
+		logger.V(3).Info("clusterCIDR has been deleted", "key", key)
 		return nil
 	}
 
@@ -440,21 +445,20 @@
 	// Check the DeletionTimestamp to determine if object is under deletion.
 	if !clusterCIDR.DeletionTimestamp.IsZero() {
-		return r.reconcileDelete(clusterCIDR)
+		return r.reconcileDelete(ctx, clusterCIDR)
 	}
-	return r.reconcileCreate(clusterCIDR)
+	return r.reconcileCreate(ctx, clusterCIDR)
 }
 
 // occupyCIDRs marks node.PodCIDRs[...] as used in allocator's tracked cidrSet.
-func (r *multiCIDRRangeAllocator) occupyCIDRs(node *v1.Node) error {
+func (r *multiCIDRRangeAllocator) occupyCIDRs(logger klog.Logger, node *v1.Node) error {
 	err := func(node *v1.Node) error {
 		if len(node.Spec.PodCIDRs) == 0 {
 			return nil
 		}
-
-		clusterCIDRList, err := r.orderedMatchingClusterCIDRs(node, true)
+		clusterCIDRList, err := r.orderedMatchingClusterCIDRs(logger, node, true)
 		if err != nil {
 			return err
 		}
@@ -468,10 +472,10 @@
 				return fmt.Errorf("failed to parse CIDR %s on Node %v: %w", cidr, node.Name, err)
 			}
 
-			klog.Infof("occupy CIDR %s for node: %s", cidr, node.Name)
+			logger.Info("occupy CIDR for node", "CIDR", cidr, "node", klog.KObj(node))
 
 			if err := r.Occupy(clusterCIDR, podCIDR); err != nil {
-				klog.V(3).Infof("Could not occupy cidr: %v, trying next range: %w", node.Spec.PodCIDRs, err)
+				logger.V(3).Info("Could not occupy cidr, trying next range", "podCIDRs", node.Spec.PodCIDRs, "err", err)
 				break
 			}
@@ -519,14 +523,14 @@
 // Release marks the CIDR as free in the cidrSet used bitmap,
 // Also removes the CIDR from the allocatedCIDRSet.
-func (r *multiCIDRRangeAllocator) Release(clusterCIDR *cidrset.ClusterCIDR, cidr *net.IPNet) error {
+func (r *multiCIDRRangeAllocator) Release(logger klog.Logger, clusterCIDR *cidrset.ClusterCIDR, cidr *net.IPNet) error {
 	currCIDRSet, err := r.associatedCIDRSet(clusterCIDR, cidr)
 	if err != nil {
 		return err
 	}
 
 	if err := currCIDRSet.Release(cidr); err != nil {
-		klog.Infof("Unable to release cidr %v in cidrSet", cidr)
+		logger.Info("Unable to release cidr in cidrSet", "CIDR", cidr)
 		return err
 	}
 
@@ -539,7 +543,7 @@
 // WARNING: If you're adding any return calls or defer any more work from this
 // function you have to make sure to update nodesInProcessing properly with the
 // disposition of the node when the work is done.
-func (r *multiCIDRRangeAllocator) AllocateOrOccupyCIDR(node *v1.Node) error {
+func (r *multiCIDRRangeAllocator) AllocateOrOccupyCIDR(logger klog.Logger, node *v1.Node) error {
 	r.lock.Lock()
 	defer r.lock.Unlock()
@@ -548,10 +552,10 @@
 	}
 
 	if len(node.Spec.PodCIDRs) > 0 {
-		return r.occupyCIDRs(node)
+		return r.occupyCIDRs(logger, node)
 	}
 
-	cidrs, clusterCIDR, err := r.prioritizedCIDRs(node)
+	cidrs, clusterCIDR, err := r.prioritizedCIDRs(logger, node)
 	if err != nil {
 		controllerutil.RecordNodeStatusChange(r.recorder, node, "CIDRNotAvailable")
 		return fmt.Errorf("failed to get cidrs for node %s", node.Name)
@@ -571,11 +575,11 @@
 		clusterCIDR: clusterCIDR,
 	}
 
-	return r.updateCIDRsAllocation(allocated)
+	return r.updateCIDRsAllocation(logger, allocated)
 }
 
 // ReleaseCIDR marks node.podCIDRs[...] as unused in our tracked cidrSets.
-func (r *multiCIDRRangeAllocator) ReleaseCIDR(node *v1.Node) error {
+func (r *multiCIDRRangeAllocator) ReleaseCIDR(logger klog.Logger, node *v1.Node) error {
 	r.lock.Lock()
 	defer r.lock.Unlock()
@@ -583,7 +587,7 @@
 		return nil
 	}
 
-	clusterCIDR, err := r.allocatedClusterCIDR(node)
+	clusterCIDR, err := r.allocatedClusterCIDR(logger, node)
 	if err != nil {
 		return err
 	}
@@ -594,8 +598,8 @@
 			return fmt.Errorf("failed to parse CIDR %q on Node %q: %w", cidr, node.Name, err)
 		}
 
-		klog.Infof("release CIDR %s for node: %s", cidr, node.Name)
-		if err := r.Release(clusterCIDR, podCIDR); err != nil {
+		logger.Info("release CIDR for node", "CIDR", cidr, "node", klog.KObj(node))
+		if err := r.Release(logger, clusterCIDR, podCIDR); err != nil {
 			return fmt.Errorf("failed to release cidr %q from clusterCIDR %q for node %q: %w", cidr, clusterCIDR.Name, node.Name, err)
 		}
 	}
@@ -608,7 +612,7 @@
 // Marks all CIDRs with subNetMaskSize that belongs to serviceCIDR as used across all cidrs
 // so that they won't be assignable.
-func (r *multiCIDRRangeAllocator) filterOutServiceRange(serviceCIDR *net.IPNet) {
+func (r *multiCIDRRangeAllocator) filterOutServiceRange(logger klog.Logger, serviceCIDR *net.IPNet) {
 	// Checks if service CIDR has a nonempty intersection with cluster
 	// CIDR. It is the case if either clusterCIDR contains serviceCIDR with
 	// clusterCIDR's Mask applied (this means that clusterCIDR contains
@@ -617,7 +621,7 @@
 	for _, clusterCIDRList := range r.cidrMap {
 		for _, clusterCIDR := range clusterCIDRList {
 			if err := r.occupyServiceCIDR(clusterCIDR, serviceCIDR); err != nil {
-				klog.Errorf("unable to occupy service CIDR: %w", err)
+				logger.Error(err, "Unable to occupy service CIDR")
 			}
 		}
 	}
@ -645,12 +649,12 @@ func (r *multiCIDRRangeAllocator) occupyServiceCIDR(clusterCIDR *cidrset.Cluster
} }
// updateCIDRsAllocation assigns CIDR to Node and sends an update to the API server. // updateCIDRsAllocation assigns CIDR to Node and sends an update to the API server.
func (r *multiCIDRRangeAllocator) updateCIDRsAllocation(data multiCIDRNodeReservedCIDRs) error { func (r *multiCIDRRangeAllocator) updateCIDRsAllocation(logger klog.Logger, data multiCIDRNodeReservedCIDRs) error {
err := func(data multiCIDRNodeReservedCIDRs) error { err := func(data multiCIDRNodeReservedCIDRs) error {
cidrsString := ipnetToStringList(data.allocatedCIDRs) cidrsString := ipnetToStringList(data.allocatedCIDRs)
node, err := r.nodeLister.Get(data.nodeName) node, err := r.nodeLister.Get(data.nodeName)
if err != nil { if err != nil {
klog.Errorf("Failed while getting node %v for updating Node.Spec.PodCIDRs: %v", data.nodeName, err) logger.Error(err, "Failed while getting node for updating Node.Spec.PodCIDRs", "node", klog.KRef("", data.nodeName))
return err return err
} }
@ -666,16 +670,16 @@ func (r *multiCIDRRangeAllocator) updateCIDRsAllocation(data multiCIDRNodeReserv
} }
} }
if match { if match {
klog.V(4).Infof("Node %q already has allocated CIDR %q. It matches the proposed one.", node.Name, data.allocatedCIDRs) logger.V(4).Info("Node already has allocated CIDR. It matches the proposed one.", "node", klog.KObj(node), "CIDRs", data.allocatedCIDRs)
return nil return nil
} }
} }
// node has cidrs allocated, release the reserved. // node has cidrs allocated, release the reserved.
if len(node.Spec.PodCIDRs) != 0 { if len(node.Spec.PodCIDRs) != 0 {
klog.Errorf("Node %q already has a CIDR allocated %q. Releasing the new one.", node.Name, node.Spec.PodCIDRs) logger.Error(nil, "Node already has a CIDR allocated. Releasing the new one", "node", klog.KObj(node), "podCIDRs", node.Spec.PodCIDRs)
for _, cidr := range data.allocatedCIDRs { for _, cidr := range data.allocatedCIDRs {
if err := r.Release(data.clusterCIDR, cidr); err != nil { if err := r.Release(logger, data.clusterCIDR, cidr); err != nil {
return fmt.Errorf("failed to release cidr %s from clusterCIDR %s for node: %s: %w", cidr, data.clusterCIDR.Name, node.Name, err) return fmt.Errorf("failed to release cidr %s from clusterCIDR %s for node: %s: %w", cidr, data.clusterCIDR.Name, node.Name, err)
} }
} }
@ -686,20 +690,20 @@ func (r *multiCIDRRangeAllocator) updateCIDRsAllocation(data multiCIDRNodeReserv
for i := 0; i < cidrUpdateRetries; i++ { for i := 0; i < cidrUpdateRetries; i++ {
if err = nodeutil.PatchNodeCIDRs(r.client, types.NodeName(node.Name), cidrsString); err == nil { if err = nodeutil.PatchNodeCIDRs(r.client, types.NodeName(node.Name), cidrsString); err == nil {
data.clusterCIDR.AssociatedNodes[node.Name] = true data.clusterCIDR.AssociatedNodes[node.Name] = true
klog.Infof("Set node %q PodCIDR to %q", node.Name, cidrsString) logger.Info("Set node PodCIDR", "node", klog.KObj(node), "podCIDR", cidrsString)
return nil return nil
} }
} }
// failed release back to the pool. // failed release back to the pool.
klog.Errorf("Failed to update node %q PodCIDR to %q after %d attempts: %v", node.Name, cidrsString, cidrUpdateRetries, err) logger.Error(err, "Failed to update node PodCIDR after attempts", "node", klog.KObj(node), "podCIDR", cidrsString, "retries", cidrUpdateRetries)
controllerutil.RecordNodeStatusChange(r.recorder, node, "CIDRAssignmentFailed") controllerutil.RecordNodeStatusChange(r.recorder, node, "CIDRAssignmentFailed")
// We accept the fact that we may leak CIDRs here. This is safer than releasing // We accept the fact that we may leak CIDRs here. This is safer than releasing
// them in case when we don't know if request went through. // them in case when we don't know if request went through.
// NodeController restart will return all falsely allocated CIDRs to the pool. // NodeController restart will return all falsely allocated CIDRs to the pool.
if !apierrors.IsServerTimeout(err) { if !apierrors.IsServerTimeout(err) {
klog.Errorf("CIDR assignment for node %q failed: %v. Releasing allocated CIDR", node.Name, err) logger.Error(err, "CIDR assignment for node failed. Releasing allocated CIDR", "node", klog.KObj(node))
for _, cidr := range data.allocatedCIDRs { for _, cidr := range data.allocatedCIDRs {
if err := r.Release(data.clusterCIDR, cidr); err != nil { if err := r.Release(logger, data.clusterCIDR, cidr); err != nil {
return fmt.Errorf("failed to release cidr %q from clusterCIDR %q for node: %q: %w", cidr, data.clusterCIDR.Name, node.Name, err) return fmt.Errorf("failed to release cidr %q from clusterCIDR %q for node: %q: %w", cidr, data.clusterCIDR.Name, node.Name, err)
} }
} }
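The conversions in this hunk follow one mapping: klog.Errorf with a format string becomes logger.Error(err, msg, kv...) with a constant message and key/value pairs, nodes are referenced via klog.KObj (or klog.KRef when only the name is at hand), and error-level messages with no error object pass nil explicitly. A hedged sketch of the three variants, assuming only klog/v2; logVariants is illustrative:

package example

import (
	v1 "k8s.io/api/core/v1"
	"k8s.io/klog/v2"
)

func logVariants(logger klog.Logger, node *v1.Node, nodeName string, err error) {
	// Error with an underlying error object; KRef builds a name-only
	// reference (empty namespace for cluster-scoped objects like nodes).
	logger.Error(err, "Failed while getting node for updating Node.Spec.PodCIDRs",
		"node", klog.KRef("", nodeName))

	// Error-level condition with no error object: pass nil explicitly.
	logger.Error(nil, "Node already has a CIDR allocated. Releasing the new one",
		"node", klog.KObj(node), "podCIDRs", node.Spec.PodCIDRs)

	// Verbose informational message, gated at level 4.
	logger.V(4).Info("Node already has allocated CIDR. It matches the proposed one",
		"node", klog.KObj(node))
}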
@ -732,8 +736,8 @@ func defaultNodeSelector() *v1.NodeSelector {
// prioritizedCIDRs returns a list of CIDRs to be allocated to the node. // prioritizedCIDRs returns a list of CIDRs to be allocated to the node.
// Returns 1 CIDR if single stack. // Returns 1 CIDR if single stack.
// Returns 2 CIDRs, 1 from each IP family if dual stack. // Returns 2 CIDRs, 1 from each IP family if dual stack.
func (r *multiCIDRRangeAllocator) prioritizedCIDRs(node *v1.Node) ([]*net.IPNet, *cidrset.ClusterCIDR, error) { func (r *multiCIDRRangeAllocator) prioritizedCIDRs(logger klog.Logger, node *v1.Node) ([]*net.IPNet, *cidrset.ClusterCIDR, error) {
clusterCIDRList, err := r.orderedMatchingClusterCIDRs(node, true) clusterCIDRList, err := r.orderedMatchingClusterCIDRs(logger, node, true)
if err != nil { if err != nil {
return nil, nil, fmt.Errorf("unable to get a clusterCIDR for node %s: %w", node.Name, err) return nil, nil, fmt.Errorf("unable to get a clusterCIDR for node %s: %w", node.Name, err)
} }
@ -743,7 +747,7 @@ func (r *multiCIDRRangeAllocator) prioritizedCIDRs(node *v1.Node) ([]*net.IPNet,
if clusterCIDR.IPv4CIDRSet != nil { if clusterCIDR.IPv4CIDRSet != nil {
cidr, err := r.allocateCIDR(clusterCIDR, clusterCIDR.IPv4CIDRSet) cidr, err := r.allocateCIDR(clusterCIDR, clusterCIDR.IPv4CIDRSet)
if err != nil { if err != nil {
klog.V(3).Infof("unable to allocate IPv4 CIDR, trying next range: %w", err) logger.V(3).Info("Unable to allocate IPv4 CIDR, trying next range", "err", err)
continue continue
} }
cidrs = append(cidrs, cidr) cidrs = append(cidrs, cidr)
@ -752,7 +756,7 @@ func (r *multiCIDRRangeAllocator) prioritizedCIDRs(node *v1.Node) ([]*net.IPNet,
if clusterCIDR.IPv6CIDRSet != nil { if clusterCIDR.IPv6CIDRSet != nil {
cidr, err := r.allocateCIDR(clusterCIDR, clusterCIDR.IPv6CIDRSet) cidr, err := r.allocateCIDR(clusterCIDR, clusterCIDR.IPv6CIDRSet)
if err != nil { if err != nil {
klog.V(3).Infof("unable to allocate IPv6 CIDR, trying next range: %w", err) logger.V(3).Info("Unable to allocate IPv6 CIDR, trying next range", "err", err)
continue continue
} }
cidrs = append(cidrs, cidr) cidrs = append(cidrs, cidr)
@ -827,8 +831,8 @@ func (r *multiCIDRRangeAllocator) cidrOverlapWithAllocatedList(cidr *net.IPNet)
} }
// allocatedClusterCIDR returns the ClusterCIDR from which the node CIDRs were allocated. // allocatedClusterCIDR returns the ClusterCIDR from which the node CIDRs were allocated.
func (r *multiCIDRRangeAllocator) allocatedClusterCIDR(node *v1.Node) (*cidrset.ClusterCIDR, error) { func (r *multiCIDRRangeAllocator) allocatedClusterCIDR(logger klog.Logger, node *v1.Node) (*cidrset.ClusterCIDR, error) {
clusterCIDRList, err := r.orderedMatchingClusterCIDRs(node, false) clusterCIDRList, err := r.orderedMatchingClusterCIDRs(logger, node, false)
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to get a clusterCIDR for node %s: %w", node.Name, err) return nil, fmt.Errorf("unable to get a clusterCIDR for node %s: %w", node.Name, err)
} }
@ -852,12 +856,12 @@ func (r *multiCIDRRangeAllocator) allocatedClusterCIDR(node *v1.Node) (*cidrset.
// orderedMatchingClusterCIDRs takes `occupy` as an argument that determines whether the function // orderedMatchingClusterCIDRs takes `occupy` as an argument that determines whether the function
// is called during an occupy or a release operation. For a release operation, a ClusterCIDR must // is called during an occupy or a release operation. For a release operation, a ClusterCIDR must
// be added to the matching ClusterCIDRs list, irrespective of whether the ClusterCIDR is terminating. // be added to the matching ClusterCIDRs list, irrespective of whether the ClusterCIDR is terminating.
func (r *multiCIDRRangeAllocator) orderedMatchingClusterCIDRs(node *v1.Node, occupy bool) ([]*cidrset.ClusterCIDR, error) { func (r *multiCIDRRangeAllocator) orderedMatchingClusterCIDRs(logger klog.Logger, node *v1.Node, occupy bool) ([]*cidrset.ClusterCIDR, error) {
matchingCIDRs := make([]*cidrset.ClusterCIDR, 0) matchingCIDRs := make([]*cidrset.ClusterCIDR, 0)
pq := make(PriorityQueue, 0) pq := make(PriorityQueue, 0)
for label, clusterCIDRList := range r.cidrMap { for label, clusterCIDRList := range r.cidrMap {
labelsMatch, matchCnt, err := r.matchCIDRLabels(node, label) labelsMatch, matchCnt, err := r.matchCIDRLabels(logger, node, label)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -902,14 +906,14 @@ func (r *multiCIDRRangeAllocator) orderedMatchingClusterCIDRs(node *v1.Node, occ
// matchCIDRLabels matches the Node labels to CIDR configs. // matchCIDRLabels matches the Node labels to CIDR configs.
// Returns true only if all the labels match, also returns the count of matching labels. // Returns true only if all the labels match, also returns the count of matching labels.
func (r *multiCIDRRangeAllocator) matchCIDRLabels(node *v1.Node, label string) (bool, int, error) { func (r *multiCIDRRangeAllocator) matchCIDRLabels(logger klog.Logger, node *v1.Node, label string) (bool, int, error) {
var labelSet labels.Set var labelSet labels.Set
var matchCnt int var matchCnt int
labelsMatch := false labelsMatch := false
ls, err := labels.Parse(label) ls, err := labels.Parse(label)
if err != nil { if err != nil {
klog.Errorf("Unable to parse label %s to labels.Selector: %v", label, err) logger.Error(err, "Unable to parse label to labels.Selector", "label", label)
return labelsMatch, 0, err return labelsMatch, 0, err
} }
reqs, selectable := ls.Requirements() reqs, selectable := ls.Requirements()
@ -934,7 +938,7 @@ func (r *multiCIDRRangeAllocator) matchCIDRLabels(node *v1.Node, label string) (
// createDefaultClusterCIDR creates a default ClusterCIDR if --cluster-cidr has // createDefaultClusterCIDR creates a default ClusterCIDR if --cluster-cidr has
// been configured. It converts the --cluster-cidr and --per-node-mask-size* flags // been configured. It converts the --cluster-cidr and --per-node-mask-size* flags
// to appropriate ClusterCIDR fields. // to appropriate ClusterCIDR fields.
func createDefaultClusterCIDR(existingConfigList *networkingv1alpha1.ClusterCIDRList, func createDefaultClusterCIDR(logger klog.Logger, existingConfigList *networkingv1alpha1.ClusterCIDRList,
allocatorParams CIDRAllocatorParams) { allocatorParams CIDRAllocatorParams) {
// Create default ClusterCIDR only if --cluster-cidr has been configured // Create default ClusterCIDR only if --cluster-cidr has been configured
if len(allocatorParams.ClusterCIDRs) == 0 { if len(allocatorParams.ClusterCIDRs) == 0 {
@ -944,7 +948,7 @@ func createDefaultClusterCIDR(existingConfigList *networkingv1alpha1.ClusterCIDR
for _, clusterCIDR := range existingConfigList.Items { for _, clusterCIDR := range existingConfigList.Items {
if clusterCIDR.Name == defaultClusterCIDRName { if clusterCIDR.Name == defaultClusterCIDRName {
// Default ClusterCIDR already exists, no further action required. // Default ClusterCIDR already exists, no further action required.
klog.V(3).Infof("Default ClusterCIDR %s already exists", defaultClusterCIDRName) logger.V(3).Info("Default ClusterCIDR already exists", "defaultClusterCIDRName", defaultClusterCIDRName)
return return
} }
} }
@ -1006,14 +1010,15 @@ func createDefaultClusterCIDR(existingConfigList *networkingv1alpha1.ClusterCIDR
} }
// reconcileCreate handles create ClusterCIDR events. // reconcileCreate handles create ClusterCIDR events.
func (r *multiCIDRRangeAllocator) reconcileCreate(clusterCIDR *networkingv1alpha1.ClusterCIDR) error { func (r *multiCIDRRangeAllocator) reconcileCreate(ctx context.Context, clusterCIDR *networkingv1alpha1.ClusterCIDR) error {
r.lock.Lock() r.lock.Lock()
defer r.lock.Unlock() defer r.lock.Unlock()
logger := klog.FromContext(ctx)
if needToAddFinalizer(clusterCIDR, clusterCIDRFinalizer) { if needToAddFinalizer(clusterCIDR, clusterCIDRFinalizer) {
klog.V(3).Infof("Creating ClusterCIDR %s", clusterCIDR.Name) logger.V(3).Info("Creating ClusterCIDR", "clusterCIDR", clusterCIDR.Name)
if err := r.createClusterCIDR(clusterCIDR, false); err != nil { if err := r.createClusterCIDR(ctx, clusterCIDR, false); err != nil {
klog.Errorf("Unable to create ClusterCIDR %s : %v", clusterCIDR.Name, err) logger.Error(err, "Unable to create ClusterCIDR", "clusterCIDR", clusterCIDR.Name)
return err return err
} }
} }
@ -1022,21 +1027,22 @@ func (r *multiCIDRRangeAllocator) reconcileCreate(clusterCIDR *networkingv1alpha
// reconcileBootstrap handles creation of existing ClusterCIDRs. // reconcileBootstrap handles creation of existing ClusterCIDRs.
// adds a finalizer if not already present. // adds a finalizer if not already present.
func (r *multiCIDRRangeAllocator) reconcileBootstrap(clusterCIDR *networkingv1alpha1.ClusterCIDR) error { func (r *multiCIDRRangeAllocator) reconcileBootstrap(ctx context.Context, clusterCIDR *networkingv1alpha1.ClusterCIDR) error {
r.lock.Lock() r.lock.Lock()
defer r.lock.Unlock() defer r.lock.Unlock()
logger := klog.FromContext(ctx)
terminating := false terminating := false
// Create the ClusterCIDR only if the Spec has not been modified. // Create the ClusterCIDR only if the Spec has not been modified.
if clusterCIDR.Generation > 1 { if clusterCIDR.Generation > 1 {
terminating = true terminating = true
err := fmt.Errorf("CIDRs from ClusterCIDR %s will not be used for allocation as it was modified", clusterCIDR.Name) err := fmt.Errorf("CIDRs from ClusterCIDR %s will not be used for allocation as it was modified", clusterCIDR.Name)
klog.Errorf("ClusterCIDR Modified: %v", err) logger.Error(err, "ClusterCIDR Modified")
} }
klog.V(2).Infof("Creating ClusterCIDR %s during bootstrap", clusterCIDR.Name) logger.V(2).Info("Creating ClusterCIDR during bootstrap", "clusterCIDR", clusterCIDR.Name)
if err := r.createClusterCIDR(clusterCIDR, terminating); err != nil { if err := r.createClusterCIDR(ctx, clusterCIDR, terminating); err != nil {
klog.Errorf("Unable to create ClusterCIDR %s: %v", clusterCIDR.Name, err) logger.Error(err, "Unable to create ClusterCIDR", "clusterCIDR", clusterCIDR.Name)
return err return err
} }
@ -1044,7 +1050,7 @@ func (r *multiCIDRRangeAllocator) reconcileBootstrap(clusterCIDR *networkingv1al
} }
// createClusterCIDR creates and maps the cidrSets in the cidrMap. // createClusterCIDR creates and maps the cidrSets in the cidrMap.
func (r *multiCIDRRangeAllocator) createClusterCIDR(clusterCIDR *networkingv1alpha1.ClusterCIDR, terminating bool) error { func (r *multiCIDRRangeAllocator) createClusterCIDR(ctx context.Context, clusterCIDR *networkingv1alpha1.ClusterCIDR, terminating bool) error {
nodeSelector, err := r.nodeSelectorKey(clusterCIDR) nodeSelector, err := r.nodeSelectorKey(clusterCIDR)
if err != nil { if err != nil {
return fmt.Errorf("unable to get labelSelector key: %w", err) return fmt.Errorf("unable to get labelSelector key: %w", err)
@ -1069,16 +1075,17 @@ func (r *multiCIDRRangeAllocator) createClusterCIDR(clusterCIDR *networkingv1alp
updatedClusterCIDR.ObjectMeta.Finalizers = append(clusterCIDR.ObjectMeta.Finalizers, clusterCIDRFinalizer) updatedClusterCIDR.ObjectMeta.Finalizers = append(clusterCIDR.ObjectMeta.Finalizers, clusterCIDRFinalizer)
} }
logger := klog.FromContext(ctx)
if updatedClusterCIDR.ResourceVersion == "" { if updatedClusterCIDR.ResourceVersion == "" {
// Create is only used for creating default ClusterCIDR. // Create is only used for creating default ClusterCIDR.
if _, err := r.client.NetworkingV1alpha1().ClusterCIDRs().Create(context.TODO(), updatedClusterCIDR, metav1.CreateOptions{}); err != nil { if _, err := r.client.NetworkingV1alpha1().ClusterCIDRs().Create(ctx, updatedClusterCIDR, metav1.CreateOptions{}); err != nil {
klog.V(2).Infof("Error creating ClusterCIDR %s: %v", clusterCIDR.Name, err) logger.V(2).Info("Error creating ClusterCIDR", "clusterCIDR", klog.KObj(clusterCIDR), "err", err)
return err return err
} }
} else { } else {
// Update the ClusterCIDR object when called from reconcileCreate. // Update the ClusterCIDR object when called from reconcileCreate.
if _, err := r.client.NetworkingV1alpha1().ClusterCIDRs().Update(context.TODO(), updatedClusterCIDR, metav1.UpdateOptions{}); err != nil { if _, err := r.client.NetworkingV1alpha1().ClusterCIDRs().Update(ctx, updatedClusterCIDR, metav1.UpdateOptions{}); err != nil {
klog.V(2).Infof("Error creating ClusterCIDR %s: %v", clusterCIDR.Name, err) logger.V(2).Info("Error creating ClusterCIDR", "clusterCIDR", clusterCIDR.Name, "err", err)
return err return err
} }
} }
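Alongside the logger, the ctx parameter now reaches the client-go calls, so the context.TODO() placeholders disappear and cancelling the controller's context also cancels in-flight API requests. A minimal sketch of the same migration on an arbitrary clientset call; nodeGet is illustrative, not part of this change:

package example

import (
	"context"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
)

// nodeGet forwards the caller's ctx into the API request instead of a
// context.TODO() placeholder.
func nodeGet(ctx context.Context, client kubernetes.Interface, name string) error {
	// Before: client.CoreV1().Nodes().Get(context.TODO(), name, metav1.GetOptions{})
	_, err := client.CoreV1().Nodes().Get(ctx, name, metav1.GetOptions{})
	return err
}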
@ -1136,30 +1143,31 @@ func (r *multiCIDRRangeAllocator) mapClusterCIDRSet(cidrMap map[string][]*cidrse
// reconcileDelete releases the assigned ClusterCIDR and removes the finalizer // reconcileDelete releases the assigned ClusterCIDR and removes the finalizer
// if the deletion timestamp is set. // if the deletion timestamp is set.
func (r *multiCIDRRangeAllocator) reconcileDelete(clusterCIDR *networkingv1alpha1.ClusterCIDR) error { func (r *multiCIDRRangeAllocator) reconcileDelete(ctx context.Context, clusterCIDR *networkingv1alpha1.ClusterCIDR) error {
r.lock.Lock() r.lock.Lock()
defer r.lock.Unlock() defer r.lock.Unlock()
logger := klog.FromContext(ctx)
if slice.ContainsString(clusterCIDR.GetFinalizers(), clusterCIDRFinalizer, nil) { if slice.ContainsString(clusterCIDR.GetFinalizers(), clusterCIDRFinalizer, nil) {
klog.V(2).Infof("Releasing ClusterCIDR: %s", clusterCIDR.Name) logger.V(2).Info("Releasing ClusterCIDR", "clusterCIDR", clusterCIDR.Name)
if err := r.deleteClusterCIDR(clusterCIDR); err != nil { if err := r.deleteClusterCIDR(logger, clusterCIDR); err != nil {
klog.V(2).Infof("Error while deleting ClusterCIDR: %+v", err) klog.V(2).Info("Error while deleting ClusterCIDR", "err", err)
return err return err
} }
// Remove the finalizer as delete is successful. // Remove the finalizer as delete is successful.
cccCopy := clusterCIDR.DeepCopy() cccCopy := clusterCIDR.DeepCopy()
cccCopy.ObjectMeta.Finalizers = slice.RemoveString(cccCopy.ObjectMeta.Finalizers, clusterCIDRFinalizer, nil) cccCopy.ObjectMeta.Finalizers = slice.RemoveString(cccCopy.ObjectMeta.Finalizers, clusterCIDRFinalizer, nil)
if _, err := r.client.NetworkingV1alpha1().ClusterCIDRs().Update(context.TODO(), cccCopy, metav1.UpdateOptions{}); err != nil { if _, err := r.client.NetworkingV1alpha1().ClusterCIDRs().Update(ctx, cccCopy, metav1.UpdateOptions{}); err != nil {
klog.V(2).Infof("Error removing finalizer for ClusterCIDR %s: %v", clusterCIDR.Name, err) logger.V(2).Info("Error removing finalizer for ClusterCIDR", "clusterCIDR", clusterCIDR.Name, "err", err)
return err return err
} }
klog.V(2).Infof("Removed finalizer for ClusterCIDR %s", clusterCIDR.Name) logger.V(2).Info("Removed finalizer for ClusterCIDR", "clusterCIDR", clusterCIDR.Name)
} }
return nil return nil
} }
// deleteClusterCIDR Deletes and unmaps the ClusterCIDRs from the cidrMap. // deleteClusterCIDR Deletes and unmaps the ClusterCIDRs from the cidrMap.
func (r *multiCIDRRangeAllocator) deleteClusterCIDR(clusterCIDR *networkingv1alpha1.ClusterCIDR) error { func (r *multiCIDRRangeAllocator) deleteClusterCIDR(logger klog.Logger, clusterCIDR *networkingv1alpha1.ClusterCIDR) error {
labelSelector, err := r.nodeSelectorKey(clusterCIDR) labelSelector, err := r.nodeSelectorKey(clusterCIDR)
if err != nil { if err != nil {
@ -1168,7 +1176,7 @@ func (r *multiCIDRRangeAllocator) deleteClusterCIDR(clusterCIDR *networkingv1alp
clusterCIDRSetList, ok := r.cidrMap[labelSelector] clusterCIDRSetList, ok := r.cidrMap[labelSelector]
if !ok { if !ok {
klog.Infof("Label %s not found in CIDRMap, proceeding with delete", labelSelector) logger.Info("Label not found in CIDRMap, proceeding with delete", "labelSelector", labelSelector)
return nil return nil
} }
@ -1196,7 +1204,7 @@ func (r *multiCIDRRangeAllocator) deleteClusterCIDR(clusterCIDR *networkingv1alp
r.cidrMap[labelSelector] = clusterCIDRSetList r.cidrMap[labelSelector] = clusterCIDRSetList
return nil return nil
} }
klog.V(2).Info("clusterCIDR not found, proceeding with delete", "Name", clusterCIDR.Name, "label", labelSelector) logger.V(2).Info("clusterCIDR not found, proceeding with delete", "clusterCIDR", clusterCIDR.Name, "label", labelSelector)
return nil return nil
} }
@ -1217,7 +1225,7 @@ func (r *multiCIDRRangeAllocator) nodeSelectorKey(clusterCIDR *networkingv1alpha
return nodeSelector.String(), nil return nodeSelector.String(), nil
} }
func listClusterCIDRs(kubeClient clientset.Interface) (*networkingv1alpha1.ClusterCIDRList, error) { func listClusterCIDRs(ctx context.Context, kubeClient clientset.Interface) (*networkingv1alpha1.ClusterCIDRList, error) {
var clusterCIDRList *networkingv1alpha1.ClusterCIDRList var clusterCIDRList *networkingv1alpha1.ClusterCIDRList
// We must poll because apiserver might not be up. This error causes // We must poll because apiserver might not be up. This error causes
// controller manager to restart. // controller manager to restart.
@ -1230,19 +1238,20 @@ func listClusterCIDRs(kubeClient clientset.Interface) (*networkingv1alpha1.Clust
Steps: 11, Steps: 11,
} }
logger := klog.FromContext(ctx)
if pollErr := wait.ExponentialBackoff(backoff, func() (bool, error) { if pollErr := wait.ExponentialBackoff(backoff, func() (bool, error) {
var err error var err error
clusterCIDRList, err = kubeClient.NetworkingV1alpha1().ClusterCIDRs().List(context.TODO(), metav1.ListOptions{ clusterCIDRList, err = kubeClient.NetworkingV1alpha1().ClusterCIDRs().List(ctx, metav1.ListOptions{
FieldSelector: fields.Everything().String(), FieldSelector: fields.Everything().String(),
LabelSelector: labels.Everything().String(), LabelSelector: labels.Everything().String(),
}) })
if err != nil { if err != nil {
klog.Errorf("Failed to list all clusterCIDRs: %v", err) logger.Error(err, "Failed to list all clusterCIDRs")
return false, nil return false, nil
} }
return true, nil return true, nil
}); pollErr != nil { }); pollErr != nil {
klog.Errorf("Failed to list clusterCIDRs (after %v)", time.Now().Sub(startTimestamp)) logger.Error(nil, "Failed to list clusterCIDRs", "latency", time.Now().Sub(startTimestamp))
return nil, fmt.Errorf("failed to list all clusterCIDRs in %v, cannot proceed without updating CIDR map", return nil, fmt.Errorf("failed to list all clusterCIDRs in %v, cannot proceed without updating CIDR map",
apiserverStartupGracePeriod) apiserverStartupGracePeriod)
} }
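listClusterCIDRs keeps its wait.ExponentialBackoff retry loop; only the List context and the logging change. A reduced sketch of that retry shape with the logger pulled from the context once, outside the poll closure; pollWithLogger and its backoff values are illustrative:

package example

import (
	"context"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/klog/v2"
)

// pollWithLogger retries fn with exponential backoff, logging each failed
// attempt through the contextual logger instead of the klog globals.
func pollWithLogger(ctx context.Context, fn func(context.Context) error) error {
	logger := klog.FromContext(ctx)
	backoff := wait.Backoff{
		Duration: time.Second, // illustrative values
		Factor:   1.2,
		Steps:    11,
	}
	return wait.ExponentialBackoff(backoff, func() (bool, error) {
		if err := fn(ctx); err != nil {
			logger.Error(err, "Attempt failed, will retry")
			return false, nil // swallow the error so the backoff continues
		}
		return true, nil
	})
}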

View File

@ -34,6 +34,7 @@ import (
"k8s.io/client-go/kubernetes/fake" "k8s.io/client-go/kubernetes/fake"
k8stesting "k8s.io/client-go/testing" k8stesting "k8s.io/client-go/testing"
"k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/cache"
"k8s.io/klog/v2/ktesting"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
"k8s.io/kubernetes/pkg/controller" "k8s.io/kubernetes/pkg/controller"
cidrset "k8s.io/kubernetes/pkg/controller/nodeipam/ipam/multicidrset" cidrset "k8s.io/kubernetes/pkg/controller/nodeipam/ipam/multicidrset"
@ -473,6 +474,7 @@ func TestMultiCIDROccupyPreExistingCIDR(t *testing.T) {
} }
// test function // test function
_, ctx := ktesting.NewTestContext(t)
for _, tc := range testCaseMultiCIDRs { for _, tc := range testCaseMultiCIDRs {
t.Run(tc.description, func(t *testing.T) { t.Run(tc.description, func(t *testing.T) {
// Initialize the range allocator. // Initialize the range allocator.
@ -482,7 +484,7 @@ func TestMultiCIDROccupyPreExistingCIDR(t *testing.T) {
fakeClusterCIDRInformer := fakeInformerFactory.Networking().V1alpha1().ClusterCIDRs() fakeClusterCIDRInformer := fakeInformerFactory.Networking().V1alpha1().ClusterCIDRs()
nodeList, _ := tc.fakeNodeHandler.List(context.TODO(), metav1.ListOptions{}) nodeList, _ := tc.fakeNodeHandler.List(context.TODO(), metav1.ListOptions{})
_, err := NewMultiCIDRRangeAllocator(tc.fakeNodeHandler, fakeNodeInformer, fakeClusterCIDRInformer, tc.allocatorParams, nodeList, tc.testCIDRMap) _, err := NewMultiCIDRRangeAllocator(ctx, tc.fakeNodeHandler, fakeNodeInformer, fakeClusterCIDRInformer, tc.allocatorParams, nodeList, tc.testCIDRMap)
if err == nil && tc.ctrlCreateFail { if err == nil && tc.ctrlCreateFail {
t.Fatalf("creating range allocator was expected to fail, but it did not") t.Fatalf("creating range allocator was expected to fail, but it did not")
} }
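The tests obtain both the logger and the context from k8s.io/klog/v2/ktesting, which routes klog output through the testing.T. A minimal sketch of the pattern, assuming only klog/v2 and its ktesting package; TestContextualSketch and helperUnderTest are illustrative:

package example

import (
	"context"
	"testing"

	"k8s.io/klog/v2"
	"k8s.io/klog/v2/ktesting"
)

// TestContextualSketch shows the shape adopted above: ktesting returns a
// per-test logger plus a context carrying it, so code under test can keep
// calling klog.FromContext(ctx).
func TestContextualSketch(t *testing.T) {
	logger, ctx := ktesting.NewTestContext(t)
	logger.V(4).Info("shown only when test verbosity allows it")
	helperUnderTest(ctx)
}

func helperUnderTest(ctx context.Context) {
	klog.FromContext(ctx).Info("routed to the test's log output")
}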
@ -1177,6 +1179,8 @@ func TestMultiCIDRAllocateOrOccupyCIDRSuccess(t *testing.T) {
}, },
} }
logger, ctx := ktesting.NewTestContext(t)
// test function // test function
testFunc := func(tc testCaseMultiCIDR) { testFunc := func(tc testCaseMultiCIDR) {
nodeList, _ := tc.fakeNodeHandler.List(context.TODO(), metav1.ListOptions{}) nodeList, _ := tc.fakeNodeHandler.List(context.TODO(), metav1.ListOptions{})
@ -1185,7 +1189,7 @@ func TestMultiCIDRAllocateOrOccupyCIDRSuccess(t *testing.T) {
fakeClient := &fake.Clientset{} fakeClient := &fake.Clientset{}
fakeInformerFactory := informers.NewSharedInformerFactory(fakeClient, controller.NoResyncPeriodFunc()) fakeInformerFactory := informers.NewSharedInformerFactory(fakeClient, controller.NoResyncPeriodFunc())
fakeClusterCIDRInformer := fakeInformerFactory.Networking().V1alpha1().ClusterCIDRs() fakeClusterCIDRInformer := fakeInformerFactory.Networking().V1alpha1().ClusterCIDRs()
allocator, err := NewMultiCIDRRangeAllocator(tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), fakeClusterCIDRInformer, tc.allocatorParams, nodeList, tc.testCIDRMap) allocator, err := NewMultiCIDRRangeAllocator(ctx, tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), fakeClusterCIDRInformer, tc.allocatorParams, nodeList, tc.testCIDRMap)
if err != nil { if err != nil {
t.Errorf("%v: failed to create CIDRRangeAllocator with error %v", tc.description, err) t.Errorf("%v: failed to create CIDRRangeAllocator with error %v", tc.description, err)
return return
@ -1230,7 +1234,7 @@ func TestMultiCIDRAllocateOrOccupyCIDRSuccess(t *testing.T) {
if node.Spec.PodCIDRs == nil { if node.Spec.PodCIDRs == nil {
updateCount++ updateCount++
} }
if err := allocator.AllocateOrOccupyCIDR(node); err != nil { if err := allocator.AllocateOrOccupyCIDR(logger, node); err != nil {
t.Errorf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err) t.Errorf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err)
} }
} }
@ -1308,13 +1312,15 @@ func TestMultiCIDRAllocateOrOccupyCIDRFailure(t *testing.T) {
}, },
} }
logger, ctx := ktesting.NewTestContext(t)
testFunc := func(tc testCaseMultiCIDR) { testFunc := func(tc testCaseMultiCIDR) {
fakeClient := &fake.Clientset{} fakeClient := &fake.Clientset{}
fakeInformerFactory := informers.NewSharedInformerFactory(fakeClient, controller.NoResyncPeriodFunc()) fakeInformerFactory := informers.NewSharedInformerFactory(fakeClient, controller.NoResyncPeriodFunc())
fakeClusterCIDRInformer := fakeInformerFactory.Networking().V1alpha1().ClusterCIDRs() fakeClusterCIDRInformer := fakeInformerFactory.Networking().V1alpha1().ClusterCIDRs()
// Initialize the range allocator. // Initialize the range allocator.
allocator, err := NewMultiCIDRRangeAllocator(tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), fakeClusterCIDRInformer, tc.allocatorParams, nil, tc.testCIDRMap) allocator, err := NewMultiCIDRRangeAllocator(ctx, tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), fakeClusterCIDRInformer, tc.allocatorParams, nil, tc.testCIDRMap)
if err != nil { if err != nil {
t.Logf("%v: failed to create CIDRRangeAllocator with error %v", tc.description, err) t.Logf("%v: failed to create CIDRRangeAllocator with error %v", tc.description, err)
} }
@ -1353,7 +1359,7 @@ func TestMultiCIDRAllocateOrOccupyCIDRFailure(t *testing.T) {
} }
} }
if err := allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0]); err == nil { if err := allocator.AllocateOrOccupyCIDR(logger, tc.fakeNodeHandler.Existing[0]); err == nil {
t.Errorf("%v: unexpected success in AllocateOrOccupyCIDR: %v", tc.description, err) t.Errorf("%v: unexpected success in AllocateOrOccupyCIDR: %v", tc.description, err)
} }
// We don't expect any updates, so just sleep for some time // We don't expect any updates, so just sleep for some time
@ -1498,13 +1504,13 @@ func TestMultiCIDRReleaseCIDRSuccess(t *testing.T) {
}, },
}, },
} }
logger, ctx := ktesting.NewTestContext(t)
testFunc := func(tc releasetestCaseMultiCIDR) { testFunc := func(tc releasetestCaseMultiCIDR) {
fakeClient := &fake.Clientset{} fakeClient := &fake.Clientset{}
fakeInformerFactory := informers.NewSharedInformerFactory(fakeClient, controller.NoResyncPeriodFunc()) fakeInformerFactory := informers.NewSharedInformerFactory(fakeClient, controller.NoResyncPeriodFunc())
fakeClusterCIDRInformer := fakeInformerFactory.Networking().V1alpha1().ClusterCIDRs() fakeClusterCIDRInformer := fakeInformerFactory.Networking().V1alpha1().ClusterCIDRs()
// Initialize the range allocator. // Initialize the range allocator.
allocator, _ := NewMultiCIDRRangeAllocator(tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), fakeClusterCIDRInformer, tc.allocatorParams, nil, tc.testCIDRMap) allocator, _ := NewMultiCIDRRangeAllocator(ctx, tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), fakeClusterCIDRInformer, tc.allocatorParams, nil, tc.testCIDRMap)
rangeAllocator, ok := allocator.(*multiCIDRRangeAllocator) rangeAllocator, ok := allocator.(*multiCIDRRangeAllocator)
if !ok { if !ok {
t.Logf("%v: found non-default implementation of CIDRAllocator, skipping white-box test...", tc.description) t.Logf("%v: found non-default implementation of CIDRAllocator, skipping white-box test...", tc.description)
@ -1540,7 +1546,7 @@ func TestMultiCIDRReleaseCIDRSuccess(t *testing.T) {
} }
} }
err := allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0]) err := allocator.AllocateOrOccupyCIDR(logger, tc.fakeNodeHandler.Existing[0])
if len(tc.expectedAllocatedCIDRFirstRound) != 0 { if len(tc.expectedAllocatedCIDRFirstRound) != 0 {
if err != nil { if err != nil {
t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err) t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err)
@ -1570,12 +1576,12 @@ func TestMultiCIDRReleaseCIDRSuccess(t *testing.T) {
}, },
} }
nodeToRelease.Spec.PodCIDRs = cidrToRelease nodeToRelease.Spec.PodCIDRs = cidrToRelease
err = allocator.ReleaseCIDR(&nodeToRelease) err = allocator.ReleaseCIDR(logger, &nodeToRelease)
if err != nil { if err != nil {
t.Fatalf("%v: unexpected error in ReleaseCIDR: %v", tc.description, err) t.Fatalf("%v: unexpected error in ReleaseCIDR: %v", tc.description, err)
} }
} }
if err = allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0]); err != nil { if err = allocator.AllocateOrOccupyCIDR(logger, tc.fakeNodeHandler.Existing[0]); err != nil {
t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err) t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err)
} }
if err := test.WaitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, 1, wait.ForeverTestTimeout); err != nil { if err := test.WaitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, 1, wait.ForeverTestTimeout); err != nil {
@ -1614,7 +1620,7 @@ type clusterCIDRController struct {
clusterCIDRStore cache.Store clusterCIDRStore cache.Store
} }
func newController() (*fake.Clientset, *clusterCIDRController) { func newController(ctx context.Context) (*fake.Clientset, *clusterCIDRController) {
client := fake.NewSimpleClientset() client := fake.NewSimpleClientset()
informerFactory := informers.NewSharedInformerFactory(client, controller.NoResyncPeriodFunc()) informerFactory := informers.NewSharedInformerFactory(client, controller.NoResyncPeriodFunc())
@ -1657,7 +1663,7 @@ func newController() (*fake.Clientset, *clusterCIDRController) {
testCIDRMap := make(map[string][]*cidrset.ClusterCIDR, 0) testCIDRMap := make(map[string][]*cidrset.ClusterCIDR, 0)
// Initialize the range allocator. // Initialize the range allocator.
ra, _ := NewMultiCIDRRangeAllocator(client, nodeInformer, cccInformer, allocatorParams, nil, testCIDRMap) ra, _ := NewMultiCIDRRangeAllocator(ctx, client, nodeInformer, cccInformer, allocatorParams, nil, testCIDRMap)
cccController := ra.(*multiCIDRRangeAllocator) cccController := ra.(*multiCIDRRangeAllocator)
cccController.clusterCIDRSynced = alwaysReady cccController.clusterCIDRSynced = alwaysReady
@ -1671,8 +1677,8 @@ func newController() (*fake.Clientset, *clusterCIDRController) {
// Ensure default ClusterCIDR is created during bootstrap. // Ensure default ClusterCIDR is created during bootstrap.
func TestClusterCIDRDefault(t *testing.T) { func TestClusterCIDRDefault(t *testing.T) {
defaultCCC := makeClusterCIDR(defaultClusterCIDRName, "192.168.0.0/16", "", 8, nil) defaultCCC := makeClusterCIDR(defaultClusterCIDRName, "192.168.0.0/16", "", 8, nil)
_, ctx := ktesting.NewTestContext(t)
client, _ := newController() client, _ := newController(ctx)
createdCCC, err := client.NetworkingV1alpha1().ClusterCIDRs().Get(context.TODO(), defaultClusterCIDRName, metav1.GetOptions{}) createdCCC, err := client.NetworkingV1alpha1().ClusterCIDRs().Get(context.TODO(), defaultClusterCIDRName, metav1.GetOptions{})
assert.Nil(t, err, "Expected no error getting clustercidr objects") assert.Nil(t, err, "Expected no error getting clustercidr objects")
assert.Equal(t, defaultCCC.Spec, createdCCC.Spec) assert.Equal(t, defaultCCC.Spec, createdCCC.Spec)
@ -1752,11 +1758,11 @@ func TestSyncClusterCIDRCreate(t *testing.T) {
wantErr: true, wantErr: true,
}, },
} }
_, ctx := ktesting.NewTestContext(t)
client, cccController := newController() client, cccController := newController(ctx)
for _, tc := range tests { for _, tc := range tests {
cccController.clusterCIDRStore.Add(tc.ccc) cccController.clusterCIDRStore.Add(tc.ccc)
err := cccController.syncClusterCIDR(tc.ccc.Name) err := cccController.syncClusterCIDR(ctx, tc.ccc.Name)
if tc.wantErr { if tc.wantErr {
assert.Error(t, err) assert.Error(t, err)
continue continue
@ -1773,30 +1779,32 @@ func TestSyncClusterCIDRCreate(t *testing.T) {
// Ensure syncClusterCIDR for ClusterCIDR delete removes the ClusterCIDR. // Ensure syncClusterCIDR for ClusterCIDR delete removes the ClusterCIDR.
func TestSyncClusterCIDRDelete(t *testing.T) { func TestSyncClusterCIDRDelete(t *testing.T) {
_, cccController := newController() _, ctx := ktesting.NewTestContext(t)
_, cccController := newController(ctx)
testCCC := makeClusterCIDR("testing-1", "10.1.0.0/16", "", 8, makeNodeSelector("foo", v1.NodeSelectorOpIn, []string{"bar"})) testCCC := makeClusterCIDR("testing-1", "10.1.0.0/16", "", 8, makeNodeSelector("foo", v1.NodeSelectorOpIn, []string{"bar"}))
cccController.clusterCIDRStore.Add(testCCC) cccController.clusterCIDRStore.Add(testCCC)
err := cccController.syncClusterCIDR(testCCC.Name) err := cccController.syncClusterCIDR(ctx, testCCC.Name)
assert.NoError(t, err) assert.NoError(t, err)
deletionTimestamp := metav1.Now() deletionTimestamp := metav1.Now()
testCCC.DeletionTimestamp = &deletionTimestamp testCCC.DeletionTimestamp = &deletionTimestamp
cccController.clusterCIDRStore.Update(testCCC) cccController.clusterCIDRStore.Update(testCCC)
err = cccController.syncClusterCIDR(testCCC.Name) err = cccController.syncClusterCIDR(ctx, testCCC.Name)
assert.NoError(t, err) assert.NoError(t, err)
} }
// Ensure syncClusterCIDR for ClusterCIDR delete does not remove ClusterCIDR // Ensure syncClusterCIDR for ClusterCIDR delete does not remove ClusterCIDR
// if a node is associated with the ClusterCIDR. // if a node is associated with the ClusterCIDR.
func TestSyncClusterCIDRDeleteWithNodesAssociated(t *testing.T) { func TestSyncClusterCIDRDeleteWithNodesAssociated(t *testing.T) {
client, cccController := newController() _, ctx := ktesting.NewTestContext(t)
client, cccController := newController(ctx)
testCCC := makeClusterCIDR("testing-1", "10.1.0.0/16", "", 8, makeNodeSelector("foo", v1.NodeSelectorOpIn, []string{"bar"})) testCCC := makeClusterCIDR("testing-1", "10.1.0.0/16", "", 8, makeNodeSelector("foo", v1.NodeSelectorOpIn, []string{"bar"}))
cccController.clusterCIDRStore.Add(testCCC) cccController.clusterCIDRStore.Add(testCCC)
err := cccController.syncClusterCIDR(testCCC.Name) err := cccController.syncClusterCIDR(ctx, testCCC.Name)
assert.NoError(t, err) assert.NoError(t, err)
// Mock the IPAM controller behavior associating node with ClusterCIDR. // Mock the IPAM controller behavior associating node with ClusterCIDR.
@ -1810,7 +1818,7 @@ func TestSyncClusterCIDRDeleteWithNodesAssociated(t *testing.T) {
deletionTimestamp := metav1.Now() deletionTimestamp := metav1.Now()
createdCCC.DeletionTimestamp = &deletionTimestamp createdCCC.DeletionTimestamp = &deletionTimestamp
cccController.clusterCIDRStore.Update(createdCCC) cccController.clusterCIDRStore.Update(createdCCC)
err = cccController.syncClusterCIDR(createdCCC.Name) err = cccController.syncClusterCIDR(ctx, createdCCC.Name)
assert.Error(t, err, fmt.Sprintf("ClusterCIDR %s marked as terminating, won't be deleted until all associated nodes are deleted", createdCCC.Name)) assert.Error(t, err, fmt.Sprintf("ClusterCIDR %s marked as terminating, won't be deleted until all associated nodes are deleted", createdCCC.Name))
} }

View File

@ -22,7 +22,7 @@ import (
"testing" "testing"
"k8s.io/component-base/metrics/testutil" "k8s.io/component-base/metrics/testutil"
"k8s.io/klog/v2" "k8s.io/klog/v2/ktesting"
utilnet "k8s.io/utils/net" utilnet "k8s.io/utils/net"
) )
@ -568,6 +568,7 @@ func TestGetBitforCIDR(t *testing.T) {
}, },
} }
logger, _ := ktesting.NewTestContext(t)
for _, tc := range cases { for _, tc := range cases {
_, clusterCIDR, err := utilnet.ParseCIDRSloppy(tc.clusterCIDRStr) _, clusterCIDR, err := utilnet.ParseCIDRSloppy(tc.clusterCIDRStr)
if err != nil { if err != nil {
@ -585,17 +586,17 @@ func TestGetBitforCIDR(t *testing.T) {
got, err := cs.getIndexForCIDR(subnetCIDR) got, err := cs.getIndexForCIDR(subnetCIDR)
if err == nil && tc.expectErr { if err == nil && tc.expectErr {
klog.Errorf("expected error but got null for %v", tc.description) logger.Error(nil, "Expected error but got null", "description", tc.description)
continue continue
} }
if err != nil && !tc.expectErr { if err != nil && !tc.expectErr {
klog.Errorf("unexpected error: %v for %v", err, tc.description) logger.Error(err, "Unexpected error", "description", tc.description)
continue continue
} }
if got != tc.expectedBit { if got != tc.expectedBit {
klog.Errorf("expected %v, but got %v for %v", tc.expectedBit, got, tc.description) logger.Error(nil, "Unexpected value", "description", tc.description, "expected", tc.expectedBit, "got", got)
} }
} }
} }

View File

@ -17,6 +17,7 @@ limitations under the License.
package ipam package ipam
import ( import (
"context"
"fmt" "fmt"
"net" "net"
"sync" "sync"
@ -66,9 +67,10 @@ type rangeAllocator struct {
// Caller must always pass in a list of existing nodes so the new allocator // Caller must always pass in a list of existing nodes so the new allocator
// can initialize its CIDR map. NodeList is only nil in testing. // can initialize its CIDR map. NodeList is only nil in testing.
// Caller must ensure that ClusterCIDRs are semantically correct, e.g. 1 for non-DualStack, 2 for DualStack. // Caller must ensure that ClusterCIDRs are semantically correct, e.g. 1 for non-DualStack, 2 for DualStack.
func NewCIDRRangeAllocator(client clientset.Interface, nodeInformer informers.NodeInformer, allocatorParams CIDRAllocatorParams, nodeList *v1.NodeList) (CIDRAllocator, error) { func NewCIDRRangeAllocator(logger klog.Logger, client clientset.Interface, nodeInformer informers.NodeInformer, allocatorParams CIDRAllocatorParams, nodeList *v1.NodeList) (CIDRAllocator, error) {
if client == nil { if client == nil {
klog.Fatalf("kubeClient is nil when starting NodeController") logger.Error(nil, "kubeClient is nil when starting CIDRRangeAllocator")
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
} }
eventBroadcaster := record.NewBroadcaster() eventBroadcaster := record.NewBroadcaster()
@ -98,24 +100,24 @@ func NewCIDRRangeAllocator(client clientset.Interface, nodeInformer informers.No
} }
if allocatorParams.ServiceCIDR != nil { if allocatorParams.ServiceCIDR != nil {
ra.filterOutServiceRange(allocatorParams.ServiceCIDR) ra.filterOutServiceRange(logger, allocatorParams.ServiceCIDR)
} else { } else {
klog.Info("No Service CIDR provided. Skipping filtering out service addresses.") logger.Info("No Service CIDR provided. Skipping filtering out service addresses")
} }
if allocatorParams.SecondaryServiceCIDR != nil { if allocatorParams.SecondaryServiceCIDR != nil {
ra.filterOutServiceRange(allocatorParams.SecondaryServiceCIDR) ra.filterOutServiceRange(logger, allocatorParams.SecondaryServiceCIDR)
} else { } else {
klog.Info("No Secondary Service CIDR provided. Skipping filtering out secondary service addresses.") logger.Info("No Secondary Service CIDR provided. Skipping filtering out secondary service addresses")
} }
if nodeList != nil { if nodeList != nil {
for _, node := range nodeList.Items { for _, node := range nodeList.Items {
if len(node.Spec.PodCIDRs) == 0 { if len(node.Spec.PodCIDRs) == 0 {
klog.V(4).Infof("Node %v has no CIDR, ignoring", node.Name) logger.V(4).Info("Node has no CIDR, ignoring", "node", klog.KObj(&node))
continue continue
} }
klog.V(4).Infof("Node %v has CIDR %s, occupying it in CIDR map", node.Name, node.Spec.PodCIDR) logger.V(4).Info("Node has CIDR, occupying it in CIDR map", "node", klog.KObj(&node), "podCIDR", node.Spec.PodCIDR)
if err := ra.occupyCIDRs(&node); err != nil { if err := ra.occupyCIDRs(&node); err != nil {
// This will happen if: // This will happen if:
// 1. We find garbage in the podCIDRs field. Retrying is useless. // 1. We find garbage in the podCIDRs field. Retrying is useless.
@ -127,7 +129,9 @@ func NewCIDRRangeAllocator(client clientset.Interface, nodeInformer informers.No
} }
nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: controllerutil.CreateAddNodeHandler(ra.AllocateOrOccupyCIDR), AddFunc: controllerutil.CreateAddNodeHandler(func(node *v1.Node) error {
return ra.AllocateOrOccupyCIDR(logger, node)
}),
UpdateFunc: controllerutil.CreateUpdateNodeHandler(func(_, newNode *v1.Node) error { UpdateFunc: controllerutil.CreateUpdateNodeHandler(func(_, newNode *v1.Node) error {
// If the PodCIDRs list is not empty we either: // If the PodCIDRs list is not empty we either:
// - already processed a Node that already had CIDRs after NC restarted // - already processed a Node that already had CIDRs after NC restarted
@ -149,52 +153,56 @@ func NewCIDRRangeAllocator(client clientset.Interface, nodeInformer informers.No
// state is correct. // state is correct.
// Restart of NC fixes the issue. // Restart of NC fixes the issue.
if len(newNode.Spec.PodCIDRs) == 0 { if len(newNode.Spec.PodCIDRs) == 0 {
return ra.AllocateOrOccupyCIDR(newNode) return ra.AllocateOrOccupyCIDR(logger, newNode)
} }
return nil return nil
}), }),
DeleteFunc: controllerutil.CreateDeleteNodeHandler(ra.ReleaseCIDR), DeleteFunc: controllerutil.CreateDeleteNodeHandler(func(node *v1.Node) error {
return ra.ReleaseCIDR(logger, node)
}),
}) })
return ra, nil return ra, nil
} }
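Because the informer event handlers keep their logger-free func(*v1.Node) error signature, the hunk above captures the logger in closures rather than changing the handler types. A condensed sketch of that wrapping with the types pared down; wireHandlers is illustrative:

package example

import (
	v1 "k8s.io/api/core/v1"
	"k8s.io/klog/v2"
)

type nodeHandler func(node *v1.Node) error

// wireHandlers keeps the handlers' original signature and lets each
// closure forward the logger the constructor received.
func wireHandlers(logger klog.Logger,
	allocate func(klog.Logger, *v1.Node) error,
	release func(klog.Logger, *v1.Node) error) (add, del nodeHandler) {
	add = func(node *v1.Node) error { return allocate(logger, node) }
	del = func(node *v1.Node) error { return release(logger, node) }
	return add, del
}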
func (r *rangeAllocator) Run(stopCh <-chan struct{}) { func (r *rangeAllocator) Run(ctx context.Context) {
defer utilruntime.HandleCrash() defer utilruntime.HandleCrash()
// Start event processing pipeline. // Start event processing pipeline.
r.broadcaster.StartStructuredLogging(0) r.broadcaster.StartStructuredLogging(0)
klog.Infof("Sending events to api server.") logger := klog.FromContext(ctx)
logger.Info("Sending events to api server")
r.broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: r.client.CoreV1().Events("")}) r.broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: r.client.CoreV1().Events("")})
defer r.broadcaster.Shutdown() defer r.broadcaster.Shutdown()
klog.Infof("Starting range CIDR allocator") logger.Info("Starting range CIDR allocator")
defer klog.Infof("Shutting down range CIDR allocator") defer logger.Info("Shutting down range CIDR allocator")
if !cache.WaitForNamedCacheSync("cidrallocator", stopCh, r.nodesSynced) { if !cache.WaitForNamedCacheSync("cidrallocator", ctx.Done(), r.nodesSynced) {
return return
} }
for i := 0; i < cidrUpdateWorkers; i++ { for i := 0; i < cidrUpdateWorkers; i++ {
go r.worker(stopCh) go r.worker(ctx)
} }
<-stopCh <-ctx.Done()
} }
func (r *rangeAllocator) worker(stopChan <-chan struct{}) { func (r *rangeAllocator) worker(ctx context.Context) {
logger := klog.FromContext(ctx)
for { for {
select { select {
case workItem, ok := <-r.nodeCIDRUpdateChannel: case workItem, ok := <-r.nodeCIDRUpdateChannel:
if !ok { if !ok {
klog.Warning("Channel nodeCIDRUpdateChannel was unexpectedly closed") logger.Info("Channel nodeCIDRUpdateChannel was unexpectedly closed")
return return
} }
if err := r.updateCIDRsAllocation(workItem); err != nil { if err := r.updateCIDRsAllocation(logger, workItem); err != nil {
// Requeue the failed node for update again. // Requeue the failed node for update again.
r.nodeCIDRUpdateChannel <- workItem r.nodeCIDRUpdateChannel <- workItem
} }
case <-stopChan: case <-ctx.Done():
return return
} }
} }
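Run and worker swap the stop channel for a context: ctx.Done() replaces stopCh in the cache sync and in the select loop, and the logger is recovered once per goroutine. A stripped-down sketch of the same shape; runWorkers and the string work items are illustrative:

package example

import (
	"context"

	"k8s.io/klog/v2"
)

// runWorkers mirrors the converted Run/worker pair: one context both
// carries the logger and signals shutdown, replacing the stopCh parameter.
func runWorkers(ctx context.Context, work <-chan string, workers int,
	handle func(klog.Logger, string) error) {
	logger := klog.FromContext(ctx)
	for i := 0; i < workers; i++ {
		go func() {
			for {
				select {
				case item, ok := <-work:
					if !ok {
						logger.Info("Work channel was unexpectedly closed")
						return
					}
					if err := handle(logger, item); err != nil {
						logger.Error(err, "Handling failed; real code requeues the item", "item", item)
					}
				case <-ctx.Done():
					return
				}
			}
		}()
	}
	<-ctx.Done()
}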
@ -244,12 +252,12 @@ func (r *rangeAllocator) occupyCIDRs(node *v1.Node) error {
// WARNING: If you're adding any return calls or defer any more work from this // WARNING: If you're adding any return calls or defer any more work from this
// function you have to make sure to update nodesInProcessing properly with the // function you have to make sure to update nodesInProcessing properly with the
// disposition of the node when the work is done. // disposition of the node when the work is done.
func (r *rangeAllocator) AllocateOrOccupyCIDR(node *v1.Node) error { func (r *rangeAllocator) AllocateOrOccupyCIDR(logger klog.Logger, node *v1.Node) error {
if node == nil { if node == nil {
return nil return nil
} }
if !r.insertNodeToProcessing(node.Name) { if !r.insertNodeToProcessing(node.Name) {
klog.V(2).Infof("Node %v is already in a process of CIDR assignment.", node.Name) logger.V(2).Info("Node is already in a process of CIDR assignment", "node", klog.KObj(node))
return nil return nil
} }
@ -273,13 +281,13 @@ func (r *rangeAllocator) AllocateOrOccupyCIDR(node *v1.Node) error {
} }
// queue the assignment // queue the assignment
klog.V(4).Infof("Putting node %s with CIDR %v into the work queue", node.Name, allocated.allocatedCIDRs) logger.V(4).Info("Putting node with CIDR into the work queue", "node", klog.KObj(node), "CIDRs", allocated.allocatedCIDRs)
r.nodeCIDRUpdateChannel <- allocated r.nodeCIDRUpdateChannel <- allocated
return nil return nil
} }
// ReleaseCIDR marks node.podCIDRs[...] as unused in our tracked cidrSets // ReleaseCIDR marks node.podCIDRs[...] as unused in our tracked cidrSets
func (r *rangeAllocator) ReleaseCIDR(node *v1.Node) error { func (r *rangeAllocator) ReleaseCIDR(logger klog.Logger, node *v1.Node) error {
if node == nil || len(node.Spec.PodCIDRs) == 0 { if node == nil || len(node.Spec.PodCIDRs) == 0 {
return nil return nil
} }
@ -297,7 +305,7 @@ func (r *rangeAllocator) ReleaseCIDR(node *v1.Node) error {
return fmt.Errorf("node:%s has an allocated cidr: %v at index:%v that does not exist in cluster cidrs configuration", node.Name, cidr, idx) return fmt.Errorf("node:%s has an allocated cidr: %v at index:%v that does not exist in cluster cidrs configuration", node.Name, cidr, idx)
} }
klog.V(4).Infof("release CIDR %s for node:%v", cidr, node.Name) logger.V(4).Info("Release CIDR for node", "CIDR", cidr, "node", klog.KObj(node))
if err = r.cidrSets[idx].Release(podCIDR); err != nil { if err = r.cidrSets[idx].Release(podCIDR); err != nil {
return fmt.Errorf("error when releasing CIDR %v: %v", cidr, err) return fmt.Errorf("error when releasing CIDR %v: %v", cidr, err)
} }
@ -307,7 +315,7 @@ func (r *rangeAllocator) ReleaseCIDR(node *v1.Node) error {
// Marks all CIDRs with subNetMaskSize that belong to serviceCIDR as used across all cidrs // Marks all CIDRs with subNetMaskSize that belong to serviceCIDR as used across all cidrs
// so that they won't be assignable. // so that they won't be assignable.
func (r *rangeAllocator) filterOutServiceRange(serviceCIDR *net.IPNet) { func (r *rangeAllocator) filterOutServiceRange(logger klog.Logger, serviceCIDR *net.IPNet) {
// Checks if service CIDR has a nonempty intersection with cluster // Checks if service CIDR has a nonempty intersection with cluster
// CIDR. It is the case if either clusterCIDR contains serviceCIDR with // CIDR. It is the case if either clusterCIDR contains serviceCIDR with
// clusterCIDR's Mask applied (this means that clusterCIDR contains // clusterCIDR's Mask applied (this means that clusterCIDR contains
@ -321,20 +329,20 @@ func (r *rangeAllocator) filterOutServiceRange(serviceCIDR *net.IPNet) {
// at this point, len(cidrSet) == len(clusterCidr) // at this point, len(cidrSet) == len(clusterCidr)
if err := r.cidrSets[idx].Occupy(serviceCIDR); err != nil { if err := r.cidrSets[idx].Occupy(serviceCIDR); err != nil {
klog.Errorf("Error filtering out service cidr out cluster cidr:%v (index:%v) %v: %v", cidr, idx, serviceCIDR, err) logger.Error(err, "Error filtering out service cidr out cluster cidr", "CIDR", cidr, "index", idx, "serviceCIDR", serviceCIDR)
} }
} }
} }
// updateCIDRsAllocation assigns CIDR to Node and sends an update to the API server. // updateCIDRsAllocation assigns CIDR to Node and sends an update to the API server.
func (r *rangeAllocator) updateCIDRsAllocation(data nodeReservedCIDRs) error { func (r *rangeAllocator) updateCIDRsAllocation(logger klog.Logger, data nodeReservedCIDRs) error {
var err error var err error
var node *v1.Node var node *v1.Node
defer r.removeNodeFromProcessing(data.nodeName) defer r.removeNodeFromProcessing(data.nodeName)
cidrsString := ipnetToStringList(data.allocatedCIDRs) cidrsString := ipnetToStringList(data.allocatedCIDRs)
node, err = r.nodeLister.Get(data.nodeName) node, err = r.nodeLister.Get(data.nodeName)
if err != nil { if err != nil {
klog.Errorf("Failed while getting node %v for updating Node.Spec.PodCIDRs: %v", data.nodeName, err) logger.Error(err, "Failed while getting node for updating Node.Spec.PodCIDRs", "node", klog.KRef("", data.nodeName))
return err return err
} }
@ -350,17 +358,17 @@ func (r *rangeAllocator) updateCIDRsAllocation(data nodeReservedCIDRs) error {
} }
} }
if match { if match {
klog.V(4).Infof("Node %v already has allocated CIDR %v. It matches the proposed one.", node.Name, data.allocatedCIDRs) logger.V(4).Info("Node already has allocated CIDR. It matches the proposed one", "node", klog.KObj(node), "CIDRs", data.allocatedCIDRs)
return nil return nil
} }
} }
// node has cidrs, release the reserved // node has cidrs, release the reserved
if len(node.Spec.PodCIDRs) != 0 { if len(node.Spec.PodCIDRs) != 0 {
klog.Errorf("Node %v already has a CIDR allocated %v. Releasing the new one.", node.Name, node.Spec.PodCIDRs) logger.Error(nil, "Node already has a CIDR allocated. Releasing the new one", "node", klog.KObj(node), "podCIDRs", node.Spec.PodCIDRs)
for idx, cidr := range data.allocatedCIDRs { for idx, cidr := range data.allocatedCIDRs {
if releaseErr := r.cidrSets[idx].Release(cidr); releaseErr != nil { if releaseErr := r.cidrSets[idx].Release(cidr); releaseErr != nil {
klog.Errorf("Error when releasing CIDR idx:%v value: %v err:%v", idx, cidr, releaseErr) logger.Error(releaseErr, "Error when releasing CIDR", "index", idx, "CIDR", cidr)
} }
} }
return nil return nil
@ -369,21 +377,21 @@ func (r *rangeAllocator) updateCIDRsAllocation(data nodeReservedCIDRs) error {
// If we reached here, it means that the node has no CIDR currently assigned. So we set it. // If we reached here, it means that the node has no CIDR currently assigned. So we set it.
for i := 0; i < cidrUpdateRetries; i++ { for i := 0; i < cidrUpdateRetries; i++ {
if err = nodeutil.PatchNodeCIDRs(r.client, types.NodeName(node.Name), cidrsString); err == nil { if err = nodeutil.PatchNodeCIDRs(r.client, types.NodeName(node.Name), cidrsString); err == nil {
klog.Infof("Set node %v PodCIDR to %v", node.Name, cidrsString) logger.Info("Set node PodCIDR", "node", klog.KObj(node), "podCIDRs", cidrsString)
return nil return nil
} }
} }
// failed release back to the pool // failed release back to the pool
klog.Errorf("Failed to update node %v PodCIDR to %v after multiple attempts: %v", node.Name, cidrsString, err) logger.Error(err, "Failed to update node PodCIDR after multiple attempts", "node", klog.KObj(node), "podCIDRs", cidrsString)
controllerutil.RecordNodeStatusChange(r.recorder, node, "CIDRAssignmentFailed") controllerutil.RecordNodeStatusChange(r.recorder, node, "CIDRAssignmentFailed")
// We accept the fact that we may leak CIDRs here. This is safer than releasing // We accept the fact that we may leak CIDRs here. This is safer than releasing
// them in case when we don't know if request went through. // them in case when we don't know if request went through.
// NodeController restart will return all falsely allocated CIDRs to the pool. // NodeController restart will return all falsely allocated CIDRs to the pool.
if !apierrors.IsServerTimeout(err) { if !apierrors.IsServerTimeout(err) {
klog.Errorf("CIDR assignment for node %v failed: %v. Releasing allocated CIDR", node.Name, err) logger.Error(err, "CIDR assignment for node failed. Releasing allocated CIDR", "node", klog.KObj(node))
for idx, cidr := range data.allocatedCIDRs { for idx, cidr := range data.allocatedCIDRs {
if releaseErr := r.cidrSets[idx].Release(cidr); releaseErr != nil { if releaseErr := r.cidrSets[idx].Release(cidr); releaseErr != nil {
klog.Errorf("Error releasing allocated CIDR for node %v: %v", node.Name, releaseErr) logger.Error(releaseErr, "Error releasing allocated CIDR for node", "node", klog.KObj(node))
} }
} }
} }

View File

@ -26,6 +26,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait" "k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes/fake" "k8s.io/client-go/kubernetes/fake"
"k8s.io/klog/v2/ktesting"
"k8s.io/kubernetes/pkg/controller/nodeipam/ipam/test" "k8s.io/kubernetes/pkg/controller/nodeipam/ipam/test"
"k8s.io/kubernetes/pkg/controller/testutil" "k8s.io/kubernetes/pkg/controller/testutil"
netutils "k8s.io/utils/net" netutils "k8s.io/utils/net"
@ -274,12 +275,13 @@ func TestOccupyPreExistingCIDR(t *testing.T) {
} }
// test function // test function
logger, _ := ktesting.NewTestContext(t)
for _, tc := range testCases { for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) { t.Run(tc.description, func(t *testing.T) {
// Initialize the range allocator. // Initialize the range allocator.
fakeNodeInformer := test.FakeNodeInformer(tc.fakeNodeHandler) fakeNodeInformer := test.FakeNodeInformer(tc.fakeNodeHandler)
nodeList, _ := tc.fakeNodeHandler.List(context.TODO(), metav1.ListOptions{}) nodeList, _ := tc.fakeNodeHandler.List(context.TODO(), metav1.ListOptions{})
_, err := NewCIDRRangeAllocator(tc.fakeNodeHandler, fakeNodeInformer, tc.allocatorParams, nodeList) _, err := NewCIDRRangeAllocator(logger, tc.fakeNodeHandler, fakeNodeInformer, tc.allocatorParams, nodeList)
if err == nil && tc.ctrlCreateFail { if err == nil && tc.ctrlCreateFail {
t.Fatalf("creating range allocator was expected to fail, but it did not") t.Fatalf("creating range allocator was expected to fail, but it did not")
} }
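The test changes all follow one recipe: obtain a per-test logger and context from ktesting instead of relying on global klog state, so output is attributed to the failing test. A minimal sketch:

package example

import (
	"testing"

	"k8s.io/klog/v2"
	"k8s.io/klog/v2/ktesting"
)

func TestWithContextualLogging(t *testing.T) {
	// logger feeds APIs that take klog.Logger; ctx feeds APIs that take
	// context.Context. Both route output through t.Log.
	logger, ctx := ktesting.NewTestContext(t)

	logger.Info("step one")
	klog.FromContext(ctx).V(2).Info("step two, recovered from the context")
}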
@ -508,11 +510,12 @@ func TestAllocateOrOccupyCIDRSuccess(t *testing.T) {
} }
// test function // test function
logger, ctx := ktesting.NewTestContext(t)
testFunc := func(tc testCase) { testFunc := func(tc testCase) {
fakeNodeInformer := test.FakeNodeInformer(tc.fakeNodeHandler) fakeNodeInformer := test.FakeNodeInformer(tc.fakeNodeHandler)
nodeList, _ := tc.fakeNodeHandler.List(context.TODO(), metav1.ListOptions{}) nodeList, _ := tc.fakeNodeHandler.List(context.TODO(), metav1.ListOptions{})
// Initialize the range allocator. // Initialize the range allocator.
allocator, err := NewCIDRRangeAllocator(tc.fakeNodeHandler, fakeNodeInformer, tc.allocatorParams, nodeList) allocator, err := NewCIDRRangeAllocator(logger, tc.fakeNodeHandler, fakeNodeInformer, tc.allocatorParams, nodeList)
if err != nil { if err != nil {
t.Errorf("%v: failed to create CIDRRangeAllocator with error %v", tc.description, err) t.Errorf("%v: failed to create CIDRRangeAllocator with error %v", tc.description, err)
return return
@ -524,7 +527,7 @@ func TestAllocateOrOccupyCIDRSuccess(t *testing.T) {
} }
rangeAllocator.nodesSynced = test.AlwaysReady rangeAllocator.nodesSynced = test.AlwaysReady
rangeAllocator.recorder = testutil.NewFakeRecorder() rangeAllocator.recorder = testutil.NewFakeRecorder()
go allocator.Run(wait.NeverStop) go allocator.Run(ctx)
// this is a bit of white box testing // this is a bit of white box testing
// pre allocate the cidrs as per the test // pre allocate the cidrs as per the test
@ -545,7 +548,7 @@ func TestAllocateOrOccupyCIDRSuccess(t *testing.T) {
if node.Spec.PodCIDRs == nil { if node.Spec.PodCIDRs == nil {
updateCount++ updateCount++
} }
if err := allocator.AllocateOrOccupyCIDR(node); err != nil { if err := allocator.AllocateOrOccupyCIDR(logger, node); err != nil {
t.Errorf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err) t.Errorf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err)
} }
} }
@ -608,10 +611,10 @@ func TestAllocateOrOccupyCIDRFailure(t *testing.T) {
}, },
}, },
} }
logger, ctx := ktesting.NewTestContext(t)
testFunc := func(tc testCase) { testFunc := func(tc testCase) {
// Initialize the range allocator. // Initialize the range allocator.
allocator, err := NewCIDRRangeAllocator(tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), tc.allocatorParams, nil) allocator, err := NewCIDRRangeAllocator(logger, tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), tc.allocatorParams, nil)
if err != nil { if err != nil {
t.Logf("%v: failed to create CIDRRangeAllocator with error %v", tc.description, err) t.Logf("%v: failed to create CIDRRangeAllocator with error %v", tc.description, err)
} }
@ -622,7 +625,7 @@ func TestAllocateOrOccupyCIDRFailure(t *testing.T) {
} }
rangeAllocator.nodesSynced = test.AlwaysReady rangeAllocator.nodesSynced = test.AlwaysReady
rangeAllocator.recorder = testutil.NewFakeRecorder() rangeAllocator.recorder = testutil.NewFakeRecorder()
go allocator.Run(wait.NeverStop) go allocator.Run(ctx)
// this is a bit of white box testing // this is a bit of white box testing
for setIdx, allocatedList := range tc.allocatedCIDRs { for setIdx, allocatedList := range tc.allocatedCIDRs {
@ -637,7 +640,7 @@ func TestAllocateOrOccupyCIDRFailure(t *testing.T) {
} }
} }
} }
if err := allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0]); err == nil { if err := allocator.AllocateOrOccupyCIDR(logger, tc.fakeNodeHandler.Existing[0]); err == nil {
t.Errorf("%v: unexpected success in AllocateOrOccupyCIDR: %v", tc.description, err) t.Errorf("%v: unexpected success in AllocateOrOccupyCIDR: %v", tc.description, err)
} }
// We don't expect any updates, so just sleep for some time // We don't expect any updates, so just sleep for some time
@ -753,10 +756,10 @@ func TestReleaseCIDRSuccess(t *testing.T) {
}, },
}, },
} }
logger, ctx := ktesting.NewTestContext(t)
testFunc := func(tc releaseTestCase) { testFunc := func(tc releaseTestCase) {
// Initialize the range allocator. // Initialize the range allocator.
allocator, _ := NewCIDRRangeAllocator(tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), tc.allocatorParams, nil) allocator, _ := NewCIDRRangeAllocator(logger, tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), tc.allocatorParams, nil)
rangeAllocator, ok := allocator.(*rangeAllocator) rangeAllocator, ok := allocator.(*rangeAllocator)
if !ok { if !ok {
t.Logf("%v: found non-default implementation of CIDRAllocator, skipping white-box test...", tc.description) t.Logf("%v: found non-default implementation of CIDRAllocator, skipping white-box test...", tc.description)
@ -764,7 +767,7 @@ func TestReleaseCIDRSuccess(t *testing.T) {
} }
rangeAllocator.nodesSynced = test.AlwaysReady rangeAllocator.nodesSynced = test.AlwaysReady
rangeAllocator.recorder = testutil.NewFakeRecorder() rangeAllocator.recorder = testutil.NewFakeRecorder()
go allocator.Run(wait.NeverStop) go allocator.Run(ctx)
// this is a bit of white box testing // this is a bit of white box testing
for setIdx, allocatedList := range tc.allocatedCIDRs { for setIdx, allocatedList := range tc.allocatedCIDRs {
@ -780,7 +783,7 @@ func TestReleaseCIDRSuccess(t *testing.T) {
} }
} }
err := allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0]) err := allocator.AllocateOrOccupyCIDR(logger, tc.fakeNodeHandler.Existing[0])
if len(tc.expectedAllocatedCIDRFirstRound) != 0 { if len(tc.expectedAllocatedCIDRFirstRound) != 0 {
if err != nil { if err != nil {
t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err) t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err)
@ -805,12 +808,12 @@ func TestReleaseCIDRSuccess(t *testing.T) {
}, },
} }
nodeToRelease.Spec.PodCIDRs = cidrToRelease nodeToRelease.Spec.PodCIDRs = cidrToRelease
err = allocator.ReleaseCIDR(&nodeToRelease) err = allocator.ReleaseCIDR(logger, &nodeToRelease)
if err != nil { if err != nil {
t.Fatalf("%v: unexpected error in ReleaseCIDR: %v", tc.description, err) t.Fatalf("%v: unexpected error in ReleaseCIDR: %v", tc.description, err)
} }
} }
if err = allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0]); err != nil { if err = allocator.AllocateOrOccupyCIDR(logger, tc.fakeNodeHandler.Existing[0]); err != nil {
t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err) t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err)
} }
if err := test.WaitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, 1, wait.ForeverTestTimeout); err != nil { if err := test.WaitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, 1, wait.ForeverTestTimeout); err != nil {
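Taken together, the call sites above imply the reshaped allocator surface; this is a hypothetical reconstruction of the interface after this commit (the real definition lives in pkg/controller/nodeipam/ipam and may differ in detail):

package ipamsketch

import (
	"context"

	v1 "k8s.io/api/core/v1"
	"k8s.io/klog/v2"
)

// CIDRAllocator sketches the surface implied by the test calls above:
// a logger-first parameter on per-call methods, a context on the
// long-running loop.
type CIDRAllocator interface {
	AllocateOrOccupyCIDR(logger klog.Logger, node *v1.Node) error
	ReleaseCIDR(logger klog.Logger, node *v1.Node) error
	Run(ctx context.Context)
}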

View File

@ -120,8 +120,8 @@ func New(c controller, cloudAlias cloudAlias, kubeAPI kubeAPI, mode NodeSyncMode
// Loop runs the sync loop for a given node. done is an optional channel that // Loop runs the sync loop for a given node. done is an optional channel that
// is closed when the Loop() returns. // is closed when the Loop() returns.
func (sync *NodeSync) Loop(done chan struct{}) { func (sync *NodeSync) Loop(logger klog.Logger, done chan struct{}) {
klog.V(2).Infof("Starting sync loop for node %q", sync.nodeName) logger.V(2).Info("Starting sync loop", "node", klog.KRef("", sync.nodeName))
defer func() { defer func() {
if done != nil { if done != nil {
@ -131,27 +131,27 @@ func (sync *NodeSync) Loop(done chan struct{}) {
timeout := sync.c.ResyncTimeout() timeout := sync.c.ResyncTimeout()
delayTimer := time.NewTimer(timeout) delayTimer := time.NewTimer(timeout)
klog.V(4).Infof("Resync node %q in %v", sync.nodeName, timeout) logger.V(4).Info("Try to resync node later", "node", klog.KRef("", sync.nodeName), "resyncTime", timeout)
for { for {
select { select {
case op, more := <-sync.opChan: case op, more := <-sync.opChan:
if !more { if !more {
klog.V(2).Infof("Stopping sync loop") logger.V(2).Info("Stopping sync loop")
return return
} }
sync.c.ReportResult(op.run(sync)) sync.c.ReportResult(op.run(logger, sync))
if !delayTimer.Stop() { if !delayTimer.Stop() {
<-delayTimer.C <-delayTimer.C
} }
case <-delayTimer.C: case <-delayTimer.C:
klog.V(4).Infof("Running resync for node %q", sync.nodeName) logger.V(4).Info("Running resync", "node", klog.KRef("", sync.nodeName))
sync.c.ReportResult((&updateOp{}).run(sync)) sync.c.ReportResult((&updateOp{}).run(logger, sync))
} }
timeout := sync.c.ResyncTimeout() timeout := sync.c.ResyncTimeout()
delayTimer.Reset(timeout) delayTimer.Reset(timeout)
klog.V(4).Infof("Resync node %q in %v", sync.nodeName, timeout) logger.V(4).Info("Try to resync node later", "node", klog.KRef("", sync.nodeName), "resyncTime", timeout)
} }
} }
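Loop has no context parameter, so the commit threads a klog.Logger through explicitly rather than reaching for the global logger from inside the goroutine. A compact runnable sketch of that shape:

package main

import (
	"time"

	"k8s.io/klog/v2"
)

// loop mirrors the shape of NodeSync.Loop above: no context is available,
// so the caller hands the goroutine a klog.Logger explicitly.
func loop(logger klog.Logger, done chan struct{}) {
	defer close(done)
	logger.V(2).Info("Starting sync loop", "node", klog.KRef("", "node1"))
	<-time.After(10 * time.Millisecond) // stand-in for the resync timer
	logger.V(4).Info("Running resync", "node", klog.KRef("", "node1"))
	logger.V(2).Info("Stopping sync loop")
}

func main() {
	done := make(chan struct{})
	go loop(klog.Background(), done)
	<-done
}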
@ -175,7 +175,7 @@ func (sync *NodeSync) Delete(node *v1.Node) {
// syncOp is the interface for generic sync operation. // syncOp is the interface for generic sync operation.
type syncOp interface { type syncOp interface {
// run the requested sync operation. // run the requested sync operation.
run(sync *NodeSync) error run(logger klog.Logger, sync *NodeSync) error
} }
// updateOp handles creation and updates of a node. // updateOp handles creation and updates of a node.
@ -190,16 +190,16 @@ func (op *updateOp) String() string {
return fmt.Sprintf("updateOp(%q,%v)", op.node.Name, op.node.Spec.PodCIDR) return fmt.Sprintf("updateOp(%q,%v)", op.node.Name, op.node.Spec.PodCIDR)
} }
func (op *updateOp) run(sync *NodeSync) error { func (op *updateOp) run(logger klog.Logger, sync *NodeSync) error {
klog.V(3).Infof("Running updateOp %+v", op) logger.V(3).Info("Running updateOp", "updateOp", op)
ctx := context.Background() ctx := context.Background()
if op.node == nil { if op.node == nil {
klog.V(3).Infof("Getting node spec for %q", sync.nodeName) logger.V(3).Info("Getting node spec", "node", klog.KRef("", sync.nodeName))
node, err := sync.kubeAPI.Node(ctx, sync.nodeName) node, err := sync.kubeAPI.Node(ctx, sync.nodeName)
if err != nil { if err != nil {
klog.Errorf("Error getting node %q spec: %v", sync.nodeName, err) logger.Error(err, "Error getting node pec", "node", klog.KRef("", sync.nodeName))
return err return err
} }
op.node = node op.node = node
@ -207,7 +207,7 @@ func (op *updateOp) run(sync *NodeSync) error {
aliasRange, err := sync.cloudAlias.Alias(ctx, op.node) aliasRange, err := sync.cloudAlias.Alias(ctx, op.node)
if err != nil { if err != nil {
klog.Errorf("Error getting cloud alias for node %q: %v", sync.nodeName, err) logger.Error(err, "Error getting cloud alias for node", "node", klog.KRef("", sync.nodeName))
return err return err
} }
@ -229,14 +229,13 @@ func (op *updateOp) run(sync *NodeSync) error {
// match. // match.
func (op *updateOp) validateRange(ctx context.Context, sync *NodeSync, node *v1.Node, aliasRange *net.IPNet) error { func (op *updateOp) validateRange(ctx context.Context, sync *NodeSync, node *v1.Node, aliasRange *net.IPNet) error {
if node.Spec.PodCIDR != aliasRange.String() { if node.Spec.PodCIDR != aliasRange.String() {
klog.Errorf("Inconsistency detected between node PodCIDR and node alias (%v != %v)", klog.FromContext(ctx).Error(nil, "Inconsistency detected between node PodCIDR and node alias", "podCIDR", node.Spec.PodCIDR, "alias", aliasRange)
node.Spec.PodCIDR, aliasRange)
sync.kubeAPI.EmitNodeWarningEvent(node.Name, MismatchEvent, sync.kubeAPI.EmitNodeWarningEvent(node.Name, MismatchEvent,
"Node.Spec.PodCIDR != cloud alias (%v != %v)", node.Spec.PodCIDR, aliasRange) "Node.Spec.PodCIDR != cloud alias (%v != %v)", node.Spec.PodCIDR, aliasRange)
// User intervention is required in this case, as this is most likely due // User intervention is required in this case, as this is most likely due
// to the user mucking around with their VM aliases on the side. // to the user mucking around with their VM aliases on the side.
} else { } else {
klog.V(4).Infof("Node %q CIDR range %v is matches cloud assignment", node.Name, node.Spec.PodCIDR) klog.FromContext(ctx).V(4).Info("Node CIDR range is matches cloud assignment", "node", klog.KObj(node), "podCIDR", node.Spec.PodCIDR)
} }
return nil return nil
} }
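validateRange illustrates two conventions used throughout: recovering the logger with klog.FromContext when a context is already in hand, and calling Error with a nil error for a condition that is error-severity but has no Go error value. A self-contained sketch:

package main

import (
	"context"

	"k8s.io/klog/v2"
)

// validate mirrors validateRange above: the logger comes from the context,
// and Error(nil, ...) flags a condition with no accompanying Go error.
func validate(ctx context.Context, podCIDR, alias string) {
	logger := klog.FromContext(ctx)
	if podCIDR != alias {
		logger.Error(nil, "Inconsistency detected between node PodCIDR and node alias",
			"podCIDR", podCIDR, "alias", alias)
		return
	}
	logger.V(4).Info("Node CIDR range matches cloud assignment", "podCIDR", podCIDR)
}

func main() {
	ctx := klog.NewContext(context.Background(), klog.Background())
	validate(ctx, "10.0.0.0/24", "10.0.1.0/24")
}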
@ -249,27 +248,27 @@ func (op *updateOp) updateNodeFromAlias(ctx context.Context, sync *NodeSync, nod
"Cannot sync from cloud in mode %q", sync.mode) "Cannot sync from cloud in mode %q", sync.mode)
return fmt.Errorf("cannot sync from cloud in mode %q", sync.mode) return fmt.Errorf("cannot sync from cloud in mode %q", sync.mode)
} }
logger := klog.FromContext(ctx)
klog.V(2).Infof("Updating node spec with alias range, node.PodCIDR = %v", aliasRange) logger.V(2).Info("Updating node spec with alias range", "podCIDR", aliasRange)
if err := sync.set.Occupy(aliasRange); err != nil { if err := sync.set.Occupy(aliasRange); err != nil {
klog.Errorf("Error occupying range %v for node %v", aliasRange, sync.nodeName) logger.Error(nil, "Error occupying range for node", "node", klog.KRef("", sync.nodeName), "alias", aliasRange)
return err return err
} }
if err := sync.kubeAPI.UpdateNodePodCIDR(ctx, node, aliasRange); err != nil { if err := sync.kubeAPI.UpdateNodePodCIDR(ctx, node, aliasRange); err != nil {
klog.Errorf("Could not update node %q PodCIDR to %v: %v", node.Name, aliasRange, err) logger.Error(err, "Could not update node PodCIDR", "node", klog.KObj(node), "podCIDR", aliasRange)
return err return err
} }
klog.V(2).Infof("Node %q PodCIDR set to %v", node.Name, aliasRange) logger.V(2).Info("Node PodCIDR updated", "node", klog.KObj(node), "podCIDR", aliasRange)
if err := sync.kubeAPI.UpdateNodeNetworkUnavailable(node.Name, false); err != nil { if err := sync.kubeAPI.UpdateNodeNetworkUnavailable(node.Name, false); err != nil {
klog.Errorf("Could not update node NetworkUnavailable status to false: %v", err) logger.Error(err, "Could not update node NetworkUnavailable status to false")
return err return err
} }
klog.V(2).Infof("Updated node %q PodCIDR from cloud alias %v", node.Name, aliasRange) logger.V(2).Info("Updated node PodCIDR from cloud alias", "node", klog.KObj(node), "alias", aliasRange)
return nil return nil
} }
@ -283,29 +282,29 @@ func (op *updateOp) updateAliasFromNode(ctx context.Context, sync *NodeSync, nod
} }
_, aliasRange, err := netutils.ParseCIDRSloppy(node.Spec.PodCIDR) _, aliasRange, err := netutils.ParseCIDRSloppy(node.Spec.PodCIDR)
logger := klog.FromContext(ctx)
if err != nil { if err != nil {
klog.Errorf("Could not parse PodCIDR (%q) for node %q: %v", logger.Error(err, "Could not parse PodCIDR for node", "node", klog.KObj(node), "podCIDR", node.Spec.PodCIDR)
node.Spec.PodCIDR, node.Name, err)
return err return err
} }
if err := sync.set.Occupy(aliasRange); err != nil { if err := sync.set.Occupy(aliasRange); err != nil {
klog.Errorf("Error occupying range %v for node %v", aliasRange, sync.nodeName) logger.Error(nil, "Error occupying range for node", "node", klog.KRef("", sync.nodeName), "alias", aliasRange)
return err return err
} }
if err := sync.cloudAlias.AddAlias(ctx, node, aliasRange); err != nil { if err := sync.cloudAlias.AddAlias(ctx, node, aliasRange); err != nil {
klog.Errorf("Could not add alias %v for node %q: %v", aliasRange, node.Name, err) logger.Error(err, "Could not add alias for node", "node", klog.KObj(node), "alias", aliasRange)
return err return err
} }
if err := sync.kubeAPI.UpdateNodeNetworkUnavailable(node.Name, false); err != nil { if err := sync.kubeAPI.UpdateNodeNetworkUnavailable(node.Name, false); err != nil {
klog.Errorf("Could not update node NetworkUnavailable status to false: %v", err) logger.Error(err, "Could not update node NetworkUnavailable status to false")
return err return err
} }
klog.V(2).Infof("Updated node %q cloud alias with node spec, node.PodCIDR = %v", logger.V(2).Info("Updated node cloud alias with node spec", "node", klog.KObj(node), "podCIDR", node.Spec.PodCIDR)
node.Name, node.Spec.PodCIDR)
return nil return nil
} }
@ -326,22 +325,23 @@ func (op *updateOp) allocateRange(ctx context.Context, sync *NodeSync, node *v1.
// If addAlias returns a hard error, cidrRange will be leaked as there // If addAlias returns a hard error, cidrRange will be leaked as there
// is no durable record of the range. The missing space will be // is no durable record of the range. The missing space will be
// recovered on the next restart of the controller. // recovered on the next restart of the controller.
logger := klog.FromContext(ctx)
if err := sync.cloudAlias.AddAlias(ctx, node, cidrRange); err != nil { if err := sync.cloudAlias.AddAlias(ctx, node, cidrRange); err != nil {
klog.Errorf("Could not add alias %v for node %q: %v", cidrRange, node.Name, err) logger.Error(err, "Could not add alias for node", "node", klog.KObj(node), "alias", cidrRange)
return err return err
} }
if err := sync.kubeAPI.UpdateNodePodCIDR(ctx, node, cidrRange); err != nil { if err := sync.kubeAPI.UpdateNodePodCIDR(ctx, node, cidrRange); err != nil {
klog.Errorf("Could not update node %q PodCIDR to %v: %v", node.Name, cidrRange, err) logger.Error(err, "Could not update node PodCIDR", "node", klog.KObj(node), "podCIDR", cidrRange)
return err return err
} }
if err := sync.kubeAPI.UpdateNodeNetworkUnavailable(node.Name, false); err != nil { if err := sync.kubeAPI.UpdateNodeNetworkUnavailable(node.Name, false); err != nil {
klog.Errorf("Could not update node NetworkUnavailable status to false: %v", err) logger.Error(err, "Could not update node NetworkUnavailable status to false")
return err return err
} }
klog.V(2).Infof("Allocated PodCIDR %v for node %q", cidrRange, node.Name) logger.V(2).Info("Allocated PodCIDR for node", "node", klog.KObj(node), "podCIDR", cidrRange)
return nil return nil
} }
@ -358,25 +358,23 @@ func (op *deleteOp) String() string {
return fmt.Sprintf("deleteOp(%q,%v)", op.node.Name, op.node.Spec.PodCIDR) return fmt.Sprintf("deleteOp(%q,%v)", op.node.Name, op.node.Spec.PodCIDR)
} }
func (op *deleteOp) run(sync *NodeSync) error { func (op *deleteOp) run(logger klog.Logger, sync *NodeSync) error {
klog.V(3).Infof("Running deleteOp %+v", op) logger.V(3).Info("Running deleteOp", "deleteOp", op)
if op.node.Spec.PodCIDR == "" { if op.node.Spec.PodCIDR == "" {
klog.V(2).Infof("Node %q was deleted, node had no PodCIDR range assigned", op.node.Name) logger.V(2).Info("Node was deleted, node had no PodCIDR range assigned", "node", klog.KObj(op.node))
return nil return nil
} }
_, cidrRange, err := netutils.ParseCIDRSloppy(op.node.Spec.PodCIDR) _, cidrRange, err := netutils.ParseCIDRSloppy(op.node.Spec.PodCIDR)
if err != nil { if err != nil {
klog.Errorf("Deleted node %q has an invalid podCIDR %q: %v", logger.Error(err, "Deleted node has an invalid podCIDR", "node", klog.KObj(op.node), "podCIDR", op.node.Spec.PodCIDR)
op.node.Name, op.node.Spec.PodCIDR, err)
sync.kubeAPI.EmitNodeWarningEvent(op.node.Name, InvalidPodCIDR, sync.kubeAPI.EmitNodeWarningEvent(op.node.Name, InvalidPodCIDR,
"Node %q has an invalid PodCIDR: %q", op.node.Name, op.node.Spec.PodCIDR) "Node %q has an invalid PodCIDR: %q", op.node.Name, op.node.Spec.PodCIDR)
return nil return nil
} }
sync.set.Release(cidrRange) sync.set.Release(cidrRange)
klog.V(2).Infof("Node %q was deleted, releasing CIDR range %v", logger.V(2).Info("Node was deleted, releasing CIDR range", "node", klog.KObj(op.node), "podCIDR", op.node.Spec.PodCIDR)
op.node.Name, op.node.Spec.PodCIDR)
return nil return nil
} }

View File

@ -26,6 +26,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/klog/v2" "k8s.io/klog/v2"
"k8s.io/klog/v2/ktesting"
"k8s.io/kubernetes/pkg/controller/nodeipam/ipam/cidrset" "k8s.io/kubernetes/pkg/controller/nodeipam/ipam/cidrset"
"k8s.io/kubernetes/pkg/controller/nodeipam/ipam/test" "k8s.io/kubernetes/pkg/controller/nodeipam/ipam/test"
netutils "k8s.io/utils/net" netutils "k8s.io/utils/net"
@ -200,7 +201,8 @@ func TestNodeSyncUpdate(t *testing.T) {
doneChan := make(chan struct{}) doneChan := make(chan struct{})
// Do a single step of the loop. // Do a single step of the loop.
go sync.Loop(doneChan) logger, _ := ktesting.NewTestContext(t)
go sync.Loop(logger, doneChan)
sync.Update(tc.node) sync.Update(tc.node)
close(sync.opChan) close(sync.opChan)
<-doneChan <-doneChan
@ -230,8 +232,8 @@ func TestNodeSyncResync(t *testing.T) {
cidr, _ := cidrset.NewCIDRSet(clusterCIDRRange, 24) cidr, _ := cidrset.NewCIDRSet(clusterCIDRRange, 24)
sync := New(fake, fake, fake, SyncFromCluster, "node1", cidr) sync := New(fake, fake, fake, SyncFromCluster, "node1", cidr)
doneChan := make(chan struct{}) doneChan := make(chan struct{})
logger, _ := ktesting.NewTestContext(t)
go sync.Loop(doneChan) go sync.Loop(logger, doneChan)
<-fake.reportChan <-fake.reportChan
close(sync.opChan) close(sync.opChan)
// Unblock loop(). // Unblock loop().
@ -275,7 +277,8 @@ func TestNodeSyncDelete(t *testing.T) {
doneChan := make(chan struct{}) doneChan := make(chan struct{})
// Do a single step of the loop. // Do a single step of the loop.
go sync.Loop(doneChan) logger, _ := ktesting.NewTestContext(t)
go sync.Loop(logger, doneChan)
sync.Delete(tc.node) sync.Delete(tc.node)
<-doneChan <-doneChan
tc.fake.dumpTrace() tc.fake.dumpTrace()

View File

@ -32,6 +32,7 @@ import (
) )
func createLegacyIPAM( func createLegacyIPAM(
logger klog.Logger,
ic *Controller, ic *Controller,
nodeInformer coreinformers.NodeInformer, nodeInformer coreinformers.NodeInformer,
cloud cloudprovider.Interface, cloud cloudprovider.Interface,
@ -58,14 +59,16 @@ func createLegacyIPAM(
cidr = clusterCIDRs[0] cidr = clusterCIDRs[0]
} }
if len(clusterCIDRs) > 1 { if len(clusterCIDRs) > 1 {
klog.Warningf("Multiple cidrs were configured with FromCluster or FromCloud. cidrs except first one were discarded") logger.Info("Multiple cidrs were configured with FromCluster or FromCloud. cidrs except first one were discarded")
} }
ipamc, err := ipam.NewController(cfg, kubeClient, cloud, cidr, serviceCIDR, nodeCIDRMaskSizes[0]) ipamc, err := ipam.NewController(cfg, kubeClient, cloud, cidr, serviceCIDR, nodeCIDRMaskSizes[0])
if err != nil { if err != nil {
klog.Fatalf("Error creating ipam controller: %v", err) logger.Error(err, "Error creating ipam controller")
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
} }
if err := ipamc.Start(nodeInformer); err != nil { if err := ipamc.Start(logger, nodeInformer); err != nil {
klog.Fatalf("Error trying to Init(): %v", err) logger.Error(err, "Error trying to Init()")
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
} }
return ipamc return ipamc
} }
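Structured logging has no Fatal level, so every klog.Fatalf in this commit becomes an Error call followed by klog.FlushAndExit, which flushes buffered output before terminating. A minimal sketch:

package main

import (
	"errors"

	"k8s.io/klog/v2"
)

func mustStart(logger klog.Logger, err error) {
	if err != nil {
		logger.Error(err, "Error creating ipam controller")
		// FlushAndExit flushes buffered log output, then calls os.Exit(1).
		klog.FlushAndExit(klog.ExitFlushTimeout, 1)
	}
}

func main() {
	mustStart(klog.Background(), errors.New("boom"))
}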

View File

@ -17,6 +17,7 @@ limitations under the License.
package nodeipam package nodeipam
import ( import (
"context"
"net" "net"
"time" "time"
@ -50,7 +51,7 @@ const (
// legacy mode. It is needed to ensure correct building for // legacy mode. It is needed to ensure correct building for
// both provider-specific and providerless environments. // both provider-specific and providerless environments.
type ipamController interface { type ipamController interface {
Run(<-chan struct{}) Run(ctx context.Context)
} }
// Controller is the controller that manages node ipam state. // Controller is the controller that manages node ipam state.
@ -79,6 +80,7 @@ type Controller struct {
// podCIDRs it has already allocated to nodes. Since we don't allow podCIDR changes // podCIDRs it has already allocated to nodes. Since we don't allow podCIDR changes
// currently, this should be handled as a fatal error. // currently, this should be handled as a fatal error.
func NewNodeIpamController( func NewNodeIpamController(
ctx context.Context,
nodeInformer coreinformers.NodeInformer, nodeInformer coreinformers.NodeInformer,
clusterCIDRInformer networkinginformers.ClusterCIDRInformer, clusterCIDRInformer networkinginformers.ClusterCIDRInformer,
cloud cloudprovider.Interface, cloud cloudprovider.Interface,
@ -89,20 +91,24 @@ func NewNodeIpamController(
nodeCIDRMaskSizes []int, nodeCIDRMaskSizes []int,
allocatorType ipam.CIDRAllocatorType) (*Controller, error) { allocatorType ipam.CIDRAllocatorType) (*Controller, error) {
logger := klog.FromContext(ctx)
if kubeClient == nil { if kubeClient == nil {
klog.Fatalf("kubeClient is nil when starting Controller") logger.Error(nil, "kubeClient is nil when starting Controller")
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
} }
// Cloud CIDR allocator does not rely on clusterCIDR or nodeCIDRMaskSize for allocation. // Cloud CIDR allocator does not rely on clusterCIDR or nodeCIDRMaskSize for allocation.
if allocatorType != ipam.CloudAllocatorType { if allocatorType != ipam.CloudAllocatorType {
if len(clusterCIDRs) == 0 { if len(clusterCIDRs) == 0 {
klog.Fatal("Controller: Must specify --cluster-cidr if --allocate-node-cidrs is set") logger.Error(nil, "Controller: Must specify --cluster-cidr if --allocate-node-cidrs is set")
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
} }
for idx, cidr := range clusterCIDRs { for idx, cidr := range clusterCIDRs {
mask := cidr.Mask mask := cidr.Mask
if maskSize, _ := mask.Size(); maskSize > nodeCIDRMaskSizes[idx] { if maskSize, _ := mask.Size(); maskSize > nodeCIDRMaskSizes[idx] {
klog.Fatal("Controller: Invalid --cluster-cidr, mask size of cluster CIDR must be less than or equal to --node-cidr-mask-size configured for CIDR family") logger.Error(nil, "Controller: Invalid --cluster-cidr, mask size of cluster CIDR must be less than or equal to --node-cidr-mask-size configured for CIDR family")
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
} }
} }
} }
@ -120,7 +126,7 @@ func NewNodeIpamController(
// TODO: Abstract this check into a generic controller manager should run method. // TODO: Abstract this check into a generic controller manager should run method.
if ic.allocatorType == ipam.IPAMFromClusterAllocatorType || ic.allocatorType == ipam.IPAMFromCloudAllocatorType { if ic.allocatorType == ipam.IPAMFromClusterAllocatorType || ic.allocatorType == ipam.IPAMFromCloudAllocatorType {
ic.legacyIPAM = createLegacyIPAM(ic, nodeInformer, cloud, kubeClient, clusterCIDRs, serviceCIDR, nodeCIDRMaskSizes) ic.legacyIPAM = createLegacyIPAM(logger, ic, nodeInformer, cloud, kubeClient, clusterCIDRs, serviceCIDR, nodeCIDRMaskSizes)
} else { } else {
var err error var err error
@ -131,7 +137,7 @@ func NewNodeIpamController(
NodeCIDRMaskSizes: nodeCIDRMaskSizes, NodeCIDRMaskSizes: nodeCIDRMaskSizes,
} }
ic.cidrAllocator, err = ipam.New(kubeClient, cloud, nodeInformer, clusterCIDRInformer, ic.allocatorType, allocatorParams) ic.cidrAllocator, err = ipam.New(ctx, kubeClient, cloud, nodeInformer, clusterCIDRInformer, ic.allocatorType, allocatorParams)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -144,33 +150,32 @@ func NewNodeIpamController(
} }
// Run starts an asynchronous loop that monitors the status of cluster nodes. // Run starts an asynchronous loop that monitors the status of cluster nodes.
func (nc *Controller) Run(stopCh <-chan struct{}) { func (nc *Controller) Run(ctx context.Context) {
defer utilruntime.HandleCrash() defer utilruntime.HandleCrash()
// Start event processing pipeline. // Start event processing pipeline.
nc.eventBroadcaster.StartStructuredLogging(0) nc.eventBroadcaster.StartStructuredLogging(0)
nc.eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: nc.kubeClient.CoreV1().Events("")}) nc.eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: nc.kubeClient.CoreV1().Events("")})
defer nc.eventBroadcaster.Shutdown() defer nc.eventBroadcaster.Shutdown()
klog.FromContext(ctx).Info("Starting ipam controller")
defer klog.FromContext(ctx).Info("Shutting down ipam controller")
klog.Infof("Starting ipam controller") if !cache.WaitForNamedCacheSync("node", ctx.Done(), nc.nodeInformerSynced) {
defer klog.Infof("Shutting down ipam controller")
if !cache.WaitForNamedCacheSync("node", stopCh, nc.nodeInformerSynced) {
return return
} }
if nc.allocatorType == ipam.IPAMFromClusterAllocatorType || nc.allocatorType == ipam.IPAMFromCloudAllocatorType { if nc.allocatorType == ipam.IPAMFromClusterAllocatorType || nc.allocatorType == ipam.IPAMFromCloudAllocatorType {
go nc.legacyIPAM.Run(stopCh) go nc.legacyIPAM.Run(ctx)
} else { } else {
go nc.cidrAllocator.Run(stopCh) go nc.cidrAllocator.Run(ctx)
} }
<-stopCh <-ctx.Done()
} }
// RunWithMetrics is a wrapper for Run that also tracks starting and stopping of the nodeipam controller with additional metric // RunWithMetrics is a wrapper for Run that also tracks starting and stopping of the nodeipam controller with additional metric
func (nc *Controller) RunWithMetrics(stopCh <-chan struct{}, controllerManagerMetrics *controllersmetrics.ControllerManagerMetrics) { func (nc *Controller) RunWithMetrics(ctx context.Context, controllerManagerMetrics *controllersmetrics.ControllerManagerMetrics) {
controllerManagerMetrics.ControllerStarted("nodeipam") controllerManagerMetrics.ControllerStarted("nodeipam")
defer controllerManagerMetrics.ControllerStopped("nodeipam") defer controllerManagerMetrics.ControllerStopped("nodeipam")
nc.Run(stopCh) nc.Run(ctx)
} }
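The Run signature change is the standard stop-channel to context migration: the <-chan struct{} parameter becomes a context.Context, <-stopCh becomes <-ctx.Done(), and helpers that still want a channel are handed ctx.Done(). A runnable sketch of the resulting shape:

package main

import (
	"context"
	"time"

	"k8s.io/klog/v2"
)

type controller struct{}

// Run blocks until the context is cancelled, logging through the logger
// carried by (or defaulted from) that context.
func (c *controller) Run(ctx context.Context) {
	logger := klog.FromContext(ctx)
	logger.Info("Starting controller")
	defer logger.Info("Shutting down controller")
	<-ctx.Done()
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
	defer cancel()
	(&controller{}).Run(ctx)
}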

View File

@ -20,6 +20,7 @@ limitations under the License.
package nodeipam package nodeipam
import ( import (
"context"
"net" "net"
"os" "os"
"os/exec" "os/exec"
@ -30,6 +31,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/informers" "k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes/fake" "k8s.io/client-go/kubernetes/fake"
"k8s.io/klog/v2/ktesting"
"k8s.io/kubernetes/pkg/controller" "k8s.io/kubernetes/pkg/controller"
"k8s.io/kubernetes/pkg/controller/nodeipam/ipam" "k8s.io/kubernetes/pkg/controller/nodeipam/ipam"
"k8s.io/kubernetes/pkg/controller/testutil" "k8s.io/kubernetes/pkg/controller/testutil"
@ -37,7 +39,7 @@ import (
netutils "k8s.io/utils/net" netutils "k8s.io/utils/net"
) )
func newTestNodeIpamController(clusterCIDR []*net.IPNet, serviceCIDR *net.IPNet, secondaryServiceCIDR *net.IPNet, nodeCIDRMaskSizes []int, allocatorType ipam.CIDRAllocatorType) (*Controller, error) { func newTestNodeIpamController(ctx context.Context, clusterCIDR []*net.IPNet, serviceCIDR *net.IPNet, secondaryServiceCIDR *net.IPNet, nodeCIDRMaskSizes []int, allocatorType ipam.CIDRAllocatorType) (*Controller, error) {
clientSet := fake.NewSimpleClientset() clientSet := fake.NewSimpleClientset()
fakeNodeHandler := &testutil.FakeNodeHandler{ fakeNodeHandler := &testutil.FakeNodeHandler{
Existing: []*v1.Node{ Existing: []*v1.Node{
@ -56,6 +58,7 @@ func newTestNodeIpamController(clusterCIDR []*net.IPNet, serviceCIDR *net.IPNet,
fakeGCE := gce.NewFakeGCECloud(gce.DefaultTestClusterValues()) fakeGCE := gce.NewFakeGCECloud(gce.DefaultTestClusterValues())
return NewNodeIpamController( return NewNodeIpamController(
ctx,
fakeNodeInformer, fakeClusterCIDRInformer, fakeGCE, clientSet, fakeNodeInformer, fakeClusterCIDRInformer, fakeGCE, clientSet,
clusterCIDR, serviceCIDR, secondaryServiceCIDR, nodeCIDRMaskSizes, allocatorType, clusterCIDR, serviceCIDR, secondaryServiceCIDR, nodeCIDRMaskSizes, allocatorType,
) )
@ -92,6 +95,7 @@ func TestNewNodeIpamControllerWithCIDRMasks(t *testing.T) {
{"invalid_serviceCIDR_contains_clusterCIDR", "10.0.0.0/23", "10.0.0.0/21", emptyServiceCIDR, []int{24}, ipam.IPAMFromClusterAllocatorType, true}, {"invalid_serviceCIDR_contains_clusterCIDR", "10.0.0.0/23", "10.0.0.0/21", emptyServiceCIDR, []int{24}, ipam.IPAMFromClusterAllocatorType, true},
{"invalid_CIDR_mask_size", "10.0.0.0/24,2000::/64", "10.1.0.0/21", emptyServiceCIDR, []int{24, 48}, ipam.IPAMFromClusterAllocatorType, true}, {"invalid_CIDR_mask_size", "10.0.0.0/24,2000::/64", "10.1.0.0/21", emptyServiceCIDR, []int{24, 48}, ipam.IPAMFromClusterAllocatorType, true},
} { } {
_, ctx := ktesting.NewTestContext(t)
t.Run(tc.desc, func(t *testing.T) { t.Run(tc.desc, func(t *testing.T) {
clusterCidrs, _ := netutils.ParseCIDRs(strings.Split(tc.clusterCIDR, ",")) clusterCidrs, _ := netutils.ParseCIDRs(strings.Split(tc.clusterCIDR, ","))
_, serviceCIDRIpNet, _ := netutils.ParseCIDRSloppy(tc.serviceCIDR) _, serviceCIDRIpNet, _ := netutils.ParseCIDRSloppy(tc.serviceCIDR)
@ -99,7 +103,7 @@ func TestNewNodeIpamControllerWithCIDRMasks(t *testing.T) {
if os.Getenv("EXIT_ON_FATAL") == "1" { if os.Getenv("EXIT_ON_FATAL") == "1" {
// This is the subprocess which runs the actual code. // This is the subprocess which runs the actual code.
newTestNodeIpamController(clusterCidrs, serviceCIDRIpNet, secondaryServiceCIDRIpNet, tc.maskSize, tc.allocatorType) newTestNodeIpamController(ctx, clusterCidrs, serviceCIDRIpNet, secondaryServiceCIDRIpNet, tc.maskSize, tc.allocatorType)
return return
} }
// This is the host process that monitors the exit code of the subprocess. // This is the host process that monitors the exit code of the subprocess.
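Because the fatal paths now exit the process, the test re-executes its own binary and asserts on the child's exit code. A self-contained sketch of that subprocess pattern (the child body here is illustrative):

package example

import (
	"os"
	"os/exec"
	"testing"

	"k8s.io/klog/v2"
)

func TestFatalPath(t *testing.T) {
	if os.Getenv("EXIT_ON_FATAL") == "1" {
		// Child: run the code path that is expected to exit fatally.
		klog.Background().Error(nil, "fatal condition reached")
		klog.FlushAndExit(klog.ExitFlushTimeout, 1)
		return
	}
	// Parent: spawn the child and assert on its exit code.
	cmd := exec.Command(os.Args[0], "-test.run=TestFatalPath")
	cmd.Env = append(os.Environ(), "EXIT_ON_FATAL=1")
	err := cmd.Run()
	if exitErr, ok := err.(*exec.ExitError); !ok || exitErr.ExitCode() != 1 {
		t.Fatalf("expected the subprocess to exit with code 1, got %v", err)
	}
}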

View File

@ -20,6 +20,7 @@ limitations under the License.
package nodeipam package nodeipam
import ( import (
"context"
"net" "net"
coreinformers "k8s.io/client-go/informers/core/v1" coreinformers "k8s.io/client-go/informers/core/v1"
@ -31,11 +32,12 @@ import (
type fakeController struct { type fakeController struct {
} }
func (f *fakeController) Run(stopCh <-chan struct{}) { func (f *fakeController) Run(ctx context.Context) {
<-stopCh <-ctx.Done()
} }
func createLegacyIPAM( func createLegacyIPAM(
logger klog.Logger,
ic *Controller, ic *Controller,
nodeInformer coreinformers.NodeInformer, nodeInformer coreinformers.NodeInformer,
cloud cloudprovider.Interface, cloud cloudprovider.Interface,
@ -44,6 +46,7 @@ func createLegacyIPAM(
serviceCIDR *net.IPNet, serviceCIDR *net.IPNet,
nodeCIDRMaskSizes []int, nodeCIDRMaskSizes []int,
) *fakeController { ) *fakeController {
klog.Fatal("Error trying to Init(): legacy cloud provider support disabled at build time") logger.Error(nil, "Error trying to Init(): legacy cloud provider support disabled at build time")
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
return &fakeController{} return &fakeController{}
} }

View File

@ -368,9 +368,10 @@ func NewNodeLifecycleController(
unhealthyZoneThreshold float32, unhealthyZoneThreshold float32,
runTaintManager bool, runTaintManager bool,
) (*Controller, error) { ) (*Controller, error) {
logger := klog.LoggerWithName(klog.FromContext(ctx), "NodeLifecycleController")
if kubeClient == nil { if kubeClient == nil {
klog.Fatalf("kubeClient is nil when starting Controller") logger.Error(nil, "kubeClient is nil when starting nodelifecycle Controller")
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
} }
eventBroadcaster := record.NewBroadcaster() eventBroadcaster := record.NewBroadcaster()
@ -428,12 +429,12 @@ func NewNodeLifecycleController(
if !isPod { if !isPod {
deletedState, ok := obj.(cache.DeletedFinalStateUnknown) deletedState, ok := obj.(cache.DeletedFinalStateUnknown)
if !ok { if !ok {
klog.Errorf("Received unexpected object: %v", obj) logger.Error(nil, "Received unexpected object", "object", obj)
return return
} }
pod, ok = deletedState.Obj.(*v1.Pod) pod, ok = deletedState.Obj.(*v1.Pod)
if !ok { if !ok {
klog.Errorf("DeletedFinalStateUnknown contained non-Pod object: %v", deletedState.Obj) logger.Error(nil, "DeletedFinalStateUnknown contained non-Pod object", "object", deletedState.Obj)
return return
} }
} }
@ -494,7 +495,7 @@ func NewNodeLifecycleController(
}) })
} }
klog.Infof("Controller will reconcile labels.") logger.Info("Controller will reconcile labels")
nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: controllerutil.CreateAddNodeHandler(func(node *v1.Node) error { AddFunc: controllerutil.CreateAddNodeHandler(func(node *v1.Node) error {
nc.nodeUpdateQueue.Add(node.Name) nc.nodeUpdateQueue.Add(node.Name)
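klog.LoggerWithName, used at the top of NewNodeLifecycleController, stamps a component name onto every entry the derived logger emits. A minimal sketch:

package main

import (
	"context"

	"k8s.io/klog/v2"
)

func main() {
	// FromContext falls back to the global logger when the context carries
	// none; LoggerWithName tags every subsequent entry with the component.
	logger := klog.LoggerWithName(klog.FromContext(context.Background()), "NodeLifecycleController")
	logger.Info("Controller will reconcile labels")
}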
@ -529,7 +530,8 @@ func (nc *Controller) Run(ctx context.Context) {
// Start events processing pipeline. // Start events processing pipeline.
nc.broadcaster.StartStructuredLogging(0) nc.broadcaster.StartStructuredLogging(0)
klog.Infof("Sending events to api server.") logger := klog.FromContext(ctx)
logger.Info("Sending events to api server")
nc.broadcaster.StartRecordingToSink( nc.broadcaster.StartRecordingToSink(
&v1core.EventSinkImpl{ &v1core.EventSinkImpl{
Interface: v1core.New(nc.kubeClient.CoreV1().RESTClient()).Events(""), Interface: v1core.New(nc.kubeClient.CoreV1().RESTClient()).Events(""),
@ -540,8 +542,8 @@ func (nc *Controller) Run(ctx context.Context) {
defer nc.nodeUpdateQueue.ShutDown() defer nc.nodeUpdateQueue.ShutDown()
defer nc.podUpdateQueue.ShutDown() defer nc.podUpdateQueue.ShutDown()
klog.Infof("Starting node controller") logger.Info("Starting node controller")
defer klog.Infof("Shutting down node controller") defer logger.Info("Shutting down node controller")
if !cache.WaitForNamedCacheSync("taint", ctx.Done(), nc.leaseInformerSynced, nc.nodeInformerSynced, nc.podInformerSynced, nc.daemonSetInformerSynced) { if !cache.WaitForNamedCacheSync("taint", ctx.Done(), nc.leaseInformerSynced, nc.nodeInformerSynced, nc.podInformerSynced, nc.daemonSetInformerSynced) {
return return
@ -578,7 +580,7 @@ func (nc *Controller) Run(ctx context.Context) {
// Incorporate the results of node health signal pushed from kubelet to master. // Incorporate the results of node health signal pushed from kubelet to master.
go wait.UntilWithContext(ctx, func(ctx context.Context) { go wait.UntilWithContext(ctx, func(ctx context.Context) {
if err := nc.monitorNodeHealth(ctx); err != nil { if err := nc.monitorNodeHealth(ctx); err != nil {
klog.Errorf("Error monitoring node health: %v", err) logger.Error(err, "Error monitoring node health")
} }
}, nc.nodeMonitorPeriod) }, nc.nodeMonitorPeriod)
@ -586,6 +588,7 @@ func (nc *Controller) Run(ctx context.Context) {
} }
func (nc *Controller) doNodeProcessingPassWorker(ctx context.Context) { func (nc *Controller) doNodeProcessingPassWorker(ctx context.Context) {
logger := klog.FromContext(ctx)
for { for {
obj, shutdown := nc.nodeUpdateQueue.Get() obj, shutdown := nc.nodeUpdateQueue.Get()
// "nodeUpdateQueue" will be shutdown when "stopCh" closed; // "nodeUpdateQueue" will be shutdown when "stopCh" closed;
@ -595,13 +598,13 @@ func (nc *Controller) doNodeProcessingPassWorker(ctx context.Context) {
} }
nodeName := obj.(string) nodeName := obj.(string)
if err := nc.doNoScheduleTaintingPass(ctx, nodeName); err != nil { if err := nc.doNoScheduleTaintingPass(ctx, nodeName); err != nil {
klog.Errorf("Failed to taint NoSchedule on node <%s>, requeue it: %v", nodeName, err) logger.Error(err, "Failed to taint NoSchedule on node, requeue it", "node", klog.KRef("", nodeName))
// TODO(k82cn): Add nodeName back to the queue // TODO(k82cn): Add nodeName back to the queue
} }
// TODO: re-evaluate whether there are any labels that need to be // TODO: re-evaluate whether there are any labels that need to be
// reconcile in 1.19. Remove this function if it's no longer necessary. // reconcile in 1.19. Remove this function if it's no longer necessary.
if err := nc.reconcileNodeLabels(nodeName); err != nil { if err := nc.reconcileNodeLabels(nodeName); err != nil {
klog.Errorf("Failed to reconcile labels for node <%s>, requeue it: %v", nodeName, err) logger.Error(err, "Failed to reconcile labels for node, requeue it", "node", klog.KRef("", nodeName))
// TODO(yujuhong): Add nodeName back to the queue // TODO(yujuhong): Add nodeName back to the queue
} }
nc.nodeUpdateQueue.Done(nodeName) nc.nodeUpdateQueue.Done(nodeName)
@ -677,6 +680,7 @@ func (nc *Controller) doNoExecuteTaintingPass(ctx context.Context) {
zoneNoExecuteTainterKeys = append(zoneNoExecuteTainterKeys, k) zoneNoExecuteTainterKeys = append(zoneNoExecuteTainterKeys, k)
} }
}() }()
logger := klog.FromContext(ctx)
for _, k := range zoneNoExecuteTainterKeys { for _, k := range zoneNoExecuteTainterKeys {
var zoneNoExecuteTainterWorker *scheduler.RateLimitedTimedQueue var zoneNoExecuteTainterWorker *scheduler.RateLimitedTimedQueue
func() { func() {
@ -689,13 +693,13 @@ func (nc *Controller) doNoExecuteTaintingPass(ctx context.Context) {
zoneNoExecuteTainterWorker = nc.zoneNoExecuteTainter[k] zoneNoExecuteTainterWorker = nc.zoneNoExecuteTainter[k]
}() }()
// Function should return 'false' and a time after which it should be retried, or 'true' if it shouldn't (it succeeded). // Function should return 'false' and a time after which it should be retried, or 'true' if it shouldn't (it succeeded).
zoneNoExecuteTainterWorker.Try(func(value scheduler.TimedValue) (bool, time.Duration) { zoneNoExecuteTainterWorker.Try(logger, func(value scheduler.TimedValue) (bool, time.Duration) {
node, err := nc.nodeLister.Get(value.Value) node, err := nc.nodeLister.Get(value.Value)
if apierrors.IsNotFound(err) { if apierrors.IsNotFound(err) {
klog.Warningf("Node %v no longer present in nodeLister!", value.Value) logger.Info("Node no longer present in nodeLister", "node", klog.KRef("", value.Value))
return true, 0 return true, 0
} else if err != nil { } else if err != nil {
klog.Warningf("Failed to get Node %v from the nodeLister: %v", value.Value, err) logger.Info("Failed to get Node from the nodeLister", "node", klog.KRef("", value.Value), "err", err)
// retry in 50 millisecond // retry in 50 millisecond
return false, 50 * time.Millisecond return false, 50 * time.Millisecond
} }
@ -712,7 +716,7 @@ func (nc *Controller) doNoExecuteTaintingPass(ctx context.Context) {
oppositeTaint = *NotReadyTaintTemplate oppositeTaint = *NotReadyTaintTemplate
default: default:
// It seems that the Node is ready again, so there's no need to taint it. // It seems that the Node is ready again, so there's no need to taint it.
klog.V(4).Infof("Node %v was in a taint queue, but it's ready now. Ignoring taint request.", value.Value) logger.V(4).Info("Node was in a taint queue, but it's ready now. Ignoring taint request", "node", klog.KRef("", value.Value))
return true, 0 return true, 0
} }
result := controllerutil.SwapNodeControllerTaint(ctx, nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{&oppositeTaint}, node) result := controllerutil.SwapNodeControllerTaint(ctx, nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{&oppositeTaint}, node)
@ -742,7 +746,7 @@ func (nc *Controller) doEvictionPass(ctx context.Context) {
zonePodEvictorKeys = append(zonePodEvictorKeys, k) zonePodEvictorKeys = append(zonePodEvictorKeys, k)
} }
}() }()
logger := klog.FromContext(ctx)
for _, k := range zonePodEvictorKeys { for _, k := range zonePodEvictorKeys {
var zonePodEvictionWorker *scheduler.RateLimitedTimedQueue var zonePodEvictionWorker *scheduler.RateLimitedTimedQueue
func() { func() {
@ -756,12 +760,12 @@ func (nc *Controller) doEvictionPass(ctx context.Context) {
}() }()
// Function should return 'false' and a time after which it should be retried, or 'true' if it shouldn't (it succeeded). // Function should return 'false' and a time after which it should be retried, or 'true' if it shouldn't (it succeeded).
zonePodEvictionWorker.Try(func(value scheduler.TimedValue) (bool, time.Duration) { zonePodEvictionWorker.Try(logger, func(value scheduler.TimedValue) (bool, time.Duration) {
node, err := nc.nodeLister.Get(value.Value) node, err := nc.nodeLister.Get(value.Value)
if apierrors.IsNotFound(err) { if apierrors.IsNotFound(err) {
klog.Warningf("Node %v no longer present in nodeLister!", value.Value) logger.Info("Node no longer present in nodeLister", "node", klog.KRef("", value.Value))
} else if err != nil { } else if err != nil {
klog.Warningf("Failed to get Node %v from the nodeLister: %v", value.Value, err) logger.Info("Failed to get Node from the nodeLister", "node", klog.KRef("", value.Value), "err", err)
} }
nodeUID, _ := value.UID.(string) nodeUID, _ := value.UID.(string)
pods, err := nc.getPodsAssignedToNode(value.Value) pods, err := nc.getPodsAssignedToNode(value.Value)
@ -778,10 +782,10 @@ func (nc *Controller) doEvictionPass(ctx context.Context) {
return false, 0 return false, 0
} }
if !nc.nodeEvictionMap.setStatus(value.Value, evicted) { if !nc.nodeEvictionMap.setStatus(value.Value, evicted) {
klog.V(2).Infof("node %v was unregistered in the meantime - skipping setting status", value.Value) logger.V(2).Info("Node was unregistered in the meantime - skipping setting status", "node", klog.KRef("", value.Value))
} }
if remaining { if remaining {
klog.Infof("Pods awaiting deletion due to Controller eviction") logger.Info("Pods awaiting deletion due to Controller eviction")
} }
if node != nil { if node != nil {
@ -811,25 +815,24 @@ func (nc *Controller) monitorNodeHealth(ctx context.Context) error {
return err return err
} }
added, deleted, newZoneRepresentatives := nc.classifyNodes(nodes) added, deleted, newZoneRepresentatives := nc.classifyNodes(nodes)
logger := klog.FromContext(ctx)
for i := range newZoneRepresentatives { for i := range newZoneRepresentatives {
nc.addPodEvictorForNewZone(newZoneRepresentatives[i]) nc.addPodEvictorForNewZone(logger, newZoneRepresentatives[i])
} }
for i := range added { for i := range added {
klog.V(1).Infof("Controller observed a new Node: %#v", added[i].Name) logger.V(1).Info("Controller observed a new Node", "node", klog.KRef("", added[i].Name))
controllerutil.RecordNodeEvent(nc.recorder, added[i].Name, string(added[i].UID), v1.EventTypeNormal, "RegisteredNode", fmt.Sprintf("Registered Node %v in Controller", added[i].Name)) controllerutil.RecordNodeEvent(nc.recorder, added[i].Name, string(added[i].UID), v1.EventTypeNormal, "RegisteredNode", fmt.Sprintf("Registered Node %v in Controller", added[i].Name))
nc.knownNodeSet[added[i].Name] = added[i] nc.knownNodeSet[added[i].Name] = added[i]
nc.addPodEvictorForNewZone(added[i]) nc.addPodEvictorForNewZone(logger, added[i])
if nc.runTaintManager { if nc.runTaintManager {
nc.markNodeAsReachable(ctx, added[i]) nc.markNodeAsReachable(ctx, added[i])
} else { } else {
nc.cancelPodEviction(added[i]) nc.cancelPodEviction(logger, added[i])
} }
} }
for i := range deleted { for i := range deleted {
klog.V(1).Infof("Controller observed a Node deletion: %v", deleted[i].Name) logger.V(1).Info("Controller observed a Node deletion", "node", klog.KRef("", deleted[i].Name))
controllerutil.RecordNodeEvent(nc.recorder, deleted[i].Name, string(deleted[i].UID), v1.EventTypeNormal, "RemovingNode", fmt.Sprintf("Removing Node %v from Controller", deleted[i].Name)) controllerutil.RecordNodeEvent(nc.recorder, deleted[i].Name, string(deleted[i].UID), v1.EventTypeNormal, "RemovingNode", fmt.Sprintf("Removing Node %v from Controller", deleted[i].Name))
delete(nc.knownNodeSet, deleted[i].Name) delete(nc.knownNodeSet, deleted[i].Name)
} }
@ -856,13 +859,12 @@ func (nc *Controller) monitorNodeHealth(ctx context.Context) error {
name := node.Name name := node.Name
node, err = nc.kubeClient.CoreV1().Nodes().Get(ctx, name, metav1.GetOptions{}) node, err = nc.kubeClient.CoreV1().Nodes().Get(ctx, name, metav1.GetOptions{})
if err != nil { if err != nil {
klog.Errorf("Failed while getting a Node to retry updating node health. Probably Node %s was deleted.", name) logger.Error(nil, "Failed while getting a Node to retry updating node health. Probably Node was deleted", "node", klog.KRef("", name))
return false, err return false, err
} }
return false, nil return false, nil
}); err != nil { }); err != nil {
klog.Errorf("Update health of Node '%v' from Controller error: %v. "+ logger.Error(err, "Update health of Node from Controller error, Skipping - no pods will be evicted", "node", klog.KObj(node))
"Skipping - no pods will be evicted.", node.Name, err)
return return
} }
@ -925,40 +927,35 @@ func (nc *Controller) monitorNodeHealth(ctx context.Context) error {
func (nc *Controller) processTaintBaseEviction(ctx context.Context, node *v1.Node, observedReadyCondition *v1.NodeCondition) { func (nc *Controller) processTaintBaseEviction(ctx context.Context, node *v1.Node, observedReadyCondition *v1.NodeCondition) {
decisionTimestamp := nc.now() decisionTimestamp := nc.now()
// Check eviction timeout against decisionTimestamp // Check eviction timeout against decisionTimestamp
logger := klog.FromContext(ctx)
switch observedReadyCondition.Status { switch observedReadyCondition.Status {
case v1.ConditionFalse: case v1.ConditionFalse:
// We want to update the taint straight away if Node is already tainted with the UnreachableTaint // We want to update the taint straight away if Node is already tainted with the UnreachableTaint
if taintutils.TaintExists(node.Spec.Taints, UnreachableTaintTemplate) { if taintutils.TaintExists(node.Spec.Taints, UnreachableTaintTemplate) {
taintToAdd := *NotReadyTaintTemplate taintToAdd := *NotReadyTaintTemplate
if !controllerutil.SwapNodeControllerTaint(ctx, nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{UnreachableTaintTemplate}, node) { if !controllerutil.SwapNodeControllerTaint(ctx, nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{UnreachableTaintTemplate}, node) {
klog.Errorf("Failed to instantly swap UnreachableTaint to NotReadyTaint. Will try again in the next cycle.") logger.Error(nil, "Failed to instantly swap UnreachableTaint to NotReadyTaint. Will try again in the next cycle")
} }
} else if nc.markNodeForTainting(node, v1.ConditionFalse) { } else if nc.markNodeForTainting(node, v1.ConditionFalse) {
klog.V(2).Infof("Node %v is NotReady as of %v. Adding it to the Taint queue.", logger.V(2).Info("Node is NotReady. Adding it to the Taint queue", "node", klog.KObj(node), "timeStamp", decisionTimestamp)
node.Name,
decisionTimestamp,
)
} }
case v1.ConditionUnknown: case v1.ConditionUnknown:
// We want to update the taint straight away if Node is already tainted with the UnreachableTaint // We want to update the taint straight away if Node is already tainted with the UnreachableTaint
if taintutils.TaintExists(node.Spec.Taints, NotReadyTaintTemplate) { if taintutils.TaintExists(node.Spec.Taints, NotReadyTaintTemplate) {
taintToAdd := *UnreachableTaintTemplate taintToAdd := *UnreachableTaintTemplate
if !controllerutil.SwapNodeControllerTaint(ctx, nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{NotReadyTaintTemplate}, node) { if !controllerutil.SwapNodeControllerTaint(ctx, nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{NotReadyTaintTemplate}, node) {
klog.Errorf("Failed to instantly swap NotReadyTaint to UnreachableTaint. Will try again in the next cycle.") logger.Error(nil, "Failed to instantly swap NotReadyTaint to UnreachableTaint. Will try again in the next cycle")
} }
} else if nc.markNodeForTainting(node, v1.ConditionUnknown) { } else if nc.markNodeForTainting(node, v1.ConditionUnknown) {
klog.V(2).Infof("Node %v is unresponsive as of %v. Adding it to the Taint queue.", logger.V(2).Info("Node is unresponsive. Adding it to the Taint queue", "node", klog.KObj(node), "timeStamp", decisionTimestamp)
node.Name,
decisionTimestamp,
)
} }
case v1.ConditionTrue: case v1.ConditionTrue:
removed, err := nc.markNodeAsReachable(ctx, node) removed, err := nc.markNodeAsReachable(ctx, node)
if err != nil { if err != nil {
klog.Errorf("Failed to remove taints from node %v. Will retry in next iteration.", node.Name) logger.Error(nil, "Failed to remove taints from node. Will retry in next iteration", "node", klog.KObj(node))
} }
if removed { if removed {
klog.V(2).Infof("Node %s is healthy again, removing all taints", node.Name) logger.V(2).Info("Node is healthy again, removing all taints", "node", klog.KObj(node))
} }
} }
} }
@ -970,6 +967,7 @@ func (nc *Controller) processNoTaintBaseEviction(ctx context.Context, node *v1.N
return fmt.Errorf("health data doesn't exist for node %q", node.Name) return fmt.Errorf("health data doesn't exist for node %q", node.Name)
} }
// Check eviction timeout against decisionTimestamp // Check eviction timeout against decisionTimestamp
logger := klog.FromContext(ctx)
switch observedReadyCondition.Status { switch observedReadyCondition.Status {
case v1.ConditionFalse: case v1.ConditionFalse:
if decisionTimestamp.After(nodeHealthData.readyTransitionTimestamp.Add(nc.podEvictionTimeout)) { if decisionTimestamp.After(nodeHealthData.readyTransitionTimestamp.Add(nc.podEvictionTimeout)) {
@ -978,11 +976,11 @@ func (nc *Controller) processNoTaintBaseEviction(ctx context.Context, node *v1.N
return err return err
} }
if enqueued { if enqueued {
klog.V(2).Infof("Node is NotReady. Adding Pods on Node %s to eviction queue: %v is later than %v + %v", logger.V(2).Info("Node is NotReady. Adding Pods on Node to eviction queue: decisionTimestamp is later than readyTransitionTimestamp + podEvictionTimeout",
node.Name, "node", klog.KObj(node),
decisionTimestamp, "decisionTimestamp", decisionTimestamp,
nodeHealthData.readyTransitionTimestamp, "readyTransitionTimestamp", nodeHealthData.readyTransitionTimestamp,
nc.podEvictionTimeout, "podEvictionTimeout", nc.podEvictionTimeout,
) )
} }
} }
@ -993,17 +991,17 @@ func (nc *Controller) processNoTaintBaseEviction(ctx context.Context, node *v1.N
return err return err
} }
if enqueued { if enqueued {
klog.V(2).Infof("Node is unresponsive. Adding Pods on Node %s to eviction queues: %v is later than %v + %v", logger.V(2).Info("Node is unresponsive. Adding Pods on Node to eviction queues: decisionTimestamp is later than readyTransitionTimestamp + podEvictionTimeout-gracePeriod",
node.Name, "node", klog.KObj(node),
decisionTimestamp, "decisionTimestamp", decisionTimestamp,
nodeHealthData.readyTransitionTimestamp, "readyTransitionTimestamp", nodeHealthData.readyTransitionTimestamp,
nc.podEvictionTimeout-gracePeriod, "podEvictionTimeoutGracePeriod", nc.podEvictionTimeout-gracePeriod,
) )
} }
} }
case v1.ConditionTrue: case v1.ConditionTrue:
if nc.cancelPodEviction(node) { if nc.cancelPodEviction(logger, node) {
klog.V(2).Infof("Node %s is ready again, cancelled pod eviction", node.Name) logger.V(2).Info("Node is ready again, cancelled pod eviction", "node", klog.KObj(node))
} }
} }
return nil return nil
@ -1076,23 +1074,23 @@ func (nc *Controller) tryUpdateNodeHealth(ctx context.Context, node *v1.Node) (t
_, savedCondition = controllerutil.GetNodeCondition(nodeHealth.status, v1.NodeReady) _, savedCondition = controllerutil.GetNodeCondition(nodeHealth.status, v1.NodeReady)
savedLease = nodeHealth.lease savedLease = nodeHealth.lease
} }
logger := klog.FromContext(ctx)
if nodeHealth == nil { if nodeHealth == nil {
klog.Warningf("Missing timestamp for Node %s. Assuming now as a timestamp.", node.Name) logger.Info("Missing timestamp for Node. Assuming now as a timestamp", "node", klog.KObj(node))
nodeHealth = &nodeHealthData{ nodeHealth = &nodeHealthData{
status: &node.Status, status: &node.Status,
probeTimestamp: nc.now(), probeTimestamp: nc.now(),
readyTransitionTimestamp: nc.now(), readyTransitionTimestamp: nc.now(),
} }
} else if savedCondition == nil && currentReadyCondition != nil { } else if savedCondition == nil && currentReadyCondition != nil {
klog.V(1).Infof("Creating timestamp entry for newly observed Node %s", node.Name) logger.V(1).Info("Creating timestamp entry for newly observed Node", "node", klog.KObj(node))
nodeHealth = &nodeHealthData{ nodeHealth = &nodeHealthData{
status: &node.Status, status: &node.Status,
probeTimestamp: nc.now(), probeTimestamp: nc.now(),
readyTransitionTimestamp: nc.now(), readyTransitionTimestamp: nc.now(),
} }
} else if savedCondition != nil && currentReadyCondition == nil { } else if savedCondition != nil && currentReadyCondition == nil {
klog.Errorf("ReadyCondition was removed from Status of Node %s", node.Name) logger.Error(nil, "ReadyCondition was removed from Status of Node", "node", klog.KObj(node))
// TODO: figure out what to do in this case. For now we do the same thing as above. // TODO: figure out what to do in this case. For now we do the same thing as above.
nodeHealth = &nodeHealthData{ nodeHealth = &nodeHealthData{
status: &node.Status, status: &node.Status,
@ -1104,15 +1102,15 @@ func (nc *Controller) tryUpdateNodeHealth(ctx context.Context, node *v1.Node) (t
// If ReadyCondition changed since the last time we checked, we update the transition timestamp to "now", // If ReadyCondition changed since the last time we checked, we update the transition timestamp to "now",
// otherwise we leave it as it is. // otherwise we leave it as it is.
if savedCondition.LastTransitionTime != currentReadyCondition.LastTransitionTime { if savedCondition.LastTransitionTime != currentReadyCondition.LastTransitionTime {
klog.V(3).Infof("ReadyCondition for Node %s transitioned from %v to %v", node.Name, savedCondition, currentReadyCondition) logger.V(3).Info("ReadyCondition for Node transitioned from savedCondition to currentReadyCondition", "node", klog.KObj(node), "savedCondition", savedCondition, "currentReadyCondition", currentReadyCondition)
transitionTime = nc.now() transitionTime = nc.now()
} else { } else {
transitionTime = nodeHealth.readyTransitionTimestamp transitionTime = nodeHealth.readyTransitionTimestamp
} }
if klogV := klog.V(5); klogV.Enabled() { if loggerV := logger.V(5); loggerV.Enabled() {
klogV.Infof("Node %s ReadyCondition updated. Updating timestamp: %+v vs %+v.", node.Name, nodeHealth.status, node.Status) loggerV.Info("Node ReadyCondition updated. Updating timestamp", "node", klog.KObj(node), "nodeHealthStatus", nodeHealth.status, "nodeStatus", node.Status)
} else { } else {
klog.V(3).Infof("Node %s ReadyCondition updated. Updating timestamp.", node.Name) logger.V(3).Info("Node ReadyCondition updated. Updating timestamp", "node", klog.KObj(node))
} }
nodeHealth = &nodeHealthData{ nodeHealth = &nodeHealthData{
status: &node.Status, status: &node.Status,
@ -1147,7 +1145,7 @@ func (nc *Controller) tryUpdateNodeHealth(ctx context.Context, node *v1.Node) (t
for _, nodeConditionType := range nodeConditionTypes { for _, nodeConditionType := range nodeConditionTypes {
_, currentCondition := controllerutil.GetNodeCondition(&node.Status, nodeConditionType) _, currentCondition := controllerutil.GetNodeCondition(&node.Status, nodeConditionType)
if currentCondition == nil { if currentCondition == nil {
klog.V(2).Infof("Condition %v of node %v was never updated by kubelet", nodeConditionType, node.Name) logger.V(2).Info("Condition of node was never updated by kubelet", "nodeConditionType", nodeConditionType, "node", klog.KObj(node))
node.Status.Conditions = append(node.Status.Conditions, v1.NodeCondition{ node.Status.Conditions = append(node.Status.Conditions, v1.NodeCondition{
Type: nodeConditionType, Type: nodeConditionType,
Status: v1.ConditionUnknown, Status: v1.ConditionUnknown,
@ -1157,8 +1155,8 @@ func (nc *Controller) tryUpdateNodeHealth(ctx context.Context, node *v1.Node) (t
LastTransitionTime: nowTimestamp, LastTransitionTime: nowTimestamp,
}) })
} else { } else {
klog.V(2).Infof("node %v hasn't been updated for %+v. Last %v is: %+v", logger.V(2).Info("Node hasn't been updated",
node.Name, nc.now().Time.Sub(nodeHealth.probeTimestamp.Time), nodeConditionType, currentCondition) "node", klog.KObj(node), "duration", nc.now().Time.Sub(nodeHealth.probeTimestamp.Time), "nodeConditionType", nodeConditionType, "currentCondition", currentCondition)
if currentCondition.Status != v1.ConditionUnknown { if currentCondition.Status != v1.ConditionUnknown {
currentCondition.Status = v1.ConditionUnknown currentCondition.Status = v1.ConditionUnknown
currentCondition.Reason = "NodeStatusUnknown" currentCondition.Reason = "NodeStatusUnknown"
@ -1172,7 +1170,7 @@ func (nc *Controller) tryUpdateNodeHealth(ctx context.Context, node *v1.Node) (t
if !apiequality.Semantic.DeepEqual(currentReadyCondition, &observedReadyCondition) { if !apiequality.Semantic.DeepEqual(currentReadyCondition, &observedReadyCondition) {
if _, err := nc.kubeClient.CoreV1().Nodes().UpdateStatus(ctx, node, metav1.UpdateOptions{}); err != nil { if _, err := nc.kubeClient.CoreV1().Nodes().UpdateStatus(ctx, node, metav1.UpdateOptions{}); err != nil {
klog.Errorf("Error updating node %s: %v", node.Name, err) logger.Error(err, "Error updating node", "node", klog.KObj(node))
return gracePeriod, observedReadyCondition, currentReadyCondition, err return gracePeriod, observedReadyCondition, currentReadyCondition, err
} }
nodeHealth = &nodeHealthData{ nodeHealth = &nodeHealthData{
@ -1191,6 +1189,7 @@ func (nc *Controller) tryUpdateNodeHealth(ctx context.Context, node *v1.Node) (t
func (nc *Controller) handleDisruption(ctx context.Context, zoneToNodeConditions map[string][]*v1.NodeCondition, nodes []*v1.Node) { func (nc *Controller) handleDisruption(ctx context.Context, zoneToNodeConditions map[string][]*v1.NodeCondition, nodes []*v1.Node) {
newZoneStates := map[string]ZoneState{} newZoneStates := map[string]ZoneState{}
allAreFullyDisrupted := true allAreFullyDisrupted := true
logger := klog.FromContext(ctx)
for k, v := range zoneToNodeConditions { for k, v := range zoneToNodeConditions {
zoneSize.WithLabelValues(k).Set(float64(len(v))) zoneSize.WithLabelValues(k).Set(float64(len(v)))
unhealthy, newState := nc.computeZoneStateFunc(v) unhealthy, newState := nc.computeZoneStateFunc(v)
@ -1201,7 +1200,7 @@ func (nc *Controller) handleDisruption(ctx context.Context, zoneToNodeConditions
} }
newZoneStates[k] = newState newZoneStates[k] = newState
if _, had := nc.zoneStates[k]; !had { if _, had := nc.zoneStates[k]; !had {
klog.Errorf("Setting initial state for unseen zone: %v", k) logger.Error(nil, "Setting initial state for unseen zone", "zone", k)
nc.zoneStates[k] = stateInitial nc.zoneStates[k] = stateInitial
} }
} }
@ -1229,15 +1228,15 @@ func (nc *Controller) handleDisruption(ctx context.Context, zoneToNodeConditions
if !allAreFullyDisrupted || !allWasFullyDisrupted { if !allAreFullyDisrupted || !allWasFullyDisrupted {
// We're switching to full disruption mode // We're switching to full disruption mode
if allAreFullyDisrupted { if allAreFullyDisrupted {
klog.Info("Controller detected that all Nodes are not-Ready. Entering master disruption mode.") logger.Info("Controller detected that all Nodes are not-Ready. Entering master disruption mode")
for i := range nodes { for i := range nodes {
if nc.runTaintManager { if nc.runTaintManager {
_, err := nc.markNodeAsReachable(ctx, nodes[i]) _, err := nc.markNodeAsReachable(ctx, nodes[i])
if err != nil { if err != nil {
klog.Errorf("Failed to remove taints from Node %v", nodes[i].Name) logger.Error(nil, "Failed to remove taints from Node", "node", klog.KObj(nodes[i]))
} }
} else { } else {
nc.cancelPodEviction(nodes[i]) nc.cancelPodEviction(logger, nodes[i])
} }
} }
// We stop all evictions. // We stop all evictions.
@ -1256,7 +1255,7 @@ func (nc *Controller) handleDisruption(ctx context.Context, zoneToNodeConditions
} }
// We're exiting full disruption mode // We're exiting full disruption mode
if allWasFullyDisrupted { if allWasFullyDisrupted {
klog.Info("Controller detected that some Nodes are Ready. Exiting master disruption mode.") logger.Info("Controller detected that some Nodes are Ready. Exiting master disruption mode")
// When exiting disruption mode update probe timestamps on all Nodes. // When exiting disruption mode update probe timestamps on all Nodes.
now := nc.now() now := nc.now()
for i := range nodes { for i := range nodes {
@ -1279,7 +1278,7 @@ func (nc *Controller) handleDisruption(ctx context.Context, zoneToNodeConditions
if v == newState { if v == newState {
continue continue
} }
klog.Infof("Controller detected that zone %v is now in state %v.", k, newState) logger.Info("Controller detected that zone is now in new state", "zone", k, "newState", newState)
nc.setLimiterInZone(k, len(zoneToNodeConditions[k]), newState) nc.setLimiterInZone(k, len(zoneToNodeConditions[k]), newState)
nc.zoneStates[k] = newState nc.zoneStates[k] = newState
} }
@ -1317,12 +1316,13 @@ func (nc *Controller) doPodProcessingWorker(ctx context.Context) {
func (nc *Controller) processPod(ctx context.Context, podItem podUpdateItem) { func (nc *Controller) processPod(ctx context.Context, podItem podUpdateItem) {
defer nc.podUpdateQueue.Done(podItem) defer nc.podUpdateQueue.Done(podItem)
pod, err := nc.podLister.Pods(podItem.namespace).Get(podItem.name) pod, err := nc.podLister.Pods(podItem.namespace).Get(podItem.name)
logger := klog.FromContext(ctx)
if err != nil { if err != nil {
if apierrors.IsNotFound(err) { if apierrors.IsNotFound(err) {
// If the pod was deleted, there is no need to requeue. // If the pod was deleted, there is no need to requeue.
return return
} }
klog.Warningf("Failed to read pod %v/%v: %v.", podItem.namespace, podItem.name, err) logger.Info("Failed to read pod", "pod", klog.KRef(podItem.namespace, podItem.name), "err", err)
nc.podUpdateQueue.AddRateLimited(podItem) nc.podUpdateQueue.AddRateLimited(podItem)
return return
} }
@ -1338,7 +1338,7 @@ func (nc *Controller) processPod(ctx context.Context, podItem podUpdateItem) {
node, err := nc.nodeLister.Get(nodeName) node, err := nc.nodeLister.Get(nodeName)
if err != nil { if err != nil {
klog.Warningf("Failed to read node %v: %v.", nodeName, err) logger.Info("Failed to read node", "node", klog.KRef("", nodeName), "err", err)
nc.podUpdateQueue.AddRateLimited(podItem) nc.podUpdateQueue.AddRateLimited(podItem)
return return
} }
@ -1356,7 +1356,7 @@ func (nc *Controller) processPod(ctx context.Context, podItem podUpdateItem) {
// Pods are processed by TaintManager. // Pods are processed by TaintManager.
if !nc.runTaintManager { if !nc.runTaintManager {
if err := nc.processNoTaintBaseEviction(ctx, node, currentReadyCondition, nc.nodeMonitorGracePeriod, pods); err != nil { if err := nc.processNoTaintBaseEviction(ctx, node, currentReadyCondition, nc.nodeMonitorGracePeriod, pods); err != nil {
klog.Warningf("Unable to process pod %+v eviction from node %v: %v.", podItem, nodeName, err) logger.Info("Unable to process pod eviction from node", "pod", klog.KRef(podItem.namespace, podItem.name), "node", klog.KRef("", nodeName), "err", err)
nc.podUpdateQueue.AddRateLimited(podItem) nc.podUpdateQueue.AddRateLimited(podItem)
return return
} }
@ -1364,7 +1364,7 @@ func (nc *Controller) processPod(ctx context.Context, podItem podUpdateItem) {
if currentReadyCondition.Status != v1.ConditionTrue { if currentReadyCondition.Status != v1.ConditionTrue {
if err := controllerutil.MarkPodsNotReady(ctx, nc.kubeClient, nc.recorder, pods, nodeName); err != nil { if err := controllerutil.MarkPodsNotReady(ctx, nc.kubeClient, nc.recorder, pods, nodeName); err != nil {
klog.Warningf("Unable to mark pod %+v NotReady on node %v: %v.", podItem, nodeName, err) logger.Info("Unable to mark pod NotReady on node", "pod", klog.KRef(podItem.namespace, podItem.name), "node", klog.KRef("", nodeName), "err", err)
nc.podUpdateQueue.AddRateLimited(podItem) nc.podUpdateQueue.AddRateLimited(podItem)
} }
} }
@ -1447,7 +1447,7 @@ func (nc *Controller) ReducedQPSFunc(nodeNum int) float32 {
} }
// addPodEvictorForNewZone checks if a new zone appeared and, if so, adds a new evictor. // addPodEvictorForNewZone checks if a new zone appeared and, if so, adds a new evictor.
func (nc *Controller) addPodEvictorForNewZone(node *v1.Node) { func (nc *Controller) addPodEvictorForNewZone(logger klog.Logger, node *v1.Node) {
nc.evictorLock.Lock() nc.evictorLock.Lock()
defer nc.evictorLock.Unlock() defer nc.evictorLock.Unlock()
zone := nodetopology.GetZoneKey(node) zone := nodetopology.GetZoneKey(node)
@ -1463,7 +1463,7 @@ func (nc *Controller) addPodEvictorForNewZone(node *v1.Node) {
flowcontrol.NewTokenBucketRateLimiter(nc.evictionLimiterQPS, scheduler.EvictionRateLimiterBurst)) flowcontrol.NewTokenBucketRateLimiter(nc.evictionLimiterQPS, scheduler.EvictionRateLimiterBurst))
} }
// Init the metric for the new zone. // Init the metric for the new zone.
klog.Infof("Initializing eviction metric for zone: %v", zone) logger.Info("Initializing eviction metric for zone", "zone", zone)
evictionsNumber.WithLabelValues(zone).Add(0) evictionsNumber.WithLabelValues(zone).Add(0)
evictionsTotal.WithLabelValues(zone).Add(0) evictionsTotal.WithLabelValues(zone).Add(0)
} }
@ -1471,16 +1471,16 @@ func (nc *Controller) addPodEvictorForNewZone(node *v1.Node) {
// cancelPodEviction removes any queued evictions, typically because the node is available again. It // cancelPodEviction removes any queued evictions, typically because the node is available again. It
// returns true if an eviction was queued. // returns true if an eviction was queued.
func (nc *Controller) cancelPodEviction(node *v1.Node) bool { func (nc *Controller) cancelPodEviction(logger klog.Logger, node *v1.Node) bool {
zone := nodetopology.GetZoneKey(node) zone := nodetopology.GetZoneKey(node)
if !nc.nodeEvictionMap.setStatus(node.Name, unmarked) { if !nc.nodeEvictionMap.setStatus(node.Name, unmarked) {
klog.V(2).Infof("node %v was unregistered in the meantime - skipping setting status", node.Name) logger.V(2).Info("Node was unregistered in the meantime - skipping setting status", "node", klog.KObj(node))
} }
nc.evictorLock.Lock() nc.evictorLock.Lock()
defer nc.evictorLock.Unlock() defer nc.evictorLock.Unlock()
wasDeleting := nc.zonePodEvictor[zone].Remove(node.Name) wasDeleting := nc.zonePodEvictor[zone].Remove(node.Name)
if wasDeleting { if wasDeleting {
klog.V(2).Infof("Cancelling pod Eviction on Node: %v", node.Name) logger.V(2).Info("Cancelling pod Eviction on Node", "node", klog.KObj(node))
return true return true
} }
return false return false
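Helpers such as cancelPodEviction hold the evictor lock and have no context of their own, so they now accept a klog.Logger argument instead of deriving one. A sketch of that threading; monitorStep and its cancel parameter are hypothetical stand-ins:

package example

import (
    "context"

    v1 "k8s.io/api/core/v1"
    "k8s.io/klog/v2"
)

// monitorStep derives the logger once from its context and hands it down to a
// helper without a context (cancel stands in for cancelPodEviction).
func monitorStep(ctx context.Context, node *v1.Node, cancel func(klog.Logger, *v1.Node) bool) {
    logger := klog.FromContext(ctx)
    if cancel(logger, node) {
        logger.V(2).Info("Cancelled queued eviction", "node", klog.KObj(node))
    }
}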
@ -1502,8 +1502,9 @@ func (nc *Controller) evictPods(ctx context.Context, node *v1.Node, pods []*v1.P
} }
return false, nil return false, nil
} }
logger := klog.FromContext(ctx)
if !nc.nodeEvictionMap.setStatus(node.Name, toBeEvicted) { if !nc.nodeEvictionMap.setStatus(node.Name, toBeEvicted) {
klog.V(2).Infof("node %v was unregistered in the meantime - skipping setting status", node.Name) logger.V(2).Info("Node was unregistered in the meantime - skipping setting status", "node", klog.KObj(node))
} }
nc.evictorLock.Lock() nc.evictorLock.Lock()
@ -1532,13 +1533,14 @@ func (nc *Controller) markNodeForTainting(node *v1.Node, status v1.ConditionStat
func (nc *Controller) markNodeAsReachable(ctx context.Context, node *v1.Node) (bool, error) { func (nc *Controller) markNodeAsReachable(ctx context.Context, node *v1.Node) (bool, error) {
err := controller.RemoveTaintOffNode(ctx, nc.kubeClient, node.Name, node, UnreachableTaintTemplate) err := controller.RemoveTaintOffNode(ctx, nc.kubeClient, node.Name, node, UnreachableTaintTemplate)
logger := klog.FromContext(ctx)
if err != nil { if err != nil {
klog.Errorf("Failed to remove taint from node %v: %v", node.Name, err) logger.Error(err, "Failed to remove taint from node", "node", klog.KObj(node))
return false, err return false, err
} }
err = controller.RemoveTaintOffNode(ctx, nc.kubeClient, node.Name, node, NotReadyTaintTemplate) err = controller.RemoveTaintOffNode(ctx, nc.kubeClient, node.Name, node, NotReadyTaintTemplate)
if err != nil { if err != nil {
klog.Errorf("Failed to remove taint from node %v: %v", node.Name, err) logger.Error(err, "Failed to remove taint from node", "node", klog.KObj(node))
return false, err return false, err
} }
nc.evictorLock.Lock() nc.evictorLock.Lock()

View File

@ -41,6 +41,8 @@ import (
clientset "k8s.io/client-go/kubernetes" clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/fake" "k8s.io/client-go/kubernetes/fake"
testcore "k8s.io/client-go/testing" testcore "k8s.io/client-go/testing"
"k8s.io/klog/v2"
"k8s.io/klog/v2/ktesting"
kubeletapis "k8s.io/kubelet/pkg/apis" kubeletapis "k8s.io/kubelet/pkg/apis"
"k8s.io/kubernetes/pkg/controller" "k8s.io/kubernetes/pkg/controller"
"k8s.io/kubernetes/pkg/controller/nodelifecycle/scheduler" "k8s.io/kubernetes/pkg/controller/nodelifecycle/scheduler"
@ -88,12 +90,12 @@ type nodeLifecycleController struct {
} }
// doEviction does the fake eviction and returns the status of eviction operation. // doEviction does the fake eviction and returns the status of eviction operation.
func (nc *nodeLifecycleController) doEviction(fakeNodeHandler *testutil.FakeNodeHandler) bool { func (nc *nodeLifecycleController) doEviction(logger klog.Logger, fakeNodeHandler *testutil.FakeNodeHandler) bool {
nc.evictorLock.Lock() nc.evictorLock.Lock()
defer nc.evictorLock.Unlock() defer nc.evictorLock.Unlock()
zones := testutil.GetZones(fakeNodeHandler) zones := testutil.GetZones(fakeNodeHandler)
for _, zone := range zones { for _, zone := range zones {
nc.zonePodEvictor[zone].Try(func(value scheduler.TimedValue) (bool, time.Duration) { nc.zonePodEvictor[zone].Try(logger, func(value scheduler.TimedValue) (bool, time.Duration) {
uid, _ := value.UID.(string) uid, _ := value.UID.(string)
pods, _ := nc.getPodsAssignedToNode(value.Value) pods, _ := nc.getPodsAssignedToNode(value.Value)
controllerutil.DeletePods(context.TODO(), fakeNodeHandler, pods, nc.recorder, value.Value, uid, nc.daemonSetStore) controllerutil.DeletePods(context.TODO(), fakeNodeHandler, pods, nc.recorder, value.Value, uid, nc.daemonSetStore)
@ -721,9 +723,10 @@ func TestMonitorNodeHealthEvictPods(t *testing.T) {
t.Errorf("unexpected error: %v", err) t.Errorf("unexpected error: %v", err)
} }
zones := testutil.GetZones(item.fakeNodeHandler) zones := testutil.GetZones(item.fakeNodeHandler)
logger, _ := ktesting.NewTestContext(t)
for _, zone := range zones { for _, zone := range zones {
if _, ok := nodeController.zonePodEvictor[zone]; ok { if _, ok := nodeController.zonePodEvictor[zone]; ok {
nodeController.zonePodEvictor[zone].Try(func(value scheduler.TimedValue) (bool, time.Duration) { nodeController.zonePodEvictor[zone].Try(logger, func(value scheduler.TimedValue) (bool, time.Duration) {
nodeUID, _ := value.UID.(string) nodeUID, _ := value.UID.(string)
pods, err := nodeController.getPodsAssignedToNode(value.Value) pods, err := nodeController.getPodsAssignedToNode(value.Value)
if err != nil { if err != nil {
@ -883,8 +886,9 @@ func TestPodStatusChange(t *testing.T) {
t.Errorf("unexpected error: %v", err) t.Errorf("unexpected error: %v", err)
} }
zones := testutil.GetZones(item.fakeNodeHandler) zones := testutil.GetZones(item.fakeNodeHandler)
logger, _ := ktesting.NewTestContext(t)
for _, zone := range zones { for _, zone := range zones {
nodeController.zonePodEvictor[zone].Try(func(value scheduler.TimedValue) (bool, time.Duration) { nodeController.zonePodEvictor[zone].Try(logger, func(value scheduler.TimedValue) (bool, time.Duration) {
nodeUID, _ := value.UID.(string) nodeUID, _ := value.UID.(string)
pods, err := nodeController.getPodsAssignedToNode(value.Value) pods, err := nodeController.getPodsAssignedToNode(value.Value)
if err != nil { if err != nil {
@ -1467,8 +1471,9 @@ func TestMonitorNodeHealthEvictPodsWithDisruption(t *testing.T) {
// Infinite loop, used for retrying in case the rate limiter fails to reload for the Try function. // Infinite loop, used for retrying in case the rate limiter fails to reload for the Try function.
// This breaks when we have the status that we need for the test case, or when we don't see the // This breaks when we have the status that we need for the test case, or when we don't see the
// intended result after 1 minute. // intended result after 1 minute.
logger, _ := ktesting.NewTestContext(t)
for { for {
podEvicted = nodeController.doEviction(fakeNodeHandler) podEvicted = nodeController.doEviction(logger, fakeNodeHandler)
if podEvicted == item.expectedEvictPods || time.Since(start) > 1*time.Minute { if podEvicted == item.expectedEvictPods || time.Since(start) > 1*time.Minute {
break break
} }

View File

@ -23,7 +23,6 @@ import (
"k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/util/flowcontrol" "k8s.io/client-go/util/flowcontrol"
"k8s.io/klog/v2" "k8s.io/klog/v2"
) )
@ -229,14 +228,14 @@ type ActionFunc func(TimedValue) (bool, time.Duration)
// function in NodeController when Node becomes Ready again) TODO: // function in NodeController when Node becomes Ready again) TODO:
// figure out a good way to do garbage collection for all Nodes that // figure out a good way to do garbage collection for all Nodes that
// were removed from the cluster. // were removed from the cluster.
func (q *RateLimitedTimedQueue) Try(fn ActionFunc) { func (q *RateLimitedTimedQueue) Try(logger klog.Logger, fn ActionFunc) {
val, ok := q.queue.Head() val, ok := q.queue.Head()
q.limiterLock.Lock() q.limiterLock.Lock()
defer q.limiterLock.Unlock() defer q.limiterLock.Unlock()
for ok { for ok {
// rate limit the queue checking // rate limit the queue checking
if !q.limiter.TryAccept() { if !q.limiter.TryAccept() {
klog.V(10).Infof("Try rate limited for value: %v", val) logger.V(10).Info("Try rate limited", "value", val)
// Try again later // Try again later
break break
} }
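RateLimitedTimedQueue.Try now receives the logger as its first argument, since the queue itself carries no context. A usage sketch under that signature; drainZone and its log message are invented for illustration:

package example

import (
    "context"
    "time"

    "k8s.io/klog/v2"
    "k8s.io/kubernetes/pkg/controller/nodelifecycle/scheduler"
)

// drainZone processes queued nodes with the updated Try signature.
func drainZone(ctx context.Context, evictor *scheduler.RateLimitedTimedQueue) {
    logger := klog.FromContext(ctx)
    evictor.Try(logger, func(value scheduler.TimedValue) (bool, time.Duration) {
        logger.V(2).Info("Processing queued value", "value", value.Value)
        return true, 0 // true removes the head entry; a non-zero duration would retry it later
    })
}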

View File

@ -23,6 +23,7 @@ import (
"k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/util/flowcontrol" "k8s.io/client-go/util/flowcontrol"
"k8s.io/klog/v2/ktesting"
) )
func CheckQueueEq(lhs []string, rhs TimedQueue) bool { func CheckQueueEq(lhs []string, rhs TimedQueue) bool {
@ -144,7 +145,8 @@ func TestTry(t *testing.T) {
evictor.Remove("second") evictor.Remove("second")
deletedMap := sets.NewString() deletedMap := sets.NewString()
evictor.Try(func(value TimedValue) (bool, time.Duration) { logger, _ := ktesting.NewTestContext(t)
evictor.Try(logger, func(value TimedValue) (bool, time.Duration) {
deletedMap.Insert(value.Value) deletedMap.Insert(value.Value)
return true, 0 return true, 0
}) })
@ -180,7 +182,8 @@ func TestTryOrdering(t *testing.T) {
order := []string{} order := []string{}
count := 0 count := 0
hasQueued := false hasQueued := false
evictor.Try(func(value TimedValue) (bool, time.Duration) { logger, _ := ktesting.NewTestContext(t)
evictor.Try(logger, func(value TimedValue) (bool, time.Duration) {
count++ count++
t.Logf("eviction %d", count) t.Logf("eviction %d", count)
if value.ProcessAt.IsZero() { if value.ProcessAt.IsZero() {
@ -242,8 +245,8 @@ func TestTryRemovingWhileTry(t *testing.T) {
evictor.Remove("second") evictor.Remove("second")
close(wait) close(wait)
}() }()
logger, _ := ktesting.NewTestContext(t)
evictor.Try(func(value TimedValue) (bool, time.Duration) { evictor.Try(logger, func(value TimedValue) (bool, time.Duration) {
count++ count++
if value.AddedAt.IsZero() { if value.AddedAt.IsZero() {
t.Fatalf("added should not be zero") t.Fatalf("added should not be zero")
@ -313,7 +316,8 @@ func TestAddAfterTry(t *testing.T) {
evictor.Remove("second") evictor.Remove("second")
deletedMap := sets.NewString() deletedMap := sets.NewString()
evictor.Try(func(value TimedValue) (bool, time.Duration) { logger, _ := ktesting.NewTestContext(t)
evictor.Try(logger, func(value TimedValue) (bool, time.Duration) {
deletedMap.Insert(value.Value) deletedMap.Insert(value.Value)
return true, 0 return true, 0
}) })
@ -327,7 +331,7 @@ func TestAddAfterTry(t *testing.T) {
} }
evictor.Add("first", "11111") evictor.Add("first", "11111")
evictor.Try(func(value TimedValue) (bool, time.Duration) { evictor.Try(logger, func(value TimedValue) (bool, time.Duration) {
t.Errorf("We shouldn't process the same value if the explicit remove wasn't called.") t.Errorf("We shouldn't process the same value if the explicit remove wasn't called.")
return true, 0 return true, 0
}) })
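Every test above obtains its logger the same way: ktesting.NewTestContext returns a klog.Logger wired to t.Log plus a context carrying that logger. A self-contained sketch:

package example

import (
    "testing"

    "k8s.io/klog/v2/ktesting"
)

func TestContextualLogging(t *testing.T) {
    // logger writes through t.Log, so output is attributed to this test;
    // ctx carries the same logger for code that calls klog.FromContext(ctx).
    logger, ctx := ktesting.NewTestContext(t)
    logger.V(2).Info("starting", "test", t.Name())
    _ = ctx // would be passed into the code under test
}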

View File

@ -105,7 +105,7 @@ func deletePodHandler(c clientset.Interface, emitEventFunc func(types.Namespaced
return func(ctx context.Context, args *WorkArgs) error { return func(ctx context.Context, args *WorkArgs) error {
ns := args.NamespacedName.Namespace ns := args.NamespacedName.Namespace
name := args.NamespacedName.Name name := args.NamespacedName.Name
klog.InfoS("NoExecuteTaintManager is deleting pod", "pod", args.NamespacedName.String()) klog.FromContext(ctx).Info("NoExecuteTaintManager is deleting pod", "pod", args.NamespacedName.String())
if emitEventFunc != nil { if emitEventFunc != nil {
emitEventFunc(args.NamespacedName) emitEventFunc(args.NamespacedName)
} }
@ -202,16 +202,17 @@ func NewNoExecuteTaintManager(ctx context.Context, c clientset.Interface, podLis
// Run starts NoExecuteTaintManager which will run in loop until `stopCh` is closed. // Run starts NoExecuteTaintManager which will run in loop until `stopCh` is closed.
func (tc *NoExecuteTaintManager) Run(ctx context.Context) { func (tc *NoExecuteTaintManager) Run(ctx context.Context) {
defer utilruntime.HandleCrash() defer utilruntime.HandleCrash()
logger := klog.FromContext(ctx)
klog.InfoS("Starting NoExecuteTaintManager") logger.Info("Starting NoExecuteTaintManager")
// Start events processing pipeline. // Start events processing pipeline.
tc.broadcaster.StartStructuredLogging(0) tc.broadcaster.StartStructuredLogging(0)
if tc.client != nil { if tc.client != nil {
klog.InfoS("Sending events to api server") logger.Info("Sending events to api server")
tc.broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: tc.client.CoreV1().Events("")}) tc.broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: tc.client.CoreV1().Events("")})
} else { } else {
klog.Fatalf("kubeClient is nil when starting NodeController") logger.Error(nil, "kubeClient is nil when starting NodeController")
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
} }
defer tc.broadcaster.Shutdown() defer tc.broadcaster.Shutdown()
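Contextual logging offers no Fatal call, so the klog.Fatalf in the Run hunk above becomes an explicit Error followed by klog.FlushAndExit. The same pattern in isolation, with mustHaveClient and its nil check as illustrative stand-ins:

package example

import (
    "context"

    clientset "k8s.io/client-go/kubernetes"
    "k8s.io/klog/v2"
)

// mustHaveClient reproduces the Fatal replacement: log, flush buffers, then exit.
func mustHaveClient(ctx context.Context, client clientset.Interface) {
    logger := klog.FromContext(ctx)
    if client == nil {
        logger.Error(nil, "kubeClient is nil when starting NodeController")
        klog.FlushAndExit(klog.ExitFlushTimeout, 1)
    }
}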
@ -363,8 +364,8 @@ func (tc *NoExecuteTaintManager) NodeUpdated(oldNode *v1.Node, newNode *v1.Node)
tc.nodeUpdateQueue.Add(updateItem) tc.nodeUpdateQueue.Add(updateItem)
} }
func (tc *NoExecuteTaintManager) cancelWorkWithEvent(nsName types.NamespacedName) { func (tc *NoExecuteTaintManager) cancelWorkWithEvent(logger klog.Logger, nsName types.NamespacedName) {
if tc.taintEvictionQueue.CancelWork(nsName.String()) { if tc.taintEvictionQueue.CancelWork(logger, nsName.String()) {
tc.emitCancelPodDeletionEvent(nsName) tc.emitCancelPodDeletionEvent(nsName)
} }
} }
@ -377,22 +378,23 @@ func (tc *NoExecuteTaintManager) processPodOnNode(
taints []v1.Taint, taints []v1.Taint,
now time.Time, now time.Time,
) { ) {
logger := klog.FromContext(ctx)
if len(taints) == 0 { if len(taints) == 0 {
tc.cancelWorkWithEvent(podNamespacedName) tc.cancelWorkWithEvent(logger, podNamespacedName)
} }
allTolerated, usedTolerations := v1helper.GetMatchingTolerations(taints, tolerations) allTolerated, usedTolerations := v1helper.GetMatchingTolerations(taints, tolerations)
if !allTolerated { if !allTolerated {
klog.V(2).InfoS("Not all taints are tolerated after update for pod on node", "pod", podNamespacedName.String(), "node", klog.KRef("", nodeName)) logger.V(2).Info("Not all taints are tolerated after update for pod on node", "pod", podNamespacedName.String(), "node", klog.KRef("", nodeName))
// We're canceling scheduled work (if any), as we're going to delete the Pod right away. // We're canceling scheduled work (if any), as we're going to delete the Pod right away.
tc.cancelWorkWithEvent(podNamespacedName) tc.cancelWorkWithEvent(logger, podNamespacedName)
tc.taintEvictionQueue.AddWork(ctx, NewWorkArgs(podNamespacedName.Name, podNamespacedName.Namespace), time.Now(), time.Now()) tc.taintEvictionQueue.AddWork(ctx, NewWorkArgs(podNamespacedName.Name, podNamespacedName.Namespace), time.Now(), time.Now())
return return
} }
minTolerationTime := getMinTolerationTime(usedTolerations) minTolerationTime := getMinTolerationTime(usedTolerations)
// getMinTolerationTime returns negative value to denote infinite toleration. // getMinTolerationTime returns negative value to denote infinite toleration.
if minTolerationTime < 0 { if minTolerationTime < 0 {
klog.V(4).InfoS("Current tolerations for pod tolerate forever, cancelling any scheduled deletion", "pod", podNamespacedName.String()) logger.V(4).Info("Current tolerations for pod tolerate forever, cancelling any scheduled deletion", "pod", podNamespacedName.String())
tc.cancelWorkWithEvent(podNamespacedName) tc.cancelWorkWithEvent(logger, podNamespacedName)
return return
} }
@ -404,19 +406,20 @@ func (tc *NoExecuteTaintManager) processPodOnNode(
if startTime.Add(minTolerationTime).Before(triggerTime) { if startTime.Add(minTolerationTime).Before(triggerTime) {
return return
} }
tc.cancelWorkWithEvent(podNamespacedName) tc.cancelWorkWithEvent(logger, podNamespacedName)
} }
tc.taintEvictionQueue.AddWork(ctx, NewWorkArgs(podNamespacedName.Name, podNamespacedName.Namespace), startTime, triggerTime) tc.taintEvictionQueue.AddWork(ctx, NewWorkArgs(podNamespacedName.Name, podNamespacedName.Namespace), startTime, triggerTime)
} }
func (tc *NoExecuteTaintManager) handlePodUpdate(ctx context.Context, podUpdate podUpdateItem) { func (tc *NoExecuteTaintManager) handlePodUpdate(ctx context.Context, podUpdate podUpdateItem) {
pod, err := tc.podLister.Pods(podUpdate.podNamespace).Get(podUpdate.podName) pod, err := tc.podLister.Pods(podUpdate.podNamespace).Get(podUpdate.podName)
logger := klog.FromContext(ctx)
if err != nil { if err != nil {
if apierrors.IsNotFound(err) { if apierrors.IsNotFound(err) {
// Delete // Delete
podNamespacedName := types.NamespacedName{Namespace: podUpdate.podNamespace, Name: podUpdate.podName} podNamespacedName := types.NamespacedName{Namespace: podUpdate.podNamespace, Name: podUpdate.podName}
klog.V(4).InfoS("Noticed pod deletion", "pod", podNamespacedName) logger.V(4).Info("Noticed pod deletion", "pod", podNamespacedName)
tc.cancelWorkWithEvent(podNamespacedName) tc.cancelWorkWithEvent(logger, podNamespacedName)
return return
} }
utilruntime.HandleError(fmt.Errorf("could not get pod %s/%s: %v", podUpdate.podName, podUpdate.podNamespace, err)) utilruntime.HandleError(fmt.Errorf("could not get pod %s/%s: %v", podUpdate.podName, podUpdate.podNamespace, err))
@ -430,7 +433,7 @@ func (tc *NoExecuteTaintManager) handlePodUpdate(ctx context.Context, podUpdate
// Create or Update // Create or Update
podNamespacedName := types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name} podNamespacedName := types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}
klog.V(4).InfoS("Noticed pod update", "pod", podNamespacedName) logger.V(4).Info("Noticed pod update", "pod", podNamespacedName)
nodeName := pod.Spec.NodeName nodeName := pod.Spec.NodeName
if nodeName == "" { if nodeName == "" {
return return
@ -451,10 +454,11 @@ func (tc *NoExecuteTaintManager) handlePodUpdate(ctx context.Context, podUpdate
func (tc *NoExecuteTaintManager) handleNodeUpdate(ctx context.Context, nodeUpdate nodeUpdateItem) { func (tc *NoExecuteTaintManager) handleNodeUpdate(ctx context.Context, nodeUpdate nodeUpdateItem) {
node, err := tc.nodeLister.Get(nodeUpdate.nodeName) node, err := tc.nodeLister.Get(nodeUpdate.nodeName)
logger := klog.FromContext(ctx)
if err != nil { if err != nil {
if apierrors.IsNotFound(err) { if apierrors.IsNotFound(err) {
// Delete // Delete
klog.V(4).InfoS("Noticed node deletion", "node", nodeUpdate.nodeName) logger.V(4).Info("Noticed node deletion", "node", klog.KRef("", nodeUpdate.nodeName))
tc.taintedNodesLock.Lock() tc.taintedNodesLock.Lock()
defer tc.taintedNodesLock.Unlock() defer tc.taintedNodesLock.Unlock()
delete(tc.taintedNodes, nodeUpdate.nodeName) delete(tc.taintedNodes, nodeUpdate.nodeName)
@ -465,12 +469,12 @@ func (tc *NoExecuteTaintManager) handleNodeUpdate(ctx context.Context, nodeUpdat
} }
// Create or Update // Create or Update
klog.V(4).InfoS("Noticed node update", "node", nodeUpdate) logger.V(4).Info("Noticed node update", "node", klog.KObj(node))
taints := getNoExecuteTaints(node.Spec.Taints) taints := getNoExecuteTaints(node.Spec.Taints)
func() { func() {
tc.taintedNodesLock.Lock() tc.taintedNodesLock.Lock()
defer tc.taintedNodesLock.Unlock() defer tc.taintedNodesLock.Unlock()
klog.V(4).InfoS("Updating known taints on node", "node", node.Name, "taints", taints) logger.V(4).Info("Updating known taints on node", "node", klog.KObj(node), "taints", taints)
if len(taints) == 0 { if len(taints) == 0 {
delete(tc.taintedNodes, node.Name) delete(tc.taintedNodes, node.Name)
} else { } else {
@ -483,7 +487,7 @@ func (tc *NoExecuteTaintManager) handleNodeUpdate(ctx context.Context, nodeUpdat
// tc.PodUpdated which will use tc.taintedNodes to potentially delete delayed pods. // tc.PodUpdated which will use tc.taintedNodes to potentially delete delayed pods.
pods, err := tc.getPodsAssignedToNode(node.Name) pods, err := tc.getPodsAssignedToNode(node.Name)
if err != nil { if err != nil {
klog.ErrorS(err, "Failed to get pods assigned to node", "node", node.Name) logger.Error(err, "Failed to get pods assigned to node", "node", klog.KObj(node))
return return
} }
if len(pods) == 0 { if len(pods) == 0 {
@ -491,9 +495,9 @@ func (tc *NoExecuteTaintManager) handleNodeUpdate(ctx context.Context, nodeUpdat
} }
// Short circuit, to make this controller a bit faster. // Short circuit, to make this controller a bit faster.
if len(taints) == 0 { if len(taints) == 0 {
klog.V(4).InfoS("All taints were removed from the node. Cancelling all evictions...", "node", node.Name) logger.V(4).Info("All taints were removed from the node. Cancelling all evictions...", "node", klog.KObj(node))
for i := range pods { for i := range pods {
tc.cancelWorkWithEvent(types.NamespacedName{Namespace: pods[i].Namespace, Name: pods[i].Name}) tc.cancelWorkWithEvent(logger, types.NamespacedName{Namespace: pods[i].Namespace, Name: pods[i].Name})
} }
return return
} }

View File

@ -52,10 +52,11 @@ type TimedWorker struct {
// createWorker creates a TimedWorker that will execute `f` not earlier than `fireAt`. // createWorker creates a TimedWorker that will execute `f` not earlier than `fireAt`.
func createWorker(ctx context.Context, args *WorkArgs, createdAt time.Time, fireAt time.Time, f func(ctx context.Context, args *WorkArgs) error, clock clock.WithDelayedExecution) *TimedWorker { func createWorker(ctx context.Context, args *WorkArgs, createdAt time.Time, fireAt time.Time, f func(ctx context.Context, args *WorkArgs) error, clock clock.WithDelayedExecution) *TimedWorker {
delay := fireAt.Sub(createdAt) delay := fireAt.Sub(createdAt)
logger := klog.FromContext(ctx)
fWithErrorLogging := func() { fWithErrorLogging := func() {
err := f(ctx, args) err := f(ctx, args)
if err != nil { if err != nil {
klog.Errorf("NodeLifecycle: timed worker failed with error: %q", err) logger.Error(err, "NodeLifecycle: timed worker failed")
} }
} }
if delay <= 0 { if delay <= 0 {
@ -116,12 +117,13 @@ func (q *TimedWorkerQueue) getWrappedWorkerFunc(key string) func(ctx context.Con
// AddWork adds a work to the WorkerQueue which will be executed not earlier than `fireAt`. // AddWork adds a work to the WorkerQueue which will be executed not earlier than `fireAt`.
func (q *TimedWorkerQueue) AddWork(ctx context.Context, args *WorkArgs, createdAt time.Time, fireAt time.Time) { func (q *TimedWorkerQueue) AddWork(ctx context.Context, args *WorkArgs, createdAt time.Time, fireAt time.Time) {
key := args.KeyFromWorkArgs() key := args.KeyFromWorkArgs()
klog.V(4).Infof("Adding TimedWorkerQueue item %v at %v to be fired at %v", key, createdAt, fireAt) logger := klog.FromContext(ctx)
logger.V(4).Info("Adding TimedWorkerQueue item and to be fired at firedTime", "item", key, "createTime", createdAt, "firedTime", fireAt)
q.Lock() q.Lock()
defer q.Unlock() defer q.Unlock()
if _, exists := q.workers[key]; exists { if _, exists := q.workers[key]; exists {
klog.Warningf("Trying to add already existing work for %+v. Skipping.", args) logger.Info("Trying to add already existing work, skipping", "args", args)
return return
} }
worker := createWorker(ctx, args, createdAt, fireAt, q.getWrappedWorkerFunc(key), q.clock) worker := createWorker(ctx, args, createdAt, fireAt, q.getWrappedWorkerFunc(key), q.clock)
@ -129,13 +131,13 @@ func (q *TimedWorkerQueue) AddWork(ctx context.Context, args *WorkArgs, createdA
} }
// CancelWork removes scheduled function execution from the queue. Returns true if work was cancelled. // CancelWork removes scheduled function execution from the queue. Returns true if work was cancelled.
func (q *TimedWorkerQueue) CancelWork(key string) bool { func (q *TimedWorkerQueue) CancelWork(logger klog.Logger, key string) bool {
q.Lock() q.Lock()
defer q.Unlock() defer q.Unlock()
worker, found := q.workers[key] worker, found := q.workers[key]
result := false result := false
if found { if found {
klog.V(4).Infof("Cancelling TimedWorkerQueue item %v at %v", key, time.Now()) logger.V(4).Info("Cancelling TimedWorkerQueue item", "item", key, "time", time.Now())
if worker != nil { if worker != nil {
result = true result = true
worker.Cancel() worker.Cancel()
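Taken together, the TimedWorkerQueue API splits as shown above: AddWork keeps the full context (the worker function and its logger both come from it), while CancelWork only needs a logger. A hedged pairing example; scheduleThenCancel and the pod name are invented:

package example

import (
    "context"
    "time"

    "k8s.io/klog/v2"
    "k8s.io/kubernetes/pkg/controller/nodelifecycle/scheduler"
)

// scheduleThenCancel queues an eviction ten seconds out, then cancels it.
func scheduleThenCancel(ctx context.Context, q *scheduler.TimedWorkerQueue) {
    logger := klog.FromContext(ctx)
    args := scheduler.NewWorkArgs("mypod", "myns")
    now := time.Now()
    q.AddWork(ctx, args, now, now.Add(10*time.Second))
    if q.CancelWork(logger, args.KeyFromWorkArgs()) {
        logger.V(4).Info("Cancelled scheduled work", "item", args.KeyFromWorkArgs())
    }
}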

View File

@ -23,6 +23,7 @@ import (
"testing" "testing"
"time" "time"
"k8s.io/klog/v2/ktesting"
testingclock "k8s.io/utils/clock/testing" testingclock "k8s.io/utils/clock/testing"
) )
@ -108,8 +109,9 @@ func TestCancel(t *testing.T) {
queue.AddWork(context.TODO(), NewWorkArgs("3", "3"), now, then) queue.AddWork(context.TODO(), NewWorkArgs("3", "3"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("4", "4"), now, then) queue.AddWork(context.TODO(), NewWorkArgs("4", "4"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("5", "5"), now, then) queue.AddWork(context.TODO(), NewWorkArgs("5", "5"), now, then)
queue.CancelWork(NewWorkArgs("2", "2").KeyFromWorkArgs()) logger, _ := ktesting.NewTestContext(t)
queue.CancelWork(NewWorkArgs("4", "4").KeyFromWorkArgs()) queue.CancelWork(logger, NewWorkArgs("2", "2").KeyFromWorkArgs())
queue.CancelWork(logger, NewWorkArgs("4", "4").KeyFromWorkArgs())
fakeClock.Step(11 * time.Second) fakeClock.Step(11 * time.Second)
wg.Wait() wg.Wait()
lastVal := atomic.LoadInt32(&testVal) lastVal := atomic.LoadInt32(&testVal)
@ -141,8 +143,9 @@ func TestCancelAndReadd(t *testing.T) {
queue.AddWork(context.TODO(), NewWorkArgs("3", "3"), now, then) queue.AddWork(context.TODO(), NewWorkArgs("3", "3"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("4", "4"), now, then) queue.AddWork(context.TODO(), NewWorkArgs("4", "4"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("5", "5"), now, then) queue.AddWork(context.TODO(), NewWorkArgs("5", "5"), now, then)
queue.CancelWork(NewWorkArgs("2", "2").KeyFromWorkArgs()) logger, _ := ktesting.NewTestContext(t)
queue.CancelWork(NewWorkArgs("4", "4").KeyFromWorkArgs()) queue.CancelWork(logger, NewWorkArgs("2", "2").KeyFromWorkArgs())
queue.CancelWork(logger, NewWorkArgs("4", "4").KeyFromWorkArgs())
queue.AddWork(context.TODO(), NewWorkArgs("2", "2"), now, then) queue.AddWork(context.TODO(), NewWorkArgs("2", "2"), now, then)
fakeClock.Step(11 * time.Second) fakeClock.Step(11 * time.Second)
wg.Wait() wg.Wait()

View File

@ -33,6 +33,7 @@ import (
"k8s.io/client-go/informers" "k8s.io/client-go/informers"
clientset "k8s.io/client-go/kubernetes" clientset "k8s.io/client-go/kubernetes"
featuregatetesting "k8s.io/component-base/featuregate/testing" featuregatetesting "k8s.io/component-base/featuregate/testing"
"k8s.io/klog/v2/ktesting"
"k8s.io/kubernetes/cmd/kube-apiserver/app/options" "k8s.io/kubernetes/cmd/kube-apiserver/app/options"
"k8s.io/kubernetes/pkg/controller/nodeipam" "k8s.io/kubernetes/pkg/controller/nodeipam"
"k8s.io/kubernetes/pkg/controller/nodeipam/ipam" "k8s.io/kubernetes/pkg/controller/nodeipam/ipam"
@ -63,7 +64,7 @@ func TestIPAMMultiCIDRRangeAllocatorCIDRAllocate(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background()) ctx, cancel := context.WithCancel(context.Background())
defer cancel() defer cancel()
go ipamController.Run(ctx.Done()) go ipamController.Run(ctx)
sharedInformer.Start(ctx.Done()) sharedInformer.Start(ctx.Done())
tests := []struct { tests := []struct {
@ -149,7 +150,7 @@ func TestIPAMMultiCIDRRangeAllocatorCIDRRelease(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background()) ctx, cancel := context.WithCancel(context.Background())
defer cancel() defer cancel()
go ipamController.Run(ctx.Done()) go ipamController.Run(ctx)
sharedInformer.Start(ctx.Done()) sharedInformer.Start(ctx.Done())
t.Run("Pod CIDR release after node delete", func(t *testing.T) { t.Run("Pod CIDR release after node delete", func(t *testing.T) {
@ -226,7 +227,7 @@ func TestIPAMMultiCIDRRangeAllocatorClusterCIDRDelete(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background()) ctx, cancel := context.WithCancel(context.Background())
defer cancel() defer cancel()
go ipamController.Run(ctx.Done()) go ipamController.Run(ctx)
sharedInformer.Start(ctx.Done()) sharedInformer.Start(ctx.Done())
t.Run("delete cc with node associated", func(t *testing.T) { t.Run("delete cc with node associated", func(t *testing.T) {
@ -322,7 +323,7 @@ func TestIPAMMultiCIDRRangeAllocatorClusterCIDRTerminate(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background()) ctx, cancel := context.WithCancel(context.Background())
defer cancel() defer cancel()
go ipamController.Run(ctx.Done()) go ipamController.Run(ctx)
sharedInformer.Start(ctx.Done()) sharedInformer.Start(ctx.Done())
t.Run("Pod CIDRS must not be allocated from a terminating CC", func(t *testing.T) { t.Run("Pod CIDRS must not be allocated from a terminating CC", func(t *testing.T) {
@ -407,7 +408,7 @@ func TestIPAMMultiCIDRRangeAllocatorClusterCIDRTieBreak(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background()) ctx, cancel := context.WithCancel(context.Background())
defer cancel() defer cancel()
go ipamController.Run(ctx.Done()) go ipamController.Run(ctx)
sharedInformer.Start(ctx.Done()) sharedInformer.Start(ctx.Done())
tests := []struct { tests := []struct {
@ -557,8 +558,9 @@ func booststrapMultiCIDRRangeAllocator(t *testing.T,
if _, err := clientSet.CoreV1().Nodes().Create(context.TODO(), initialNode, metav1.CreateOptions{}); err != nil { if _, err := clientSet.CoreV1().Nodes().Create(context.TODO(), initialNode, metav1.CreateOptions{}); err != nil {
t.Fatal(err) t.Fatal(err)
} }
_, ctx := ktesting.NewTestContext(t)
ipamController, err := nodeipam.NewNodeIpamController( ipamController, err := nodeipam.NewNodeIpamController(
ctx,
sharedInformer.Core().V1().Nodes(), sharedInformer.Core().V1().Nodes(),
sharedInformer.Networking().V1alpha1().ClusterCIDRs(), sharedInformer.Networking().V1alpha1().ClusterCIDRs(),
nil, nil,

View File

@ -17,6 +17,7 @@ limitations under the License.
package ipamperf package ipamperf
import ( import (
"context"
"encoding/json" "encoding/json"
"fmt" "fmt"
"net" "net"
@ -25,6 +26,7 @@ import (
"time" "time"
"k8s.io/klog/v2" "k8s.io/klog/v2"
"k8s.io/klog/v2/ktesting"
netutils "k8s.io/utils/net" netutils "k8s.io/utils/net"
"k8s.io/client-go/informers" "k8s.io/client-go/informers"
@ -37,7 +39,7 @@ import (
"k8s.io/kubernetes/test/integration/util" "k8s.io/kubernetes/test/integration/util"
) )
func setupAllocator(kubeConfig *restclient.Config, config *Config, clusterCIDR, serviceCIDR *net.IPNet, subnetMaskSize int) (*clientset.Clientset, util.ShutdownFunc, error) { func setupAllocator(ctx context.Context, kubeConfig *restclient.Config, config *Config, clusterCIDR, serviceCIDR *net.IPNet, subnetMaskSize int) (*clientset.Clientset, util.ShutdownFunc, error) {
controllerStopChan := make(chan struct{}) controllerStopChan := make(chan struct{})
shutdownFunc := func() { shutdownFunc := func() {
close(controllerStopChan) close(controllerStopChan)
@ -50,6 +52,7 @@ func setupAllocator(kubeConfig *restclient.Config, config *Config, clusterCIDR,
sharedInformer := informers.NewSharedInformerFactory(clientSet, 1*time.Hour) sharedInformer := informers.NewSharedInformerFactory(clientSet, 1*time.Hour)
ipamController, err := nodeipam.NewNodeIpamController( ipamController, err := nodeipam.NewNodeIpamController(
ctx,
sharedInformer.Core().V1().Nodes(), sharedInformer.Core().V1().Nodes(),
sharedInformer.Networking().V1alpha1().ClusterCIDRs(), sharedInformer.Networking().V1alpha1().ClusterCIDRs(),
config.Cloud, clientSet, []*net.IPNet{clusterCIDR}, serviceCIDR, nil, config.Cloud, clientSet, []*net.IPNet{clusterCIDR}, serviceCIDR, nil,
@ -58,7 +61,7 @@ func setupAllocator(kubeConfig *restclient.Config, config *Config, clusterCIDR,
if err != nil { if err != nil {
return nil, shutdownFunc, err return nil, shutdownFunc, err
} }
go ipamController.Run(controllerStopChan) go ipamController.Run(ctx)
sharedInformer.Start(controllerStopChan) sharedInformer.Start(controllerStopChan)
return clientSet, shutdownFunc, nil return clientSet, shutdownFunc, nil
@ -74,8 +77,8 @@ func runTest(t *testing.T, kubeConfig *restclient.Config, config *Config, cluste
nodeClient := clientset.NewForConfigOrDie(nodeClientConfig) nodeClient := clientset.NewForConfigOrDie(nodeClientConfig)
defer deleteNodes(nodeClient) // cleanup nodes on after controller shutdown defer deleteNodes(nodeClient) // cleanup nodes on after controller shutdown
_, ctx := ktesting.NewTestContext(t)
clientSet, shutdownFunc, err := setupAllocator(kubeConfig, config, clusterCIDR, serviceCIDR, subnetMaskSize) clientSet, shutdownFunc, err := setupAllocator(ctx, kubeConfig, config, clusterCIDR, serviceCIDR, subnetMaskSize)
if err != nil { if err != nil {
t.Fatalf("Error starting IPAM allocator: %v", err) t.Fatalf("Error starting IPAM allocator: %v", err)
} }
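These integration hunks finish the stop-channel-to-context migration: the controller takes ctx at construction, and Run(ctx) replaces Run(stopCh). A generic sketch of that shape, with runUntilCancelled as a hypothetical adapter:

package example

import "context"

// runUntilCancelled adapts the new Run(ctx) shape: cancelling the returned
// stop function takes the place of closing a stop channel.
func runUntilCancelled(run func(ctx context.Context)) (stop func()) {
    ctx, cancel := context.WithCancel(context.Background())
    go run(ctx)
    return cancel
}

A caller would write stop := runUntilCancelled(ipamController.Run) and later invoke stop() where it previously closed the channel.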