Merge pull request #112670 from yangjunmyfm192085/delklogV0
use contextual logging (nodeipam and nodelifecycle part)
commit 37326f7cea
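The diffs below migrate the nodeipam controllers to contextual logging (the KEP-3077 pattern): loggers travel inside a context.Context, stop channels become context cancellation, and format-string klog calls become structured key/value calls. A minimal standalone sketch of the before/after shape (the function and names here are illustrative, not taken from the diff):

```go
package main

import (
	"context"

	"k8s.io/klog/v2"
)

// handleNode shows the pattern applied throughout this commit: fetch the
// logger from the context and log with key/value pairs.
func handleNode(ctx context.Context, nodeName string, cidrs []string) {
	logger := klog.FromContext(ctx)
	// Before: klog.Infof("Node %q has CIDRs %v", nodeName, cidrs)
	logger.Info("Node has CIDRs", "node", klog.KRef("", nodeName), "CIDRs", cidrs)
}

func main() {
	handleNode(context.Background(), "node-1", []string{"10.0.0.0/24"})
}
```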
@@ -62,11 +62,11 @@ func (nodeIpamController *nodeIPAMController) StartNodeIpamControllerWrapper(ini
 	nodeIpamController.nodeIPAMControllerOptions.ApplyTo(&nodeIpamController.nodeIPAMControllerConfiguration)
 
 	return func(ctx context.Context, controllerContext genericcontrollermanager.ControllerContext) (controller.Interface, bool, error) {
-		return startNodeIpamController(initContext, completedConfig, nodeIpamController.nodeIPAMControllerConfiguration, controllerContext, cloud)
+		return startNodeIpamController(ctx, initContext, completedConfig, nodeIpamController.nodeIPAMControllerConfiguration, controllerContext, cloud)
 	}
 }
 
-func startNodeIpamController(initContext app.ControllerInitContext, ccmConfig *cloudcontrollerconfig.CompletedConfig, nodeIPAMConfig nodeipamconfig.NodeIPAMControllerConfiguration, ctx genericcontrollermanager.ControllerContext, cloud cloudprovider.Interface) (controller.Interface, bool, error) {
+func startNodeIpamController(ctx context.Context, initContext app.ControllerInitContext, ccmConfig *cloudcontrollerconfig.CompletedConfig, nodeIPAMConfig nodeipamconfig.NodeIPAMControllerConfiguration, controllerCtx genericcontrollermanager.ControllerContext, cloud cloudprovider.Interface) (controller.Interface, bool, error) {
 	var serviceCIDR *net.IPNet
 	var secondaryServiceCIDR *net.IPNet
 
@@ -130,14 +130,14 @@ func startNodeIpamController(initContext app.ControllerInitContext, ccmConfig *c
 
 	var clusterCIDRInformer v1alpha1.ClusterCIDRInformer
 	if utilfeature.DefaultFeatureGate.Enabled(features.MultiCIDRRangeAllocator) {
-		clusterCIDRInformer = ctx.InformerFactory.Networking().V1alpha1().ClusterCIDRs()
+		clusterCIDRInformer = controllerCtx.InformerFactory.Networking().V1alpha1().ClusterCIDRs()
 	}
 
 	nodeIpamController, err := nodeipamcontroller.NewNodeIpamController(
-		ctx.InformerFactory.Core().V1().Nodes(),
+		ctx,
+		controllerCtx.InformerFactory.Core().V1().Nodes(),
 		clusterCIDRInformer,
 		cloud,
-		ctx.ClientBuilder.ClientOrDie(initContext.ClientName),
+		controllerCtx.ClientBuilder.ClientOrDie(initContext.ClientName),
 		clusterCIDRs,
 		serviceCIDR,
 		secondaryServiceCIDR,
@@ -147,7 +147,7 @@ func startNodeIpamController(initContext app.ControllerInitContext, ccmConfig *c
 	if err != nil {
 		return nil, true, err
 	}
-	go nodeIpamController.Run(ctx.Stop)
+	go nodeIpamController.Run(ctx)
 	return nil, true, nil
 }
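In this first file, the cloud-controller-manager wrapper threads the context.Context it already receives into startNodeIpamController, and the controller's Run consumes the context instead of the old ctx.Stop channel. A context subsumes a stop channel, as this hedged sketch shows (the types are stand-ins, not the real ones):

```go
package main

import "context"

// runner stands in for a controller whose Run now takes a context.
type runner interface {
	Run(ctx context.Context)
}

// start mirrors the new wiring: a single ctx provides cancellation
// (ctx.Done() replaces a separate stop channel) and carries the logger.
func start(ctx context.Context, c runner) {
	go c.Run(ctx)
	<-ctx.Done() // block until shutdown is requested
}
```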
@@ -154,7 +154,9 @@ func startNodeIpamController(ctx context.Context, controllerContext ControllerCo
 		clusterCIDRInformer = controllerContext.InformerFactory.Networking().V1alpha1().ClusterCIDRs()
 	}
 
+	ctx = klog.NewContext(ctx, klog.LoggerWithName(klog.FromContext(ctx), "NodeIpamController"))
 	nodeIpamController, err := nodeipamcontroller.NewNodeIpamController(
+		ctx,
 		controllerContext.InformerFactory.Core().V1().Nodes(),
 		clusterCIDRInformer,
 		controllerContext.Cloud,
@@ -168,7 +170,7 @@ func startNodeIpamController(ctx context.Context, controllerContext ControllerCo
 	if err != nil {
 		return nil, true, err
 	}
-	go nodeIpamController.RunWithMetrics(ctx.Done(), controllerContext.ControllerManagerMetrics)
+	go nodeIpamController.RunWithMetrics(ctx, controllerContext.ControllerManagerMetrics)
 	return nil, true, nil
 }
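The kube-controller-manager side additionally names the logger before storing it back into the context, so every line the controller emits is tagged with its name. That idiom in isolation:

```go
package main

import (
	"context"

	"k8s.io/klog/v2"
)

func main() {
	ctx := context.Background()
	// Derive a named logger and put it back into the context; everything
	// downstream that calls klog.FromContext(ctx) inherits the name.
	ctx = klog.NewContext(ctx, klog.LoggerWithName(klog.FromContext(ctx), "NodeIpamController"))
	klog.FromContext(ctx).Info("controller starting")
}
```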
@@ -61,7 +61,7 @@ func newAdapter(k8s clientset.Interface, cloud *gce.Cloud) *adapter {
 	return ret
 }
 
-func (a *adapter) Run(stopCh <-chan struct{}) {
+func (a *adapter) Run(ctx context.Context) {
 	defer utilruntime.HandleCrash()
 
 	// Start event processing pipeline.
@@ -69,7 +69,7 @@ func (a *adapter) Run(stopCh <-chan struct{}) {
 	a.broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: a.k8s.CoreV1().Events("")})
 	defer a.broadcaster.Shutdown()
 
-	<-stopCh
+	<-ctx.Done()
 }
 
 func (a *adapter) Alias(ctx context.Context, node *v1.Node) (*net.IPNet, error) {
@@ -88,7 +88,7 @@ func (a *adapter) Alias(ctx context.Context, node *v1.Node) (*net.IPNet, error)
 	case 1:
 		break
 	default:
-		klog.Warningf("Node %q has more than one alias assigned (%v), defaulting to the first", node.Name, cidrs)
+		klog.FromContext(ctx).Info("Node has more than one alias assigned, defaulting to the first", "node", klog.KObj(node), "CIDRs", cidrs)
 	}
 
 	_, cidrRange, err := netutils.ParseCIDRSloppy(cidrs[0])
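adapter.Run is a representative stop-channel-to-context conversion: the signature changes and the final blocking receive switches to ctx.Done(). In generic form (adapterLike is a hypothetical stand-in):

```go
package demo

import (
	"context"

	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
)

type adapterLike struct{}

// Run blocks until the context is cancelled; <-ctx.Done() is the drop-in
// replacement for the old <-stopCh receive.
func (a *adapterLike) Run(ctx context.Context) {
	defer utilruntime.HandleCrash()
	<-ctx.Done()
}
```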
@@ -91,11 +91,11 @@ type CIDRAllocator interface {
 	// AllocateOrOccupyCIDR looks at the given node, assigns it a valid
 	// CIDR if it doesn't currently have one or mark the CIDR as used if
 	// the node already have one.
-	AllocateOrOccupyCIDR(node *v1.Node) error
+	AllocateOrOccupyCIDR(logger klog.Logger, node *v1.Node) error
 	// ReleaseCIDR releases the CIDR of the removed node.
-	ReleaseCIDR(node *v1.Node) error
+	ReleaseCIDR(logger klog.Logger, node *v1.Node) error
 	// Run starts all the working logic of the allocator.
-	Run(stopCh <-chan struct{})
+	Run(ctx context.Context)
 }
 
 // CIDRAllocatorParams is parameters that's required for creating new
@@ -119,29 +119,30 @@ type nodeReservedCIDRs struct {
 }
 
 // New creates a new CIDR range allocator.
-func New(kubeClient clientset.Interface, cloud cloudprovider.Interface, nodeInformer informers.NodeInformer, clusterCIDRInformer networkinginformers.ClusterCIDRInformer, allocatorType CIDRAllocatorType, allocatorParams CIDRAllocatorParams) (CIDRAllocator, error) {
-	nodeList, err := listNodes(kubeClient)
+func New(ctx context.Context, kubeClient clientset.Interface, cloud cloudprovider.Interface, nodeInformer informers.NodeInformer, clusterCIDRInformer networkinginformers.ClusterCIDRInformer, allocatorType CIDRAllocatorType, allocatorParams CIDRAllocatorParams) (CIDRAllocator, error) {
+	logger := klog.FromContext(ctx)
+	nodeList, err := listNodes(logger, kubeClient)
 	if err != nil {
 		return nil, err
 	}
 
 	switch allocatorType {
 	case RangeAllocatorType:
-		return NewCIDRRangeAllocator(kubeClient, nodeInformer, allocatorParams, nodeList)
+		return NewCIDRRangeAllocator(logger, kubeClient, nodeInformer, allocatorParams, nodeList)
 	case MultiCIDRRangeAllocatorType:
 		if !utilfeature.DefaultFeatureGate.Enabled(features.MultiCIDRRangeAllocator) {
 			return nil, fmt.Errorf("invalid CIDR allocator type: %v, feature gate %v must be enabled", allocatorType, features.MultiCIDRRangeAllocator)
 		}
-		return NewMultiCIDRRangeAllocator(kubeClient, nodeInformer, clusterCIDRInformer, allocatorParams, nodeList, nil)
+		return NewMultiCIDRRangeAllocator(ctx, kubeClient, nodeInformer, clusterCIDRInformer, allocatorParams, nodeList, nil)
 
 	case CloudAllocatorType:
-		return NewCloudCIDRAllocator(kubeClient, cloud, nodeInformer)
+		return NewCloudCIDRAllocator(logger, kubeClient, cloud, nodeInformer)
 	default:
 		return nil, fmt.Errorf("invalid CIDR allocator type: %v", allocatorType)
 	}
 }
 
-func listNodes(kubeClient clientset.Interface) (*v1.NodeList, error) {
+func listNodes(logger klog.Logger, kubeClient clientset.Interface) (*v1.NodeList, error) {
 	var nodeList *v1.NodeList
 	// We must poll because apiserver might not be up. This error causes
 	// controller manager to restart.
@@ -152,7 +153,7 @@ func listNodes(kubeClient clientset.Interface) (*v1.NodeList, error) {
 		LabelSelector: labels.Everything().String(),
 	})
 	if err != nil {
-		klog.Errorf("Failed to list all nodes: %v", err)
+		logger.Error(err, "Failed to list all nodes")
 		return false, nil
 	}
 	return true, nil
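Because the CIDRAllocator interface itself changes, every implementation (range, multi-CIDR, cloud) must grow the logger and context parameters. A minimal no-op implementation against the revised method set, purely as a compile-level sketch (fakeAllocator is hypothetical):

```go
package ipam

import (
	"context"

	v1 "k8s.io/api/core/v1"
	"k8s.io/klog/v2"
)

// fakeAllocator demonstrates the new method set; it does nothing useful.
type fakeAllocator struct{}

func (f *fakeAllocator) AllocateOrOccupyCIDR(logger klog.Logger, node *v1.Node) error {
	logger.Info("would allocate or occupy CIDR", "node", klog.KObj(node))
	return nil
}

func (f *fakeAllocator) ReleaseCIDR(logger klog.Logger, node *v1.Node) error {
	logger.Info("would release CIDR", "node", klog.KObj(node))
	return nil
}

// Run blocks until cancellation, matching the context-based interface.
func (f *fakeAllocator) Run(ctx context.Context) {
	<-ctx.Done()
}
```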
@@ -23,7 +23,6 @@ import (
 	"testing"
 
 	"k8s.io/component-base/metrics/testutil"
-	"k8s.io/klog/v2"
 	netutils "k8s.io/utils/net"
 )
 
@@ -558,34 +557,35 @@ func TestGetBitforCIDR(t *testing.T) {
 	}
 
 	for _, tc := range cases {
-		_, clusterCIDR, err := netutils.ParseCIDRSloppy(tc.clusterCIDRStr)
-		if err != nil {
-			t.Fatalf("unexpected error: %v for %v", err, tc.description)
-		}
+		t.Run(tc.description, func(t *testing.T) {
+			_, clusterCIDR, err := netutils.ParseCIDRSloppy(tc.clusterCIDRStr)
+			if err != nil {
+				t.Fatalf("unexpected error: %v", err)
+			}
 
-		cs, err := NewCIDRSet(clusterCIDR, tc.subNetMaskSize)
-		if err != nil {
-			t.Fatalf("Error allocating CIDRSet for %v", tc.description)
-		}
-		_, subnetCIDR, err := netutils.ParseCIDRSloppy(tc.subNetCIDRStr)
-		if err != nil {
-			t.Fatalf("unexpected error: %v for %v", err, tc.description)
-		}
+			cs, err := NewCIDRSet(clusterCIDR, tc.subNetMaskSize)
+			if err != nil {
+				t.Fatalf("Error allocating CIDRSet")
+			}
+			_, subnetCIDR, err := netutils.ParseCIDRSloppy(tc.subNetCIDRStr)
+			if err != nil {
+				t.Fatalf("unexpected error: %v", err)
+			}
+			got, err := cs.getIndexForCIDR(subnetCIDR)
+			if err == nil && tc.expectErr {
+				t.Errorf("expected error but got null")
+				return
+			}
 
-		got, err := cs.getIndexForCIDR(subnetCIDR)
-		if err == nil && tc.expectErr {
-			klog.Errorf("expected error but got null for %v", tc.description)
-			continue
-		}
+			if err != nil && !tc.expectErr {
+				t.Errorf("unexpected error: %v", err)
+				return
+			}
 
-		if err != nil && !tc.expectErr {
-			klog.Errorf("unexpected error: %v for %v", err, tc.description)
-			continue
-		}
-
-		if got != tc.expectedBit {
-			klog.Errorf("expected %v, but got %v for %v", tc.expectedBit, got, tc.description)
-		}
+			if got != tc.expectedBit {
+				t.Errorf("expected %v, but got %v", tc.expectedBit, got)
+			}
+		})
 	}
 }
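This test refactor replaces klog.Errorf-plus-continue reporting with t.Run subtests, so each table case fails independently through the testing package. The shape of that pattern, reduced to a self-contained example:

```go
package example

import "testing"

func TestPattern(t *testing.T) {
	cases := []struct {
		description string
		in, want    int
	}{
		{description: "identity", in: 1, want: 1},
		{description: "another case", in: 2, want: 2},
	}
	for _, tc := range cases {
		t.Run(tc.description, func(t *testing.T) {
			// t.Errorf/t.Fatalf replace klog.Errorf + continue: the case
			// name comes from t.Run, so it no longer has to be repeated
			// in every message.
			if got := tc.in; got != tc.want {
				t.Errorf("expected %v, but got %v", tc.want, got)
			}
		})
	}
}
```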
@@ -20,6 +20,7 @@ limitations under the License.
 package ipam
 
 import (
+	"context"
 	"fmt"
 	"math/rand"
 	"net"
@@ -27,6 +28,7 @@ import (
 	"time"
 
 	"github.com/google/go-cmp/cmp"
 
 	"k8s.io/klog/v2"
 
 	v1 "k8s.io/api/core/v1"
@@ -85,9 +87,10 @@ type cloudCIDRAllocator struct {
 var _ CIDRAllocator = (*cloudCIDRAllocator)(nil)
 
 // NewCloudCIDRAllocator creates a new cloud CIDR allocator.
-func NewCloudCIDRAllocator(client clientset.Interface, cloud cloudprovider.Interface, nodeInformer informers.NodeInformer) (CIDRAllocator, error) {
+func NewCloudCIDRAllocator(logger klog.Logger, client clientset.Interface, cloud cloudprovider.Interface, nodeInformer informers.NodeInformer) (CIDRAllocator, error) {
 	if client == nil {
-		klog.Fatalf("kubeClient is nil when starting NodeController")
+		logger.Error(nil, "kubeClient is nil when starting cloud CIDR allocator")
+		klog.FlushAndExit(klog.ExitFlushTimeout, 1)
 	}
 
 	eventBroadcaster := record.NewBroadcaster()
@@ -111,74 +114,80 @@ func NewCloudCIDRAllocator(client clientset.Interface, cloud cloudprovider.Inter
 	}
 
 	nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
-		AddFunc: controllerutil.CreateAddNodeHandler(ca.AllocateOrOccupyCIDR),
+		AddFunc: controllerutil.CreateAddNodeHandler(
+			func(node *v1.Node) error {
+				return ca.AllocateOrOccupyCIDR(logger, node)
+			}),
 		UpdateFunc: controllerutil.CreateUpdateNodeHandler(func(_, newNode *v1.Node) error {
 			if newNode.Spec.PodCIDR == "" {
-				return ca.AllocateOrOccupyCIDR(newNode)
+				return ca.AllocateOrOccupyCIDR(logger, newNode)
 			}
 			// Even if PodCIDR is assigned, but NetworkUnavailable condition is
 			// set to true, we need to process the node to set the condition.
 			networkUnavailableTaint := &v1.Taint{Key: v1.TaintNodeNetworkUnavailable, Effect: v1.TaintEffectNoSchedule}
 			_, cond := controllerutil.GetNodeCondition(&newNode.Status, v1.NodeNetworkUnavailable)
 			if cond == nil || cond.Status != v1.ConditionFalse || utiltaints.TaintExists(newNode.Spec.Taints, networkUnavailableTaint) {
-				return ca.AllocateOrOccupyCIDR(newNode)
+				return ca.AllocateOrOccupyCIDR(logger, newNode)
 			}
 			return nil
 		}),
-		DeleteFunc: controllerutil.CreateDeleteNodeHandler(ca.ReleaseCIDR),
+		DeleteFunc: controllerutil.CreateDeleteNodeHandler(func(node *v1.Node) error {
+			return ca.ReleaseCIDR(logger, node)
+		}),
 	})
 
-	klog.Infof("Using cloud CIDR allocator (provider: %v)", cloud.ProviderName())
+	logger.Info("Using cloud CIDR allocator", "provider", cloud.ProviderName())
 	return ca, nil
 }
 
-func (ca *cloudCIDRAllocator) Run(stopCh <-chan struct{}) {
+func (ca *cloudCIDRAllocator) Run(ctx context.Context) {
 	defer utilruntime.HandleCrash()
 
 	// Start event processing pipeline.
 	ca.broadcaster.StartStructuredLogging(0)
-	klog.Infof("Sending events to api server.")
+	logger := klog.FromContext(ctx)
+	logger.Info("Sending events to api server")
 	ca.broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: ca.client.CoreV1().Events("")})
 	defer ca.broadcaster.Shutdown()
 
-	klog.Infof("Starting cloud CIDR allocator")
-	defer klog.Infof("Shutting down cloud CIDR allocator")
+	logger.Info("Starting cloud CIDR allocator")
+	defer logger.Info("Shutting down cloud CIDR allocator")
 
-	if !cache.WaitForNamedCacheSync("cidrallocator", stopCh, ca.nodesSynced) {
+	if !cache.WaitForNamedCacheSync("cidrallocator", ctx.Done(), ca.nodesSynced) {
 		return
 	}
 
 	for i := 0; i < cidrUpdateWorkers; i++ {
-		go ca.worker(stopCh)
+		go ca.worker(ctx)
 	}
 
-	<-stopCh
+	<-ctx.Done()
 }
 
-func (ca *cloudCIDRAllocator) worker(stopChan <-chan struct{}) {
+func (ca *cloudCIDRAllocator) worker(ctx context.Context) {
+	logger := klog.FromContext(ctx)
 	for {
 		select {
 		case workItem, ok := <-ca.nodeUpdateChannel:
 			if !ok {
-				klog.Warning("Channel nodeCIDRUpdateChannel was unexpectedly closed")
+				logger.Info("Channel nodeCIDRUpdateChannel was unexpectedly closed")
 				return
 			}
-			if err := ca.updateCIDRAllocation(workItem); err == nil {
-				klog.V(3).Infof("Updated CIDR for %q", workItem)
+			if err := ca.updateCIDRAllocation(logger, workItem); err == nil {
+				logger.V(3).Info("Updated CIDR", "workItem", workItem)
 			} else {
-				klog.Errorf("Error updating CIDR for %q: %v", workItem, err)
-				if canRetry, timeout := ca.retryParams(workItem); canRetry {
-					klog.V(2).Infof("Retrying update for %q after %v", workItem, timeout)
+				logger.Error(err, "Error updating CIDR", "workItem", workItem)
+				if canRetry, timeout := ca.retryParams(logger, workItem); canRetry {
+					logger.V(2).Info("Retrying update on next period", "workItem", workItem, "timeout", timeout)
					time.AfterFunc(timeout, func() {
						// Requeue the failed node for update again.
						ca.nodeUpdateChannel <- workItem
					})
					continue
				}
-				klog.Errorf("Exceeded retry count for %q, dropping from queue", workItem)
+				logger.Error(nil, "Exceeded retry count, dropping from queue", "workItem", workItem)
			}
			ca.removeNodeFromProcessing(workItem)
-		case <-stopChan:
+		case <-ctx.Done():
			return
		}
	}
@@ -194,13 +203,13 @@ func (ca *cloudCIDRAllocator) insertNodeToProcessing(nodeName string) bool {
	return true
}

-func (ca *cloudCIDRAllocator) retryParams(nodeName string) (bool, time.Duration) {
+func (ca *cloudCIDRAllocator) retryParams(logger klog.Logger, nodeName string) (bool, time.Duration) {
	ca.lock.Lock()
	defer ca.lock.Unlock()

	entry, ok := ca.nodesInProcessing[nodeName]
	if !ok {
-		klog.Errorf("Cannot get retryParams for %q as entry does not exist", nodeName)
+		logger.Error(nil, "Cannot get retryParams for node as entry does not exist", "node", klog.KRef("", nodeName))
		return false, 0
	}

@@ -233,28 +242,28 @@ func (ca *cloudCIDRAllocator) removeNodeFromProcessing(nodeName string) {
 // WARNING: If you're adding any return calls or defer any more work from this
 // function you have to make sure to update nodesInProcessing properly with the
 // disposition of the node when the work is done.
-func (ca *cloudCIDRAllocator) AllocateOrOccupyCIDR(node *v1.Node) error {
+func (ca *cloudCIDRAllocator) AllocateOrOccupyCIDR(logger klog.Logger, node *v1.Node) error {
	if node == nil {
		return nil
	}
	if !ca.insertNodeToProcessing(node.Name) {
-		klog.V(2).InfoS("Node is already in a process of CIDR assignment", "node", klog.KObj(node))
+		logger.V(2).Info("Node is already in a process of CIDR assignment", "node", klog.KObj(node))
		return nil
	}

-	klog.V(4).Infof("Putting node %s into the work queue", node.Name)
+	logger.V(4).Info("Putting node into the work queue", "node", klog.KObj(node))
	ca.nodeUpdateChannel <- node.Name
	return nil
}

 // updateCIDRAllocation assigns CIDR to Node and sends an update to the API server.
-func (ca *cloudCIDRAllocator) updateCIDRAllocation(nodeName string) error {
+func (ca *cloudCIDRAllocator) updateCIDRAllocation(logger klog.Logger, nodeName string) error {
	node, err := ca.nodeLister.Get(nodeName)
	if err != nil {
		if errors.IsNotFound(err) {
			return nil // node no longer available, skip processing
		}
-		klog.ErrorS(err, "Failed while getting the node for updating Node.Spec.PodCIDR", "nodeName", nodeName)
+		logger.Error(err, "Failed while getting the node for updating Node.Spec.PodCIDR", "node", klog.KRef("", nodeName))
		return err
	}
	if node.Spec.ProviderID == "" {
@@ -272,7 +281,7 @@ func (ca *cloudCIDRAllocator) updateCIDRAllocation(nodeName string) error {
	}
	//Can have at most 2 ips (one for v4 and one for v6)
	if len(cidrStrings) > 2 {
-		klog.InfoS("Got more than 2 ips, truncating to 2", "cidrStrings", cidrStrings)
+		logger.Info("Got more than 2 ips, truncating to 2", "cidrStrings", cidrStrings)
		cidrStrings = cidrStrings[:2]
	}

@@ -281,13 +290,13 @@ func (ca *cloudCIDRAllocator) updateCIDRAllocation(nodeName string) error {
		return fmt.Errorf("failed to parse strings %v as CIDRs: %v", cidrStrings, err)
	}

-	needUpdate, err := needPodCIDRsUpdate(node, cidrs)
+	needUpdate, err := needPodCIDRsUpdate(logger, node, cidrs)
	if err != nil {
		return fmt.Errorf("err: %v, CIDRS: %v", err, cidrStrings)
	}
	if needUpdate {
		if node.Spec.PodCIDR != "" {
-			klog.ErrorS(nil, "PodCIDR being reassigned!", "nodeName", node.Name, "podCIDRs", node.Spec.PodCIDRs, "cidrStrings", cidrStrings)
+			logger.Error(nil, "PodCIDR being reassigned", "node", klog.KObj(node), "podCIDRs", node.Spec.PodCIDRs, "cidrStrings", cidrStrings)
			// We fall through and set the CIDR despite this error. This
			// implements the same logic as implemented in the
			// rangeAllocator.
@@ -296,14 +305,14 @@ func (ca *cloudCIDRAllocator) updateCIDRAllocation(nodeName string) error {
		}
		for i := 0; i < cidrUpdateRetries; i++ {
			if err = nodeutil.PatchNodeCIDRs(ca.client, types.NodeName(node.Name), cidrStrings); err == nil {
-				klog.InfoS("Set the node PodCIDRs", "nodeName", node.Name, "cidrStrings", cidrStrings)
+				logger.Info("Set the node PodCIDRs", "node", klog.KObj(node), "cidrStrings", cidrStrings)
				break
			}
		}
	}
	if err != nil {
		controllerutil.RecordNodeStatusChange(ca.recorder, node, "CIDRAssignmentFailed")
-		klog.ErrorS(err, "Failed to update the node PodCIDR after multiple attempts", "nodeName", node.Name, "cidrStrings", cidrStrings)
+		logger.Error(err, "Failed to update the node PodCIDR after multiple attempts", "node", klog.KObj(node), "cidrStrings", cidrStrings)
		return err
	}

@@ -315,31 +324,31 @@ func (ca *cloudCIDRAllocator) updateCIDRAllocation(nodeName string) error {
		LastTransitionTime: metav1.Now(),
	})
	if err != nil {
-		klog.ErrorS(err, "Error setting route status for the node", "nodeName", node.Name)
+		logger.Error(err, "Error setting route status for the node", "node", klog.KObj(node))
	}
	return err
}

-func needPodCIDRsUpdate(node *v1.Node, podCIDRs []*net.IPNet) (bool, error) {
+func needPodCIDRsUpdate(logger klog.Logger, node *v1.Node, podCIDRs []*net.IPNet) (bool, error) {
	if node.Spec.PodCIDR == "" {
		return true, nil
	}
	_, nodePodCIDR, err := netutils.ParseCIDRSloppy(node.Spec.PodCIDR)
	if err != nil {
-		klog.ErrorS(err, "Found invalid node.Spec.PodCIDR", "podCIDR", node.Spec.PodCIDR)
+		logger.Error(err, "Found invalid node.Spec.PodCIDR", "podCIDR", node.Spec.PodCIDR)
		// We will try to overwrite with new CIDR(s)
		return true, nil
	}
	nodePodCIDRs, err := netutils.ParseCIDRs(node.Spec.PodCIDRs)
	if err != nil {
-		klog.ErrorS(err, "Found invalid node.Spec.PodCIDRs", "podCIDRs", node.Spec.PodCIDRs)
+		logger.Error(err, "Found invalid node.Spec.PodCIDRs", "podCIDRs", node.Spec.PodCIDRs)
		// We will try to overwrite with new CIDR(s)
		return true, nil
	}

	if len(podCIDRs) == 1 {
		if cmp.Equal(nodePodCIDR, podCIDRs[0]) {
-			klog.V(4).InfoS("Node already has allocated CIDR. It matches the proposed one.", "nodeName", node.Name, "podCIDR", podCIDRs[0])
+			logger.V(4).Info("Node already has allocated CIDR. It matches the proposed one", "node", klog.KObj(node), "podCIDR", podCIDRs[0])
			return false, nil
		}
	} else if len(nodePodCIDRs) == len(podCIDRs) {
@@ -351,15 +360,15 @@ func needPodCIDRsUpdate(node *v1.Node, podCIDRs []*net.IPNet) (bool, error) {
				return true, nil
			}
		}
-		klog.V(4).InfoS("Node already has allocated CIDRs. It matches the proposed one.", "nodeName", node.Name, "podCIDRs", podCIDRs)
+		logger.V(4).Info("Node already has allocated CIDRs. It matches the proposed one", "node", klog.KObj(node), "podCIDRs", podCIDRs)
		return false, nil
	}

	return true, nil
}

-func (ca *cloudCIDRAllocator) ReleaseCIDR(node *v1.Node) error {
-	klog.V(2).Infof("Node %v PodCIDR (%v) will be released by external cloud provider (not managed by controller)",
-		node.Name, node.Spec.PodCIDR)
+func (ca *cloudCIDRAllocator) ReleaseCIDR(logger klog.Logger, node *v1.Node) error {
+	logger.V(2).Info("Node's PodCIDR will be released by external cloud provider (not managed by controller)",
+		"node", klog.KObj(node), "podCIDR", node.Spec.PodCIDR)
	return nil
}
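The worker above combines three recurring moves: logger extraction at the goroutine boundary, select on ctx.Done(), and retry via time.AfterFunc. Condensed into a standalone sketch (process and the one-second delay are illustrative, not the controller's actual values):

```go
package demo

import (
	"context"
	"time"

	"k8s.io/klog/v2"
)

// worker drains a work channel until the context is cancelled, requeuing
// failed items after a delay.
func worker(ctx context.Context, work chan string, process func(klog.Logger, string) error) {
	logger := klog.FromContext(ctx)
	for {
		select {
		case item, ok := <-work:
			if !ok {
				logger.Info("work channel unexpectedly closed")
				return
			}
			if err := process(logger, item); err != nil {
				logger.Error(err, "processing failed, requeuing", "item", item)
				time.AfterFunc(time.Second, func() { work <- item })
			}
		case <-ctx.Done():
			return
		}
	}
}
```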
@@ -25,9 +25,10 @@ import (
 	informers "k8s.io/client-go/informers/core/v1"
 	clientset "k8s.io/client-go/kubernetes"
 	cloudprovider "k8s.io/cloud-provider"
+	"k8s.io/klog/v2"
 )
 
 // NewCloudCIDRAllocator creates a new cloud CIDR allocator.
-func NewCloudCIDRAllocator(client clientset.Interface, cloud cloudprovider.Interface, nodeInformer informers.NodeInformer) (CIDRAllocator, error) {
+func NewCloudCIDRAllocator(logger klog.Logger, client clientset.Interface, cloud cloudprovider.Interface, nodeInformer informers.NodeInformer) (CIDRAllocator, error) {
 	return nil, errors.New("legacy cloud provider support not built")
 }
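This stub variant exists so builds without the legacy cloud providers still compile; its signature must stay in lockstep with the real constructor, which is why the klog import is added here as well. The general technique, shown with hypothetical build tags and names (not the repository's actual tags):

```go
// allocator_real.go
//go:build withcloud

package demo

func newAllocator() (string, error) { return "real allocator", nil }
```

```go
// allocator_stub.go
//go:build !withcloud

package demo

import "errors"

// Same signature as the real constructor, selected when the tag is absent.
func newAllocator() (string, error) { return "", errors.New("cloud support not built") }
```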
@@ -28,6 +28,7 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/client-go/informers"
 	"k8s.io/client-go/kubernetes/fake"
+	"k8s.io/klog/v2/ktesting"
 	netutils "k8s.io/utils/net"
 )
 
@@ -42,7 +43,6 @@ func hasNodeInProcessing(ca *cloudCIDRAllocator, name string) bool {
 func TestBoundedRetries(t *testing.T) {
 	clientSet := fake.NewSimpleClientset()
 	updateChan := make(chan string, 1) // need to buffer as we are using only on go routine
-	stopChan := make(chan struct{})
 	sharedInfomer := informers.NewSharedInformerFactory(clientSet, 1*time.Hour)
 	ca := &cloudCIDRAllocator{
 		client: clientSet,
@@ -51,9 +51,10 @@ func TestBoundedRetries(t *testing.T) {
 		nodesSynced: sharedInfomer.Core().V1().Nodes().Informer().HasSynced,
 		nodesInProcessing: map[string]*nodeProcessingInfo{},
 	}
-	go ca.worker(stopChan)
+	logger, ctx := ktesting.NewTestContext(t)
+	go ca.worker(ctx)
 	nodeName := "testNode"
-	ca.AllocateOrOccupyCIDR(&v1.Node{
+	ca.AllocateOrOccupyCIDR(logger, &v1.Node{
 		ObjectMeta: metav1.ObjectMeta{
 			Name: nodeName,
 		},
@@ -176,9 +177,9 @@ func TestNeedPodCIDRsUpdate(t *testing.T) {
 		if err != nil {
 			t.Errorf("failed to parse %v as CIDRs: %v", tc.cidrs, err)
 		}
 
+		logger, _ := ktesting.NewTestContext(t)
 		t.Run(tc.desc, func(t *testing.T) {
-			got, err := needPodCIDRsUpdate(&node, netCIDRs)
+			got, err := needPodCIDRsUpdate(logger, &node, netCIDRs)
 			if tc.wantErr == (err == nil) {
 				t.Errorf("err: %v, wantErr: %v", err, tc.wantErr)
 			}
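Tests obtain their logger and context from ktesting, which routes log output through t.Log and ties the context's lifetime to the test. Minimal usage:

```go
package example

import (
	"testing"

	"k8s.io/klog/v2/ktesting"
)

func TestContextualLogging(t *testing.T) {
	// logger writes through t.Log; ctx carries the same logger so code
	// under test can recover it with klog.FromContext(ctx).
	logger, ctx := ktesting.NewTestContext(t)
	logger.Info("starting")
	_ = ctx // pass ctx / logger into the code under test
}
```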
@@ -20,6 +20,7 @@ limitations under the License.
 package ipam
 
 import (
+	"context"
 	"fmt"
 	"net"
 	"sync"
@@ -113,10 +114,10 @@ func NewController(
 // Start initializes the Controller with the existing list of nodes and
 // registers the informers for node changes. This will start synchronization
 // of the node and cloud CIDR range allocations.
-func (c *Controller) Start(nodeInformer informers.NodeInformer) error {
-	klog.InfoS("Starting IPAM controller", "config", c.config)
+func (c *Controller) Start(logger klog.Logger, nodeInformer informers.NodeInformer) error {
+	logger.Info("Starting IPAM controller", "config", c.config)
 
-	nodes, err := listNodes(c.adapter.k8s)
+	nodes, err := listNodes(logger, c.adapter.k8s)
 	if err != nil {
 		return err
 	}
@@ -125,9 +126,9 @@ func (c *Controller) Start(nodeInformer informers.NodeInformer) error {
 		_, cidrRange, err := netutils.ParseCIDRSloppy(node.Spec.PodCIDR)
 		if err == nil {
 			c.set.Occupy(cidrRange)
-			klog.V(3).InfoS("Occupying CIDR for node", "CIDR", node.Spec.PodCIDR, "node", node.Name)
+			logger.V(3).Info("Occupying CIDR for node", "CIDR", node.Spec.PodCIDR, "node", klog.KObj(&node))
 		} else {
-			klog.ErrorS(err, "Node has an invalid CIDR", "node", node.Name, "CIDR", node.Spec.PodCIDR)
+			logger.Error(err, "Node has an invalid CIDR", "node", klog.KObj(&node), "CIDR", node.Spec.PodCIDR)
 		}
 	}
 
@@ -138,24 +139,30 @@ func (c *Controller) Start(nodeInformer informers.NodeInformer) error {
 			// XXX/bowei -- stagger the start of each sync cycle.
 			syncer := c.newSyncer(node.Name)
 			c.syncers[node.Name] = syncer
-			go syncer.Loop(nil)
+			go syncer.Loop(logger, nil)
 		}()
 	}
 
 	nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
-		AddFunc: controllerutil.CreateAddNodeHandler(c.onAdd),
-		UpdateFunc: controllerutil.CreateUpdateNodeHandler(c.onUpdate),
-		DeleteFunc: controllerutil.CreateDeleteNodeHandler(c.onDelete),
+		AddFunc: controllerutil.CreateAddNodeHandler(func(node *v1.Node) error {
+			return c.onAdd(logger, node)
+		}),
+		UpdateFunc: controllerutil.CreateUpdateNodeHandler(func(_, newNode *v1.Node) error {
+			return c.onUpdate(logger, newNode)
+		}),
+		DeleteFunc: controllerutil.CreateDeleteNodeHandler(func(node *v1.Node) error {
+			return c.onDelete(logger, node)
+		}),
 	})
 
 	return nil
 }
 
-func (c *Controller) Run(stopCh <-chan struct{}) {
+func (c *Controller) Run(ctx context.Context) {
 	defer utilruntime.HandleCrash()
 
-	go c.adapter.Run(stopCh)
-	<-stopCh
+	go c.adapter.Run(ctx)
+	<-ctx.Done()
 }
 
 // occupyServiceCIDR removes the service CIDR range from the cluster CIDR if it
@@ -192,7 +199,7 @@ func (c *Controller) newSyncer(name string) *nodesync.NodeSync {
 	return nodesync.New(ns, c.adapter, c.adapter, c.config.Mode, name, c.set)
 }
 
-func (c *Controller) onAdd(node *v1.Node) error {
+func (c *Controller) onAdd(logger klog.Logger, node *v1.Node) error {
 	c.lock.Lock()
 	defer c.lock.Unlock()
 
@@ -200,30 +207,30 @@ func (c *Controller) onAdd(node *v1.Node) error {
 	if !ok {
 		syncer = c.newSyncer(node.Name)
 		c.syncers[node.Name] = syncer
-		go syncer.Loop(nil)
+		go syncer.Loop(logger, nil)
 	} else {
-		klog.InfoS("Add for node that already exists", "node", node.Name)
+		logger.Info("Add for node that already exists", "node", klog.KObj(node))
 	}
 	syncer.Update(node)
 
 	return nil
 }
 
-func (c *Controller) onUpdate(_, node *v1.Node) error {
+func (c *Controller) onUpdate(logger klog.Logger, node *v1.Node) error {
 	c.lock.Lock()
 	defer c.lock.Unlock()
 
 	if sync, ok := c.syncers[node.Name]; ok {
 		sync.Update(node)
 	} else {
-		klog.ErrorS(nil, "Received update for non-existent node", "node", node.Name)
+		logger.Error(nil, "Received update for non-existent node", "node", klog.KObj(node))
 		return fmt.Errorf("unknown node %q", node.Name)
 	}
 
 	return nil
 }
 
-func (c *Controller) onDelete(node *v1.Node) error {
+func (c *Controller) onDelete(logger klog.Logger, node *v1.Node) error {
 	c.lock.Lock()
 	defer c.lock.Unlock()
 
@@ -231,7 +238,7 @@ func (c *Controller) onDelete(node *v1.Node) error {
 		syncer.Delete(node)
 		delete(c.syncers, node.Name)
 	} else {
-		klog.InfoS("Node was already deleted", "node", node.Name)
+		logger.Info("Node was already deleted", "node", klog.KObj(node))
 	}
 
 	return nil
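Informer callbacks have fixed signatures, so the logger cannot be added as a parameter; instead each handler becomes a closure that captures it, as the hunks above show for onAdd/onUpdate/onDelete. The same pattern against plain client-go types, as a hedged sketch (not the controller's actual wiring):

```go
package demo

import (
	"context"

	v1 "k8s.io/api/core/v1"
	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes/fake"
	"k8s.io/client-go/tools/cache"
	"k8s.io/klog/v2"
)

func registerHandlers(ctx context.Context) {
	logger := klog.FromContext(ctx)
	factory := informers.NewSharedInformerFactory(fake.NewSimpleClientset(), 0)
	factory.Core().V1().Nodes().Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		// The closure captures logger, since AddFunc's signature is fixed.
		AddFunc: func(obj interface{}) {
			node := obj.(*v1.Node)
			logger.Info("node added", "node", klog.KObj(node))
		},
	})
}
```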
@@ -102,6 +102,7 @@ type multiCIDRRangeAllocator struct {
 // Caller must always pass in a list of existing nodes to the new allocator.
 // NodeList is only nil in testing.
 func NewMultiCIDRRangeAllocator(
+	ctx context.Context,
 	client clientset.Interface,
 	nodeInformer informers.NodeInformer,
 	clusterCIDRInformer networkinginformers.ClusterCIDRInformer,
@@ -109,8 +110,10 @@ func NewMultiCIDRRangeAllocator(
 	nodeList *v1.NodeList,
 	testCIDRMap map[string][]*cidrset.ClusterCIDR,
 ) (CIDRAllocator, error) {
+	logger := klog.FromContext(ctx)
 	if client == nil {
-		klog.Fatalf("client is nil")
+		logger.Error(nil, "kubeClient is nil when starting multi CIDRRangeAllocator")
+		klog.FlushAndExit(klog.ExitFlushTimeout, 1)
 	}
 
 	eventBroadcaster := record.NewBroadcaster()
@@ -137,10 +140,10 @@ func NewMultiCIDRRangeAllocator(
 	// testCIDRMap is only set for testing purposes.
 	if len(testCIDRMap) > 0 {
 		ra.cidrMap = testCIDRMap
-		klog.Warningf("testCIDRMap should only be set for testing purposes, if this is seen in production logs, it might be a misconfiguration or a bug.")
+		logger.Info("TestCIDRMap should only be set for testing purposes, if this is seen in production logs, it might be a misconfiguration or a bug")
 	}
 
-	ccList, err := listClusterCIDRs(client)
+	ccList, err := listClusterCIDRs(ctx, client)
 	if err != nil {
 		return nil, err
 	}
@@ -148,14 +151,14 @@ func NewMultiCIDRRangeAllocator(
 	if ccList == nil {
 		ccList = &networkingv1alpha1.ClusterCIDRList{}
 	}
-	createDefaultClusterCIDR(ccList, allocatorParams)
+	createDefaultClusterCIDR(logger, ccList, allocatorParams)
 
 	// Regenerate the cidrMaps from the existing ClusterCIDRs.
 	for _, clusterCIDR := range ccList.Items {
-		klog.Infof("Regenerating existing ClusterCIDR: %v", clusterCIDR)
+		logger.Info("Regenerating existing ClusterCIDR", "clusterCIDR", clusterCIDR)
 		// Create an event for invalid ClusterCIDRs, do not crash on failures.
-		if err := ra.reconcileBootstrap(&clusterCIDR); err != nil {
-			klog.Errorf("Error while regenerating existing ClusterCIDR: %v", err)
+		if err := ra.reconcileBootstrap(ctx, &clusterCIDR); err != nil {
+			logger.Error(err, "Error while regenerating existing ClusterCIDR")
 			ra.recorder.Event(&clusterCIDR, "Warning", "InvalidClusterCIDR encountered while regenerating ClusterCIDR during bootstrap.", err.Error())
 		}
 	}
@@ -184,25 +187,25 @@ func NewMultiCIDRRangeAllocator(
 	})
 
 	if allocatorParams.ServiceCIDR != nil {
-		ra.filterOutServiceRange(allocatorParams.ServiceCIDR)
+		ra.filterOutServiceRange(logger, allocatorParams.ServiceCIDR)
 	} else {
-		klog.Info("No Service CIDR provided. Skipping filtering out service addresses.")
+		logger.Info("No Service CIDR provided. Skipping filtering out service addresses")
 	}
 
 	if allocatorParams.SecondaryServiceCIDR != nil {
-		ra.filterOutServiceRange(allocatorParams.SecondaryServiceCIDR)
+		ra.filterOutServiceRange(logger, allocatorParams.SecondaryServiceCIDR)
 	} else {
-		klog.Info("No Secondary Service CIDR provided. Skipping filtering out secondary service addresses.")
+		logger.Info("No Secondary Service CIDR provided. Skipping filtering out secondary service addresses")
 	}
 
 	if nodeList != nil {
 		for _, node := range nodeList.Items {
 			if len(node.Spec.PodCIDRs) == 0 {
-				klog.V(4).Infof("Node %v has no CIDR, ignoring", node.Name)
+				logger.V(4).Info("Node has no CIDR, ignoring", "node", klog.KObj(&node))
 				continue
 			}
-			klog.Infof("Node %v has CIDR %s, occupying it in CIDR map", node.Name, node.Spec.PodCIDRs)
-			if err := ra.occupyCIDRs(&node); err != nil {
+			logger.Info("Node has CIDR, occupying it in CIDR map", "node", klog.KObj(&node), "podCIDRs", node.Spec.PodCIDRs)
+			if err := ra.occupyCIDRs(logger, &node); err != nil {
 				// This will happen if:
 				// 1. We find garbage in the podCIDRs field. Retrying is useless.
 				// 2. CIDR out of range: This means ClusterCIDR is not yet created
@@ -231,7 +234,7 @@ func NewMultiCIDRRangeAllocator(
 			// we don't see the Update with DeletionTimestamp != 0.
 			// TODO: instead of executing the operation directly in the handler, build a small cache with key node.Name
 			// and value PodCIDRs use ReleaseCIDR on the reconcile loop so we can retry on `ReleaseCIDR` failures.
-			ra.ReleaseCIDR(obj.(*v1.Node))
+			ra.ReleaseCIDR(logger, obj.(*v1.Node))
 			// IndexerInformer uses a delta nodeQueue, therefore for deletes we have to use this
 			// key function.
 			key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj)
@@ -244,51 +247,52 @@ func NewMultiCIDRRangeAllocator(
 	return ra, nil
 }
 
-func (r *multiCIDRRangeAllocator) Run(stopCh <-chan struct{}) {
+func (r *multiCIDRRangeAllocator) Run(ctx context.Context) {
 	defer utilruntime.HandleCrash()
 
 	// Start event processing pipeline.
+	logger := klog.FromContext(ctx)
 	r.broadcaster.StartStructuredLogging(0)
-	klog.Infof("Started sending events to API Server.")
+	logger.Info("Started sending events to API Server")
 	r.broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: r.client.CoreV1().Events("")})
 	defer r.broadcaster.Shutdown()
 
 	defer r.cidrQueue.ShutDown()
 	defer r.nodeQueue.ShutDown()
 
-	klog.Infof("Starting Multi CIDR Range allocator")
-	defer klog.Infof("Shutting down Multi CIDR Range allocator")
+	logger.Info("Starting Multi CIDR Range allocator")
+	defer logger.Info("Shutting down Multi CIDR Range allocator")
 
-	if !cache.WaitForNamedCacheSync("multi_cidr_range_allocator", stopCh, r.nodesSynced, r.clusterCIDRSynced) {
+	if !cache.WaitForNamedCacheSync("multi_cidr_range_allocator", ctx.Done(), r.nodesSynced, r.clusterCIDRSynced) {
 		return
 	}
 
 	for i := 0; i < cidrUpdateWorkers; i++ {
-		go wait.Until(r.runCIDRWorker, time.Second, stopCh)
-		go wait.Until(r.runNodeWorker, time.Second, stopCh)
+		go wait.UntilWithContext(ctx, r.runCIDRWorker, time.Second)
+		go wait.UntilWithContext(ctx, r.runNodeWorker, time.Second)
 	}
 
-	<-stopCh
+	<-ctx.Done()
 }
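wait.Until with a separate stop channel becomes wait.UntilWithContext, which hands each worker invocation the context so the logger rides along and the loop stops on cancellation. Usage in isolation:

```go
package demo

import (
	"context"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/klog/v2"
)

func startWorkers(ctx context.Context, workers int) {
	for i := 0; i < workers; i++ {
		// Each tick receives ctx; the loop exits when ctx is cancelled.
		go wait.UntilWithContext(ctx, func(ctx context.Context) {
			klog.FromContext(ctx).V(4).Info("worker tick")
		}, time.Second)
	}
	<-ctx.Done()
}
```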
 // runWorker is a long-running function that will continually call the
 // processNextWorkItem function in order to read and process a message on the
 // cidrQueue.
-func (r *multiCIDRRangeAllocator) runCIDRWorker() {
-	for r.processNextCIDRWorkItem() {
+func (r *multiCIDRRangeAllocator) runCIDRWorker(ctx context.Context) {
+	for r.processNextCIDRWorkItem(ctx) {
 	}
 }
 
 // processNextWorkItem will read a single work item off the cidrQueue and
 // attempt to process it, by calling the syncHandler.
-func (r *multiCIDRRangeAllocator) processNextCIDRWorkItem() bool {
+func (r *multiCIDRRangeAllocator) processNextCIDRWorkItem(ctx context.Context) bool {
 	obj, shutdown := r.cidrQueue.Get()
 	if shutdown {
 		return false
 	}
 
 	// We wrap this block in a func so we can defer c.cidrQueue.Done.
-	err := func(obj interface{}) error {
+	err := func(ctx context.Context, obj interface{}) error {
 		// We call Done here so the cidrQueue knows we have finished
 		// processing this item. We also must remember to call Forget if we
 		// do not want this work item being re-queued. For example, we do
@@ -313,7 +317,7 @@ func (r *multiCIDRRangeAllocator) processNextCIDRWorkItem() bool {
 		}
 		// Run the syncHandler, passing it the namespace/name string of the
 		// Foo resource to be synced.
-		if err := r.syncClusterCIDR(key); err != nil {
+		if err := r.syncClusterCIDR(ctx, key); err != nil {
 			// Put the item back on the cidrQueue to handle any transient errors.
 			r.cidrQueue.AddRateLimited(key)
 			return fmt.Errorf("error syncing '%s': %s, requeuing", key, err.Error())
@@ -323,7 +327,7 @@ func (r *multiCIDRRangeAllocator) processNextCIDRWorkItem() bool {
 		r.cidrQueue.Forget(obj)
 		klog.Infof("Successfully synced '%s'", key)
 		return nil
-	}(obj)
+	}(ctx, obj)
 
 	if err != nil {
 		utilruntime.HandleError(err)
@@ -333,21 +337,21 @@ func (r *multiCIDRRangeAllocator) processNextCIDRWorkItem() bool {
 	return true
 }
 
-func (r *multiCIDRRangeAllocator) runNodeWorker() {
-	for r.processNextNodeWorkItem() {
+func (r *multiCIDRRangeAllocator) runNodeWorker(ctx context.Context) {
+	for r.processNextNodeWorkItem(ctx) {
 	}
 }
 
 // processNextWorkItem will read a single work item off the cidrQueue and
 // attempt to process it, by calling the syncHandler.
-func (r *multiCIDRRangeAllocator) processNextNodeWorkItem() bool {
+func (r *multiCIDRRangeAllocator) processNextNodeWorkItem(ctx context.Context) bool {
 	obj, shutdown := r.nodeQueue.Get()
 	if shutdown {
 		return false
 	}
 
 	// We wrap this block in a func so we can defer c.cidrQueue.Done.
-	err := func(obj interface{}) error {
+	err := func(logger klog.Logger, obj interface{}) error {
 		// We call Done here so the workNodeQueue knows we have finished
 		// processing this item. We also must remember to call Forget if we
 		// do not want this work item being re-queued. For example, we do
@@ -372,7 +376,7 @@ func (r *multiCIDRRangeAllocator) processNextNodeWorkItem() bool {
 		}
 		// Run the syncHandler, passing it the namespace/name string of the
 		// Foo resource to be synced.
-		if err := r.syncNode(key); err != nil {
+		if err := r.syncNode(logger, key); err != nil {
 			// Put the item back on the cidrQueue to handle any transient errors.
 			r.nodeQueue.AddRateLimited(key)
 			return fmt.Errorf("error syncing '%s': %s, requeuing", key, err.Error())
@@ -382,7 +386,7 @@ func (r *multiCIDRRangeAllocator) processNextNodeWorkItem() bool {
 		r.nodeQueue.Forget(obj)
 		klog.Infof("Successfully synced '%s'", key)
 		return nil
-	}(obj)
+	}(klog.FromContext(ctx), obj)
 
 	if err != nil {
 		utilruntime.HandleError(err)
@@ -392,7 +396,7 @@ func (r *multiCIDRRangeAllocator) processNextNodeWorkItem() bool {
 	return true
 }
 
-func (r *multiCIDRRangeAllocator) syncNode(key string) error {
+func (r *multiCIDRRangeAllocator) syncNode(logger klog.Logger, key string) error {
 	startTime := time.Now()
 	defer func() {
 		klog.V(4).Infof("Finished syncing Node request %q (%v)", key, time.Since(startTime))
@@ -411,9 +415,9 @@ func (r *multiCIDRRangeAllocator) syncNode(key string) error {
 	// Check the DeletionTimestamp to determine if object is under deletion.
 	if !node.DeletionTimestamp.IsZero() {
 		klog.V(3).Infof("node is being deleted: %v", key)
-		return r.ReleaseCIDR(node)
+		return r.ReleaseCIDR(logger, node)
 	}
-	return r.AllocateOrOccupyCIDR(node)
+	return r.AllocateOrOccupyCIDR(logger, node)
 }
 
 // needToAddFinalizer checks if a finalizer should be added to the object.
@@ -422,15 +426,16 @@ func needToAddFinalizer(obj metav1.Object, finalizer string) bool {
 		finalizer, nil)
 }
 
-func (r *multiCIDRRangeAllocator) syncClusterCIDR(key string) error {
+func (r *multiCIDRRangeAllocator) syncClusterCIDR(ctx context.Context, key string) error {
 	startTime := time.Now()
+	logger := klog.FromContext(ctx)
 	defer func() {
-		klog.V(4).Infof("Finished syncing clusterCIDR request %q (%v)", key, time.Since(startTime))
+		logger.V(4).Info("Finished syncing clusterCIDR request", "key", key, "latency", time.Since(startTime))
 	}()
 
 	clusterCIDR, err := r.clusterCIDRLister.Get(key)
 	if apierrors.IsNotFound(err) {
-		klog.V(3).Infof("clusterCIDR has been deleted: %v", key)
+		logger.V(3).Info("clusterCIDR has been deleted", "key", key)
 		return nil
 	}
 
@@ -440,21 +445,20 @@ func (r *multiCIDRRangeAllocator) syncClusterCIDR(key string) error {
 
 	// Check the DeletionTimestamp to determine if object is under deletion.
 	if !clusterCIDR.DeletionTimestamp.IsZero() {
-		return r.reconcileDelete(clusterCIDR)
+		return r.reconcileDelete(ctx, clusterCIDR)
 	}
-	return r.reconcileCreate(clusterCIDR)
+	return r.reconcileCreate(ctx, clusterCIDR)
 }
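The per-item closure in processNext*WorkItem now receives the context (or a logger derived from it) so the sync handlers can log contextually. The whole queue-draining skeleton, reduced to its essentials (syncFn is a stand-in for syncClusterCIDR/syncNode):

```go
package demo

import (
	"context"
	"fmt"

	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/client-go/util/workqueue"
)

func processNextItem(ctx context.Context, queue workqueue.RateLimitingInterface, syncFn func(context.Context, string) error) bool {
	obj, shutdown := queue.Get()
	if shutdown {
		return false
	}
	// Wrap in a func so queue.Done always runs for this item.
	err := func(ctx context.Context, obj interface{}) error {
		defer queue.Done(obj)
		key, ok := obj.(string)
		if !ok {
			queue.Forget(obj) // garbage item: drop it, do not requeue
			return fmt.Errorf("expected string in queue but got %#v", obj)
		}
		if err := syncFn(ctx, key); err != nil {
			queue.AddRateLimited(key) // transient error: retry with backoff
			return fmt.Errorf("error syncing %q, requeuing: %w", key, err)
		}
		queue.Forget(obj)
		return nil
	}(ctx, obj)
	if err != nil {
		utilruntime.HandleError(err)
	}
	return true
}
```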
 // occupyCIDRs marks node.PodCIDRs[...] as used in allocator's tracked cidrSet.
-func (r *multiCIDRRangeAllocator) occupyCIDRs(node *v1.Node) error {
+func (r *multiCIDRRangeAllocator) occupyCIDRs(logger klog.Logger, node *v1.Node) error {
 
 	err := func(node *v1.Node) error {
 
 		if len(node.Spec.PodCIDRs) == 0 {
 			return nil
 		}
 
-		clusterCIDRList, err := r.orderedMatchingClusterCIDRs(node, true)
+		clusterCIDRList, err := r.orderedMatchingClusterCIDRs(logger, node, true)
 		if err != nil {
 			return err
 		}
@@ -468,10 +472,10 @@ func (r *multiCIDRRangeAllocator) occupyCIDRs(node *v1.Node) error {
 				return fmt.Errorf("failed to parse CIDR %s on Node %v: %w", cidr, node.Name, err)
 			}
 
-			klog.Infof("occupy CIDR %s for node: %s", cidr, node.Name)
+			logger.Info("occupy CIDR for node", "CIDR", cidr, "node", klog.KObj(node))
 
 			if err := r.Occupy(clusterCIDR, podCIDR); err != nil {
-				klog.V(3).Infof("Could not occupy cidr: %v, trying next range: %w", node.Spec.PodCIDRs, err)
+				logger.V(3).Info("Could not occupy cidr, trying next range", "podCIDRs", node.Spec.PodCIDRs, "err", err)
 				break
 			}
 
@@ -519,14 +523,14 @@ func (r *multiCIDRRangeAllocator) Occupy(clusterCIDR *cidrset.ClusterCIDR, cidr
 
 // Release marks the CIDR as free in the cidrSet used bitmap,
 // Also removes the CIDR from the allocatedCIDRSet.
-func (r *multiCIDRRangeAllocator) Release(clusterCIDR *cidrset.ClusterCIDR, cidr *net.IPNet) error {
+func (r *multiCIDRRangeAllocator) Release(logger klog.Logger, clusterCIDR *cidrset.ClusterCIDR, cidr *net.IPNet) error {
 	currCIDRSet, err := r.associatedCIDRSet(clusterCIDR, cidr)
 	if err != nil {
 		return err
 	}
 
 	if err := currCIDRSet.Release(cidr); err != nil {
-		klog.Infof("Unable to release cidr %v in cidrSet", cidr)
+		logger.Info("Unable to release cidr in cidrSet", "CIDR", cidr)
 		return err
 	}
 
@@ -539,7 +543,7 @@ func (r *multiCIDRRangeAllocator) Release(clusterCIDR *cidrset.ClusterCIDR, cidr
 // WARNING: If you're adding any return calls or defer any more work from this
 // function you have to make sure to update nodesInProcessing properly with the
 // disposition of the node when the work is done.
-func (r *multiCIDRRangeAllocator) AllocateOrOccupyCIDR(node *v1.Node) error {
+func (r *multiCIDRRangeAllocator) AllocateOrOccupyCIDR(logger klog.Logger, node *v1.Node) error {
 	r.lock.Lock()
 	defer r.lock.Unlock()
 
@@ -548,10 +552,10 @@ func (r *multiCIDRRangeAllocator) AllocateOrOccupyCIDR(node *v1.Node) error {
 	}
 
 	if len(node.Spec.PodCIDRs) > 0 {
-		return r.occupyCIDRs(node)
+		return r.occupyCIDRs(logger, node)
 	}
 
-	cidrs, clusterCIDR, err := r.prioritizedCIDRs(node)
+	cidrs, clusterCIDR, err := r.prioritizedCIDRs(logger, node)
 	if err != nil {
 		controllerutil.RecordNodeStatusChange(r.recorder, node, "CIDRNotAvailable")
 		return fmt.Errorf("failed to get cidrs for node %s", node.Name)
@@ -571,11 +575,11 @@ func (r *multiCIDRRangeAllocator) AllocateOrOccupyCIDR(node *v1.Node) error {
 		clusterCIDR: clusterCIDR,
 	}
 
-	return r.updateCIDRsAllocation(allocated)
+	return r.updateCIDRsAllocation(logger, allocated)
 }
 
 // ReleaseCIDR marks node.podCIDRs[...] as unused in our tracked cidrSets.
-func (r *multiCIDRRangeAllocator) ReleaseCIDR(node *v1.Node) error {
+func (r *multiCIDRRangeAllocator) ReleaseCIDR(logger klog.Logger, node *v1.Node) error {
 	r.lock.Lock()
 	defer r.lock.Unlock()
 
@@ -583,7 +587,7 @@ func (r *multiCIDRRangeAllocator) ReleaseCIDR(node *v1.Node) error {
 		return nil
 	}
 
-	clusterCIDR, err := r.allocatedClusterCIDR(node)
+	clusterCIDR, err := r.allocatedClusterCIDR(logger, node)
 	if err != nil {
 		return err
 	}
@@ -594,8 +598,8 @@ func (r *multiCIDRRangeAllocator) ReleaseCIDR(node *v1.Node) error {
 			return fmt.Errorf("failed to parse CIDR %q on Node %q: %w", cidr, node.Name, err)
 		}
 
-		klog.Infof("release CIDR %s for node: %s", cidr, node.Name)
-		if err := r.Release(clusterCIDR, podCIDR); err != nil {
+		logger.Info("release CIDR for node", "CIDR", cidr, "node", klog.KObj(node))
+		if err := r.Release(logger, clusterCIDR, podCIDR); err != nil {
 			return fmt.Errorf("failed to release cidr %q from clusterCIDR %q for node %q: %w", cidr, clusterCIDR.Name, node.Name, err)
 		}
 	}
@@ -608,7 +612,7 @@ func (r *multiCIDRRangeAllocator) ReleaseCIDR(node *v1.Node) error {
 
 // Marks all CIDRs with subNetMaskSize that belongs to serviceCIDR as used across all cidrs
 // so that they won't be assignable.
-func (r *multiCIDRRangeAllocator) filterOutServiceRange(serviceCIDR *net.IPNet) {
+func (r *multiCIDRRangeAllocator) filterOutServiceRange(logger klog.Logger, serviceCIDR *net.IPNet) {
 	// Checks if service CIDR has a nonempty intersection with cluster
 	// CIDR. It is the case if either clusterCIDR contains serviceCIDR with
 	// clusterCIDR's Mask applied (this means that clusterCIDR contains
@@ -617,7 +621,7 @@ func (r *multiCIDRRangeAllocator) filterOutServiceRange(serviceCIDR *net.IPNet)
 	for _, clusterCIDRList := range r.cidrMap {
 		for _, clusterCIDR := range clusterCIDRList {
 			if err := r.occupyServiceCIDR(clusterCIDR, serviceCIDR); err != nil {
-				klog.Errorf("unable to occupy service CIDR: %w", err)
+				logger.Error(err, "Unable to occupy service CIDR")
 			}
 		}
 	}
@@ -645,12 +649,12 @@ func (r *multiCIDRRangeAllocator) occupyServiceCIDR(clusterCIDR *cidrset.Cluster
 }
 
 // updateCIDRsAllocation assigns CIDR to Node and sends an update to the API server.
-func (r *multiCIDRRangeAllocator) updateCIDRsAllocation(data multiCIDRNodeReservedCIDRs) error {
+func (r *multiCIDRRangeAllocator) updateCIDRsAllocation(logger klog.Logger, data multiCIDRNodeReservedCIDRs) error {
 	err := func(data multiCIDRNodeReservedCIDRs) error {
 		cidrsString := ipnetToStringList(data.allocatedCIDRs)
 		node, err := r.nodeLister.Get(data.nodeName)
 		if err != nil {
-			klog.Errorf("Failed while getting node %v for updating Node.Spec.PodCIDRs: %v", data.nodeName, err)
+			logger.Error(err, "Failed while getting node for updating Node.Spec.PodCIDRs", "node", klog.KRef("", data.nodeName))
 			return err
 		}
 
@@ -666,16 +670,16 @@ func (r *multiCIDRRangeAllocator) updateCIDRsAllocation(data multiCIDRNodeReserv
 				}
 			}
 			if match {
-				klog.V(4).Infof("Node %q already has allocated CIDR %q. It matches the proposed one.", node.Name, data.allocatedCIDRs)
+				logger.V(4).Info("Node already has allocated CIDR. It matches the proposed one.", "node", klog.KObj(node), "CIDRs", data.allocatedCIDRs)
 				return nil
 			}
 		}
 
 		// node has cidrs allocated, release the reserved.
 		if len(node.Spec.PodCIDRs) != 0 {
-			klog.Errorf("Node %q already has a CIDR allocated %q. Releasing the new one.", node.Name, node.Spec.PodCIDRs)
+			logger.Error(nil, "Node already has a CIDR allocated. Releasing the new one", "node", klog.KObj(node), "podCIDRs", node.Spec.PodCIDRs)
 			for _, cidr := range data.allocatedCIDRs {
-				if err := r.Release(data.clusterCIDR, cidr); err != nil {
+				if err := r.Release(logger, data.clusterCIDR, cidr); err != nil {
 					return fmt.Errorf("failed to release cidr %s from clusterCIDR %s for node: %s: %w", cidr, data.clusterCIDR.Name, node.Name, err)
 				}
 			}
@@ -686,20 +690,20 @@ func (r *multiCIDRRangeAllocator) updateCIDRsAllocation(data multiCIDRNodeReserv
 		for i := 0; i < cidrUpdateRetries; i++ {
 			if err = nodeutil.PatchNodeCIDRs(r.client, types.NodeName(node.Name), cidrsString); err == nil {
 				data.clusterCIDR.AssociatedNodes[node.Name] = true
-				klog.Infof("Set node %q PodCIDR to %q", node.Name, cidrsString)
+				logger.Info("Set node PodCIDR", "node", klog.KObj(node), "podCIDR", cidrsString)
 				return nil
 			}
 		}
 		// failed release back to the pool.
-		klog.Errorf("Failed to update node %q PodCIDR to %q after %d attempts: %v", node.Name, cidrsString, cidrUpdateRetries, err)
+		logger.Error(err, "Failed to update node PodCIDR after attempts", "node", klog.KObj(node), "podCIDR", cidrsString, "retries", cidrUpdateRetries)
 		controllerutil.RecordNodeStatusChange(r.recorder, node, "CIDRAssignmentFailed")
 		// We accept the fact that we may leak CIDRs here. This is safer than releasing
 		// them in case when we don't know if request went through.
 		// NodeController restart will return all falsely allocated CIDRs to the pool.
 		if !apierrors.IsServerTimeout(err) {
-			klog.Errorf("CIDR assignment for node %q failed: %v. Releasing allocated CIDR", node.Name, err)
+			logger.Error(err, "CIDR assignment for node failed. Releasing allocated CIDR", "node", klog.KObj(node))
 			for _, cidr := range data.allocatedCIDRs {
-				if err := r.Release(data.clusterCIDR, cidr); err != nil {
+				if err := r.Release(logger, data.clusterCIDR, cidr); err != nil {
 					return fmt.Errorf("failed to release cidr %q from clusterCIDR %q for node: %q: %w", cidr, data.clusterCIDR.Name, node.Name, err)
 				}
 			}
@@ -732,8 +736,8 @@ func defaultNodeSelector() *v1.NodeSelector {
 // prioritizedCIDRs returns a list of CIDRs to be allocated to the node.
 // Returns 1 CIDR if single stack.
 // Returns 2 CIDRs , 1 from each ip family if dual stack.
-func (r *multiCIDRRangeAllocator) prioritizedCIDRs(node *v1.Node) ([]*net.IPNet, *cidrset.ClusterCIDR, error) {
-	clusterCIDRList, err := r.orderedMatchingClusterCIDRs(node, true)
+func (r *multiCIDRRangeAllocator) prioritizedCIDRs(logger klog.Logger, node *v1.Node) ([]*net.IPNet, *cidrset.ClusterCIDR, error) {
+	clusterCIDRList, err := r.orderedMatchingClusterCIDRs(logger, node, true)
 	if err != nil {
 		return nil, nil, fmt.Errorf("unable to get a clusterCIDR for node %s: %w", node.Name, err)
 	}
@@ -743,7 +747,7 @@ func (r *multiCIDRRangeAllocator) prioritizedCIDRs(node *v1.Node) ([]*net.IPNet,
 		if clusterCIDR.IPv4CIDRSet != nil {
 			cidr, err := r.allocateCIDR(clusterCIDR, clusterCIDR.IPv4CIDRSet)
 			if err != nil {
-				klog.V(3).Infof("unable to allocate IPv4 CIDR, trying next range: %w", err)
+				logger.V(3).Info("Unable to allocate IPv4 CIDR, trying next range", "err", err)
 				continue
 			}
 			cidrs = append(cidrs, cidr)
@@ -752,7 +756,7 @@ func (r *multiCIDRRangeAllocator) prioritizedCIDRs(node *v1.Node) ([]*net.IPNet,
 		if clusterCIDR.IPv6CIDRSet != nil {
 			cidr, err := r.allocateCIDR(clusterCIDR, clusterCIDR.IPv6CIDRSet)
 			if err != nil {
-				klog.V(3).Infof("unable to allocate IPv6 CIDR, trying next range: %w", err)
+				logger.V(3).Info("Unable to allocate IPv6 CIDR, trying next range", "err", err)
 				continue
 			}
 			cidrs = append(cidrs, cidr)
@@ -827,8 +831,8 @@ func (r *multiCIDRRangeAllocator) cidrOverlapWithAllocatedList(cidr *net.IPNet)
 }
 
 // allocatedClusterCIDR returns the ClusterCIDR from which the node CIDRs were allocated.
-func (r *multiCIDRRangeAllocator) allocatedClusterCIDR(node *v1.Node) (*cidrset.ClusterCIDR, error) {
-	clusterCIDRList, err := r.orderedMatchingClusterCIDRs(node, false)
+func (r *multiCIDRRangeAllocator) allocatedClusterCIDR(logger klog.Logger, node *v1.Node) (*cidrset.ClusterCIDR, error) {
+	clusterCIDRList, err := r.orderedMatchingClusterCIDRs(logger, node, false)
 	if err != nil {
 		return nil, fmt.Errorf("unable to get a clusterCIDR for node %s: %w", node.Name, err)
 	}
@@ -852,12 +856,12 @@ func (r *multiCIDRRangeAllocator) allocatedClusterCIDR(node *v1.Node) (*cidrset.
 // orderedMatchingClusterCIDRs takes `occupy` as an argument, it determines whether the function
 // is called during an occupy or a release operation. For a release operation, a ClusterCIDR must
 // be added to the matching ClusterCIDRs list, irrespective of whether the ClusterCIDR is terminating.
-func (r *multiCIDRRangeAllocator) orderedMatchingClusterCIDRs(node *v1.Node, occupy bool) ([]*cidrset.ClusterCIDR, error) {
+func (r *multiCIDRRangeAllocator) orderedMatchingClusterCIDRs(logger klog.Logger, node *v1.Node, occupy bool) ([]*cidrset.ClusterCIDR, error) {
 	matchingCIDRs := make([]*cidrset.ClusterCIDR, 0)
 	pq := make(PriorityQueue, 0)
 
 	for label, clusterCIDRList := range r.cidrMap {
-		labelsMatch, matchCnt, err := r.matchCIDRLabels(node, label)
+		labelsMatch, matchCnt, err := r.matchCIDRLabels(logger, node, label)
 		if err != nil {
 			return nil, err
 		}
@@ -902,14 +906,14 @@ func (r *multiCIDRRangeAllocator) orderedMatchingClusterCIDRs(node *v1.Node, occ
 
 // matchCIDRLabels Matches the Node labels to CIDR Configs.
 // Returns true only if all the labels match, also returns the count of matching labels.
-func (r *multiCIDRRangeAllocator) matchCIDRLabels(node *v1.Node, label string) (bool, int, error) {
+func (r *multiCIDRRangeAllocator) matchCIDRLabels(logger klog.Logger, node *v1.Node, label string) (bool, int, error) {
 	var labelSet labels.Set
 	var matchCnt int
 	labelsMatch := false
 
 	ls, err := labels.Parse(label)
 	if err != nil {
-		klog.Errorf("Unable to parse label %s to labels.Selector: %v", label, err)
+		logger.Error(err, "Unable to parse label to labels.Selector", "label", label)
 		return labelsMatch, 0, err
 	}
 	reqs, selectable := ls.Requirements()
@@ -934,7 +938,7 @@ func (r *multiCIDRRangeAllocator) matchCIDRLabels(node *v1.Node, label string) (
 // createDefaultClusterCIDR creates a default ClusterCIDR if --cluster-cidr has
 // been configured. It converts the --cluster-cidr and --per-node-mask-size* flags
 // to appropriate ClusterCIDR fields.
-func createDefaultClusterCIDR(existingConfigList *networkingv1alpha1.ClusterCIDRList,
+func createDefaultClusterCIDR(logger klog.Logger, existingConfigList *networkingv1alpha1.ClusterCIDRList,
 	allocatorParams CIDRAllocatorParams) {
 	// Create default ClusterCIDR only if --cluster-cidr has been configured
 	if len(allocatorParams.ClusterCIDRs) == 0 {
@@ -944,7 +948,7 @@ func createDefaultClusterCIDR(existingConfigList *networkingv1alpha1.ClusterCIDR
 	for _, clusterCIDR := range existingConfigList.Items {
 		if clusterCIDR.Name == defaultClusterCIDRName {
 			// Default ClusterCIDR already exists, no further action required.
-			klog.V(3).Infof("Default ClusterCIDR %s already exists", defaultClusterCIDRName)
+			logger.V(3).Info("Default ClusterCIDR already exists", "defaultClusterCIDRName", defaultClusterCIDRName)
 			return
 		}
 	}
@@ -1006,14 +1010,15 @@ func createDefaultClusterCIDR(existingConfigList *networkingv1alpha1.ClusterCIDR
 }
 
 // reconcileCreate handles create ClusterCIDR events.
-func (r *multiCIDRRangeAllocator) reconcileCreate(clusterCIDR *networkingv1alpha1.ClusterCIDR) error {
+func (r *multiCIDRRangeAllocator) reconcileCreate(ctx context.Context, clusterCIDR *networkingv1alpha1.ClusterCIDR) error {
 	r.lock.Lock()
 	defer r.lock.Unlock()
 
+	logger := klog.FromContext(ctx)
 	if needToAddFinalizer(clusterCIDR, clusterCIDRFinalizer) {
-		klog.V(3).Infof("Creating ClusterCIDR %s", clusterCIDR.Name)
-		if err := r.createClusterCIDR(clusterCIDR, false); err != nil {
-			klog.Errorf("Unable to create ClusterCIDR %s : %v", clusterCIDR.Name, err)
+		logger.V(3).Info("Creating ClusterCIDR", "clusterCIDR", clusterCIDR.Name)
if err := r.createClusterCIDR(ctx, clusterCIDR, false); err != nil {
|
||||
logger.Error(err, "Unable to create ClusterCIDR", "clusterCIDR", clusterCIDR.Name)
|
||||
return err
|
||||
}
|
||||
}
|
||||
@ -1022,21 +1027,22 @@ func (r *multiCIDRRangeAllocator) reconcileCreate(clusterCIDR *networkingv1alpha
|
||||
|
||||
// reconcileBootstrap handles creation of existing ClusterCIDRs.
|
||||
// adds a finalizer if not already present.
|
||||
func (r *multiCIDRRangeAllocator) reconcileBootstrap(clusterCIDR *networkingv1alpha1.ClusterCIDR) error {
|
||||
func (r *multiCIDRRangeAllocator) reconcileBootstrap(ctx context.Context, clusterCIDR *networkingv1alpha1.ClusterCIDR) error {
|
||||
r.lock.Lock()
|
||||
defer r.lock.Unlock()
|
||||
|
||||
logger := klog.FromContext(ctx)
|
||||
terminating := false
|
||||
// Create the ClusterCIDR only if the Spec has not been modified.
|
||||
if clusterCIDR.Generation > 1 {
|
||||
terminating = true
|
||||
err := fmt.Errorf("CIDRs from ClusterCIDR %s will not be used for allocation as it was modified", clusterCIDR.Name)
|
||||
klog.Errorf("ClusterCIDR Modified: %v", err)
|
||||
logger.Error(err, "ClusterCIDR Modified")
|
||||
}
|
||||
|
||||
klog.V(2).Infof("Creating ClusterCIDR %s during bootstrap", clusterCIDR.Name)
|
||||
if err := r.createClusterCIDR(clusterCIDR, terminating); err != nil {
|
||||
klog.Errorf("Unable to create ClusterCIDR %s: %v", clusterCIDR.Name, err)
|
||||
logger.V(2).Info("Creating ClusterCIDR during bootstrap", "clusterCIDR", clusterCIDR.Name)
|
||||
if err := r.createClusterCIDR(ctx, clusterCIDR, terminating); err != nil {
|
||||
logger.Error(err, "Unable to create ClusterCIDR", "clusterCIDR", clusterCIDR.Name)
|
||||
return err
|
||||
}
|
||||
|
||||
@ -1044,7 +1050,7 @@ func (r *multiCIDRRangeAllocator) reconcileBootstrap(clusterCIDR *networkingv1al
|
||||
}
|
||||
|
||||
// createClusterCIDR creates and maps the cidrSets in the cidrMap.
|
||||
func (r *multiCIDRRangeAllocator) createClusterCIDR(clusterCIDR *networkingv1alpha1.ClusterCIDR, terminating bool) error {
|
||||
func (r *multiCIDRRangeAllocator) createClusterCIDR(ctx context.Context, clusterCIDR *networkingv1alpha1.ClusterCIDR, terminating bool) error {
|
||||
nodeSelector, err := r.nodeSelectorKey(clusterCIDR)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to get labelSelector key: %w", err)
|
||||
@ -1069,16 +1075,17 @@ func (r *multiCIDRRangeAllocator) createClusterCIDR(clusterCIDR *networkingv1alp
|
||||
updatedClusterCIDR.ObjectMeta.Finalizers = append(clusterCIDR.ObjectMeta.Finalizers, clusterCIDRFinalizer)
|
||||
}
|
||||
|
||||
logger := klog.FromContext(ctx)
|
||||
if updatedClusterCIDR.ResourceVersion == "" {
|
||||
// Create is only used for creating default ClusterCIDR.
|
||||
if _, err := r.client.NetworkingV1alpha1().ClusterCIDRs().Create(context.TODO(), updatedClusterCIDR, metav1.CreateOptions{}); err != nil {
|
||||
klog.V(2).Infof("Error creating ClusterCIDR %s: %v", clusterCIDR.Name, err)
|
||||
if _, err := r.client.NetworkingV1alpha1().ClusterCIDRs().Create(ctx, updatedClusterCIDR, metav1.CreateOptions{}); err != nil {
|
||||
logger.V(2).Info("Error creating ClusterCIDR", "clusterCIDR", klog.KObj(clusterCIDR), "err", err)
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
// Update the ClusterCIDR object when called from reconcileCreate.
|
||||
if _, err := r.client.NetworkingV1alpha1().ClusterCIDRs().Update(context.TODO(), updatedClusterCIDR, metav1.UpdateOptions{}); err != nil {
|
||||
klog.V(2).Infof("Error creating ClusterCIDR %s: %v", clusterCIDR.Name, err)
|
||||
if _, err := r.client.NetworkingV1alpha1().ClusterCIDRs().Update(ctx, updatedClusterCIDR, metav1.UpdateOptions{}); err != nil {
|
||||
logger.V(2).Info("Error creating ClusterCIDR", "clusterCIDR", clusterCIDR.Name, "err", err)
|
||||
return err
|
||||
}
|
||||
}
|
||||
@ -1136,30 +1143,31 @@ func (r *multiCIDRRangeAllocator) mapClusterCIDRSet(cidrMap map[string][]*cidrse
|
||||
|
||||
// reconcileDelete releases the assigned ClusterCIDR and removes the finalizer
|
||||
// if the deletion timestamp is set.
|
||||
func (r *multiCIDRRangeAllocator) reconcileDelete(clusterCIDR *networkingv1alpha1.ClusterCIDR) error {
|
||||
func (r *multiCIDRRangeAllocator) reconcileDelete(ctx context.Context, clusterCIDR *networkingv1alpha1.ClusterCIDR) error {
|
||||
r.lock.Lock()
|
||||
defer r.lock.Unlock()
|
||||
|
||||
logger := klog.FromContext(ctx)
|
||||
if slice.ContainsString(clusterCIDR.GetFinalizers(), clusterCIDRFinalizer, nil) {
|
||||
klog.V(2).Infof("Releasing ClusterCIDR: %s", clusterCIDR.Name)
|
||||
if err := r.deleteClusterCIDR(clusterCIDR); err != nil {
|
||||
klog.V(2).Infof("Error while deleting ClusterCIDR: %+v", err)
|
||||
logger.V(2).Info("Releasing ClusterCIDR", "clusterCIDR", clusterCIDR.Name)
|
||||
if err := r.deleteClusterCIDR(logger, clusterCIDR); err != nil {
|
||||
klog.V(2).Info("Error while deleting ClusterCIDR", "err", err)
|
||||
return err
|
||||
}
|
||||
// Remove the finalizer as delete is successful.
|
||||
cccCopy := clusterCIDR.DeepCopy()
|
||||
cccCopy.ObjectMeta.Finalizers = slice.RemoveString(cccCopy.ObjectMeta.Finalizers, clusterCIDRFinalizer, nil)
|
||||
if _, err := r.client.NetworkingV1alpha1().ClusterCIDRs().Update(context.TODO(), cccCopy, metav1.UpdateOptions{}); err != nil {
|
||||
klog.V(2).Infof("Error removing finalizer for ClusterCIDR %s: %v", clusterCIDR.Name, err)
|
||||
if _, err := r.client.NetworkingV1alpha1().ClusterCIDRs().Update(ctx, cccCopy, metav1.UpdateOptions{}); err != nil {
|
||||
logger.V(2).Info("Error removing finalizer for ClusterCIDR", "clusterCIDR", clusterCIDR.Name, "err", err)
|
||||
return err
|
||||
}
|
||||
klog.V(2).Infof("Removed finalizer for ClusterCIDR %s", clusterCIDR.Name)
|
||||
logger.V(2).Info("Removed finalizer for ClusterCIDR", "clusterCIDR", clusterCIDR.Name)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// deleteClusterCIDR Deletes and unmaps the ClusterCIDRs from the cidrMap.
|
||||
func (r *multiCIDRRangeAllocator) deleteClusterCIDR(clusterCIDR *networkingv1alpha1.ClusterCIDR) error {
|
||||
func (r *multiCIDRRangeAllocator) deleteClusterCIDR(logger klog.Logger, clusterCIDR *networkingv1alpha1.ClusterCIDR) error {
|
||||
|
||||
labelSelector, err := r.nodeSelectorKey(clusterCIDR)
|
||||
if err != nil {
|
||||
@ -1168,7 +1176,7 @@ func (r *multiCIDRRangeAllocator) deleteClusterCIDR(clusterCIDR *networkingv1alp
|
||||
|
||||
clusterCIDRSetList, ok := r.cidrMap[labelSelector]
|
||||
if !ok {
|
||||
klog.Infof("Label %s not found in CIDRMap, proceeding with delete", labelSelector)
|
||||
logger.Info("Label not found in CIDRMap, proceeding with delete", "labelSelector", labelSelector)
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -1196,7 +1204,7 @@ func (r *multiCIDRRangeAllocator) deleteClusterCIDR(clusterCIDR *networkingv1alp
|
||||
r.cidrMap[labelSelector] = clusterCIDRSetList
|
||||
return nil
|
||||
}
|
||||
klog.V(2).Info("clusterCIDR not found, proceeding with delete", "Name", clusterCIDR.Name, "label", labelSelector)
|
||||
logger.V(2).Info("clusterCIDR not found, proceeding with delete", "clusterCIDR", clusterCIDR.Name, "label", labelSelector)
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -1217,7 +1225,7 @@ func (r *multiCIDRRangeAllocator) nodeSelectorKey(clusterCIDR *networkingv1alpha
|
||||
return nodeSelector.String(), nil
|
||||
}
|
||||
|
||||
func listClusterCIDRs(kubeClient clientset.Interface) (*networkingv1alpha1.ClusterCIDRList, error) {
|
||||
func listClusterCIDRs(ctx context.Context, kubeClient clientset.Interface) (*networkingv1alpha1.ClusterCIDRList, error) {
|
||||
var clusterCIDRList *networkingv1alpha1.ClusterCIDRList
|
||||
// We must poll because apiserver might not be up. This error causes
|
||||
// controller manager to restart.
|
||||
@ -1230,19 +1238,20 @@ func listClusterCIDRs(kubeClient clientset.Interface) (*networkingv1alpha1.Clust
|
||||
Steps: 11,
|
||||
}
|
||||
|
||||
logger := klog.FromContext(ctx)
|
||||
if pollErr := wait.ExponentialBackoff(backoff, func() (bool, error) {
|
||||
var err error
|
||||
clusterCIDRList, err = kubeClient.NetworkingV1alpha1().ClusterCIDRs().List(context.TODO(), metav1.ListOptions{
|
||||
clusterCIDRList, err = kubeClient.NetworkingV1alpha1().ClusterCIDRs().List(ctx, metav1.ListOptions{
|
||||
FieldSelector: fields.Everything().String(),
|
||||
LabelSelector: labels.Everything().String(),
|
||||
})
|
||||
if err != nil {
|
||||
klog.Errorf("Failed to list all clusterCIDRs: %v", err)
|
||||
logger.Error(err, "Failed to list all clusterCIDRs")
|
||||
return false, nil
|
||||
}
|
||||
return true, nil
|
||||
}); pollErr != nil {
|
||||
klog.Errorf("Failed to list clusterCIDRs (after %v)", time.Now().Sub(startTimestamp))
|
||||
logger.Error(nil, "Failed to list clusterCIDRs", "latency", time.Now().Sub(startTimestamp))
|
||||
return nil, fmt.Errorf("failed to list all clusterCIDRs in %v, cannot proceed without updating CIDR map",
|
||||
apiserverStartupGracePeriod)
|
||||
}
|
||||
|
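Editor's note: the hunks above all apply one mechanical recipe: accept a context.Context at the entry point, derive a klog.Logger from it once with klog.FromContext, and pass that logger (not the whole context) into helpers that only log. A minimal, self-contained sketch of that recipe follows; the reconciler type and its release helper are hypothetical stand-ins for illustration, not code from this PR.

package main

import (
	"context"
	"fmt"

	"k8s.io/klog/v2"
)

// reconciler is a hypothetical stand-in for a controller type that used to
// call klog.Errorf directly and now threads a logger through its helpers.
type reconciler struct{}

// reconcile derives the logger from ctx once, then hands it to helpers.
func (r *reconciler) reconcile(ctx context.Context, name string) error {
	logger := klog.FromContext(ctx)
	logger.V(2).Info("Releasing object", "name", name)
	if err := r.release(logger, name); err != nil {
		logger.Error(err, "Error while releasing object", "name", name)
		return err
	}
	return nil
}

// release takes klog.Logger rather than context.Context because it only
// logs; it issues no API calls that would need cancellation.
func (r *reconciler) release(logger klog.Logger, name string) error {
	logger.V(4).Info("Released", "name", name)
	return nil
}

func main() {
	// klog.NewContext attaches a logger to the context for callees to recover.
	ctx := klog.NewContext(context.Background(), klog.Background())
	if err := (&reconciler{}).reconcile(ctx, "example"); err != nil {
		fmt.Println(err)
	}
}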
@ -34,6 +34,7 @@ import (
"k8s.io/client-go/kubernetes/fake"
k8stesting "k8s.io/client-go/testing"
"k8s.io/client-go/tools/cache"
"k8s.io/klog/v2/ktesting"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
"k8s.io/kubernetes/pkg/controller"
cidrset "k8s.io/kubernetes/pkg/controller/nodeipam/ipam/multicidrset"
@ -473,6 +474,7 @@ func TestMultiCIDROccupyPreExistingCIDR(t *testing.T) {
}

// test function
_, ctx := ktesting.NewTestContext(t)
for _, tc := range testCaseMultiCIDRs {
t.Run(tc.description, func(t *testing.T) {
// Initialize the range allocator.
@ -482,7 +484,7 @@ func TestMultiCIDROccupyPreExistingCIDR(t *testing.T) {
fakeClusterCIDRInformer := fakeInformerFactory.Networking().V1alpha1().ClusterCIDRs()
nodeList, _ := tc.fakeNodeHandler.List(context.TODO(), metav1.ListOptions{})

_, err := NewMultiCIDRRangeAllocator(tc.fakeNodeHandler, fakeNodeInformer, fakeClusterCIDRInformer, tc.allocatorParams, nodeList, tc.testCIDRMap)
_, err := NewMultiCIDRRangeAllocator(ctx, tc.fakeNodeHandler, fakeNodeInformer, fakeClusterCIDRInformer, tc.allocatorParams, nodeList, tc.testCIDRMap)
if err == nil && tc.ctrlCreateFail {
t.Fatalf("creating range allocator was expected to fail, but it did not")
}
@ -1177,6 +1179,8 @@ func TestMultiCIDRAllocateOrOccupyCIDRSuccess(t *testing.T) {
},
}

logger, ctx := ktesting.NewTestContext(t)

// test function
testFunc := func(tc testCaseMultiCIDR) {
nodeList, _ := tc.fakeNodeHandler.List(context.TODO(), metav1.ListOptions{})
@ -1185,7 +1189,7 @@ func TestMultiCIDRAllocateOrOccupyCIDRSuccess(t *testing.T) {
fakeClient := &fake.Clientset{}
fakeInformerFactory := informers.NewSharedInformerFactory(fakeClient, controller.NoResyncPeriodFunc())
fakeClusterCIDRInformer := fakeInformerFactory.Networking().V1alpha1().ClusterCIDRs()
allocator, err := NewMultiCIDRRangeAllocator(tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), fakeClusterCIDRInformer, tc.allocatorParams, nodeList, tc.testCIDRMap)
allocator, err := NewMultiCIDRRangeAllocator(ctx, tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), fakeClusterCIDRInformer, tc.allocatorParams, nodeList, tc.testCIDRMap)
if err != nil {
t.Errorf("%v: failed to create CIDRRangeAllocator with error %v", tc.description, err)
return
@ -1230,7 +1234,7 @@ func TestMultiCIDRAllocateOrOccupyCIDRSuccess(t *testing.T) {
if node.Spec.PodCIDRs == nil {
updateCount++
}
if err := allocator.AllocateOrOccupyCIDR(node); err != nil {
if err := allocator.AllocateOrOccupyCIDR(logger, node); err != nil {
t.Errorf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err)
}
}
@ -1308,13 +1312,15 @@ func TestMultiCIDRAllocateOrOccupyCIDRFailure(t *testing.T) {
},
}

logger, ctx := ktesting.NewTestContext(t)

testFunc := func(tc testCaseMultiCIDR) {
fakeClient := &fake.Clientset{}
fakeInformerFactory := informers.NewSharedInformerFactory(fakeClient, controller.NoResyncPeriodFunc())
fakeClusterCIDRInformer := fakeInformerFactory.Networking().V1alpha1().ClusterCIDRs()

// Initialize the range allocator.
allocator, err := NewMultiCIDRRangeAllocator(tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), fakeClusterCIDRInformer, tc.allocatorParams, nil, tc.testCIDRMap)
allocator, err := NewMultiCIDRRangeAllocator(ctx, tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), fakeClusterCIDRInformer, tc.allocatorParams, nil, tc.testCIDRMap)
if err != nil {
t.Logf("%v: failed to create CIDRRangeAllocator with error %v", tc.description, err)
}
@ -1353,7 +1359,7 @@ func TestMultiCIDRAllocateOrOccupyCIDRFailure(t *testing.T) {
}
}

if err := allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0]); err == nil {
if err := allocator.AllocateOrOccupyCIDR(logger, tc.fakeNodeHandler.Existing[0]); err == nil {
t.Errorf("%v: unexpected success in AllocateOrOccupyCIDR: %v", tc.description, err)
}
// We don't expect any updates, so just sleep for some time
@ -1498,13 +1504,13 @@ func TestMultiCIDRReleaseCIDRSuccess(t *testing.T) {
},
},
}

logger, ctx := ktesting.NewTestContext(t)
testFunc := func(tc releasetestCaseMultiCIDR) {
fakeClient := &fake.Clientset{}
fakeInformerFactory := informers.NewSharedInformerFactory(fakeClient, controller.NoResyncPeriodFunc())
fakeClusterCIDRInformer := fakeInformerFactory.Networking().V1alpha1().ClusterCIDRs()
// Initialize the range allocator.
allocator, _ := NewMultiCIDRRangeAllocator(tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), fakeClusterCIDRInformer, tc.allocatorParams, nil, tc.testCIDRMap)
allocator, _ := NewMultiCIDRRangeAllocator(ctx, tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), fakeClusterCIDRInformer, tc.allocatorParams, nil, tc.testCIDRMap)
rangeAllocator, ok := allocator.(*multiCIDRRangeAllocator)
if !ok {
t.Logf("%v: found non-default implementation of CIDRAllocator, skipping white-box test...", tc.description)
@ -1540,7 +1546,7 @@ func TestMultiCIDRReleaseCIDRSuccess(t *testing.T) {
}
}

err := allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0])
err := allocator.AllocateOrOccupyCIDR(logger, tc.fakeNodeHandler.Existing[0])
if len(tc.expectedAllocatedCIDRFirstRound) != 0 {
if err != nil {
t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err)
@ -1570,12 +1576,12 @@ func TestMultiCIDRReleaseCIDRSuccess(t *testing.T) {
},
}
nodeToRelease.Spec.PodCIDRs = cidrToRelease
err = allocator.ReleaseCIDR(&nodeToRelease)
err = allocator.ReleaseCIDR(logger, &nodeToRelease)
if err != nil {
t.Fatalf("%v: unexpected error in ReleaseCIDR: %v", tc.description, err)
}
}
if err = allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0]); err != nil {
if err = allocator.AllocateOrOccupyCIDR(logger, tc.fakeNodeHandler.Existing[0]); err != nil {
t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err)
}
if err := test.WaitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, 1, wait.ForeverTestTimeout); err != nil {
@ -1614,7 +1620,7 @@ type clusterCIDRController struct {
clusterCIDRStore cache.Store
}

func newController() (*fake.Clientset, *clusterCIDRController) {
func newController(ctx context.Context) (*fake.Clientset, *clusterCIDRController) {
client := fake.NewSimpleClientset()

informerFactory := informers.NewSharedInformerFactory(client, controller.NoResyncPeriodFunc())
@ -1657,7 +1663,7 @@ func newController() (*fake.Clientset, *clusterCIDRController) {
testCIDRMap := make(map[string][]*cidrset.ClusterCIDR, 0)

// Initialize the range allocator.
ra, _ := NewMultiCIDRRangeAllocator(client, nodeInformer, cccInformer, allocatorParams, nil, testCIDRMap)
ra, _ := NewMultiCIDRRangeAllocator(ctx, client, nodeInformer, cccInformer, allocatorParams, nil, testCIDRMap)
cccController := ra.(*multiCIDRRangeAllocator)

cccController.clusterCIDRSynced = alwaysReady
@ -1671,8 +1677,8 @@ func newController() (*fake.Clientset, *clusterCIDRController) {
// Ensure default ClusterCIDR is created during bootstrap.
func TestClusterCIDRDefault(t *testing.T) {
defaultCCC := makeClusterCIDR(defaultClusterCIDRName, "192.168.0.0/16", "", 8, nil)

client, _ := newController()
_, ctx := ktesting.NewTestContext(t)
client, _ := newController(ctx)
createdCCC, err := client.NetworkingV1alpha1().ClusterCIDRs().Get(context.TODO(), defaultClusterCIDRName, metav1.GetOptions{})
assert.Nil(t, err, "Expected no error getting clustercidr objects")
assert.Equal(t, defaultCCC.Spec, createdCCC.Spec)
@ -1752,11 +1758,11 @@ func TestSyncClusterCIDRCreate(t *testing.T) {
wantErr: true,
},
}

client, cccController := newController()
_, ctx := ktesting.NewTestContext(t)
client, cccController := newController(ctx)
for _, tc := range tests {
cccController.clusterCIDRStore.Add(tc.ccc)
err := cccController.syncClusterCIDR(tc.ccc.Name)
err := cccController.syncClusterCIDR(ctx, tc.ccc.Name)
if tc.wantErr {
assert.Error(t, err)
continue
@ -1773,30 +1779,32 @@ func TestSyncClusterCIDRCreate(t *testing.T) {

// Ensure syncClusterCIDR for ClusterCIDR delete removes the ClusterCIDR.
func TestSyncClusterCIDRDelete(t *testing.T) {
_, cccController := newController()
_, ctx := ktesting.NewTestContext(t)
_, cccController := newController(ctx)

testCCC := makeClusterCIDR("testing-1", "10.1.0.0/16", "", 8, makeNodeSelector("foo", v1.NodeSelectorOpIn, []string{"bar"}))

cccController.clusterCIDRStore.Add(testCCC)
err := cccController.syncClusterCIDR(testCCC.Name)
err := cccController.syncClusterCIDR(ctx, testCCC.Name)
assert.NoError(t, err)

deletionTimestamp := metav1.Now()
testCCC.DeletionTimestamp = &deletionTimestamp
cccController.clusterCIDRStore.Update(testCCC)
err = cccController.syncClusterCIDR(testCCC.Name)
err = cccController.syncClusterCIDR(ctx, testCCC.Name)
assert.NoError(t, err)
}

// Ensure syncClusterCIDR for ClusterCIDR delete does not remove ClusterCIDR
// if a node is associated with the ClusterCIDR.
func TestSyncClusterCIDRDeleteWithNodesAssociated(t *testing.T) {
client, cccController := newController()
_, ctx := ktesting.NewTestContext(t)
client, cccController := newController(ctx)

testCCC := makeClusterCIDR("testing-1", "10.1.0.0/16", "", 8, makeNodeSelector("foo", v1.NodeSelectorOpIn, []string{"bar"}))

cccController.clusterCIDRStore.Add(testCCC)
err := cccController.syncClusterCIDR(testCCC.Name)
err := cccController.syncClusterCIDR(ctx, testCCC.Name)
assert.NoError(t, err)

// Mock the IPAM controller behavior associating node with ClusterCIDR.
@ -1810,7 +1818,7 @@ func TestSyncClusterCIDRDeleteWithNodesAssociated(t *testing.T) {
deletionTimestamp := metav1.Now()
createdCCC.DeletionTimestamp = &deletionTimestamp
cccController.clusterCIDRStore.Update(createdCCC)
err = cccController.syncClusterCIDR(createdCCC.Name)
err = cccController.syncClusterCIDR(ctx, createdCCC.Name)
assert.Error(t, err, fmt.Sprintf("ClusterCIDR %s marked as terminating, won't be deleted until all associated nodes are deleted", createdCCC.Name))
}
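Editor's note: the test hunks above lean on ktesting.NewTestContext, which returns both an explicit logger and a context carrying that same logger, with output routed through t.Log so log lines attach to the failing test. A small illustrative test, assuming only klog/v2 and its ktesting package; doWork is a hypothetical function under test, not from this PR.

package example

import (
	"testing"

	"k8s.io/klog/v2"
	"k8s.io/klog/v2/ktesting"
)

// doWork is a hypothetical stand-in for code that accepts an explicit logger.
func doWork(logger klog.Logger, item string) {
	logger.V(2).Info("Processing", "item", item)
}

func TestDoWork(t *testing.T) {
	// NewTestContext returns a logger plus a context carrying that logger;
	// both hand the same sink to the code under test.
	logger, ctx := ktesting.NewTestContext(t)
	doWork(logger, "node-1")
	doWork(klog.FromContext(ctx), "node-2") // same logger, recovered from ctx
}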
@ -22,7 +22,7 @@ import (
"testing"

"k8s.io/component-base/metrics/testutil"
"k8s.io/klog/v2"
"k8s.io/klog/v2/ktesting"
utilnet "k8s.io/utils/net"
)

@ -568,6 +568,7 @@ func TestGetBitforCIDR(t *testing.T) {
},
}

logger, _ := ktesting.NewTestContext(t)
for _, tc := range cases {
_, clusterCIDR, err := utilnet.ParseCIDRSloppy(tc.clusterCIDRStr)
if err != nil {
@ -585,17 +586,17 @@ func TestGetBitforCIDR(t *testing.T) {

got, err := cs.getIndexForCIDR(subnetCIDR)
if err == nil && tc.expectErr {
klog.Errorf("expected error but got null for %v", tc.description)
logger.Error(nil, "Expected error but got null", "description", tc.description)
continue
}

if err != nil && !tc.expectErr {
klog.Errorf("unexpected error: %v for %v", err, tc.description)
logger.Error(err, "Unexpected error", "description", tc.description)
continue
}

if got != tc.expectedBit {
klog.Errorf("expected %v, but got %v for %v", tc.expectedBit, got, tc.description)
logger.Error(nil, "Unexpected value", "description", tc.description, "expected", tc.expectedBit, "got", got)
}
}
}
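Editor's note: one convention worth calling out in the converted test above: when there is no error object, nil is passed as the first argument to logger.Error and the interesting values move into key/value pairs instead of the format string. A short sketch of both call shapes; logShapes is purely illustrative, not code from this PR.

package example

import (
	"errors"

	"k8s.io/klog/v2"
)

// logShapes shows the two Error call shapes used in the converted tests.
func logShapes(logger klog.Logger) {
	err := errors.New("boom")
	// A real error value goes first, context becomes structured fields.
	logger.Error(err, "Unexpected error", "description", "case-1")
	// No error object exists for a plain mismatch, so nil is passed and the
	// expected/actual values become structured fields.
	logger.Error(nil, "Unexpected value", "expected", 4, "got", 7)
}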
@ -17,6 +17,7 @@ limitations under the License.
package ipam

import (
"context"
"fmt"
"net"
"sync"
@ -66,9 +67,10 @@ type rangeAllocator struct {
// Caller must always pass in a list of existing nodes so the new allocator
// can initialize its CIDR map. NodeList is only nil in testing.
// Caller must ensure that ClusterCIDRs are semantically correct e.g (1 for non DualStack, 2 for DualStack etc..)
func NewCIDRRangeAllocator(client clientset.Interface, nodeInformer informers.NodeInformer, allocatorParams CIDRAllocatorParams, nodeList *v1.NodeList) (CIDRAllocator, error) {
func NewCIDRRangeAllocator(logger klog.Logger, client clientset.Interface, nodeInformer informers.NodeInformer, allocatorParams CIDRAllocatorParams, nodeList *v1.NodeList) (CIDRAllocator, error) {
if client == nil {
klog.Fatalf("kubeClient is nil when starting NodeController")
logger.Error(nil, "kubeClient is nil when starting CIDRRangeAllocator")
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
}

eventBroadcaster := record.NewBroadcaster()
@ -98,24 +100,24 @@ func NewCIDRRangeAllocator(client clientset.Interface, nodeInformer informers.No
}

if allocatorParams.ServiceCIDR != nil {
ra.filterOutServiceRange(allocatorParams.ServiceCIDR)
ra.filterOutServiceRange(logger, allocatorParams.ServiceCIDR)
} else {
klog.Info("No Service CIDR provided. Skipping filtering out service addresses.")
logger.Info("No Service CIDR provided. Skipping filtering out service addresses")
}

if allocatorParams.SecondaryServiceCIDR != nil {
ra.filterOutServiceRange(allocatorParams.SecondaryServiceCIDR)
ra.filterOutServiceRange(logger, allocatorParams.SecondaryServiceCIDR)
} else {
klog.Info("No Secondary Service CIDR provided. Skipping filtering out secondary service addresses.")
logger.Info("No Secondary Service CIDR provided. Skipping filtering out secondary service addresses")
}

if nodeList != nil {
for _, node := range nodeList.Items {
if len(node.Spec.PodCIDRs) == 0 {
klog.V(4).Infof("Node %v has no CIDR, ignoring", node.Name)
logger.V(4).Info("Node has no CIDR, ignoring", "node", klog.KObj(&node))
continue
}
klog.V(4).Infof("Node %v has CIDR %s, occupying it in CIDR map", node.Name, node.Spec.PodCIDR)
logger.V(4).Info("Node has CIDR, occupying it in CIDR map", "node", klog.KObj(&node), "podCIDR", node.Spec.PodCIDR)
if err := ra.occupyCIDRs(&node); err != nil {
// This will happen if:
// 1. We find garbage in the podCIDRs field. Retrying is useless.
@ -127,7 +129,9 @@ func NewCIDRRangeAllocator(client clientset.Interface, nodeInformer informers.No
}

nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: controllerutil.CreateAddNodeHandler(ra.AllocateOrOccupyCIDR),
AddFunc: controllerutil.CreateAddNodeHandler(func(node *v1.Node) error {
return ra.AllocateOrOccupyCIDR(logger, node)
}),
UpdateFunc: controllerutil.CreateUpdateNodeHandler(func(_, newNode *v1.Node) error {
// If the PodCIDRs list is not empty we either:
// - already processed a Node that already had CIDRs after NC restarted
@ -149,52 +153,56 @@ func NewCIDRRangeAllocator(client clientset.Interface, nodeInformer informers.No
// state is correct.
// Restart of NC fixes the issue.
if len(newNode.Spec.PodCIDRs) == 0 {
return ra.AllocateOrOccupyCIDR(newNode)
return ra.AllocateOrOccupyCIDR(logger, newNode)
}
return nil
}),
DeleteFunc: controllerutil.CreateDeleteNodeHandler(ra.ReleaseCIDR),
DeleteFunc: controllerutil.CreateDeleteNodeHandler(func(node *v1.Node) error {
return ra.ReleaseCIDR(logger, node)
}),
})

return ra, nil
}

func (r *rangeAllocator) Run(stopCh <-chan struct{}) {
func (r *rangeAllocator) Run(ctx context.Context) {
defer utilruntime.HandleCrash()

// Start event processing pipeline.
r.broadcaster.StartStructuredLogging(0)
klog.Infof("Sending events to api server.")
logger := klog.FromContext(ctx)
logger.Info("Sending events to api server")
r.broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: r.client.CoreV1().Events("")})
defer r.broadcaster.Shutdown()

klog.Infof("Starting range CIDR allocator")
defer klog.Infof("Shutting down range CIDR allocator")
logger.Info("Starting range CIDR allocator")
defer logger.Info("Shutting down range CIDR allocator")

if !cache.WaitForNamedCacheSync("cidrallocator", stopCh, r.nodesSynced) {
if !cache.WaitForNamedCacheSync("cidrallocator", ctx.Done(), r.nodesSynced) {
return
}

for i := 0; i < cidrUpdateWorkers; i++ {
go r.worker(stopCh)
go r.worker(ctx)
}

<-stopCh
<-ctx.Done()
}

func (r *rangeAllocator) worker(stopChan <-chan struct{}) {
func (r *rangeAllocator) worker(ctx context.Context) {
logger := klog.FromContext(ctx)
for {
select {
case workItem, ok := <-r.nodeCIDRUpdateChannel:
if !ok {
klog.Warning("Channel nodeCIDRUpdateChannel was unexpectedly closed")
logger.Info("Channel nodeCIDRUpdateChannel was unexpectedly closed")
return
}
if err := r.updateCIDRsAllocation(workItem); err != nil {
if err := r.updateCIDRsAllocation(logger, workItem); err != nil {
// Requeue the failed node for update again.
r.nodeCIDRUpdateChannel <- workItem
}
case <-stopChan:
case <-ctx.Done():
return
}
}
@ -244,12 +252,12 @@ func (r *rangeAllocator) occupyCIDRs(node *v1.Node) error {
// WARNING: If you're adding any return calls or defer any more work from this
// function you have to make sure to update nodesInProcessing properly with the
// disposition of the node when the work is done.
func (r *rangeAllocator) AllocateOrOccupyCIDR(node *v1.Node) error {
func (r *rangeAllocator) AllocateOrOccupyCIDR(logger klog.Logger, node *v1.Node) error {
if node == nil {
return nil
}
if !r.insertNodeToProcessing(node.Name) {
klog.V(2).Infof("Node %v is already in a process of CIDR assignment.", node.Name)
logger.V(2).Info("Node is already in a process of CIDR assignment", "node", klog.KObj(node))
return nil
}

@ -273,13 +281,13 @@ func (r *rangeAllocator) AllocateOrOccupyCIDR(node *v1.Node) error {
}

//queue the assignment
klog.V(4).Infof("Putting node %s with CIDR %v into the work queue", node.Name, allocated.allocatedCIDRs)
logger.V(4).Info("Putting node with CIDR into the work queue", "node", klog.KObj(node), "CIDRs", allocated.allocatedCIDRs)
r.nodeCIDRUpdateChannel <- allocated
return nil
}

// ReleaseCIDR marks node.podCIDRs[...] as unused in our tracked cidrSets
func (r *rangeAllocator) ReleaseCIDR(node *v1.Node) error {
func (r *rangeAllocator) ReleaseCIDR(logger klog.Logger, node *v1.Node) error {
if node == nil || len(node.Spec.PodCIDRs) == 0 {
return nil
}
@ -297,7 +305,7 @@ func (r *rangeAllocator) ReleaseCIDR(node *v1.Node) error {
return fmt.Errorf("node:%s has an allocated cidr: %v at index:%v that does not exist in cluster cidrs configuration", node.Name, cidr, idx)
}

klog.V(4).Infof("release CIDR %s for node:%v", cidr, node.Name)
logger.V(4).Info("Release CIDR for node", "CIDR", cidr, "node", klog.KObj(node))
if err = r.cidrSets[idx].Release(podCIDR); err != nil {
return fmt.Errorf("error when releasing CIDR %v: %v", cidr, err)
}
@ -307,7 +315,7 @@ func (r *rangeAllocator) ReleaseCIDR(node *v1.Node) error {

// Marks all CIDRs with subNetMaskSize that belongs to serviceCIDR as used across all cidrs
// so that they won't be assignable.
func (r *rangeAllocator) filterOutServiceRange(serviceCIDR *net.IPNet) {
func (r *rangeAllocator) filterOutServiceRange(logger klog.Logger, serviceCIDR *net.IPNet) {
// Checks if service CIDR has a nonempty intersection with cluster
// CIDR. It is the case if either clusterCIDR contains serviceCIDR with
// clusterCIDR's Mask applied (this means that clusterCIDR contains
@ -321,20 +329,20 @@ func (r *rangeAllocator) filterOutServiceRange(serviceCIDR *net.IPNet) {

// at this point, len(cidrSet) == len(clusterCidr)
if err := r.cidrSets[idx].Occupy(serviceCIDR); err != nil {
klog.Errorf("Error filtering out service cidr out cluster cidr:%v (index:%v) %v: %v", cidr, idx, serviceCIDR, err)
logger.Error(err, "Error filtering out service cidr out cluster cidr", "CIDR", cidr, "index", idx, "serviceCIDR", serviceCIDR)
}
}
}

// updateCIDRsAllocation assigns CIDR to Node and sends an update to the API server.
func (r *rangeAllocator) updateCIDRsAllocation(data nodeReservedCIDRs) error {
func (r *rangeAllocator) updateCIDRsAllocation(logger klog.Logger, data nodeReservedCIDRs) error {
var err error
var node *v1.Node
defer r.removeNodeFromProcessing(data.nodeName)
cidrsString := ipnetToStringList(data.allocatedCIDRs)
node, err = r.nodeLister.Get(data.nodeName)
if err != nil {
klog.Errorf("Failed while getting node %v for updating Node.Spec.PodCIDRs: %v", data.nodeName, err)
logger.Error(err, "Failed while getting node for updating Node.Spec.PodCIDRs", "node", klog.KRef("", data.nodeName))
return err
}

@ -350,17 +358,17 @@ func (r *rangeAllocator) updateCIDRsAllocation(data nodeReservedCIDRs) error {
}
}
if match {
klog.V(4).Infof("Node %v already has allocated CIDR %v. It matches the proposed one.", node.Name, data.allocatedCIDRs)
logger.V(4).Info("Node already has allocated CIDR. It matches the proposed one", "node", klog.KObj(node), "CIDRs", data.allocatedCIDRs)
return nil
}
}

// node has cidrs, release the reserved
if len(node.Spec.PodCIDRs) != 0 {
klog.Errorf("Node %v already has a CIDR allocated %v. Releasing the new one.", node.Name, node.Spec.PodCIDRs)
logger.Error(nil, "Node already has a CIDR allocated. Releasing the new one", "node", klog.KObj(node), "podCIDRs", node.Spec.PodCIDRs)
for idx, cidr := range data.allocatedCIDRs {
if releaseErr := r.cidrSets[idx].Release(cidr); releaseErr != nil {
klog.Errorf("Error when releasing CIDR idx:%v value: %v err:%v", idx, cidr, releaseErr)
logger.Error(releaseErr, "Error when releasing CIDR", "index", idx, "CIDR", cidr)
}
}
return nil
@ -369,21 +377,21 @@ func (r *rangeAllocator) updateCIDRsAllocation(data nodeReservedCIDRs) error {
// If we reached here, it means that the node has no CIDR currently assigned. So we set it.
for i := 0; i < cidrUpdateRetries; i++ {
if err = nodeutil.PatchNodeCIDRs(r.client, types.NodeName(node.Name), cidrsString); err == nil {
klog.Infof("Set node %v PodCIDR to %v", node.Name, cidrsString)
logger.Info("Set node PodCIDR", "node", klog.KObj(node), "podCIDRs", cidrsString)
return nil
}
}
// failed release back to the pool
klog.Errorf("Failed to update node %v PodCIDR to %v after multiple attempts: %v", node.Name, cidrsString, err)
logger.Error(err, "Failed to update node PodCIDR after multiple attempts", "node", klog.KObj(node), "podCIDRs", cidrsString)
controllerutil.RecordNodeStatusChange(r.recorder, node, "CIDRAssignmentFailed")
// We accept the fact that we may leak CIDRs here. This is safer than releasing
// them in case when we don't know if request went through.
// NodeController restart will return all falsely allocated CIDRs to the pool.
if !apierrors.IsServerTimeout(err) {
klog.Errorf("CIDR assignment for node %v failed: %v. Releasing allocated CIDR", node.Name, err)
logger.Error(err, "CIDR assignment for node failed. Releasing allocated CIDR", "node", klog.KObj(node))
for idx, cidr := range data.allocatedCIDRs {
if releaseErr := r.cidrSets[idx].Release(cidr); releaseErr != nil {
klog.Errorf("Error releasing allocated CIDR for node %v: %v", node.Name, releaseErr)
logger.Error(releaseErr, "Error releasing allocated CIDR for node", "node", klog.KObj(node))
}
}
}
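Editor's note: Run and worker above swap a stop channel for a context; the worker recovers its logger from that context and exits on ctx.Done(). A condensed, runnable sketch of the same loop shape, with a hypothetical work channel standing in for nodeCIDRUpdateChannel; this is an illustration of the pattern, not the controller's actual worker.

package main

import (
	"context"
	"time"

	"k8s.io/klog/v2"
)

// worker drains a work channel until it is closed or the context is
// canceled, mirroring the converted rangeAllocator.worker shape.
func worker(ctx context.Context, work <-chan string) {
	logger := klog.FromContext(ctx)
	for {
		select {
		case item, ok := <-work:
			if !ok {
				logger.Info("Work channel was unexpectedly closed")
				return
			}
			logger.V(4).Info("Processing work item", "item", item)
		case <-ctx.Done():
			// Context cancellation replaces the old stop channel.
			return
		}
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
	defer cancel()
	work := make(chan string, 1)
	work <- "node-a"
	worker(ctx, work) // processes one item, then returns when the timeout fires
}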
@ -26,6 +26,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes/fake"
"k8s.io/klog/v2/ktesting"
"k8s.io/kubernetes/pkg/controller/nodeipam/ipam/test"
"k8s.io/kubernetes/pkg/controller/testutil"
netutils "k8s.io/utils/net"
@ -274,12 +275,13 @@ func TestOccupyPreExistingCIDR(t *testing.T) {
}

// test function
logger, _ := ktesting.NewTestContext(t)
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
// Initialize the range allocator.
fakeNodeInformer := test.FakeNodeInformer(tc.fakeNodeHandler)
nodeList, _ := tc.fakeNodeHandler.List(context.TODO(), metav1.ListOptions{})
_, err := NewCIDRRangeAllocator(tc.fakeNodeHandler, fakeNodeInformer, tc.allocatorParams, nodeList)
_, err := NewCIDRRangeAllocator(logger, tc.fakeNodeHandler, fakeNodeInformer, tc.allocatorParams, nodeList)
if err == nil && tc.ctrlCreateFail {
t.Fatalf("creating range allocator was expected to fail, but it did not")
}
@ -508,11 +510,12 @@ func TestAllocateOrOccupyCIDRSuccess(t *testing.T) {
}

// test function
logger, ctx := ktesting.NewTestContext(t)
testFunc := func(tc testCase) {
fakeNodeInformer := test.FakeNodeInformer(tc.fakeNodeHandler)
nodeList, _ := tc.fakeNodeHandler.List(context.TODO(), metav1.ListOptions{})
// Initialize the range allocator.
allocator, err := NewCIDRRangeAllocator(tc.fakeNodeHandler, fakeNodeInformer, tc.allocatorParams, nodeList)
allocator, err := NewCIDRRangeAllocator(logger, tc.fakeNodeHandler, fakeNodeInformer, tc.allocatorParams, nodeList)
if err != nil {
t.Errorf("%v: failed to create CIDRRangeAllocator with error %v", tc.description, err)
return
@ -524,7 +527,7 @@ func TestAllocateOrOccupyCIDRSuccess(t *testing.T) {
}
rangeAllocator.nodesSynced = test.AlwaysReady
rangeAllocator.recorder = testutil.NewFakeRecorder()
go allocator.Run(wait.NeverStop)
go allocator.Run(ctx)

// this is a bit of white box testing
// pre allocate the cidrs as per the test
@ -545,7 +548,7 @@ func TestAllocateOrOccupyCIDRSuccess(t *testing.T) {
if node.Spec.PodCIDRs == nil {
updateCount++
}
if err := allocator.AllocateOrOccupyCIDR(node); err != nil {
if err := allocator.AllocateOrOccupyCIDR(logger, node); err != nil {
t.Errorf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err)
}
}
@ -608,10 +611,10 @@ func TestAllocateOrOccupyCIDRFailure(t *testing.T) {
},
},
}

logger, ctx := ktesting.NewTestContext(t)
testFunc := func(tc testCase) {
// Initialize the range allocator.
allocator, err := NewCIDRRangeAllocator(tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), tc.allocatorParams, nil)
allocator, err := NewCIDRRangeAllocator(logger, tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), tc.allocatorParams, nil)
if err != nil {
t.Logf("%v: failed to create CIDRRangeAllocator with error %v", tc.description, err)
}
@ -622,7 +625,7 @@ func TestAllocateOrOccupyCIDRFailure(t *testing.T) {
}
rangeAllocator.nodesSynced = test.AlwaysReady
rangeAllocator.recorder = testutil.NewFakeRecorder()
go allocator.Run(wait.NeverStop)
go allocator.Run(ctx)

// this is a bit of white box testing
for setIdx, allocatedList := range tc.allocatedCIDRs {
@ -637,7 +640,7 @@ func TestAllocateOrOccupyCIDRFailure(t *testing.T) {
}
}
}
if err := allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0]); err == nil {
if err := allocator.AllocateOrOccupyCIDR(logger, tc.fakeNodeHandler.Existing[0]); err == nil {
t.Errorf("%v: unexpected success in AllocateOrOccupyCIDR: %v", tc.description, err)
}
// We don't expect any updates, so just sleep for some time
@ -753,10 +756,10 @@ func TestReleaseCIDRSuccess(t *testing.T) {
},
},
}

logger, ctx := ktesting.NewTestContext(t)
testFunc := func(tc releaseTestCase) {
// Initialize the range allocator.
allocator, _ := NewCIDRRangeAllocator(tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), tc.allocatorParams, nil)
allocator, _ := NewCIDRRangeAllocator(logger, tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), tc.allocatorParams, nil)
rangeAllocator, ok := allocator.(*rangeAllocator)
if !ok {
t.Logf("%v: found non-default implementation of CIDRAllocator, skipping white-box test...", tc.description)
@ -764,7 +767,7 @@ func TestReleaseCIDRSuccess(t *testing.T) {
}
rangeAllocator.nodesSynced = test.AlwaysReady
rangeAllocator.recorder = testutil.NewFakeRecorder()
go allocator.Run(wait.NeverStop)
go allocator.Run(ctx)

// this is a bit of white box testing
for setIdx, allocatedList := range tc.allocatedCIDRs {
@ -780,7 +783,7 @@ func TestReleaseCIDRSuccess(t *testing.T) {
}
}

err := allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0])
err := allocator.AllocateOrOccupyCIDR(logger, tc.fakeNodeHandler.Existing[0])
if len(tc.expectedAllocatedCIDRFirstRound) != 0 {
if err != nil {
t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err)
@ -805,12 +808,12 @@ func TestReleaseCIDRSuccess(t *testing.T) {
},
}
nodeToRelease.Spec.PodCIDRs = cidrToRelease
err = allocator.ReleaseCIDR(&nodeToRelease)
err = allocator.ReleaseCIDR(logger, &nodeToRelease)
if err != nil {
t.Fatalf("%v: unexpected error in ReleaseCIDR: %v", tc.description, err)
}
}
if err = allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0]); err != nil {
if err = allocator.AllocateOrOccupyCIDR(logger, tc.fakeNodeHandler.Existing[0]); err != nil {
t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err)
}
if err := test.WaitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, 1, wait.ForeverTestTimeout); err != nil {
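Editor's note: in the tests above, go allocator.Run(wait.NeverStop) becomes go allocator.Run(ctx), so the ktesting context doubles as the shutdown signal for the background goroutine. A minimal sketch of that handshake; run is a hypothetical stand-in for allocator.Run, not the real allocator.

package example

import (
	"context"
	"testing"

	"k8s.io/klog/v2/ktesting"
)

// run is a hypothetical stand-in for allocator.Run(ctx): it blocks until
// the context is canceled, then returns.
func run(ctx context.Context) {
	<-ctx.Done()
}

func TestRunStopsOnCancel(t *testing.T) {
	_, ctx := ktesting.NewTestContext(t)
	ctx, cancel := context.WithCancel(ctx)
	done := make(chan struct{})
	go func() {
		run(ctx)
		close(done)
	}()
	cancel() // replaces closing a wait.NeverStop-style stop channel
	<-done   // the goroutine observed ctx.Done() and exited
}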
@ -120,8 +120,8 @@ func New(c controller, cloudAlias cloudAlias, kubeAPI kubeAPI, mode NodeSyncMode

// Loop runs the sync loop for a given node. done is an optional channel that
// is closed when the Loop() returns.
func (sync *NodeSync) Loop(done chan struct{}) {
klog.V(2).Infof("Starting sync loop for node %q", sync.nodeName)
func (sync *NodeSync) Loop(logger klog.Logger, done chan struct{}) {
logger.V(2).Info("Starting sync loop", "node", klog.KRef("", sync.nodeName))

defer func() {
if done != nil {
@ -131,27 +131,27 @@ func (sync *NodeSync) Loop(done chan struct{}) {

timeout := sync.c.ResyncTimeout()
delayTimer := time.NewTimer(timeout)
klog.V(4).Infof("Resync node %q in %v", sync.nodeName, timeout)
logger.V(4).Info("Try to resync node later", "node", klog.KRef("", sync.nodeName), "resyncTime", timeout)

for {
select {
case op, more := <-sync.opChan:
if !more {
klog.V(2).Infof("Stopping sync loop")
logger.V(2).Info("Stopping sync loop")
return
}
sync.c.ReportResult(op.run(sync))
sync.c.ReportResult(op.run(logger, sync))
if !delayTimer.Stop() {
<-delayTimer.C
}
case <-delayTimer.C:
klog.V(4).Infof("Running resync for node %q", sync.nodeName)
sync.c.ReportResult((&updateOp{}).run(sync))
logger.V(4).Info("Running resync", "node", klog.KRef("", sync.nodeName))
sync.c.ReportResult((&updateOp{}).run(logger, sync))
}

timeout := sync.c.ResyncTimeout()
delayTimer.Reset(timeout)
klog.V(4).Infof("Resync node %q in %v", sync.nodeName, timeout)
logger.V(4).Info("Try to resync node later", "node", klog.KRef("", sync.nodeName), "resyncTime", timeout)
}
}

@ -175,7 +175,7 @@ func (sync *NodeSync) Delete(node *v1.Node) {
// syncOp is the interface for generic sync operation.
type syncOp interface {
// run the requested sync operation.
run(sync *NodeSync) error
run(logger klog.Logger, sync *NodeSync) error
}

// updateOp handles creation and updates of a node.
@ -190,16 +190,16 @@ func (op *updateOp) String() string {
return fmt.Sprintf("updateOp(%q,%v)", op.node.Name, op.node.Spec.PodCIDR)
}

func (op *updateOp) run(sync *NodeSync) error {
klog.V(3).Infof("Running updateOp %+v", op)
func (op *updateOp) run(logger klog.Logger, sync *NodeSync) error {
logger.V(3).Info("Running updateOp", "updateOp", op)

ctx := context.Background()

if op.node == nil {
klog.V(3).Infof("Getting node spec for %q", sync.nodeName)
logger.V(3).Info("Getting node spec", "node", klog.KRef("", sync.nodeName))
node, err := sync.kubeAPI.Node(ctx, sync.nodeName)
if err != nil {
klog.Errorf("Error getting node %q spec: %v", sync.nodeName, err)
logger.Error(err, "Error getting node spec", "node", klog.KRef("", sync.nodeName))
return err
}
op.node = node
@ -207,7 +207,7 @@ func (op *updateOp) run(sync *NodeSync) error {

aliasRange, err := sync.cloudAlias.Alias(ctx, op.node)
if err != nil {
klog.Errorf("Error getting cloud alias for node %q: %v", sync.nodeName, err)
logger.Error(err, "Error getting cloud alias for node", "node", klog.KRef("", sync.nodeName))
return err
}

@ -229,14 +229,13 @@ func (op *updateOp) run(sync *NodeSync) error {
// match.
func (op *updateOp) validateRange(ctx context.Context, sync *NodeSync, node *v1.Node, aliasRange *net.IPNet) error {
if node.Spec.PodCIDR != aliasRange.String() {
klog.Errorf("Inconsistency detected between node PodCIDR and node alias (%v != %v)",
node.Spec.PodCIDR, aliasRange)
klog.FromContext(ctx).Error(nil, "Inconsistency detected between node PodCIDR and node alias", "podCIDR", node.Spec.PodCIDR, "alias", aliasRange)
sync.kubeAPI.EmitNodeWarningEvent(node.Name, MismatchEvent,
"Node.Spec.PodCIDR != cloud alias (%v != %v)", node.Spec.PodCIDR, aliasRange)
// User intervention is required in this case, as this is most likely due
// to the user mucking around with their VM aliases on the side.
} else {
klog.V(4).Infof("Node %q CIDR range %v is matches cloud assignment", node.Name, node.Spec.PodCIDR)
klog.FromContext(ctx).V(4).Info("Node CIDR range matches cloud assignment", "node", klog.KObj(node), "podCIDR", node.Spec.PodCIDR)
}
return nil
}
@ -249,27 +248,27 @@ func (op *updateOp) updateNodeFromAlias(ctx context.Context, sync *NodeSync, nod
"Cannot sync from cloud in mode %q", sync.mode)
return fmt.Errorf("cannot sync from cloud in mode %q", sync.mode)
}

klog.V(2).Infof("Updating node spec with alias range, node.PodCIDR = %v", aliasRange)
logger := klog.FromContext(ctx)
logger.V(2).Info("Updating node spec with alias range", "podCIDR", aliasRange)

if err := sync.set.Occupy(aliasRange); err != nil {
klog.Errorf("Error occupying range %v for node %v", aliasRange, sync.nodeName)
logger.Error(nil, "Error occupying range for node", "node", klog.KRef("", sync.nodeName), "alias", aliasRange)
return err
}

if err := sync.kubeAPI.UpdateNodePodCIDR(ctx, node, aliasRange); err != nil {
klog.Errorf("Could not update node %q PodCIDR to %v: %v", node.Name, aliasRange, err)
logger.Error(err, "Could not update node PodCIDR", "node", klog.KObj(node), "podCIDR", aliasRange)
return err
}

klog.V(2).Infof("Node %q PodCIDR set to %v", node.Name, aliasRange)
logger.V(2).Info("Node PodCIDR updated", "node", klog.KObj(node), "podCIDR", aliasRange)

if err := sync.kubeAPI.UpdateNodeNetworkUnavailable(node.Name, false); err != nil {
klog.Errorf("Could not update node NetworkUnavailable status to false: %v", err)
logger.Error(err, "Could not update node NetworkUnavailable status to false")
return err
}

klog.V(2).Infof("Updated node %q PodCIDR from cloud alias %v", node.Name, aliasRange)
logger.V(2).Info("Updated node PodCIDR from cloud alias", "node", klog.KObj(node), "alias", aliasRange)

return nil
}
@ -283,29 +282,29 @@ func (op *updateOp) updateAliasFromNode(ctx context.Context, sync *NodeSync, nod
}

_, aliasRange, err := netutils.ParseCIDRSloppy(node.Spec.PodCIDR)

logger := klog.FromContext(ctx)
if err != nil {
klog.Errorf("Could not parse PodCIDR (%q) for node %q: %v",
node.Spec.PodCIDR, node.Name, err)
logger.Error(err, "Could not parse PodCIDR for node", "node", klog.KObj(node), "podCIDR", node.Spec.PodCIDR)
return err
}

if err := sync.set.Occupy(aliasRange); err != nil {
klog.Errorf("Error occupying range %v for node %v", aliasRange, sync.nodeName)
logger.Error(nil, "Error occupying range for node", "node", klog.KRef("", sync.nodeName), "alias", aliasRange)
return err
}

if err := sync.cloudAlias.AddAlias(ctx, node, aliasRange); err != nil {
klog.Errorf("Could not add alias %v for node %q: %v", aliasRange, node.Name, err)
logger.Error(err, "Could not add alias for node", "node", klog.KObj(node), "alias", aliasRange)
return err
}

if err := sync.kubeAPI.UpdateNodeNetworkUnavailable(node.Name, false); err != nil {
klog.Errorf("Could not update node NetworkUnavailable status to false: %v", err)
logger.Error(err, "Could not update node NetworkUnavailable status to false")
return err
}

klog.V(2).Infof("Updated node %q cloud alias with node spec, node.PodCIDR = %v",
node.Name, node.Spec.PodCIDR)
logger.V(2).Info("Updated node cloud alias with node spec", "node", klog.KObj(node), "podCIDR", node.Spec.PodCIDR)

return nil
}
@ -326,22 +325,23 @@ func (op *updateOp) allocateRange(ctx context.Context, sync *NodeSync, node *v1.
// If addAlias returns a hard error, cidrRange will be leaked as there
// is no durable record of the range. The missing space will be
// recovered on the next restart of the controller.
logger := klog.FromContext(ctx)
if err := sync.cloudAlias.AddAlias(ctx, node, cidrRange); err != nil {
klog.Errorf("Could not add alias %v for node %q: %v", cidrRange, node.Name, err)
logger.Error(err, "Could not add alias for node", "node", klog.KObj(node), "alias", cidrRange)
return err
}

if err := sync.kubeAPI.UpdateNodePodCIDR(ctx, node, cidrRange); err != nil {
klog.Errorf("Could not update node %q PodCIDR to %v: %v", node.Name, cidrRange, err)
logger.Error(err, "Could not update node PodCIDR", "node", klog.KObj(node), "podCIDR", cidrRange)
return err
}

if err := sync.kubeAPI.UpdateNodeNetworkUnavailable(node.Name, false); err != nil {
klog.Errorf("Could not update node NetworkUnavailable status to false: %v", err)
logger.Error(err, "Could not update node NetworkUnavailable status to false")
return err
}

klog.V(2).Infof("Allocated PodCIDR %v for node %q", cidrRange, node.Name)
logger.V(2).Info("Allocated PodCIDR for node", "node", klog.KObj(node), "podCIDR", cidrRange)

return nil
}
@ -358,25 +358,23 @@ func (op *deleteOp) String() string {
return fmt.Sprintf("deleteOp(%q,%v)", op.node.Name, op.node.Spec.PodCIDR)
}

func (op *deleteOp) run(sync *NodeSync) error {
klog.V(3).Infof("Running deleteOp %+v", op)
func (op *deleteOp) run(logger klog.Logger, sync *NodeSync) error {
logger.V(3).Info("Running deleteOp", "deleteOp", op)
if op.node.Spec.PodCIDR == "" {
klog.V(2).Infof("Node %q was deleted, node had no PodCIDR range assigned", op.node.Name)
logger.V(2).Info("Node was deleted, node had no PodCIDR range assigned", "node", klog.KObj(op.node))
return nil
}

_, cidrRange, err := netutils.ParseCIDRSloppy(op.node.Spec.PodCIDR)
if err != nil {
klog.Errorf("Deleted node %q has an invalid podCIDR %q: %v",
op.node.Name, op.node.Spec.PodCIDR, err)
logger.Error(err, "Deleted node has an invalid podCIDR", "node", klog.KObj(op.node), "podCIDR", op.node.Spec.PodCIDR)
sync.kubeAPI.EmitNodeWarningEvent(op.node.Name, InvalidPodCIDR,
"Node %q has an invalid PodCIDR: %q", op.node.Name, op.node.Spec.PodCIDR)
return nil
}

sync.set.Release(cidrRange)
klog.V(2).Infof("Node %q was deleted, releasing CIDR range %v",
op.node.Name, op.node.Spec.PodCIDR)
logger.V(2).Info("Node was deleted, releasing CIDR range", "node", klog.KObj(op.node), "podCIDR", op.node.Spec.PodCIDR)

return nil
}
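Editor's note: the syncOp change above is the interface variant of the same migration: once run gains a logger parameter, every implementation (updateOp, deleteOp) changes in lockstep, and the caller threads one logger through all dispatches. A compact sketch with hypothetical op types, not the PR's real ones.

package example

import (
	"fmt"

	"k8s.io/klog/v2"
)

// syncOp mirrors the converted interface shape: run receives the logger
// explicitly, since these short-lived ops carry no context of their own.
type syncOp interface {
	run(logger klog.Logger) error
}

type updateOp struct{ name string }

func (op *updateOp) run(logger klog.Logger) error {
	logger.V(3).Info("Running updateOp", "node", op.name)
	return nil
}

type deleteOp struct{ name string }

func (op *deleteOp) run(logger klog.Logger) error {
	logger.V(3).Info("Running deleteOp", "node", op.name)
	return nil
}

// loop dispatches ops, passing the single logger through, the way
// NodeSync.Loop does after this change.
func loop(logger klog.Logger, ops []syncOp) {
	for _, op := range ops {
		if err := op.run(logger); err != nil {
			fmt.Println(err)
		}
	}
}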
@ -26,6 +26,7 @@ import (

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/klog/v2"
	"k8s.io/klog/v2/ktesting"
	"k8s.io/kubernetes/pkg/controller/nodeipam/ipam/cidrset"
	"k8s.io/kubernetes/pkg/controller/nodeipam/ipam/test"
	netutils "k8s.io/utils/net"
@ -200,7 +201,8 @@ func TestNodeSyncUpdate(t *testing.T) {
	doneChan := make(chan struct{})

	// Do a single step of the loop.
	go sync.Loop(doneChan)
	logger, _ := ktesting.NewTestContext(t)
	go sync.Loop(logger, doneChan)
	sync.Update(tc.node)
	close(sync.opChan)
	<-doneChan
@ -230,8 +232,8 @@ func TestNodeSyncResync(t *testing.T) {
	cidr, _ := cidrset.NewCIDRSet(clusterCIDRRange, 24)
	sync := New(fake, fake, fake, SyncFromCluster, "node1", cidr)
	doneChan := make(chan struct{})

	go sync.Loop(doneChan)
	logger, _ := ktesting.NewTestContext(t)
	go sync.Loop(logger, doneChan)
	<-fake.reportChan
	close(sync.opChan)
	// Unblock loop().
@ -275,7 +277,8 @@ func TestNodeSyncDelete(t *testing.T) {
	doneChan := make(chan struct{})

	// Do a single step of the loop.
	go sync.Loop(doneChan)
	logger, _ := ktesting.NewTestContext(t)
	go sync.Loop(logger, doneChan)
	sync.Delete(tc.node)
	<-doneChan
	tc.fake.dumpTrace()
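The test updates above all follow one recipe: ktesting.NewTestContext hands back a logger wired to t.Log plus a context carrying that same logger, so whichever form the code under test expects can be passed in. A hedged sketch (the package and test names are hypothetical):

package example_test

import (
	"testing"

	"k8s.io/klog/v2/ktesting"
)

func TestWithLogger(t *testing.T) {
	// logger routes output through t.Log; ctx carries the same logger for
	// code that expects a context.Context instead of a klog.Logger.
	logger, ctx := ktesting.NewTestContext(t)
	logger.Info("visible in the test output on failure or with -v")
	_ = ctx
}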
@ -32,6 +32,7 @@ import (
)

func createLegacyIPAM(
	logger klog.Logger,
	ic *Controller,
	nodeInformer coreinformers.NodeInformer,
	cloud cloudprovider.Interface,
@ -58,14 +59,16 @@ func createLegacyIPAM(
		cidr = clusterCIDRs[0]
	}
	if len(clusterCIDRs) > 1 {
		klog.Warningf("Multiple cidrs were configured with FromCluster or FromCloud. cidrs except first one were discarded")
		logger.Info("Multiple cidrs were configured with FromCluster or FromCloud. cidrs except first one were discarded")
	}
	ipamc, err := ipam.NewController(cfg, kubeClient, cloud, cidr, serviceCIDR, nodeCIDRMaskSizes[0])
	if err != nil {
		klog.Fatalf("Error creating ipam controller: %v", err)
		logger.Error(err, "Error creating ipam controller")
		klog.FlushAndExit(klog.ExitFlushTimeout, 1)
	}
	if err := ipamc.Start(nodeInformer); err != nil {
		klog.Fatalf("Error trying to Init(): %v", err)
	if err := ipamc.Start(logger, nodeInformer); err != nil {
		logger.Error(err, "Error trying to Init()")
		klog.FlushAndExit(klog.ExitFlushTimeout, 1)
	}
	return ipamc
}
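klog.Fatalf cannot carry structured key/value pairs, so the diff splits each fatal call into an Error on the contextual logger followed by klog.FlushAndExit, which preserves the flush-then-exit behaviour Fatal had. A minimal sketch of just that pattern:

package main

import "k8s.io/klog/v2"

func mustHaveClient(client interface{}) {
	logger := klog.Background() // stand-in for a logger taken from a context
	if client == nil {
		// Before: klog.Fatalf("kubeClient is nil when starting Controller")
		logger.Error(nil, "kubeClient is nil when starting Controller")
		klog.FlushAndExit(klog.ExitFlushTimeout, 1)
	}
}

func main() {
	mustHaveClient(nil) // logs the error, flushes, and exits with status 1
}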
@ -17,6 +17,7 @@ limitations under the License.
package nodeipam

import (
	"context"
	"net"
	"time"

@ -50,7 +51,7 @@ const (
// legacy mode. It is needed to ensure correct building for
// both provider-specific and providerless environments.
type ipamController interface {
	Run(<-chan struct{})
	Run(ctx context.Context)
}

// Controller is the controller that manages node ipam state.
@ -79,6 +80,7 @@ type Controller struct {
// podCIDRs it has already allocated to nodes. Since we don't allow podCIDR changes
// currently, this should be handled as a fatal error.
func NewNodeIpamController(
	ctx context.Context,
	nodeInformer coreinformers.NodeInformer,
	clusterCIDRInformer networkinginformers.ClusterCIDRInformer,
	cloud cloudprovider.Interface,
@ -89,20 +91,24 @@ func NewNodeIpamController(
	nodeCIDRMaskSizes []int,
	allocatorType ipam.CIDRAllocatorType) (*Controller, error) {

	logger := klog.FromContext(ctx)
	if kubeClient == nil {
		klog.Fatalf("kubeClient is nil when starting Controller")
		logger.Error(nil, "kubeClient is nil when starting Controller")
		klog.FlushAndExit(klog.ExitFlushTimeout, 1)
	}

	// Cloud CIDR allocator does not rely on clusterCIDR or nodeCIDRMaskSize for allocation.
	if allocatorType != ipam.CloudAllocatorType {
		if len(clusterCIDRs) == 0 {
			klog.Fatal("Controller: Must specify --cluster-cidr if --allocate-node-cidrs is set")
			logger.Error(nil, "Controller: Must specify --cluster-cidr if --allocate-node-cidrs is set")
			klog.FlushAndExit(klog.ExitFlushTimeout, 1)
		}

		for idx, cidr := range clusterCIDRs {
			mask := cidr.Mask
			if maskSize, _ := mask.Size(); maskSize > nodeCIDRMaskSizes[idx] {
				klog.Fatal("Controller: Invalid --cluster-cidr, mask size of cluster CIDR must be less than or equal to --node-cidr-mask-size configured for CIDR family")
				logger.Error(nil, "Controller: Invalid --cluster-cidr, mask size of cluster CIDR must be less than or equal to --node-cidr-mask-size configured for CIDR family")
				klog.FlushAndExit(klog.ExitFlushTimeout, 1)
			}
		}
	}
@ -120,7 +126,7 @@ func NewNodeIpamController(

	// TODO: Abstract this check into a generic controller manager should run method.
	if ic.allocatorType == ipam.IPAMFromClusterAllocatorType || ic.allocatorType == ipam.IPAMFromCloudAllocatorType {
		ic.legacyIPAM = createLegacyIPAM(ic, nodeInformer, cloud, kubeClient, clusterCIDRs, serviceCIDR, nodeCIDRMaskSizes)
		ic.legacyIPAM = createLegacyIPAM(logger, ic, nodeInformer, cloud, kubeClient, clusterCIDRs, serviceCIDR, nodeCIDRMaskSizes)
	} else {
		var err error

@ -131,7 +137,7 @@ func NewNodeIpamController(
			NodeCIDRMaskSizes: nodeCIDRMaskSizes,
		}

		ic.cidrAllocator, err = ipam.New(kubeClient, cloud, nodeInformer, clusterCIDRInformer, ic.allocatorType, allocatorParams)
		ic.cidrAllocator, err = ipam.New(ctx, kubeClient, cloud, nodeInformer, clusterCIDRInformer, ic.allocatorType, allocatorParams)
		if err != nil {
			return nil, err
		}
@ -144,33 +150,32 @@ func NewNodeIpamController(
}

// Run starts an asynchronous loop that monitors the status of cluster nodes.
func (nc *Controller) Run(stopCh <-chan struct{}) {
func (nc *Controller) Run(ctx context.Context) {
	defer utilruntime.HandleCrash()

	// Start event processing pipeline.
	nc.eventBroadcaster.StartStructuredLogging(0)
	nc.eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: nc.kubeClient.CoreV1().Events("")})
	defer nc.eventBroadcaster.Shutdown()
	klog.FromContext(ctx).Info("Starting ipam controller")
	defer klog.FromContext(ctx).Info("Shutting down ipam controller")

	klog.Infof("Starting ipam controller")
	defer klog.Infof("Shutting down ipam controller")

	if !cache.WaitForNamedCacheSync("node", stopCh, nc.nodeInformerSynced) {
	if !cache.WaitForNamedCacheSync("node", ctx.Done(), nc.nodeInformerSynced) {
		return
	}

	if nc.allocatorType == ipam.IPAMFromClusterAllocatorType || nc.allocatorType == ipam.IPAMFromCloudAllocatorType {
		go nc.legacyIPAM.Run(stopCh)
		go nc.legacyIPAM.Run(ctx)
	} else {
		go nc.cidrAllocator.Run(stopCh)
		go nc.cidrAllocator.Run(ctx)
	}

	<-stopCh
	<-ctx.Done()
}

// RunWithMetrics is a wrapper for Run that also tracks starting and stopping of the nodeipam controller with additional metric
func (nc *Controller) RunWithMetrics(stopCh <-chan struct{}, controllerManagerMetrics *controllersmetrics.ControllerManagerMetrics) {
func (nc *Controller) RunWithMetrics(ctx context.Context, controllerManagerMetrics *controllersmetrics.ControllerManagerMetrics) {
	controllerManagerMetrics.ControllerStarted("nodeipam")
	defer controllerManagerMetrics.ControllerStopped("nodeipam")
	nc.Run(stopCh)
	nc.Run(ctx)
}
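The Run signature change above is the second half of the migration: the stop channel disappears and ctx.Done() takes its place, so the same context both carries the logger and signals shutdown. A self-contained sketch using only the standard library (the controller type here is a stand-in):

package main

import (
	"context"
	"fmt"
	"time"
)

type controller struct{}

// Before: func (c *controller) Run(stopCh <-chan struct{}) { <-stopCh }
// After: ctx.Done() plays the role of stopCh, and the same ctx can also
// carry the logger, so one parameter covers both concerns.
func (c *controller) Run(ctx context.Context) {
	<-ctx.Done() // blocks until the caller cancels
	fmt.Println("shutting down")
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	go (&controller{}).Run(ctx)
	time.Sleep(10 * time.Millisecond)
	cancel()
	time.Sleep(10 * time.Millisecond)
}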
@ -20,6 +20,7 @@ limitations under the License.
package nodeipam

import (
	"context"
	"net"
	"os"
	"os/exec"
@ -30,6 +31,7 @@ import (
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes/fake"
	"k8s.io/klog/v2/ktesting"
	"k8s.io/kubernetes/pkg/controller"
	"k8s.io/kubernetes/pkg/controller/nodeipam/ipam"
	"k8s.io/kubernetes/pkg/controller/testutil"
@ -37,7 +39,7 @@ import (
	netutils "k8s.io/utils/net"
)

func newTestNodeIpamController(clusterCIDR []*net.IPNet, serviceCIDR *net.IPNet, secondaryServiceCIDR *net.IPNet, nodeCIDRMaskSizes []int, allocatorType ipam.CIDRAllocatorType) (*Controller, error) {
func newTestNodeIpamController(ctx context.Context, clusterCIDR []*net.IPNet, serviceCIDR *net.IPNet, secondaryServiceCIDR *net.IPNet, nodeCIDRMaskSizes []int, allocatorType ipam.CIDRAllocatorType) (*Controller, error) {
	clientSet := fake.NewSimpleClientset()
	fakeNodeHandler := &testutil.FakeNodeHandler{
		Existing: []*v1.Node{
@ -56,6 +58,7 @@ func newTestNodeIpamController(clusterCIDR []*net.IPNet, serviceCIDR *net.IPNet,

	fakeGCE := gce.NewFakeGCECloud(gce.DefaultTestClusterValues())
	return NewNodeIpamController(
		ctx,
		fakeNodeInformer, fakeClusterCIDRInformer, fakeGCE, clientSet,
		clusterCIDR, serviceCIDR, secondaryServiceCIDR, nodeCIDRMaskSizes, allocatorType,
	)
@ -92,6 +95,7 @@ func TestNewNodeIpamControllerWithCIDRMasks(t *testing.T) {
		{"invalid_serviceCIDR_contains_clusterCIDR", "10.0.0.0/23", "10.0.0.0/21", emptyServiceCIDR, []int{24}, ipam.IPAMFromClusterAllocatorType, true},
		{"invalid_CIDR_mask_size", "10.0.0.0/24,2000::/64", "10.1.0.0/21", emptyServiceCIDR, []int{24, 48}, ipam.IPAMFromClusterAllocatorType, true},
	} {
		_, ctx := ktesting.NewTestContext(t)
		t.Run(tc.desc, func(t *testing.T) {
			clusterCidrs, _ := netutils.ParseCIDRs(strings.Split(tc.clusterCIDR, ","))
			_, serviceCIDRIpNet, _ := netutils.ParseCIDRSloppy(tc.serviceCIDR)
@ -99,7 +103,7 @@ func TestNewNodeIpamControllerWithCIDRMasks(t *testing.T) {

			if os.Getenv("EXIT_ON_FATAL") == "1" {
				// This is the subprocess which runs the actual code.
				newTestNodeIpamController(clusterCidrs, serviceCIDRIpNet, secondaryServiceCIDRIpNet, tc.maskSize, tc.allocatorType)
				newTestNodeIpamController(ctx, clusterCidrs, serviceCIDRIpNet, secondaryServiceCIDRIpNet, tc.maskSize, tc.allocatorType)
				return
			}
			// This is the host process that monitors the exit code of the subprocess.
@ -20,6 +20,7 @@ limitations under the License.
package nodeipam

import (
	"context"
	"net"

	coreinformers "k8s.io/client-go/informers/core/v1"
@ -31,11 +32,12 @@ import (
type fakeController struct {
}

func (f *fakeController) Run(stopCh <-chan struct{}) {
	<-stopCh
func (f *fakeController) Run(ctx context.Context) {
	<-ctx.Done()
}

func createLegacyIPAM(
	logger klog.Logger,
	ic *Controller,
	nodeInformer coreinformers.NodeInformer,
	cloud cloudprovider.Interface,
@ -44,6 +46,7 @@ func createLegacyIPAM(
	serviceCIDR *net.IPNet,
	nodeCIDRMaskSizes []int,
) *fakeController {
	klog.Fatal("Error trying to Init(): legacy cloud provider support disabled at build time")
	logger.Error(nil, "Error trying to Init(): legacy cloud provider support disabled at build time")
	klog.FlushAndExit(klog.ExitFlushTimeout, 1)
	return &fakeController{}
}
@ -368,9 +368,10 @@ func NewNodeLifecycleController(
	unhealthyZoneThreshold float32,
	runTaintManager bool,
) (*Controller, error) {

	logger := klog.LoggerWithName(klog.FromContext(ctx), "NodeLifecycleController")
	if kubeClient == nil {
		klog.Fatalf("kubeClient is nil when starting Controller")
		logger.Error(nil, "kubeClient is nil when starting nodelifecycle Controller")
		klog.FlushAndExit(klog.ExitFlushTimeout, 1)
	}

	eventBroadcaster := record.NewBroadcaster()
@ -428,12 +429,12 @@ func NewNodeLifecycleController(
			if !isPod {
				deletedState, ok := obj.(cache.DeletedFinalStateUnknown)
				if !ok {
					klog.Errorf("Received unexpected object: %v", obj)
					logger.Error(nil, "Received unexpected object", "object", obj)
					return
				}
				pod, ok = deletedState.Obj.(*v1.Pod)
				if !ok {
					klog.Errorf("DeletedFinalStateUnknown contained non-Pod object: %v", deletedState.Obj)
					logger.Error(nil, "DeletedFinalStateUnknown contained non-Pod object", "object", deletedState.Obj)
					return
				}
			}
@ -494,7 +495,7 @@ func NewNodeLifecycleController(
		})
	}

	klog.Infof("Controller will reconcile labels.")
	logger.Info("Controller will reconcile labels")
	nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: controllerutil.CreateAddNodeHandler(func(node *v1.Node) error {
			nc.nodeUpdateQueue.Add(node.Name)
@ -529,7 +530,8 @@ func (nc *Controller) Run(ctx context.Context) {

	// Start events processing pipeline.
	nc.broadcaster.StartStructuredLogging(0)
	klog.Infof("Sending events to api server.")
	logger := klog.FromContext(ctx)
	logger.Info("Sending events to api server")
	nc.broadcaster.StartRecordingToSink(
		&v1core.EventSinkImpl{
			Interface: v1core.New(nc.kubeClient.CoreV1().RESTClient()).Events(""),
@ -540,8 +542,8 @@ func (nc *Controller) Run(ctx context.Context) {
	defer nc.nodeUpdateQueue.ShutDown()
	defer nc.podUpdateQueue.ShutDown()

	klog.Infof("Starting node controller")
	defer klog.Infof("Shutting down node controller")
	logger.Info("Starting node controller")
	defer logger.Info("Shutting down node controller")

	if !cache.WaitForNamedCacheSync("taint", ctx.Done(), nc.leaseInformerSynced, nc.nodeInformerSynced, nc.podInformerSynced, nc.daemonSetInformerSynced) {
		return
@ -578,7 +580,7 @@ func (nc *Controller) Run(ctx context.Context) {
	// Incorporate the results of node health signal pushed from kubelet to master.
	go wait.UntilWithContext(ctx, func(ctx context.Context) {
		if err := nc.monitorNodeHealth(ctx); err != nil {
			klog.Errorf("Error monitoring node health: %v", err)
			logger.Error(err, "Error monitoring node health")
		}
	}, nc.nodeMonitorPeriod)

@ -586,6 +588,7 @@ func (nc *Controller) Run(ctx context.Context) {
}

func (nc *Controller) doNodeProcessingPassWorker(ctx context.Context) {
	logger := klog.FromContext(ctx)
	for {
		obj, shutdown := nc.nodeUpdateQueue.Get()
		// "nodeUpdateQueue" will be shutdown when "stopCh" closed;
@ -595,13 +598,13 @@ func (nc *Controller) doNodeProcessingPassWorker(ctx context.Context) {
		}
		nodeName := obj.(string)
		if err := nc.doNoScheduleTaintingPass(ctx, nodeName); err != nil {
			klog.Errorf("Failed to taint NoSchedule on node <%s>, requeue it: %v", nodeName, err)
			logger.Error(err, "Failed to taint NoSchedule on node, requeue it", "node", klog.KRef("", nodeName))
			// TODO(k82cn): Add nodeName back to the queue
		}
		// TODO: re-evaluate whether there are any labels that need to be
		// reconcile in 1.19. Remove this function if it's no longer necessary.
		if err := nc.reconcileNodeLabels(nodeName); err != nil {
			klog.Errorf("Failed to reconcile labels for node <%s>, requeue it: %v", nodeName, err)
			logger.Error(err, "Failed to reconcile labels for node, requeue it", "node", klog.KRef("", nodeName))
			// TODO(yujuhong): Add nodeName back to the queue
		}
		nc.nodeUpdateQueue.Done(nodeName)
@ -677,6 +680,7 @@ func (nc *Controller) doNoExecuteTaintingPass(ctx context.Context) {
			zoneNoExecuteTainterKeys = append(zoneNoExecuteTainterKeys, k)
		}
	}()
	logger := klog.FromContext(ctx)
	for _, k := range zoneNoExecuteTainterKeys {
		var zoneNoExecuteTainterWorker *scheduler.RateLimitedTimedQueue
		func() {
@ -689,13 +693,13 @@ func (nc *Controller) doNoExecuteTaintingPass(ctx context.Context) {
			zoneNoExecuteTainterWorker = nc.zoneNoExecuteTainter[k]
		}()
		// Function should return 'false' and a time after which it should be retried, or 'true' if it shouldn't (it succeeded).
		zoneNoExecuteTainterWorker.Try(func(value scheduler.TimedValue) (bool, time.Duration) {
		zoneNoExecuteTainterWorker.Try(logger, func(value scheduler.TimedValue) (bool, time.Duration) {
			node, err := nc.nodeLister.Get(value.Value)
			if apierrors.IsNotFound(err) {
				klog.Warningf("Node %v no longer present in nodeLister!", value.Value)
				logger.Info("Node no longer present in nodeLister", "node", klog.KRef("", value.Value))
				return true, 0
			} else if err != nil {
				klog.Warningf("Failed to get Node %v from the nodeLister: %v", value.Value, err)
				logger.Info("Failed to get Node from the nodeLister", "node", klog.KRef("", value.Value), "err", err)
				// retry in 50 millisecond
				return false, 50 * time.Millisecond
			}
@ -712,7 +716,7 @@ func (nc *Controller) doNoExecuteTaintingPass(ctx context.Context) {
				oppositeTaint = *NotReadyTaintTemplate
			default:
				// It seems that the Node is ready again, so there's no need to taint it.
				klog.V(4).Infof("Node %v was in a taint queue, but it's ready now. Ignoring taint request.", value.Value)
				logger.V(4).Info("Node was in a taint queue, but it's ready now. Ignoring taint request", "node", klog.KRef("", value.Value))
				return true, 0
			}
			result := controllerutil.SwapNodeControllerTaint(ctx, nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{&oppositeTaint}, node)
@ -742,7 +746,7 @@ func (nc *Controller) doEvictionPass(ctx context.Context) {
			zonePodEvictorKeys = append(zonePodEvictorKeys, k)
		}
	}()

	logger := klog.FromContext(ctx)
	for _, k := range zonePodEvictorKeys {
		var zonePodEvictionWorker *scheduler.RateLimitedTimedQueue
		func() {
@ -756,12 +760,12 @@ func (nc *Controller) doEvictionPass(ctx context.Context) {
		}()

		// Function should return 'false' and a time after which it should be retried, or 'true' if it shouldn't (it succeeded).
		zonePodEvictionWorker.Try(func(value scheduler.TimedValue) (bool, time.Duration) {
		zonePodEvictionWorker.Try(logger, func(value scheduler.TimedValue) (bool, time.Duration) {
			node, err := nc.nodeLister.Get(value.Value)
			if apierrors.IsNotFound(err) {
				klog.Warningf("Node %v no longer present in nodeLister!", value.Value)
				logger.Info("Node no longer present in nodeLister", "node", klog.KRef("", value.Value))
			} else if err != nil {
				klog.Warningf("Failed to get Node %v from the nodeLister: %v", value.Value, err)
				logger.Info("Failed to get Node from the nodeLister", "node", klog.KRef("", value.Value), "err", err)
			}
			nodeUID, _ := value.UID.(string)
			pods, err := nc.getPodsAssignedToNode(value.Value)
@ -778,10 +782,10 @@ func (nc *Controller) doEvictionPass(ctx context.Context) {
				return false, 0
			}
			if !nc.nodeEvictionMap.setStatus(value.Value, evicted) {
				klog.V(2).Infof("node %v was unregistered in the meantime - skipping setting status", value.Value)
				logger.V(2).Info("Node was unregistered in the meantime - skipping setting status", "node", klog.KRef("", value.Value))
			}
			if remaining {
				klog.Infof("Pods awaiting deletion due to Controller eviction")
				logger.Info("Pods awaiting deletion due to Controller eviction")
			}

			if node != nil {
@ -811,25 +815,24 @@ func (nc *Controller) monitorNodeHealth(ctx context.Context) error {
		return err
	}
	added, deleted, newZoneRepresentatives := nc.classifyNodes(nodes)

	logger := klog.FromContext(ctx)
	for i := range newZoneRepresentatives {
		nc.addPodEvictorForNewZone(newZoneRepresentatives[i])
		nc.addPodEvictorForNewZone(logger, newZoneRepresentatives[i])
	}

	for i := range added {
		klog.V(1).Infof("Controller observed a new Node: %#v", added[i].Name)
		logger.V(1).Info("Controller observed a new Node", "node", klog.KRef("", added[i].Name))
		controllerutil.RecordNodeEvent(nc.recorder, added[i].Name, string(added[i].UID), v1.EventTypeNormal, "RegisteredNode", fmt.Sprintf("Registered Node %v in Controller", added[i].Name))
		nc.knownNodeSet[added[i].Name] = added[i]
		nc.addPodEvictorForNewZone(added[i])
		nc.addPodEvictorForNewZone(logger, added[i])
		if nc.runTaintManager {
			nc.markNodeAsReachable(ctx, added[i])
		} else {
			nc.cancelPodEviction(added[i])
			nc.cancelPodEviction(logger, added[i])
		}
	}

	for i := range deleted {
		klog.V(1).Infof("Controller observed a Node deletion: %v", deleted[i].Name)
		logger.V(1).Info("Controller observed a Node deletion", "node", klog.KRef("", deleted[i].Name))
		controllerutil.RecordNodeEvent(nc.recorder, deleted[i].Name, string(deleted[i].UID), v1.EventTypeNormal, "RemovingNode", fmt.Sprintf("Removing Node %v from Controller", deleted[i].Name))
		delete(nc.knownNodeSet, deleted[i].Name)
	}
@ -856,13 +859,12 @@ func (nc *Controller) monitorNodeHealth(ctx context.Context) error {
				name := node.Name
				node, err = nc.kubeClient.CoreV1().Nodes().Get(ctx, name, metav1.GetOptions{})
				if err != nil {
					klog.Errorf("Failed while getting a Node to retry updating node health. Probably Node %s was deleted.", name)
					logger.Error(nil, "Failed while getting a Node to retry updating node health. Probably Node was deleted", "node", klog.KRef("", name))
					return false, err
				}
				return false, nil
			}); err != nil {
				klog.Errorf("Update health of Node '%v' from Controller error: %v. "+
					"Skipping - no pods will be evicted.", node.Name, err)
				logger.Error(err, "Update health of Node from Controller error, Skipping - no pods will be evicted", "node", klog.KObj(node))
				return
			}

@ -925,40 +927,35 @@ func (nc *Controller) monitorNodeHealth(ctx context.Context) error {
func (nc *Controller) processTaintBaseEviction(ctx context.Context, node *v1.Node, observedReadyCondition *v1.NodeCondition) {
	decisionTimestamp := nc.now()
	// Check eviction timeout against decisionTimestamp
	logger := klog.FromContext(ctx)
	switch observedReadyCondition.Status {
	case v1.ConditionFalse:
		// We want to update the taint straight away if Node is already tainted with the UnreachableTaint
		if taintutils.TaintExists(node.Spec.Taints, UnreachableTaintTemplate) {
			taintToAdd := *NotReadyTaintTemplate
			if !controllerutil.SwapNodeControllerTaint(ctx, nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{UnreachableTaintTemplate}, node) {
				klog.Errorf("Failed to instantly swap UnreachableTaint to NotReadyTaint. Will try again in the next cycle.")
				logger.Error(nil, "Failed to instantly swap UnreachableTaint to NotReadyTaint. Will try again in the next cycle")
			}
		} else if nc.markNodeForTainting(node, v1.ConditionFalse) {
			klog.V(2).Infof("Node %v is NotReady as of %v. Adding it to the Taint queue.",
				node.Name,
				decisionTimestamp,
			)
			logger.V(2).Info("Node is NotReady. Adding it to the Taint queue", "node", klog.KObj(node), "timeStamp", decisionTimestamp)
		}
	case v1.ConditionUnknown:
		// We want to update the taint straight away if Node is already tainted with the UnreachableTaint
		if taintutils.TaintExists(node.Spec.Taints, NotReadyTaintTemplate) {
			taintToAdd := *UnreachableTaintTemplate
			if !controllerutil.SwapNodeControllerTaint(ctx, nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{NotReadyTaintTemplate}, node) {
				klog.Errorf("Failed to instantly swap NotReadyTaint to UnreachableTaint. Will try again in the next cycle.")
				logger.Error(nil, "Failed to instantly swap NotReadyTaint to UnreachableTaint. Will try again in the next cycle")
			}
		} else if nc.markNodeForTainting(node, v1.ConditionUnknown) {
			klog.V(2).Infof("Node %v is unresponsive as of %v. Adding it to the Taint queue.",
				node.Name,
				decisionTimestamp,
			)
			logger.V(2).Info("Node is unresponsive. Adding it to the Taint queue", "node", klog.KObj(node), "timeStamp", decisionTimestamp)
		}
	case v1.ConditionTrue:
		removed, err := nc.markNodeAsReachable(ctx, node)
		if err != nil {
			klog.Errorf("Failed to remove taints from node %v. Will retry in next iteration.", node.Name)
			logger.Error(nil, "Failed to remove taints from node. Will retry in next iteration", "node", klog.KObj(node))
		}
		if removed {
			klog.V(2).Infof("Node %s is healthy again, removing all taints", node.Name)
			logger.V(2).Info("Node is healthy again, removing all taints", "node", klog.KObj(node))
		}
	}
}
@ -970,6 +967,7 @@ func (nc *Controller) processNoTaintBaseEviction(ctx context.Context, node *v1.N
		return fmt.Errorf("health data doesn't exist for node %q", node.Name)
	}
	// Check eviction timeout against decisionTimestamp
	logger := klog.FromContext(ctx)
	switch observedReadyCondition.Status {
	case v1.ConditionFalse:
		if decisionTimestamp.After(nodeHealthData.readyTransitionTimestamp.Add(nc.podEvictionTimeout)) {
@ -978,11 +976,11 @@ func (nc *Controller) processNoTaintBaseEviction(ctx context.Context, node *v1.N
				return err
			}
			if enqueued {
				klog.V(2).Infof("Node is NotReady. Adding Pods on Node %s to eviction queue: %v is later than %v + %v",
					node.Name,
					decisionTimestamp,
					nodeHealthData.readyTransitionTimestamp,
					nc.podEvictionTimeout,
				logger.V(2).Info("Node is NotReady. Adding Pods on Node to eviction queue: decisionTimestamp is later than readyTransitionTimestamp + podEvictionTimeout",
					"node", klog.KObj(node),
					"decisionTimestamp", decisionTimestamp,
					"readyTransitionTimestamp", nodeHealthData.readyTransitionTimestamp,
					"podEvictionTimeout", nc.podEvictionTimeout,
				)
			}
		}
@ -993,17 +991,17 @@ func (nc *Controller) processNoTaintBaseEviction(ctx context.Context, node *v1.N
				return err
			}
			if enqueued {
				klog.V(2).Infof("Node is unresponsive. Adding Pods on Node %s to eviction queues: %v is later than %v + %v",
					node.Name,
					decisionTimestamp,
					nodeHealthData.readyTransitionTimestamp,
					nc.podEvictionTimeout-gracePeriod,
				logger.V(2).Info("Node is unresponsive. Adding Pods on Node to eviction queues: decisionTimestamp is later than readyTransitionTimestamp + podEvictionTimeout-gracePeriod",
					"node", klog.KObj(node),
					"decisionTimestamp", decisionTimestamp,
					"readyTransitionTimestamp", nodeHealthData.readyTransitionTimestamp,
					"podEvictionTimeoutGracePeriod", nc.podEvictionTimeout-gracePeriod,
				)
			}
		}
	case v1.ConditionTrue:
		if nc.cancelPodEviction(node) {
			klog.V(2).Infof("Node %s is ready again, cancelled pod eviction", node.Name)
		if nc.cancelPodEviction(logger, node) {
			logger.V(2).Info("Node is ready again, cancelled pod eviction", "node", klog.KObj(node))
		}
	}
	return nil
@ -1076,23 +1074,23 @@ func (nc *Controller) tryUpdateNodeHealth(ctx context.Context, node *v1.Node) (t
		_, savedCondition = controllerutil.GetNodeCondition(nodeHealth.status, v1.NodeReady)
		savedLease = nodeHealth.lease
	}

	logger := klog.FromContext(ctx)
	if nodeHealth == nil {
		klog.Warningf("Missing timestamp for Node %s. Assuming now as a timestamp.", node.Name)
		logger.Info("Missing timestamp for Node. Assuming now as a timestamp", "node", klog.KObj(node))
		nodeHealth = &nodeHealthData{
			status:                   &node.Status,
			probeTimestamp:           nc.now(),
			readyTransitionTimestamp: nc.now(),
		}
	} else if savedCondition == nil && currentReadyCondition != nil {
		klog.V(1).Infof("Creating timestamp entry for newly observed Node %s", node.Name)
		logger.V(1).Info("Creating timestamp entry for newly observed Node", "node", klog.KObj(node))
		nodeHealth = &nodeHealthData{
			status:                   &node.Status,
			probeTimestamp:           nc.now(),
			readyTransitionTimestamp: nc.now(),
		}
	} else if savedCondition != nil && currentReadyCondition == nil {
		klog.Errorf("ReadyCondition was removed from Status of Node %s", node.Name)
		logger.Error(nil, "ReadyCondition was removed from Status of Node", "node", klog.KObj(node))
		// TODO: figure out what to do in this case. For now we do the same thing as above.
		nodeHealth = &nodeHealthData{
			status: &node.Status,
@ -1104,15 +1102,15 @@ func (nc *Controller) tryUpdateNodeHealth(ctx context.Context, node *v1.Node) (t
		// If ReadyCondition changed since the last time we checked, we update the transition timestamp to "now",
		// otherwise we leave it as it is.
		if savedCondition.LastTransitionTime != currentReadyCondition.LastTransitionTime {
			klog.V(3).Infof("ReadyCondition for Node %s transitioned from %v to %v", node.Name, savedCondition, currentReadyCondition)
			logger.V(3).Info("ReadyCondition for Node transitioned from savedCondition to currentReadyCondition", "node", klog.KObj(node), "savedCondition", savedCondition, "currentReadyCondition", currentReadyCondition)
			transitionTime = nc.now()
		} else {
			transitionTime = nodeHealth.readyTransitionTimestamp
		}
		if klogV := klog.V(5); klogV.Enabled() {
			klogV.Infof("Node %s ReadyCondition updated. Updating timestamp: %+v vs %+v.", node.Name, nodeHealth.status, node.Status)
		if loggerV := logger.V(5); loggerV.Enabled() {
			loggerV.Info("Node ReadyCondition updated. Updating timestamp", "node", klog.KObj(node), "nodeHealthStatus", nodeHealth.status, "nodeStatus", node.Status)
		} else {
			klog.V(3).Infof("Node %s ReadyCondition updated. Updating timestamp.", node.Name)
			logger.V(3).Info("Node ReadyCondition updated. Updating timestamp", "node", klog.KObj(node))
		}
		nodeHealth = &nodeHealthData{
			status: &node.Status,
@ -1147,7 +1145,7 @@ func (nc *Controller) tryUpdateNodeHealth(ctx context.Context, node *v1.Node) (t
	for _, nodeConditionType := range nodeConditionTypes {
		_, currentCondition := controllerutil.GetNodeCondition(&node.Status, nodeConditionType)
		if currentCondition == nil {
			klog.V(2).Infof("Condition %v of node %v was never updated by kubelet", nodeConditionType, node.Name)
			logger.V(2).Info("Condition of node was never updated by kubelet", "nodeConditionType", nodeConditionType, "node", klog.KObj(node))
			node.Status.Conditions = append(node.Status.Conditions, v1.NodeCondition{
				Type:   nodeConditionType,
				Status: v1.ConditionUnknown,
@ -1157,8 +1155,8 @@ func (nc *Controller) tryUpdateNodeHealth(ctx context.Context, node *v1.Node) (t
				LastTransitionTime: nowTimestamp,
			})
		} else {
			klog.V(2).Infof("node %v hasn't been updated for %+v. Last %v is: %+v",
				node.Name, nc.now().Time.Sub(nodeHealth.probeTimestamp.Time), nodeConditionType, currentCondition)
			logger.V(2).Info("Node hasn't been updated",
				"node", klog.KObj(node), "duration", nc.now().Time.Sub(nodeHealth.probeTimestamp.Time), "nodeConditionType", nodeConditionType, "currentCondition", currentCondition)
			if currentCondition.Status != v1.ConditionUnknown {
				currentCondition.Status = v1.ConditionUnknown
				currentCondition.Reason = "NodeStatusUnknown"
@ -1172,7 +1170,7 @@ func (nc *Controller) tryUpdateNodeHealth(ctx context.Context, node *v1.Node) (t

	if !apiequality.Semantic.DeepEqual(currentReadyCondition, &observedReadyCondition) {
		if _, err := nc.kubeClient.CoreV1().Nodes().UpdateStatus(ctx, node, metav1.UpdateOptions{}); err != nil {
			klog.Errorf("Error updating node %s: %v", node.Name, err)
			logger.Error(err, "Error updating node", "node", klog.KObj(node))
			return gracePeriod, observedReadyCondition, currentReadyCondition, err
		}
		nodeHealth = &nodeHealthData{
@ -1191,6 +1189,7 @@ func (nc *Controller) tryUpdateNodeHealth(ctx context.Context, node *v1.Node) (t
func (nc *Controller) handleDisruption(ctx context.Context, zoneToNodeConditions map[string][]*v1.NodeCondition, nodes []*v1.Node) {
	newZoneStates := map[string]ZoneState{}
	allAreFullyDisrupted := true
	logger := klog.FromContext(ctx)
	for k, v := range zoneToNodeConditions {
		zoneSize.WithLabelValues(k).Set(float64(len(v)))
		unhealthy, newState := nc.computeZoneStateFunc(v)
@ -1201,7 +1200,7 @@ func (nc *Controller) handleDisruption(ctx context.Context, zoneToNodeConditions
		}
		newZoneStates[k] = newState
		if _, had := nc.zoneStates[k]; !had {
			klog.Errorf("Setting initial state for unseen zone: %v", k)
			logger.Error(nil, "Setting initial state for unseen zone", "zone", k)
			nc.zoneStates[k] = stateInitial
		}
	}
@ -1229,15 +1228,15 @@ func (nc *Controller) handleDisruption(ctx context.Context, zoneToNodeConditions
	if !allAreFullyDisrupted || !allWasFullyDisrupted {
		// We're switching to full disruption mode
		if allAreFullyDisrupted {
			klog.Info("Controller detected that all Nodes are not-Ready. Entering master disruption mode.")
			logger.Info("Controller detected that all Nodes are not-Ready. Entering master disruption mode")
			for i := range nodes {
				if nc.runTaintManager {
					_, err := nc.markNodeAsReachable(ctx, nodes[i])
					if err != nil {
						klog.Errorf("Failed to remove taints from Node %v", nodes[i].Name)
						logger.Error(nil, "Failed to remove taints from Node", "node", klog.KObj(nodes[i]))
					}
				} else {
					nc.cancelPodEviction(nodes[i])
					nc.cancelPodEviction(logger, nodes[i])
				}
			}
			// We stop all evictions.
@ -1256,7 +1255,7 @@ func (nc *Controller) handleDisruption(ctx context.Context, zoneToNodeConditions
		}
		// We're exiting full disruption mode
		if allWasFullyDisrupted {
			klog.Info("Controller detected that some Nodes are Ready. Exiting master disruption mode.")
			logger.Info("Controller detected that some Nodes are Ready. Exiting master disruption mode")
			// When exiting disruption mode update probe timestamps on all Nodes.
			now := nc.now()
			for i := range nodes {
@ -1279,7 +1278,7 @@ func (nc *Controller) handleDisruption(ctx context.Context, zoneToNodeConditions
			if v == newState {
				continue
			}
			klog.Infof("Controller detected that zone %v is now in state %v.", k, newState)
			logger.Info("Controller detected that zone is now in new state", "zone", k, "newState", newState)
			nc.setLimiterInZone(k, len(zoneToNodeConditions[k]), newState)
			nc.zoneStates[k] = newState
		}
@ -1317,12 +1316,13 @@ func (nc *Controller) doPodProcessingWorker(ctx context.Context) {
func (nc *Controller) processPod(ctx context.Context, podItem podUpdateItem) {
	defer nc.podUpdateQueue.Done(podItem)
	pod, err := nc.podLister.Pods(podItem.namespace).Get(podItem.name)
	logger := klog.FromContext(ctx)
	if err != nil {
		if apierrors.IsNotFound(err) {
			// If the pod was deleted, there is no need to requeue.
			return
		}
		klog.Warningf("Failed to read pod %v/%v: %v.", podItem.namespace, podItem.name, err)
		logger.Info("Failed to read pod", "pod", klog.KRef(podItem.namespace, podItem.name), "err", err)
		nc.podUpdateQueue.AddRateLimited(podItem)
		return
	}
@ -1338,7 +1338,7 @@ func (nc *Controller) processPod(ctx context.Context, podItem podUpdateItem) {

	node, err := nc.nodeLister.Get(nodeName)
	if err != nil {
		klog.Warningf("Failed to read node %v: %v.", nodeName, err)
		logger.Info("Failed to read node", "node", klog.KRef("", nodeName), "err", err)
		nc.podUpdateQueue.AddRateLimited(podItem)
		return
	}
@ -1356,7 +1356,7 @@ func (nc *Controller) processPod(ctx context.Context, podItem podUpdateItem) {
	// Pods are processed by TaintManager.
	if !nc.runTaintManager {
		if err := nc.processNoTaintBaseEviction(ctx, node, currentReadyCondition, nc.nodeMonitorGracePeriod, pods); err != nil {
			klog.Warningf("Unable to process pod %+v eviction from node %v: %v.", podItem, nodeName, err)
			logger.Info("Unable to process pod eviction from node", "pod", klog.KRef(podItem.namespace, podItem.name), "node", klog.KRef("", nodeName), "err", err)
			nc.podUpdateQueue.AddRateLimited(podItem)
			return
		}
@ -1364,7 +1364,7 @@ func (nc *Controller) processPod(ctx context.Context, podItem podUpdateItem) {

	if currentReadyCondition.Status != v1.ConditionTrue {
		if err := controllerutil.MarkPodsNotReady(ctx, nc.kubeClient, nc.recorder, pods, nodeName); err != nil {
			klog.Warningf("Unable to mark pod %+v NotReady on node %v: %v.", podItem, nodeName, err)
			logger.Info("Unable to mark pod NotReady on node", "pod", klog.KRef(podItem.namespace, podItem.name), "node", klog.KRef("", nodeName), "err", err)
			nc.podUpdateQueue.AddRateLimited(podItem)
		}
	}
@ -1447,7 +1447,7 @@ func (nc *Controller) ReducedQPSFunc(nodeNum int) float32 {
}

// addPodEvictorForNewZone checks if new zone appeared, and if so add new evictor.
func (nc *Controller) addPodEvictorForNewZone(node *v1.Node) {
func (nc *Controller) addPodEvictorForNewZone(logger klog.Logger, node *v1.Node) {
	nc.evictorLock.Lock()
	defer nc.evictorLock.Unlock()
	zone := nodetopology.GetZoneKey(node)
@ -1463,7 +1463,7 @@ func (nc *Controller) addPodEvictorForNewZone(node *v1.Node) {
			flowcontrol.NewTokenBucketRateLimiter(nc.evictionLimiterQPS, scheduler.EvictionRateLimiterBurst))
	}
	// Init the metric for the new zone.
	klog.Infof("Initializing eviction metric for zone: %v", zone)
	logger.Info("Initializing eviction metric for zone", "zone", zone)
	evictionsNumber.WithLabelValues(zone).Add(0)
	evictionsTotal.WithLabelValues(zone).Add(0)
}
@ -1471,16 +1471,16 @@ func (nc *Controller) addPodEvictorForNewZone(node *v1.Node) {

// cancelPodEviction removes any queued evictions, typically because the node is available again. It
// returns true if an eviction was queued.
func (nc *Controller) cancelPodEviction(node *v1.Node) bool {
func (nc *Controller) cancelPodEviction(logger klog.Logger, node *v1.Node) bool {
	zone := nodetopology.GetZoneKey(node)
	if !nc.nodeEvictionMap.setStatus(node.Name, unmarked) {
		klog.V(2).Infof("node %v was unregistered in the meantime - skipping setting status", node.Name)
		logger.V(2).Info("Node was unregistered in the meantime - skipping setting status", "node", klog.KObj(node))
	}
	nc.evictorLock.Lock()
	defer nc.evictorLock.Unlock()
	wasDeleting := nc.zonePodEvictor[zone].Remove(node.Name)
	if wasDeleting {
		klog.V(2).Infof("Cancelling pod Eviction on Node: %v", node.Name)
		logger.V(2).Info("Cancelling pod Eviction on Node", "node", klog.KObj(node))
		return true
	}
	return false
@ -1502,8 +1502,9 @@ func (nc *Controller) evictPods(ctx context.Context, node *v1.Node, pods []*v1.P
		}
		return false, nil
	}
	logger := klog.FromContext(ctx)
	if !nc.nodeEvictionMap.setStatus(node.Name, toBeEvicted) {
		klog.V(2).Infof("node %v was unregistered in the meantime - skipping setting status", node.Name)
		logger.V(2).Info("Node was unregistered in the meantime - skipping setting status", "node", klog.KObj(node))
	}

	nc.evictorLock.Lock()
@ -1532,13 +1533,14 @@ func (nc *Controller) markNodeForTainting(node *v1.Node, status v1.ConditionStat

func (nc *Controller) markNodeAsReachable(ctx context.Context, node *v1.Node) (bool, error) {
	err := controller.RemoveTaintOffNode(ctx, nc.kubeClient, node.Name, node, UnreachableTaintTemplate)
	logger := klog.FromContext(ctx)
	if err != nil {
		klog.Errorf("Failed to remove taint from node %v: %v", node.Name, err)
		logger.Error(err, "Failed to remove taint from node", "node", klog.KObj(node))
		return false, err
	}
	err = controller.RemoveTaintOffNode(ctx, nc.kubeClient, node.Name, node, NotReadyTaintTemplate)
	if err != nil {
		klog.Errorf("Failed to remove taint from node %v: %v", node.Name, err)
		logger.Error(err, "Failed to remove taint from node", "node", klog.KObj(node))
		return false, err
	}
	nc.evictorLock.Lock()
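NewNodeLifecycleController above derives a named logger once, and every later line logged through it (or recovered from the context) inherits the controller name. A minimal sketch of the naming pattern, assuming nothing beyond klog:

package main

import (
	"context"

	"k8s.io/klog/v2"
)

func main() {
	ctx := context.Background()
	// Derive a logger whose output is prefixed with the controller name...
	logger := klog.LoggerWithName(klog.FromContext(ctx), "NodeLifecycleController")
	// ...and store it so that callees can recover it with klog.FromContext.
	ctx = klog.NewContext(ctx, logger)

	klog.FromContext(ctx).Info("Starting controller")
}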
@ -41,6 +41,8 @@ import (
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/client-go/kubernetes/fake"
	testcore "k8s.io/client-go/testing"
	"k8s.io/klog/v2"
	"k8s.io/klog/v2/ktesting"
	kubeletapis "k8s.io/kubelet/pkg/apis"
	"k8s.io/kubernetes/pkg/controller"
	"k8s.io/kubernetes/pkg/controller/nodelifecycle/scheduler"
@ -88,12 +90,12 @@ type nodeLifecycleController struct {
}

// doEviction does the fake eviction and returns the status of eviction operation.
func (nc *nodeLifecycleController) doEviction(fakeNodeHandler *testutil.FakeNodeHandler) bool {
func (nc *nodeLifecycleController) doEviction(logger klog.Logger, fakeNodeHandler *testutil.FakeNodeHandler) bool {
	nc.evictorLock.Lock()
	defer nc.evictorLock.Unlock()
	zones := testutil.GetZones(fakeNodeHandler)
	for _, zone := range zones {
		nc.zonePodEvictor[zone].Try(func(value scheduler.TimedValue) (bool, time.Duration) {
		nc.zonePodEvictor[zone].Try(logger, func(value scheduler.TimedValue) (bool, time.Duration) {
			uid, _ := value.UID.(string)
			pods, _ := nc.getPodsAssignedToNode(value.Value)
			controllerutil.DeletePods(context.TODO(), fakeNodeHandler, pods, nc.recorder, value.Value, uid, nc.daemonSetStore)
@ -721,9 +723,10 @@ func TestMonitorNodeHealthEvictPods(t *testing.T) {
			t.Errorf("unexpected error: %v", err)
		}
		zones := testutil.GetZones(item.fakeNodeHandler)
		logger, _ := ktesting.NewTestContext(t)
		for _, zone := range zones {
			if _, ok := nodeController.zonePodEvictor[zone]; ok {
				nodeController.zonePodEvictor[zone].Try(func(value scheduler.TimedValue) (bool, time.Duration) {
				nodeController.zonePodEvictor[zone].Try(logger, func(value scheduler.TimedValue) (bool, time.Duration) {
					nodeUID, _ := value.UID.(string)
					pods, err := nodeController.getPodsAssignedToNode(value.Value)
					if err != nil {
@ -883,8 +886,9 @@ func TestPodStatusChange(t *testing.T) {
			t.Errorf("unexpected error: %v", err)
		}
		zones := testutil.GetZones(item.fakeNodeHandler)
		logger, _ := ktesting.NewTestContext(t)
		for _, zone := range zones {
			nodeController.zonePodEvictor[zone].Try(func(value scheduler.TimedValue) (bool, time.Duration) {
			nodeController.zonePodEvictor[zone].Try(logger, func(value scheduler.TimedValue) (bool, time.Duration) {
				nodeUID, _ := value.UID.(string)
				pods, err := nodeController.getPodsAssignedToNode(value.Value)
				if err != nil {
@ -1467,8 +1471,9 @@ func TestMonitorNodeHealthEvictPodsWithDisruption(t *testing.T) {
		// Infinite loop, used for retrying in case ratelimiter fails to reload for Try function.
		// this breaks when we have the status that we need for test case or when we don't see the
		// intended result after 1 minute.
		logger, _ := ktesting.NewTestContext(t)
		for {
			podEvicted = nodeController.doEviction(fakeNodeHandler)
			podEvicted = nodeController.doEviction(logger, fakeNodeHandler)
			if podEvicted == item.expectedEvictPods || time.Since(start) > 1*time.Minute {
				break
			}
@ -23,7 +23,6 @@ import (

	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/client-go/util/flowcontrol"

	"k8s.io/klog/v2"
)

@ -229,14 +228,14 @@ type ActionFunc func(TimedValue) (bool, time.Duration)
// function in NodeController when Node becomes Ready again) TODO:
// figure out a good way to do garbage collection for all Nodes that
// were removed from the cluster.
func (q *RateLimitedTimedQueue) Try(fn ActionFunc) {
func (q *RateLimitedTimedQueue) Try(logger klog.Logger, fn ActionFunc) {
	val, ok := q.queue.Head()
	q.limiterLock.Lock()
	defer q.limiterLock.Unlock()
	for ok {
		// rate limit the queue checking
		if !q.limiter.TryAccept() {
			klog.V(10).Infof("Try rate limited for value: %v", val)
			logger.V(10).Info("Try rate limited", "value", val)
			// Try again later
			break
		}
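Try gains an explicit klog.Logger parameter rather than a context.Context because the queue's API has no context to thread through; callers that do hold a context extract the logger first and pass it in. A sketch of that design choice with stand-in types (the queue below is not the real RateLimitedTimedQueue):

package main

import "k8s.io/klog/v2"

type queue struct{ items []string }

// Try mirrors the signature change above: the logger arrives as an
// explicit first parameter instead of being read from a context.
func (q *queue) Try(logger klog.Logger, fn func(item string) bool) {
	for _, it := range q.items {
		logger.V(10).Info("Try", "value", it) // verbosity mirrors the real code
		if !fn(it) {
			break
		}
	}
}

func main() {
	q := &queue{items: []string{"node-a", "node-b"}}
	q.Try(klog.Background(), func(item string) bool { return true })
}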
@ -23,6 +23,7 @@ import (

	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/client-go/util/flowcontrol"
	"k8s.io/klog/v2/ktesting"
)

func CheckQueueEq(lhs []string, rhs TimedQueue) bool {
@ -144,7 +145,8 @@ func TestTry(t *testing.T) {
	evictor.Remove("second")

	deletedMap := sets.NewString()
	evictor.Try(func(value TimedValue) (bool, time.Duration) {
	logger, _ := ktesting.NewTestContext(t)
	evictor.Try(logger, func(value TimedValue) (bool, time.Duration) {
		deletedMap.Insert(value.Value)
		return true, 0
	})
@ -180,7 +182,8 @@ func TestTryOrdering(t *testing.T) {
	order := []string{}
	count := 0
	hasQueued := false
	evictor.Try(func(value TimedValue) (bool, time.Duration) {
	logger, _ := ktesting.NewTestContext(t)
	evictor.Try(logger, func(value TimedValue) (bool, time.Duration) {
		count++
		t.Logf("eviction %d", count)
		if value.ProcessAt.IsZero() {
@ -242,8 +245,8 @@ func TestTryRemovingWhileTry(t *testing.T) {
		evictor.Remove("second")
		close(wait)
	}()

	evictor.Try(func(value TimedValue) (bool, time.Duration) {
	logger, _ := ktesting.NewTestContext(t)
	evictor.Try(logger, func(value TimedValue) (bool, time.Duration) {
		count++
		if value.AddedAt.IsZero() {
			t.Fatalf("added should not be zero")
@ -313,7 +316,8 @@ func TestAddAfterTry(t *testing.T) {
	evictor.Remove("second")

	deletedMap := sets.NewString()
	evictor.Try(func(value TimedValue) (bool, time.Duration) {
	logger, _ := ktesting.NewTestContext(t)
	evictor.Try(logger, func(value TimedValue) (bool, time.Duration) {
		deletedMap.Insert(value.Value)
		return true, 0
	})
@ -327,7 +331,7 @@ func TestAddAfterTry(t *testing.T) {
	}

	evictor.Add("first", "11111")
	evictor.Try(func(value TimedValue) (bool, time.Duration) {
	evictor.Try(logger, func(value TimedValue) (bool, time.Duration) {
		t.Errorf("We shouldn't process the same value if the explicit remove wasn't called.")
		return true, 0
	})
@ -105,7 +105,7 @@ func deletePodHandler(c clientset.Interface, emitEventFunc func(types.Namespaced
	return func(ctx context.Context, args *WorkArgs) error {
		ns := args.NamespacedName.Namespace
		name := args.NamespacedName.Name
		klog.InfoS("NoExecuteTaintManager is deleting pod", "pod", args.NamespacedName.String())
		klog.FromContext(ctx).Info("NoExecuteTaintManager is deleting pod", "pod", args.NamespacedName.String())
		if emitEventFunc != nil {
			emitEventFunc(args.NamespacedName)
		}
@ -202,16 +202,17 @@ func NewNoExecuteTaintManager(ctx context.Context, c clientset.Interface, podLis
// Run starts NoExecuteTaintManager which will run in loop until `stopCh` is closed.
func (tc *NoExecuteTaintManager) Run(ctx context.Context) {
	defer utilruntime.HandleCrash()

	klog.InfoS("Starting NoExecuteTaintManager")
	logger := klog.FromContext(ctx)
	logger.Info("Starting NoExecuteTaintManager")

	// Start events processing pipeline.
	tc.broadcaster.StartStructuredLogging(0)
	if tc.client != nil {
		klog.InfoS("Sending events to api server")
		logger.Info("Sending events to api server")
		tc.broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: tc.client.CoreV1().Events("")})
	} else {
		klog.Fatalf("kubeClient is nil when starting NodeController")
		logger.Error(nil, "kubeClient is nil when starting NodeController")
		klog.FlushAndExit(klog.ExitFlushTimeout, 1)
	}
	defer tc.broadcaster.Shutdown()

@ -363,8 +364,8 @@ func (tc *NoExecuteTaintManager) NodeUpdated(oldNode *v1.Node, newNode *v1.Node)
	tc.nodeUpdateQueue.Add(updateItem)
}

func (tc *NoExecuteTaintManager) cancelWorkWithEvent(nsName types.NamespacedName) {
	if tc.taintEvictionQueue.CancelWork(nsName.String()) {
func (tc *NoExecuteTaintManager) cancelWorkWithEvent(logger klog.Logger, nsName types.NamespacedName) {
	if tc.taintEvictionQueue.CancelWork(logger, nsName.String()) {
		tc.emitCancelPodDeletionEvent(nsName)
	}
}
@ -377,22 +378,23 @@ func (tc *NoExecuteTaintManager) processPodOnNode(
	taints []v1.Taint,
	now time.Time,
) {
	logger := klog.FromContext(ctx)
	if len(taints) == 0 {
		tc.cancelWorkWithEvent(podNamespacedName)
		tc.cancelWorkWithEvent(logger, podNamespacedName)
	}
	allTolerated, usedTolerations := v1helper.GetMatchingTolerations(taints, tolerations)
	if !allTolerated {
		klog.V(2).InfoS("Not all taints are tolerated after update for pod on node", "pod", podNamespacedName.String(), "node", klog.KRef("", nodeName))
		logger.V(2).Info("Not all taints are tolerated after update for pod on node", "pod", podNamespacedName.String(), "node", klog.KRef("", nodeName))
		// We're canceling scheduled work (if any), as we're going to delete the Pod right away.
		tc.cancelWorkWithEvent(podNamespacedName)
		tc.cancelWorkWithEvent(logger, podNamespacedName)
		tc.taintEvictionQueue.AddWork(ctx, NewWorkArgs(podNamespacedName.Name, podNamespacedName.Namespace), time.Now(), time.Now())
		return
	}
	minTolerationTime := getMinTolerationTime(usedTolerations)
	// getMinTolerationTime returns negative value to denote infinite toleration.
	if minTolerationTime < 0 {
		klog.V(4).InfoS("Current tolerations for pod tolerate forever, cancelling any scheduled deletion", "pod", podNamespacedName.String())
		tc.cancelWorkWithEvent(podNamespacedName)
		logger.V(4).Info("Current tolerations for pod tolerate forever, cancelling any scheduled deletion", "pod", podNamespacedName.String())
		tc.cancelWorkWithEvent(logger, podNamespacedName)
		return
	}

@ -404,19 +406,20 @@ func (tc *NoExecuteTaintManager) processPodOnNode(
		if startTime.Add(minTolerationTime).Before(triggerTime) {
			return
		}
		tc.cancelWorkWithEvent(podNamespacedName)
		tc.cancelWorkWithEvent(logger, podNamespacedName)
	}
	tc.taintEvictionQueue.AddWork(ctx, NewWorkArgs(podNamespacedName.Name, podNamespacedName.Namespace), startTime, triggerTime)
}

func (tc *NoExecuteTaintManager) handlePodUpdate(ctx context.Context, podUpdate podUpdateItem) {
	pod, err := tc.podLister.Pods(podUpdate.podNamespace).Get(podUpdate.podName)
	logger := klog.FromContext(ctx)
	if err != nil {
		if apierrors.IsNotFound(err) {
			// Delete
			podNamespacedName := types.NamespacedName{Namespace: podUpdate.podNamespace, Name: podUpdate.podName}
			klog.V(4).InfoS("Noticed pod deletion", "pod", podNamespacedName)
			tc.cancelWorkWithEvent(podNamespacedName)
			logger.V(4).Info("Noticed pod deletion", "pod", podNamespacedName)
			tc.cancelWorkWithEvent(logger, podNamespacedName)
			return
		}
		utilruntime.HandleError(fmt.Errorf("could not get pod %s/%s: %v", podUpdate.podName, podUpdate.podNamespace, err))
@ -430,7 +433,7 @@ func (tc *NoExecuteTaintManager) handlePodUpdate(ctx context.Context, podUpdate

	// Create or Update
	podNamespacedName := types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}
	klog.V(4).InfoS("Noticed pod update", "pod", podNamespacedName)
	logger.V(4).Info("Noticed pod update", "pod", podNamespacedName)
	nodeName := pod.Spec.NodeName
	if nodeName == "" {
		return
@ -451,10 +454,11 @@ func (tc *NoExecuteTaintManager) handlePodUpdate(ctx context.Context, podUpdate

func (tc *NoExecuteTaintManager) handleNodeUpdate(ctx context.Context, nodeUpdate nodeUpdateItem) {
	node, err := tc.nodeLister.Get(nodeUpdate.nodeName)
	logger := klog.FromContext(ctx)
	if err != nil {
		if apierrors.IsNotFound(err) {
			// Delete
			klog.V(4).InfoS("Noticed node deletion", "node", nodeUpdate.nodeName)
			logger.V(4).Info("Noticed node deletion", "node", klog.KRef("", nodeUpdate.nodeName))
			tc.taintedNodesLock.Lock()
			defer tc.taintedNodesLock.Unlock()
			delete(tc.taintedNodes, nodeUpdate.nodeName)
@ -465,12 +469,12 @@ func (tc *NoExecuteTaintManager) handleNodeUpdate(ctx context.Context, nodeUpdat
	}

	// Create or Update
	klog.V(4).InfoS("Noticed node update", "node", nodeUpdate)
	logger.V(4).Info("Noticed node update", "node", klog.KObj(node))
	taints := getNoExecuteTaints(node.Spec.Taints)
	func() {
		tc.taintedNodesLock.Lock()
		defer tc.taintedNodesLock.Unlock()
		klog.V(4).InfoS("Updating known taints on node", "node", node.Name, "taints", taints)
		logger.V(4).Info("Updating known taints on node", "node", klog.KObj(node), "taints", taints)
		if len(taints) == 0 {
			delete(tc.taintedNodes, node.Name)
		} else {
@ -483,7 +487,7 @@ func (tc *NoExecuteTaintManager) handleNodeUpdat
	// tc.PodUpdated which will use tc.taintedNodes to potentially delete delayed pods.
	pods, err := tc.getPodsAssignedToNode(node.Name)
	if err != nil {
		klog.ErrorS(err, "Failed to get pods assigned to node", "node", node.Name)
		logger.Error(err, "Failed to get pods assigned to node", "node", klog.KObj(node))
		return
	}
	if len(pods) == 0 {
@ -491,9 +495,9 @@ func (tc *NoExecuteTaintManager) handleNodeUpdat
	}
	// Short circuit, to make this controller a bit faster.
	if len(taints) == 0 {
		klog.V(4).InfoS("All taints were removed from the node. Cancelling all evictions...", "node", node.Name)
		logger.V(4).Info("All taints were removed from the node. Cancelling all evictions...", "node", klog.KObj(node))
		for i := range pods {
			tc.cancelWorkWithEvent(types.NamespacedName{Namespace: pods[i].Namespace, Name: pods[i].Name})
			tc.cancelWorkWithEvent(logger, types.NamespacedName{Namespace: pods[i].Namespace, Name: pods[i].Name})
		}
		return
	}
@ -52,10 +52,11 @@ type TimedWorker struct {
// createWorker creates a TimedWorker that will execute `f` not earlier than `fireAt`.
func createWorker(ctx context.Context, args *WorkArgs, createdAt time.Time, fireAt time.Time, f func(ctx context.Context, args *WorkArgs) error, clock clock.WithDelayedExecution) *TimedWorker {
delay := fireAt.Sub(createdAt)
logger := klog.FromContext(ctx)
fWithErrorLogging := func() {
err := f(ctx, args)
if err != nil {
klog.Errorf("NodeLifecycle: timed worker failed with error: %q", err)
logger.Error(err, "NodeLifecycle: timed worker failed")
}
}
if delay <= 0 {
@ -116,12 +117,13 @@ func (q *TimedWorkerQueue) getWrappedWorkerFunc(key string) func(ctx context.Con
// AddWork adds a work to the WorkerQueue which will be executed not earlier than `fireAt`.
func (q *TimedWorkerQueue) AddWork(ctx context.Context, args *WorkArgs, createdAt time.Time, fireAt time.Time) {
key := args.KeyFromWorkArgs()
klog.V(4).Infof("Adding TimedWorkerQueue item %v at %v to be fired at %v", key, createdAt, fireAt)
logger := klog.FromContext(ctx)
logger.V(4).Info("Adding TimedWorkerQueue item and to be fired at firedTime", "item", key, "createTime", createdAt, "firedTime", fireAt)

q.Lock()
defer q.Unlock()
if _, exists := q.workers[key]; exists {
klog.Warningf("Trying to add already existing work for %+v. Skipping.", args)
logger.Info("Trying to add already existing work, skipping", "args", args)
return
}
worker := createWorker(ctx, args, createdAt, fireAt, q.getWrappedWorkerFunc(key), q.clock)
@ -129,13 +131,13 @@ func (q *TimedWorkerQueue) AddWork(ctx context.Context, args *WorkArgs, createdA
}

// CancelWork removes scheduled function execution from the queue. Returns true if work was cancelled.
func (q *TimedWorkerQueue) CancelWork(key string) bool {
func (q *TimedWorkerQueue) CancelWork(logger klog.Logger, key string) bool {
q.Lock()
defer q.Unlock()
worker, found := q.workers[key]
result := false
if found {
klog.V(4).Infof("Cancelling TimedWorkerQueue item %v at %v", key, time.Now())
logger.V(4).Info("Cancelling TimedWorkerQueue item", "item", key, "time", time.Now())
if worker != nil {
result = true
worker.Cancel()
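Two details of the timed-workers change are worth noting: CancelWork gains an explicit klog.Logger parameter because it has no context to pull one from, and klog.Errorf("... %q", err) becomes logger.Error(err, msg), which carries the error as a structured field rather than formatting it into the message. A trimmed-down sketch; workerQueue is a toy stand-in, not the real TimedWorkerQueue:

```go
package main

import (
	"fmt"
	"sync"

	"k8s.io/klog/v2"
)

// workerQueue is a hypothetical stand-in for TimedWorkerQueue.
type workerQueue struct {
	sync.Mutex
	workers map[string]func() // key -> cancel func
}

// CancelWork mirrors the new signature: the caller passes its
// contextual logger in, since the queue holds no context itself.
func (q *workerQueue) CancelWork(logger klog.Logger, key string) bool {
	q.Lock()
	defer q.Unlock()
	cancel, found := q.workers[key]
	if found {
		logger.V(4).Info("Cancelling TimedWorkerQueue item", "item", key)
		cancel()
		delete(q.workers, key)
	}
	return found
}

func main() {
	q := &workerQueue{workers: map[string]func(){
		"ns/pod": func() { fmt.Println("cancelled") },
	}}
	// klog.Background() supplies a fallback logger where no context
	// is available, e.g. in main.
	q.CancelWork(klog.Background(), "ns/pod")
}
```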
@ -23,6 +23,7 @@ import (
"testing"
"time"

"k8s.io/klog/v2/ktesting"
testingclock "k8s.io/utils/clock/testing"
)

@ -108,8 +109,9 @@ func TestCancel(t *testing.T) {
queue.AddWork(context.TODO(), NewWorkArgs("3", "3"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("4", "4"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("5", "5"), now, then)
queue.CancelWork(NewWorkArgs("2", "2").KeyFromWorkArgs())
queue.CancelWork(NewWorkArgs("4", "4").KeyFromWorkArgs())
logger, _ := ktesting.NewTestContext(t)
queue.CancelWork(logger, NewWorkArgs("2", "2").KeyFromWorkArgs())
queue.CancelWork(logger, NewWorkArgs("4", "4").KeyFromWorkArgs())
fakeClock.Step(11 * time.Second)
wg.Wait()
lastVal := atomic.LoadInt32(&testVal)
@ -141,8 +143,9 @@ func TestCancelAndReadd(t *testing.T) {
queue.AddWork(context.TODO(), NewWorkArgs("3", "3"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("4", "4"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("5", "5"), now, then)
queue.CancelWork(NewWorkArgs("2", "2").KeyFromWorkArgs())
queue.CancelWork(NewWorkArgs("4", "4").KeyFromWorkArgs())
logger, _ := ktesting.NewTestContext(t)
queue.CancelWork(logger, NewWorkArgs("2", "2").KeyFromWorkArgs())
queue.CancelWork(logger, NewWorkArgs("4", "4").KeyFromWorkArgs())
queue.AddWork(context.TODO(), NewWorkArgs("2", "2"), now, then)
fakeClock.Step(11 * time.Second)
wg.Wait()
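The test updates rely on k8s.io/klog/v2/ktesting, which binds a logger to the testing.T so log output goes through t.Log and is attributed to the right test. A minimal usage sketch:

```go
package example // would live in a _test.go file

import (
	"testing"

	"k8s.io/klog/v2"
	"k8s.io/klog/v2/ktesting"
)

func TestWithLogger(t *testing.T) {
	// NewTestContext returns both a logger (for APIs that take one
	// explicitly, like the new CancelWork) and a context carrying
	// the same logger (for APIs that use klog.FromContext).
	logger, ctx := ktesting.NewTestContext(t)
	logger.Info("direct logger call")
	klog.FromContext(ctx).Info("same logger, via the context")
}
```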
@ -33,6 +33,7 @@ import (
"k8s.io/client-go/informers"
clientset "k8s.io/client-go/kubernetes"
featuregatetesting "k8s.io/component-base/featuregate/testing"
"k8s.io/klog/v2/ktesting"
"k8s.io/kubernetes/cmd/kube-apiserver/app/options"
"k8s.io/kubernetes/pkg/controller/nodeipam"
"k8s.io/kubernetes/pkg/controller/nodeipam/ipam"
@ -63,7 +64,7 @@ func TestIPAMMultiCIDRRangeAllocatorCIDRAllocate(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

go ipamController.Run(ctx.Done())
go ipamController.Run(ctx)
sharedInformer.Start(ctx.Done())

tests := []struct {
@ -149,7 +150,7 @@ func TestIPAMMultiCIDRRangeAllocatorCIDRRelease(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

go ipamController.Run(ctx.Done())
go ipamController.Run(ctx)
sharedInformer.Start(ctx.Done())

t.Run("Pod CIDR release after node delete", func(t *testing.T) {
@ -226,7 +227,7 @@ func TestIPAMMultiCIDRRangeAllocatorClusterCIDRDelete(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

go ipamController.Run(ctx.Done())
go ipamController.Run(ctx)
sharedInformer.Start(ctx.Done())

t.Run("delete cc with node associated", func(t *testing.T) {
@ -322,7 +323,7 @@ func TestIPAMMultiCIDRRangeAllocatorClusterCIDRTerminate(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

go ipamController.Run(ctx.Done())
go ipamController.Run(ctx)
sharedInformer.Start(ctx.Done())

t.Run("Pod CIDRS must not be allocated from a terminating CC", func(t *testing.T) {
@ -407,7 +408,7 @@ func TestIPAMMultiCIDRRangeAllocatorClusterCIDRTieBreak(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

go ipamController.Run(ctx.Done())
go ipamController.Run(ctx)
sharedInformer.Start(ctx.Done())

tests := []struct {
@ -557,8 +558,9 @@ func booststrapMultiCIDRRangeAllocator(t *testing.T,
if _, err := clientSet.CoreV1().Nodes().Create(context.TODO(), initialNode, metav1.CreateOptions{}); err != nil {
t.Fatal(err)
}

_, ctx := ktesting.NewTestContext(t)
ipamController, err := nodeipam.NewNodeIpamController(
ctx,
sharedInformer.Core().V1().Nodes(),
sharedInformer.Networking().V1alpha1().ClusterCIDRs(),
nil,
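Switching ipamController.Run from a stop channel to a context is the pattern these integration tests exercise: cancelling the test's context stops the controller, while ctx.Done() still yields a channel for APIs that have not migrated, such as sharedInformer.Start. A generic sketch of the convention, under the assumption that the controller blocks until cancellation:

```go
package main

import (
	"context"
	"fmt"
	"time"
)

// run follows the Run(ctx) convention: block until the context is
// cancelled instead of selecting on a dedicated stop channel.
func run(ctx context.Context) {
	<-ctx.Done()
	fmt.Println("controller stopping:", ctx.Err())
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	go run(ctx)

	// ctx.Done() still provides a <-chan struct{} for legacy APIs,
	// e.g. sharedInformer.Start(ctx.Done()).
	_ = ctx.Done()

	cancel()
	time.Sleep(10 * time.Millisecond) // let the goroutine observe cancellation
}
```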
@ -17,6 +17,7 @@ limitations under the License.
package ipamperf

import (
"context"
"encoding/json"
"fmt"
"net"
@ -25,6 +26,7 @@ import (
"time"

"k8s.io/klog/v2"
"k8s.io/klog/v2/ktesting"
netutils "k8s.io/utils/net"

"k8s.io/client-go/informers"
@ -37,7 +39,7 @@ import (
"k8s.io/kubernetes/test/integration/util"
)

func setupAllocator(kubeConfig *restclient.Config, config *Config, clusterCIDR, serviceCIDR *net.IPNet, subnetMaskSize int) (*clientset.Clientset, util.ShutdownFunc, error) {
func setupAllocator(ctx context.Context, kubeConfig *restclient.Config, config *Config, clusterCIDR, serviceCIDR *net.IPNet, subnetMaskSize int) (*clientset.Clientset, util.ShutdownFunc, error) {
controllerStopChan := make(chan struct{})
shutdownFunc := func() {
close(controllerStopChan)
@ -50,6 +52,7 @@ func setupAllocator(kubeConfig *restclient.Config, config *Config, clusterCIDR,

sharedInformer := informers.NewSharedInformerFactory(clientSet, 1*time.Hour)
ipamController, err := nodeipam.NewNodeIpamController(
ctx,
sharedInformer.Core().V1().Nodes(),
sharedInformer.Networking().V1alpha1().ClusterCIDRs(),
config.Cloud, clientSet, []*net.IPNet{clusterCIDR}, serviceCIDR, nil,
@ -58,7 +61,7 @@ func setupAllocator(kubeConfig *restclient.Config, config *Config, clusterCIDR,
if err != nil {
return nil, shutdownFunc, err
}
go ipamController.Run(controllerStopChan)
go ipamController.Run(ctx)
sharedInformer.Start(controllerStopChan)

return clientSet, shutdownFunc, nil
@ -74,8 +77,8 @@ func runTest(t *testing.T, kubeConfig *restclient.Config, config *Config, cluste
nodeClient := clientset.NewForConfigOrDie(nodeClientConfig)

defer deleteNodes(nodeClient) // cleanup nodes on after controller shutdown

clientSet, shutdownFunc, err := setupAllocator(kubeConfig, config, clusterCIDR, serviceCIDR, subnetMaskSize)
_, ctx := ktesting.NewTestContext(t)
clientSet, shutdownFunc, err := setupAllocator(ctx, kubeConfig, config, clusterCIDR, serviceCIDR, subnetMaskSize)
if err != nil {
t.Fatalf("Error starting IPAM allocator: %v", err)
}
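setupAllocator now takes the context as its first parameter, the usual Go convention, so the test's ktesting context reaches both the controller constructor and Run, threading cancellation and the contextual logger through a single value. A hedged sketch of that threading; setup and its body are illustrative stand-ins, not the ipamperf code:

```go
package main

import (
	"context"
	"time"

	"k8s.io/klog/v2"
)

// setup mirrors the setupAllocator change: ctx comes first so the
// helper can forward it onward. The one context carries both the
// cancellation signal and the logger.
func setup(ctx context.Context) (shutdown func(), err error) {
	logger := klog.FromContext(ctx)
	done := make(chan struct{})
	go func() {
		<-ctx.Done() // stands in for ipamController.Run(ctx)
		logger.Info("allocator stopped")
		close(done)
	}()
	return func() { <-done }, nil
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	shutdown, err := setup(ctx)
	if err != nil {
		panic(err)
	}
	time.Sleep(10 * time.Millisecond)
	cancel()
	shutdown() // waits for the controller goroutine to exit
}
```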