Merge pull request #112670 from yangjunmyfm192085/delklogV0

use contextual logging (nodeipam and nodelifecycle part)
Authored by Kubernetes Prow Robot on 2023-03-07 09:40:33 -08:00; committed by GitHub
commit 37326f7cea
29 changed files with 606 additions and 516 deletions
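
For orientation before the per-file hunks: the recurring pattern in this PR is to thread a context.Context (or an explicit klog.Logger) into each function, recover the logger with klog.FromContext, and replace printf-style calls with structured key/value calls. A minimal sketch of that before/after shape, with hypothetical function and node names (not taken from the diff):

package main

import (
	"context"

	"k8s.io/klog/v2"
)

// before: global logger, format string, no per-controller context
func updateOld(nodeName string) {
	klog.Infof("Updated CIDR for %q", nodeName)
}

// after: the logger travels in the context and emits key/value pairs
func updateNew(ctx context.Context, nodeName string) {
	logger := klog.FromContext(ctx)
	logger.Info("Updated CIDR", "node", klog.KRef("", nodeName))
}

func main() {
	// attach a named logger once; callees recover it via FromContext
	logger := klog.LoggerWithName(klog.Background(), "NodeIpamController")
	ctx := klog.NewContext(context.Background(), logger)
	updateNew(ctx, "node-0")
}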

View File

@ -62,11 +62,11 @@ func (nodeIpamController *nodeIPAMController) StartNodeIpamControllerWrapper(ini
nodeIpamController.nodeIPAMControllerOptions.ApplyTo(&nodeIpamController.nodeIPAMControllerConfiguration)
return func(ctx context.Context, controllerContext genericcontrollermanager.ControllerContext) (controller.Interface, bool, error) {
return startNodeIpamController(initContext, completedConfig, nodeIpamController.nodeIPAMControllerConfiguration, controllerContext, cloud)
return startNodeIpamController(ctx, initContext, completedConfig, nodeIpamController.nodeIPAMControllerConfiguration, controllerContext, cloud)
}
}
func startNodeIpamController(initContext app.ControllerInitContext, ccmConfig *cloudcontrollerconfig.CompletedConfig, nodeIPAMConfig nodeipamconfig.NodeIPAMControllerConfiguration, ctx genericcontrollermanager.ControllerContext, cloud cloudprovider.Interface) (controller.Interface, bool, error) {
func startNodeIpamController(ctx context.Context, initContext app.ControllerInitContext, ccmConfig *cloudcontrollerconfig.CompletedConfig, nodeIPAMConfig nodeipamconfig.NodeIPAMControllerConfiguration, controllerCtx genericcontrollermanager.ControllerContext, cloud cloudprovider.Interface) (controller.Interface, bool, error) {
var serviceCIDR *net.IPNet
var secondaryServiceCIDR *net.IPNet
@ -130,14 +130,14 @@ func startNodeIpamController(initContext app.ControllerInitContext, ccmConfig *c
var clusterCIDRInformer v1alpha1.ClusterCIDRInformer
if utilfeature.DefaultFeatureGate.Enabled(features.MultiCIDRRangeAllocator) {
clusterCIDRInformer = ctx.InformerFactory.Networking().V1alpha1().ClusterCIDRs()
clusterCIDRInformer = controllerCtx.InformerFactory.Networking().V1alpha1().ClusterCIDRs()
}
nodeIpamController, err := nodeipamcontroller.NewNodeIpamController(
ctx.InformerFactory.Core().V1().Nodes(),
ctx,
controllerCtx.InformerFactory.Core().V1().Nodes(),
clusterCIDRInformer,
cloud,
ctx.ClientBuilder.ClientOrDie(initContext.ClientName),
controllerCtx.ClientBuilder.ClientOrDie(initContext.ClientName),
clusterCIDRs,
serviceCIDR,
secondaryServiceCIDR,
@ -147,7 +147,7 @@ func startNodeIpamController(initContext app.ControllerInitContext, ccmConfig *c
if err != nil {
return nil, true, err
}
go nodeIpamController.Run(ctx.Stop)
go nodeIpamController.Run(ctx)
return nil, true, nil
}
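
The hunks above do two things: they rename the genericcontrollermanager.ControllerContext parameter from ctx to controllerCtx so the name ctx is free for a real context.Context, and they replace the stop-channel handoff (Run(ctx.Stop)) with the context itself. A reduced sketch of that shape, with stand-in types rather than the real controller-manager API:

package main

import "context"

// stand-in for genericcontrollermanager.ControllerContext
type controllerContext struct {
	Stop <-chan struct{} // legacy stop channel, no longer handed to Run
}

type ipamController struct{}

// Run blocks until the context is cancelled instead of reading a channel.
func (c *ipamController) Run(ctx context.Context) {
	<-ctx.Done()
}

func start(ctx context.Context, controllerCtx controllerContext) {
	c := &ipamController{}
	go c.Run(ctx) // previously: go c.Run(controllerCtx.Stop)
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	start(ctx, controllerContext{})
	cancel()
}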

View File

@ -154,7 +154,9 @@ func startNodeIpamController(ctx context.Context, controllerContext ControllerCo
clusterCIDRInformer = controllerContext.InformerFactory.Networking().V1alpha1().ClusterCIDRs()
}
ctx = klog.NewContext(ctx, klog.LoggerWithName(klog.FromContext(ctx), "NodeIpamController"))
nodeIpamController, err := nodeipamcontroller.NewNodeIpamController(
ctx,
controllerContext.InformerFactory.Core().V1().Nodes(),
clusterCIDRInformer,
controllerContext.Cloud,
@ -168,7 +170,7 @@ func startNodeIpamController(ctx context.Context, controllerContext ControllerCo
if err != nil {
return nil, true, err
}
go nodeIpamController.RunWithMetrics(ctx.Done(), controllerContext.ControllerManagerMetrics)
go nodeIpamController.RunWithMetrics(ctx, controllerContext.ControllerManagerMetrics)
return nil, true, nil
}
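
The klog.NewContext/LoggerWithName line above is what scopes all of this controller's output under one name: every callee that recovers the logger from the context inherits it. A small sketch of the effect (the rendered output format depends on the configured logging backend; the comment below assumes klog's default text output):

package main

import (
	"context"

	"k8s.io/klog/v2"
)

func main() {
	ctx := context.Background()
	ctx = klog.NewContext(ctx, klog.LoggerWithName(klog.FromContext(ctx), "NodeIpamController"))

	// any function handed ctx logs under the same name
	klog.FromContext(ctx).Info("Starting")
	// assumed text output: NodeIpamController: "Starting"
}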

View File

@ -61,7 +61,7 @@ func newAdapter(k8s clientset.Interface, cloud *gce.Cloud) *adapter {
return ret
}
func (a *adapter) Run(stopCh <-chan struct{}) {
func (a *adapter) Run(ctx context.Context) {
defer utilruntime.HandleCrash()
// Start event processing pipeline.
@ -69,7 +69,7 @@ func (a *adapter) Run(stopCh <-chan struct{}) {
a.broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: a.k8s.CoreV1().Events("")})
defer a.broadcaster.Shutdown()
<-stopCh
<-ctx.Done()
}
func (a *adapter) Alias(ctx context.Context, node *v1.Node) (*net.IPNet, error) {
@ -88,7 +88,7 @@ func (a *adapter) Alias(ctx context.Context, node *v1.Node) (*net.IPNet, error)
case 1:
break
default:
klog.Warningf("Node %q has more than one alias assigned (%v), defaulting to the first", node.Name, cidrs)
klog.FromContext(ctx).Info("Node has more than one alias assigned, defaulting to the first", "node", klog.KObj(node), "CIDRs", cidrs)
}
_, cidrRange, err := netutils.ParseCIDRSloppy(cidrs[0])
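
Note the severity change in the hunk above: logr-based loggers have no Warning level, so klog.Warningf calls become structured Info calls (genuine failures keep Error). A hedged sketch of that mapping, with a fabricated node object:

package main

import (
	"context"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/klog/v2"
)

func warnAliases(ctx context.Context, node *v1.Node, cidrs []string) {
	// before: klog.Warningf("Node %q has more than one alias assigned (%v), ...", node.Name, cidrs)
	// after: logr has no warning level, so this becomes structured Info
	klog.FromContext(ctx).Info("Node has more than one alias assigned, defaulting to the first",
		"node", klog.KObj(node), "CIDRs", cidrs)
}

func main() {
	node := &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node-0"}}
	warnAliases(context.Background(), node, []string{"10.0.0.0/24", "10.0.1.0/24"})
}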

View File

@ -91,11 +91,11 @@ type CIDRAllocator interface {
// AllocateOrOccupyCIDR looks at the given node, assigns it a valid
// CIDR if it doesn't currently have one, or marks the CIDR as used if
// the node already has one.
AllocateOrOccupyCIDR(node *v1.Node) error
AllocateOrOccupyCIDR(logger klog.Logger, node *v1.Node) error
// ReleaseCIDR releases the CIDR of the removed node.
ReleaseCIDR(node *v1.Node) error
ReleaseCIDR(logger klog.Logger, node *v1.Node) error
// Run starts all the working logic of the allocator.
Run(stopCh <-chan struct{})
Run(ctx context.Context)
}
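
The interface above encodes a convention used throughout the PR: short, synchronous methods take an explicit klog.Logger, while the long-running Run takes a context.Context, which carries both cancellation and the logger. A toy allocator illustrating that split (types reduced; a sketch, not the real allocator):

package main

import (
	"context"

	"k8s.io/klog/v2"
)

type toyAllocator struct{}

// synchronous call sites pass the logger explicitly
func (a *toyAllocator) AllocateOrOccupyCIDR(logger klog.Logger, nodeName string) error {
	logger.V(4).Info("Putting node into the work queue", "node", klog.KRef("", nodeName))
	return nil
}

// the long-running loop takes a context: cancellation plus logger in one value
func (a *toyAllocator) Run(ctx context.Context) {
	logger := klog.FromContext(ctx)
	logger.Info("Starting toy allocator")
	defer logger.Info("Shutting down toy allocator")
	<-ctx.Done()
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	a := &toyAllocator{}
	_ = a.AllocateOrOccupyCIDR(klog.FromContext(ctx), "node-0")
	cancel()
	a.Run(ctx)
}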
// CIDRAllocatorParams is parameters that's required for creating new
@ -119,29 +119,30 @@ type nodeReservedCIDRs struct {
}
// New creates a new CIDR range allocator.
func New(kubeClient clientset.Interface, cloud cloudprovider.Interface, nodeInformer informers.NodeInformer, clusterCIDRInformer networkinginformers.ClusterCIDRInformer, allocatorType CIDRAllocatorType, allocatorParams CIDRAllocatorParams) (CIDRAllocator, error) {
nodeList, err := listNodes(kubeClient)
func New(ctx context.Context, kubeClient clientset.Interface, cloud cloudprovider.Interface, nodeInformer informers.NodeInformer, clusterCIDRInformer networkinginformers.ClusterCIDRInformer, allocatorType CIDRAllocatorType, allocatorParams CIDRAllocatorParams) (CIDRAllocator, error) {
logger := klog.FromContext(ctx)
nodeList, err := listNodes(logger, kubeClient)
if err != nil {
return nil, err
}
switch allocatorType {
case RangeAllocatorType:
return NewCIDRRangeAllocator(kubeClient, nodeInformer, allocatorParams, nodeList)
return NewCIDRRangeAllocator(logger, kubeClient, nodeInformer, allocatorParams, nodeList)
case MultiCIDRRangeAllocatorType:
if !utilfeature.DefaultFeatureGate.Enabled(features.MultiCIDRRangeAllocator) {
return nil, fmt.Errorf("invalid CIDR allocator type: %v, feature gate %v must be enabled", allocatorType, features.MultiCIDRRangeAllocator)
}
return NewMultiCIDRRangeAllocator(kubeClient, nodeInformer, clusterCIDRInformer, allocatorParams, nodeList, nil)
return NewMultiCIDRRangeAllocator(ctx, kubeClient, nodeInformer, clusterCIDRInformer, allocatorParams, nodeList, nil)
case CloudAllocatorType:
return NewCloudCIDRAllocator(kubeClient, cloud, nodeInformer)
return NewCloudCIDRAllocator(logger, kubeClient, cloud, nodeInformer)
default:
return nil, fmt.Errorf("invalid CIDR allocator type: %v", allocatorType)
}
}
func listNodes(kubeClient clientset.Interface) (*v1.NodeList, error) {
func listNodes(logger klog.Logger, kubeClient clientset.Interface) (*v1.NodeList, error) {
var nodeList *v1.NodeList
// We must poll because apiserver might not be up. This error causes
// controller manager to restart.
@ -152,7 +153,7 @@ func listNodes(kubeClient clientset.Interface) (*v1.NodeList, error) {
LabelSelector: labels.Everything().String(),
})
if err != nil {
klog.Errorf("Failed to list all nodes: %v", err)
logger.Error(err, "Failed to list all nodes")
return false, nil
}
return true, nil
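
listNodes logs each transient failure with logger.Error and returns (false, nil) so the surrounding backoff keeps retrying; only the poll's final failure aborts startup. A reduced sketch of that retry shape (the backoff values below are assumptions, not the controller's real ones):

package main

import (
	"errors"
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/klog/v2"
)

func listWithRetry(logger klog.Logger, list func() error) error {
	backoff := wait.Backoff{Duration: 100 * time.Millisecond, Factor: 2, Steps: 4} // assumed values
	return wait.ExponentialBackoff(backoff, func() (bool, error) {
		if err := list(); err != nil {
			// log and retry: (false, nil) means "not done, no fatal error"
			logger.Error(err, "Failed to list all nodes")
			return false, nil
		}
		return true, nil
	})
}

func main() {
	calls := 0
	err := listWithRetry(klog.Background(), func() error {
		calls++
		if calls < 3 {
			return errors.New("apiserver not ready")
		}
		return nil
	})
	fmt.Println("done after", calls, "calls, err:", err)
}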

View File

@ -23,7 +23,6 @@ import (
"testing"
"k8s.io/component-base/metrics/testutil"
"k8s.io/klog/v2"
netutils "k8s.io/utils/net"
)
@ -558,34 +557,35 @@ func TestGetBitforCIDR(t *testing.T) {
}
for _, tc := range cases {
_, clusterCIDR, err := netutils.ParseCIDRSloppy(tc.clusterCIDRStr)
if err != nil {
t.Fatalf("unexpected error: %v for %v", err, tc.description)
}
t.Run(tc.description, func(t *testing.T) {
_, clusterCIDR, err := netutils.ParseCIDRSloppy(tc.clusterCIDRStr)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
cs, err := NewCIDRSet(clusterCIDR, tc.subNetMaskSize)
if err != nil {
t.Fatalf("Error allocating CIDRSet for %v", tc.description)
}
_, subnetCIDR, err := netutils.ParseCIDRSloppy(tc.subNetCIDRStr)
if err != nil {
t.Fatalf("unexpected error: %v for %v", err, tc.description)
}
cs, err := NewCIDRSet(clusterCIDR, tc.subNetMaskSize)
if err != nil {
t.Fatalf("Error allocating CIDRSet")
}
_, subnetCIDR, err := netutils.ParseCIDRSloppy(tc.subNetCIDRStr)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
got, err := cs.getIndexForCIDR(subnetCIDR)
if err == nil && tc.expectErr {
t.Errorf("expected error but got null")
return
}
got, err := cs.getIndexForCIDR(subnetCIDR)
if err == nil && tc.expectErr {
klog.Errorf("expected error but got null for %v", tc.description)
continue
}
if err != nil && !tc.expectErr {
t.Errorf("unexpected error: %v", err)
return
}
if err != nil && !tc.expectErr {
klog.Errorf("unexpected error: %v for %v", err, tc.description)
continue
}
if got != tc.expectedBit {
klog.Errorf("expected %v, but got %v for %v", tc.expectedBit, got, tc.description)
}
if got != tc.expectedBit {
t.Errorf("expected %v, but got %v", tc.expectedBit, got)
}
})
}
}
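
The test hunk above replaces klog.Errorf plus continue with t.Run subtests using t.Errorf/t.Fatalf, so failures are attributed to the named case and recorded by the test framework rather than only logged. The general shape, as a self-contained example:

package example

import "testing"

func TestTableDriven(t *testing.T) {
	cases := []struct {
		description string
		in, want    int
	}{
		{description: "identity", in: 1, want: 1},
		{description: "zero", in: 0, want: 0},
	}
	for _, tc := range cases {
		t.Run(tc.description, func(t *testing.T) {
			got := tc.in // stand-in for the code under test
			if got != tc.want {
				// t.Errorf marks this subtest failed; no klog, no continue
				t.Errorf("expected %v, but got %v", tc.want, got)
			}
		})
	}
}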

View File

@ -20,6 +20,7 @@ limitations under the License.
package ipam
import (
"context"
"fmt"
"math/rand"
"net"
@ -27,6 +28,7 @@ import (
"time"
"github.com/google/go-cmp/cmp"
"k8s.io/klog/v2"
v1 "k8s.io/api/core/v1"
@ -85,9 +87,10 @@ type cloudCIDRAllocator struct {
var _ CIDRAllocator = (*cloudCIDRAllocator)(nil)
// NewCloudCIDRAllocator creates a new cloud CIDR allocator.
func NewCloudCIDRAllocator(client clientset.Interface, cloud cloudprovider.Interface, nodeInformer informers.NodeInformer) (CIDRAllocator, error) {
func NewCloudCIDRAllocator(logger klog.Logger, client clientset.Interface, cloud cloudprovider.Interface, nodeInformer informers.NodeInformer) (CIDRAllocator, error) {
if client == nil {
klog.Fatalf("kubeClient is nil when starting NodeController")
logger.Error(nil, "kubeClient is nil when starting cloud CIDR allocator")
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
}
eventBroadcaster := record.NewBroadcaster()
@ -111,74 +114,80 @@ func NewCloudCIDRAllocator(client clientset.Interface, cloud cloudprovider.Inter
}
nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: controllerutil.CreateAddNodeHandler(ca.AllocateOrOccupyCIDR),
AddFunc: controllerutil.CreateAddNodeHandler(
func(node *v1.Node) error {
return ca.AllocateOrOccupyCIDR(logger, node)
}),
UpdateFunc: controllerutil.CreateUpdateNodeHandler(func(_, newNode *v1.Node) error {
if newNode.Spec.PodCIDR == "" {
return ca.AllocateOrOccupyCIDR(newNode)
return ca.AllocateOrOccupyCIDR(logger, newNode)
}
// Even if PodCIDR is assigned, but NetworkUnavailable condition is
// set to true, we need to process the node to set the condition.
networkUnavailableTaint := &v1.Taint{Key: v1.TaintNodeNetworkUnavailable, Effect: v1.TaintEffectNoSchedule}
_, cond := controllerutil.GetNodeCondition(&newNode.Status, v1.NodeNetworkUnavailable)
if cond == nil || cond.Status != v1.ConditionFalse || utiltaints.TaintExists(newNode.Spec.Taints, networkUnavailableTaint) {
return ca.AllocateOrOccupyCIDR(newNode)
return ca.AllocateOrOccupyCIDR(logger, newNode)
}
return nil
}),
DeleteFunc: controllerutil.CreateDeleteNodeHandler(ca.ReleaseCIDR),
DeleteFunc: controllerutil.CreateDeleteNodeHandler(func(node *v1.Node) error {
return ca.ReleaseCIDR(logger, node)
}),
})
klog.Infof("Using cloud CIDR allocator (provider: %v)", cloud.ProviderName())
logger.Info("Using cloud CIDR allocator", "provider", cloud.ProviderName())
return ca, nil
}
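
The constructor hunk replaces klog.Fatalf with an explicit two-step: log the condition structurally, then flush and exit, so the structured entry reaches the sink before the process dies. A minimal sketch (the nil error argument mirrors the diff; klog.ExitFlushTimeout is klog's own constant):

package main

import "k8s.io/klog/v2"

func mustHaveClient(logger klog.Logger, client interface{}) {
	if client == nil {
		// before: klog.Fatalf("kubeClient is nil when starting NodeController")
		logger.Error(nil, "kubeClient is nil when starting cloud CIDR allocator")
		klog.FlushAndExit(klog.ExitFlushTimeout, 1) // flush buffered entries, then os.Exit(1)
	}
}

func main() {
	mustHaveClient(klog.Background(), struct{}{})
}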
func (ca *cloudCIDRAllocator) Run(stopCh <-chan struct{}) {
func (ca *cloudCIDRAllocator) Run(ctx context.Context) {
defer utilruntime.HandleCrash()
// Start event processing pipeline.
ca.broadcaster.StartStructuredLogging(0)
klog.Infof("Sending events to api server.")
logger := klog.FromContext(ctx)
logger.Info("Sending events to api server")
ca.broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: ca.client.CoreV1().Events("")})
defer ca.broadcaster.Shutdown()
klog.Infof("Starting cloud CIDR allocator")
defer klog.Infof("Shutting down cloud CIDR allocator")
logger.Info("Starting cloud CIDR allocator")
defer logger.Info("Shutting down cloud CIDR allocator")
if !cache.WaitForNamedCacheSync("cidrallocator", stopCh, ca.nodesSynced) {
if !cache.WaitForNamedCacheSync("cidrallocator", ctx.Done(), ca.nodesSynced) {
return
}
for i := 0; i < cidrUpdateWorkers; i++ {
go ca.worker(stopCh)
go ca.worker(ctx)
}
<-stopCh
<-ctx.Done()
}
func (ca *cloudCIDRAllocator) worker(stopChan <-chan struct{}) {
func (ca *cloudCIDRAllocator) worker(ctx context.Context) {
logger := klog.FromContext(ctx)
for {
select {
case workItem, ok := <-ca.nodeUpdateChannel:
if !ok {
klog.Warning("Channel nodeCIDRUpdateChannel was unexpectedly closed")
logger.Info("Channel nodeCIDRUpdateChannel was unexpectedly closed")
return
}
if err := ca.updateCIDRAllocation(workItem); err == nil {
klog.V(3).Infof("Updated CIDR for %q", workItem)
if err := ca.updateCIDRAllocation(logger, workItem); err == nil {
logger.V(3).Info("Updated CIDR", "workItem", workItem)
} else {
klog.Errorf("Error updating CIDR for %q: %v", workItem, err)
if canRetry, timeout := ca.retryParams(workItem); canRetry {
klog.V(2).Infof("Retrying update for %q after %v", workItem, timeout)
logger.Error(err, "Error updating CIDR", "workItem", workItem)
if canRetry, timeout := ca.retryParams(logger, workItem); canRetry {
logger.V(2).Info("Retrying update on next period", "workItem", workItem, "timeout", timeout)
time.AfterFunc(timeout, func() {
// Requeue the failed node for update again.
ca.nodeUpdateChannel <- workItem
})
continue
}
klog.Errorf("Exceeded retry count for %q, dropping from queue", workItem)
logger.Error(nil, "Exceeded retry count, dropping from queue", "workItem", workItem)
}
ca.removeNodeFromProcessing(workItem)
case <-stopChan:
case <-ctx.Done():
return
}
}
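
The worker above now selects on ctx.Done() instead of a stop channel and derives its logger from the context once, outside the loop. A reduced, runnable sketch of that loop shape (work items and retry logic elided):

package main

import (
	"context"
	"time"

	"k8s.io/klog/v2"
)

func worker(ctx context.Context, work <-chan string) {
	logger := klog.FromContext(ctx)
	for {
		select {
		case item, ok := <-work:
			if !ok {
				logger.Info("Channel was unexpectedly closed")
				return
			}
			logger.V(3).Info("Updated CIDR", "workItem", item)
		case <-ctx.Done(): // replaces: case <-stopChan
			return
		}
	}
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	work := make(chan string, 1)
	work <- "node-0"
	go worker(ctx, work)
	time.Sleep(50 * time.Millisecond)
	cancel()
}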
@ -194,13 +203,13 @@ func (ca *cloudCIDRAllocator) insertNodeToProcessing(nodeName string) bool {
return true
}
func (ca *cloudCIDRAllocator) retryParams(nodeName string) (bool, time.Duration) {
func (ca *cloudCIDRAllocator) retryParams(logger klog.Logger, nodeName string) (bool, time.Duration) {
ca.lock.Lock()
defer ca.lock.Unlock()
entry, ok := ca.nodesInProcessing[nodeName]
if !ok {
klog.Errorf("Cannot get retryParams for %q as entry does not exist", nodeName)
logger.Error(nil, "Cannot get retryParams for node as entry does not exist", "node", klog.KRef("", nodeName))
return false, 0
}
@ -233,28 +242,28 @@ func (ca *cloudCIDRAllocator) removeNodeFromProcessing(nodeName string) {
// WARNING: If you're adding any return calls or defer any more work from this
// function you have to make sure to update nodesInProcessing properly with the
// disposition of the node when the work is done.
func (ca *cloudCIDRAllocator) AllocateOrOccupyCIDR(node *v1.Node) error {
func (ca *cloudCIDRAllocator) AllocateOrOccupyCIDR(logger klog.Logger, node *v1.Node) error {
if node == nil {
return nil
}
if !ca.insertNodeToProcessing(node.Name) {
klog.V(2).InfoS("Node is already in a process of CIDR assignment", "node", klog.KObj(node))
logger.V(2).Info("Node is already in a process of CIDR assignment", "node", klog.KObj(node))
return nil
}
klog.V(4).Infof("Putting node %s into the work queue", node.Name)
logger.V(4).Info("Putting node into the work queue", "node", klog.KObj(node))
ca.nodeUpdateChannel <- node.Name
return nil
}
// updateCIDRAllocation assigns CIDR to Node and sends an update to the API server.
func (ca *cloudCIDRAllocator) updateCIDRAllocation(nodeName string) error {
func (ca *cloudCIDRAllocator) updateCIDRAllocation(logger klog.Logger, nodeName string) error {
node, err := ca.nodeLister.Get(nodeName)
if err != nil {
if errors.IsNotFound(err) {
return nil // node no longer available, skip processing
}
klog.ErrorS(err, "Failed while getting the node for updating Node.Spec.PodCIDR", "nodeName", nodeName)
logger.Error(err, "Failed while getting the node for updating Node.Spec.PodCIDR", "node", klog.KRef("", nodeName))
return err
}
if node.Spec.ProviderID == "" {
@ -272,7 +281,7 @@ func (ca *cloudCIDRAllocator) updateCIDRAllocation(nodeName string) error {
}
//Can have at most 2 ips (one for v4 and one for v6)
if len(cidrStrings) > 2 {
klog.InfoS("Got more than 2 ips, truncating to 2", "cidrStrings", cidrStrings)
logger.Info("Got more than 2 ips, truncating to 2", "cidrStrings", cidrStrings)
cidrStrings = cidrStrings[:2]
}
@ -281,13 +290,13 @@ func (ca *cloudCIDRAllocator) updateCIDRAllocation(nodeName string) error {
return fmt.Errorf("failed to parse strings %v as CIDRs: %v", cidrStrings, err)
}
needUpdate, err := needPodCIDRsUpdate(node, cidrs)
needUpdate, err := needPodCIDRsUpdate(logger, node, cidrs)
if err != nil {
return fmt.Errorf("err: %v, CIDRS: %v", err, cidrStrings)
}
if needUpdate {
if node.Spec.PodCIDR != "" {
klog.ErrorS(nil, "PodCIDR being reassigned!", "nodeName", node.Name, "podCIDRs", node.Spec.PodCIDRs, "cidrStrings", cidrStrings)
logger.Error(nil, "PodCIDR being reassigned", "node", klog.KObj(node), "podCIDRs", node.Spec.PodCIDRs, "cidrStrings", cidrStrings)
// We fall through and set the CIDR despite this error. This
// implements the same logic as implemented in the
// rangeAllocator.
@ -296,14 +305,14 @@ func (ca *cloudCIDRAllocator) updateCIDRAllocation(nodeName string) error {
}
for i := 0; i < cidrUpdateRetries; i++ {
if err = nodeutil.PatchNodeCIDRs(ca.client, types.NodeName(node.Name), cidrStrings); err == nil {
klog.InfoS("Set the node PodCIDRs", "nodeName", node.Name, "cidrStrings", cidrStrings)
logger.Info("Set the node PodCIDRs", "node", klog.KObj(node), "cidrStrings", cidrStrings)
break
}
}
}
if err != nil {
controllerutil.RecordNodeStatusChange(ca.recorder, node, "CIDRAssignmentFailed")
klog.ErrorS(err, "Failed to update the node PodCIDR after multiple attempts", "nodeName", node.Name, "cidrStrings", cidrStrings)
logger.Error(err, "Failed to update the node PodCIDR after multiple attempts", "node", klog.KObj(node), "cidrStrings", cidrStrings)
return err
}
@ -315,31 +324,31 @@ func (ca *cloudCIDRAllocator) updateCIDRAllocation(nodeName string) error {
LastTransitionTime: metav1.Now(),
})
if err != nil {
klog.ErrorS(err, "Error setting route status for the node", "nodeName", node.Name)
logger.Error(err, "Error setting route status for the node", "node", klog.KObj(node))
}
return err
}
func needPodCIDRsUpdate(node *v1.Node, podCIDRs []*net.IPNet) (bool, error) {
func needPodCIDRsUpdate(logger klog.Logger, node *v1.Node, podCIDRs []*net.IPNet) (bool, error) {
if node.Spec.PodCIDR == "" {
return true, nil
}
_, nodePodCIDR, err := netutils.ParseCIDRSloppy(node.Spec.PodCIDR)
if err != nil {
klog.ErrorS(err, "Found invalid node.Spec.PodCIDR", "podCIDR", node.Spec.PodCIDR)
logger.Error(err, "Found invalid node.Spec.PodCIDR", "podCIDR", node.Spec.PodCIDR)
// We will try to overwrite with new CIDR(s)
return true, nil
}
nodePodCIDRs, err := netutils.ParseCIDRs(node.Spec.PodCIDRs)
if err != nil {
klog.ErrorS(err, "Found invalid node.Spec.PodCIDRs", "podCIDRs", node.Spec.PodCIDRs)
logger.Error(err, "Found invalid node.Spec.PodCIDRs", "podCIDRs", node.Spec.PodCIDRs)
// We will try to overwrite with new CIDR(s)
return true, nil
}
if len(podCIDRs) == 1 {
if cmp.Equal(nodePodCIDR, podCIDRs[0]) {
klog.V(4).InfoS("Node already has allocated CIDR. It matches the proposed one.", "nodeName", node.Name, "podCIDR", podCIDRs[0])
logger.V(4).Info("Node already has allocated CIDR. It matches the proposed one", "node", klog.KObj(node), "podCIDR", podCIDRs[0])
return false, nil
}
} else if len(nodePodCIDRs) == len(podCIDRs) {
@ -351,15 +360,15 @@ func needPodCIDRsUpdate(node *v1.Node, podCIDRs []*net.IPNet) (bool, error) {
return true, nil
}
}
klog.V(4).InfoS("Node already has allocated CIDRs. It matches the proposed one.", "nodeName", node.Name, "podCIDRs", podCIDRs)
logger.V(4).Info("Node already has allocated CIDRs. It matches the proposed one", "node", klog.KObj(node), "podCIDRs", podCIDRs)
return false, nil
}
return true, nil
}
func (ca *cloudCIDRAllocator) ReleaseCIDR(node *v1.Node) error {
klog.V(2).Infof("Node %v PodCIDR (%v) will be released by external cloud provider (not managed by controller)",
node.Name, node.Spec.PodCIDR)
func (ca *cloudCIDRAllocator) ReleaseCIDR(logger klog.Logger, node *v1.Node) error {
logger.V(2).Info("Node's PodCIDR will be released by external cloud provider (not managed by controller)",
"node", klog.KObj(node), "podCIDR", node.Spec.PodCIDR)
return nil
}
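
Throughout these hunks, klog.KObj is used when the *v1.Node is in hand and klog.KRef when only the name is available (for example, a work-queue key); both render the same namespace/name reference, which keeps log queries uniform. A small sketch:

package main

import (
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/klog/v2"
)

func main() {
	logger := klog.Background()
	node := &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node-0"}}

	// object in hand: KObj extracts namespace/name from its metadata
	logger.Info("Set the node PodCIDRs", "node", klog.KObj(node))

	// only the name available: KRef builds the same reference
	logger.Info("Failed while getting the node", "node", klog.KRef("", "node-0"))
}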

View File

@ -25,9 +25,10 @@ import (
informers "k8s.io/client-go/informers/core/v1"
clientset "k8s.io/client-go/kubernetes"
cloudprovider "k8s.io/cloud-provider"
"k8s.io/klog/v2"
)
// NewCloudCIDRAllocator creates a new cloud CIDR allocator.
func NewCloudCIDRAllocator(client clientset.Interface, cloud cloudprovider.Interface, nodeInformer informers.NodeInformer) (CIDRAllocator, error) {
func NewCloudCIDRAllocator(logger klog.Logger, client clientset.Interface, cloud cloudprovider.Interface, nodeInformer informers.NodeInformer) (CIDRAllocator, error) {
return nil, errors.New("legacy cloud provider support not built")
}

View File

@ -28,6 +28,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes/fake"
"k8s.io/klog/v2/ktesting"
netutils "k8s.io/utils/net"
)
@ -42,7 +43,6 @@ func hasNodeInProcessing(ca *cloudCIDRAllocator, name string) bool {
func TestBoundedRetries(t *testing.T) {
clientSet := fake.NewSimpleClientset()
updateChan := make(chan string, 1) // need to buffer as we are using only one goroutine
stopChan := make(chan struct{})
sharedInfomer := informers.NewSharedInformerFactory(clientSet, 1*time.Hour)
ca := &cloudCIDRAllocator{
client: clientSet,
@ -51,9 +51,10 @@ func TestBoundedRetries(t *testing.T) {
nodesSynced: sharedInfomer.Core().V1().Nodes().Informer().HasSynced,
nodesInProcessing: map[string]*nodeProcessingInfo{},
}
go ca.worker(stopChan)
logger, ctx := ktesting.NewTestContext(t)
go ca.worker(ctx)
nodeName := "testNode"
ca.AllocateOrOccupyCIDR(&v1.Node{
ca.AllocateOrOccupyCIDR(logger, &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: nodeName,
},
@ -176,9 +177,9 @@ func TestNeedPodCIDRsUpdate(t *testing.T) {
if err != nil {
t.Errorf("failed to parse %v as CIDRs: %v", tc.cidrs, err)
}
logger, _ := ktesting.NewTestContext(t)
t.Run(tc.desc, func(t *testing.T) {
got, err := needPodCIDRsUpdate(&node, netCIDRs)
got, err := needPodCIDRsUpdate(logger, &node, netCIDRs)
if tc.wantErr == (err == nil) {
t.Errorf("err: %v, wantErr: %v", err, tc.wantErr)
}
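
The test now builds its logger and context with ktesting.NewTestContext, which routes log output through the testing.T (so it is shown per test, on failure) and embeds the same logger in the returned context. The general usage shape:

package example

import (
	"testing"

	"k8s.io/klog/v2"
	"k8s.io/klog/v2/ktesting"
)

func doWork(logger klog.Logger) {
	logger.Info("doing work")
}

func TestWithKtesting(t *testing.T) {
	logger, ctx := ktesting.NewTestContext(t)

	// pass the logger explicitly to synchronous helpers...
	doWork(logger)

	// ...and the context to anything long-running; klog.FromContext(ctx)
	// recovers the same test-scoped logger
	klog.FromContext(ctx).Info("from context")
}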

View File

@ -20,6 +20,7 @@ limitations under the License.
package ipam
import (
"context"
"fmt"
"net"
"sync"
@ -113,10 +114,10 @@ func NewController(
// Start initializes the Controller with the existing list of nodes and
// registers the informers for node changes. This will start synchronization
// of the node and cloud CIDR range allocations.
func (c *Controller) Start(nodeInformer informers.NodeInformer) error {
klog.InfoS("Starting IPAM controller", "config", c.config)
func (c *Controller) Start(logger klog.Logger, nodeInformer informers.NodeInformer) error {
logger.Info("Starting IPAM controller", "config", c.config)
nodes, err := listNodes(c.adapter.k8s)
nodes, err := listNodes(logger, c.adapter.k8s)
if err != nil {
return err
}
@ -125,9 +126,9 @@ func (c *Controller) Start(nodeInformer informers.NodeInformer) error {
_, cidrRange, err := netutils.ParseCIDRSloppy(node.Spec.PodCIDR)
if err == nil {
c.set.Occupy(cidrRange)
klog.V(3).InfoS("Occupying CIDR for node", "CIDR", node.Spec.PodCIDR, "node", node.Name)
logger.V(3).Info("Occupying CIDR for node", "CIDR", node.Spec.PodCIDR, "node", klog.KObj(&node))
} else {
klog.ErrorS(err, "Node has an invalid CIDR", "node", node.Name, "CIDR", node.Spec.PodCIDR)
logger.Error(err, "Node has an invalid CIDR", "node", klog.KObj(&node), "CIDR", node.Spec.PodCIDR)
}
}
@ -138,24 +139,30 @@ func (c *Controller) Start(nodeInformer informers.NodeInformer) error {
// XXX/bowei -- stagger the start of each sync cycle.
syncer := c.newSyncer(node.Name)
c.syncers[node.Name] = syncer
go syncer.Loop(nil)
go syncer.Loop(logger, nil)
}()
}
nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: controllerutil.CreateAddNodeHandler(c.onAdd),
UpdateFunc: controllerutil.CreateUpdateNodeHandler(c.onUpdate),
DeleteFunc: controllerutil.CreateDeleteNodeHandler(c.onDelete),
AddFunc: controllerutil.CreateAddNodeHandler(func(node *v1.Node) error {
return c.onAdd(logger, node)
}),
UpdateFunc: controllerutil.CreateUpdateNodeHandler(func(_, newNode *v1.Node) error {
return c.onUpdate(logger, newNode)
}),
DeleteFunc: controllerutil.CreateDeleteNodeHandler(func(node *v1.Node) error {
return c.onDelete(logger, node)
}),
})
return nil
}
func (c *Controller) Run(stopCh <-chan struct{}) {
func (c *Controller) Run(ctx context.Context) {
defer utilruntime.HandleCrash()
go c.adapter.Run(stopCh)
<-stopCh
go c.adapter.Run(ctx)
<-ctx.Done()
}
// occupyServiceCIDR removes the service CIDR range from the cluster CIDR if it
@ -192,7 +199,7 @@ func (c *Controller) newSyncer(name string) *nodesync.NodeSync {
return nodesync.New(ns, c.adapter, c.adapter, c.config.Mode, name, c.set)
}
func (c *Controller) onAdd(node *v1.Node) error {
func (c *Controller) onAdd(logger klog.Logger, node *v1.Node) error {
c.lock.Lock()
defer c.lock.Unlock()
@ -200,30 +207,30 @@ func (c *Controller) onAdd(node *v1.Node) error {
if !ok {
syncer = c.newSyncer(node.Name)
c.syncers[node.Name] = syncer
go syncer.Loop(nil)
go syncer.Loop(logger, nil)
} else {
klog.InfoS("Add for node that already exists", "node", node.Name)
logger.Info("Add for node that already exists", "node", klog.KObj(node))
}
syncer.Update(node)
return nil
}
func (c *Controller) onUpdate(_, node *v1.Node) error {
func (c *Controller) onUpdate(logger klog.Logger, node *v1.Node) error {
c.lock.Lock()
defer c.lock.Unlock()
if sync, ok := c.syncers[node.Name]; ok {
sync.Update(node)
} else {
klog.ErrorS(nil, "Received update for non-existent node", "node", node.Name)
logger.Error(nil, "Received update for non-existent node", "node", klog.KObj(node))
return fmt.Errorf("unknown node %q", node.Name)
}
return nil
}
func (c *Controller) onDelete(node *v1.Node) error {
func (c *Controller) onDelete(logger klog.Logger, node *v1.Node) error {
c.lock.Lock()
defer c.lock.Unlock()
@ -231,7 +238,7 @@ func (c *Controller) onDelete(node *v1.Node) error {
syncer.Delete(node)
delete(c.syncers, node.Name)
} else {
klog.InfoS("Node was already deleted", "node", node.Name)
logger.Info("Node was already deleted", "node", klog.KObj(node))
}
return nil

View File

@ -102,6 +102,7 @@ type multiCIDRRangeAllocator struct {
// Caller must always pass in a list of existing nodes to the new allocator.
// NodeList is only nil in testing.
func NewMultiCIDRRangeAllocator(
ctx context.Context,
client clientset.Interface,
nodeInformer informers.NodeInformer,
clusterCIDRInformer networkinginformers.ClusterCIDRInformer,
@ -109,8 +110,10 @@ func NewMultiCIDRRangeAllocator(
nodeList *v1.NodeList,
testCIDRMap map[string][]*cidrset.ClusterCIDR,
) (CIDRAllocator, error) {
logger := klog.FromContext(ctx)
if client == nil {
klog.Fatalf("client is nil")
logger.Error(nil, "kubeClient is nil when starting multi CIDRRangeAllocator")
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
}
eventBroadcaster := record.NewBroadcaster()
@ -137,10 +140,10 @@ func NewMultiCIDRRangeAllocator(
// testCIDRMap is only set for testing purposes.
if len(testCIDRMap) > 0 {
ra.cidrMap = testCIDRMap
klog.Warningf("testCIDRMap should only be set for testing purposes, if this is seen in production logs, it might be a misconfiguration or a bug.")
logger.Info("TestCIDRMap should only be set for testing purposes, if this is seen in production logs, it might be a misconfiguration or a bug")
}
ccList, err := listClusterCIDRs(client)
ccList, err := listClusterCIDRs(ctx, client)
if err != nil {
return nil, err
}
@ -148,14 +151,14 @@ func NewMultiCIDRRangeAllocator(
if ccList == nil {
ccList = &networkingv1alpha1.ClusterCIDRList{}
}
createDefaultClusterCIDR(ccList, allocatorParams)
createDefaultClusterCIDR(logger, ccList, allocatorParams)
// Regenerate the cidrMaps from the existing ClusterCIDRs.
for _, clusterCIDR := range ccList.Items {
klog.Infof("Regenerating existing ClusterCIDR: %v", clusterCIDR)
logger.Info("Regenerating existing ClusterCIDR", "clusterCIDR", clusterCIDR)
// Create an event for invalid ClusterCIDRs, do not crash on failures.
if err := ra.reconcileBootstrap(&clusterCIDR); err != nil {
klog.Errorf("Error while regenerating existing ClusterCIDR: %v", err)
if err := ra.reconcileBootstrap(ctx, &clusterCIDR); err != nil {
logger.Error(err, "Error while regenerating existing ClusterCIDR")
ra.recorder.Event(&clusterCIDR, "Warning", "InvalidClusterCIDR encountered while regenerating ClusterCIDR during bootstrap.", err.Error())
}
}
@ -184,25 +187,25 @@ func NewMultiCIDRRangeAllocator(
})
if allocatorParams.ServiceCIDR != nil {
ra.filterOutServiceRange(allocatorParams.ServiceCIDR)
ra.filterOutServiceRange(logger, allocatorParams.ServiceCIDR)
} else {
klog.Info("No Service CIDR provided. Skipping filtering out service addresses.")
logger.Info("No Service CIDR provided. Skipping filtering out service addresses")
}
if allocatorParams.SecondaryServiceCIDR != nil {
ra.filterOutServiceRange(allocatorParams.SecondaryServiceCIDR)
ra.filterOutServiceRange(logger, allocatorParams.SecondaryServiceCIDR)
} else {
klog.Info("No Secondary Service CIDR provided. Skipping filtering out secondary service addresses.")
logger.Info("No Secondary Service CIDR provided. Skipping filtering out secondary service addresses")
}
if nodeList != nil {
for _, node := range nodeList.Items {
if len(node.Spec.PodCIDRs) == 0 {
klog.V(4).Infof("Node %v has no CIDR, ignoring", node.Name)
logger.V(4).Info("Node has no CIDR, ignoring", "node", klog.KObj(&node))
continue
}
klog.Infof("Node %v has CIDR %s, occupying it in CIDR map", node.Name, node.Spec.PodCIDRs)
if err := ra.occupyCIDRs(&node); err != nil {
logger.Info("Node has CIDR, occupying it in CIDR map", "node", klog.KObj(&node), "podCIDRs", node.Spec.PodCIDRs)
if err := ra.occupyCIDRs(logger, &node); err != nil {
// This will happen if:
// 1. We find garbage in the podCIDRs field. Retrying is useless.
// 2. CIDR out of range: This means ClusterCIDR is not yet created
@ -231,7 +234,7 @@ func NewMultiCIDRRangeAllocator(
// we don't see the Update with DeletionTimestamp != 0.
// TODO: instead of executing the operation directly in the handler, build a small cache with key node.Name
// and value PodCIDRs use ReleaseCIDR on the reconcile loop so we can retry on `ReleaseCIDR` failures.
ra.ReleaseCIDR(obj.(*v1.Node))
ra.ReleaseCIDR(logger, obj.(*v1.Node))
// IndexerInformer uses a delta nodeQueue, therefore for deletes we have to use this
// key function.
key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj)
@ -244,51 +247,52 @@ func NewMultiCIDRRangeAllocator(
return ra, nil
}
func (r *multiCIDRRangeAllocator) Run(stopCh <-chan struct{}) {
func (r *multiCIDRRangeAllocator) Run(ctx context.Context) {
defer utilruntime.HandleCrash()
// Start event processing pipeline.
logger := klog.FromContext(ctx)
r.broadcaster.StartStructuredLogging(0)
klog.Infof("Started sending events to API Server.")
logger.Info("Started sending events to API Server")
r.broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: r.client.CoreV1().Events("")})
defer r.broadcaster.Shutdown()
defer r.cidrQueue.ShutDown()
defer r.nodeQueue.ShutDown()
klog.Infof("Starting Multi CIDR Range allocator")
defer klog.Infof("Shutting down Multi CIDR Range allocator")
logger.Info("Starting Multi CIDR Range allocator")
defer logger.Info("Shutting down Multi CIDR Range allocator")
if !cache.WaitForNamedCacheSync("multi_cidr_range_allocator", stopCh, r.nodesSynced, r.clusterCIDRSynced) {
if !cache.WaitForNamedCacheSync("multi_cidr_range_allocator", ctx.Done(), r.nodesSynced, r.clusterCIDRSynced) {
return
}
for i := 0; i < cidrUpdateWorkers; i++ {
go wait.Until(r.runCIDRWorker, time.Second, stopCh)
go wait.Until(r.runNodeWorker, time.Second, stopCh)
go wait.UntilWithContext(ctx, r.runCIDRWorker, time.Second)
go wait.UntilWithContext(ctx, r.runNodeWorker, time.Second)
}
<-stopCh
<-ctx.Done()
}
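
The Run hunk swaps wait.Until(f, period, stopCh) for wait.UntilWithContext(ctx, f, period): the loop still re-runs f every period until shutdown, but f now receives the context, so workers can recover the logger and observe cancellation. A compact sketch:

package main

import (
	"context"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/klog/v2"
)

func runCIDRWorker(ctx context.Context) {
	klog.FromContext(ctx).V(4).Info("worker tick")
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	go wait.UntilWithContext(ctx, runCIDRWorker, time.Second) // was: wait.Until(worker, time.Second, stopCh)
	time.Sleep(10 * time.Millisecond)
	cancel()
}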
// runWorker is a long-running function that will continually call the
// processNextWorkItem function in order to read and process a message on the
// cidrQueue.
func (r *multiCIDRRangeAllocator) runCIDRWorker() {
for r.processNextCIDRWorkItem() {
func (r *multiCIDRRangeAllocator) runCIDRWorker(ctx context.Context) {
for r.processNextCIDRWorkItem(ctx) {
}
}
// processNextWorkItem will read a single work item off the cidrQueue and
// attempt to process it, by calling the syncHandler.
func (r *multiCIDRRangeAllocator) processNextCIDRWorkItem() bool {
func (r *multiCIDRRangeAllocator) processNextCIDRWorkItem(ctx context.Context) bool {
obj, shutdown := r.cidrQueue.Get()
if shutdown {
return false
}
// We wrap this block in a func so we can defer c.cidrQueue.Done.
err := func(obj interface{}) error {
err := func(ctx context.Context, obj interface{}) error {
// We call Done here so the cidrQueue knows we have finished
// processing this item. We also must remember to call Forget if we
// do not want this work item being re-queued. For example, we do
@ -313,7 +317,7 @@ func (r *multiCIDRRangeAllocator) processNextCIDRWorkItem() bool {
}
// Run the syncHandler, passing it the namespace/name string of the
// Foo resource to be synced.
if err := r.syncClusterCIDR(key); err != nil {
if err := r.syncClusterCIDR(ctx, key); err != nil {
// Put the item back on the cidrQueue to handle any transient errors.
r.cidrQueue.AddRateLimited(key)
return fmt.Errorf("error syncing '%s': %s, requeuing", key, err.Error())
@ -323,7 +327,7 @@ func (r *multiCIDRRangeAllocator) processNextCIDRWorkItem() bool {
r.cidrQueue.Forget(obj)
klog.Infof("Successfully synced '%s'", key)
return nil
}(obj)
}(ctx, obj)
if err != nil {
utilruntime.HandleError(err)
@ -333,21 +337,21 @@ func (r *multiCIDRRangeAllocator) processNextCIDRWorkItem() bool {
return true
}
func (r *multiCIDRRangeAllocator) runNodeWorker() {
for r.processNextNodeWorkItem() {
func (r *multiCIDRRangeAllocator) runNodeWorker(ctx context.Context) {
for r.processNextNodeWorkItem(ctx) {
}
}
// processNextWorkItem will read a single work item off the cidrQueue and
// attempt to process it, by calling the syncHandler.
func (r *multiCIDRRangeAllocator) processNextNodeWorkItem() bool {
func (r *multiCIDRRangeAllocator) processNextNodeWorkItem(ctx context.Context) bool {
obj, shutdown := r.nodeQueue.Get()
if shutdown {
return false
}
// We wrap this block in a func so we can defer c.cidrQueue.Done.
err := func(obj interface{}) error {
err := func(logger klog.Logger, obj interface{}) error {
// We call Done here so the workNodeQueue knows we have finished
// processing this item. We also must remember to call Forget if we
// do not want this work item being re-queued. For example, we do
@ -372,7 +376,7 @@ func (r *multiCIDRRangeAllocator) processNextNodeWorkItem() bool {
}
// Run the syncHandler, passing it the namespace/name string of the
// Foo resource to be synced.
if err := r.syncNode(key); err != nil {
if err := r.syncNode(logger, key); err != nil {
// Put the item back on the cidrQueue to handle any transient errors.
r.nodeQueue.AddRateLimited(key)
return fmt.Errorf("error syncing '%s': %s, requeuing", key, err.Error())
@ -382,7 +386,7 @@ func (r *multiCIDRRangeAllocator) processNextNodeWorkItem() bool {
r.nodeQueue.Forget(obj)
klog.Infof("Successfully synced '%s'", key)
return nil
}(obj)
}(klog.FromContext(ctx), obj)
if err != nil {
utilruntime.HandleError(err)
@ -392,7 +396,7 @@ func (r *multiCIDRRangeAllocator) processNextNodeWorkItem() bool {
return true
}
func (r *multiCIDRRangeAllocator) syncNode(key string) error {
func (r *multiCIDRRangeAllocator) syncNode(logger klog.Logger, key string) error {
startTime := time.Now()
defer func() {
klog.V(4).Infof("Finished syncing Node request %q (%v)", key, time.Since(startTime))
@ -411,9 +415,9 @@ func (r *multiCIDRRangeAllocator) syncNode(key string) error {
// Check the DeletionTimestamp to determine if object is under deletion.
if !node.DeletionTimestamp.IsZero() {
klog.V(3).Infof("node is being deleted: %v", key)
return r.ReleaseCIDR(node)
return r.ReleaseCIDR(logger, node)
}
return r.AllocateOrOccupyCIDR(node)
return r.AllocateOrOccupyCIDR(logger, node)
}
// needToAddFinalizer checks if a finalizer should be added to the object.
@ -422,15 +426,16 @@ func needToAddFinalizer(obj metav1.Object, finalizer string) bool {
finalizer, nil)
}
func (r *multiCIDRRangeAllocator) syncClusterCIDR(key string) error {
func (r *multiCIDRRangeAllocator) syncClusterCIDR(ctx context.Context, key string) error {
startTime := time.Now()
logger := klog.FromContext(ctx)
defer func() {
klog.V(4).Infof("Finished syncing clusterCIDR request %q (%v)", key, time.Since(startTime))
logger.V(4).Info("Finished syncing clusterCIDR request", "key", key, "latency", time.Since(startTime))
}()
clusterCIDR, err := r.clusterCIDRLister.Get(key)
if apierrors.IsNotFound(err) {
klog.V(3).Infof("clusterCIDR has been deleted: %v", key)
logger.V(3).Info("clusterCIDR has been deleted", "key", key)
return nil
}
@ -440,21 +445,20 @@ func (r *multiCIDRRangeAllocator) syncClusterCIDR(key string) error {
// Check the DeletionTimestamp to determine if object is under deletion.
if !clusterCIDR.DeletionTimestamp.IsZero() {
return r.reconcileDelete(clusterCIDR)
return r.reconcileDelete(ctx, clusterCIDR)
}
return r.reconcileCreate(clusterCIDR)
return r.reconcileCreate(ctx, clusterCIDR)
}
// occupyCIDRs marks node.PodCIDRs[...] as used in allocator's tracked cidrSet.
func (r *multiCIDRRangeAllocator) occupyCIDRs(node *v1.Node) error {
func (r *multiCIDRRangeAllocator) occupyCIDRs(logger klog.Logger, node *v1.Node) error {
err := func(node *v1.Node) error {
if len(node.Spec.PodCIDRs) == 0 {
return nil
}
clusterCIDRList, err := r.orderedMatchingClusterCIDRs(node, true)
clusterCIDRList, err := r.orderedMatchingClusterCIDRs(logger, node, true)
if err != nil {
return err
}
@ -468,10 +472,10 @@ func (r *multiCIDRRangeAllocator) occupyCIDRs(node *v1.Node) error {
return fmt.Errorf("failed to parse CIDR %s on Node %v: %w", cidr, node.Name, err)
}
klog.Infof("occupy CIDR %s for node: %s", cidr, node.Name)
logger.Info("occupy CIDR for node", "CIDR", cidr, "node", klog.KObj(node))
if err := r.Occupy(clusterCIDR, podCIDR); err != nil {
klog.V(3).Infof("Could not occupy cidr: %v, trying next range: %w", node.Spec.PodCIDRs, err)
logger.V(3).Info("Could not occupy cidr, trying next range", "podCIDRs", node.Spec.PodCIDRs, "err", err)
break
}
@ -519,14 +523,14 @@ func (r *multiCIDRRangeAllocator) Occupy(clusterCIDR *cidrset.ClusterCIDR, cidr
// Release marks the CIDR as free in the cidrSet used bitmap,
// Also removes the CIDR from the allocatedCIDRSet.
func (r *multiCIDRRangeAllocator) Release(clusterCIDR *cidrset.ClusterCIDR, cidr *net.IPNet) error {
func (r *multiCIDRRangeAllocator) Release(logger klog.Logger, clusterCIDR *cidrset.ClusterCIDR, cidr *net.IPNet) error {
currCIDRSet, err := r.associatedCIDRSet(clusterCIDR, cidr)
if err != nil {
return err
}
if err := currCIDRSet.Release(cidr); err != nil {
klog.Infof("Unable to release cidr %v in cidrSet", cidr)
logger.Info("Unable to release cidr in cidrSet", "CIDR", cidr)
return err
}
@ -539,7 +543,7 @@ func (r *multiCIDRRangeAllocator) Release(clusterCIDR *cidrset.ClusterCIDR, cidr
// WARNING: If you're adding any return calls or defer any more work from this
// function you have to make sure to update nodesInProcessing properly with the
// disposition of the node when the work is done.
func (r *multiCIDRRangeAllocator) AllocateOrOccupyCIDR(node *v1.Node) error {
func (r *multiCIDRRangeAllocator) AllocateOrOccupyCIDR(logger klog.Logger, node *v1.Node) error {
r.lock.Lock()
defer r.lock.Unlock()
@ -548,10 +552,10 @@ func (r *multiCIDRRangeAllocator) AllocateOrOccupyCIDR(node *v1.Node) error {
}
if len(node.Spec.PodCIDRs) > 0 {
return r.occupyCIDRs(node)
return r.occupyCIDRs(logger, node)
}
cidrs, clusterCIDR, err := r.prioritizedCIDRs(node)
cidrs, clusterCIDR, err := r.prioritizedCIDRs(logger, node)
if err != nil {
controllerutil.RecordNodeStatusChange(r.recorder, node, "CIDRNotAvailable")
return fmt.Errorf("failed to get cidrs for node %s", node.Name)
@ -571,11 +575,11 @@ func (r *multiCIDRRangeAllocator) AllocateOrOccupyCIDR(node *v1.Node) error {
clusterCIDR: clusterCIDR,
}
return r.updateCIDRsAllocation(allocated)
return r.updateCIDRsAllocation(logger, allocated)
}
// ReleaseCIDR marks node.podCIDRs[...] as unused in our tracked cidrSets.
func (r *multiCIDRRangeAllocator) ReleaseCIDR(node *v1.Node) error {
func (r *multiCIDRRangeAllocator) ReleaseCIDR(logger klog.Logger, node *v1.Node) error {
r.lock.Lock()
defer r.lock.Unlock()
@ -583,7 +587,7 @@ func (r *multiCIDRRangeAllocator) ReleaseCIDR(node *v1.Node) error {
return nil
}
clusterCIDR, err := r.allocatedClusterCIDR(node)
clusterCIDR, err := r.allocatedClusterCIDR(logger, node)
if err != nil {
return err
}
@ -594,8 +598,8 @@ func (r *multiCIDRRangeAllocator) ReleaseCIDR(node *v1.Node) error {
return fmt.Errorf("failed to parse CIDR %q on Node %q: %w", cidr, node.Name, err)
}
klog.Infof("release CIDR %s for node: %s", cidr, node.Name)
if err := r.Release(clusterCIDR, podCIDR); err != nil {
logger.Info("release CIDR for node", "CIDR", cidr, "node", klog.KObj(node))
if err := r.Release(logger, clusterCIDR, podCIDR); err != nil {
return fmt.Errorf("failed to release cidr %q from clusterCIDR %q for node %q: %w", cidr, clusterCIDR.Name, node.Name, err)
}
}
@ -608,7 +612,7 @@ func (r *multiCIDRRangeAllocator) ReleaseCIDR(node *v1.Node) error {
// Marks all CIDRs with subNetMaskSize that belongs to serviceCIDR as used across all cidrs
// so that they won't be assignable.
func (r *multiCIDRRangeAllocator) filterOutServiceRange(serviceCIDR *net.IPNet) {
func (r *multiCIDRRangeAllocator) filterOutServiceRange(logger klog.Logger, serviceCIDR *net.IPNet) {
// Checks if service CIDR has a nonempty intersection with cluster
// CIDR. It is the case if either clusterCIDR contains serviceCIDR with
// clusterCIDR's Mask applied (this means that clusterCIDR contains
@ -617,7 +621,7 @@ func (r *multiCIDRRangeAllocator) filterOutServiceRange(serviceCIDR *net.IPNet)
for _, clusterCIDRList := range r.cidrMap {
for _, clusterCIDR := range clusterCIDRList {
if err := r.occupyServiceCIDR(clusterCIDR, serviceCIDR); err != nil {
klog.Errorf("unable to occupy service CIDR: %w", err)
logger.Error(err, "Unable to occupy service CIDR")
}
}
}
@ -645,12 +649,12 @@ func (r *multiCIDRRangeAllocator) occupyServiceCIDR(clusterCIDR *cidrset.Cluster
}
// updateCIDRsAllocation assigns CIDR to Node and sends an update to the API server.
func (r *multiCIDRRangeAllocator) updateCIDRsAllocation(data multiCIDRNodeReservedCIDRs) error {
func (r *multiCIDRRangeAllocator) updateCIDRsAllocation(logger klog.Logger, data multiCIDRNodeReservedCIDRs) error {
err := func(data multiCIDRNodeReservedCIDRs) error {
cidrsString := ipnetToStringList(data.allocatedCIDRs)
node, err := r.nodeLister.Get(data.nodeName)
if err != nil {
klog.Errorf("Failed while getting node %v for updating Node.Spec.PodCIDRs: %v", data.nodeName, err)
logger.Error(err, "Failed while getting node for updating Node.Spec.PodCIDRs", "node", klog.KRef("", data.nodeName))
return err
}
@ -666,16 +670,16 @@ func (r *multiCIDRRangeAllocator) updateCIDRsAllocation(data multiCIDRNodeReserv
}
}
if match {
klog.V(4).Infof("Node %q already has allocated CIDR %q. It matches the proposed one.", node.Name, data.allocatedCIDRs)
logger.V(4).Info("Node already has allocated CIDR. It matches the proposed one.", "node", klog.KObj(node), "CIDRs", data.allocatedCIDRs)
return nil
}
}
// node has cidrs allocated, release the reserved.
if len(node.Spec.PodCIDRs) != 0 {
klog.Errorf("Node %q already has a CIDR allocated %q. Releasing the new one.", node.Name, node.Spec.PodCIDRs)
logger.Error(nil, "Node already has a CIDR allocated. Releasing the new one", "node", klog.KObj(node), "podCIDRs", node.Spec.PodCIDRs)
for _, cidr := range data.allocatedCIDRs {
if err := r.Release(data.clusterCIDR, cidr); err != nil {
if err := r.Release(logger, data.clusterCIDR, cidr); err != nil {
return fmt.Errorf("failed to release cidr %s from clusterCIDR %s for node: %s: %w", cidr, data.clusterCIDR.Name, node.Name, err)
}
}
@ -686,20 +690,20 @@ func (r *multiCIDRRangeAllocator) updateCIDRsAllocation(data multiCIDRNodeReserv
for i := 0; i < cidrUpdateRetries; i++ {
if err = nodeutil.PatchNodeCIDRs(r.client, types.NodeName(node.Name), cidrsString); err == nil {
data.clusterCIDR.AssociatedNodes[node.Name] = true
klog.Infof("Set node %q PodCIDR to %q", node.Name, cidrsString)
logger.Info("Set node PodCIDR", "node", klog.KObj(node), "podCIDR", cidrsString)
return nil
}
}
// failed release back to the pool.
klog.Errorf("Failed to update node %q PodCIDR to %q after %d attempts: %v", node.Name, cidrsString, cidrUpdateRetries, err)
logger.Error(err, "Failed to update node PodCIDR after attempts", "node", klog.KObj(node), "podCIDR", cidrsString, "retries", cidrUpdateRetries)
controllerutil.RecordNodeStatusChange(r.recorder, node, "CIDRAssignmentFailed")
// We accept the fact that we may leak CIDRs here. This is safer than releasing
// them in case when we don't know if request went through.
// NodeController restart will return all falsely allocated CIDRs to the pool.
if !apierrors.IsServerTimeout(err) {
klog.Errorf("CIDR assignment for node %q failed: %v. Releasing allocated CIDR", node.Name, err)
logger.Error(err, "CIDR assignment for node failed. Releasing allocated CIDR", "node", klog.KObj(node))
for _, cidr := range data.allocatedCIDRs {
if err := r.Release(data.clusterCIDR, cidr); err != nil {
if err := r.Release(logger, data.clusterCIDR, cidr); err != nil {
return fmt.Errorf("failed to release cidr %q from clusterCIDR %q for node: %q: %w", cidr, data.clusterCIDR.Name, node.Name, err)
}
}
@ -732,8 +736,8 @@ func defaultNodeSelector() *v1.NodeSelector {
// prioritizedCIDRs returns a list of CIDRs to be allocated to the node.
// Returns 1 CIDR if single stack.
// Returns 2 CIDRs , 1 from each ip family if dual stack.
func (r *multiCIDRRangeAllocator) prioritizedCIDRs(node *v1.Node) ([]*net.IPNet, *cidrset.ClusterCIDR, error) {
clusterCIDRList, err := r.orderedMatchingClusterCIDRs(node, true)
func (r *multiCIDRRangeAllocator) prioritizedCIDRs(logger klog.Logger, node *v1.Node) ([]*net.IPNet, *cidrset.ClusterCIDR, error) {
clusterCIDRList, err := r.orderedMatchingClusterCIDRs(logger, node, true)
if err != nil {
return nil, nil, fmt.Errorf("unable to get a clusterCIDR for node %s: %w", node.Name, err)
}
@ -743,7 +747,7 @@ func (r *multiCIDRRangeAllocator) prioritizedCIDRs(node *v1.Node) ([]*net.IPNet,
if clusterCIDR.IPv4CIDRSet != nil {
cidr, err := r.allocateCIDR(clusterCIDR, clusterCIDR.IPv4CIDRSet)
if err != nil {
klog.V(3).Infof("unable to allocate IPv4 CIDR, trying next range: %w", err)
logger.V(3).Info("Unable to allocate IPv4 CIDR, trying next range", "err", err)
continue
}
cidrs = append(cidrs, cidr)
@ -752,7 +756,7 @@ func (r *multiCIDRRangeAllocator) prioritizedCIDRs(node *v1.Node) ([]*net.IPNet,
if clusterCIDR.IPv6CIDRSet != nil {
cidr, err := r.allocateCIDR(clusterCIDR, clusterCIDR.IPv6CIDRSet)
if err != nil {
klog.V(3).Infof("unable to allocate IPv6 CIDR, trying next range: %w", err)
logger.V(3).Info("Unable to allocate IPv6 CIDR, trying next range", "err", err)
continue
}
cidrs = append(cidrs, cidr)
@ -827,8 +831,8 @@ func (r *multiCIDRRangeAllocator) cidrOverlapWithAllocatedList(cidr *net.IPNet)
}
// allocatedClusterCIDR returns the ClusterCIDR from which the node CIDRs were allocated.
func (r *multiCIDRRangeAllocator) allocatedClusterCIDR(node *v1.Node) (*cidrset.ClusterCIDR, error) {
clusterCIDRList, err := r.orderedMatchingClusterCIDRs(node, false)
func (r *multiCIDRRangeAllocator) allocatedClusterCIDR(logger klog.Logger, node *v1.Node) (*cidrset.ClusterCIDR, error) {
clusterCIDRList, err := r.orderedMatchingClusterCIDRs(logger, node, false)
if err != nil {
return nil, fmt.Errorf("unable to get a clusterCIDR for node %s: %w", node.Name, err)
}
@ -852,12 +856,12 @@ func (r *multiCIDRRangeAllocator) allocatedClusterCIDR(node *v1.Node) (*cidrset.
// orderedMatchingClusterCIDRs takes `occupy` as an argument, it determines whether the function
// is called during an occupy or a release operation. For a release operation, a ClusterCIDR must
// be added to the matching ClusterCIDRs list, irrespective of whether the ClusterCIDR is terminating.
func (r *multiCIDRRangeAllocator) orderedMatchingClusterCIDRs(node *v1.Node, occupy bool) ([]*cidrset.ClusterCIDR, error) {
func (r *multiCIDRRangeAllocator) orderedMatchingClusterCIDRs(logger klog.Logger, node *v1.Node, occupy bool) ([]*cidrset.ClusterCIDR, error) {
matchingCIDRs := make([]*cidrset.ClusterCIDR, 0)
pq := make(PriorityQueue, 0)
for label, clusterCIDRList := range r.cidrMap {
labelsMatch, matchCnt, err := r.matchCIDRLabels(node, label)
labelsMatch, matchCnt, err := r.matchCIDRLabels(logger, node, label)
if err != nil {
return nil, err
}
@ -902,14 +906,14 @@ func (r *multiCIDRRangeAllocator) orderedMatchingClusterCIDRs(node *v1.Node, occ
// matchCIDRLabels Matches the Node labels to CIDR Configs.
// Returns true only if all the labels match, also returns the count of matching labels.
func (r *multiCIDRRangeAllocator) matchCIDRLabels(node *v1.Node, label string) (bool, int, error) {
func (r *multiCIDRRangeAllocator) matchCIDRLabels(logger klog.Logger, node *v1.Node, label string) (bool, int, error) {
var labelSet labels.Set
var matchCnt int
labelsMatch := false
ls, err := labels.Parse(label)
if err != nil {
klog.Errorf("Unable to parse label %s to labels.Selector: %v", label, err)
logger.Error(err, "Unable to parse label to labels.Selector", "label", label)
return labelsMatch, 0, err
}
reqs, selectable := ls.Requirements()
@ -934,7 +938,7 @@ func (r *multiCIDRRangeAllocator) matchCIDRLabels(node *v1.Node, label string) (
// createDefaultClusterCIDR creates a default ClusterCIDR if --cluster-cidr has
// been configured. It converts the --cluster-cidr and --per-node-mask-size* flags
// to appropriate ClusterCIDR fields.
func createDefaultClusterCIDR(existingConfigList *networkingv1alpha1.ClusterCIDRList,
func createDefaultClusterCIDR(logger klog.Logger, existingConfigList *networkingv1alpha1.ClusterCIDRList,
allocatorParams CIDRAllocatorParams) {
// Create default ClusterCIDR only if --cluster-cidr has been configured
if len(allocatorParams.ClusterCIDRs) == 0 {
@ -944,7 +948,7 @@ func createDefaultClusterCIDR(existingConfigList *networkingv1alpha1.ClusterCIDR
for _, clusterCIDR := range existingConfigList.Items {
if clusterCIDR.Name == defaultClusterCIDRName {
// Default ClusterCIDR already exists, no further action required.
klog.V(3).Infof("Default ClusterCIDR %s already exists", defaultClusterCIDRName)
logger.V(3).Info("Default ClusterCIDR already exists", "defaultClusterCIDRName", defaultClusterCIDRName)
return
}
}
@ -1006,14 +1010,15 @@ func createDefaultClusterCIDR(existingConfigList *networkingv1alpha1.ClusterCIDR
}
// reconcileCreate handles create ClusterCIDR events.
func (r *multiCIDRRangeAllocator) reconcileCreate(clusterCIDR *networkingv1alpha1.ClusterCIDR) error {
func (r *multiCIDRRangeAllocator) reconcileCreate(ctx context.Context, clusterCIDR *networkingv1alpha1.ClusterCIDR) error {
r.lock.Lock()
defer r.lock.Unlock()
logger := klog.FromContext(ctx)
if needToAddFinalizer(clusterCIDR, clusterCIDRFinalizer) {
klog.V(3).Infof("Creating ClusterCIDR %s", clusterCIDR.Name)
if err := r.createClusterCIDR(clusterCIDR, false); err != nil {
klog.Errorf("Unable to create ClusterCIDR %s : %v", clusterCIDR.Name, err)
logger.V(3).Info("Creating ClusterCIDR", "clusterCIDR", clusterCIDR.Name)
if err := r.createClusterCIDR(ctx, clusterCIDR, false); err != nil {
logger.Error(err, "Unable to create ClusterCIDR", "clusterCIDR", clusterCIDR.Name)
return err
}
}
@ -1022,21 +1027,22 @@ func (r *multiCIDRRangeAllocator) reconcileCreate(clusterCIDR *networkingv1alpha
// reconcileBootstrap handles creation of existing ClusterCIDRs.
// adds a finalizer if not already present.
func (r *multiCIDRRangeAllocator) reconcileBootstrap(clusterCIDR *networkingv1alpha1.ClusterCIDR) error {
func (r *multiCIDRRangeAllocator) reconcileBootstrap(ctx context.Context, clusterCIDR *networkingv1alpha1.ClusterCIDR) error {
r.lock.Lock()
defer r.lock.Unlock()
logger := klog.FromContext(ctx)
terminating := false
// Create the ClusterCIDR only if the Spec has not been modified.
if clusterCIDR.Generation > 1 {
terminating = true
err := fmt.Errorf("CIDRs from ClusterCIDR %s will not be used for allocation as it was modified", clusterCIDR.Name)
klog.Errorf("ClusterCIDR Modified: %v", err)
logger.Error(err, "ClusterCIDR Modified")
}
klog.V(2).Infof("Creating ClusterCIDR %s during bootstrap", clusterCIDR.Name)
if err := r.createClusterCIDR(clusterCIDR, terminating); err != nil {
klog.Errorf("Unable to create ClusterCIDR %s: %v", clusterCIDR.Name, err)
logger.V(2).Info("Creating ClusterCIDR during bootstrap", "clusterCIDR", clusterCIDR.Name)
if err := r.createClusterCIDR(ctx, clusterCIDR, terminating); err != nil {
logger.Error(err, "Unable to create ClusterCIDR", "clusterCIDR", clusterCIDR.Name)
return err
}
@ -1044,7 +1050,7 @@ func (r *multiCIDRRangeAllocator) reconcileBootstrap(clusterCIDR *networkingv1al
}
// createClusterCIDR creates and maps the cidrSets in the cidrMap.
func (r *multiCIDRRangeAllocator) createClusterCIDR(clusterCIDR *networkingv1alpha1.ClusterCIDR, terminating bool) error {
func (r *multiCIDRRangeAllocator) createClusterCIDR(ctx context.Context, clusterCIDR *networkingv1alpha1.ClusterCIDR, terminating bool) error {
nodeSelector, err := r.nodeSelectorKey(clusterCIDR)
if err != nil {
return fmt.Errorf("unable to get labelSelector key: %w", err)
@ -1069,16 +1075,17 @@ func (r *multiCIDRRangeAllocator) createClusterCIDR(clusterCIDR *networkingv1alp
updatedClusterCIDR.ObjectMeta.Finalizers = append(clusterCIDR.ObjectMeta.Finalizers, clusterCIDRFinalizer)
}
logger := klog.FromContext(ctx)
if updatedClusterCIDR.ResourceVersion == "" {
// Create is only used for creating default ClusterCIDR.
if _, err := r.client.NetworkingV1alpha1().ClusterCIDRs().Create(context.TODO(), updatedClusterCIDR, metav1.CreateOptions{}); err != nil {
klog.V(2).Infof("Error creating ClusterCIDR %s: %v", clusterCIDR.Name, err)
if _, err := r.client.NetworkingV1alpha1().ClusterCIDRs().Create(ctx, updatedClusterCIDR, metav1.CreateOptions{}); err != nil {
logger.V(2).Info("Error creating ClusterCIDR", "clusterCIDR", klog.KObj(clusterCIDR), "err", err)
return err
}
} else {
// Update the ClusterCIDR object when called from reconcileCreate.
if _, err := r.client.NetworkingV1alpha1().ClusterCIDRs().Update(context.TODO(), updatedClusterCIDR, metav1.UpdateOptions{}); err != nil {
klog.V(2).Infof("Error creating ClusterCIDR %s: %v", clusterCIDR.Name, err)
if _, err := r.client.NetworkingV1alpha1().ClusterCIDRs().Update(ctx, updatedClusterCIDR, metav1.UpdateOptions{}); err != nil {
logger.V(2).Info("Error creating ClusterCIDR", "clusterCIDR", clusterCIDR.Name, "err", err)
return err
}
}
@ -1136,30 +1143,31 @@ func (r *multiCIDRRangeAllocator) mapClusterCIDRSet(cidrMap map[string][]*cidrse
// reconcileDelete releases the assigned ClusterCIDR and removes the finalizer
// if the deletion timestamp is set.
func (r *multiCIDRRangeAllocator) reconcileDelete(clusterCIDR *networkingv1alpha1.ClusterCIDR) error {
func (r *multiCIDRRangeAllocator) reconcileDelete(ctx context.Context, clusterCIDR *networkingv1alpha1.ClusterCIDR) error {
r.lock.Lock()
defer r.lock.Unlock()
logger := klog.FromContext(ctx)
if slice.ContainsString(clusterCIDR.GetFinalizers(), clusterCIDRFinalizer, nil) {
klog.V(2).Infof("Releasing ClusterCIDR: %s", clusterCIDR.Name)
if err := r.deleteClusterCIDR(clusterCIDR); err != nil {
klog.V(2).Infof("Error while deleting ClusterCIDR: %+v", err)
logger.V(2).Info("Releasing ClusterCIDR", "clusterCIDR", clusterCIDR.Name)
if err := r.deleteClusterCIDR(logger, clusterCIDR); err != nil {
klog.V(2).Info("Error while deleting ClusterCIDR", "err", err)
return err
}
// Remove the finalizer as delete is successful.
cccCopy := clusterCIDR.DeepCopy()
cccCopy.ObjectMeta.Finalizers = slice.RemoveString(cccCopy.ObjectMeta.Finalizers, clusterCIDRFinalizer, nil)
if _, err := r.client.NetworkingV1alpha1().ClusterCIDRs().Update(context.TODO(), cccCopy, metav1.UpdateOptions{}); err != nil {
klog.V(2).Infof("Error removing finalizer for ClusterCIDR %s: %v", clusterCIDR.Name, err)
if _, err := r.client.NetworkingV1alpha1().ClusterCIDRs().Update(ctx, cccCopy, metav1.UpdateOptions{}); err != nil {
logger.V(2).Info("Error removing finalizer for ClusterCIDR", "clusterCIDR", clusterCIDR.Name, "err", err)
return err
}
klog.V(2).Infof("Removed finalizer for ClusterCIDR %s", clusterCIDR.Name)
logger.V(2).Info("Removed finalizer for ClusterCIDR", "clusterCIDR", clusterCIDR.Name)
}
return nil
}
// deleteClusterCIDR deletes and unmaps the ClusterCIDRs from the cidrMap.
func (r *multiCIDRRangeAllocator) deleteClusterCIDR(clusterCIDR *networkingv1alpha1.ClusterCIDR) error {
func (r *multiCIDRRangeAllocator) deleteClusterCIDR(logger klog.Logger, clusterCIDR *networkingv1alpha1.ClusterCIDR) error {
labelSelector, err := r.nodeSelectorKey(clusterCIDR)
if err != nil {
@ -1168,7 +1176,7 @@ func (r *multiCIDRRangeAllocator) deleteClusterCIDR(clusterCIDR *networkingv1alp
clusterCIDRSetList, ok := r.cidrMap[labelSelector]
if !ok {
klog.Infof("Label %s not found in CIDRMap, proceeding with delete", labelSelector)
logger.Info("Label not found in CIDRMap, proceeding with delete", "labelSelector", labelSelector)
return nil
}
@ -1196,7 +1204,7 @@ func (r *multiCIDRRangeAllocator) deleteClusterCIDR(clusterCIDR *networkingv1alp
r.cidrMap[labelSelector] = clusterCIDRSetList
return nil
}
klog.V(2).Info("clusterCIDR not found, proceeding with delete", "Name", clusterCIDR.Name, "label", labelSelector)
logger.V(2).Info("clusterCIDR not found, proceeding with delete", "clusterCIDR", clusterCIDR.Name, "label", labelSelector)
return nil
}
@ -1217,7 +1225,7 @@ func (r *multiCIDRRangeAllocator) nodeSelectorKey(clusterCIDR *networkingv1alpha
return nodeSelector.String(), nil
}
func listClusterCIDRs(kubeClient clientset.Interface) (*networkingv1alpha1.ClusterCIDRList, error) {
func listClusterCIDRs(ctx context.Context, kubeClient clientset.Interface) (*networkingv1alpha1.ClusterCIDRList, error) {
var clusterCIDRList *networkingv1alpha1.ClusterCIDRList
// We must poll because apiserver might not be up. This error causes
// controller manager to restart.
@ -1230,19 +1238,20 @@ func listClusterCIDRs(kubeClient clientset.Interface) (*networkingv1alpha1.Clust
Steps: 11,
}
logger := klog.FromContext(ctx)
if pollErr := wait.ExponentialBackoff(backoff, func() (bool, error) {
var err error
clusterCIDRList, err = kubeClient.NetworkingV1alpha1().ClusterCIDRs().List(context.TODO(), metav1.ListOptions{
clusterCIDRList, err = kubeClient.NetworkingV1alpha1().ClusterCIDRs().List(ctx, metav1.ListOptions{
FieldSelector: fields.Everything().String(),
LabelSelector: labels.Everything().String(),
})
if err != nil {
klog.Errorf("Failed to list all clusterCIDRs: %v", err)
logger.Error(err, "Failed to list all clusterCIDRs")
return false, nil
}
return true, nil
}); pollErr != nil {
klog.Errorf("Failed to list clusterCIDRs (after %v)", time.Now().Sub(startTimestamp))
logger.Error(nil, "Failed to list clusterCIDRs", "latency", time.Now().Sub(startTimestamp))
return nil, fmt.Errorf("failed to list all clusterCIDRs in %v, cannot proceed without updating CIDR map",
apiserverStartupGracePeriod)
}
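listClusterCIDRs above polls with exponential backoff because the apiserver may not be up yet when the controller starts. A minimal self-contained sketch of the same wait.ExponentialBackoff pattern, with a simulated transient failure standing in for the List call:

package main

import (
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
)

func main() {
	backoff := wait.Backoff{
		Duration: 2 * time.Second,
		Factor:   1.2,
		Steps:    5,
	}
	attempt := 0
	err := wait.ExponentialBackoff(backoff, func() (bool, error) {
		attempt++
		if attempt < 3 { // stand-in for a transient List() failure
			return false, nil // not done yet, retry after the next backoff step
		}
		return true, nil // success, stop polling
	})
	fmt.Println("attempts:", attempt, "err:", err)
}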

View File

@ -34,6 +34,7 @@ import (
"k8s.io/client-go/kubernetes/fake"
k8stesting "k8s.io/client-go/testing"
"k8s.io/client-go/tools/cache"
"k8s.io/klog/v2/ktesting"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
"k8s.io/kubernetes/pkg/controller"
cidrset "k8s.io/kubernetes/pkg/controller/nodeipam/ipam/multicidrset"
@ -473,6 +474,7 @@ func TestMultiCIDROccupyPreExistingCIDR(t *testing.T) {
}
// test function
_, ctx := ktesting.NewTestContext(t)
for _, tc := range testCaseMultiCIDRs {
t.Run(tc.description, func(t *testing.T) {
// Initialize the range allocator.
@ -482,7 +484,7 @@ func TestMultiCIDROccupyPreExistingCIDR(t *testing.T) {
fakeClusterCIDRInformer := fakeInformerFactory.Networking().V1alpha1().ClusterCIDRs()
nodeList, _ := tc.fakeNodeHandler.List(context.TODO(), metav1.ListOptions{})
_, err := NewMultiCIDRRangeAllocator(tc.fakeNodeHandler, fakeNodeInformer, fakeClusterCIDRInformer, tc.allocatorParams, nodeList, tc.testCIDRMap)
_, err := NewMultiCIDRRangeAllocator(ctx, tc.fakeNodeHandler, fakeNodeInformer, fakeClusterCIDRInformer, tc.allocatorParams, nodeList, tc.testCIDRMap)
if err == nil && tc.ctrlCreateFail {
t.Fatalf("creating range allocator was expected to fail, but it did not")
}
@ -1177,6 +1179,8 @@ func TestMultiCIDRAllocateOrOccupyCIDRSuccess(t *testing.T) {
},
}
logger, ctx := ktesting.NewTestContext(t)
// test function
testFunc := func(tc testCaseMultiCIDR) {
nodeList, _ := tc.fakeNodeHandler.List(context.TODO(), metav1.ListOptions{})
@ -1185,7 +1189,7 @@ func TestMultiCIDRAllocateOrOccupyCIDRSuccess(t *testing.T) {
fakeClient := &fake.Clientset{}
fakeInformerFactory := informers.NewSharedInformerFactory(fakeClient, controller.NoResyncPeriodFunc())
fakeClusterCIDRInformer := fakeInformerFactory.Networking().V1alpha1().ClusterCIDRs()
allocator, err := NewMultiCIDRRangeAllocator(tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), fakeClusterCIDRInformer, tc.allocatorParams, nodeList, tc.testCIDRMap)
allocator, err := NewMultiCIDRRangeAllocator(ctx, tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), fakeClusterCIDRInformer, tc.allocatorParams, nodeList, tc.testCIDRMap)
if err != nil {
t.Errorf("%v: failed to create CIDRRangeAllocator with error %v", tc.description, err)
return
@ -1230,7 +1234,7 @@ func TestMultiCIDRAllocateOrOccupyCIDRSuccess(t *testing.T) {
if node.Spec.PodCIDRs == nil {
updateCount++
}
if err := allocator.AllocateOrOccupyCIDR(node); err != nil {
if err := allocator.AllocateOrOccupyCIDR(logger, node); err != nil {
t.Errorf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err)
}
}
@ -1308,13 +1312,15 @@ func TestMultiCIDRAllocateOrOccupyCIDRFailure(t *testing.T) {
},
}
logger, ctx := ktesting.NewTestContext(t)
testFunc := func(tc testCaseMultiCIDR) {
fakeClient := &fake.Clientset{}
fakeInformerFactory := informers.NewSharedInformerFactory(fakeClient, controller.NoResyncPeriodFunc())
fakeClusterCIDRInformer := fakeInformerFactory.Networking().V1alpha1().ClusterCIDRs()
// Initialize the range allocator.
allocator, err := NewMultiCIDRRangeAllocator(tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), fakeClusterCIDRInformer, tc.allocatorParams, nil, tc.testCIDRMap)
allocator, err := NewMultiCIDRRangeAllocator(ctx, tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), fakeClusterCIDRInformer, tc.allocatorParams, nil, tc.testCIDRMap)
if err != nil {
t.Logf("%v: failed to create CIDRRangeAllocator with error %v", tc.description, err)
}
@ -1353,7 +1359,7 @@ func TestMultiCIDRAllocateOrOccupyCIDRFailure(t *testing.T) {
}
}
if err := allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0]); err == nil {
if err := allocator.AllocateOrOccupyCIDR(logger, tc.fakeNodeHandler.Existing[0]); err == nil {
t.Errorf("%v: unexpected success in AllocateOrOccupyCIDR: %v", tc.description, err)
}
// We don't expect any updates, so just sleep for some time
@ -1498,13 +1504,13 @@ func TestMultiCIDRReleaseCIDRSuccess(t *testing.T) {
},
},
}
logger, ctx := ktesting.NewTestContext(t)
testFunc := func(tc releasetestCaseMultiCIDR) {
fakeClient := &fake.Clientset{}
fakeInformerFactory := informers.NewSharedInformerFactory(fakeClient, controller.NoResyncPeriodFunc())
fakeClusterCIDRInformer := fakeInformerFactory.Networking().V1alpha1().ClusterCIDRs()
// Initialize the range allocator.
allocator, _ := NewMultiCIDRRangeAllocator(tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), fakeClusterCIDRInformer, tc.allocatorParams, nil, tc.testCIDRMap)
allocator, _ := NewMultiCIDRRangeAllocator(ctx, tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), fakeClusterCIDRInformer, tc.allocatorParams, nil, tc.testCIDRMap)
rangeAllocator, ok := allocator.(*multiCIDRRangeAllocator)
if !ok {
t.Logf("%v: found non-default implementation of CIDRAllocator, skipping white-box test...", tc.description)
@ -1540,7 +1546,7 @@ func TestMultiCIDRReleaseCIDRSuccess(t *testing.T) {
}
}
err := allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0])
err := allocator.AllocateOrOccupyCIDR(logger, tc.fakeNodeHandler.Existing[0])
if len(tc.expectedAllocatedCIDRFirstRound) != 0 {
if err != nil {
t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err)
@ -1570,12 +1576,12 @@ func TestMultiCIDRReleaseCIDRSuccess(t *testing.T) {
},
}
nodeToRelease.Spec.PodCIDRs = cidrToRelease
err = allocator.ReleaseCIDR(&nodeToRelease)
err = allocator.ReleaseCIDR(logger, &nodeToRelease)
if err != nil {
t.Fatalf("%v: unexpected error in ReleaseCIDR: %v", tc.description, err)
}
}
if err = allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0]); err != nil {
if err = allocator.AllocateOrOccupyCIDR(logger, tc.fakeNodeHandler.Existing[0]); err != nil {
t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err)
}
if err := test.WaitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, 1, wait.ForeverTestTimeout); err != nil {
@ -1614,7 +1620,7 @@ type clusterCIDRController struct {
clusterCIDRStore cache.Store
}
func newController() (*fake.Clientset, *clusterCIDRController) {
func newController(ctx context.Context) (*fake.Clientset, *clusterCIDRController) {
client := fake.NewSimpleClientset()
informerFactory := informers.NewSharedInformerFactory(client, controller.NoResyncPeriodFunc())
@ -1657,7 +1663,7 @@ func newController() (*fake.Clientset, *clusterCIDRController) {
testCIDRMap := make(map[string][]*cidrset.ClusterCIDR, 0)
// Initialize the range allocator.
ra, _ := NewMultiCIDRRangeAllocator(client, nodeInformer, cccInformer, allocatorParams, nil, testCIDRMap)
ra, _ := NewMultiCIDRRangeAllocator(ctx, client, nodeInformer, cccInformer, allocatorParams, nil, testCIDRMap)
cccController := ra.(*multiCIDRRangeAllocator)
cccController.clusterCIDRSynced = alwaysReady
@ -1671,8 +1677,8 @@ func newController() (*fake.Clientset, *clusterCIDRController) {
// Ensure default ClusterCIDR is created during bootstrap.
func TestClusterCIDRDefault(t *testing.T) {
defaultCCC := makeClusterCIDR(defaultClusterCIDRName, "192.168.0.0/16", "", 8, nil)
client, _ := newController()
_, ctx := ktesting.NewTestContext(t)
client, _ := newController(ctx)
createdCCC, err := client.NetworkingV1alpha1().ClusterCIDRs().Get(context.TODO(), defaultClusterCIDRName, metav1.GetOptions{})
assert.Nil(t, err, "Expected no error getting clustercidr objects")
assert.Equal(t, defaultCCC.Spec, createdCCC.Spec)
@ -1752,11 +1758,11 @@ func TestSyncClusterCIDRCreate(t *testing.T) {
wantErr: true,
},
}
client, cccController := newController()
_, ctx := ktesting.NewTestContext(t)
client, cccController := newController(ctx)
for _, tc := range tests {
cccController.clusterCIDRStore.Add(tc.ccc)
err := cccController.syncClusterCIDR(tc.ccc.Name)
err := cccController.syncClusterCIDR(ctx, tc.ccc.Name)
if tc.wantErr {
assert.Error(t, err)
continue
@ -1773,30 +1779,32 @@ func TestSyncClusterCIDRCreate(t *testing.T) {
// Ensure syncClusterCIDR for ClusterCIDR delete removes the ClusterCIDR.
func TestSyncClusterCIDRDelete(t *testing.T) {
_, cccController := newController()
_, ctx := ktesting.NewTestContext(t)
_, cccController := newController(ctx)
testCCC := makeClusterCIDR("testing-1", "10.1.0.0/16", "", 8, makeNodeSelector("foo", v1.NodeSelectorOpIn, []string{"bar"}))
cccController.clusterCIDRStore.Add(testCCC)
err := cccController.syncClusterCIDR(testCCC.Name)
err := cccController.syncClusterCIDR(ctx, testCCC.Name)
assert.NoError(t, err)
deletionTimestamp := metav1.Now()
testCCC.DeletionTimestamp = &deletionTimestamp
cccController.clusterCIDRStore.Update(testCCC)
err = cccController.syncClusterCIDR(testCCC.Name)
err = cccController.syncClusterCIDR(ctx, testCCC.Name)
assert.NoError(t, err)
}
// Ensure syncClusterCIDR for ClusterCIDR delete does not remove ClusterCIDR
// if a node is associated with the ClusterCIDR.
func TestSyncClusterCIDRDeleteWithNodesAssociated(t *testing.T) {
client, cccController := newController()
_, ctx := ktesting.NewTestContext(t)
client, cccController := newController(ctx)
testCCC := makeClusterCIDR("testing-1", "10.1.0.0/16", "", 8, makeNodeSelector("foo", v1.NodeSelectorOpIn, []string{"bar"}))
cccController.clusterCIDRStore.Add(testCCC)
err := cccController.syncClusterCIDR(testCCC.Name)
err := cccController.syncClusterCIDR(ctx, testCCC.Name)
assert.NoError(t, err)
// Mock the IPAM controller behavior associating node with ClusterCIDR.
@ -1810,7 +1818,7 @@ func TestSyncClusterCIDRDeleteWithNodesAssociated(t *testing.T) {
deletionTimestamp := metav1.Now()
createdCCC.DeletionTimestamp = &deletionTimestamp
cccController.clusterCIDRStore.Update(createdCCC)
err = cccController.syncClusterCIDR(createdCCC.Name)
err = cccController.syncClusterCIDR(ctx, createdCCC.Name)
assert.Error(t, err, fmt.Sprintf("ClusterCIDR %s marked as terminating, won't be deleted until all associated nodes are deleted", createdCCC.Name))
}
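The tests in this file obtain a logger and context from ktesting so klog output is routed through the test's own log and captured per test case. A minimal sketch of the pattern (doSomething is an illustrative stand-in for code under test):

package example

import (
	"context"
	"testing"

	"k8s.io/klog/v2"
	"k8s.io/klog/v2/ktesting"
)

// doSomething stands in for any code that retrieves its logger from the context.
func doSomething(ctx context.Context) {
	klog.FromContext(ctx).Info("doing work")
}

func TestExample(t *testing.T) {
	logger, ctx := ktesting.NewTestContext(t)
	logger.V(2).Info("starting test") // shows up in `go test -v` output
	doSomething(ctx)                  // callees recover the same logger via klog.FromContext
}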

View File

@ -22,7 +22,7 @@ import (
"testing"
"k8s.io/component-base/metrics/testutil"
"k8s.io/klog/v2"
"k8s.io/klog/v2/ktesting"
utilnet "k8s.io/utils/net"
)
@ -568,6 +568,7 @@ func TestGetBitforCIDR(t *testing.T) {
},
}
logger, _ := ktesting.NewTestContext(t)
for _, tc := range cases {
_, clusterCIDR, err := utilnet.ParseCIDRSloppy(tc.clusterCIDRStr)
if err != nil {
@ -585,17 +586,17 @@ func TestGetBitforCIDR(t *testing.T) {
got, err := cs.getIndexForCIDR(subnetCIDR)
if err == nil && tc.expectErr {
klog.Errorf("expected error but got null for %v", tc.description)
logger.Error(nil, "Expected error but got null", "description", tc.description)
continue
}
if err != nil && !tc.expectErr {
klog.Errorf("unexpected error: %v for %v", err, tc.description)
logger.Error(err, "Unexpected error", "description", tc.description)
continue
}
if got != tc.expectedBit {
klog.Errorf("expected %v, but got %v for %v", tc.expectedBit, got, tc.description)
logger.Error(nil, "Unexpected value", "description", tc.description, "expected", tc.expectedBit, "got", got)
}
}
}

View File

@ -17,6 +17,7 @@ limitations under the License.
package ipam
import (
"context"
"fmt"
"net"
"sync"
@ -66,9 +67,10 @@ type rangeAllocator struct {
// Caller must always pass in a list of existing nodes so the new allocator
// can initialize its CIDR map. NodeList is only nil in testing.
// Caller must ensure that ClusterCIDRs are semantically correct e.g. (1 for non DualStack, 2 for DualStack etc.).
func NewCIDRRangeAllocator(client clientset.Interface, nodeInformer informers.NodeInformer, allocatorParams CIDRAllocatorParams, nodeList *v1.NodeList) (CIDRAllocator, error) {
func NewCIDRRangeAllocator(logger klog.Logger, client clientset.Interface, nodeInformer informers.NodeInformer, allocatorParams CIDRAllocatorParams, nodeList *v1.NodeList) (CIDRAllocator, error) {
if client == nil {
klog.Fatalf("kubeClient is nil when starting NodeController")
logger.Error(nil, "kubeClient is nil when starting CIDRRangeAllocator")
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
}
eventBroadcaster := record.NewBroadcaster()
@ -98,24 +100,24 @@ func NewCIDRRangeAllocator(client clientset.Interface, nodeInformer informers.No
}
if allocatorParams.ServiceCIDR != nil {
ra.filterOutServiceRange(allocatorParams.ServiceCIDR)
ra.filterOutServiceRange(logger, allocatorParams.ServiceCIDR)
} else {
klog.Info("No Service CIDR provided. Skipping filtering out service addresses.")
logger.Info("No Service CIDR provided. Skipping filtering out service addresses")
}
if allocatorParams.SecondaryServiceCIDR != nil {
ra.filterOutServiceRange(allocatorParams.SecondaryServiceCIDR)
ra.filterOutServiceRange(logger, allocatorParams.SecondaryServiceCIDR)
} else {
klog.Info("No Secondary Service CIDR provided. Skipping filtering out secondary service addresses.")
logger.Info("No Secondary Service CIDR provided. Skipping filtering out secondary service addresses")
}
if nodeList != nil {
for _, node := range nodeList.Items {
if len(node.Spec.PodCIDRs) == 0 {
klog.V(4).Infof("Node %v has no CIDR, ignoring", node.Name)
logger.V(4).Info("Node has no CIDR, ignoring", "node", klog.KObj(&node))
continue
}
klog.V(4).Infof("Node %v has CIDR %s, occupying it in CIDR map", node.Name, node.Spec.PodCIDR)
logger.V(4).Info("Node has CIDR, occupying it in CIDR map", "node", klog.KObj(&node), "podCIDR", node.Spec.PodCIDR)
if err := ra.occupyCIDRs(&node); err != nil {
// This will happen if:
// 1. We find garbage in the podCIDRs field. Retrying is useless.
@ -127,7 +129,9 @@ func NewCIDRRangeAllocator(client clientset.Interface, nodeInformer informers.No
}
nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: controllerutil.CreateAddNodeHandler(ra.AllocateOrOccupyCIDR),
AddFunc: controllerutil.CreateAddNodeHandler(func(node *v1.Node) error {
return ra.AllocateOrOccupyCIDR(logger, node)
}),
UpdateFunc: controllerutil.CreateUpdateNodeHandler(func(_, newNode *v1.Node) error {
// If the PodCIDRs list is not empty we either:
// - already processed a Node that already had CIDRs after NC restarted
@ -149,52 +153,56 @@ func NewCIDRRangeAllocator(client clientset.Interface, nodeInformer informers.No
// state is correct.
// Restart of NC fixes the issue.
if len(newNode.Spec.PodCIDRs) == 0 {
return ra.AllocateOrOccupyCIDR(newNode)
return ra.AllocateOrOccupyCIDR(logger, newNode)
}
return nil
}),
DeleteFunc: controllerutil.CreateDeleteNodeHandler(ra.ReleaseCIDR),
DeleteFunc: controllerutil.CreateDeleteNodeHandler(func(node *v1.Node) error {
return ra.ReleaseCIDR(logger, node)
}),
})
return ra, nil
}
func (r *rangeAllocator) Run(stopCh <-chan struct{}) {
func (r *rangeAllocator) Run(ctx context.Context) {
defer utilruntime.HandleCrash()
// Start event processing pipeline.
r.broadcaster.StartStructuredLogging(0)
klog.Infof("Sending events to api server.")
logger := klog.FromContext(ctx)
logger.Info("Sending events to api server")
r.broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: r.client.CoreV1().Events("")})
defer r.broadcaster.Shutdown()
klog.Infof("Starting range CIDR allocator")
defer klog.Infof("Shutting down range CIDR allocator")
logger.Info("Starting range CIDR allocator")
defer logger.Info("Shutting down range CIDR allocator")
if !cache.WaitForNamedCacheSync("cidrallocator", stopCh, r.nodesSynced) {
if !cache.WaitForNamedCacheSync("cidrallocator", ctx.Done(), r.nodesSynced) {
return
}
for i := 0; i < cidrUpdateWorkers; i++ {
go r.worker(stopCh)
go r.worker(ctx)
}
<-stopCh
<-ctx.Done()
}
func (r *rangeAllocator) worker(stopChan <-chan struct{}) {
func (r *rangeAllocator) worker(ctx context.Context) {
logger := klog.FromContext(ctx)
for {
select {
case workItem, ok := <-r.nodeCIDRUpdateChannel:
if !ok {
klog.Warning("Channel nodeCIDRUpdateChannel was unexpectedly closed")
logger.Info("Channel nodeCIDRUpdateChannel was unexpectedly closed")
return
}
if err := r.updateCIDRsAllocation(workItem); err != nil {
if err := r.updateCIDRsAllocation(logger, workItem); err != nil {
// Requeue the failed node for update again.
r.nodeCIDRUpdateChannel <- workItem
}
case <-stopChan:
case <-ctx.Done():
return
}
}
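Run and worker now take a context instead of a stop channel; each worker drains the work channel until it closes or the context is cancelled. A reduced sketch of that select loop (workCh and process are placeholders):

package example

import (
	"context"

	"k8s.io/klog/v2"
)

func workerLoop(ctx context.Context, workCh chan string, process func(string) error) {
	logger := klog.FromContext(ctx)
	for {
		select {
		case item, ok := <-workCh:
			if !ok {
				logger.Info("Work channel was unexpectedly closed")
				return
			}
			if err := process(item); err != nil {
				// Requeue the failed item, mirroring the loop above; this
				// assumes the channel has spare capacity, as the original does.
				workCh <- item
			}
		case <-ctx.Done():
			return
		}
	}
}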
@ -244,12 +252,12 @@ func (r *rangeAllocator) occupyCIDRs(node *v1.Node) error {
// WARNING: If you're adding any return calls or defer any more work from this
// function you have to make sure to update nodesInProcessing properly with the
// disposition of the node when the work is done.
func (r *rangeAllocator) AllocateOrOccupyCIDR(node *v1.Node) error {
func (r *rangeAllocator) AllocateOrOccupyCIDR(logger klog.Logger, node *v1.Node) error {
if node == nil {
return nil
}
if !r.insertNodeToProcessing(node.Name) {
klog.V(2).Infof("Node %v is already in a process of CIDR assignment.", node.Name)
logger.V(2).Info("Node is already in a process of CIDR assignment", "node", klog.KObj(node))
return nil
}
@ -273,13 +281,13 @@ func (r *rangeAllocator) AllocateOrOccupyCIDR(node *v1.Node) error {
}
// queue the assignment
klog.V(4).Infof("Putting node %s with CIDR %v into the work queue", node.Name, allocated.allocatedCIDRs)
logger.V(4).Info("Putting node with CIDR into the work queue", "node", klog.KObj(node), "CIDRs", allocated.allocatedCIDRs)
r.nodeCIDRUpdateChannel <- allocated
return nil
}
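AllocateOrOccupyCIDR depends on insertNodeToProcessing/removeNodeFromProcessing so that only one goroutine handles a given node at a time. A minimal sketch of such a mutex-guarded set (names are illustrative, not the controller's implementation):

package example

import "sync"

type processingSet struct {
	mu    sync.Mutex
	items map[string]struct{}
}

func newProcessingSet() *processingSet {
	return &processingSet{items: make(map[string]struct{})}
}

// insert returns false if the name is already being processed.
func (s *processingSet) insert(name string) bool {
	s.mu.Lock()
	defer s.mu.Unlock()
	if _, exists := s.items[name]; exists {
		return false
	}
	s.items[name] = struct{}{}
	return true
}

// remove marks the name as no longer in flight.
func (s *processingSet) remove(name string) {
	s.mu.Lock()
	defer s.mu.Unlock()
	delete(s.items, name)
}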
// ReleaseCIDR marks node.podCIDRs[...] as unused in our tracked cidrSets
func (r *rangeAllocator) ReleaseCIDR(node *v1.Node) error {
func (r *rangeAllocator) ReleaseCIDR(logger klog.Logger, node *v1.Node) error {
if node == nil || len(node.Spec.PodCIDRs) == 0 {
return nil
}
@ -297,7 +305,7 @@ func (r *rangeAllocator) ReleaseCIDR(node *v1.Node) error {
return fmt.Errorf("node:%s has an allocated cidr: %v at index:%v that does not exist in cluster cidrs configuration", node.Name, cidr, idx)
}
klog.V(4).Infof("release CIDR %s for node:%v", cidr, node.Name)
logger.V(4).Info("Release CIDR for node", "CIDR", cidr, "node", klog.KObj(node))
if err = r.cidrSets[idx].Release(podCIDR); err != nil {
return fmt.Errorf("error when releasing CIDR %v: %v", cidr, err)
}
@ -307,7 +315,7 @@ func (r *rangeAllocator) ReleaseCIDR(node *v1.Node) error {
// Marks all CIDRs with subNetMaskSize that belongs to serviceCIDR as used across all cidrs
// so that they won't be assignable.
func (r *rangeAllocator) filterOutServiceRange(serviceCIDR *net.IPNet) {
func (r *rangeAllocator) filterOutServiceRange(logger klog.Logger, serviceCIDR *net.IPNet) {
// Checks if service CIDR has a nonempty intersection with cluster
// CIDR. It is the case if either clusterCIDR contains serviceCIDR with
// clusterCIDR's Mask applied (this means that clusterCIDR contains
@ -321,20 +329,20 @@ func (r *rangeAllocator) filterOutServiceRange(serviceCIDR *net.IPNet) {
// at this point, len(cidrSet) == len(clusterCidr)
if err := r.cidrSets[idx].Occupy(serviceCIDR); err != nil {
klog.Errorf("Error filtering out service cidr out cluster cidr:%v (index:%v) %v: %v", cidr, idx, serviceCIDR, err)
logger.Error(err, "Error filtering out service cidr out cluster cidr", "CIDR", cidr, "index", idx, "serviceCIDR", serviceCIDR)
}
}
}
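The intersection test described in the comment above reduces to two Contains checks: mask each network's base address with the other's mask and test membership. A self-contained sketch:

package main

import (
	"fmt"
	"net"
)

// cidrsOverlap reports whether two CIDRs have a nonempty intersection.
// One range overlaps the other exactly when either network's base address,
// masked with the other range's mask, falls inside that other range.
func cidrsOverlap(a, b *net.IPNet) bool {
	return a.Contains(b.IP.Mask(a.Mask)) || b.Contains(a.IP.Mask(b.Mask))
}

func main() {
	_, cluster, _ := net.ParseCIDR("10.0.0.0/16")
	_, service, _ := net.ParseCIDR("10.0.1.0/24")
	fmt.Println(cidrsOverlap(cluster, service)) // true: service is inside cluster
}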
// updateCIDRsAllocation assigns CIDR to Node and sends an update to the API server.
func (r *rangeAllocator) updateCIDRsAllocation(data nodeReservedCIDRs) error {
func (r *rangeAllocator) updateCIDRsAllocation(logger klog.Logger, data nodeReservedCIDRs) error {
var err error
var node *v1.Node
defer r.removeNodeFromProcessing(data.nodeName)
cidrsString := ipnetToStringList(data.allocatedCIDRs)
node, err = r.nodeLister.Get(data.nodeName)
if err != nil {
klog.Errorf("Failed while getting node %v for updating Node.Spec.PodCIDRs: %v", data.nodeName, err)
logger.Error(err, "Failed while getting node for updating Node.Spec.PodCIDRs", "node", klog.KRef("", data.nodeName))
return err
}
@ -350,17 +358,17 @@ func (r *rangeAllocator) updateCIDRsAllocation(data nodeReservedCIDRs) error {
}
}
if match {
klog.V(4).Infof("Node %v already has allocated CIDR %v. It matches the proposed one.", node.Name, data.allocatedCIDRs)
logger.V(4).Info("Node already has allocated CIDR. It matches the proposed one", "node", klog.KObj(node), "CIDRs", data.allocatedCIDRs)
return nil
}
}
// node has cidrs, release the reserved
if len(node.Spec.PodCIDRs) != 0 {
klog.Errorf("Node %v already has a CIDR allocated %v. Releasing the new one.", node.Name, node.Spec.PodCIDRs)
logger.Error(nil, "Node already has a CIDR allocated. Releasing the new one", "node", klog.KObj(node), "podCIDRs", node.Spec.PodCIDRs)
for idx, cidr := range data.allocatedCIDRs {
if releaseErr := r.cidrSets[idx].Release(cidr); releaseErr != nil {
klog.Errorf("Error when releasing CIDR idx:%v value: %v err:%v", idx, cidr, releaseErr)
logger.Error(releaseErr, "Error when releasing CIDR", "index", idx, "CIDR", cidr)
}
}
return nil
@ -369,21 +377,21 @@ func (r *rangeAllocator) updateCIDRsAllocation(data nodeReservedCIDRs) error {
// If we reached here, it means that the node has no CIDR currently assigned. So we set it.
for i := 0; i < cidrUpdateRetries; i++ {
if err = nodeutil.PatchNodeCIDRs(r.client, types.NodeName(node.Name), cidrsString); err == nil {
klog.Infof("Set node %v PodCIDR to %v", node.Name, cidrsString)
logger.Info("Set node PodCIDR", "node", klog.KObj(node), "podCIDRs", cidrsString)
return nil
}
}
// failed release back to the pool
klog.Errorf("Failed to update node %v PodCIDR to %v after multiple attempts: %v", node.Name, cidrsString, err)
logger.Error(err, "Failed to update node PodCIDR after multiple attempts", "node", klog.KObj(node), "podCIDRs", cidrsString)
controllerutil.RecordNodeStatusChange(r.recorder, node, "CIDRAssignmentFailed")
// We accept the fact that we may leak CIDRs here. This is safer than releasing
// them in case when we don't know if request went through.
// NodeController restart will return all falsely allocated CIDRs to the pool.
if !apierrors.IsServerTimeout(err) {
klog.Errorf("CIDR assignment for node %v failed: %v. Releasing allocated CIDR", node.Name, err)
logger.Error(err, "CIDR assignment for node failed. Releasing allocated CIDR", "node", klog.KObj(node))
for idx, cidr := range data.allocatedCIDRs {
if releaseErr := r.cidrSets[idx].Release(cidr); releaseErr != nil {
klog.Errorf("Error releasing allocated CIDR for node %v: %v", node.Name, releaseErr)
logger.Error(releaseErr, "Error releasing allocated CIDR for node", "node", klog.KObj(node))
}
}
}
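updateCIDRsAllocation retries the patch a bounded number of times and releases the reserved CIDRs only when the final failure is not a server timeout, since a timeout leaves the request's outcome unknown. A schematic version of that control flow (patch and release are placeholders):

package example

import (
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/klog/v2"
)

const patchRetries = 3

// assignWithRetry retries patch a bounded number of times; on a final
// failure that is not a server timeout it calls release, because only a
// timeout leaves the possibility that the request actually went through.
func assignWithRetry(logger klog.Logger, patch func() error, release func()) error {
	var err error
	for i := 0; i < patchRetries; i++ {
		if err = patch(); err == nil {
			return nil
		}
	}
	logger.Error(err, "Patch failed after multiple attempts")
	if !apierrors.IsServerTimeout(err) {
		release()
	}
	return err
}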

View File

@ -26,6 +26,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes/fake"
"k8s.io/klog/v2/ktesting"
"k8s.io/kubernetes/pkg/controller/nodeipam/ipam/test"
"k8s.io/kubernetes/pkg/controller/testutil"
netutils "k8s.io/utils/net"
@ -274,12 +275,13 @@ func TestOccupyPreExistingCIDR(t *testing.T) {
}
// test function
logger, _ := ktesting.NewTestContext(t)
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
// Initialize the range allocator.
fakeNodeInformer := test.FakeNodeInformer(tc.fakeNodeHandler)
nodeList, _ := tc.fakeNodeHandler.List(context.TODO(), metav1.ListOptions{})
_, err := NewCIDRRangeAllocator(tc.fakeNodeHandler, fakeNodeInformer, tc.allocatorParams, nodeList)
_, err := NewCIDRRangeAllocator(logger, tc.fakeNodeHandler, fakeNodeInformer, tc.allocatorParams, nodeList)
if err == nil && tc.ctrlCreateFail {
t.Fatalf("creating range allocator was expected to fail, but it did not")
}
@ -508,11 +510,12 @@ func TestAllocateOrOccupyCIDRSuccess(t *testing.T) {
}
// test function
logger, ctx := ktesting.NewTestContext(t)
testFunc := func(tc testCase) {
fakeNodeInformer := test.FakeNodeInformer(tc.fakeNodeHandler)
nodeList, _ := tc.fakeNodeHandler.List(context.TODO(), metav1.ListOptions{})
// Initialize the range allocator.
allocator, err := NewCIDRRangeAllocator(tc.fakeNodeHandler, fakeNodeInformer, tc.allocatorParams, nodeList)
allocator, err := NewCIDRRangeAllocator(logger, tc.fakeNodeHandler, fakeNodeInformer, tc.allocatorParams, nodeList)
if err != nil {
t.Errorf("%v: failed to create CIDRRangeAllocator with error %v", tc.description, err)
return
@ -524,7 +527,7 @@ func TestAllocateOrOccupyCIDRSuccess(t *testing.T) {
}
rangeAllocator.nodesSynced = test.AlwaysReady
rangeAllocator.recorder = testutil.NewFakeRecorder()
go allocator.Run(wait.NeverStop)
go allocator.Run(ctx)
// this is a bit of white box testing
// pre allocate the cidrs as per the test
@ -545,7 +548,7 @@ func TestAllocateOrOccupyCIDRSuccess(t *testing.T) {
if node.Spec.PodCIDRs == nil {
updateCount++
}
if err := allocator.AllocateOrOccupyCIDR(node); err != nil {
if err := allocator.AllocateOrOccupyCIDR(logger, node); err != nil {
t.Errorf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err)
}
}
@ -608,10 +611,10 @@ func TestAllocateOrOccupyCIDRFailure(t *testing.T) {
},
},
}
logger, ctx := ktesting.NewTestContext(t)
testFunc := func(tc testCase) {
// Initialize the range allocator.
allocator, err := NewCIDRRangeAllocator(tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), tc.allocatorParams, nil)
allocator, err := NewCIDRRangeAllocator(logger, tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), tc.allocatorParams, nil)
if err != nil {
t.Logf("%v: failed to create CIDRRangeAllocator with error %v", tc.description, err)
}
@ -622,7 +625,7 @@ func TestAllocateOrOccupyCIDRFailure(t *testing.T) {
}
rangeAllocator.nodesSynced = test.AlwaysReady
rangeAllocator.recorder = testutil.NewFakeRecorder()
go allocator.Run(wait.NeverStop)
go allocator.Run(ctx)
// this is a bit of white box testing
for setIdx, allocatedList := range tc.allocatedCIDRs {
@ -637,7 +640,7 @@ func TestAllocateOrOccupyCIDRFailure(t *testing.T) {
}
}
}
if err := allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0]); err == nil {
if err := allocator.AllocateOrOccupyCIDR(logger, tc.fakeNodeHandler.Existing[0]); err == nil {
t.Errorf("%v: unexpected success in AllocateOrOccupyCIDR: %v", tc.description, err)
}
// We don't expect any updates, so just sleep for some time
@ -753,10 +756,10 @@ func TestReleaseCIDRSuccess(t *testing.T) {
},
},
}
logger, ctx := ktesting.NewTestContext(t)
testFunc := func(tc releaseTestCase) {
// Initialize the range allocator.
allocator, _ := NewCIDRRangeAllocator(tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), tc.allocatorParams, nil)
allocator, _ := NewCIDRRangeAllocator(logger, tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), tc.allocatorParams, nil)
rangeAllocator, ok := allocator.(*rangeAllocator)
if !ok {
t.Logf("%v: found non-default implementation of CIDRAllocator, skipping white-box test...", tc.description)
@ -764,7 +767,7 @@ func TestReleaseCIDRSuccess(t *testing.T) {
}
rangeAllocator.nodesSynced = test.AlwaysReady
rangeAllocator.recorder = testutil.NewFakeRecorder()
go allocator.Run(wait.NeverStop)
go allocator.Run(ctx)
// this is a bit of white box testing
for setIdx, allocatedList := range tc.allocatedCIDRs {
@ -780,7 +783,7 @@ func TestReleaseCIDRSuccess(t *testing.T) {
}
}
err := allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0])
err := allocator.AllocateOrOccupyCIDR(logger, tc.fakeNodeHandler.Existing[0])
if len(tc.expectedAllocatedCIDRFirstRound) != 0 {
if err != nil {
t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err)
@ -805,12 +808,12 @@ func TestReleaseCIDRSuccess(t *testing.T) {
},
}
nodeToRelease.Spec.PodCIDRs = cidrToRelease
err = allocator.ReleaseCIDR(&nodeToRelease)
err = allocator.ReleaseCIDR(logger, &nodeToRelease)
if err != nil {
t.Fatalf("%v: unexpected error in ReleaseCIDR: %v", tc.description, err)
}
}
if err = allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0]); err != nil {
if err = allocator.AllocateOrOccupyCIDR(logger, tc.fakeNodeHandler.Existing[0]); err != nil {
t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err)
}
if err := test.WaitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, 1, wait.ForeverTestTimeout); err != nil {

View File

@ -120,8 +120,8 @@ func New(c controller, cloudAlias cloudAlias, kubeAPI kubeAPI, mode NodeSyncMode
// Loop runs the sync loop for a given node. done is an optional channel that
// is closed when the Loop() returns.
func (sync *NodeSync) Loop(done chan struct{}) {
klog.V(2).Infof("Starting sync loop for node %q", sync.nodeName)
func (sync *NodeSync) Loop(logger klog.Logger, done chan struct{}) {
logger.V(2).Info("Starting sync loop", "node", klog.KRef("", sync.nodeName))
defer func() {
if done != nil {
@ -131,27 +131,27 @@ func (sync *NodeSync) Loop(done chan struct{}) {
timeout := sync.c.ResyncTimeout()
delayTimer := time.NewTimer(timeout)
klog.V(4).Infof("Resync node %q in %v", sync.nodeName, timeout)
logger.V(4).Info("Try to resync node later", "node", klog.KRef("", sync.nodeName), "resyncTime", timeout)
for {
select {
case op, more := <-sync.opChan:
if !more {
klog.V(2).Infof("Stopping sync loop")
logger.V(2).Info("Stopping sync loop")
return
}
sync.c.ReportResult(op.run(sync))
sync.c.ReportResult(op.run(logger, sync))
if !delayTimer.Stop() {
<-delayTimer.C
}
case <-delayTimer.C:
klog.V(4).Infof("Running resync for node %q", sync.nodeName)
sync.c.ReportResult((&updateOp{}).run(sync))
logger.V(4).Info("Running resync", "node", klog.KRef("", sync.nodeName))
sync.c.ReportResult((&updateOp{}).run(logger, sync))
}
timeout := sync.c.ResyncTimeout()
delayTimer.Reset(timeout)
klog.V(4).Infof("Resync node %q in %v", sync.nodeName, timeout)
logger.V(4).Info("Try to resync node later", "node", klog.KRef("", sync.nodeName), "resyncTime", timeout)
}
}
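The Stop/drain/Reset sequence above is the standard time.Timer idiom: Stop reports false when the timer already fired, and the pending tick must then be drained before Reset. In isolation (assuming no other goroutine receives from t.C, otherwise the drain would block):

package example

import "time"

// resetTimer restarts t for duration d without leaving a stale tick behind.
func resetTimer(t *time.Timer, d time.Duration) {
	if !t.Stop() {
		// The timer already fired; drain the pending tick so Reset
		// starts from a clean state.
		<-t.C
	}
	t.Reset(d)
}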
@ -175,7 +175,7 @@ func (sync *NodeSync) Delete(node *v1.Node) {
// syncOp is the interface for generic sync operation.
type syncOp interface {
// run the requested sync operation.
run(sync *NodeSync) error
run(logger klog.Logger, sync *NodeSync) error
}
// updateOp handles creation and updates of a node.
@ -190,16 +190,16 @@ func (op *updateOp) String() string {
return fmt.Sprintf("updateOp(%q,%v)", op.node.Name, op.node.Spec.PodCIDR)
}
func (op *updateOp) run(sync *NodeSync) error {
klog.V(3).Infof("Running updateOp %+v", op)
func (op *updateOp) run(logger klog.Logger, sync *NodeSync) error {
logger.V(3).Info("Running updateOp", "updateOp", op)
ctx := context.Background()
if op.node == nil {
klog.V(3).Infof("Getting node spec for %q", sync.nodeName)
logger.V(3).Info("Getting node spec", "node", klog.KRef("", sync.nodeName))
node, err := sync.kubeAPI.Node(ctx, sync.nodeName)
if err != nil {
klog.Errorf("Error getting node %q spec: %v", sync.nodeName, err)
logger.Error(err, "Error getting node pec", "node", klog.KRef("", sync.nodeName))
return err
}
op.node = node
@ -207,7 +207,7 @@ func (op *updateOp) run(sync *NodeSync) error {
aliasRange, err := sync.cloudAlias.Alias(ctx, op.node)
if err != nil {
klog.Errorf("Error getting cloud alias for node %q: %v", sync.nodeName, err)
logger.Error(err, "Error getting cloud alias for node", "node", klog.KRef("", sync.nodeName))
return err
}
@ -229,14 +229,13 @@ func (op *updateOp) run(sync *NodeSync) error {
// match.
func (op *updateOp) validateRange(ctx context.Context, sync *NodeSync, node *v1.Node, aliasRange *net.IPNet) error {
if node.Spec.PodCIDR != aliasRange.String() {
klog.Errorf("Inconsistency detected between node PodCIDR and node alias (%v != %v)",
node.Spec.PodCIDR, aliasRange)
klog.FromContext(ctx).Error(nil, "Inconsistency detected between node PodCIDR and node alias", "podCIDR", node.Spec.PodCIDR, "alias", aliasRange)
sync.kubeAPI.EmitNodeWarningEvent(node.Name, MismatchEvent,
"Node.Spec.PodCIDR != cloud alias (%v != %v)", node.Spec.PodCIDR, aliasRange)
// User intervention is required in this case, as this is most likely due
// to the user mucking around with their VM aliases on the side.
} else {
klog.V(4).Infof("Node %q CIDR range %v is matches cloud assignment", node.Name, node.Spec.PodCIDR)
klog.FromContext(ctx).V(4).Info("Node CIDR range is matches cloud assignment", "node", klog.KObj(node), "podCIDR", node.Spec.PodCIDR)
}
return nil
}
@ -249,27 +248,27 @@ func (op *updateOp) updateNodeFromAlias(ctx context.Context, sync *NodeSync, nod
"Cannot sync from cloud in mode %q", sync.mode)
return fmt.Errorf("cannot sync from cloud in mode %q", sync.mode)
}
klog.V(2).Infof("Updating node spec with alias range, node.PodCIDR = %v", aliasRange)
logger := klog.FromContext(ctx)
logger.V(2).Info("Updating node spec with alias range", "podCIDR", aliasRange)
if err := sync.set.Occupy(aliasRange); err != nil {
klog.Errorf("Error occupying range %v for node %v", aliasRange, sync.nodeName)
logger.Error(nil, "Error occupying range for node", "node", klog.KRef("", sync.nodeName), "alias", aliasRange)
return err
}
if err := sync.kubeAPI.UpdateNodePodCIDR(ctx, node, aliasRange); err != nil {
klog.Errorf("Could not update node %q PodCIDR to %v: %v", node.Name, aliasRange, err)
logger.Error(err, "Could not update node PodCIDR", "node", klog.KObj(node), "podCIDR", aliasRange)
return err
}
klog.V(2).Infof("Node %q PodCIDR set to %v", node.Name, aliasRange)
logger.V(2).Info("Node PodCIDR updated", "node", klog.KObj(node), "podCIDR", aliasRange)
if err := sync.kubeAPI.UpdateNodeNetworkUnavailable(node.Name, false); err != nil {
klog.Errorf("Could not update node NetworkUnavailable status to false: %v", err)
logger.Error(err, "Could not update node NetworkUnavailable status to false")
return err
}
klog.V(2).Infof("Updated node %q PodCIDR from cloud alias %v", node.Name, aliasRange)
logger.V(2).Info("Updated node PodCIDR from cloud alias", "node", klog.KObj(node), "alias", aliasRange)
return nil
}
@ -283,29 +282,29 @@ func (op *updateOp) updateAliasFromNode(ctx context.Context, sync *NodeSync, nod
}
_, aliasRange, err := netutils.ParseCIDRSloppy(node.Spec.PodCIDR)
logger := klog.FromContext(ctx)
if err != nil {
klog.Errorf("Could not parse PodCIDR (%q) for node %q: %v",
node.Spec.PodCIDR, node.Name, err)
logger.Error(err, "Could not parse PodCIDR for node", "node", klog.KObj(node), "podCIDR", node.Spec.PodCIDR)
return err
}
if err := sync.set.Occupy(aliasRange); err != nil {
klog.Errorf("Error occupying range %v for node %v", aliasRange, sync.nodeName)
logger.Error(nil, "Error occupying range for node", "node", klog.KRef("", sync.nodeName), "alias", aliasRange)
return err
}
if err := sync.cloudAlias.AddAlias(ctx, node, aliasRange); err != nil {
klog.Errorf("Could not add alias %v for node %q: %v", aliasRange, node.Name, err)
logger.Error(err, "Could not add alias for node", "node", klog.KObj(node), "alias", aliasRange)
return err
}
if err := sync.kubeAPI.UpdateNodeNetworkUnavailable(node.Name, false); err != nil {
klog.Errorf("Could not update node NetworkUnavailable status to false: %v", err)
logger.Error(err, "Could not update node NetworkUnavailable status to false")
return err
}
klog.V(2).Infof("Updated node %q cloud alias with node spec, node.PodCIDR = %v",
node.Name, node.Spec.PodCIDR)
logger.V(2).Info("Updated node cloud alias with node spec", "node", klog.KObj(node), "podCIDR", node.Spec.PodCIDR)
return nil
}
@ -326,22 +325,23 @@ func (op *updateOp) allocateRange(ctx context.Context, sync *NodeSync, node *v1.
// If addAlias returns a hard error, cidrRange will be leaked as there
// is no durable record of the range. The missing space will be
// recovered on the next restart of the controller.
logger := klog.FromContext(ctx)
if err := sync.cloudAlias.AddAlias(ctx, node, cidrRange); err != nil {
klog.Errorf("Could not add alias %v for node %q: %v", cidrRange, node.Name, err)
logger.Error(err, "Could not add alias for node", "node", klog.KObj(node), "alias", cidrRange)
return err
}
if err := sync.kubeAPI.UpdateNodePodCIDR(ctx, node, cidrRange); err != nil {
klog.Errorf("Could not update node %q PodCIDR to %v: %v", node.Name, cidrRange, err)
logger.Error(err, "Could not update node PodCIDR", "node", klog.KObj(node), "podCIDR", cidrRange)
return err
}
if err := sync.kubeAPI.UpdateNodeNetworkUnavailable(node.Name, false); err != nil {
klog.Errorf("Could not update node NetworkUnavailable status to false: %v", err)
logger.Error(err, "Could not update node NetworkUnavailable status to false")
return err
}
klog.V(2).Infof("Allocated PodCIDR %v for node %q", cidrRange, node.Name)
logger.V(2).Info("Allocated PodCIDR for node", "node", klog.KObj(node), "podCIDR", cidrRange)
return nil
}
@ -358,25 +358,23 @@ func (op *deleteOp) String() string {
return fmt.Sprintf("deleteOp(%q,%v)", op.node.Name, op.node.Spec.PodCIDR)
}
func (op *deleteOp) run(sync *NodeSync) error {
klog.V(3).Infof("Running deleteOp %+v", op)
func (op *deleteOp) run(logger klog.Logger, sync *NodeSync) error {
logger.V(3).Info("Running deleteOp", "deleteOp", op)
if op.node.Spec.PodCIDR == "" {
klog.V(2).Infof("Node %q was deleted, node had no PodCIDR range assigned", op.node.Name)
logger.V(2).Info("Node was deleted, node had no PodCIDR range assigned", "node", klog.KObj(op.node))
return nil
}
_, cidrRange, err := netutils.ParseCIDRSloppy(op.node.Spec.PodCIDR)
if err != nil {
klog.Errorf("Deleted node %q has an invalid podCIDR %q: %v",
op.node.Name, op.node.Spec.PodCIDR, err)
logger.Error(err, "Deleted node has an invalid podCIDR", "node", klog.KObj(op.node), "podCIDR", op.node.Spec.PodCIDR)
sync.kubeAPI.EmitNodeWarningEvent(op.node.Name, InvalidPodCIDR,
"Node %q has an invalid PodCIDR: %q", op.node.Name, op.node.Spec.PodCIDR)
return nil
}
sync.set.Release(cidrRange)
klog.V(2).Infof("Node %q was deleted, releasing CIDR range %v",
op.node.Name, op.node.Spec.PodCIDR)
logger.V(2).Info("Node was deleted, releasing CIDR range", "node", klog.KObj(op.node), "podCIDR", op.node.Spec.PodCIDR)
return nil
}
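syncOp now threads a klog.Logger through run explicitly instead of relying on klog's global functions. A minimal sketch of that shape (types are illustrative):

package example

import "k8s.io/klog/v2"

type syncOp interface {
	run(logger klog.Logger) error
}

type updateOp struct{ name string }

func (op *updateOp) run(logger klog.Logger) error {
	logger.V(3).Info("Running updateOp", "node", klog.KRef("", op.name))
	return nil
}

// runAll drives each operation with the caller's logger, so every log line
// carries whatever names and values the caller attached to it.
func runAll(logger klog.Logger, ops []syncOp) {
	for _, op := range ops {
		if err := op.run(logger); err != nil {
			logger.Error(err, "Sync operation failed")
		}
	}
}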

View File

@ -26,6 +26,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/klog/v2"
"k8s.io/klog/v2/ktesting"
"k8s.io/kubernetes/pkg/controller/nodeipam/ipam/cidrset"
"k8s.io/kubernetes/pkg/controller/nodeipam/ipam/test"
netutils "k8s.io/utils/net"
@ -200,7 +201,8 @@ func TestNodeSyncUpdate(t *testing.T) {
doneChan := make(chan struct{})
// Do a single step of the loop.
go sync.Loop(doneChan)
logger, _ := ktesting.NewTestContext(t)
go sync.Loop(logger, doneChan)
sync.Update(tc.node)
close(sync.opChan)
<-doneChan
@ -230,8 +232,8 @@ func TestNodeSyncResync(t *testing.T) {
cidr, _ := cidrset.NewCIDRSet(clusterCIDRRange, 24)
sync := New(fake, fake, fake, SyncFromCluster, "node1", cidr)
doneChan := make(chan struct{})
go sync.Loop(doneChan)
logger, _ := ktesting.NewTestContext(t)
go sync.Loop(logger, doneChan)
<-fake.reportChan
close(sync.opChan)
// Unblock loop().
@ -275,7 +277,8 @@ func TestNodeSyncDelete(t *testing.T) {
doneChan := make(chan struct{})
// Do a single step of the loop.
go sync.Loop(doneChan)
logger, _ := ktesting.NewTestContext(t)
go sync.Loop(logger, doneChan)
sync.Delete(tc.node)
<-doneChan
tc.fake.dumpTrace()

View File

@ -32,6 +32,7 @@ import (
)
func createLegacyIPAM(
logger klog.Logger,
ic *Controller,
nodeInformer coreinformers.NodeInformer,
cloud cloudprovider.Interface,
@ -58,14 +59,16 @@ func createLegacyIPAM(
cidr = clusterCIDRs[0]
}
if len(clusterCIDRs) > 1 {
klog.Warningf("Multiple cidrs were configured with FromCluster or FromCloud. cidrs except first one were discarded")
logger.Info("Multiple cidrs were configured with FromCluster or FromCloud. cidrs except first one were discarded")
}
ipamc, err := ipam.NewController(cfg, kubeClient, cloud, cidr, serviceCIDR, nodeCIDRMaskSizes[0])
if err != nil {
klog.Fatalf("Error creating ipam controller: %v", err)
logger.Error(err, "Error creating ipam controller")
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
}
if err := ipamc.Start(nodeInformer); err != nil {
klog.Fatalf("Error trying to Init(): %v", err)
if err := ipamc.Start(logger, nodeInformer); err != nil {
logger.Error(err, "Error trying to Init()")
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
}
return ipamc
}
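klog.Fatalf combined logging and exiting; the contextual form spells out both steps so the message goes through the configured Logger before the process terminates. The migration pattern in isolation:

package example

import "k8s.io/klog/v2"

// mustInit runs init and aborts the process if it fails (init is a placeholder).
func mustInit(logger klog.Logger, init func() error) {
	if err := init(); err != nil {
		// Log through the contextual logger, then flush and exit,
		// which is what klog.Fatalf used to do in one call.
		logger.Error(err, "Initialization failed")
		klog.FlushAndExit(klog.ExitFlushTimeout, 1)
	}
}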

View File

@ -17,6 +17,7 @@ limitations under the License.
package nodeipam
import (
"context"
"net"
"time"
@ -50,7 +51,7 @@ const (
// legacy mode. It is needed to ensure correct building for
// both provider-specific and providerless environments.
type ipamController interface {
Run(<-chan struct{})
Run(ctx context.Context)
}
// Controller is the controller that manages node ipam state.
@ -79,6 +80,7 @@ type Controller struct {
// podCIDRs it has already allocated to nodes. Since we don't allow podCIDR changes
// currently, this should be handled as a fatal error.
func NewNodeIpamController(
ctx context.Context,
nodeInformer coreinformers.NodeInformer,
clusterCIDRInformer networkinginformers.ClusterCIDRInformer,
cloud cloudprovider.Interface,
@ -89,20 +91,24 @@ func NewNodeIpamController(
nodeCIDRMaskSizes []int,
allocatorType ipam.CIDRAllocatorType) (*Controller, error) {
logger := klog.FromContext(ctx)
if kubeClient == nil {
klog.Fatalf("kubeClient is nil when starting Controller")
logger.Error(nil, "kubeClient is nil when starting Controller")
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
}
// Cloud CIDR allocator does not rely on clusterCIDR or nodeCIDRMaskSize for allocation.
if allocatorType != ipam.CloudAllocatorType {
if len(clusterCIDRs) == 0 {
klog.Fatal("Controller: Must specify --cluster-cidr if --allocate-node-cidrs is set")
logger.Error(nil, "Controller: Must specify --cluster-cidr if --allocate-node-cidrs is set")
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
}
for idx, cidr := range clusterCIDRs {
mask := cidr.Mask
if maskSize, _ := mask.Size(); maskSize > nodeCIDRMaskSizes[idx] {
klog.Fatal("Controller: Invalid --cluster-cidr, mask size of cluster CIDR must be less than or equal to --node-cidr-mask-size configured for CIDR family")
logger.Error(nil, "Controller: Invalid --cluster-cidr, mask size of cluster CIDR must be less than or equal to --node-cidr-mask-size configured for CIDR family")
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
}
}
}
@ -120,7 +126,7 @@ func NewNodeIpamController(
// TODO: Abstract this check into a generic controller manager should run method.
if ic.allocatorType == ipam.IPAMFromClusterAllocatorType || ic.allocatorType == ipam.IPAMFromCloudAllocatorType {
ic.legacyIPAM = createLegacyIPAM(ic, nodeInformer, cloud, kubeClient, clusterCIDRs, serviceCIDR, nodeCIDRMaskSizes)
ic.legacyIPAM = createLegacyIPAM(logger, ic, nodeInformer, cloud, kubeClient, clusterCIDRs, serviceCIDR, nodeCIDRMaskSizes)
} else {
var err error
@ -131,7 +137,7 @@ func NewNodeIpamController(
NodeCIDRMaskSizes: nodeCIDRMaskSizes,
}
ic.cidrAllocator, err = ipam.New(kubeClient, cloud, nodeInformer, clusterCIDRInformer, ic.allocatorType, allocatorParams)
ic.cidrAllocator, err = ipam.New(ctx, kubeClient, cloud, nodeInformer, clusterCIDRInformer, ic.allocatorType, allocatorParams)
if err != nil {
return nil, err
}
@ -144,33 +150,32 @@ func NewNodeIpamController(
}
// Run starts an asynchronous loop that monitors the status of cluster nodes.
func (nc *Controller) Run(stopCh <-chan struct{}) {
func (nc *Controller) Run(ctx context.Context) {
defer utilruntime.HandleCrash()
// Start event processing pipeline.
nc.eventBroadcaster.StartStructuredLogging(0)
nc.eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: nc.kubeClient.CoreV1().Events("")})
defer nc.eventBroadcaster.Shutdown()
klog.FromContext(ctx).Info("Starting ipam controller")
defer klog.FromContext(ctx).Info("Shutting down ipam controller")
klog.Infof("Starting ipam controller")
defer klog.Infof("Shutting down ipam controller")
if !cache.WaitForNamedCacheSync("node", stopCh, nc.nodeInformerSynced) {
if !cache.WaitForNamedCacheSync("node", ctx.Done(), nc.nodeInformerSynced) {
return
}
if nc.allocatorType == ipam.IPAMFromClusterAllocatorType || nc.allocatorType == ipam.IPAMFromCloudAllocatorType {
go nc.legacyIPAM.Run(stopCh)
go nc.legacyIPAM.Run(ctx)
} else {
go nc.cidrAllocator.Run(stopCh)
go nc.cidrAllocator.Run(ctx)
}
<-stopCh
<-ctx.Done()
}
// RunWithMetrics is a wrapper for Run that also tracks starting and stopping of the nodeipam controller with additional metric
func (nc *Controller) RunWithMetrics(stopCh <-chan struct{}, controllerManagerMetrics *controllersmetrics.ControllerManagerMetrics) {
func (nc *Controller) RunWithMetrics(ctx context.Context, controllerManagerMetrics *controllersmetrics.ControllerManagerMetrics) {
controllerManagerMetrics.ControllerStarted("nodeipam")
defer controllerManagerMetrics.ControllerStopped("nodeipam")
nc.Run(stopCh)
nc.Run(ctx)
}
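RunWithMetrics brackets the blocking Run call with started/stopped bookkeeping. The same shape fits any blocking controller loop; a sketch with a hypothetical metrics recorder:

package example

import "context"

// recorder is a stand-in for controller-manager metrics.
type recorder interface {
	ControllerStarted(name string)
	ControllerStopped(name string)
}

func runWithMetrics(ctx context.Context, name string, m recorder, run func(context.Context)) {
	m.ControllerStarted(name)
	defer m.ControllerStopped(name)
	run(ctx) // blocks until ctx is cancelled
}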

View File

@ -20,6 +20,7 @@ limitations under the License.
package nodeipam
import (
"context"
"net"
"os"
"os/exec"
@ -30,6 +31,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes/fake"
"k8s.io/klog/v2/ktesting"
"k8s.io/kubernetes/pkg/controller"
"k8s.io/kubernetes/pkg/controller/nodeipam/ipam"
"k8s.io/kubernetes/pkg/controller/testutil"
@ -37,7 +39,7 @@ import (
netutils "k8s.io/utils/net"
)
func newTestNodeIpamController(clusterCIDR []*net.IPNet, serviceCIDR *net.IPNet, secondaryServiceCIDR *net.IPNet, nodeCIDRMaskSizes []int, allocatorType ipam.CIDRAllocatorType) (*Controller, error) {
func newTestNodeIpamController(ctx context.Context, clusterCIDR []*net.IPNet, serviceCIDR *net.IPNet, secondaryServiceCIDR *net.IPNet, nodeCIDRMaskSizes []int, allocatorType ipam.CIDRAllocatorType) (*Controller, error) {
clientSet := fake.NewSimpleClientset()
fakeNodeHandler := &testutil.FakeNodeHandler{
Existing: []*v1.Node{
@ -56,6 +58,7 @@ func newTestNodeIpamController(clusterCIDR []*net.IPNet, serviceCIDR *net.IPNet,
fakeGCE := gce.NewFakeGCECloud(gce.DefaultTestClusterValues())
return NewNodeIpamController(
ctx,
fakeNodeInformer, fakeClusterCIDRInformer, fakeGCE, clientSet,
clusterCIDR, serviceCIDR, secondaryServiceCIDR, nodeCIDRMaskSizes, allocatorType,
)
@ -92,6 +95,7 @@ func TestNewNodeIpamControllerWithCIDRMasks(t *testing.T) {
{"invalid_serviceCIDR_contains_clusterCIDR", "10.0.0.0/23", "10.0.0.0/21", emptyServiceCIDR, []int{24}, ipam.IPAMFromClusterAllocatorType, true},
{"invalid_CIDR_mask_size", "10.0.0.0/24,2000::/64", "10.1.0.0/21", emptyServiceCIDR, []int{24, 48}, ipam.IPAMFromClusterAllocatorType, true},
} {
_, ctx := ktesting.NewTestContext(t)
t.Run(tc.desc, func(t *testing.T) {
clusterCidrs, _ := netutils.ParseCIDRs(strings.Split(tc.clusterCIDR, ","))
_, serviceCIDRIpNet, _ := netutils.ParseCIDRSloppy(tc.serviceCIDR)
@ -99,7 +103,7 @@ func TestNewNodeIpamControllerWithCIDRMasks(t *testing.T) {
if os.Getenv("EXIT_ON_FATAL") == "1" {
// This is the subprocess which runs the actual code.
newTestNodeIpamController(clusterCidrs, serviceCIDRIpNet, secondaryServiceCIDRIpNet, tc.maskSize, tc.allocatorType)
newTestNodeIpamController(ctx, clusterCidrs, serviceCIDRIpNet, secondaryServiceCIDRIpNet, tc.maskSize, tc.allocatorType)
return
}
// This is the host process that monitors the exit code of the subprocess.

View File

@ -20,6 +20,7 @@ limitations under the License.
package nodeipam
import (
"context"
"net"
coreinformers "k8s.io/client-go/informers/core/v1"
@ -31,11 +32,12 @@ import (
type fakeController struct {
}
func (f *fakeController) Run(stopCh <-chan struct{}) {
<-stopCh
func (f *fakeController) Run(ctx context.Context) {
<-ctx.Done()
}
func createLegacyIPAM(
logger klog.Logger,
ic *Controller,
nodeInformer coreinformers.NodeInformer,
cloud cloudprovider.Interface,
@ -44,6 +46,7 @@ func createLegacyIPAM(
serviceCIDR *net.IPNet,
nodeCIDRMaskSizes []int,
) *fakeController {
klog.Fatal("Error trying to Init(): legacy cloud provider support disabled at build time")
logger.Error(nil, "Error trying to Init(): legacy cloud provider support disabled at build time")
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
return &fakeController{}
}

View File

@ -368,9 +368,10 @@ func NewNodeLifecycleController(
unhealthyZoneThreshold float32,
runTaintManager bool,
) (*Controller, error) {
logger := klog.LoggerWithName(klog.FromContext(ctx), "NodeLifecycleController")
if kubeClient == nil {
klog.Fatalf("kubeClient is nil when starting Controller")
logger.Error(nil, "kubeClient is nil when starting nodelifecycle Controller")
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
}
eventBroadcaster := record.NewBroadcaster()
@ -428,12 +429,12 @@ func NewNodeLifecycleController(
if !isPod {
deletedState, ok := obj.(cache.DeletedFinalStateUnknown)
if !ok {
klog.Errorf("Received unexpected object: %v", obj)
logger.Error(nil, "Received unexpected object", "object", obj)
return
}
pod, ok = deletedState.Obj.(*v1.Pod)
if !ok {
klog.Errorf("DeletedFinalStateUnknown contained non-Pod object: %v", deletedState.Obj)
logger.Error(nil, "DeletedFinalStateUnknown contained non-Pod object", "object", deletedState.Obj)
return
}
}
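Informer delete handlers can receive a cache.DeletedFinalStateUnknown tombstone instead of the object when a watch was interrupted, so both unwrap steps above are required. A compact helper illustrating the pattern (not the controller's actual code):

package example

import (
	v1 "k8s.io/api/core/v1"
	"k8s.io/client-go/tools/cache"
	"k8s.io/klog/v2"
)

// podFromDeleteEvent unwraps the object handed to a delete handler,
// returning nil if neither a Pod nor a Pod tombstone was received.
func podFromDeleteEvent(logger klog.Logger, obj interface{}) *v1.Pod {
	if pod, ok := obj.(*v1.Pod); ok {
		return pod
	}
	tombstone, ok := obj.(cache.DeletedFinalStateUnknown)
	if !ok {
		logger.Error(nil, "Received unexpected object", "object", obj)
		return nil
	}
	pod, ok := tombstone.Obj.(*v1.Pod)
	if !ok {
		logger.Error(nil, "DeletedFinalStateUnknown contained non-Pod object", "object", tombstone.Obj)
		return nil
	}
	return pod
}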
@ -494,7 +495,7 @@ func NewNodeLifecycleController(
})
}
klog.Infof("Controller will reconcile labels.")
logger.Info("Controller will reconcile labels")
nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: controllerutil.CreateAddNodeHandler(func(node *v1.Node) error {
nc.nodeUpdateQueue.Add(node.Name)
@ -529,7 +530,8 @@ func (nc *Controller) Run(ctx context.Context) {
// Start events processing pipeline.
nc.broadcaster.StartStructuredLogging(0)
klog.Infof("Sending events to api server.")
logger := klog.FromContext(ctx)
logger.Info("Sending events to api server")
nc.broadcaster.StartRecordingToSink(
&v1core.EventSinkImpl{
Interface: v1core.New(nc.kubeClient.CoreV1().RESTClient()).Events(""),
@ -540,8 +542,8 @@ func (nc *Controller) Run(ctx context.Context) {
defer nc.nodeUpdateQueue.ShutDown()
defer nc.podUpdateQueue.ShutDown()
klog.Infof("Starting node controller")
defer klog.Infof("Shutting down node controller")
logger.Info("Starting node controller")
defer logger.Info("Shutting down node controller")
if !cache.WaitForNamedCacheSync("taint", ctx.Done(), nc.leaseInformerSynced, nc.nodeInformerSynced, nc.podInformerSynced, nc.daemonSetInformerSynced) {
return
@ -578,7 +580,7 @@ func (nc *Controller) Run(ctx context.Context) {
// Incorporate the results of node health signal pushed from kubelet to master.
go wait.UntilWithContext(ctx, func(ctx context.Context) {
if err := nc.monitorNodeHealth(ctx); err != nil {
klog.Errorf("Error monitoring node health: %v", err)
logger.Error(err, "Error monitoring node health")
}
}, nc.nodeMonitorPeriod)
@ -586,6 +588,7 @@ func (nc *Controller) Run(ctx context.Context) {
}
func (nc *Controller) doNodeProcessingPassWorker(ctx context.Context) {
logger := klog.FromContext(ctx)
for {
obj, shutdown := nc.nodeUpdateQueue.Get()
// "nodeUpdateQueue" will be shutdown when "stopCh" closed;
@ -595,13 +598,13 @@ func (nc *Controller) doNodeProcessingPassWorker(ctx context.Context) {
}
nodeName := obj.(string)
if err := nc.doNoScheduleTaintingPass(ctx, nodeName); err != nil {
klog.Errorf("Failed to taint NoSchedule on node <%s>, requeue it: %v", nodeName, err)
logger.Error(err, "Failed to taint NoSchedule on node, requeue it", "node", klog.KRef("", nodeName))
// TODO(k82cn): Add nodeName back to the queue
}
// TODO: re-evaluate whether there are any labels that need to be
// reconciled in 1.19. Remove this function if it's no longer necessary.
if err := nc.reconcileNodeLabels(nodeName); err != nil {
klog.Errorf("Failed to reconcile labels for node <%s>, requeue it: %v", nodeName, err)
logger.Error(err, "Failed to reconcile labels for node, requeue it", "node", klog.KRef("", nodeName))
// TODO(yujuhong): Add nodeName back to the queue
}
nc.nodeUpdateQueue.Done(nodeName)
@ -677,6 +680,7 @@ func (nc *Controller) doNoExecuteTaintingPass(ctx context.Context) {
zoneNoExecuteTainterKeys = append(zoneNoExecuteTainterKeys, k)
}
}()
logger := klog.FromContext(ctx)
for _, k := range zoneNoExecuteTainterKeys {
var zoneNoExecuteTainterWorker *scheduler.RateLimitedTimedQueue
func() {
@ -689,13 +693,13 @@ func (nc *Controller) doNoExecuteTaintingPass(ctx context.Context) {
zoneNoExecuteTainterWorker = nc.zoneNoExecuteTainter[k]
}()
// Function should return 'false' and a time after which it should be retried, or 'true' if it shouldn't (it succeeded).
zoneNoExecuteTainterWorker.Try(func(value scheduler.TimedValue) (bool, time.Duration) {
zoneNoExecuteTainterWorker.Try(logger, func(value scheduler.TimedValue) (bool, time.Duration) {
node, err := nc.nodeLister.Get(value.Value)
if apierrors.IsNotFound(err) {
klog.Warningf("Node %v no longer present in nodeLister!", value.Value)
logger.Info("Node no longer present in nodeLister", "node", klog.KRef("", value.Value))
return true, 0
} else if err != nil {
klog.Warningf("Failed to get Node %v from the nodeLister: %v", value.Value, err)
logger.Info("Failed to get Node from the nodeLister", "node", klog.KRef("", value.Value), "err", err)
// retry in 50 millisecond
return false, 50 * time.Millisecond
}
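Each Try callback above returns (done, retryAfter): true removes the item from the queue, false schedules a retry after the given delay. A schematic callback showing that contract (fetch is a placeholder):

package example

import (
	"time"

	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/klog/v2"
)

func makeCallback(logger klog.Logger, fetch func(name string) error) func(string) (bool, time.Duration) {
	return func(name string) (bool, time.Duration) {
		err := fetch(name)
		switch {
		case apierrors.IsNotFound(err):
			logger.Info("Item no longer present", "name", name)
			return true, 0 // done: nothing left to do
		case err != nil:
			return false, 50 * time.Millisecond // transient failure, retry shortly
		}
		return true, 0 // success, leave the queue
	}
}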
@ -712,7 +716,7 @@ func (nc *Controller) doNoExecuteTaintingPass(ctx context.Context) {
oppositeTaint = *NotReadyTaintTemplate
default:
// It seems that the Node is ready again, so there's no need to taint it.
klog.V(4).Infof("Node %v was in a taint queue, but it's ready now. Ignoring taint request.", value.Value)
logger.V(4).Info("Node was in a taint queue, but it's ready now. Ignoring taint request", "node", klog.KRef("", value.Value))
return true, 0
}
result := controllerutil.SwapNodeControllerTaint(ctx, nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{&oppositeTaint}, node)
@ -742,7 +746,7 @@ func (nc *Controller) doEvictionPass(ctx context.Context) {
zonePodEvictorKeys = append(zonePodEvictorKeys, k)
}
}()
logger := klog.FromContext(ctx)
for _, k := range zonePodEvictorKeys {
var zonePodEvictionWorker *scheduler.RateLimitedTimedQueue
func() {
@ -756,12 +760,12 @@ func (nc *Controller) doEvictionPass(ctx context.Context) {
}()
// Function should return 'false' and a time after which it should be retried, or 'true' if it shouldn't (it succeeded).
zonePodEvictionWorker.Try(func(value scheduler.TimedValue) (bool, time.Duration) {
zonePodEvictionWorker.Try(logger, func(value scheduler.TimedValue) (bool, time.Duration) {
node, err := nc.nodeLister.Get(value.Value)
if apierrors.IsNotFound(err) {
klog.Warningf("Node %v no longer present in nodeLister!", value.Value)
logger.Info("Node no longer present in nodeLister", "node", klog.KRef("", value.Value))
} else if err != nil {
klog.Warningf("Failed to get Node %v from the nodeLister: %v", value.Value, err)
logger.Info("Failed to get Node from the nodeLister", "node", klog.KRef("", value.Value), "err", err)
}
nodeUID, _ := value.UID.(string)
pods, err := nc.getPodsAssignedToNode(value.Value)
@ -778,10 +782,10 @@ func (nc *Controller) doEvictionPass(ctx context.Context) {
return false, 0
}
if !nc.nodeEvictionMap.setStatus(value.Value, evicted) {
klog.V(2).Infof("node %v was unregistered in the meantime - skipping setting status", value.Value)
logger.V(2).Info("Node was unregistered in the meantime - skipping setting status", "node", klog.KRef("", value.Value))
}
if remaining {
klog.Infof("Pods awaiting deletion due to Controller eviction")
logger.Info("Pods awaiting deletion due to Controller eviction")
}
if node != nil {
@ -811,25 +815,24 @@ func (nc *Controller) monitorNodeHealth(ctx context.Context) error {
return err
}
added, deleted, newZoneRepresentatives := nc.classifyNodes(nodes)
logger := klog.FromContext(ctx)
for i := range newZoneRepresentatives {
nc.addPodEvictorForNewZone(newZoneRepresentatives[i])
nc.addPodEvictorForNewZone(logger, newZoneRepresentatives[i])
}
for i := range added {
klog.V(1).Infof("Controller observed a new Node: %#v", added[i].Name)
logger.V(1).Info("Controller observed a new Node", "node", klog.KRef("", added[i].Name))
controllerutil.RecordNodeEvent(nc.recorder, added[i].Name, string(added[i].UID), v1.EventTypeNormal, "RegisteredNode", fmt.Sprintf("Registered Node %v in Controller", added[i].Name))
nc.knownNodeSet[added[i].Name] = added[i]
nc.addPodEvictorForNewZone(added[i])
nc.addPodEvictorForNewZone(logger, added[i])
if nc.runTaintManager {
nc.markNodeAsReachable(ctx, added[i])
} else {
nc.cancelPodEviction(added[i])
nc.cancelPodEviction(logger, added[i])
}
}
for i := range deleted {
klog.V(1).Infof("Controller observed a Node deletion: %v", deleted[i].Name)
logger.V(1).Info("Controller observed a Node deletion", "node", klog.KRef("", deleted[i].Name))
controllerutil.RecordNodeEvent(nc.recorder, deleted[i].Name, string(deleted[i].UID), v1.EventTypeNormal, "RemovingNode", fmt.Sprintf("Removing Node %v from Controller", deleted[i].Name))
delete(nc.knownNodeSet, deleted[i].Name)
}
@ -856,13 +859,12 @@ func (nc *Controller) monitorNodeHealth(ctx context.Context) error {
name := node.Name
node, err = nc.kubeClient.CoreV1().Nodes().Get(ctx, name, metav1.GetOptions{})
if err != nil {
klog.Errorf("Failed while getting a Node to retry updating node health. Probably Node %s was deleted.", name)
logger.Error(nil, "Failed while getting a Node to retry updating node health. Probably Node was deleted", "node", klog.KRef("", name))
return false, err
}
return false, nil
}); err != nil {
klog.Errorf("Update health of Node '%v' from Controller error: %v. "+
"Skipping - no pods will be evicted.", node.Name, err)
logger.Error(err, "Update health of Node from Controller error, Skipping - no pods will be evicted", "node", klog.KObj(node))
return
}
@ -925,40 +927,35 @@ func (nc *Controller) monitorNodeHealth(ctx context.Context) error {
func (nc *Controller) processTaintBaseEviction(ctx context.Context, node *v1.Node, observedReadyCondition *v1.NodeCondition) {
decisionTimestamp := nc.now()
// Check eviction timeout against decisionTimestamp
logger := klog.FromContext(ctx)
switch observedReadyCondition.Status {
case v1.ConditionFalse:
// We want to update the taint straight away if Node is already tainted with the UnreachableTaint
if taintutils.TaintExists(node.Spec.Taints, UnreachableTaintTemplate) {
taintToAdd := *NotReadyTaintTemplate
if !controllerutil.SwapNodeControllerTaint(ctx, nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{UnreachableTaintTemplate}, node) {
klog.Errorf("Failed to instantly swap UnreachableTaint to NotReadyTaint. Will try again in the next cycle.")
logger.Error(nil, "Failed to instantly swap UnreachableTaint to NotReadyTaint. Will try again in the next cycle")
}
} else if nc.markNodeForTainting(node, v1.ConditionFalse) {
klog.V(2).Infof("Node %v is NotReady as of %v. Adding it to the Taint queue.",
node.Name,
decisionTimestamp,
)
logger.V(2).Info("Node is NotReady. Adding it to the Taint queue", "node", klog.KObj(node), "timeStamp", decisionTimestamp)
}
case v1.ConditionUnknown:
// We want to update the taint straight away if Node is already tainted with the UnreachableTaint
if taintutils.TaintExists(node.Spec.Taints, NotReadyTaintTemplate) {
taintToAdd := *UnreachableTaintTemplate
if !controllerutil.SwapNodeControllerTaint(ctx, nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{NotReadyTaintTemplate}, node) {
klog.Errorf("Failed to instantly swap NotReadyTaint to UnreachableTaint. Will try again in the next cycle.")
logger.Error(nil, "Failed to instantly swap NotReadyTaint to UnreachableTaint. Will try again in the next cycle")
}
} else if nc.markNodeForTainting(node, v1.ConditionUnknown) {
klog.V(2).Infof("Node %v is unresponsive as of %v. Adding it to the Taint queue.",
node.Name,
decisionTimestamp,
)
logger.V(2).Info("Node is unresponsive. Adding it to the Taint queue", "node", klog.KObj(node), "timeStamp", decisionTimestamp)
}
case v1.ConditionTrue:
removed, err := nc.markNodeAsReachable(ctx, node)
if err != nil {
klog.Errorf("Failed to remove taints from node %v. Will retry in next iteration.", node.Name)
logger.Error(nil, "Failed to remove taints from node. Will retry in next iteration", "node", klog.KObj(node))
}
if removed {
klog.V(2).Infof("Node %s is healthy again, removing all taints", node.Name)
logger.V(2).Info("Node is healthy again, removing all taints", "node", klog.KObj(node))
}
}
}
@ -970,6 +967,7 @@ func (nc *Controller) processNoTaintBaseEviction(ctx context.Context, node *v1.N
return fmt.Errorf("health data doesn't exist for node %q", node.Name)
}
// Check eviction timeout against decisionTimestamp
logger := klog.FromContext(ctx)
switch observedReadyCondition.Status {
case v1.ConditionFalse:
if decisionTimestamp.After(nodeHealthData.readyTransitionTimestamp.Add(nc.podEvictionTimeout)) {
@ -978,11 +976,11 @@ func (nc *Controller) processNoTaintBaseEviction(ctx context.Context, node *v1.N
return err
}
if enqueued {
klog.V(2).Infof("Node is NotReady. Adding Pods on Node %s to eviction queue: %v is later than %v + %v",
node.Name,
decisionTimestamp,
nodeHealthData.readyTransitionTimestamp,
nc.podEvictionTimeout,
logger.V(2).Info("Node is NotReady. Adding Pods on Node to eviction queue: decisionTimestamp is later than readyTransitionTimestamp + podEvictionTimeout",
"node", klog.KObj(node),
"decisionTimestamp", decisionTimestamp,
"readyTransitionTimestamp", nodeHealthData.readyTransitionTimestamp,
"podEvictionTimeout", nc.podEvictionTimeout,
)
}
}
@ -993,17 +991,17 @@ func (nc *Controller) processNoTaintBaseEviction(ctx context.Context, node *v1.N
return err
}
if enqueued {
klog.V(2).Infof("Node is unresponsive. Adding Pods on Node %s to eviction queues: %v is later than %v + %v",
node.Name,
decisionTimestamp,
nodeHealthData.readyTransitionTimestamp,
nc.podEvictionTimeout-gracePeriod,
logger.V(2).Info("Node is unresponsive. Adding Pods on Node to eviction queues: decisionTimestamp is later than readyTransitionTimestamp + podEvictionTimeout-gracePeriod",
"node", klog.KObj(node),
"decisionTimestamp", decisionTimestamp,
"readyTransitionTimestamp", nodeHealthData.readyTransitionTimestamp,
"podEvictionTimeoutGracePeriod", nc.podEvictionTimeout-gracePeriod,
)
}
}
case v1.ConditionTrue:
if nc.cancelPodEviction(node) {
klog.V(2).Infof("Node %s is ready again, cancelled pod eviction", node.Name)
if nc.cancelPodEviction(logger, node) {
logger.V(2).Info("Node is ready again, cancelled pod eviction", "node", klog.KObj(node))
}
}
return nil
@ -1076,23 +1074,23 @@ func (nc *Controller) tryUpdateNodeHealth(ctx context.Context, node *v1.Node) (t
_, savedCondition = controllerutil.GetNodeCondition(nodeHealth.status, v1.NodeReady)
savedLease = nodeHealth.lease
}
logger := klog.FromContext(ctx)
if nodeHealth == nil {
klog.Warningf("Missing timestamp for Node %s. Assuming now as a timestamp.", node.Name)
logger.Info("Missing timestamp for Node. Assuming now as a timestamp", "node", klog.KObj(node))
nodeHealth = &nodeHealthData{
status: &node.Status,
probeTimestamp: nc.now(),
readyTransitionTimestamp: nc.now(),
}
} else if savedCondition == nil && currentReadyCondition != nil {
klog.V(1).Infof("Creating timestamp entry for newly observed Node %s", node.Name)
logger.V(1).Info("Creating timestamp entry for newly observed Node", "node", klog.KObj(node))
nodeHealth = &nodeHealthData{
status: &node.Status,
probeTimestamp: nc.now(),
readyTransitionTimestamp: nc.now(),
}
} else if savedCondition != nil && currentReadyCondition == nil {
klog.Errorf("ReadyCondition was removed from Status of Node %s", node.Name)
logger.Error(nil, "ReadyCondition was removed from Status of Node", "node", klog.KObj(node))
// TODO: figure out what to do in this case. For now we do the same thing as above.
nodeHealth = &nodeHealthData{
status: &node.Status,
@ -1104,15 +1102,15 @@ func (nc *Controller) tryUpdateNodeHealth(ctx context.Context, node *v1.Node) (t
// If ReadyCondition changed since the last time we checked, we update the transition timestamp to "now",
// otherwise we leave it as it is.
if savedCondition.LastTransitionTime != currentReadyCondition.LastTransitionTime {
klog.V(3).Infof("ReadyCondition for Node %s transitioned from %v to %v", node.Name, savedCondition, currentReadyCondition)
logger.V(3).Info("ReadyCondition for Node transitioned from savedCondition to currentReadyCondition", "node", klog.KObj(node), "savedCondition", savedCondition, "currentReadyCondition", currentReadyCondition)
transitionTime = nc.now()
} else {
transitionTime = nodeHealth.readyTransitionTimestamp
}
if klogV := klog.V(5); klogV.Enabled() {
klogV.Infof("Node %s ReadyCondition updated. Updating timestamp: %+v vs %+v.", node.Name, nodeHealth.status, node.Status)
if loggerV := logger.V(5); loggerV.Enabled() {
loggerV.Info("Node ReadyCondition updated. Updating timestamp", "node", klog.KObj(node), "nodeHealthStatus", nodeHealth.status, "nodeStatus", node.Status)
} else {
klog.V(3).Infof("Node %s ReadyCondition updated. Updating timestamp.", node.Name)
logger.V(3).Info("Node ReadyCondition updated. Updating timestamp", "node", klog.KObj(node))
}
nodeHealth = &nodeHealthData{
status: &node.Status,
@ -1147,7 +1145,7 @@ func (nc *Controller) tryUpdateNodeHealth(ctx context.Context, node *v1.Node) (t
for _, nodeConditionType := range nodeConditionTypes {
_, currentCondition := controllerutil.GetNodeCondition(&node.Status, nodeConditionType)
if currentCondition == nil {
klog.V(2).Infof("Condition %v of node %v was never updated by kubelet", nodeConditionType, node.Name)
logger.V(2).Info("Condition of node was never updated by kubelet", "nodeConditionType", nodeConditionType, "node", klog.KObj(node))
node.Status.Conditions = append(node.Status.Conditions, v1.NodeCondition{
Type: nodeConditionType,
Status: v1.ConditionUnknown,
@ -1157,8 +1155,8 @@ func (nc *Controller) tryUpdateNodeHealth(ctx context.Context, node *v1.Node) (t
LastTransitionTime: nowTimestamp,
})
} else {
klog.V(2).Infof("node %v hasn't been updated for %+v. Last %v is: %+v",
node.Name, nc.now().Time.Sub(nodeHealth.probeTimestamp.Time), nodeConditionType, currentCondition)
logger.V(2).Info("Node hasn't been updated",
"node", klog.KObj(node), "duration", nc.now().Time.Sub(nodeHealth.probeTimestamp.Time), "nodeConditionType", nodeConditionType, "currentCondition", currentCondition)
if currentCondition.Status != v1.ConditionUnknown {
currentCondition.Status = v1.ConditionUnknown
currentCondition.Reason = "NodeStatusUnknown"
@ -1172,7 +1170,7 @@ func (nc *Controller) tryUpdateNodeHealth(ctx context.Context, node *v1.Node) (t
if !apiequality.Semantic.DeepEqual(currentReadyCondition, &observedReadyCondition) {
if _, err := nc.kubeClient.CoreV1().Nodes().UpdateStatus(ctx, node, metav1.UpdateOptions{}); err != nil {
klog.Errorf("Error updating node %s: %v", node.Name, err)
logger.Error(err, "Error updating node", "node", klog.KObj(node))
return gracePeriod, observedReadyCondition, currentReadyCondition, err
}
nodeHealth = &nodeHealthData{
@ -1191,6 +1189,7 @@ func (nc *Controller) tryUpdateNodeHealth(ctx context.Context, node *v1.Node) (t
func (nc *Controller) handleDisruption(ctx context.Context, zoneToNodeConditions map[string][]*v1.NodeCondition, nodes []*v1.Node) {
newZoneStates := map[string]ZoneState{}
allAreFullyDisrupted := true
logger := klog.FromContext(ctx)
for k, v := range zoneToNodeConditions {
zoneSize.WithLabelValues(k).Set(float64(len(v)))
unhealthy, newState := nc.computeZoneStateFunc(v)
@ -1201,7 +1200,7 @@ func (nc *Controller) handleDisruption(ctx context.Context, zoneToNodeConditions
}
newZoneStates[k] = newState
if _, had := nc.zoneStates[k]; !had {
klog.Errorf("Setting initial state for unseen zone: %v", k)
logger.Error(nil, "Setting initial state for unseen zone", "zone", k)
nc.zoneStates[k] = stateInitial
}
}
@ -1229,15 +1228,15 @@ func (nc *Controller) handleDisruption(ctx context.Context, zoneToNodeConditions
if !allAreFullyDisrupted || !allWasFullyDisrupted {
// We're switching to full disruption mode
if allAreFullyDisrupted {
klog.Info("Controller detected that all Nodes are not-Ready. Entering master disruption mode.")
logger.Info("Controller detected that all Nodes are not-Ready. Entering master disruption mode")
for i := range nodes {
if nc.runTaintManager {
_, err := nc.markNodeAsReachable(ctx, nodes[i])
if err != nil {
klog.Errorf("Failed to remove taints from Node %v", nodes[i].Name)
logger.Error(nil, "Failed to remove taints from Node", "node", klog.KObj(nodes[i]))
}
} else {
nc.cancelPodEviction(nodes[i])
nc.cancelPodEviction(logger, nodes[i])
}
}
// We stop all evictions.
@ -1256,7 +1255,7 @@ func (nc *Controller) handleDisruption(ctx context.Context, zoneToNodeConditions
}
// We're exiting full disruption mode
if allWasFullyDisrupted {
klog.Info("Controller detected that some Nodes are Ready. Exiting master disruption mode.")
logger.Info("Controller detected that some Nodes are Ready. Exiting master disruption mode")
// When exiting disruption mode update probe timestamps on all Nodes.
now := nc.now()
for i := range nodes {
@ -1279,7 +1278,7 @@ func (nc *Controller) handleDisruption(ctx context.Context, zoneToNodeConditions
if v == newState {
continue
}
klog.Infof("Controller detected that zone %v is now in state %v.", k, newState)
logger.Info("Controller detected that zone is now in new state", "zone", k, "newState", newState)
nc.setLimiterInZone(k, len(zoneToNodeConditions[k]), newState)
nc.zoneStates[k] = newState
}
@ -1317,12 +1316,13 @@ func (nc *Controller) doPodProcessingWorker(ctx context.Context) {
func (nc *Controller) processPod(ctx context.Context, podItem podUpdateItem) {
defer nc.podUpdateQueue.Done(podItem)
pod, err := nc.podLister.Pods(podItem.namespace).Get(podItem.name)
logger := klog.FromContext(ctx)
if err != nil {
if apierrors.IsNotFound(err) {
// If the pod was deleted, there is no need to requeue.
return
}
klog.Warningf("Failed to read pod %v/%v: %v.", podItem.namespace, podItem.name, err)
logger.Info("Failed to read pod", "pod", klog.KRef(podItem.namespace, podItem.name), "err", err)
nc.podUpdateQueue.AddRateLimited(podItem)
return
}
@ -1338,7 +1338,7 @@ func (nc *Controller) processPod(ctx context.Context, podItem podUpdateItem) {
node, err := nc.nodeLister.Get(nodeName)
if err != nil {
klog.Warningf("Failed to read node %v: %v.", nodeName, err)
logger.Info("Failed to read node", "node", klog.KRef("", nodeName), "err", err)
nc.podUpdateQueue.AddRateLimited(podItem)
return
}
@ -1356,7 +1356,7 @@ func (nc *Controller) processPod(ctx context.Context, podItem podUpdateItem) {
// Pods are processed by TaintManager.
if !nc.runTaintManager {
if err := nc.processNoTaintBaseEviction(ctx, node, currentReadyCondition, nc.nodeMonitorGracePeriod, pods); err != nil {
klog.Warningf("Unable to process pod %+v eviction from node %v: %v.", podItem, nodeName, err)
logger.Info("Unable to process pod eviction from node", "pod", klog.KRef(podItem.namespace, podItem.name), "node", klog.KRef("", nodeName), "err", err)
nc.podUpdateQueue.AddRateLimited(podItem)
return
}
@ -1364,7 +1364,7 @@ func (nc *Controller) processPod(ctx context.Context, podItem podUpdateItem) {
if currentReadyCondition.Status != v1.ConditionTrue {
if err := controllerutil.MarkPodsNotReady(ctx, nc.kubeClient, nc.recorder, pods, nodeName); err != nil {
klog.Warningf("Unable to mark pod %+v NotReady on node %v: %v.", podItem, nodeName, err)
logger.Info("Unable to mark pod NotReady on node", "pod", klog.KRef(podItem.namespace, podItem.name), "node", klog.KRef("", nodeName), "err", err)
nc.podUpdateQueue.AddRateLimited(podItem)
}
}
@ -1447,7 +1447,7 @@ func (nc *Controller) ReducedQPSFunc(nodeNum int) float32 {
}
// addPodEvictorForNewZone checks if a new zone appeared, and if so adds a new evictor.
func (nc *Controller) addPodEvictorForNewZone(node *v1.Node) {
func (nc *Controller) addPodEvictorForNewZone(logger klog.Logger, node *v1.Node) {
nc.evictorLock.Lock()
defer nc.evictorLock.Unlock()
zone := nodetopology.GetZoneKey(node)
@ -1463,7 +1463,7 @@ func (nc *Controller) addPodEvictorForNewZone(node *v1.Node) {
flowcontrol.NewTokenBucketRateLimiter(nc.evictionLimiterQPS, scheduler.EvictionRateLimiterBurst))
}
// Init the metric for the new zone.
klog.Infof("Initializing eviction metric for zone: %v", zone)
logger.Info("Initializing eviction metric for zone", "zone", zone)
evictionsNumber.WithLabelValues(zone).Add(0)
evictionsTotal.WithLabelValues(zone).Add(0)
}
@ -1471,16 +1471,16 @@ func (nc *Controller) addPodEvictorForNewZone(node *v1.Node) {
// cancelPodEviction removes any queued evictions, typically because the node is available again. It
// returns true if an eviction was queued.
func (nc *Controller) cancelPodEviction(node *v1.Node) bool {
func (nc *Controller) cancelPodEviction(logger klog.Logger, node *v1.Node) bool {
zone := nodetopology.GetZoneKey(node)
if !nc.nodeEvictionMap.setStatus(node.Name, unmarked) {
klog.V(2).Infof("node %v was unregistered in the meantime - skipping setting status", node.Name)
logger.V(2).Info("Node was unregistered in the meantime - skipping setting status", "node", klog.KObj(node))
}
nc.evictorLock.Lock()
defer nc.evictorLock.Unlock()
wasDeleting := nc.zonePodEvictor[zone].Remove(node.Name)
if wasDeleting {
klog.V(2).Infof("Cancelling pod Eviction on Node: %v", node.Name)
logger.V(2).Info("Cancelling pod Eviction on Node", "node", klog.KObj(node))
return true
}
return false
@ -1502,8 +1502,9 @@ func (nc *Controller) evictPods(ctx context.Context, node *v1.Node, pods []*v1.P
}
return false, nil
}
logger := klog.FromContext(ctx)
if !nc.nodeEvictionMap.setStatus(node.Name, toBeEvicted) {
klog.V(2).Infof("node %v was unregistered in the meantime - skipping setting status", node.Name)
logger.V(2).Info("Node was unregistered in the meantime - skipping setting status", "node", klog.KObj(node))
}
nc.evictorLock.Lock()
@ -1532,13 +1533,14 @@ func (nc *Controller) markNodeForTainting(node *v1.Node, status v1.ConditionStat
func (nc *Controller) markNodeAsReachable(ctx context.Context, node *v1.Node) (bool, error) {
err := controller.RemoveTaintOffNode(ctx, nc.kubeClient, node.Name, node, UnreachableTaintTemplate)
logger := klog.FromContext(ctx)
if err != nil {
klog.Errorf("Failed to remove taint from node %v: %v", node.Name, err)
logger.Error(err, "Failed to remove taint from node", "node", klog.KObj(node))
return false, err
}
err = controller.RemoveTaintOffNode(ctx, nc.kubeClient, node.Name, node, NotReadyTaintTemplate)
if err != nil {
klog.Errorf("Failed to remove taint from node %v: %v", node.Name, err)
logger.Error(err, "Failed to remove taint from node", "node", klog.KObj(node))
return false, err
}
nc.evictorLock.Lock()

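Taken together, the hunks above apply one pattern: derive a logger once per call chain with klog.FromContext, pass it (or the ctx) into helpers, and replace klog.Infof/Errorf format strings with structured key/value pairs. A minimal sketch of that pattern; tryTaintNode and taint are hypothetical names for illustration, not part of this change:

package main

import (
	"context"
	"errors"

	"k8s.io/klog/v2"
)

// tryTaintNode mirrors the migration above: the logger comes from the
// context instead of the global klog functions, and message formatting
// is replaced by structured key/value pairs.
func tryTaintNode(ctx context.Context, nodeName string) {
	logger := klog.FromContext(ctx)
	if err := taint(nodeName); err != nil {
		// klog.KRef builds an object reference when only the name is in scope.
		logger.Error(err, "Failed to taint NoSchedule on node, requeue it", "node", klog.KRef("", nodeName))
	}
}

// taint is a stand-in for the real tainting call.
func taint(string) error { return errors.New("not implemented") }

func main() {
	tryTaintNode(context.Background(), "node-a")
}

If the context carries no logger, klog.FromContext falls back to the global default, so call sites stay safe during an incremental migration like this one.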
View File

@ -41,6 +41,8 @@ import (
clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/fake"
testcore "k8s.io/client-go/testing"
"k8s.io/klog/v2"
"k8s.io/klog/v2/ktesting"
kubeletapis "k8s.io/kubelet/pkg/apis"
"k8s.io/kubernetes/pkg/controller"
"k8s.io/kubernetes/pkg/controller/nodelifecycle/scheduler"
@ -88,12 +90,12 @@ type nodeLifecycleController struct {
}
// doEviction does the fake eviction and returns the status of the eviction operation.
func (nc *nodeLifecycleController) doEviction(fakeNodeHandler *testutil.FakeNodeHandler) bool {
func (nc *nodeLifecycleController) doEviction(logger klog.Logger, fakeNodeHandler *testutil.FakeNodeHandler) bool {
nc.evictorLock.Lock()
defer nc.evictorLock.Unlock()
zones := testutil.GetZones(fakeNodeHandler)
for _, zone := range zones {
nc.zonePodEvictor[zone].Try(func(value scheduler.TimedValue) (bool, time.Duration) {
nc.zonePodEvictor[zone].Try(logger, func(value scheduler.TimedValue) (bool, time.Duration) {
uid, _ := value.UID.(string)
pods, _ := nc.getPodsAssignedToNode(value.Value)
controllerutil.DeletePods(context.TODO(), fakeNodeHandler, pods, nc.recorder, value.Value, uid, nc.daemonSetStore)
@ -721,9 +723,10 @@ func TestMonitorNodeHealthEvictPods(t *testing.T) {
t.Errorf("unexpected error: %v", err)
}
zones := testutil.GetZones(item.fakeNodeHandler)
logger, _ := ktesting.NewTestContext(t)
for _, zone := range zones {
if _, ok := nodeController.zonePodEvictor[zone]; ok {
nodeController.zonePodEvictor[zone].Try(func(value scheduler.TimedValue) (bool, time.Duration) {
nodeController.zonePodEvictor[zone].Try(logger, func(value scheduler.TimedValue) (bool, time.Duration) {
nodeUID, _ := value.UID.(string)
pods, err := nodeController.getPodsAssignedToNode(value.Value)
if err != nil {
@ -883,8 +886,9 @@ func TestPodStatusChange(t *testing.T) {
t.Errorf("unexpected error: %v", err)
}
zones := testutil.GetZones(item.fakeNodeHandler)
logger, _ := ktesting.NewTestContext(t)
for _, zone := range zones {
nodeController.zonePodEvictor[zone].Try(func(value scheduler.TimedValue) (bool, time.Duration) {
nodeController.zonePodEvictor[zone].Try(logger, func(value scheduler.TimedValue) (bool, time.Duration) {
nodeUID, _ := value.UID.(string)
pods, err := nodeController.getPodsAssignedToNode(value.Value)
if err != nil {
@ -1467,8 +1471,9 @@ func TestMonitorNodeHealthEvictPodsWithDisruption(t *testing.T) {
// Infinite loop, used for retrying in case the ratelimiter fails to reload for the Try function.
// This breaks when we have the status that we need for the test case or when we don't see the
// intended result after 1 minute.
logger, _ := ktesting.NewTestContext(t)
for {
podEvicted = nodeController.doEviction(fakeNodeHandler)
podEvicted = nodeController.doEviction(logger, fakeNodeHandler)
if podEvicted == item.expectedEvictPods || time.Since(start) > 1*time.Minute {
break
}

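The test changes rely on k8s.io/klog/v2/ktesting, which returns a per-test logger (routed through t.Log) together with a context carrying that same logger. A small usage sketch; TestSketch is a hypothetical test name:

package scheduler_test

import (
	"testing"

	"k8s.io/klog/v2/ktesting"
)

func TestSketch(t *testing.T) {
	// NewTestContext returns both halves; the tests above discard
	// whichever one they don't need (logger, _ := ... or _, ctx := ...).
	logger, ctx := ktesting.NewTestContext(t)
	logger.Info("logged via t.Log")
	_ = ctx // would be passed to code that calls klog.FromContext
}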
View File

@ -23,7 +23,6 @@ import (
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/util/flowcontrol"
"k8s.io/klog/v2"
)
@ -229,14 +228,14 @@ type ActionFunc func(TimedValue) (bool, time.Duration)
// function in NodeController when Node becomes Ready again) TODO:
// figure out a good way to do garbage collection for all Nodes that
// were removed from the cluster.
func (q *RateLimitedTimedQueue) Try(fn ActionFunc) {
func (q *RateLimitedTimedQueue) Try(logger klog.Logger, fn ActionFunc) {
val, ok := q.queue.Head()
q.limiterLock.Lock()
defer q.limiterLock.Unlock()
for ok {
// rate limit the queue checking
if !q.limiter.TryAccept() {
klog.V(10).Infof("Try rate limited for value: %v", val)
logger.V(10).Info("Try rate limited", "value", val)
// Try again later
break
}

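With the new signature, callers hand Try their logger explicitly; the ActionFunc contract is unchanged: return true when the value was handled, or false plus a delay after which to retry. A sketch assuming the in-tree scheduler package import path; the values are illustrative:

package main

import (
	"time"

	"k8s.io/client-go/util/flowcontrol"
	"k8s.io/klog/v2"
	"k8s.io/kubernetes/pkg/controller/nodelifecycle/scheduler"
)

func main() {
	logger := klog.Background()
	q := scheduler.NewRateLimitedTimedQueue(flowcontrol.NewTokenBucketRateLimiter(10, 10))
	q.Add("node-a", "uid-a")
	// The logger is now an explicit argument instead of a package global.
	q.Try(logger, func(value scheduler.TimedValue) (bool, time.Duration) {
		logger.Info("processing", "value", value.Value)
		return true, 0 // handled; false plus a delay would retry later
	})
}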
View File

@ -23,6 +23,7 @@ import (
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/util/flowcontrol"
"k8s.io/klog/v2/ktesting"
)
func CheckQueueEq(lhs []string, rhs TimedQueue) bool {
@ -144,7 +145,8 @@ func TestTry(t *testing.T) {
evictor.Remove("second")
deletedMap := sets.NewString()
evictor.Try(func(value TimedValue) (bool, time.Duration) {
logger, _ := ktesting.NewTestContext(t)
evictor.Try(logger, func(value TimedValue) (bool, time.Duration) {
deletedMap.Insert(value.Value)
return true, 0
})
@ -180,7 +182,8 @@ func TestTryOrdering(t *testing.T) {
order := []string{}
count := 0
hasQueued := false
evictor.Try(func(value TimedValue) (bool, time.Duration) {
logger, _ := ktesting.NewTestContext(t)
evictor.Try(logger, func(value TimedValue) (bool, time.Duration) {
count++
t.Logf("eviction %d", count)
if value.ProcessAt.IsZero() {
@ -242,8 +245,8 @@ func TestTryRemovingWhileTry(t *testing.T) {
evictor.Remove("second")
close(wait)
}()
evictor.Try(func(value TimedValue) (bool, time.Duration) {
logger, _ := ktesting.NewTestContext(t)
evictor.Try(logger, func(value TimedValue) (bool, time.Duration) {
count++
if value.AddedAt.IsZero() {
t.Fatalf("added should not be zero")
@ -313,7 +316,8 @@ func TestAddAfterTry(t *testing.T) {
evictor.Remove("second")
deletedMap := sets.NewString()
evictor.Try(func(value TimedValue) (bool, time.Duration) {
logger, _ := ktesting.NewTestContext(t)
evictor.Try(logger, func(value TimedValue) (bool, time.Duration) {
deletedMap.Insert(value.Value)
return true, 0
})
@ -327,7 +331,7 @@ func TestAddAfterTry(t *testing.T) {
}
evictor.Add("first", "11111")
evictor.Try(func(value TimedValue) (bool, time.Duration) {
evictor.Try(logger, func(value TimedValue) (bool, time.Duration) {
t.Errorf("We shouldn't process the same value if the explicit remove wasn't called.")
return true, 0
})

View File

@ -105,7 +105,7 @@ func deletePodHandler(c clientset.Interface, emitEventFunc func(types.Namespaced
return func(ctx context.Context, args *WorkArgs) error {
ns := args.NamespacedName.Namespace
name := args.NamespacedName.Name
klog.InfoS("NoExecuteTaintManager is deleting pod", "pod", args.NamespacedName.String())
klog.FromContext(ctx).Info("NoExecuteTaintManager is deleting pod", "pod", args.NamespacedName.String())
if emitEventFunc != nil {
emitEventFunc(args.NamespacedName)
}
@ -202,16 +202,17 @@ func NewNoExecuteTaintManager(ctx context.Context, c clientset.Interface, podLis
// Run starts NoExecuteTaintManager which will run in a loop until the context is cancelled.
func (tc *NoExecuteTaintManager) Run(ctx context.Context) {
defer utilruntime.HandleCrash()
klog.InfoS("Starting NoExecuteTaintManager")
logger := klog.FromContext(ctx)
logger.Info("Starting NoExecuteTaintManager")
// Start events processing pipeline.
tc.broadcaster.StartStructuredLogging(0)
if tc.client != nil {
klog.InfoS("Sending events to api server")
logger.Info("Sending events to api server")
tc.broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: tc.client.CoreV1().Events("")})
} else {
klog.Fatalf("kubeClient is nil when starting NodeController")
logger.Error(nil, "kubeClient is nil when starting NodeController")
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
}
defer tc.broadcaster.Shutdown()
@ -363,8 +364,8 @@ func (tc *NoExecuteTaintManager) NodeUpdated(oldNode *v1.Node, newNode *v1.Node)
tc.nodeUpdateQueue.Add(updateItem)
}
func (tc *NoExecuteTaintManager) cancelWorkWithEvent(nsName types.NamespacedName) {
if tc.taintEvictionQueue.CancelWork(nsName.String()) {
func (tc *NoExecuteTaintManager) cancelWorkWithEvent(logger klog.Logger, nsName types.NamespacedName) {
if tc.taintEvictionQueue.CancelWork(logger, nsName.String()) {
tc.emitCancelPodDeletionEvent(nsName)
}
}
@ -377,22 +378,23 @@ func (tc *NoExecuteTaintManager) processPodOnNode(
taints []v1.Taint,
now time.Time,
) {
logger := klog.FromContext(ctx)
if len(taints) == 0 {
tc.cancelWorkWithEvent(podNamespacedName)
tc.cancelWorkWithEvent(logger, podNamespacedName)
}
allTolerated, usedTolerations := v1helper.GetMatchingTolerations(taints, tolerations)
if !allTolerated {
klog.V(2).InfoS("Not all taints are tolerated after update for pod on node", "pod", podNamespacedName.String(), "node", klog.KRef("", nodeName))
logger.V(2).Info("Not all taints are tolerated after update for pod on node", "pod", podNamespacedName.String(), "node", klog.KRef("", nodeName))
// We're canceling scheduled work (if any), as we're going to delete the Pod right away.
tc.cancelWorkWithEvent(podNamespacedName)
tc.cancelWorkWithEvent(logger, podNamespacedName)
tc.taintEvictionQueue.AddWork(ctx, NewWorkArgs(podNamespacedName.Name, podNamespacedName.Namespace), time.Now(), time.Now())
return
}
minTolerationTime := getMinTolerationTime(usedTolerations)
// getMinTolerationTime returns negative value to denote infinite toleration.
if minTolerationTime < 0 {
klog.V(4).InfoS("Current tolerations for pod tolerate forever, cancelling any scheduled deletion", "pod", podNamespacedName.String())
tc.cancelWorkWithEvent(podNamespacedName)
logger.V(4).Info("Current tolerations for pod tolerate forever, cancelling any scheduled deletion", "pod", podNamespacedName.String())
tc.cancelWorkWithEvent(logger, podNamespacedName)
return
}
@ -404,19 +406,20 @@ func (tc *NoExecuteTaintManager) processPodOnNode(
if startTime.Add(minTolerationTime).Before(triggerTime) {
return
}
tc.cancelWorkWithEvent(podNamespacedName)
tc.cancelWorkWithEvent(logger, podNamespacedName)
}
tc.taintEvictionQueue.AddWork(ctx, NewWorkArgs(podNamespacedName.Name, podNamespacedName.Namespace), startTime, triggerTime)
}
func (tc *NoExecuteTaintManager) handlePodUpdate(ctx context.Context, podUpdate podUpdateItem) {
pod, err := tc.podLister.Pods(podUpdate.podNamespace).Get(podUpdate.podName)
logger := klog.FromContext(ctx)
if err != nil {
if apierrors.IsNotFound(err) {
// Delete
podNamespacedName := types.NamespacedName{Namespace: podUpdate.podNamespace, Name: podUpdate.podName}
klog.V(4).InfoS("Noticed pod deletion", "pod", podNamespacedName)
tc.cancelWorkWithEvent(podNamespacedName)
logger.V(4).Info("Noticed pod deletion", "pod", podNamespacedName)
tc.cancelWorkWithEvent(logger, podNamespacedName)
return
}
utilruntime.HandleError(fmt.Errorf("could not get pod %s/%s: %v", podUpdate.podName, podUpdate.podNamespace, err))
@ -430,7 +433,7 @@ func (tc *NoExecuteTaintManager) handlePodUpdate(ctx context.Context, podUpdate
// Create or Update
podNamespacedName := types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}
klog.V(4).InfoS("Noticed pod update", "pod", podNamespacedName)
logger.V(4).Info("Noticed pod update", "pod", podNamespacedName)
nodeName := pod.Spec.NodeName
if nodeName == "" {
return
@ -451,10 +454,11 @@ func (tc *NoExecuteTaintManager) handlePodUpdate(ctx context.Context, podUpdate
func (tc *NoExecuteTaintManager) handleNodeUpdate(ctx context.Context, nodeUpdate nodeUpdateItem) {
node, err := tc.nodeLister.Get(nodeUpdate.nodeName)
logger := klog.FromContext(ctx)
if err != nil {
if apierrors.IsNotFound(err) {
// Delete
klog.V(4).InfoS("Noticed node deletion", "node", nodeUpdate.nodeName)
logger.V(4).Info("Noticed node deletion", "node", klog.KRef("", nodeUpdate.nodeName))
tc.taintedNodesLock.Lock()
defer tc.taintedNodesLock.Unlock()
delete(tc.taintedNodes, nodeUpdate.nodeName)
@ -465,12 +469,12 @@ func (tc *NoExecuteTaintManager) handleNodeUpdate(ctx context.Context, nodeUpdat
}
// Create or Update
klog.V(4).InfoS("Noticed node update", "node", nodeUpdate)
logger.V(4).Info("Noticed node update", "node", klog.KObj(node))
taints := getNoExecuteTaints(node.Spec.Taints)
func() {
tc.taintedNodesLock.Lock()
defer tc.taintedNodesLock.Unlock()
klog.V(4).InfoS("Updating known taints on node", "node", node.Name, "taints", taints)
logger.V(4).Info("Updating known taints on node", "node", klog.KObj(node), "taints", taints)
if len(taints) == 0 {
delete(tc.taintedNodes, node.Name)
} else {
@ -483,7 +487,7 @@ func (tc *NoExecuteTaintManager) handleNodeUpdate(ctx context.Context, nodeUpdat
// tc.PodUpdated which will use tc.taintedNodes to potentially delete delayed pods.
pods, err := tc.getPodsAssignedToNode(node.Name)
if err != nil {
klog.ErrorS(err, "Failed to get pods assigned to node", "node", node.Name)
logger.Error(err, "Failed to get pods assigned to node", "node", klog.KObj(node))
return
}
if len(pods) == 0 {
@ -491,9 +495,9 @@ func (tc *NoExecuteTaintManager) handleNodeUpdate(ctx context.Context, nodeUpdat
}
// Short circuit, to make this controller a bit faster.
if len(taints) == 0 {
klog.V(4).InfoS("All taints were removed from the node. Cancelling all evictions...", "node", node.Name)
logger.V(4).Info("All taints were removed from the node. Cancelling all evictions...", "node", klog.KObj(node))
for i := range pods {
tc.cancelWorkWithEvent(types.NamespacedName{Namespace: pods[i].Namespace, Name: pods[i].Name})
tc.cancelWorkWithEvent(logger, types.NamespacedName{Namespace: pods[i].Namespace, Name: pods[i].Name})
}
return
}

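The taint-manager hunks also show the two object-reference helpers side by side: klog.KObj when the object itself is in scope, klog.KRef when only namespace/name strings are (for example after a lister miss). A minimal sketch:

package main

import (
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/klog/v2"
)

func main() {
	logger := klog.Background()
	node := &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node-a"}}
	// Object at hand: KObj pulls namespace/name from it.
	logger.V(4).Info("Noticed node update", "node", klog.KObj(node))
	// Only the name is known (e.g. after a NotFound): KRef takes bare strings.
	logger.V(4).Info("Noticed node deletion", "node", klog.KRef("", "node-a"))
}

Both produce the same structured "node" value, which keeps log queries stable regardless of whether the object was available at the call site.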
View File

@ -52,10 +52,11 @@ type TimedWorker struct {
// createWorker creates a TimedWorker that will execute `f` not earlier than `fireAt`.
func createWorker(ctx context.Context, args *WorkArgs, createdAt time.Time, fireAt time.Time, f func(ctx context.Context, args *WorkArgs) error, clock clock.WithDelayedExecution) *TimedWorker {
delay := fireAt.Sub(createdAt)
logger := klog.FromContext(ctx)
fWithErrorLogging := func() {
err := f(ctx, args)
if err != nil {
klog.Errorf("NodeLifecycle: timed worker failed with error: %q", err)
logger.Error(err, "NodeLifecycle: timed worker failed")
}
}
if delay <= 0 {
@ -116,12 +117,13 @@ func (q *TimedWorkerQueue) getWrappedWorkerFunc(key string) func(ctx context.Con
// AddWork adds a work item to the WorkerQueue which will be executed not earlier than `fireAt`.
func (q *TimedWorkerQueue) AddWork(ctx context.Context, args *WorkArgs, createdAt time.Time, fireAt time.Time) {
key := args.KeyFromWorkArgs()
klog.V(4).Infof("Adding TimedWorkerQueue item %v at %v to be fired at %v", key, createdAt, fireAt)
logger := klog.FromContext(ctx)
logger.V(4).Info("Adding TimedWorkerQueue item and to be fired at firedTime", "item", key, "createTime", createdAt, "firedTime", fireAt)
q.Lock()
defer q.Unlock()
if _, exists := q.workers[key]; exists {
klog.Warningf("Trying to add already existing work for %+v. Skipping.", args)
logger.Info("Trying to add already existing work, skipping", "args", args)
return
}
worker := createWorker(ctx, args, createdAt, fireAt, q.getWrappedWorkerFunc(key), q.clock)
@ -129,13 +131,13 @@ func (q *TimedWorkerQueue) AddWork(ctx context.Context, args *WorkArgs, createdA
}
// CancelWork removes scheduled function execution from the queue. Returns true if work was cancelled.
func (q *TimedWorkerQueue) CancelWork(key string) bool {
func (q *TimedWorkerQueue) CancelWork(logger klog.Logger, key string) bool {
q.Lock()
defer q.Unlock()
worker, found := q.workers[key]
result := false
if found {
klog.V(4).Infof("Cancelling TimedWorkerQueue item %v at %v", key, time.Now())
logger.V(4).Info("Cancelling TimedWorkerQueue item", "item", key, "time", time.Now())
if worker != nil {
result = true
worker.Cancel()

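CancelWork has no context parameter, so it now receives the logger directly, while AddWork keeps deriving one from ctx. A sketch of the queue's flow under the same in-tree import assumption; the pod name and timings are illustrative:

package main

import (
	"context"
	"time"

	"k8s.io/klog/v2"
	"k8s.io/kubernetes/pkg/controller/nodelifecycle/scheduler"
)

func main() {
	logger := klog.Background()
	ctx := klog.NewContext(context.Background(), logger)
	q := scheduler.CreateWorkerQueue(func(ctx context.Context, args *scheduler.WorkArgs) error {
		klog.FromContext(ctx).Info("worker fired", "item", args.KeyFromWorkArgs())
		return nil
	})
	now := time.Now()
	q.AddWork(ctx, scheduler.NewWorkArgs("pod-1", "ns"), now, now.Add(time.Second))
	// No ctx on CancelWork, so the logger is passed explicitly.
	q.CancelWork(logger, scheduler.NewWorkArgs("pod-1", "ns").KeyFromWorkArgs())
}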
View File

@ -23,6 +23,7 @@ import (
"testing"
"time"
"k8s.io/klog/v2/ktesting"
testingclock "k8s.io/utils/clock/testing"
)
@ -108,8 +109,9 @@ func TestCancel(t *testing.T) {
queue.AddWork(context.TODO(), NewWorkArgs("3", "3"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("4", "4"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("5", "5"), now, then)
queue.CancelWork(NewWorkArgs("2", "2").KeyFromWorkArgs())
queue.CancelWork(NewWorkArgs("4", "4").KeyFromWorkArgs())
logger, _ := ktesting.NewTestContext(t)
queue.CancelWork(logger, NewWorkArgs("2", "2").KeyFromWorkArgs())
queue.CancelWork(logger, NewWorkArgs("4", "4").KeyFromWorkArgs())
fakeClock.Step(11 * time.Second)
wg.Wait()
lastVal := atomic.LoadInt32(&testVal)
@ -141,8 +143,9 @@ func TestCancelAndReadd(t *testing.T) {
queue.AddWork(context.TODO(), NewWorkArgs("3", "3"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("4", "4"), now, then)
queue.AddWork(context.TODO(), NewWorkArgs("5", "5"), now, then)
queue.CancelWork(NewWorkArgs("2", "2").KeyFromWorkArgs())
queue.CancelWork(NewWorkArgs("4", "4").KeyFromWorkArgs())
logger, _ := ktesting.NewTestContext(t)
queue.CancelWork(logger, NewWorkArgs("2", "2").KeyFromWorkArgs())
queue.CancelWork(logger, NewWorkArgs("4", "4").KeyFromWorkArgs())
queue.AddWork(context.TODO(), NewWorkArgs("2", "2"), now, then)
fakeClock.Step(11 * time.Second)
wg.Wait()

View File

@ -33,6 +33,7 @@ import (
"k8s.io/client-go/informers"
clientset "k8s.io/client-go/kubernetes"
featuregatetesting "k8s.io/component-base/featuregate/testing"
"k8s.io/klog/v2/ktesting"
"k8s.io/kubernetes/cmd/kube-apiserver/app/options"
"k8s.io/kubernetes/pkg/controller/nodeipam"
"k8s.io/kubernetes/pkg/controller/nodeipam/ipam"
@ -63,7 +64,7 @@ func TestIPAMMultiCIDRRangeAllocatorCIDRAllocate(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go ipamController.Run(ctx.Done())
go ipamController.Run(ctx)
sharedInformer.Start(ctx.Done())
tests := []struct {
@ -149,7 +150,7 @@ func TestIPAMMultiCIDRRangeAllocatorCIDRRelease(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go ipamController.Run(ctx.Done())
go ipamController.Run(ctx)
sharedInformer.Start(ctx.Done())
t.Run("Pod CIDR release after node delete", func(t *testing.T) {
@ -226,7 +227,7 @@ func TestIPAMMultiCIDRRangeAllocatorClusterCIDRDelete(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go ipamController.Run(ctx.Done())
go ipamController.Run(ctx)
sharedInformer.Start(ctx.Done())
t.Run("delete cc with node associated", func(t *testing.T) {
@ -322,7 +323,7 @@ func TestIPAMMultiCIDRRangeAllocatorClusterCIDRTerminate(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go ipamController.Run(ctx.Done())
go ipamController.Run(ctx)
sharedInformer.Start(ctx.Done())
t.Run("Pod CIDRS must not be allocated from a terminating CC", func(t *testing.T) {
@ -407,7 +408,7 @@ func TestIPAMMultiCIDRRangeAllocatorClusterCIDRTieBreak(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go ipamController.Run(ctx.Done())
go ipamController.Run(ctx)
sharedInformer.Start(ctx.Done())
tests := []struct {
@ -557,8 +558,9 @@ func booststrapMultiCIDRRangeAllocator(t *testing.T,
if _, err := clientSet.CoreV1().Nodes().Create(context.TODO(), initialNode, metav1.CreateOptions{}); err != nil {
t.Fatal(err)
}
_, ctx := ktesting.NewTestContext(t)
ipamController, err := nodeipam.NewNodeIpamController(
ctx,
sharedInformer.Core().V1().Nodes(),
sharedInformer.Networking().V1alpha1().ClusterCIDRs(),
nil,

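The integration tests also switch the controller entry point from Run(stopCh) to Run(ctx): cancellation now flows through the context rather than a closed channel. A hypothetical controller (not from this change) showing the shape of that convention:

package main

import (
	"context"
	"time"
)

type controller struct{}

// Run blocks until the context is cancelled, replacing the old
// Run(stopCh <-chan struct{}) shape that watched a stop channel.
func (c *controller) Run(ctx context.Context) {
	for {
		select {
		case <-ctx.Done():
			return
		case <-time.After(time.Second):
			// one reconcile tick would go here
		}
	}
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	go (&controller{}).Run(ctx)
	cancel() // replaces close(stopCh)
}

Carrying the context also means the logger installed with klog.NewContext travels with the cancellation signal, which is what lets Run derive its logger without extra parameters.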
View File

@ -17,6 +17,7 @@ limitations under the License.
package ipamperf
import (
"context"
"encoding/json"
"fmt"
"net"
@ -25,6 +26,7 @@ import (
"time"
"k8s.io/klog/v2"
"k8s.io/klog/v2/ktesting"
netutils "k8s.io/utils/net"
"k8s.io/client-go/informers"
@ -37,7 +39,7 @@ import (
"k8s.io/kubernetes/test/integration/util"
)
func setupAllocator(kubeConfig *restclient.Config, config *Config, clusterCIDR, serviceCIDR *net.IPNet, subnetMaskSize int) (*clientset.Clientset, util.ShutdownFunc, error) {
func setupAllocator(ctx context.Context, kubeConfig *restclient.Config, config *Config, clusterCIDR, serviceCIDR *net.IPNet, subnetMaskSize int) (*clientset.Clientset, util.ShutdownFunc, error) {
controllerStopChan := make(chan struct{})
shutdownFunc := func() {
close(controllerStopChan)
@ -50,6 +52,7 @@ func setupAllocator(kubeConfig *restclient.Config, config *Config, clusterCIDR,
sharedInformer := informers.NewSharedInformerFactory(clientSet, 1*time.Hour)
ipamController, err := nodeipam.NewNodeIpamController(
ctx,
sharedInformer.Core().V1().Nodes(),
sharedInformer.Networking().V1alpha1().ClusterCIDRs(),
config.Cloud, clientSet, []*net.IPNet{clusterCIDR}, serviceCIDR, nil,
@ -58,7 +61,7 @@ func setupAllocator(kubeConfig *restclient.Config, config *Config, clusterCIDR,
if err != nil {
return nil, shutdownFunc, err
}
go ipamController.Run(controllerStopChan)
go ipamController.Run(ctx)
sharedInformer.Start(controllerStopChan)
return clientSet, shutdownFunc, nil
@ -74,8 +77,8 @@ func runTest(t *testing.T, kubeConfig *restclient.Config, config *Config, cluste
nodeClient := clientset.NewForConfigOrDie(nodeClientConfig)
defer deleteNodes(nodeClient) // cleanup nodes on after controller shutdown
clientSet, shutdownFunc, err := setupAllocator(kubeConfig, config, clusterCIDR, serviceCIDR, subnetMaskSize)
_, ctx := ktesting.NewTestContext(t)
clientSet, shutdownFunc, err := setupAllocator(ctx, kubeConfig, config, clusterCIDR, serviceCIDR, subnetMaskSize)
if err != nil {
t.Fatalf("Error starting IPAM allocator: %v", err)
}