use klog.InfoS instead of klog.V(0),Info
This commit is contained in:
parent
08bbecb8e3
commit
780ef3afb0
@ -62,11 +62,11 @@ func (nodeIpamController *nodeIPAMController) StartNodeIpamControllerWrapper(ini
|
|||||||
nodeIpamController.nodeIPAMControllerOptions.ApplyTo(&nodeIpamController.nodeIPAMControllerConfiguration)
|
nodeIpamController.nodeIPAMControllerOptions.ApplyTo(&nodeIpamController.nodeIPAMControllerConfiguration)
|
||||||
|
|
||||||
return func(ctx context.Context, controllerContext genericcontrollermanager.ControllerContext) (controller.Interface, bool, error) {
|
return func(ctx context.Context, controllerContext genericcontrollermanager.ControllerContext) (controller.Interface, bool, error) {
|
||||||
return startNodeIpamController(initContext, completedConfig, nodeIpamController.nodeIPAMControllerConfiguration, controllerContext, cloud)
|
return startNodeIpamController(ctx, initContext, completedConfig, nodeIpamController.nodeIPAMControllerConfiguration, controllerContext, cloud)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func startNodeIpamController(initContext app.ControllerInitContext, ccmConfig *cloudcontrollerconfig.CompletedConfig, nodeIPAMConfig nodeipamconfig.NodeIPAMControllerConfiguration, ctx genericcontrollermanager.ControllerContext, cloud cloudprovider.Interface) (controller.Interface, bool, error) {
|
func startNodeIpamController(ctx context.Context, initContext app.ControllerInitContext, ccmConfig *cloudcontrollerconfig.CompletedConfig, nodeIPAMConfig nodeipamconfig.NodeIPAMControllerConfiguration, controllerCtx genericcontrollermanager.ControllerContext, cloud cloudprovider.Interface) (controller.Interface, bool, error) {
|
||||||
var serviceCIDR *net.IPNet
|
var serviceCIDR *net.IPNet
|
||||||
var secondaryServiceCIDR *net.IPNet
|
var secondaryServiceCIDR *net.IPNet
|
||||||
|
|
||||||
@ -130,14 +130,14 @@ func startNodeIpamController(initContext app.ControllerInitContext, ccmConfig *c
|
|||||||
|
|
||||||
var clusterCIDRInformer v1alpha1.ClusterCIDRInformer
|
var clusterCIDRInformer v1alpha1.ClusterCIDRInformer
|
||||||
if utilfeature.DefaultFeatureGate.Enabled(features.MultiCIDRRangeAllocator) {
|
if utilfeature.DefaultFeatureGate.Enabled(features.MultiCIDRRangeAllocator) {
|
||||||
clusterCIDRInformer = ctx.InformerFactory.Networking().V1alpha1().ClusterCIDRs()
|
clusterCIDRInformer = controllerCtx.InformerFactory.Networking().V1alpha1().ClusterCIDRs()
|
||||||
}
|
}
|
||||||
|
|
||||||
nodeIpamController, err := nodeipamcontroller.NewNodeIpamController(
|
nodeIpamController, err := nodeipamcontroller.NewNodeIpamController(
|
||||||
ctx.InformerFactory.Core().V1().Nodes(),
|
ctx,
|
||||||
|
controllerCtx.InformerFactory.Core().V1().Nodes(),
|
||||||
clusterCIDRInformer,
|
clusterCIDRInformer,
|
||||||
cloud,
|
cloud,
|
||||||
ctx.ClientBuilder.ClientOrDie(initContext.ClientName),
|
controllerCtx.ClientBuilder.ClientOrDie(initContext.ClientName),
|
||||||
clusterCIDRs,
|
clusterCIDRs,
|
||||||
serviceCIDR,
|
serviceCIDR,
|
||||||
secondaryServiceCIDR,
|
secondaryServiceCIDR,
|
||||||
@ -147,7 +147,7 @@ func startNodeIpamController(initContext app.ControllerInitContext, ccmConfig *c
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, true, err
|
return nil, true, err
|
||||||
}
|
}
|
||||||
go nodeIpamController.Run(ctx.Stop)
|
go nodeIpamController.Run(ctx)
|
||||||
return nil, true, nil
|
return nil, true, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -154,7 +154,9 @@ func startNodeIpamController(ctx context.Context, controllerContext ControllerCo
|
|||||||
clusterCIDRInformer = controllerContext.InformerFactory.Networking().V1alpha1().ClusterCIDRs()
|
clusterCIDRInformer = controllerContext.InformerFactory.Networking().V1alpha1().ClusterCIDRs()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ctx = klog.NewContext(ctx, klog.LoggerWithName(klog.FromContext(ctx), "NodeIpamController"))
|
||||||
nodeIpamController, err := nodeipamcontroller.NewNodeIpamController(
|
nodeIpamController, err := nodeipamcontroller.NewNodeIpamController(
|
||||||
|
ctx,
|
||||||
controllerContext.InformerFactory.Core().V1().Nodes(),
|
controllerContext.InformerFactory.Core().V1().Nodes(),
|
||||||
clusterCIDRInformer,
|
clusterCIDRInformer,
|
||||||
controllerContext.Cloud,
|
controllerContext.Cloud,
|
||||||
@ -168,7 +170,7 @@ func startNodeIpamController(ctx context.Context, controllerContext ControllerCo
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, true, err
|
return nil, true, err
|
||||||
}
|
}
|
||||||
go nodeIpamController.RunWithMetrics(ctx.Done(), controllerContext.ControllerManagerMetrics)
|
go nodeIpamController.RunWithMetrics(ctx, controllerContext.ControllerManagerMetrics)
|
||||||
return nil, true, nil
|
return nil, true, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -61,7 +61,7 @@ func newAdapter(k8s clientset.Interface, cloud *gce.Cloud) *adapter {
|
|||||||
return ret
|
return ret
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *adapter) Run(stopCh <-chan struct{}) {
|
func (a *adapter) Run(ctx context.Context) {
|
||||||
defer utilruntime.HandleCrash()
|
defer utilruntime.HandleCrash()
|
||||||
|
|
||||||
// Start event processing pipeline.
|
// Start event processing pipeline.
|
||||||
@ -69,7 +69,7 @@ func (a *adapter) Run(stopCh <-chan struct{}) {
|
|||||||
a.broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: a.k8s.CoreV1().Events("")})
|
a.broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: a.k8s.CoreV1().Events("")})
|
||||||
defer a.broadcaster.Shutdown()
|
defer a.broadcaster.Shutdown()
|
||||||
|
|
||||||
<-stopCh
|
<-ctx.Done()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *adapter) Alias(ctx context.Context, node *v1.Node) (*net.IPNet, error) {
|
func (a *adapter) Alias(ctx context.Context, node *v1.Node) (*net.IPNet, error) {
|
||||||
@ -88,7 +88,7 @@ func (a *adapter) Alias(ctx context.Context, node *v1.Node) (*net.IPNet, error)
|
|||||||
case 1:
|
case 1:
|
||||||
break
|
break
|
||||||
default:
|
default:
|
||||||
klog.Warningf("Node %q has more than one alias assigned (%v), defaulting to the first", node.Name, cidrs)
|
klog.FromContext(ctx).Info("Node has more than one alias assigned, defaulting to the first", "node", klog.KObj(node), "CIDRs", cidrs)
|
||||||
}
|
}
|
||||||
|
|
||||||
_, cidrRange, err := netutils.ParseCIDRSloppy(cidrs[0])
|
_, cidrRange, err := netutils.ParseCIDRSloppy(cidrs[0])
|
||||||
|
@ -91,11 +91,11 @@ type CIDRAllocator interface {
|
|||||||
// AllocateOrOccupyCIDR looks at the given node, assigns it a valid
|
// AllocateOrOccupyCIDR looks at the given node, assigns it a valid
|
||||||
// CIDR if it doesn't currently have one or mark the CIDR as used if
|
// CIDR if it doesn't currently have one or mark the CIDR as used if
|
||||||
// the node already have one.
|
// the node already have one.
|
||||||
AllocateOrOccupyCIDR(node *v1.Node) error
|
AllocateOrOccupyCIDR(logger klog.Logger, node *v1.Node) error
|
||||||
// ReleaseCIDR releases the CIDR of the removed node.
|
// ReleaseCIDR releases the CIDR of the removed node.
|
||||||
ReleaseCIDR(node *v1.Node) error
|
ReleaseCIDR(logger klog.Logger, node *v1.Node) error
|
||||||
// Run starts all the working logic of the allocator.
|
// Run starts all the working logic of the allocator.
|
||||||
Run(stopCh <-chan struct{})
|
Run(ctx context.Context)
|
||||||
}
|
}
|
||||||
|
|
||||||
// CIDRAllocatorParams is parameters that's required for creating new
|
// CIDRAllocatorParams is parameters that's required for creating new
|
||||||
@ -119,29 +119,30 @@ type nodeReservedCIDRs struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// New creates a new CIDR range allocator.
|
// New creates a new CIDR range allocator.
|
||||||
func New(kubeClient clientset.Interface, cloud cloudprovider.Interface, nodeInformer informers.NodeInformer, clusterCIDRInformer networkinginformers.ClusterCIDRInformer, allocatorType CIDRAllocatorType, allocatorParams CIDRAllocatorParams) (CIDRAllocator, error) {
|
func New(ctx context.Context, kubeClient clientset.Interface, cloud cloudprovider.Interface, nodeInformer informers.NodeInformer, clusterCIDRInformer networkinginformers.ClusterCIDRInformer, allocatorType CIDRAllocatorType, allocatorParams CIDRAllocatorParams) (CIDRAllocator, error) {
|
||||||
nodeList, err := listNodes(kubeClient)
|
logger := klog.FromContext(ctx)
|
||||||
|
nodeList, err := listNodes(logger, kubeClient)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
switch allocatorType {
|
switch allocatorType {
|
||||||
case RangeAllocatorType:
|
case RangeAllocatorType:
|
||||||
return NewCIDRRangeAllocator(kubeClient, nodeInformer, allocatorParams, nodeList)
|
return NewCIDRRangeAllocator(logger, kubeClient, nodeInformer, allocatorParams, nodeList)
|
||||||
case MultiCIDRRangeAllocatorType:
|
case MultiCIDRRangeAllocatorType:
|
||||||
if !utilfeature.DefaultFeatureGate.Enabled(features.MultiCIDRRangeAllocator) {
|
if !utilfeature.DefaultFeatureGate.Enabled(features.MultiCIDRRangeAllocator) {
|
||||||
return nil, fmt.Errorf("invalid CIDR allocator type: %v, feature gate %v must be enabled", allocatorType, features.MultiCIDRRangeAllocator)
|
return nil, fmt.Errorf("invalid CIDR allocator type: %v, feature gate %v must be enabled", allocatorType, features.MultiCIDRRangeAllocator)
|
||||||
}
|
}
|
||||||
return NewMultiCIDRRangeAllocator(kubeClient, nodeInformer, clusterCIDRInformer, allocatorParams, nodeList, nil)
|
return NewMultiCIDRRangeAllocator(ctx, kubeClient, nodeInformer, clusterCIDRInformer, allocatorParams, nodeList, nil)
|
||||||
|
|
||||||
case CloudAllocatorType:
|
case CloudAllocatorType:
|
||||||
return NewCloudCIDRAllocator(kubeClient, cloud, nodeInformer)
|
return NewCloudCIDRAllocator(logger, kubeClient, cloud, nodeInformer)
|
||||||
default:
|
default:
|
||||||
return nil, fmt.Errorf("invalid CIDR allocator type: %v", allocatorType)
|
return nil, fmt.Errorf("invalid CIDR allocator type: %v", allocatorType)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func listNodes(kubeClient clientset.Interface) (*v1.NodeList, error) {
|
func listNodes(logger klog.Logger, kubeClient clientset.Interface) (*v1.NodeList, error) {
|
||||||
var nodeList *v1.NodeList
|
var nodeList *v1.NodeList
|
||||||
// We must poll because apiserver might not be up. This error causes
|
// We must poll because apiserver might not be up. This error causes
|
||||||
// controller manager to restart.
|
// controller manager to restart.
|
||||||
@ -152,7 +153,7 @@ func listNodes(kubeClient clientset.Interface) (*v1.NodeList, error) {
|
|||||||
LabelSelector: labels.Everything().String(),
|
LabelSelector: labels.Everything().String(),
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
klog.Errorf("Failed to list all nodes: %v", err)
|
logger.Error(err, "Failed to list all nodes")
|
||||||
return false, nil
|
return false, nil
|
||||||
}
|
}
|
||||||
return true, nil
|
return true, nil
|
||||||
|
@ -23,7 +23,6 @@ import (
|
|||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"k8s.io/component-base/metrics/testutil"
|
"k8s.io/component-base/metrics/testutil"
|
||||||
"k8s.io/klog/v2"
|
|
||||||
netutils "k8s.io/utils/net"
|
netutils "k8s.io/utils/net"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -558,34 +557,35 @@ func TestGetBitforCIDR(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, tc := range cases {
|
for _, tc := range cases {
|
||||||
_, clusterCIDR, err := netutils.ParseCIDRSloppy(tc.clusterCIDRStr)
|
t.Run(tc.description, func(t *testing.T) {
|
||||||
if err != nil {
|
_, clusterCIDR, err := netutils.ParseCIDRSloppy(tc.clusterCIDRStr)
|
||||||
t.Fatalf("unexpected error: %v for %v", err, tc.description)
|
if err != nil {
|
||||||
}
|
t.Fatalf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
cs, err := NewCIDRSet(clusterCIDR, tc.subNetMaskSize)
|
cs, err := NewCIDRSet(clusterCIDR, tc.subNetMaskSize)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Error allocating CIDRSet for %v", tc.description)
|
t.Fatalf("Error allocating CIDRSet")
|
||||||
}
|
}
|
||||||
_, subnetCIDR, err := netutils.ParseCIDRSloppy(tc.subNetCIDRStr)
|
_, subnetCIDR, err := netutils.ParseCIDRSloppy(tc.subNetCIDRStr)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("unexpected error: %v for %v", err, tc.description)
|
t.Fatalf("unexpected error: %v", err)
|
||||||
}
|
}
|
||||||
|
got, err := cs.getIndexForCIDR(subnetCIDR)
|
||||||
|
if err == nil && tc.expectErr {
|
||||||
|
t.Errorf("expected error but got null")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
got, err := cs.getIndexForCIDR(subnetCIDR)
|
if err != nil && !tc.expectErr {
|
||||||
if err == nil && tc.expectErr {
|
t.Errorf("unexpected error: %v", err)
|
||||||
klog.Errorf("expected error but got null for %v", tc.description)
|
return
|
||||||
continue
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if err != nil && !tc.expectErr {
|
if got != tc.expectedBit {
|
||||||
klog.Errorf("unexpected error: %v for %v", err, tc.description)
|
t.Errorf("expected %v, but got %v", tc.expectedBit, got)
|
||||||
continue
|
}
|
||||||
}
|
})
|
||||||
|
|
||||||
if got != tc.expectedBit {
|
|
||||||
klog.Errorf("expected %v, but got %v for %v", tc.expectedBit, got, tc.description)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -20,6 +20,7 @@ limitations under the License.
|
|||||||
package ipam
|
package ipam
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"math/rand"
|
"math/rand"
|
||||||
"net"
|
"net"
|
||||||
@ -27,6 +28,7 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/google/go-cmp/cmp"
|
"github.com/google/go-cmp/cmp"
|
||||||
|
|
||||||
"k8s.io/klog/v2"
|
"k8s.io/klog/v2"
|
||||||
|
|
||||||
v1 "k8s.io/api/core/v1"
|
v1 "k8s.io/api/core/v1"
|
||||||
@ -85,9 +87,10 @@ type cloudCIDRAllocator struct {
|
|||||||
var _ CIDRAllocator = (*cloudCIDRAllocator)(nil)
|
var _ CIDRAllocator = (*cloudCIDRAllocator)(nil)
|
||||||
|
|
||||||
// NewCloudCIDRAllocator creates a new cloud CIDR allocator.
|
// NewCloudCIDRAllocator creates a new cloud CIDR allocator.
|
||||||
func NewCloudCIDRAllocator(client clientset.Interface, cloud cloudprovider.Interface, nodeInformer informers.NodeInformer) (CIDRAllocator, error) {
|
func NewCloudCIDRAllocator(logger klog.Logger, client clientset.Interface, cloud cloudprovider.Interface, nodeInformer informers.NodeInformer) (CIDRAllocator, error) {
|
||||||
if client == nil {
|
if client == nil {
|
||||||
klog.Fatalf("kubeClient is nil when starting NodeController")
|
logger.Error(nil, "kubeClient is nil when starting cloud CIDR allocator")
|
||||||
|
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
|
||||||
}
|
}
|
||||||
|
|
||||||
eventBroadcaster := record.NewBroadcaster()
|
eventBroadcaster := record.NewBroadcaster()
|
||||||
@ -111,74 +114,80 @@ func NewCloudCIDRAllocator(client clientset.Interface, cloud cloudprovider.Inter
|
|||||||
}
|
}
|
||||||
|
|
||||||
nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
|
nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
|
||||||
AddFunc: controllerutil.CreateAddNodeHandler(ca.AllocateOrOccupyCIDR),
|
AddFunc: controllerutil.CreateAddNodeHandler(
|
||||||
|
func(node *v1.Node) error {
|
||||||
|
return ca.AllocateOrOccupyCIDR(logger, node)
|
||||||
|
}),
|
||||||
UpdateFunc: controllerutil.CreateUpdateNodeHandler(func(_, newNode *v1.Node) error {
|
UpdateFunc: controllerutil.CreateUpdateNodeHandler(func(_, newNode *v1.Node) error {
|
||||||
if newNode.Spec.PodCIDR == "" {
|
if newNode.Spec.PodCIDR == "" {
|
||||||
return ca.AllocateOrOccupyCIDR(newNode)
|
return ca.AllocateOrOccupyCIDR(logger, newNode)
|
||||||
}
|
}
|
||||||
// Even if PodCIDR is assigned, but NetworkUnavailable condition is
|
// Even if PodCIDR is assigned, but NetworkUnavailable condition is
|
||||||
// set to true, we need to process the node to set the condition.
|
// set to true, we need to process the node to set the condition.
|
||||||
networkUnavailableTaint := &v1.Taint{Key: v1.TaintNodeNetworkUnavailable, Effect: v1.TaintEffectNoSchedule}
|
networkUnavailableTaint := &v1.Taint{Key: v1.TaintNodeNetworkUnavailable, Effect: v1.TaintEffectNoSchedule}
|
||||||
_, cond := controllerutil.GetNodeCondition(&newNode.Status, v1.NodeNetworkUnavailable)
|
_, cond := controllerutil.GetNodeCondition(&newNode.Status, v1.NodeNetworkUnavailable)
|
||||||
if cond == nil || cond.Status != v1.ConditionFalse || utiltaints.TaintExists(newNode.Spec.Taints, networkUnavailableTaint) {
|
if cond == nil || cond.Status != v1.ConditionFalse || utiltaints.TaintExists(newNode.Spec.Taints, networkUnavailableTaint) {
|
||||||
return ca.AllocateOrOccupyCIDR(newNode)
|
return ca.AllocateOrOccupyCIDR(logger, newNode)
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}),
|
}),
|
||||||
DeleteFunc: controllerutil.CreateDeleteNodeHandler(ca.ReleaseCIDR),
|
DeleteFunc: controllerutil.CreateDeleteNodeHandler(func(node *v1.Node) error {
|
||||||
|
return ca.ReleaseCIDR(logger, node)
|
||||||
|
}),
|
||||||
})
|
})
|
||||||
|
logger.Info("Using cloud CIDR allocator", "provider", cloud.ProviderName())
|
||||||
klog.Infof("Using cloud CIDR allocator (provider: %v)", cloud.ProviderName())
|
|
||||||
return ca, nil
|
return ca, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ca *cloudCIDRAllocator) Run(stopCh <-chan struct{}) {
|
func (ca *cloudCIDRAllocator) Run(ctx context.Context) {
|
||||||
defer utilruntime.HandleCrash()
|
defer utilruntime.HandleCrash()
|
||||||
|
|
||||||
// Start event processing pipeline.
|
// Start event processing pipeline.
|
||||||
ca.broadcaster.StartStructuredLogging(0)
|
ca.broadcaster.StartStructuredLogging(0)
|
||||||
klog.Infof("Sending events to api server.")
|
logger := klog.FromContext(ctx)
|
||||||
|
logger.Info("Sending events to api server")
|
||||||
ca.broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: ca.client.CoreV1().Events("")})
|
ca.broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: ca.client.CoreV1().Events("")})
|
||||||
defer ca.broadcaster.Shutdown()
|
defer ca.broadcaster.Shutdown()
|
||||||
|
|
||||||
klog.Infof("Starting cloud CIDR allocator")
|
logger.Info("Starting cloud CIDR allocator")
|
||||||
defer klog.Infof("Shutting down cloud CIDR allocator")
|
defer logger.Info("Shutting down cloud CIDR allocator")
|
||||||
|
|
||||||
if !cache.WaitForNamedCacheSync("cidrallocator", stopCh, ca.nodesSynced) {
|
if !cache.WaitForNamedCacheSync("cidrallocator", ctx.Done(), ca.nodesSynced) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
for i := 0; i < cidrUpdateWorkers; i++ {
|
for i := 0; i < cidrUpdateWorkers; i++ {
|
||||||
go ca.worker(stopCh)
|
go ca.worker(ctx)
|
||||||
}
|
}
|
||||||
|
|
||||||
<-stopCh
|
<-ctx.Done()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ca *cloudCIDRAllocator) worker(stopChan <-chan struct{}) {
|
func (ca *cloudCIDRAllocator) worker(ctx context.Context) {
|
||||||
|
logger := klog.FromContext(ctx)
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case workItem, ok := <-ca.nodeUpdateChannel:
|
case workItem, ok := <-ca.nodeUpdateChannel:
|
||||||
if !ok {
|
if !ok {
|
||||||
klog.Warning("Channel nodeCIDRUpdateChannel was unexpectedly closed")
|
logger.Info("Channel nodeCIDRUpdateChannel was unexpectedly closed")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if err := ca.updateCIDRAllocation(workItem); err == nil {
|
if err := ca.updateCIDRAllocation(logger, workItem); err == nil {
|
||||||
klog.V(3).Infof("Updated CIDR for %q", workItem)
|
logger.V(3).Info("Updated CIDR", "workItem", workItem)
|
||||||
} else {
|
} else {
|
||||||
klog.Errorf("Error updating CIDR for %q: %v", workItem, err)
|
logger.Error(err, "Error updating CIDR", "workItem", workItem)
|
||||||
if canRetry, timeout := ca.retryParams(workItem); canRetry {
|
if canRetry, timeout := ca.retryParams(logger, workItem); canRetry {
|
||||||
klog.V(2).Infof("Retrying update for %q after %v", workItem, timeout)
|
logger.V(2).Info("Retrying update on next period", "workItem", workItem, "timeout", timeout)
|
||||||
time.AfterFunc(timeout, func() {
|
time.AfterFunc(timeout, func() {
|
||||||
// Requeue the failed node for update again.
|
// Requeue the failed node for update again.
|
||||||
ca.nodeUpdateChannel <- workItem
|
ca.nodeUpdateChannel <- workItem
|
||||||
})
|
})
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
klog.Errorf("Exceeded retry count for %q, dropping from queue", workItem)
|
logger.Error(nil, "Exceeded retry count, dropping from queue", "workItem", workItem)
|
||||||
}
|
}
|
||||||
ca.removeNodeFromProcessing(workItem)
|
ca.removeNodeFromProcessing(workItem)
|
||||||
case <-stopChan:
|
case <-ctx.Done():
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -194,13 +203,13 @@ func (ca *cloudCIDRAllocator) insertNodeToProcessing(nodeName string) bool {
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ca *cloudCIDRAllocator) retryParams(nodeName string) (bool, time.Duration) {
|
func (ca *cloudCIDRAllocator) retryParams(logger klog.Logger, nodeName string) (bool, time.Duration) {
|
||||||
ca.lock.Lock()
|
ca.lock.Lock()
|
||||||
defer ca.lock.Unlock()
|
defer ca.lock.Unlock()
|
||||||
|
|
||||||
entry, ok := ca.nodesInProcessing[nodeName]
|
entry, ok := ca.nodesInProcessing[nodeName]
|
||||||
if !ok {
|
if !ok {
|
||||||
klog.Errorf("Cannot get retryParams for %q as entry does not exist", nodeName)
|
logger.Error(nil, "Cannot get retryParams for node as entry does not exist", "node", klog.KRef("", nodeName))
|
||||||
return false, 0
|
return false, 0
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -233,28 +242,28 @@ func (ca *cloudCIDRAllocator) removeNodeFromProcessing(nodeName string) {
|
|||||||
// WARNING: If you're adding any return calls or defer any more work from this
|
// WARNING: If you're adding any return calls or defer any more work from this
|
||||||
// function you have to make sure to update nodesInProcessing properly with the
|
// function you have to make sure to update nodesInProcessing properly with the
|
||||||
// disposition of the node when the work is done.
|
// disposition of the node when the work is done.
|
||||||
func (ca *cloudCIDRAllocator) AllocateOrOccupyCIDR(node *v1.Node) error {
|
func (ca *cloudCIDRAllocator) AllocateOrOccupyCIDR(logger klog.Logger, node *v1.Node) error {
|
||||||
if node == nil {
|
if node == nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
if !ca.insertNodeToProcessing(node.Name) {
|
if !ca.insertNodeToProcessing(node.Name) {
|
||||||
klog.V(2).InfoS("Node is already in a process of CIDR assignment", "node", klog.KObj(node))
|
logger.V(2).Info("Node is already in a process of CIDR assignment", "node", klog.KObj(node))
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
klog.V(4).Infof("Putting node %s into the work queue", node.Name)
|
logger.V(4).Info("Putting node into the work queue", "node", klog.KObj(node))
|
||||||
ca.nodeUpdateChannel <- node.Name
|
ca.nodeUpdateChannel <- node.Name
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// updateCIDRAllocation assigns CIDR to Node and sends an update to the API server.
|
// updateCIDRAllocation assigns CIDR to Node and sends an update to the API server.
|
||||||
func (ca *cloudCIDRAllocator) updateCIDRAllocation(nodeName string) error {
|
func (ca *cloudCIDRAllocator) updateCIDRAllocation(logger klog.Logger, nodeName string) error {
|
||||||
node, err := ca.nodeLister.Get(nodeName)
|
node, err := ca.nodeLister.Get(nodeName)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if errors.IsNotFound(err) {
|
if errors.IsNotFound(err) {
|
||||||
return nil // node no longer available, skip processing
|
return nil // node no longer available, skip processing
|
||||||
}
|
}
|
||||||
klog.ErrorS(err, "Failed while getting the node for updating Node.Spec.PodCIDR", "nodeName", nodeName)
|
logger.Error(err, "Failed while getting the node for updating Node.Spec.PodCIDR", "node", klog.KRef("", nodeName))
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if node.Spec.ProviderID == "" {
|
if node.Spec.ProviderID == "" {
|
||||||
@ -272,7 +281,7 @@ func (ca *cloudCIDRAllocator) updateCIDRAllocation(nodeName string) error {
|
|||||||
}
|
}
|
||||||
//Can have at most 2 ips (one for v4 and one for v6)
|
//Can have at most 2 ips (one for v4 and one for v6)
|
||||||
if len(cidrStrings) > 2 {
|
if len(cidrStrings) > 2 {
|
||||||
klog.InfoS("Got more than 2 ips, truncating to 2", "cidrStrings", cidrStrings)
|
logger.Info("Got more than 2 ips, truncating to 2", "cidrStrings", cidrStrings)
|
||||||
cidrStrings = cidrStrings[:2]
|
cidrStrings = cidrStrings[:2]
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -281,13 +290,13 @@ func (ca *cloudCIDRAllocator) updateCIDRAllocation(nodeName string) error {
|
|||||||
return fmt.Errorf("failed to parse strings %v as CIDRs: %v", cidrStrings, err)
|
return fmt.Errorf("failed to parse strings %v as CIDRs: %v", cidrStrings, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
needUpdate, err := needPodCIDRsUpdate(node, cidrs)
|
needUpdate, err := needPodCIDRsUpdate(logger, node, cidrs)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("err: %v, CIDRS: %v", err, cidrStrings)
|
return fmt.Errorf("err: %v, CIDRS: %v", err, cidrStrings)
|
||||||
}
|
}
|
||||||
if needUpdate {
|
if needUpdate {
|
||||||
if node.Spec.PodCIDR != "" {
|
if node.Spec.PodCIDR != "" {
|
||||||
klog.ErrorS(nil, "PodCIDR being reassigned!", "nodeName", node.Name, "podCIDRs", node.Spec.PodCIDRs, "cidrStrings", cidrStrings)
|
logger.Error(nil, "PodCIDR being reassigned", "node", klog.KObj(node), "podCIDRs", node.Spec.PodCIDRs, "cidrStrings", cidrStrings)
|
||||||
// We fall through and set the CIDR despite this error. This
|
// We fall through and set the CIDR despite this error. This
|
||||||
// implements the same logic as implemented in the
|
// implements the same logic as implemented in the
|
||||||
// rangeAllocator.
|
// rangeAllocator.
|
||||||
@ -296,14 +305,14 @@ func (ca *cloudCIDRAllocator) updateCIDRAllocation(nodeName string) error {
|
|||||||
}
|
}
|
||||||
for i := 0; i < cidrUpdateRetries; i++ {
|
for i := 0; i < cidrUpdateRetries; i++ {
|
||||||
if err = nodeutil.PatchNodeCIDRs(ca.client, types.NodeName(node.Name), cidrStrings); err == nil {
|
if err = nodeutil.PatchNodeCIDRs(ca.client, types.NodeName(node.Name), cidrStrings); err == nil {
|
||||||
klog.InfoS("Set the node PodCIDRs", "nodeName", node.Name, "cidrStrings", cidrStrings)
|
logger.Info("Set the node PodCIDRs", "node", klog.KObj(node), "cidrStrings", cidrStrings)
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
controllerutil.RecordNodeStatusChange(ca.recorder, node, "CIDRAssignmentFailed")
|
controllerutil.RecordNodeStatusChange(ca.recorder, node, "CIDRAssignmentFailed")
|
||||||
klog.ErrorS(err, "Failed to update the node PodCIDR after multiple attempts", "nodeName", node.Name, "cidrStrings", cidrStrings)
|
logger.Error(err, "Failed to update the node PodCIDR after multiple attempts", "node", klog.KObj(node), "cidrStrings", cidrStrings)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -315,31 +324,31 @@ func (ca *cloudCIDRAllocator) updateCIDRAllocation(nodeName string) error {
|
|||||||
LastTransitionTime: metav1.Now(),
|
LastTransitionTime: metav1.Now(),
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
klog.ErrorS(err, "Error setting route status for the node", "nodeName", node.Name)
|
logger.Error(err, "Error setting route status for the node", "node", klog.KObj(node))
|
||||||
}
|
}
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
func needPodCIDRsUpdate(node *v1.Node, podCIDRs []*net.IPNet) (bool, error) {
|
func needPodCIDRsUpdate(logger klog.Logger, node *v1.Node, podCIDRs []*net.IPNet) (bool, error) {
|
||||||
if node.Spec.PodCIDR == "" {
|
if node.Spec.PodCIDR == "" {
|
||||||
return true, nil
|
return true, nil
|
||||||
}
|
}
|
||||||
_, nodePodCIDR, err := netutils.ParseCIDRSloppy(node.Spec.PodCIDR)
|
_, nodePodCIDR, err := netutils.ParseCIDRSloppy(node.Spec.PodCIDR)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
klog.ErrorS(err, "Found invalid node.Spec.PodCIDR", "podCIDR", node.Spec.PodCIDR)
|
logger.Error(err, "Found invalid node.Spec.PodCIDR", "podCIDR", node.Spec.PodCIDR)
|
||||||
// We will try to overwrite with new CIDR(s)
|
// We will try to overwrite with new CIDR(s)
|
||||||
return true, nil
|
return true, nil
|
||||||
}
|
}
|
||||||
nodePodCIDRs, err := netutils.ParseCIDRs(node.Spec.PodCIDRs)
|
nodePodCIDRs, err := netutils.ParseCIDRs(node.Spec.PodCIDRs)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
klog.ErrorS(err, "Found invalid node.Spec.PodCIDRs", "podCIDRs", node.Spec.PodCIDRs)
|
logger.Error(err, "Found invalid node.Spec.PodCIDRs", "podCIDRs", node.Spec.PodCIDRs)
|
||||||
// We will try to overwrite with new CIDR(s)
|
// We will try to overwrite with new CIDR(s)
|
||||||
return true, nil
|
return true, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(podCIDRs) == 1 {
|
if len(podCIDRs) == 1 {
|
||||||
if cmp.Equal(nodePodCIDR, podCIDRs[0]) {
|
if cmp.Equal(nodePodCIDR, podCIDRs[0]) {
|
||||||
klog.V(4).InfoS("Node already has allocated CIDR. It matches the proposed one.", "nodeName", node.Name, "podCIDR", podCIDRs[0])
|
logger.V(4).Info("Node already has allocated CIDR. It matches the proposed one", "node", klog.KObj(node), "podCIDR", podCIDRs[0])
|
||||||
return false, nil
|
return false, nil
|
||||||
}
|
}
|
||||||
} else if len(nodePodCIDRs) == len(podCIDRs) {
|
} else if len(nodePodCIDRs) == len(podCIDRs) {
|
||||||
@ -351,15 +360,15 @@ func needPodCIDRsUpdate(node *v1.Node, podCIDRs []*net.IPNet) (bool, error) {
|
|||||||
return true, nil
|
return true, nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
klog.V(4).InfoS("Node already has allocated CIDRs. It matches the proposed one.", "nodeName", node.Name, "podCIDRs", podCIDRs)
|
logger.V(4).Info("Node already has allocated CIDRs. It matches the proposed one", "node", klog.KObj(node), "podCIDRs", podCIDRs)
|
||||||
return false, nil
|
return false, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
return true, nil
|
return true, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ca *cloudCIDRAllocator) ReleaseCIDR(node *v1.Node) error {
|
func (ca *cloudCIDRAllocator) ReleaseCIDR(logger klog.Logger, node *v1.Node) error {
|
||||||
klog.V(2).Infof("Node %v PodCIDR (%v) will be released by external cloud provider (not managed by controller)",
|
logger.V(2).Info("Node's PodCIDR will be released by external cloud provider (not managed by controller)",
|
||||||
node.Name, node.Spec.PodCIDR)
|
"node", klog.KObj(node), "podCIDR", node.Spec.PodCIDR)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -25,9 +25,10 @@ import (
|
|||||||
informers "k8s.io/client-go/informers/core/v1"
|
informers "k8s.io/client-go/informers/core/v1"
|
||||||
clientset "k8s.io/client-go/kubernetes"
|
clientset "k8s.io/client-go/kubernetes"
|
||||||
cloudprovider "k8s.io/cloud-provider"
|
cloudprovider "k8s.io/cloud-provider"
|
||||||
|
"k8s.io/klog/v2"
|
||||||
)
|
)
|
||||||
|
|
||||||
// NewCloudCIDRAllocator creates a new cloud CIDR allocator.
|
// NewCloudCIDRAllocator creates a new cloud CIDR allocator.
|
||||||
func NewCloudCIDRAllocator(client clientset.Interface, cloud cloudprovider.Interface, nodeInformer informers.NodeInformer) (CIDRAllocator, error) {
|
func NewCloudCIDRAllocator(logger klog.Logger, client clientset.Interface, cloud cloudprovider.Interface, nodeInformer informers.NodeInformer) (CIDRAllocator, error) {
|
||||||
return nil, errors.New("legacy cloud provider support not built")
|
return nil, errors.New("legacy cloud provider support not built")
|
||||||
}
|
}
|
||||||
|
@ -28,6 +28,7 @@ import (
|
|||||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
"k8s.io/client-go/informers"
|
"k8s.io/client-go/informers"
|
||||||
"k8s.io/client-go/kubernetes/fake"
|
"k8s.io/client-go/kubernetes/fake"
|
||||||
|
"k8s.io/klog/v2/ktesting"
|
||||||
netutils "k8s.io/utils/net"
|
netutils "k8s.io/utils/net"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -42,7 +43,6 @@ func hasNodeInProcessing(ca *cloudCIDRAllocator, name string) bool {
|
|||||||
func TestBoundedRetries(t *testing.T) {
|
func TestBoundedRetries(t *testing.T) {
|
||||||
clientSet := fake.NewSimpleClientset()
|
clientSet := fake.NewSimpleClientset()
|
||||||
updateChan := make(chan string, 1) // need to buffer as we are using only on go routine
|
updateChan := make(chan string, 1) // need to buffer as we are using only on go routine
|
||||||
stopChan := make(chan struct{})
|
|
||||||
sharedInfomer := informers.NewSharedInformerFactory(clientSet, 1*time.Hour)
|
sharedInfomer := informers.NewSharedInformerFactory(clientSet, 1*time.Hour)
|
||||||
ca := &cloudCIDRAllocator{
|
ca := &cloudCIDRAllocator{
|
||||||
client: clientSet,
|
client: clientSet,
|
||||||
@ -51,9 +51,10 @@ func TestBoundedRetries(t *testing.T) {
|
|||||||
nodesSynced: sharedInfomer.Core().V1().Nodes().Informer().HasSynced,
|
nodesSynced: sharedInfomer.Core().V1().Nodes().Informer().HasSynced,
|
||||||
nodesInProcessing: map[string]*nodeProcessingInfo{},
|
nodesInProcessing: map[string]*nodeProcessingInfo{},
|
||||||
}
|
}
|
||||||
go ca.worker(stopChan)
|
logger, ctx := ktesting.NewTestContext(t)
|
||||||
|
go ca.worker(ctx)
|
||||||
nodeName := "testNode"
|
nodeName := "testNode"
|
||||||
ca.AllocateOrOccupyCIDR(&v1.Node{
|
ca.AllocateOrOccupyCIDR(logger, &v1.Node{
|
||||||
ObjectMeta: metav1.ObjectMeta{
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
Name: nodeName,
|
Name: nodeName,
|
||||||
},
|
},
|
||||||
@ -176,9 +177,9 @@ func TestNeedPodCIDRsUpdate(t *testing.T) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf("failed to parse %v as CIDRs: %v", tc.cidrs, err)
|
t.Errorf("failed to parse %v as CIDRs: %v", tc.cidrs, err)
|
||||||
}
|
}
|
||||||
|
logger, _ := ktesting.NewTestContext(t)
|
||||||
t.Run(tc.desc, func(t *testing.T) {
|
t.Run(tc.desc, func(t *testing.T) {
|
||||||
got, err := needPodCIDRsUpdate(&node, netCIDRs)
|
got, err := needPodCIDRsUpdate(logger, &node, netCIDRs)
|
||||||
if tc.wantErr == (err == nil) {
|
if tc.wantErr == (err == nil) {
|
||||||
t.Errorf("err: %v, wantErr: %v", err, tc.wantErr)
|
t.Errorf("err: %v, wantErr: %v", err, tc.wantErr)
|
||||||
}
|
}
|
||||||
|
@ -20,6 +20,7 @@ limitations under the License.
|
|||||||
package ipam
|
package ipam
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"net"
|
"net"
|
||||||
"sync"
|
"sync"
|
||||||
@ -113,10 +114,10 @@ func NewController(
|
|||||||
// Start initializes the Controller with the existing list of nodes and
|
// Start initializes the Controller with the existing list of nodes and
|
||||||
// registers the informers for node changes. This will start synchronization
|
// registers the informers for node changes. This will start synchronization
|
||||||
// of the node and cloud CIDR range allocations.
|
// of the node and cloud CIDR range allocations.
|
||||||
func (c *Controller) Start(nodeInformer informers.NodeInformer) error {
|
func (c *Controller) Start(logger klog.Logger, nodeInformer informers.NodeInformer) error {
|
||||||
klog.InfoS("Starting IPAM controller", "config", c.config)
|
logger.Info("Starting IPAM controller", "config", c.config)
|
||||||
|
|
||||||
nodes, err := listNodes(c.adapter.k8s)
|
nodes, err := listNodes(logger, c.adapter.k8s)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -125,9 +126,9 @@ func (c *Controller) Start(nodeInformer informers.NodeInformer) error {
|
|||||||
_, cidrRange, err := netutils.ParseCIDRSloppy(node.Spec.PodCIDR)
|
_, cidrRange, err := netutils.ParseCIDRSloppy(node.Spec.PodCIDR)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
c.set.Occupy(cidrRange)
|
c.set.Occupy(cidrRange)
|
||||||
klog.V(3).InfoS("Occupying CIDR for node", "CIDR", node.Spec.PodCIDR, "node", node.Name)
|
logger.V(3).Info("Occupying CIDR for node", "CIDR", node.Spec.PodCIDR, "node", klog.KObj(&node))
|
||||||
} else {
|
} else {
|
||||||
klog.ErrorS(err, "Node has an invalid CIDR", "node", node.Name, "CIDR", node.Spec.PodCIDR)
|
logger.Error(err, "Node has an invalid CIDR", "node", klog.KObj(&node), "CIDR", node.Spec.PodCIDR)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -138,24 +139,30 @@ func (c *Controller) Start(nodeInformer informers.NodeInformer) error {
|
|||||||
// XXX/bowei -- stagger the start of each sync cycle.
|
// XXX/bowei -- stagger the start of each sync cycle.
|
||||||
syncer := c.newSyncer(node.Name)
|
syncer := c.newSyncer(node.Name)
|
||||||
c.syncers[node.Name] = syncer
|
c.syncers[node.Name] = syncer
|
||||||
go syncer.Loop(nil)
|
go syncer.Loop(logger, nil)
|
||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
|
nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
|
||||||
AddFunc: controllerutil.CreateAddNodeHandler(c.onAdd),
|
AddFunc: controllerutil.CreateAddNodeHandler(func(node *v1.Node) error {
|
||||||
UpdateFunc: controllerutil.CreateUpdateNodeHandler(c.onUpdate),
|
return c.onAdd(logger, node)
|
||||||
DeleteFunc: controllerutil.CreateDeleteNodeHandler(c.onDelete),
|
}),
|
||||||
|
UpdateFunc: controllerutil.CreateUpdateNodeHandler(func(_, newNode *v1.Node) error {
|
||||||
|
return c.onUpdate(logger, newNode)
|
||||||
|
}),
|
||||||
|
DeleteFunc: controllerutil.CreateDeleteNodeHandler(func(node *v1.Node) error {
|
||||||
|
return c.onDelete(logger, node)
|
||||||
|
}),
|
||||||
})
|
})
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Controller) Run(stopCh <-chan struct{}) {
|
func (c *Controller) Run(ctx context.Context) {
|
||||||
defer utilruntime.HandleCrash()
|
defer utilruntime.HandleCrash()
|
||||||
|
|
||||||
go c.adapter.Run(stopCh)
|
go c.adapter.Run(ctx)
|
||||||
<-stopCh
|
<-ctx.Done()
|
||||||
}
|
}
|
||||||
|
|
||||||
// occupyServiceCIDR removes the service CIDR range from the cluster CIDR if it
|
// occupyServiceCIDR removes the service CIDR range from the cluster CIDR if it
|
||||||
@ -192,7 +199,7 @@ func (c *Controller) newSyncer(name string) *nodesync.NodeSync {
|
|||||||
return nodesync.New(ns, c.adapter, c.adapter, c.config.Mode, name, c.set)
|
return nodesync.New(ns, c.adapter, c.adapter, c.config.Mode, name, c.set)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Controller) onAdd(node *v1.Node) error {
|
func (c *Controller) onAdd(logger klog.Logger, node *v1.Node) error {
|
||||||
c.lock.Lock()
|
c.lock.Lock()
|
||||||
defer c.lock.Unlock()
|
defer c.lock.Unlock()
|
||||||
|
|
||||||
@ -200,30 +207,30 @@ func (c *Controller) onAdd(node *v1.Node) error {
|
|||||||
if !ok {
|
if !ok {
|
||||||
syncer = c.newSyncer(node.Name)
|
syncer = c.newSyncer(node.Name)
|
||||||
c.syncers[node.Name] = syncer
|
c.syncers[node.Name] = syncer
|
||||||
go syncer.Loop(nil)
|
go syncer.Loop(logger, nil)
|
||||||
} else {
|
} else {
|
||||||
klog.InfoS("Add for node that already exists", "node", node.Name)
|
logger.Info("Add for node that already exists", "node", klog.KObj(node))
|
||||||
}
|
}
|
||||||
syncer.Update(node)
|
syncer.Update(node)
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Controller) onUpdate(_, node *v1.Node) error {
|
func (c *Controller) onUpdate(logger klog.Logger, node *v1.Node) error {
|
||||||
c.lock.Lock()
|
c.lock.Lock()
|
||||||
defer c.lock.Unlock()
|
defer c.lock.Unlock()
|
||||||
|
|
||||||
if sync, ok := c.syncers[node.Name]; ok {
|
if sync, ok := c.syncers[node.Name]; ok {
|
||||||
sync.Update(node)
|
sync.Update(node)
|
||||||
} else {
|
} else {
|
||||||
klog.ErrorS(nil, "Received update for non-existent node", "node", node.Name)
|
logger.Error(nil, "Received update for non-existent node", "node", klog.KObj(node))
|
||||||
return fmt.Errorf("unknown node %q", node.Name)
|
return fmt.Errorf("unknown node %q", node.Name)
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Controller) onDelete(node *v1.Node) error {
|
func (c *Controller) onDelete(logger klog.Logger, node *v1.Node) error {
|
||||||
c.lock.Lock()
|
c.lock.Lock()
|
||||||
defer c.lock.Unlock()
|
defer c.lock.Unlock()
|
||||||
|
|
||||||
@ -231,7 +238,7 @@ func (c *Controller) onDelete(node *v1.Node) error {
|
|||||||
syncer.Delete(node)
|
syncer.Delete(node)
|
||||||
delete(c.syncers, node.Name)
|
delete(c.syncers, node.Name)
|
||||||
} else {
|
} else {
|
||||||
klog.InfoS("Node was already deleted", "node", node.Name)
|
logger.Info("Node was already deleted", "node", klog.KObj(node))
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
|
@ -102,6 +102,7 @@ type multiCIDRRangeAllocator struct {
|
|||||||
// Caller must always pass in a list of existing nodes to the new allocator.
|
// Caller must always pass in a list of existing nodes to the new allocator.
|
||||||
// NodeList is only nil in testing.
|
// NodeList is only nil in testing.
|
||||||
func NewMultiCIDRRangeAllocator(
|
func NewMultiCIDRRangeAllocator(
|
||||||
|
ctx context.Context,
|
||||||
client clientset.Interface,
|
client clientset.Interface,
|
||||||
nodeInformer informers.NodeInformer,
|
nodeInformer informers.NodeInformer,
|
||||||
clusterCIDRInformer networkinginformers.ClusterCIDRInformer,
|
clusterCIDRInformer networkinginformers.ClusterCIDRInformer,
|
||||||
@ -109,8 +110,10 @@ func NewMultiCIDRRangeAllocator(
|
|||||||
nodeList *v1.NodeList,
|
nodeList *v1.NodeList,
|
||||||
testCIDRMap map[string][]*cidrset.ClusterCIDR,
|
testCIDRMap map[string][]*cidrset.ClusterCIDR,
|
||||||
) (CIDRAllocator, error) {
|
) (CIDRAllocator, error) {
|
||||||
|
logger := klog.FromContext(ctx)
|
||||||
if client == nil {
|
if client == nil {
|
||||||
klog.Fatalf("client is nil")
|
logger.Error(nil, "kubeClient is nil when starting multi CIDRRangeAllocator")
|
||||||
|
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
|
||||||
}
|
}
|
||||||
|
|
||||||
eventBroadcaster := record.NewBroadcaster()
|
eventBroadcaster := record.NewBroadcaster()
|
||||||
@ -137,10 +140,10 @@ func NewMultiCIDRRangeAllocator(
|
|||||||
// testCIDRMap is only set for testing purposes.
|
// testCIDRMap is only set for testing purposes.
|
||||||
if len(testCIDRMap) > 0 {
|
if len(testCIDRMap) > 0 {
|
||||||
ra.cidrMap = testCIDRMap
|
ra.cidrMap = testCIDRMap
|
||||||
klog.Warningf("testCIDRMap should only be set for testing purposes, if this is seen in production logs, it might be a misconfiguration or a bug.")
|
logger.Info("TestCIDRMap should only be set for testing purposes, if this is seen in production logs, it might be a misconfiguration or a bug")
|
||||||
}
|
}
|
||||||
|
|
||||||
ccList, err := listClusterCIDRs(client)
|
ccList, err := listClusterCIDRs(ctx, client)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -148,14 +151,14 @@ func NewMultiCIDRRangeAllocator(
|
|||||||
if ccList == nil {
|
if ccList == nil {
|
||||||
ccList = &networkingv1alpha1.ClusterCIDRList{}
|
ccList = &networkingv1alpha1.ClusterCIDRList{}
|
||||||
}
|
}
|
||||||
createDefaultClusterCIDR(ccList, allocatorParams)
|
createDefaultClusterCIDR(logger, ccList, allocatorParams)
|
||||||
|
|
||||||
// Regenerate the cidrMaps from the existing ClusterCIDRs.
|
// Regenerate the cidrMaps from the existing ClusterCIDRs.
|
||||||
for _, clusterCIDR := range ccList.Items {
|
for _, clusterCIDR := range ccList.Items {
|
||||||
klog.Infof("Regenerating existing ClusterCIDR: %v", clusterCIDR)
|
logger.Info("Regenerating existing ClusterCIDR", "clusterCIDR", clusterCIDR)
|
||||||
// Create an event for invalid ClusterCIDRs, do not crash on failures.
|
// Create an event for invalid ClusterCIDRs, do not crash on failures.
|
||||||
if err := ra.reconcileBootstrap(&clusterCIDR); err != nil {
|
if err := ra.reconcileBootstrap(ctx, &clusterCIDR); err != nil {
|
||||||
klog.Errorf("Error while regenerating existing ClusterCIDR: %v", err)
|
logger.Error(err, "Error while regenerating existing ClusterCIDR")
|
||||||
ra.recorder.Event(&clusterCIDR, "Warning", "InvalidClusterCIDR encountered while regenerating ClusterCIDR during bootstrap.", err.Error())
|
ra.recorder.Event(&clusterCIDR, "Warning", "InvalidClusterCIDR encountered while regenerating ClusterCIDR during bootstrap.", err.Error())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -184,25 +187,25 @@ func NewMultiCIDRRangeAllocator(
|
|||||||
})
|
})
|
||||||
|
|
||||||
if allocatorParams.ServiceCIDR != nil {
|
if allocatorParams.ServiceCIDR != nil {
|
||||||
ra.filterOutServiceRange(allocatorParams.ServiceCIDR)
|
ra.filterOutServiceRange(logger, allocatorParams.ServiceCIDR)
|
||||||
} else {
|
} else {
|
||||||
klog.Info("No Service CIDR provided. Skipping filtering out service addresses.")
|
logger.Info("No Service CIDR provided. Skipping filtering out service addresses")
|
||||||
}
|
}
|
||||||
|
|
||||||
if allocatorParams.SecondaryServiceCIDR != nil {
|
if allocatorParams.SecondaryServiceCIDR != nil {
|
||||||
ra.filterOutServiceRange(allocatorParams.SecondaryServiceCIDR)
|
ra.filterOutServiceRange(logger, allocatorParams.SecondaryServiceCIDR)
|
||||||
} else {
|
} else {
|
||||||
klog.Info("No Secondary Service CIDR provided. Skipping filtering out secondary service addresses.")
|
logger.Info("No Secondary Service CIDR provided. Skipping filtering out secondary service addresses")
|
||||||
}
|
}
|
||||||
|
|
||||||
if nodeList != nil {
|
if nodeList != nil {
|
||||||
for _, node := range nodeList.Items {
|
for _, node := range nodeList.Items {
|
||||||
if len(node.Spec.PodCIDRs) == 0 {
|
if len(node.Spec.PodCIDRs) == 0 {
|
||||||
klog.V(4).Infof("Node %v has no CIDR, ignoring", node.Name)
|
logger.V(4).Info("Node has no CIDR, ignoring", "node", klog.KObj(&node))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
klog.Infof("Node %v has CIDR %s, occupying it in CIDR map", node.Name, node.Spec.PodCIDRs)
|
logger.Info("Node has CIDR, occupying it in CIDR map", "node", klog.KObj(&node), "podCIDRs", node.Spec.PodCIDRs)
|
||||||
if err := ra.occupyCIDRs(&node); err != nil {
|
if err := ra.occupyCIDRs(logger, &node); err != nil {
|
||||||
// This will happen if:
|
// This will happen if:
|
||||||
// 1. We find garbage in the podCIDRs field. Retrying is useless.
|
// 1. We find garbage in the podCIDRs field. Retrying is useless.
|
||||||
// 2. CIDR out of range: This means ClusterCIDR is not yet created
|
// 2. CIDR out of range: This means ClusterCIDR is not yet created
|
||||||
@ -231,7 +234,7 @@ func NewMultiCIDRRangeAllocator(
|
|||||||
// we don't see the Update with DeletionTimestamp != 0.
|
// we don't see the Update with DeletionTimestamp != 0.
|
||||||
// TODO: instead of executing the operation directly in the handler, build a small cache with key node.Name
|
// TODO: instead of executing the operation directly in the handler, build a small cache with key node.Name
|
||||||
// and value PodCIDRs use ReleaseCIDR on the reconcile loop so we can retry on `ReleaseCIDR` failures.
|
// and value PodCIDRs use ReleaseCIDR on the reconcile loop so we can retry on `ReleaseCIDR` failures.
|
||||||
ra.ReleaseCIDR(obj.(*v1.Node))
|
ra.ReleaseCIDR(logger, obj.(*v1.Node))
|
||||||
// IndexerInformer uses a delta nodeQueue, therefore for deletes we have to use this
|
// IndexerInformer uses a delta nodeQueue, therefore for deletes we have to use this
|
||||||
// key function.
|
// key function.
|
||||||
key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj)
|
key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj)
|
||||||
@ -244,51 +247,52 @@ func NewMultiCIDRRangeAllocator(
|
|||||||
return ra, nil
|
return ra, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *multiCIDRRangeAllocator) Run(stopCh <-chan struct{}) {
|
func (r *multiCIDRRangeAllocator) Run(ctx context.Context) {
|
||||||
defer utilruntime.HandleCrash()
|
defer utilruntime.HandleCrash()
|
||||||
|
|
||||||
// Start event processing pipeline.
|
// Start event processing pipeline.
|
||||||
|
logger := klog.FromContext(ctx)
|
||||||
r.broadcaster.StartStructuredLogging(0)
|
r.broadcaster.StartStructuredLogging(0)
|
||||||
klog.Infof("Started sending events to API Server.")
|
logger.Info("Started sending events to API Server")
|
||||||
r.broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: r.client.CoreV1().Events("")})
|
r.broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: r.client.CoreV1().Events("")})
|
||||||
defer r.broadcaster.Shutdown()
|
defer r.broadcaster.Shutdown()
|
||||||
|
|
||||||
defer r.cidrQueue.ShutDown()
|
defer r.cidrQueue.ShutDown()
|
||||||
defer r.nodeQueue.ShutDown()
|
defer r.nodeQueue.ShutDown()
|
||||||
|
|
||||||
klog.Infof("Starting Multi CIDR Range allocator")
|
logger.Info("Starting Multi CIDR Range allocator")
|
||||||
defer klog.Infof("Shutting down Multi CIDR Range allocator")
|
defer logger.Info("Shutting down Multi CIDR Range allocator")
|
||||||
|
|
||||||
if !cache.WaitForNamedCacheSync("multi_cidr_range_allocator", stopCh, r.nodesSynced, r.clusterCIDRSynced) {
|
if !cache.WaitForNamedCacheSync("multi_cidr_range_allocator", ctx.Done(), r.nodesSynced, r.clusterCIDRSynced) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
for i := 0; i < cidrUpdateWorkers; i++ {
|
for i := 0; i < cidrUpdateWorkers; i++ {
|
||||||
go wait.Until(r.runCIDRWorker, time.Second, stopCh)
|
go wait.UntilWithContext(ctx, r.runCIDRWorker, time.Second)
|
||||||
go wait.Until(r.runNodeWorker, time.Second, stopCh)
|
go wait.UntilWithContext(ctx, r.runNodeWorker, time.Second)
|
||||||
}
|
}
|
||||||
|
|
||||||
<-stopCh
|
<-ctx.Done()
|
||||||
}
|
}
|
||||||
|
|
||||||
// runWorker is a long-running function that will continually call the
|
// runWorker is a long-running function that will continually call the
|
||||||
// processNextWorkItem function in order to read and process a message on the
|
// processNextWorkItem function in order to read and process a message on the
|
||||||
// cidrQueue.
|
// cidrQueue.
|
||||||
func (r *multiCIDRRangeAllocator) runCIDRWorker() {
|
func (r *multiCIDRRangeAllocator) runCIDRWorker(ctx context.Context) {
|
||||||
for r.processNextCIDRWorkItem() {
|
for r.processNextCIDRWorkItem(ctx) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// processNextWorkItem will read a single work item off the cidrQueue and
|
// processNextWorkItem will read a single work item off the cidrQueue and
|
||||||
// attempt to process it, by calling the syncHandler.
|
// attempt to process it, by calling the syncHandler.
|
||||||
func (r *multiCIDRRangeAllocator) processNextCIDRWorkItem() bool {
|
func (r *multiCIDRRangeAllocator) processNextCIDRWorkItem(ctx context.Context) bool {
|
||||||
obj, shutdown := r.cidrQueue.Get()
|
obj, shutdown := r.cidrQueue.Get()
|
||||||
if shutdown {
|
if shutdown {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// We wrap this block in a func so we can defer c.cidrQueue.Done.
|
// We wrap this block in a func so we can defer c.cidrQueue.Done.
|
||||||
err := func(obj interface{}) error {
|
err := func(ctx context.Context, obj interface{}) error {
|
||||||
// We call Done here so the cidrQueue knows we have finished
|
// We call Done here so the cidrQueue knows we have finished
|
||||||
// processing this item. We also must remember to call Forget if we
|
// processing this item. We also must remember to call Forget if we
|
||||||
// do not want this work item being re-queued. For example, we do
|
// do not want this work item being re-queued. For example, we do
|
||||||
@ -313,7 +317,7 @@ func (r *multiCIDRRangeAllocator) processNextCIDRWorkItem() bool {
|
|||||||
}
|
}
|
||||||
// Run the syncHandler, passing it the namespace/name string of the
|
// Run the syncHandler, passing it the namespace/name string of the
|
||||||
// Foo resource to be synced.
|
// Foo resource to be synced.
|
||||||
if err := r.syncClusterCIDR(key); err != nil {
|
if err := r.syncClusterCIDR(ctx, key); err != nil {
|
||||||
// Put the item back on the cidrQueue to handle any transient errors.
|
// Put the item back on the cidrQueue to handle any transient errors.
|
||||||
r.cidrQueue.AddRateLimited(key)
|
r.cidrQueue.AddRateLimited(key)
|
||||||
return fmt.Errorf("error syncing '%s': %s, requeuing", key, err.Error())
|
return fmt.Errorf("error syncing '%s': %s, requeuing", key, err.Error())
|
||||||
@ -323,7 +327,7 @@ func (r *multiCIDRRangeAllocator) processNextCIDRWorkItem() bool {
|
|||||||
r.cidrQueue.Forget(obj)
|
r.cidrQueue.Forget(obj)
|
||||||
klog.Infof("Successfully synced '%s'", key)
|
klog.Infof("Successfully synced '%s'", key)
|
||||||
return nil
|
return nil
|
||||||
}(obj)
|
}(ctx, obj)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
utilruntime.HandleError(err)
|
utilruntime.HandleError(err)
|
||||||
@ -333,21 +337,21 @@ func (r *multiCIDRRangeAllocator) processNextCIDRWorkItem() bool {
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *multiCIDRRangeAllocator) runNodeWorker() {
|
func (r *multiCIDRRangeAllocator) runNodeWorker(ctx context.Context) {
|
||||||
for r.processNextNodeWorkItem() {
|
for r.processNextNodeWorkItem(ctx) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// processNextWorkItem will read a single work item off the cidrQueue and
|
// processNextWorkItem will read a single work item off the cidrQueue and
|
||||||
// attempt to process it, by calling the syncHandler.
|
// attempt to process it, by calling the syncHandler.
|
||||||
func (r *multiCIDRRangeAllocator) processNextNodeWorkItem() bool {
|
func (r *multiCIDRRangeAllocator) processNextNodeWorkItem(ctx context.Context) bool {
|
||||||
obj, shutdown := r.nodeQueue.Get()
|
obj, shutdown := r.nodeQueue.Get()
|
||||||
if shutdown {
|
if shutdown {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// We wrap this block in a func so we can defer c.cidrQueue.Done.
|
// We wrap this block in a func so we can defer c.cidrQueue.Done.
|
||||||
err := func(obj interface{}) error {
|
err := func(logger klog.Logger, obj interface{}) error {
|
||||||
// We call Done here so the workNodeQueue knows we have finished
|
// We call Done here so the workNodeQueue knows we have finished
|
||||||
// processing this item. We also must remember to call Forget if we
|
// processing this item. We also must remember to call Forget if we
|
||||||
// do not want this work item being re-queued. For example, we do
|
// do not want this work item being re-queued. For example, we do
|
||||||
@ -372,7 +376,7 @@ func (r *multiCIDRRangeAllocator) processNextNodeWorkItem() bool {
|
|||||||
}
|
}
|
||||||
// Run the syncHandler, passing it the namespace/name string of the
|
// Run the syncHandler, passing it the namespace/name string of the
|
||||||
// Foo resource to be synced.
|
// Foo resource to be synced.
|
||||||
if err := r.syncNode(key); err != nil {
|
if err := r.syncNode(logger, key); err != nil {
|
||||||
// Put the item back on the cidrQueue to handle any transient errors.
|
// Put the item back on the cidrQueue to handle any transient errors.
|
||||||
r.nodeQueue.AddRateLimited(key)
|
r.nodeQueue.AddRateLimited(key)
|
||||||
return fmt.Errorf("error syncing '%s': %s, requeuing", key, err.Error())
|
return fmt.Errorf("error syncing '%s': %s, requeuing", key, err.Error())
|
||||||
@ -382,7 +386,7 @@ func (r *multiCIDRRangeAllocator) processNextNodeWorkItem() bool {
|
|||||||
r.nodeQueue.Forget(obj)
|
r.nodeQueue.Forget(obj)
|
||||||
klog.Infof("Successfully synced '%s'", key)
|
klog.Infof("Successfully synced '%s'", key)
|
||||||
return nil
|
return nil
|
||||||
}(obj)
|
}(klog.FromContext(ctx), obj)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
utilruntime.HandleError(err)
|
utilruntime.HandleError(err)
|
||||||
@ -392,7 +396,7 @@ func (r *multiCIDRRangeAllocator) processNextNodeWorkItem() bool {
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *multiCIDRRangeAllocator) syncNode(key string) error {
|
func (r *multiCIDRRangeAllocator) syncNode(logger klog.Logger, key string) error {
|
||||||
startTime := time.Now()
|
startTime := time.Now()
|
||||||
defer func() {
|
defer func() {
|
||||||
klog.V(4).Infof("Finished syncing Node request %q (%v)", key, time.Since(startTime))
|
klog.V(4).Infof("Finished syncing Node request %q (%v)", key, time.Since(startTime))
|
||||||
@ -411,9 +415,9 @@ func (r *multiCIDRRangeAllocator) syncNode(key string) error {
|
|||||||
// Check the DeletionTimestamp to determine if object is under deletion.
|
// Check the DeletionTimestamp to determine if object is under deletion.
|
||||||
if !node.DeletionTimestamp.IsZero() {
|
if !node.DeletionTimestamp.IsZero() {
|
||||||
klog.V(3).Infof("node is being deleted: %v", key)
|
klog.V(3).Infof("node is being deleted: %v", key)
|
||||||
return r.ReleaseCIDR(node)
|
return r.ReleaseCIDR(logger, node)
|
||||||
}
|
}
|
||||||
return r.AllocateOrOccupyCIDR(node)
|
return r.AllocateOrOccupyCIDR(logger, node)
|
||||||
}
|
}
|
||||||
|
|
||||||
// needToAddFinalizer checks if a finalizer should be added to the object.
|
// needToAddFinalizer checks if a finalizer should be added to the object.
|
||||||
@ -422,15 +426,16 @@ func needToAddFinalizer(obj metav1.Object, finalizer string) bool {
|
|||||||
finalizer, nil)
|
finalizer, nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *multiCIDRRangeAllocator) syncClusterCIDR(key string) error {
|
func (r *multiCIDRRangeAllocator) syncClusterCIDR(ctx context.Context, key string) error {
|
||||||
startTime := time.Now()
|
startTime := time.Now()
|
||||||
|
logger := klog.FromContext(ctx)
|
||||||
defer func() {
|
defer func() {
|
||||||
klog.V(4).Infof("Finished syncing clusterCIDR request %q (%v)", key, time.Since(startTime))
|
logger.V(4).Info("Finished syncing clusterCIDR request", "key", key, "latency", time.Since(startTime))
|
||||||
}()
|
}()
|
||||||
|
|
||||||
clusterCIDR, err := r.clusterCIDRLister.Get(key)
|
clusterCIDR, err := r.clusterCIDRLister.Get(key)
|
||||||
if apierrors.IsNotFound(err) {
|
if apierrors.IsNotFound(err) {
|
||||||
klog.V(3).Infof("clusterCIDR has been deleted: %v", key)
|
logger.V(3).Info("clusterCIDR has been deleted", "key", key)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -440,21 +445,20 @@ func (r *multiCIDRRangeAllocator) syncClusterCIDR(key string) error {
|
|||||||
|
|
||||||
// Check the DeletionTimestamp to determine if object is under deletion.
|
// Check the DeletionTimestamp to determine if object is under deletion.
|
||||||
if !clusterCIDR.DeletionTimestamp.IsZero() {
|
if !clusterCIDR.DeletionTimestamp.IsZero() {
|
||||||
return r.reconcileDelete(clusterCIDR)
|
return r.reconcileDelete(ctx, clusterCIDR)
|
||||||
}
|
}
|
||||||
return r.reconcileCreate(clusterCIDR)
|
return r.reconcileCreate(ctx, clusterCIDR)
|
||||||
}
|
}
|
||||||
|
|
||||||
// occupyCIDRs marks node.PodCIDRs[...] as used in allocator's tracked cidrSet.
|
// occupyCIDRs marks node.PodCIDRs[...] as used in allocator's tracked cidrSet.
|
||||||
func (r *multiCIDRRangeAllocator) occupyCIDRs(node *v1.Node) error {
|
func (r *multiCIDRRangeAllocator) occupyCIDRs(logger klog.Logger, node *v1.Node) error {
|
||||||
|
|
||||||
err := func(node *v1.Node) error {
|
err := func(node *v1.Node) error {
|
||||||
|
|
||||||
if len(node.Spec.PodCIDRs) == 0 {
|
if len(node.Spec.PodCIDRs) == 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
clusterCIDRList, err := r.orderedMatchingClusterCIDRs(logger, node, true)
|
||||||
clusterCIDRList, err := r.orderedMatchingClusterCIDRs(node, true)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -468,10 +472,10 @@ func (r *multiCIDRRangeAllocator) occupyCIDRs(node *v1.Node) error {
|
|||||||
return fmt.Errorf("failed to parse CIDR %s on Node %v: %w", cidr, node.Name, err)
|
return fmt.Errorf("failed to parse CIDR %s on Node %v: %w", cidr, node.Name, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
klog.Infof("occupy CIDR %s for node: %s", cidr, node.Name)
|
logger.Info("occupy CIDR for node", "CIDR", cidr, "node", klog.KObj(node))
|
||||||
|
|
||||||
if err := r.Occupy(clusterCIDR, podCIDR); err != nil {
|
if err := r.Occupy(clusterCIDR, podCIDR); err != nil {
|
||||||
klog.V(3).Infof("Could not occupy cidr: %v, trying next range: %w", node.Spec.PodCIDRs, err)
|
logger.V(3).Info("Could not occupy cidr, trying next range", "podCIDRs", node.Spec.PodCIDRs, "err", err)
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -519,14 +523,14 @@ func (r *multiCIDRRangeAllocator) Occupy(clusterCIDR *cidrset.ClusterCIDR, cidr
|
|||||||
|
|
||||||
// Release marks the CIDR as free in the cidrSet used bitmap,
|
// Release marks the CIDR as free in the cidrSet used bitmap,
|
||||||
// Also removes the CIDR from the allocatedCIDRSet.
|
// Also removes the CIDR from the allocatedCIDRSet.
|
||||||
func (r *multiCIDRRangeAllocator) Release(clusterCIDR *cidrset.ClusterCIDR, cidr *net.IPNet) error {
|
func (r *multiCIDRRangeAllocator) Release(logger klog.Logger, clusterCIDR *cidrset.ClusterCIDR, cidr *net.IPNet) error {
|
||||||
currCIDRSet, err := r.associatedCIDRSet(clusterCIDR, cidr)
|
currCIDRSet, err := r.associatedCIDRSet(clusterCIDR, cidr)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := currCIDRSet.Release(cidr); err != nil {
|
if err := currCIDRSet.Release(cidr); err != nil {
|
||||||
klog.Infof("Unable to release cidr %v in cidrSet", cidr)
|
logger.Info("Unable to release cidr in cidrSet", "CIDR", cidr)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -539,7 +543,7 @@ func (r *multiCIDRRangeAllocator) Release(clusterCIDR *cidrset.ClusterCIDR, cidr
|
|||||||
// WARNING: If you're adding any return calls or defer any more work from this
|
// WARNING: If you're adding any return calls or defer any more work from this
|
||||||
// function you have to make sure to update nodesInProcessing properly with the
|
// function you have to make sure to update nodesInProcessing properly with the
|
||||||
// disposition of the node when the work is done.
|
// disposition of the node when the work is done.
|
||||||
func (r *multiCIDRRangeAllocator) AllocateOrOccupyCIDR(node *v1.Node) error {
|
func (r *multiCIDRRangeAllocator) AllocateOrOccupyCIDR(logger klog.Logger, node *v1.Node) error {
|
||||||
r.lock.Lock()
|
r.lock.Lock()
|
||||||
defer r.lock.Unlock()
|
defer r.lock.Unlock()
|
||||||
|
|
||||||
@ -548,10 +552,10 @@ func (r *multiCIDRRangeAllocator) AllocateOrOccupyCIDR(node *v1.Node) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if len(node.Spec.PodCIDRs) > 0 {
|
if len(node.Spec.PodCIDRs) > 0 {
|
||||||
return r.occupyCIDRs(node)
|
return r.occupyCIDRs(logger, node)
|
||||||
}
|
}
|
||||||
|
|
||||||
cidrs, clusterCIDR, err := r.prioritizedCIDRs(node)
|
cidrs, clusterCIDR, err := r.prioritizedCIDRs(logger, node)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
controllerutil.RecordNodeStatusChange(r.recorder, node, "CIDRNotAvailable")
|
controllerutil.RecordNodeStatusChange(r.recorder, node, "CIDRNotAvailable")
|
||||||
return fmt.Errorf("failed to get cidrs for node %s", node.Name)
|
return fmt.Errorf("failed to get cidrs for node %s", node.Name)
|
||||||
@ -571,11 +575,11 @@ func (r *multiCIDRRangeAllocator) AllocateOrOccupyCIDR(node *v1.Node) error {
|
|||||||
clusterCIDR: clusterCIDR,
|
clusterCIDR: clusterCIDR,
|
||||||
}
|
}
|
||||||
|
|
||||||
return r.updateCIDRsAllocation(allocated)
|
return r.updateCIDRsAllocation(logger, allocated)
|
||||||
}
|
}
|
||||||
|
|
||||||
// ReleaseCIDR marks node.podCIDRs[...] as unused in our tracked cidrSets.
|
// ReleaseCIDR marks node.podCIDRs[...] as unused in our tracked cidrSets.
|
||||||
func (r *multiCIDRRangeAllocator) ReleaseCIDR(node *v1.Node) error {
|
func (r *multiCIDRRangeAllocator) ReleaseCIDR(logger klog.Logger, node *v1.Node) error {
|
||||||
r.lock.Lock()
|
r.lock.Lock()
|
||||||
defer r.lock.Unlock()
|
defer r.lock.Unlock()
|
||||||
|
|
||||||
@ -583,7 +587,7 @@ func (r *multiCIDRRangeAllocator) ReleaseCIDR(node *v1.Node) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
clusterCIDR, err := r.allocatedClusterCIDR(node)
|
clusterCIDR, err := r.allocatedClusterCIDR(logger, node)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -594,8 +598,8 @@ func (r *multiCIDRRangeAllocator) ReleaseCIDR(node *v1.Node) error {
|
|||||||
return fmt.Errorf("failed to parse CIDR %q on Node %q: %w", cidr, node.Name, err)
|
return fmt.Errorf("failed to parse CIDR %q on Node %q: %w", cidr, node.Name, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
klog.Infof("release CIDR %s for node: %s", cidr, node.Name)
|
logger.Info("release CIDR for node", "CIDR", cidr, "node", klog.KObj(node))
|
||||||
if err := r.Release(clusterCIDR, podCIDR); err != nil {
|
if err := r.Release(logger, clusterCIDR, podCIDR); err != nil {
|
||||||
return fmt.Errorf("failed to release cidr %q from clusterCIDR %q for node %q: %w", cidr, clusterCIDR.Name, node.Name, err)
|
return fmt.Errorf("failed to release cidr %q from clusterCIDR %q for node %q: %w", cidr, clusterCIDR.Name, node.Name, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -608,7 +612,7 @@ func (r *multiCIDRRangeAllocator) ReleaseCIDR(node *v1.Node) error {
|
|||||||
|
|
||||||
// Marks all CIDRs with subNetMaskSize that belongs to serviceCIDR as used across all cidrs
|
// Marks all CIDRs with subNetMaskSize that belongs to serviceCIDR as used across all cidrs
|
||||||
// so that they won't be assignable.
|
// so that they won't be assignable.
|
||||||
func (r *multiCIDRRangeAllocator) filterOutServiceRange(serviceCIDR *net.IPNet) {
|
func (r *multiCIDRRangeAllocator) filterOutServiceRange(logger klog.Logger, serviceCIDR *net.IPNet) {
|
||||||
// Checks if service CIDR has a nonempty intersection with cluster
|
// Checks if service CIDR has a nonempty intersection with cluster
|
||||||
// CIDR. It is the case if either clusterCIDR contains serviceCIDR with
|
// CIDR. It is the case if either clusterCIDR contains serviceCIDR with
|
||||||
// clusterCIDR's Mask applied (this means that clusterCIDR contains
|
// clusterCIDR's Mask applied (this means that clusterCIDR contains
|
||||||
@ -617,7 +621,7 @@ func (r *multiCIDRRangeAllocator) filterOutServiceRange(serviceCIDR *net.IPNet)
|
|||||||
for _, clusterCIDRList := range r.cidrMap {
|
for _, clusterCIDRList := range r.cidrMap {
|
||||||
for _, clusterCIDR := range clusterCIDRList {
|
for _, clusterCIDR := range clusterCIDRList {
|
||||||
if err := r.occupyServiceCIDR(clusterCIDR, serviceCIDR); err != nil {
|
if err := r.occupyServiceCIDR(clusterCIDR, serviceCIDR); err != nil {
|
||||||
klog.Errorf("unable to occupy service CIDR: %w", err)
|
logger.Error(err, "Unable to occupy service CIDR")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -645,12 +649,12 @@ func (r *multiCIDRRangeAllocator) occupyServiceCIDR(clusterCIDR *cidrset.Cluster
|
|||||||
}
|
}
|
||||||
|
|
||||||
// updateCIDRsAllocation assigns CIDR to Node and sends an update to the API server.
|
// updateCIDRsAllocation assigns CIDR to Node and sends an update to the API server.
|
||||||
func (r *multiCIDRRangeAllocator) updateCIDRsAllocation(data multiCIDRNodeReservedCIDRs) error {
|
func (r *multiCIDRRangeAllocator) updateCIDRsAllocation(logger klog.Logger, data multiCIDRNodeReservedCIDRs) error {
|
||||||
err := func(data multiCIDRNodeReservedCIDRs) error {
|
err := func(data multiCIDRNodeReservedCIDRs) error {
|
||||||
cidrsString := ipnetToStringList(data.allocatedCIDRs)
|
cidrsString := ipnetToStringList(data.allocatedCIDRs)
|
||||||
node, err := r.nodeLister.Get(data.nodeName)
|
node, err := r.nodeLister.Get(data.nodeName)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
klog.Errorf("Failed while getting node %v for updating Node.Spec.PodCIDRs: %v", data.nodeName, err)
|
logger.Error(err, "Failed while getting node for updating Node.Spec.PodCIDRs", "node", klog.KRef("", data.nodeName))
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -666,16 +670,16 @@ func (r *multiCIDRRangeAllocator) updateCIDRsAllocation(data multiCIDRNodeReserv
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if match {
|
if match {
|
||||||
klog.V(4).Infof("Node %q already has allocated CIDR %q. It matches the proposed one.", node.Name, data.allocatedCIDRs)
|
logger.V(4).Info("Node already has allocated CIDR. It matches the proposed one.", "node", klog.KObj(node), "CIDRs", data.allocatedCIDRs)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// node has cidrs allocated, release the reserved.
|
// node has cidrs allocated, release the reserved.
|
||||||
if len(node.Spec.PodCIDRs) != 0 {
|
if len(node.Spec.PodCIDRs) != 0 {
|
||||||
klog.Errorf("Node %q already has a CIDR allocated %q. Releasing the new one.", node.Name, node.Spec.PodCIDRs)
|
logger.Error(nil, "Node already has a CIDR allocated. Releasing the new one", "node", klog.KObj(node), "podCIDRs", node.Spec.PodCIDRs)
|
||||||
for _, cidr := range data.allocatedCIDRs {
|
for _, cidr := range data.allocatedCIDRs {
|
||||||
if err := r.Release(data.clusterCIDR, cidr); err != nil {
|
if err := r.Release(logger, data.clusterCIDR, cidr); err != nil {
|
||||||
return fmt.Errorf("failed to release cidr %s from clusterCIDR %s for node: %s: %w", cidr, data.clusterCIDR.Name, node.Name, err)
|
return fmt.Errorf("failed to release cidr %s from clusterCIDR %s for node: %s: %w", cidr, data.clusterCIDR.Name, node.Name, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -686,20 +690,20 @@ func (r *multiCIDRRangeAllocator) updateCIDRsAllocation(data multiCIDRNodeReserv
|
|||||||
for i := 0; i < cidrUpdateRetries; i++ {
|
for i := 0; i < cidrUpdateRetries; i++ {
|
||||||
if err = nodeutil.PatchNodeCIDRs(r.client, types.NodeName(node.Name), cidrsString); err == nil {
|
if err = nodeutil.PatchNodeCIDRs(r.client, types.NodeName(node.Name), cidrsString); err == nil {
|
||||||
data.clusterCIDR.AssociatedNodes[node.Name] = true
|
data.clusterCIDR.AssociatedNodes[node.Name] = true
|
||||||
klog.Infof("Set node %q PodCIDR to %q", node.Name, cidrsString)
|
logger.Info("Set node PodCIDR", "node", klog.KObj(node), "podCIDR", cidrsString)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// failed release back to the pool.
|
// failed release back to the pool.
|
||||||
klog.Errorf("Failed to update node %q PodCIDR to %q after %d attempts: %v", node.Name, cidrsString, cidrUpdateRetries, err)
|
logger.Error(err, "Failed to update node PodCIDR after attempts", "node", klog.KObj(node), "podCIDR", cidrsString, "retries", cidrUpdateRetries)
|
||||||
controllerutil.RecordNodeStatusChange(r.recorder, node, "CIDRAssignmentFailed")
|
controllerutil.RecordNodeStatusChange(r.recorder, node, "CIDRAssignmentFailed")
|
||||||
// We accept the fact that we may leak CIDRs here. This is safer than releasing
|
// We accept the fact that we may leak CIDRs here. This is safer than releasing
|
||||||
// them in case when we don't know if request went through.
|
// them in case when we don't know if request went through.
|
||||||
// NodeController restart will return all falsely allocated CIDRs to the pool.
|
// NodeController restart will return all falsely allocated CIDRs to the pool.
|
||||||
if !apierrors.IsServerTimeout(err) {
|
if !apierrors.IsServerTimeout(err) {
|
||||||
klog.Errorf("CIDR assignment for node %q failed: %v. Releasing allocated CIDR", node.Name, err)
|
logger.Error(err, "CIDR assignment for node failed. Releasing allocated CIDR", "node", klog.KObj(node))
|
||||||
for _, cidr := range data.allocatedCIDRs {
|
for _, cidr := range data.allocatedCIDRs {
|
||||||
if err := r.Release(data.clusterCIDR, cidr); err != nil {
|
if err := r.Release(logger, data.clusterCIDR, cidr); err != nil {
|
||||||
return fmt.Errorf("failed to release cidr %q from clusterCIDR %q for node: %q: %w", cidr, data.clusterCIDR.Name, node.Name, err)
|
return fmt.Errorf("failed to release cidr %q from clusterCIDR %q for node: %q: %w", cidr, data.clusterCIDR.Name, node.Name, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -732,8 +736,8 @@ func defaultNodeSelector() *v1.NodeSelector {
|
|||||||
// prioritizedCIDRs returns a list of CIDRs to be allocated to the node.
|
// prioritizedCIDRs returns a list of CIDRs to be allocated to the node.
|
||||||
// Returns 1 CIDR if single stack.
|
// Returns 1 CIDR if single stack.
|
||||||
// Returns 2 CIDRs , 1 from each ip family if dual stack.
|
// Returns 2 CIDRs , 1 from each ip family if dual stack.
|
||||||
func (r *multiCIDRRangeAllocator) prioritizedCIDRs(node *v1.Node) ([]*net.IPNet, *cidrset.ClusterCIDR, error) {
|
func (r *multiCIDRRangeAllocator) prioritizedCIDRs(logger klog.Logger, node *v1.Node) ([]*net.IPNet, *cidrset.ClusterCIDR, error) {
|
||||||
clusterCIDRList, err := r.orderedMatchingClusterCIDRs(node, true)
|
clusterCIDRList, err := r.orderedMatchingClusterCIDRs(logger, node, true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, nil, fmt.Errorf("unable to get a clusterCIDR for node %s: %w", node.Name, err)
|
return nil, nil, fmt.Errorf("unable to get a clusterCIDR for node %s: %w", node.Name, err)
|
||||||
}
|
}
|
||||||
@ -743,7 +747,7 @@ func (r *multiCIDRRangeAllocator) prioritizedCIDRs(node *v1.Node) ([]*net.IPNet,
|
|||||||
if clusterCIDR.IPv4CIDRSet != nil {
|
if clusterCIDR.IPv4CIDRSet != nil {
|
||||||
cidr, err := r.allocateCIDR(clusterCIDR, clusterCIDR.IPv4CIDRSet)
|
cidr, err := r.allocateCIDR(clusterCIDR, clusterCIDR.IPv4CIDRSet)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
klog.V(3).Infof("unable to allocate IPv4 CIDR, trying next range: %w", err)
|
logger.V(3).Info("Unable to allocate IPv4 CIDR, trying next range", "err", err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
cidrs = append(cidrs, cidr)
|
cidrs = append(cidrs, cidr)
|
||||||
@ -752,7 +756,7 @@ func (r *multiCIDRRangeAllocator) prioritizedCIDRs(node *v1.Node) ([]*net.IPNet,
|
|||||||
if clusterCIDR.IPv6CIDRSet != nil {
|
if clusterCIDR.IPv6CIDRSet != nil {
|
||||||
cidr, err := r.allocateCIDR(clusterCIDR, clusterCIDR.IPv6CIDRSet)
|
cidr, err := r.allocateCIDR(clusterCIDR, clusterCIDR.IPv6CIDRSet)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
klog.V(3).Infof("unable to allocate IPv6 CIDR, trying next range: %w", err)
|
logger.V(3).Info("Unable to allocate IPv6 CIDR, trying next range", "err", err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
cidrs = append(cidrs, cidr)
|
cidrs = append(cidrs, cidr)
|
||||||
@ -827,8 +831,8 @@ func (r *multiCIDRRangeAllocator) cidrOverlapWithAllocatedList(cidr *net.IPNet)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// allocatedClusterCIDR returns the ClusterCIDR from which the node CIDRs were allocated.
|
// allocatedClusterCIDR returns the ClusterCIDR from which the node CIDRs were allocated.
|
||||||
func (r *multiCIDRRangeAllocator) allocatedClusterCIDR(node *v1.Node) (*cidrset.ClusterCIDR, error) {
|
func (r *multiCIDRRangeAllocator) allocatedClusterCIDR(logger klog.Logger, node *v1.Node) (*cidrset.ClusterCIDR, error) {
|
||||||
clusterCIDRList, err := r.orderedMatchingClusterCIDRs(node, false)
|
clusterCIDRList, err := r.orderedMatchingClusterCIDRs(logger, node, false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("unable to get a clusterCIDR for node %s: %w", node.Name, err)
|
return nil, fmt.Errorf("unable to get a clusterCIDR for node %s: %w", node.Name, err)
|
||||||
}
|
}
|
||||||
@ -852,12 +856,12 @@ func (r *multiCIDRRangeAllocator) allocatedClusterCIDR(node *v1.Node) (*cidrset.
|
|||||||
// orderedMatchingClusterCIDRs takes `occupy` as an argument, it determines whether the function
|
// orderedMatchingClusterCIDRs takes `occupy` as an argument, it determines whether the function
|
||||||
// is called during an occupy or a release operation. For a release operation, a ClusterCIDR must
|
// is called during an occupy or a release operation. For a release operation, a ClusterCIDR must
|
||||||
// be added to the matching ClusterCIDRs list, irrespective of whether the ClusterCIDR is terminating.
|
// be added to the matching ClusterCIDRs list, irrespective of whether the ClusterCIDR is terminating.
|
||||||
func (r *multiCIDRRangeAllocator) orderedMatchingClusterCIDRs(node *v1.Node, occupy bool) ([]*cidrset.ClusterCIDR, error) {
|
func (r *multiCIDRRangeAllocator) orderedMatchingClusterCIDRs(logger klog.Logger, node *v1.Node, occupy bool) ([]*cidrset.ClusterCIDR, error) {
|
||||||
matchingCIDRs := make([]*cidrset.ClusterCIDR, 0)
|
matchingCIDRs := make([]*cidrset.ClusterCIDR, 0)
|
||||||
pq := make(PriorityQueue, 0)
|
pq := make(PriorityQueue, 0)
|
||||||
|
|
||||||
for label, clusterCIDRList := range r.cidrMap {
|
for label, clusterCIDRList := range r.cidrMap {
|
||||||
labelsMatch, matchCnt, err := r.matchCIDRLabels(node, label)
|
labelsMatch, matchCnt, err := r.matchCIDRLabels(logger, node, label)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -902,14 +906,14 @@ func (r *multiCIDRRangeAllocator) orderedMatchingClusterCIDRs(node *v1.Node, occ
|
|||||||
|
|
||||||
// matchCIDRLabels Matches the Node labels to CIDR Configs.
|
// matchCIDRLabels Matches the Node labels to CIDR Configs.
|
||||||
// Returns true only if all the labels match, also returns the count of matching labels.
|
// Returns true only if all the labels match, also returns the count of matching labels.
|
||||||
func (r *multiCIDRRangeAllocator) matchCIDRLabels(node *v1.Node, label string) (bool, int, error) {
|
func (r *multiCIDRRangeAllocator) matchCIDRLabels(logger klog.Logger, node *v1.Node, label string) (bool, int, error) {
|
||||||
var labelSet labels.Set
|
var labelSet labels.Set
|
||||||
var matchCnt int
|
var matchCnt int
|
||||||
labelsMatch := false
|
labelsMatch := false
|
||||||
|
|
||||||
ls, err := labels.Parse(label)
|
ls, err := labels.Parse(label)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
klog.Errorf("Unable to parse label %s to labels.Selector: %v", label, err)
|
logger.Error(err, "Unable to parse label to labels.Selector", "label", label)
|
||||||
return labelsMatch, 0, err
|
return labelsMatch, 0, err
|
||||||
}
|
}
|
||||||
reqs, selectable := ls.Requirements()
|
reqs, selectable := ls.Requirements()
|
||||||
@ -934,7 +938,7 @@ func (r *multiCIDRRangeAllocator) matchCIDRLabels(node *v1.Node, label string) (
|
|||||||
// createDefaultClusterCIDR creates a default ClusterCIDR if --cluster-cidr has
|
// createDefaultClusterCIDR creates a default ClusterCIDR if --cluster-cidr has
|
||||||
// been configured. It converts the --cluster-cidr and --per-node-mask-size* flags
|
// been configured. It converts the --cluster-cidr and --per-node-mask-size* flags
|
||||||
// to appropriate ClusterCIDR fields.
|
// to appropriate ClusterCIDR fields.
|
||||||
func createDefaultClusterCIDR(existingConfigList *networkingv1alpha1.ClusterCIDRList,
|
func createDefaultClusterCIDR(logger klog.Logger, existingConfigList *networkingv1alpha1.ClusterCIDRList,
|
||||||
allocatorParams CIDRAllocatorParams) {
|
allocatorParams CIDRAllocatorParams) {
|
||||||
// Create default ClusterCIDR only if --cluster-cidr has been configured
|
// Create default ClusterCIDR only if --cluster-cidr has been configured
|
||||||
if len(allocatorParams.ClusterCIDRs) == 0 {
|
if len(allocatorParams.ClusterCIDRs) == 0 {
|
||||||
@ -944,7 +948,7 @@ func createDefaultClusterCIDR(existingConfigList *networkingv1alpha1.ClusterCIDR
|
|||||||
for _, clusterCIDR := range existingConfigList.Items {
|
for _, clusterCIDR := range existingConfigList.Items {
|
||||||
if clusterCIDR.Name == defaultClusterCIDRName {
|
if clusterCIDR.Name == defaultClusterCIDRName {
|
||||||
// Default ClusterCIDR already exists, no further action required.
|
// Default ClusterCIDR already exists, no further action required.
|
||||||
klog.V(3).Infof("Default ClusterCIDR %s already exists", defaultClusterCIDRName)
|
logger.V(3).Info("Default ClusterCIDR already exists", "defaultClusterCIDRName", defaultClusterCIDRName)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1006,14 +1010,15 @@ func createDefaultClusterCIDR(existingConfigList *networkingv1alpha1.ClusterCIDR
|
|||||||
}
|
}
|
||||||
|
|
||||||
// reconcileCreate handles create ClusterCIDR events.
|
// reconcileCreate handles create ClusterCIDR events.
|
||||||
func (r *multiCIDRRangeAllocator) reconcileCreate(clusterCIDR *networkingv1alpha1.ClusterCIDR) error {
|
func (r *multiCIDRRangeAllocator) reconcileCreate(ctx context.Context, clusterCIDR *networkingv1alpha1.ClusterCIDR) error {
|
||||||
r.lock.Lock()
|
r.lock.Lock()
|
||||||
defer r.lock.Unlock()
|
defer r.lock.Unlock()
|
||||||
|
|
||||||
|
logger := klog.FromContext(ctx)
|
||||||
if needToAddFinalizer(clusterCIDR, clusterCIDRFinalizer) {
|
if needToAddFinalizer(clusterCIDR, clusterCIDRFinalizer) {
|
||||||
klog.V(3).Infof("Creating ClusterCIDR %s", clusterCIDR.Name)
|
logger.V(3).Info("Creating ClusterCIDR", "clusterCIDR", clusterCIDR.Name)
|
||||||
if err := r.createClusterCIDR(clusterCIDR, false); err != nil {
|
if err := r.createClusterCIDR(ctx, clusterCIDR, false); err != nil {
|
||||||
klog.Errorf("Unable to create ClusterCIDR %s : %v", clusterCIDR.Name, err)
|
logger.Error(err, "Unable to create ClusterCIDR", "clusterCIDR", clusterCIDR.Name)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1022,21 +1027,22 @@ func (r *multiCIDRRangeAllocator) reconcileCreate(clusterCIDR *networkingv1alpha
|
|||||||
|
|
||||||
// reconcileBootstrap handles creation of existing ClusterCIDRs.
|
// reconcileBootstrap handles creation of existing ClusterCIDRs.
|
||||||
// adds a finalizer if not already present.
|
// adds a finalizer if not already present.
|
||||||
func (r *multiCIDRRangeAllocator) reconcileBootstrap(clusterCIDR *networkingv1alpha1.ClusterCIDR) error {
|
func (r *multiCIDRRangeAllocator) reconcileBootstrap(ctx context.Context, clusterCIDR *networkingv1alpha1.ClusterCIDR) error {
|
||||||
r.lock.Lock()
|
r.lock.Lock()
|
||||||
defer r.lock.Unlock()
|
defer r.lock.Unlock()
|
||||||
|
|
||||||
|
logger := klog.FromContext(ctx)
|
||||||
terminating := false
|
terminating := false
|
||||||
// Create the ClusterCIDR only if the Spec has not been modified.
|
// Create the ClusterCIDR only if the Spec has not been modified.
|
||||||
if clusterCIDR.Generation > 1 {
|
if clusterCIDR.Generation > 1 {
|
||||||
terminating = true
|
terminating = true
|
||||||
err := fmt.Errorf("CIDRs from ClusterCIDR %s will not be used for allocation as it was modified", clusterCIDR.Name)
|
err := fmt.Errorf("CIDRs from ClusterCIDR %s will not be used for allocation as it was modified", clusterCIDR.Name)
|
||||||
klog.Errorf("ClusterCIDR Modified: %v", err)
|
logger.Error(err, "ClusterCIDR Modified")
|
||||||
}
|
}
|
||||||
|
|
||||||
klog.V(2).Infof("Creating ClusterCIDR %s during bootstrap", clusterCIDR.Name)
|
logger.V(2).Info("Creating ClusterCIDR during bootstrap", "clusterCIDR", clusterCIDR.Name)
|
||||||
if err := r.createClusterCIDR(clusterCIDR, terminating); err != nil {
|
if err := r.createClusterCIDR(ctx, clusterCIDR, terminating); err != nil {
|
||||||
klog.Errorf("Unable to create ClusterCIDR %s: %v", clusterCIDR.Name, err)
|
logger.Error(err, "Unable to create ClusterCIDR", "clusterCIDR", clusterCIDR.Name)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1044,7 +1050,7 @@ func (r *multiCIDRRangeAllocator) reconcileBootstrap(clusterCIDR *networkingv1al
|
|||||||
}
|
}
|
||||||
|
|
||||||
// createClusterCIDR creates and maps the cidrSets in the cidrMap.
|
// createClusterCIDR creates and maps the cidrSets in the cidrMap.
|
||||||
func (r *multiCIDRRangeAllocator) createClusterCIDR(clusterCIDR *networkingv1alpha1.ClusterCIDR, terminating bool) error {
|
func (r *multiCIDRRangeAllocator) createClusterCIDR(ctx context.Context, clusterCIDR *networkingv1alpha1.ClusterCIDR, terminating bool) error {
|
||||||
nodeSelector, err := r.nodeSelectorKey(clusterCIDR)
|
nodeSelector, err := r.nodeSelectorKey(clusterCIDR)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("unable to get labelSelector key: %w", err)
|
return fmt.Errorf("unable to get labelSelector key: %w", err)
|
||||||
@ -1069,16 +1075,17 @@ func (r *multiCIDRRangeAllocator) createClusterCIDR(clusterCIDR *networkingv1alp
|
|||||||
updatedClusterCIDR.ObjectMeta.Finalizers = append(clusterCIDR.ObjectMeta.Finalizers, clusterCIDRFinalizer)
|
updatedClusterCIDR.ObjectMeta.Finalizers = append(clusterCIDR.ObjectMeta.Finalizers, clusterCIDRFinalizer)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
logger := klog.FromContext(ctx)
|
||||||
if updatedClusterCIDR.ResourceVersion == "" {
|
if updatedClusterCIDR.ResourceVersion == "" {
|
||||||
// Create is only used for creating default ClusterCIDR.
|
// Create is only used for creating default ClusterCIDR.
|
||||||
if _, err := r.client.NetworkingV1alpha1().ClusterCIDRs().Create(context.TODO(), updatedClusterCIDR, metav1.CreateOptions{}); err != nil {
|
if _, err := r.client.NetworkingV1alpha1().ClusterCIDRs().Create(ctx, updatedClusterCIDR, metav1.CreateOptions{}); err != nil {
|
||||||
klog.V(2).Infof("Error creating ClusterCIDR %s: %v", clusterCIDR.Name, err)
|
logger.V(2).Info("Error creating ClusterCIDR", "clusterCIDR", klog.KObj(clusterCIDR), "err", err)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Update the ClusterCIDR object when called from reconcileCreate.
|
// Update the ClusterCIDR object when called from reconcileCreate.
|
||||||
if _, err := r.client.NetworkingV1alpha1().ClusterCIDRs().Update(context.TODO(), updatedClusterCIDR, metav1.UpdateOptions{}); err != nil {
|
if _, err := r.client.NetworkingV1alpha1().ClusterCIDRs().Update(ctx, updatedClusterCIDR, metav1.UpdateOptions{}); err != nil {
|
||||||
klog.V(2).Infof("Error creating ClusterCIDR %s: %v", clusterCIDR.Name, err)
|
logger.V(2).Info("Error creating ClusterCIDR", "clusterCIDR", clusterCIDR.Name, "err", err)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1136,30 +1143,31 @@ func (r *multiCIDRRangeAllocator) mapClusterCIDRSet(cidrMap map[string][]*cidrse
|
|||||||
|
|
||||||
// reconcileDelete releases the assigned ClusterCIDR and removes the finalizer
|
// reconcileDelete releases the assigned ClusterCIDR and removes the finalizer
|
||||||
// if the deletion timestamp is set.
|
// if the deletion timestamp is set.
|
||||||
func (r *multiCIDRRangeAllocator) reconcileDelete(clusterCIDR *networkingv1alpha1.ClusterCIDR) error {
|
func (r *multiCIDRRangeAllocator) reconcileDelete(ctx context.Context, clusterCIDR *networkingv1alpha1.ClusterCIDR) error {
|
||||||
r.lock.Lock()
|
r.lock.Lock()
|
||||||
defer r.lock.Unlock()
|
defer r.lock.Unlock()
|
||||||
|
|
||||||
|
logger := klog.FromContext(ctx)
|
||||||
if slice.ContainsString(clusterCIDR.GetFinalizers(), clusterCIDRFinalizer, nil) {
|
if slice.ContainsString(clusterCIDR.GetFinalizers(), clusterCIDRFinalizer, nil) {
|
||||||
klog.V(2).Infof("Releasing ClusterCIDR: %s", clusterCIDR.Name)
|
logger.V(2).Info("Releasing ClusterCIDR", "clusterCIDR", clusterCIDR.Name)
|
||||||
if err := r.deleteClusterCIDR(clusterCIDR); err != nil {
|
if err := r.deleteClusterCIDR(logger, clusterCIDR); err != nil {
|
||||||
klog.V(2).Infof("Error while deleting ClusterCIDR: %+v", err)
|
klog.V(2).Info("Error while deleting ClusterCIDR", "err", err)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
// Remove the finalizer as delete is successful.
|
// Remove the finalizer as delete is successful.
|
||||||
cccCopy := clusterCIDR.DeepCopy()
|
cccCopy := clusterCIDR.DeepCopy()
|
||||||
cccCopy.ObjectMeta.Finalizers = slice.RemoveString(cccCopy.ObjectMeta.Finalizers, clusterCIDRFinalizer, nil)
|
cccCopy.ObjectMeta.Finalizers = slice.RemoveString(cccCopy.ObjectMeta.Finalizers, clusterCIDRFinalizer, nil)
|
||||||
if _, err := r.client.NetworkingV1alpha1().ClusterCIDRs().Update(context.TODO(), cccCopy, metav1.UpdateOptions{}); err != nil {
|
if _, err := r.client.NetworkingV1alpha1().ClusterCIDRs().Update(ctx, cccCopy, metav1.UpdateOptions{}); err != nil {
|
||||||
klog.V(2).Infof("Error removing finalizer for ClusterCIDR %s: %v", clusterCIDR.Name, err)
|
logger.V(2).Info("Error removing finalizer for ClusterCIDR", "clusterCIDR", clusterCIDR.Name, "err", err)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
klog.V(2).Infof("Removed finalizer for ClusterCIDR %s", clusterCIDR.Name)
|
logger.V(2).Info("Removed finalizer for ClusterCIDR", "clusterCIDR", clusterCIDR.Name)
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// deleteClusterCIDR Deletes and unmaps the ClusterCIDRs from the cidrMap.
|
// deleteClusterCIDR Deletes and unmaps the ClusterCIDRs from the cidrMap.
|
||||||
func (r *multiCIDRRangeAllocator) deleteClusterCIDR(clusterCIDR *networkingv1alpha1.ClusterCIDR) error {
|
func (r *multiCIDRRangeAllocator) deleteClusterCIDR(logger klog.Logger, clusterCIDR *networkingv1alpha1.ClusterCIDR) error {
|
||||||
|
|
||||||
labelSelector, err := r.nodeSelectorKey(clusterCIDR)
|
labelSelector, err := r.nodeSelectorKey(clusterCIDR)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -1168,7 +1176,7 @@ func (r *multiCIDRRangeAllocator) deleteClusterCIDR(clusterCIDR *networkingv1alp
|
|||||||
|
|
||||||
clusterCIDRSetList, ok := r.cidrMap[labelSelector]
|
clusterCIDRSetList, ok := r.cidrMap[labelSelector]
|
||||||
if !ok {
|
if !ok {
|
||||||
klog.Infof("Label %s not found in CIDRMap, proceeding with delete", labelSelector)
|
logger.Info("Label not found in CIDRMap, proceeding with delete", "labelSelector", labelSelector)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1196,7 +1204,7 @@ func (r *multiCIDRRangeAllocator) deleteClusterCIDR(clusterCIDR *networkingv1alp
|
|||||||
r.cidrMap[labelSelector] = clusterCIDRSetList
|
r.cidrMap[labelSelector] = clusterCIDRSetList
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
klog.V(2).Info("clusterCIDR not found, proceeding with delete", "Name", clusterCIDR.Name, "label", labelSelector)
|
logger.V(2).Info("clusterCIDR not found, proceeding with delete", "clusterCIDR", clusterCIDR.Name, "label", labelSelector)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1217,7 +1225,7 @@ func (r *multiCIDRRangeAllocator) nodeSelectorKey(clusterCIDR *networkingv1alpha
|
|||||||
return nodeSelector.String(), nil
|
return nodeSelector.String(), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func listClusterCIDRs(kubeClient clientset.Interface) (*networkingv1alpha1.ClusterCIDRList, error) {
|
func listClusterCIDRs(ctx context.Context, kubeClient clientset.Interface) (*networkingv1alpha1.ClusterCIDRList, error) {
|
||||||
var clusterCIDRList *networkingv1alpha1.ClusterCIDRList
|
var clusterCIDRList *networkingv1alpha1.ClusterCIDRList
|
||||||
// We must poll because apiserver might not be up. This error causes
|
// We must poll because apiserver might not be up. This error causes
|
||||||
// controller manager to restart.
|
// controller manager to restart.
|
||||||
@ -1230,19 +1238,20 @@ func listClusterCIDRs(kubeClient clientset.Interface) (*networkingv1alpha1.Clust
|
|||||||
Steps: 11,
|
Steps: 11,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
logger := klog.FromContext(ctx)
|
||||||
if pollErr := wait.ExponentialBackoff(backoff, func() (bool, error) {
|
if pollErr := wait.ExponentialBackoff(backoff, func() (bool, error) {
|
||||||
var err error
|
var err error
|
||||||
clusterCIDRList, err = kubeClient.NetworkingV1alpha1().ClusterCIDRs().List(context.TODO(), metav1.ListOptions{
|
clusterCIDRList, err = kubeClient.NetworkingV1alpha1().ClusterCIDRs().List(ctx, metav1.ListOptions{
|
||||||
FieldSelector: fields.Everything().String(),
|
FieldSelector: fields.Everything().String(),
|
||||||
LabelSelector: labels.Everything().String(),
|
LabelSelector: labels.Everything().String(),
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
klog.Errorf("Failed to list all clusterCIDRs: %v", err)
|
logger.Error(err, "Failed to list all clusterCIDRs")
|
||||||
return false, nil
|
return false, nil
|
||||||
}
|
}
|
||||||
return true, nil
|
return true, nil
|
||||||
}); pollErr != nil {
|
}); pollErr != nil {
|
||||||
klog.Errorf("Failed to list clusterCIDRs (after %v)", time.Now().Sub(startTimestamp))
|
logger.Error(nil, "Failed to list clusterCIDRs", "latency", time.Now().Sub(startTimestamp))
|
||||||
return nil, fmt.Errorf("failed to list all clusterCIDRs in %v, cannot proceed without updating CIDR map",
|
return nil, fmt.Errorf("failed to list all clusterCIDRs in %v, cannot proceed without updating CIDR map",
|
||||||
apiserverStartupGracePeriod)
|
apiserverStartupGracePeriod)
|
||||||
}
|
}
|
||||||
|
@ -34,6 +34,7 @@ import (
|
|||||||
"k8s.io/client-go/kubernetes/fake"
|
"k8s.io/client-go/kubernetes/fake"
|
||||||
k8stesting "k8s.io/client-go/testing"
|
k8stesting "k8s.io/client-go/testing"
|
||||||
"k8s.io/client-go/tools/cache"
|
"k8s.io/client-go/tools/cache"
|
||||||
|
"k8s.io/klog/v2/ktesting"
|
||||||
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
|
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
|
||||||
"k8s.io/kubernetes/pkg/controller"
|
"k8s.io/kubernetes/pkg/controller"
|
||||||
cidrset "k8s.io/kubernetes/pkg/controller/nodeipam/ipam/multicidrset"
|
cidrset "k8s.io/kubernetes/pkg/controller/nodeipam/ipam/multicidrset"
|
||||||
@ -473,6 +474,7 @@ func TestMultiCIDROccupyPreExistingCIDR(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// test function
|
// test function
|
||||||
|
_, ctx := ktesting.NewTestContext(t)
|
||||||
for _, tc := range testCaseMultiCIDRs {
|
for _, tc := range testCaseMultiCIDRs {
|
||||||
t.Run(tc.description, func(t *testing.T) {
|
t.Run(tc.description, func(t *testing.T) {
|
||||||
// Initialize the range allocator.
|
// Initialize the range allocator.
|
||||||
@ -482,7 +484,7 @@ func TestMultiCIDROccupyPreExistingCIDR(t *testing.T) {
|
|||||||
fakeClusterCIDRInformer := fakeInformerFactory.Networking().V1alpha1().ClusterCIDRs()
|
fakeClusterCIDRInformer := fakeInformerFactory.Networking().V1alpha1().ClusterCIDRs()
|
||||||
nodeList, _ := tc.fakeNodeHandler.List(context.TODO(), metav1.ListOptions{})
|
nodeList, _ := tc.fakeNodeHandler.List(context.TODO(), metav1.ListOptions{})
|
||||||
|
|
||||||
_, err := NewMultiCIDRRangeAllocator(tc.fakeNodeHandler, fakeNodeInformer, fakeClusterCIDRInformer, tc.allocatorParams, nodeList, tc.testCIDRMap)
|
_, err := NewMultiCIDRRangeAllocator(ctx, tc.fakeNodeHandler, fakeNodeInformer, fakeClusterCIDRInformer, tc.allocatorParams, nodeList, tc.testCIDRMap)
|
||||||
if err == nil && tc.ctrlCreateFail {
|
if err == nil && tc.ctrlCreateFail {
|
||||||
t.Fatalf("creating range allocator was expected to fail, but it did not")
|
t.Fatalf("creating range allocator was expected to fail, but it did not")
|
||||||
}
|
}
|
||||||
@ -1177,6 +1179,8 @@ func TestMultiCIDRAllocateOrOccupyCIDRSuccess(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
logger, ctx := ktesting.NewTestContext(t)
|
||||||
|
|
||||||
// test function
|
// test function
|
||||||
testFunc := func(tc testCaseMultiCIDR) {
|
testFunc := func(tc testCaseMultiCIDR) {
|
||||||
nodeList, _ := tc.fakeNodeHandler.List(context.TODO(), metav1.ListOptions{})
|
nodeList, _ := tc.fakeNodeHandler.List(context.TODO(), metav1.ListOptions{})
|
||||||
@ -1185,7 +1189,7 @@ func TestMultiCIDRAllocateOrOccupyCIDRSuccess(t *testing.T) {
|
|||||||
fakeClient := &fake.Clientset{}
|
fakeClient := &fake.Clientset{}
|
||||||
fakeInformerFactory := informers.NewSharedInformerFactory(fakeClient, controller.NoResyncPeriodFunc())
|
fakeInformerFactory := informers.NewSharedInformerFactory(fakeClient, controller.NoResyncPeriodFunc())
|
||||||
fakeClusterCIDRInformer := fakeInformerFactory.Networking().V1alpha1().ClusterCIDRs()
|
fakeClusterCIDRInformer := fakeInformerFactory.Networking().V1alpha1().ClusterCIDRs()
|
||||||
allocator, err := NewMultiCIDRRangeAllocator(tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), fakeClusterCIDRInformer, tc.allocatorParams, nodeList, tc.testCIDRMap)
|
allocator, err := NewMultiCIDRRangeAllocator(ctx, tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), fakeClusterCIDRInformer, tc.allocatorParams, nodeList, tc.testCIDRMap)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf("%v: failed to create CIDRRangeAllocator with error %v", tc.description, err)
|
t.Errorf("%v: failed to create CIDRRangeAllocator with error %v", tc.description, err)
|
||||||
return
|
return
|
||||||
@ -1230,7 +1234,7 @@ func TestMultiCIDRAllocateOrOccupyCIDRSuccess(t *testing.T) {
|
|||||||
if node.Spec.PodCIDRs == nil {
|
if node.Spec.PodCIDRs == nil {
|
||||||
updateCount++
|
updateCount++
|
||||||
}
|
}
|
||||||
if err := allocator.AllocateOrOccupyCIDR(node); err != nil {
|
if err := allocator.AllocateOrOccupyCIDR(logger, node); err != nil {
|
||||||
t.Errorf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err)
|
t.Errorf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1308,13 +1312,15 @@ func TestMultiCIDRAllocateOrOccupyCIDRFailure(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
logger, ctx := ktesting.NewTestContext(t)
|
||||||
|
|
||||||
testFunc := func(tc testCaseMultiCIDR) {
|
testFunc := func(tc testCaseMultiCIDR) {
|
||||||
fakeClient := &fake.Clientset{}
|
fakeClient := &fake.Clientset{}
|
||||||
fakeInformerFactory := informers.NewSharedInformerFactory(fakeClient, controller.NoResyncPeriodFunc())
|
fakeInformerFactory := informers.NewSharedInformerFactory(fakeClient, controller.NoResyncPeriodFunc())
|
||||||
fakeClusterCIDRInformer := fakeInformerFactory.Networking().V1alpha1().ClusterCIDRs()
|
fakeClusterCIDRInformer := fakeInformerFactory.Networking().V1alpha1().ClusterCIDRs()
|
||||||
|
|
||||||
// Initialize the range allocator.
|
// Initialize the range allocator.
|
||||||
allocator, err := NewMultiCIDRRangeAllocator(tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), fakeClusterCIDRInformer, tc.allocatorParams, nil, tc.testCIDRMap)
|
allocator, err := NewMultiCIDRRangeAllocator(ctx, tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), fakeClusterCIDRInformer, tc.allocatorParams, nil, tc.testCIDRMap)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Logf("%v: failed to create CIDRRangeAllocator with error %v", tc.description, err)
|
t.Logf("%v: failed to create CIDRRangeAllocator with error %v", tc.description, err)
|
||||||
}
|
}
|
||||||
@ -1353,7 +1359,7 @@ func TestMultiCIDRAllocateOrOccupyCIDRFailure(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0]); err == nil {
|
if err := allocator.AllocateOrOccupyCIDR(logger, tc.fakeNodeHandler.Existing[0]); err == nil {
|
||||||
t.Errorf("%v: unexpected success in AllocateOrOccupyCIDR: %v", tc.description, err)
|
t.Errorf("%v: unexpected success in AllocateOrOccupyCIDR: %v", tc.description, err)
|
||||||
}
|
}
|
||||||
// We don't expect any updates, so just sleep for some time
|
// We don't expect any updates, so just sleep for some time
|
||||||
@ -1498,13 +1504,13 @@ func TestMultiCIDRReleaseCIDRSuccess(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
logger, ctx := ktesting.NewTestContext(t)
|
||||||
testFunc := func(tc releasetestCaseMultiCIDR) {
|
testFunc := func(tc releasetestCaseMultiCIDR) {
|
||||||
fakeClient := &fake.Clientset{}
|
fakeClient := &fake.Clientset{}
|
||||||
fakeInformerFactory := informers.NewSharedInformerFactory(fakeClient, controller.NoResyncPeriodFunc())
|
fakeInformerFactory := informers.NewSharedInformerFactory(fakeClient, controller.NoResyncPeriodFunc())
|
||||||
fakeClusterCIDRInformer := fakeInformerFactory.Networking().V1alpha1().ClusterCIDRs()
|
fakeClusterCIDRInformer := fakeInformerFactory.Networking().V1alpha1().ClusterCIDRs()
|
||||||
// Initialize the range allocator.
|
// Initialize the range allocator.
|
||||||
allocator, _ := NewMultiCIDRRangeAllocator(tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), fakeClusterCIDRInformer, tc.allocatorParams, nil, tc.testCIDRMap)
|
allocator, _ := NewMultiCIDRRangeAllocator(ctx, tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), fakeClusterCIDRInformer, tc.allocatorParams, nil, tc.testCIDRMap)
|
||||||
rangeAllocator, ok := allocator.(*multiCIDRRangeAllocator)
|
rangeAllocator, ok := allocator.(*multiCIDRRangeAllocator)
|
||||||
if !ok {
|
if !ok {
|
||||||
t.Logf("%v: found non-default implementation of CIDRAllocator, skipping white-box test...", tc.description)
|
t.Logf("%v: found non-default implementation of CIDRAllocator, skipping white-box test...", tc.description)
|
||||||
@ -1540,7 +1546,7 @@ func TestMultiCIDRReleaseCIDRSuccess(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
err := allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0])
|
err := allocator.AllocateOrOccupyCIDR(logger, tc.fakeNodeHandler.Existing[0])
|
||||||
if len(tc.expectedAllocatedCIDRFirstRound) != 0 {
|
if len(tc.expectedAllocatedCIDRFirstRound) != 0 {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err)
|
t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err)
|
||||||
@ -1570,12 +1576,12 @@ func TestMultiCIDRReleaseCIDRSuccess(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
nodeToRelease.Spec.PodCIDRs = cidrToRelease
|
nodeToRelease.Spec.PodCIDRs = cidrToRelease
|
||||||
err = allocator.ReleaseCIDR(&nodeToRelease)
|
err = allocator.ReleaseCIDR(logger, &nodeToRelease)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("%v: unexpected error in ReleaseCIDR: %v", tc.description, err)
|
t.Fatalf("%v: unexpected error in ReleaseCIDR: %v", tc.description, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if err = allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0]); err != nil {
|
if err = allocator.AllocateOrOccupyCIDR(logger, tc.fakeNodeHandler.Existing[0]); err != nil {
|
||||||
t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err)
|
t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err)
|
||||||
}
|
}
|
||||||
if err := test.WaitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, 1, wait.ForeverTestTimeout); err != nil {
|
if err := test.WaitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, 1, wait.ForeverTestTimeout); err != nil {
|
||||||
@ -1614,7 +1620,7 @@ type clusterCIDRController struct {
|
|||||||
clusterCIDRStore cache.Store
|
clusterCIDRStore cache.Store
|
||||||
}
|
}
|
||||||
|
|
||||||
func newController() (*fake.Clientset, *clusterCIDRController) {
|
func newController(ctx context.Context) (*fake.Clientset, *clusterCIDRController) {
|
||||||
client := fake.NewSimpleClientset()
|
client := fake.NewSimpleClientset()
|
||||||
|
|
||||||
informerFactory := informers.NewSharedInformerFactory(client, controller.NoResyncPeriodFunc())
|
informerFactory := informers.NewSharedInformerFactory(client, controller.NoResyncPeriodFunc())
|
||||||
@ -1657,7 +1663,7 @@ func newController() (*fake.Clientset, *clusterCIDRController) {
|
|||||||
testCIDRMap := make(map[string][]*cidrset.ClusterCIDR, 0)
|
testCIDRMap := make(map[string][]*cidrset.ClusterCIDR, 0)
|
||||||
|
|
||||||
// Initialize the range allocator.
|
// Initialize the range allocator.
|
||||||
ra, _ := NewMultiCIDRRangeAllocator(client, nodeInformer, cccInformer, allocatorParams, nil, testCIDRMap)
|
ra, _ := NewMultiCIDRRangeAllocator(ctx, client, nodeInformer, cccInformer, allocatorParams, nil, testCIDRMap)
|
||||||
cccController := ra.(*multiCIDRRangeAllocator)
|
cccController := ra.(*multiCIDRRangeAllocator)
|
||||||
|
|
||||||
cccController.clusterCIDRSynced = alwaysReady
|
cccController.clusterCIDRSynced = alwaysReady
|
||||||
@ -1671,8 +1677,8 @@ func newController() (*fake.Clientset, *clusterCIDRController) {
|
|||||||
// Ensure default ClusterCIDR is created during bootstrap.
|
// Ensure default ClusterCIDR is created during bootstrap.
|
||||||
func TestClusterCIDRDefault(t *testing.T) {
|
func TestClusterCIDRDefault(t *testing.T) {
|
||||||
defaultCCC := makeClusterCIDR(defaultClusterCIDRName, "192.168.0.0/16", "", 8, nil)
|
defaultCCC := makeClusterCIDR(defaultClusterCIDRName, "192.168.0.0/16", "", 8, nil)
|
||||||
|
_, ctx := ktesting.NewTestContext(t)
|
||||||
client, _ := newController()
|
client, _ := newController(ctx)
|
||||||
createdCCC, err := client.NetworkingV1alpha1().ClusterCIDRs().Get(context.TODO(), defaultClusterCIDRName, metav1.GetOptions{})
|
createdCCC, err := client.NetworkingV1alpha1().ClusterCIDRs().Get(context.TODO(), defaultClusterCIDRName, metav1.GetOptions{})
|
||||||
assert.Nil(t, err, "Expected no error getting clustercidr objects")
|
assert.Nil(t, err, "Expected no error getting clustercidr objects")
|
||||||
assert.Equal(t, defaultCCC.Spec, createdCCC.Spec)
|
assert.Equal(t, defaultCCC.Spec, createdCCC.Spec)
|
||||||
@ -1752,11 +1758,11 @@ func TestSyncClusterCIDRCreate(t *testing.T) {
|
|||||||
wantErr: true,
|
wantErr: true,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
_, ctx := ktesting.NewTestContext(t)
|
||||||
client, cccController := newController()
|
client, cccController := newController(ctx)
|
||||||
for _, tc := range tests {
|
for _, tc := range tests {
|
||||||
cccController.clusterCIDRStore.Add(tc.ccc)
|
cccController.clusterCIDRStore.Add(tc.ccc)
|
||||||
err := cccController.syncClusterCIDR(tc.ccc.Name)
|
err := cccController.syncClusterCIDR(ctx, tc.ccc.Name)
|
||||||
if tc.wantErr {
|
if tc.wantErr {
|
||||||
assert.Error(t, err)
|
assert.Error(t, err)
|
||||||
continue
|
continue
|
||||||
@ -1773,30 +1779,32 @@ func TestSyncClusterCIDRCreate(t *testing.T) {
|
|||||||
|
|
||||||
// Ensure syncClusterCIDR for ClusterCIDR delete removes the ClusterCIDR.
|
// Ensure syncClusterCIDR for ClusterCIDR delete removes the ClusterCIDR.
|
||||||
func TestSyncClusterCIDRDelete(t *testing.T) {
|
func TestSyncClusterCIDRDelete(t *testing.T) {
|
||||||
_, cccController := newController()
|
_, ctx := ktesting.NewTestContext(t)
|
||||||
|
_, cccController := newController(ctx)
|
||||||
|
|
||||||
testCCC := makeClusterCIDR("testing-1", "10.1.0.0/16", "", 8, makeNodeSelector("foo", v1.NodeSelectorOpIn, []string{"bar"}))
|
testCCC := makeClusterCIDR("testing-1", "10.1.0.0/16", "", 8, makeNodeSelector("foo", v1.NodeSelectorOpIn, []string{"bar"}))
|
||||||
|
|
||||||
cccController.clusterCIDRStore.Add(testCCC)
|
cccController.clusterCIDRStore.Add(testCCC)
|
||||||
err := cccController.syncClusterCIDR(testCCC.Name)
|
err := cccController.syncClusterCIDR(ctx, testCCC.Name)
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
|
|
||||||
deletionTimestamp := metav1.Now()
|
deletionTimestamp := metav1.Now()
|
||||||
testCCC.DeletionTimestamp = &deletionTimestamp
|
testCCC.DeletionTimestamp = &deletionTimestamp
|
||||||
cccController.clusterCIDRStore.Update(testCCC)
|
cccController.clusterCIDRStore.Update(testCCC)
|
||||||
err = cccController.syncClusterCIDR(testCCC.Name)
|
err = cccController.syncClusterCIDR(ctx, testCCC.Name)
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ensure syncClusterCIDR for ClusterCIDR delete does not remove ClusterCIDR
|
// Ensure syncClusterCIDR for ClusterCIDR delete does not remove ClusterCIDR
|
||||||
// if a node is associated with the ClusterCIDR.
|
// if a node is associated with the ClusterCIDR.
|
||||||
func TestSyncClusterCIDRDeleteWithNodesAssociated(t *testing.T) {
|
func TestSyncClusterCIDRDeleteWithNodesAssociated(t *testing.T) {
|
||||||
client, cccController := newController()
|
_, ctx := ktesting.NewTestContext(t)
|
||||||
|
client, cccController := newController(ctx)
|
||||||
|
|
||||||
testCCC := makeClusterCIDR("testing-1", "10.1.0.0/16", "", 8, makeNodeSelector("foo", v1.NodeSelectorOpIn, []string{"bar"}))
|
testCCC := makeClusterCIDR("testing-1", "10.1.0.0/16", "", 8, makeNodeSelector("foo", v1.NodeSelectorOpIn, []string{"bar"}))
|
||||||
|
|
||||||
cccController.clusterCIDRStore.Add(testCCC)
|
cccController.clusterCIDRStore.Add(testCCC)
|
||||||
err := cccController.syncClusterCIDR(testCCC.Name)
|
err := cccController.syncClusterCIDR(ctx, testCCC.Name)
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
|
|
||||||
// Mock the IPAM controller behavior associating node with ClusterCIDR.
|
// Mock the IPAM controller behavior associating node with ClusterCIDR.
|
||||||
@ -1810,7 +1818,7 @@ func TestSyncClusterCIDRDeleteWithNodesAssociated(t *testing.T) {
|
|||||||
deletionTimestamp := metav1.Now()
|
deletionTimestamp := metav1.Now()
|
||||||
createdCCC.DeletionTimestamp = &deletionTimestamp
|
createdCCC.DeletionTimestamp = &deletionTimestamp
|
||||||
cccController.clusterCIDRStore.Update(createdCCC)
|
cccController.clusterCIDRStore.Update(createdCCC)
|
||||||
err = cccController.syncClusterCIDR(createdCCC.Name)
|
err = cccController.syncClusterCIDR(ctx, createdCCC.Name)
|
||||||
assert.Error(t, err, fmt.Sprintf("ClusterCIDR %s marked as terminating, won't be deleted until all associated nodes are deleted", createdCCC.Name))
|
assert.Error(t, err, fmt.Sprintf("ClusterCIDR %s marked as terminating, won't be deleted until all associated nodes are deleted", createdCCC.Name))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -22,7 +22,7 @@ import (
|
|||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"k8s.io/component-base/metrics/testutil"
|
"k8s.io/component-base/metrics/testutil"
|
||||||
"k8s.io/klog/v2"
|
"k8s.io/klog/v2/ktesting"
|
||||||
utilnet "k8s.io/utils/net"
|
utilnet "k8s.io/utils/net"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -568,6 +568,7 @@ func TestGetBitforCIDR(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
logger, _ := ktesting.NewTestContext(t)
|
||||||
for _, tc := range cases {
|
for _, tc := range cases {
|
||||||
_, clusterCIDR, err := utilnet.ParseCIDRSloppy(tc.clusterCIDRStr)
|
_, clusterCIDR, err := utilnet.ParseCIDRSloppy(tc.clusterCIDRStr)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -585,17 +586,17 @@ func TestGetBitforCIDR(t *testing.T) {
|
|||||||
|
|
||||||
got, err := cs.getIndexForCIDR(subnetCIDR)
|
got, err := cs.getIndexForCIDR(subnetCIDR)
|
||||||
if err == nil && tc.expectErr {
|
if err == nil && tc.expectErr {
|
||||||
klog.Errorf("expected error but got null for %v", tc.description)
|
logger.Error(nil, "Expected error but got null", "description", tc.description)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if err != nil && !tc.expectErr {
|
if err != nil && !tc.expectErr {
|
||||||
klog.Errorf("unexpected error: %v for %v", err, tc.description)
|
logger.Error(err, "Unexpected error", "description", tc.description)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if got != tc.expectedBit {
|
if got != tc.expectedBit {
|
||||||
klog.Errorf("expected %v, but got %v for %v", tc.expectedBit, got, tc.description)
|
logger.Error(nil, "Unexpected value", "description", tc.description, "expected", tc.expectedBit, "got", got)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -17,6 +17,7 @@ limitations under the License.
|
|||||||
package ipam
|
package ipam
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"net"
|
"net"
|
||||||
"sync"
|
"sync"
|
||||||
@ -66,9 +67,10 @@ type rangeAllocator struct {
|
|||||||
// Caller must always pass in a list of existing nodes so the new allocator.
|
// Caller must always pass in a list of existing nodes so the new allocator.
|
||||||
// Caller must ensure that ClusterCIDRs are semantically correct e.g (1 for non DualStack, 2 for DualStack etc..)
|
// Caller must ensure that ClusterCIDRs are semantically correct e.g (1 for non DualStack, 2 for DualStack etc..)
|
||||||
// can initialize its CIDR map. NodeList is only nil in testing.
|
// can initialize its CIDR map. NodeList is only nil in testing.
|
||||||
func NewCIDRRangeAllocator(client clientset.Interface, nodeInformer informers.NodeInformer, allocatorParams CIDRAllocatorParams, nodeList *v1.NodeList) (CIDRAllocator, error) {
|
func NewCIDRRangeAllocator(logger klog.Logger, client clientset.Interface, nodeInformer informers.NodeInformer, allocatorParams CIDRAllocatorParams, nodeList *v1.NodeList) (CIDRAllocator, error) {
|
||||||
if client == nil {
|
if client == nil {
|
||||||
klog.Fatalf("kubeClient is nil when starting NodeController")
|
logger.Error(nil, "kubeClient is nil when starting CIDRRangeAllocator")
|
||||||
|
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
|
||||||
}
|
}
|
||||||
|
|
||||||
eventBroadcaster := record.NewBroadcaster()
|
eventBroadcaster := record.NewBroadcaster()
|
||||||
@ -98,24 +100,24 @@ func NewCIDRRangeAllocator(client clientset.Interface, nodeInformer informers.No
|
|||||||
}
|
}
|
||||||
|
|
||||||
if allocatorParams.ServiceCIDR != nil {
|
if allocatorParams.ServiceCIDR != nil {
|
||||||
ra.filterOutServiceRange(allocatorParams.ServiceCIDR)
|
ra.filterOutServiceRange(logger, allocatorParams.ServiceCIDR)
|
||||||
} else {
|
} else {
|
||||||
klog.Info("No Service CIDR provided. Skipping filtering out service addresses.")
|
logger.Info("No Service CIDR provided. Skipping filtering out service addresses")
|
||||||
}
|
}
|
||||||
|
|
||||||
if allocatorParams.SecondaryServiceCIDR != nil {
|
if allocatorParams.SecondaryServiceCIDR != nil {
|
||||||
ra.filterOutServiceRange(allocatorParams.SecondaryServiceCIDR)
|
ra.filterOutServiceRange(logger, allocatorParams.SecondaryServiceCIDR)
|
||||||
} else {
|
} else {
|
||||||
klog.Info("No Secondary Service CIDR provided. Skipping filtering out secondary service addresses.")
|
logger.Info("No Secondary Service CIDR provided. Skipping filtering out secondary service addresses")
|
||||||
}
|
}
|
||||||
|
|
||||||
if nodeList != nil {
|
if nodeList != nil {
|
||||||
for _, node := range nodeList.Items {
|
for _, node := range nodeList.Items {
|
||||||
if len(node.Spec.PodCIDRs) == 0 {
|
if len(node.Spec.PodCIDRs) == 0 {
|
||||||
klog.V(4).Infof("Node %v has no CIDR, ignoring", node.Name)
|
logger.V(4).Info("Node has no CIDR, ignoring", "node", klog.KObj(&node))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
klog.V(4).Infof("Node %v has CIDR %s, occupying it in CIDR map", node.Name, node.Spec.PodCIDR)
|
logger.V(4).Info("Node has CIDR, occupying it in CIDR map", "node", klog.KObj(&node), "podCIDR", node.Spec.PodCIDR)
|
||||||
if err := ra.occupyCIDRs(&node); err != nil {
|
if err := ra.occupyCIDRs(&node); err != nil {
|
||||||
// This will happen if:
|
// This will happen if:
|
||||||
// 1. We find garbage in the podCIDRs field. Retrying is useless.
|
// 1. We find garbage in the podCIDRs field. Retrying is useless.
|
||||||
@ -127,7 +129,9 @@ func NewCIDRRangeAllocator(client clientset.Interface, nodeInformer informers.No
|
|||||||
}
|
}
|
||||||
|
|
||||||
nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
|
nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
|
||||||
AddFunc: controllerutil.CreateAddNodeHandler(ra.AllocateOrOccupyCIDR),
|
AddFunc: controllerutil.CreateAddNodeHandler(func(node *v1.Node) error {
|
||||||
|
return ra.AllocateOrOccupyCIDR(logger, node)
|
||||||
|
}),
|
||||||
UpdateFunc: controllerutil.CreateUpdateNodeHandler(func(_, newNode *v1.Node) error {
|
UpdateFunc: controllerutil.CreateUpdateNodeHandler(func(_, newNode *v1.Node) error {
|
||||||
// If the PodCIDRs list is not empty we either:
|
// If the PodCIDRs list is not empty we either:
|
||||||
// - already processed a Node that already had CIDRs after NC restarted
|
// - already processed a Node that already had CIDRs after NC restarted
|
||||||
@ -149,52 +153,56 @@ func NewCIDRRangeAllocator(client clientset.Interface, nodeInformer informers.No
|
|||||||
// state is correct.
|
// state is correct.
|
||||||
// Restart of NC fixes the issue.
|
// Restart of NC fixes the issue.
|
||||||
if len(newNode.Spec.PodCIDRs) == 0 {
|
if len(newNode.Spec.PodCIDRs) == 0 {
|
||||||
return ra.AllocateOrOccupyCIDR(newNode)
|
return ra.AllocateOrOccupyCIDR(logger, newNode)
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}),
|
}),
|
||||||
DeleteFunc: controllerutil.CreateDeleteNodeHandler(ra.ReleaseCIDR),
|
DeleteFunc: controllerutil.CreateDeleteNodeHandler(func(node *v1.Node) error {
|
||||||
|
return ra.ReleaseCIDR(logger, node)
|
||||||
|
}),
|
||||||
})
|
})
|
||||||
|
|
||||||
return ra, nil
|
return ra, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *rangeAllocator) Run(stopCh <-chan struct{}) {
|
func (r *rangeAllocator) Run(ctx context.Context) {
|
||||||
defer utilruntime.HandleCrash()
|
defer utilruntime.HandleCrash()
|
||||||
|
|
||||||
// Start event processing pipeline.
|
// Start event processing pipeline.
|
||||||
r.broadcaster.StartStructuredLogging(0)
|
r.broadcaster.StartStructuredLogging(0)
|
||||||
klog.Infof("Sending events to api server.")
|
logger := klog.FromContext(ctx)
|
||||||
|
logger.Info("Sending events to api server")
|
||||||
r.broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: r.client.CoreV1().Events("")})
|
r.broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: r.client.CoreV1().Events("")})
|
||||||
defer r.broadcaster.Shutdown()
|
defer r.broadcaster.Shutdown()
|
||||||
|
|
||||||
klog.Infof("Starting range CIDR allocator")
|
logger.Info("Starting range CIDR allocator")
|
||||||
defer klog.Infof("Shutting down range CIDR allocator")
|
defer logger.Info("Shutting down range CIDR allocator")
|
||||||
|
|
||||||
if !cache.WaitForNamedCacheSync("cidrallocator", stopCh, r.nodesSynced) {
|
if !cache.WaitForNamedCacheSync("cidrallocator", ctx.Done(), r.nodesSynced) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
for i := 0; i < cidrUpdateWorkers; i++ {
|
for i := 0; i < cidrUpdateWorkers; i++ {
|
||||||
go r.worker(stopCh)
|
go r.worker(ctx)
|
||||||
}
|
}
|
||||||
|
|
||||||
<-stopCh
|
<-ctx.Done()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *rangeAllocator) worker(stopChan <-chan struct{}) {
|
func (r *rangeAllocator) worker(ctx context.Context) {
|
||||||
|
logger := klog.FromContext(ctx)
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case workItem, ok := <-r.nodeCIDRUpdateChannel:
|
case workItem, ok := <-r.nodeCIDRUpdateChannel:
|
||||||
if !ok {
|
if !ok {
|
||||||
klog.Warning("Channel nodeCIDRUpdateChannel was unexpectedly closed")
|
logger.Info("Channel nodeCIDRUpdateChannel was unexpectedly closed")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if err := r.updateCIDRsAllocation(workItem); err != nil {
|
if err := r.updateCIDRsAllocation(logger, workItem); err != nil {
|
||||||
// Requeue the failed node for update again.
|
// Requeue the failed node for update again.
|
||||||
r.nodeCIDRUpdateChannel <- workItem
|
r.nodeCIDRUpdateChannel <- workItem
|
||||||
}
|
}
|
||||||
case <-stopChan:
|
case <-ctx.Done():
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -244,12 +252,12 @@ func (r *rangeAllocator) occupyCIDRs(node *v1.Node) error {
|
|||||||
// WARNING: If you're adding any return calls or defer any more work from this
|
// WARNING: If you're adding any return calls or defer any more work from this
|
||||||
// function you have to make sure to update nodesInProcessing properly with the
|
// function you have to make sure to update nodesInProcessing properly with the
|
||||||
// disposition of the node when the work is done.
|
// disposition of the node when the work is done.
|
||||||
func (r *rangeAllocator) AllocateOrOccupyCIDR(node *v1.Node) error {
|
func (r *rangeAllocator) AllocateOrOccupyCIDR(logger klog.Logger, node *v1.Node) error {
|
||||||
if node == nil {
|
if node == nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
if !r.insertNodeToProcessing(node.Name) {
|
if !r.insertNodeToProcessing(node.Name) {
|
||||||
klog.V(2).Infof("Node %v is already in a process of CIDR assignment.", node.Name)
|
logger.V(2).Info("Node is already in a process of CIDR assignment", "node", klog.KObj(node))
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -273,13 +281,13 @@ func (r *rangeAllocator) AllocateOrOccupyCIDR(node *v1.Node) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
//queue the assignment
|
//queue the assignment
|
||||||
klog.V(4).Infof("Putting node %s with CIDR %v into the work queue", node.Name, allocated.allocatedCIDRs)
|
logger.V(4).Info("Putting node with CIDR into the work queue", "node", klog.KObj(node), "CIDRs", allocated.allocatedCIDRs)
|
||||||
r.nodeCIDRUpdateChannel <- allocated
|
r.nodeCIDRUpdateChannel <- allocated
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// ReleaseCIDR marks node.podCIDRs[...] as unused in our tracked cidrSets
|
// ReleaseCIDR marks node.podCIDRs[...] as unused in our tracked cidrSets
|
||||||
func (r *rangeAllocator) ReleaseCIDR(node *v1.Node) error {
|
func (r *rangeAllocator) ReleaseCIDR(logger klog.Logger, node *v1.Node) error {
|
||||||
if node == nil || len(node.Spec.PodCIDRs) == 0 {
|
if node == nil || len(node.Spec.PodCIDRs) == 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@ -297,7 +305,7 @@ func (r *rangeAllocator) ReleaseCIDR(node *v1.Node) error {
|
|||||||
return fmt.Errorf("node:%s has an allocated cidr: %v at index:%v that does not exist in cluster cidrs configuration", node.Name, cidr, idx)
|
return fmt.Errorf("node:%s has an allocated cidr: %v at index:%v that does not exist in cluster cidrs configuration", node.Name, cidr, idx)
|
||||||
}
|
}
|
||||||
|
|
||||||
klog.V(4).Infof("release CIDR %s for node:%v", cidr, node.Name)
|
logger.V(4).Info("Release CIDR for node", "CIDR", cidr, "node", klog.KObj(node))
|
||||||
if err = r.cidrSets[idx].Release(podCIDR); err != nil {
|
if err = r.cidrSets[idx].Release(podCIDR); err != nil {
|
||||||
return fmt.Errorf("error when releasing CIDR %v: %v", cidr, err)
|
return fmt.Errorf("error when releasing CIDR %v: %v", cidr, err)
|
||||||
}
|
}
|
||||||
@ -307,7 +315,7 @@ func (r *rangeAllocator) ReleaseCIDR(node *v1.Node) error {
|
|||||||
|
|
||||||
// Marks all CIDRs with subNetMaskSize that belongs to serviceCIDR as used across all cidrs
|
// Marks all CIDRs with subNetMaskSize that belongs to serviceCIDR as used across all cidrs
|
||||||
// so that they won't be assignable.
|
// so that they won't be assignable.
|
||||||
func (r *rangeAllocator) filterOutServiceRange(serviceCIDR *net.IPNet) {
|
func (r *rangeAllocator) filterOutServiceRange(logger klog.Logger, serviceCIDR *net.IPNet) {
|
||||||
// Checks if service CIDR has a nonempty intersection with cluster
|
// Checks if service CIDR has a nonempty intersection with cluster
|
||||||
// CIDR. It is the case if either clusterCIDR contains serviceCIDR with
|
// CIDR. It is the case if either clusterCIDR contains serviceCIDR with
|
||||||
// clusterCIDR's Mask applied (this means that clusterCIDR contains
|
// clusterCIDR's Mask applied (this means that clusterCIDR contains
|
||||||
@ -321,20 +329,20 @@ func (r *rangeAllocator) filterOutServiceRange(serviceCIDR *net.IPNet) {
|
|||||||
|
|
||||||
// at this point, len(cidrSet) == len(clusterCidr)
|
// at this point, len(cidrSet) == len(clusterCidr)
|
||||||
if err := r.cidrSets[idx].Occupy(serviceCIDR); err != nil {
|
if err := r.cidrSets[idx].Occupy(serviceCIDR); err != nil {
|
||||||
klog.Errorf("Error filtering out service cidr out cluster cidr:%v (index:%v) %v: %v", cidr, idx, serviceCIDR, err)
|
logger.Error(err, "Error filtering out service cidr out cluster cidr", "CIDR", cidr, "index", idx, "serviceCIDR", serviceCIDR)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// updateCIDRsAllocation assigns CIDR to Node and sends an update to the API server.
|
// updateCIDRsAllocation assigns CIDR to Node and sends an update to the API server.
|
||||||
func (r *rangeAllocator) updateCIDRsAllocation(data nodeReservedCIDRs) error {
|
func (r *rangeAllocator) updateCIDRsAllocation(logger klog.Logger, data nodeReservedCIDRs) error {
|
||||||
var err error
|
var err error
|
||||||
var node *v1.Node
|
var node *v1.Node
|
||||||
defer r.removeNodeFromProcessing(data.nodeName)
|
defer r.removeNodeFromProcessing(data.nodeName)
|
||||||
cidrsString := ipnetToStringList(data.allocatedCIDRs)
|
cidrsString := ipnetToStringList(data.allocatedCIDRs)
|
||||||
node, err = r.nodeLister.Get(data.nodeName)
|
node, err = r.nodeLister.Get(data.nodeName)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
klog.Errorf("Failed while getting node %v for updating Node.Spec.PodCIDRs: %v", data.nodeName, err)
|
logger.Error(err, "Failed while getting node for updating Node.Spec.PodCIDRs", "node", klog.KRef("", data.nodeName))
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -350,17 +358,17 @@ func (r *rangeAllocator) updateCIDRsAllocation(data nodeReservedCIDRs) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if match {
|
if match {
|
||||||
klog.V(4).Infof("Node %v already has allocated CIDR %v. It matches the proposed one.", node.Name, data.allocatedCIDRs)
|
logger.V(4).Info("Node already has allocated CIDR. It matches the proposed one", "node", klog.KObj(node), "CIDRs", data.allocatedCIDRs)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// node has cidrs, release the reserved
|
// node has cidrs, release the reserved
|
||||||
if len(node.Spec.PodCIDRs) != 0 {
|
if len(node.Spec.PodCIDRs) != 0 {
|
||||||
klog.Errorf("Node %v already has a CIDR allocated %v. Releasing the new one.", node.Name, node.Spec.PodCIDRs)
|
logger.Error(nil, "Node already has a CIDR allocated. Releasing the new one", "node", klog.KObj(node), "podCIDRs", node.Spec.PodCIDRs)
|
||||||
for idx, cidr := range data.allocatedCIDRs {
|
for idx, cidr := range data.allocatedCIDRs {
|
||||||
if releaseErr := r.cidrSets[idx].Release(cidr); releaseErr != nil {
|
if releaseErr := r.cidrSets[idx].Release(cidr); releaseErr != nil {
|
||||||
klog.Errorf("Error when releasing CIDR idx:%v value: %v err:%v", idx, cidr, releaseErr)
|
logger.Error(releaseErr, "Error when releasing CIDR", "index", idx, "CIDR", cidr)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
@ -369,21 +377,21 @@ func (r *rangeAllocator) updateCIDRsAllocation(data nodeReservedCIDRs) error {
|
|||||||
// If we reached here, it means that the node has no CIDR currently assigned. So we set it.
|
// If we reached here, it means that the node has no CIDR currently assigned. So we set it.
|
||||||
for i := 0; i < cidrUpdateRetries; i++ {
|
for i := 0; i < cidrUpdateRetries; i++ {
|
||||||
if err = nodeutil.PatchNodeCIDRs(r.client, types.NodeName(node.Name), cidrsString); err == nil {
|
if err = nodeutil.PatchNodeCIDRs(r.client, types.NodeName(node.Name), cidrsString); err == nil {
|
||||||
klog.Infof("Set node %v PodCIDR to %v", node.Name, cidrsString)
|
logger.Info("Set node PodCIDR", "node", klog.KObj(node), "podCIDRs", cidrsString)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// failed release back to the pool
|
// failed release back to the pool
|
||||||
klog.Errorf("Failed to update node %v PodCIDR to %v after multiple attempts: %v", node.Name, cidrsString, err)
|
logger.Error(err, "Failed to update node PodCIDR after multiple attempts", "node", klog.KObj(node), "podCIDRs", cidrsString)
|
||||||
controllerutil.RecordNodeStatusChange(r.recorder, node, "CIDRAssignmentFailed")
|
controllerutil.RecordNodeStatusChange(r.recorder, node, "CIDRAssignmentFailed")
|
||||||
// We accept the fact that we may leak CIDRs here. This is safer than releasing
|
// We accept the fact that we may leak CIDRs here. This is safer than releasing
|
||||||
// them in case when we don't know if request went through.
|
// them in case when we don't know if request went through.
|
||||||
// NodeController restart will return all falsely allocated CIDRs to the pool.
|
// NodeController restart will return all falsely allocated CIDRs to the pool.
|
||||||
if !apierrors.IsServerTimeout(err) {
|
if !apierrors.IsServerTimeout(err) {
|
||||||
klog.Errorf("CIDR assignment for node %v failed: %v. Releasing allocated CIDR", node.Name, err)
|
logger.Error(err, "CIDR assignment for node failed. Releasing allocated CIDR", "node", klog.KObj(node))
|
||||||
for idx, cidr := range data.allocatedCIDRs {
|
for idx, cidr := range data.allocatedCIDRs {
|
||||||
if releaseErr := r.cidrSets[idx].Release(cidr); releaseErr != nil {
|
if releaseErr := r.cidrSets[idx].Release(cidr); releaseErr != nil {
|
||||||
klog.Errorf("Error releasing allocated CIDR for node %v: %v", node.Name, releaseErr)
|
logger.Error(releaseErr, "Error releasing allocated CIDR for node", "node", klog.KObj(node))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -26,6 +26,7 @@ import (
|
|||||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
"k8s.io/apimachinery/pkg/util/wait"
|
"k8s.io/apimachinery/pkg/util/wait"
|
||||||
"k8s.io/client-go/kubernetes/fake"
|
"k8s.io/client-go/kubernetes/fake"
|
||||||
|
"k8s.io/klog/v2/ktesting"
|
||||||
"k8s.io/kubernetes/pkg/controller/nodeipam/ipam/test"
|
"k8s.io/kubernetes/pkg/controller/nodeipam/ipam/test"
|
||||||
"k8s.io/kubernetes/pkg/controller/testutil"
|
"k8s.io/kubernetes/pkg/controller/testutil"
|
||||||
netutils "k8s.io/utils/net"
|
netutils "k8s.io/utils/net"
|
||||||
@ -274,12 +275,13 @@ func TestOccupyPreExistingCIDR(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// test function
|
// test function
|
||||||
|
logger, _ := ktesting.NewTestContext(t)
|
||||||
for _, tc := range testCases {
|
for _, tc := range testCases {
|
||||||
t.Run(tc.description, func(t *testing.T) {
|
t.Run(tc.description, func(t *testing.T) {
|
||||||
// Initialize the range allocator.
|
// Initialize the range allocator.
|
||||||
fakeNodeInformer := test.FakeNodeInformer(tc.fakeNodeHandler)
|
fakeNodeInformer := test.FakeNodeInformer(tc.fakeNodeHandler)
|
||||||
nodeList, _ := tc.fakeNodeHandler.List(context.TODO(), metav1.ListOptions{})
|
nodeList, _ := tc.fakeNodeHandler.List(context.TODO(), metav1.ListOptions{})
|
||||||
_, err := NewCIDRRangeAllocator(tc.fakeNodeHandler, fakeNodeInformer, tc.allocatorParams, nodeList)
|
_, err := NewCIDRRangeAllocator(logger, tc.fakeNodeHandler, fakeNodeInformer, tc.allocatorParams, nodeList)
|
||||||
if err == nil && tc.ctrlCreateFail {
|
if err == nil && tc.ctrlCreateFail {
|
||||||
t.Fatalf("creating range allocator was expected to fail, but it did not")
|
t.Fatalf("creating range allocator was expected to fail, but it did not")
|
||||||
}
|
}
|
||||||
@ -508,11 +510,12 @@ func TestAllocateOrOccupyCIDRSuccess(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// test function
|
// test function
|
||||||
|
logger, ctx := ktesting.NewTestContext(t)
|
||||||
testFunc := func(tc testCase) {
|
testFunc := func(tc testCase) {
|
||||||
fakeNodeInformer := test.FakeNodeInformer(tc.fakeNodeHandler)
|
fakeNodeInformer := test.FakeNodeInformer(tc.fakeNodeHandler)
|
||||||
nodeList, _ := tc.fakeNodeHandler.List(context.TODO(), metav1.ListOptions{})
|
nodeList, _ := tc.fakeNodeHandler.List(context.TODO(), metav1.ListOptions{})
|
||||||
// Initialize the range allocator.
|
// Initialize the range allocator.
|
||||||
allocator, err := NewCIDRRangeAllocator(tc.fakeNodeHandler, fakeNodeInformer, tc.allocatorParams, nodeList)
|
allocator, err := NewCIDRRangeAllocator(logger, tc.fakeNodeHandler, fakeNodeInformer, tc.allocatorParams, nodeList)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf("%v: failed to create CIDRRangeAllocator with error %v", tc.description, err)
|
t.Errorf("%v: failed to create CIDRRangeAllocator with error %v", tc.description, err)
|
||||||
return
|
return
|
||||||
@ -524,7 +527,7 @@ func TestAllocateOrOccupyCIDRSuccess(t *testing.T) {
|
|||||||
}
|
}
|
||||||
rangeAllocator.nodesSynced = test.AlwaysReady
|
rangeAllocator.nodesSynced = test.AlwaysReady
|
||||||
rangeAllocator.recorder = testutil.NewFakeRecorder()
|
rangeAllocator.recorder = testutil.NewFakeRecorder()
|
||||||
go allocator.Run(wait.NeverStop)
|
go allocator.Run(ctx)
|
||||||
|
|
||||||
// this is a bit of white box testing
|
// this is a bit of white box testing
|
||||||
// pre allocate the cidrs as per the test
|
// pre allocate the cidrs as per the test
|
||||||
@ -545,7 +548,7 @@ func TestAllocateOrOccupyCIDRSuccess(t *testing.T) {
|
|||||||
if node.Spec.PodCIDRs == nil {
|
if node.Spec.PodCIDRs == nil {
|
||||||
updateCount++
|
updateCount++
|
||||||
}
|
}
|
||||||
if err := allocator.AllocateOrOccupyCIDR(node); err != nil {
|
if err := allocator.AllocateOrOccupyCIDR(logger, node); err != nil {
|
||||||
t.Errorf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err)
|
t.Errorf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -608,10 +611,10 @@ func TestAllocateOrOccupyCIDRFailure(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
logger, ctx := ktesting.NewTestContext(t)
|
||||||
testFunc := func(tc testCase) {
|
testFunc := func(tc testCase) {
|
||||||
// Initialize the range allocator.
|
// Initialize the range allocator.
|
||||||
allocator, err := NewCIDRRangeAllocator(tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), tc.allocatorParams, nil)
|
allocator, err := NewCIDRRangeAllocator(logger, tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), tc.allocatorParams, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Logf("%v: failed to create CIDRRangeAllocator with error %v", tc.description, err)
|
t.Logf("%v: failed to create CIDRRangeAllocator with error %v", tc.description, err)
|
||||||
}
|
}
|
||||||
@ -622,7 +625,7 @@ func TestAllocateOrOccupyCIDRFailure(t *testing.T) {
|
|||||||
}
|
}
|
||||||
rangeAllocator.nodesSynced = test.AlwaysReady
|
rangeAllocator.nodesSynced = test.AlwaysReady
|
||||||
rangeAllocator.recorder = testutil.NewFakeRecorder()
|
rangeAllocator.recorder = testutil.NewFakeRecorder()
|
||||||
go allocator.Run(wait.NeverStop)
|
go allocator.Run(ctx)
|
||||||
|
|
||||||
// this is a bit of white box testing
|
// this is a bit of white box testing
|
||||||
for setIdx, allocatedList := range tc.allocatedCIDRs {
|
for setIdx, allocatedList := range tc.allocatedCIDRs {
|
||||||
@ -637,7 +640,7 @@ func TestAllocateOrOccupyCIDRFailure(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if err := allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0]); err == nil {
|
if err := allocator.AllocateOrOccupyCIDR(logger, tc.fakeNodeHandler.Existing[0]); err == nil {
|
||||||
t.Errorf("%v: unexpected success in AllocateOrOccupyCIDR: %v", tc.description, err)
|
t.Errorf("%v: unexpected success in AllocateOrOccupyCIDR: %v", tc.description, err)
|
||||||
}
|
}
|
||||||
// We don't expect any updates, so just sleep for some time
|
// We don't expect any updates, so just sleep for some time
|
||||||
@ -753,10 +756,10 @@ func TestReleaseCIDRSuccess(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
logger, ctx := ktesting.NewTestContext(t)
|
||||||
testFunc := func(tc releaseTestCase) {
|
testFunc := func(tc releaseTestCase) {
|
||||||
// Initialize the range allocator.
|
// Initialize the range allocator.
|
||||||
allocator, _ := NewCIDRRangeAllocator(tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), tc.allocatorParams, nil)
|
allocator, _ := NewCIDRRangeAllocator(logger, tc.fakeNodeHandler, test.FakeNodeInformer(tc.fakeNodeHandler), tc.allocatorParams, nil)
|
||||||
rangeAllocator, ok := allocator.(*rangeAllocator)
|
rangeAllocator, ok := allocator.(*rangeAllocator)
|
||||||
if !ok {
|
if !ok {
|
||||||
t.Logf("%v: found non-default implementation of CIDRAllocator, skipping white-box test...", tc.description)
|
t.Logf("%v: found non-default implementation of CIDRAllocator, skipping white-box test...", tc.description)
|
||||||
@ -764,7 +767,7 @@ func TestReleaseCIDRSuccess(t *testing.T) {
|
|||||||
}
|
}
|
||||||
rangeAllocator.nodesSynced = test.AlwaysReady
|
rangeAllocator.nodesSynced = test.AlwaysReady
|
||||||
rangeAllocator.recorder = testutil.NewFakeRecorder()
|
rangeAllocator.recorder = testutil.NewFakeRecorder()
|
||||||
go allocator.Run(wait.NeverStop)
|
go allocator.Run(ctx)
|
||||||
|
|
||||||
// this is a bit of white box testing
|
// this is a bit of white box testing
|
||||||
for setIdx, allocatedList := range tc.allocatedCIDRs {
|
for setIdx, allocatedList := range tc.allocatedCIDRs {
|
||||||
@ -780,7 +783,7 @@ func TestReleaseCIDRSuccess(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
err := allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0])
|
err := allocator.AllocateOrOccupyCIDR(logger, tc.fakeNodeHandler.Existing[0])
|
||||||
if len(tc.expectedAllocatedCIDRFirstRound) != 0 {
|
if len(tc.expectedAllocatedCIDRFirstRound) != 0 {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err)
|
t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err)
|
||||||
@ -805,12 +808,12 @@ func TestReleaseCIDRSuccess(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
nodeToRelease.Spec.PodCIDRs = cidrToRelease
|
nodeToRelease.Spec.PodCIDRs = cidrToRelease
|
||||||
err = allocator.ReleaseCIDR(&nodeToRelease)
|
err = allocator.ReleaseCIDR(logger, &nodeToRelease)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("%v: unexpected error in ReleaseCIDR: %v", tc.description, err)
|
t.Fatalf("%v: unexpected error in ReleaseCIDR: %v", tc.description, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if err = allocator.AllocateOrOccupyCIDR(tc.fakeNodeHandler.Existing[0]); err != nil {
|
if err = allocator.AllocateOrOccupyCIDR(logger, tc.fakeNodeHandler.Existing[0]); err != nil {
|
||||||
t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err)
|
t.Fatalf("%v: unexpected error in AllocateOrOccupyCIDR: %v", tc.description, err)
|
||||||
}
|
}
|
||||||
if err := test.WaitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, 1, wait.ForeverTestTimeout); err != nil {
|
if err := test.WaitForUpdatedNodeWithTimeout(tc.fakeNodeHandler, 1, wait.ForeverTestTimeout); err != nil {
|
||||||
|
@ -120,8 +120,8 @@ func New(c controller, cloudAlias cloudAlias, kubeAPI kubeAPI, mode NodeSyncMode
|
|||||||
|
|
||||||
// Loop runs the sync loop for a given node. done is an optional channel that
|
// Loop runs the sync loop for a given node. done is an optional channel that
|
||||||
// is closed when the Loop() returns.
|
// is closed when the Loop() returns.
|
||||||
func (sync *NodeSync) Loop(done chan struct{}) {
|
func (sync *NodeSync) Loop(logger klog.Logger, done chan struct{}) {
|
||||||
klog.V(2).Infof("Starting sync loop for node %q", sync.nodeName)
|
logger.V(2).Info("Starting sync loop", "node", klog.KRef("", sync.nodeName))
|
||||||
|
|
||||||
defer func() {
|
defer func() {
|
||||||
if done != nil {
|
if done != nil {
|
||||||
@ -131,27 +131,27 @@ func (sync *NodeSync) Loop(done chan struct{}) {
|
|||||||
|
|
||||||
timeout := sync.c.ResyncTimeout()
|
timeout := sync.c.ResyncTimeout()
|
||||||
delayTimer := time.NewTimer(timeout)
|
delayTimer := time.NewTimer(timeout)
|
||||||
klog.V(4).Infof("Resync node %q in %v", sync.nodeName, timeout)
|
logger.V(4).Info("Try to resync node later", "node", klog.KRef("", sync.nodeName), "resyncTime", timeout)
|
||||||
|
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case op, more := <-sync.opChan:
|
case op, more := <-sync.opChan:
|
||||||
if !more {
|
if !more {
|
||||||
klog.V(2).Infof("Stopping sync loop")
|
logger.V(2).Info("Stopping sync loop")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
sync.c.ReportResult(op.run(sync))
|
sync.c.ReportResult(op.run(logger, sync))
|
||||||
if !delayTimer.Stop() {
|
if !delayTimer.Stop() {
|
||||||
<-delayTimer.C
|
<-delayTimer.C
|
||||||
}
|
}
|
||||||
case <-delayTimer.C:
|
case <-delayTimer.C:
|
||||||
klog.V(4).Infof("Running resync for node %q", sync.nodeName)
|
logger.V(4).Info("Running resync", "node", klog.KRef("", sync.nodeName))
|
||||||
sync.c.ReportResult((&updateOp{}).run(sync))
|
sync.c.ReportResult((&updateOp{}).run(logger, sync))
|
||||||
}
|
}
|
||||||
|
|
||||||
timeout := sync.c.ResyncTimeout()
|
timeout := sync.c.ResyncTimeout()
|
||||||
delayTimer.Reset(timeout)
|
delayTimer.Reset(timeout)
|
||||||
klog.V(4).Infof("Resync node %q in %v", sync.nodeName, timeout)
|
logger.V(4).Info("Try to resync node later", "node", klog.KRef("", sync.nodeName), "resyncTime", timeout)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -175,7 +175,7 @@ func (sync *NodeSync) Delete(node *v1.Node) {
|
|||||||
// syncOp is the interface for generic sync operation.
|
// syncOp is the interface for generic sync operation.
|
||||||
type syncOp interface {
|
type syncOp interface {
|
||||||
// run the requested sync operation.
|
// run the requested sync operation.
|
||||||
run(sync *NodeSync) error
|
run(logger klog.Logger, sync *NodeSync) error
|
||||||
}
|
}
|
||||||
|
|
||||||
// updateOp handles creation and updates of a node.
|
// updateOp handles creation and updates of a node.
|
||||||
@ -190,16 +190,16 @@ func (op *updateOp) String() string {
|
|||||||
return fmt.Sprintf("updateOp(%q,%v)", op.node.Name, op.node.Spec.PodCIDR)
|
return fmt.Sprintf("updateOp(%q,%v)", op.node.Name, op.node.Spec.PodCIDR)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (op *updateOp) run(sync *NodeSync) error {
|
func (op *updateOp) run(logger klog.Logger, sync *NodeSync) error {
|
||||||
klog.V(3).Infof("Running updateOp %+v", op)
|
logger.V(3).Info("Running updateOp", "updateOp", op)
|
||||||
|
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
|
|
||||||
if op.node == nil {
|
if op.node == nil {
|
||||||
klog.V(3).Infof("Getting node spec for %q", sync.nodeName)
|
logger.V(3).Info("Getting node spec", "node", klog.KRef("", sync.nodeName))
|
||||||
node, err := sync.kubeAPI.Node(ctx, sync.nodeName)
|
node, err := sync.kubeAPI.Node(ctx, sync.nodeName)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
klog.Errorf("Error getting node %q spec: %v", sync.nodeName, err)
|
logger.Error(err, "Error getting node pec", "node", klog.KRef("", sync.nodeName))
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
op.node = node
|
op.node = node
|
||||||
@ -207,7 +207,7 @@ func (op *updateOp) run(sync *NodeSync) error {
|
|||||||
|
|
||||||
aliasRange, err := sync.cloudAlias.Alias(ctx, op.node)
|
aliasRange, err := sync.cloudAlias.Alias(ctx, op.node)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
klog.Errorf("Error getting cloud alias for node %q: %v", sync.nodeName, err)
|
logger.Error(err, "Error getting cloud alias for node", "node", klog.KRef("", sync.nodeName))
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -229,14 +229,13 @@ func (op *updateOp) run(sync *NodeSync) error {
|
|||||||
// match.
|
// match.
|
||||||
func (op *updateOp) validateRange(ctx context.Context, sync *NodeSync, node *v1.Node, aliasRange *net.IPNet) error {
|
func (op *updateOp) validateRange(ctx context.Context, sync *NodeSync, node *v1.Node, aliasRange *net.IPNet) error {
|
||||||
if node.Spec.PodCIDR != aliasRange.String() {
|
if node.Spec.PodCIDR != aliasRange.String() {
|
||||||
klog.Errorf("Inconsistency detected between node PodCIDR and node alias (%v != %v)",
|
klog.FromContext(ctx).Error(nil, "Inconsistency detected between node PodCIDR and node alias", "podCIDR", node.Spec.PodCIDR, "alias", aliasRange)
|
||||||
node.Spec.PodCIDR, aliasRange)
|
|
||||||
sync.kubeAPI.EmitNodeWarningEvent(node.Name, MismatchEvent,
|
sync.kubeAPI.EmitNodeWarningEvent(node.Name, MismatchEvent,
|
||||||
"Node.Spec.PodCIDR != cloud alias (%v != %v)", node.Spec.PodCIDR, aliasRange)
|
"Node.Spec.PodCIDR != cloud alias (%v != %v)", node.Spec.PodCIDR, aliasRange)
|
||||||
// User intervention is required in this case, as this is most likely due
|
// User intervention is required in this case, as this is most likely due
|
||||||
// to the user mucking around with their VM aliases on the side.
|
// to the user mucking around with their VM aliases on the side.
|
||||||
} else {
|
} else {
|
||||||
klog.V(4).Infof("Node %q CIDR range %v is matches cloud assignment", node.Name, node.Spec.PodCIDR)
|
klog.FromContext(ctx).V(4).Info("Node CIDR range is matches cloud assignment", "node", klog.KObj(node), "podCIDR", node.Spec.PodCIDR)
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@ -249,27 +248,27 @@ func (op *updateOp) updateNodeFromAlias(ctx context.Context, sync *NodeSync, nod
|
|||||||
"Cannot sync from cloud in mode %q", sync.mode)
|
"Cannot sync from cloud in mode %q", sync.mode)
|
||||||
return fmt.Errorf("cannot sync from cloud in mode %q", sync.mode)
|
return fmt.Errorf("cannot sync from cloud in mode %q", sync.mode)
|
||||||
}
|
}
|
||||||
|
logger := klog.FromContext(ctx)
|
||||||
klog.V(2).Infof("Updating node spec with alias range, node.PodCIDR = %v", aliasRange)
|
logger.V(2).Info("Updating node spec with alias range", "podCIDR", aliasRange)
|
||||||
|
|
||||||
if err := sync.set.Occupy(aliasRange); err != nil {
|
if err := sync.set.Occupy(aliasRange); err != nil {
|
||||||
klog.Errorf("Error occupying range %v for node %v", aliasRange, sync.nodeName)
|
logger.Error(nil, "Error occupying range for node", "node", klog.KRef("", sync.nodeName), "alias", aliasRange)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := sync.kubeAPI.UpdateNodePodCIDR(ctx, node, aliasRange); err != nil {
|
if err := sync.kubeAPI.UpdateNodePodCIDR(ctx, node, aliasRange); err != nil {
|
||||||
klog.Errorf("Could not update node %q PodCIDR to %v: %v", node.Name, aliasRange, err)
|
logger.Error(err, "Could not update node PodCIDR", "node", klog.KObj(node), "podCIDR", aliasRange)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
klog.V(2).Infof("Node %q PodCIDR set to %v", node.Name, aliasRange)
|
logger.V(2).Info("Node PodCIDR updated", "node", klog.KObj(node), "podCIDR", aliasRange)
|
||||||
|
|
||||||
if err := sync.kubeAPI.UpdateNodeNetworkUnavailable(node.Name, false); err != nil {
|
if err := sync.kubeAPI.UpdateNodeNetworkUnavailable(node.Name, false); err != nil {
|
||||||
klog.Errorf("Could not update node NetworkUnavailable status to false: %v", err)
|
logger.Error(err, "Could not update node NetworkUnavailable status to false")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
klog.V(2).Infof("Updated node %q PodCIDR from cloud alias %v", node.Name, aliasRange)
|
logger.V(2).Info("Updated node PodCIDR from cloud alias", "node", klog.KObj(node), "alias", aliasRange)
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@ -283,29 +282,29 @@ func (op *updateOp) updateAliasFromNode(ctx context.Context, sync *NodeSync, nod
|
|||||||
}
|
}
|
||||||
|
|
||||||
_, aliasRange, err := netutils.ParseCIDRSloppy(node.Spec.PodCIDR)
|
_, aliasRange, err := netutils.ParseCIDRSloppy(node.Spec.PodCIDR)
|
||||||
|
|
||||||
|
logger := klog.FromContext(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
klog.Errorf("Could not parse PodCIDR (%q) for node %q: %v",
|
logger.Error(err, "Could not parse PodCIDR for node", "node", klog.KObj(node), "podCIDR", node.Spec.PodCIDR)
|
||||||
node.Spec.PodCIDR, node.Name, err)
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := sync.set.Occupy(aliasRange); err != nil {
|
if err := sync.set.Occupy(aliasRange); err != nil {
|
||||||
klog.Errorf("Error occupying range %v for node %v", aliasRange, sync.nodeName)
|
logger.Error(nil, "Error occupying range for node", "node", klog.KRef("", sync.nodeName), "alias", aliasRange)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := sync.cloudAlias.AddAlias(ctx, node, aliasRange); err != nil {
|
if err := sync.cloudAlias.AddAlias(ctx, node, aliasRange); err != nil {
|
||||||
klog.Errorf("Could not add alias %v for node %q: %v", aliasRange, node.Name, err)
|
logger.Error(err, "Could not add alias for node", "node", klog.KObj(node), "alias", aliasRange)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := sync.kubeAPI.UpdateNodeNetworkUnavailable(node.Name, false); err != nil {
|
if err := sync.kubeAPI.UpdateNodeNetworkUnavailable(node.Name, false); err != nil {
|
||||||
klog.Errorf("Could not update node NetworkUnavailable status to false: %v", err)
|
logger.Error(err, "Could not update node NetworkUnavailable status to false")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
klog.V(2).Infof("Updated node %q cloud alias with node spec, node.PodCIDR = %v",
|
logger.V(2).Info("Updated node cloud alias with node spec", "node", klog.KObj(node), "podCIDR", node.Spec.PodCIDR)
|
||||||
node.Name, node.Spec.PodCIDR)
|
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@ -326,22 +325,23 @@ func (op *updateOp) allocateRange(ctx context.Context, sync *NodeSync, node *v1.
|
|||||||
// If addAlias returns a hard error, cidrRange will be leaked as there
|
// If addAlias returns a hard error, cidrRange will be leaked as there
|
||||||
// is no durable record of the range. The missing space will be
|
// is no durable record of the range. The missing space will be
|
||||||
// recovered on the next restart of the controller.
|
// recovered on the next restart of the controller.
|
||||||
|
logger := klog.FromContext(ctx)
|
||||||
if err := sync.cloudAlias.AddAlias(ctx, node, cidrRange); err != nil {
|
if err := sync.cloudAlias.AddAlias(ctx, node, cidrRange); err != nil {
|
||||||
klog.Errorf("Could not add alias %v for node %q: %v", cidrRange, node.Name, err)
|
logger.Error(err, "Could not add alias for node", "node", klog.KObj(node), "alias", cidrRange)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := sync.kubeAPI.UpdateNodePodCIDR(ctx, node, cidrRange); err != nil {
|
if err := sync.kubeAPI.UpdateNodePodCIDR(ctx, node, cidrRange); err != nil {
|
||||||
klog.Errorf("Could not update node %q PodCIDR to %v: %v", node.Name, cidrRange, err)
|
logger.Error(err, "Could not update node PodCIDR", "node", klog.KObj(node), "podCIDR", cidrRange)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := sync.kubeAPI.UpdateNodeNetworkUnavailable(node.Name, false); err != nil {
|
if err := sync.kubeAPI.UpdateNodeNetworkUnavailable(node.Name, false); err != nil {
|
||||||
klog.Errorf("Could not update node NetworkUnavailable status to false: %v", err)
|
logger.Error(err, "Could not update node NetworkUnavailable status to false")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
klog.V(2).Infof("Allocated PodCIDR %v for node %q", cidrRange, node.Name)
|
logger.V(2).Info("Allocated PodCIDR for node", "node", klog.KObj(node), "podCIDR", cidrRange)
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@ -358,25 +358,23 @@ func (op *deleteOp) String() string {
|
|||||||
return fmt.Sprintf("deleteOp(%q,%v)", op.node.Name, op.node.Spec.PodCIDR)
|
return fmt.Sprintf("deleteOp(%q,%v)", op.node.Name, op.node.Spec.PodCIDR)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (op *deleteOp) run(sync *NodeSync) error {
|
func (op *deleteOp) run(logger klog.Logger, sync *NodeSync) error {
|
||||||
klog.V(3).Infof("Running deleteOp %+v", op)
|
logger.V(3).Info("Running deleteOp", "deleteOp", op)
|
||||||
if op.node.Spec.PodCIDR == "" {
|
if op.node.Spec.PodCIDR == "" {
|
||||||
klog.V(2).Infof("Node %q was deleted, node had no PodCIDR range assigned", op.node.Name)
|
logger.V(2).Info("Node was deleted, node had no PodCIDR range assigned", "node", klog.KObj(op.node))
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
_, cidrRange, err := netutils.ParseCIDRSloppy(op.node.Spec.PodCIDR)
|
_, cidrRange, err := netutils.ParseCIDRSloppy(op.node.Spec.PodCIDR)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
klog.Errorf("Deleted node %q has an invalid podCIDR %q: %v",
|
logger.Error(err, "Deleted node has an invalid podCIDR", "node", klog.KObj(op.node), "podCIDR", op.node.Spec.PodCIDR)
|
||||||
op.node.Name, op.node.Spec.PodCIDR, err)
|
|
||||||
sync.kubeAPI.EmitNodeWarningEvent(op.node.Name, InvalidPodCIDR,
|
sync.kubeAPI.EmitNodeWarningEvent(op.node.Name, InvalidPodCIDR,
|
||||||
"Node %q has an invalid PodCIDR: %q", op.node.Name, op.node.Spec.PodCIDR)
|
"Node %q has an invalid PodCIDR: %q", op.node.Name, op.node.Spec.PodCIDR)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
sync.set.Release(cidrRange)
|
sync.set.Release(cidrRange)
|
||||||
klog.V(2).Infof("Node %q was deleted, releasing CIDR range %v",
|
logger.V(2).Info("Node was deleted, releasing CIDR range", "node", klog.KObj(op.node), "podCIDR", op.node.Spec.PodCIDR)
|
||||||
op.node.Name, op.node.Spec.PodCIDR)
|
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -26,6 +26,7 @@ import (
|
|||||||
|
|
||||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
"k8s.io/klog/v2"
|
"k8s.io/klog/v2"
|
||||||
|
"k8s.io/klog/v2/ktesting"
|
||||||
"k8s.io/kubernetes/pkg/controller/nodeipam/ipam/cidrset"
|
"k8s.io/kubernetes/pkg/controller/nodeipam/ipam/cidrset"
|
||||||
"k8s.io/kubernetes/pkg/controller/nodeipam/ipam/test"
|
"k8s.io/kubernetes/pkg/controller/nodeipam/ipam/test"
|
||||||
netutils "k8s.io/utils/net"
|
netutils "k8s.io/utils/net"
|
||||||
@ -200,7 +201,8 @@ func TestNodeSyncUpdate(t *testing.T) {
|
|||||||
doneChan := make(chan struct{})
|
doneChan := make(chan struct{})
|
||||||
|
|
||||||
// Do a single step of the loop.
|
// Do a single step of the loop.
|
||||||
go sync.Loop(doneChan)
|
logger, _ := ktesting.NewTestContext(t)
|
||||||
|
go sync.Loop(logger, doneChan)
|
||||||
sync.Update(tc.node)
|
sync.Update(tc.node)
|
||||||
close(sync.opChan)
|
close(sync.opChan)
|
||||||
<-doneChan
|
<-doneChan
|
||||||
@ -230,8 +232,8 @@ func TestNodeSyncResync(t *testing.T) {
|
|||||||
cidr, _ := cidrset.NewCIDRSet(clusterCIDRRange, 24)
|
cidr, _ := cidrset.NewCIDRSet(clusterCIDRRange, 24)
|
||||||
sync := New(fake, fake, fake, SyncFromCluster, "node1", cidr)
|
sync := New(fake, fake, fake, SyncFromCluster, "node1", cidr)
|
||||||
doneChan := make(chan struct{})
|
doneChan := make(chan struct{})
|
||||||
|
logger, _ := ktesting.NewTestContext(t)
|
||||||
go sync.Loop(doneChan)
|
go sync.Loop(logger, doneChan)
|
||||||
<-fake.reportChan
|
<-fake.reportChan
|
||||||
close(sync.opChan)
|
close(sync.opChan)
|
||||||
// Unblock loop().
|
// Unblock loop().
|
||||||
@ -275,7 +277,8 @@ func TestNodeSyncDelete(t *testing.T) {
|
|||||||
doneChan := make(chan struct{})
|
doneChan := make(chan struct{})
|
||||||
|
|
||||||
// Do a single step of the loop.
|
// Do a single step of the loop.
|
||||||
go sync.Loop(doneChan)
|
logger, _ := ktesting.NewTestContext(t)
|
||||||
|
go sync.Loop(logger, doneChan)
|
||||||
sync.Delete(tc.node)
|
sync.Delete(tc.node)
|
||||||
<-doneChan
|
<-doneChan
|
||||||
tc.fake.dumpTrace()
|
tc.fake.dumpTrace()
|
||||||
|
@ -32,6 +32,7 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func createLegacyIPAM(
|
func createLegacyIPAM(
|
||||||
|
logger klog.Logger,
|
||||||
ic *Controller,
|
ic *Controller,
|
||||||
nodeInformer coreinformers.NodeInformer,
|
nodeInformer coreinformers.NodeInformer,
|
||||||
cloud cloudprovider.Interface,
|
cloud cloudprovider.Interface,
|
||||||
@ -58,14 +59,16 @@ func createLegacyIPAM(
|
|||||||
cidr = clusterCIDRs[0]
|
cidr = clusterCIDRs[0]
|
||||||
}
|
}
|
||||||
if len(clusterCIDRs) > 1 {
|
if len(clusterCIDRs) > 1 {
|
||||||
klog.Warningf("Multiple cidrs were configured with FromCluster or FromCloud. cidrs except first one were discarded")
|
logger.Info("Multiple cidrs were configured with FromCluster or FromCloud. cidrs except first one were discarded")
|
||||||
}
|
}
|
||||||
ipamc, err := ipam.NewController(cfg, kubeClient, cloud, cidr, serviceCIDR, nodeCIDRMaskSizes[0])
|
ipamc, err := ipam.NewController(cfg, kubeClient, cloud, cidr, serviceCIDR, nodeCIDRMaskSizes[0])
|
||||||
if err != nil {
|
if err != nil {
|
||||||
klog.Fatalf("Error creating ipam controller: %v", err)
|
logger.Error(err, "Error creating ipam controller")
|
||||||
|
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
|
||||||
}
|
}
|
||||||
if err := ipamc.Start(nodeInformer); err != nil {
|
if err := ipamc.Start(logger, nodeInformer); err != nil {
|
||||||
klog.Fatalf("Error trying to Init(): %v", err)
|
logger.Error(err, "Error trying to Init()")
|
||||||
|
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
|
||||||
}
|
}
|
||||||
return ipamc
|
return ipamc
|
||||||
}
|
}
|
||||||
|
@ -17,6 +17,7 @@ limitations under the License.
|
|||||||
package nodeipam
|
package nodeipam
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"net"
|
"net"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@ -50,7 +51,7 @@ const (
|
|||||||
// legacy mode. It is needed to ensure correct building for
|
// legacy mode. It is needed to ensure correct building for
|
||||||
// both provider-specific and providerless environments.
|
// both provider-specific and providerless environments.
|
||||||
type ipamController interface {
|
type ipamController interface {
|
||||||
Run(<-chan struct{})
|
Run(ctx context.Context)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Controller is the controller that manages node ipam state.
|
// Controller is the controller that manages node ipam state.
|
||||||
@ -79,6 +80,7 @@ type Controller struct {
|
|||||||
// podCIDRs it has already allocated to nodes. Since we don't allow podCIDR changes
|
// podCIDRs it has already allocated to nodes. Since we don't allow podCIDR changes
|
||||||
// currently, this should be handled as a fatal error.
|
// currently, this should be handled as a fatal error.
|
||||||
func NewNodeIpamController(
|
func NewNodeIpamController(
|
||||||
|
ctx context.Context,
|
||||||
nodeInformer coreinformers.NodeInformer,
|
nodeInformer coreinformers.NodeInformer,
|
||||||
clusterCIDRInformer networkinginformers.ClusterCIDRInformer,
|
clusterCIDRInformer networkinginformers.ClusterCIDRInformer,
|
||||||
cloud cloudprovider.Interface,
|
cloud cloudprovider.Interface,
|
||||||
@ -89,20 +91,24 @@ func NewNodeIpamController(
|
|||||||
nodeCIDRMaskSizes []int,
|
nodeCIDRMaskSizes []int,
|
||||||
allocatorType ipam.CIDRAllocatorType) (*Controller, error) {
|
allocatorType ipam.CIDRAllocatorType) (*Controller, error) {
|
||||||
|
|
||||||
|
logger := klog.FromContext(ctx)
|
||||||
if kubeClient == nil {
|
if kubeClient == nil {
|
||||||
klog.Fatalf("kubeClient is nil when starting Controller")
|
logger.Error(nil, "kubeClient is nil when starting Controller")
|
||||||
|
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Cloud CIDR allocator does not rely on clusterCIDR or nodeCIDRMaskSize for allocation.
|
// Cloud CIDR allocator does not rely on clusterCIDR or nodeCIDRMaskSize for allocation.
|
||||||
if allocatorType != ipam.CloudAllocatorType {
|
if allocatorType != ipam.CloudAllocatorType {
|
||||||
if len(clusterCIDRs) == 0 {
|
if len(clusterCIDRs) == 0 {
|
||||||
klog.Fatal("Controller: Must specify --cluster-cidr if --allocate-node-cidrs is set")
|
logger.Error(nil, "Controller: Must specify --cluster-cidr if --allocate-node-cidrs is set")
|
||||||
|
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
|
||||||
}
|
}
|
||||||
|
|
||||||
for idx, cidr := range clusterCIDRs {
|
for idx, cidr := range clusterCIDRs {
|
||||||
mask := cidr.Mask
|
mask := cidr.Mask
|
||||||
if maskSize, _ := mask.Size(); maskSize > nodeCIDRMaskSizes[idx] {
|
if maskSize, _ := mask.Size(); maskSize > nodeCIDRMaskSizes[idx] {
|
||||||
klog.Fatal("Controller: Invalid --cluster-cidr, mask size of cluster CIDR must be less than or equal to --node-cidr-mask-size configured for CIDR family")
|
logger.Error(nil, "Controller: Invalid --cluster-cidr, mask size of cluster CIDR must be less than or equal to --node-cidr-mask-size configured for CIDR family")
|
||||||
|
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -120,7 +126,7 @@ func NewNodeIpamController(
|
|||||||
|
|
||||||
// TODO: Abstract this check into a generic controller manager should run method.
|
// TODO: Abstract this check into a generic controller manager should run method.
|
||||||
if ic.allocatorType == ipam.IPAMFromClusterAllocatorType || ic.allocatorType == ipam.IPAMFromCloudAllocatorType {
|
if ic.allocatorType == ipam.IPAMFromClusterAllocatorType || ic.allocatorType == ipam.IPAMFromCloudAllocatorType {
|
||||||
ic.legacyIPAM = createLegacyIPAM(ic, nodeInformer, cloud, kubeClient, clusterCIDRs, serviceCIDR, nodeCIDRMaskSizes)
|
ic.legacyIPAM = createLegacyIPAM(logger, ic, nodeInformer, cloud, kubeClient, clusterCIDRs, serviceCIDR, nodeCIDRMaskSizes)
|
||||||
} else {
|
} else {
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
@ -131,7 +137,7 @@ func NewNodeIpamController(
|
|||||||
NodeCIDRMaskSizes: nodeCIDRMaskSizes,
|
NodeCIDRMaskSizes: nodeCIDRMaskSizes,
|
||||||
}
|
}
|
||||||
|
|
||||||
ic.cidrAllocator, err = ipam.New(kubeClient, cloud, nodeInformer, clusterCIDRInformer, ic.allocatorType, allocatorParams)
|
ic.cidrAllocator, err = ipam.New(ctx, kubeClient, cloud, nodeInformer, clusterCIDRInformer, ic.allocatorType, allocatorParams)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -144,33 +150,32 @@ func NewNodeIpamController(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Run starts an asynchronous loop that monitors the status of cluster nodes.
|
// Run starts an asynchronous loop that monitors the status of cluster nodes.
|
||||||
func (nc *Controller) Run(stopCh <-chan struct{}) {
|
func (nc *Controller) Run(ctx context.Context) {
|
||||||
defer utilruntime.HandleCrash()
|
defer utilruntime.HandleCrash()
|
||||||
|
|
||||||
// Start event processing pipeline.
|
// Start event processing pipeline.
|
||||||
nc.eventBroadcaster.StartStructuredLogging(0)
|
nc.eventBroadcaster.StartStructuredLogging(0)
|
||||||
nc.eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: nc.kubeClient.CoreV1().Events("")})
|
nc.eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: nc.kubeClient.CoreV1().Events("")})
|
||||||
defer nc.eventBroadcaster.Shutdown()
|
defer nc.eventBroadcaster.Shutdown()
|
||||||
|
klog.FromContext(ctx).Info("Starting ipam controller")
|
||||||
|
defer klog.FromContext(ctx).Info("Shutting down ipam controller")
|
||||||
|
|
||||||
klog.Infof("Starting ipam controller")
|
if !cache.WaitForNamedCacheSync("node", ctx.Done(), nc.nodeInformerSynced) {
|
||||||
defer klog.Infof("Shutting down ipam controller")
|
|
||||||
|
|
||||||
if !cache.WaitForNamedCacheSync("node", stopCh, nc.nodeInformerSynced) {
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if nc.allocatorType == ipam.IPAMFromClusterAllocatorType || nc.allocatorType == ipam.IPAMFromCloudAllocatorType {
|
if nc.allocatorType == ipam.IPAMFromClusterAllocatorType || nc.allocatorType == ipam.IPAMFromCloudAllocatorType {
|
||||||
go nc.legacyIPAM.Run(stopCh)
|
go nc.legacyIPAM.Run(ctx)
|
||||||
} else {
|
} else {
|
||||||
go nc.cidrAllocator.Run(stopCh)
|
go nc.cidrAllocator.Run(ctx)
|
||||||
}
|
}
|
||||||
|
|
||||||
<-stopCh
|
<-ctx.Done()
|
||||||
}
|
}
|
||||||
|
|
||||||
// RunWithMetrics is a wrapper for Run that also tracks starting and stopping of the nodeipam controller with additional metric
|
// RunWithMetrics is a wrapper for Run that also tracks starting and stopping of the nodeipam controller with additional metric
|
||||||
func (nc *Controller) RunWithMetrics(stopCh <-chan struct{}, controllerManagerMetrics *controllersmetrics.ControllerManagerMetrics) {
|
func (nc *Controller) RunWithMetrics(ctx context.Context, controllerManagerMetrics *controllersmetrics.ControllerManagerMetrics) {
|
||||||
controllerManagerMetrics.ControllerStarted("nodeipam")
|
controllerManagerMetrics.ControllerStarted("nodeipam")
|
||||||
defer controllerManagerMetrics.ControllerStopped("nodeipam")
|
defer controllerManagerMetrics.ControllerStopped("nodeipam")
|
||||||
nc.Run(stopCh)
|
nc.Run(ctx)
|
||||||
}
|
}
|
||||||
|
@ -20,6 +20,7 @@ limitations under the License.
|
|||||||
package nodeipam
|
package nodeipam
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"net"
|
"net"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
@ -30,6 +31,7 @@ import (
|
|||||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
"k8s.io/client-go/informers"
|
"k8s.io/client-go/informers"
|
||||||
"k8s.io/client-go/kubernetes/fake"
|
"k8s.io/client-go/kubernetes/fake"
|
||||||
|
"k8s.io/klog/v2/ktesting"
|
||||||
"k8s.io/kubernetes/pkg/controller"
|
"k8s.io/kubernetes/pkg/controller"
|
||||||
"k8s.io/kubernetes/pkg/controller/nodeipam/ipam"
|
"k8s.io/kubernetes/pkg/controller/nodeipam/ipam"
|
||||||
"k8s.io/kubernetes/pkg/controller/testutil"
|
"k8s.io/kubernetes/pkg/controller/testutil"
|
||||||
@ -37,7 +39,7 @@ import (
|
|||||||
netutils "k8s.io/utils/net"
|
netutils "k8s.io/utils/net"
|
||||||
)
|
)
|
||||||
|
|
||||||
func newTestNodeIpamController(clusterCIDR []*net.IPNet, serviceCIDR *net.IPNet, secondaryServiceCIDR *net.IPNet, nodeCIDRMaskSizes []int, allocatorType ipam.CIDRAllocatorType) (*Controller, error) {
|
func newTestNodeIpamController(ctx context.Context, clusterCIDR []*net.IPNet, serviceCIDR *net.IPNet, secondaryServiceCIDR *net.IPNet, nodeCIDRMaskSizes []int, allocatorType ipam.CIDRAllocatorType) (*Controller, error) {
|
||||||
clientSet := fake.NewSimpleClientset()
|
clientSet := fake.NewSimpleClientset()
|
||||||
fakeNodeHandler := &testutil.FakeNodeHandler{
|
fakeNodeHandler := &testutil.FakeNodeHandler{
|
||||||
Existing: []*v1.Node{
|
Existing: []*v1.Node{
|
||||||
@ -56,6 +58,7 @@ func newTestNodeIpamController(clusterCIDR []*net.IPNet, serviceCIDR *net.IPNet,
|
|||||||
|
|
||||||
fakeGCE := gce.NewFakeGCECloud(gce.DefaultTestClusterValues())
|
fakeGCE := gce.NewFakeGCECloud(gce.DefaultTestClusterValues())
|
||||||
return NewNodeIpamController(
|
return NewNodeIpamController(
|
||||||
|
ctx,
|
||||||
fakeNodeInformer, fakeClusterCIDRInformer, fakeGCE, clientSet,
|
fakeNodeInformer, fakeClusterCIDRInformer, fakeGCE, clientSet,
|
||||||
clusterCIDR, serviceCIDR, secondaryServiceCIDR, nodeCIDRMaskSizes, allocatorType,
|
clusterCIDR, serviceCIDR, secondaryServiceCIDR, nodeCIDRMaskSizes, allocatorType,
|
||||||
)
|
)
|
||||||
@ -92,6 +95,7 @@ func TestNewNodeIpamControllerWithCIDRMasks(t *testing.T) {
|
|||||||
{"invalid_serviceCIDR_contains_clusterCIDR", "10.0.0.0/23", "10.0.0.0/21", emptyServiceCIDR, []int{24}, ipam.IPAMFromClusterAllocatorType, true},
|
{"invalid_serviceCIDR_contains_clusterCIDR", "10.0.0.0/23", "10.0.0.0/21", emptyServiceCIDR, []int{24}, ipam.IPAMFromClusterAllocatorType, true},
|
||||||
{"invalid_CIDR_mask_size", "10.0.0.0/24,2000::/64", "10.1.0.0/21", emptyServiceCIDR, []int{24, 48}, ipam.IPAMFromClusterAllocatorType, true},
|
{"invalid_CIDR_mask_size", "10.0.0.0/24,2000::/64", "10.1.0.0/21", emptyServiceCIDR, []int{24, 48}, ipam.IPAMFromClusterAllocatorType, true},
|
||||||
} {
|
} {
|
||||||
|
_, ctx := ktesting.NewTestContext(t)
|
||||||
t.Run(tc.desc, func(t *testing.T) {
|
t.Run(tc.desc, func(t *testing.T) {
|
||||||
clusterCidrs, _ := netutils.ParseCIDRs(strings.Split(tc.clusterCIDR, ","))
|
clusterCidrs, _ := netutils.ParseCIDRs(strings.Split(tc.clusterCIDR, ","))
|
||||||
_, serviceCIDRIpNet, _ := netutils.ParseCIDRSloppy(tc.serviceCIDR)
|
_, serviceCIDRIpNet, _ := netutils.ParseCIDRSloppy(tc.serviceCIDR)
|
||||||
@ -99,7 +103,7 @@ func TestNewNodeIpamControllerWithCIDRMasks(t *testing.T) {
|
|||||||
|
|
||||||
if os.Getenv("EXIT_ON_FATAL") == "1" {
|
if os.Getenv("EXIT_ON_FATAL") == "1" {
|
||||||
// This is the subprocess which runs the actual code.
|
// This is the subprocess which runs the actual code.
|
||||||
newTestNodeIpamController(clusterCidrs, serviceCIDRIpNet, secondaryServiceCIDRIpNet, tc.maskSize, tc.allocatorType)
|
newTestNodeIpamController(ctx, clusterCidrs, serviceCIDRIpNet, secondaryServiceCIDRIpNet, tc.maskSize, tc.allocatorType)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
// This is the host process that monitors the exit code of the subprocess.
|
// This is the host process that monitors the exit code of the subprocess.
|
||||||
|
@ -20,6 +20,7 @@ limitations under the License.
|
|||||||
package nodeipam
|
package nodeipam
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"net"
|
"net"
|
||||||
|
|
||||||
coreinformers "k8s.io/client-go/informers/core/v1"
|
coreinformers "k8s.io/client-go/informers/core/v1"
|
||||||
@ -31,11 +32,12 @@ import (
|
|||||||
type fakeController struct {
|
type fakeController struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f *fakeController) Run(stopCh <-chan struct{}) {
|
func (f *fakeController) Run(ctx context.Context) {
|
||||||
<-stopCh
|
<-ctx.Done()
|
||||||
}
|
}
|
||||||
|
|
||||||
func createLegacyIPAM(
|
func createLegacyIPAM(
|
||||||
|
logger klog.Logger,
|
||||||
ic *Controller,
|
ic *Controller,
|
||||||
nodeInformer coreinformers.NodeInformer,
|
nodeInformer coreinformers.NodeInformer,
|
||||||
cloud cloudprovider.Interface,
|
cloud cloudprovider.Interface,
|
||||||
@ -44,6 +46,7 @@ func createLegacyIPAM(
|
|||||||
serviceCIDR *net.IPNet,
|
serviceCIDR *net.IPNet,
|
||||||
nodeCIDRMaskSizes []int,
|
nodeCIDRMaskSizes []int,
|
||||||
) *fakeController {
|
) *fakeController {
|
||||||
klog.Fatal("Error trying to Init(): legacy cloud provider support disabled at build time")
|
logger.Error(nil, "Error trying to Init(): legacy cloud provider support disabled at build time")
|
||||||
|
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
|
||||||
return &fakeController{}
|
return &fakeController{}
|
||||||
}
|
}
|
||||||
|
@ -368,9 +368,10 @@ func NewNodeLifecycleController(
|
|||||||
unhealthyZoneThreshold float32,
|
unhealthyZoneThreshold float32,
|
||||||
runTaintManager bool,
|
runTaintManager bool,
|
||||||
) (*Controller, error) {
|
) (*Controller, error) {
|
||||||
|
logger := klog.LoggerWithName(klog.FromContext(ctx), "NodeLifecycleController")
|
||||||
if kubeClient == nil {
|
if kubeClient == nil {
|
||||||
klog.Fatalf("kubeClient is nil when starting Controller")
|
logger.Error(nil, "kubeClient is nil when starting nodelifecycle Controller")
|
||||||
|
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
|
||||||
}
|
}
|
||||||
|
|
||||||
eventBroadcaster := record.NewBroadcaster()
|
eventBroadcaster := record.NewBroadcaster()
|
||||||
@ -428,12 +429,12 @@ func NewNodeLifecycleController(
|
|||||||
if !isPod {
|
if !isPod {
|
||||||
deletedState, ok := obj.(cache.DeletedFinalStateUnknown)
|
deletedState, ok := obj.(cache.DeletedFinalStateUnknown)
|
||||||
if !ok {
|
if !ok {
|
||||||
klog.Errorf("Received unexpected object: %v", obj)
|
logger.Error(nil, "Received unexpected object", "object", obj)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
pod, ok = deletedState.Obj.(*v1.Pod)
|
pod, ok = deletedState.Obj.(*v1.Pod)
|
||||||
if !ok {
|
if !ok {
|
||||||
klog.Errorf("DeletedFinalStateUnknown contained non-Pod object: %v", deletedState.Obj)
|
logger.Error(nil, "DeletedFinalStateUnknown contained non-Pod object", "object", deletedState.Obj)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -494,7 +495,7 @@ func NewNodeLifecycleController(
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
klog.Infof("Controller will reconcile labels.")
|
logger.Info("Controller will reconcile labels")
|
||||||
nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
|
nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
|
||||||
AddFunc: controllerutil.CreateAddNodeHandler(func(node *v1.Node) error {
|
AddFunc: controllerutil.CreateAddNodeHandler(func(node *v1.Node) error {
|
||||||
nc.nodeUpdateQueue.Add(node.Name)
|
nc.nodeUpdateQueue.Add(node.Name)
|
||||||
@ -529,7 +530,8 @@ func (nc *Controller) Run(ctx context.Context) {
|
|||||||
|
|
||||||
// Start events processing pipeline.
|
// Start events processing pipeline.
|
||||||
nc.broadcaster.StartStructuredLogging(0)
|
nc.broadcaster.StartStructuredLogging(0)
|
||||||
klog.Infof("Sending events to api server.")
|
logger := klog.FromContext(ctx)
|
||||||
|
logger.Info("Sending events to api server")
|
||||||
nc.broadcaster.StartRecordingToSink(
|
nc.broadcaster.StartRecordingToSink(
|
||||||
&v1core.EventSinkImpl{
|
&v1core.EventSinkImpl{
|
||||||
Interface: v1core.New(nc.kubeClient.CoreV1().RESTClient()).Events(""),
|
Interface: v1core.New(nc.kubeClient.CoreV1().RESTClient()).Events(""),
|
||||||
@ -540,8 +542,8 @@ func (nc *Controller) Run(ctx context.Context) {
|
|||||||
defer nc.nodeUpdateQueue.ShutDown()
|
defer nc.nodeUpdateQueue.ShutDown()
|
||||||
defer nc.podUpdateQueue.ShutDown()
|
defer nc.podUpdateQueue.ShutDown()
|
||||||
|
|
||||||
klog.Infof("Starting node controller")
|
logger.Info("Starting node controller")
|
||||||
defer klog.Infof("Shutting down node controller")
|
defer logger.Info("Shutting down node controller")
|
||||||
|
|
||||||
if !cache.WaitForNamedCacheSync("taint", ctx.Done(), nc.leaseInformerSynced, nc.nodeInformerSynced, nc.podInformerSynced, nc.daemonSetInformerSynced) {
|
if !cache.WaitForNamedCacheSync("taint", ctx.Done(), nc.leaseInformerSynced, nc.nodeInformerSynced, nc.podInformerSynced, nc.daemonSetInformerSynced) {
|
||||||
return
|
return
|
||||||
@ -578,7 +580,7 @@ func (nc *Controller) Run(ctx context.Context) {
|
|||||||
// Incorporate the results of node health signal pushed from kubelet to master.
|
// Incorporate the results of node health signal pushed from kubelet to master.
|
||||||
go wait.UntilWithContext(ctx, func(ctx context.Context) {
|
go wait.UntilWithContext(ctx, func(ctx context.Context) {
|
||||||
if err := nc.monitorNodeHealth(ctx); err != nil {
|
if err := nc.monitorNodeHealth(ctx); err != nil {
|
||||||
klog.Errorf("Error monitoring node health: %v", err)
|
logger.Error(err, "Error monitoring node health")
|
||||||
}
|
}
|
||||||
}, nc.nodeMonitorPeriod)
|
}, nc.nodeMonitorPeriod)
|
||||||
|
|
||||||
@ -586,6 +588,7 @@ func (nc *Controller) Run(ctx context.Context) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (nc *Controller) doNodeProcessingPassWorker(ctx context.Context) {
|
func (nc *Controller) doNodeProcessingPassWorker(ctx context.Context) {
|
||||||
|
logger := klog.FromContext(ctx)
|
||||||
for {
|
for {
|
||||||
obj, shutdown := nc.nodeUpdateQueue.Get()
|
obj, shutdown := nc.nodeUpdateQueue.Get()
|
||||||
// "nodeUpdateQueue" will be shutdown when "stopCh" closed;
|
// "nodeUpdateQueue" will be shutdown when "stopCh" closed;
|
||||||
@ -595,13 +598,13 @@ func (nc *Controller) doNodeProcessingPassWorker(ctx context.Context) {
|
|||||||
}
|
}
|
||||||
nodeName := obj.(string)
|
nodeName := obj.(string)
|
||||||
if err := nc.doNoScheduleTaintingPass(ctx, nodeName); err != nil {
|
if err := nc.doNoScheduleTaintingPass(ctx, nodeName); err != nil {
|
||||||
klog.Errorf("Failed to taint NoSchedule on node <%s>, requeue it: %v", nodeName, err)
|
logger.Error(err, "Failed to taint NoSchedule on node, requeue it", "node", klog.KRef("", nodeName))
|
||||||
// TODO(k82cn): Add nodeName back to the queue
|
// TODO(k82cn): Add nodeName back to the queue
|
||||||
}
|
}
|
||||||
// TODO: re-evaluate whether there are any labels that need to be
|
// TODO: re-evaluate whether there are any labels that need to be
|
||||||
// reconcile in 1.19. Remove this function if it's no longer necessary.
|
// reconcile in 1.19. Remove this function if it's no longer necessary.
|
||||||
if err := nc.reconcileNodeLabels(nodeName); err != nil {
|
if err := nc.reconcileNodeLabels(nodeName); err != nil {
|
||||||
klog.Errorf("Failed to reconcile labels for node <%s>, requeue it: %v", nodeName, err)
|
logger.Error(err, "Failed to reconcile labels for node, requeue it", "node", klog.KRef("", nodeName))
|
||||||
// TODO(yujuhong): Add nodeName back to the queue
|
// TODO(yujuhong): Add nodeName back to the queue
|
||||||
}
|
}
|
||||||
nc.nodeUpdateQueue.Done(nodeName)
|
nc.nodeUpdateQueue.Done(nodeName)
|
||||||
@ -677,6 +680,7 @@ func (nc *Controller) doNoExecuteTaintingPass(ctx context.Context) {
|
|||||||
zoneNoExecuteTainterKeys = append(zoneNoExecuteTainterKeys, k)
|
zoneNoExecuteTainterKeys = append(zoneNoExecuteTainterKeys, k)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
logger := klog.FromContext(ctx)
|
||||||
for _, k := range zoneNoExecuteTainterKeys {
|
for _, k := range zoneNoExecuteTainterKeys {
|
||||||
var zoneNoExecuteTainterWorker *scheduler.RateLimitedTimedQueue
|
var zoneNoExecuteTainterWorker *scheduler.RateLimitedTimedQueue
|
||||||
func() {
|
func() {
|
||||||
@ -689,13 +693,13 @@ func (nc *Controller) doNoExecuteTaintingPass(ctx context.Context) {
|
|||||||
zoneNoExecuteTainterWorker = nc.zoneNoExecuteTainter[k]
|
zoneNoExecuteTainterWorker = nc.zoneNoExecuteTainter[k]
|
||||||
}()
|
}()
|
||||||
// Function should return 'false' and a time after which it should be retried, or 'true' if it shouldn't (it succeeded).
|
// Function should return 'false' and a time after which it should be retried, or 'true' if it shouldn't (it succeeded).
|
||||||
zoneNoExecuteTainterWorker.Try(func(value scheduler.TimedValue) (bool, time.Duration) {
|
zoneNoExecuteTainterWorker.Try(logger, func(value scheduler.TimedValue) (bool, time.Duration) {
|
||||||
node, err := nc.nodeLister.Get(value.Value)
|
node, err := nc.nodeLister.Get(value.Value)
|
||||||
if apierrors.IsNotFound(err) {
|
if apierrors.IsNotFound(err) {
|
||||||
klog.Warningf("Node %v no longer present in nodeLister!", value.Value)
|
logger.Info("Node no longer present in nodeLister", "node", klog.KRef("", value.Value))
|
||||||
return true, 0
|
return true, 0
|
||||||
} else if err != nil {
|
} else if err != nil {
|
||||||
klog.Warningf("Failed to get Node %v from the nodeLister: %v", value.Value, err)
|
logger.Info("Failed to get Node from the nodeLister", "node", klog.KRef("", value.Value), "err", err)
|
||||||
// retry in 50 millisecond
|
// retry in 50 millisecond
|
||||||
return false, 50 * time.Millisecond
|
return false, 50 * time.Millisecond
|
||||||
}
|
}
|
||||||
@ -712,7 +716,7 @@ func (nc *Controller) doNoExecuteTaintingPass(ctx context.Context) {
|
|||||||
oppositeTaint = *NotReadyTaintTemplate
|
oppositeTaint = *NotReadyTaintTemplate
|
||||||
default:
|
default:
|
||||||
// It seems that the Node is ready again, so there's no need to taint it.
|
// It seems that the Node is ready again, so there's no need to taint it.
|
||||||
klog.V(4).Infof("Node %v was in a taint queue, but it's ready now. Ignoring taint request.", value.Value)
|
logger.V(4).Info("Node was in a taint queue, but it's ready now. Ignoring taint request", "node", klog.KRef("", value.Value))
|
||||||
return true, 0
|
return true, 0
|
||||||
}
|
}
|
||||||
result := controllerutil.SwapNodeControllerTaint(ctx, nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{&oppositeTaint}, node)
|
result := controllerutil.SwapNodeControllerTaint(ctx, nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{&oppositeTaint}, node)
|
||||||
@ -742,7 +746,7 @@ func (nc *Controller) doEvictionPass(ctx context.Context) {
|
|||||||
zonePodEvictorKeys = append(zonePodEvictorKeys, k)
|
zonePodEvictorKeys = append(zonePodEvictorKeys, k)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
logger := klog.FromContext(ctx)
|
||||||
for _, k := range zonePodEvictorKeys {
|
for _, k := range zonePodEvictorKeys {
|
||||||
var zonePodEvictionWorker *scheduler.RateLimitedTimedQueue
|
var zonePodEvictionWorker *scheduler.RateLimitedTimedQueue
|
||||||
func() {
|
func() {
|
||||||
@ -756,12 +760,12 @@ func (nc *Controller) doEvictionPass(ctx context.Context) {
|
|||||||
}()
|
}()
|
||||||
|
|
||||||
// Function should return 'false' and a time after which it should be retried, or 'true' if it shouldn't (it succeeded).
|
// Function should return 'false' and a time after which it should be retried, or 'true' if it shouldn't (it succeeded).
|
||||||
zonePodEvictionWorker.Try(func(value scheduler.TimedValue) (bool, time.Duration) {
|
zonePodEvictionWorker.Try(logger, func(value scheduler.TimedValue) (bool, time.Duration) {
|
||||||
node, err := nc.nodeLister.Get(value.Value)
|
node, err := nc.nodeLister.Get(value.Value)
|
||||||
if apierrors.IsNotFound(err) {
|
if apierrors.IsNotFound(err) {
|
||||||
klog.Warningf("Node %v no longer present in nodeLister!", value.Value)
|
logger.Info("Node no longer present in nodeLister", "node", klog.KRef("", value.Value))
|
||||||
} else if err != nil {
|
} else if err != nil {
|
||||||
klog.Warningf("Failed to get Node %v from the nodeLister: %v", value.Value, err)
|
logger.Info("Failed to get Node from the nodeLister", "node", klog.KRef("", value.Value), "err", err)
|
||||||
}
|
}
|
||||||
nodeUID, _ := value.UID.(string)
|
nodeUID, _ := value.UID.(string)
|
||||||
pods, err := nc.getPodsAssignedToNode(value.Value)
|
pods, err := nc.getPodsAssignedToNode(value.Value)
|
||||||
@ -778,10 +782,10 @@ func (nc *Controller) doEvictionPass(ctx context.Context) {
|
|||||||
return false, 0
|
return false, 0
|
||||||
}
|
}
|
||||||
if !nc.nodeEvictionMap.setStatus(value.Value, evicted) {
|
if !nc.nodeEvictionMap.setStatus(value.Value, evicted) {
|
||||||
klog.V(2).Infof("node %v was unregistered in the meantime - skipping setting status", value.Value)
|
logger.V(2).Info("Node was unregistered in the meantime - skipping setting status", "node", klog.KRef("", value.Value))
|
||||||
}
|
}
|
||||||
if remaining {
|
if remaining {
|
||||||
klog.Infof("Pods awaiting deletion due to Controller eviction")
|
logger.Info("Pods awaiting deletion due to Controller eviction")
|
||||||
}
|
}
|
||||||
|
|
||||||
if node != nil {
|
if node != nil {
|
||||||
@ -811,25 +815,24 @@ func (nc *Controller) monitorNodeHealth(ctx context.Context) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
added, deleted, newZoneRepresentatives := nc.classifyNodes(nodes)
|
added, deleted, newZoneRepresentatives := nc.classifyNodes(nodes)
|
||||||
|
logger := klog.FromContext(ctx)
|
||||||
for i := range newZoneRepresentatives {
|
for i := range newZoneRepresentatives {
|
||||||
nc.addPodEvictorForNewZone(newZoneRepresentatives[i])
|
nc.addPodEvictorForNewZone(logger, newZoneRepresentatives[i])
|
||||||
}
|
}
|
||||||
|
|
||||||
for i := range added {
|
for i := range added {
|
||||||
klog.V(1).Infof("Controller observed a new Node: %#v", added[i].Name)
|
logger.V(1).Info("Controller observed a new Node", "node", klog.KRef("", added[i].Name))
|
||||||
controllerutil.RecordNodeEvent(nc.recorder, added[i].Name, string(added[i].UID), v1.EventTypeNormal, "RegisteredNode", fmt.Sprintf("Registered Node %v in Controller", added[i].Name))
|
controllerutil.RecordNodeEvent(nc.recorder, added[i].Name, string(added[i].UID), v1.EventTypeNormal, "RegisteredNode", fmt.Sprintf("Registered Node %v in Controller", added[i].Name))
|
||||||
nc.knownNodeSet[added[i].Name] = added[i]
|
nc.knownNodeSet[added[i].Name] = added[i]
|
||||||
nc.addPodEvictorForNewZone(added[i])
|
nc.addPodEvictorForNewZone(logger, added[i])
|
||||||
if nc.runTaintManager {
|
if nc.runTaintManager {
|
||||||
nc.markNodeAsReachable(ctx, added[i])
|
nc.markNodeAsReachable(ctx, added[i])
|
||||||
} else {
|
} else {
|
||||||
nc.cancelPodEviction(added[i])
|
nc.cancelPodEviction(logger, added[i])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for i := range deleted {
|
for i := range deleted {
|
||||||
klog.V(1).Infof("Controller observed a Node deletion: %v", deleted[i].Name)
|
logger.V(1).Info("Controller observed a Node deletion", "node", klog.KRef("", deleted[i].Name))
|
||||||
controllerutil.RecordNodeEvent(nc.recorder, deleted[i].Name, string(deleted[i].UID), v1.EventTypeNormal, "RemovingNode", fmt.Sprintf("Removing Node %v from Controller", deleted[i].Name))
|
controllerutil.RecordNodeEvent(nc.recorder, deleted[i].Name, string(deleted[i].UID), v1.EventTypeNormal, "RemovingNode", fmt.Sprintf("Removing Node %v from Controller", deleted[i].Name))
|
||||||
delete(nc.knownNodeSet, deleted[i].Name)
|
delete(nc.knownNodeSet, deleted[i].Name)
|
||||||
}
|
}
|
||||||
@ -856,13 +859,12 @@ func (nc *Controller) monitorNodeHealth(ctx context.Context) error {
|
|||||||
name := node.Name
|
name := node.Name
|
||||||
node, err = nc.kubeClient.CoreV1().Nodes().Get(ctx, name, metav1.GetOptions{})
|
node, err = nc.kubeClient.CoreV1().Nodes().Get(ctx, name, metav1.GetOptions{})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
klog.Errorf("Failed while getting a Node to retry updating node health. Probably Node %s was deleted.", name)
|
logger.Error(nil, "Failed while getting a Node to retry updating node health. Probably Node was deleted", "node", klog.KRef("", name))
|
||||||
return false, err
|
return false, err
|
||||||
}
|
}
|
||||||
return false, nil
|
return false, nil
|
||||||
}); err != nil {
|
}); err != nil {
|
||||||
klog.Errorf("Update health of Node '%v' from Controller error: %v. "+
|
logger.Error(err, "Update health of Node from Controller error, Skipping - no pods will be evicted", "node", klog.KObj(node))
|
||||||
"Skipping - no pods will be evicted.", node.Name, err)
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -925,40 +927,35 @@ func (nc *Controller) monitorNodeHealth(ctx context.Context) error {
|
|||||||
func (nc *Controller) processTaintBaseEviction(ctx context.Context, node *v1.Node, observedReadyCondition *v1.NodeCondition) {
|
func (nc *Controller) processTaintBaseEviction(ctx context.Context, node *v1.Node, observedReadyCondition *v1.NodeCondition) {
|
||||||
decisionTimestamp := nc.now()
|
decisionTimestamp := nc.now()
|
||||||
// Check eviction timeout against decisionTimestamp
|
// Check eviction timeout against decisionTimestamp
|
||||||
|
logger := klog.FromContext(ctx)
|
||||||
switch observedReadyCondition.Status {
|
switch observedReadyCondition.Status {
|
||||||
case v1.ConditionFalse:
|
case v1.ConditionFalse:
|
||||||
// We want to update the taint straight away if Node is already tainted with the UnreachableTaint
|
// We want to update the taint straight away if Node is already tainted with the UnreachableTaint
|
||||||
if taintutils.TaintExists(node.Spec.Taints, UnreachableTaintTemplate) {
|
if taintutils.TaintExists(node.Spec.Taints, UnreachableTaintTemplate) {
|
||||||
taintToAdd := *NotReadyTaintTemplate
|
taintToAdd := *NotReadyTaintTemplate
|
||||||
if !controllerutil.SwapNodeControllerTaint(ctx, nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{UnreachableTaintTemplate}, node) {
|
if !controllerutil.SwapNodeControllerTaint(ctx, nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{UnreachableTaintTemplate}, node) {
|
||||||
klog.Errorf("Failed to instantly swap UnreachableTaint to NotReadyTaint. Will try again in the next cycle.")
|
logger.Error(nil, "Failed to instantly swap UnreachableTaint to NotReadyTaint. Will try again in the next cycle")
|
||||||
}
|
}
|
||||||
} else if nc.markNodeForTainting(node, v1.ConditionFalse) {
|
} else if nc.markNodeForTainting(node, v1.ConditionFalse) {
|
||||||
klog.V(2).Infof("Node %v is NotReady as of %v. Adding it to the Taint queue.",
|
logger.V(2).Info("Node is NotReady. Adding it to the Taint queue", "node", klog.KObj(node), "timeStamp", decisionTimestamp)
|
||||||
node.Name,
|
|
||||||
decisionTimestamp,
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
case v1.ConditionUnknown:
|
case v1.ConditionUnknown:
|
||||||
// We want to update the taint straight away if Node is already tainted with the UnreachableTaint
|
// We want to update the taint straight away if Node is already tainted with the UnreachableTaint
|
||||||
if taintutils.TaintExists(node.Spec.Taints, NotReadyTaintTemplate) {
|
if taintutils.TaintExists(node.Spec.Taints, NotReadyTaintTemplate) {
|
||||||
taintToAdd := *UnreachableTaintTemplate
|
taintToAdd := *UnreachableTaintTemplate
|
||||||
if !controllerutil.SwapNodeControllerTaint(ctx, nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{NotReadyTaintTemplate}, node) {
|
if !controllerutil.SwapNodeControllerTaint(ctx, nc.kubeClient, []*v1.Taint{&taintToAdd}, []*v1.Taint{NotReadyTaintTemplate}, node) {
|
||||||
klog.Errorf("Failed to instantly swap NotReadyTaint to UnreachableTaint. Will try again in the next cycle.")
|
logger.Error(nil, "Failed to instantly swap NotReadyTaint to UnreachableTaint. Will try again in the next cycle")
|
||||||
}
|
}
|
||||||
} else if nc.markNodeForTainting(node, v1.ConditionUnknown) {
|
} else if nc.markNodeForTainting(node, v1.ConditionUnknown) {
|
||||||
klog.V(2).Infof("Node %v is unresponsive as of %v. Adding it to the Taint queue.",
|
logger.V(2).Info("Node is unresponsive. Adding it to the Taint queue", "node", klog.KObj(node), "timeStamp", decisionTimestamp)
|
||||||
node.Name,
|
|
||||||
decisionTimestamp,
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
case v1.ConditionTrue:
|
case v1.ConditionTrue:
|
||||||
removed, err := nc.markNodeAsReachable(ctx, node)
|
removed, err := nc.markNodeAsReachable(ctx, node)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
klog.Errorf("Failed to remove taints from node %v. Will retry in next iteration.", node.Name)
|
logger.Error(nil, "Failed to remove taints from node. Will retry in next iteration", "node", klog.KObj(node))
|
||||||
}
|
}
|
||||||
if removed {
|
if removed {
|
||||||
klog.V(2).Infof("Node %s is healthy again, removing all taints", node.Name)
|
logger.V(2).Info("Node is healthy again, removing all taints", "node", klog.KObj(node))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -970,6 +967,7 @@ func (nc *Controller) processNoTaintBaseEviction(ctx context.Context, node *v1.N
|
|||||||
return fmt.Errorf("health data doesn't exist for node %q", node.Name)
|
return fmt.Errorf("health data doesn't exist for node %q", node.Name)
|
||||||
}
|
}
|
||||||
// Check eviction timeout against decisionTimestamp
|
// Check eviction timeout against decisionTimestamp
|
||||||
|
logger := klog.FromContext(ctx)
|
||||||
switch observedReadyCondition.Status {
|
switch observedReadyCondition.Status {
|
||||||
case v1.ConditionFalse:
|
case v1.ConditionFalse:
|
||||||
if decisionTimestamp.After(nodeHealthData.readyTransitionTimestamp.Add(nc.podEvictionTimeout)) {
|
if decisionTimestamp.After(nodeHealthData.readyTransitionTimestamp.Add(nc.podEvictionTimeout)) {
|
||||||
@ -978,11 +976,11 @@ func (nc *Controller) processNoTaintBaseEviction(ctx context.Context, node *v1.N
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if enqueued {
|
if enqueued {
|
||||||
klog.V(2).Infof("Node is NotReady. Adding Pods on Node %s to eviction queue: %v is later than %v + %v",
|
logger.V(2).Info("Node is NotReady. Adding Pods on Node to eviction queue: decisionTimestamp is later than readyTransitionTimestamp + podEvictionTimeout",
|
||||||
node.Name,
|
"node", klog.KObj(node),
|
||||||
decisionTimestamp,
|
"decisionTimestamp", decisionTimestamp,
|
||||||
nodeHealthData.readyTransitionTimestamp,
|
"readyTransitionTimestamp", nodeHealthData.readyTransitionTimestamp,
|
||||||
nc.podEvictionTimeout,
|
"podEvictionTimeout", nc.podEvictionTimeout,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -993,17 +991,17 @@ func (nc *Controller) processNoTaintBaseEviction(ctx context.Context, node *v1.N
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if enqueued {
|
if enqueued {
|
||||||
klog.V(2).Infof("Node is unresponsive. Adding Pods on Node %s to eviction queues: %v is later than %v + %v",
|
logger.V(2).Info("Node is unresponsive. Adding Pods on Node to eviction queues: decisionTimestamp is later than readyTransitionTimestamp + podEvictionTimeout-gracePeriod",
|
||||||
node.Name,
|
"node", klog.KObj(node),
|
||||||
decisionTimestamp,
|
"decisionTimestamp", decisionTimestamp,
|
||||||
nodeHealthData.readyTransitionTimestamp,
|
"readyTransitionTimestamp", nodeHealthData.readyTransitionTimestamp,
|
||||||
nc.podEvictionTimeout-gracePeriod,
|
"podEvictionTimeoutGracePeriod", nc.podEvictionTimeout-gracePeriod,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
case v1.ConditionTrue:
|
case v1.ConditionTrue:
|
||||||
if nc.cancelPodEviction(node) {
|
if nc.cancelPodEviction(logger, node) {
|
||||||
klog.V(2).Infof("Node %s is ready again, cancelled pod eviction", node.Name)
|
logger.V(2).Info("Node is ready again, cancelled pod eviction", "node", klog.KObj(node))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
@ -1076,23 +1074,23 @@ func (nc *Controller) tryUpdateNodeHealth(ctx context.Context, node *v1.Node) (t
|
|||||||
_, savedCondition = controllerutil.GetNodeCondition(nodeHealth.status, v1.NodeReady)
|
_, savedCondition = controllerutil.GetNodeCondition(nodeHealth.status, v1.NodeReady)
|
||||||
savedLease = nodeHealth.lease
|
savedLease = nodeHealth.lease
|
||||||
}
|
}
|
||||||
|
logger := klog.FromContext(ctx)
|
||||||
if nodeHealth == nil {
|
if nodeHealth == nil {
|
||||||
klog.Warningf("Missing timestamp for Node %s. Assuming now as a timestamp.", node.Name)
|
logger.Info("Missing timestamp for Node. Assuming now as a timestamp", "node", klog.KObj(node))
|
||||||
nodeHealth = &nodeHealthData{
|
nodeHealth = &nodeHealthData{
|
||||||
status: &node.Status,
|
status: &node.Status,
|
||||||
probeTimestamp: nc.now(),
|
probeTimestamp: nc.now(),
|
||||||
readyTransitionTimestamp: nc.now(),
|
readyTransitionTimestamp: nc.now(),
|
||||||
}
|
}
|
||||||
} else if savedCondition == nil && currentReadyCondition != nil {
|
} else if savedCondition == nil && currentReadyCondition != nil {
|
||||||
klog.V(1).Infof("Creating timestamp entry for newly observed Node %s", node.Name)
|
logger.V(1).Info("Creating timestamp entry for newly observed Node", "node", klog.KObj(node))
|
||||||
nodeHealth = &nodeHealthData{
|
nodeHealth = &nodeHealthData{
|
||||||
status: &node.Status,
|
status: &node.Status,
|
||||||
probeTimestamp: nc.now(),
|
probeTimestamp: nc.now(),
|
||||||
readyTransitionTimestamp: nc.now(),
|
readyTransitionTimestamp: nc.now(),
|
||||||
}
|
}
|
||||||
} else if savedCondition != nil && currentReadyCondition == nil {
|
} else if savedCondition != nil && currentReadyCondition == nil {
|
||||||
klog.Errorf("ReadyCondition was removed from Status of Node %s", node.Name)
|
logger.Error(nil, "ReadyCondition was removed from Status of Node", "node", klog.KObj(node))
|
||||||
// TODO: figure out what to do in this case. For now we do the same thing as above.
|
// TODO: figure out what to do in this case. For now we do the same thing as above.
|
||||||
nodeHealth = &nodeHealthData{
|
nodeHealth = &nodeHealthData{
|
||||||
status: &node.Status,
|
status: &node.Status,
|
||||||
@ -1104,15 +1102,15 @@ func (nc *Controller) tryUpdateNodeHealth(ctx context.Context, node *v1.Node) (t
|
|||||||
// If ReadyCondition changed since the last time we checked, we update the transition timestamp to "now",
|
// If ReadyCondition changed since the last time we checked, we update the transition timestamp to "now",
|
||||||
// otherwise we leave it as it is.
|
// otherwise we leave it as it is.
|
||||||
if savedCondition.LastTransitionTime != currentReadyCondition.LastTransitionTime {
|
if savedCondition.LastTransitionTime != currentReadyCondition.LastTransitionTime {
|
||||||
klog.V(3).Infof("ReadyCondition for Node %s transitioned from %v to %v", node.Name, savedCondition, currentReadyCondition)
|
logger.V(3).Info("ReadyCondition for Node transitioned from savedCondition to currentReadyCondition", "node", klog.KObj(node), "savedCondition", savedCondition, "currentReadyCondition", currentReadyCondition)
|
||||||
transitionTime = nc.now()
|
transitionTime = nc.now()
|
||||||
} else {
|
} else {
|
||||||
transitionTime = nodeHealth.readyTransitionTimestamp
|
transitionTime = nodeHealth.readyTransitionTimestamp
|
||||||
}
|
}
|
||||||
if klogV := klog.V(5); klogV.Enabled() {
|
if loggerV := logger.V(5); loggerV.Enabled() {
|
||||||
klogV.Infof("Node %s ReadyCondition updated. Updating timestamp: %+v vs %+v.", node.Name, nodeHealth.status, node.Status)
|
loggerV.Info("Node ReadyCondition updated. Updating timestamp", "node", klog.KObj(node), "nodeHealthStatus", nodeHealth.status, "nodeStatus", node.Status)
|
||||||
} else {
|
} else {
|
||||||
klog.V(3).Infof("Node %s ReadyCondition updated. Updating timestamp.", node.Name)
|
logger.V(3).Info("Node ReadyCondition updated. Updating timestamp", "node", klog.KObj(node))
|
||||||
}
|
}
|
||||||
nodeHealth = &nodeHealthData{
|
nodeHealth = &nodeHealthData{
|
||||||
status: &node.Status,
|
status: &node.Status,
|
||||||
@ -1147,7 +1145,7 @@ func (nc *Controller) tryUpdateNodeHealth(ctx context.Context, node *v1.Node) (t
|
|||||||
for _, nodeConditionType := range nodeConditionTypes {
|
for _, nodeConditionType := range nodeConditionTypes {
|
||||||
_, currentCondition := controllerutil.GetNodeCondition(&node.Status, nodeConditionType)
|
_, currentCondition := controllerutil.GetNodeCondition(&node.Status, nodeConditionType)
|
||||||
if currentCondition == nil {
|
if currentCondition == nil {
|
||||||
klog.V(2).Infof("Condition %v of node %v was never updated by kubelet", nodeConditionType, node.Name)
|
logger.V(2).Info("Condition of node was never updated by kubelet", "nodeConditionType", nodeConditionType, "node", klog.KObj(node))
|
||||||
node.Status.Conditions = append(node.Status.Conditions, v1.NodeCondition{
|
node.Status.Conditions = append(node.Status.Conditions, v1.NodeCondition{
|
||||||
Type: nodeConditionType,
|
Type: nodeConditionType,
|
||||||
Status: v1.ConditionUnknown,
|
Status: v1.ConditionUnknown,
|
||||||
@ -1157,8 +1155,8 @@ func (nc *Controller) tryUpdateNodeHealth(ctx context.Context, node *v1.Node) (t
|
|||||||
LastTransitionTime: nowTimestamp,
|
LastTransitionTime: nowTimestamp,
|
||||||
})
|
})
|
||||||
} else {
|
} else {
|
||||||
klog.V(2).Infof("node %v hasn't been updated for %+v. Last %v is: %+v",
|
logger.V(2).Info("Node hasn't been updated",
|
||||||
node.Name, nc.now().Time.Sub(nodeHealth.probeTimestamp.Time), nodeConditionType, currentCondition)
|
"node", klog.KObj(node), "duration", nc.now().Time.Sub(nodeHealth.probeTimestamp.Time), "nodeConditionType", nodeConditionType, "currentCondition", currentCondition)
|
||||||
if currentCondition.Status != v1.ConditionUnknown {
|
if currentCondition.Status != v1.ConditionUnknown {
|
||||||
currentCondition.Status = v1.ConditionUnknown
|
currentCondition.Status = v1.ConditionUnknown
|
||||||
currentCondition.Reason = "NodeStatusUnknown"
|
currentCondition.Reason = "NodeStatusUnknown"
|
||||||
@ -1172,7 +1170,7 @@ func (nc *Controller) tryUpdateNodeHealth(ctx context.Context, node *v1.Node) (t
|
|||||||
|
|
||||||
if !apiequality.Semantic.DeepEqual(currentReadyCondition, &observedReadyCondition) {
|
if !apiequality.Semantic.DeepEqual(currentReadyCondition, &observedReadyCondition) {
|
||||||
if _, err := nc.kubeClient.CoreV1().Nodes().UpdateStatus(ctx, node, metav1.UpdateOptions{}); err != nil {
|
if _, err := nc.kubeClient.CoreV1().Nodes().UpdateStatus(ctx, node, metav1.UpdateOptions{}); err != nil {
|
||||||
klog.Errorf("Error updating node %s: %v", node.Name, err)
|
logger.Error(err, "Error updating node", "node", klog.KObj(node))
|
||||||
return gracePeriod, observedReadyCondition, currentReadyCondition, err
|
return gracePeriod, observedReadyCondition, currentReadyCondition, err
|
||||||
}
|
}
|
||||||
nodeHealth = &nodeHealthData{
|
nodeHealth = &nodeHealthData{
|
||||||
@ -1191,6 +1189,7 @@ func (nc *Controller) tryUpdateNodeHealth(ctx context.Context, node *v1.Node) (t
|
|||||||
func (nc *Controller) handleDisruption(ctx context.Context, zoneToNodeConditions map[string][]*v1.NodeCondition, nodes []*v1.Node) {
|
func (nc *Controller) handleDisruption(ctx context.Context, zoneToNodeConditions map[string][]*v1.NodeCondition, nodes []*v1.Node) {
|
||||||
newZoneStates := map[string]ZoneState{}
|
newZoneStates := map[string]ZoneState{}
|
||||||
allAreFullyDisrupted := true
|
allAreFullyDisrupted := true
|
||||||
|
logger := klog.FromContext(ctx)
|
||||||
for k, v := range zoneToNodeConditions {
|
for k, v := range zoneToNodeConditions {
|
||||||
zoneSize.WithLabelValues(k).Set(float64(len(v)))
|
zoneSize.WithLabelValues(k).Set(float64(len(v)))
|
||||||
unhealthy, newState := nc.computeZoneStateFunc(v)
|
unhealthy, newState := nc.computeZoneStateFunc(v)
|
||||||
@ -1201,7 +1200,7 @@ func (nc *Controller) handleDisruption(ctx context.Context, zoneToNodeConditions
|
|||||||
}
|
}
|
||||||
newZoneStates[k] = newState
|
newZoneStates[k] = newState
|
||||||
if _, had := nc.zoneStates[k]; !had {
|
if _, had := nc.zoneStates[k]; !had {
|
||||||
klog.Errorf("Setting initial state for unseen zone: %v", k)
|
logger.Error(nil, "Setting initial state for unseen zone", "zone", k)
|
||||||
nc.zoneStates[k] = stateInitial
|
nc.zoneStates[k] = stateInitial
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1229,15 +1228,15 @@ func (nc *Controller) handleDisruption(ctx context.Context, zoneToNodeConditions
|
|||||||
if !allAreFullyDisrupted || !allWasFullyDisrupted {
|
if !allAreFullyDisrupted || !allWasFullyDisrupted {
|
||||||
// We're switching to full disruption mode
|
// We're switching to full disruption mode
|
||||||
if allAreFullyDisrupted {
|
if allAreFullyDisrupted {
|
||||||
klog.Info("Controller detected that all Nodes are not-Ready. Entering master disruption mode.")
|
logger.Info("Controller detected that all Nodes are not-Ready. Entering master disruption mode")
|
||||||
for i := range nodes {
|
for i := range nodes {
|
||||||
if nc.runTaintManager {
|
if nc.runTaintManager {
|
||||||
_, err := nc.markNodeAsReachable(ctx, nodes[i])
|
_, err := nc.markNodeAsReachable(ctx, nodes[i])
|
||||||
if err != nil {
|
if err != nil {
|
||||||
klog.Errorf("Failed to remove taints from Node %v", nodes[i].Name)
|
logger.Error(nil, "Failed to remove taints from Node", "node", klog.KObj(nodes[i]))
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
nc.cancelPodEviction(nodes[i])
|
nc.cancelPodEviction(logger, nodes[i])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// We stop all evictions.
|
// We stop all evictions.
|
||||||
@ -1256,7 +1255,7 @@ func (nc *Controller) handleDisruption(ctx context.Context, zoneToNodeConditions
|
|||||||
}
|
}
|
||||||
// We're exiting full disruption mode
|
// We're exiting full disruption mode
|
||||||
if allWasFullyDisrupted {
|
if allWasFullyDisrupted {
|
||||||
klog.Info("Controller detected that some Nodes are Ready. Exiting master disruption mode.")
|
logger.Info("Controller detected that some Nodes are Ready. Exiting master disruption mode")
|
||||||
// When exiting disruption mode update probe timestamps on all Nodes.
|
// When exiting disruption mode update probe timestamps on all Nodes.
|
||||||
now := nc.now()
|
now := nc.now()
|
||||||
for i := range nodes {
|
for i := range nodes {
|
||||||
@ -1279,7 +1278,7 @@ func (nc *Controller) handleDisruption(ctx context.Context, zoneToNodeConditions
|
|||||||
if v == newState {
|
if v == newState {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
klog.Infof("Controller detected that zone %v is now in state %v.", k, newState)
|
logger.Info("Controller detected that zone is now in new state", "zone", k, "newState", newState)
|
||||||
nc.setLimiterInZone(k, len(zoneToNodeConditions[k]), newState)
|
nc.setLimiterInZone(k, len(zoneToNodeConditions[k]), newState)
|
||||||
nc.zoneStates[k] = newState
|
nc.zoneStates[k] = newState
|
||||||
}
|
}
|
||||||
@ -1317,12 +1316,13 @@ func (nc *Controller) doPodProcessingWorker(ctx context.Context) {
|
|||||||
func (nc *Controller) processPod(ctx context.Context, podItem podUpdateItem) {
|
func (nc *Controller) processPod(ctx context.Context, podItem podUpdateItem) {
|
||||||
defer nc.podUpdateQueue.Done(podItem)
|
defer nc.podUpdateQueue.Done(podItem)
|
||||||
pod, err := nc.podLister.Pods(podItem.namespace).Get(podItem.name)
|
pod, err := nc.podLister.Pods(podItem.namespace).Get(podItem.name)
|
||||||
|
logger := klog.FromContext(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if apierrors.IsNotFound(err) {
|
if apierrors.IsNotFound(err) {
|
||||||
// If the pod was deleted, there is no need to requeue.
|
// If the pod was deleted, there is no need to requeue.
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
klog.Warningf("Failed to read pod %v/%v: %v.", podItem.namespace, podItem.name, err)
|
logger.Info("Failed to read pod", "pod", klog.KRef(podItem.namespace, podItem.name), "err", err)
|
||||||
nc.podUpdateQueue.AddRateLimited(podItem)
|
nc.podUpdateQueue.AddRateLimited(podItem)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -1338,7 +1338,7 @@ func (nc *Controller) processPod(ctx context.Context, podItem podUpdateItem) {
|
|||||||
|
|
||||||
node, err := nc.nodeLister.Get(nodeName)
|
node, err := nc.nodeLister.Get(nodeName)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
klog.Warningf("Failed to read node %v: %v.", nodeName, err)
|
logger.Info("Failed to read node", "node", klog.KRef("", nodeName), "err", err)
|
||||||
nc.podUpdateQueue.AddRateLimited(podItem)
|
nc.podUpdateQueue.AddRateLimited(podItem)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -1356,7 +1356,7 @@ func (nc *Controller) processPod(ctx context.Context, podItem podUpdateItem) {
|
|||||||
// Pods are processed by TaintManager.
|
// Pods are processed by TaintManager.
|
||||||
if !nc.runTaintManager {
|
if !nc.runTaintManager {
|
||||||
if err := nc.processNoTaintBaseEviction(ctx, node, currentReadyCondition, nc.nodeMonitorGracePeriod, pods); err != nil {
|
if err := nc.processNoTaintBaseEviction(ctx, node, currentReadyCondition, nc.nodeMonitorGracePeriod, pods); err != nil {
|
||||||
klog.Warningf("Unable to process pod %+v eviction from node %v: %v.", podItem, nodeName, err)
|
logger.Info("Unable to process pod eviction from node", "pod", klog.KRef(podItem.namespace, podItem.name), "node", klog.KRef("", nodeName), "err", err)
|
||||||
nc.podUpdateQueue.AddRateLimited(podItem)
|
nc.podUpdateQueue.AddRateLimited(podItem)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -1364,7 +1364,7 @@ func (nc *Controller) processPod(ctx context.Context, podItem podUpdateItem) {
|
|||||||
|
|
||||||
if currentReadyCondition.Status != v1.ConditionTrue {
|
if currentReadyCondition.Status != v1.ConditionTrue {
|
||||||
if err := controllerutil.MarkPodsNotReady(ctx, nc.kubeClient, nc.recorder, pods, nodeName); err != nil {
|
if err := controllerutil.MarkPodsNotReady(ctx, nc.kubeClient, nc.recorder, pods, nodeName); err != nil {
|
||||||
klog.Warningf("Unable to mark pod %+v NotReady on node %v: %v.", podItem, nodeName, err)
|
logger.Info("Unable to mark pod NotReady on node", "pod", klog.KRef(podItem.namespace, podItem.name), "node", klog.KRef("", nodeName), "err", err)
|
||||||
nc.podUpdateQueue.AddRateLimited(podItem)
|
nc.podUpdateQueue.AddRateLimited(podItem)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1447,7 +1447,7 @@ func (nc *Controller) ReducedQPSFunc(nodeNum int) float32 {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// addPodEvictorForNewZone checks if new zone appeared, and if so add new evictor.
|
// addPodEvictorForNewZone checks if new zone appeared, and if so add new evictor.
|
||||||
func (nc *Controller) addPodEvictorForNewZone(node *v1.Node) {
|
func (nc *Controller) addPodEvictorForNewZone(logger klog.Logger, node *v1.Node) {
|
||||||
nc.evictorLock.Lock()
|
nc.evictorLock.Lock()
|
||||||
defer nc.evictorLock.Unlock()
|
defer nc.evictorLock.Unlock()
|
||||||
zone := nodetopology.GetZoneKey(node)
|
zone := nodetopology.GetZoneKey(node)
|
||||||
@ -1463,7 +1463,7 @@ func (nc *Controller) addPodEvictorForNewZone(node *v1.Node) {
|
|||||||
flowcontrol.NewTokenBucketRateLimiter(nc.evictionLimiterQPS, scheduler.EvictionRateLimiterBurst))
|
flowcontrol.NewTokenBucketRateLimiter(nc.evictionLimiterQPS, scheduler.EvictionRateLimiterBurst))
|
||||||
}
|
}
|
||||||
// Init the metric for the new zone.
|
// Init the metric for the new zone.
|
||||||
klog.Infof("Initializing eviction metric for zone: %v", zone)
|
logger.Info("Initializing eviction metric for zone", "zone", zone)
|
||||||
evictionsNumber.WithLabelValues(zone).Add(0)
|
evictionsNumber.WithLabelValues(zone).Add(0)
|
||||||
evictionsTotal.WithLabelValues(zone).Add(0)
|
evictionsTotal.WithLabelValues(zone).Add(0)
|
||||||
}
|
}
|
||||||
@ -1471,16 +1471,16 @@ func (nc *Controller) addPodEvictorForNewZone(node *v1.Node) {
|
|||||||
|
|
||||||
// cancelPodEviction removes any queued evictions, typically because the node is available again. It
|
// cancelPodEviction removes any queued evictions, typically because the node is available again. It
|
||||||
// returns true if an eviction was queued.
|
// returns true if an eviction was queued.
|
||||||
func (nc *Controller) cancelPodEviction(node *v1.Node) bool {
|
func (nc *Controller) cancelPodEviction(logger klog.Logger, node *v1.Node) bool {
|
||||||
zone := nodetopology.GetZoneKey(node)
|
zone := nodetopology.GetZoneKey(node)
|
||||||
if !nc.nodeEvictionMap.setStatus(node.Name, unmarked) {
|
if !nc.nodeEvictionMap.setStatus(node.Name, unmarked) {
|
||||||
klog.V(2).Infof("node %v was unregistered in the meantime - skipping setting status", node.Name)
|
logger.V(2).Info("Node was unregistered in the meantime - skipping setting status", "node", klog.KObj(node))
|
||||||
}
|
}
|
||||||
nc.evictorLock.Lock()
|
nc.evictorLock.Lock()
|
||||||
defer nc.evictorLock.Unlock()
|
defer nc.evictorLock.Unlock()
|
||||||
wasDeleting := nc.zonePodEvictor[zone].Remove(node.Name)
|
wasDeleting := nc.zonePodEvictor[zone].Remove(node.Name)
|
||||||
if wasDeleting {
|
if wasDeleting {
|
||||||
klog.V(2).Infof("Cancelling pod Eviction on Node: %v", node.Name)
|
logger.V(2).Info("Cancelling pod Eviction on Node", "node", klog.KObj(node))
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
@ -1502,8 +1502,9 @@ func (nc *Controller) evictPods(ctx context.Context, node *v1.Node, pods []*v1.P
|
|||||||
}
|
}
|
||||||
return false, nil
|
return false, nil
|
||||||
}
|
}
|
||||||
|
logger := klog.FromContext(ctx)
|
||||||
if !nc.nodeEvictionMap.setStatus(node.Name, toBeEvicted) {
|
if !nc.nodeEvictionMap.setStatus(node.Name, toBeEvicted) {
|
||||||
klog.V(2).Infof("node %v was unregistered in the meantime - skipping setting status", node.Name)
|
logger.V(2).Info("Node was unregistered in the meantime - skipping setting status", "node", klog.KObj(node))
|
||||||
}
|
}
|
||||||
|
|
||||||
nc.evictorLock.Lock()
|
nc.evictorLock.Lock()
|
||||||
@ -1532,13 +1533,14 @@ func (nc *Controller) markNodeForTainting(node *v1.Node, status v1.ConditionStat
|
|||||||
|
|
||||||
func (nc *Controller) markNodeAsReachable(ctx context.Context, node *v1.Node) (bool, error) {
|
func (nc *Controller) markNodeAsReachable(ctx context.Context, node *v1.Node) (bool, error) {
|
||||||
err := controller.RemoveTaintOffNode(ctx, nc.kubeClient, node.Name, node, UnreachableTaintTemplate)
|
err := controller.RemoveTaintOffNode(ctx, nc.kubeClient, node.Name, node, UnreachableTaintTemplate)
|
||||||
|
logger := klog.FromContext(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
klog.Errorf("Failed to remove taint from node %v: %v", node.Name, err)
|
logger.Error(err, "Failed to remove taint from node", "node", klog.KObj(node))
|
||||||
return false, err
|
return false, err
|
||||||
}
|
}
|
||||||
err = controller.RemoveTaintOffNode(ctx, nc.kubeClient, node.Name, node, NotReadyTaintTemplate)
|
err = controller.RemoveTaintOffNode(ctx, nc.kubeClient, node.Name, node, NotReadyTaintTemplate)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
klog.Errorf("Failed to remove taint from node %v: %v", node.Name, err)
|
logger.Error(err, "Failed to remove taint from node", "node", klog.KObj(node))
|
||||||
return false, err
|
return false, err
|
||||||
}
|
}
|
||||||
nc.evictorLock.Lock()
|
nc.evictorLock.Lock()
|
||||||
|
@ -40,6 +40,8 @@ import (
|
|||||||
clientset "k8s.io/client-go/kubernetes"
|
clientset "k8s.io/client-go/kubernetes"
|
||||||
"k8s.io/client-go/kubernetes/fake"
|
"k8s.io/client-go/kubernetes/fake"
|
||||||
testcore "k8s.io/client-go/testing"
|
testcore "k8s.io/client-go/testing"
|
||||||
|
"k8s.io/klog/v2"
|
||||||
|
"k8s.io/klog/v2/ktesting"
|
||||||
kubeletapis "k8s.io/kubelet/pkg/apis"
|
kubeletapis "k8s.io/kubelet/pkg/apis"
|
||||||
"k8s.io/kubernetes/pkg/controller"
|
"k8s.io/kubernetes/pkg/controller"
|
||||||
"k8s.io/kubernetes/pkg/controller/nodelifecycle/scheduler"
|
"k8s.io/kubernetes/pkg/controller/nodelifecycle/scheduler"
|
||||||
@ -87,12 +89,12 @@ type nodeLifecycleController struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// doEviction does the fake eviction and returns the status of eviction operation.
|
// doEviction does the fake eviction and returns the status of eviction operation.
|
||||||
func (nc *nodeLifecycleController) doEviction(fakeNodeHandler *testutil.FakeNodeHandler) bool {
|
func (nc *nodeLifecycleController) doEviction(logger klog.Logger, fakeNodeHandler *testutil.FakeNodeHandler) bool {
|
||||||
nc.evictorLock.Lock()
|
nc.evictorLock.Lock()
|
||||||
defer nc.evictorLock.Unlock()
|
defer nc.evictorLock.Unlock()
|
||||||
zones := testutil.GetZones(fakeNodeHandler)
|
zones := testutil.GetZones(fakeNodeHandler)
|
||||||
for _, zone := range zones {
|
for _, zone := range zones {
|
||||||
nc.zonePodEvictor[zone].Try(func(value scheduler.TimedValue) (bool, time.Duration) {
|
nc.zonePodEvictor[zone].Try(logger, func(value scheduler.TimedValue) (bool, time.Duration) {
|
||||||
uid, _ := value.UID.(string)
|
uid, _ := value.UID.(string)
|
||||||
pods, _ := nc.getPodsAssignedToNode(value.Value)
|
pods, _ := nc.getPodsAssignedToNode(value.Value)
|
||||||
controllerutil.DeletePods(context.TODO(), fakeNodeHandler, pods, nc.recorder, value.Value, uid, nc.daemonSetStore)
|
controllerutil.DeletePods(context.TODO(), fakeNodeHandler, pods, nc.recorder, value.Value, uid, nc.daemonSetStore)
|
||||||
@ -720,9 +722,10 @@ func TestMonitorNodeHealthEvictPods(t *testing.T) {
|
|||||||
t.Errorf("unexpected error: %v", err)
|
t.Errorf("unexpected error: %v", err)
|
||||||
}
|
}
|
||||||
zones := testutil.GetZones(item.fakeNodeHandler)
|
zones := testutil.GetZones(item.fakeNodeHandler)
|
||||||
|
logger, _ := ktesting.NewTestContext(t)
|
||||||
for _, zone := range zones {
|
for _, zone := range zones {
|
||||||
if _, ok := nodeController.zonePodEvictor[zone]; ok {
|
if _, ok := nodeController.zonePodEvictor[zone]; ok {
|
||||||
nodeController.zonePodEvictor[zone].Try(func(value scheduler.TimedValue) (bool, time.Duration) {
|
nodeController.zonePodEvictor[zone].Try(logger, func(value scheduler.TimedValue) (bool, time.Duration) {
|
||||||
nodeUID, _ := value.UID.(string)
|
nodeUID, _ := value.UID.(string)
|
||||||
pods, err := nodeController.getPodsAssignedToNode(value.Value)
|
pods, err := nodeController.getPodsAssignedToNode(value.Value)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -882,8 +885,9 @@ func TestPodStatusChange(t *testing.T) {
|
|||||||
t.Errorf("unexpected error: %v", err)
|
t.Errorf("unexpected error: %v", err)
|
||||||
}
|
}
|
||||||
zones := testutil.GetZones(item.fakeNodeHandler)
|
zones := testutil.GetZones(item.fakeNodeHandler)
|
||||||
|
logger, _ := ktesting.NewTestContext(t)
|
||||||
for _, zone := range zones {
|
for _, zone := range zones {
|
||||||
nodeController.zonePodEvictor[zone].Try(func(value scheduler.TimedValue) (bool, time.Duration) {
|
nodeController.zonePodEvictor[zone].Try(logger, func(value scheduler.TimedValue) (bool, time.Duration) {
|
||||||
nodeUID, _ := value.UID.(string)
|
nodeUID, _ := value.UID.(string)
|
||||||
pods, err := nodeController.getPodsAssignedToNode(value.Value)
|
pods, err := nodeController.getPodsAssignedToNode(value.Value)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -1466,8 +1470,9 @@ func TestMonitorNodeHealthEvictPodsWithDisruption(t *testing.T) {
|
|||||||
// Infinite loop, used for retrying in case ratelimiter fails to reload for Try function.
|
// Infinite loop, used for retrying in case ratelimiter fails to reload for Try function.
|
||||||
// this breaks when we have the status that we need for test case or when we don't see the
|
// this breaks when we have the status that we need for test case or when we don't see the
|
||||||
// intended result after 1 minute.
|
// intended result after 1 minute.
|
||||||
|
logger, _ := ktesting.NewTestContext(t)
|
||||||
for {
|
for {
|
||||||
podEvicted = nodeController.doEviction(fakeNodeHandler)
|
podEvicted = nodeController.doEviction(logger, fakeNodeHandler)
|
||||||
if podEvicted == item.expectedEvictPods || time.Since(start) > 1*time.Minute {
|
if podEvicted == item.expectedEvictPods || time.Since(start) > 1*time.Minute {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
@ -23,7 +23,6 @@ import (
|
|||||||
|
|
||||||
"k8s.io/apimachinery/pkg/util/sets"
|
"k8s.io/apimachinery/pkg/util/sets"
|
||||||
"k8s.io/client-go/util/flowcontrol"
|
"k8s.io/client-go/util/flowcontrol"
|
||||||
|
|
||||||
"k8s.io/klog/v2"
|
"k8s.io/klog/v2"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -229,14 +228,14 @@ type ActionFunc func(TimedValue) (bool, time.Duration)
|
|||||||
// function in NodeController when Node becomes Ready again) TODO:
|
// function in NodeController when Node becomes Ready again) TODO:
|
||||||
// figure out a good way to do garbage collection for all Nodes that
|
// figure out a good way to do garbage collection for all Nodes that
|
||||||
// were removed from the cluster.
|
// were removed from the cluster.
|
||||||
func (q *RateLimitedTimedQueue) Try(fn ActionFunc) {
|
func (q *RateLimitedTimedQueue) Try(logger klog.Logger, fn ActionFunc) {
|
||||||
val, ok := q.queue.Head()
|
val, ok := q.queue.Head()
|
||||||
q.limiterLock.Lock()
|
q.limiterLock.Lock()
|
||||||
defer q.limiterLock.Unlock()
|
defer q.limiterLock.Unlock()
|
||||||
for ok {
|
for ok {
|
||||||
// rate limit the queue checking
|
// rate limit the queue checking
|
||||||
if !q.limiter.TryAccept() {
|
if !q.limiter.TryAccept() {
|
||||||
klog.V(10).Infof("Try rate limited for value: %v", val)
|
logger.V(10).Info("Try rate limited", "value", val)
|
||||||
// Try again later
|
// Try again later
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
@ -23,6 +23,7 @@ import (
|
|||||||
|
|
||||||
"k8s.io/apimachinery/pkg/util/sets"
|
"k8s.io/apimachinery/pkg/util/sets"
|
||||||
"k8s.io/client-go/util/flowcontrol"
|
"k8s.io/client-go/util/flowcontrol"
|
||||||
|
"k8s.io/klog/v2/ktesting"
|
||||||
)
|
)
|
||||||
|
|
||||||
func CheckQueueEq(lhs []string, rhs TimedQueue) bool {
|
func CheckQueueEq(lhs []string, rhs TimedQueue) bool {
|
||||||
@ -144,7 +145,8 @@ func TestTry(t *testing.T) {
|
|||||||
evictor.Remove("second")
|
evictor.Remove("second")
|
||||||
|
|
||||||
deletedMap := sets.NewString()
|
deletedMap := sets.NewString()
|
||||||
evictor.Try(func(value TimedValue) (bool, time.Duration) {
|
logger, _ := ktesting.NewTestContext(t)
|
||||||
|
evictor.Try(logger, func(value TimedValue) (bool, time.Duration) {
|
||||||
deletedMap.Insert(value.Value)
|
deletedMap.Insert(value.Value)
|
||||||
return true, 0
|
return true, 0
|
||||||
})
|
})
|
||||||
@ -180,7 +182,8 @@ func TestTryOrdering(t *testing.T) {
|
|||||||
order := []string{}
|
order := []string{}
|
||||||
count := 0
|
count := 0
|
||||||
hasQueued := false
|
hasQueued := false
|
||||||
evictor.Try(func(value TimedValue) (bool, time.Duration) {
|
logger, _ := ktesting.NewTestContext(t)
|
||||||
|
evictor.Try(logger, func(value TimedValue) (bool, time.Duration) {
|
||||||
count++
|
count++
|
||||||
t.Logf("eviction %d", count)
|
t.Logf("eviction %d", count)
|
||||||
if value.ProcessAt.IsZero() {
|
if value.ProcessAt.IsZero() {
|
||||||
@ -242,8 +245,8 @@ func TestTryRemovingWhileTry(t *testing.T) {
|
|||||||
evictor.Remove("second")
|
evictor.Remove("second")
|
||||||
close(wait)
|
close(wait)
|
||||||
}()
|
}()
|
||||||
|
logger, _ := ktesting.NewTestContext(t)
|
||||||
evictor.Try(func(value TimedValue) (bool, time.Duration) {
|
evictor.Try(logger, func(value TimedValue) (bool, time.Duration) {
|
||||||
count++
|
count++
|
||||||
if value.AddedAt.IsZero() {
|
if value.AddedAt.IsZero() {
|
||||||
t.Fatalf("added should not be zero")
|
t.Fatalf("added should not be zero")
|
||||||
@ -313,7 +316,8 @@ func TestAddAfterTry(t *testing.T) {
|
|||||||
evictor.Remove("second")
|
evictor.Remove("second")
|
||||||
|
|
||||||
deletedMap := sets.NewString()
|
deletedMap := sets.NewString()
|
||||||
evictor.Try(func(value TimedValue) (bool, time.Duration) {
|
logger, _ := ktesting.NewTestContext(t)
|
||||||
|
evictor.Try(logger, func(value TimedValue) (bool, time.Duration) {
|
||||||
deletedMap.Insert(value.Value)
|
deletedMap.Insert(value.Value)
|
||||||
return true, 0
|
return true, 0
|
||||||
})
|
})
|
||||||
@ -327,7 +331,7 @@ func TestAddAfterTry(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
evictor.Add("first", "11111")
|
evictor.Add("first", "11111")
|
||||||
evictor.Try(func(value TimedValue) (bool, time.Duration) {
|
evictor.Try(logger, func(value TimedValue) (bool, time.Duration) {
|
||||||
t.Errorf("We shouldn't process the same value if the explicit remove wasn't called.")
|
t.Errorf("We shouldn't process the same value if the explicit remove wasn't called.")
|
||||||
return true, 0
|
return true, 0
|
||||||
})
|
})
|
||||||
|
@ -105,7 +105,7 @@ func deletePodHandler(c clientset.Interface, emitEventFunc func(types.Namespaced
|
|||||||
return func(ctx context.Context, args *WorkArgs) error {
|
return func(ctx context.Context, args *WorkArgs) error {
|
||||||
ns := args.NamespacedName.Namespace
|
ns := args.NamespacedName.Namespace
|
||||||
name := args.NamespacedName.Name
|
name := args.NamespacedName.Name
|
||||||
klog.InfoS("NoExecuteTaintManager is deleting pod", "pod", args.NamespacedName.String())
|
klog.FromContext(ctx).Info("NoExecuteTaintManager is deleting pod", "pod", args.NamespacedName.String())
|
||||||
if emitEventFunc != nil {
|
if emitEventFunc != nil {
|
||||||
emitEventFunc(args.NamespacedName)
|
emitEventFunc(args.NamespacedName)
|
||||||
}
|
}
|
||||||
@ -202,16 +202,17 @@ func NewNoExecuteTaintManager(ctx context.Context, c clientset.Interface, podLis
|
|||||||
// Run starts NoExecuteTaintManager which will run in loop until `stopCh` is closed.
|
// Run starts NoExecuteTaintManager which will run in loop until `stopCh` is closed.
|
||||||
func (tc *NoExecuteTaintManager) Run(ctx context.Context) {
|
func (tc *NoExecuteTaintManager) Run(ctx context.Context) {
|
||||||
defer utilruntime.HandleCrash()
|
defer utilruntime.HandleCrash()
|
||||||
|
logger := klog.FromContext(ctx)
|
||||||
klog.InfoS("Starting NoExecuteTaintManager")
|
logger.Info("Starting NoExecuteTaintManager")
|
||||||
|
|
||||||
// Start events processing pipeline.
|
// Start events processing pipeline.
|
||||||
tc.broadcaster.StartStructuredLogging(0)
|
tc.broadcaster.StartStructuredLogging(0)
|
||||||
if tc.client != nil {
|
if tc.client != nil {
|
||||||
klog.InfoS("Sending events to api server")
|
logger.Info("Sending events to api server")
|
||||||
tc.broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: tc.client.CoreV1().Events("")})
|
tc.broadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: tc.client.CoreV1().Events("")})
|
||||||
} else {
|
} else {
|
||||||
klog.Fatalf("kubeClient is nil when starting NodeController")
|
logger.Error(nil, "kubeClient is nil when starting NodeController")
|
||||||
|
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
|
||||||
}
|
}
|
||||||
defer tc.broadcaster.Shutdown()
|
defer tc.broadcaster.Shutdown()
|
||||||
|
|
||||||
@ -363,8 +364,8 @@ func (tc *NoExecuteTaintManager) NodeUpdated(oldNode *v1.Node, newNode *v1.Node)
|
|||||||
tc.nodeUpdateQueue.Add(updateItem)
|
tc.nodeUpdateQueue.Add(updateItem)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (tc *NoExecuteTaintManager) cancelWorkWithEvent(nsName types.NamespacedName) {
|
func (tc *NoExecuteTaintManager) cancelWorkWithEvent(logger klog.Logger, nsName types.NamespacedName) {
|
||||||
if tc.taintEvictionQueue.CancelWork(nsName.String()) {
|
if tc.taintEvictionQueue.CancelWork(logger, nsName.String()) {
|
||||||
tc.emitCancelPodDeletionEvent(nsName)
|
tc.emitCancelPodDeletionEvent(nsName)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -377,22 +378,23 @@ func (tc *NoExecuteTaintManager) processPodOnNode(
|
|||||||
taints []v1.Taint,
|
taints []v1.Taint,
|
||||||
now time.Time,
|
now time.Time,
|
||||||
) {
|
) {
|
||||||
|
logger := klog.FromContext(ctx)
|
||||||
if len(taints) == 0 {
|
if len(taints) == 0 {
|
||||||
tc.cancelWorkWithEvent(podNamespacedName)
|
tc.cancelWorkWithEvent(logger, podNamespacedName)
|
||||||
}
|
}
|
||||||
allTolerated, usedTolerations := v1helper.GetMatchingTolerations(taints, tolerations)
|
allTolerated, usedTolerations := v1helper.GetMatchingTolerations(taints, tolerations)
|
||||||
if !allTolerated {
|
if !allTolerated {
|
||||||
klog.V(2).InfoS("Not all taints are tolerated after update for pod on node", "pod", podNamespacedName.String(), "node", klog.KRef("", nodeName))
|
logger.V(2).Info("Not all taints are tolerated after update for pod on node", "pod", podNamespacedName.String(), "node", klog.KRef("", nodeName))
|
||||||
// We're canceling scheduled work (if any), as we're going to delete the Pod right away.
|
// We're canceling scheduled work (if any), as we're going to delete the Pod right away.
|
||||||
tc.cancelWorkWithEvent(podNamespacedName)
|
tc.cancelWorkWithEvent(logger, podNamespacedName)
|
||||||
tc.taintEvictionQueue.AddWork(ctx, NewWorkArgs(podNamespacedName.Name, podNamespacedName.Namespace), time.Now(), time.Now())
|
tc.taintEvictionQueue.AddWork(ctx, NewWorkArgs(podNamespacedName.Name, podNamespacedName.Namespace), time.Now(), time.Now())
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
minTolerationTime := getMinTolerationTime(usedTolerations)
|
minTolerationTime := getMinTolerationTime(usedTolerations)
|
||||||
// getMinTolerationTime returns negative value to denote infinite toleration.
|
// getMinTolerationTime returns negative value to denote infinite toleration.
|
||||||
if minTolerationTime < 0 {
|
if minTolerationTime < 0 {
|
||||||
klog.V(4).InfoS("Current tolerations for pod tolerate forever, cancelling any scheduled deletion", "pod", podNamespacedName.String())
|
logger.V(4).Info("Current tolerations for pod tolerate forever, cancelling any scheduled deletion", "pod", podNamespacedName.String())
|
||||||
tc.cancelWorkWithEvent(podNamespacedName)
|
tc.cancelWorkWithEvent(logger, podNamespacedName)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -404,19 +406,20 @@ func (tc *NoExecuteTaintManager) processPodOnNode(
|
|||||||
if startTime.Add(minTolerationTime).Before(triggerTime) {
|
if startTime.Add(minTolerationTime).Before(triggerTime) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
tc.cancelWorkWithEvent(podNamespacedName)
|
tc.cancelWorkWithEvent(logger, podNamespacedName)
|
||||||
}
|
}
|
||||||
tc.taintEvictionQueue.AddWork(ctx, NewWorkArgs(podNamespacedName.Name, podNamespacedName.Namespace), startTime, triggerTime)
|
tc.taintEvictionQueue.AddWork(ctx, NewWorkArgs(podNamespacedName.Name, podNamespacedName.Namespace), startTime, triggerTime)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (tc *NoExecuteTaintManager) handlePodUpdate(ctx context.Context, podUpdate podUpdateItem) {
|
func (tc *NoExecuteTaintManager) handlePodUpdate(ctx context.Context, podUpdate podUpdateItem) {
|
||||||
pod, err := tc.podLister.Pods(podUpdate.podNamespace).Get(podUpdate.podName)
|
pod, err := tc.podLister.Pods(podUpdate.podNamespace).Get(podUpdate.podName)
|
||||||
|
logger := klog.FromContext(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if apierrors.IsNotFound(err) {
|
if apierrors.IsNotFound(err) {
|
||||||
// Delete
|
// Delete
|
||||||
podNamespacedName := types.NamespacedName{Namespace: podUpdate.podNamespace, Name: podUpdate.podName}
|
podNamespacedName := types.NamespacedName{Namespace: podUpdate.podNamespace, Name: podUpdate.podName}
|
||||||
klog.V(4).InfoS("Noticed pod deletion", "pod", podNamespacedName)
|
logger.V(4).Info("Noticed pod deletion", "pod", podNamespacedName)
|
||||||
tc.cancelWorkWithEvent(podNamespacedName)
|
tc.cancelWorkWithEvent(logger, podNamespacedName)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
utilruntime.HandleError(fmt.Errorf("could not get pod %s/%s: %v", podUpdate.podName, podUpdate.podNamespace, err))
|
utilruntime.HandleError(fmt.Errorf("could not get pod %s/%s: %v", podUpdate.podName, podUpdate.podNamespace, err))
|
||||||
@ -430,7 +433,7 @@ func (tc *NoExecuteTaintManager) handlePodUpdate(ctx context.Context, podUpdate
|
|||||||
|
|
||||||
// Create or Update
|
// Create or Update
|
||||||
podNamespacedName := types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}
|
podNamespacedName := types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}
|
||||||
klog.V(4).InfoS("Noticed pod update", "pod", podNamespacedName)
|
logger.V(4).Info("Noticed pod update", "pod", podNamespacedName)
|
||||||
nodeName := pod.Spec.NodeName
|
nodeName := pod.Spec.NodeName
|
||||||
if nodeName == "" {
|
if nodeName == "" {
|
||||||
return
|
return
|
||||||
@ -451,10 +454,11 @@ func (tc *NoExecuteTaintManager) handlePodUpdate(ctx context.Context, podUpdate
|
|||||||
|
|
||||||
func (tc *NoExecuteTaintManager) handleNodeUpdate(ctx context.Context, nodeUpdate nodeUpdateItem) {
|
func (tc *NoExecuteTaintManager) handleNodeUpdate(ctx context.Context, nodeUpdate nodeUpdateItem) {
|
||||||
node, err := tc.nodeLister.Get(nodeUpdate.nodeName)
|
node, err := tc.nodeLister.Get(nodeUpdate.nodeName)
|
||||||
|
logger := klog.FromContext(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if apierrors.IsNotFound(err) {
|
if apierrors.IsNotFound(err) {
|
||||||
// Delete
|
// Delete
|
||||||
klog.V(4).InfoS("Noticed node deletion", "node", nodeUpdate.nodeName)
|
logger.V(4).Info("Noticed node deletion", "node", klog.KRef("", nodeUpdate.nodeName))
|
||||||
tc.taintedNodesLock.Lock()
|
tc.taintedNodesLock.Lock()
|
||||||
defer tc.taintedNodesLock.Unlock()
|
defer tc.taintedNodesLock.Unlock()
|
||||||
delete(tc.taintedNodes, nodeUpdate.nodeName)
|
delete(tc.taintedNodes, nodeUpdate.nodeName)
|
||||||
@ -465,12 +469,12 @@ func (tc *NoExecuteTaintManager) handleNodeUpdate(ctx context.Context, nodeUpdat
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Create or Update
|
// Create or Update
|
||||||
klog.V(4).InfoS("Noticed node update", "node", nodeUpdate)
|
logger.V(4).Info("Noticed node update", "node", klog.KObj(node))
|
||||||
taints := getNoExecuteTaints(node.Spec.Taints)
|
taints := getNoExecuteTaints(node.Spec.Taints)
|
||||||
func() {
|
func() {
|
||||||
tc.taintedNodesLock.Lock()
|
tc.taintedNodesLock.Lock()
|
||||||
defer tc.taintedNodesLock.Unlock()
|
defer tc.taintedNodesLock.Unlock()
|
||||||
klog.V(4).InfoS("Updating known taints on node", "node", node.Name, "taints", taints)
|
logger.V(4).Info("Updating known taints on node", "node", klog.KObj(node), "taints", taints)
|
||||||
if len(taints) == 0 {
|
if len(taints) == 0 {
|
||||||
delete(tc.taintedNodes, node.Name)
|
delete(tc.taintedNodes, node.Name)
|
||||||
} else {
|
} else {
|
||||||
@ -483,7 +487,7 @@ func (tc *NoExecuteTaintManager) handleNodeUpdate(ctx context.Context, nodeUpdat
|
|||||||
// tc.PodUpdated which will use tc.taintedNodes to potentially delete delayed pods.
|
// tc.PodUpdated which will use tc.taintedNodes to potentially delete delayed pods.
|
||||||
pods, err := tc.getPodsAssignedToNode(node.Name)
|
pods, err := tc.getPodsAssignedToNode(node.Name)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
klog.ErrorS(err, "Failed to get pods assigned to node", "node", node.Name)
|
logger.Error(err, "Failed to get pods assigned to node", "node", klog.KObj(node))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if len(pods) == 0 {
|
if len(pods) == 0 {
|
||||||
@ -491,9 +495,9 @@ func (tc *NoExecuteTaintManager) handleNodeUpdate(ctx context.Context, nodeUpdat
|
|||||||
}
|
}
|
||||||
// Short circuit, to make this controller a bit faster.
|
// Short circuit, to make this controller a bit faster.
|
||||||
if len(taints) == 0 {
|
if len(taints) == 0 {
|
||||||
klog.V(4).InfoS("All taints were removed from the node. Cancelling all evictions...", "node", node.Name)
|
logger.V(4).Info("All taints were removed from the node. Cancelling all evictions...", "node", klog.KObj(node))
|
||||||
for i := range pods {
|
for i := range pods {
|
||||||
tc.cancelWorkWithEvent(types.NamespacedName{Namespace: pods[i].Namespace, Name: pods[i].Name})
|
tc.cancelWorkWithEvent(logger, types.NamespacedName{Namespace: pods[i].Namespace, Name: pods[i].Name})
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -52,10 +52,11 @@ type TimedWorker struct {
|
|||||||
// createWorker creates a TimedWorker that will execute `f` not earlier than `fireAt`.
|
// createWorker creates a TimedWorker that will execute `f` not earlier than `fireAt`.
|
||||||
func createWorker(ctx context.Context, args *WorkArgs, createdAt time.Time, fireAt time.Time, f func(ctx context.Context, args *WorkArgs) error, clock clock.WithDelayedExecution) *TimedWorker {
|
func createWorker(ctx context.Context, args *WorkArgs, createdAt time.Time, fireAt time.Time, f func(ctx context.Context, args *WorkArgs) error, clock clock.WithDelayedExecution) *TimedWorker {
|
||||||
delay := fireAt.Sub(createdAt)
|
delay := fireAt.Sub(createdAt)
|
||||||
|
logger := klog.FromContext(ctx)
|
||||||
fWithErrorLogging := func() {
|
fWithErrorLogging := func() {
|
||||||
err := f(ctx, args)
|
err := f(ctx, args)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
klog.Errorf("NodeLifecycle: timed worker failed with error: %q", err)
|
logger.Error(err, "NodeLifecycle: timed worker failed")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if delay <= 0 {
|
if delay <= 0 {
|
||||||
@ -116,12 +117,13 @@ func (q *TimedWorkerQueue) getWrappedWorkerFunc(key string) func(ctx context.Con
|
|||||||
// AddWork adds a work to the WorkerQueue which will be executed not earlier than `fireAt`.
|
// AddWork adds a work to the WorkerQueue which will be executed not earlier than `fireAt`.
|
||||||
func (q *TimedWorkerQueue) AddWork(ctx context.Context, args *WorkArgs, createdAt time.Time, fireAt time.Time) {
|
func (q *TimedWorkerQueue) AddWork(ctx context.Context, args *WorkArgs, createdAt time.Time, fireAt time.Time) {
|
||||||
key := args.KeyFromWorkArgs()
|
key := args.KeyFromWorkArgs()
|
||||||
klog.V(4).Infof("Adding TimedWorkerQueue item %v at %v to be fired at %v", key, createdAt, fireAt)
|
logger := klog.FromContext(ctx)
|
||||||
|
logger.V(4).Info("Adding TimedWorkerQueue item and to be fired at firedTime", "item", key, "createTime", createdAt, "firedTime", fireAt)
|
||||||
|
|
||||||
q.Lock()
|
q.Lock()
|
||||||
defer q.Unlock()
|
defer q.Unlock()
|
||||||
if _, exists := q.workers[key]; exists {
|
if _, exists := q.workers[key]; exists {
|
||||||
klog.Warningf("Trying to add already existing work for %+v. Skipping.", args)
|
logger.Info("Trying to add already existing work, skipping", "args", args)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
worker := createWorker(ctx, args, createdAt, fireAt, q.getWrappedWorkerFunc(key), q.clock)
|
worker := createWorker(ctx, args, createdAt, fireAt, q.getWrappedWorkerFunc(key), q.clock)
|
||||||
@ -129,13 +131,13 @@ func (q *TimedWorkerQueue) AddWork(ctx context.Context, args *WorkArgs, createdA
|
|||||||
}
|
}
|
||||||
|
|
||||||
// CancelWork removes scheduled function execution from the queue. Returns true if work was cancelled.
|
// CancelWork removes scheduled function execution from the queue. Returns true if work was cancelled.
|
||||||
func (q *TimedWorkerQueue) CancelWork(key string) bool {
|
func (q *TimedWorkerQueue) CancelWork(logger klog.Logger, key string) bool {
|
||||||
q.Lock()
|
q.Lock()
|
||||||
defer q.Unlock()
|
defer q.Unlock()
|
||||||
worker, found := q.workers[key]
|
worker, found := q.workers[key]
|
||||||
result := false
|
result := false
|
||||||
if found {
|
if found {
|
||||||
klog.V(4).Infof("Cancelling TimedWorkerQueue item %v at %v", key, time.Now())
|
logger.V(4).Info("Cancelling TimedWorkerQueue item", "item", key, "time", time.Now())
|
||||||
if worker != nil {
|
if worker != nil {
|
||||||
result = true
|
result = true
|
||||||
worker.Cancel()
|
worker.Cancel()
|
||||||
|
@ -23,6 +23,7 @@ import (
|
|||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"k8s.io/klog/v2/ktesting"
|
||||||
testingclock "k8s.io/utils/clock/testing"
|
testingclock "k8s.io/utils/clock/testing"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -108,8 +109,9 @@ func TestCancel(t *testing.T) {
|
|||||||
queue.AddWork(context.TODO(), NewWorkArgs("3", "3"), now, then)
|
queue.AddWork(context.TODO(), NewWorkArgs("3", "3"), now, then)
|
||||||
queue.AddWork(context.TODO(), NewWorkArgs("4", "4"), now, then)
|
queue.AddWork(context.TODO(), NewWorkArgs("4", "4"), now, then)
|
||||||
queue.AddWork(context.TODO(), NewWorkArgs("5", "5"), now, then)
|
queue.AddWork(context.TODO(), NewWorkArgs("5", "5"), now, then)
|
||||||
queue.CancelWork(NewWorkArgs("2", "2").KeyFromWorkArgs())
|
logger, _ := ktesting.NewTestContext(t)
|
||||||
queue.CancelWork(NewWorkArgs("4", "4").KeyFromWorkArgs())
|
queue.CancelWork(logger, NewWorkArgs("2", "2").KeyFromWorkArgs())
|
||||||
|
queue.CancelWork(logger, NewWorkArgs("4", "4").KeyFromWorkArgs())
|
||||||
fakeClock.Step(11 * time.Second)
|
fakeClock.Step(11 * time.Second)
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
lastVal := atomic.LoadInt32(&testVal)
|
lastVal := atomic.LoadInt32(&testVal)
|
||||||
@ -141,8 +143,9 @@ func TestCancelAndReadd(t *testing.T) {
|
|||||||
queue.AddWork(context.TODO(), NewWorkArgs("3", "3"), now, then)
|
queue.AddWork(context.TODO(), NewWorkArgs("3", "3"), now, then)
|
||||||
queue.AddWork(context.TODO(), NewWorkArgs("4", "4"), now, then)
|
queue.AddWork(context.TODO(), NewWorkArgs("4", "4"), now, then)
|
||||||
queue.AddWork(context.TODO(), NewWorkArgs("5", "5"), now, then)
|
queue.AddWork(context.TODO(), NewWorkArgs("5", "5"), now, then)
|
||||||
queue.CancelWork(NewWorkArgs("2", "2").KeyFromWorkArgs())
|
logger, _ := ktesting.NewTestContext(t)
|
||||||
queue.CancelWork(NewWorkArgs("4", "4").KeyFromWorkArgs())
|
queue.CancelWork(logger, NewWorkArgs("2", "2").KeyFromWorkArgs())
|
||||||
|
queue.CancelWork(logger, NewWorkArgs("4", "4").KeyFromWorkArgs())
|
||||||
queue.AddWork(context.TODO(), NewWorkArgs("2", "2"), now, then)
|
queue.AddWork(context.TODO(), NewWorkArgs("2", "2"), now, then)
|
||||||
fakeClock.Step(11 * time.Second)
|
fakeClock.Step(11 * time.Second)
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
|
@ -33,6 +33,7 @@ import (
|
|||||||
"k8s.io/client-go/informers"
|
"k8s.io/client-go/informers"
|
||||||
clientset "k8s.io/client-go/kubernetes"
|
clientset "k8s.io/client-go/kubernetes"
|
||||||
featuregatetesting "k8s.io/component-base/featuregate/testing"
|
featuregatetesting "k8s.io/component-base/featuregate/testing"
|
||||||
|
"k8s.io/klog/v2/ktesting"
|
||||||
"k8s.io/kubernetes/cmd/kube-apiserver/app/options"
|
"k8s.io/kubernetes/cmd/kube-apiserver/app/options"
|
||||||
"k8s.io/kubernetes/pkg/controller/nodeipam"
|
"k8s.io/kubernetes/pkg/controller/nodeipam"
|
||||||
"k8s.io/kubernetes/pkg/controller/nodeipam/ipam"
|
"k8s.io/kubernetes/pkg/controller/nodeipam/ipam"
|
||||||
@ -63,7 +64,7 @@ func TestIPAMMultiCIDRRangeAllocatorCIDRAllocate(t *testing.T) {
|
|||||||
ctx, cancel := context.WithCancel(context.Background())
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
go ipamController.Run(ctx.Done())
|
go ipamController.Run(ctx)
|
||||||
sharedInformer.Start(ctx.Done())
|
sharedInformer.Start(ctx.Done())
|
||||||
|
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
@ -149,7 +150,7 @@ func TestIPAMMultiCIDRRangeAllocatorCIDRRelease(t *testing.T) {
|
|||||||
ctx, cancel := context.WithCancel(context.Background())
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
go ipamController.Run(ctx.Done())
|
go ipamController.Run(ctx)
|
||||||
sharedInformer.Start(ctx.Done())
|
sharedInformer.Start(ctx.Done())
|
||||||
|
|
||||||
t.Run("Pod CIDR release after node delete", func(t *testing.T) {
|
t.Run("Pod CIDR release after node delete", func(t *testing.T) {
|
||||||
@ -226,7 +227,7 @@ func TestIPAMMultiCIDRRangeAllocatorClusterCIDRDelete(t *testing.T) {
|
|||||||
ctx, cancel := context.WithCancel(context.Background())
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
go ipamController.Run(ctx.Done())
|
go ipamController.Run(ctx)
|
||||||
sharedInformer.Start(ctx.Done())
|
sharedInformer.Start(ctx.Done())
|
||||||
|
|
||||||
t.Run("delete cc with node associated", func(t *testing.T) {
|
t.Run("delete cc with node associated", func(t *testing.T) {
|
||||||
@ -322,7 +323,7 @@ func TestIPAMMultiCIDRRangeAllocatorClusterCIDRTerminate(t *testing.T) {
|
|||||||
ctx, cancel := context.WithCancel(context.Background())
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
go ipamController.Run(ctx.Done())
|
go ipamController.Run(ctx)
|
||||||
sharedInformer.Start(ctx.Done())
|
sharedInformer.Start(ctx.Done())
|
||||||
|
|
||||||
t.Run("Pod CIDRS must not be allocated from a terminating CC", func(t *testing.T) {
|
t.Run("Pod CIDRS must not be allocated from a terminating CC", func(t *testing.T) {
|
||||||
@ -407,7 +408,7 @@ func TestIPAMMultiCIDRRangeAllocatorClusterCIDRTieBreak(t *testing.T) {
|
|||||||
ctx, cancel := context.WithCancel(context.Background())
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
go ipamController.Run(ctx.Done())
|
go ipamController.Run(ctx)
|
||||||
sharedInformer.Start(ctx.Done())
|
sharedInformer.Start(ctx.Done())
|
||||||
|
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
@ -557,8 +558,9 @@ func booststrapMultiCIDRRangeAllocator(t *testing.T,
|
|||||||
if _, err := clientSet.CoreV1().Nodes().Create(context.TODO(), initialNode, metav1.CreateOptions{}); err != nil {
|
if _, err := clientSet.CoreV1().Nodes().Create(context.TODO(), initialNode, metav1.CreateOptions{}); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
_, ctx := ktesting.NewTestContext(t)
|
||||||
ipamController, err := nodeipam.NewNodeIpamController(
|
ipamController, err := nodeipam.NewNodeIpamController(
|
||||||
|
ctx,
|
||||||
sharedInformer.Core().V1().Nodes(),
|
sharedInformer.Core().V1().Nodes(),
|
||||||
sharedInformer.Networking().V1alpha1().ClusterCIDRs(),
|
sharedInformer.Networking().V1alpha1().ClusterCIDRs(),
|
||||||
nil,
|
nil,
|
||||||
|
@ -17,6 +17,7 @@ limitations under the License.
|
|||||||
package ipamperf
|
package ipamperf
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"net"
|
"net"
|
||||||
@ -25,6 +26,7 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"k8s.io/klog/v2"
|
"k8s.io/klog/v2"
|
||||||
|
"k8s.io/klog/v2/ktesting"
|
||||||
netutils "k8s.io/utils/net"
|
netutils "k8s.io/utils/net"
|
||||||
|
|
||||||
"k8s.io/client-go/informers"
|
"k8s.io/client-go/informers"
|
||||||
@ -37,7 +39,7 @@ import (
|
|||||||
"k8s.io/kubernetes/test/integration/util"
|
"k8s.io/kubernetes/test/integration/util"
|
||||||
)
|
)
|
||||||
|
|
||||||
func setupAllocator(kubeConfig *restclient.Config, config *Config, clusterCIDR, serviceCIDR *net.IPNet, subnetMaskSize int) (*clientset.Clientset, util.ShutdownFunc, error) {
|
func setupAllocator(ctx context.Context, kubeConfig *restclient.Config, config *Config, clusterCIDR, serviceCIDR *net.IPNet, subnetMaskSize int) (*clientset.Clientset, util.ShutdownFunc, error) {
|
||||||
controllerStopChan := make(chan struct{})
|
controllerStopChan := make(chan struct{})
|
||||||
shutdownFunc := func() {
|
shutdownFunc := func() {
|
||||||
close(controllerStopChan)
|
close(controllerStopChan)
|
||||||
@ -50,6 +52,7 @@ func setupAllocator(kubeConfig *restclient.Config, config *Config, clusterCIDR,
|
|||||||
|
|
||||||
sharedInformer := informers.NewSharedInformerFactory(clientSet, 1*time.Hour)
|
sharedInformer := informers.NewSharedInformerFactory(clientSet, 1*time.Hour)
|
||||||
ipamController, err := nodeipam.NewNodeIpamController(
|
ipamController, err := nodeipam.NewNodeIpamController(
|
||||||
|
ctx,
|
||||||
sharedInformer.Core().V1().Nodes(),
|
sharedInformer.Core().V1().Nodes(),
|
||||||
sharedInformer.Networking().V1alpha1().ClusterCIDRs(),
|
sharedInformer.Networking().V1alpha1().ClusterCIDRs(),
|
||||||
config.Cloud, clientSet, []*net.IPNet{clusterCIDR}, serviceCIDR, nil,
|
config.Cloud, clientSet, []*net.IPNet{clusterCIDR}, serviceCIDR, nil,
|
||||||
@ -58,7 +61,7 @@ func setupAllocator(kubeConfig *restclient.Config, config *Config, clusterCIDR,
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, shutdownFunc, err
|
return nil, shutdownFunc, err
|
||||||
}
|
}
|
||||||
go ipamController.Run(controllerStopChan)
|
go ipamController.Run(ctx)
|
||||||
sharedInformer.Start(controllerStopChan)
|
sharedInformer.Start(controllerStopChan)
|
||||||
|
|
||||||
return clientSet, shutdownFunc, nil
|
return clientSet, shutdownFunc, nil
|
||||||
@ -74,8 +77,8 @@ func runTest(t *testing.T, kubeConfig *restclient.Config, config *Config, cluste
|
|||||||
nodeClient := clientset.NewForConfigOrDie(nodeClientConfig)
|
nodeClient := clientset.NewForConfigOrDie(nodeClientConfig)
|
||||||
|
|
||||||
defer deleteNodes(nodeClient) // cleanup nodes on after controller shutdown
|
defer deleteNodes(nodeClient) // cleanup nodes on after controller shutdown
|
||||||
|
_, ctx := ktesting.NewTestContext(t)
|
||||||
clientSet, shutdownFunc, err := setupAllocator(kubeConfig, config, clusterCIDR, serviceCIDR, subnetMaskSize)
|
clientSet, shutdownFunc, err := setupAllocator(ctx, kubeConfig, config, clusterCIDR, serviceCIDR, subnetMaskSize)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Error starting IPAM allocator: %v", err)
|
t.Fatalf("Error starting IPAM allocator: %v", err)
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user