Merge pull request #122692 from aroradaman/reject-packets-to-invalid-port
proxy/nftables: reject packets destined for invalid ports of service ips
@@ -921,7 +921,7 @@ func (s *ProxyServer) Run() error {
 			options.LabelSelector = labelSelector.String()
 		}))
 
-	// Create configs (i.e. Watches for Services and EndpointSlices)
+	// Create configs (i.e. Watches for Services, EndpointSlices and ServiceCIDRs)
 	// Note: RegisterHandler() calls need to happen before creation of Sources because sources
 	// only notify on changes, and the initial update (on process start) may be lost if no handlers
 	// are registered yet.
@@ -933,6 +933,11 @@ func (s *ProxyServer) Run() error {
 	endpointSliceConfig.RegisterEventHandler(s.Proxier)
 	go endpointSliceConfig.Run(wait.NeverStop)
 
+	if utilfeature.DefaultFeatureGate.Enabled(features.MultiCIDRServiceAllocator) {
+		serviceCIDRConfig := config.NewServiceCIDRConfig(informerFactory.Networking().V1alpha1().ServiceCIDRs(), s.Config.ConfigSyncPeriod.Duration)
+		serviceCIDRConfig.RegisterEventHandler(s.Proxier)
+		go serviceCIDRConfig.Run(wait.NeverStop)
+	}
 	// This has to start after the calls to NewServiceConfig because that
 	// function must configure its shared informer event handlers first.
 	informerFactory.Start(wait.NeverStop)
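
The new ServiceCIDR watch is inert unless the MultiCIDRServiceAllocator feature gate is on; a hypothetical kube-proxy invocation that would exercise this path:

    kube-proxy --feature-gates=MultiCIDRServiceAllocator=true
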
@@ -18,13 +18,17 @@ package config
 
 import (
 	"fmt"
+	"sync"
 	"time"
 
 	v1 "k8s.io/api/core/v1"
-	discovery "k8s.io/api/discovery/v1"
+	discoveryv1 "k8s.io/api/discovery/v1"
+	networkingv1alpha1 "k8s.io/api/networking/v1alpha1"
 	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
-	coreinformers "k8s.io/client-go/informers/core/v1"
-	discoveryinformers "k8s.io/client-go/informers/discovery/v1"
+	"k8s.io/apimachinery/pkg/util/sets"
+	v1informers "k8s.io/client-go/informers/core/v1"
+	discoveryv1informers "k8s.io/client-go/informers/discovery/v1"
+	networkingv1alpha1informers "k8s.io/client-go/informers/networking/v1alpha1"
 	"k8s.io/client-go/tools/cache"
 	"k8s.io/klog/v2"
 )
@@ -51,13 +55,13 @@ type ServiceHandler interface {
 type EndpointSliceHandler interface {
 	// OnEndpointSliceAdd is called whenever creation of new endpoint slice
 	// object is observed.
-	OnEndpointSliceAdd(endpointSlice *discovery.EndpointSlice)
+	OnEndpointSliceAdd(endpointSlice *discoveryv1.EndpointSlice)
 	// OnEndpointSliceUpdate is called whenever modification of an existing
 	// endpoint slice object is observed.
-	OnEndpointSliceUpdate(oldEndpointSlice, newEndpointSlice *discovery.EndpointSlice)
+	OnEndpointSliceUpdate(oldEndpointSlice, newEndpointSlice *discoveryv1.EndpointSlice)
 	// OnEndpointSliceDelete is called whenever deletion of an existing
 	// endpoint slice object is observed.
-	OnEndpointSliceDelete(endpointSlice *discovery.EndpointSlice)
+	OnEndpointSliceDelete(endpointSlice *discoveryv1.EndpointSlice)
 	// OnEndpointSlicesSynced is called once all the initial event handlers were
 	// called and the state is fully propagated to local cache.
 	OnEndpointSlicesSynced()
@@ -70,12 +74,12 @@ type EndpointSliceConfig struct {
 }
 
 // NewEndpointSliceConfig creates a new EndpointSliceConfig.
-func NewEndpointSliceConfig(endpointSliceInformer discoveryinformers.EndpointSliceInformer, resyncPeriod time.Duration) *EndpointSliceConfig {
+func NewEndpointSliceConfig(endpointSliceInformer discoveryv1informers.EndpointSliceInformer, resyncPeriod time.Duration) *EndpointSliceConfig {
 	result := &EndpointSliceConfig{
 		listerSynced: endpointSliceInformer.Informer().HasSynced,
 	}
 
-	endpointSliceInformer.Informer().AddEventHandlerWithResyncPeriod(
+	_, _ = endpointSliceInformer.Informer().AddEventHandlerWithResyncPeriod(
 		cache.ResourceEventHandlerFuncs{
 			AddFunc:    result.handleAddEndpointSlice,
 			UpdateFunc: result.handleUpdateEndpointSlice,
@@ -107,7 +111,7 @@ func (c *EndpointSliceConfig) Run(stopCh <-chan struct{}) {
 }
 
 func (c *EndpointSliceConfig) handleAddEndpointSlice(obj interface{}) {
-	endpointSlice, ok := obj.(*discovery.EndpointSlice)
+	endpointSlice, ok := obj.(*discoveryv1.EndpointSlice)
 	if !ok {
 		utilruntime.HandleError(fmt.Errorf("unexpected object type: %T", obj))
 		return
@@ -119,12 +123,12 @@ func (c *EndpointSliceConfig) handleAddEndpointSlice(obj interface{}) {
 }
 
 func (c *EndpointSliceConfig) handleUpdateEndpointSlice(oldObj, newObj interface{}) {
-	oldEndpointSlice, ok := oldObj.(*discovery.EndpointSlice)
+	oldEndpointSlice, ok := oldObj.(*discoveryv1.EndpointSlice)
 	if !ok {
 		utilruntime.HandleError(fmt.Errorf("unexpected object type: %T", newObj))
 		return
 	}
-	newEndpointSlice, ok := newObj.(*discovery.EndpointSlice)
+	newEndpointSlice, ok := newObj.(*discoveryv1.EndpointSlice)
 	if !ok {
 		utilruntime.HandleError(fmt.Errorf("unexpected object type: %T", newObj))
 		return
@@ -136,14 +140,14 @@ func (c *EndpointSliceConfig) handleUpdateEndpointSlice(oldObj, newObj interface
 }
 
 func (c *EndpointSliceConfig) handleDeleteEndpointSlice(obj interface{}) {
-	endpointSlice, ok := obj.(*discovery.EndpointSlice)
+	endpointSlice, ok := obj.(*discoveryv1.EndpointSlice)
 	if !ok {
 		tombstone, ok := obj.(cache.DeletedFinalStateUnknown)
 		if !ok {
 			utilruntime.HandleError(fmt.Errorf("unexpected object type: %T", obj))
 			return
 		}
-		if endpointSlice, ok = tombstone.Obj.(*discovery.EndpointSlice); !ok {
+		if endpointSlice, ok = tombstone.Obj.(*discoveryv1.EndpointSlice); !ok {
 			utilruntime.HandleError(fmt.Errorf("unexpected object type: %T", obj))
 			return
 		}
@@ -161,12 +165,12 @@ type ServiceConfig struct {
 }
 
 // NewServiceConfig creates a new ServiceConfig.
-func NewServiceConfig(serviceInformer coreinformers.ServiceInformer, resyncPeriod time.Duration) *ServiceConfig {
+func NewServiceConfig(serviceInformer v1informers.ServiceInformer, resyncPeriod time.Duration) *ServiceConfig {
 	result := &ServiceConfig{
 		listerSynced: serviceInformer.Informer().HasSynced,
 	}
 
-	serviceInformer.Informer().AddEventHandlerWithResyncPeriod(
+	_, _ = serviceInformer.Informer().AddEventHandlerWithResyncPeriod(
 		cache.ResourceEventHandlerFuncs{
 			AddFunc:    result.handleAddService,
 			UpdateFunc: result.handleUpdateService,
@@ -288,12 +292,12 @@ type NodeConfig struct {
 }
 
 // NewNodeConfig creates a new NodeConfig.
-func NewNodeConfig(nodeInformer coreinformers.NodeInformer, resyncPeriod time.Duration) *NodeConfig {
+func NewNodeConfig(nodeInformer v1informers.NodeInformer, resyncPeriod time.Duration) *NodeConfig {
 	result := &NodeConfig{
 		listerSynced: nodeInformer.Informer().HasSynced,
 	}
 
-	nodeInformer.Informer().AddEventHandlerWithResyncPeriod(
+	_, _ = nodeInformer.Informer().AddEventHandlerWithResyncPeriod(
 		cache.ResourceEventHandlerFuncs{
 			AddFunc:    result.handleAddNode,
 			UpdateFunc: result.handleUpdateNode,
@@ -371,3 +375,97 @@ func (c *NodeConfig) handleDeleteNode(obj interface{}) {
 		c.eventHandlers[i].OnNodeDelete(node)
 	}
 }
+
+// ServiceCIDRHandler is an abstract interface of objects which receive
+// notifications about ServiceCIDR object changes.
+type ServiceCIDRHandler interface {
+	// OnServiceCIDRsChanged is called whenever a change is observed
+	// in any of the ServiceCIDRs, and provides complete list of service cidrs.
+	OnServiceCIDRsChanged(cidrs []string)
+}
+
+// ServiceCIDRConfig tracks a set of service configurations.
+type ServiceCIDRConfig struct {
+	listerSynced  cache.InformerSynced
+	eventHandlers []ServiceCIDRHandler
+	mu            sync.Mutex
+	cidrs         sets.Set[string]
+}
+
+// NewServiceCIDRConfig creates a new ServiceCIDRConfig.
+func NewServiceCIDRConfig(serviceCIDRInformer networkingv1alpha1informers.ServiceCIDRInformer, resyncPeriod time.Duration) *ServiceCIDRConfig {
+	result := &ServiceCIDRConfig{
+		listerSynced: serviceCIDRInformer.Informer().HasSynced,
+		cidrs:        sets.New[string](),
+	}
+
+	_, _ = serviceCIDRInformer.Informer().AddEventHandlerWithResyncPeriod(
+		cache.ResourceEventHandlerFuncs{
+			AddFunc: func(obj interface{}) {
+				result.handleServiceCIDREvent(nil, obj)
+			},
+			UpdateFunc: func(oldObj, newObj interface{}) {
+				result.handleServiceCIDREvent(oldObj, newObj)
+			},
+			DeleteFunc: func(obj interface{}) {
+				result.handleServiceCIDREvent(obj, nil)
+			},
+		},
+		resyncPeriod,
+	)
+	return result
+}
+
+// RegisterEventHandler registers a handler which is called on every ServiceCIDR change.
+func (c *ServiceCIDRConfig) RegisterEventHandler(handler ServiceCIDRHandler) {
+	c.eventHandlers = append(c.eventHandlers, handler)
+}
+
+// Run waits for cache synced and invokes handlers after syncing.
+func (c *ServiceCIDRConfig) Run(stopCh <-chan struct{}) {
+	klog.InfoS("Starting serviceCIDR config controller")
+
+	if !cache.WaitForNamedCacheSync("serviceCIDR config", stopCh, c.listerSynced) {
+		return
+	}
+	c.handleServiceCIDREvent(nil, nil)
+}
+
+// handleServiceCIDREvent is a helper function to handle Add, Update and Delete
+// events on ServiceCIDR objects and call downstream event handlers.
+func (c *ServiceCIDRConfig) handleServiceCIDREvent(oldObj, newObj interface{}) {
+	var oldServiceCIDR, newServiceCIDR *networkingv1alpha1.ServiceCIDR
+	var ok bool
+
+	if oldObj != nil {
+		oldServiceCIDR, ok = oldObj.(*networkingv1alpha1.ServiceCIDR)
+		if !ok {
+			utilruntime.HandleError(fmt.Errorf("unexpected object type: %v", oldObj))
+			return
+		}
+	}
+
+	if newObj != nil {
+		newServiceCIDR, ok = newObj.(*networkingv1alpha1.ServiceCIDR)
+		if !ok {
+			utilruntime.HandleError(fmt.Errorf("unexpected object type: %v", newObj))
+			return
+		}
+	}
+
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	if oldServiceCIDR != nil {
+		c.cidrs.Delete(oldServiceCIDR.Spec.CIDRs...)
+	}
+
+	if newServiceCIDR != nil {
+		c.cidrs.Insert(newServiceCIDR.Spec.CIDRs...)
+	}
+
+	for i := range c.eventHandlers {
+		klog.V(4).InfoS("Calling handler.OnServiceCIDRsChanged")
+		c.eventHandlers[i].OnServiceCIDRsChanged(c.cidrs.UnsortedList())
+	}
+}
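
A minimal sketch (not part of this change) of how a component would consume the new ServiceCIDRConfig; the clientset plumbing and the cidrLogger handler are assumptions for illustration, mirroring the server.go wiring above:

    package example

    import (
    	"fmt"
    	"time"

    	"k8s.io/client-go/informers"
    	"k8s.io/client-go/kubernetes"
    	proxyconfig "k8s.io/kubernetes/pkg/proxy/config"
    )

    // cidrLogger implements proxyconfig.ServiceCIDRHandler.
    type cidrLogger struct{}

    // OnServiceCIDRsChanged receives the complete, current list of service CIDRs
    // (not a delta) on every add/update/delete, per handleServiceCIDREvent above.
    func (c *cidrLogger) OnServiceCIDRsChanged(cidrs []string) {
    	fmt.Printf("service CIDRs are now: %v\n", cidrs)
    }

    func watchServiceCIDRs(client kubernetes.Interface, stopCh <-chan struct{}) {
    	factory := informers.NewSharedInformerFactory(client, 15*time.Minute)
    	cidrConfig := proxyconfig.NewServiceCIDRConfig(factory.Networking().V1alpha1().ServiceCIDRs(), 15*time.Minute)
    	cidrConfig.RegisterEventHandler(&cidrLogger{})
    	factory.Start(stopCh)
    	go cidrConfig.Run(stopCh)
    }
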
@@ -673,6 +673,10 @@ func (proxier *Proxier) OnNodeDelete(node *v1.Node) {
 func (proxier *Proxier) OnNodeSynced() {
 }
 
+// OnServiceCIDRsChanged is called whenever a change is observed
+// in any of the ServiceCIDRs, and provides complete list of service cidrs.
+func (proxier *Proxier) OnServiceCIDRsChanged(_ []string) {}
+
 // portProtoHash takes the ServicePortName and protocol for a service
 // returns the associated 16 character hash. This is computed by hashing (sha256)
 // then encoding to base32 and truncating to 16 chars. We do this because IPTables

@@ -892,6 +892,10 @@ func (proxier *Proxier) OnNodeDelete(node *v1.Node) {
 func (proxier *Proxier) OnNodeSynced() {
 }
 
+// OnServiceCIDRsChanged is called whenever a change is observed
+// in any of the ServiceCIDRs, and provides complete list of service cidrs.
+func (proxier *Proxier) OnServiceCIDRsChanged(_ []string) {}
+
 // This is where all of the ipvs calls happen.
 func (proxier *Proxier) syncProxyRules() {
 	proxier.mu.Lock()

@@ -53,6 +53,7 @@ func (*FakeProxier) OnEndpointSliceAdd(slice *discoveryv1.EndpointSlice)
 func (*FakeProxier) OnEndpointSliceUpdate(oldSlice, slice *discoveryv1.EndpointSlice) {}
 func (*FakeProxier) OnEndpointSliceDelete(slice *discoveryv1.EndpointSlice)           {}
 func (*FakeProxier) OnEndpointSlicesSynced()                                          {}
+func (*FakeProxier) OnServiceCIDRsChanged(_ []string)                                 {}
 
 func NewHollowProxy(
 	nodeName string,

@@ -158,3 +158,10 @@ func (proxier *metaProxier) OnNodeSynced() {
 	proxier.ipv4Proxier.OnNodeSynced()
 	proxier.ipv6Proxier.OnNodeSynced()
 }
+
+// OnServiceCIDRsChanged is called whenever a change is observed
+// in any of the ServiceCIDRs, and provides complete list of service cidrs.
+func (proxier *metaProxier) OnServiceCIDRsChanged(cidrs []string) {
+	proxier.ipv4Proxier.OnServiceCIDRsChanged(cidrs)
+	proxier.ipv6Proxier.OnServiceCIDRsChanged(cidrs)
+}

@@ -51,7 +51,7 @@ the forward path.
 
 ## kube-proxy's use of nftables hooks
 
-Kube-proxy uses nftables for four things:
+Kube-proxy uses nftables for seven things:
 
   - Using DNAT to rewrite traffic from service IPs (cluster IPs, external IPs, load balancer
     IP, and NodePorts on node IPs) to the corresponding endpoint IPs.
@@ -65,6 +65,10 @@ Kube-proxy uses nftables for four things:
 
   - Rejecting packets for services with no local or remote endpoints.
 
+  - Dropping packets to ClusterIPs which are not yet allocated.
+
+  - Rejecting packets to undefined ports of ClusterIPs.
+
 This is implemented as follows:
 
   - We do the DNAT for inbound traffic in `prerouting`: this covers traffic coming from
@@ -87,13 +91,9 @@ This is implemented as follows:
     explicitly before or after any other rules (since they match packets that wouldn't be
     matched by any other rules). But with kernels before 5.9, `reject` is not allowed in
     `prerouting`, so we can't just do them in the same place as the source ranges
-    firewall. So we do these checks from `input`, `forward`, and `output`, to cover all
-    three paths. (In fact, we only need to check `@no-endpoint-nodeports` on the `input`
-    hook, but it's easier to just check them both in one place, and this code is likely to
-    be rewritten later anyway. Note that the converse statement "we only need to check
-    `@no-endpoint-services` on the `forward` and `output` hooks" is *not* true, because
-    `@no-endpoint-services` may include externalIPs/LB IPs that are assigned to local
-    interfaces.)
+    firewall. So we do these checks from `input`, `forward`, and `output` for
+    `@no-endpoint-services` and from `input` for `@no-endpoint-nodeports` to cover all
+    the possible paths.
 
   - Masquerading has to happen in the `postrouting` hook, because "masquerade" means "SNAT
     to the IP of the interface the packet is going out on", so it has to happen after the
@@ -101,3 +101,9 @@ This is implemented as follows:
     network IP, because masquerading is about ensuring that the packet eventually gets
     routed back to the host network namespace on this node, so if it's never getting
     routed away from there, there's nothing to do.)
+
+  - We install a `reject` rule for ClusterIPs matching the `@cluster-ips` set, and a `drop`
+    rule for ClusterIPs belonging to any of the ServiceCIDRs, in the `forward` and `output`
+    hooks, with a higher (i.e. less urgent) priority than the DNAT chains, making sure all
+    valid traffic directed to ClusterIPs has already been DNATed. The drop rule is only
+    installed if the `MultiCIDRServiceAllocator` feature is enabled.
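
For reference, the ruleset entries corresponding to the new paragraph, as they appear in the updated test expectations at the end of this diff:

    add set ip kube-proxy cluster-ips { type ipv4_addr ; comment "Active ClusterIPs" ; }
    add chain ip kube-proxy filter-output-post-dnat { type filter hook output priority -90 ; }
    add rule ip kube-proxy filter-output-post-dnat ct state new jump cluster-ips-check
    add rule ip kube-proxy filter-forward ct state new jump cluster-ips-check
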

@@ -22,6 +22,7 @@ package nftables
 import (
 	"context"
 	"fmt"
+	"net"
 	"regexp"
 	"runtime"
 	"sort"
@@ -174,7 +175,7 @@ func newNFTablesTracer(t *testing.T, nft *knftables.Fake, nodeIPs []string) *nft
 	}
 }
 
-func (tracer *nftablesTracer) addressMatches(ipStr, not, ruleAddress string) bool {
+func (tracer *nftablesTracer) addressMatches(ipStr string, wantMatch bool, ruleAddress string) bool {
 	ip := netutils.ParseIPSloppy(ipStr)
 	if ip == nil {
 		tracer.t.Fatalf("Bad IP in test case: %s", ipStr)
@@ -195,18 +196,16 @@ func (tracer *nftablesTracer) addressMatches(ipStr, not, ruleAddress string) boo
 		match = ip.Equal(ip2)
 	}
 
-	if not == "!= " {
-		return !match
-	} else {
-		return match
-	}
+	return match == wantMatch
 }
 
-func (tracer *nftablesTracer) noneAddressesMatch(ipStr, ruleAddress string) bool {
+func (tracer *nftablesTracer) addressMatchesSet(ipStr string, wantMatch bool, ruleAddress string) bool {
 	ruleAddress = strings.ReplaceAll(ruleAddress, " ", "")
 	addresses := strings.Split(ruleAddress, ",")
+	var match bool
 	for _, address := range addresses {
-		if tracer.addressMatches(ipStr, "", address) {
+		match = tracer.addressMatches(ipStr, true, address)
+		if match != wantMatch {
 			return false
 		}
 	}
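
The refactor above replaces the string `not` parameter with a boolean `wantMatch` derived once from the optional `(!= )?` capture group. A stand-alone illustration of that derivation (the sample rules are made up):

    package main

    import (
    	"fmt"
    	"regexp"
    )

    // Same pattern as the updated destAddrLookupRegexp in this file.
    var destAddrLookup = regexp.MustCompile(`^ip6* daddr (!= )?\{([^}]*)\}`)

    func main() {
    	for _, rule := range []string{
    		"ip daddr != {192.168.0.1, 192.168.0.2}",
    		"ip daddr {172.30.0.0/16}",
    	} {
    		m := destAddrLookup.FindStringSubmatch(rule)
    		// Group 1 is "!= " for a negated match and "" otherwise, so a single
    		// comparison yields the polarity the tracer should check for.
    		wantMatch, set := m[1] != "!= ", m[2]
    		fmt.Printf("wantMatch=%v set=%q\n", wantMatch, set)
    	}
    }
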

@@ -240,7 +239,7 @@ func (tracer *nftablesTracer) matchDest(elements []*knftables.Element, destIP, p
 // found.
 func (tracer *nftablesTracer) matchDestAndSource(elements []*knftables.Element, destIP, protocol, destPort, sourceIP string) *knftables.Element {
 	for _, element := range elements {
-		if element.Key[0] == destIP && element.Key[1] == protocol && element.Key[2] == destPort && tracer.addressMatches(sourceIP, "", element.Key[3]) {
+		if element.Key[0] == destIP && element.Key[1] == protocol && element.Key[2] == destPort && tracer.addressMatches(sourceIP, true, element.Key[3]) {
 			return element
 		}
 	}
@@ -270,7 +269,7 @@ func (tracer *nftablesTracer) matchDestPort(elements []*knftables.Element, proto
 // match verdictRegexp.
 
 var destAddrRegexp = regexp.MustCompile(`^ip6* daddr (!= )?(\S+)`)
-var destAddrLookupRegexp = regexp.MustCompile(`^ip6* daddr != \{([^}]*)\}`)
+var destAddrLookupRegexp = regexp.MustCompile(`^ip6* daddr (!= )?\{([^}]*)\}`)
 var destAddrLocalRegexp = regexp.MustCompile(`^fib daddr type local`)
 var destPortRegexp = regexp.MustCompile(`^(tcp|udp|sctp) dport (\d+)`)
 var destIPOnlyLookupRegexp = regexp.MustCompile(`^ip6* daddr @(\S+)`)
@@ -282,13 +281,13 @@ var destDispatchRegexp = regexp.MustCompile(`^ip6* daddr \. meta l4proto \. th d
 var destPortDispatchRegexp = regexp.MustCompile(`^meta l4proto \. th dport vmap @(\S+)$`)
 
 var sourceAddrRegexp = regexp.MustCompile(`^ip6* saddr (!= )?(\S+)`)
-var sourceAddrLookupRegexp = regexp.MustCompile(`^ip6* saddr != \{([^}]*)\}`)
+var sourceAddrLookupRegexp = regexp.MustCompile(`^ip6* saddr (!= )?\{([^}]*)\}`)
 var sourceAddrLocalRegexp = regexp.MustCompile(`^fib saddr type local`)
 
 var endpointVMAPRegexp = regexp.MustCompile(`^numgen random mod \d+ vmap \{(.*)\}$`)
 var endpointVMapEntryRegexp = regexp.MustCompile(`\d+ : goto (\S+)`)
 
-var masqueradeRegexp = regexp.MustCompile(`^jump ` + kubeMarkMasqChain + `$`)
+var masqueradeRegexp = regexp.MustCompile(`^jump ` + markMasqChain + `$`)
 var jumpRegexp = regexp.MustCompile(`^(jump|goto) (\S+)$`)
 var returnRegexp = regexp.MustCompile(`^return$`)
 var verdictRegexp = regexp.MustCompile(`^(drop|reject)$`)
@@ -402,11 +401,12 @@ func (tracer *nftablesTracer) runChain(chname, sourceIP, protocol, destIP, destP
 			}
 
 		case destAddrLookupRegexp.MatchString(rule):
-			// `^ip6* daddr != \{([^}]*)\}`
+			// `^ip6* daddr (!= )?\{([^}]*)\}`
 			// Tests whether destIP doesn't match an anonymous set.
 			match := destAddrLookupRegexp.FindStringSubmatch(rule)
 			rule = strings.TrimPrefix(rule, match[0])
-			if !tracer.noneAddressesMatch(destIP, match[1]) {
+			wantMatch, set := match[1] != "!= ", match[2]
+			if !tracer.addressMatchesSet(destIP, wantMatch, set) {
 				rule = ""
 				break
 			}
@@ -416,8 +416,8 @@ func (tracer *nftablesTracer) runChain(chname, sourceIP, protocol, destIP, destP
 			// Tests whether destIP does/doesn't match a literal.
 			match := destAddrRegexp.FindStringSubmatch(rule)
 			rule = strings.TrimPrefix(rule, match[0])
-			not, ip := match[1], match[2]
-			if !tracer.addressMatches(destIP, not, ip) {
+			wantMatch, ip := match[1] != "!= ", match[2]
+			if !tracer.addressMatches(destIP, wantMatch, ip) {
 				rule = ""
 				break
 			}
@@ -444,11 +444,12 @@ func (tracer *nftablesTracer) runChain(chname, sourceIP, protocol, destIP, destP
 			}
 
 		case sourceAddrLookupRegexp.MatchString(rule):
-			// `^ip6* saddr != \{([^}]*)\}`
+			// `^ip6* saddr (!= )?\{([^}]*)\}`
 			// Tests whether sourceIP doesn't match an anonymous set.
 			match := sourceAddrLookupRegexp.FindStringSubmatch(rule)
 			rule = strings.TrimPrefix(rule, match[0])
-			if !tracer.noneAddressesMatch(sourceIP, match[1]) {
+			wantMatch, set := match[1] != "!= ", match[2]
+			if !tracer.addressMatchesSet(sourceIP, wantMatch, set) {
 				rule = ""
 				break
 			}
@@ -458,8 +459,8 @@ func (tracer *nftablesTracer) runChain(chname, sourceIP, protocol, destIP, destP
 			// Tests whether sourceIP does/doesn't match a literal.
 			match := sourceAddrRegexp.FindStringSubmatch(rule)
 			rule = strings.TrimPrefix(rule, match[0])
-			not, ip := match[1], match[2]
-			if !tracer.addressMatches(sourceIP, not, ip) {
+			wantMatch, ip := match[1] != "!= ", match[2]
+			if !tracer.addressMatches(sourceIP, wantMatch, ip) {
 				rule = ""
 				break
 			}
@@ -569,6 +570,7 @@ func (tracer *nftablesTracer) runChain(chname, sourceIP, protocol, destIP, destP
 // destinations (a comma-separated list of IPs, or one of the special targets "ACCEPT",
 // "DROP", or "REJECT"), and whether the packet would be masqueraded.
 func tracePacket(t *testing.T, nft *knftables.Fake, sourceIP, protocol, destIP, destPort string, nodeIPs []string) ([]string, string, bool) {
+	var err error
 	tracer := newNFTablesTracer(t, nft, nodeIPs)
 
 	// filter-prerouting goes first, then nat-prerouting if not terminated.
@@ -579,11 +581,19 @@ func tracePacket(t *testing.T, nft *knftables.Fake, sourceIP, protocol, destIP,
 	// After the prerouting rules run, pending DNATs are processed (which would affect
 	// the destination IP that later rules match against).
 	if len(tracer.outputs) != 0 {
-		destIP = strings.Split(tracer.outputs[0], ":")[0]
+		destIP, _, err = net.SplitHostPort(tracer.outputs[0])
+		if err != nil {
+			t.Errorf("failed to parse host port '%s': %s", tracer.outputs[0], err.Error())
+		}
 	}
 
-	// Run filter-forward, skip filter-input as it ought to be fully redundant with the filter-forward chain.
-	tracer.runChain("filter-forward", sourceIP, protocol, destIP, destPort)
+	// Run filter-forward, return if packet is terminated.
+	if tracer.runChain("filter-forward", sourceIP, protocol, destIP, destPort) {
+		return tracer.matches, strings.Join(tracer.outputs, ", "), tracer.markMasq
+	}
+
+	// Run filter-input
+	tracer.runChain("filter-input", sourceIP, protocol, destIP, destPort)
 
 	// Skip filter-output and nat-output as they ought to be fully redundant with the prerouting chains.
 	// Skip nat-postrouting because it only does masquerading and we handle that separately.
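
The switch to net.SplitHostPort matters once DNAT outputs can be IPv6: splitting on the first ":" truncates a bracketed IPv6 endpoint. A minimal illustration (the endpoint string is hypothetical):

    package main

    import (
    	"fmt"
    	"net"
    	"strings"
    )

    func main() {
    	out := "[fd00:10:96::1]:8080"                 // a DNAT output as the tracer records it
    	fmt.Println(strings.Split(out, ":")[0])       // "[fd00" — the old parsing truncates IPv6
    	host, port, err := net.SplitHostPort(out)
    	fmt.Println(host, port, err)                  // fd00:10:96::1 8080 <nil>
    }
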

@@ -55,6 +55,7 @@ import (
 	proxyutiliptables "k8s.io/kubernetes/pkg/proxy/util/iptables"
 	"k8s.io/kubernetes/pkg/util/async"
 	utilexec "k8s.io/utils/exec"
+	netutils "k8s.io/utils/net"
 	"k8s.io/utils/ptr"
 )
 
@@ -63,27 +64,44 @@ const (
 	// so they don't need any "kube-" or "kube-proxy-" prefix of their own.
 	kubeProxyTable = "kube-proxy"
 
+	// base chains
+	filterPreroutingChain     = "filter-prerouting"
+	filterInputChain          = "filter-input"
+	filterForwardChain        = "filter-forward"
+	filterOutputChain         = "filter-output"
+	filterOutputPostDNATChain = "filter-output-post-dnat"
+	natPreroutingChain        = "nat-prerouting"
+	natOutputChain            = "nat-output"
+	natPostroutingChain       = "nat-postrouting"
+
 	// service dispatch
-	kubeServicesChain       = "services"
-	kubeServiceIPsMap       = "service-ips"
-	kubeServiceNodePortsMap = "service-nodeports"
+	servicesChain       = "services"
+	serviceIPsMap       = "service-ips"
+	serviceNodePortsMap = "service-nodeports"
 
 	// set of IPs that accept NodePort traffic
-	kubeNodePortIPsSet = "nodeport-ips"
+	nodePortIPsSet = "nodeport-ips"
+
+	// set of active ClusterIPs.
+	clusterIPsSet = "cluster-ips"
 
 	// handling for services with no endpoints
-	kubeEndpointsCheckChain    = "endpoints-check"
-	kubeNoEndpointServicesMap  = "no-endpoint-services"
-	kubeNoEndpointNodePortsMap = "no-endpoint-nodeports"
-	kubeRejectChain            = "reject-chain"
+	serviceEndpointsCheckChain  = "service-endpoints-check"
+	nodePortEndpointsCheckChain = "nodeport-endpoints-check"
+	noEndpointServicesMap       = "no-endpoint-services"
+	noEndpointNodePortsMap      = "no-endpoint-nodeports"
+	rejectChain                 = "reject-chain"
+
+	// handling traffic to unallocated ClusterIPs and undefined ports of ClusterIPs
+	clusterIPsCheckChain = "cluster-ips-check"
 
 	// LoadBalancerSourceRanges handling
-	kubeFirewallIPsMap     = "firewall-ips"
-	kubeFirewallCheckChain = "firewall-check"
+	firewallIPsMap     = "firewall-ips"
+	firewallCheckChain = "firewall-check"
 
 	// masquerading
-	kubeMarkMasqChain     = "mark-for-masquerade"
-	kubeMasqueradingChain = "masquerading"
+	markMasqChain     = "mark-for-masquerade"
+	masqueradingChain = "masquerading"
 )
 
 // internal struct for string service information
@@ -179,6 +197,10 @@ type Proxier struct {
 
 	// staleChains contains information about chains to be deleted later
 	staleChains map[string]time.Time
+
+	// serviceCIDRs is a comma separated list of ServiceCIDRs belonging to the IPFamily
+	// which proxier is operating on, can be directly consumed by knftables.
+	serviceCIDRs string
 }
 
 // Proxier implements proxy.Provider
@@ -309,13 +331,14 @@ type nftablesBaseChain struct {
 var nftablesBaseChains = []nftablesBaseChain{
 	// We want our filtering rules to operate on pre-DNAT dest IPs, so our filter
 	// chains have to run before DNAT.
-	{"filter-prerouting", knftables.FilterType, knftables.PreroutingHook, knftables.DNATPriority + "-10"},
-	{"filter-input", knftables.FilterType, knftables.InputHook, knftables.DNATPriority + "-10"},
-	{"filter-forward", knftables.FilterType, knftables.ForwardHook, knftables.DNATPriority + "-10"},
-	{"filter-output", knftables.FilterType, knftables.OutputHook, knftables.DNATPriority + "-10"},
-	{"nat-prerouting", knftables.NATType, knftables.PreroutingHook, knftables.DNATPriority},
-	{"nat-output", knftables.NATType, knftables.OutputHook, knftables.DNATPriority},
-	{"nat-postrouting", knftables.NATType, knftables.PostroutingHook, knftables.SNATPriority},
+	{filterPreroutingChain, knftables.FilterType, knftables.PreroutingHook, knftables.DNATPriority + "-10"},
+	{filterInputChain, knftables.FilterType, knftables.InputHook, knftables.DNATPriority + "-10"},
+	{filterForwardChain, knftables.FilterType, knftables.ForwardHook, knftables.DNATPriority + "-10"},
+	{filterOutputChain, knftables.FilterType, knftables.OutputHook, knftables.DNATPriority + "-10"},
+	{filterOutputPostDNATChain, knftables.FilterType, knftables.OutputHook, knftables.DNATPriority + "+10"},
+	{natPreroutingChain, knftables.NATType, knftables.PreroutingHook, knftables.DNATPriority},
+	{natOutputChain, knftables.NATType, knftables.OutputHook, knftables.DNATPriority},
+	{natPostroutingChain, knftables.NATType, knftables.PostroutingHook, knftables.SNATPriority},
 }
 
 // nftablesJumpChains lists our top-level "regular chains" that are jumped to directly
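
knftables expresses base-chain priorities as strings relative to named nftables bases, which is how the new filter-output-post-dnat chain hooks `output` after DNAT while the other filter chains keep running before it. A sketch of the arithmetic (for the ip family, DNATPriority is "dstnat", i.e. -100, so these render as the "priority -110" and "priority -90" values visible in the test expectations below):

    package example

    import "github.com/danwinship/knftables"

    var (
    	beforeDNAT = knftables.DNATPriority + "-10" // filter-* chains: match pre-DNAT dest IPs
    	afterDNAT  = knftables.DNATPriority + "+10" // filter-output-post-dnat: sees post-DNAT dest IPs
    )
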

@@ -328,19 +351,23 @@ type nftablesJumpChain struct {
 }
 
 var nftablesJumpChains = []nftablesJumpChain{
-	// We can't jump to kubeEndpointsCheckChain from filter-prerouting like
-	// kubeFirewallCheckChain because reject action is only valid in chains using the
+	// We can't jump to endpointsCheckChain from filter-prerouting like
+	// firewallCheckChain because reject action is only valid in chains using the
 	// input, forward or output hooks.
-	{kubeEndpointsCheckChain, "filter-input", "ct state new"},
-	{kubeEndpointsCheckChain, "filter-forward", "ct state new"},
-	{kubeEndpointsCheckChain, "filter-output", "ct state new"},
+	{nodePortEndpointsCheckChain, filterInputChain, "ct state new"},
+	{serviceEndpointsCheckChain, filterInputChain, "ct state new"},
+	{serviceEndpointsCheckChain, filterForwardChain, "ct state new"},
+	{serviceEndpointsCheckChain, filterOutputChain, "ct state new"},
 
-	{kubeFirewallCheckChain, "filter-prerouting", "ct state new"},
-	{kubeFirewallCheckChain, "filter-output", "ct state new"},
+	{firewallCheckChain, filterPreroutingChain, "ct state new"},
+	{firewallCheckChain, filterOutputChain, "ct state new"},
 
-	{kubeServicesChain, "nat-output", ""},
-	{kubeServicesChain, "nat-prerouting", ""},
-	{kubeMasqueradingChain, "nat-postrouting", ""},
+	{servicesChain, natOutputChain, ""},
+	{servicesChain, natPreroutingChain, ""},
+	{masqueradingChain, natPostroutingChain, ""},
+
+	{clusterIPsCheckChain, filterForwardChain, "ct state new"},
+	{clusterIPsCheckChain, filterOutputPostDNATChain, "ct state new"},
 }
 
 // ensureChain adds commands to tx to ensure that chain exists and doesn't contain

@@ -399,51 +426,79 @@ func (proxier *Proxier) setupNFTables(tx *knftables.Transaction) {
 	}
 
 	// Ensure all of our other "top-level" chains exist
-	for _, chain := range []string{kubeServicesChain, kubeMasqueradingChain, kubeMarkMasqChain} {
+	for _, chain := range []string{servicesChain, clusterIPsCheckChain, masqueradingChain, markMasqChain} {
 		ensureChain(chain, tx, createdChains)
 	}
 
 	// Add the rules in the mark-for-masquerade and masquerading chains
 	tx.Add(&knftables.Rule{
-		Chain: kubeMarkMasqChain,
+		Chain: markMasqChain,
 		Rule: knftables.Concat(
 			"mark", "set", "mark", "or", proxier.masqueradeMark,
 		),
 	})
 
 	tx.Add(&knftables.Rule{
-		Chain: kubeMasqueradingChain,
+		Chain: masqueradingChain,
 		Rule: knftables.Concat(
 			"mark", "and", proxier.masqueradeMark, "==", "0",
 			"return",
 		),
 	})
 	tx.Add(&knftables.Rule{
-		Chain: kubeMasqueradingChain,
+		Chain: masqueradingChain,
 		Rule: knftables.Concat(
 			"mark", "set", "mark", "xor", proxier.masqueradeMark,
 		),
 	})
 	tx.Add(&knftables.Rule{
-		Chain: kubeMasqueradingChain,
+		Chain: masqueradingChain,
 		Rule:  "masquerade fully-random",
 	})
+
+	// add cluster-ips set.
+	tx.Add(&knftables.Set{
+		Name:    clusterIPsSet,
+		Type:    ipvX_addr,
+		Comment: ptr.To("Active ClusterIPs"),
+	})
+
+	// reject traffic to invalid ports of ClusterIPs.
+	tx.Add(&knftables.Rule{
+		Chain: clusterIPsCheckChain,
+		Rule: knftables.Concat(
+			ipX, "daddr", "@", clusterIPsSet, "reject",
+		),
+		Comment: ptr.To("Reject traffic to invalid ports of ClusterIPs"),
+	})
+
+	// drop traffic to unallocated ClusterIPs.
+	if len(proxier.serviceCIDRs) > 0 {
+		tx.Add(&knftables.Rule{
+			Chain: clusterIPsCheckChain,
+			Rule: knftables.Concat(
+				ipX, "daddr", "{", proxier.serviceCIDRs, "}",
+				"drop",
+			),
+			Comment: ptr.To("Drop traffic to unallocated ClusterIPs"),
+		})
+	}
 
 	// Fill in nodeport-ips set if needed (or delete it if not). (We do "add+delete"
 	// rather than just "delete" when we want to ensure the set doesn't exist, because
 	// doing just "delete" would return an error if the set didn't exist.)
 	tx.Add(&knftables.Set{
-		Name:    kubeNodePortIPsSet,
+		Name:    nodePortIPsSet,
 		Type:    ipvX_addr,
 		Comment: ptr.To("IPs that accept NodePort traffic"),
 	})
 	if proxier.nodePortAddresses.MatchAll() {
 		tx.Delete(&knftables.Set{
-			Name: kubeNodePortIPsSet,
+			Name: nodePortIPsSet,
 		})
 	} else {
 		tx.Flush(&knftables.Set{
-			Name: kubeNodePortIPsSet,
+			Name: nodePortIPsSet,
 		})
 		nodeIPs, err := proxier.nodePortAddresses.GetNodeIPs(proxier.networkInterfacer)
 		if err != nil {
@@ -455,7 +510,7 @@ func (proxier *Proxier) setupNFTables(tx *knftables.Transaction) {
 			continue
 		}
 		tx.Add(&knftables.Element{
-			Set: kubeNodePortIPsSet,
+			Set: nodePortIPsSet,
 			Key: []string{
 				ip.String(),
 			},
@@ -465,108 +520,107 @@ func (proxier *Proxier) setupNFTables(tx *knftables.Transaction) {
 
 	// Set up "no endpoints" drop/reject handling
 	tx.Add(&knftables.Map{
-		Name:    kubeNoEndpointServicesMap,
+		Name:    noEndpointServicesMap,
 		Type:    ipvX_addr + " . inet_proto . inet_service : verdict",
 		Comment: ptr.To("vmap to drop or reject packets to services with no endpoints"),
 	})
 	tx.Add(&knftables.Map{
-		Name:    kubeNoEndpointNodePortsMap,
+		Name:    noEndpointNodePortsMap,
 		Type:    "inet_proto . inet_service : verdict",
 		Comment: ptr.To("vmap to drop or reject packets to service nodeports with no endpoints"),
 	})
 
 	tx.Add(&knftables.Chain{
-		Name:    kubeRejectChain,
+		Name:    rejectChain,
 		Comment: ptr.To("helper for @no-endpoint-services / @no-endpoint-nodeports"),
 	})
 	tx.Flush(&knftables.Chain{
-		Name: kubeRejectChain,
+		Name: rejectChain,
 	})
 	tx.Add(&knftables.Rule{
-		Chain: kubeRejectChain,
+		Chain: rejectChain,
 		Rule:  "reject",
 	})
 
 	tx.Add(&knftables.Rule{
-		Chain: kubeEndpointsCheckChain,
+		Chain: serviceEndpointsCheckChain,
 		Rule: knftables.Concat(
 			ipX, "daddr", ".", "meta l4proto", ".", "th dport",
-			"vmap", "@", kubeNoEndpointServicesMap,
+			"vmap", "@", noEndpointServicesMap,
 		),
 	})
 
 	if proxier.nodePortAddresses.MatchAll() {
 		tx.Add(&knftables.Rule{
-			Chain: kubeEndpointsCheckChain,
+			Chain: nodePortEndpointsCheckChain,
 			Rule: knftables.Concat(
 				"fib daddr type local",
 				noLocalhost,
 				"meta l4proto . th dport",
-				"vmap", "@", kubeNoEndpointNodePortsMap,
+				"vmap", "@", noEndpointNodePortsMap,
 			),
 		})
 	} else {
 		tx.Add(&knftables.Rule{
-			Chain: kubeEndpointsCheckChain,
+			Chain: nodePortEndpointsCheckChain,
 			Rule: knftables.Concat(
-				ipX, "daddr", "@", kubeNodePortIPsSet,
+				ipX, "daddr", "@", nodePortIPsSet,
 				"meta l4proto . th dport",
-				"vmap", "@", kubeNoEndpointNodePortsMap,
+				"vmap", "@", noEndpointNodePortsMap,
 			),
 		})
 	}
 
 	// Set up LoadBalancerSourceRanges firewalling
 	tx.Add(&knftables.Map{
-		Name:    kubeFirewallIPsMap,
+		Name:    firewallIPsMap,
 		Type:    ipvX_addr + " . inet_proto . inet_service : verdict",
 		Comment: ptr.To("destinations that are subject to LoadBalancerSourceRanges"),
 	})
 
-	ensureChain(kubeFirewallCheckChain, tx, createdChains)
+	ensureChain(firewallCheckChain, tx, createdChains)
 	tx.Add(&knftables.Rule{
-		Chain: kubeFirewallCheckChain,
+		Chain: firewallCheckChain,
 		Rule: knftables.Concat(
 			ipX, "daddr", ".", "meta l4proto", ".", "th dport",
-			"vmap", "@", kubeFirewallIPsMap,
+			"vmap", "@", firewallIPsMap,
 		),
 	})
 
 	// Set up service dispatch
 	tx.Add(&knftables.Map{
-		Name:    kubeServiceIPsMap,
+		Name:    serviceIPsMap,
 		Type:    ipvX_addr + " . inet_proto . inet_service : verdict",
 		Comment: ptr.To("ClusterIP, ExternalIP and LoadBalancer IP traffic"),
 	})
 	tx.Add(&knftables.Map{
-		Name:    kubeServiceNodePortsMap,
+		Name:    serviceNodePortsMap,
 		Type:    "inet_proto . inet_service : verdict",
 		Comment: ptr.To("NodePort traffic"),
 	})
 	tx.Add(&knftables.Rule{
-		Chain: kubeServicesChain,
+		Chain: servicesChain,
 		Rule: knftables.Concat(
 			ipX, "daddr", ".", "meta l4proto", ".", "th dport",
-			"vmap", "@", kubeServiceIPsMap,
+			"vmap", "@", serviceIPsMap,
 		),
 	})
 	if proxier.nodePortAddresses.MatchAll() {
 		tx.Add(&knftables.Rule{
-			Chain: kubeServicesChain,
+			Chain: servicesChain,
 			Rule: knftables.Concat(
 				"fib daddr type local",
 				noLocalhost,
 				"meta l4proto . th dport",
-				"vmap", "@", kubeServiceNodePortsMap,
+				"vmap", "@", serviceNodePortsMap,
 			),
 		})
 	} else {
 		tx.Add(&knftables.Rule{
-			Chain: kubeServicesChain,
+			Chain: servicesChain,
 			Rule: knftables.Concat(
 				ipX, "daddr @nodeport-ips",
 				"meta l4proto . th dport",
-				"vmap", "@", kubeServiceNodePortsMap,
+				"vmap", "@", serviceNodePortsMap,
 			),
 		})
 	}
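
Assuming ipX is "ip" and proxier.serviceCIDRs is "172.30.0.0/16" (the IPv4 defaults used by the fake proxier later in this diff), the two rules added to cluster-ips-check above should render roughly as follows; the exact rendering of knftables.Concat is inferred here, not quoted from the PR:

    ip daddr @cluster-ips reject
    ip daddr { 172.30.0.0/16 } drop

A packet addressed to an allocated ClusterIP on a defined port never reaches these rules, because it was already DNATed at a more urgent priority; anything still addressed to a ClusterIP here is hitting either an undefined port (reject) or an unallocated IP (drop).
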

@@ -764,6 +818,26 @@ func (proxier *Proxier) OnNodeDelete(node *v1.Node) {
 func (proxier *Proxier) OnNodeSynced() {
 }
 
+// OnServiceCIDRsChanged is called whenever a change is observed
+// in any of the ServiceCIDRs, and provides complete list of service cidrs.
+func (proxier *Proxier) OnServiceCIDRsChanged(cidrs []string) {
+	proxier.mu.Lock()
+	defer proxier.mu.Unlock()
+
+	cidrsForProxier := make([]string, 0)
+	for _, cidr := range cidrs {
+		isIPv4CIDR := netutils.IsIPv4CIDRString(cidr)
+		if proxier.ipFamily == v1.IPv4Protocol && isIPv4CIDR {
+			cidrsForProxier = append(cidrsForProxier, cidr)
+		}
+
+		if proxier.ipFamily == v1.IPv6Protocol && !isIPv4CIDR {
+			cidrsForProxier = append(cidrsForProxier, cidr)
+		}
+	}
+	proxier.serviceCIDRs = strings.Join(cidrsForProxier, ",")
+}
+
 const (
 	// Maximum length for one of our chain name prefixes, including the trailing
 	// hyphen.
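
The family filtering above keeps only the CIDRs matching the proxier's IP family before handing them to knftables. A stand-alone sketch (the sample CIDRs match the fake proxier defaults used in the tests):

    package example

    import (
    	"strings"

    	netutils "k8s.io/utils/net"
    )

    // cidrsFor mirrors OnServiceCIDRsChanged's per-family filtering.
    func cidrsFor(ipv4 bool, cidrs []string) string {
    	keep := make([]string, 0, len(cidrs))
    	for _, cidr := range cidrs {
    		if netutils.IsIPv4CIDRString(cidr) == ipv4 {
    			keep = append(keep, cidr)
    		}
    	}
    	return strings.Join(keep, ",")
    }

    // cidrsFor(true, []string{"172.30.0.0/16", "fd00:10:96::/112"})  == "172.30.0.0/16"
    // cidrsFor(false, []string{"172.30.0.0/16", "fd00:10:96::/112"}) == "fd00:10:96::/112"
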

@@ -955,20 +1029,23 @@ func (proxier *Proxier) syncProxyRules() {
 	}
 
 	// We currently fully-rebuild our sets and maps on each resync
+	tx.Flush(&knftables.Set{
+		Name: clusterIPsSet,
+	})
 	tx.Flush(&knftables.Map{
-		Name: kubeFirewallIPsMap,
+		Name: firewallIPsMap,
 	})
 	tx.Flush(&knftables.Map{
-		Name: kubeNoEndpointServicesMap,
+		Name: noEndpointServicesMap,
 	})
 	tx.Flush(&knftables.Map{
-		Name: kubeNoEndpointNodePortsMap,
+		Name: noEndpointNodePortsMap,
 	})
 	tx.Flush(&knftables.Map{
-		Name: kubeServiceIPsMap,
+		Name: serviceIPsMap,
 	})
 	tx.Flush(&knftables.Map{
-		Name: kubeServiceNodePortsMap,
+		Name: serviceNodePortsMap,
 	})
 
 	// Accumulate service/endpoint chains and affinity sets to keep.
@@ -1074,8 +1151,8 @@ func (proxier *Proxier) syncProxyRules() {
 			// generate any chains in the "nat" table for the service; only
 			// rules in the "filter" table rejecting incoming packets for
 			// the service's IPs.
-			internalTrafficFilterVerdict = fmt.Sprintf("goto %s", kubeRejectChain)
-			externalTrafficFilterVerdict = fmt.Sprintf("goto %s", kubeRejectChain)
+			internalTrafficFilterVerdict = fmt.Sprintf("goto %s", rejectChain)
+			externalTrafficFilterVerdict = fmt.Sprintf("goto %s", rejectChain)
 		} else {
 			if !hasInternalEndpoints {
 				// The internalTrafficPolicy is "Local" but there are no local
@@ -1095,9 +1172,13 @@ func (proxier *Proxier) syncProxyRules() {
 		}
 
 		// Capture the clusterIP.
+		tx.Add(&knftables.Element{
+			Set: clusterIPsSet,
+			Key: []string{svcInfo.ClusterIP().String()},
+		})
 		if hasInternalEndpoints {
 			tx.Add(&knftables.Element{
-				Map: kubeServiceIPsMap,
+				Map: serviceIPsMap,
 				Key: []string{
 					svcInfo.ClusterIP().String(),
 					protocol,
@@ -1110,7 +1191,7 @@ func (proxier *Proxier) syncProxyRules() {
 		} else {
 			// No endpoints.
 			tx.Add(&knftables.Element{
-				Map: kubeNoEndpointServicesMap,
+				Map: noEndpointServicesMap,
 				Key: []string{
 					svcInfo.ClusterIP().String(),
 					protocol,
@@ -1129,7 +1210,7 @@ func (proxier *Proxier) syncProxyRules() {
 				// Send traffic bound for external IPs to the "external
 				// destinations" chain.
 				tx.Add(&knftables.Element{
-					Map: kubeServiceIPsMap,
+					Map: serviceIPsMap,
 					Key: []string{
 						externalIP.String(),
 						protocol,
@@ -1145,7 +1226,7 @@ func (proxier *Proxier) syncProxyRules() {
 				// external traffic (DROP anything that didn't get
 				// short-circuited by the EXT chain.)
 				tx.Add(&knftables.Element{
-					Map: kubeNoEndpointServicesMap,
+					Map: noEndpointServicesMap,
 					Key: []string{
 						externalIP.String(),
 						protocol,
@@ -1197,7 +1278,7 @@ func (proxier *Proxier) syncProxyRules() {
 		for _, lbip := range svcInfo.LoadBalancerVIPs() {
 			if hasEndpoints {
 				tx.Add(&knftables.Element{
-					Map: kubeServiceIPsMap,
+					Map: serviceIPsMap,
 					Key: []string{
 						lbip.String(),
 						protocol,
@@ -1211,7 +1292,7 @@ func (proxier *Proxier) syncProxyRules() {
 
 			if usesFWChain {
 				tx.Add(&knftables.Element{
-					Map: kubeFirewallIPsMap,
+					Map: firewallIPsMap,
 					Key: []string{
 						lbip.String(),
 						protocol,
@@ -1230,7 +1311,7 @@ func (proxier *Proxier) syncProxyRules() {
 			// by the EXT chain.)
 			for _, lbip := range svcInfo.LoadBalancerVIPs() {
 				tx.Add(&knftables.Element{
-					Map: kubeNoEndpointServicesMap,
+					Map: noEndpointServicesMap,
 					Key: []string{
 						lbip.String(),
 						protocol,
@@ -1251,7 +1332,7 @@ func (proxier *Proxier) syncProxyRules() {
 				// worse, nodeports are not subect to loadBalancerSourceRanges,
 				// and we can't change that.
 				tx.Add(&knftables.Element{
-					Map: kubeServiceNodePortsMap,
+					Map: serviceNodePortsMap,
 					Key: []string{
 						protocol,
 						strconv.Itoa(svcInfo.NodePort()),
@@ -1266,7 +1347,7 @@ func (proxier *Proxier) syncProxyRules() {
 				// external traffic (DROP anything that didn't get
 				// short-circuited by the EXT chain.)
 				tx.Add(&knftables.Element{
-					Map: kubeNoEndpointNodePortsMap,
+					Map: noEndpointNodePortsMap,
 					Key: []string{
 						protocol,
 						strconv.Itoa(svcInfo.NodePort()),
@@ -1287,7 +1368,7 @@ func (proxier *Proxier) syncProxyRules() {
 					Rule: knftables.Concat(
 						ipX, "daddr", svcInfo.ClusterIP(),
 						protocol, "dport", svcInfo.Port(),
-						"jump", kubeMarkMasqChain,
+						"jump", markMasqChain,
 					),
 				})
 			} else if proxier.localDetector.IsImplemented() {
@@ -1302,7 +1383,7 @@ func (proxier *Proxier) syncProxyRules() {
 						ipX, "daddr", svcInfo.ClusterIP(),
 						protocol, "dport", svcInfo.Port(),
 						proxier.localDetector.IfNotLocalNFT(),
-						"jump", kubeMarkMasqChain,
+						"jump", markMasqChain,
 					),
 				})
 			}
@@ -1319,7 +1400,7 @@ func (proxier *Proxier) syncProxyRules() {
 				tx.Add(&knftables.Rule{
 					Chain: externalTrafficChain,
 					Rule: knftables.Concat(
-						"jump", kubeMarkMasqChain,
+						"jump", markMasqChain,
 					),
 				})
 			} else {
@@ -1348,7 +1429,7 @@ func (proxier *Proxier) syncProxyRules() {
 					Chain: externalTrafficChain,
 					Rule: knftables.Concat(
 						"fib", "saddr", "type", "local",
-						"jump", kubeMarkMasqChain,
+						"jump", markMasqChain,
 					),
 					Comment: ptr.To("masquerade local traffic"),
 				})
@@ -1441,7 +1522,7 @@ func (proxier *Proxier) syncProxyRules() {
 				Chain: endpointChain,
 				Rule: knftables.Concat(
 					ipX, "saddr", epInfo.IP(),
-					"jump", kubeMarkMasqChain,
+					"jump", markMasqChain,
 				),
 			})
 
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -28,6 +28,8 @@ import (

	"github.com/danwinship/knftables"
	"github.com/lithammer/dedent"
	"github.com/stretchr/testify/assert"

	v1 "k8s.io/api/core/v1"
	discovery "k8s.io/api/discovery/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -40,9 +42,8 @@ import (
	"k8s.io/kubernetes/pkg/features"
	"k8s.io/kubernetes/pkg/proxy"
	"k8s.io/kubernetes/pkg/proxy/conntrack"
	"k8s.io/kubernetes/pkg/proxy/metrics"

	"k8s.io/kubernetes/pkg/proxy/healthcheck"
	"k8s.io/kubernetes/pkg/proxy/metrics"
	proxyutil "k8s.io/kubernetes/pkg/proxy/util"
	proxyutiliptables "k8s.io/kubernetes/pkg/proxy/util/iptables"
	proxyutiltest "k8s.io/kubernetes/pkg/proxy/util/testing"
@@ -78,9 +79,11 @@ func NewFakeProxier(ipFamily v1.IPFamily) (*knftables.Fake, *Proxier) {
	// invocation into a Run() method.
	nftablesFamily := knftables.IPv4Family
	podCIDR := "10.0.0.0/8"
	serviceCIDRs := "172.30.0.0/16"
	if ipFamily == v1.IPv6Protocol {
		nftablesFamily = knftables.IPv6Family
		podCIDR = "fd00:10::/64"
		serviceCIDRs = "fd00:10:96::/112"
	}
	detectLocal, _ := proxyutiliptables.NewDetectLocalByCIDR(podCIDR)

@@ -119,6 +122,7 @@ func NewFakeProxier(ipFamily v1.IPFamily) (*knftables.Fake, *Proxier) {
		nodePortAddresses:   proxyutil.NewNodePortAddresses(ipFamily, nil),
		networkInterfacer:   networkInterfacer,
		staleChains:         make(map[string]time.Time),
		serviceCIDRs:        serviceCIDRs,
	}
	p.setInitialized(true)
	p.syncRunner = async.NewBoundedFrequencyRunner("test-sync-runner", p.syncProxyRules, 0, time.Minute, 1)
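NewFakeProxier seeds serviceCIDRs statically because the unit tests never run the ServiceCIDR informer; in the real proxier the value arrives through OnServiceCIDRsChanged. The string is what ends up inside the unallocated-ClusterIP drop rule in the expected rulesets below, roughly as in this sketch (our rendering; the real rule text is assembled inside syncProxyRules):

package main

import "fmt"

func main() {
	// The fake proxier's per-family service CIDR string, interpolated
	// into nft set syntax the way the expected rulesets show it.
	serviceCIDRs := "172.30.0.0/16"
	fmt.Printf("ip daddr { %s } drop\n", serviceCIDRs)
	// Output: ip daddr { 172.30.0.0/16 } drop
}
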
@@ -303,12 +307,16 @@ func TestOverallNFTablesRules(t *testing.T) {
		add chain ip kube-proxy filter-prerouting { type filter hook prerouting priority -110 ; }
		add rule ip kube-proxy filter-prerouting ct state new jump firewall-check
		add chain ip kube-proxy filter-forward { type filter hook forward priority -110 ; }
		add rule ip kube-proxy filter-forward ct state new jump endpoints-check
		add rule ip kube-proxy filter-forward ct state new jump service-endpoints-check
		add rule ip kube-proxy filter-forward ct state new jump cluster-ips-check
		add chain ip kube-proxy filter-input { type filter hook input priority -110 ; }
		add rule ip kube-proxy filter-input ct state new jump endpoints-check
		add rule ip kube-proxy filter-input ct state new jump nodeport-endpoints-check
		add rule ip kube-proxy filter-input ct state new jump service-endpoints-check
		add chain ip kube-proxy filter-output { type filter hook output priority -110 ; }
		add rule ip kube-proxy filter-output ct state new jump endpoints-check
		add rule ip kube-proxy filter-output ct state new jump service-endpoints-check
		add rule ip kube-proxy filter-output ct state new jump firewall-check
		add chain ip kube-proxy filter-output-post-dnat { type filter hook output priority -90 ; }
		add rule ip kube-proxy filter-output-post-dnat ct state new jump cluster-ips-check
		add chain ip kube-proxy nat-output { type nat hook output priority -100 ; }
		add rule ip kube-proxy nat-output jump services
		add chain ip kube-proxy nat-postrouting { type nat hook postrouting priority 100 ; }
@@ -316,6 +324,11 @@ func TestOverallNFTablesRules(t *testing.T) {
		add chain ip kube-proxy nat-prerouting { type nat hook prerouting priority -100 ; }
		add rule ip kube-proxy nat-prerouting jump services

		add set ip kube-proxy cluster-ips { type ipv4_addr ; comment "Active ClusterIPs" ; }
		add chain ip kube-proxy cluster-ips-check
		add rule ip kube-proxy cluster-ips-check ip daddr @cluster-ips reject comment "Reject traffic to invalid ports of ClusterIPs"
		add rule ip kube-proxy cluster-ips-check ip daddr { 172.30.0.0/16 } drop comment "Drop traffic to unallocated ClusterIPs"

		add map ip kube-proxy firewall-ips { type ipv4_addr . inet_proto . inet_service : verdict ; comment "destinations that are subject to LoadBalancerSourceRanges" ; }
		add chain ip kube-proxy firewall-check
		add rule ip kube-proxy firewall-check ip daddr . meta l4proto . th dport vmap @firewall-ips
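This hunk is the heart of the PR: a cluster-ips set tracking every allocated ClusterIP, plus a cluster-ips-check chain that rejects traffic reaching an allocated ClusterIP without being DNATed (an invalid port, so the client gets an immediate ICMP error) and drops traffic to the rest of the service CIDR (addresses no Service owns). A self-contained sketch against knftables' fake backend, using only constructs that appear in this diff; the literals are the test fixtures', and no real nftables access is needed:

package main

import (
	"context"
	"fmt"

	"github.com/danwinship/knftables"
)

func main() {
	nft := knftables.NewFake(knftables.IPv4Family, "kube-proxy")
	tx := nft.NewTransaction()
	tx.Add(&knftables.Table{})

	tx.Add(&knftables.Set{
		Name:    "cluster-ips",
		Type:    "ipv4_addr",
		Comment: knftables.PtrTo("Active ClusterIPs"),
	})
	// Every allocated ClusterIP lands in the set.
	tx.Add(&knftables.Element{Set: "cluster-ips", Key: []string{"172.30.0.41"}})

	tx.Add(&knftables.Chain{Name: "cluster-ips-check"})
	// Allocated ClusterIP, but nothing DNATed the packet: a port the
	// Service does not expose, so reject.
	tx.Add(&knftables.Rule{
		Chain: "cluster-ips-check",
		Rule:  "ip daddr @cluster-ips reject",
	})
	// Anything else inside the service CIDR was never allocated: drop.
	tx.Add(&knftables.Rule{
		Chain: "cluster-ips-check",
		Rule:  "ip daddr { 172.30.0.0/16 } drop",
	})

	if err := nft.Run(context.Background(), tx); err != nil {
		panic(err)
	}
	fmt.Print(nft.Dump())
}
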
@@ -326,9 +339,10 @@ func TestOverallNFTablesRules(t *testing.T) {
		add map ip kube-proxy no-endpoint-services { type ipv4_addr . inet_proto . inet_service : verdict ; comment "vmap to drop or reject packets to services with no endpoints" ; }
		add map ip kube-proxy no-endpoint-nodeports { type inet_proto . inet_service : verdict ; comment "vmap to drop or reject packets to service nodeports with no endpoints" ; }

		add chain ip kube-proxy endpoints-check
		add rule ip kube-proxy endpoints-check ip daddr . meta l4proto . th dport vmap @no-endpoint-services
		add rule ip kube-proxy endpoints-check fib daddr type local ip daddr != 127.0.0.0/8 meta l4proto . th dport vmap @no-endpoint-nodeports
		add chain ip kube-proxy nodeport-endpoints-check
		add rule ip kube-proxy nodeport-endpoints-check ip daddr != 127.0.0.0/8 meta l4proto . th dport vmap @no-endpoint-nodeports
		add chain ip kube-proxy service-endpoints-check
		add rule ip kube-proxy service-endpoints-check ip daddr . meta l4proto . th dport vmap @no-endpoint-services

		add map ip kube-proxy service-ips { type ipv4_addr . inet_proto . inet_service : verdict ; comment "ClusterIP, ExternalIP and LoadBalancer IP traffic" ; }
		add map ip kube-proxy service-nodeports { type inet_proto . inet_service : verdict ; comment "NodePort traffic" ; }
@@ -344,6 +358,7 @@ func TestOverallNFTablesRules(t *testing.T) {
		add rule ip kube-proxy endpoint-5OJB2KTY-ns1/svc1/tcp/p80__10.180.0.1/80 ip saddr 10.180.0.1 jump mark-for-masquerade
		add rule ip kube-proxy endpoint-5OJB2KTY-ns1/svc1/tcp/p80__10.180.0.1/80 meta l4proto tcp dnat to 10.180.0.1:80

		add element ip kube-proxy cluster-ips { 172.30.0.41 }
		add element ip kube-proxy service-ips { 172.30.0.41 . tcp . 80 : goto service-ULMVA6XW-ns1/svc1/tcp/p80 }

		# svc2
@@ -358,6 +373,7 @@ func TestOverallNFTablesRules(t *testing.T) {
		add rule ip kube-proxy endpoint-SGOXE6O3-ns2/svc2/tcp/p80__10.180.0.2/80 ip saddr 10.180.0.2 jump mark-for-masquerade
		add rule ip kube-proxy endpoint-SGOXE6O3-ns2/svc2/tcp/p80__10.180.0.2/80 meta l4proto tcp dnat to 10.180.0.2:80

		add element ip kube-proxy cluster-ips { 172.30.0.42 }
		add element ip kube-proxy service-ips { 172.30.0.42 . tcp . 80 : goto service-42NFTM6N-ns2/svc2/tcp/p80 }
		add element ip kube-proxy service-ips { 192.168.99.22 . tcp . 80 : goto external-42NFTM6N-ns2/svc2/tcp/p80 }
		add element ip kube-proxy service-ips { 1.2.3.4 . tcp . 80 : goto external-42NFTM6N-ns2/svc2/tcp/p80 }
@@ -378,6 +394,7 @@ func TestOverallNFTablesRules(t *testing.T) {
		add rule ip kube-proxy endpoint-UEIP74TE-ns3/svc3/tcp/p80__10.180.0.3/80 ip saddr 10.180.0.3 jump mark-for-masquerade
		add rule ip kube-proxy endpoint-UEIP74TE-ns3/svc3/tcp/p80__10.180.0.3/80 meta l4proto tcp dnat to 10.180.0.3:80

		add element ip kube-proxy cluster-ips { 172.30.0.43 }
		add element ip kube-proxy service-ips { 172.30.0.43 . tcp . 80 : goto service-4AT6LBPK-ns3/svc3/tcp/p80 }
		add element ip kube-proxy service-nodeports { tcp . 3003 : goto external-4AT6LBPK-ns3/svc3/tcp/p80 }

@@ -395,6 +412,7 @@ func TestOverallNFTablesRules(t *testing.T) {
		add rule ip kube-proxy endpoint-UNZV3OEC-ns4/svc4/tcp/p80__10.180.0.4/80 ip saddr 10.180.0.4 jump mark-for-masquerade
		add rule ip kube-proxy endpoint-UNZV3OEC-ns4/svc4/tcp/p80__10.180.0.4/80 meta l4proto tcp dnat to 10.180.0.4:80

		add element ip kube-proxy cluster-ips { 172.30.0.44 }
		add element ip kube-proxy service-ips { 172.30.0.44 . tcp . 80 : goto service-LAUZTJTB-ns4/svc4/tcp/p80 }
		add element ip kube-proxy service-ips { 192.168.99.33 . tcp . 80 : goto external-LAUZTJTB-ns4/svc4/tcp/p80 }

@@ -416,12 +434,14 @@ func TestOverallNFTablesRules(t *testing.T) {
		add chain ip kube-proxy firewall-HVFWP5L3-ns5/svc5/tcp/p80
		add rule ip kube-proxy firewall-HVFWP5L3-ns5/svc5/tcp/p80 ip saddr != { 203.0.113.0/25 } drop

		add element ip kube-proxy cluster-ips { 172.30.0.45 }
		add element ip kube-proxy service-ips { 172.30.0.45 . tcp . 80 : goto service-HVFWP5L3-ns5/svc5/tcp/p80 }
		add element ip kube-proxy service-ips { 5.6.7.8 . tcp . 80 : goto external-HVFWP5L3-ns5/svc5/tcp/p80 }
		add element ip kube-proxy service-nodeports { tcp . 3002 : goto external-HVFWP5L3-ns5/svc5/tcp/p80 }
		add element ip kube-proxy firewall-ips { 5.6.7.8 . tcp . 80 comment "ns5/svc5:p80" : goto firewall-HVFWP5L3-ns5/svc5/tcp/p80 }

		# svc6
		add element ip kube-proxy cluster-ips { 172.30.0.46 }
		add element ip kube-proxy no-endpoint-services { 172.30.0.46 . tcp . 80 comment "ns6/svc6:p80" : goto reject-chain }
		`)

@@ -678,14 +698,21 @@ func TestClusterIPGeneral(t *testing.T) {
			protocol: v1.ProtocolUDP,
			destIP:   "172.30.0.42",
			destPort: 80,
			output:   "",
			output:   "REJECT",
		},
		{
			name:     "svc1 does not accept svc2's ports",
			sourceIP: "10.180.0.2",
			destIP:   "172.30.0.41",
			destPort: 443,
			output:   "",
			output:   "REJECT",
		},
		{
			name:     "packet to unallocated cluster ip",
			sourceIP: "10.180.0.2",
			destIP:   "172.30.0.50",
			destPort: 80,
			output:   "DROP",
		},
	})
}
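The two flipped expectations show the user-visible change: a packet to an allocated ClusterIP on a port the Service does not expose used to fall through kube-proxy's chains untouched (empty output) and would typically just time out; it now gets an immediate REJECT. The new case covers an address inside the service CIDR that no Service owns, which is silently dropped. A toy model of the three-way verdict, plain Go rather than kube-proxy code:

package main

import "fmt"

// verdict models the nftables semantics the updated test asserts; it is a
// toy, not the proxier's logic.
func verdict(allocatedClusterIP, servicePortOpen bool) string {
	switch {
	case allocatedClusterIP && servicePortOpen:
		return "DNAT" // normal service traffic
	case allocatedClusterIP:
		return "REJECT" // invalid port of a valid ClusterIP
	default:
		return "DROP" // unallocated address in the service CIDR
	}
}

func main() {
	fmt.Println(verdict(true, false))  // REJECT
	fmt.Println(verdict(false, false)) // DROP
}
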
@@ -3919,27 +3946,33 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
	baseRules := dedent.Dedent(`
		add table ip kube-proxy { comment "rules for kube-proxy" ; }

		add chain ip kube-proxy endpoints-check
		add chain ip kube-proxy cluster-ips-check
		add chain ip kube-proxy filter-prerouting { type filter hook prerouting priority -110 ; }
		add chain ip kube-proxy filter-forward { type filter hook forward priority -110 ; }
		add chain ip kube-proxy filter-input { type filter hook input priority -110 ; }
		add chain ip kube-proxy filter-output { type filter hook output priority -110 ; }
		add chain ip kube-proxy filter-output-post-dnat { type filter hook output priority -90 ; }
		add chain ip kube-proxy firewall-check
		add chain ip kube-proxy mark-for-masquerade
		add chain ip kube-proxy masquerading
		add chain ip kube-proxy nat-output { type nat hook output priority -100 ; }
		add chain ip kube-proxy nat-postrouting { type nat hook postrouting priority 100 ; }
		add chain ip kube-proxy nat-prerouting { type nat hook prerouting priority -100 ; }
		add chain ip kube-proxy nodeport-endpoints-check
		add chain ip kube-proxy reject-chain { comment "helper for @no-endpoint-services / @no-endpoint-nodeports" ; }
		add chain ip kube-proxy services
		add chain ip kube-proxy service-endpoints-check

		add rule ip kube-proxy endpoints-check ip daddr . meta l4proto . th dport vmap @no-endpoint-services
		add rule ip kube-proxy endpoints-check fib daddr type local ip daddr != 127.0.0.0/8 meta l4proto . th dport vmap @no-endpoint-nodeports
		add rule ip kube-proxy cluster-ips-check ip daddr @cluster-ips reject comment "Reject traffic to invalid ports of ClusterIPs"
		add rule ip kube-proxy cluster-ips-check ip daddr { 172.30.0.0/16 } drop comment "Drop traffic to unallocated ClusterIPs"
		add rule ip kube-proxy filter-prerouting ct state new jump firewall-check
		add rule ip kube-proxy filter-forward ct state new jump endpoints-check
		add rule ip kube-proxy filter-input ct state new jump endpoints-check
		add rule ip kube-proxy filter-output ct state new jump endpoints-check
		add rule ip kube-proxy filter-forward ct state new jump service-endpoints-check
		add rule ip kube-proxy filter-forward ct state new jump cluster-ips-check
		add rule ip kube-proxy filter-input ct state new jump nodeport-endpoints-check
		add rule ip kube-proxy filter-input ct state new jump service-endpoints-check
		add rule ip kube-proxy filter-output ct state new jump service-endpoints-check
		add rule ip kube-proxy filter-output ct state new jump firewall-check
		add rule ip kube-proxy filter-output-post-dnat ct state new jump cluster-ips-check
		add rule ip kube-proxy firewall-check ip daddr . meta l4proto . th dport vmap @firewall-ips
		add rule ip kube-proxy mark-for-masquerade mark set mark or 0x4000
		add rule ip kube-proxy masquerading mark and 0x4000 == 0 return
@@ -3948,9 +3981,13 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
		add rule ip kube-proxy nat-output jump services
		add rule ip kube-proxy nat-postrouting jump masquerading
		add rule ip kube-proxy nat-prerouting jump services
		add rule ip kube-proxy nodeport-endpoints-check ip daddr != 127.0.0.0/8 meta l4proto . th dport vmap @no-endpoint-nodeports
		add rule ip kube-proxy reject-chain reject
		add rule ip kube-proxy services ip daddr . meta l4proto . th dport vmap @service-ips
		add rule ip kube-proxy services fib daddr type local ip daddr != 127.0.0.0/8 meta l4proto . th dport vmap @service-nodeports
		add rule ip kube-proxy service-endpoints-check ip daddr . meta l4proto . th dport vmap @no-endpoint-services

		add set ip kube-proxy cluster-ips { type ipv4_addr ; comment "Active ClusterIPs" ; }

		add map ip kube-proxy firewall-ips { type ipv4_addr . inet_proto . inet_service : verdict ; comment "destinations that are subject to LoadBalancerSourceRanges" ; }
		add map ip kube-proxy no-endpoint-nodeports { type inet_proto . inet_service : verdict ; comment "vmap to drop or reject packets to service nodeports with no endpoints" ; }
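Two details of the rewritten base ruleset deserve a note. First, the old endpoints-check chain is split: service-endpoints-check hangs off all three filter hooks, while nodeport-endpoints-check is reachable only from input. Second, for locally generated traffic, cluster-ips-check is hooked at output priority -90, just after the nat output hook at -100, so it only ever sees packets that no service-ips entry already rewrote; that ordering is what makes "reject invalid ports, drop unallocated IPs" safe. A sketch of that post-DNAT base chain, assuming the knftables API vendored here (Chain, PtrTo, ChainPriority):

package main

import (
	"context"
	"fmt"

	"github.com/danwinship/knftables"
)

func main() {
	nft := knftables.NewFake(knftables.IPv4Family, "kube-proxy")
	tx := nft.NewTransaction()
	tx.Add(&knftables.Table{})

	// nat output (DNAT) runs at priority -100; a filter hook at -90 runs
	// just after it, so only packets that service-ips did NOT rewrite
	// ever reach cluster-ips-check.
	tx.Add(&knftables.Chain{
		Name:     "filter-output-post-dnat",
		Type:     knftables.PtrTo(knftables.FilterType),
		Hook:     knftables.PtrTo(knftables.OutputHook),
		Priority: knftables.PtrTo(knftables.ChainPriority("-90")),
	})
	tx.Add(&knftables.Chain{Name: "cluster-ips-check"})
	tx.Add(&knftables.Rule{
		Chain: "filter-output-post-dnat",
		Rule:  "ct state new jump cluster-ips-check",
	})

	if err := nft.Run(context.Background(), tx); err != nil {
		panic(err)
	}
	fmt.Print(nft.Dump())
}
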
@@ -4020,6 +4057,8 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
	fp.syncProxyRules()

	expected := baseRules + dedent.Dedent(`
		add element ip kube-proxy cluster-ips { 172.30.0.41 }
		add element ip kube-proxy cluster-ips { 172.30.0.42 }
		add element ip kube-proxy service-ips { 172.30.0.41 . tcp . 80 : goto service-ULMVA6XW-ns1/svc1/tcp/p80 }
		add element ip kube-proxy service-ips { 172.30.0.42 . tcp . 8080 : goto service-MHHHYRWA-ns2/svc2/tcp/p8080 }

@@ -4069,6 +4108,9 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
	fp.syncProxyRules()

	expected = baseRules + dedent.Dedent(`
		add element ip kube-proxy cluster-ips { 172.30.0.41 }
		add element ip kube-proxy cluster-ips { 172.30.0.42 }
		add element ip kube-proxy cluster-ips { 172.30.0.43 }
		add element ip kube-proxy service-ips { 172.30.0.41 . tcp . 80 : goto service-ULMVA6XW-ns1/svc1/tcp/p80 }
		add element ip kube-proxy service-ips { 172.30.0.42 . tcp . 8080 : goto service-MHHHYRWA-ns2/svc2/tcp/p8080 }
		add element ip kube-proxy service-ips { 172.30.0.43 . tcp . 80 : goto service-4AT6LBPK-ns3/svc3/tcp/p80 }
@@ -4100,6 +4142,8 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
	fp.OnServiceDelete(svc2)
	fp.syncProxyRules()
	expected = baseRules + dedent.Dedent(`
		add element ip kube-proxy cluster-ips { 172.30.0.41 }
		add element ip kube-proxy cluster-ips { 172.30.0.43 }
		add element ip kube-proxy service-ips { 172.30.0.41 . tcp . 80 : goto service-ULMVA6XW-ns1/svc1/tcp/p80 }
		add element ip kube-proxy service-ips { 172.30.0.43 . tcp . 80 : goto service-4AT6LBPK-ns3/svc3/tcp/p80 }

@@ -4126,6 +4170,8 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
	ageStaleChains()
	fp.syncProxyRules()
	expected = baseRules + dedent.Dedent(`
		add element ip kube-proxy cluster-ips { 172.30.0.41 }
		add element ip kube-proxy cluster-ips { 172.30.0.43 }
		add element ip kube-proxy service-ips { 172.30.0.41 . tcp . 80 : goto service-ULMVA6XW-ns1/svc1/tcp/p80 }
		add element ip kube-proxy service-ips { 172.30.0.43 . tcp . 80 : goto service-4AT6LBPK-ns3/svc3/tcp/p80 }

@@ -4159,6 +4205,9 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
	)
	fp.syncProxyRules()
	expected = baseRules + dedent.Dedent(`
		add element ip kube-proxy cluster-ips { 172.30.0.41 }
		add element ip kube-proxy cluster-ips { 172.30.0.43 }
		add element ip kube-proxy cluster-ips { 172.30.0.44 }
		add element ip kube-proxy service-ips { 172.30.0.41 . tcp . 80 : goto service-ULMVA6XW-ns1/svc1/tcp/p80 }
		add element ip kube-proxy service-ips { 172.30.0.43 . tcp . 80 : goto service-4AT6LBPK-ns3/svc3/tcp/p80 }

@@ -4195,6 +4244,9 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
	)
	fp.syncProxyRules()
	expected = baseRules + dedent.Dedent(`
		add element ip kube-proxy cluster-ips { 172.30.0.41 }
		add element ip kube-proxy cluster-ips { 172.30.0.43 }
		add element ip kube-proxy cluster-ips { 172.30.0.44 }
		add element ip kube-proxy service-ips { 172.30.0.41 . tcp . 80 : goto service-ULMVA6XW-ns1/svc1/tcp/p80 }
		add element ip kube-proxy service-ips { 172.30.0.43 . tcp . 80 : goto service-4AT6LBPK-ns3/svc3/tcp/p80 }
		add element ip kube-proxy service-ips { 172.30.0.44 . tcp . 80 : goto service-LAUZTJTB-ns4/svc4/tcp/p80 }
@@ -4230,6 +4282,9 @@ func TestSyncProxyRulesRepeated(t *testing.T) {

	// The old endpoint chain (for 10.0.3.1) will not be deleted yet.
	expected = baseRules + dedent.Dedent(`
		add element ip kube-proxy cluster-ips { 172.30.0.41 }
		add element ip kube-proxy cluster-ips { 172.30.0.43 }
		add element ip kube-proxy cluster-ips { 172.30.0.44 }
		add element ip kube-proxy service-ips { 172.30.0.41 . tcp . 80 : goto service-ULMVA6XW-ns1/svc1/tcp/p80 }
		add element ip kube-proxy service-ips { 172.30.0.43 . tcp . 80 : goto service-4AT6LBPK-ns3/svc3/tcp/p80 }
		add element ip kube-proxy service-ips { 172.30.0.44 . tcp . 80 : goto service-LAUZTJTB-ns4/svc4/tcp/p80 }
@@ -4268,6 +4323,9 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
	fp.syncProxyRules()

	expected = baseRules + dedent.Dedent(`
		add element ip kube-proxy cluster-ips { 172.30.0.41 }
		add element ip kube-proxy cluster-ips { 172.30.0.43 }
		add element ip kube-proxy cluster-ips { 172.30.0.44 }
		add element ip kube-proxy service-ips { 172.30.0.41 . tcp . 80 : goto service-ULMVA6XW-ns1/svc1/tcp/p80 }
		add element ip kube-proxy service-ips { 172.30.0.43 . tcp . 80 : goto service-4AT6LBPK-ns3/svc3/tcp/p80 }
		add element ip kube-proxy service-ips { 172.30.0.44 . tcp . 80 : goto service-LAUZTJTB-ns4/svc4/tcp/p80 }
@@ -4304,6 +4362,9 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
	fp.OnEndpointSliceUpdate(eps3update2, eps3update3)
	fp.syncProxyRules()
	expected = baseRules + dedent.Dedent(`
		add element ip kube-proxy cluster-ips { 172.30.0.41 }
		add element ip kube-proxy cluster-ips { 172.30.0.43 }
		add element ip kube-proxy cluster-ips { 172.30.0.44 }
		add element ip kube-proxy service-ips { 172.30.0.41 . tcp . 80 : goto service-ULMVA6XW-ns1/svc1/tcp/p80 }
		add element ip kube-proxy no-endpoint-services { 172.30.0.43 . tcp . 80 comment "ns3/svc3:p80" : goto reject-chain }
		add element ip kube-proxy service-ips { 172.30.0.44 . tcp . 80 : goto service-LAUZTJTB-ns4/svc4/tcp/p80 }
@@ -4336,6 +4397,9 @@ func TestSyncProxyRulesRepeated(t *testing.T) {
	fp.OnEndpointSliceUpdate(eps3update3, eps3update2)
	fp.syncProxyRules()
	expected = baseRules + dedent.Dedent(`
		add element ip kube-proxy cluster-ips { 172.30.0.41 }
		add element ip kube-proxy cluster-ips { 172.30.0.43 }
		add element ip kube-proxy cluster-ips { 172.30.0.44 }
		add element ip kube-proxy service-ips { 172.30.0.41 . tcp . 80 : goto service-ULMVA6XW-ns1/svc1/tcp/p80 }
		add element ip kube-proxy service-ips { 172.30.0.43 . tcp . 80 : goto service-4AT6LBPK-ns3/svc3/tcp/p80 }
		add element ip kube-proxy service-ips { 172.30.0.44 . tcp . 80 : goto service-LAUZTJTB-ns4/svc4/tcp/p80 }
@@ -4808,3 +4872,21 @@ func Test_servicePortEndpointChainNameBase(t *testing.T) {
		})
	}
}

func TestProxier_OnServiceCIDRsChanged(t *testing.T) {
	var proxier *Proxier

	proxier = &Proxier{ipFamily: v1.IPv4Protocol}
	proxier.OnServiceCIDRsChanged([]string{"172.30.0.0/16", "fd00:10:96::/112"})
	assert.Equal(t, proxier.serviceCIDRs, "172.30.0.0/16")

	proxier.OnServiceCIDRsChanged([]string{"172.30.0.0/16", "172.50.0.0/16", "fd00:10:96::/112", "fd00:172:30::/112"})
	assert.Equal(t, proxier.serviceCIDRs, "172.30.0.0/16,172.50.0.0/16")

	proxier = &Proxier{ipFamily: v1.IPv6Protocol}
	proxier.OnServiceCIDRsChanged([]string{"172.30.0.0/16", "fd00:10:96::/112"})
	assert.Equal(t, proxier.serviceCIDRs, "fd00:10:96::/112")

	proxier.OnServiceCIDRsChanged([]string{"172.30.0.0/16", "172.50.0.0/16", "fd00:10:96::/112", "fd00:172:30::/112"})
	assert.Equal(t, proxier.serviceCIDRs, "fd00:10:96::/112,fd00:172:30::/112")
}

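The new test pins down the contract of OnServiceCIDRsChanged: keep only the CIDRs matching the proxier's IP family, preserve their order, and store them as a single comma-joined string. A standalone sketch of that filtering logic (ours, not the PR's implementation):

package main

import (
	"fmt"
	"strings"
)

// ipFamilyFilter mimics what an OnServiceCIDRsChanged implementation that
// satisfies the test above has to do; it is a sketch of the contract the
// test asserts, not the PR's actual code.
func ipFamilyFilter(cidrs []string, v6 bool) string {
	var kept []string
	for _, cidr := range cidrs {
		// CIDR strings for IPv6 contain a colon; a cheap family test.
		if strings.Contains(cidr, ":") == v6 {
			kept = append(kept, cidr)
		}
	}
	// Order-preserving join, matching "172.30.0.0/16,172.50.0.0/16" above.
	return strings.Join(kept, ",")
}

func main() {
	in := []string{"172.30.0.0/16", "fd00:10:96::/112"}
	fmt.Println(ipFamilyFilter(in, false)) // 172.30.0.0/16
	fmt.Println(ipFamilyFilter(in, true))  // fd00:10:96::/112
}
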
@@ -29,6 +29,7 @@ type Provider interface {
	config.EndpointSliceHandler
	config.ServiceHandler
	config.NodeHandler
	config.ServiceCIDRHandler

	// Sync immediately synchronizes the Provider's current state to proxy rules.
	Sync()

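With config.ServiceCIDRHandler embedded in Provider, every backend must now implement OnServiceCIDRsChanged, even if only as a stub (as winkernel does in the next hunk). Inside the kubernetes tree, a conventional compile-time assertion makes the new requirement explicit; this snippet is illustrative, not part of the PR:

package main

import (
	"k8s.io/kubernetes/pkg/proxy/config"
)

// stubProxier is a hypothetical backend showing the minimal surface the
// new interface member demands.
type stubProxier struct{}

func (s *stubProxier) OnServiceCIDRsChanged(cidrs []string) {}

// Compile-time check against just the new handler interface.
var _ config.ServiceCIDRHandler = &stubProxier{}

func main() {}
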
@@ -1009,6 +1009,10 @@ func (proxier *Proxier) OnEndpointSlicesSynced() {
	proxier.syncProxyRules()
}

// OnServiceCIDRsChanged is called whenever a change is observed
// in any of the ServiceCIDRs, and provides complete list of service cidrs.
func (proxier *Proxier) OnServiceCIDRsChanged(_ []string) {}

func (proxier *Proxier) cleanupAllPolicies() {
	for svcName, svc := range proxier.svcPortMap {
		svcInfo, ok := svc.(*serviceInfo)

@@ -534,6 +534,10 @@ func ClusterRoles() []rbacv1.ClusterRole {

		eventsRule(),
	}
	if utilfeature.DefaultFeatureGate.Enabled(features.MultiCIDRServiceAllocator) {
		nodeProxierRules = append(nodeProxierRules, rbacv1helpers.NewRule("list", "watch").Groups(networkingGroup).Resources("servicecidrs").RuleOrDie())
	}

	nodeProxierRules = append(nodeProxierRules, rbacv1helpers.NewRule("list", "watch").Groups(discoveryGroup).Resources("endpointslices").RuleOrDie())
	roles = append(roles, rbacv1.ClusterRole{
		ObjectMeta: metav1.ObjectMeta{Name: "system:node-proxier"},

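The RBAC change is gated the same way as the watch itself: system:node-proxier gains list/watch on servicecidrs only when MultiCIDRServiceAllocator is enabled, so the role stays minimal otherwise. Spelled out, the appended rule is equivalent to this PolicyRule (networkingGroup is "networking.k8s.io" in the bootstrap policy):

package main

import (
	"fmt"

	rbacv1 "k8s.io/api/rbac/v1"
)

func main() {
	// The permission the gated branch above grants to kube-proxy.
	rule := rbacv1.PolicyRule{
		Verbs:     []string{"list", "watch"},
		APIGroups: []string{"networking.k8s.io"},
		Resources: []string{"servicecidrs"},
	}
	fmt.Printf("%+v\n", rule)
}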