kube-proxy: change implementation of LoadBalancerSourceRanges for wider kernel support
The nftables implementation used concatenation of ranges when creating the "firewall-allow" set, but that feature is not available before kernel 5.6, so nftables mode could not run on earlier kernels, even though 5.4 is still widely used. An alternative to concatenation of ranges is to create a separate firewall chain for every service port that needs firewalling and to jump to that service's firewall chain from the common firewall chain via a rule with a verdict map (vmap). Renaming "firewall" to "firewall-ips" is required when changing the set to a map so that existing clusters can upgrade; otherwise creating the map would fail. Besides, "firewall-ips" corresponds to the "service-ips" map; later we can add "firewall-nodeports" if it's determined that NodePort traffic should be subject to LoadBalancerSourceRanges.

Signed-off-by: Quan Tian <qtian@vmware.com>
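To make the kernel requirement concrete, here is a rough sketch (not actual kube-proxy output; table, family, and base chains omitted) of the old objects for a hypothetical LoadBalancer Service with VIP 192.0.2.10, port 80/TCP, and LoadBalancerSourceRanges ["203.0.113.0/24"]. The "firewall-allow" set concatenates a range type (the source CIDR) with other types, which is exactly the "concatenation of ranges" feature that only exists in kernel 5.6+:

set firewall {
	type ipv4_addr . inet_proto . inet_service
	elements = { 192.0.2.10 . tcp . 80 }
}
set firewall-allow {
	type ipv4_addr . inet_proto . inet_service . ipv4_addr
	flags interval
	elements = { 192.0.2.10 . tcp . 80 . 203.0.113.0/24 }
}

The new layout expresses the same policy without any concatenated range: a verdict map keyed only on the destination dispatches to a per-service chain (the chain name below is a stand-in), and that chain checks the allowed sources with an ordinary anonymous set:

map firewall-ips {
	type ipv4_addr . inet_proto . inet_service : verdict
	elements = { 192.0.2.10 . tcp . 80 : goto firewall-EXAMPLE }
}
chain firewall-EXAMPLE {
	ip saddr != { 203.0.113.0/24 } drop
}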
@@ -76,10 +76,8 @@ const (
 	kubeRejectChain = "reject-chain"
 
 	// LoadBalancerSourceRanges handling
-	kubeFirewallSet = "firewall"
-	kubeFirewallCheckChain = "firewall-check"
-	kubeFirewallAllowSet = "firewall-allow"
-	kubeFirewallAllowCheckChain = "firewall-allow-check"
+	kubeFirewallIPsMap = "firewall-ips"
+	kubeFirewallCheckChain = "firewall-check"
 
 	// masquerading
 	kubeMarkMasqChain = "mark-for-masquerade"
@@ -99,6 +97,7 @@ type servicePortInfo struct {
 	clusterPolicyChainName string
 	localPolicyChainName string
 	externalChainName string
+	firewallChainName string
 }
 
 // returns a new proxy.ServicePort which abstracts a serviceInfo
@@ -114,6 +113,7 @@ func newServiceInfo(port *v1.ServicePort, service *v1.Service, bsvcPortInfo *pro
 	svcPort.clusterPolicyChainName = servicePortPolicyClusterChainNamePrefix + chainNameBase
 	svcPort.localPolicyChainName = servicePortPolicyLocalChainNamePrefix + chainNameBase
 	svcPort.externalChainName = serviceExternalChainNamePrefix + chainNameBase
+	svcPort.firewallChainName = servicePortFirewallChainNamePrefix + chainNameBase
 
 	return svcPort
 }
@@ -543,38 +543,20 @@ func (proxier *Proxier) setupNFTables(tx *knftables.Transaction) {
 	}
 
 	// Set up LoadBalancerSourceRanges firewalling
-	tx.Add(&knftables.Set{
-		Name: kubeFirewallSet,
-		Type: ipvX_addr + " . inet_proto . inet_service",
+	tx.Add(&knftables.Map{
+		Name: kubeFirewallIPsMap,
+		Type: ipvX_addr + " . inet_proto . inet_service : verdict",
 		Comment: ptr.To("destinations that are subject to LoadBalancerSourceRanges"),
 	})
-	tx.Add(&knftables.Set{
-		Name: kubeFirewallAllowSet,
-		Type: ipvX_addr + " . inet_proto . inet_service . " + ipvX_addr,
-		Flags: []knftables.SetFlag{knftables.IntervalFlag},
-		Comment: ptr.To("destinations+sources that are allowed by LoadBalancerSourceRanges"),
-	})
 
 	ensureChain(kubeFirewallCheckChain, tx, createdChains)
-	ensureChain(kubeFirewallAllowCheckChain, tx, createdChains)
 	tx.Add(&knftables.Rule{
 		Chain: kubeFirewallCheckChain,
 		Rule: knftables.Concat(
-			ipX, "daddr", ".", "meta l4proto", ".", "th dport", "@", kubeFirewallSet,
-			"jump", kubeFirewallAllowCheckChain,
+			ipX, "daddr", ".", "meta l4proto", ".", "th dport",
+			"vmap", "@", kubeFirewallIPsMap,
 		),
 	})
-	tx.Add(&knftables.Rule{
-		Chain: kubeFirewallAllowCheckChain,
-		Rule: knftables.Concat(
-			ipX, "daddr", ".", "meta l4proto", ".", "th dport", ".", ipX, "saddr", "@", kubeFirewallAllowSet,
-			"return",
-		),
-	})
-	tx.Add(&knftables.Rule{
-		Chain: kubeFirewallAllowCheckChain,
-		Rule: "drop",
-	})
 
 	// Set up service dispatch
 	tx.Add(&knftables.Map{
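Roughly, the base objects created in the transaction above should render as follows (a sketch; IPv4 family shown, the map elements and per-service chains are only added later in syncProxyRules):

map firewall-ips {
	type ipv4_addr . inet_proto . inet_service : verdict
}
chain firewall-check {
	ip daddr . meta l4proto . th dport vmap @firewall-ips
}

A destination . protocol . port tuple that is not in the map simply falls through the vmap rule, so services without LoadBalancerSourceRanges are unaffected.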
@@ -824,6 +806,7 @@ const (
 	serviceExternalChainNamePrefix = "external-"
 	servicePortEndpointChainNamePrefix = "endpoint-"
 	servicePortEndpointAffinityNamePrefix = "affinity-"
+	servicePortFirewallChainNamePrefix = "firewall-"
 )
 
 // hashAndTruncate prefixes name with a hash of itself and then truncates to
@@ -998,11 +981,8 @@ func (proxier *Proxier) syncProxyRules() {
 	}
 
 	// We currently fully-rebuild our sets and maps on each resync
-	tx.Flush(&knftables.Set{
-		Name: kubeFirewallSet,
-	})
-	tx.Flush(&knftables.Set{
-		Name: kubeFirewallAllowSet,
+	tx.Flush(&knftables.Map{
+		Name: kubeFirewallIPsMap,
 	})
 	tx.Flush(&knftables.Map{
 		Name: kubeNoEndpointServicesMap,
@@ -1205,6 +1185,44 @@ func (proxier *Proxier) syncProxyRules() {
 			}
 		}
 
+		usesFWChain := len(svcInfo.LoadBalancerVIPStrings()) > 0 && len(svcInfo.LoadBalancerSourceRanges()) > 0
+		fwChain := svcInfo.firewallChainName
+		if usesFWChain {
+			ensureChain(fwChain, tx, activeChains)
+			var sources []string
+			allowFromNode := false
+			for _, src := range svcInfo.LoadBalancerSourceRanges() {
+				_, cidr, _ := netutils.ParseCIDRSloppy(src)
+				if cidr == nil {
+					continue
+				}
+				if len(sources) > 0 {
+					sources = append(sources, ",")
+				}
+				sources = append(sources, src)
+				if cidr.Contains(proxier.nodeIP) {
+					allowFromNode = true
+				}
+			}
+			// For VIP-like LBs, the VIP is often added as a local
+			// address (via an IP route rule). In that case, a request
+			// from a node to the VIP will not hit the loadbalancer but
+			// will loop back with the source IP set to the VIP. We
+			// need the following rules to allow requests from this node.
+			if allowFromNode {
+				for _, lbip := range svcInfo.LoadBalancerVIPStrings() {
+					sources = append(sources, ",", lbip)
+				}
+			}
+			tx.Add(&knftables.Rule{
+				Chain: fwChain,
+				Rule: knftables.Concat(
+					ipX, "saddr", "!=", "{", sources, "}",
+					"drop",
+				),
+			})
+		}
+
 		// Capture load-balancer ingress.
 		for _, lbip := range svcInfo.LoadBalancerVIPStrings() {
 			if hasEndpoints {
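As a sketch of what this per-service chain ends up containing (made-up values: ranges 203.0.113.0/24 and 198.51.100.0/24, VIP 192.0.2.10, node IP inside the first range, stand-in chain name):

chain firewall-EXAMPLE {
	ip saddr != { 203.0.113.0/24, 198.51.100.0/24, 192.0.2.10 } drop
}

The VIP itself is appended to the allowed sources only when one of the ranges contains the node IP, which covers the hairpin case described in the comment above.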
@@ -1221,53 +1239,19 @@ func (proxier *Proxier) syncProxyRules() {
 				})
 			}
 
-			if len(svcInfo.LoadBalancerSourceRanges()) > 0 {
+			if usesFWChain {
 				tx.Add(&knftables.Element{
-					Set: kubeFirewallSet,
+					Map: kubeFirewallIPsMap,
 					Key: []string{
 						lbip,
 						protocol,
 						strconv.Itoa(svcInfo.Port()),
 					},
+					Value: []string{
+						fmt.Sprintf("goto %s", fwChain),
+					},
 					Comment: &svcPortNameString,
 				})
-
-				allowFromNode := false
-				for _, src := range svcInfo.LoadBalancerSourceRanges() {
-					_, cidr, _ := netutils.ParseCIDRSloppy(src)
-					if cidr == nil {
-						continue
-					}
-					tx.Add(&knftables.Element{
-						Set: kubeFirewallAllowSet,
-						Key: []string{
-							lbip,
-							protocol,
-							strconv.Itoa(svcInfo.Port()),
-							src,
-						},
-						Comment: &svcPortNameString,
-					})
-					if cidr.Contains(proxier.nodeIP) {
-						allowFromNode = true
-					}
-				}
-				// For VIP-like LBs, the VIP is often added as a local
-				// address (via an IP route rule). In that case, a request
-				// from a node to the VIP will not hit the loadbalancer but
-				// will loop back with the source IP set to the VIP. We
-				// need the following rules to allow requests from this node.
-				if allowFromNode {
-					tx.Add(&knftables.Element{
-						Set: kubeFirewallAllowSet,
-						Key: []string{
-							lbip,
-							protocol,
-							strconv.Itoa(svcInfo.Port()),
-							lbip,
-						},
-					})
-				}
 			}
 		}
 		if !hasExternalEndpoints {
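So each load-balancer IP of such a service gets one element in the verdict map pointing at its firewall chain; with the same made-up values as above, roughly:

map firewall-ips {
	elements = { 192.0.2.10 . tcp . 80 : goto firewall-EXAMPLE }
}

For the IPv6 proxier the same structure is built with the ip6/ipv6_addr equivalents, which is what the ipX and ipvX_addr variables in the code abstract over.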