Kube-proxy: perf-enhancement: Reduce NAT table KUBE-SERVICES/NODEPORTS chain rules

The nat KUBE-SERVICES chain is called from OUTPUT and PREROUTING stages. In
clusters with large number of services, the nat-KUBE-SERVICES chain is the largest
chain with for eg: 33k rules. This patch aims to move the KubeMarkMasq rules from
the kubeServicesChain into the respective KUBE-SVC-* chains. This way during each
packet-rule matching we won't have to traverse the MASQ rules of all services which
get accumulated in the KUBE-SERVICES and/or KUBE-NODEPORTS chains. Since the
jump to KUBE-MARK-MASQ ultimately sets the 0x400 mark for nodeIP SNAT, it should not
matter whether the jump is made from KUBE-SERVICES or KUBE-SVC-* chains.

Specifically we change:

1) For ClusterIP svc, we move the KUBE-MARK-MASQ jump rule from KUBE-SERVICES
chain into KUBE-SVC-* chain.
2) For ExternalIP svc, we move the KUBE-MARK-MASQ jump rule in the case of
non-ServiceExternalTrafficPolicyTypeLocal from KUBE-SERVICES
chain into KUBE-SVC-* chain.
3) For NodePorts svc, we move the KUBE-MARK-MASQ jump rule in case of
non-ServiceExternalTrafficPolicyTypeLocal from KUBE-NODEPORTS chain to
KUBE-SVC-* chain.
4) For load-balancer svc, we don't change anything since it is already svc specific
due to creation of KUBE-FW-* chains per svc.

This would cut the rules per svc in KUBE-SERVICES and KUBE-NODEPORTS in half.
This commit is contained in:
Surya Seetharaman
2020-11-25 15:19:54 +01:00
parent 667e50abc8
commit d3fe48e848
4 changed files with 156 additions and 24 deletions

View File

@@ -1062,23 +1062,22 @@ func (proxier *Proxier) syncProxyRules() {
// Capture the clusterIP.
if hasEndpoints {
args = append(args[:0],
"-A", string(kubeServicesChain),
"-m", "comment", "--comment", fmt.Sprintf(`"%s cluster IP"`, svcNameString),
"-m", protocol, "-p", protocol,
"-d", utilproxy.ToCIDR(svcInfo.ClusterIP()),
"--dport", strconv.Itoa(svcInfo.Port()),
)
if proxier.masqueradeAll {
utilproxy.WriteLine(proxier.natRules, append(args, "-j", string(KubeMarkMasqChain))...)
utilproxy.WriteRuleLine(proxier.natRules, string(svcChain), append(args, "-j", string(KubeMarkMasqChain))...)
} else if proxier.localDetector.IsImplemented() {
// This masquerades off-cluster traffic to a service VIP. The idea
// is that you can establish a static route for your Service range,
// routing to any node, and that node will bridge into the Service
// for you. Since that might bounce off-node, we masquerade here.
// If/when we support "Local" policy for VIPs, we should update this.
utilproxy.WriteLine(proxier.natRules, proxier.localDetector.JumpIfNotLocal(args, string(KubeMarkMasqChain))...)
utilproxy.WriteRuleLine(proxier.natRules, string(svcChain), proxier.localDetector.JumpIfNotLocal(args, string(KubeMarkMasqChain))...)
}
utilproxy.WriteLine(proxier.natRules, append(args, "-j", string(svcChain))...)
utilproxy.WriteRuleLine(proxier.natRules, string(kubeServicesChain), append(args, "-j", string(svcChain))...)
} else {
// No endpoints.
utilproxy.WriteLine(proxier.filterRules,
@@ -1129,7 +1128,6 @@ func (proxier *Proxier) syncProxyRules() {
if hasEndpoints {
args = append(args[:0],
"-A", string(kubeServicesChain),
"-m", "comment", "--comment", fmt.Sprintf(`"%s external IP"`, svcNameString),
"-m", protocol, "-p", protocol,
"-d", utilproxy.ToCIDR(net.ParseIP(externalIP)),
@@ -1145,13 +1143,13 @@ func (proxier *Proxier) syncProxyRules() {
destChain = svcChain
// This masquerades off-cluster traffic to a External IP.
if proxier.localDetector.IsImplemented() {
utilproxy.WriteLine(proxier.natRules, proxier.localDetector.JumpIfNotLocal(args, string(KubeMarkMasqChain))...)
utilproxy.WriteRuleLine(proxier.natRules, string(svcChain), proxier.localDetector.JumpIfNotLocal(args, string(KubeMarkMasqChain))...)
} else {
utilproxy.WriteLine(proxier.natRules, append(args, "-j", string(KubeMarkMasqChain))...)
utilproxy.WriteRuleLine(proxier.natRules, string(svcChain), append(args, "-j", string(KubeMarkMasqChain))...)
}
}
// Sent traffic bound for external IPs to the service chain.
utilproxy.WriteLine(proxier.natRules, append(args, "-j", string(destChain))...)
// Send traffic bound for external IPs to the service chain.
utilproxy.WriteRuleLine(proxier.natRules, string(kubeServicesChain), append(args, "-j", string(destChain))...)
} else {
// No endpoints.
@@ -1302,16 +1300,15 @@ func (proxier *Proxier) syncProxyRules() {
if hasEndpoints {
args = append(args[:0],
"-A", string(kubeNodePortsChain),
"-m", "comment", "--comment", svcNameString,
"-m", protocol, "-p", protocol,
"--dport", strconv.Itoa(svcInfo.NodePort()),
)
if !svcInfo.NodeLocalExternal() {
// Nodeports need SNAT, unless they're local.
utilproxy.WriteLine(proxier.natRules, append(args, "-j", string(KubeMarkMasqChain))...)
utilproxy.WriteRuleLine(proxier.natRules, string(svcChain), append(args, "-j", string(KubeMarkMasqChain))...)
// Jump to the service chain.
utilproxy.WriteLine(proxier.natRules, append(args, "-j", string(svcChain))...)
utilproxy.WriteRuleLine(proxier.natRules, string(kubeNodePortsChain), append(args, "-j", string(svcChain))...)
} else {
// TODO: Make all nodePorts jump to the firewall chain.
// Currently we only create it for loadbalancers (#33586).
@@ -1321,8 +1318,8 @@ func (proxier *Proxier) syncProxyRules() {
if isIPv6 {
loopback = "::1/128"
}
utilproxy.WriteLine(proxier.natRules, append(args, "-s", loopback, "-j", string(KubeMarkMasqChain))...)
utilproxy.WriteLine(proxier.natRules, append(args, "-j", string(svcXlbChain))...)
utilproxy.WriteRuleLine(proxier.natRules, string(kubeNodePortsChain), append(args, "-s", loopback, "-j", string(KubeMarkMasqChain))...)
utilproxy.WriteRuleLine(proxier.natRules, string(kubeNodePortsChain), append(args, "-j", string(svcXlbChain))...)
}
} else {
// No endpoints.