kube-proxy: all external jumps to XLB chain

This makes the "destination" policy model clearer.  All external
destination captures now jump to the "XLB" chain, which is the main place
that masquerade is done (removing it from most other places).

This is simpler to trace - XLB *always* exists (as long as you have an
external exposure) and never gets bypassed.
This commit is contained in:
Tim Hockin
2022-03-27 00:11:30 -07:00
parent dd0fc6b354
commit 482f3bc4bf
2 changed files with 183 additions and 168 deletions

View File

@@ -116,8 +116,8 @@ type serviceInfo struct {
*proxy.BaseServiceInfo
// The following fields are computed and stored for performance reasons.
nameString string
policyClusterChainName utiliptables.Chain
policyLocalChainName utiliptables.Chain
clusterPolicyChainName utiliptables.Chain
localPolicyChainName utiliptables.Chain
firewallChainName utiliptables.Chain
xlbChainName utiliptables.Chain
}
@@ -131,8 +131,8 @@ func newServiceInfo(port *v1.ServicePort, service *v1.Service, baseInfo *proxy.B
svcPortName := proxy.ServicePortName{NamespacedName: svcName, Port: port.Name}
protocol := strings.ToLower(string(info.Protocol()))
info.nameString = svcPortName.String()
info.policyClusterChainName = servicePortPolicyClusterChain(info.nameString, protocol)
info.policyLocalChainName = servicePortPolicyLocalChainName(info.nameString, protocol)
info.clusterPolicyChainName = servicePortPolicyClusterChain(info.nameString, protocol)
info.localPolicyChainName = servicePortPolicyLocalChainName(info.nameString, protocol)
info.firewallChainName = serviceFirewallChainName(info.nameString, protocol)
info.xlbChainName = serviceLBChainName(info.nameString, protocol)
@@ -712,8 +712,8 @@ func serviceFirewallChainName(servicePortName string, protocol string) utiliptab
}
// serviceLBChainName returns the name of the KUBE-XLB-XXXX chain for a service, which
// implements "short-circuiting" for internally-originated load balancer traffic when using
// `Local` external traffic policy. It forwards traffic from local sources to the KUBE-SVC-XXXX
// implements "short-circuiting" for internally-originated external-destination traffic when using
// `Local` external traffic policy. It forwards traffic from local sources to the KUBE-SVC-XXXX
// chain and traffic from external sources to the KUBE-SVL-XXXX chain.
func serviceLBChainName(servicePortName string, protocol string) utiliptables.Chain {
return utiliptables.Chain(serviceLBChainNamePrefix + portProtoHash(servicePortName, protocol))
@@ -989,7 +989,7 @@ func (proxier *Proxier) syncProxyRules() {
}
}
// Build rules for each service.
// Build rules for each service-port.
for svcName, svc := range proxier.serviceMap {
svcInfo, ok := svc.(*serviceInfo)
if !ok {
@@ -1042,80 +1042,108 @@ func (proxier *Proxier) syncProxyRules() {
proxier.natRules.Write(args)
}
policyClusterChain := svcInfo.policyClusterChainName
policyLocalChain := svcInfo.policyLocalChainName
svcXlbChain := svcInfo.xlbChainName
internalTrafficChain := policyClusterChain
externalTrafficChain := policyClusterChain
// These chains represent the sets of endpoints to use when internal or
// external traffic policy is "Cluster" vs "Local".
clusterPolicyChain := svcInfo.clusterPolicyChainName
localPolicyChain := svcInfo.localPolicyChainName
// These chains designate which policy chain to use for internal- and
// external-destination traffic.
internalPolicyChain := clusterPolicyChain
externalPolicyChain := clusterPolicyChain
if svcInfo.NodeLocalInternal() {
internalTrafficChain = policyLocalChain
internalPolicyChain = localPolicyChain
}
if svcInfo.NodeLocalExternal() {
externalTrafficChain = svcXlbChain
externalPolicyChain = localPolicyChain
}
// These chains are where *ALL* rules which match traffic that is
// service-destined should jump. ClusterIP traffic is considered
// "internal" while NodePort, LoadBalancer, and ExternalIPs traffic is
// considered "external".
internalTrafficChain := internalPolicyChain
externalTrafficChain := svcInfo.xlbChainName // eventually jumps to externalPolicyChain
// Declare the clusterPolicyChain if needed.
if hasEndpoints && svcInfo.UsesClusterEndpoints() {
// Create the Cluster traffic policy chain, retaining counters if possible.
if chain, ok := existingNATChains[policyClusterChain]; ok {
if chain, ok := existingNATChains[clusterPolicyChain]; ok {
proxier.natChains.WriteBytes(chain)
} else {
proxier.natChains.Write(utiliptables.MakeChainLine(policyClusterChain))
proxier.natChains.Write(utiliptables.MakeChainLine(clusterPolicyChain))
}
activeNATChains[policyClusterChain] = true
}
if hasEndpoints && svcInfo.ExternallyAccessible() && svcInfo.NodeLocalExternal() {
if chain, ok := existingNATChains[svcXlbChain]; ok {
proxier.natChains.WriteBytes(chain)
} else {
proxier.natChains.Write(utiliptables.MakeChainLine(svcXlbChain))
}
activeNATChains[svcXlbChain] = true
// The XLB chain redirects all pod -> external VIP
// traffic to the Service's ClusterIP instead. This happens
// whether or not we have local endpoints; only if localDetector
// is implemented
if proxier.localDetector.IsImplemented() {
proxier.natRules.Write(
"-A", string(svcXlbChain),
"-m", "comment", "--comment",
`"Redirect pods trying to reach external loadbalancer VIP to clusterIP"`,
proxier.localDetector.IfLocal(),
"-j", string(policyClusterChain))
}
// Next, redirect all src-type=LOCAL -> LB IP to the service chain
// for externalTrafficPolicy=Local This allows traffic originating
// from the host to be redirected to the service correctly,
// otherwise traffic to LB IPs are dropped if there are no local
// endpoints.
proxier.natRules.Write(
"-A", string(svcXlbChain),
"-m", "comment", "--comment", fmt.Sprintf(`"masquerade LOCAL traffic for %s LB IP"`, svcNameString),
"-m", "addrtype", "--src-type", "LOCAL",
"-j", string(KubeMarkMasqChain))
proxier.natRules.Write(
"-A", string(svcXlbChain),
"-m", "comment", "--comment", fmt.Sprintf(`"route LOCAL traffic for %s LB IP to service chain"`, svcNameString),
"-m", "addrtype", "--src-type", "LOCAL",
"-j", string(policyClusterChain))
// Everything else goes to the SVL chain
proxier.natRules.Write(
"-A", string(svcXlbChain),
"-j", string(policyLocalChain))
activeNATChains[clusterPolicyChain] = true
}
// Declare the localPolicyChain if needed.
if hasEndpoints && svcInfo.UsesLocalEndpoints() {
if chain, ok := existingNATChains[policyLocalChain]; ok {
if chain, ok := existingNATChains[localPolicyChain]; ok {
proxier.natChains.WriteBytes(chain)
} else {
proxier.natChains.Write(utiliptables.MakeChainLine(policyLocalChain))
proxier.natChains.Write(utiliptables.MakeChainLine(localPolicyChain))
}
activeNATChains[policyLocalChain] = true
activeNATChains[localPolicyChain] = true
}
// If any "external" destinations are enabled, set up external traffic
// handling. All captured traffic for all external destinations should
// jump to externalTrafficChain, which will handle some special-cases
// and then jump to externalPolicyChain.
if hasEndpoints && svcInfo.ExternallyAccessible() {
if chain, ok := existingNATChains[externalTrafficChain]; ok {
proxier.natChains.WriteBytes(chain)
} else {
proxier.natChains.Write(utiliptables.MakeChainLine(externalTrafficChain))
}
activeNATChains[externalTrafficChain] = true
if !svcInfo.NodeLocalExternal() {
// If we are using non-local endpoints we need to masquerade,
// in case we cross nodes.
proxier.natRules.Write(
"-A", string(externalTrafficChain),
"-m", "comment", "--comment", fmt.Sprintf(`"masquerade traffic for %s external destinations"`, svcNameString),
"-j", string(KubeMarkMasqChain))
} else {
// If we are only using same-node endpoints, we can retain the
// source IP in most cases.
if proxier.localDetector.IsImplemented() {
// Treat all locally-originated pod -> external destination
// traffic as a special-case. It is subject to neither
// form of traffic policy, which simulates going up-and-out
// to an external load-balancer and coming back in.
proxier.natRules.Write(
"-A", string(externalTrafficChain),
"-m", "comment", "--comment", fmt.Sprintf(`"pod traffic for %s external destinations"`, svcNameString),
proxier.localDetector.IfLocal(),
"-j", string(clusterPolicyChain))
}
// Locally originated traffic (not a pod, but the host node)
// still needs masquerade because the LBIP itself is a local
// address, so that will be the chosen source IP.
proxier.natRules.Write(
"-A", string(externalTrafficChain),
"-m", "comment", "--comment", fmt.Sprintf(`"masquerade LOCAL traffic for %s external destinations"`, svcNameString),
"-m", "addrtype", "--src-type", "LOCAL",
"-j", string(KubeMarkMasqChain))
// Redirect all src-type=LOCAL -> external destination to the
// policy=cluster chain. This allows traffic originating
// from the host to be redirected to the service correctly.
proxier.natRules.Write(
"-A", string(externalTrafficChain),
"-m", "comment", "--comment", fmt.Sprintf(`"route LOCAL traffic for %s external destinations"`, svcNameString),
"-m", "addrtype", "--src-type", "LOCAL",
"-j", string(clusterPolicyChain))
}
// Anything else falls thru to the appropriate policy chain.
proxier.natRules.Write(
"-A", string(externalTrafficChain),
"-j", string(externalPolicyChain))
}
// Capture the clusterIP.
@@ -1168,27 +1196,8 @@ func (proxier *Proxier) syncProxyRules() {
"--dport", strconv.Itoa(svcInfo.Port()),
)
// We have to SNAT packets to external IPs if externalTrafficPolicy is cluster
// and the traffic is NOT Local. Local traffic coming from Pods and Nodes will
// be always forwarded to the corresponding Service, so no need to SNAT
// If we can't differentiate the local traffic we always SNAT.
if !svcInfo.NodeLocalExternal() {
appendTo := []string{"-A", string(policyClusterChain)}
// This masquerades off-cluster traffic to a External IP.
if proxier.localDetector.IsImplemented() {
proxier.natRules.Write(
appendTo,
args,
proxier.localDetector.IfNotLocal(),
"-j", string(KubeMarkMasqChain))
} else {
proxier.natRules.Write(
appendTo,
args,
"-j", string(KubeMarkMasqChain))
}
}
// Send traffic bound for external IPs to the service chain.
// Send traffic bound for external IPs to the "external
// destinations" chain.
proxier.natRules.Write(
"-A", string(kubeServicesChain),
args,
@@ -1237,14 +1246,8 @@ func (proxier *Proxier) syncProxyRules() {
"-m", "comment", "--comment", fmt.Sprintf(`"%s loadbalancer IP"`, svcNameString),
)
// If we are proxying globally, we need to masquerade in case we cross nodes.
// If we are proxying only locally, we can retain the source IP.
if !svcInfo.NodeLocalExternal() {
proxier.natRules.Write(args, "-j", string(KubeMarkMasqChain))
}
if len(svcInfo.LoadBalancerSourceRanges()) == 0 {
// allow all sources, so jump directly to the KUBE-SVC or KUBE-XLB chain
// allow all sources, so jump directly to the next chain
proxier.natRules.Write(args, "-j", string(externalTrafficChain))
} else {
// firewall filter based on each source range
@@ -1295,16 +1298,7 @@ func (proxier *Proxier) syncProxyRules() {
"-m", protocol, "-p", protocol,
"--dport", strconv.Itoa(svcInfo.NodePort()),
)
if !svcInfo.NodeLocalExternal() {
// Nodeports need SNAT, unless they're local.
proxier.natRules.Write(
"-A", string(policyClusterChain),
args,
"-j", string(KubeMarkMasqChain))
} else {
// TODO: Make all nodePorts jump to the firewall chain.
// Currently we only create it for loadbalancers (#33586).
if svcInfo.NodeLocalExternal() {
// Fix localhost martian source error
loopback := "127.0.0.0/8"
if isIPv6 {
@@ -1315,7 +1309,9 @@ func (proxier *Proxier) syncProxyRules() {
args,
"-s", loopback, "-j", string(KubeMarkMasqChain))
}
// Jump to the service chain.
// Jump to the external destination chain. For better or for
// worse, nodeports are not subject to loadBalancerSourceRanges,
// and we can't change that.
proxier.natRules.Write(
"-A", string(kubeNodePortsChain),
args,
@@ -1347,18 +1343,18 @@ func (proxier *Proxier) syncProxyRules() {
}
if svcInfo.UsesClusterEndpoints() {
// Write rules jumping from policyClusterChain to clusterEndpoints
proxier.writeServiceToEndpointRules(svcNameString, svcInfo, policyClusterChain, clusterEndpoints, args)
// Write rules jumping from clusterPolicyChain to clusterEndpoints
proxier.writeServiceToEndpointRules(svcNameString, svcInfo, clusterPolicyChain, clusterEndpoints, args)
}
if svcInfo.UsesLocalEndpoints() {
if len(localEndpoints) != 0 {
// Write rules jumping from policyLocalChain to localEndpointChains
proxier.writeServiceToEndpointRules(svcNameString, svcInfo, policyLocalChain, localEndpoints, args)
// Write rules jumping from localPolicyChain to localEndpointChains
proxier.writeServiceToEndpointRules(svcNameString, svcInfo, localPolicyChain, localEndpoints, args)
} else if hasEndpoints {
// Blackhole all traffic since there are no local endpoints
args = append(args[:0],
"-A", string(policyLocalChain),
"-A", string(localPolicyChain),
"-m", "comment", "--comment",
fmt.Sprintf(`"%s has no local endpoints"`, svcNameString),
"-j",