kube-proxy: all external jumps to XLB chain
This makes the "destination" policy model clearer. All external destination captures now jump to the "XLB" chain, which is the main place that masquerade is done (removing it from most other places). This is simpler to trace - XLB *always* exists (as long as you have an external exposure) and never gets bypassed.
This commit is contained in:
@@ -116,8 +116,8 @@ type serviceInfo struct {
|
||||
*proxy.BaseServiceInfo
|
||||
// The following fields are computed and stored for performance reasons.
|
||||
nameString string
|
||||
policyClusterChainName utiliptables.Chain
|
||||
policyLocalChainName utiliptables.Chain
|
||||
clusterPolicyChainName utiliptables.Chain
|
||||
localPolicyChainName utiliptables.Chain
|
||||
firewallChainName utiliptables.Chain
|
||||
xlbChainName utiliptables.Chain
|
||||
}
|
||||
@@ -131,8 +131,8 @@ func newServiceInfo(port *v1.ServicePort, service *v1.Service, baseInfo *proxy.B
|
||||
svcPortName := proxy.ServicePortName{NamespacedName: svcName, Port: port.Name}
|
||||
protocol := strings.ToLower(string(info.Protocol()))
|
||||
info.nameString = svcPortName.String()
|
||||
info.policyClusterChainName = servicePortPolicyClusterChain(info.nameString, protocol)
|
||||
info.policyLocalChainName = servicePortPolicyLocalChainName(info.nameString, protocol)
|
||||
info.clusterPolicyChainName = servicePortPolicyClusterChain(info.nameString, protocol)
|
||||
info.localPolicyChainName = servicePortPolicyLocalChainName(info.nameString, protocol)
|
||||
info.firewallChainName = serviceFirewallChainName(info.nameString, protocol)
|
||||
info.xlbChainName = serviceLBChainName(info.nameString, protocol)
|
||||
|
||||
@@ -712,8 +712,8 @@ func serviceFirewallChainName(servicePortName string, protocol string) utiliptab
|
||||
}
|
||||
|
||||
// serviceLBChainName returns the name of the KUBE-XLB-XXXX chain for a service, which
|
||||
// implements "short-circuiting" for internally-originated load balancer traffic when using
|
||||
// `Local` external traffic policy. It forwards traffic from local sources to the KUBE-SVC-XXXX
|
||||
// implements "short-circuiting" for internally-originated external-destination traffic when using
|
||||
// `Local` external traffic policy. It forwards traffic from local sources to the KUBE-SVC-XXXX
|
||||
// chain and traffic from external sources to the KUBE-SVL-XXXX chain.
|
||||
func serviceLBChainName(servicePortName string, protocol string) utiliptables.Chain {
|
||||
return utiliptables.Chain(serviceLBChainNamePrefix + portProtoHash(servicePortName, protocol))
|
||||
@@ -989,7 +989,7 @@ func (proxier *Proxier) syncProxyRules() {
|
||||
}
|
||||
}
|
||||
|
||||
// Build rules for each service.
|
||||
// Build rules for each service-port.
|
||||
for svcName, svc := range proxier.serviceMap {
|
||||
svcInfo, ok := svc.(*serviceInfo)
|
||||
if !ok {
|
||||
@@ -1042,80 +1042,108 @@ func (proxier *Proxier) syncProxyRules() {
|
||||
proxier.natRules.Write(args)
|
||||
}
|
||||
|
||||
policyClusterChain := svcInfo.policyClusterChainName
|
||||
policyLocalChain := svcInfo.policyLocalChainName
|
||||
svcXlbChain := svcInfo.xlbChainName
|
||||
|
||||
internalTrafficChain := policyClusterChain
|
||||
externalTrafficChain := policyClusterChain
|
||||
// These chains represent the sets of endpoints to use when internal or
|
||||
// external traffic policy is "Cluster" vs "Local".
|
||||
clusterPolicyChain := svcInfo.clusterPolicyChainName
|
||||
localPolicyChain := svcInfo.localPolicyChainName
|
||||
|
||||
// These chains designate which policy chain to use for internal- and
|
||||
// external-destination traffic.
|
||||
internalPolicyChain := clusterPolicyChain
|
||||
externalPolicyChain := clusterPolicyChain
|
||||
if svcInfo.NodeLocalInternal() {
|
||||
internalTrafficChain = policyLocalChain
|
||||
internalPolicyChain = localPolicyChain
|
||||
}
|
||||
if svcInfo.NodeLocalExternal() {
|
||||
externalTrafficChain = svcXlbChain
|
||||
externalPolicyChain = localPolicyChain
|
||||
}
|
||||
|
||||
// These chains are where *ALL* rules which match traffic that is
|
||||
// service-destined should jump. ClusterIP traffic is considered
|
||||
// "internal" while NodePort, LoadBalancer, and ExternalIPs traffic is
|
||||
// considered "external".
|
||||
internalTrafficChain := internalPolicyChain
|
||||
externalTrafficChain := svcInfo.xlbChainName // eventually jumps to externalPolicyChain
|
||||
|
||||
// Declare the clusterPolicyChain if needed.
|
||||
if hasEndpoints && svcInfo.UsesClusterEndpoints() {
|
||||
// Create the Cluster traffic policy chain, retaining counters if possible.
|
||||
if chain, ok := existingNATChains[policyClusterChain]; ok {
|
||||
if chain, ok := existingNATChains[clusterPolicyChain]; ok {
|
||||
proxier.natChains.WriteBytes(chain)
|
||||
} else {
|
||||
proxier.natChains.Write(utiliptables.MakeChainLine(policyClusterChain))
|
||||
proxier.natChains.Write(utiliptables.MakeChainLine(clusterPolicyChain))
|
||||
}
|
||||
activeNATChains[policyClusterChain] = true
|
||||
}
|
||||
|
||||
if hasEndpoints && svcInfo.ExternallyAccessible() && svcInfo.NodeLocalExternal() {
|
||||
if chain, ok := existingNATChains[svcXlbChain]; ok {
|
||||
proxier.natChains.WriteBytes(chain)
|
||||
} else {
|
||||
proxier.natChains.Write(utiliptables.MakeChainLine(svcXlbChain))
|
||||
}
|
||||
activeNATChains[svcXlbChain] = true
|
||||
|
||||
// The XLB chain redirects all pod -> external VIP
|
||||
// traffic to the Service's ClusterIP instead. This happens
|
||||
// whether or not we have local endpoints; only if localDetector
|
||||
// is implemented
|
||||
if proxier.localDetector.IsImplemented() {
|
||||
proxier.natRules.Write(
|
||||
"-A", string(svcXlbChain),
|
||||
"-m", "comment", "--comment",
|
||||
`"Redirect pods trying to reach external loadbalancer VIP to clusterIP"`,
|
||||
proxier.localDetector.IfLocal(),
|
||||
"-j", string(policyClusterChain))
|
||||
}
|
||||
|
||||
// Next, redirect all src-type=LOCAL -> LB IP to the service chain
|
||||
// for externalTrafficPolicy=Local. This allows traffic originating
|
||||
// from the host to be redirected to the service correctly,
|
||||
// otherwise traffic to LB IPs are dropped if there are no local
|
||||
// endpoints.
|
||||
proxier.natRules.Write(
|
||||
"-A", string(svcXlbChain),
|
||||
"-m", "comment", "--comment", fmt.Sprintf(`"masquerade LOCAL traffic for %s LB IP"`, svcNameString),
|
||||
"-m", "addrtype", "--src-type", "LOCAL",
|
||||
"-j", string(KubeMarkMasqChain))
|
||||
proxier.natRules.Write(
|
||||
"-A", string(svcXlbChain),
|
||||
"-m", "comment", "--comment", fmt.Sprintf(`"route LOCAL traffic for %s LB IP to service chain"`, svcNameString),
|
||||
"-m", "addrtype", "--src-type", "LOCAL",
|
||||
"-j", string(policyClusterChain))
|
||||
|
||||
// Everything else goes to the SVL chain
|
||||
proxier.natRules.Write(
|
||||
"-A", string(svcXlbChain),
|
||||
"-j", string(policyLocalChain))
|
||||
activeNATChains[clusterPolicyChain] = true
|
||||
}
|
||||
|
||||
// Declare the localPolicyChain if needed.
|
||||
if hasEndpoints && svcInfo.UsesLocalEndpoints() {
|
||||
if chain, ok := existingNATChains[policyLocalChain]; ok {
|
||||
if chain, ok := existingNATChains[localPolicyChain]; ok {
|
||||
proxier.natChains.WriteBytes(chain)
|
||||
} else {
|
||||
proxier.natChains.Write(utiliptables.MakeChainLine(policyLocalChain))
|
||||
proxier.natChains.Write(utiliptables.MakeChainLine(localPolicyChain))
|
||||
}
|
||||
activeNATChains[policyLocalChain] = true
|
||||
activeNATChains[localPolicyChain] = true
|
||||
}
|
||||
|
||||
// If any "external" destinations are enabled, set up external traffic
|
||||
// handling. All captured traffic for all external destinations should
|
||||
// jump to externalTrafficChain, which will handle some special-cases
|
||||
// and then jump to externalPolicyChain.
|
||||
if hasEndpoints && svcInfo.ExternallyAccessible() {
|
||||
if chain, ok := existingNATChains[externalTrafficChain]; ok {
|
||||
proxier.natChains.WriteBytes(chain)
|
||||
} else {
|
||||
proxier.natChains.Write(utiliptables.MakeChainLine(externalTrafficChain))
|
||||
}
|
||||
activeNATChains[externalTrafficChain] = true
|
||||
|
||||
if !svcInfo.NodeLocalExternal() {
|
||||
// If we are using non-local endpoints we need to masquerade,
|
||||
// in case we cross nodes.
|
||||
proxier.natRules.Write(
|
||||
"-A", string(externalTrafficChain),
|
||||
"-m", "comment", "--comment", fmt.Sprintf(`"masquerade traffic for %s external destinations"`, svcNameString),
|
||||
"-j", string(KubeMarkMasqChain))
|
||||
} else {
|
||||
// If we are only using same-node endpoints, we can retain the
|
||||
// source IP in most cases.
|
||||
|
||||
if proxier.localDetector.IsImplemented() {
|
||||
// Treat all locally-originated pod -> external destination
|
||||
// traffic as a special-case. It is subject to neither
|
||||
// form of traffic policy, which simulates going up-and-out
|
||||
// to an external load-balancer and coming back in.
|
||||
proxier.natRules.Write(
|
||||
"-A", string(externalTrafficChain),
|
||||
"-m", "comment", "--comment", fmt.Sprintf(`"pod traffic for %s external destinations"`, svcNameString),
|
||||
proxier.localDetector.IfLocal(),
|
||||
"-j", string(clusterPolicyChain))
|
||||
}
|
||||
|
||||
// Locally originated traffic (not a pod, but the host node)
|
||||
// still needs masquerade because the LBIP itself is a local
|
||||
// address, so that will be the chosen source IP.
|
||||
proxier.natRules.Write(
|
||||
"-A", string(externalTrafficChain),
|
||||
"-m", "comment", "--comment", fmt.Sprintf(`"masquerade LOCAL traffic for %s external destinations"`, svcNameString),
|
||||
"-m", "addrtype", "--src-type", "LOCAL",
|
||||
"-j", string(KubeMarkMasqChain))
|
||||
|
||||
// Redirect all src-type=LOCAL -> external destination to the
|
||||
// policy=cluster chain. This allows traffic originating
|
||||
// from the host to be redirected to the service correctly.
|
||||
proxier.natRules.Write(
|
||||
"-A", string(externalTrafficChain),
|
||||
"-m", "comment", "--comment", fmt.Sprintf(`"route LOCAL traffic for %s external destinations"`, svcNameString),
|
||||
"-m", "addrtype", "--src-type", "LOCAL",
|
||||
"-j", string(clusterPolicyChain))
|
||||
}
|
||||
|
||||
// Anything else falls thru to the appropriate policy chain.
|
||||
proxier.natRules.Write(
|
||||
"-A", string(externalTrafficChain),
|
||||
"-j", string(externalPolicyChain))
|
||||
}
|
||||
|
||||
// Capture the clusterIP.
|
||||
@@ -1168,27 +1196,8 @@ func (proxier *Proxier) syncProxyRules() {
|
||||
"--dport", strconv.Itoa(svcInfo.Port()),
|
||||
)
|
||||
|
||||
// We have to SNAT packets to external IPs if externalTrafficPolicy is cluster
|
||||
// and the traffic is NOT Local. Local traffic coming from Pods and Nodes will
|
||||
// be always forwarded to the corresponding Service, so no need to SNAT
|
||||
// If we can't differentiate the local traffic we always SNAT.
|
||||
if !svcInfo.NodeLocalExternal() {
|
||||
appendTo := []string{"-A", string(policyClusterChain)}
|
||||
// This masquerades off-cluster traffic to a External IP.
|
||||
if proxier.localDetector.IsImplemented() {
|
||||
proxier.natRules.Write(
|
||||
appendTo,
|
||||
args,
|
||||
proxier.localDetector.IfNotLocal(),
|
||||
"-j", string(KubeMarkMasqChain))
|
||||
} else {
|
||||
proxier.natRules.Write(
|
||||
appendTo,
|
||||
args,
|
||||
"-j", string(KubeMarkMasqChain))
|
||||
}
|
||||
}
|
||||
// Send traffic bound for external IPs to the service chain.
|
||||
// Send traffic bound for external IPs to the "external
|
||||
// destinations" chain.
|
||||
proxier.natRules.Write(
|
||||
"-A", string(kubeServicesChain),
|
||||
args,
|
||||
@@ -1237,14 +1246,8 @@ func (proxier *Proxier) syncProxyRules() {
|
||||
"-m", "comment", "--comment", fmt.Sprintf(`"%s loadbalancer IP"`, svcNameString),
|
||||
)
|
||||
|
||||
// If we are proxying globally, we need to masquerade in case we cross nodes.
|
||||
// If we are proxying only locally, we can retain the source IP.
|
||||
if !svcInfo.NodeLocalExternal() {
|
||||
proxier.natRules.Write(args, "-j", string(KubeMarkMasqChain))
|
||||
}
|
||||
|
||||
if len(svcInfo.LoadBalancerSourceRanges()) == 0 {
|
||||
// allow all sources, so jump directly to the KUBE-SVC or KUBE-XLB chain
|
||||
// allow all sources, so jump directly to the next chain
|
||||
proxier.natRules.Write(args, "-j", string(externalTrafficChain))
|
||||
} else {
|
||||
// firewall filter based on each source range
|
||||
@@ -1295,16 +1298,7 @@ func (proxier *Proxier) syncProxyRules() {
|
||||
"-m", protocol, "-p", protocol,
|
||||
"--dport", strconv.Itoa(svcInfo.NodePort()),
|
||||
)
|
||||
if !svcInfo.NodeLocalExternal() {
|
||||
// Nodeports need SNAT, unless they're local.
|
||||
proxier.natRules.Write(
|
||||
"-A", string(policyClusterChain),
|
||||
args,
|
||||
"-j", string(KubeMarkMasqChain))
|
||||
} else {
|
||||
// TODO: Make all nodePorts jump to the firewall chain.
|
||||
// Currently we only create it for loadbalancers (#33586).
|
||||
|
||||
if svcInfo.NodeLocalExternal() {
|
||||
// Fix localhost martian source error
|
||||
loopback := "127.0.0.0/8"
|
||||
if isIPv6 {
|
||||
@@ -1315,7 +1309,9 @@ func (proxier *Proxier) syncProxyRules() {
|
||||
args,
|
||||
"-s", loopback, "-j", string(KubeMarkMasqChain))
|
||||
}
|
||||
// Jump to the service chain.
|
||||
// Jump to the external destination chain. For better or for
|
||||
// worse, nodeports are not subject to loadBalancerSourceRanges,
|
||||
// and we can't change that.
|
||||
proxier.natRules.Write(
|
||||
"-A", string(kubeNodePortsChain),
|
||||
args,
|
||||
@@ -1347,18 +1343,18 @@ func (proxier *Proxier) syncProxyRules() {
|
||||
}
|
||||
|
||||
if svcInfo.UsesClusterEndpoints() {
|
||||
// Write rules jumping from policyClusterChain to clusterEndpoints
|
||||
proxier.writeServiceToEndpointRules(svcNameString, svcInfo, policyClusterChain, clusterEndpoints, args)
|
||||
// Write rules jumping from clusterPolicyChain to clusterEndpoints
|
||||
proxier.writeServiceToEndpointRules(svcNameString, svcInfo, clusterPolicyChain, clusterEndpoints, args)
|
||||
}
|
||||
|
||||
if svcInfo.UsesLocalEndpoints() {
|
||||
if len(localEndpoints) != 0 {
|
||||
// Write rules jumping from policyLocalChain to localEndpointChains
|
||||
proxier.writeServiceToEndpointRules(svcNameString, svcInfo, policyLocalChain, localEndpoints, args)
|
||||
// Write rules jumping from localPolicyChain to localEndpointChains
|
||||
proxier.writeServiceToEndpointRules(svcNameString, svcInfo, localPolicyChain, localEndpoints, args)
|
||||
} else if hasEndpoints {
|
||||
// Blackhole all traffic since there are no local endpoints
|
||||
args = append(args[:0],
|
||||
"-A", string(policyLocalChain),
|
||||
"-A", string(localPolicyChain),
|
||||
"-m", "comment", "--comment",
|
||||
fmt.Sprintf(`"%s has no local endpoints"`, svcNameString),
|
||||
"-j",
|
||||
|
Reference in New Issue
Block a user