diff --git a/pkg/proxy/iptables/proxier.go b/pkg/proxy/iptables/proxier.go index 24b21375e72..15323340c8e 100644 --- a/pkg/proxy/iptables/proxier.go +++ b/pkg/proxy/iptables/proxier.go @@ -955,12 +955,8 @@ func (proxier *Proxier) syncProxyRules() { // you should always do one of the below: // slice = slice[:0] // and then append to it // slice = append(slice[:0], ...) - endpoints := make([]*endpointsInfo, 0) - endpointChains := make([]utiliptables.Chain, 0) - readyEndpoints := make([]*endpointsInfo, 0) readyEndpointChains := make([]utiliptables.Chain, 0) - localReadyEndpointChains := make([]utiliptables.Chain, 0) - localServingTerminatingEndpointChains := make([]utiliptables.Chain, 0) + localEndpointChains := make([]utiliptables.Chain, 0) // To avoid growing this slice, we arbitrarily set its size to 64, // there is never more than that many arguments for a single line. @@ -1002,7 +998,82 @@ func (proxier *Proxier) syncProxyRules() { // Service does not have conflicting configuration such as // externalTrafficPolicy=Local. allEndpoints = proxy.FilterEndpoints(allEndpoints, svcInfo, proxier.nodeLabels) - hasEndpoints := len(allEndpoints) > 0 + + // Scan the endpoints list to see what we have. "hasEndpoints" will be true + // if there are any usable endpoints for this service anywhere in the cluster. + var hasEndpoints, hasLocalReadyEndpoints, hasLocalServingTerminatingEndpoints bool + for _, ep := range allEndpoints { + if ep.IsReady() { + hasEndpoints = true + if ep.GetIsLocal() { + hasLocalReadyEndpoints = true + } + } else if svc.NodeLocalExternal() && utilfeature.DefaultFeatureGate.Enabled(features.ProxyTerminatingEndpoints) { + if ep.IsServing() && ep.IsTerminating() { + hasEndpoints = true + if ep.GetIsLocal() { + hasLocalServingTerminatingEndpoints = true + } + } + } + } + useTerminatingEndpoints := !hasLocalReadyEndpoints && hasLocalServingTerminatingEndpoints + + // Generate the per-endpoint chains. + readyEndpointChains = readyEndpointChains[:0] + localEndpointChains = localEndpointChains[:0] + for _, ep := range allEndpoints { + epInfo, ok := ep.(*endpointsInfo) + if !ok { + klog.ErrorS(err, "Failed to cast endpointsInfo", "endpointsInfo", ep) + continue + } + + endpointChain := epInfo.endpointChain(svcNameString, protocol) + endpointInUse := false + + if epInfo.Ready { + readyEndpointChains = append(readyEndpointChains, endpointChain) + endpointInUse = true + } + if svc.NodeLocalExternal() && epInfo.IsLocal { + if useTerminatingEndpoints { + if epInfo.Serving && epInfo.Terminating { + localEndpointChains = append(localEndpointChains, endpointChain) + endpointInUse = true + } + } else if epInfo.Ready { + localEndpointChains = append(localEndpointChains, endpointChain) + endpointInUse = true + } + } + + if !endpointInUse { + continue + } + + // Create the endpoint chain, retaining counters if possible. + if chain, ok := existingNATChains[endpointChain]; ok { + utilproxy.WriteBytesLine(proxier.natChains, chain) + } else { + utilproxy.WriteLine(proxier.natChains, utiliptables.MakeChainLine(endpointChain)) + } + activeNATChains[endpointChain] = true + + args = append(args[:0], "-A", string(endpointChain)) + args = proxier.appendServiceCommentLocked(args, svcNameString) + // Handle traffic that loops back to the originator with SNAT. + utilproxy.WriteLine(proxier.natRules, append(args, + "-s", utilproxy.ToCIDR(netutils.ParseIPSloppy(epInfo.IP())), + "-j", string(KubeMarkMasqChain))...) + // Update client-affinity lists. + if svcInfo.SessionAffinityType() == v1.ServiceAffinityClientIP { + args = append(args, "-m", "recent", "--name", string(endpointChain), "--set") + } + // DNAT to final destination. + args = append(args, "-m", protocol, "-p", protocol, "-j", "DNAT", "--to-destination", epInfo.Endpoint) + utilproxy.WriteLine(proxier.natRules, args...) + } svcChain := svcInfo.servicePortChainName if hasEndpoints { @@ -1319,35 +1390,9 @@ func (proxier *Proxier) syncProxyRules() { continue } - // Generate the per-endpoint chains. We do this in multiple passes so we - // can group rules together. - // These two slices parallel each other - keep in sync - endpoints = endpoints[:0] - endpointChains = endpointChains[:0] - var endpointChain utiliptables.Chain - for _, ep := range allEndpoints { - epInfo, ok := ep.(*endpointsInfo) - if !ok { - klog.ErrorS(err, "Failed to cast endpointsInfo", "endpointsInfo", ep) - continue - } - - endpoints = append(endpoints, epInfo) - endpointChain = epInfo.endpointChain(svcNameString, protocol) - endpointChains = append(endpointChains, endpointChain) - - // Create the endpoint chain, retaining counters if possible. - if chain, ok := existingNATChains[endpointChain]; ok { - utilproxy.WriteBytesLine(proxier.natChains, chain) - } else { - utilproxy.WriteLine(proxier.natChains, utiliptables.MakeChainLine(endpointChain)) - } - activeNATChains[endpointChain] = true - } - // First write session affinity rules, if applicable. if svcInfo.SessionAffinityType() == v1.ServiceAffinityClientIP { - for _, endpointChain := range endpointChains { + for _, endpointChain := range readyEndpointChains { args = append(args[:0], "-A", string(svcChain), ) @@ -1361,30 +1406,7 @@ func (proxier *Proxier) syncProxyRules() { } } - // Firstly, categorize each endpoint into three buckets: - // 1. all endpoints that are ready and NOT terminating. - // 2. all endpoints that are local, ready and NOT terminating, and externalTrafficPolicy=Local - // 3. all endpoints that are local, serving and terminating, and externalTrafficPolicy=Local - readyEndpointChains = readyEndpointChains[:0] - readyEndpoints := readyEndpoints[:0] - localReadyEndpointChains := localReadyEndpointChains[:0] - localServingTerminatingEndpointChains := localServingTerminatingEndpointChains[:0] - for i, endpointChain := range endpointChains { - if endpoints[i].Ready { - readyEndpointChains = append(readyEndpointChains, endpointChain) - readyEndpoints = append(readyEndpoints, endpoints[i]) - } - - if svc.NodeLocalExternal() && endpoints[i].IsLocal { - if endpoints[i].Ready { - localReadyEndpointChains = append(localReadyEndpointChains, endpointChain) - } else if endpoints[i].Serving && endpoints[i].Terminating { - localServingTerminatingEndpointChains = append(localServingTerminatingEndpointChains, endpointChain) - } - } - } - - // Now write loadbalancing & DNAT rules. + // Now write loadbalancing rules numReadyEndpoints := len(readyEndpointChains) for i, endpointChain := range readyEndpointChains { // Balancing rules in the per-service chain. @@ -1402,25 +1424,6 @@ func (proxier *Proxier) syncProxyRules() { utilproxy.WriteLine(proxier.natRules, args...) } - // Every endpoint gets a chain, regardless of its state. This is required later since we may - // want to jump to endpoint chains that are terminating. - for i, endpointChain := range endpointChains { - // Rules in the per-endpoint chain. - args = append(args[:0], "-A", string(endpointChain)) - args = proxier.appendServiceCommentLocked(args, svcNameString) - // Handle traffic that loops back to the originator with SNAT. - utilproxy.WriteLine(proxier.natRules, append(args, - "-s", utilproxy.ToCIDR(netutils.ParseIPSloppy(endpoints[i].IP())), - "-j", string(KubeMarkMasqChain))...) - // Update client-affinity lists. - if svcInfo.SessionAffinityType() == v1.ServiceAffinityClientIP { - args = append(args, "-m", "recent", "--name", string(endpointChain), "--set") - } - // DNAT to final destination. - args = append(args, "-m", protocol, "-p", protocol, "-j", "DNAT", "--to-destination", endpoints[i].Endpoint) - utilproxy.WriteLine(proxier.natRules, args...) - } - // The logic below this applies only if this service is marked as OnlyLocal if !svcInfo.NodeLocalExternal() { continue @@ -1449,12 +1452,6 @@ func (proxier *Proxier) syncProxyRules() { "-m", "comment", "--comment", fmt.Sprintf(`"route LOCAL traffic for %s LB IP to service chain"`, svcNameString), "-m", "addrtype", "--src-type", "LOCAL", "-j", string(svcChain))...) - // Prefer local ready endpoint chains, but fall back to ready terminating if none exist - localEndpointChains := localReadyEndpointChains - if utilfeature.DefaultFeatureGate.Enabled(features.ProxyTerminatingEndpoints) && len(localEndpointChains) == 0 { - localEndpointChains = localServingTerminatingEndpointChains - } - numLocalEndpoints := len(localEndpointChains) if numLocalEndpoints == 0 { // Blackhole all traffic since there are no local endpoints diff --git a/pkg/proxy/iptables/proxier_test.go b/pkg/proxy/iptables/proxier_test.go index c1ac27ebcaf..66eb9bcf92d 100644 --- a/pkg/proxy/iptables/proxier_test.go +++ b/pkg/proxy/iptables/proxier_test.go @@ -3532,7 +3532,6 @@ COMMIT :KUBE-SEP-3JOIVZTXZZRGORX4 - [0:0] :KUBE-SEP-IO5XOSKPAXIFQXAJ - [0:0] :KUBE-SEP-XGJFVO3L2O5SRFNT - [0:0] -:KUBE-SEP-VLJB2F747S6W7EX4 - [0:0] -A KUBE-POSTROUTING -m mark ! --mark 0x4000/0x4000 -j RETURN -A KUBE-POSTROUTING -j MARK --xor-mark 0x4000 -A KUBE-POSTROUTING -m comment --comment "kubernetes service traffic requiring SNAT" -j MASQUERADE @@ -3548,8 +3547,6 @@ COMMIT -A KUBE-SEP-IO5XOSKPAXIFQXAJ -m comment --comment ns1/svc1 -m tcp -p tcp -j DNAT --to-destination 10.0.1.2:80 -A KUBE-SEP-XGJFVO3L2O5SRFNT -m comment --comment ns1/svc1 -s 10.0.1.3/32 -j KUBE-MARK-MASQ -A KUBE-SEP-XGJFVO3L2O5SRFNT -m comment --comment ns1/svc1 -m tcp -p tcp -j DNAT --to-destination 10.0.1.3:80 --A KUBE-SEP-VLJB2F747S6W7EX4 -m comment --comment ns1/svc1 -s 10.0.1.4/32 -j KUBE-MARK-MASQ --A KUBE-SEP-VLJB2F747S6W7EX4 -m comment --comment ns1/svc1 -m tcp -p tcp -j DNAT --to-destination 10.0.1.4:80 -A KUBE-SERVICES -m comment --comment "kubernetes service nodeports; NOTE: this must be the last rule in this chain" -m addrtype --dst-type LOCAL -j KUBE-NODEPORTS COMMIT ` @@ -3635,7 +3632,6 @@ COMMIT :KUBE-SEP-3JOIVZTXZZRGORX4 - [0:0] :KUBE-SEP-IO5XOSKPAXIFQXAJ - [0:0] :KUBE-SEP-XGJFVO3L2O5SRFNT - [0:0] -:KUBE-SEP-VLJB2F747S6W7EX4 - [0:0] -A KUBE-POSTROUTING -m mark ! --mark 0x4000/0x4000 -j RETURN -A KUBE-POSTROUTING -j MARK --xor-mark 0x4000 -A KUBE-POSTROUTING -m comment --comment "kubernetes service traffic requiring SNAT" -j MASQUERADE @@ -3653,8 +3649,6 @@ COMMIT -A KUBE-SEP-IO5XOSKPAXIFQXAJ -m comment --comment ns1/svc1 -m tcp -p tcp -j DNAT --to-destination 10.0.1.2:80 -A KUBE-SEP-XGJFVO3L2O5SRFNT -m comment --comment ns1/svc1 -s 10.0.1.3/32 -j KUBE-MARK-MASQ -A KUBE-SEP-XGJFVO3L2O5SRFNT -m comment --comment ns1/svc1 -m tcp -p tcp -j DNAT --to-destination 10.0.1.3:80 --A KUBE-SEP-VLJB2F747S6W7EX4 -m comment --comment ns1/svc1 -s 10.0.1.4/32 -j KUBE-MARK-MASQ --A KUBE-SEP-VLJB2F747S6W7EX4 -m comment --comment ns1/svc1 -m tcp -p tcp -j DNAT --to-destination 10.0.1.4:80 -A KUBE-XLB-AQI2S6QIMU7PVVRP -m comment --comment "Redirect pods trying to reach external loadbalancer VIP to clusterIP" -s 10.0.0.0/24 -j KUBE-SVC-AQI2S6QIMU7PVVRP -A KUBE-XLB-AQI2S6QIMU7PVVRP -m comment --comment "masquerade LOCAL traffic for ns1/svc1 LB IP" -m addrtype --src-type LOCAL -j KUBE-MARK-MASQ -A KUBE-XLB-AQI2S6QIMU7PVVRP -m comment --comment "route LOCAL traffic for ns1/svc1 LB IP to service chain" -m addrtype --src-type LOCAL -j KUBE-SVC-AQI2S6QIMU7PVVRP @@ -4301,6 +4295,7 @@ func Test_EndpointSliceWithTerminatingEndpoints(t *testing.T) { terminatingFeatureGate bool endpointslice *discovery.EndpointSlice expectedIPTables string + noUsableEndpoints bool }{ { name: "feature gate ProxyTerminatingEndpoints enabled, ready endpoints exist", @@ -4389,8 +4384,6 @@ COMMIT :KUBE-FW-AQI2S6QIMU7PVVRP - [0:0] :KUBE-SEP-3JOIVZTXZZRGORX4 - [0:0] :KUBE-SEP-IO5XOSKPAXIFQXAJ - [0:0] -:KUBE-SEP-XGJFVO3L2O5SRFNT - [0:0] -:KUBE-SEP-VLJB2F747S6W7EX4 - [0:0] :KUBE-SEP-EQCHZ7S2PJ72OHAY - [0:0] -A KUBE-POSTROUTING -m mark ! --mark 0x4000/0x4000 -j RETURN -A KUBE-POSTROUTING -j MARK --xor-mark 0x4000 @@ -4403,8 +4396,6 @@ COMMIT -A KUBE-FW-AQI2S6QIMU7PVVRP -m comment --comment "ns1/svc1 loadbalancer IP" -j KUBE-MARK-DROP -A KUBE-SVC-AQI2S6QIMU7PVVRP -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-3JOIVZTXZZRGORX4 --rcheck --seconds 10800 --reap -j KUBE-SEP-3JOIVZTXZZRGORX4 -A KUBE-SVC-AQI2S6QIMU7PVVRP -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-IO5XOSKPAXIFQXAJ --rcheck --seconds 10800 --reap -j KUBE-SEP-IO5XOSKPAXIFQXAJ --A KUBE-SVC-AQI2S6QIMU7PVVRP -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-XGJFVO3L2O5SRFNT --rcheck --seconds 10800 --reap -j KUBE-SEP-XGJFVO3L2O5SRFNT --A KUBE-SVC-AQI2S6QIMU7PVVRP -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-VLJB2F747S6W7EX4 --rcheck --seconds 10800 --reap -j KUBE-SEP-VLJB2F747S6W7EX4 -A KUBE-SVC-AQI2S6QIMU7PVVRP -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-EQCHZ7S2PJ72OHAY --rcheck --seconds 10800 --reap -j KUBE-SEP-EQCHZ7S2PJ72OHAY -A KUBE-SVC-AQI2S6QIMU7PVVRP -m comment --comment ns1/svc1 -m statistic --mode random --probability 0.3333333333 -j KUBE-SEP-3JOIVZTXZZRGORX4 -A KUBE-SVC-AQI2S6QIMU7PVVRP -m comment --comment ns1/svc1 -m statistic --mode random --probability 0.5000000000 -j KUBE-SEP-IO5XOSKPAXIFQXAJ @@ -4413,10 +4404,6 @@ COMMIT -A KUBE-SEP-3JOIVZTXZZRGORX4 -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-3JOIVZTXZZRGORX4 --set -m tcp -p tcp -j DNAT --to-destination 10.0.1.1:80 -A KUBE-SEP-IO5XOSKPAXIFQXAJ -m comment --comment ns1/svc1 -s 10.0.1.2/32 -j KUBE-MARK-MASQ -A KUBE-SEP-IO5XOSKPAXIFQXAJ -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-IO5XOSKPAXIFQXAJ --set -m tcp -p tcp -j DNAT --to-destination 10.0.1.2:80 --A KUBE-SEP-XGJFVO3L2O5SRFNT -m comment --comment ns1/svc1 -s 10.0.1.3/32 -j KUBE-MARK-MASQ --A KUBE-SEP-XGJFVO3L2O5SRFNT -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-XGJFVO3L2O5SRFNT --set -m tcp -p tcp -j DNAT --to-destination 10.0.1.3:80 --A KUBE-SEP-VLJB2F747S6W7EX4 -m comment --comment ns1/svc1 -s 10.0.1.4/32 -j KUBE-MARK-MASQ --A KUBE-SEP-VLJB2F747S6W7EX4 -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-VLJB2F747S6W7EX4 --set -m tcp -p tcp -j DNAT --to-destination 10.0.1.4:80 -A KUBE-SEP-EQCHZ7S2PJ72OHAY -m comment --comment ns1/svc1 -s 10.0.1.5/32 -j KUBE-MARK-MASQ -A KUBE-SEP-EQCHZ7S2PJ72OHAY -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-EQCHZ7S2PJ72OHAY --set -m tcp -p tcp -j DNAT --to-destination 10.0.1.5:80 -A KUBE-XLB-AQI2S6QIMU7PVVRP -m comment --comment "Redirect pods trying to reach external loadbalancer VIP to clusterIP" -s 10.0.0.0/24 -j KUBE-SVC-AQI2S6QIMU7PVVRP @@ -4517,8 +4504,6 @@ COMMIT :KUBE-FW-AQI2S6QIMU7PVVRP - [0:0] :KUBE-SEP-3JOIVZTXZZRGORX4 - [0:0] :KUBE-SEP-IO5XOSKPAXIFQXAJ - [0:0] -:KUBE-SEP-XGJFVO3L2O5SRFNT - [0:0] -:KUBE-SEP-VLJB2F747S6W7EX4 - [0:0] :KUBE-SEP-EQCHZ7S2PJ72OHAY - [0:0] -A KUBE-POSTROUTING -m mark ! --mark 0x4000/0x4000 -j RETURN -A KUBE-POSTROUTING -j MARK --xor-mark 0x4000 @@ -4531,8 +4516,6 @@ COMMIT -A KUBE-FW-AQI2S6QIMU7PVVRP -m comment --comment "ns1/svc1 loadbalancer IP" -j KUBE-MARK-DROP -A KUBE-SVC-AQI2S6QIMU7PVVRP -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-3JOIVZTXZZRGORX4 --rcheck --seconds 10800 --reap -j KUBE-SEP-3JOIVZTXZZRGORX4 -A KUBE-SVC-AQI2S6QIMU7PVVRP -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-IO5XOSKPAXIFQXAJ --rcheck --seconds 10800 --reap -j KUBE-SEP-IO5XOSKPAXIFQXAJ --A KUBE-SVC-AQI2S6QIMU7PVVRP -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-XGJFVO3L2O5SRFNT --rcheck --seconds 10800 --reap -j KUBE-SEP-XGJFVO3L2O5SRFNT --A KUBE-SVC-AQI2S6QIMU7PVVRP -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-VLJB2F747S6W7EX4 --rcheck --seconds 10800 --reap -j KUBE-SEP-VLJB2F747S6W7EX4 -A KUBE-SVC-AQI2S6QIMU7PVVRP -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-EQCHZ7S2PJ72OHAY --rcheck --seconds 10800 --reap -j KUBE-SEP-EQCHZ7S2PJ72OHAY -A KUBE-SVC-AQI2S6QIMU7PVVRP -m comment --comment ns1/svc1 -m statistic --mode random --probability 0.3333333333 -j KUBE-SEP-3JOIVZTXZZRGORX4 -A KUBE-SVC-AQI2S6QIMU7PVVRP -m comment --comment ns1/svc1 -m statistic --mode random --probability 0.5000000000 -j KUBE-SEP-IO5XOSKPAXIFQXAJ @@ -4541,10 +4524,6 @@ COMMIT -A KUBE-SEP-3JOIVZTXZZRGORX4 -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-3JOIVZTXZZRGORX4 --set -m tcp -p tcp -j DNAT --to-destination 10.0.1.1:80 -A KUBE-SEP-IO5XOSKPAXIFQXAJ -m comment --comment ns1/svc1 -s 10.0.1.2/32 -j KUBE-MARK-MASQ -A KUBE-SEP-IO5XOSKPAXIFQXAJ -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-IO5XOSKPAXIFQXAJ --set -m tcp -p tcp -j DNAT --to-destination 10.0.1.2:80 --A KUBE-SEP-XGJFVO3L2O5SRFNT -m comment --comment ns1/svc1 -s 10.0.1.3/32 -j KUBE-MARK-MASQ --A KUBE-SEP-XGJFVO3L2O5SRFNT -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-XGJFVO3L2O5SRFNT --set -m tcp -p tcp -j DNAT --to-destination 10.0.1.3:80 --A KUBE-SEP-VLJB2F747S6W7EX4 -m comment --comment ns1/svc1 -s 10.0.1.4/32 -j KUBE-MARK-MASQ --A KUBE-SEP-VLJB2F747S6W7EX4 -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-VLJB2F747S6W7EX4 --set -m tcp -p tcp -j DNAT --to-destination 10.0.1.4:80 -A KUBE-SEP-EQCHZ7S2PJ72OHAY -m comment --comment ns1/svc1 -s 10.0.1.5/32 -j KUBE-MARK-MASQ -A KUBE-SEP-EQCHZ7S2PJ72OHAY -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-EQCHZ7S2PJ72OHAY --set -m tcp -p tcp -j DNAT --to-destination 10.0.1.5:80 -A KUBE-XLB-AQI2S6QIMU7PVVRP -m comment --comment "Redirect pods trying to reach external loadbalancer VIP to clusterIP" -s 10.0.0.0/24 -j KUBE-SVC-AQI2S6QIMU7PVVRP @@ -4637,7 +4616,6 @@ COMMIT :KUBE-FW-AQI2S6QIMU7PVVRP - [0:0] :KUBE-SEP-IO5XOSKPAXIFQXAJ - [0:0] :KUBE-SEP-XGJFVO3L2O5SRFNT - [0:0] -:KUBE-SEP-VLJB2F747S6W7EX4 - [0:0] :KUBE-SEP-EQCHZ7S2PJ72OHAY - [0:0] -A KUBE-POSTROUTING -m mark ! --mark 0x4000/0x4000 -j RETURN -A KUBE-POSTROUTING -j MARK --xor-mark 0x4000 @@ -4648,17 +4626,12 @@ COMMIT -A KUBE-SERVICES -m comment --comment "ns1/svc1 loadbalancer IP" -m tcp -p tcp -d 10.1.2.3/32 --dport 80 -j KUBE-FW-AQI2S6QIMU7PVVRP -A KUBE-FW-AQI2S6QIMU7PVVRP -m comment --comment "ns1/svc1 loadbalancer IP" -j KUBE-XLB-AQI2S6QIMU7PVVRP -A KUBE-FW-AQI2S6QIMU7PVVRP -m comment --comment "ns1/svc1 loadbalancer IP" -j KUBE-MARK-DROP --A KUBE-SVC-AQI2S6QIMU7PVVRP -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-IO5XOSKPAXIFQXAJ --rcheck --seconds 10800 --reap -j KUBE-SEP-IO5XOSKPAXIFQXAJ --A KUBE-SVC-AQI2S6QIMU7PVVRP -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-XGJFVO3L2O5SRFNT --rcheck --seconds 10800 --reap -j KUBE-SEP-XGJFVO3L2O5SRFNT --A KUBE-SVC-AQI2S6QIMU7PVVRP -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-VLJB2F747S6W7EX4 --rcheck --seconds 10800 --reap -j KUBE-SEP-VLJB2F747S6W7EX4 -A KUBE-SVC-AQI2S6QIMU7PVVRP -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-EQCHZ7S2PJ72OHAY --rcheck --seconds 10800 --reap -j KUBE-SEP-EQCHZ7S2PJ72OHAY -A KUBE-SVC-AQI2S6QIMU7PVVRP -m comment --comment ns1/svc1 -j KUBE-SEP-EQCHZ7S2PJ72OHAY -A KUBE-SEP-IO5XOSKPAXIFQXAJ -m comment --comment ns1/svc1 -s 10.0.1.2/32 -j KUBE-MARK-MASQ -A KUBE-SEP-IO5XOSKPAXIFQXAJ -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-IO5XOSKPAXIFQXAJ --set -m tcp -p tcp -j DNAT --to-destination 10.0.1.2:80 -A KUBE-SEP-XGJFVO3L2O5SRFNT -m comment --comment ns1/svc1 -s 10.0.1.3/32 -j KUBE-MARK-MASQ -A KUBE-SEP-XGJFVO3L2O5SRFNT -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-XGJFVO3L2O5SRFNT --set -m tcp -p tcp -j DNAT --to-destination 10.0.1.3:80 --A KUBE-SEP-VLJB2F747S6W7EX4 -m comment --comment ns1/svc1 -s 10.0.1.4/32 -j KUBE-MARK-MASQ --A KUBE-SEP-VLJB2F747S6W7EX4 -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-VLJB2F747S6W7EX4 --set -m tcp -p tcp -j DNAT --to-destination 10.0.1.4:80 -A KUBE-SEP-EQCHZ7S2PJ72OHAY -m comment --comment ns1/svc1 -s 10.0.1.5/32 -j KUBE-MARK-MASQ -A KUBE-SEP-EQCHZ7S2PJ72OHAY -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-EQCHZ7S2PJ72OHAY --set -m tcp -p tcp -j DNAT --to-destination 10.0.1.5:80 -A KUBE-XLB-AQI2S6QIMU7PVVRP -m comment --comment "Redirect pods trying to reach external loadbalancer VIP to clusterIP" -s 10.0.0.0/24 -j KUBE-SVC-AQI2S6QIMU7PVVRP @@ -4754,10 +4727,6 @@ COMMIT :KUBE-SVC-AQI2S6QIMU7PVVRP - [0:0] :KUBE-XLB-AQI2S6QIMU7PVVRP - [0:0] :KUBE-FW-AQI2S6QIMU7PVVRP - [0:0] -:KUBE-SEP-3JOIVZTXZZRGORX4 - [0:0] -:KUBE-SEP-IO5XOSKPAXIFQXAJ - [0:0] -:KUBE-SEP-XGJFVO3L2O5SRFNT - [0:0] -:KUBE-SEP-VLJB2F747S6W7EX4 - [0:0] :KUBE-SEP-EQCHZ7S2PJ72OHAY - [0:0] -A KUBE-POSTROUTING -m mark ! --mark 0x4000/0x4000 -j RETURN -A KUBE-POSTROUTING -j MARK --xor-mark 0x4000 @@ -4768,20 +4737,8 @@ COMMIT -A KUBE-SERVICES -m comment --comment "ns1/svc1 loadbalancer IP" -m tcp -p tcp -d 10.1.2.3/32 --dport 80 -j KUBE-FW-AQI2S6QIMU7PVVRP -A KUBE-FW-AQI2S6QIMU7PVVRP -m comment --comment "ns1/svc1 loadbalancer IP" -j KUBE-XLB-AQI2S6QIMU7PVVRP -A KUBE-FW-AQI2S6QIMU7PVVRP -m comment --comment "ns1/svc1 loadbalancer IP" -j KUBE-MARK-DROP --A KUBE-SVC-AQI2S6QIMU7PVVRP -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-3JOIVZTXZZRGORX4 --rcheck --seconds 10800 --reap -j KUBE-SEP-3JOIVZTXZZRGORX4 --A KUBE-SVC-AQI2S6QIMU7PVVRP -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-IO5XOSKPAXIFQXAJ --rcheck --seconds 10800 --reap -j KUBE-SEP-IO5XOSKPAXIFQXAJ --A KUBE-SVC-AQI2S6QIMU7PVVRP -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-XGJFVO3L2O5SRFNT --rcheck --seconds 10800 --reap -j KUBE-SEP-XGJFVO3L2O5SRFNT --A KUBE-SVC-AQI2S6QIMU7PVVRP -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-VLJB2F747S6W7EX4 --rcheck --seconds 10800 --reap -j KUBE-SEP-VLJB2F747S6W7EX4 -A KUBE-SVC-AQI2S6QIMU7PVVRP -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-EQCHZ7S2PJ72OHAY --rcheck --seconds 10800 --reap -j KUBE-SEP-EQCHZ7S2PJ72OHAY -A KUBE-SVC-AQI2S6QIMU7PVVRP -m comment --comment ns1/svc1 -j KUBE-SEP-EQCHZ7S2PJ72OHAY --A KUBE-SEP-3JOIVZTXZZRGORX4 -m comment --comment ns1/svc1 -s 10.0.1.1/32 -j KUBE-MARK-MASQ --A KUBE-SEP-3JOIVZTXZZRGORX4 -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-3JOIVZTXZZRGORX4 --set -m tcp -p tcp -j DNAT --to-destination 10.0.1.1:80 --A KUBE-SEP-IO5XOSKPAXIFQXAJ -m comment --comment ns1/svc1 -s 10.0.1.2/32 -j KUBE-MARK-MASQ --A KUBE-SEP-IO5XOSKPAXIFQXAJ -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-IO5XOSKPAXIFQXAJ --set -m tcp -p tcp -j DNAT --to-destination 10.0.1.2:80 --A KUBE-SEP-XGJFVO3L2O5SRFNT -m comment --comment ns1/svc1 -s 10.0.1.3/32 -j KUBE-MARK-MASQ --A KUBE-SEP-XGJFVO3L2O5SRFNT -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-XGJFVO3L2O5SRFNT --set -m tcp -p tcp -j DNAT --to-destination 10.0.1.3:80 --A KUBE-SEP-VLJB2F747S6W7EX4 -m comment --comment ns1/svc1 -s 10.0.1.4/32 -j KUBE-MARK-MASQ --A KUBE-SEP-VLJB2F747S6W7EX4 -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-VLJB2F747S6W7EX4 --set -m tcp -p tcp -j DNAT --to-destination 10.0.1.4:80 -A KUBE-SEP-EQCHZ7S2PJ72OHAY -m comment --comment ns1/svc1 -s 10.0.1.5/32 -j KUBE-MARK-MASQ -A KUBE-SEP-EQCHZ7S2PJ72OHAY -m comment --comment ns1/svc1 -m recent --name KUBE-SEP-EQCHZ7S2PJ72OHAY --set -m tcp -p tcp -j DNAT --to-destination 10.0.1.5:80 -A KUBE-XLB-AQI2S6QIMU7PVVRP -m comment --comment "Redirect pods trying to reach external loadbalancer VIP to clusterIP" -s 10.0.0.0/24 -j KUBE-SVC-AQI2S6QIMU7PVVRP @@ -4790,6 +4747,137 @@ COMMIT -A KUBE-XLB-AQI2S6QIMU7PVVRP -m comment --comment "ns1/svc1 has no local endpoints" -j KUBE-MARK-DROP -A KUBE-SERVICES -m comment --comment "kubernetes service nodeports; NOTE: this must be the last rule in this chain" -m addrtype --dst-type LOCAL -j KUBE-NODEPORTS COMMIT +`, + }, + { + name: "ProxyTerminatingEndpoints enabled, terminating endpoints on remote node", + terminatingFeatureGate: true, + endpointslice: &discovery.EndpointSlice{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("%s-1", "svc1"), + Namespace: "ns1", + Labels: map[string]string{discovery.LabelServiceName: "svc1"}, + }, + Ports: []discovery.EndpointPort{{ + Name: utilpointer.StringPtr(""), + Port: utilpointer.Int32Ptr(80), + Protocol: &tcpProtocol, + }}, + AddressType: discovery.AddressTypeIPv4, + Endpoints: []discovery.Endpoint{ + { + // this endpoint won't be used because it's not local, + // but it will prevent a REJECT rule from being created + Addresses: []string{"10.0.1.5"}, + Conditions: discovery.EndpointConditions{ + Ready: utilpointer.BoolPtr(false), + Serving: utilpointer.BoolPtr(true), + Terminating: utilpointer.BoolPtr(true), + }, + NodeName: utilpointer.StringPtr("host-1"), + }, + }, + }, + expectedIPTables: ` +*filter +:KUBE-SERVICES - [0:0] +:KUBE-EXTERNAL-SERVICES - [0:0] +:KUBE-FORWARD - [0:0] +:KUBE-NODEPORTS - [0:0] +-A KUBE-FORWARD -m conntrack --ctstate INVALID -j DROP +-A KUBE-FORWARD -m comment --comment "kubernetes forwarding rules" -m mark --mark 0x4000/0x4000 -j ACCEPT +-A KUBE-FORWARD -m comment --comment "kubernetes forwarding conntrack pod source rule" -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT +-A KUBE-FORWARD -m comment --comment "kubernetes forwarding conntrack pod destination rule" -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT +COMMIT +*nat +:KUBE-SERVICES - [0:0] +:KUBE-NODEPORTS - [0:0] +:KUBE-POSTROUTING - [0:0] +:KUBE-MARK-MASQ - [0:0] +:KUBE-SVC-AQI2S6QIMU7PVVRP - [0:0] +:KUBE-XLB-AQI2S6QIMU7PVVRP - [0:0] +:KUBE-FW-AQI2S6QIMU7PVVRP - [0:0] +-A KUBE-POSTROUTING -m mark ! --mark 0x4000/0x4000 -j RETURN +-A KUBE-POSTROUTING -j MARK --xor-mark 0x4000 +-A KUBE-POSTROUTING -m comment --comment "kubernetes service traffic requiring SNAT" -j MASQUERADE +-A KUBE-MARK-MASQ -j MARK --or-mark 0x4000 +-A KUBE-SVC-AQI2S6QIMU7PVVRP -m comment --comment "ns1/svc1 cluster IP" -m tcp -p tcp -d 172.20.1.1/32 --dport 80 ! -s 10.0.0.0/24 -j KUBE-MARK-MASQ +-A KUBE-SERVICES -m comment --comment "ns1/svc1 cluster IP" -m tcp -p tcp -d 172.20.1.1/32 --dport 80 -j KUBE-SVC-AQI2S6QIMU7PVVRP +-A KUBE-SERVICES -m comment --comment "ns1/svc1 loadbalancer IP" -m tcp -p tcp -d 10.1.2.3/32 --dport 80 -j KUBE-FW-AQI2S6QIMU7PVVRP +-A KUBE-FW-AQI2S6QIMU7PVVRP -m comment --comment "ns1/svc1 loadbalancer IP" -j KUBE-XLB-AQI2S6QIMU7PVVRP +-A KUBE-FW-AQI2S6QIMU7PVVRP -m comment --comment "ns1/svc1 loadbalancer IP" -j KUBE-MARK-DROP +-A KUBE-XLB-AQI2S6QIMU7PVVRP -m comment --comment "Redirect pods trying to reach external loadbalancer VIP to clusterIP" -s 10.0.0.0/24 -j KUBE-SVC-AQI2S6QIMU7PVVRP +-A KUBE-XLB-AQI2S6QIMU7PVVRP -m comment --comment "masquerade LOCAL traffic for ns1/svc1 LB IP" -m addrtype --src-type LOCAL -j KUBE-MARK-MASQ +-A KUBE-XLB-AQI2S6QIMU7PVVRP -m comment --comment "route LOCAL traffic for ns1/svc1 LB IP to service chain" -m addrtype --src-type LOCAL -j KUBE-SVC-AQI2S6QIMU7PVVRP +-A KUBE-XLB-AQI2S6QIMU7PVVRP -m comment --comment "ns1/svc1 has no local endpoints" -j KUBE-MARK-DROP +-A KUBE-SERVICES -m comment --comment "kubernetes service nodeports; NOTE: this must be the last rule in this chain" -m addrtype --dst-type LOCAL -j KUBE-NODEPORTS +COMMIT +`, + }, + { + name: "no usable endpoints on any node", + terminatingFeatureGate: true, + endpointslice: &discovery.EndpointSlice{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("%s-1", "svc1"), + Namespace: "ns1", + Labels: map[string]string{discovery.LabelServiceName: "svc1"}, + }, + Ports: []discovery.EndpointPort{{ + Name: utilpointer.StringPtr(""), + Port: utilpointer.Int32Ptr(80), + Protocol: &tcpProtocol, + }}, + AddressType: discovery.AddressTypeIPv4, + Endpoints: []discovery.Endpoint{ + { + // Local but not ready or serving + Addresses: []string{"10.0.1.5"}, + Conditions: discovery.EndpointConditions{ + Ready: utilpointer.BoolPtr(false), + Serving: utilpointer.BoolPtr(false), + Terminating: utilpointer.BoolPtr(true), + }, + NodeName: utilpointer.StringPtr(testHostname), + }, + { + // Remote and not ready or serving + Addresses: []string{"10.0.1.5"}, + Conditions: discovery.EndpointConditions{ + Ready: utilpointer.BoolPtr(false), + Serving: utilpointer.BoolPtr(false), + Terminating: utilpointer.BoolPtr(true), + }, + NodeName: utilpointer.StringPtr("host-1"), + }, + }, + }, + noUsableEndpoints: true, + expectedIPTables: ` +*filter +:KUBE-SERVICES - [0:0] +:KUBE-EXTERNAL-SERVICES - [0:0] +:KUBE-FORWARD - [0:0] +:KUBE-NODEPORTS - [0:0] +-A KUBE-FORWARD -m conntrack --ctstate INVALID -j DROP +-A KUBE-FORWARD -m comment --comment "kubernetes forwarding rules" -m mark --mark 0x4000/0x4000 -j ACCEPT +-A KUBE-FORWARD -m comment --comment "kubernetes forwarding conntrack pod source rule" -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT +-A KUBE-FORWARD -m comment --comment "kubernetes forwarding conntrack pod destination rule" -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT +-A KUBE-SERVICES -m comment --comment "ns1/svc1 has no endpoints" -m tcp -p tcp -d 172.20.1.1/32 --dport 80 -j REJECT +-A KUBE-EXTERNAL-SERVICES -m comment --comment "ns1/svc1 has no endpoints" -m tcp -p tcp -d 10.1.2.3/32 --dport 80 -j REJECT +COMMIT +*nat +:KUBE-SERVICES - [0:0] +:KUBE-NODEPORTS - [0:0] +:KUBE-POSTROUTING - [0:0] +:KUBE-MARK-MASQ - [0:0] +:KUBE-XLB-AQI2S6QIMU7PVVRP - [0:0] +-A KUBE-POSTROUTING -m mark ! --mark 0x4000/0x4000 -j RETURN +-A KUBE-POSTROUTING -j MARK --xor-mark 0x4000 +-A KUBE-POSTROUTING -m comment --comment "kubernetes service traffic requiring SNAT" -j MASQUERADE +-A KUBE-MARK-MASQ -j MARK --or-mark 0x4000 +-A KUBE-SERVICES -m comment --comment "kubernetes service nodeports; NOTE: this must be the last rule in this chain" -m addrtype --dst-type LOCAL -j KUBE-NODEPORTS +COMMIT `, }, } @@ -4812,7 +4900,12 @@ COMMIT fp.OnEndpointSliceDelete(testcase.endpointslice) fp.syncProxyRules() - assertIPTablesRulesNotEqual(t, testcase.expectedIPTables, fp.iptablesData.String()) + if testcase.noUsableEndpoints { + // Deleting the EndpointSlice should have had no effect + assertIPTablesRulesEqual(t, testcase.expectedIPTables, fp.iptablesData.String()) + } else { + assertIPTablesRulesNotEqual(t, testcase.expectedIPTables, fp.iptablesData.String()) + } }) } }