Don't create no-op iptables rules for services with no endpoints

This commit is contained in:
Dan Winship 2017-12-18 14:06:50 -05:00
parent 6c91c420b6
commit 07ead7d8e2

View File

@@ -824,7 +824,6 @@ func (proxier *Proxier) syncProxyRules() {
args := make([]string, 64) args := make([]string, 64)
// Build rules for each service. // Build rules for each service.
var svcNameString string
for svcName, svc := range proxier.serviceMap { for svcName, svc := range proxier.serviceMap {
svcInfo, ok := svc.(*serviceInfo) svcInfo, ok := svc.(*serviceInfo)
if !ok { if !ok {
@@ -833,16 +832,19 @@ func (proxier *Proxier) syncProxyRules() {
} }
isIPv6 := utilproxy.IsIPv6(svcInfo.clusterIP) isIPv6 := utilproxy.IsIPv6(svcInfo.clusterIP)
protocol := strings.ToLower(string(svcInfo.protocol)) protocol := strings.ToLower(string(svcInfo.protocol))
svcNameString = svcInfo.serviceNameString svcNameString := svcInfo.serviceNameString
hasEndpoints := len(proxier.endpointsMap[svcName]) > 0
// Create the per-service chain, retaining counters if possible.
svcChain := svcInfo.servicePortChainName svcChain := svcInfo.servicePortChainName
if chain, ok := existingNATChains[svcChain]; ok { if hasEndpoints {
writeLine(proxier.natChains, chain) // Create the per-service chain, retaining counters if possible.
} else { if chain, ok := existingNATChains[svcChain]; ok {
writeLine(proxier.natChains, utiliptables.MakeChainLine(svcChain)) writeLine(proxier.natChains, chain)
} else {
writeLine(proxier.natChains, utiliptables.MakeChainLine(svcChain))
}
activeNATChains[svcChain] = true
} }
activeNATChains[svcChain] = true
svcXlbChain := svcInfo.serviceLBChainName svcXlbChain := svcInfo.serviceLBChainName
if svcInfo.onlyNodeLocalEndpoints { if svcInfo.onlyNodeLocalEndpoints {
@@ -854,30 +856,38 @@ func (proxier *Proxier) syncProxyRules() {
writeLine(proxier.natChains, utiliptables.MakeChainLine(svcXlbChain)) writeLine(proxier.natChains, utiliptables.MakeChainLine(svcXlbChain))
} }
activeNATChains[svcXlbChain] = true activeNATChains[svcXlbChain] = true
} else if activeNATChains[svcXlbChain] {
// Cleanup the previously created XLB chain for this service
delete(activeNATChains, svcXlbChain)
} }
// Capture the clusterIP. // Capture the clusterIP.
args = append(args[:0], if hasEndpoints {
"-A", string(kubeServicesChain), args = append(args[:0],
"-m", "comment", "--comment", fmt.Sprintf(`"%s cluster IP"`, svcNameString), "-A", string(kubeServicesChain),
"-m", protocol, "-p", protocol, "-m", "comment", "--comment", fmt.Sprintf(`"%s cluster IP"`, svcNameString),
"-d", utilproxy.ToCIDR(svcInfo.clusterIP), "-m", protocol, "-p", protocol,
"--dport", strconv.Itoa(svcInfo.port), "-d", utilproxy.ToCIDR(svcInfo.clusterIP),
) "--dport", strconv.Itoa(svcInfo.port),
if proxier.masqueradeAll { )
writeLine(proxier.natRules, append(args, "-j", string(KubeMarkMasqChain))...) if proxier.masqueradeAll {
} else if len(proxier.clusterCIDR) > 0 { writeLine(proxier.natRules, append(args, "-j", string(KubeMarkMasqChain))...)
// This masquerades off-cluster traffic to a service VIP. The idea } else if len(proxier.clusterCIDR) > 0 {
// is that you can establish a static route for your Service range, // This masquerades off-cluster traffic to a service VIP. The idea
// routing to any node, and that node will bridge into the Service // is that you can establish a static route for your Service range,
// for you. Since that might bounce off-node, we masquerade here. // routing to any node, and that node will bridge into the Service
// If/when we support "Local" policy for VIPs, we should update this. // for you. Since that might bounce off-node, we masquerade here.
writeLine(proxier.natRules, append(args, "! -s", proxier.clusterCIDR, "-j", string(KubeMarkMasqChain))...) // If/when we support "Local" policy for VIPs, we should update this.
writeLine(proxier.natRules, append(args, "! -s", proxier.clusterCIDR, "-j", string(KubeMarkMasqChain))...)
}
writeLine(proxier.natRules, append(args, "-j", string(svcChain))...)
} else {
writeLine(proxier.filterRules,
"-A", string(kubeServicesChain),
"-m", "comment", "--comment", fmt.Sprintf(`"%s has no endpoints"`, svcNameString),
"-m", protocol, "-p", protocol,
"-d", utilproxy.ToCIDR(svcInfo.clusterIP),
"--dport", strconv.Itoa(svcInfo.port),
"-j", "REJECT",
)
} }
writeLine(proxier.natRules, append(args, "-j", string(svcChain))...)
// Capture externalIPs. // Capture externalIPs.
for _, externalIP := range svcInfo.externalIPs { for _, externalIP := range svcInfo.externalIPs {
@@ -913,33 +923,32 @@ func (proxier *Proxier) syncProxyRules() {
} }
replacementPortsMap[lp] = socket replacementPortsMap[lp] = socket
} }
} // We're holding the port, so it's OK to install iptables rules. }
args = append(args[:0],
"-A", string(kubeServicesChain),
"-m", "comment", "--comment", fmt.Sprintf(`"%s external IP"`, svcNameString),
"-m", protocol, "-p", protocol,
"-d", utilproxy.ToCIDR(net.ParseIP(externalIP)),
"--dport", strconv.Itoa(svcInfo.port),
)
// We have to SNAT packets to external IPs.
writeLine(proxier.natRules, append(args, "-j", string(KubeMarkMasqChain))...)
// Allow traffic for external IPs that does not come from a bridge (i.e. not from a container) if hasEndpoints {
// nor from a local process to be forwarded to the service. args = append(args[:0],
// This rule roughly translates to "all traffic from off-machine". "-A", string(kubeServicesChain),
// This is imperfect in the face of network plugins that might not use a bridge, but we can revisit that later. "-m", "comment", "--comment", fmt.Sprintf(`"%s external IP"`, svcNameString),
externalTrafficOnlyArgs := append(args, "-m", protocol, "-p", protocol,
"-m", "physdev", "!", "--physdev-is-in", "-d", utilproxy.ToCIDR(net.ParseIP(externalIP)),
"-m", "addrtype", "!", "--src-type", "LOCAL") "--dport", strconv.Itoa(svcInfo.port),
writeLine(proxier.natRules, append(externalTrafficOnlyArgs, "-j", string(svcChain))...) )
dstLocalOnlyArgs := append(args, "-m", "addrtype", "--dst-type", "LOCAL") // We have to SNAT packets to external IPs.
// Allow traffic bound for external IPs that happen to be recognized as local IPs to stay local. writeLine(proxier.natRules, append(args, "-j", string(KubeMarkMasqChain))...)
// This covers cases like GCE load-balancers which get added to the local routing table.
writeLine(proxier.natRules, append(dstLocalOnlyArgs, "-j", string(svcChain))...)
// If the service has no endpoints then reject packets coming via externalIP // Allow traffic for external IPs that does not come from a bridge (i.e. not from a container)
// Install ICMP Reject rule in filter table for destination=externalIP and dport=svcport // nor from a local process to be forwarded to the service.
if len(proxier.endpointsMap[svcName]) == 0 { // This rule roughly translates to "all traffic from off-machine".
// This is imperfect in the face of network plugins that might not use a bridge, but we can revisit that later.
externalTrafficOnlyArgs := append(args,
"-m", "physdev", "!", "--physdev-is-in",
"-m", "addrtype", "!", "--src-type", "LOCAL")
writeLine(proxier.natRules, append(externalTrafficOnlyArgs, "-j", string(svcChain))...)
dstLocalOnlyArgs := append(args, "-m", "addrtype", "--dst-type", "LOCAL")
// Allow traffic bound for external IPs that happen to be recognized as local IPs to stay local.
// This covers cases like GCE load-balancers which get added to the local routing table.
writeLine(proxier.natRules, append(dstLocalOnlyArgs, "-j", string(svcChain))...)
} else {
writeLine(proxier.filterRules, writeLine(proxier.filterRules,
"-A", string(kubeServicesChain), "-A", string(kubeServicesChain),
"-m", "comment", "--comment", fmt.Sprintf(`"%s has no endpoints"`, svcNameString), "-m", "comment", "--comment", fmt.Sprintf(`"%s has no endpoints"`, svcNameString),
@@ -952,71 +961,74 @@ func (proxier *Proxier) syncProxyRules() {
} }
// Capture load-balancer ingress. // Capture load-balancer ingress.
fwChain := svcInfo.serviceFirewallChainName if hasEndpoints {
for _, ingress := range svcInfo.loadBalancerStatus.Ingress { fwChain := svcInfo.serviceFirewallChainName
if ingress.IP != "" { for _, ingress := range svcInfo.loadBalancerStatus.Ingress {
// create service firewall chain if ingress.IP != "" {
if chain, ok := existingNATChains[fwChain]; ok { // create service firewall chain
writeLine(proxier.natChains, chain) if chain, ok := existingNATChains[fwChain]; ok {
} else { writeLine(proxier.natChains, chain)
writeLine(proxier.natChains, utiliptables.MakeChainLine(fwChain)) } else {
} writeLine(proxier.natChains, utiliptables.MakeChainLine(fwChain))
activeNATChains[fwChain] = true }
// The service firewall rules are created based on ServiceSpec.loadBalancerSourceRanges field. activeNATChains[fwChain] = true
// This currently works for loadbalancers that preserves source ips. // The service firewall rules are created based on ServiceSpec.loadBalancerSourceRanges field.
// For loadbalancers which direct traffic to service NodePort, the firewall rules will not apply. // This currently works for loadbalancers that preserves source ips.
// For loadbalancers which direct traffic to service NodePort, the firewall rules will not apply.
args = append(args[:0], args = append(args[:0],
"-A", string(kubeServicesChain), "-A", string(kubeServicesChain),
"-m", "comment", "--comment", fmt.Sprintf(`"%s loadbalancer IP"`, svcNameString), "-m", "comment", "--comment", fmt.Sprintf(`"%s loadbalancer IP"`, svcNameString),
"-m", protocol, "-p", protocol, "-m", protocol, "-p", protocol,
"-d", utilproxy.ToCIDR(net.ParseIP(ingress.IP)), "-d", utilproxy.ToCIDR(net.ParseIP(ingress.IP)),
"--dport", strconv.Itoa(svcInfo.port), "--dport", strconv.Itoa(svcInfo.port),
) )
// jump to service firewall chain // jump to service firewall chain
writeLine(proxier.natRules, append(args, "-j", string(fwChain))...) writeLine(proxier.natRules, append(args, "-j", string(fwChain))...)
args = append(args[:0], args = append(args[:0],
"-A", string(fwChain), "-A", string(fwChain),
"-m", "comment", "--comment", fmt.Sprintf(`"%s loadbalancer IP"`, svcNameString), "-m", "comment", "--comment", fmt.Sprintf(`"%s loadbalancer IP"`, svcNameString),
) )
// Each source match rule in the FW chain may jump to either the SVC or the XLB chain // Each source match rule in the FW chain may jump to either the SVC or the XLB chain
chosenChain := svcXlbChain chosenChain := svcXlbChain
// If we are proxying globally, we need to masquerade in case we cross nodes. // If we are proxying globally, we need to masquerade in case we cross nodes.
// If we are proxying only locally, we can retain the source IP. // If we are proxying only locally, we can retain the source IP.
if !svcInfo.onlyNodeLocalEndpoints { if !svcInfo.onlyNodeLocalEndpoints {
writeLine(proxier.natRules, append(args, "-j", string(KubeMarkMasqChain))...) writeLine(proxier.natRules, append(args, "-j", string(KubeMarkMasqChain))...)
chosenChain = svcChain chosenChain = svcChain
} }
if len(svcInfo.loadBalancerSourceRanges) == 0 { if len(svcInfo.loadBalancerSourceRanges) == 0 {
// allow all sources, so jump directly to the KUBE-SVC or KUBE-XLB chain // allow all sources, so jump directly to the KUBE-SVC or KUBE-XLB chain
writeLine(proxier.natRules, append(args, "-j", string(chosenChain))...) writeLine(proxier.natRules, append(args, "-j", string(chosenChain))...)
} else { } else {
// firewall filter based on each source range // firewall filter based on each source range
allowFromNode := false allowFromNode := false
for _, src := range svcInfo.loadBalancerSourceRanges { for _, src := range svcInfo.loadBalancerSourceRanges {
writeLine(proxier.natRules, append(args, "-s", src, "-j", string(chosenChain))...) writeLine(proxier.natRules, append(args, "-s", src, "-j", string(chosenChain))...)
// ignore error because it has been validated // ignore error because it has been validated
_, cidr, _ := net.ParseCIDR(src) _, cidr, _ := net.ParseCIDR(src)
if cidr.Contains(proxier.nodeIP) { if cidr.Contains(proxier.nodeIP) {
allowFromNode = true allowFromNode = true
}
}
// generally, ip route rule was added to intercept request to loadbalancer vip from the
// loadbalancer's backend hosts. In this case, request will not hit the loadbalancer but loop back directly.
// Need to add the following rule to allow request on host.
if allowFromNode {
writeLine(proxier.natRules, append(args, "-s", utilproxy.ToCIDR(net.ParseIP(ingress.IP)), "-j", string(chosenChain))...)
} }
} }
// generally, ip route rule was added to intercept request to loadbalancer vip from the
// loadbalancer's backend hosts. In this case, request will not hit the loadbalancer but loop back directly.
// Need to add the following rule to allow request on host.
if allowFromNode {
writeLine(proxier.natRules, append(args, "-s", utilproxy.ToCIDR(net.ParseIP(ingress.IP)), "-j", string(chosenChain))...)
}
}
// If the packet was able to reach the end of firewall chain, then it did not get DNATed. // If the packet was able to reach the end of firewall chain, then it did not get DNATed.
// It means the packet cannot go thru the firewall, then mark it for DROP // It means the packet cannot go thru the firewall, then mark it for DROP
writeLine(proxier.natRules, append(args, "-j", string(KubeMarkDropChain))...) writeLine(proxier.natRules, append(args, "-j", string(KubeMarkDropChain))...)
}
} }
} }
// FIXME: do we need REJECT rules for load-balancer ingress if !hasEndpoints?
// Capture nodeports. If we had more than 2 rules it might be // Capture nodeports. If we had more than 2 rules it might be
// worthwhile to make a new per-service chain for nodeport rules, but // worthwhile to make a new per-service chain for nodeport rules, but
@@ -1050,37 +1062,33 @@ func (proxier *Proxier) syncProxyRules() {
} }
} }
replacementPortsMap[lp] = socket replacementPortsMap[lp] = socket
} // We're holding the port, so it's OK to install iptables rules.
args = append(args[:0],
"-A", string(kubeNodePortsChain),
"-m", "comment", "--comment", svcNameString,
"-m", protocol, "-p", protocol,
"--dport", strconv.Itoa(svcInfo.nodePort),
)
if !svcInfo.onlyNodeLocalEndpoints {
// Nodeports need SNAT, unless they're local.
writeLine(proxier.natRules, append(args, "-j", string(KubeMarkMasqChain))...)
// Jump to the service chain.
writeLine(proxier.natRules, append(args, "-j", string(svcChain))...)
} else {
// TODO: Make all nodePorts jump to the firewall chain.
// Currently we only create it for loadbalancers (#33586).
// Fix localhost martian source error
loopback := "127.0.0.0/8"
if isIPv6 {
loopback = "::1/128"
}
writeLine(proxier.natRules, append(args, "-s", loopback, "-j", string(KubeMarkMasqChain))...)
writeLine(proxier.natRules, append(args, "-j", string(svcXlbChain))...)
} }
// If the service has no endpoints then reject packets. The filter if hasEndpoints {
// table doesn't currently have the same per-service structure that args = append(args[:0],
// the nat table does, so we just stick this into the kube-services "-A", string(kubeNodePortsChain),
// chain. "-m", "comment", "--comment", svcNameString,
if len(proxier.endpointsMap[svcName]) == 0 { "-m", protocol, "-p", protocol,
"--dport", strconv.Itoa(svcInfo.nodePort),
)
if !svcInfo.onlyNodeLocalEndpoints {
// Nodeports need SNAT, unless they're local.
writeLine(proxier.natRules, append(args, "-j", string(KubeMarkMasqChain))...)
// Jump to the service chain.
writeLine(proxier.natRules, append(args, "-j", string(svcChain))...)
} else {
// TODO: Make all nodePorts jump to the firewall chain.
// Currently we only create it for loadbalancers (#33586).
// Fix localhost martian source error
loopback := "127.0.0.0/8"
if isIPv6 {
loopback = "::1/128"
}
writeLine(proxier.natRules, append(args, "-s", loopback, "-j", string(KubeMarkMasqChain))...)
writeLine(proxier.natRules, append(args, "-j", string(svcXlbChain))...)
}
} else {
writeLine(proxier.filterRules, writeLine(proxier.filterRules,
"-A", string(kubeServicesChain), "-A", string(kubeServicesChain),
"-m", "comment", "--comment", fmt.Sprintf(`"%s has no endpoints"`, svcNameString), "-m", "comment", "--comment", fmt.Sprintf(`"%s has no endpoints"`, svcNameString),
@@ -1092,21 +1100,10 @@ func (proxier *Proxier) syncProxyRules() {
} }
} }
// If the service has no endpoints then reject packets. if !hasEndpoints {
if len(proxier.endpointsMap[svcName]) == 0 {
writeLine(proxier.filterRules,
"-A", string(kubeServicesChain),
"-m", "comment", "--comment", fmt.Sprintf(`"%s has no endpoints"`, svcNameString),
"-m", protocol, "-p", protocol,
"-d", utilproxy.ToCIDR(svcInfo.clusterIP),
"--dport", strconv.Itoa(svcInfo.port),
"-j", "REJECT",
)
continue continue
} }
// From here on, we assume there are active endpoints.
// Generate the per-endpoint chains. We do this in multiple passes so we // Generate the per-endpoint chains. We do this in multiple passes so we
// can group rules together. // can group rules together.
// These two slices parallel each other - keep in sync // These two slices parallel each other - keep in sync