Don't create no-op iptables rules for services with no endpoints
This commit is contained in:
parent
6c91c420b6
commit
07ead7d8e2
@ -824,7 +824,6 @@ func (proxier *Proxier) syncProxyRules() {
|
|||||||
args := make([]string, 64)
|
args := make([]string, 64)
|
||||||
|
|
||||||
// Build rules for each service.
|
// Build rules for each service.
|
||||||
var svcNameString string
|
|
||||||
for svcName, svc := range proxier.serviceMap {
|
for svcName, svc := range proxier.serviceMap {
|
||||||
svcInfo, ok := svc.(*serviceInfo)
|
svcInfo, ok := svc.(*serviceInfo)
|
||||||
if !ok {
|
if !ok {
|
||||||
@ -833,16 +832,19 @@ func (proxier *Proxier) syncProxyRules() {
|
|||||||
}
|
}
|
||||||
isIPv6 := utilproxy.IsIPv6(svcInfo.clusterIP)
|
isIPv6 := utilproxy.IsIPv6(svcInfo.clusterIP)
|
||||||
protocol := strings.ToLower(string(svcInfo.protocol))
|
protocol := strings.ToLower(string(svcInfo.protocol))
|
||||||
svcNameString = svcInfo.serviceNameString
|
svcNameString := svcInfo.serviceNameString
|
||||||
|
hasEndpoints := len(proxier.endpointsMap[svcName]) > 0
|
||||||
|
|
||||||
// Create the per-service chain, retaining counters if possible.
|
|
||||||
svcChain := svcInfo.servicePortChainName
|
svcChain := svcInfo.servicePortChainName
|
||||||
if chain, ok := existingNATChains[svcChain]; ok {
|
if hasEndpoints {
|
||||||
writeLine(proxier.natChains, chain)
|
// Create the per-service chain, retaining counters if possible.
|
||||||
} else {
|
if chain, ok := existingNATChains[svcChain]; ok {
|
||||||
writeLine(proxier.natChains, utiliptables.MakeChainLine(svcChain))
|
writeLine(proxier.natChains, chain)
|
||||||
|
} else {
|
||||||
|
writeLine(proxier.natChains, utiliptables.MakeChainLine(svcChain))
|
||||||
|
}
|
||||||
|
activeNATChains[svcChain] = true
|
||||||
}
|
}
|
||||||
activeNATChains[svcChain] = true
|
|
||||||
|
|
||||||
svcXlbChain := svcInfo.serviceLBChainName
|
svcXlbChain := svcInfo.serviceLBChainName
|
||||||
if svcInfo.onlyNodeLocalEndpoints {
|
if svcInfo.onlyNodeLocalEndpoints {
|
||||||
@ -854,30 +856,38 @@ func (proxier *Proxier) syncProxyRules() {
|
|||||||
writeLine(proxier.natChains, utiliptables.MakeChainLine(svcXlbChain))
|
writeLine(proxier.natChains, utiliptables.MakeChainLine(svcXlbChain))
|
||||||
}
|
}
|
||||||
activeNATChains[svcXlbChain] = true
|
activeNATChains[svcXlbChain] = true
|
||||||
} else if activeNATChains[svcXlbChain] {
|
|
||||||
// Cleanup the previously created XLB chain for this service
|
|
||||||
delete(activeNATChains, svcXlbChain)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Capture the clusterIP.
|
// Capture the clusterIP.
|
||||||
args = append(args[:0],
|
if hasEndpoints {
|
||||||
"-A", string(kubeServicesChain),
|
args = append(args[:0],
|
||||||
"-m", "comment", "--comment", fmt.Sprintf(`"%s cluster IP"`, svcNameString),
|
"-A", string(kubeServicesChain),
|
||||||
"-m", protocol, "-p", protocol,
|
"-m", "comment", "--comment", fmt.Sprintf(`"%s cluster IP"`, svcNameString),
|
||||||
"-d", utilproxy.ToCIDR(svcInfo.clusterIP),
|
"-m", protocol, "-p", protocol,
|
||||||
"--dport", strconv.Itoa(svcInfo.port),
|
"-d", utilproxy.ToCIDR(svcInfo.clusterIP),
|
||||||
)
|
"--dport", strconv.Itoa(svcInfo.port),
|
||||||
if proxier.masqueradeAll {
|
)
|
||||||
writeLine(proxier.natRules, append(args, "-j", string(KubeMarkMasqChain))...)
|
if proxier.masqueradeAll {
|
||||||
} else if len(proxier.clusterCIDR) > 0 {
|
writeLine(proxier.natRules, append(args, "-j", string(KubeMarkMasqChain))...)
|
||||||
// This masquerades off-cluster traffic to a service VIP. The idea
|
} else if len(proxier.clusterCIDR) > 0 {
|
||||||
// is that you can establish a static route for your Service range,
|
// This masquerades off-cluster traffic to a service VIP. The idea
|
||||||
// routing to any node, and that node will bridge into the Service
|
// is that you can establish a static route for your Service range,
|
||||||
// for you. Since that might bounce off-node, we masquerade here.
|
// routing to any node, and that node will bridge into the Service
|
||||||
// If/when we support "Local" policy for VIPs, we should update this.
|
// for you. Since that might bounce off-node, we masquerade here.
|
||||||
writeLine(proxier.natRules, append(args, "! -s", proxier.clusterCIDR, "-j", string(KubeMarkMasqChain))...)
|
// If/when we support "Local" policy for VIPs, we should update this.
|
||||||
|
writeLine(proxier.natRules, append(args, "! -s", proxier.clusterCIDR, "-j", string(KubeMarkMasqChain))...)
|
||||||
|
}
|
||||||
|
writeLine(proxier.natRules, append(args, "-j", string(svcChain))...)
|
||||||
|
} else {
|
||||||
|
writeLine(proxier.filterRules,
|
||||||
|
"-A", string(kubeServicesChain),
|
||||||
|
"-m", "comment", "--comment", fmt.Sprintf(`"%s has no endpoints"`, svcNameString),
|
||||||
|
"-m", protocol, "-p", protocol,
|
||||||
|
"-d", utilproxy.ToCIDR(svcInfo.clusterIP),
|
||||||
|
"--dport", strconv.Itoa(svcInfo.port),
|
||||||
|
"-j", "REJECT",
|
||||||
|
)
|
||||||
}
|
}
|
||||||
writeLine(proxier.natRules, append(args, "-j", string(svcChain))...)
|
|
||||||
|
|
||||||
// Capture externalIPs.
|
// Capture externalIPs.
|
||||||
for _, externalIP := range svcInfo.externalIPs {
|
for _, externalIP := range svcInfo.externalIPs {
|
||||||
@ -913,33 +923,32 @@ func (proxier *Proxier) syncProxyRules() {
|
|||||||
}
|
}
|
||||||
replacementPortsMap[lp] = socket
|
replacementPortsMap[lp] = socket
|
||||||
}
|
}
|
||||||
} // We're holding the port, so it's OK to install iptables rules.
|
}
|
||||||
args = append(args[:0],
|
|
||||||
"-A", string(kubeServicesChain),
|
|
||||||
"-m", "comment", "--comment", fmt.Sprintf(`"%s external IP"`, svcNameString),
|
|
||||||
"-m", protocol, "-p", protocol,
|
|
||||||
"-d", utilproxy.ToCIDR(net.ParseIP(externalIP)),
|
|
||||||
"--dport", strconv.Itoa(svcInfo.port),
|
|
||||||
)
|
|
||||||
// We have to SNAT packets to external IPs.
|
|
||||||
writeLine(proxier.natRules, append(args, "-j", string(KubeMarkMasqChain))...)
|
|
||||||
|
|
||||||
// Allow traffic for external IPs that does not come from a bridge (i.e. not from a container)
|
if hasEndpoints {
|
||||||
// nor from a local process to be forwarded to the service.
|
args = append(args[:0],
|
||||||
// This rule roughly translates to "all traffic from off-machine".
|
"-A", string(kubeServicesChain),
|
||||||
// This is imperfect in the face of network plugins that might not use a bridge, but we can revisit that later.
|
"-m", "comment", "--comment", fmt.Sprintf(`"%s external IP"`, svcNameString),
|
||||||
externalTrafficOnlyArgs := append(args,
|
"-m", protocol, "-p", protocol,
|
||||||
"-m", "physdev", "!", "--physdev-is-in",
|
"-d", utilproxy.ToCIDR(net.ParseIP(externalIP)),
|
||||||
"-m", "addrtype", "!", "--src-type", "LOCAL")
|
"--dport", strconv.Itoa(svcInfo.port),
|
||||||
writeLine(proxier.natRules, append(externalTrafficOnlyArgs, "-j", string(svcChain))...)
|
)
|
||||||
dstLocalOnlyArgs := append(args, "-m", "addrtype", "--dst-type", "LOCAL")
|
// We have to SNAT packets to external IPs.
|
||||||
// Allow traffic bound for external IPs that happen to be recognized as local IPs to stay local.
|
writeLine(proxier.natRules, append(args, "-j", string(KubeMarkMasqChain))...)
|
||||||
// This covers cases like GCE load-balancers which get added to the local routing table.
|
|
||||||
writeLine(proxier.natRules, append(dstLocalOnlyArgs, "-j", string(svcChain))...)
|
|
||||||
|
|
||||||
// If the service has no endpoints then reject packets coming via externalIP
|
// Allow traffic for external IPs that does not come from a bridge (i.e. not from a container)
|
||||||
// Install ICMP Reject rule in filter table for destination=externalIP and dport=svcport
|
// nor from a local process to be forwarded to the service.
|
||||||
if len(proxier.endpointsMap[svcName]) == 0 {
|
// This rule roughly translates to "all traffic from off-machine".
|
||||||
|
// This is imperfect in the face of network plugins that might not use a bridge, but we can revisit that later.
|
||||||
|
externalTrafficOnlyArgs := append(args,
|
||||||
|
"-m", "physdev", "!", "--physdev-is-in",
|
||||||
|
"-m", "addrtype", "!", "--src-type", "LOCAL")
|
||||||
|
writeLine(proxier.natRules, append(externalTrafficOnlyArgs, "-j", string(svcChain))...)
|
||||||
|
dstLocalOnlyArgs := append(args, "-m", "addrtype", "--dst-type", "LOCAL")
|
||||||
|
// Allow traffic bound for external IPs that happen to be recognized as local IPs to stay local.
|
||||||
|
// This covers cases like GCE load-balancers which get added to the local routing table.
|
||||||
|
writeLine(proxier.natRules, append(dstLocalOnlyArgs, "-j", string(svcChain))...)
|
||||||
|
} else {
|
||||||
writeLine(proxier.filterRules,
|
writeLine(proxier.filterRules,
|
||||||
"-A", string(kubeServicesChain),
|
"-A", string(kubeServicesChain),
|
||||||
"-m", "comment", "--comment", fmt.Sprintf(`"%s has no endpoints"`, svcNameString),
|
"-m", "comment", "--comment", fmt.Sprintf(`"%s has no endpoints"`, svcNameString),
|
||||||
@ -952,71 +961,74 @@ func (proxier *Proxier) syncProxyRules() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Capture load-balancer ingress.
|
// Capture load-balancer ingress.
|
||||||
fwChain := svcInfo.serviceFirewallChainName
|
if hasEndpoints {
|
||||||
for _, ingress := range svcInfo.loadBalancerStatus.Ingress {
|
fwChain := svcInfo.serviceFirewallChainName
|
||||||
if ingress.IP != "" {
|
for _, ingress := range svcInfo.loadBalancerStatus.Ingress {
|
||||||
// create service firewall chain
|
if ingress.IP != "" {
|
||||||
if chain, ok := existingNATChains[fwChain]; ok {
|
// create service firewall chain
|
||||||
writeLine(proxier.natChains, chain)
|
if chain, ok := existingNATChains[fwChain]; ok {
|
||||||
} else {
|
writeLine(proxier.natChains, chain)
|
||||||
writeLine(proxier.natChains, utiliptables.MakeChainLine(fwChain))
|
} else {
|
||||||
}
|
writeLine(proxier.natChains, utiliptables.MakeChainLine(fwChain))
|
||||||
activeNATChains[fwChain] = true
|
}
|
||||||
// The service firewall rules are created based on ServiceSpec.loadBalancerSourceRanges field.
|
activeNATChains[fwChain] = true
|
||||||
// This currently works for loadbalancers that preserves source ips.
|
// The service firewall rules are created based on ServiceSpec.loadBalancerSourceRanges field.
|
||||||
// For loadbalancers which direct traffic to service NodePort, the firewall rules will not apply.
|
// This currently works for loadbalancers that preserves source ips.
|
||||||
|
// For loadbalancers which direct traffic to service NodePort, the firewall rules will not apply.
|
||||||
|
|
||||||
args = append(args[:0],
|
args = append(args[:0],
|
||||||
"-A", string(kubeServicesChain),
|
"-A", string(kubeServicesChain),
|
||||||
"-m", "comment", "--comment", fmt.Sprintf(`"%s loadbalancer IP"`, svcNameString),
|
"-m", "comment", "--comment", fmt.Sprintf(`"%s loadbalancer IP"`, svcNameString),
|
||||||
"-m", protocol, "-p", protocol,
|
"-m", protocol, "-p", protocol,
|
||||||
"-d", utilproxy.ToCIDR(net.ParseIP(ingress.IP)),
|
"-d", utilproxy.ToCIDR(net.ParseIP(ingress.IP)),
|
||||||
"--dport", strconv.Itoa(svcInfo.port),
|
"--dport", strconv.Itoa(svcInfo.port),
|
||||||
)
|
)
|
||||||
// jump to service firewall chain
|
// jump to service firewall chain
|
||||||
writeLine(proxier.natRules, append(args, "-j", string(fwChain))...)
|
writeLine(proxier.natRules, append(args, "-j", string(fwChain))...)
|
||||||
|
|
||||||
args = append(args[:0],
|
args = append(args[:0],
|
||||||
"-A", string(fwChain),
|
"-A", string(fwChain),
|
||||||
"-m", "comment", "--comment", fmt.Sprintf(`"%s loadbalancer IP"`, svcNameString),
|
"-m", "comment", "--comment", fmt.Sprintf(`"%s loadbalancer IP"`, svcNameString),
|
||||||
)
|
)
|
||||||
|
|
||||||
// Each source match rule in the FW chain may jump to either the SVC or the XLB chain
|
// Each source match rule in the FW chain may jump to either the SVC or the XLB chain
|
||||||
chosenChain := svcXlbChain
|
chosenChain := svcXlbChain
|
||||||
// If we are proxying globally, we need to masquerade in case we cross nodes.
|
// If we are proxying globally, we need to masquerade in case we cross nodes.
|
||||||
// If we are proxying only locally, we can retain the source IP.
|
// If we are proxying only locally, we can retain the source IP.
|
||||||
if !svcInfo.onlyNodeLocalEndpoints {
|
if !svcInfo.onlyNodeLocalEndpoints {
|
||||||
writeLine(proxier.natRules, append(args, "-j", string(KubeMarkMasqChain))...)
|
writeLine(proxier.natRules, append(args, "-j", string(KubeMarkMasqChain))...)
|
||||||
chosenChain = svcChain
|
chosenChain = svcChain
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(svcInfo.loadBalancerSourceRanges) == 0 {
|
if len(svcInfo.loadBalancerSourceRanges) == 0 {
|
||||||
// allow all sources, so jump directly to the KUBE-SVC or KUBE-XLB chain
|
// allow all sources, so jump directly to the KUBE-SVC or KUBE-XLB chain
|
||||||
writeLine(proxier.natRules, append(args, "-j", string(chosenChain))...)
|
writeLine(proxier.natRules, append(args, "-j", string(chosenChain))...)
|
||||||
} else {
|
} else {
|
||||||
// firewall filter based on each source range
|
// firewall filter based on each source range
|
||||||
allowFromNode := false
|
allowFromNode := false
|
||||||
for _, src := range svcInfo.loadBalancerSourceRanges {
|
for _, src := range svcInfo.loadBalancerSourceRanges {
|
||||||
writeLine(proxier.natRules, append(args, "-s", src, "-j", string(chosenChain))...)
|
writeLine(proxier.natRules, append(args, "-s", src, "-j", string(chosenChain))...)
|
||||||
// ignore error because it has been validated
|
// ignore error because it has been validated
|
||||||
_, cidr, _ := net.ParseCIDR(src)
|
_, cidr, _ := net.ParseCIDR(src)
|
||||||
if cidr.Contains(proxier.nodeIP) {
|
if cidr.Contains(proxier.nodeIP) {
|
||||||
allowFromNode = true
|
allowFromNode = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// generally, ip route rule was added to intercept request to loadbalancer vip from the
|
||||||
|
// loadbalancer's backend hosts. In this case, request will not hit the loadbalancer but loop back directly.
|
||||||
|
// Need to add the following rule to allow request on host.
|
||||||
|
if allowFromNode {
|
||||||
|
writeLine(proxier.natRules, append(args, "-s", utilproxy.ToCIDR(net.ParseIP(ingress.IP)), "-j", string(chosenChain))...)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// generally, ip route rule was added to intercept request to loadbalancer vip from the
|
|
||||||
// loadbalancer's backend hosts. In this case, request will not hit the loadbalancer but loop back directly.
|
|
||||||
// Need to add the following rule to allow request on host.
|
|
||||||
if allowFromNode {
|
|
||||||
writeLine(proxier.natRules, append(args, "-s", utilproxy.ToCIDR(net.ParseIP(ingress.IP)), "-j", string(chosenChain))...)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// If the packet was able to reach the end of firewall chain, then it did not get DNATed.
|
// If the packet was able to reach the end of firewall chain, then it did not get DNATed.
|
||||||
// It means the packet cannot go thru the firewall, then mark it for DROP
|
// It means the packet cannot go thru the firewall, then mark it for DROP
|
||||||
writeLine(proxier.natRules, append(args, "-j", string(KubeMarkDropChain))...)
|
writeLine(proxier.natRules, append(args, "-j", string(KubeMarkDropChain))...)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// FIXME: do we need REJECT rules for load-balancer ingress if !hasEndpoints?
|
||||||
|
|
||||||
// Capture nodeports. If we had more than 2 rules it might be
|
// Capture nodeports. If we had more than 2 rules it might be
|
||||||
// worthwhile to make a new per-service chain for nodeport rules, but
|
// worthwhile to make a new per-service chain for nodeport rules, but
|
||||||
@ -1050,37 +1062,33 @@ func (proxier *Proxier) syncProxyRules() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
replacementPortsMap[lp] = socket
|
replacementPortsMap[lp] = socket
|
||||||
} // We're holding the port, so it's OK to install iptables rules.
|
|
||||||
|
|
||||||
args = append(args[:0],
|
|
||||||
"-A", string(kubeNodePortsChain),
|
|
||||||
"-m", "comment", "--comment", svcNameString,
|
|
||||||
"-m", protocol, "-p", protocol,
|
|
||||||
"--dport", strconv.Itoa(svcInfo.nodePort),
|
|
||||||
)
|
|
||||||
if !svcInfo.onlyNodeLocalEndpoints {
|
|
||||||
// Nodeports need SNAT, unless they're local.
|
|
||||||
writeLine(proxier.natRules, append(args, "-j", string(KubeMarkMasqChain))...)
|
|
||||||
// Jump to the service chain.
|
|
||||||
writeLine(proxier.natRules, append(args, "-j", string(svcChain))...)
|
|
||||||
} else {
|
|
||||||
// TODO: Make all nodePorts jump to the firewall chain.
|
|
||||||
// Currently we only create it for loadbalancers (#33586).
|
|
||||||
|
|
||||||
// Fix localhost martian source error
|
|
||||||
loopback := "127.0.0.0/8"
|
|
||||||
if isIPv6 {
|
|
||||||
loopback = "::1/128"
|
|
||||||
}
|
|
||||||
writeLine(proxier.natRules, append(args, "-s", loopback, "-j", string(KubeMarkMasqChain))...)
|
|
||||||
writeLine(proxier.natRules, append(args, "-j", string(svcXlbChain))...)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// If the service has no endpoints then reject packets. The filter
|
if hasEndpoints {
|
||||||
// table doesn't currently have the same per-service structure that
|
args = append(args[:0],
|
||||||
// the nat table does, so we just stick this into the kube-services
|
"-A", string(kubeNodePortsChain),
|
||||||
// chain.
|
"-m", "comment", "--comment", svcNameString,
|
||||||
if len(proxier.endpointsMap[svcName]) == 0 {
|
"-m", protocol, "-p", protocol,
|
||||||
|
"--dport", strconv.Itoa(svcInfo.nodePort),
|
||||||
|
)
|
||||||
|
if !svcInfo.onlyNodeLocalEndpoints {
|
||||||
|
// Nodeports need SNAT, unless they're local.
|
||||||
|
writeLine(proxier.natRules, append(args, "-j", string(KubeMarkMasqChain))...)
|
||||||
|
// Jump to the service chain.
|
||||||
|
writeLine(proxier.natRules, append(args, "-j", string(svcChain))...)
|
||||||
|
} else {
|
||||||
|
// TODO: Make all nodePorts jump to the firewall chain.
|
||||||
|
// Currently we only create it for loadbalancers (#33586).
|
||||||
|
|
||||||
|
// Fix localhost martian source error
|
||||||
|
loopback := "127.0.0.0/8"
|
||||||
|
if isIPv6 {
|
||||||
|
loopback = "::1/128"
|
||||||
|
}
|
||||||
|
writeLine(proxier.natRules, append(args, "-s", loopback, "-j", string(KubeMarkMasqChain))...)
|
||||||
|
writeLine(proxier.natRules, append(args, "-j", string(svcXlbChain))...)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
writeLine(proxier.filterRules,
|
writeLine(proxier.filterRules,
|
||||||
"-A", string(kubeServicesChain),
|
"-A", string(kubeServicesChain),
|
||||||
"-m", "comment", "--comment", fmt.Sprintf(`"%s has no endpoints"`, svcNameString),
|
"-m", "comment", "--comment", fmt.Sprintf(`"%s has no endpoints"`, svcNameString),
|
||||||
@ -1092,21 +1100,10 @@ func (proxier *Proxier) syncProxyRules() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If the service has no endpoints then reject packets.
|
if !hasEndpoints {
|
||||||
if len(proxier.endpointsMap[svcName]) == 0 {
|
|
||||||
writeLine(proxier.filterRules,
|
|
||||||
"-A", string(kubeServicesChain),
|
|
||||||
"-m", "comment", "--comment", fmt.Sprintf(`"%s has no endpoints"`, svcNameString),
|
|
||||||
"-m", protocol, "-p", protocol,
|
|
||||||
"-d", utilproxy.ToCIDR(svcInfo.clusterIP),
|
|
||||||
"--dport", strconv.Itoa(svcInfo.port),
|
|
||||||
"-j", "REJECT",
|
|
||||||
)
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// From here on, we assume there are active endpoints.
|
|
||||||
|
|
||||||
// Generate the per-endpoint chains. We do this in multiple passes so we
|
// Generate the per-endpoint chains. We do this in multiple passes so we
|
||||||
// can group rules together.
|
// can group rules together.
|
||||||
// These two slices parallel each other - keep in sync
|
// These two slices parallel each other - keep in sync
|
||||||
|
Loading…
Reference in New Issue
Block a user