only drop invalid cstate packets if non liberal
Conntrack invalid packets may cause unexpected and subtle bugs on esblished connections, because of that we install by default an iptables rules that drops the packets with this conntrack state. However, there are network scenarios, specially those that use multihoming nodes, that may have legit traffic that is detected by conntrack as invalid, hence these iptables rules are causing problems dropping this traffic. An alternative to solve the spurious problems caused by the invalid connectrack packets is to set the sysctl nf_conntrack_tcp_be_liberal option, but this is a system wide setting and we don't want kube-proxy to be opinionated about the whole node networking configuration. Kube-proxy will only install the DROP rules for invalid conntrack states if the nf_conntrack_tcp_be_liberal is not set. Change-Id: I5eb326931ed915f5ae74d210f0a375842b6a790e
This commit is contained in:
@@ -92,6 +92,7 @@ const (
|
||||
|
||||
const sysctlRouteLocalnet = "net/ipv4/conf/all/route_localnet"
|
||||
const sysctlBridgeCallIPTables = "net/bridge/bridge-nf-call-iptables"
|
||||
const sysctlNFConntrackTCPBeLiberal = "net/netfilter/nf_conntrack_tcp_be_liberal"
|
||||
|
||||
// internal struct for string service information
|
||||
type servicePortInfo struct {
|
||||
@@ -196,6 +197,10 @@ type Proxier struct {
|
||||
// localhostNodePorts indicates whether we allow NodePort services to be accessed
|
||||
// via localhost.
|
||||
localhostNodePorts bool
|
||||
|
||||
// conntrackTCPLiberal indicates whether the system sets the kernel nf_conntrack_tcp_be_liberal
|
||||
conntrackTCPLiberal bool
|
||||
|
||||
// nodePortAddresses selects the interfaces where nodePort works.
|
||||
nodePortAddresses *proxyutil.NodePortAddresses
|
||||
// networkInterfacer defines an interface for several net library functions.
|
||||
@@ -241,6 +246,14 @@ func NewProxier(ipFamily v1.IPFamily,
|
||||
}
|
||||
}
|
||||
|
||||
// Be conservative in what you do, be liberal in what you accept from others.
|
||||
// If it's non-zero, we mark only out of window RST segments as INVALID.
|
||||
// Ref: https://docs.kernel.org/networking/nf_conntrack-sysctl.html
|
||||
conntrackTCPLiberal := false
|
||||
if val, err := sysctl.GetSysctl(sysctlNFConntrackTCPBeLiberal); err == nil && val != 0 {
|
||||
conntrackTCPLiberal = true
|
||||
klog.InfoS("nf_conntrack_tcp_be_liberal set, not installing DROP rules for INVALID packets")
|
||||
}
|
||||
// Proxy needs br_netfilter and bridge-nf-call-iptables=1 when containers
|
||||
// are connected to a Linux bridge (but not SDN bridges). Until most
|
||||
// plugins handle this, log when config is missing
|
||||
@@ -282,6 +295,7 @@ func NewProxier(ipFamily v1.IPFamily,
|
||||
localhostNodePorts: localhostNodePorts,
|
||||
nodePortAddresses: nodePortAddresses,
|
||||
networkInterfacer: proxyutil.RealNetwork{},
|
||||
conntrackTCPLiberal: conntrackTCPLiberal,
|
||||
}
|
||||
|
||||
burstSyncs := 2
|
||||
@@ -1443,14 +1457,17 @@ func (proxier *Proxier) syncProxyRules() {
|
||||
}
|
||||
|
||||
// Drop the packets in INVALID state, which would potentially cause
|
||||
// unexpected connection reset.
|
||||
// https://github.com/kubernetes/kubernetes/issues/74839
|
||||
proxier.filterRules.Write(
|
||||
"-A", string(kubeForwardChain),
|
||||
"-m", "conntrack",
|
||||
"--ctstate", "INVALID",
|
||||
"-j", "DROP",
|
||||
)
|
||||
// unexpected connection reset if nf_conntrack_tcp_be_liberal is not set.
|
||||
// Ref: https://github.com/kubernetes/kubernetes/issues/74839
|
||||
// Ref: https://github.com/kubernetes/kubernetes/issues/117924
|
||||
if !proxier.conntrackTCPLiberal {
|
||||
proxier.filterRules.Write(
|
||||
"-A", string(kubeForwardChain),
|
||||
"-m", "conntrack",
|
||||
"--ctstate", "INVALID",
|
||||
"-j", "DROP",
|
||||
)
|
||||
}
|
||||
|
||||
// If the masqueradeMark has been added then we want to forward that same
|
||||
// traffic, this allows NodePort traffic to be forwarded even if the default
|
||||
|
@@ -2565,6 +2565,49 @@ func TestHealthCheckNodePort(t *testing.T) {
|
||||
})
|
||||
}
|
||||
|
||||
func TestDropInvalidRule(t *testing.T) {
|
||||
for _, testcase := range []bool{false, true} {
|
||||
t.Run(fmt.Sprintf("tcpLiberal %t", testcase), func(t *testing.T) {
|
||||
ipt := iptablestest.NewFake()
|
||||
fp := NewFakeProxier(ipt)
|
||||
fp.conntrackTCPLiberal = testcase
|
||||
fp.syncProxyRules()
|
||||
|
||||
expected := dedent.Dedent(`
|
||||
*filter
|
||||
:KUBE-NODEPORTS - [0:0]
|
||||
:KUBE-SERVICES - [0:0]
|
||||
:KUBE-EXTERNAL-SERVICES - [0:0]
|
||||
:KUBE-FIREWALL - [0:0]
|
||||
:KUBE-FORWARD - [0:0]
|
||||
:KUBE-PROXY-FIREWALL - [0:0]
|
||||
-A KUBE-FIREWALL -m comment --comment "block incoming localnet connections" -d 127.0.0.0/8 ! -s 127.0.0.0/8 -m conntrack ! --ctstate RELATED,ESTABLISHED,DNAT -j DROP`)
|
||||
if !testcase {
|
||||
expected += "\n-A KUBE-FORWARD -m conntrack --ctstate INVALID -j DROP"
|
||||
}
|
||||
|
||||
expected += dedent.Dedent(`
|
||||
-A KUBE-FORWARD -m comment --comment "kubernetes forwarding rules" -m mark --mark 0x4000/0x4000 -j ACCEPT
|
||||
-A KUBE-FORWARD -m comment --comment "kubernetes forwarding conntrack rule" -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
|
||||
COMMIT
|
||||
*nat
|
||||
:KUBE-NODEPORTS - [0:0]
|
||||
:KUBE-SERVICES - [0:0]
|
||||
:KUBE-MARK-MASQ - [0:0]
|
||||
:KUBE-POSTROUTING - [0:0]
|
||||
-A KUBE-SERVICES -m comment --comment "kubernetes service nodeports; NOTE: this must be the last rule in this chain" -m addrtype --dst-type LOCAL -j KUBE-NODEPORTS
|
||||
-A KUBE-MARK-MASQ -j MARK --or-mark 0x4000
|
||||
-A KUBE-POSTROUTING -m mark ! --mark 0x4000/0x4000 -j RETURN
|
||||
-A KUBE-POSTROUTING -j MARK --xor-mark 0x4000
|
||||
-A KUBE-POSTROUTING -m comment --comment "kubernetes service traffic requiring SNAT" -j MASQUERADE
|
||||
COMMIT
|
||||
`)
|
||||
|
||||
assertIPTablesRulesEqual(t, getLine(), true, expected, fp.iptablesData.String())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestMasqueradeRule(t *testing.T) {
|
||||
for _, testcase := range []bool{false, true} {
|
||||
ipt := iptablestest.NewFake().SetHasRandomFully(testcase)
|
||||
|
Reference in New Issue
Block a user