only drop invalid cstate packets if non liberal

Conntrack invalid packets may cause unexpected and subtle bugs
on esblished connections, because of that we install by default an
iptables rules that drops the packets with this conntrack state.

However, there are network scenarios, specially those that use multihoming
nodes, that may have legit traffic that is detected by conntrack as
invalid, hence these iptables rules are causing problems dropping this
traffic.

An alternative to solve the spurious problems caused by the invalid
connectrack packets is to set the sysctl nf_conntrack_tcp_be_liberal
option, but this is a system wide setting and we don't want kube-proxy
to be opinionated about the whole node networking configuration.

Kube-proxy will only install the DROP rules for invalid conntrack states
if the nf_conntrack_tcp_be_liberal is not set.

Change-Id: I5eb326931ed915f5ae74d210f0a375842b6a790e
This commit is contained in:
Antonio Ojea
2023-09-04 15:58:13 +00:00
parent 294bde0079
commit 933bcc123b
2 changed files with 68 additions and 8 deletions

View File

@@ -92,6 +92,7 @@ const (
const sysctlRouteLocalnet = "net/ipv4/conf/all/route_localnet"
const sysctlBridgeCallIPTables = "net/bridge/bridge-nf-call-iptables"
const sysctlNFConntrackTCPBeLiberal = "net/netfilter/nf_conntrack_tcp_be_liberal"
// internal struct for string service information
type servicePortInfo struct {
@@ -196,6 +197,10 @@ type Proxier struct {
// localhostNodePorts indicates whether we allow NodePort services to be accessed
// via localhost.
localhostNodePorts bool
// conntrackTCPLiberal indicates whether the system sets the kernel nf_conntrack_tcp_be_liberal
conntrackTCPLiberal bool
// nodePortAddresses selects the interfaces where nodePort works.
nodePortAddresses *proxyutil.NodePortAddresses
// networkInterfacer defines an interface for several net library functions.
@@ -241,6 +246,14 @@ func NewProxier(ipFamily v1.IPFamily,
}
}
// Be conservative in what you do, be liberal in what you accept from others.
// If it's non-zero, we mark only out of window RST segments as INVALID.
// Ref: https://docs.kernel.org/networking/nf_conntrack-sysctl.html
conntrackTCPLiberal := false
if val, err := sysctl.GetSysctl(sysctlNFConntrackTCPBeLiberal); err == nil && val != 0 {
conntrackTCPLiberal = true
klog.InfoS("nf_conntrack_tcp_be_liberal set, not installing DROP rules for INVALID packets")
}
// Proxy needs br_netfilter and bridge-nf-call-iptables=1 when containers
// are connected to a Linux bridge (but not SDN bridges). Until most
// plugins handle this, log when config is missing
@@ -282,6 +295,7 @@ func NewProxier(ipFamily v1.IPFamily,
localhostNodePorts: localhostNodePorts,
nodePortAddresses: nodePortAddresses,
networkInterfacer: proxyutil.RealNetwork{},
conntrackTCPLiberal: conntrackTCPLiberal,
}
burstSyncs := 2
@@ -1443,14 +1457,17 @@ func (proxier *Proxier) syncProxyRules() {
}
// Drop the packets in INVALID state, which would potentially cause
// unexpected connection reset.
// https://github.com/kubernetes/kubernetes/issues/74839
proxier.filterRules.Write(
"-A", string(kubeForwardChain),
"-m", "conntrack",
"--ctstate", "INVALID",
"-j", "DROP",
)
// unexpected connection reset if nf_conntrack_tcp_be_liberal is not set.
// Ref: https://github.com/kubernetes/kubernetes/issues/74839
// Ref: https://github.com/kubernetes/kubernetes/issues/117924
if !proxier.conntrackTCPLiberal {
proxier.filterRules.Write(
"-A", string(kubeForwardChain),
"-m", "conntrack",
"--ctstate", "INVALID",
"-j", "DROP",
)
}
// If the masqueradeMark has been added then we want to forward that same
// traffic, this allows NodePort traffic to be forwarded even if the default

View File

@@ -2565,6 +2565,49 @@ func TestHealthCheckNodePort(t *testing.T) {
})
}
func TestDropInvalidRule(t *testing.T) {
for _, testcase := range []bool{false, true} {
t.Run(fmt.Sprintf("tcpLiberal %t", testcase), func(t *testing.T) {
ipt := iptablestest.NewFake()
fp := NewFakeProxier(ipt)
fp.conntrackTCPLiberal = testcase
fp.syncProxyRules()
expected := dedent.Dedent(`
*filter
:KUBE-NODEPORTS - [0:0]
:KUBE-SERVICES - [0:0]
:KUBE-EXTERNAL-SERVICES - [0:0]
:KUBE-FIREWALL - [0:0]
:KUBE-FORWARD - [0:0]
:KUBE-PROXY-FIREWALL - [0:0]
-A KUBE-FIREWALL -m comment --comment "block incoming localnet connections" -d 127.0.0.0/8 ! -s 127.0.0.0/8 -m conntrack ! --ctstate RELATED,ESTABLISHED,DNAT -j DROP`)
if !testcase {
expected += "\n-A KUBE-FORWARD -m conntrack --ctstate INVALID -j DROP"
}
expected += dedent.Dedent(`
-A KUBE-FORWARD -m comment --comment "kubernetes forwarding rules" -m mark --mark 0x4000/0x4000 -j ACCEPT
-A KUBE-FORWARD -m comment --comment "kubernetes forwarding conntrack rule" -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
COMMIT
*nat
:KUBE-NODEPORTS - [0:0]
:KUBE-SERVICES - [0:0]
:KUBE-MARK-MASQ - [0:0]
:KUBE-POSTROUTING - [0:0]
-A KUBE-SERVICES -m comment --comment "kubernetes service nodeports; NOTE: this must be the last rule in this chain" -m addrtype --dst-type LOCAL -j KUBE-NODEPORTS
-A KUBE-MARK-MASQ -j MARK --or-mark 0x4000
-A KUBE-POSTROUTING -m mark ! --mark 0x4000/0x4000 -j RETURN
-A KUBE-POSTROUTING -j MARK --xor-mark 0x4000
-A KUBE-POSTROUTING -m comment --comment "kubernetes service traffic requiring SNAT" -j MASQUERADE
COMMIT
`)
assertIPTablesRulesEqual(t, getLine(), true, expected, fp.iptablesData.String())
})
}
}
func TestMasqueradeRule(t *testing.T) {
for _, testcase := range []bool{false, true} {
ipt := iptablestest.NewFake().SetHasRandomFully(testcase)