Change iptables fwmark to use single configurable bit instead of whole mark space

This commit is contained in:
Matt Dupre 2016-02-08 11:12:09 +00:00
parent 1e7db4a174
commit 9925cddc11
13 changed files with 2420 additions and 2232 deletions

View File

@ -71,6 +71,7 @@ func (s *ProxyServerConfig) AddFlags(fs *pflag.FlagSet) {
fs.Var(componentconfig.PortRangeVar{&s.PortRange}, "proxy-port-range", "Range of host ports (beginPort-endPort, inclusive) that may be consumed in order to proxy service traffic. If unspecified (0-0) then ports will be randomly chosen.")
fs.StringVar(&s.HostnameOverride, "hostname-override", s.HostnameOverride, "If non-empty, will use this string as identification instead of the actual hostname.")
fs.Var(&s.Mode, "proxy-mode", "Which proxy mode to use: 'userspace' (older) or 'iptables' (faster). If blank, look at the Node object on the Kubernetes API and respect the '"+ExperimentalProxyModeAnnotation+"' annotation if provided. Otherwise use the best-available proxy (currently iptables). If the iptables proxy is selected, regardless of how, but the system's kernel or iptables versions are insufficient, this always falls back to the userspace proxy.")
fs.IntVar(s.IPTablesMasqueradeBit, "iptables-masquerade-bit", util.IntPtrDerefOr(s.IPTablesMasqueradeBit, 14), "If using the pure iptables proxy, the bit of the fwmark space to mark packets requiring SNAT with. Must be within the range [0, 31].")
fs.DurationVar(&s.IPTablesSyncPeriod.Duration, "iptables-sync-period", s.IPTablesSyncPeriod.Duration, "How often iptables rules are refreshed (e.g. '5s', '1m', '2h22m'). Must be greater than 0.")
fs.DurationVar(&s.ConfigSyncPeriod, "config-sync-period", s.ConfigSyncPeriod, "How often configuration from the apiserver is refreshed. Must be greater than 0.")
fs.BoolVar(&s.MasqueradeAll, "masquerade-all", s.MasqueradeAll, "If using the pure iptables proxy, SNAT everything")

View File

@ -191,7 +191,12 @@ func NewProxyServerDefault(config *options.ProxyServerConfig) (*ProxyServer, err
proxyMode := getProxyMode(string(config.Mode), client.Nodes(), hostname, iptInterface)
if proxyMode == proxyModeIptables {
glog.V(0).Info("Using iptables Proxier.")
proxierIptables, err := iptables.NewProxier(iptInterface, execer, config.IPTablesSyncPeriod.Duration, config.MasqueradeAll)
if config.IPTablesMasqueradeBit == nil {
// IPTablesMasqueradeBit must be specified or defaulted.
return nil, fmt.Errorf("Unable to read IPTablesMasqueradeBit from config")
}
proxierIptables, err := iptables.NewProxier(iptInterface, execer, config.IPTablesSyncPeriod.Duration, config.MasqueradeAll, *config.IPTablesMasqueradeBit)
if err != nil {
glog.Fatalf("Unable to create proxier: %v", err)
}

View File

@ -63,6 +63,7 @@ kube-proxy
--healthz-bind-address=127.0.0.1: The IP address for the health check server to serve on, defaulting to 127.0.0.1 (set to 0.0.0.0 for all interfaces)
--healthz-port=10249: The port to bind the health check server. Use 0 to disable.
--hostname-override="": If non-empty, will use this string as identification instead of the actual hostname.
--iptables-masquerade-bit=14: If using the pure iptables proxy, the bit of the fwmark space to mark packets requiring SNAT with. Must be within the range [0, 31].
--iptables-sync-period=30s: How often iptables rules are refreshed (e.g. '5s', '1m', '2h22m'). Must be greater than 0.
--kube-api-burst=10: Burst to use while talking with kubernetes apiserver
--kube-api-qps=5: QPS to use while talking with kubernetes apiserver
@ -76,7 +77,7 @@ kube-proxy
--udp-timeout=250ms: How long an idle UDP connection will be kept open (e.g. '250ms', '2s'). Must be greater than 0. Only applicable for proxy-mode=userspace
```
###### Auto generated by spf13/cobra on 1-Feb-2016
###### Auto generated by spf13/cobra on 7-Feb-2016
<!-- BEGIN MUNGE: GENERATED_ANALYTICS -->

View File

@ -513,11 +513,11 @@ then look at the logs again.
```console
u@node$ iptables-save | grep hostnames
-A KUBE-SEP-57KPRZ3JQVENLNBR -s 10.244.3.6/32 -m comment --comment "default/hostnames:" -j MARK --set-xmark 0x4d415351/0xffffffff
-A KUBE-SEP-57KPRZ3JQVENLNBR -s 10.244.3.6/32 -m comment --comment "default/hostnames:" -j MARK --set-xmark 0x00004000/0x00004000
-A KUBE-SEP-57KPRZ3JQVENLNBR -p tcp -m comment --comment "default/hostnames:" -m tcp -j DNAT --to-destination 10.244.3.6:9376
-A KUBE-SEP-WNBA2IHDGP2BOBGZ -s 10.244.1.7/32 -m comment --comment "default/hostnames:" -j MARK --set-xmark 0x4d415351/0xffffffff
-A KUBE-SEP-WNBA2IHDGP2BOBGZ -s 10.244.1.7/32 -m comment --comment "default/hostnames:" -j MARK --set-xmark 0x00004000/0x00004000
-A KUBE-SEP-WNBA2IHDGP2BOBGZ -p tcp -m comment --comment "default/hostnames:" -m tcp -j DNAT --to-destination 10.244.1.7:9376
-A KUBE-SEP-X3P2623AGDH6CDF3 -s 10.244.2.3/32 -m comment --comment "default/hostnames:" -j MARK --set-xmark 0x4d415351/0xffffffff
-A KUBE-SEP-X3P2623AGDH6CDF3 -s 10.244.2.3/32 -m comment --comment "default/hostnames:" -j MARK --set-xmark 0x00004000/0x00004000
-A KUBE-SEP-X3P2623AGDH6CDF3 -p tcp -m comment --comment "default/hostnames:" -m tcp -j DNAT --to-destination 10.244.2.3:9376
-A KUBE-SERVICES -d 10.0.1.175/32 -p tcp -m comment --comment "default/hostnames: cluster IP" -m tcp --dport 80 -j KUBE-SVC-NWV5X2332I4OT4T3
-A KUBE-SVC-NWV5X2332I4OT4T3 -m comment --comment "default/hostnames:" -m statistic --mode random --probability 0.33332999982 -j KUBE-SEP-WNBA2IHDGP2BOBGZ

View File

@ -148,6 +148,7 @@ input-dirs
insecure-bind-address
insecure-port
insecure-skip-tls-verify
iptables-masquerade-bit
iptables-sync-period
ir-data-source
ir-dbname

File diff suppressed because it is too large Load Diff

View File

@ -31,6 +31,9 @@ type KubeProxyConfiguration struct {
HealthzPort int `json:"healthzPort"`
// hostnameOverride, if non-empty, will be used as the identity instead of the actual hostname.
HostnameOverride string `json:"hostnameOverride"`
// iptablesMasqueradeBit is the bit of the iptables fwmark space to use for SNAT if using
// the pure iptables proxy mode. Values must be within the range [0, 31].
IPTablesMasqueradeBit *int `json:"iptablesMasqueradeBit"`
// iptablesSyncPeriod is the period that iptables rules are refreshed (e.g. '5s', '1m',
// '2h22m'). Must be greater than 0.
IPTablesSyncPeriod unversioned.Duration `json:"iptablesSyncPeriodSeconds"`

View File

@ -37,6 +37,12 @@ func autoConvert_componentconfig_KubeProxyConfiguration_To_v1alpha1_KubeProxyCon
out.HealthzBindAddress = in.HealthzBindAddress
out.HealthzPort = int32(in.HealthzPort)
out.HostnameOverride = in.HostnameOverride
if in.IPTablesMasqueradeBit != nil {
out.IPTablesMasqueradeBit = new(int32)
*out.IPTablesMasqueradeBit = int32(*in.IPTablesMasqueradeBit)
} else {
out.IPTablesMasqueradeBit = nil
}
if err := s.Convert(&in.IPTablesSyncPeriod, &out.IPTablesSyncPeriod, 0); err != nil {
return err
}
@ -127,6 +133,12 @@ func autoConvert_v1alpha1_KubeProxyConfiguration_To_componentconfig_KubeProxyCon
out.HealthzBindAddress = in.HealthzBindAddress
out.HealthzPort = int(in.HealthzPort)
out.HostnameOverride = in.HostnameOverride
if in.IPTablesMasqueradeBit != nil {
out.IPTablesMasqueradeBit = new(int)
*out.IPTablesMasqueradeBit = int(*in.IPTablesMasqueradeBit)
} else {
out.IPTablesMasqueradeBit = nil
}
if err := s.Convert(&in.IPTablesSyncPeriod, &out.IPTablesSyncPeriod, 0); err != nil {
return err
}

View File

@ -43,6 +43,12 @@ func deepCopy_v1alpha1_KubeProxyConfiguration(in KubeProxyConfiguration, out *Ku
out.HealthzBindAddress = in.HealthzBindAddress
out.HealthzPort = in.HealthzPort
out.HostnameOverride = in.HostnameOverride
if in.IPTablesMasqueradeBit != nil {
out.IPTablesMasqueradeBit = new(int32)
*out.IPTablesMasqueradeBit = *in.IPTablesMasqueradeBit
} else {
out.IPTablesMasqueradeBit = nil
}
if err := deepCopy_unversioned_Duration(in.IPTablesSyncPeriod, &out.IPTablesSyncPeriod, c); err != nil {
return err
}

View File

@ -55,6 +55,10 @@ func addDefaultingFuncs(scheme *runtime.Scheme) {
if obj.ConntrackMax == 0 {
obj.ConntrackMax = 256 * 1024 // 4x default (64k)
}
if obj.IPTablesMasqueradeBit == nil {
temp := int32(14)
obj.IPTablesMasqueradeBit = &temp
}
if obj.ConntrackTCPEstablishedTimeout == zero {
obj.ConntrackTCPEstablishedTimeout = unversioned.Duration{Duration: 24 * time.Hour} // 1 day (1/5 default)
}

File diff suppressed because it is too large Load Diff

View File

@ -31,6 +31,9 @@ type KubeProxyConfiguration struct {
HealthzPort int32 `json:"healthzPort"`
// hostnameOverride, if non-empty, will be used as the identity instead of the actual hostname.
HostnameOverride string `json:"hostnameOverride"`
// iptablesMasqueradeBit is the bit of the iptables fwmark space to use for SNAT if using
// the pure iptables proxy mode. Values must be within the range [0, 31].
IPTablesMasqueradeBit *int32 `json:"iptablesMasqueradeBit"`
// iptablesSyncPeriod is the period that iptables rules are refreshed (e.g. '5s', '1m',
// '2h22m'). Must be greater than 0.
IPTablesSyncPeriod unversioned.Duration `json:"iptablesSyncPeriodSeconds"`

View File

@ -66,7 +66,8 @@ const kubePostroutingChain utiliptables.Chain = "KUBE-POSTROUTING"
const kubeMarkMasqChain utiliptables.Chain = "KUBE-MARK-MASQ"
// the mark we apply to traffic needing SNAT
const iptablesMasqueradeMark = "0x4d415351"
// TODO(thockin): Remove this for v1.3 or v1.4.
const oldIptablesMasqueradeMark = "0x4d415351"
// IptablesVersioner can query the current iptables version.
type IptablesVersioner interface {
@ -143,9 +144,10 @@ type Proxier struct {
haveReceivedEndpointsUpdate bool // true once we've seen an OnEndpointsUpdate event
// These are effectively const and do not need the mutex to be held.
syncPeriod time.Duration
iptables utiliptables.Interface
masqueradeAll bool
syncPeriod time.Duration
iptables utiliptables.Interface
masqueradeAll bool
masqueradeMark string
}
type localPort struct {
@ -171,7 +173,7 @@ var _ proxy.ProxyProvider = &Proxier{}
// An error will be returned if iptables fails to update or acquire the initial lock.
// Once a proxier is created, it will keep iptables up to date in the background and
// will not terminate if a particular iptables call fails.
func NewProxier(ipt utiliptables.Interface, exec utilexec.Interface, syncPeriod time.Duration, masqueradeAll bool) (*Proxier, error) {
func NewProxier(ipt utiliptables.Interface, exec utilexec.Interface, syncPeriod time.Duration, masqueradeAll bool, masqueradeBit int) (*Proxier, error) {
// Set the route_localnet sysctl we need for
if err := utilsysctl.SetSysctl(sysctlRouteLocalnet, 1); err != nil {
return nil, fmt.Errorf("can't set sysctl %s: %v", sysctlRouteLocalnet, err)
@ -185,13 +187,21 @@ func NewProxier(ipt utiliptables.Interface, exec utilexec.Interface, syncPeriod
glog.Warningf("can't set sysctl %s: %v", sysctlBridgeCallIptables, err)
}
// Generate the masquerade mark to use for SNAT rules.
if masqueradeBit < 0 || masqueradeBit > 31 {
return nil, fmt.Errorf("invalid iptables-masquerade-bit %v not in [0, 31]", masqueradeBit)
}
masqueradeValue := 1 << uint(masqueradeBit)
masqueradeMark := fmt.Sprintf("%#08x/%#08x", masqueradeValue, masqueradeValue)
return &Proxier{
serviceMap: make(map[proxy.ServicePortName]*serviceInfo),
endpointsMap: make(map[proxy.ServicePortName][]string),
portsMap: make(map[localPort]closeable),
syncPeriod: syncPeriod,
iptables: ipt,
masqueradeAll: masqueradeAll,
serviceMap: make(map[proxy.ServicePortName]*serviceInfo),
endpointsMap: make(map[proxy.ServicePortName][]string),
portsMap: make(map[localPort]closeable),
syncPeriod: syncPeriod,
iptables: ipt,
masqueradeAll: masqueradeAll,
masqueradeMark: masqueradeMark,
}, nil
}
@ -283,7 +293,7 @@ func CleanupLeftovers(ipt utiliptables.Interface) (encounteredError bool) {
// TODO(thockin): Remove this for v1.3 or v1.4.
args = []string{
"-m", "comment", "--comment", "kubernetes service traffic requiring SNAT",
"-m", "mark", "--mark", iptablesMasqueradeMark,
"-m", "mark", "--mark", oldIptablesMasqueradeMark,
"-j", "MASQUERADE",
}
if err := ipt.DeleteRule(utiliptables.TableNAT, utiliptables.ChainPostrouting, args...); err != nil {
@ -577,7 +587,7 @@ func (proxier *Proxier) syncProxyRules() {
comment := "kubernetes postrouting rules"
args := []string{"-m", "comment", "--comment", comment, "-j", string(kubePostroutingChain)}
if _, err := proxier.iptables.EnsureRule(utiliptables.Prepend, utiliptables.TableNAT, utiliptables.ChainPostrouting, args...); err != nil {
glog.Errorf("Failed to ensure that %s chain %s jumps to %s: %v", utiliptables.TableNAT, utiliptables.ChainPostrouting, kubeServicesChain, err)
glog.Errorf("Failed to ensure that %s chain %s jumps to %s: %v", utiliptables.TableNAT, utiliptables.ChainPostrouting, kubePostroutingChain, err)
return
}
}
@ -643,7 +653,7 @@ func (proxier *Proxier) syncProxyRules() {
writeLine(natRules, []string{
"-A", string(kubePostroutingChain),
"-m", "comment", "--comment", `"kubernetes service traffic requiring SNAT"`,
"-m", "mark", "--mark", iptablesMasqueradeMark,
"-m", "mark", "--mark", proxier.masqueradeMark,
"-j", "MASQUERADE",
}...)
@ -652,7 +662,7 @@ func (proxier *Proxier) syncProxyRules() {
// value should ever change.
writeLine(natRules, []string{
"-A", string(kubeMarkMasqChain),
"-j", "MARK", "--set-xmark", fmt.Sprintf("%s/0xffffffff", iptablesMasqueradeMark),
"-j", "MARK", "--set-xmark", proxier.masqueradeMark,
}...)
// Accumulate NAT chains to keep.
@ -931,7 +941,7 @@ func (proxier *Proxier) syncProxyRules() {
// TODO(thockin): Remove this for v1.3 or v1.4.
args := []string{
"-m", "comment", "--comment", "kubernetes service traffic requiring SNAT",
"-m", "mark", "--mark", iptablesMasqueradeMark,
"-m", "mark", "--mark", oldIptablesMasqueradeMark,
"-j", "MASQUERADE",
}
if err := proxier.iptables.DeleteRule(utiliptables.TableNAT, utiliptables.ChainPostrouting, args...); err != nil {