kubernetes/cmd/kube-proxy/app/conntrack.go
Ziqi Zhao be4535bd34 convert k8s.io/kubernetes/pkg/proxy to contextual logging, part 1
Signed-off-by: Ziqi Zhao <zhaoziqi9146@gmail.com>
2024-04-22 13:08:41 +08:00

167 lines
5.6 KiB
Go

/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package app
import (
"context"
"errors"
"os"
"strconv"
"strings"
"k8s.io/component-helpers/node/util/sysctl"
"k8s.io/klog/v2"
"k8s.io/mount-utils"
)
// Conntracker is an interface to the global sysctl. Descriptions of the various
// sysctl fields can be found here:
//
// https://www.kernel.org/doc/Documentation/networking/nf_conntrack-sysctl.txt
type Conntracker interface {
// SetMax adjusts nf_conntrack_max.
SetMax(ctx context.Context, max int) error
// SetTCPEstablishedTimeout adjusts nf_conntrack_tcp_timeout_established.
SetTCPEstablishedTimeout(ctx context.Context, seconds int) error
// SetTCPCloseWaitTimeout adjusts nf_conntrack_tcp_timeout_close_wait.
SetTCPCloseWaitTimeout(ctx context.Context, seconds int) error
// SetTCPBeLiberal adjusts nf_conntrack_tcp_be_liberal.
SetTCPBeLiberal(ctx context.Context, value int) error
// SetUDPTimeout adjusts nf_conntrack_udp_timeout.
SetUDPTimeout(ctx context.Context, seconds int) error
// SetUDPStreamTimeout adjusts nf_conntrack_udp_timeout_stream.
SetUDPStreamTimeout(ctx context.Context, seconds int) error
}
type realConntracker struct {
}
var errReadOnlySysFS = errors.New("readOnlySysFS")
func (rct realConntracker) SetMax(ctx context.Context, max int) error {
logger := klog.FromContext(ctx)
if err := rct.setIntSysCtl(ctx, "nf_conntrack_max", max); err != nil {
return err
}
logger.Info("Setting nf_conntrack_max", "nfConntrackMax", max)
// Linux does not support writing to /sys/module/nf_conntrack/parameters/hashsize
// when the writer process is not in the initial network namespace
// (https://github.com/torvalds/linux/blob/v4.10/net/netfilter/nf_conntrack_core.c#L1795-L1796).
// Usually that's fine. But in some configurations such as with github.com/kinvolk/kubeadm-nspawn,
// kube-proxy is in another netns.
// Therefore, check if writing in hashsize is necessary and skip the writing if not.
hashsize, err := readIntStringFile("/sys/module/nf_conntrack/parameters/hashsize")
if err != nil {
return err
}
if hashsize >= (max / 4) {
return nil
}
// sysfs is expected to be mounted as 'rw'. However, it may be
// unexpectedly mounted as 'ro' by docker because of a known docker
// issue (https://github.com/docker/docker/issues/24000). Setting
// conntrack will fail when sysfs is readonly. When that happens, we
// don't set conntrack hashsize and return a special error
// errReadOnlySysFS here. The caller should deal with
// errReadOnlySysFS differently.
writable, err := rct.isSysFSWritable(ctx)
if err != nil {
return err
}
if !writable {
return errReadOnlySysFS
}
// TODO: generify this and sysctl to a new sysfs.WriteInt()
logger.Info("Setting conntrack hashsize", "conntrackHashsize", max/4)
return writeIntStringFile("/sys/module/nf_conntrack/parameters/hashsize", max/4)
}
func (rct realConntracker) SetTCPEstablishedTimeout(ctx context.Context, seconds int) error {
return rct.setIntSysCtl(ctx, "nf_conntrack_tcp_timeout_established", seconds)
}
func (rct realConntracker) SetTCPCloseWaitTimeout(ctx context.Context, seconds int) error {
return rct.setIntSysCtl(ctx, "nf_conntrack_tcp_timeout_close_wait", seconds)
}
func (rct realConntracker) SetTCPBeLiberal(ctx context.Context, value int) error {
return rct.setIntSysCtl(ctx, "nf_conntrack_tcp_be_liberal", value)
}
func (rct realConntracker) SetUDPTimeout(ctx context.Context, seconds int) error {
return rct.setIntSysCtl(ctx, "nf_conntrack_udp_timeout", seconds)
}
func (rct realConntracker) SetUDPStreamTimeout(ctx context.Context, seconds int) error {
return rct.setIntSysCtl(ctx, "nf_conntrack_udp_timeout_stream", seconds)
}
func (rct realConntracker) setIntSysCtl(ctx context.Context, name string, value int) error {
logger := klog.FromContext(ctx)
entry := "net/netfilter/" + name
sys := sysctl.New()
if val, _ := sys.GetSysctl(entry); val != value {
logger.Info("Set sysctl", "entry", entry, "value", value)
if err := sys.SetSysctl(entry, value); err != nil {
return err
}
}
return nil
}
// isSysFSWritable checks /proc/mounts to see whether sysfs is 'rw' or not.
func (rct realConntracker) isSysFSWritable(ctx context.Context) (bool, error) {
logger := klog.FromContext(ctx)
const permWritable = "rw"
const sysfsDevice = "sysfs"
m := mount.New("" /* default mount path */)
mountPoints, err := m.List()
if err != nil {
logger.Error(err, "Failed to list mount points")
return false, err
}
for _, mountPoint := range mountPoints {
if mountPoint.Type != sysfsDevice {
continue
}
// Check whether sysfs is 'rw'
if len(mountPoint.Opts) > 0 && mountPoint.Opts[0] == permWritable {
return true, nil
}
logger.Error(nil, "Sysfs is not writable", "mountPoint", mountPoint, "mountOptions", mountPoint.Opts)
return false, errReadOnlySysFS
}
return false, errors.New("no sysfs mounted")
}
func readIntStringFile(filename string) (int, error) {
b, err := os.ReadFile(filename)
if err != nil {
return -1, err
}
return strconv.Atoi(strings.TrimSpace(string(b)))
}
func writeIntStringFile(filename string, value int) error {
return os.WriteFile(filename, []byte(strconv.Itoa(value)), 0640)
}