Merge pull request #35572 from bprashanth/ip_gc
Automatic merge from submit-queue GC pod ips Finally managed to write a *failing* test. Supersedes https://github.com/kubernetes/kubernetes/pull/34373 ```release-note GC pod ips ```
This commit is contained in:
@@ -21,6 +21,7 @@ package kubenet
|
||||
import (
|
||||
"fmt"
|
||||
"net"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
@@ -69,6 +70,10 @@ const (
|
||||
|
||||
// ebtables Chain to store dedup rules
|
||||
dedupChain = utilebtables.Chain("KUBE-DEDUP")
|
||||
|
||||
// defaultIPAMDir is the default location for the checkpoint files stored by host-local ipam
|
||||
// https://github.com/containernetworking/cni/tree/master/plugins/ipam/host-local#backends
|
||||
defaultIPAMDir = "/var/lib/cni/networks"
|
||||
)
|
||||
|
||||
// CNI plugins required by kubenet in /opt/cni/bin or vendor directory
|
||||
@@ -434,6 +439,16 @@ func (plugin *kubenetNetworkPlugin) SetUpPod(namespace string, name string, id k
|
||||
// Not a hard error or warning
|
||||
glog.V(4).Infof("Failed to clean up %s/%s after SetUpPod failure: %v", namespace, name, err)
|
||||
}
|
||||
|
||||
// TODO: Remove this hack once we've figured out how to retrieve the netns
|
||||
// of an exited container. Currently, restarting docker will leak a bunch of
|
||||
// ips. This will exhaust available ip space unless we cleanup old ips. At the
|
||||
// same time we don't want to try GC'ing them periodically as that could lead
|
||||
// to a performance regression in starting pods. So on each setup failure, try
|
||||
// GC on the assumption that the kubelet is going to retry pod creation, and
|
||||
// when it does, there will be ips.
|
||||
plugin.ipamGarbageCollection()
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -572,20 +587,32 @@ func (plugin *kubenetNetworkPlugin) checkCNIPluginInDir(dir string) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// Returns a list of pods running or ready to run on this node and each pod's IP address.
|
||||
// Assumes PodSpecs retrieved from the runtime include the name and ID of containers in
|
||||
// each pod.
|
||||
func (plugin *kubenetNetworkPlugin) getActivePods() ([]*hostport.ActivePod, error) {
|
||||
// getNonExitedPods returns a list of pods that have at least one running container.
|
||||
func (plugin *kubenetNetworkPlugin) getNonExitedPods() ([]*kubecontainer.Pod, error) {
|
||||
ret := []*kubecontainer.Pod{}
|
||||
pods, err := plugin.host.GetRuntime().GetPods(true)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Failed to retrieve pods from runtime: %v", err)
|
||||
}
|
||||
activePods := make([]*hostport.ActivePod, 0)
|
||||
for _, p := range pods {
|
||||
if podIsExited(p) {
|
||||
continue
|
||||
}
|
||||
ret = append(ret, p)
|
||||
}
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
// Returns a list of pods running or ready to run on this node and each pod's IP address.
|
||||
// Assumes PodSpecs retrieved from the runtime include the name and ID of containers in
|
||||
// each pod.
|
||||
func (plugin *kubenetNetworkPlugin) getActivePods() ([]*hostport.ActivePod, error) {
|
||||
pods, err := plugin.getNonExitedPods()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
activePods := make([]*hostport.ActivePod, 0)
|
||||
for _, p := range pods {
|
||||
containerID, err := plugin.host.GetRuntime().GetPodContainerID(p)
|
||||
if err != nil {
|
||||
continue
|
||||
@@ -608,6 +635,77 @@ func (plugin *kubenetNetworkPlugin) getActivePods() ([]*hostport.ActivePod, erro
|
||||
return activePods, nil
|
||||
}
|
||||
|
||||
// ipamGarbageCollection will release unused IP.
|
||||
// kubenet uses the CNI bridge plugin, which stores allocated ips on file. Each
|
||||
// file created under defaultIPAMDir has the format: ip/container-hash. So this
|
||||
// routine looks for hashes that are not reported by the currently running docker,
|
||||
// and invokes DelNetwork on each one. Note that this will only work for the
|
||||
// current CNI bridge plugin, because we have no way of finding the NetNs.
|
||||
func (plugin *kubenetNetworkPlugin) ipamGarbageCollection() {
|
||||
glog.V(2).Infof("Starting IP garbage collection")
|
||||
|
||||
ipamDir := filepath.Join(defaultIPAMDir, KubenetPluginName)
|
||||
files, err := ioutil.ReadDir(ipamDir)
|
||||
if err != nil {
|
||||
glog.Errorf("Failed to list files in %q: %v", ipamDir, err)
|
||||
return
|
||||
}
|
||||
|
||||
// gather containerIDs for allocated ips
|
||||
ipContainerIdMap := make(map[string]string)
|
||||
for _, file := range files {
|
||||
// skip non checkpoint file
|
||||
if ip := net.ParseIP(file.Name()); ip == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
content, err := ioutil.ReadFile(filepath.Join(ipamDir, file.Name()))
|
||||
if err != nil {
|
||||
glog.Errorf("Failed to read file %v: %v", file, err)
|
||||
}
|
||||
ipContainerIdMap[file.Name()] = strings.TrimSpace(string(content))
|
||||
}
|
||||
|
||||
// gather infra container IDs of current running Pods
|
||||
runningContainerIDs := utilsets.String{}
|
||||
pods, err := plugin.getNonExitedPods()
|
||||
if err != nil {
|
||||
glog.Errorf("Failed to get pods: %v", err)
|
||||
return
|
||||
}
|
||||
for _, pod := range pods {
|
||||
containerID, err := plugin.host.GetRuntime().GetPodContainerID(pod)
|
||||
if err != nil {
|
||||
glog.Warningf("Failed to get infra containerID of %q/%q: %v", pod.Namespace, pod.Name, err)
|
||||
continue
|
||||
}
|
||||
|
||||
runningContainerIDs.Insert(strings.TrimSpace(containerID.ID))
|
||||
}
|
||||
|
||||
// release leaked ips
|
||||
for ip, containerID := range ipContainerIdMap {
|
||||
// if the container is not running, release IP
|
||||
if runningContainerIDs.Has(containerID) {
|
||||
continue
|
||||
}
|
||||
// CNI requires all config to be presented, although only containerID is needed in this case
|
||||
rt := &libcni.RuntimeConf{
|
||||
ContainerID: containerID,
|
||||
IfName: network.DefaultInterfaceName,
|
||||
// TODO: How do we find the NetNs of an exited container? docker inspect
|
||||
// doesn't show us the pid, so we probably need to checkpoint
|
||||
NetNS: "",
|
||||
}
|
||||
|
||||
glog.V(2).Infof("Releasing IP %q allocated to %q.", ip, containerID)
|
||||
// CNI bridge plugin should try to release IP and then return
|
||||
if err := plugin.cniConfig.DelNetwork(plugin.netConfig, rt); err != nil {
|
||||
glog.Errorf("Error while releasing IP: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// podIsExited returns true if the pod is exited (all containers inside are exited).
|
||||
func podIsExited(p *kubecontainer.Pod) bool {
|
||||
for _, c := range p.Containers {
|
||||
|
||||
Reference in New Issue
Block a user