Merge pull request #35572 from bprashanth/ip_gc

Automatic merge from submit-queue

GC pod ips

Finally managed to write a *failing* test. 
Supersedes https://github.com/kubernetes/kubernetes/pull/34373

```release-note
GC pod ips
```
Merged by Kubernetes Submit Queue (committed by GitHub) on 2016-10-28 14:44:28 -07:00.

5 changed files with 240 additions and 5 deletions.
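
For context, the host-local IPAM backend that kubenet delegates to checkpoints every allocation as a file under `/var/lib/cni/networks/kubenet`: the file is named after the allocated IP and its body holds the ID of the owning container. The sketch below is a minimal, standalone illustration (not part of this PR) of reading those checkpoint files; the path and the `ipamDir` variable mirror the constants introduced in the diff and are assumptions for illustration only.

```go
package main

import (
	"fmt"
	"io/ioutil"
	"net"
	"path/filepath"
	"strings"
)

func main() {
	// Assumed checkpoint location: defaultIPAMDir ("/var/lib/cni/networks")
	// joined with the kubenet plugin name, as in the change below.
	ipamDir := filepath.Join("/var/lib/cni/networks", "kubenet")

	files, err := ioutil.ReadDir(ipamDir)
	if err != nil {
		fmt.Printf("failed to list %q: %v\n", ipamDir, err)
		return
	}
	for _, f := range files {
		// Checkpoint files are named after the allocated IP; skip anything
		// else, such as the plugin's own bookkeeping files.
		if net.ParseIP(f.Name()) == nil {
			continue
		}
		content, err := ioutil.ReadFile(filepath.Join(ipamDir, f.Name()))
		if err != nil {
			fmt.Printf("failed to read %q: %v\n", f.Name(), err)
			continue
		}
		// The file body is the ID of the container that owns the IP.
		fmt.Printf("%s -> %s\n", f.Name(), strings.TrimSpace(string(content)))
	}
}
```

The garbage collector added in this PR performs the same scan, then releases any IP whose recorded container ID no longer belongs to a pod the runtime reports as running.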


@@ -21,6 +21,7 @@ package kubenet
import (
"fmt"
"net"
"path/filepath"
"strings"
"sync"
"syscall"
@@ -69,6 +70,10 @@ const (
// ebtables Chain to store dedup rules
dedupChain = utilebtables.Chain("KUBE-DEDUP")
// defaultIPAMDir is the default location for the checkpoint files stored by host-local ipam
// https://github.com/containernetworking/cni/tree/master/plugins/ipam/host-local#backends
defaultIPAMDir = "/var/lib/cni/networks"
)
// CNI plugins required by kubenet in /opt/cni/bin or vendor directory
@@ -434,6 +439,16 @@ func (plugin *kubenetNetworkPlugin) SetUpPod(namespace string, name string, id k
// Not a hard error or warning
glog.V(4).Infof("Failed to clean up %s/%s after SetUpPod failure: %v", namespace, name, err)
}
// TODO: Remove this hack once we've figured out how to retrieve the netns
// of an exited container. Currently, restarting docker will leak a bunch of
// ips. This will exhaust available ip space unless we clean up old ips. At the
// same time we don't want to GC them periodically, as that could lead to a
// performance regression in starting pods. So on each setup failure, try GC on
// the assumption that the kubelet is going to retry pod creation, and when it
// does, there will be free ips.
plugin.ipamGarbageCollection()
return err
}
@@ -572,20 +587,32 @@ func (plugin *kubenetNetworkPlugin) checkCNIPluginInDir(dir string) bool {
return true
}
// Returns a list of pods running or ready to run on this node and each pod's IP address.
// Assumes PodSpecs retrieved from the runtime include the name and ID of containers in
// each pod.
func (plugin *kubenetNetworkPlugin) getActivePods() ([]*hostport.ActivePod, error) {
// getNonExitedPods returns a list of pods that have at least one running container.
func (plugin *kubenetNetworkPlugin) getNonExitedPods() ([]*kubecontainer.Pod, error) {
ret := []*kubecontainer.Pod{}
pods, err := plugin.host.GetRuntime().GetPods(true)
if err != nil {
return nil, fmt.Errorf("Failed to retrieve pods from runtime: %v", err)
}
activePods := make([]*hostport.ActivePod, 0)
for _, p := range pods {
if podIsExited(p) {
continue
}
ret = append(ret, p)
}
return ret, nil
}
// Returns a list of pods running or ready to run on this node and each pod's IP address.
// Assumes PodSpecs retrieved from the runtime include the name and ID of containers in
// each pod.
func (plugin *kubenetNetworkPlugin) getActivePods() ([]*hostport.ActivePod, error) {
pods, err := plugin.getNonExitedPods()
if err != nil {
return nil, err
}
activePods := make([]*hostport.ActivePod, 0)
for _, p := range pods {
containerID, err := plugin.host.GetRuntime().GetPodContainerID(p)
if err != nil {
continue
@@ -608,6 +635,77 @@ func (plugin *kubenetNetworkPlugin) getActivePods() ([]*hostport.ActivePod, erro
return activePods, nil
}
// ipamGarbageCollection will release unused IPs.
// kubenet uses the CNI bridge plugin, which stores allocated IPs as files on disk:
// each file created under defaultIPAMDir is named after the allocated IP and contains
// the ID of the container that owns it. This routine looks for container IDs that are
// not reported by the currently running docker, and invokes DelNetwork on each one.
// Note that this will only work for the current CNI bridge plugin, because we have
// no way of finding the NetNs.
func (plugin *kubenetNetworkPlugin) ipamGarbageCollection() {
glog.V(2).Infof("Starting IP garbage collection")
ipamDir := filepath.Join(defaultIPAMDir, KubenetPluginName)
files, err := ioutil.ReadDir(ipamDir)
if err != nil {
glog.Errorf("Failed to list files in %q: %v", ipamDir, err)
return
}
// gather containerIDs for allocated ips
ipContainerIdMap := make(map[string]string)
for _, file := range files {
// skip non-checkpoint files
if ip := net.ParseIP(file.Name()); ip == nil {
continue
}
content, err := ioutil.ReadFile(filepath.Join(ipamDir, file.Name()))
if err != nil {
glog.Errorf("Failed to read file %v: %v", file, err)
}
ipContainerIdMap[file.Name()] = strings.TrimSpace(string(content))
}
// gather infra container IDs of current running Pods
runningContainerIDs := utilsets.String{}
pods, err := plugin.getNonExitedPods()
if err != nil {
glog.Errorf("Failed to get pods: %v", err)
return
}
for _, pod := range pods {
containerID, err := plugin.host.GetRuntime().GetPodContainerID(pod)
if err != nil {
glog.Warningf("Failed to get infra containerID of %q/%q: %v", pod.Namespace, pod.Name, err)
continue
}
runningContainerIDs.Insert(strings.TrimSpace(containerID.ID))
}
// release leaked ips
for ip, containerID := range ipContainerIdMap {
// if the container is not running, release IP
if runningContainerIDs.Has(containerID) {
continue
}
// CNI requires the full config to be present, although only the containerID is needed in this case
rt := &libcni.RuntimeConf{
ContainerID: containerID,
IfName: network.DefaultInterfaceName,
// TODO: How do we find the NetNs of an exited container? docker inspect
// doesn't show us the pid, so we probably need to checkpoint
NetNS: "",
}
glog.V(2).Infof("Releasing IP %q allocated to %q.", ip, containerID)
// CNI bridge plugin should try to release IP and then return
if err := plugin.cniConfig.DelNetwork(plugin.netConfig, rt); err != nil {
glog.Errorf("Error while releasing IP: %v", err)
}
}
}
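// Note: ipamGarbageCollection is invoked only from the SetUpPod failure path above,
// rather than on a periodic timer, so leaked checkpoints are reclaimed the next time
// the kubelet retries pod creation.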
// podIsExited returns true if the pod is exited (all containers inside are exited).
func podIsExited(p *kubecontainer.Pod) bool {
for _, c := range p.Containers {