Fix sandbox garbage collection.

Sandboxes are garbage collected only when they contain no containers at
all and, if they belong to an existing pod, are not the latest sandbox of that pod.
This commit is contained in:
Pengfei Ni 2017-03-14 12:26:29 +08:00
parent 0a87487bda
commit 691f0482fb

View File

@ -30,20 +30,6 @@ import (
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
) )
// sandboxMinGCAge is the minimum age for an empty sandbox before it is garbage collected.
// This is introduced to avoid a sandbox being garbage collected before its containers are
// created.
// Notice that if the first container of a sandbox is created too late (exceeds sandboxMinGCAge),
// the sandbox could still be garbage collected. In that case, SyncPod will recreate the
// sandbox and make sure old containers are all stopped.
// In the following figure, 'o' is a stopped sandbox, 'x' is a removed sandbox. It shows
// that, approximately if a sandbox keeps crashing and MinAge = 1/n GC Period, there will
// be 1/n more sandboxes not garbage collected.
// oooooo|xxxxxx|xxxxxx| <--- MinAge = 0
// gc gc gc gc
// oooooo|oooxxx|xxxxxx| <--- MinAge = 1/2 GC Period
const sandboxMinGCAge time.Duration = 30 * time.Second
// containerGC is the manager of garbage collection. // containerGC is the manager of garbage collection.
type containerGC struct { type containerGC struct {
client internalapi.RuntimeService client internalapi.RuntimeService
@ -72,6 +58,16 @@ type containerGCInfo struct {
createTime time.Time createTime time.Time
} }
// sandboxGCInfo is the internal information kept for sandboxes being considered for GC.
// Entries are grouped per pod UID and sorted by createTime before eviction decisions are made.
type sandboxGCInfo struct {
// The ID of the sandbox; this is the value handed to removeSandbox when the sandbox is evicted.
id string
// Creation time for the sandbox; used to order a pod's sandboxes newest first.
createTime time.Time
// If true, the sandbox is ready or still has containers. Active sandboxes are
// never removed by garbage collection.
active bool
}
// evictUnit is considered for eviction as units of (UID, container name) pair. // evictUnit is considered for eviction as units of (UID, container name) pair.
type evictUnit struct { type evictUnit struct {
// UID of the pod. // UID of the pod.
@ -81,6 +77,7 @@ type evictUnit struct {
} }
type containersByEvictUnit map[evictUnit][]containerGCInfo type containersByEvictUnit map[evictUnit][]containerGCInfo
type sandboxesByPodUID map[types.UID][]sandboxGCInfo
// NumContainers returns the number of containers in this map. // NumContainers returns the number of containers in this map.
func (cu containersByEvictUnit) NumContainers() int { func (cu containersByEvictUnit) NumContainers() int {
@ -103,6 +100,13 @@ func (a byCreated) Len() int { return len(a) }
func (a byCreated) Swap(i, j int) { a[i], a[j] = a[j], a[i] } func (a byCreated) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a byCreated) Less(i, j int) bool { return a[i].createTime.After(a[j].createTime) } func (a byCreated) Less(i, j int) bool { return a[i].createTime.After(a[j].createTime) }
// sandboxByCreated orders a slice of sandboxGCInfo by creation time, newest
// first, so that the oldest sandboxes end up at the tail of the slice.
type sandboxByCreated []sandboxGCInfo

// Len reports how many sandboxes are in the slice.
func (s sandboxByCreated) Len() int {
	return len(s)
}

// Swap exchanges the sandboxes at positions x and y.
func (s sandboxByCreated) Swap(x, y int) {
	s[x], s[y] = s[y], s[x]
}

// Less reports whether the sandbox at x was created later than the one at y,
// which places more recently created sandboxes earlier in sorted order.
func (s sandboxByCreated) Less(x, y int) bool {
	return s[y].createTime.Before(s[x].createTime)
}
// enforceMaxContainersPerEvictUnit enforces MaxPerPodContainer for each evictUnit. // enforceMaxContainersPerEvictUnit enforces MaxPerPodContainer for each evictUnit.
func (cgc *containerGC) enforceMaxContainersPerEvictUnit(evictUnits containersByEvictUnit, MaxContainers int) { func (cgc *containerGC) enforceMaxContainersPerEvictUnit(evictUnits containersByEvictUnit, MaxContainers int) {
for key := range evictUnits { for key := range evictUnits {
@ -118,7 +122,7 @@ func (cgc *containerGC) enforceMaxContainersPerEvictUnit(evictUnits containersBy
func (cgc *containerGC) removeOldestN(containers []containerGCInfo, toRemove int) []containerGCInfo { func (cgc *containerGC) removeOldestN(containers []containerGCInfo, toRemove int) []containerGCInfo {
// Remove from oldest to newest (last to first). // Remove from oldest to newest (last to first).
numToKeep := len(containers) - toRemove numToKeep := len(containers) - toRemove
for i := numToKeep; i < len(containers); i++ { for i := len(containers) - 1; i >= numToKeep; i-- {
if err := cgc.manager.removeContainer(containers[i].id); err != nil { if err := cgc.manager.removeContainer(containers[i].id); err != nil {
glog.Errorf("Failed to remove container %q: %v", containers[i].id, err) glog.Errorf("Failed to remove container %q: %v", containers[i].id, err)
} }
@ -128,6 +132,18 @@ func (cgc *containerGC) removeOldestN(containers []containerGCInfo, toRemove int
return containers[:numToKeep] return containers[:numToKeep]
} }
// removeOldestNSandboxes removes the inactive sandboxes found among the oldest
// toRemove entries of sandboxes. It does not return anything and does not
// modify the slice itself.
//
// sandboxes is expected to be sorted newest first, so the oldest entries sit at
// the end of the slice. Active sandboxes (ready, or still holding containers)
// within that range are skipped rather than removed, so fewer than toRemove
// sandboxes may actually be removed. A toRemove that is zero or negative
// removes nothing; a toRemove larger than len(sandboxes) is treated as
// len(sandboxes).
func (cgc *containerGC) removeOldestNSandboxes(sandboxes []sandboxGCInfo, toRemove int) {
	numToKeep := len(sandboxes) - toRemove
	// Clamp so that an oversized toRemove cannot drive the loop index below
	// zero and trigger an out-of-range panic.
	if numToKeep < 0 {
		numToKeep = 0
	}

	// Remove from oldest to newest (last to first).
	for i := len(sandboxes) - 1; i >= numToKeep; i-- {
		if sandboxes[i].active {
			continue
		}
		cgc.removeSandbox(sandboxes[i].id)
	}
}
// removeSandbox removes the sandbox by sandboxID. // removeSandbox removes the sandbox by sandboxID.
func (cgc *containerGC) removeSandbox(sandboxID string) { func (cgc *containerGC) removeSandbox(sandboxID string) {
glog.V(4).Infof("Removing sandbox %q", sandboxID) glog.V(4).Infof("Removing sandbox %q", sandboxID)
@ -239,9 +255,13 @@ func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.ContainerGCPolicy
return nil return nil
} }
// evictSandboxes evicts all sandboxes that are evictable. Evictable sandboxes are: not running // evictSandboxes removes all evictable sandboxes. An evictable sandbox must
// and contains no containers at all. // meet the following requirements:
func (cgc *containerGC) evictSandboxes(minAge time.Duration) error { // 1. not in ready state
// 2. contains no containers.
// 3. belong to a non-existent (i.e., already removed) pod, or is not the
// most recently created sandbox for the pod.
func (cgc *containerGC) evictSandboxes() error {
containers, err := cgc.manager.getKubeletContainers(true) containers, err := cgc.manager.getKubeletContainers(true)
if err != nil { if err != nil {
return err return err
@ -252,38 +272,50 @@ func (cgc *containerGC) evictSandboxes(minAge time.Duration) error {
return err return err
} }
evictSandboxes := make([]string, 0) sandboxesByPod := make(sandboxesByPodUID)
newestGCTime := time.Now().Add(-minAge)
for _, sandbox := range sandboxes { for _, sandbox := range sandboxes {
// Prune out ready sandboxes. podUID := types.UID(sandbox.Metadata.Uid)
if sandbox.State == runtimeapi.PodSandboxState_SANDBOX_READY { sandboxInfo := sandboxGCInfo{
continue id: sandbox.Id,
createTime: time.Unix(0, sandbox.CreatedAt),
} }
// Prune out sandboxes that still have containers. // Set ready sandboxes to be active.
found := false if sandbox.State == runtimeapi.PodSandboxState_SANDBOX_READY {
sandboxInfo.active = true
}
// Set sandboxes that still have containers to be active.
hasContainers := false
sandboxID := sandbox.Id sandboxID := sandbox.Id
for _, container := range containers { for _, container := range containers {
if container.PodSandboxId == sandboxID { if container.PodSandboxId == sandboxID {
found = true hasContainers = true
break break
} }
} }
if found { if hasContainers {
continue sandboxInfo.active = true
} }
// Only garbage collect sandboxes older than sandboxMinGCAge. sandboxesByPod[podUID] = append(sandboxesByPod[podUID], sandboxInfo)
createdAt := time.Unix(0, sandbox.CreatedAt)
if createdAt.After(newestGCTime) {
continue
}
glog.V(4).Infof("PodSandbox %q is eligible for garbage collection since it was created before %v: %+v", sandboxID, newestGCTime, sandbox)
evictSandboxes = append(evictSandboxes, sandboxID)
} }
for _, sandbox := range evictSandboxes { // Sort the sandboxes by age.
cgc.removeSandbox(sandbox) for uid := range sandboxesByPod {
sort.Sort(sandboxByCreated(sandboxesByPod[uid]))
}
for podUID, sandboxes := range sandboxesByPod {
if cgc.isPodDeleted(podUID) {
// Remove all evictable sandboxes if the pod has been removed.
// Note that the latest dead sandbox is also removed if there is
// already an active one.
cgc.removeOldestNSandboxes(sandboxes, len(sandboxes))
} else {
// Keep latest one if the pod still exists.
cgc.removeOldestNSandboxes(sandboxes, len(sandboxes)-1)
}
} }
return nil return nil
} }
@ -342,7 +374,7 @@ func (cgc *containerGC) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy,
} }
// Remove sandboxes with zero containers // Remove sandboxes with zero containers
if err := cgc.evictSandboxes(sandboxMinGCAge); err != nil { if err := cgc.evictSandboxes(); err != nil {
return err return err
} }