pod and qos level cgroup support

This commit is contained in:
derekwaynecarr
2016-10-17 13:23:48 -04:00
committed by Derek Carr
parent 0d228d6a61
commit 42289c2758
34 changed files with 1427 additions and 287 deletions

View File

@@ -18,14 +18,20 @@ package cm
import (
"fmt"
"io/ioutil"
"os"
"path"
"strings"
"github.com/golang/glog"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/kubelet/qos"
"k8s.io/kubernetes/pkg/types"
utilerrors "k8s.io/kubernetes/pkg/util/errors"
)
const (
// podCgroupNamePrefix is prepended to the pod UID to build the name of a pod's cgroup.
podCgroupNamePrefix = "pod#"
podCgroupNamePrefix = "pod"
)
// podContainerManagerImpl implements podContainerManager interface.
@@ -56,7 +62,7 @@ func (m *podContainerManagerImpl) applyLimits(pod *api.Pod) error {
// Exists reports whether the pod's cgroup already exists.
// It resolves the pod's cgroup name with GetPodContainerName and asks the
// cgroup manager whether a cgroup with that name exists.
func (m *podContainerManagerImpl) Exists(pod *api.Pod) bool {
podContainerName := m.GetPodContainerName(pod)
podContainerName, _ := m.GetPodContainerName(pod)
return m.cgroupManager.Exists(podContainerName)
}
@@ -64,14 +70,14 @@ func (m *podContainerManagerImpl) Exists(pod *api.Pod) bool {
// pod cgroup exists if qos cgroup hierarchy flag is enabled.
// If the pod level container doesn't already exist, it is created.
func (m *podContainerManagerImpl) EnsureExists(pod *api.Pod) error {
podContainerName := m.GetPodContainerName(pod)
podContainerName, _ := m.GetPodContainerName(pod)
// check if container already exist
alreadyExists := m.Exists(pod)
if !alreadyExists {
// Create the pod container
containerConfig := &CgroupConfig{
Name: podContainerName,
ResourceParameters: &ResourceConfig{},
ResourceParameters: ResourceConfigForPod(pod),
}
if err := m.cgroupManager.Create(containerConfig); err != nil {
return fmt.Errorf("failed to create container for %v : %v", podContainerName, err)
@@ -87,11 +93,8 @@ func (m *podContainerManagerImpl) EnsureExists(pod *api.Pod) error {
return nil
}
// GetPodContainerName is a util func takes in a pod as an argument
// and returns the pod's cgroup name. We follow a pod cgroup naming format
// which is opaque and deterministic. Given a pod it's cgroup would be named
// "pod-UID" where the UID is the Pod UID
func (m *podContainerManagerImpl) GetPodContainerName(pod *api.Pod) string {
// GetPodContainerName returns the CgroupName identifier, and its literal cgroupfs form on the host.
func (m *podContainerManagerImpl) GetPodContainerName(pod *api.Pod) (CgroupName, string) {
podQOS := qos.GetPodQOS(pod)
// Get the parent QOS container name
var parentContainer string
@@ -104,24 +107,127 @@ func (m *podContainerManagerImpl) GetPodContainerName(pod *api.Pod) string {
parentContainer = m.qosContainersInfo.BestEffort
}
podContainer := podCgroupNamePrefix + string(pod.UID)
// Get the absolute path of the cgroup
return path.Join(parentContainer, podContainer)
cgroupName := (CgroupName)(path.Join(parentContainer, podContainer))
// Get the literal cgroupfs name
cgroupfsName := m.cgroupManager.Name(cgroupName)
return cgroupName, cgroupfsName
}
// tryKillingCgroupProcesses kills the processes that the cgroup manager
// reports for podCgroup, i.e. processes attached either to the pod cgroup or
// to a container cgroup under it.
//
// The pid list is read once. Killing is attempted up to five times because
// os.Kill often errors out. A pid whose kill succeeded is remembered and is
// not signalled again on later attempts: signalling an already-killed process
// fails, which would otherwise turn a single transient failure into a
// spurious error on every retry.
//
// It returns nil when there is nothing to kill or when every kill eventually
// succeeded, and otherwise an aggregate of the errors from the last attempt.
func (m *podContainerManagerImpl) tryKillingCgroupProcesses(podCgroup CgroupName) error {
	pidsToKill := m.cgroupManager.Pids(podCgroup)
	// No pids charged to the terminated pod cgroup, nothing to do.
	if len(pidsToKill) == 0 {
		return nil
	}

	var errlist []error
	// Pids that have been killed successfully in an earlier attempt.
	killed := map[int]bool{}
	for i := 0; i < 5; i++ {
		if i != 0 {
			glog.V(3).Infof("Attempt %v failed to kill all unwanted process. Retyring", i)
		}
		// Only the errors of the current attempt matter.
		errlist = nil
		for _, pid := range pidsToKill {
			if killed[pid] {
				continue
			}
			p, err := os.FindProcess(pid)
			if err != nil {
				// The process could not be looked up; treat it as gone.
				// Note that on Unix FindProcess always succeeds, so a vanished
				// process surfaces as a Kill error below instead.
				continue
			}
			glog.V(3).Infof("Attempt to kill process with pid: %v", pid)
			if err := p.Kill(); err != nil {
				glog.V(3).Infof("failed to kill process with pid: %v", pid)
				errlist = append(errlist, err)
				continue
			}
			killed[pid] = true
		}
		if len(errlist) == 0 {
			glog.V(3).Infof("successfully killed all unwanted processes.")
			return nil
		}
	}
	return utilerrors.NewAggregate(errlist)
}
// Destroy destroys the pod container cgroup paths
func (m *podContainerManagerImpl) Destroy(podCgroup string) error {
// This will house the logic for destroying the pod cgroups.
// Will be handled in the next PR.
func (m *podContainerManagerImpl) Destroy(podCgroup CgroupName) error {
	// First get rid of every process that is still attached to the pod cgroup.
	killErr := m.tryKillingCgroupProcesses(podCgroup)
	if killErr != nil {
		glog.V(3).Infof("failed to kill all the processes attached to the %v cgroups", podCgroup)
		return fmt.Errorf("failed to kill all the processes attached to the %v cgroups : %v", podCgroup, killErr)
	}

	// With the processes gone, ask the cgroup manager to remove the pod's cgroup.
	destroyErr := m.cgroupManager.Destroy(&CgroupConfig{
		Name:               podCgroup,
		ResourceParameters: &ResourceConfig{},
	})
	if destroyErr != nil {
		return fmt.Errorf("failed to delete cgroup paths for %v : %v", podCgroup, destroyErr)
	}
	return nil
}
// ReduceCPULimits reduces the CPU CFS values to the minimum amount of shares.
// It only delegates to the cgroup manager's ReduceCPULimits for podCgroup and
// returns that call's error unchanged.
func (m *podContainerManagerImpl) ReduceCPULimits(podCgroup CgroupName) error {
return m.cgroupManager.ReduceCPULimits(podCgroup)
}
// GetAllPodsFromCgroups scans every cgroup subsystem mount point and, inside
// each QoS cgroup directory, collects the pod cgroups that still exist on disk.
//
// The returned map is keyed by pod UID and holds the pod's internal
// CgroupName. A pod is reported as soon as its cgroup is found under a single
// subsystem mount. If any QoS cgroup directory cannot be read, the scan stops
// and the error is returned.
func (m *podContainerManagerImpl) GetAllPodsFromCgroups() (map[types.UID]CgroupName, error) {
	// Pods discovered on disk so far.
	podsOnDisk := make(map[types.UID]CgroupName)
	qosTiers := [3]string{m.qosContainersInfo.BestEffort, m.qosContainersInfo.Burstable, m.qosContainersInfo.Guaranteed}

	for _, mountPoint := range m.subsystems.MountPoints {
		for _, qosTier := range qosTiers {
			// Absolute directory of this QoS cgroup under the current subsystem mount.
			qosDir := path.Join(mountPoint, m.cgroupManager.Name(CgroupName(qosTier)))
			entries, err := ioutil.ReadDir(qosDir)
			if err != nil {
				return nil, fmt.Errorf("failed to read the cgroup directory %v : %v", qosDir, err)
			}

			for _, entry := range entries {
				if !entry.IsDir() {
					continue
				}
				entryName := entry.Name()
				// A contains check (rather than a prefix check) is used because on
				// systemd the literal cgroupfs name is prefixed with the qos tier.
				if !strings.Contains(entryName, podCgroupNamePrefix) {
					continue
				}

				// Translate the on-disk name into the internal identifier, then
				// split on the pod prefix to isolate the uid.
				pieces := strings.Split(string(m.cgroupManager.CgroupName(entryName)), podCgroupNamePrefix)
				if len(pieces) != 2 {
					// Not of the form pod<uid>: log it and move on.
					glog.Errorf("pod cgroup manager ignoring unexpected cgroup %v because it is not a pod", path.Join(qosDir, entryName))
					continue
				}

				uid := pieces[1]
				// The literal cgroupfs name may already encode the qos tier (on
				// systemd), so rebuild the fully qualified CgroupName from our own
				// convention instead of reusing it, to avoid double encoding.
				podsOnDisk[types.UID(uid)] = CgroupName(path.Join(qosTier, podCgroupNamePrefix+uid))
			}
		}
	}
	return podsOnDisk, nil
}
// podContainerManagerNoop implements podContainerManager interface.
// It is a no-op implementation and basically does nothing
// podContainerManagerNoop is used in case the QoS cgroup Hierarchy is not
// enabled, so Exists() returns true always as the cgroupRoot
// is expected to always exist.
type podContainerManagerNoop struct {
// cgroupRoot is the cgroup that GetPodContainerName reports for every pod.
cgroupRoot string
cgroupRoot CgroupName
}
// Make sure that podContainerManagerNoop implements the PodContainerManager interface
@@ -135,11 +241,23 @@ func (m *podContainerManagerNoop) EnsureExists(_ *api.Pod) error {
return nil
}
func (m *podContainerManagerNoop) GetPodContainerName(_ *api.Pod) string {
return m.cgroupRoot
func (m *podContainerManagerNoop) GetPodContainerName(_ *api.Pod) (CgroupName, string) {
// The pod argument is ignored: cgroupRoot is returned for every pod, once as
// a CgroupName and once converted to a plain string.
return m.cgroupRoot, string(m.cgroupRoot)
}
// GetPodContainerNameForDriver ignores the pod and always returns the empty string.
func (m *podContainerManagerNoop) GetPodContainerNameForDriver(_ *api.Pod) string {
return ""
}
// Destroy destroys the pod container cgroup paths
func (m *podContainerManagerNoop) Destroy(_ string) error {
func (m *podContainerManagerNoop) Destroy(_ CgroupName) error {
// No-op: the cgroup name is ignored, nothing is removed, and nil is always returned.
return nil
}
// ReduceCPULimits is a no-op: it ignores the cgroup name and always returns nil.
func (m *podContainerManagerNoop) ReduceCPULimits(_ CgroupName) error {
return nil
}
// GetAllPodsFromCgroups is a no-op: it performs no scan and always returns a
// nil map together with a nil error.
func (m *podContainerManagerNoop) GetAllPodsFromCgroups() (map[types.UID]CgroupName, error) {
return nil, nil
}