pod and qos level cgroup support
This commit is contained in:

committed by
Derek Carr

parent
0d228d6a61
commit
42289c2758
@@ -18,14 +18,20 @@ package cm
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path"
|
||||
"strings"
|
||||
|
||||
"github.com/golang/glog"
|
||||
"k8s.io/kubernetes/pkg/api"
|
||||
"k8s.io/kubernetes/pkg/kubelet/qos"
|
||||
"k8s.io/kubernetes/pkg/types"
|
||||
utilerrors "k8s.io/kubernetes/pkg/util/errors"
|
||||
)
|
||||
|
||||
const (
|
||||
podCgroupNamePrefix = "pod#"
|
||||
podCgroupNamePrefix = "pod"
|
||||
)
|
||||
|
||||
// podContainerManagerImpl implements podContainerManager interface.
|
||||
@@ -56,7 +62,7 @@ func (m *podContainerManagerImpl) applyLimits(pod *api.Pod) error {
|
||||
|
||||
// Exists checks if the pod's cgroup already exists
|
||||
func (m *podContainerManagerImpl) Exists(pod *api.Pod) bool {
|
||||
podContainerName := m.GetPodContainerName(pod)
|
||||
podContainerName, _ := m.GetPodContainerName(pod)
|
||||
return m.cgroupManager.Exists(podContainerName)
|
||||
}
|
||||
|
||||
@@ -64,14 +70,14 @@ func (m *podContainerManagerImpl) Exists(pod *api.Pod) bool {
|
||||
// pod cgroup exists if qos cgroup hierarchy flag is enabled.
|
||||
// If the pod level container doesn't already exist it is created.
|
||||
func (m *podContainerManagerImpl) EnsureExists(pod *api.Pod) error {
|
||||
podContainerName := m.GetPodContainerName(pod)
|
||||
podContainerName, _ := m.GetPodContainerName(pod)
|
||||
// check if the container already exists
|
||||
alreadyExists := m.Exists(pod)
|
||||
if !alreadyExists {
|
||||
// Create the pod container
|
||||
containerConfig := &CgroupConfig{
|
||||
Name: podContainerName,
|
||||
ResourceParameters: &ResourceConfig{},
|
||||
ResourceParameters: ResourceConfigForPod(pod),
|
||||
}
|
||||
if err := m.cgroupManager.Create(containerConfig); err != nil {
|
||||
return fmt.Errorf("failed to create container for %v : %v", podContainerName, err)
|
||||
@@ -87,11 +93,8 @@ func (m *podContainerManagerImpl) EnsureExists(pod *api.Pod) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetPodContainerName is a util func takes in a pod as an argument
|
||||
// and returns the pod's cgroup name. We follow a pod cgroup naming format
|
||||
// which is opaque and deterministic. Given a pod it's cgroup would be named
|
||||
// "pod-UID" where the UID is the Pod UID
|
||||
func (m *podContainerManagerImpl) GetPodContainerName(pod *api.Pod) string {
|
||||
// GetPodContainerName returns the CgroupName identifier, and its literal cgroupfs form on the host.
|
||||
func (m *podContainerManagerImpl) GetPodContainerName(pod *api.Pod) (CgroupName, string) {
|
||||
podQOS := qos.GetPodQOS(pod)
|
||||
// Get the parent QOS container name
|
||||
var parentContainer string
|
||||
@@ -104,24 +107,127 @@ func (m *podContainerManagerImpl) GetPodContainerName(pod *api.Pod) string {
|
||||
parentContainer = m.qosContainersInfo.BestEffort
|
||||
}
|
||||
podContainer := podCgroupNamePrefix + string(pod.UID)
|
||||
|
||||
// Get the absolute path of the cgroup
|
||||
return path.Join(parentContainer, podContainer)
|
||||
cgroupName := (CgroupName)(path.Join(parentContainer, podContainer))
|
||||
// Get the literal cgroupfs name
|
||||
cgroupfsName := m.cgroupManager.Name(cgroupName)
|
||||
|
||||
return cgroupName, cgroupfsName
|
||||
}
|
||||
|
||||
// Scan through the whole cgroup directory and kill all processes either
|
||||
// attached to the pod cgroup or to a container cgroup under the pod cgroup
|
||||
func (m *podContainerManagerImpl) tryKillingCgroupProcesses(podCgroup CgroupName) error {
|
||||
pidsToKill := m.cgroupManager.Pids(podCgroup)
|
||||
// No pids charged to the terminated pod cgroup return
|
||||
if len(pidsToKill) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
var errlist []error
|
||||
// os.Kill often errors out,
|
||||
// We try killing all the pids multiple times
|
||||
for i := 0; i < 5; i++ {
|
||||
if i != 0 {
|
||||
glog.V(3).Infof("Attempt %v failed to kill all unwanted process. Retyring", i)
|
||||
}
|
||||
errlist = []error{}
|
||||
for _, pid := range pidsToKill {
|
||||
p, err := os.FindProcess(pid)
|
||||
if err != nil {
|
||||
// Process not running anymore, do nothing
|
||||
continue
|
||||
}
|
||||
glog.V(3).Infof("Attempt to kill process with pid: %v", pid)
|
||||
if err := p.Kill(); err != nil {
|
||||
glog.V(3).Infof("failed to kill process with pid: %v", pid)
|
||||
errlist = append(errlist, err)
|
||||
}
|
||||
}
|
||||
if len(errlist) == 0 {
|
||||
glog.V(3).Infof("successfully killed all unwanted processes.")
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return utilerrors.NewAggregate(errlist)
|
||||
}
|
||||
|
||||
// Destroy destroys the pod container cgroup paths
|
||||
func (m *podContainerManagerImpl) Destroy(podCgroup string) error {
|
||||
// This will house the logic for destroying the pod cgroups.
|
||||
// Will be handled in the next PR.
|
||||
func (m *podContainerManagerImpl) Destroy(podCgroup CgroupName) error {
|
||||
// Try killing all the processes attached to the pod cgroup
|
||||
if err := m.tryKillingCgroupProcesses(podCgroup); err != nil {
|
||||
glog.V(3).Infof("failed to kill all the processes attached to the %v cgroups", podCgroup)
|
||||
return fmt.Errorf("failed to kill all the processes attached to the %v cgroups : %v", podCgroup, err)
|
||||
}
|
||||
|
||||
// Now its safe to remove the pod's cgroup
|
||||
containerConfig := &CgroupConfig{
|
||||
Name: podCgroup,
|
||||
ResourceParameters: &ResourceConfig{},
|
||||
}
|
||||
if err := m.cgroupManager.Destroy(containerConfig); err != nil {
|
||||
return fmt.Errorf("failed to delete cgroup paths for %v : %v", podCgroup, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ReduceCPULimits reduces the CPU CFS values to the minimum amount of shares.
|
||||
func (m *podContainerManagerImpl) ReduceCPULimits(podCgroup CgroupName) error {
|
||||
return m.cgroupManager.ReduceCPULimits(podCgroup)
|
||||
}
|
||||
|
||||
// GetAllPodsFromCgroups scans through all the subsytems of pod cgroups
|
||||
// Get list of pods whose cgroup still exist on the cgroup mounts
|
||||
func (m *podContainerManagerImpl) GetAllPodsFromCgroups() (map[types.UID]CgroupName, error) {
|
||||
// Map for storing all the found pods on the disk
|
||||
foundPods := make(map[types.UID]CgroupName)
|
||||
qosContainersList := [3]string{m.qosContainersInfo.BestEffort, m.qosContainersInfo.Burstable, m.qosContainersInfo.Guaranteed}
|
||||
// Scan through all the subsystem mounts
|
||||
// and through each QoS cgroup directory for each subsystem mount
|
||||
// If a pod cgroup exists in even a single subsystem mount
|
||||
// we will attempt to delete it
|
||||
for _, val := range m.subsystems.MountPoints {
|
||||
for _, qosContainerName := range qosContainersList {
|
||||
// get the subsystems QoS cgroup absolute name
|
||||
qcConversion := m.cgroupManager.Name(CgroupName(qosContainerName))
|
||||
qc := path.Join(val, qcConversion)
|
||||
dirInfo, err := ioutil.ReadDir(qc)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read the cgroup directory %v : %v", qc, err)
|
||||
}
|
||||
for i := range dirInfo {
|
||||
// note: we do a contains check because on systemd, the literal cgroupfs name will prefix the qos as well.
|
||||
if dirInfo[i].IsDir() && strings.Contains(dirInfo[i].Name(), podCgroupNamePrefix) {
|
||||
// we need to convert the name to an internal identifier
|
||||
internalName := m.cgroupManager.CgroupName(dirInfo[i].Name())
|
||||
// we then split the name on the pod prefix to determine the uid
|
||||
parts := strings.Split(string(internalName), podCgroupNamePrefix)
|
||||
// the uid is missing, so we log the unexpected cgroup not of form pod<uid>
|
||||
if len(parts) != 2 {
|
||||
location := path.Join(qc, dirInfo[i].Name())
|
||||
glog.Errorf("pod cgroup manager ignoring unexpected cgroup %v because it is not a pod", location)
|
||||
continue
|
||||
}
|
||||
podUID := parts[1]
|
||||
// because the literal cgroupfs name could encode the qos tier (on systemd), we avoid double encoding
|
||||
// by just rebuilding the fully qualified CgroupName according to our internal convention.
|
||||
cgroupName := CgroupName(path.Join(qosContainerName, podCgroupNamePrefix+podUID))
|
||||
foundPods[types.UID(podUID)] = cgroupName
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return foundPods, nil
|
||||
}
|
||||
|
||||
// podContainerManagerNoop implements podContainerManager interface.
|
||||
// It is a no-op implementation and basically does nothing
|
||||
// podContainerManagerNoop is used in case the QoS cgroup Hierarchy is not
|
||||
// enabled, so Exists() returns true always as the cgroupRoot
|
||||
// is expected to always exist.
|
||||
type podContainerManagerNoop struct {
|
||||
cgroupRoot string
|
||||
cgroupRoot CgroupName
|
||||
}
|
||||
|
||||
// Make sure that podContainerManagerNoop implements the PodContainerManager interface
|
||||
@@ -135,11 +241,23 @@ func (m *podContainerManagerNoop) EnsureExists(_ *api.Pod) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *podContainerManagerNoop) GetPodContainerName(_ *api.Pod) string {
|
||||
return m.cgroupRoot
|
||||
func (m *podContainerManagerNoop) GetPodContainerName(_ *api.Pod) (CgroupName, string) {
|
||||
return m.cgroupRoot, string(m.cgroupRoot)
|
||||
}
|
||||
|
||||
func (m *podContainerManagerNoop) GetPodContainerNameForDriver(_ *api.Pod) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
// Destroy destroys the pod container cgroup paths
|
||||
func (m *podContainerManagerNoop) Destroy(_ string) error {
|
||||
func (m *podContainerManagerNoop) Destroy(_ CgroupName) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *podContainerManagerNoop) ReduceCPULimits(_ CgroupName) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *podContainerManagerNoop) GetAllPodsFromCgroups() (map[types.UID]CgroupName, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
Reference in New Issue
Block a user