Merge pull request #8681 from vmarmol/system-container
Create a system container
This commit is contained in:
commit
50b9d6284a
@ -58,10 +58,12 @@
|
|||||||
{% set configure_cbr0 = "--configure-cbr0=" + pillar['allocate_node_cidrs'] -%}
|
{% set configure_cbr0 = "--configure-cbr0=" + pillar['allocate_node_cidrs'] -%}
|
||||||
{% endif -%}
|
{% endif -%}
|
||||||
|
|
||||||
# Run containers under the root cgroup.
|
# Run containers under the root cgroup and create a system container.
|
||||||
|
{% set system_container = "" -%}
|
||||||
{% set cgroup_root = "" -%}
|
{% set cgroup_root = "" -%}
|
||||||
{% if grains['os_family'] == 'Debian' -%}
|
{% if grains['os_family'] == 'Debian' -%}
|
||||||
|
{% set system_container = "--system-container=/system" -%}
|
||||||
{% set cgroup_root = "--cgroup_root=/" -%}
|
{% set cgroup_root = "--cgroup_root=/" -%}
|
||||||
{% endif -%}
|
{% endif -%}
|
||||||
|
|
||||||
DAEMON_ARGS="{{daemon_args}} {{api_servers_with_port}} {{hostname_override}} {{cloud_provider}} {{config}} --allow_privileged={{pillar['allow_privileged']}} {{pillar['log_level']}} {{cluster_dns}} {{cluster_domain}} {{docker_root}} {{configure_cbr0}} {{cgroup_root}}"
|
DAEMON_ARGS="{{daemon_args}} {{api_servers_with_port}} {{hostname_override}} {{cloud_provider}} {{config}} --allow_privileged={{pillar['allow_privileged']}} {{pillar['log_level']}} {{cluster_dns}} {{cluster_domain}} {{docker_root}} {{configure_cbr0}} {{cgroup_root}} {{system_container}}"
|
||||||
|
@ -108,6 +108,7 @@ type KubeletServer struct {
|
|||||||
CgroupRoot string
|
CgroupRoot string
|
||||||
ContainerRuntime string
|
ContainerRuntime string
|
||||||
DockerDaemonContainer string
|
DockerDaemonContainer string
|
||||||
|
SystemContainer string
|
||||||
ConfigureCBR0 bool
|
ConfigureCBR0 bool
|
||||||
MaxPods int
|
MaxPods int
|
||||||
|
|
||||||
@ -170,6 +171,7 @@ func NewKubeletServer() *KubeletServer {
|
|||||||
CgroupRoot: "",
|
CgroupRoot: "",
|
||||||
ContainerRuntime: "docker",
|
ContainerRuntime: "docker",
|
||||||
DockerDaemonContainer: "/docker-daemon",
|
DockerDaemonContainer: "/docker-daemon",
|
||||||
|
SystemContainer: "",
|
||||||
ConfigureCBR0: false,
|
ConfigureCBR0: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -228,7 +230,7 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) {
|
|||||||
fs.StringVar(&s.ResourceContainer, "resource-container", s.ResourceContainer, "Absolute name of the resource-only container to create and run the Kubelet in (Default: /kubelet).")
|
fs.StringVar(&s.ResourceContainer, "resource-container", s.ResourceContainer, "Absolute name of the resource-only container to create and run the Kubelet in (Default: /kubelet).")
|
||||||
fs.StringVar(&s.CgroupRoot, "cgroup_root", s.CgroupRoot, "Optional root cgroup to use for pods. This is handled by the container runtime on a best effort basis. Default: '', which means use the container runtime default.")
|
fs.StringVar(&s.CgroupRoot, "cgroup_root", s.CgroupRoot, "Optional root cgroup to use for pods. This is handled by the container runtime on a best effort basis. Default: '', which means use the container runtime default.")
|
||||||
fs.StringVar(&s.ContainerRuntime, "container_runtime", s.ContainerRuntime, "The container runtime to use. Possible values: 'docker', 'rkt'. Default: 'docker'.")
|
fs.StringVar(&s.ContainerRuntime, "container_runtime", s.ContainerRuntime, "The container runtime to use. Possible values: 'docker', 'rkt'. Default: 'docker'.")
|
||||||
fs.StringVar(&s.DockerDaemonContainer, "docker-daemon-container", s.DockerDaemonContainer, "Optional resource-only container in which to place the Docker Daemon. Empty for no container (Default: /docker-daemon).")
|
fs.StringVar(&s.SystemContainer, "system-container", s.SystemContainer, "Optional resource-only container in which to place all non-kernel processes that are not already in a container. Empty for no container. Rolling back the flag requires a reboot. (Default: \"\").")
|
||||||
fs.BoolVar(&s.ConfigureCBR0, "configure-cbr0", s.ConfigureCBR0, "If true, kubelet will configure cbr0 based on Node.Spec.PodCIDR.")
|
fs.BoolVar(&s.ConfigureCBR0, "configure-cbr0", s.ConfigureCBR0, "If true, kubelet will configure cbr0 based on Node.Spec.PodCIDR.")
|
||||||
fs.IntVar(&s.MaxPods, "max-pods", 100, "Number of Pods that can run on this Kubelet.")
|
fs.IntVar(&s.MaxPods, "max-pods", 100, "Number of Pods that can run on this Kubelet.")
|
||||||
|
|
||||||
@ -347,6 +349,7 @@ func (s *KubeletServer) Run(_ []string) error {
|
|||||||
ContainerRuntime: s.ContainerRuntime,
|
ContainerRuntime: s.ContainerRuntime,
|
||||||
Mounter: mounter,
|
Mounter: mounter,
|
||||||
DockerDaemonContainer: s.DockerDaemonContainer,
|
DockerDaemonContainer: s.DockerDaemonContainer,
|
||||||
|
SystemContainer: s.SystemContainer,
|
||||||
ConfigureCBR0: s.ConfigureCBR0,
|
ConfigureCBR0: s.ConfigureCBR0,
|
||||||
MaxPods: s.MaxPods,
|
MaxPods: s.MaxPods,
|
||||||
}
|
}
|
||||||
@ -513,6 +516,7 @@ func SimpleKubelet(client *client.Client,
|
|||||||
ContainerRuntime: "docker",
|
ContainerRuntime: "docker",
|
||||||
Mounter: mount.New(),
|
Mounter: mount.New(),
|
||||||
DockerDaemonContainer: "/docker-daemon",
|
DockerDaemonContainer: "/docker-daemon",
|
||||||
|
SystemContainer: "",
|
||||||
MaxPods: 32,
|
MaxPods: 32,
|
||||||
}
|
}
|
||||||
return &kcfg
|
return &kcfg
|
||||||
@ -648,6 +652,7 @@ type KubeletConfig struct {
|
|||||||
ContainerRuntime string
|
ContainerRuntime string
|
||||||
Mounter mount.Interface
|
Mounter mount.Interface
|
||||||
DockerDaemonContainer string
|
DockerDaemonContainer string
|
||||||
|
SystemContainer string
|
||||||
ConfigureCBR0 bool
|
ConfigureCBR0 bool
|
||||||
MaxPods int
|
MaxPods int
|
||||||
}
|
}
|
||||||
@ -701,6 +706,7 @@ func createAndInitKubelet(kc *KubeletConfig) (k KubeletBootstrap, pc *config.Pod
|
|||||||
kc.ContainerRuntime,
|
kc.ContainerRuntime,
|
||||||
kc.Mounter,
|
kc.Mounter,
|
||||||
kc.DockerDaemonContainer,
|
kc.DockerDaemonContainer,
|
||||||
|
kc.SystemContainer,
|
||||||
kc.ConfigureCBR0,
|
kc.ConfigureCBR0,
|
||||||
kc.MaxPods)
|
kc.MaxPods)
|
||||||
|
|
||||||
|
@ -20,5 +20,6 @@ package kubelet
|
|||||||
type containerManager interface {
|
type containerManager interface {
|
||||||
// Runs the container manager's housekeeping.
|
// Runs the container manager's housekeeping.
|
||||||
// - Ensures that the Docker daemon is in a container.
|
// - Ensures that the Docker daemon is in a container.
|
||||||
|
// - Creates the system container where all non-containerized processes run.
|
||||||
Start() error
|
Start() error
|
||||||
}
|
}
|
||||||
|
@ -35,33 +35,60 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
type containerManagerImpl struct {
|
type containerManagerImpl struct {
|
||||||
// Absolute name of the desired container that Docker should be in.
|
// Whether to create and use the specified containers.
|
||||||
dockerContainerName string
|
useDockerContainer bool
|
||||||
|
useSystemContainer bool
|
||||||
|
|
||||||
// The manager of the resource-only container Docker should be in.
|
// OOM score for the Docker container.
|
||||||
manager fs.Manager
|
|
||||||
dockerOomScoreAdj int
|
dockerOomScoreAdj int
|
||||||
|
|
||||||
|
// Managers for containers.
|
||||||
|
dockerContainer fs.Manager
|
||||||
|
systemContainer fs.Manager
|
||||||
|
rootContainer fs.Manager
|
||||||
}
|
}
|
||||||
|
|
||||||
var _ containerManager = &containerManagerImpl{}
|
var _ containerManager = &containerManagerImpl{}
|
||||||
|
|
||||||
// Takes the absolute name that the Docker daemon should be in.
|
// Takes the absolute name of the specified containers.
|
||||||
// Empty container name disables moving the Docker daemon.
|
// Empty container name disables use of the specified container.
|
||||||
func newContainerManager(dockerDaemonContainer string) (containerManager, error) {
|
func newContainerManager(dockerDaemonContainer, systemContainer string) (containerManager, error) {
|
||||||
|
if systemContainer == "/" {
|
||||||
|
return nil, fmt.Errorf("system container cannot be root (\"/\")")
|
||||||
|
}
|
||||||
|
|
||||||
return &containerManagerImpl{
|
return &containerManagerImpl{
|
||||||
dockerContainerName: dockerDaemonContainer,
|
useDockerContainer: dockerDaemonContainer != "",
|
||||||
manager: fs.Manager{
|
useSystemContainer: systemContainer != "",
|
||||||
|
dockerOomScoreAdj: -900,
|
||||||
|
dockerContainer: fs.Manager{
|
||||||
Cgroups: &configs.Cgroup{
|
Cgroups: &configs.Cgroup{
|
||||||
Name: dockerDaemonContainer,
|
Name: dockerDaemonContainer,
|
||||||
AllowAllDevices: true,
|
AllowAllDevices: true,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
dockerOomScoreAdj: -900,
|
systemContainer: fs.Manager{
|
||||||
|
Cgroups: &configs.Cgroup{
|
||||||
|
Name: systemContainer,
|
||||||
|
AllowAllDevices: true,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
rootContainer: fs.Manager{
|
||||||
|
Cgroups: &configs.Cgroup{
|
||||||
|
Name: "/",
|
||||||
|
},
|
||||||
|
},
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (cm *containerManagerImpl) Start() error {
|
func (cm *containerManagerImpl) Start() error {
|
||||||
if cm.dockerContainerName != "" {
|
if cm.useSystemContainer {
|
||||||
|
err := cm.ensureSystemContainer()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if cm.useDockerContainer {
|
||||||
go util.Until(func() {
|
go util.Until(func() {
|
||||||
err := cm.ensureDockerInContainer()
|
err := cm.ensureDockerInContainer()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -99,10 +126,10 @@ func (cm *containerManagerImpl) ensureDockerInContainer() error {
|
|||||||
errs = append(errs, fmt.Errorf("failed to find container of PID %q: %v", pid, err))
|
errs = append(errs, fmt.Errorf("failed to find container of PID %q: %v", pid, err))
|
||||||
}
|
}
|
||||||
|
|
||||||
if cont != cm.dockerContainerName {
|
if cont != cm.dockerContainer.Cgroups.Name {
|
||||||
err = cm.manager.Apply(pid)
|
err = cm.dockerContainer.Apply(pid)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
errs = append(errs, fmt.Errorf("failed to move PID %q (in %q) to %q", pid, cont, cm.dockerContainerName))
|
errs = append(errs, fmt.Errorf("failed to move PID %q (in %q) to %q", pid, cont, cm.dockerContainer.Cgroups.Name))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -125,3 +152,60 @@ func getContainer(pid int) (string, error) {
|
|||||||
|
|
||||||
return cgroups.ParseCgroupFile("cpu", f)
|
return cgroups.ParseCgroupFile("cpu", f)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Ensures the system container is created and all non-kernel processes without
|
||||||
|
// a container are moved to it.
|
||||||
|
func (cm *containerManagerImpl) ensureSystemContainer() error {
|
||||||
|
// Move non-kernel PIDs to the system container.
|
||||||
|
attemptsRemaining := 10
|
||||||
|
var errs []error
|
||||||
|
for attemptsRemaining >= 0 {
|
||||||
|
// Only keep errors on latest attempt.
|
||||||
|
errs = []error{}
|
||||||
|
attemptsRemaining--
|
||||||
|
|
||||||
|
allPids, err := cm.rootContainer.GetPids()
|
||||||
|
if err != nil {
|
||||||
|
errs = append(errs, fmt.Errorf("Failed to list PIDs for root: %v", err))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove kernel pids
|
||||||
|
pids := make([]int, 0, len(allPids))
|
||||||
|
for _, pid := range allPids {
|
||||||
|
if isKernelPid(pid) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
pids = append(pids, pid)
|
||||||
|
}
|
||||||
|
glog.Infof("Found %d PIDs in root, %d of them are kernel related", len(allPids), len(allPids)-len(pids))
|
||||||
|
|
||||||
|
// Check if we moved all the non-kernel PIDs.
|
||||||
|
if len(pids) == 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
glog.Infof("Moving non-kernel threads: %v", pids)
|
||||||
|
for _, pid := range pids {
|
||||||
|
err := cm.systemContainer.Apply(pid)
|
||||||
|
if err != nil {
|
||||||
|
errs = append(errs, fmt.Errorf("failed to move PID %d into the system container %q: %v", pid, cm.systemContainer.Cgroups.Name, err))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
if attemptsRemaining < 0 {
|
||||||
|
errs = append(errs, fmt.Errorf("ran out of attempts to create system containers %q", cm.systemContainer.Cgroups.Name))
|
||||||
|
}
|
||||||
|
|
||||||
|
return errors.NewAggregate(errs)
|
||||||
|
}
|
||||||
|
|
||||||
|
// isKernelPid reports whether the specified PID is a kernel PID.
//
// Kernel threads have no associated executable, so resolving the
// /proc/<pid>/exe symlink fails with a "not exist" error for them. Only that
// failure is treated as the kernel-thread signal: any other Readlink error
// (for example a permission error) says nothing about the kind of process, so
// the PID is reported as non-kernel instead of being silently skipped.
//
// Note that a PID with no /proc entry at all (e.g. a process that has already
// exited) also produces a "not exist" error and is therefore reported as a
// kernel PID.
func isKernelPid(pid int) bool {
	_, err := os.Readlink(fmt.Sprintf("/proc/%d/exe", pid))
	return err != nil && os.IsNotExist(err)
}
|
||||||
|
@ -31,6 +31,6 @@ func (unsupportedContainerManager) Start() error {
|
|||||||
return fmt.Errorf("Container Manager is unsupported in this build")
|
return fmt.Errorf("Container Manager is unsupported in this build")
|
||||||
}
|
}
|
||||||
|
|
||||||
func newContainerManager(dockerDaemonContainer string) (containerManager, error) {
|
func newContainerManager(dockerDaemonContainer, systemContainer string) (containerManager, error) {
|
||||||
return &unsupportedContainerManager{}, nil
|
return &unsupportedContainerManager{}, nil
|
||||||
}
|
}
|
||||||
|
@ -139,6 +139,7 @@ func NewMainKubelet(
|
|||||||
containerRuntime string,
|
containerRuntime string,
|
||||||
mounter mount.Interface,
|
mounter mount.Interface,
|
||||||
dockerDaemonContainer string,
|
dockerDaemonContainer string,
|
||||||
|
systemContainer string,
|
||||||
configureCBR0 bool,
|
configureCBR0 bool,
|
||||||
pods int) (*Kubelet, error) {
|
pods int) (*Kubelet, error) {
|
||||||
if rootDirectory == "" {
|
if rootDirectory == "" {
|
||||||
@ -147,6 +148,9 @@ func NewMainKubelet(
|
|||||||
if resyncInterval <= 0 {
|
if resyncInterval <= 0 {
|
||||||
return nil, fmt.Errorf("invalid sync frequency %d", resyncInterval)
|
return nil, fmt.Errorf("invalid sync frequency %d", resyncInterval)
|
||||||
}
|
}
|
||||||
|
if systemContainer != "" && cgroupRoot == "" {
|
||||||
|
return nil, fmt.Errorf("invalid configuration: system container was specified and cgroup root was not specified")
|
||||||
|
}
|
||||||
dockerClient = dockertools.NewInstrumentedDockerInterface(dockerClient)
|
dockerClient = dockertools.NewInstrumentedDockerInterface(dockerClient)
|
||||||
|
|
||||||
serviceStore := cache.NewStore(cache.MetaNamespaceKeyFunc)
|
serviceStore := cache.NewStore(cache.MetaNamespaceKeyFunc)
|
||||||
@ -295,7 +299,9 @@ func NewMainKubelet(
|
|||||||
return nil, fmt.Errorf("unsupported container runtime %q specified", containerRuntime)
|
return nil, fmt.Errorf("unsupported container runtime %q specified", containerRuntime)
|
||||||
}
|
}
|
||||||
|
|
||||||
containerManager, err := newContainerManager(dockerDaemonContainer)
|
// Setup container manager, can fail if the devices hierarchy is not mounted
|
||||||
|
// (it is required by Docker however).
|
||||||
|
containerManager, err := newContainerManager(dockerDaemonContainer, systemContainer)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to create the Container Manager: %v", err)
|
return nil, fmt.Errorf("failed to create the Container Manager: %v", err)
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user