support DisableCgroup, DisableApparmor, RestrictOOMScoreAdj
Add the following config options to support "rootless" mode: * DisableCgroup: disable cgroup support * DisableApparmor: disable AppArmor support * RestrictOOMScoreAdj: restrict the lower bound of OOMScoreAdj Signed-off-by: Akihiro Suda <suda.akihiro@lab.ntt.co.jp>
This commit is contained in:
		| @@ -44,6 +44,19 @@ The explanation and default value of each configuration item are as follows: | ||||
|   # limit. | ||||
|   max_container_log_line_size = 16384 | ||||
|  | ||||
|   # disable_cgroup disables cgroup support. | ||||
|   # This is useful when the daemon does not have permission to access cgroups. | ||||
|   disable_cgroup = false | ||||
|  | ||||
|   # disable_apparmor disables AppArmor support. | ||||
|   # This is useful when the daemon does not have permission to access AppArmor. | ||||
|   disable_apparmor = false | ||||
|  | ||||
|   # restrict_oom_score_adj raises any requested OOMScoreAdj that is lower than | ||||
|   # containerd's current OOMScoreAdj up to that current value. | ||||
|   # This is useful when containerd does not have permission to decrease OOMScoreAdj. | ||||
|   restrict_oom_score_adj = false | ||||
|  | ||||
|   # "plugins.cri.containerd" contains config related to containerd | ||||
|   [plugins.cri.containerd] | ||||
|  | ||||
|   | ||||
| @@ -142,6 +142,16 @@ type PluginConfig struct { | ||||
| 	// Log line longer than the limit will be split into multiple lines. Non-positive | ||||
| 	// value means no limit. | ||||
| 	MaxContainerLogLineSize int `toml:"max_container_log_line_size" json:"maxContainerLogSize"` | ||||
| 	// DisableCgroup indicates to disable the cgroup support. | ||||
| 	// This is useful when the containerd does not have permission to access cgroup. | ||||
| 	DisableCgroup bool `toml:"disable_cgroup" json:"disableCgroup"` | ||||
| 	// DisableApparmor indicates to disable the apparmor support. | ||||
| 	// This is useful when the containerd does not have permission to access Apparmor. | ||||
| 	DisableApparmor bool `toml:"disable_apparmor" json:"disableApparmor"` | ||||
| 	// RestrictOOMScoreAdj indicates to limit the lower bound of OOMScoreAdj to the containerd's | ||||
| 	// current OOMScoreAdj. | ||||
| 	// This is useful when the containerd does not have permission to decrease OOMScoreAdj. | ||||
| 	RestrictOOMScoreAdj bool `toml:"restrict_oom_score_adj" json:"restrictOOMScoreAdj"` | ||||
| } | ||||
|  | ||||
| // X509KeyPairStreaming contains the x509 configuration for streaming | ||||
|   | ||||
| @@ -417,13 +417,19 @@ func (c *criService) generateContainerSpec(id string, sandboxID string, sandboxP | ||||
|  | ||||
| 	g.SetRootReadonly(securityContext.GetReadonlyRootfs()) | ||||
|  | ||||
| 	setOCILinuxResource(&g, config.GetLinux().GetResources()) | ||||
|  | ||||
| 	if c.config.DisableCgroup { | ||||
| 		g.SetLinuxCgroupsPath("") | ||||
| 	} else { | ||||
| 		setOCILinuxResourceCgroup(&g, config.GetLinux().GetResources()) | ||||
| 		if sandboxConfig.GetLinux().GetCgroupParent() != "" { | ||||
| 			cgroupsPath := getCgroupsPath(sandboxConfig.GetLinux().GetCgroupParent(), id, | ||||
| 				c.config.SystemdCgroup) | ||||
| 			g.SetLinuxCgroupsPath(cgroupsPath) | ||||
| 		} | ||||
| 	} | ||||
| 	if err := setOCILinuxResourceOOMScoreAdj(&g, config.GetLinux().GetResources(), c.config.RestrictOOMScoreAdj); err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
|  | ||||
| 	// Set namespaces, share namespace with sandbox container. | ||||
| 	setOCINamespaces(&g, securityContext.GetNamespaceOptions(), sandboxPid) | ||||
| @@ -744,8 +750,8 @@ func setOCIBindMountsPrivileged(g *generate.Generator) { | ||||
| 	spec.Linux.MaskedPaths = nil | ||||
| } | ||||
|  | ||||
| // setOCILinuxResource set container resource limit. | ||||
| func setOCILinuxResource(g *generate.Generator, resources *runtime.LinuxContainerResources) { | ||||
| // setOCILinuxResourceCgroup set container cgroup resource limit. | ||||
| func setOCILinuxResourceCgroup(g *generate.Generator, resources *runtime.LinuxContainerResources) { | ||||
| 	if resources == nil { | ||||
| 		return | ||||
| 	} | ||||
| @@ -753,11 +759,28 @@ func setOCILinuxResource(g *generate.Generator, resources *runtime.LinuxContaine | ||||
| 	g.SetLinuxResourcesCPUQuota(resources.GetCpuQuota()) | ||||
| 	g.SetLinuxResourcesCPUShares(uint64(resources.GetCpuShares())) | ||||
| 	g.SetLinuxResourcesMemoryLimit(resources.GetMemoryLimitInBytes()) | ||||
| 	g.SetProcessOOMScoreAdj(int(resources.GetOomScoreAdj())) | ||||
| 	g.SetLinuxResourcesCPUCpus(resources.GetCpusetCpus()) | ||||
| 	g.SetLinuxResourcesCPUMems(resources.GetCpusetMems()) | ||||
| } | ||||
|  | ||||
| // setOCILinuxResourceOOMScoreAdj set container OOMScoreAdj resource limit. | ||||
| func setOCILinuxResourceOOMScoreAdj(g *generate.Generator, resources *runtime.LinuxContainerResources, restrictOOMScoreAdjFlag bool) error { | ||||
| 	if resources == nil { | ||||
| 		return nil | ||||
| 	} | ||||
| 	adj := int(resources.GetOomScoreAdj()) | ||||
| 	if restrictOOMScoreAdjFlag { | ||||
| 		var err error | ||||
| 		adj, err = restrictOOMScoreAdj(adj) | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 	} | ||||
| 	g.SetProcessOOMScoreAdj(adj) | ||||
|  | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // getOCICapabilitiesList returns a list of all available capabilities. | ||||
| func getOCICapabilitiesList() []string { | ||||
| 	var caps []string | ||||
|   | ||||
| @@ -18,6 +18,7 @@ package server | ||||
|  | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"io/ioutil" | ||||
| 	"os" | ||||
| 	"path" | ||||
| 	"path/filepath" | ||||
| @@ -496,3 +497,27 @@ func getRuntimeOptions(c containers.Container) (interface{}, error) { | ||||
| 	} | ||||
| 	return opts, nil | ||||
| } | ||||
|  | ||||
| func getCurrentOOMScoreAdj() (int, error) { | ||||
| 	b, err := ioutil.ReadFile("/proc/self/oom_score_adj") | ||||
| 	if err != nil { | ||||
| 		return 0, errors.Wrap(err, "could not get the daemon oom_score_adj") | ||||
| 	} | ||||
| 	s := strings.TrimSpace(string(b)) | ||||
| 	i, err := strconv.Atoi(s) | ||||
| 	if err != nil { | ||||
| 		return 0, errors.Wrap(err, "could not get the daemon oom_score_adj") | ||||
| 	} | ||||
| 	return i, nil | ||||
| } | ||||
|  | ||||
| func restrictOOMScoreAdj(preferredOOMScoreAdj int) (int, error) { | ||||
| 	currentOOMScoreAdj, err := getCurrentOOMScoreAdj() | ||||
| 	if err != nil { | ||||
| 		return preferredOOMScoreAdj, err | ||||
| 	} | ||||
| 	if preferredOOMScoreAdj < currentOOMScoreAdj { | ||||
| 		return currentOOMScoreAdj, nil | ||||
| 	} | ||||
| 	return preferredOOMScoreAdj, nil | ||||
| } | ||||
|   | ||||
| @@ -371,11 +371,15 @@ func (c *criService) generateSandboxContainerSpec(id string, config *runtime.Pod | ||||
| 	// TODO(random-liu): [P2] Consider whether to add labels and annotations to the container. | ||||
|  | ||||
| 	// Set cgroups parent. | ||||
| 	if c.config.DisableCgroup { | ||||
| 		g.SetLinuxCgroupsPath("") | ||||
| 	} else { | ||||
| 		if config.GetLinux().GetCgroupParent() != "" { | ||||
| 			cgroupsPath := getCgroupsPath(config.GetLinux().GetCgroupParent(), id, | ||||
| 				c.config.SystemdCgroup) | ||||
| 			g.SetLinuxCgroupsPath(cgroupsPath) | ||||
| 		} | ||||
| 	} | ||||
| 	// When cgroup parent is not set, containerd-shim will create container in a child cgroup | ||||
| 	// of the cgroup itself is in. | ||||
| 	// TODO(random-liu): [P2] Set default cgroup path if cgroup parent is not specified. | ||||
| @@ -430,8 +434,17 @@ func (c *criService) generateSandboxContainerSpec(id string, config *runtime.Pod | ||||
|  | ||||
| 	// Note: LinuxSandboxSecurityContext does not currently provide an apparmor profile | ||||
|  | ||||
| 	if !c.config.DisableCgroup { | ||||
| 		g.SetLinuxResourcesCPUShares(uint64(defaultSandboxCPUshares)) | ||||
| 	g.SetProcessOOMScoreAdj(int(defaultSandboxOOMAdj)) | ||||
| 	} | ||||
| 	adj := int(defaultSandboxOOMAdj) | ||||
| 	if c.config.RestrictOOMScoreAdj { | ||||
| 		adj, err = restrictOOMScoreAdj(adj) | ||||
| 		if err != nil { | ||||
| 			return nil, err | ||||
| 		} | ||||
| 	} | ||||
| 	g.SetProcessOOMScoreAdj(adj) | ||||
|  | ||||
| 	g.AddAnnotation(annotations.ContainerType, annotations.ContainerTypeSandbox) | ||||
| 	g.AddAnnotation(annotations.SandboxID, id) | ||||
|   | ||||
| @@ -28,6 +28,7 @@ import ( | ||||
| 	cni "github.com/containerd/go-cni" | ||||
| 	runcapparmor "github.com/opencontainers/runc/libcontainer/apparmor" | ||||
| 	runcseccomp "github.com/opencontainers/runc/libcontainer/seccomp" | ||||
| 	runcsystem "github.com/opencontainers/runc/libcontainer/system" | ||||
| 	"github.com/opencontainers/selinux/go-selinux" | ||||
| 	"github.com/pkg/errors" | ||||
| 	"github.com/sirupsen/logrus" | ||||
| @@ -108,7 +109,7 @@ func NewCRIService(config criconfig.Config, client *containerd.Client) (CRIServi | ||||
| 	c := &criService{ | ||||
| 		config:             config, | ||||
| 		client:             client, | ||||
| 		apparmorEnabled:    runcapparmor.IsEnabled(), | ||||
| 		apparmorEnabled:    runcapparmor.IsEnabled() && !config.DisableApparmor, | ||||
| 		seccompEnabled:     runcseccomp.IsEnabled(), | ||||
| 		os:                 osinterface.RealOS{}, | ||||
| 		sandboxStore:       sandboxstore.NewStore(), | ||||
| @@ -120,6 +121,12 @@ func NewCRIService(config criconfig.Config, client *containerd.Client) (CRIServi | ||||
| 		initialized:        atomic.NewBool(false), | ||||
| 	} | ||||
|  | ||||
| 	if runcsystem.RunningInUserNS() { | ||||
| 		if !(config.DisableCgroup && !c.apparmorEnabled && config.RestrictOOMScoreAdj) { | ||||
| 			logrus.Warn("Running containerd in a user namespace typically requires disable_cgroup, disable_apparmor, restrict_oom_score_adj set to be true") | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	if c.config.EnableSelinux { | ||||
| 		if !selinux.GetEnabled() { | ||||
| 			logrus.Warn("Selinux is not supported") | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Akihiro Suda
					Akihiro Suda