diff --git a/docs/config.md b/docs/config.md index 0e75f03cf..b5a6a66bd 100644 --- a/docs/config.md +++ b/docs/config.md @@ -44,6 +44,19 @@ The explanation and default value of each configuration item are as follows: # limit. max_container_log_line_size = 16384 + # disable_cgroup indicates to disable the cgroup support. + # This is useful when the daemon does not have permission to access cgroup. + disable_cgroup = false + + # disable_apparmor indicates to disable the apparmor support. + # This is useful when the daemon does not have permission to access apparmor. + disable_apparmor = false + + # restrict_oom_score_adj indicates to limit the lower bound of OOMScoreAdj to + # the containerd's current OOMScoreAdj. + # This is useful when the containerd does not have permission to decrease OOMScoreAdj. + restrict_oom_score_adj = false + # "plugins.cri.containerd" contains config related to containerd [plugins.cri.containerd] diff --git a/pkg/config/config.go b/pkg/config/config.go index 7d53a7b53..9a8d23da1 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -142,6 +142,16 @@ type PluginConfig struct { // Log line longer than the limit will be split into multiple lines. Non-positive // value means no limit. MaxContainerLogLineSize int `toml:"max_container_log_line_size" json:"maxContainerLogSize"` + // DisableCgroup indicates to disable the cgroup support. + // This is useful when the containerd does not have permission to access cgroup. + DisableCgroup bool `toml:"disable_cgroup" json:"disableCgroup"` + // DisableApparmor indicates to disable the apparmor support. + // This is useful when the containerd does not have permission to access Apparmor. + DisableApparmor bool `toml:"disable_apparmor" json:"disableApparmor"` + // RestrictOOMScoreAdj indicates to limit the lower bound of OOMScoreAdj to the containerd's + // current OOMScoreAdj. + // This is useful when the containerd does not have permission to decrease OOMScoreAdj. 
+ RestrictOOMScoreAdj bool `toml:"restrict_oom_score_adj" json:"restrictOOMScoreAdj"` } // X509KeyPairStreaming contains the x509 configuration for streaming diff --git a/pkg/server/container_create.go b/pkg/server/container_create.go index 782626878..b5d16f506 100644 --- a/pkg/server/container_create.go +++ b/pkg/server/container_create.go @@ -417,12 +417,18 @@ func (c *criService) generateContainerSpec(id string, sandboxID string, sandboxP g.SetRootReadonly(securityContext.GetReadonlyRootfs()) - setOCILinuxResource(&g, config.GetLinux().GetResources()) - - if sandboxConfig.GetLinux().GetCgroupParent() != "" { - cgroupsPath := getCgroupsPath(sandboxConfig.GetLinux().GetCgroupParent(), id, - c.config.SystemdCgroup) - g.SetLinuxCgroupsPath(cgroupsPath) + if c.config.DisableCgroup { + g.SetLinuxCgroupsPath("") + } else { + setOCILinuxResourceCgroup(&g, config.GetLinux().GetResources()) + if sandboxConfig.GetLinux().GetCgroupParent() != "" { + cgroupsPath := getCgroupsPath(sandboxConfig.GetLinux().GetCgroupParent(), id, + c.config.SystemdCgroup) + g.SetLinuxCgroupsPath(cgroupsPath) + } + } + if err := setOCILinuxResourceOOMScoreAdj(&g, config.GetLinux().GetResources(), c.config.RestrictOOMScoreAdj); err != nil { + return nil, err } // Set namespaces, share namespace with sandbox container. @@ -744,8 +750,8 @@ func setOCIBindMountsPrivileged(g *generate.Generator) { spec.Linux.MaskedPaths = nil } -// setOCILinuxResource set container resource limit. -func setOCILinuxResource(g *generate.Generator, resources *runtime.LinuxContainerResources) { +// setOCILinuxResourceCgroup set container cgroup resource limit. 
+func setOCILinuxResourceCgroup(g *generate.Generator, resources *runtime.LinuxContainerResources) { if resources == nil { return } @@ -753,11 +759,28 @@ func setOCILinuxResource(g *generate.Generator, resources *runtime.LinuxContaine g.SetLinuxResourcesCPUQuota(resources.GetCpuQuota()) g.SetLinuxResourcesCPUShares(uint64(resources.GetCpuShares())) g.SetLinuxResourcesMemoryLimit(resources.GetMemoryLimitInBytes()) - g.SetProcessOOMScoreAdj(int(resources.GetOomScoreAdj())) g.SetLinuxResourcesCPUCpus(resources.GetCpusetCpus()) g.SetLinuxResourcesCPUMems(resources.GetCpusetMems()) } +// setOCILinuxResourceOOMScoreAdj set container OOMScoreAdj resource limit. +func setOCILinuxResourceOOMScoreAdj(g *generate.Generator, resources *runtime.LinuxContainerResources, restrictOOMScoreAdjFlag bool) error { + if resources == nil { + return nil + } + adj := int(resources.GetOomScoreAdj()) + if restrictOOMScoreAdjFlag { + var err error + adj, err = restrictOOMScoreAdj(adj) + if err != nil { + return err + } + } + g.SetProcessOOMScoreAdj(adj) + + return nil +} + // getOCICapabilitiesList returns a list of all available capabilities. 
func getOCICapabilitiesList() []string { var caps []string diff --git a/pkg/server/helpers.go b/pkg/server/helpers.go index c8a5778af..4b710b948 100644 --- a/pkg/server/helpers.go +++ b/pkg/server/helpers.go @@ -18,6 +18,7 @@ package server import ( "fmt" + "io/ioutil" "os" "path" "path/filepath" @@ -496,3 +497,27 @@ func getRuntimeOptions(c containers.Container) (interface{}, error) { } return opts, nil } + +func getCurrentOOMScoreAdj() (int, error) { + b, err := ioutil.ReadFile("/proc/self/oom_score_adj") + if err != nil { + return 0, errors.Wrap(err, "could not get the daemon oom_score_adj") + } + s := strings.TrimSpace(string(b)) + i, err := strconv.Atoi(s) + if err != nil { + return 0, errors.Wrap(err, "could not get the daemon oom_score_adj") + } + return i, nil +} + +func restrictOOMScoreAdj(preferredOOMScoreAdj int) (int, error) { + currentOOMScoreAdj, err := getCurrentOOMScoreAdj() + if err != nil { + return preferredOOMScoreAdj, err + } + if preferredOOMScoreAdj < currentOOMScoreAdj { + return currentOOMScoreAdj, nil + } + return preferredOOMScoreAdj, nil +} diff --git a/pkg/server/sandbox_run.go b/pkg/server/sandbox_run.go index d5aa700b3..f6c4588c3 100644 --- a/pkg/server/sandbox_run.go +++ b/pkg/server/sandbox_run.go @@ -371,10 +371,14 @@ func (c *criService) generateSandboxContainerSpec(id string, config *runtime.Pod // TODO(random-liu): [P2] Consider whether to add labels and annotations to the container. // Set cgroups parent. 
- if config.GetLinux().GetCgroupParent() != "" { - cgroupsPath := getCgroupsPath(config.GetLinux().GetCgroupParent(), id, - c.config.SystemdCgroup) - g.SetLinuxCgroupsPath(cgroupsPath) + if c.config.DisableCgroup { + g.SetLinuxCgroupsPath("") + } else { + if config.GetLinux().GetCgroupParent() != "" { + cgroupsPath := getCgroupsPath(config.GetLinux().GetCgroupParent(), id, + c.config.SystemdCgroup) + g.SetLinuxCgroupsPath(cgroupsPath) + } } // When cgroup parent is not set, containerd-shim will create container in a child cgroup // of the cgroup itself is in. @@ -430,8 +434,17 @@ func (c *criService) generateSandboxContainerSpec(id string, config *runtime.Pod // Note: LinuxSandboxSecurityContext does not currently provide an apparmor profile - g.SetLinuxResourcesCPUShares(uint64(defaultSandboxCPUshares)) - g.SetProcessOOMScoreAdj(int(defaultSandboxOOMAdj)) + if !c.config.DisableCgroup { + g.SetLinuxResourcesCPUShares(uint64(defaultSandboxCPUshares)) + } + adj := int(defaultSandboxOOMAdj) + if c.config.RestrictOOMScoreAdj { + adj, err = restrictOOMScoreAdj(adj) + if err != nil { + return nil, err + } + } + g.SetProcessOOMScoreAdj(adj) g.AddAnnotation(annotations.ContainerType, annotations.ContainerTypeSandbox) g.AddAnnotation(annotations.SandboxID, id) diff --git a/pkg/server/service.go b/pkg/server/service.go index ce648ae52..d041d30dd 100644 --- a/pkg/server/service.go +++ b/pkg/server/service.go @@ -28,6 +28,7 @@ import ( cni "github.com/containerd/go-cni" runcapparmor "github.com/opencontainers/runc/libcontainer/apparmor" runcseccomp "github.com/opencontainers/runc/libcontainer/seccomp" + runcsystem "github.com/opencontainers/runc/libcontainer/system" "github.com/opencontainers/selinux/go-selinux" "github.com/pkg/errors" "github.com/sirupsen/logrus" @@ -108,7 +109,7 @@ func NewCRIService(config criconfig.Config, client *containerd.Client) (CRIServi c := &criService{ config: config, client: client, - apparmorEnabled: runcapparmor.IsEnabled(), + apparmorEnabled: 
runcapparmor.IsEnabled() && !config.DisableApparmor, seccompEnabled: runcseccomp.IsEnabled(), os: osinterface.RealOS{}, sandboxStore: sandboxstore.NewStore(), @@ -120,6 +121,12 @@ func NewCRIService(config criconfig.Config, client *containerd.Client) (CRIServi initialized: atomic.NewBool(false), } + if runcsystem.RunningInUserNS() { + if !(config.DisableCgroup && !c.apparmorEnabled && config.RestrictOOMScoreAdj) { + logrus.Warn("Running containerd in a user namespace typically requires disable_cgroup, disable_apparmor, restrict_oom_score_adj set to be true") + } + } + if c.config.EnableSelinux { if !selinux.GetEnabled() { logrus.Warn("Selinux is not supported")