Merge pull request #9045 from thaJeztah/less_libcontainer
remove uses of github.com/runc/libcontainer/cgroups
This commit is contained in:
commit
74705ae4f4
1
go.mod
1
go.mod
@ -88,7 +88,6 @@ require (
|
||||
github.com/containerd/typeurl v1.0.2 // indirect
|
||||
github.com/containers/ocicrypt v1.1.6 // indirect
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
|
||||
github.com/cyphar/filepath-securejoin v0.2.3 // indirect
|
||||
github.com/go-logr/logr v1.2.4 // indirect
|
||||
github.com/go-logr/stdr v1.2.2 // indirect
|
||||
github.com/godbus/dbus/v5 v5.1.0 // indirect
|
||||
|
2
go.sum
2
go.sum
@ -342,7 +342,6 @@ github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7Do
|
||||
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
|
||||
github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
|
||||
github.com/cyphar/filepath-securejoin v0.2.2/go.mod h1:FpkQEhXnPnOthhzymB7CGsFk2G9VLXONKD9G7QGMM+4=
|
||||
github.com/cyphar/filepath-securejoin v0.2.3 h1:YX6ebbZCZP7VkM3scTTokDgBL2TY741X51MTk3ycuNI=
|
||||
github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4=
|
||||
github.com/d2g/dhcp4 v0.0.0-20170904100407-a1d1b6c41b1c/go.mod h1:Ct2BUK8SB0YC1SMSibvLzxjeJLnrYEVLULFNiHY9YfQ=
|
||||
github.com/d2g/dhcp4client v1.0.0/go.mod h1:j0hNfjhrt2SxUOw55nL0ATM/z4Yt3t2Kd1mW34z5W5s=
|
||||
@ -885,7 +884,6 @@ github.com/sclevine/spec v1.2.0/go.mod h1:W4J29eT/Kzv7/b9IWLB055Z+qvVC9vt0Arko24
|
||||
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=
|
||||
github.com/seccomp/libseccomp-golang v0.9.1/go.mod h1:GbW5+tmTXfcxTToHLXlScSlAvWlF4P2Ca7zGrPiEpWo=
|
||||
github.com/seccomp/libseccomp-golang v0.9.2-0.20210429002308-3879420cc921/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg=
|
||||
github.com/seccomp/libseccomp-golang v0.9.2-0.20220502022130-f33da4d89646 h1:RpforrEYXWkmGwJHIGnLZ3tTWStkjVVstwzNGqxX2Ds=
|
||||
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
|
||||
github.com/sirupsen/logrus v1.0.4-0.20170822132746-89742aefa4b2/go.mod h1:pMByvHTf9Beacp5x1UXfOR9xyW/9antXMhjMPG0dEzc=
|
||||
github.com/sirupsen/logrus v1.0.6/go.mod h1:pMByvHTf9Beacp5x1UXfOR9xyW/9antXMhjMPG0dEzc=
|
||||
|
@ -21,8 +21,8 @@ import (
|
||||
"sort"
|
||||
|
||||
"github.com/containerd/containerd/log"
|
||||
"github.com/containerd/containerd/pkg/systemd"
|
||||
runcoptions "github.com/containerd/containerd/runtime/v2/runc/options"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
|
||||
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
)
|
||||
|
||||
|
@ -21,8 +21,8 @@ import (
|
||||
"testing"
|
||||
|
||||
criconfig "github.com/containerd/containerd/pkg/cri/config"
|
||||
"github.com/containerd/containerd/pkg/systemd"
|
||||
"github.com/containerd/containerd/plugin"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
|
||||
"github.com/stretchr/testify/assert"
|
||||
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
)
|
||||
|
@ -21,8 +21,8 @@ import (
|
||||
"sort"
|
||||
|
||||
"github.com/containerd/containerd/log"
|
||||
"github.com/containerd/containerd/pkg/systemd"
|
||||
runcoptions "github.com/containerd/containerd/runtime/v2/runc/options"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
|
||||
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
)
|
||||
|
||||
|
@ -21,8 +21,8 @@ import (
|
||||
"testing"
|
||||
|
||||
criconfig "github.com/containerd/containerd/pkg/cri/config"
|
||||
"github.com/containerd/containerd/pkg/systemd"
|
||||
"github.com/containerd/containerd/plugin"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
|
||||
"github.com/stretchr/testify/assert"
|
||||
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
)
|
||||
|
58
pkg/systemd/util.go
Normal file
58
pkg/systemd/util.go
Normal file
@ -0,0 +1,58 @@
|
||||
//go:build linux
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
// Copyright 2015 CoreOS, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"os"
|
||||
"sync"
|
||||
)
|
||||
|
||||
var (
|
||||
runningSystemd bool
|
||||
detectSystemd sync.Once
|
||||
)
|
||||
|
||||
// IsRunningSystemd checks whether the host was booted with systemd as its init
|
||||
// system. This functions similarly to systemd's `sd_booted(3)`: internally, it
|
||||
// checks whether /run/systemd/system/ exists and is a directory.
|
||||
// https://github.com/coreos/go-systemd/blob/d843340ab4bd3815fda02e648f9b09ae2dc722a7/util/util.go#L68-L78
|
||||
func IsRunningSystemd() bool {
|
||||
detectSystemd.Do(func() {
|
||||
fi, err := os.Lstat("/run/systemd/system")
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
runningSystemd = fi.IsDir()
|
||||
})
|
||||
return runningSystemd
|
||||
}
|
21
vendor/github.com/cyphar/filepath-securejoin/.travis.yml
generated
vendored
21
vendor/github.com/cyphar/filepath-securejoin/.travis.yml
generated
vendored
@ -1,21 +0,0 @@
|
||||
# Copyright (C) 2017 SUSE LLC. All rights reserved.
|
||||
# Use of this source code is governed by a BSD-style
|
||||
# license that can be found in the LICENSE file.
|
||||
|
||||
language: go
|
||||
go:
|
||||
- 1.13.x
|
||||
- 1.16.x
|
||||
- tip
|
||||
arch:
|
||||
- AMD64
|
||||
- ppc64le
|
||||
os:
|
||||
- linux
|
||||
- osx
|
||||
|
||||
script:
|
||||
- go test -cover -v ./...
|
||||
|
||||
notifications:
|
||||
email: false
|
28
vendor/github.com/cyphar/filepath-securejoin/LICENSE
generated
vendored
28
vendor/github.com/cyphar/filepath-securejoin/LICENSE
generated
vendored
@ -1,28 +0,0 @@
|
||||
Copyright (C) 2014-2015 Docker Inc & Go Authors. All rights reserved.
|
||||
Copyright (C) 2017 SUSE LLC. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
79
vendor/github.com/cyphar/filepath-securejoin/README.md
generated
vendored
79
vendor/github.com/cyphar/filepath-securejoin/README.md
generated
vendored
@ -1,79 +0,0 @@
|
||||
## `filepath-securejoin` ##
|
||||
|
||||
[](https://travis-ci.org/cyphar/filepath-securejoin)
|
||||
|
||||
An implementation of `SecureJoin`, a [candidate for inclusion in the Go
|
||||
standard library][go#20126]. The purpose of this function is to be a "secure"
|
||||
alternative to `filepath.Join`, and in particular it provides certain
|
||||
guarantees that are not provided by `filepath.Join`.
|
||||
|
||||
> **NOTE**: This code is *only* safe if you are not at risk of other processes
|
||||
> modifying path components after you've used `SecureJoin`. If it is possible
|
||||
> for a malicious process to modify path components of the resolved path, then
|
||||
> you will be vulnerable to some fairly trivial TOCTOU race conditions. [There
|
||||
> are some Linux kernel patches I'm working on which might allow for a better
|
||||
> solution.][lwn-obeneath]
|
||||
>
|
||||
> In addition, with a slightly modified API it might be possible to use
|
||||
> `O_PATH` and verify that the opened path is actually the resolved one -- but
|
||||
> I have not done that yet. I might add it in the future as a helper function
|
||||
> to help users verify the path (we can't just return `/proc/self/fd/<foo>`
|
||||
> because that doesn't always work transparently for all users).
|
||||
|
||||
This is the function prototype:
|
||||
|
||||
```go
|
||||
func SecureJoin(root, unsafePath string) (string, error)
|
||||
```
|
||||
|
||||
This library **guarantees** the following:
|
||||
|
||||
* If no error is set, the resulting string **must** be a child path of
|
||||
`root` and will not contain any symlink path components (they will all be
|
||||
expanded).
|
||||
|
||||
* When expanding symlinks, all symlink path components **must** be resolved
|
||||
relative to the provided root. In particular, this can be considered a
|
||||
userspace implementation of how `chroot(2)` operates on file paths. Note that
|
||||
these symlinks will **not** be expanded lexically (`filepath.Clean` is not
|
||||
called on the input before processing).
|
||||
|
||||
* Non-existent path components are unaffected by `SecureJoin` (similar to
|
||||
`filepath.EvalSymlinks`'s semantics).
|
||||
|
||||
* The returned path will always be `filepath.Clean`ed and thus not contain any
|
||||
`..` components.
|
||||
|
||||
A (trivial) implementation of this function on GNU/Linux systems could be done
|
||||
with the following (note that this requires root privileges and is far more
|
||||
opaque than the implementation in this library, and also requires that
|
||||
`readlink` is inside the `root` path):
|
||||
|
||||
```go
|
||||
package securejoin
|
||||
|
||||
import (
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
)
|
||||
|
||||
func SecureJoin(root, unsafePath string) (string, error) {
|
||||
unsafePath = string(filepath.Separator) + unsafePath
|
||||
cmd := exec.Command("chroot", root,
|
||||
"readlink", "--canonicalize-missing", "--no-newline", unsafePath)
|
||||
output, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
expanded := string(output)
|
||||
return filepath.Join(root, expanded), nil
|
||||
}
|
||||
```
|
||||
|
||||
[lwn-obeneath]: https://lwn.net/Articles/767547/
|
||||
[go#20126]: https://github.com/golang/go/issues/20126
|
||||
|
||||
### License ###
|
||||
|
||||
The license of this project is the same as Go, which is a BSD 3-clause license
|
||||
available in the `LICENSE` file.
|
1
vendor/github.com/cyphar/filepath-securejoin/VERSION
generated
vendored
1
vendor/github.com/cyphar/filepath-securejoin/VERSION
generated
vendored
@ -1 +0,0 @@
|
||||
0.2.3
|
115
vendor/github.com/cyphar/filepath-securejoin/join.go
generated
vendored
115
vendor/github.com/cyphar/filepath-securejoin/join.go
generated
vendored
@ -1,115 +0,0 @@
|
||||
// Copyright (C) 2014-2015 Docker Inc & Go Authors. All rights reserved.
|
||||
// Copyright (C) 2017 SUSE LLC. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package securejoin is an implementation of the hopefully-soon-to-be-included
|
||||
// SecureJoin helper that is meant to be part of the "path/filepath" package.
|
||||
// The purpose of this project is to provide a PoC implementation to make the
|
||||
// SecureJoin proposal (https://github.com/golang/go/issues/20126) more
|
||||
// tangible.
|
||||
package securejoin
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
// IsNotExist tells you if err is an error that implies that either the path
|
||||
// accessed does not exist (or path components don't exist). This is
|
||||
// effectively a more broad version of os.IsNotExist.
|
||||
func IsNotExist(err error) bool {
|
||||
// Check that it's not actually an ENOTDIR, which in some cases is a more
|
||||
// convoluted case of ENOENT (usually involving weird paths).
|
||||
return errors.Is(err, os.ErrNotExist) || errors.Is(err, syscall.ENOTDIR) || errors.Is(err, syscall.ENOENT)
|
||||
}
|
||||
|
||||
// SecureJoinVFS joins the two given path components (similar to Join) except
|
||||
// that the returned path is guaranteed to be scoped inside the provided root
|
||||
// path (when evaluated). Any symbolic links in the path are evaluated with the
|
||||
// given root treated as the root of the filesystem, similar to a chroot. The
|
||||
// filesystem state is evaluated through the given VFS interface (if nil, the
|
||||
// standard os.* family of functions are used).
|
||||
//
|
||||
// Note that the guarantees provided by this function only apply if the path
|
||||
// components in the returned string are not modified (in other words are not
|
||||
// replaced with symlinks on the filesystem) after this function has returned.
|
||||
// Such a symlink race is necessarily out-of-scope of SecureJoin.
|
||||
func SecureJoinVFS(root, unsafePath string, vfs VFS) (string, error) {
|
||||
// Use the os.* VFS implementation if none was specified.
|
||||
if vfs == nil {
|
||||
vfs = osVFS{}
|
||||
}
|
||||
|
||||
var path bytes.Buffer
|
||||
n := 0
|
||||
for unsafePath != "" {
|
||||
if n > 255 {
|
||||
return "", &os.PathError{Op: "SecureJoin", Path: root + "/" + unsafePath, Err: syscall.ELOOP}
|
||||
}
|
||||
|
||||
// Next path component, p.
|
||||
i := strings.IndexRune(unsafePath, filepath.Separator)
|
||||
var p string
|
||||
if i == -1 {
|
||||
p, unsafePath = unsafePath, ""
|
||||
} else {
|
||||
p, unsafePath = unsafePath[:i], unsafePath[i+1:]
|
||||
}
|
||||
|
||||
// Create a cleaned path, using the lexical semantics of /../a, to
|
||||
// create a "scoped" path component which can safely be joined to fullP
|
||||
// for evaluation. At this point, path.String() doesn't contain any
|
||||
// symlink components.
|
||||
cleanP := filepath.Clean(string(filepath.Separator) + path.String() + p)
|
||||
if cleanP == string(filepath.Separator) {
|
||||
path.Reset()
|
||||
continue
|
||||
}
|
||||
fullP := filepath.Clean(root + cleanP)
|
||||
|
||||
// Figure out whether the path is a symlink.
|
||||
fi, err := vfs.Lstat(fullP)
|
||||
if err != nil && !IsNotExist(err) {
|
||||
return "", err
|
||||
}
|
||||
// Treat non-existent path components the same as non-symlinks (we
|
||||
// can't do any better here).
|
||||
if IsNotExist(err) || fi.Mode()&os.ModeSymlink == 0 {
|
||||
path.WriteString(p)
|
||||
path.WriteRune(filepath.Separator)
|
||||
continue
|
||||
}
|
||||
|
||||
// Only increment when we actually dereference a link.
|
||||
n++
|
||||
|
||||
// It's a symlink, expand it by prepending it to the yet-unparsed path.
|
||||
dest, err := vfs.Readlink(fullP)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
// Absolute symlinks reset any work we've already done.
|
||||
if filepath.IsAbs(dest) {
|
||||
path.Reset()
|
||||
}
|
||||
unsafePath = dest + string(filepath.Separator) + unsafePath
|
||||
}
|
||||
|
||||
// We have to clean path.String() here because it may contain '..'
|
||||
// components that are entirely lexical, but would be misleading otherwise.
|
||||
// And finally do a final clean to ensure that root is also lexically
|
||||
// clean.
|
||||
fullP := filepath.Clean(string(filepath.Separator) + path.String())
|
||||
return filepath.Clean(root + fullP), nil
|
||||
}
|
||||
|
||||
// SecureJoin is a wrapper around SecureJoinVFS that just uses the os.* library
|
||||
// of functions as the VFS. If in doubt, use this function over SecureJoinVFS.
|
||||
func SecureJoin(root, unsafePath string) (string, error) {
|
||||
return SecureJoinVFS(root, unsafePath, nil)
|
||||
}
|
41
vendor/github.com/cyphar/filepath-securejoin/vfs.go
generated
vendored
41
vendor/github.com/cyphar/filepath-securejoin/vfs.go
generated
vendored
@ -1,41 +0,0 @@
|
||||
// Copyright (C) 2017 SUSE LLC. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package securejoin
|
||||
|
||||
import "os"
|
||||
|
||||
// In future this should be moved into a separate package, because now there
|
||||
// are several projects (umoci and go-mtree) that are using this sort of
|
||||
// interface.
|
||||
|
||||
// VFS is the minimal interface necessary to use SecureJoinVFS. A nil VFS is
|
||||
// equivalent to using the standard os.* family of functions. This is mainly
|
||||
// used for the purposes of mock testing, but also can be used to otherwise use
|
||||
// SecureJoin with VFS-like system.
|
||||
type VFS interface {
|
||||
// Lstat returns a FileInfo describing the named file. If the file is a
|
||||
// symbolic link, the returned FileInfo describes the symbolic link. Lstat
|
||||
// makes no attempt to follow the link. These semantics are identical to
|
||||
// os.Lstat.
|
||||
Lstat(name string) (os.FileInfo, error)
|
||||
|
||||
// Readlink returns the destination of the named symbolic link. These
|
||||
// semantics are identical to os.Readlink.
|
||||
Readlink(name string) (string, error)
|
||||
}
|
||||
|
||||
// osVFS is the "nil" VFS, in that it just passes everything through to the os
|
||||
// module.
|
||||
type osVFS struct{}
|
||||
|
||||
// Lstat returns a FileInfo describing the named file. If the file is a
|
||||
// symbolic link, the returned FileInfo describes the symbolic link. Lstat
|
||||
// makes no attempt to follow the link. These semantics are identical to
|
||||
// os.Lstat.
|
||||
func (o osVFS) Lstat(name string) (os.FileInfo, error) { return os.Lstat(name) }
|
||||
|
||||
// Readlink returns the destination of the named symbolic link. These
|
||||
// semantics are identical to os.Readlink.
|
||||
func (o osVFS) Readlink(name string) (string, error) { return os.Readlink(name) }
|
59
vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go
generated
vendored
59
vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go
generated
vendored
@ -1,59 +0,0 @@
|
||||
package cgroups
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type Manager interface {
|
||||
// Apply creates a cgroup, if not yet created, and adds a process
|
||||
// with the specified pid into that cgroup. A special value of -1
|
||||
// can be used to merely create a cgroup.
|
||||
Apply(pid int) error
|
||||
|
||||
// GetPids returns the PIDs of all processes inside the cgroup.
|
||||
GetPids() ([]int, error)
|
||||
|
||||
// GetAllPids returns the PIDs of all processes inside the cgroup
|
||||
// any all its sub-cgroups.
|
||||
GetAllPids() ([]int, error)
|
||||
|
||||
// GetStats returns cgroups statistics.
|
||||
GetStats() (*Stats, error)
|
||||
|
||||
// Freeze sets the freezer cgroup to the specified state.
|
||||
Freeze(state configs.FreezerState) error
|
||||
|
||||
// Destroy removes cgroup.
|
||||
Destroy() error
|
||||
|
||||
// Path returns a cgroup path to the specified controller/subsystem.
|
||||
// For cgroupv2, the argument is unused and can be empty.
|
||||
Path(string) string
|
||||
|
||||
// Set sets cgroup resources parameters/limits. If the argument is nil,
|
||||
// the resources specified during Manager creation (or the previous call
|
||||
// to Set) are used.
|
||||
Set(r *configs.Resources) error
|
||||
|
||||
// GetPaths returns cgroup path(s) to save in a state file in order to
|
||||
// restore later.
|
||||
//
|
||||
// For cgroup v1, a key is cgroup subsystem name, and the value is the
|
||||
// path to the cgroup for this subsystem.
|
||||
//
|
||||
// For cgroup v2 unified hierarchy, a key is "", and the value is the
|
||||
// unified path.
|
||||
GetPaths() map[string]string
|
||||
|
||||
// GetCgroups returns the cgroup data as configured.
|
||||
GetCgroups() (*configs.Cgroup, error)
|
||||
|
||||
// GetFreezerState retrieves the current FreezerState of the cgroup.
|
||||
GetFreezerState() (configs.FreezerState, error)
|
||||
|
||||
// Exists returns whether the cgroup path exists or not.
|
||||
Exists() bool
|
||||
|
||||
// OOMKillCount reports OOM kill count for the cgroup.
|
||||
OOMKillCount() (uint64, error)
|
||||
}
|
386
vendor/github.com/opencontainers/runc/libcontainer/cgroups/devices/devices_emulator.go
generated
vendored
386
vendor/github.com/opencontainers/runc/libcontainer/cgroups/devices/devices_emulator.go
generated
vendored
@ -1,386 +0,0 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
/*
|
||||
* Copyright (C) 2020 Aleksa Sarai <cyphar@cyphar.com>
|
||||
* Copyright (C) 2020 SUSE LLC
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package devices
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/devices"
|
||||
)
|
||||
|
||||
// deviceMeta is a Rule without the Allow or Permissions fields, and no
|
||||
// wildcard-type support. It's effectively the "match" portion of a metadata
|
||||
// rule, for the purposes of our emulation.
|
||||
type deviceMeta struct {
|
||||
node devices.Type
|
||||
major int64
|
||||
minor int64
|
||||
}
|
||||
|
||||
// deviceRule is effectively the tuple (deviceMeta, Permissions).
|
||||
type deviceRule struct {
|
||||
meta deviceMeta
|
||||
perms devices.Permissions
|
||||
}
|
||||
|
||||
// deviceRules is a mapping of device metadata rules to the associated
|
||||
// permissions in the ruleset.
|
||||
type deviceRules map[deviceMeta]devices.Permissions
|
||||
|
||||
func (r deviceRules) orderedEntries() []deviceRule {
|
||||
var rules []deviceRule
|
||||
for meta, perms := range r {
|
||||
rules = append(rules, deviceRule{meta: meta, perms: perms})
|
||||
}
|
||||
sort.Slice(rules, func(i, j int) bool {
|
||||
// Sort by (major, minor, type).
|
||||
a, b := rules[i].meta, rules[j].meta
|
||||
return a.major < b.major ||
|
||||
(a.major == b.major && a.minor < b.minor) ||
|
||||
(a.major == b.major && a.minor == b.minor && a.node < b.node)
|
||||
})
|
||||
return rules
|
||||
}
|
||||
|
||||
type Emulator struct {
|
||||
defaultAllow bool
|
||||
rules deviceRules
|
||||
}
|
||||
|
||||
func (e *Emulator) IsBlacklist() bool {
|
||||
return e.defaultAllow
|
||||
}
|
||||
|
||||
func (e *Emulator) IsAllowAll() bool {
|
||||
return e.IsBlacklist() && len(e.rules) == 0
|
||||
}
|
||||
|
||||
func parseLine(line string) (*deviceRule, error) {
|
||||
// Input: node major:minor perms.
|
||||
fields := strings.FieldsFunc(line, func(r rune) bool {
|
||||
return r == ' ' || r == ':'
|
||||
})
|
||||
if len(fields) != 4 {
|
||||
return nil, fmt.Errorf("malformed devices.list rule %s", line)
|
||||
}
|
||||
|
||||
var (
|
||||
rule deviceRule
|
||||
node = fields[0]
|
||||
major = fields[1]
|
||||
minor = fields[2]
|
||||
perms = fields[3]
|
||||
)
|
||||
|
||||
// Parse the node type.
|
||||
switch node {
|
||||
case "a":
|
||||
// Super-special case -- "a" always means every device with every
|
||||
// access mode. In fact, for devices.list this actually indicates that
|
||||
// the cgroup is in black-list mode.
|
||||
// TODO: Double-check that the entire file is "a *:* rwm".
|
||||
return nil, nil
|
||||
case "b":
|
||||
rule.meta.node = devices.BlockDevice
|
||||
case "c":
|
||||
rule.meta.node = devices.CharDevice
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown device type %q", node)
|
||||
}
|
||||
|
||||
// Parse the major number.
|
||||
if major == "*" {
|
||||
rule.meta.major = devices.Wildcard
|
||||
} else {
|
||||
val, err := strconv.ParseUint(major, 10, 32)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid major number: %w", err)
|
||||
}
|
||||
rule.meta.major = int64(val)
|
||||
}
|
||||
|
||||
// Parse the minor number.
|
||||
if minor == "*" {
|
||||
rule.meta.minor = devices.Wildcard
|
||||
} else {
|
||||
val, err := strconv.ParseUint(minor, 10, 32)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid minor number: %w", err)
|
||||
}
|
||||
rule.meta.minor = int64(val)
|
||||
}
|
||||
|
||||
// Parse the access permissions.
|
||||
rule.perms = devices.Permissions(perms)
|
||||
if !rule.perms.IsValid() || rule.perms.IsEmpty() {
|
||||
return nil, fmt.Errorf("parse access mode: contained unknown modes or is empty: %q", perms)
|
||||
}
|
||||
return &rule, nil
|
||||
}
|
||||
|
||||
func (e *Emulator) addRule(rule deviceRule) error { //nolint:unparam
|
||||
if e.rules == nil {
|
||||
e.rules = make(map[deviceMeta]devices.Permissions)
|
||||
}
|
||||
|
||||
// Merge with any pre-existing permissions.
|
||||
oldPerms := e.rules[rule.meta]
|
||||
newPerms := rule.perms.Union(oldPerms)
|
||||
e.rules[rule.meta] = newPerms
|
||||
return nil
|
||||
}
|
||||
|
||||
func (e *Emulator) rmRule(rule deviceRule) error {
|
||||
// Give an error if any of the permissions requested to be removed are
|
||||
// present in a partially-matching wildcard rule, because such rules will
|
||||
// be ignored by cgroupv1.
|
||||
//
|
||||
// This is a diversion from cgroupv1, but is necessary to avoid leading
|
||||
// users into a false sense of security. cgroupv1 will silently(!) ignore
|
||||
// requests to remove partial exceptions, but we really shouldn't do that.
|
||||
//
|
||||
// It may seem like we could just "split" wildcard rules which hit this
|
||||
// issue, but unfortunately there are 2^32 possible major and minor
|
||||
// numbers, which would exhaust kernel memory quickly if we did this. Not
|
||||
// to mention it'd be really slow (the kernel side is implemented as a
|
||||
// linked-list of exceptions).
|
||||
for _, partialMeta := range []deviceMeta{
|
||||
{node: rule.meta.node, major: devices.Wildcard, minor: rule.meta.minor},
|
||||
{node: rule.meta.node, major: rule.meta.major, minor: devices.Wildcard},
|
||||
{node: rule.meta.node, major: devices.Wildcard, minor: devices.Wildcard},
|
||||
} {
|
||||
// This wildcard rule is equivalent to the requested rule, so skip it.
|
||||
if rule.meta == partialMeta {
|
||||
continue
|
||||
}
|
||||
// Only give an error if the set of permissions overlap.
|
||||
partialPerms := e.rules[partialMeta]
|
||||
if !partialPerms.Intersection(rule.perms).IsEmpty() {
|
||||
return fmt.Errorf("requested rule [%v %v] not supported by devices cgroupv1 (cannot punch hole in existing wildcard rule [%v %v])", rule.meta, rule.perms, partialMeta, partialPerms)
|
||||
}
|
||||
}
|
||||
|
||||
// Subtract all of the permissions listed from the full match rule. If the
|
||||
// rule didn't exist, all of this is a no-op.
|
||||
newPerms := e.rules[rule.meta].Difference(rule.perms)
|
||||
if newPerms.IsEmpty() {
|
||||
delete(e.rules, rule.meta)
|
||||
} else {
|
||||
e.rules[rule.meta] = newPerms
|
||||
}
|
||||
// TODO: The actual cgroup code doesn't care if an exception didn't exist
|
||||
// during removal, so not erroring out here is /accurate/ but quite
|
||||
// worrying. Maybe we should do additional validation, but again we
|
||||
// have to worry about backwards-compatibility.
|
||||
return nil
|
||||
}
|
||||
|
||||
func (e *Emulator) allow(rule *deviceRule) error {
|
||||
// This cgroup is configured as a black-list. Reset the entire emulator,
|
||||
// and put is into black-list mode.
|
||||
if rule == nil || rule.meta.node == devices.WildcardDevice {
|
||||
*e = Emulator{
|
||||
defaultAllow: true,
|
||||
rules: nil,
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
var err error
|
||||
if e.defaultAllow {
|
||||
err = wrapErr(e.rmRule(*rule), "unable to remove 'deny' exception")
|
||||
} else {
|
||||
err = wrapErr(e.addRule(*rule), "unable to add 'allow' exception")
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (e *Emulator) deny(rule *deviceRule) error {
|
||||
// This cgroup is configured as a white-list. Reset the entire emulator,
|
||||
// and put is into white-list mode.
|
||||
if rule == nil || rule.meta.node == devices.WildcardDevice {
|
||||
*e = Emulator{
|
||||
defaultAllow: false,
|
||||
rules: nil,
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
var err error
|
||||
if e.defaultAllow {
|
||||
err = wrapErr(e.addRule(*rule), "unable to add 'deny' exception")
|
||||
} else {
|
||||
err = wrapErr(e.rmRule(*rule), "unable to remove 'allow' exception")
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (e *Emulator) Apply(rule devices.Rule) error {
|
||||
if !rule.Type.CanCgroup() {
|
||||
return fmt.Errorf("cannot add rule [%#v] with non-cgroup type %q", rule, rule.Type)
|
||||
}
|
||||
|
||||
innerRule := &deviceRule{
|
||||
meta: deviceMeta{
|
||||
node: rule.Type,
|
||||
major: rule.Major,
|
||||
minor: rule.Minor,
|
||||
},
|
||||
perms: rule.Permissions,
|
||||
}
|
||||
if innerRule.meta.node == devices.WildcardDevice {
|
||||
innerRule = nil
|
||||
}
|
||||
|
||||
if rule.Allow {
|
||||
return e.allow(innerRule)
|
||||
}
|
||||
|
||||
return e.deny(innerRule)
|
||||
}
|
||||
|
||||
// EmulatorFromList takes a reader to a "devices.list"-like source, and returns
|
||||
// a new Emulator that represents the state of the devices cgroup. Note that
|
||||
// black-list devices cgroups cannot be fully reconstructed, due to limitations
|
||||
// in the devices cgroup API. Instead, such cgroups are always treated as
|
||||
// "allow all" cgroups.
|
||||
func EmulatorFromList(list io.Reader) (*Emulator, error) {
|
||||
// Normally cgroups are in black-list mode by default, but the way we
|
||||
// figure out the current mode is whether or not devices.list has an
|
||||
// allow-all rule. So we default to a white-list, and the existence of an
|
||||
// "a *:* rwm" entry will tell us otherwise.
|
||||
e := &Emulator{
|
||||
defaultAllow: false,
|
||||
}
|
||||
|
||||
// Parse the "devices.list".
|
||||
s := bufio.NewScanner(list)
|
||||
for s.Scan() {
|
||||
line := s.Text()
|
||||
deviceRule, err := parseLine(line)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error parsing line %q: %w", line, err)
|
||||
}
|
||||
// "devices.list" is an allow list. Note that this means that in
|
||||
// black-list mode, we have no idea what rules are in play. As a
|
||||
// result, we need to be very careful in Transition().
|
||||
if err := e.allow(deviceRule); err != nil {
|
||||
return nil, fmt.Errorf("error adding devices.list rule: %w", err)
|
||||
}
|
||||
}
|
||||
if err := s.Err(); err != nil {
|
||||
return nil, fmt.Errorf("error reading devices.list lines: %w", err)
|
||||
}
|
||||
return e, nil
|
||||
}
|
||||
|
||||
// Transition calculates what is the minimally-disruptive set of rules need to
|
||||
// be applied to a devices cgroup in order to transition to the given target.
|
||||
// This means that any already-existing rules will not be applied, and
|
||||
// disruptive rules (like denying all device access) will only be applied if
|
||||
// necessary.
|
||||
//
|
||||
// This function is the sole reason for all of Emulator -- to allow us
|
||||
// to figure out how to update a containers' cgroups without causing spurious
|
||||
// device errors (if possible).
|
||||
func (source *Emulator) Transition(target *Emulator) ([]*devices.Rule, error) {
|
||||
var transitionRules []*devices.Rule
|
||||
oldRules := source.rules
|
||||
|
||||
// If the default policy doesn't match, we need to include a "disruptive"
|
||||
// rule (either allow-all or deny-all) in order to switch the cgroup to the
|
||||
// correct default policy.
|
||||
//
|
||||
// However, due to a limitation in "devices.list" we cannot be sure what
|
||||
// deny rules are in place in a black-list cgroup. Thus if the source is a
|
||||
// black-list we also have to include a disruptive rule.
|
||||
if source.IsBlacklist() || source.defaultAllow != target.defaultAllow {
|
||||
transitionRules = append(transitionRules, &devices.Rule{
|
||||
Type: 'a',
|
||||
Major: -1,
|
||||
Minor: -1,
|
||||
Permissions: devices.Permissions("rwm"),
|
||||
Allow: target.defaultAllow,
|
||||
})
|
||||
// The old rules are only relevant if we aren't starting out with a
|
||||
// disruptive rule.
|
||||
oldRules = nil
|
||||
}
|
||||
|
||||
// NOTE: We traverse through the rules in a sorted order so we always write
|
||||
// the same set of rules (this is to aid testing).
|
||||
|
||||
// First, we create inverse rules for any old rules not in the new set.
|
||||
// This includes partial-inverse rules for specific permissions. This is a
|
||||
// no-op if we added a disruptive rule, since oldRules will be empty.
|
||||
for _, rule := range oldRules.orderedEntries() {
|
||||
meta, oldPerms := rule.meta, rule.perms
|
||||
newPerms := target.rules[meta]
|
||||
droppedPerms := oldPerms.Difference(newPerms)
|
||||
if !droppedPerms.IsEmpty() {
|
||||
transitionRules = append(transitionRules, &devices.Rule{
|
||||
Type: meta.node,
|
||||
Major: meta.major,
|
||||
Minor: meta.minor,
|
||||
Permissions: droppedPerms,
|
||||
Allow: target.defaultAllow,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Add any additional rules which weren't in the old set. We happen to
|
||||
// filter out rules which are present in both sets, though this isn't
|
||||
// strictly necessary.
|
||||
for _, rule := range target.rules.orderedEntries() {
|
||||
meta, newPerms := rule.meta, rule.perms
|
||||
oldPerms := oldRules[meta]
|
||||
gainedPerms := newPerms.Difference(oldPerms)
|
||||
if !gainedPerms.IsEmpty() {
|
||||
transitionRules = append(transitionRules, &devices.Rule{
|
||||
Type: meta.node,
|
||||
Major: meta.major,
|
||||
Minor: meta.minor,
|
||||
Permissions: gainedPerms,
|
||||
Allow: !target.defaultAllow,
|
||||
})
|
||||
}
|
||||
}
|
||||
return transitionRules, nil
|
||||
}
|
||||
|
||||
// Rules returns the minimum set of rules necessary to convert a *deny-all*
|
||||
// cgroup to the emulated filter state (note that this is not the same as a
|
||||
// default cgroupv1 cgroup -- which is allow-all). This is effectively just a
|
||||
// wrapper around Transition() with the source emulator being an empty cgroup.
|
||||
func (e *Emulator) Rules() ([]*devices.Rule, error) {
|
||||
defaultCgroup := &Emulator{defaultAllow: false}
|
||||
return defaultCgroup.Transition(e)
|
||||
}
|
||||
|
||||
func wrapErr(err error, text string) error {
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf(text+": %w", err)
|
||||
}
|
208
vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go
generated
vendored
208
vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go
generated
vendored
@ -1,208 +0,0 @@
|
||||
// Package devicefilter contains eBPF device filter program
|
||||
//
|
||||
// The implementation is based on https://github.com/containers/crun/blob/0.10.2/src/libcrun/ebpf.c
|
||||
//
|
||||
// Although ebpf.c is originally licensed under LGPL-3.0-or-later, the author (Giuseppe Scrivano)
|
||||
// agreed to relicense the file in Apache License 2.0: https://github.com/opencontainers/runc/issues/2144#issuecomment-543116397
|
||||
package devicefilter
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"strconv"
|
||||
|
||||
"github.com/cilium/ebpf/asm"
|
||||
devicesemulator "github.com/opencontainers/runc/libcontainer/cgroups/devices"
|
||||
"github.com/opencontainers/runc/libcontainer/devices"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const (
|
||||
// license string format is same as kernel MODULE_LICENSE macro
|
||||
license = "Apache"
|
||||
)
|
||||
|
||||
// DeviceFilter returns eBPF device filter program and its license string
|
||||
func DeviceFilter(rules []*devices.Rule) (asm.Instructions, string, error) {
|
||||
// Generate the minimum ruleset for the device rules we are given. While we
|
||||
// don't care about minimum transitions in cgroupv2, using the emulator
|
||||
// gives us a guarantee that the behaviour of devices filtering is the same
|
||||
// as cgroupv1, including security hardenings to avoid misconfiguration
|
||||
// (such as punching holes in wildcard rules).
|
||||
emu := new(devicesemulator.Emulator)
|
||||
for _, rule := range rules {
|
||||
if err := emu.Apply(*rule); err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
}
|
||||
cleanRules, err := emu.Rules()
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
|
||||
p := &program{
|
||||
defaultAllow: emu.IsBlacklist(),
|
||||
}
|
||||
p.init()
|
||||
|
||||
for idx, rule := range cleanRules {
|
||||
if rule.Type == devices.WildcardDevice {
|
||||
// We can safely skip over wildcard entries because there should
|
||||
// only be one (at most) at the very start to instruct cgroupv1 to
|
||||
// go into allow-list mode. However we do double-check this here.
|
||||
if idx != 0 || rule.Allow != emu.IsBlacklist() {
|
||||
return nil, "", fmt.Errorf("[internal error] emulated cgroupv2 devices ruleset had bad wildcard at idx %v (%s)", idx, rule.CgroupString())
|
||||
}
|
||||
continue
|
||||
}
|
||||
if rule.Allow == p.defaultAllow {
|
||||
// There should be no rules which have an action equal to the
|
||||
// default action, the emulator removes those.
|
||||
return nil, "", fmt.Errorf("[internal error] emulated cgroupv2 devices ruleset had no-op rule at idx %v (%s)", idx, rule.CgroupString())
|
||||
}
|
||||
if err := p.appendRule(rule); err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
}
|
||||
return p.finalize(), license, nil
|
||||
}
|
||||
|
||||
type program struct {
|
||||
insts asm.Instructions
|
||||
defaultAllow bool
|
||||
blockID int
|
||||
}
|
||||
|
||||
func (p *program) init() {
|
||||
// struct bpf_cgroup_dev_ctx: https://elixir.bootlin.com/linux/v5.3.6/source/include/uapi/linux/bpf.h#L3423
|
||||
/*
|
||||
u32 access_type
|
||||
u32 major
|
||||
u32 minor
|
||||
*/
|
||||
// R2 <- type (lower 16 bit of u32 access_type at R1[0])
|
||||
p.insts = append(p.insts,
|
||||
asm.LoadMem(asm.R2, asm.R1, 0, asm.Word),
|
||||
asm.And.Imm32(asm.R2, 0xFFFF))
|
||||
|
||||
// R3 <- access (upper 16 bit of u32 access_type at R1[0])
|
||||
p.insts = append(p.insts,
|
||||
asm.LoadMem(asm.R3, asm.R1, 0, asm.Word),
|
||||
// RSh: bitwise shift right
|
||||
asm.RSh.Imm32(asm.R3, 16))
|
||||
|
||||
// R4 <- major (u32 major at R1[4])
|
||||
p.insts = append(p.insts,
|
||||
asm.LoadMem(asm.R4, asm.R1, 4, asm.Word))
|
||||
|
||||
// R5 <- minor (u32 minor at R1[8])
|
||||
p.insts = append(p.insts,
|
||||
asm.LoadMem(asm.R5, asm.R1, 8, asm.Word))
|
||||
}
|
||||
|
||||
// appendRule rule converts an OCI rule to the relevant eBPF block and adds it
|
||||
// to the in-progress filter program. In order to operate properly, it must be
|
||||
// called with a "clean" rule list (generated by devices.Emulator.Rules() --
|
||||
// with any "a" rules removed).
|
||||
func (p *program) appendRule(rule *devices.Rule) error {
|
||||
if p.blockID < 0 {
|
||||
return errors.New("the program is finalized")
|
||||
}
|
||||
|
||||
var bpfType int32
|
||||
switch rule.Type {
|
||||
case devices.CharDevice:
|
||||
bpfType = int32(unix.BPF_DEVCG_DEV_CHAR)
|
||||
case devices.BlockDevice:
|
||||
bpfType = int32(unix.BPF_DEVCG_DEV_BLOCK)
|
||||
default:
|
||||
// We do not permit 'a', nor any other types we don't know about.
|
||||
return fmt.Errorf("invalid type %q", string(rule.Type))
|
||||
}
|
||||
if rule.Major > math.MaxUint32 {
|
||||
return fmt.Errorf("invalid major %d", rule.Major)
|
||||
}
|
||||
if rule.Minor > math.MaxUint32 {
|
||||
return fmt.Errorf("invalid minor %d", rule.Major)
|
||||
}
|
||||
hasMajor := rule.Major >= 0 // if not specified in OCI json, major is set to -1
|
||||
hasMinor := rule.Minor >= 0
|
||||
bpfAccess := int32(0)
|
||||
for _, r := range rule.Permissions {
|
||||
switch r {
|
||||
case 'r':
|
||||
bpfAccess |= unix.BPF_DEVCG_ACC_READ
|
||||
case 'w':
|
||||
bpfAccess |= unix.BPF_DEVCG_ACC_WRITE
|
||||
case 'm':
|
||||
bpfAccess |= unix.BPF_DEVCG_ACC_MKNOD
|
||||
default:
|
||||
return fmt.Errorf("unknown device access %v", r)
|
||||
}
|
||||
}
|
||||
// If the access is rwm, skip the check.
|
||||
hasAccess := bpfAccess != (unix.BPF_DEVCG_ACC_READ | unix.BPF_DEVCG_ACC_WRITE | unix.BPF_DEVCG_ACC_MKNOD)
|
||||
|
||||
var (
|
||||
blockSym = "block-" + strconv.Itoa(p.blockID)
|
||||
nextBlockSym = "block-" + strconv.Itoa(p.blockID+1)
|
||||
prevBlockLastIdx = len(p.insts) - 1
|
||||
)
|
||||
p.insts = append(p.insts,
|
||||
// if (R2 != bpfType) goto next
|
||||
asm.JNE.Imm(asm.R2, bpfType, nextBlockSym),
|
||||
)
|
||||
if hasAccess {
|
||||
p.insts = append(p.insts,
|
||||
// if (R3 & bpfAccess != R3 /* use R1 as a temp var */) goto next
|
||||
asm.Mov.Reg32(asm.R1, asm.R3),
|
||||
asm.And.Imm32(asm.R1, bpfAccess),
|
||||
asm.JNE.Reg(asm.R1, asm.R3, nextBlockSym),
|
||||
)
|
||||
}
|
||||
if hasMajor {
|
||||
p.insts = append(p.insts,
|
||||
// if (R4 != major) goto next
|
||||
asm.JNE.Imm(asm.R4, int32(rule.Major), nextBlockSym),
|
||||
)
|
||||
}
|
||||
if hasMinor {
|
||||
p.insts = append(p.insts,
|
||||
// if (R5 != minor) goto next
|
||||
asm.JNE.Imm(asm.R5, int32(rule.Minor), nextBlockSym),
|
||||
)
|
||||
}
|
||||
p.insts = append(p.insts, acceptBlock(rule.Allow)...)
|
||||
// set blockSym to the first instruction we added in this iteration
|
||||
p.insts[prevBlockLastIdx+1] = p.insts[prevBlockLastIdx+1].Sym(blockSym)
|
||||
p.blockID++
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *program) finalize() asm.Instructions {
|
||||
var v int32
|
||||
if p.defaultAllow {
|
||||
v = 1
|
||||
}
|
||||
blockSym := "block-" + strconv.Itoa(p.blockID)
|
||||
p.insts = append(p.insts,
|
||||
// R0 <- v
|
||||
asm.Mov.Imm32(asm.R0, v).Sym(blockSym),
|
||||
asm.Return(),
|
||||
)
|
||||
p.blockID = -1
|
||||
return p.insts
|
||||
}
|
||||
|
||||
func acceptBlock(accept bool) asm.Instructions {
|
||||
var v int32
|
||||
if accept {
|
||||
v = 1
|
||||
}
|
||||
return []asm.Instruction{
|
||||
// R0 <- v
|
||||
asm.Mov.Imm32(asm.R0, v),
|
||||
asm.Return(),
|
||||
}
|
||||
}
|
253
vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/ebpf_linux.go
generated
vendored
253
vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/ebpf_linux.go
generated
vendored
@ -1,253 +0,0 @@
|
||||
package ebpf
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"runtime"
|
||||
"sync"
|
||||
"unsafe"
|
||||
|
||||
"github.com/cilium/ebpf"
|
||||
"github.com/cilium/ebpf/asm"
|
||||
"github.com/cilium/ebpf/link"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func nilCloser() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func findAttachedCgroupDeviceFilters(dirFd int) ([]*ebpf.Program, error) {
|
||||
type bpfAttrQuery struct {
|
||||
TargetFd uint32
|
||||
AttachType uint32
|
||||
QueryType uint32
|
||||
AttachFlags uint32
|
||||
ProgIds uint64 // __aligned_u64
|
||||
ProgCnt uint32
|
||||
}
|
||||
|
||||
// Currently you can only have 64 eBPF programs attached to a cgroup.
|
||||
size := 64
|
||||
retries := 0
|
||||
for retries < 10 {
|
||||
progIds := make([]uint32, size)
|
||||
query := bpfAttrQuery{
|
||||
TargetFd: uint32(dirFd),
|
||||
AttachType: uint32(unix.BPF_CGROUP_DEVICE),
|
||||
ProgIds: uint64(uintptr(unsafe.Pointer(&progIds[0]))),
|
||||
ProgCnt: uint32(len(progIds)),
|
||||
}
|
||||
|
||||
// Fetch the list of program ids.
|
||||
_, _, errno := unix.Syscall(unix.SYS_BPF,
|
||||
uintptr(unix.BPF_PROG_QUERY),
|
||||
uintptr(unsafe.Pointer(&query)),
|
||||
unsafe.Sizeof(query))
|
||||
size = int(query.ProgCnt)
|
||||
runtime.KeepAlive(query)
|
||||
if errno != 0 {
|
||||
// On ENOSPC we get the correct number of programs.
|
||||
if errno == unix.ENOSPC {
|
||||
retries++
|
||||
continue
|
||||
}
|
||||
return nil, fmt.Errorf("bpf_prog_query(BPF_CGROUP_DEVICE) failed: %w", errno)
|
||||
}
|
||||
|
||||
// Convert the ids to program handles.
|
||||
progIds = progIds[:size]
|
||||
programs := make([]*ebpf.Program, 0, len(progIds))
|
||||
for _, progId := range progIds {
|
||||
program, err := ebpf.NewProgramFromID(ebpf.ProgramID(progId))
|
||||
if err != nil {
|
||||
// We skip over programs that give us -EACCES or -EPERM. This
|
||||
// is necessary because there may be BPF programs that have
|
||||
// been attached (such as with --systemd-cgroup) which have an
|
||||
// LSM label that blocks us from interacting with the program.
|
||||
//
|
||||
// Because additional BPF_CGROUP_DEVICE programs only can add
|
||||
// restrictions, there's no real issue with just ignoring these
|
||||
// programs (and stops runc from breaking on distributions with
|
||||
// very strict SELinux policies).
|
||||
if errors.Is(err, os.ErrPermission) {
|
||||
logrus.Debugf("ignoring existing CGROUP_DEVICE program (prog_id=%v) which cannot be accessed by runc -- likely due to LSM policy: %v", progId, err)
|
||||
continue
|
||||
}
|
||||
return nil, fmt.Errorf("cannot fetch program from id: %w", err)
|
||||
}
|
||||
programs = append(programs, program)
|
||||
}
|
||||
runtime.KeepAlive(progIds)
|
||||
return programs, nil
|
||||
}
|
||||
|
||||
return nil, errors.New("could not get complete list of CGROUP_DEVICE programs")
|
||||
}
|
||||
|
||||
var (
|
||||
haveBpfProgReplaceBool bool
|
||||
haveBpfProgReplaceOnce sync.Once
|
||||
)
|
||||
|
||||
// Loosely based on the BPF_F_REPLACE support check in
|
||||
// https://github.com/cilium/ebpf/blob/v0.6.0/link/syscalls.go.
|
||||
//
|
||||
// TODO: move this logic to cilium/ebpf
|
||||
func haveBpfProgReplace() bool {
|
||||
haveBpfProgReplaceOnce.Do(func() {
|
||||
prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{
|
||||
Type: ebpf.CGroupDevice,
|
||||
License: "MIT",
|
||||
Instructions: asm.Instructions{
|
||||
asm.Mov.Imm(asm.R0, 0),
|
||||
asm.Return(),
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
logrus.Debugf("checking for BPF_F_REPLACE support: ebpf.NewProgram failed: %v", err)
|
||||
return
|
||||
}
|
||||
defer prog.Close()
|
||||
|
||||
devnull, err := os.Open("/dev/null")
|
||||
if err != nil {
|
||||
logrus.Debugf("checking for BPF_F_REPLACE support: open dummy target fd: %v", err)
|
||||
return
|
||||
}
|
||||
defer devnull.Close()
|
||||
|
||||
// We know that we have BPF_PROG_ATTACH since we can load
|
||||
// BPF_CGROUP_DEVICE programs. If passing BPF_F_REPLACE gives us EINVAL
|
||||
// we know that the feature isn't present.
|
||||
err = link.RawAttachProgram(link.RawAttachProgramOptions{
|
||||
// We rely on this fd being checked after attachFlags.
|
||||
Target: int(devnull.Fd()),
|
||||
// Attempt to "replace" bad fds with this program.
|
||||
Program: prog,
|
||||
Attach: ebpf.AttachCGroupDevice,
|
||||
Flags: unix.BPF_F_ALLOW_MULTI | unix.BPF_F_REPLACE,
|
||||
})
|
||||
if errors.Is(err, unix.EINVAL) {
|
||||
// not supported
|
||||
return
|
||||
}
|
||||
// attach_flags test succeeded.
|
||||
if !errors.Is(err, unix.EBADF) {
|
||||
logrus.Debugf("checking for BPF_F_REPLACE: got unexpected (not EBADF or EINVAL) error: %v", err)
|
||||
}
|
||||
haveBpfProgReplaceBool = true
|
||||
})
|
||||
return haveBpfProgReplaceBool
|
||||
}
|
||||
|
||||
// LoadAttachCgroupDeviceFilter installs eBPF device filter program to /sys/fs/cgroup/<foo> directory.
|
||||
//
|
||||
// Requires the system to be running in cgroup2 unified-mode with kernel >= 4.15 .
|
||||
//
|
||||
// https://github.com/torvalds/linux/commit/ebc614f687369f9df99828572b1d85a7c2de3d92
|
||||
func LoadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFd int) (func() error, error) {
|
||||
// Increase `ulimit -l` limit to avoid BPF_PROG_LOAD error (#2167).
|
||||
// This limit is not inherited into the container.
|
||||
memlockLimit := &unix.Rlimit{
|
||||
Cur: unix.RLIM_INFINITY,
|
||||
Max: unix.RLIM_INFINITY,
|
||||
}
|
||||
_ = unix.Setrlimit(unix.RLIMIT_MEMLOCK, memlockLimit)
|
||||
|
||||
// Get the list of existing programs.
|
||||
oldProgs, err := findAttachedCgroupDeviceFilters(dirFd)
|
||||
if err != nil {
|
||||
return nilCloser, err
|
||||
}
|
||||
useReplaceProg := haveBpfProgReplace() && len(oldProgs) == 1
|
||||
|
||||
// Generate new program.
|
||||
spec := &ebpf.ProgramSpec{
|
||||
Type: ebpf.CGroupDevice,
|
||||
Instructions: insts,
|
||||
License: license,
|
||||
}
|
||||
prog, err := ebpf.NewProgram(spec)
|
||||
if err != nil {
|
||||
return nilCloser, err
|
||||
}
|
||||
|
||||
// If there is only one old program, we can just replace it directly.
|
||||
var (
|
||||
replaceProg *ebpf.Program
|
||||
attachFlags uint32 = unix.BPF_F_ALLOW_MULTI
|
||||
)
|
||||
if useReplaceProg {
|
||||
replaceProg = oldProgs[0]
|
||||
attachFlags |= unix.BPF_F_REPLACE
|
||||
}
|
||||
err = link.RawAttachProgram(link.RawAttachProgramOptions{
|
||||
Target: dirFd,
|
||||
Program: prog,
|
||||
Replace: replaceProg,
|
||||
Attach: ebpf.AttachCGroupDevice,
|
||||
Flags: attachFlags,
|
||||
})
|
||||
if err != nil {
|
||||
return nilCloser, fmt.Errorf("failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI): %w", err)
|
||||
}
|
||||
closer := func() error {
|
||||
err = link.RawDetachProgram(link.RawDetachProgramOptions{
|
||||
Target: dirFd,
|
||||
Program: prog,
|
||||
Attach: ebpf.AttachCGroupDevice,
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE): %w", err)
|
||||
}
|
||||
// TODO: Should we attach the old filters back in this case? Otherwise
|
||||
// we fail-open on a security feature, which is a bit scary.
|
||||
return nil
|
||||
}
|
||||
if !useReplaceProg {
|
||||
logLevel := logrus.DebugLevel
|
||||
// If there was more than one old program, give a warning (since this
|
||||
// really shouldn't happen with runc-managed cgroups) and then detach
|
||||
// all the old programs.
|
||||
if len(oldProgs) > 1 {
|
||||
// NOTE: Ideally this should be a warning but it turns out that
|
||||
// systemd-managed cgroups trigger this warning (apparently
|
||||
// systemd doesn't delete old non-systemd programs when
|
||||
// setting properties).
|
||||
logrus.Infof("found more than one filter (%d) attached to a cgroup -- removing extra filters!", len(oldProgs))
|
||||
logLevel = logrus.InfoLevel
|
||||
}
|
||||
for idx, oldProg := range oldProgs {
|
||||
// Output some extra debug info.
|
||||
if info, err := oldProg.Info(); err == nil {
|
||||
fields := logrus.Fields{
|
||||
"type": info.Type.String(),
|
||||
"tag": info.Tag,
|
||||
"name": info.Name,
|
||||
}
|
||||
if id, ok := info.ID(); ok {
|
||||
fields["id"] = id
|
||||
}
|
||||
if runCount, ok := info.RunCount(); ok {
|
||||
fields["run_count"] = runCount
|
||||
}
|
||||
if runtime, ok := info.Runtime(); ok {
|
||||
fields["runtime"] = runtime.String()
|
||||
}
|
||||
logrus.WithFields(fields).Logf(logLevel, "removing old filter %d from cgroup", idx)
|
||||
}
|
||||
err = link.RawDetachProgram(link.RawDetachProgramOptions{
|
||||
Target: dirFd,
|
||||
Program: oldProg,
|
||||
Attach: ebpf.AttachCGroupDevice,
|
||||
})
|
||||
if err != nil {
|
||||
return closer, fmt.Errorf("failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE) on old filter program: %w", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
return closer, nil
|
||||
}
|
190
vendor/github.com/opencontainers/runc/libcontainer/cgroups/file.go
generated
vendored
190
vendor/github.com/opencontainers/runc/libcontainer/cgroups/file.go
generated
vendored
@ -1,190 +0,0 @@
|
||||
package cgroups
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// OpenFile opens a cgroup file in a given dir with given flags.
|
||||
// It is supposed to be used for cgroup files only, and returns
|
||||
// an error if the file is not a cgroup file.
|
||||
//
|
||||
// Arguments dir and file are joined together to form an absolute path
|
||||
// to a file being opened.
|
||||
func OpenFile(dir, file string, flags int) (*os.File, error) {
|
||||
if dir == "" {
|
||||
return nil, fmt.Errorf("no directory specified for %s", file)
|
||||
}
|
||||
return openFile(dir, file, flags)
|
||||
}
|
||||
|
||||
// ReadFile reads data from a cgroup file in dir.
|
||||
// It is supposed to be used for cgroup files only.
|
||||
func ReadFile(dir, file string) (string, error) {
|
||||
fd, err := OpenFile(dir, file, unix.O_RDONLY)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer fd.Close()
|
||||
var buf bytes.Buffer
|
||||
|
||||
_, err = buf.ReadFrom(fd)
|
||||
return buf.String(), err
|
||||
}
|
||||
|
||||
// WriteFile writes data to a cgroup file in dir.
|
||||
// It is supposed to be used for cgroup files only.
|
||||
func WriteFile(dir, file, data string) error {
|
||||
fd, err := OpenFile(dir, file, unix.O_WRONLY)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer fd.Close()
|
||||
if err := retryingWriteFile(fd, data); err != nil {
|
||||
// Having data in the error message helps in debugging.
|
||||
return fmt.Errorf("failed to write %q: %w", data, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func retryingWriteFile(fd *os.File, data string) error {
|
||||
for {
|
||||
_, err := fd.Write([]byte(data))
|
||||
if errors.Is(err, unix.EINTR) {
|
||||
logrus.Infof("interrupted while writing %s to %s", data, fd.Name())
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
const (
|
||||
cgroupfsDir = "/sys/fs/cgroup"
|
||||
cgroupfsPrefix = cgroupfsDir + "/"
|
||||
)
|
||||
|
||||
var (
|
||||
// TestMode is set to true by unit tests that need "fake" cgroupfs.
|
||||
TestMode bool
|
||||
|
||||
cgroupFd int = -1
|
||||
prepOnce sync.Once
|
||||
prepErr error
|
||||
resolveFlags uint64
|
||||
)
|
||||
|
||||
func prepareOpenat2() error {
|
||||
prepOnce.Do(func() {
|
||||
fd, err := unix.Openat2(-1, cgroupfsDir, &unix.OpenHow{
|
||||
Flags: unix.O_DIRECTORY | unix.O_PATH,
|
||||
})
|
||||
if err != nil {
|
||||
prepErr = &os.PathError{Op: "openat2", Path: cgroupfsDir, Err: err}
|
||||
if err != unix.ENOSYS { //nolint:errorlint // unix errors are bare
|
||||
logrus.Warnf("falling back to securejoin: %s", prepErr)
|
||||
} else {
|
||||
logrus.Debug("openat2 not available, falling back to securejoin")
|
||||
}
|
||||
return
|
||||
}
|
||||
var st unix.Statfs_t
|
||||
if err = unix.Fstatfs(fd, &st); err != nil {
|
||||
prepErr = &os.PathError{Op: "statfs", Path: cgroupfsDir, Err: err}
|
||||
logrus.Warnf("falling back to securejoin: %s", prepErr)
|
||||
return
|
||||
}
|
||||
|
||||
cgroupFd = fd
|
||||
|
||||
resolveFlags = unix.RESOLVE_BENEATH | unix.RESOLVE_NO_MAGICLINKS
|
||||
if st.Type == unix.CGROUP2_SUPER_MAGIC {
|
||||
// cgroupv2 has a single mountpoint and no "cpu,cpuacct" symlinks
|
||||
resolveFlags |= unix.RESOLVE_NO_XDEV | unix.RESOLVE_NO_SYMLINKS
|
||||
}
|
||||
})
|
||||
|
||||
return prepErr
|
||||
}
|
||||
|
||||
func openFile(dir, file string, flags int) (*os.File, error) {
|
||||
mode := os.FileMode(0)
|
||||
if TestMode && flags&os.O_WRONLY != 0 {
|
||||
// "emulate" cgroup fs for unit tests
|
||||
flags |= os.O_TRUNC | os.O_CREATE
|
||||
mode = 0o600
|
||||
}
|
||||
path := path.Join(dir, file)
|
||||
if prepareOpenat2() != nil {
|
||||
return openFallback(path, flags, mode)
|
||||
}
|
||||
relPath := strings.TrimPrefix(path, cgroupfsPrefix)
|
||||
if len(relPath) == len(path) { // non-standard path, old system?
|
||||
return openFallback(path, flags, mode)
|
||||
}
|
||||
|
||||
fd, err := unix.Openat2(cgroupFd, relPath,
|
||||
&unix.OpenHow{
|
||||
Resolve: resolveFlags,
|
||||
Flags: uint64(flags) | unix.O_CLOEXEC,
|
||||
Mode: uint64(mode),
|
||||
})
|
||||
if err != nil {
|
||||
err = &os.PathError{Op: "openat2", Path: path, Err: err}
|
||||
// Check if cgroupFd is still opened to cgroupfsDir
|
||||
// (happens when this package is incorrectly used
|
||||
// across the chroot/pivot_root/mntns boundary, or
|
||||
// when /sys/fs/cgroup is remounted).
|
||||
//
|
||||
// TODO: if such usage will ever be common, amend this
|
||||
// to reopen cgroupFd and retry openat2.
|
||||
fdStr := strconv.Itoa(cgroupFd)
|
||||
fdDest, _ := os.Readlink("/proc/self/fd/" + fdStr)
|
||||
if fdDest != cgroupfsDir {
|
||||
// Wrap the error so it is clear that cgroupFd
|
||||
// is opened to an unexpected/wrong directory.
|
||||
err = fmt.Errorf("cgroupFd %s unexpectedly opened to %s != %s: %w",
|
||||
fdStr, fdDest, cgroupfsDir, err)
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return os.NewFile(uintptr(fd), path), nil
|
||||
}
|
||||
|
||||
var errNotCgroupfs = errors.New("not a cgroup file")
|
||||
|
||||
// Can be changed by unit tests.
|
||||
var openFallback = openAndCheck
|
||||
|
||||
// openAndCheck is used when openat2(2) is not available. It checks the opened
|
||||
// file is on cgroupfs, returning an error otherwise.
|
||||
func openAndCheck(path string, flags int, mode os.FileMode) (*os.File, error) {
|
||||
fd, err := os.OpenFile(path, flags, mode)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if TestMode {
|
||||
return fd, nil
|
||||
}
|
||||
// Check this is a cgroupfs file.
|
||||
var st unix.Statfs_t
|
||||
if err := unix.Fstatfs(int(fd.Fd()), &st); err != nil {
|
||||
_ = fd.Close()
|
||||
return nil, &os.PathError{Op: "statfs", Path: path, Err: err}
|
||||
}
|
||||
if st.Type != unix.CGROUP_SUPER_MAGIC && st.Type != unix.CGROUP2_SUPER_MAGIC {
|
||||
_ = fd.Close()
|
||||
return nil, &os.PathError{Op: "open", Path: path, Err: errNotCgroupfs}
|
||||
}
|
||||
|
||||
return fd, nil
|
||||
}
|
311
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go
generated
vendored
311
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go
generated
vendored
@ -1,311 +0,0 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type BlkioGroup struct {
|
||||
weightFilename string
|
||||
weightDeviceFilename string
|
||||
}
|
||||
|
||||
func (s *BlkioGroup) Name() string {
|
||||
return "blkio"
|
||||
}
|
||||
|
||||
func (s *BlkioGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *BlkioGroup) Set(path string, r *configs.Resources) error {
|
||||
s.detectWeightFilenames(path)
|
||||
if r.BlkioWeight != 0 {
|
||||
if err := cgroups.WriteFile(path, s.weightFilename, strconv.FormatUint(uint64(r.BlkioWeight), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if r.BlkioLeafWeight != 0 {
|
||||
if err := cgroups.WriteFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(r.BlkioLeafWeight), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, wd := range r.BlkioWeightDevice {
|
||||
if wd.Weight != 0 {
|
||||
if err := cgroups.WriteFile(path, s.weightDeviceFilename, wd.WeightString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if wd.LeafWeight != 0 {
|
||||
if err := cgroups.WriteFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, td := range r.BlkioThrottleReadBpsDevice {
|
||||
if err := cgroups.WriteFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range r.BlkioThrottleWriteBpsDevice {
|
||||
if err := cgroups.WriteFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range r.BlkioThrottleReadIOPSDevice {
|
||||
if err := cgroups.WriteFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range r.BlkioThrottleWriteIOPSDevice {
|
||||
if err := cgroups.WriteFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
/*
|
||||
examples:
|
||||
|
||||
blkio.sectors
|
||||
8:0 6792
|
||||
|
||||
blkio.io_service_bytes
|
||||
8:0 Read 1282048
|
||||
8:0 Write 2195456
|
||||
8:0 Sync 2195456
|
||||
8:0 Async 1282048
|
||||
8:0 Total 3477504
|
||||
Total 3477504
|
||||
|
||||
blkio.io_serviced
|
||||
8:0 Read 124
|
||||
8:0 Write 104
|
||||
8:0 Sync 104
|
||||
8:0 Async 124
|
||||
8:0 Total 228
|
||||
Total 228
|
||||
|
||||
blkio.io_queued
|
||||
8:0 Read 0
|
||||
8:0 Write 0
|
||||
8:0 Sync 0
|
||||
8:0 Async 0
|
||||
8:0 Total 0
|
||||
Total 0
|
||||
*/
|
||||
|
||||
func splitBlkioStatLine(r rune) bool {
|
||||
return r == ' ' || r == ':'
|
||||
}
|
||||
|
||||
func getBlkioStat(dir, file string) ([]cgroups.BlkioStatEntry, error) {
|
||||
var blkioStats []cgroups.BlkioStatEntry
|
||||
f, err := cgroups.OpenFile(dir, file, os.O_RDONLY)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return blkioStats, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
sc := bufio.NewScanner(f)
|
||||
for sc.Scan() {
|
||||
// format: dev type amount
|
||||
fields := strings.FieldsFunc(sc.Text(), splitBlkioStatLine)
|
||||
if len(fields) < 3 {
|
||||
if len(fields) == 2 && fields[0] == "Total" {
|
||||
// skip total line
|
||||
continue
|
||||
} else {
|
||||
return nil, malformedLine(dir, file, sc.Text())
|
||||
}
|
||||
}
|
||||
|
||||
v, err := strconv.ParseUint(fields[0], 10, 64)
|
||||
if err != nil {
|
||||
return nil, &parseError{Path: dir, File: file, Err: err}
|
||||
}
|
||||
major := v
|
||||
|
||||
v, err = strconv.ParseUint(fields[1], 10, 64)
|
||||
if err != nil {
|
||||
return nil, &parseError{Path: dir, File: file, Err: err}
|
||||
}
|
||||
minor := v
|
||||
|
||||
op := ""
|
||||
valueField := 2
|
||||
if len(fields) == 4 {
|
||||
op = fields[2]
|
||||
valueField = 3
|
||||
}
|
||||
v, err = strconv.ParseUint(fields[valueField], 10, 64)
|
||||
if err != nil {
|
||||
return nil, &parseError{Path: dir, File: file, Err: err}
|
||||
}
|
||||
blkioStats = append(blkioStats, cgroups.BlkioStatEntry{Major: major, Minor: minor, Op: op, Value: v})
|
||||
}
|
||||
if err := sc.Err(); err != nil {
|
||||
return nil, &parseError{Path: dir, File: file, Err: err}
|
||||
}
|
||||
|
||||
return blkioStats, nil
|
||||
}
|
||||
|
||||
func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
type blkioStatInfo struct {
|
||||
filename string
|
||||
blkioStatEntriesPtr *[]cgroups.BlkioStatEntry
|
||||
}
|
||||
bfqDebugStats := []blkioStatInfo{
|
||||
{
|
||||
filename: "blkio.bfq.sectors_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.SectorsRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.bfq.io_service_time_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceTimeRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.bfq.io_wait_time_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoWaitTimeRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.bfq.io_merged_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoMergedRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.bfq.io_queued_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoQueuedRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.bfq.time_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoTimeRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.bfq.io_serviced_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.bfq.io_service_bytes_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
|
||||
},
|
||||
}
|
||||
bfqStats := []blkioStatInfo{
|
||||
{
|
||||
filename: "blkio.bfq.io_serviced_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.bfq.io_service_bytes_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
|
||||
},
|
||||
}
|
||||
cfqStats := []blkioStatInfo{
|
||||
{
|
||||
filename: "blkio.sectors_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.SectorsRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.io_service_time_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceTimeRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.io_wait_time_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoWaitTimeRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.io_merged_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoMergedRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.io_queued_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoQueuedRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.time_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoTimeRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.io_serviced_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.io_service_bytes_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
|
||||
},
|
||||
}
|
||||
throttleRecursiveStats := []blkioStatInfo{
|
||||
{
|
||||
filename: "blkio.throttle.io_serviced_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.throttle.io_service_bytes_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
|
||||
},
|
||||
}
|
||||
baseStats := []blkioStatInfo{
|
||||
{
|
||||
filename: "blkio.throttle.io_serviced",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.throttle.io_service_bytes",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
|
||||
},
|
||||
}
|
||||
orderedStats := [][]blkioStatInfo{
|
||||
bfqDebugStats,
|
||||
bfqStats,
|
||||
cfqStats,
|
||||
throttleRecursiveStats,
|
||||
baseStats,
|
||||
}
|
||||
|
||||
var blkioStats []cgroups.BlkioStatEntry
|
||||
var err error
|
||||
|
||||
for _, statGroup := range orderedStats {
|
||||
for i, statInfo := range statGroup {
|
||||
if blkioStats, err = getBlkioStat(path, statInfo.filename); err != nil || blkioStats == nil {
|
||||
// if error occurs on first file, move to next group
|
||||
if i == 0 {
|
||||
break
|
||||
}
|
||||
return err
|
||||
}
|
||||
*statInfo.blkioStatEntriesPtr = blkioStats
|
||||
// finish if all stats are gathered
|
||||
if i == len(statGroup)-1 {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *BlkioGroup) detectWeightFilenames(path string) {
|
||||
if s.weightFilename != "" {
|
||||
// Already detected.
|
||||
return
|
||||
}
|
||||
if cgroups.PathExists(filepath.Join(path, "blkio.weight")) {
|
||||
s.weightFilename = "blkio.weight"
|
||||
s.weightDeviceFilename = "blkio.weight_device"
|
||||
} else {
|
||||
s.weightFilename = "blkio.bfq.weight"
|
||||
s.weightDeviceFilename = "blkio.bfq.weight_device"
|
||||
}
|
||||
}
|
129
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go
generated
vendored
129
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go
generated
vendored
@ -1,129 +0,0 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
type CpuGroup struct{}
|
||||
|
||||
func (s *CpuGroup) Name() string {
|
||||
return "cpu"
|
||||
}
|
||||
|
||||
func (s *CpuGroup) Apply(path string, r *configs.Resources, pid int) error {
|
||||
if err := os.MkdirAll(path, 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
// We should set the real-Time group scheduling settings before moving
|
||||
// in the process because if the process is already in SCHED_RR mode
|
||||
// and no RT bandwidth is set, adding it will fail.
|
||||
if err := s.SetRtSched(path, r); err != nil {
|
||||
return err
|
||||
}
|
||||
// Since we are not using apply(), we need to place the pid
|
||||
// into the procs file.
|
||||
return cgroups.WriteCgroupProc(path, pid)
|
||||
}
|
||||
|
||||
func (s *CpuGroup) SetRtSched(path string, r *configs.Resources) error {
|
||||
if r.CpuRtPeriod != 0 {
|
||||
if err := cgroups.WriteFile(path, "cpu.rt_period_us", strconv.FormatUint(r.CpuRtPeriod, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if r.CpuRtRuntime != 0 {
|
||||
if err := cgroups.WriteFile(path, "cpu.rt_runtime_us", strconv.FormatInt(r.CpuRtRuntime, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *CpuGroup) Set(path string, r *configs.Resources) error {
|
||||
if r.CpuShares != 0 {
|
||||
shares := r.CpuShares
|
||||
if err := cgroups.WriteFile(path, "cpu.shares", strconv.FormatUint(shares, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
// read it back
|
||||
sharesRead, err := fscommon.GetCgroupParamUint(path, "cpu.shares")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// ... and check
|
||||
if shares > sharesRead {
|
||||
return fmt.Errorf("the maximum allowed cpu-shares is %d", sharesRead)
|
||||
} else if shares < sharesRead {
|
||||
return fmt.Errorf("the minimum allowed cpu-shares is %d", sharesRead)
|
||||
}
|
||||
}
|
||||
|
||||
var period string
|
||||
if r.CpuPeriod != 0 {
|
||||
period = strconv.FormatUint(r.CpuPeriod, 10)
|
||||
if err := cgroups.WriteFile(path, "cpu.cfs_period_us", period); err != nil {
|
||||
// Sometimes when the period to be set is smaller
|
||||
// than the current one, it is rejected by the kernel
|
||||
// (EINVAL) as old_quota/new_period exceeds the parent
|
||||
// cgroup quota limit. If this happens and the quota is
|
||||
// going to be set, ignore the error for now and retry
|
||||
// after setting the quota.
|
||||
if !errors.Is(err, unix.EINVAL) || r.CpuQuota == 0 {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
period = ""
|
||||
}
|
||||
}
|
||||
if r.CpuQuota != 0 {
|
||||
if err := cgroups.WriteFile(path, "cpu.cfs_quota_us", strconv.FormatInt(r.CpuQuota, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
if period != "" {
|
||||
if err := cgroups.WriteFile(path, "cpu.cfs_period_us", period); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return s.SetRtSched(path, r)
|
||||
}
|
||||
|
||||
func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
const file = "cpu.stat"
|
||||
f, err := cgroups.OpenFile(path, file, os.O_RDONLY)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
sc := bufio.NewScanner(f)
|
||||
for sc.Scan() {
|
||||
t, v, err := fscommon.ParseKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
switch t {
|
||||
case "nr_periods":
|
||||
stats.CpuStats.ThrottlingData.Periods = v
|
||||
|
||||
case "nr_throttled":
|
||||
stats.CpuStats.ThrottlingData.ThrottledPeriods = v
|
||||
|
||||
case "throttled_time":
|
||||
stats.CpuStats.ThrottlingData.ThrottledTime = v
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
166
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go
generated
vendored
166
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go
generated
vendored
@ -1,166 +0,0 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
const (
|
||||
cgroupCpuacctStat = "cpuacct.stat"
|
||||
cgroupCpuacctUsageAll = "cpuacct.usage_all"
|
||||
|
||||
nanosecondsInSecond = 1000000000
|
||||
|
||||
userModeColumn = 1
|
||||
kernelModeColumn = 2
|
||||
cuacctUsageAllColumnsNumber = 3
|
||||
|
||||
// The value comes from `C.sysconf(C._SC_CLK_TCK)`, and
|
||||
// on Linux it's a constant which is safe to be hard coded,
|
||||
// so we can avoid using cgo here. For details, see:
|
||||
// https://github.com/containerd/cgroups/pull/12
|
||||
clockTicks uint64 = 100
|
||||
)
|
||||
|
||||
type CpuacctGroup struct{}
|
||||
|
||||
func (s *CpuacctGroup) Name() string {
|
||||
return "cpuacct"
|
||||
}
|
||||
|
||||
func (s *CpuacctGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *CpuacctGroup) Set(_ string, _ *configs.Resources) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *CpuacctGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
if !cgroups.PathExists(path) {
|
||||
return nil
|
||||
}
|
||||
userModeUsage, kernelModeUsage, err := getCpuUsageBreakdown(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
totalUsage, err := fscommon.GetCgroupParamUint(path, "cpuacct.usage")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
percpuUsage, err := getPercpuUsage(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
percpuUsageInKernelmode, percpuUsageInUsermode, err := getPercpuUsageInModes(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CpuStats.CpuUsage.TotalUsage = totalUsage
|
||||
stats.CpuStats.CpuUsage.PercpuUsage = percpuUsage
|
||||
stats.CpuStats.CpuUsage.PercpuUsageInKernelmode = percpuUsageInKernelmode
|
||||
stats.CpuStats.CpuUsage.PercpuUsageInUsermode = percpuUsageInUsermode
|
||||
stats.CpuStats.CpuUsage.UsageInUsermode = userModeUsage
|
||||
stats.CpuStats.CpuUsage.UsageInKernelmode = kernelModeUsage
|
||||
return nil
|
||||
}
|
||||
|
||||
// Returns user and kernel usage breakdown in nanoseconds.
|
||||
func getCpuUsageBreakdown(path string) (uint64, uint64, error) {
|
||||
var userModeUsage, kernelModeUsage uint64
|
||||
const (
|
||||
userField = "user"
|
||||
systemField = "system"
|
||||
file = cgroupCpuacctStat
|
||||
)
|
||||
|
||||
// Expected format:
|
||||
// user <usage in ticks>
|
||||
// system <usage in ticks>
|
||||
data, err := cgroups.ReadFile(path, file)
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
// TODO: use strings.SplitN instead.
|
||||
fields := strings.Fields(data)
|
||||
if len(fields) < 4 || fields[0] != userField || fields[2] != systemField {
|
||||
return 0, 0, malformedLine(path, file, data)
|
||||
}
|
||||
if userModeUsage, err = strconv.ParseUint(fields[1], 10, 64); err != nil {
|
||||
return 0, 0, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
if kernelModeUsage, err = strconv.ParseUint(fields[3], 10, 64); err != nil {
|
||||
return 0, 0, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
|
||||
return (userModeUsage * nanosecondsInSecond) / clockTicks, (kernelModeUsage * nanosecondsInSecond) / clockTicks, nil
|
||||
}
|
||||
|
||||
func getPercpuUsage(path string) ([]uint64, error) {
|
||||
const file = "cpuacct.usage_percpu"
|
||||
percpuUsage := []uint64{}
|
||||
data, err := cgroups.ReadFile(path, file)
|
||||
if err != nil {
|
||||
return percpuUsage, err
|
||||
}
|
||||
// TODO: use strings.SplitN instead.
|
||||
for _, value := range strings.Fields(data) {
|
||||
value, err := strconv.ParseUint(value, 10, 64)
|
||||
if err != nil {
|
||||
return percpuUsage, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
percpuUsage = append(percpuUsage, value)
|
||||
}
|
||||
return percpuUsage, nil
|
||||
}
|
||||
|
||||
func getPercpuUsageInModes(path string) ([]uint64, []uint64, error) {
|
||||
usageKernelMode := []uint64{}
|
||||
usageUserMode := []uint64{}
|
||||
const file = cgroupCpuacctUsageAll
|
||||
|
||||
fd, err := cgroups.OpenFile(path, file, os.O_RDONLY)
|
||||
if os.IsNotExist(err) {
|
||||
return usageKernelMode, usageUserMode, nil
|
||||
} else if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
defer fd.Close()
|
||||
|
||||
scanner := bufio.NewScanner(fd)
|
||||
scanner.Scan() // skipping header line
|
||||
|
||||
for scanner.Scan() {
|
||||
lineFields := strings.SplitN(scanner.Text(), " ", cuacctUsageAllColumnsNumber+1)
|
||||
if len(lineFields) != cuacctUsageAllColumnsNumber {
|
||||
continue
|
||||
}
|
||||
|
||||
usageInKernelMode, err := strconv.ParseUint(lineFields[kernelModeColumn], 10, 64)
|
||||
if err != nil {
|
||||
return nil, nil, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
usageKernelMode = append(usageKernelMode, usageInKernelMode)
|
||||
|
||||
usageInUserMode, err := strconv.ParseUint(lineFields[userModeColumn], 10, 64)
|
||||
if err != nil {
|
||||
return nil, nil, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
usageUserMode = append(usageUserMode, usageInUserMode)
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return nil, nil, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
|
||||
return usageKernelMode, usageUserMode, nil
|
||||
}
|
245
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go
generated
vendored
245
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go
generated
vendored
@ -1,245 +0,0 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type CpusetGroup struct{}
|
||||
|
||||
func (s *CpusetGroup) Name() string {
|
||||
return "cpuset"
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) Apply(path string, r *configs.Resources, pid int) error {
|
||||
return s.ApplyDir(path, r, pid)
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) Set(path string, r *configs.Resources) error {
|
||||
if r.CpusetCpus != "" {
|
||||
if err := cgroups.WriteFile(path, "cpuset.cpus", r.CpusetCpus); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if r.CpusetMems != "" {
|
||||
if err := cgroups.WriteFile(path, "cpuset.mems", r.CpusetMems); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func getCpusetStat(path string, file string) ([]uint16, error) {
|
||||
var extracted []uint16
|
||||
fileContent, err := fscommon.GetCgroupParamString(path, file)
|
||||
if err != nil {
|
||||
return extracted, err
|
||||
}
|
||||
if len(fileContent) == 0 {
|
||||
return extracted, &parseError{Path: path, File: file, Err: errors.New("empty file")}
|
||||
}
|
||||
|
||||
for _, s := range strings.Split(fileContent, ",") {
|
||||
sp := strings.SplitN(s, "-", 3)
|
||||
switch len(sp) {
|
||||
case 3:
|
||||
return extracted, &parseError{Path: path, File: file, Err: errors.New("extra dash")}
|
||||
case 2:
|
||||
min, err := strconv.ParseUint(sp[0], 10, 16)
|
||||
if err != nil {
|
||||
return extracted, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
max, err := strconv.ParseUint(sp[1], 10, 16)
|
||||
if err != nil {
|
||||
return extracted, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
if min > max {
|
||||
return extracted, &parseError{Path: path, File: file, Err: errors.New("invalid values, min > max")}
|
||||
}
|
||||
for i := min; i <= max; i++ {
|
||||
extracted = append(extracted, uint16(i))
|
||||
}
|
||||
case 1:
|
||||
value, err := strconv.ParseUint(s, 10, 16)
|
||||
if err != nil {
|
||||
return extracted, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
extracted = append(extracted, uint16(value))
|
||||
}
|
||||
}
|
||||
|
||||
return extracted, nil
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
var err error
|
||||
|
||||
stats.CPUSetStats.CPUs, err = getCpusetStat(path, "cpuset.cpus")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.CPUExclusive, err = fscommon.GetCgroupParamUint(path, "cpuset.cpu_exclusive")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.Mems, err = getCpusetStat(path, "cpuset.mems")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.MemHardwall, err = fscommon.GetCgroupParamUint(path, "cpuset.mem_hardwall")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.MemExclusive, err = fscommon.GetCgroupParamUint(path, "cpuset.mem_exclusive")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.MemoryMigrate, err = fscommon.GetCgroupParamUint(path, "cpuset.memory_migrate")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.MemorySpreadPage, err = fscommon.GetCgroupParamUint(path, "cpuset.memory_spread_page")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.MemorySpreadSlab, err = fscommon.GetCgroupParamUint(path, "cpuset.memory_spread_slab")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.MemoryPressure, err = fscommon.GetCgroupParamUint(path, "cpuset.memory_pressure")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.SchedLoadBalance, err = fscommon.GetCgroupParamUint(path, "cpuset.sched_load_balance")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.SchedRelaxDomainLevel, err = fscommon.GetCgroupParamInt(path, "cpuset.sched_relax_domain_level")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) ApplyDir(dir string, r *configs.Resources, pid int) error {
|
||||
// This might happen if we have no cpuset cgroup mounted.
|
||||
// Just do nothing and don't fail.
|
||||
if dir == "" {
|
||||
return nil
|
||||
}
|
||||
// 'ensureParent' start with parent because we don't want to
|
||||
// explicitly inherit from parent, it could conflict with
|
||||
// 'cpuset.cpu_exclusive'.
|
||||
if err := cpusetEnsureParent(filepath.Dir(dir)); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.Mkdir(dir, 0o755); err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
// We didn't inherit cpuset configs from parent, but we have
|
||||
// to ensure cpuset configs are set before moving task into the
|
||||
// cgroup.
|
||||
// The logic is, if user specified cpuset configs, use these
|
||||
// specified configs, otherwise, inherit from parent. This makes
|
||||
// cpuset configs work correctly with 'cpuset.cpu_exclusive', and
|
||||
// keep backward compatibility.
|
||||
if err := s.ensureCpusAndMems(dir, r); err != nil {
|
||||
return err
|
||||
}
|
||||
// Since we are not using apply(), we need to place the pid
|
||||
// into the procs file.
|
||||
return cgroups.WriteCgroupProc(dir, pid)
|
||||
}
|
||||
|
||||
func getCpusetSubsystemSettings(parent string) (cpus, mems string, err error) {
|
||||
if cpus, err = cgroups.ReadFile(parent, "cpuset.cpus"); err != nil {
|
||||
return
|
||||
}
|
||||
if mems, err = cgroups.ReadFile(parent, "cpuset.mems"); err != nil {
|
||||
return
|
||||
}
|
||||
return cpus, mems, nil
|
||||
}
|
||||
|
||||
// cpusetEnsureParent makes sure that the parent directories of current
|
||||
// are created and populated with the proper cpus and mems files copied
|
||||
// from their respective parent. It does that recursively, starting from
|
||||
// the top of the cpuset hierarchy (i.e. cpuset cgroup mount point).
|
||||
func cpusetEnsureParent(current string) error {
|
||||
var st unix.Statfs_t
|
||||
|
||||
parent := filepath.Dir(current)
|
||||
err := unix.Statfs(parent, &st)
|
||||
if err == nil && st.Type != unix.CGROUP_SUPER_MAGIC {
|
||||
return nil
|
||||
}
|
||||
// Treat non-existing directory as cgroupfs as it will be created,
|
||||
// and the root cpuset directory obviously exists.
|
||||
if err != nil && err != unix.ENOENT { //nolint:errorlint // unix errors are bare
|
||||
return &os.PathError{Op: "statfs", Path: parent, Err: err}
|
||||
}
|
||||
|
||||
if err := cpusetEnsureParent(parent); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.Mkdir(current, 0o755); err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
return cpusetCopyIfNeeded(current, parent)
|
||||
}
|
||||
|
||||
// cpusetCopyIfNeeded copies the cpuset.cpus and cpuset.mems from the parent
|
||||
// directory to the current directory if the file's contents are 0
|
||||
func cpusetCopyIfNeeded(current, parent string) error {
|
||||
currentCpus, currentMems, err := getCpusetSubsystemSettings(current)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
parentCpus, parentMems, err := getCpusetSubsystemSettings(parent)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if isEmptyCpuset(currentCpus) {
|
||||
if err := cgroups.WriteFile(current, "cpuset.cpus", parentCpus); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if isEmptyCpuset(currentMems) {
|
||||
if err := cgroups.WriteFile(current, "cpuset.mems", parentMems); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func isEmptyCpuset(str string) bool {
|
||||
return str == "" || str == "\n"
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) ensureCpusAndMems(path string, r *configs.Resources) error {
|
||||
if err := s.Set(path, r); err != nil {
|
||||
return err
|
||||
}
|
||||
return cpusetCopyIfNeeded(path, filepath.Dir(path))
|
||||
}
|
109
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go
generated
vendored
109
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go
generated
vendored
@ -1,109 +0,0 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"reflect"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
cgroupdevices "github.com/opencontainers/runc/libcontainer/cgroups/devices"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/devices"
|
||||
"github.com/opencontainers/runc/libcontainer/userns"
|
||||
)
|
||||
|
||||
type DevicesGroup struct {
|
||||
TestingSkipFinalCheck bool
|
||||
}
|
||||
|
||||
func (s *DevicesGroup) Name() string {
|
||||
return "devices"
|
||||
}
|
||||
|
||||
func (s *DevicesGroup) Apply(path string, r *configs.Resources, pid int) error {
|
||||
if r.SkipDevices {
|
||||
return nil
|
||||
}
|
||||
if path == "" {
|
||||
// Return error here, since devices cgroup
|
||||
// is a hard requirement for container's security.
|
||||
return errSubsystemDoesNotExist
|
||||
}
|
||||
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func loadEmulator(path string) (*cgroupdevices.Emulator, error) {
|
||||
list, err := cgroups.ReadFile(path, "devices.list")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cgroupdevices.EmulatorFromList(bytes.NewBufferString(list))
|
||||
}
|
||||
|
||||
func buildEmulator(rules []*devices.Rule) (*cgroupdevices.Emulator, error) {
|
||||
// This defaults to a white-list -- which is what we want!
|
||||
emu := &cgroupdevices.Emulator{}
|
||||
for _, rule := range rules {
|
||||
if err := emu.Apply(*rule); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return emu, nil
|
||||
}
|
||||
|
||||
func (s *DevicesGroup) Set(path string, r *configs.Resources) error {
|
||||
if userns.RunningInUserNS() || r.SkipDevices {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Generate two emulators, one for the current state of the cgroup and one
|
||||
// for the requested state by the user.
|
||||
current, err := loadEmulator(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
target, err := buildEmulator(r.Devices)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Compute the minimal set of transition rules needed to achieve the
|
||||
// requested state.
|
||||
transitionRules, err := current.Transition(target)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, rule := range transitionRules {
|
||||
file := "devices.deny"
|
||||
if rule.Allow {
|
||||
file = "devices.allow"
|
||||
}
|
||||
if err := cgroups.WriteFile(path, file, rule.CgroupString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Final safety check -- ensure that the resulting state is what was
|
||||
// requested. This is only really correct for white-lists, but for
|
||||
// black-lists we can at least check that the cgroup is in the right mode.
|
||||
//
|
||||
// This safety-check is skipped for the unit tests because we cannot
|
||||
// currently mock devices.list correctly.
|
||||
if !s.TestingSkipFinalCheck {
|
||||
currentAfter, err := loadEmulator(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !target.IsBlacklist() && !reflect.DeepEqual(currentAfter, target) {
|
||||
return errors.New("resulting devices cgroup doesn't precisely match target")
|
||||
} else if target.IsBlacklist() != currentAfter.IsBlacklist() {
|
||||
return errors.New("resulting devices cgroup doesn't match target mode")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *DevicesGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
15
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/error.go
generated
vendored
15
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/error.go
generated
vendored
@ -1,15 +0,0 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
)
|
||||
|
||||
type parseError = fscommon.ParseError
|
||||
|
||||
// malformedLine is used by all cgroupfs file parsers that expect a line
|
||||
// in a particular format but get some garbage instead.
|
||||
func malformedLine(path, file, line string) error {
|
||||
return &parseError{Path: path, File: file, Err: fmt.Errorf("malformed line: %s", line)}
|
||||
}
|
158
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go
generated
vendored
158
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go
generated
vendored
@ -1,158 +0,0 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
type FreezerGroup struct{}
|
||||
|
||||
func (s *FreezerGroup) Name() string {
|
||||
return "freezer"
|
||||
}
|
||||
|
||||
func (s *FreezerGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *FreezerGroup) Set(path string, r *configs.Resources) (Err error) {
|
||||
switch r.Freezer {
|
||||
case configs.Frozen:
|
||||
defer func() {
|
||||
if Err != nil {
|
||||
// Freezing failed, and it is bad and dangerous
|
||||
// to leave the cgroup in FROZEN or FREEZING
|
||||
// state, so (try to) thaw it back.
|
||||
_ = cgroups.WriteFile(path, "freezer.state", string(configs.Thawed))
|
||||
}
|
||||
}()
|
||||
|
||||
// As per older kernel docs (freezer-subsystem.txt before
|
||||
// kernel commit ef9fe980c6fcc1821), if FREEZING is seen,
|
||||
// userspace should either retry or thaw. While current
|
||||
// kernel cgroup v1 docs no longer mention a need to retry,
|
||||
// even a recent kernel (v5.4, Ubuntu 20.04) can't reliably
|
||||
// freeze a cgroup v1 while new processes keep appearing in it
|
||||
// (either via fork/clone or by writing new PIDs to
|
||||
// cgroup.procs).
|
||||
//
|
||||
// The numbers below are empirically chosen to have a decent
|
||||
// chance to succeed in various scenarios ("runc pause/unpause
|
||||
// with parallel runc exec" and "bare freeze/unfreeze on a very
|
||||
// slow system"), tested on RHEL7 and Ubuntu 20.04 kernels.
|
||||
//
|
||||
// Adding any amount of sleep in between retries did not
|
||||
// increase the chances of successful freeze in "pause/unpause
|
||||
// with parallel exec" reproducer. OTOH, adding an occasional
|
||||
// sleep helped for the case where the system is extremely slow
|
||||
// (CentOS 7 VM on GHA CI).
|
||||
//
|
||||
// Alas, this is still a game of chances, since the real fix
|
||||
// belong to the kernel (cgroup v2 do not have this bug).
|
||||
|
||||
for i := 0; i < 1000; i++ {
|
||||
if i%50 == 49 {
|
||||
// Occasional thaw and sleep improves
|
||||
// the chances to succeed in freezing
|
||||
// in case new processes keep appearing
|
||||
// in the cgroup.
|
||||
_ = cgroups.WriteFile(path, "freezer.state", string(configs.Thawed))
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
}
|
||||
|
||||
if err := cgroups.WriteFile(path, "freezer.state", string(configs.Frozen)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if i%25 == 24 {
|
||||
// Occasional short sleep before reading
|
||||
// the state back also improves the chances to
|
||||
// succeed in freezing in case of a very slow
|
||||
// system.
|
||||
time.Sleep(10 * time.Microsecond)
|
||||
}
|
||||
state, err := cgroups.ReadFile(path, "freezer.state")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
state = strings.TrimSpace(state)
|
||||
switch state {
|
||||
case "FREEZING":
|
||||
continue
|
||||
case string(configs.Frozen):
|
||||
if i > 1 {
|
||||
logrus.Debugf("frozen after %d retries", i)
|
||||
}
|
||||
return nil
|
||||
default:
|
||||
// should never happen
|
||||
return fmt.Errorf("unexpected state %s while freezing", strings.TrimSpace(state))
|
||||
}
|
||||
}
|
||||
// Despite our best efforts, it got stuck in FREEZING.
|
||||
return errors.New("unable to freeze")
|
||||
case configs.Thawed:
|
||||
return cgroups.WriteFile(path, "freezer.state", string(configs.Thawed))
|
||||
case configs.Undefined:
|
||||
return nil
|
||||
default:
|
||||
return fmt.Errorf("Invalid argument '%s' to freezer.state", string(r.Freezer))
|
||||
}
|
||||
}
|
||||
|
||||
func (s *FreezerGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *FreezerGroup) GetState(path string) (configs.FreezerState, error) {
|
||||
for {
|
||||
state, err := cgroups.ReadFile(path, "freezer.state")
|
||||
if err != nil {
|
||||
// If the kernel is too old, then we just treat the freezer as
|
||||
// being in an "undefined" state.
|
||||
if os.IsNotExist(err) || errors.Is(err, unix.ENODEV) {
|
||||
err = nil
|
||||
}
|
||||
return configs.Undefined, err
|
||||
}
|
||||
switch strings.TrimSpace(state) {
|
||||
case "THAWED":
|
||||
return configs.Thawed, nil
|
||||
case "FROZEN":
|
||||
// Find out whether the cgroup is frozen directly,
|
||||
// or indirectly via an ancestor.
|
||||
self, err := cgroups.ReadFile(path, "freezer.self_freezing")
|
||||
if err != nil {
|
||||
// If the kernel is too old, then we just treat
|
||||
// it as being frozen.
|
||||
if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.ENODEV) {
|
||||
err = nil
|
||||
}
|
||||
return configs.Frozen, err
|
||||
}
|
||||
switch self {
|
||||
case "0\n":
|
||||
return configs.Thawed, nil
|
||||
case "1\n":
|
||||
return configs.Frozen, nil
|
||||
default:
|
||||
return configs.Undefined, fmt.Errorf(`unknown "freezer.self_freezing" state: %q`, self)
|
||||
}
|
||||
case "FREEZING":
|
||||
// Make sure we get a stable freezer state, so retry if the cgroup
|
||||
// is still undergoing freezing. This should be a temporary delay.
|
||||
time.Sleep(1 * time.Millisecond)
|
||||
continue
|
||||
default:
|
||||
return configs.Undefined, fmt.Errorf("unknown freezer.state %q", state)
|
||||
}
|
||||
}
|
||||
}
|
265
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs.go
generated
vendored
265
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs.go
generated
vendored
@ -1,265 +0,0 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"sync"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
var subsystems = []subsystem{
|
||||
&CpusetGroup{},
|
||||
&DevicesGroup{},
|
||||
&MemoryGroup{},
|
||||
&CpuGroup{},
|
||||
&CpuacctGroup{},
|
||||
&PidsGroup{},
|
||||
&BlkioGroup{},
|
||||
&HugetlbGroup{},
|
||||
&NetClsGroup{},
|
||||
&NetPrioGroup{},
|
||||
&PerfEventGroup{},
|
||||
&FreezerGroup{},
|
||||
&RdmaGroup{},
|
||||
&NameGroup{GroupName: "name=systemd", Join: true},
|
||||
&NameGroup{GroupName: "misc", Join: true},
|
||||
}
|
||||
|
||||
var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist")
|
||||
|
||||
func init() {
|
||||
// If using cgroups-hybrid mode then add a "" controller indicating
|
||||
// it should join the cgroups v2.
|
||||
if cgroups.IsCgroup2HybridMode() {
|
||||
subsystems = append(subsystems, &NameGroup{GroupName: "", Join: true})
|
||||
}
|
||||
}
|
||||
|
||||
type subsystem interface {
|
||||
// Name returns the name of the subsystem.
|
||||
Name() string
|
||||
// GetStats fills in the stats for the subsystem.
|
||||
GetStats(path string, stats *cgroups.Stats) error
|
||||
// Apply creates and joins a cgroup, adding pid into it. Some
|
||||
// subsystems use resources to pre-configure the cgroup parents
|
||||
// before creating or joining it.
|
||||
Apply(path string, r *configs.Resources, pid int) error
|
||||
// Set sets the cgroup resources.
|
||||
Set(path string, r *configs.Resources) error
|
||||
}
|
||||
|
||||
type manager struct {
|
||||
mu sync.Mutex
|
||||
cgroups *configs.Cgroup
|
||||
paths map[string]string
|
||||
}
|
||||
|
||||
func NewManager(cg *configs.Cgroup, paths map[string]string) (cgroups.Manager, error) {
|
||||
// Some v1 controllers (cpu, cpuset, and devices) expect
|
||||
// cgroups.Resources to not be nil in Apply.
|
||||
if cg.Resources == nil {
|
||||
return nil, errors.New("cgroup v1 manager needs configs.Resources to be set during manager creation")
|
||||
}
|
||||
if cg.Resources.Unified != nil {
|
||||
return nil, cgroups.ErrV1NoUnified
|
||||
}
|
||||
|
||||
if paths == nil {
|
||||
var err error
|
||||
paths, err = initPaths(cg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return &manager{
|
||||
cgroups: cg,
|
||||
paths: paths,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// isIgnorableError returns whether err is a permission error (in the loose
|
||||
// sense of the word). This includes EROFS (which for an unprivileged user is
|
||||
// basically a permission error) and EACCES (for similar reasons) as well as
|
||||
// the normal EPERM.
|
||||
func isIgnorableError(rootless bool, err error) bool {
|
||||
// We do not ignore errors if we are root.
|
||||
if !rootless {
|
||||
return false
|
||||
}
|
||||
// Is it an ordinary EPERM?
|
||||
if errors.Is(err, os.ErrPermission) {
|
||||
return true
|
||||
}
|
||||
// Handle some specific syscall errors.
|
||||
var errno unix.Errno
|
||||
if errors.As(err, &errno) {
|
||||
return errno == unix.EROFS || errno == unix.EPERM || errno == unix.EACCES
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (m *manager) Apply(pid int) (err error) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
c := m.cgroups
|
||||
|
||||
for _, sys := range subsystems {
|
||||
name := sys.Name()
|
||||
p, ok := m.paths[name]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
if err := sys.Apply(p, c.Resources, pid); err != nil {
|
||||
// In the case of rootless (including euid=0 in userns), where an
|
||||
// explicit cgroup path hasn't been set, we don't bail on error in
|
||||
// case of permission problems here, but do delete the path from
|
||||
// the m.paths map, since it is either non-existent and could not
|
||||
// be created, or the pid could not be added to it.
|
||||
//
|
||||
// Cases where limits for the subsystem have been set are handled
|
||||
// later by Set, which fails with a friendly error (see
|
||||
// if path == "" in Set).
|
||||
if isIgnorableError(c.Rootless, err) && c.Path == "" {
|
||||
delete(m.paths, name)
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *manager) Destroy() error {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
return cgroups.RemovePaths(m.paths)
|
||||
}
|
||||
|
||||
func (m *manager) Path(subsys string) string {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
return m.paths[subsys]
|
||||
}
|
||||
|
||||
func (m *manager) GetStats() (*cgroups.Stats, error) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
stats := cgroups.NewStats()
|
||||
for _, sys := range subsystems {
|
||||
path := m.paths[sys.Name()]
|
||||
if path == "" {
|
||||
continue
|
||||
}
|
||||
if err := sys.GetStats(path, stats); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
func (m *manager) Set(r *configs.Resources) error {
|
||||
if r == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
if r.Unified != nil {
|
||||
return cgroups.ErrV1NoUnified
|
||||
}
|
||||
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
for _, sys := range subsystems {
|
||||
path := m.paths[sys.Name()]
|
||||
if err := sys.Set(path, r); err != nil {
|
||||
// When rootless is true, errors from the device subsystem
|
||||
// are ignored, as it is really not expected to work.
|
||||
if m.cgroups.Rootless && sys.Name() == "devices" {
|
||||
continue
|
||||
}
|
||||
// However, errors from other subsystems are not ignored.
|
||||
// see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
|
||||
if path == "" {
|
||||
// We never created a path for this cgroup, so we cannot set
|
||||
// limits for it (though we have already tried at this point).
|
||||
return fmt.Errorf("cannot set %s limit: container could not join or create cgroup", sys.Name())
|
||||
}
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Freeze toggles the container's freezer cgroup depending on the state
|
||||
// provided
|
||||
func (m *manager) Freeze(state configs.FreezerState) error {
|
||||
path := m.Path("freezer")
|
||||
if path == "" {
|
||||
return errors.New("cannot toggle freezer: cgroups not configured for container")
|
||||
}
|
||||
|
||||
prevState := m.cgroups.Resources.Freezer
|
||||
m.cgroups.Resources.Freezer = state
|
||||
freezer := &FreezerGroup{}
|
||||
if err := freezer.Set(path, m.cgroups.Resources); err != nil {
|
||||
m.cgroups.Resources.Freezer = prevState
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *manager) GetPids() ([]int, error) {
|
||||
return cgroups.GetPids(m.Path("devices"))
|
||||
}
|
||||
|
||||
func (m *manager) GetAllPids() ([]int, error) {
|
||||
return cgroups.GetAllPids(m.Path("devices"))
|
||||
}
|
||||
|
||||
func (m *manager) GetPaths() map[string]string {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
return m.paths
|
||||
}
|
||||
|
||||
func (m *manager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.cgroups, nil
|
||||
}
|
||||
|
||||
func (m *manager) GetFreezerState() (configs.FreezerState, error) {
|
||||
dir := m.Path("freezer")
|
||||
// If the container doesn't have the freezer cgroup, say it's undefined.
|
||||
if dir == "" {
|
||||
return configs.Undefined, nil
|
||||
}
|
||||
freezer := &FreezerGroup{}
|
||||
return freezer.GetState(dir)
|
||||
}
|
||||
|
||||
func (m *manager) Exists() bool {
|
||||
return cgroups.PathExists(m.Path("devices"))
|
||||
}
|
||||
|
||||
func OOMKillCount(path string) (uint64, error) {
|
||||
return fscommon.GetValueByKey(path, "memory.oom_control", "oom_kill")
|
||||
}
|
||||
|
||||
func (m *manager) OOMKillCount() (uint64, error) {
|
||||
c, err := OOMKillCount(m.Path("memory"))
|
||||
// Ignore ENOENT when rootless as it couldn't create cgroup.
|
||||
if err != nil && m.cgroups.Rootless && os.IsNotExist(err) {
|
||||
err = nil
|
||||
}
|
||||
|
||||
return c, err
|
||||
}
|
62
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go
generated
vendored
62
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go
generated
vendored
@ -1,62 +0,0 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type HugetlbGroup struct{}
|
||||
|
||||
func (s *HugetlbGroup) Name() string {
|
||||
return "hugetlb"
|
||||
}
|
||||
|
||||
func (s *HugetlbGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *HugetlbGroup) Set(path string, r *configs.Resources) error {
|
||||
for _, hugetlb := range r.HugetlbLimit {
|
||||
if err := cgroups.WriteFile(path, "hugetlb."+hugetlb.Pagesize+".limit_in_bytes", strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *HugetlbGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
if !cgroups.PathExists(path) {
|
||||
return nil
|
||||
}
|
||||
hugetlbStats := cgroups.HugetlbStats{}
|
||||
for _, pageSize := range cgroups.HugePageSizes() {
|
||||
usage := "hugetlb." + pageSize + ".usage_in_bytes"
|
||||
value, err := fscommon.GetCgroupParamUint(path, usage)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
hugetlbStats.Usage = value
|
||||
|
||||
maxUsage := "hugetlb." + pageSize + ".max_usage_in_bytes"
|
||||
value, err = fscommon.GetCgroupParamUint(path, maxUsage)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
hugetlbStats.MaxUsage = value
|
||||
|
||||
failcnt := "hugetlb." + pageSize + ".failcnt"
|
||||
value, err = fscommon.GetCgroupParamUint(path, failcnt)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
hugetlbStats.Failcnt = value
|
||||
|
||||
stats.HugetlbStats[pageSize] = hugetlbStats
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
348
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go
generated
vendored
348
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go
generated
vendored
@ -1,348 +0,0 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
const (
|
||||
cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes"
|
||||
cgroupMemoryLimit = "memory.limit_in_bytes"
|
||||
cgroupMemoryUsage = "memory.usage_in_bytes"
|
||||
cgroupMemoryMaxUsage = "memory.max_usage_in_bytes"
|
||||
)
|
||||
|
||||
type MemoryGroup struct{}
|
||||
|
||||
func (s *MemoryGroup) Name() string {
|
||||
return "memory"
|
||||
}
|
||||
|
||||
func (s *MemoryGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func setMemory(path string, val int64) error {
|
||||
if val == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
err := cgroups.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(val, 10))
|
||||
if !errors.Is(err, unix.EBUSY) {
|
||||
return err
|
||||
}
|
||||
|
||||
// EBUSY means the kernel can't set new limit as it's too low
|
||||
// (lower than the current usage). Return more specific error.
|
||||
usage, err := fscommon.GetCgroupParamUint(path, cgroupMemoryUsage)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
max, err := fscommon.GetCgroupParamUint(path, cgroupMemoryMaxUsage)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return fmt.Errorf("unable to set memory limit to %d (current usage: %d, peak usage: %d)", val, usage, max)
|
||||
}
|
||||
|
||||
func setSwap(path string, val int64) error {
|
||||
if val == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
return cgroups.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(val, 10))
|
||||
}
|
||||
|
||||
func setMemoryAndSwap(path string, r *configs.Resources) error {
|
||||
// If the memory update is set to -1 and the swap is not explicitly
|
||||
// set, we should also set swap to -1, it means unlimited memory.
|
||||
if r.Memory == -1 && r.MemorySwap == 0 {
|
||||
// Only set swap if it's enabled in kernel
|
||||
if cgroups.PathExists(filepath.Join(path, cgroupMemorySwapLimit)) {
|
||||
r.MemorySwap = -1
|
||||
}
|
||||
}
|
||||
|
||||
// When memory and swap memory are both set, we need to handle the cases
|
||||
// for updating container.
|
||||
if r.Memory != 0 && r.MemorySwap != 0 {
|
||||
curLimit, err := fscommon.GetCgroupParamUint(path, cgroupMemoryLimit)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// When update memory limit, we should adapt the write sequence
|
||||
// for memory and swap memory, so it won't fail because the new
|
||||
// value and the old value don't fit kernel's validation.
|
||||
if r.MemorySwap == -1 || curLimit < uint64(r.MemorySwap) {
|
||||
if err := setSwap(path, r.MemorySwap); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := setMemory(path, r.Memory); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
if err := setMemory(path, r.Memory); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := setSwap(path, r.MemorySwap); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *MemoryGroup) Set(path string, r *configs.Resources) error {
|
||||
if err := setMemoryAndSwap(path, r); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// ignore KernelMemory and KernelMemoryTCP
|
||||
|
||||
if r.MemoryReservation != 0 {
|
||||
if err := cgroups.WriteFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(r.MemoryReservation, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if r.OomKillDisable {
|
||||
if err := cgroups.WriteFile(path, "memory.oom_control", "1"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if r.MemorySwappiness == nil || int64(*r.MemorySwappiness) == -1 {
|
||||
return nil
|
||||
} else if *r.MemorySwappiness <= 100 {
|
||||
if err := cgroups.WriteFile(path, "memory.swappiness", strconv.FormatUint(*r.MemorySwappiness, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
return fmt.Errorf("invalid memory swappiness value: %d (valid range is 0-100)", *r.MemorySwappiness)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
const file = "memory.stat"
|
||||
statsFile, err := cgroups.OpenFile(path, file, os.O_RDONLY)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
defer statsFile.Close()
|
||||
|
||||
sc := bufio.NewScanner(statsFile)
|
||||
for sc.Scan() {
|
||||
t, v, err := fscommon.ParseKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
stats.MemoryStats.Stats[t] = v
|
||||
}
|
||||
stats.MemoryStats.Cache = stats.MemoryStats.Stats["cache"]
|
||||
|
||||
memoryUsage, err := getMemoryData(path, "")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.Usage = memoryUsage
|
||||
swapUsage, err := getMemoryData(path, "memsw")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.SwapUsage = swapUsage
|
||||
kernelUsage, err := getMemoryData(path, "kmem")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.KernelUsage = kernelUsage
|
||||
kernelTCPUsage, err := getMemoryData(path, "kmem.tcp")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.KernelTCPUsage = kernelTCPUsage
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(path, "memory.use_hierarchy")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if value == 1 {
|
||||
stats.MemoryStats.UseHierarchy = true
|
||||
}
|
||||
|
||||
pagesByNUMA, err := getPageUsageByNUMA(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.PageUsageByNUMA = pagesByNUMA
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func getMemoryData(path, name string) (cgroups.MemoryData, error) {
|
||||
memoryData := cgroups.MemoryData{}
|
||||
|
||||
moduleName := "memory"
|
||||
if name != "" {
|
||||
moduleName = "memory." + name
|
||||
}
|
||||
var (
|
||||
usage = moduleName + ".usage_in_bytes"
|
||||
maxUsage = moduleName + ".max_usage_in_bytes"
|
||||
failcnt = moduleName + ".failcnt"
|
||||
limit = moduleName + ".limit_in_bytes"
|
||||
)
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(path, usage)
|
||||
if err != nil {
|
||||
if name != "" && os.IsNotExist(err) {
|
||||
// Ignore ENOENT as swap and kmem controllers
|
||||
// are optional in the kernel.
|
||||
return cgroups.MemoryData{}, nil
|
||||
}
|
||||
return cgroups.MemoryData{}, err
|
||||
}
|
||||
memoryData.Usage = value
|
||||
value, err = fscommon.GetCgroupParamUint(path, maxUsage)
|
||||
if err != nil {
|
||||
return cgroups.MemoryData{}, err
|
||||
}
|
||||
memoryData.MaxUsage = value
|
||||
value, err = fscommon.GetCgroupParamUint(path, failcnt)
|
||||
if err != nil {
|
||||
return cgroups.MemoryData{}, err
|
||||
}
|
||||
memoryData.Failcnt = value
|
||||
value, err = fscommon.GetCgroupParamUint(path, limit)
|
||||
if err != nil {
|
||||
return cgroups.MemoryData{}, err
|
||||
}
|
||||
memoryData.Limit = value
|
||||
|
||||
return memoryData, nil
|
||||
}
|
||||
|
||||
func getPageUsageByNUMA(path string) (cgroups.PageUsageByNUMA, error) {
|
||||
const (
|
||||
maxColumns = math.MaxUint8 + 1
|
||||
file = "memory.numa_stat"
|
||||
)
|
||||
stats := cgroups.PageUsageByNUMA{}
|
||||
|
||||
fd, err := cgroups.OpenFile(path, file, os.O_RDONLY)
|
||||
if os.IsNotExist(err) {
|
||||
return stats, nil
|
||||
} else if err != nil {
|
||||
return stats, err
|
||||
}
|
||||
defer fd.Close()
|
||||
|
||||
// File format is documented in linux/Documentation/cgroup-v1/memory.txt
|
||||
// and it looks like this:
|
||||
//
|
||||
// total=<total pages> N0=<node 0 pages> N1=<node 1 pages> ...
|
||||
// file=<total file pages> N0=<node 0 pages> N1=<node 1 pages> ...
|
||||
// anon=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
|
||||
// unevictable=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
|
||||
// hierarchical_<counter>=<counter pages> N0=<node 0 pages> N1=<node 1 pages> ...
|
||||
|
||||
scanner := bufio.NewScanner(fd)
|
||||
for scanner.Scan() {
|
||||
var field *cgroups.PageStats
|
||||
|
||||
line := scanner.Text()
|
||||
columns := strings.SplitN(line, " ", maxColumns)
|
||||
for i, column := range columns {
|
||||
byNode := strings.SplitN(column, "=", 2)
|
||||
// Some custom kernels have non-standard fields, like
|
||||
// numa_locality 0 0 0 0 0 0 0 0 0 0
|
||||
// numa_exectime 0
|
||||
if len(byNode) < 2 {
|
||||
if i == 0 {
|
||||
// Ignore/skip those.
|
||||
break
|
||||
} else {
|
||||
// The first column was already validated,
|
||||
// so be strict to the rest.
|
||||
return stats, malformedLine(path, file, line)
|
||||
}
|
||||
}
|
||||
key, val := byNode[0], byNode[1]
|
||||
if i == 0 { // First column: key is name, val is total.
|
||||
field = getNUMAField(&stats, key)
|
||||
if field == nil { // unknown field (new kernel?)
|
||||
break
|
||||
}
|
||||
field.Total, err = strconv.ParseUint(val, 0, 64)
|
||||
if err != nil {
|
||||
return stats, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
field.Nodes = map[uint8]uint64{}
|
||||
} else { // Subsequent columns: key is N<id>, val is usage.
|
||||
if len(key) < 2 || key[0] != 'N' {
|
||||
// This is definitely an error.
|
||||
return stats, malformedLine(path, file, line)
|
||||
}
|
||||
|
||||
n, err := strconv.ParseUint(key[1:], 10, 8)
|
||||
if err != nil {
|
||||
return stats, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
|
||||
usage, err := strconv.ParseUint(val, 10, 64)
|
||||
if err != nil {
|
||||
return stats, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
|
||||
field.Nodes[uint8(n)] = usage
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return cgroups.PageUsageByNUMA{}, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
func getNUMAField(stats *cgroups.PageUsageByNUMA, name string) *cgroups.PageStats {
|
||||
switch name {
|
||||
case "total":
|
||||
return &stats.Total
|
||||
case "file":
|
||||
return &stats.File
|
||||
case "anon":
|
||||
return &stats.Anon
|
||||
case "unevictable":
|
||||
return &stats.Unevictable
|
||||
case "hierarchical_total":
|
||||
return &stats.Hierarchical.Total
|
||||
case "hierarchical_file":
|
||||
return &stats.Hierarchical.File
|
||||
case "hierarchical_anon":
|
||||
return &stats.Hierarchical.Anon
|
||||
case "hierarchical_unevictable":
|
||||
return &stats.Hierarchical.Unevictable
|
||||
}
|
||||
return nil
|
||||
}
|
31
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go
generated
vendored
31
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go
generated
vendored
@ -1,31 +0,0 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type NameGroup struct {
|
||||
GroupName string
|
||||
Join bool
|
||||
}
|
||||
|
||||
func (s *NameGroup) Name() string {
|
||||
return s.GroupName
|
||||
}
|
||||
|
||||
func (s *NameGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
if s.Join {
|
||||
// Ignore errors if the named cgroup does not exist.
|
||||
_ = apply(path, pid)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *NameGroup) Set(_ string, _ *configs.Resources) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *NameGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
32
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go
generated
vendored
32
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go
generated
vendored
@ -1,32 +0,0 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type NetClsGroup struct{}
|
||||
|
||||
func (s *NetClsGroup) Name() string {
|
||||
return "net_cls"
|
||||
}
|
||||
|
||||
func (s *NetClsGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *NetClsGroup) Set(path string, r *configs.Resources) error {
|
||||
if r.NetClsClassid != 0 {
|
||||
if err := cgroups.WriteFile(path, "net_cls.classid", strconv.FormatUint(uint64(r.NetClsClassid), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *NetClsGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
30
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go
generated
vendored
30
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go
generated
vendored
@ -1,30 +0,0 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type NetPrioGroup struct{}
|
||||
|
||||
func (s *NetPrioGroup) Name() string {
|
||||
return "net_prio"
|
||||
}
|
||||
|
||||
func (s *NetPrioGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *NetPrioGroup) Set(path string, r *configs.Resources) error {
|
||||
for _, prioMap := range r.NetPrioIfpriomap {
|
||||
if err := cgroups.WriteFile(path, "net_prio.ifpriomap", prioMap.CgroupString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *NetPrioGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
186
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/paths.go
generated
vendored
186
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/paths.go
generated
vendored
@ -1,186 +0,0 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
)
|
||||
|
||||
// The absolute path to the root of the cgroup hierarchies.
|
||||
var (
|
||||
cgroupRootLock sync.Mutex
|
||||
cgroupRoot string
|
||||
)
|
||||
|
||||
const defaultCgroupRoot = "/sys/fs/cgroup"
|
||||
|
||||
func initPaths(cg *configs.Cgroup) (map[string]string, error) {
|
||||
root, err := rootPath()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
inner, err := innerPath(cg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
paths := make(map[string]string)
|
||||
for _, sys := range subsystems {
|
||||
name := sys.Name()
|
||||
path, err := subsysPath(root, inner, name)
|
||||
if err != nil {
|
||||
// The non-presence of the devices subsystem
|
||||
// is considered fatal for security reasons.
|
||||
if cgroups.IsNotFound(err) && (cg.SkipDevices || name != "devices") {
|
||||
continue
|
||||
}
|
||||
|
||||
return nil, err
|
||||
}
|
||||
paths[name] = path
|
||||
}
|
||||
|
||||
return paths, nil
|
||||
}
|
||||
|
||||
func tryDefaultCgroupRoot() string {
|
||||
var st, pst unix.Stat_t
|
||||
|
||||
// (1) it should be a directory...
|
||||
err := unix.Lstat(defaultCgroupRoot, &st)
|
||||
if err != nil || st.Mode&unix.S_IFDIR == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
// (2) ... and a mount point ...
|
||||
err = unix.Lstat(filepath.Dir(defaultCgroupRoot), &pst)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
if st.Dev == pst.Dev {
|
||||
// parent dir has the same dev -- not a mount point
|
||||
return ""
|
||||
}
|
||||
|
||||
// (3) ... of 'tmpfs' fs type.
|
||||
var fst unix.Statfs_t
|
||||
err = unix.Statfs(defaultCgroupRoot, &fst)
|
||||
if err != nil || fst.Type != unix.TMPFS_MAGIC {
|
||||
return ""
|
||||
}
|
||||
|
||||
// (4) it should have at least 1 entry ...
|
||||
dir, err := os.Open(defaultCgroupRoot)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
names, err := dir.Readdirnames(1)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
if len(names) < 1 {
|
||||
return ""
|
||||
}
|
||||
// ... which is a cgroup mount point.
|
||||
err = unix.Statfs(filepath.Join(defaultCgroupRoot, names[0]), &fst)
|
||||
if err != nil || fst.Type != unix.CGROUP_SUPER_MAGIC {
|
||||
return ""
|
||||
}
|
||||
|
||||
return defaultCgroupRoot
|
||||
}
|
||||
|
||||
// rootPath finds and returns path to the root of the cgroup hierarchies.
|
||||
func rootPath() (string, error) {
|
||||
cgroupRootLock.Lock()
|
||||
defer cgroupRootLock.Unlock()
|
||||
|
||||
if cgroupRoot != "" {
|
||||
return cgroupRoot, nil
|
||||
}
|
||||
|
||||
// fast path
|
||||
cgroupRoot = tryDefaultCgroupRoot()
|
||||
if cgroupRoot != "" {
|
||||
return cgroupRoot, nil
|
||||
}
|
||||
|
||||
// slow path: parse mountinfo
|
||||
mi, err := cgroups.GetCgroupMounts(false)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if len(mi) < 1 {
|
||||
return "", errors.New("no cgroup mount found in mountinfo")
|
||||
}
|
||||
|
||||
// Get the first cgroup mount (e.g. "/sys/fs/cgroup/memory"),
|
||||
// use its parent directory.
|
||||
root := filepath.Dir(mi[0].Mountpoint)
|
||||
|
||||
if _, err := os.Stat(root); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
cgroupRoot = root
|
||||
return cgroupRoot, nil
|
||||
}
|
||||
|
||||
func innerPath(c *configs.Cgroup) (string, error) {
|
||||
if (c.Name != "" || c.Parent != "") && c.Path != "" {
|
||||
return "", errors.New("cgroup: either Path or Name and Parent should be used")
|
||||
}
|
||||
|
||||
// XXX: Do not remove CleanPath. Path safety is important! -- cyphar
|
||||
innerPath := utils.CleanPath(c.Path)
|
||||
if innerPath == "" {
|
||||
cgParent := utils.CleanPath(c.Parent)
|
||||
cgName := utils.CleanPath(c.Name)
|
||||
innerPath = filepath.Join(cgParent, cgName)
|
||||
}
|
||||
|
||||
return innerPath, nil
|
||||
}
|
||||
|
||||
func subsysPath(root, inner, subsystem string) (string, error) {
|
||||
// If the cgroup name/path is absolute do not look relative to the cgroup of the init process.
|
||||
if filepath.IsAbs(inner) {
|
||||
mnt, err := cgroups.FindCgroupMountpoint(root, subsystem)
|
||||
// If we didn't mount the subsystem, there is no point we make the path.
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// Sometimes subsystems can be mounted together as 'cpu,cpuacct'.
|
||||
return filepath.Join(root, filepath.Base(mnt), inner), nil
|
||||
}
|
||||
|
||||
// Use GetOwnCgroupPath instead of GetInitCgroupPath, because the creating
|
||||
// process could in container and shared pid namespace with host, and
|
||||
// /proc/1/cgroup could point to whole other world of cgroups.
|
||||
parentPath, err := cgroups.GetOwnCgroupPath(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return filepath.Join(parentPath, inner), nil
|
||||
}
|
||||
|
||||
func apply(path string, pid int) error {
|
||||
if path == "" {
|
||||
return nil
|
||||
}
|
||||
if err := os.MkdirAll(path, 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
return cgroups.WriteCgroupProc(path, pid)
|
||||
}
|
24
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go
generated
vendored
24
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go
generated
vendored
@ -1,24 +0,0 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type PerfEventGroup struct{}
|
||||
|
||||
func (s *PerfEventGroup) Name() string {
|
||||
return "perf_event"
|
||||
}
|
||||
|
||||
func (s *PerfEventGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *PerfEventGroup) Set(_ string, _ *configs.Resources) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *PerfEventGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
62
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go
generated
vendored
62
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go
generated
vendored
@ -1,62 +0,0 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"math"
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type PidsGroup struct{}
|
||||
|
||||
func (s *PidsGroup) Name() string {
|
||||
return "pids"
|
||||
}
|
||||
|
||||
func (s *PidsGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *PidsGroup) Set(path string, r *configs.Resources) error {
|
||||
if r.PidsLimit != 0 {
|
||||
// "max" is the fallback value.
|
||||
limit := "max"
|
||||
|
||||
if r.PidsLimit > 0 {
|
||||
limit = strconv.FormatInt(r.PidsLimit, 10)
|
||||
}
|
||||
|
||||
if err := cgroups.WriteFile(path, "pids.max", limit); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *PidsGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
if !cgroups.PathExists(path) {
|
||||
return nil
|
||||
}
|
||||
current, err := fscommon.GetCgroupParamUint(path, "pids.current")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
max, err := fscommon.GetCgroupParamUint(path, "pids.max")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// If no limit is set, read from pids.max returns "max", which is
|
||||
// converted to MaxUint64 by GetCgroupParamUint. Historically, we
|
||||
// represent "no limit" for pids as 0, thus this conversion.
|
||||
if max == math.MaxUint64 {
|
||||
max = 0
|
||||
}
|
||||
|
||||
stats.PidsStats.Current = current
|
||||
stats.PidsStats.Limit = max
|
||||
return nil
|
||||
}
|
25
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/rdma.go
generated
vendored
25
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/rdma.go
generated
vendored
@ -1,25 +0,0 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type RdmaGroup struct{}
|
||||
|
||||
func (s *RdmaGroup) Name() string {
|
||||
return "rdma"
|
||||
}
|
||||
|
||||
func (s *RdmaGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *RdmaGroup) Set(path string, r *configs.Resources) error {
|
||||
return fscommon.RdmaSet(path, r)
|
||||
}
|
||||
|
||||
func (s *RdmaGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return fscommon.RdmaGetStats(path, stats)
|
||||
}
|
87
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpu.go
generated
vendored
87
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpu.go
generated
vendored
@ -1,87 +0,0 @@
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"os"
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func isCpuSet(r *configs.Resources) bool {
|
||||
return r.CpuWeight != 0 || r.CpuQuota != 0 || r.CpuPeriod != 0
|
||||
}
|
||||
|
||||
func setCpu(dirPath string, r *configs.Resources) error {
|
||||
if !isCpuSet(r) {
|
||||
return nil
|
||||
}
|
||||
|
||||
// NOTE: .CpuShares is not used here. Conversion is the caller's responsibility.
|
||||
if r.CpuWeight != 0 {
|
||||
if err := cgroups.WriteFile(dirPath, "cpu.weight", strconv.FormatUint(r.CpuWeight, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if r.CpuQuota != 0 || r.CpuPeriod != 0 {
|
||||
str := "max"
|
||||
if r.CpuQuota > 0 {
|
||||
str = strconv.FormatInt(r.CpuQuota, 10)
|
||||
}
|
||||
period := r.CpuPeriod
|
||||
if period == 0 {
|
||||
// This default value is documented in
|
||||
// https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html
|
||||
period = 100000
|
||||
}
|
||||
str += " " + strconv.FormatUint(period, 10)
|
||||
if err := cgroups.WriteFile(dirPath, "cpu.max", str); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func statCpu(dirPath string, stats *cgroups.Stats) error {
|
||||
const file = "cpu.stat"
|
||||
f, err := cgroups.OpenFile(dirPath, file, os.O_RDONLY)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
sc := bufio.NewScanner(f)
|
||||
for sc.Scan() {
|
||||
t, v, err := fscommon.ParseKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return &parseError{Path: dirPath, File: file, Err: err}
|
||||
}
|
||||
switch t {
|
||||
case "usage_usec":
|
||||
stats.CpuStats.CpuUsage.TotalUsage = v * 1000
|
||||
|
||||
case "user_usec":
|
||||
stats.CpuStats.CpuUsage.UsageInUsermode = v * 1000
|
||||
|
||||
case "system_usec":
|
||||
stats.CpuStats.CpuUsage.UsageInKernelmode = v * 1000
|
||||
|
||||
case "nr_periods":
|
||||
stats.CpuStats.ThrottlingData.Periods = v
|
||||
|
||||
case "nr_throttled":
|
||||
stats.CpuStats.ThrottlingData.ThrottledPeriods = v
|
||||
|
||||
case "throttled_usec":
|
||||
stats.CpuStats.ThrottlingData.ThrottledTime = v * 1000
|
||||
}
|
||||
}
|
||||
if err := sc.Err(); err != nil {
|
||||
return &parseError{Path: dirPath, File: file, Err: err}
|
||||
}
|
||||
return nil
|
||||
}
|
28
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpuset.go
generated
vendored
28
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpuset.go
generated
vendored
@ -1,28 +0,0 @@
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func isCpusetSet(r *configs.Resources) bool {
|
||||
return r.CpusetCpus != "" || r.CpusetMems != ""
|
||||
}
|
||||
|
||||
func setCpuset(dirPath string, r *configs.Resources) error {
|
||||
if !isCpusetSet(r) {
|
||||
return nil
|
||||
}
|
||||
|
||||
if r.CpusetCpus != "" {
|
||||
if err := cgroups.WriteFile(dirPath, "cpuset.cpus", r.CpusetCpus); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if r.CpusetMems != "" {
|
||||
if err := cgroups.WriteFile(dirPath, "cpuset.mems", r.CpusetMems); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
152
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/create.go
generated
vendored
152
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/create.go
generated
vendored
@ -1,152 +0,0 @@
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func supportedControllers() (string, error) {
|
||||
return cgroups.ReadFile(UnifiedMountpoint, "/cgroup.controllers")
|
||||
}
|
||||
|
||||
// needAnyControllers returns whether we enable some supported controllers or not,
|
||||
// based on (1) controllers available and (2) resources that are being set.
|
||||
// We don't check "pseudo" controllers such as
|
||||
// "freezer" and "devices".
|
||||
func needAnyControllers(r *configs.Resources) (bool, error) {
|
||||
if r == nil {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// list of all available controllers
|
||||
content, err := supportedControllers()
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
avail := make(map[string]struct{})
|
||||
for _, ctr := range strings.Fields(content) {
|
||||
avail[ctr] = struct{}{}
|
||||
}
|
||||
|
||||
// check whether the controller if available or not
|
||||
have := func(controller string) bool {
|
||||
_, ok := avail[controller]
|
||||
return ok
|
||||
}
|
||||
|
||||
if isPidsSet(r) && have("pids") {
|
||||
return true, nil
|
||||
}
|
||||
if isMemorySet(r) && have("memory") {
|
||||
return true, nil
|
||||
}
|
||||
if isIoSet(r) && have("io") {
|
||||
return true, nil
|
||||
}
|
||||
if isCpuSet(r) && have("cpu") {
|
||||
return true, nil
|
||||
}
|
||||
if isCpusetSet(r) && have("cpuset") {
|
||||
return true, nil
|
||||
}
|
||||
if isHugeTlbSet(r) && have("hugetlb") {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// containsDomainController returns whether the current config contains domain controller or not.
|
||||
// Refer to: http://man7.org/linux/man-pages/man7/cgroups.7.html
|
||||
// As at Linux 4.19, the following controllers are threaded: cpu, perf_event, and pids.
|
||||
func containsDomainController(r *configs.Resources) bool {
|
||||
return isMemorySet(r) || isIoSet(r) || isCpuSet(r) || isHugeTlbSet(r)
|
||||
}
|
||||
|
||||
// CreateCgroupPath creates cgroupv2 path, enabling all the supported controllers.
|
||||
func CreateCgroupPath(path string, c *configs.Cgroup) (Err error) {
|
||||
if !strings.HasPrefix(path, UnifiedMountpoint) {
|
||||
return fmt.Errorf("invalid cgroup path %s", path)
|
||||
}
|
||||
|
||||
content, err := supportedControllers()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
const (
|
||||
cgTypeFile = "cgroup.type"
|
||||
cgStCtlFile = "cgroup.subtree_control"
|
||||
)
|
||||
ctrs := strings.Fields(content)
|
||||
res := "+" + strings.Join(ctrs, " +")
|
||||
|
||||
elements := strings.Split(path, "/")
|
||||
elements = elements[3:]
|
||||
current := "/sys/fs"
|
||||
for i, e := range elements {
|
||||
current = filepath.Join(current, e)
|
||||
if i > 0 {
|
||||
if err := os.Mkdir(current, 0o755); err != nil {
|
||||
if !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
// If the directory was created, be sure it is not left around on errors.
|
||||
current := current
|
||||
defer func() {
|
||||
if Err != nil {
|
||||
os.Remove(current)
|
||||
}
|
||||
}()
|
||||
}
|
||||
cgType, _ := cgroups.ReadFile(current, cgTypeFile)
|
||||
cgType = strings.TrimSpace(cgType)
|
||||
switch cgType {
|
||||
// If the cgroup is in an invalid mode (usually this means there's an internal
|
||||
// process in the cgroup tree, because we created a cgroup under an
|
||||
// already-populated-by-other-processes cgroup), then we have to error out if
|
||||
// the user requested controllers which are not thread-aware. However, if all
|
||||
// the controllers requested are thread-aware we can simply put the cgroup into
|
||||
// threaded mode.
|
||||
case "domain invalid":
|
||||
if containsDomainController(c.Resources) {
|
||||
return fmt.Errorf("cannot enter cgroupv2 %q with domain controllers -- it is in an invalid state", current)
|
||||
} else {
|
||||
// Not entirely correct (in theory we'd always want to be a domain --
|
||||
// since that means we're a properly delegated cgroup subtree) but in
|
||||
// this case there's not much we can do and it's better than giving an
|
||||
// error.
|
||||
_ = cgroups.WriteFile(current, cgTypeFile, "threaded")
|
||||
}
|
||||
// If the cgroup is in (threaded) or (domain threaded) mode, we can only use thread-aware controllers
|
||||
// (and you cannot usually take a cgroup out of threaded mode).
|
||||
case "domain threaded":
|
||||
fallthrough
|
||||
case "threaded":
|
||||
if containsDomainController(c.Resources) {
|
||||
return fmt.Errorf("cannot enter cgroupv2 %q with domain controllers -- it is in %s mode", current, cgType)
|
||||
}
|
||||
}
|
||||
}
|
||||
// enable all supported controllers
|
||||
if i < len(elements)-1 {
|
||||
if err := cgroups.WriteFile(current, cgStCtlFile, res); err != nil {
|
||||
// try write one by one
|
||||
allCtrs := strings.Split(res, " ")
|
||||
for _, ctr := range allCtrs {
|
||||
_ = cgroups.WriteFile(current, cgStCtlFile, ctr)
|
||||
}
|
||||
}
|
||||
// Some controllers might not be enabled when rootless or containerized,
|
||||
// but we don't catch the error here. (Caught in setXXX() functions.)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
99
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/defaultpath.go
generated
vendored
99
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/defaultpath.go
generated
vendored
@ -1,99 +0,0 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
)
|
||||
|
||||
const UnifiedMountpoint = "/sys/fs/cgroup"
|
||||
|
||||
func defaultDirPath(c *configs.Cgroup) (string, error) {
|
||||
if (c.Name != "" || c.Parent != "") && c.Path != "" {
|
||||
return "", fmt.Errorf("cgroup: either Path or Name and Parent should be used, got %+v", c)
|
||||
}
|
||||
|
||||
return _defaultDirPath(UnifiedMountpoint, c.Path, c.Parent, c.Name)
|
||||
}
|
||||
|
||||
func _defaultDirPath(root, cgPath, cgParent, cgName string) (string, error) {
|
||||
if (cgName != "" || cgParent != "") && cgPath != "" {
|
||||
return "", errors.New("cgroup: either Path or Name and Parent should be used")
|
||||
}
|
||||
|
||||
// XXX: Do not remove CleanPath. Path safety is important! -- cyphar
|
||||
innerPath := utils.CleanPath(cgPath)
|
||||
if innerPath == "" {
|
||||
cgParent := utils.CleanPath(cgParent)
|
||||
cgName := utils.CleanPath(cgName)
|
||||
innerPath = filepath.Join(cgParent, cgName)
|
||||
}
|
||||
if filepath.IsAbs(innerPath) {
|
||||
return filepath.Join(root, innerPath), nil
|
||||
}
|
||||
|
||||
ownCgroup, err := parseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
// The current user scope most probably has tasks in it already,
|
||||
// making it impossible to enable controllers for its sub-cgroup.
|
||||
// A parent cgroup (with no tasks in it) is what we need.
|
||||
ownCgroup = filepath.Dir(ownCgroup)
|
||||
|
||||
return filepath.Join(root, ownCgroup, innerPath), nil
|
||||
}
|
||||
|
||||
// parseCgroupFile parses /proc/PID/cgroup file and return string
|
||||
func parseCgroupFile(path string) (string, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer f.Close()
|
||||
return parseCgroupFromReader(f)
|
||||
}
|
||||
|
||||
func parseCgroupFromReader(r io.Reader) (string, error) {
|
||||
s := bufio.NewScanner(r)
|
||||
for s.Scan() {
|
||||
var (
|
||||
text = s.Text()
|
||||
parts = strings.SplitN(text, ":", 3)
|
||||
)
|
||||
if len(parts) < 3 {
|
||||
return "", fmt.Errorf("invalid cgroup entry: %q", text)
|
||||
}
|
||||
// text is like "0::/user.slice/user-1001.slice/session-1.scope"
|
||||
if parts[0] == "0" && parts[1] == "" {
|
||||
return parts[2], nil
|
||||
}
|
||||
}
|
||||
if err := s.Err(); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return "", errors.New("cgroup path not found")
|
||||
}
|
75
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/devices.go
generated
vendored
75
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/devices.go
generated
vendored
@ -1,75 +0,0 @@
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/ebpf"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/devices"
|
||||
"github.com/opencontainers/runc/libcontainer/userns"
|
||||
)
|
||||
|
||||
func isRWM(perms devices.Permissions) bool {
|
||||
var r, w, m bool
|
||||
for _, perm := range perms {
|
||||
switch perm {
|
||||
case 'r':
|
||||
r = true
|
||||
case 'w':
|
||||
w = true
|
||||
case 'm':
|
||||
m = true
|
||||
}
|
||||
}
|
||||
return r && w && m
|
||||
}
|
||||
|
||||
// This is similar to the logic applied in crun for handling errors from bpf(2)
|
||||
// <https://github.com/containers/crun/blob/0.17/src/libcrun/cgroup.c#L2438-L2470>.
|
||||
func canSkipEBPFError(r *configs.Resources) bool {
|
||||
// If we're running in a user namespace we can ignore eBPF rules because we
|
||||
// usually cannot use bpf(2), as well as rootless containers usually don't
|
||||
// have the necessary privileges to mknod(2) device inodes or access
|
||||
// host-level instances (though ideally we would be blocking device access
|
||||
// for rootless containers anyway).
|
||||
if userns.RunningInUserNS() {
|
||||
return true
|
||||
}
|
||||
|
||||
// We cannot ignore an eBPF load error if any rule if is a block rule or it
|
||||
// doesn't permit all access modes.
|
||||
//
|
||||
// NOTE: This will sometimes trigger in cases where access modes are split
|
||||
// between different rules but to handle this correctly would require
|
||||
// using ".../libcontainer/cgroup/devices".Emulator.
|
||||
for _, dev := range r.Devices {
|
||||
if !dev.Allow || !isRWM(dev.Permissions) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func setDevices(dirPath string, r *configs.Resources) error {
|
||||
if r.SkipDevices {
|
||||
return nil
|
||||
}
|
||||
insts, license, err := devicefilter.DeviceFilter(r.Devices)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
dirFD, err := unix.Open(dirPath, unix.O_DIRECTORY|unix.O_RDONLY, 0o600)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot get dir FD for %s", dirPath)
|
||||
}
|
||||
defer unix.Close(dirFD)
|
||||
if _, err := ebpf.LoadAttachCgroupDeviceFilter(insts, license, dirFD); err != nil {
|
||||
if !canSkipEBPFError(r) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
127
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/freezer.go
generated
vendored
127
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/freezer.go
generated
vendored
@ -1,127 +0,0 @@
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func setFreezer(dirPath string, state configs.FreezerState) error {
|
||||
var stateStr string
|
||||
switch state {
|
||||
case configs.Undefined:
|
||||
return nil
|
||||
case configs.Frozen:
|
||||
stateStr = "1"
|
||||
case configs.Thawed:
|
||||
stateStr = "0"
|
||||
default:
|
||||
return fmt.Errorf("invalid freezer state %q requested", state)
|
||||
}
|
||||
|
||||
fd, err := cgroups.OpenFile(dirPath, "cgroup.freeze", unix.O_RDWR)
|
||||
if err != nil {
|
||||
// We can ignore this request as long as the user didn't ask us to
|
||||
// freeze the container (since without the freezer cgroup, that's a
|
||||
// no-op).
|
||||
if state != configs.Frozen {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("freezer not supported: %w", err)
|
||||
}
|
||||
defer fd.Close()
|
||||
|
||||
if _, err := fd.WriteString(stateStr); err != nil {
|
||||
return err
|
||||
}
|
||||
// Confirm that the cgroup did actually change states.
|
||||
if actualState, err := readFreezer(dirPath, fd); err != nil {
|
||||
return err
|
||||
} else if actualState != state {
|
||||
return fmt.Errorf(`expected "cgroup.freeze" to be in state %q but was in %q`, state, actualState)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func getFreezer(dirPath string) (configs.FreezerState, error) {
|
||||
fd, err := cgroups.OpenFile(dirPath, "cgroup.freeze", unix.O_RDONLY)
|
||||
if err != nil {
|
||||
// If the kernel is too old, then we just treat the freezer as being in
|
||||
// an "undefined" state.
|
||||
if os.IsNotExist(err) || errors.Is(err, unix.ENODEV) {
|
||||
err = nil
|
||||
}
|
||||
return configs.Undefined, err
|
||||
}
|
||||
defer fd.Close()
|
||||
|
||||
return readFreezer(dirPath, fd)
|
||||
}
|
||||
|
||||
func readFreezer(dirPath string, fd *os.File) (configs.FreezerState, error) {
|
||||
if _, err := fd.Seek(0, 0); err != nil {
|
||||
return configs.Undefined, err
|
||||
}
|
||||
state := make([]byte, 2)
|
||||
if _, err := fd.Read(state); err != nil {
|
||||
return configs.Undefined, err
|
||||
}
|
||||
switch string(state) {
|
||||
case "0\n":
|
||||
return configs.Thawed, nil
|
||||
case "1\n":
|
||||
return waitFrozen(dirPath)
|
||||
default:
|
||||
return configs.Undefined, fmt.Errorf(`unknown "cgroup.freeze" state: %q`, state)
|
||||
}
|
||||
}
|
||||
|
||||
// waitFrozen polls cgroup.events until it sees "frozen 1" in it.
|
||||
func waitFrozen(dirPath string) (configs.FreezerState, error) {
|
||||
fd, err := cgroups.OpenFile(dirPath, "cgroup.events", unix.O_RDONLY)
|
||||
if err != nil {
|
||||
return configs.Undefined, err
|
||||
}
|
||||
defer fd.Close()
|
||||
|
||||
// XXX: Simple wait/read/retry is used here. An implementation
|
||||
// based on poll(2) or inotify(7) is possible, but it makes the code
|
||||
// much more complicated. Maybe address this later.
|
||||
const (
|
||||
// Perform maxIter with waitTime in between iterations.
|
||||
waitTime = 10 * time.Millisecond
|
||||
maxIter = 1000
|
||||
)
|
||||
scanner := bufio.NewScanner(fd)
|
||||
for i := 0; scanner.Scan(); {
|
||||
if i == maxIter {
|
||||
return configs.Undefined, fmt.Errorf("timeout of %s reached waiting for the cgroup to freeze", waitTime*maxIter)
|
||||
}
|
||||
line := scanner.Text()
|
||||
val := strings.TrimPrefix(line, "frozen ")
|
||||
if val != line { // got prefix
|
||||
if val[0] == '1' {
|
||||
return configs.Frozen, nil
|
||||
}
|
||||
|
||||
i++
|
||||
// wait, then re-read
|
||||
time.Sleep(waitTime)
|
||||
_, err := fd.Seek(0, 0)
|
||||
if err != nil {
|
||||
return configs.Undefined, err
|
||||
}
|
||||
}
|
||||
}
|
||||
// Should only reach here either on read error,
|
||||
// or if the file does not contain "frozen " line.
|
||||
return configs.Undefined, scanner.Err()
|
||||
}
|
259
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go
generated
vendored
259
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go
generated
vendored
@ -1,259 +0,0 @@
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type parseError = fscommon.ParseError
|
||||
|
||||
type manager struct {
|
||||
config *configs.Cgroup
|
||||
// dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope"
|
||||
dirPath string
|
||||
// controllers is content of "cgroup.controllers" file.
|
||||
// excludes pseudo-controllers ("devices" and "freezer").
|
||||
controllers map[string]struct{}
|
||||
}
|
||||
|
||||
// NewManager creates a manager for cgroup v2 unified hierarchy.
|
||||
// dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope".
|
||||
// If dirPath is empty, it is automatically set using config.
|
||||
func NewManager(config *configs.Cgroup, dirPath string) (cgroups.Manager, error) {
|
||||
if dirPath == "" {
|
||||
var err error
|
||||
dirPath, err = defaultDirPath(config)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
m := &manager{
|
||||
config: config,
|
||||
dirPath: dirPath,
|
||||
}
|
||||
return m, nil
|
||||
}
|
||||
|
||||
func (m *manager) getControllers() error {
|
||||
if m.controllers != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
data, err := cgroups.ReadFile(m.dirPath, "cgroup.controllers")
|
||||
if err != nil {
|
||||
if m.config.Rootless && m.config.Path == "" {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
fields := strings.Fields(data)
|
||||
m.controllers = make(map[string]struct{}, len(fields))
|
||||
for _, c := range fields {
|
||||
m.controllers[c] = struct{}{}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *manager) Apply(pid int) error {
|
||||
if err := CreateCgroupPath(m.dirPath, m.config); err != nil {
|
||||
// Related tests:
|
||||
// - "runc create (no limits + no cgrouppath + no permission) succeeds"
|
||||
// - "runc create (rootless + no limits + cgrouppath + no permission) fails with permission error"
|
||||
// - "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
|
||||
if m.config.Rootless {
|
||||
if m.config.Path == "" {
|
||||
if blNeed, nErr := needAnyControllers(m.config.Resources); nErr == nil && !blNeed {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("rootless needs no limits + no cgrouppath when no permission is granted for cgroups: %w", err)
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
||||
if err := cgroups.WriteCgroupProc(m.dirPath, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *manager) GetPids() ([]int, error) {
|
||||
return cgroups.GetPids(m.dirPath)
|
||||
}
|
||||
|
||||
func (m *manager) GetAllPids() ([]int, error) {
|
||||
return cgroups.GetAllPids(m.dirPath)
|
||||
}
|
||||
|
||||
func (m *manager) GetStats() (*cgroups.Stats, error) {
|
||||
var errs []error
|
||||
|
||||
st := cgroups.NewStats()
|
||||
|
||||
// pids (since kernel 4.5)
|
||||
if err := statPids(m.dirPath, st); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
// memory (since kernel 4.5)
|
||||
if err := statMemory(m.dirPath, st); err != nil && !os.IsNotExist(err) {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
// io (since kernel 4.5)
|
||||
if err := statIo(m.dirPath, st); err != nil && !os.IsNotExist(err) {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
// cpu (since kernel 4.15)
|
||||
// Note cpu.stat is available even if the controller is not enabled.
|
||||
if err := statCpu(m.dirPath, st); err != nil && !os.IsNotExist(err) {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
// hugetlb (since kernel 5.6)
|
||||
if err := statHugeTlb(m.dirPath, st); err != nil && !os.IsNotExist(err) {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
// rdma (since kernel 4.11)
|
||||
if err := fscommon.RdmaGetStats(m.dirPath, st); err != nil && !os.IsNotExist(err) {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
if len(errs) > 0 && !m.config.Rootless {
|
||||
return st, fmt.Errorf("error while statting cgroup v2: %+v", errs)
|
||||
}
|
||||
return st, nil
|
||||
}
|
||||
|
||||
func (m *manager) Freeze(state configs.FreezerState) error {
|
||||
if m.config.Resources == nil {
|
||||
return errors.New("cannot toggle freezer: cgroups not configured for container")
|
||||
}
|
||||
if err := setFreezer(m.dirPath, state); err != nil {
|
||||
return err
|
||||
}
|
||||
m.config.Resources.Freezer = state
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *manager) Destroy() error {
|
||||
return cgroups.RemovePath(m.dirPath)
|
||||
}
|
||||
|
||||
func (m *manager) Path(_ string) string {
|
||||
return m.dirPath
|
||||
}
|
||||
|
||||
func (m *manager) Set(r *configs.Resources) error {
|
||||
if r == nil {
|
||||
return nil
|
||||
}
|
||||
if err := m.getControllers(); err != nil {
|
||||
return err
|
||||
}
|
||||
// pids (since kernel 4.5)
|
||||
if err := setPids(m.dirPath, r); err != nil {
|
||||
return err
|
||||
}
|
||||
// memory (since kernel 4.5)
|
||||
if err := setMemory(m.dirPath, r); err != nil {
|
||||
return err
|
||||
}
|
||||
// io (since kernel 4.5)
|
||||
if err := setIo(m.dirPath, r); err != nil {
|
||||
return err
|
||||
}
|
||||
// cpu (since kernel 4.15)
|
||||
if err := setCpu(m.dirPath, r); err != nil {
|
||||
return err
|
||||
}
|
||||
// devices (since kernel 4.15, pseudo-controller)
|
||||
//
|
||||
// When rootless is true, errors from the device subsystem are ignored because it is really not expected to work.
|
||||
// However, errors from other subsystems are not ignored.
|
||||
// see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
|
||||
if err := setDevices(m.dirPath, r); err != nil && !m.config.Rootless {
|
||||
return err
|
||||
}
|
||||
// cpuset (since kernel 5.0)
|
||||
if err := setCpuset(m.dirPath, r); err != nil {
|
||||
return err
|
||||
}
|
||||
// hugetlb (since kernel 5.6)
|
||||
if err := setHugeTlb(m.dirPath, r); err != nil {
|
||||
return err
|
||||
}
|
||||
// rdma (since kernel 4.11)
|
||||
if err := fscommon.RdmaSet(m.dirPath, r); err != nil {
|
||||
return err
|
||||
}
|
||||
// freezer (since kernel 5.2, pseudo-controller)
|
||||
if err := setFreezer(m.dirPath, r.Freezer); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := m.setUnified(r.Unified); err != nil {
|
||||
return err
|
||||
}
|
||||
m.config.Resources = r
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *manager) setUnified(res map[string]string) error {
|
||||
for k, v := range res {
|
||||
if strings.Contains(k, "/") {
|
||||
return fmt.Errorf("unified resource %q must be a file name (no slashes)", k)
|
||||
}
|
||||
if err := cgroups.WriteFile(m.dirPath, k, v); err != nil {
|
||||
// Check for both EPERM and ENOENT since O_CREAT is used by WriteFile.
|
||||
if errors.Is(err, os.ErrPermission) || errors.Is(err, os.ErrNotExist) {
|
||||
// Check if a controller is available,
|
||||
// to give more specific error if not.
|
||||
sk := strings.SplitN(k, ".", 2)
|
||||
if len(sk) != 2 {
|
||||
return fmt.Errorf("unified resource %q must be in the form CONTROLLER.PARAMETER", k)
|
||||
}
|
||||
c := sk[0]
|
||||
if _, ok := m.controllers[c]; !ok && c != "cgroup" {
|
||||
return fmt.Errorf("unified resource %q can't be set: controller %q not available", k, c)
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("unable to set unified resource %q: %w", k, err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *manager) GetPaths() map[string]string {
|
||||
paths := make(map[string]string, 1)
|
||||
paths[""] = m.dirPath
|
||||
return paths
|
||||
}
|
||||
|
||||
func (m *manager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.config, nil
|
||||
}
|
||||
|
||||
func (m *manager) GetFreezerState() (configs.FreezerState, error) {
|
||||
return getFreezer(m.dirPath)
|
||||
}
|
||||
|
||||
func (m *manager) Exists() bool {
|
||||
return cgroups.PathExists(m.dirPath)
|
||||
}
|
||||
|
||||
func OOMKillCount(path string) (uint64, error) {
|
||||
return fscommon.GetValueByKey(path, "memory.events", "oom_kill")
|
||||
}
|
||||
|
||||
func (m *manager) OOMKillCount() (uint64, error) {
|
||||
c, err := OOMKillCount(m.dirPath)
|
||||
if err != nil && m.config.Rootless && os.IsNotExist(err) {
|
||||
err = nil
|
||||
}
|
||||
|
||||
return c, err
|
||||
}
|
48
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/hugetlb.go
generated
vendored
48
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/hugetlb.go
generated
vendored
@ -1,48 +0,0 @@
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func isHugeTlbSet(r *configs.Resources) bool {
|
||||
return len(r.HugetlbLimit) > 0
|
||||
}
|
||||
|
||||
func setHugeTlb(dirPath string, r *configs.Resources) error {
|
||||
if !isHugeTlbSet(r) {
|
||||
return nil
|
||||
}
|
||||
for _, hugetlb := range r.HugetlbLimit {
|
||||
if err := cgroups.WriteFile(dirPath, "hugetlb."+hugetlb.Pagesize+".max", strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func statHugeTlb(dirPath string, stats *cgroups.Stats) error {
|
||||
hugetlbStats := cgroups.HugetlbStats{}
|
||||
for _, pagesize := range cgroups.HugePageSizes() {
|
||||
value, err := fscommon.GetCgroupParamUint(dirPath, "hugetlb."+pagesize+".current")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
hugetlbStats.Usage = value
|
||||
|
||||
fileName := "hugetlb." + pagesize + ".events"
|
||||
value, err = fscommon.GetValueByKey(dirPath, fileName, "max")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
hugetlbStats.Failcnt = value
|
||||
|
||||
stats.HugetlbStats[pagesize] = hugetlbStats
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
193
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/io.go
generated
vendored
193
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/io.go
generated
vendored
@ -1,193 +0,0 @@
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func isIoSet(r *configs.Resources) bool {
|
||||
return r.BlkioWeight != 0 ||
|
||||
len(r.BlkioWeightDevice) > 0 ||
|
||||
len(r.BlkioThrottleReadBpsDevice) > 0 ||
|
||||
len(r.BlkioThrottleWriteBpsDevice) > 0 ||
|
||||
len(r.BlkioThrottleReadIOPSDevice) > 0 ||
|
||||
len(r.BlkioThrottleWriteIOPSDevice) > 0
|
||||
}
|
||||
|
||||
// bfqDeviceWeightSupported checks for per-device BFQ weight support (added
|
||||
// in kernel v5.4, commit 795fe54c2a8) by reading from "io.bfq.weight".
|
||||
func bfqDeviceWeightSupported(bfq *os.File) bool {
|
||||
if bfq == nil {
|
||||
return false
|
||||
}
|
||||
_, _ = bfq.Seek(0, 0)
|
||||
buf := make([]byte, 32)
|
||||
_, _ = bfq.Read(buf)
|
||||
// If only a single number (default weight) if read back, we have older kernel.
|
||||
_, err := strconv.ParseInt(string(bytes.TrimSpace(buf)), 10, 64)
|
||||
return err != nil
|
||||
}
|
||||
|
||||
func setIo(dirPath string, r *configs.Resources) error {
|
||||
if !isIoSet(r) {
|
||||
return nil
|
||||
}
|
||||
|
||||
// If BFQ IO scheduler is available, use it.
|
||||
var bfq *os.File
|
||||
if r.BlkioWeight != 0 || len(r.BlkioWeightDevice) > 0 {
|
||||
var err error
|
||||
bfq, err = cgroups.OpenFile(dirPath, "io.bfq.weight", os.O_RDWR)
|
||||
if err == nil {
|
||||
defer bfq.Close()
|
||||
} else if !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if r.BlkioWeight != 0 {
|
||||
if bfq != nil { // Use BFQ.
|
||||
if _, err := bfq.WriteString(strconv.FormatUint(uint64(r.BlkioWeight), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
// Fallback to io.weight with a conversion scheme.
|
||||
v := cgroups.ConvertBlkIOToIOWeightValue(r.BlkioWeight)
|
||||
if err := cgroups.WriteFile(dirPath, "io.weight", strconv.FormatUint(v, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
if bfqDeviceWeightSupported(bfq) {
|
||||
for _, wd := range r.BlkioWeightDevice {
|
||||
if _, err := bfq.WriteString(wd.WeightString() + "\n"); err != nil {
|
||||
return fmt.Errorf("setting device weight %q: %w", wd.WeightString(), err)
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, td := range r.BlkioThrottleReadBpsDevice {
|
||||
if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("rbps")); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range r.BlkioThrottleWriteBpsDevice {
|
||||
if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("wbps")); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range r.BlkioThrottleReadIOPSDevice {
|
||||
if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("riops")); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range r.BlkioThrottleWriteIOPSDevice {
|
||||
if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("wiops")); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func readCgroup2MapFile(dirPath string, name string) (map[string][]string, error) {
|
||||
ret := map[string][]string{}
|
||||
f, err := cgroups.OpenFile(dirPath, name, os.O_RDONLY)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
scanner := bufio.NewScanner(f)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
parts := strings.Fields(line)
|
||||
if len(parts) < 2 {
|
||||
continue
|
||||
}
|
||||
ret[parts[0]] = parts[1:]
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return nil, &parseError{Path: dirPath, File: name, Err: err}
|
||||
}
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
func statIo(dirPath string, stats *cgroups.Stats) error {
|
||||
const file = "io.stat"
|
||||
values, err := readCgroup2MapFile(dirPath, file)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// more details on the io.stat file format: https://www.kernel.org/doc/Documentation/cgroup-v2.txt
|
||||
var parsedStats cgroups.BlkioStats
|
||||
for k, v := range values {
|
||||
d := strings.Split(k, ":")
|
||||
if len(d) != 2 {
|
||||
continue
|
||||
}
|
||||
major, err := strconv.ParseUint(d[0], 10, 64)
|
||||
if err != nil {
|
||||
return &parseError{Path: dirPath, File: file, Err: err}
|
||||
}
|
||||
minor, err := strconv.ParseUint(d[1], 10, 64)
|
||||
if err != nil {
|
||||
return &parseError{Path: dirPath, File: file, Err: err}
|
||||
}
|
||||
|
||||
for _, item := range v {
|
||||
d := strings.Split(item, "=")
|
||||
if len(d) != 2 {
|
||||
continue
|
||||
}
|
||||
op := d[0]
|
||||
|
||||
// Map to the cgroupv1 naming and layout (in separate tables).
|
||||
var targetTable *[]cgroups.BlkioStatEntry
|
||||
switch op {
|
||||
// Equivalent to cgroupv1's blkio.io_service_bytes.
|
||||
case "rbytes":
|
||||
op = "Read"
|
||||
targetTable = &parsedStats.IoServiceBytesRecursive
|
||||
case "wbytes":
|
||||
op = "Write"
|
||||
targetTable = &parsedStats.IoServiceBytesRecursive
|
||||
// Equivalent to cgroupv1's blkio.io_serviced.
|
||||
case "rios":
|
||||
op = "Read"
|
||||
targetTable = &parsedStats.IoServicedRecursive
|
||||
case "wios":
|
||||
op = "Write"
|
||||
targetTable = &parsedStats.IoServicedRecursive
|
||||
default:
|
||||
// Skip over entries we cannot map to cgroupv1 stats for now.
|
||||
// In the future we should expand the stats struct to include
|
||||
// them.
|
||||
logrus.Debugf("cgroupv2 io stats: skipping over unmappable %s entry", item)
|
||||
continue
|
||||
}
|
||||
|
||||
value, err := strconv.ParseUint(d[1], 10, 64)
|
||||
if err != nil {
|
||||
return &parseError{Path: dirPath, File: file, Err: err}
|
||||
}
|
||||
|
||||
entry := cgroups.BlkioStatEntry{
|
||||
Op: op,
|
||||
Major: major,
|
||||
Minor: minor,
|
||||
Value: value,
|
||||
}
|
||||
*targetTable = append(*targetTable, entry)
|
||||
}
|
||||
}
|
||||
stats.BlkioStats = parsedStats
|
||||
return nil
|
||||
}
|
220
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go
generated
vendored
220
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go
generated
vendored
@ -1,220 +0,0 @@
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"math"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
// numToStr converts an int64 value to a string for writing to a
|
||||
// cgroupv2 files with .min, .max, .low, or .high suffix.
|
||||
// The value of -1 is converted to "max" for cgroupv1 compatibility
|
||||
// (which used to write -1 to remove the limit).
|
||||
func numToStr(value int64) (ret string) {
|
||||
switch {
|
||||
case value == 0:
|
||||
ret = ""
|
||||
case value == -1:
|
||||
ret = "max"
|
||||
default:
|
||||
ret = strconv.FormatInt(value, 10)
|
||||
}
|
||||
|
||||
return ret
|
||||
}
|
||||
|
||||
func isMemorySet(r *configs.Resources) bool {
|
||||
return r.MemoryReservation != 0 || r.Memory != 0 || r.MemorySwap != 0
|
||||
}
|
||||
|
||||
func setMemory(dirPath string, r *configs.Resources) error {
|
||||
if !isMemorySet(r) {
|
||||
return nil
|
||||
}
|
||||
swap, err := cgroups.ConvertMemorySwapToCgroupV2Value(r.MemorySwap, r.Memory)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
swapStr := numToStr(swap)
|
||||
if swapStr == "" && swap == 0 && r.MemorySwap > 0 {
|
||||
// memory and memorySwap set to the same value -- disable swap
|
||||
swapStr = "0"
|
||||
}
|
||||
// never write empty string to `memory.swap.max`, it means set to 0.
|
||||
if swapStr != "" {
|
||||
if err := cgroups.WriteFile(dirPath, "memory.swap.max", swapStr); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if val := numToStr(r.Memory); val != "" {
|
||||
if err := cgroups.WriteFile(dirPath, "memory.max", val); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// cgroup.Resources.KernelMemory is ignored
|
||||
|
||||
if val := numToStr(r.MemoryReservation); val != "" {
|
||||
if err := cgroups.WriteFile(dirPath, "memory.low", val); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func statMemory(dirPath string, stats *cgroups.Stats) error {
|
||||
const file = "memory.stat"
|
||||
statsFile, err := cgroups.OpenFile(dirPath, file, os.O_RDONLY)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer statsFile.Close()
|
||||
|
||||
sc := bufio.NewScanner(statsFile)
|
||||
for sc.Scan() {
|
||||
t, v, err := fscommon.ParseKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return &parseError{Path: dirPath, File: file, Err: err}
|
||||
}
|
||||
stats.MemoryStats.Stats[t] = v
|
||||
}
|
||||
if err := sc.Err(); err != nil {
|
||||
return &parseError{Path: dirPath, File: file, Err: err}
|
||||
}
|
||||
stats.MemoryStats.Cache = stats.MemoryStats.Stats["file"]
|
||||
// Unlike cgroup v1 which has memory.use_hierarchy binary knob,
|
||||
// cgroup v2 is always hierarchical.
|
||||
stats.MemoryStats.UseHierarchy = true
|
||||
|
||||
memoryUsage, err := getMemoryDataV2(dirPath, "")
|
||||
if err != nil {
|
||||
if errors.Is(err, unix.ENOENT) && dirPath == UnifiedMountpoint {
|
||||
// The root cgroup does not have memory.{current,max}
|
||||
// so emulate those using data from /proc/meminfo and
|
||||
// /sys/fs/cgroup/memory.stat
|
||||
return rootStatsFromMeminfo(stats)
|
||||
}
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.Usage = memoryUsage
|
||||
swapUsage, err := getMemoryDataV2(dirPath, "swap")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// As cgroup v1 reports SwapUsage values as mem+swap combined,
|
||||
// while in cgroup v2 swap values do not include memory,
|
||||
// report combined mem+swap for v1 compatibility.
|
||||
swapUsage.Usage += memoryUsage.Usage
|
||||
if swapUsage.Limit != math.MaxUint64 {
|
||||
swapUsage.Limit += memoryUsage.Limit
|
||||
}
|
||||
stats.MemoryStats.SwapUsage = swapUsage
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func getMemoryDataV2(path, name string) (cgroups.MemoryData, error) {
|
||||
memoryData := cgroups.MemoryData{}
|
||||
|
||||
moduleName := "memory"
|
||||
if name != "" {
|
||||
moduleName = "memory." + name
|
||||
}
|
||||
usage := moduleName + ".current"
|
||||
limit := moduleName + ".max"
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(path, usage)
|
||||
if err != nil {
|
||||
if name != "" && os.IsNotExist(err) {
|
||||
// Ignore EEXIST as there's no swap accounting
|
||||
// if kernel CONFIG_MEMCG_SWAP is not set or
|
||||
// swapaccount=0 kernel boot parameter is given.
|
||||
return cgroups.MemoryData{}, nil
|
||||
}
|
||||
return cgroups.MemoryData{}, err
|
||||
}
|
||||
memoryData.Usage = value
|
||||
|
||||
value, err = fscommon.GetCgroupParamUint(path, limit)
|
||||
if err != nil {
|
||||
return cgroups.MemoryData{}, err
|
||||
}
|
||||
memoryData.Limit = value
|
||||
|
||||
return memoryData, nil
|
||||
}
|
||||
|
||||
func rootStatsFromMeminfo(stats *cgroups.Stats) error {
|
||||
const file = "/proc/meminfo"
|
||||
f, err := os.Open(file)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
// Fields we are interested in.
|
||||
var (
|
||||
swap_free uint64
|
||||
swap_total uint64
|
||||
)
|
||||
mem := map[string]*uint64{
|
||||
"SwapFree": &swap_free,
|
||||
"SwapTotal": &swap_total,
|
||||
}
|
||||
|
||||
found := 0
|
||||
sc := bufio.NewScanner(f)
|
||||
for sc.Scan() {
|
||||
parts := strings.SplitN(sc.Text(), ":", 3)
|
||||
if len(parts) != 2 {
|
||||
// Should not happen.
|
||||
continue
|
||||
}
|
||||
k := parts[0]
|
||||
p, ok := mem[k]
|
||||
if !ok {
|
||||
// Unknown field -- not interested.
|
||||
continue
|
||||
}
|
||||
vStr := strings.TrimSpace(strings.TrimSuffix(parts[1], " kB"))
|
||||
*p, err = strconv.ParseUint(vStr, 10, 64)
|
||||
if err != nil {
|
||||
return &parseError{File: file, Err: errors.New("bad value for " + k)}
|
||||
}
|
||||
|
||||
found++
|
||||
if found == len(mem) {
|
||||
// Got everything we need -- skip the rest.
|
||||
break
|
||||
}
|
||||
}
|
||||
if err := sc.Err(); err != nil {
|
||||
return &parseError{Path: "", File: file, Err: err}
|
||||
}
|
||||
|
||||
// cgroup v1 `usage_in_bytes` reports memory usage as the sum of
|
||||
// - rss (NR_ANON_MAPPED)
|
||||
// - cache (NR_FILE_PAGES)
|
||||
// cgroup v1 reports SwapUsage values as mem+swap combined
|
||||
// cgroup v2 reports rss and cache as anon and file.
|
||||
// sum `anon` + `file` to report the same value as `usage_in_bytes` in v1.
|
||||
// sum swap usage as combined mem+swap usage for consistency as well.
|
||||
stats.MemoryStats.Usage.Usage = stats.MemoryStats.Stats["anon"] + stats.MemoryStats.Stats["file"]
|
||||
stats.MemoryStats.Usage.Limit = math.MaxUint64
|
||||
stats.MemoryStats.SwapUsage.Usage = (swap_total - swap_free) * 1024
|
||||
stats.MemoryStats.SwapUsage.Limit = math.MaxUint64
|
||||
stats.MemoryStats.SwapUsage.Usage += stats.MemoryStats.Usage.Usage
|
||||
|
||||
return nil
|
||||
}
|
72
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/pids.go
generated
vendored
72
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/pids.go
generated
vendored
@ -1,72 +0,0 @@
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"math"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func isPidsSet(r *configs.Resources) bool {
|
||||
return r.PidsLimit != 0
|
||||
}
|
||||
|
||||
func setPids(dirPath string, r *configs.Resources) error {
|
||||
if !isPidsSet(r) {
|
||||
return nil
|
||||
}
|
||||
if val := numToStr(r.PidsLimit); val != "" {
|
||||
if err := cgroups.WriteFile(dirPath, "pids.max", val); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func statPidsFromCgroupProcs(dirPath string, stats *cgroups.Stats) error {
|
||||
// if the controller is not enabled, let's read PIDS from cgroups.procs
|
||||
// (or threads if cgroup.threads is enabled)
|
||||
contents, err := cgroups.ReadFile(dirPath, "cgroup.procs")
|
||||
if errors.Is(err, unix.ENOTSUP) {
|
||||
contents, err = cgroups.ReadFile(dirPath, "cgroup.threads")
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
pids := strings.Count(contents, "\n")
|
||||
stats.PidsStats.Current = uint64(pids)
|
||||
stats.PidsStats.Limit = 0
|
||||
return nil
|
||||
}
|
||||
|
||||
func statPids(dirPath string, stats *cgroups.Stats) error {
|
||||
current, err := fscommon.GetCgroupParamUint(dirPath, "pids.current")
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return statPidsFromCgroupProcs(dirPath, stats)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
max, err := fscommon.GetCgroupParamUint(dirPath, "pids.max")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// If no limit is set, read from pids.max returns "max", which is
|
||||
// converted to MaxUint64 by GetCgroupParamUint. Historically, we
|
||||
// represent "no limit" for pids as 0, thus this conversion.
|
||||
if max == math.MaxUint64 {
|
||||
max = 0
|
||||
}
|
||||
|
||||
stats.PidsStats.Current = current
|
||||
stats.PidsStats.Limit = max
|
||||
return nil
|
||||
}
|
121
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/rdma.go
generated
vendored
121
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/rdma.go
generated
vendored
@ -1,121 +0,0 @@
|
||||
package fscommon
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"math"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// parseRdmaKV parses raw string to RdmaEntry.
|
||||
func parseRdmaKV(raw string, entry *cgroups.RdmaEntry) error {
|
||||
var value uint32
|
||||
|
||||
parts := strings.SplitN(raw, "=", 3)
|
||||
|
||||
if len(parts) != 2 {
|
||||
return errors.New("Unable to parse RDMA entry")
|
||||
}
|
||||
|
||||
k, v := parts[0], parts[1]
|
||||
|
||||
if v == "max" {
|
||||
value = math.MaxUint32
|
||||
} else {
|
||||
val64, err := strconv.ParseUint(v, 10, 32)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
value = uint32(val64)
|
||||
}
|
||||
if k == "hca_handle" {
|
||||
entry.HcaHandles = value
|
||||
} else if k == "hca_object" {
|
||||
entry.HcaObjects = value
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// readRdmaEntries reads and converts array of rawstrings to RdmaEntries from file.
|
||||
// example entry: mlx4_0 hca_handle=2 hca_object=2000
|
||||
func readRdmaEntries(dir, file string) ([]cgroups.RdmaEntry, error) {
|
||||
rdmaEntries := make([]cgroups.RdmaEntry, 0)
|
||||
fd, err := cgroups.OpenFile(dir, file, unix.O_RDONLY)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer fd.Close() //nolint:errorlint
|
||||
scanner := bufio.NewScanner(fd)
|
||||
for scanner.Scan() {
|
||||
parts := strings.SplitN(scanner.Text(), " ", 4)
|
||||
if len(parts) == 3 {
|
||||
entry := new(cgroups.RdmaEntry)
|
||||
entry.Device = parts[0]
|
||||
err = parseRdmaKV(parts[1], entry)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
err = parseRdmaKV(parts[2], entry)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
rdmaEntries = append(rdmaEntries, *entry)
|
||||
}
|
||||
}
|
||||
return rdmaEntries, scanner.Err()
|
||||
}
|
||||
|
||||
// RdmaGetStats returns rdma stats such as totalLimit and current entries.
|
||||
func RdmaGetStats(path string, stats *cgroups.Stats) error {
|
||||
currentEntries, err := readRdmaEntries(path, "rdma.current")
|
||||
if err != nil {
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
err = nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
maxEntries, err := readRdmaEntries(path, "rdma.max")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// If device got removed between reading two files, ignore returning stats.
|
||||
if len(currentEntries) != len(maxEntries) {
|
||||
return nil
|
||||
}
|
||||
|
||||
stats.RdmaStats = cgroups.RdmaStats{
|
||||
RdmaLimit: maxEntries,
|
||||
RdmaCurrent: currentEntries,
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func createCmdString(device string, limits configs.LinuxRdma) string {
|
||||
cmdString := device
|
||||
if limits.HcaHandles != nil {
|
||||
cmdString += " hca_handle=" + strconv.FormatUint(uint64(*limits.HcaHandles), 10)
|
||||
}
|
||||
if limits.HcaObjects != nil {
|
||||
cmdString += " hca_object=" + strconv.FormatUint(uint64(*limits.HcaObjects), 10)
|
||||
}
|
||||
return cmdString
|
||||
}
|
||||
|
||||
// RdmaSet sets RDMA resources.
|
||||
func RdmaSet(path string, r *configs.Resources) error {
|
||||
for device, limits := range r.Rdma {
|
||||
if err := cgroups.WriteFile(path, "rdma.max", createCmdString(device, limits)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
145
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/utils.go
generated
vendored
145
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/utils.go
generated
vendored
@ -1,145 +0,0 @@
|
||||
package fscommon
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"path"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
)
|
||||
|
||||
var (
|
||||
// Deprecated: use cgroups.OpenFile instead.
|
||||
OpenFile = cgroups.OpenFile
|
||||
// Deprecated: use cgroups.ReadFile instead.
|
||||
ReadFile = cgroups.ReadFile
|
||||
// Deprecated: use cgroups.WriteFile instead.
|
||||
WriteFile = cgroups.WriteFile
|
||||
)
|
||||
|
||||
// ParseError records a parse error details, including the file path.
|
||||
type ParseError struct {
|
||||
Path string
|
||||
File string
|
||||
Err error
|
||||
}
|
||||
|
||||
func (e *ParseError) Error() string {
|
||||
return "unable to parse " + path.Join(e.Path, e.File) + ": " + e.Err.Error()
|
||||
}
|
||||
|
||||
func (e *ParseError) Unwrap() error { return e.Err }
|
||||
|
||||
// ParseUint converts a string to an uint64 integer.
|
||||
// Negative values are returned at zero as, due to kernel bugs,
|
||||
// some of the memory cgroup stats can be negative.
|
||||
func ParseUint(s string, base, bitSize int) (uint64, error) {
|
||||
value, err := strconv.ParseUint(s, base, bitSize)
|
||||
if err != nil {
|
||||
intValue, intErr := strconv.ParseInt(s, base, bitSize)
|
||||
// 1. Handle negative values greater than MinInt64 (and)
|
||||
// 2. Handle negative values lesser than MinInt64
|
||||
if intErr == nil && intValue < 0 {
|
||||
return 0, nil
|
||||
} else if errors.Is(intErr, strconv.ErrRange) && intValue < 0 {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
return value, err
|
||||
}
|
||||
|
||||
return value, nil
|
||||
}
|
||||
|
||||
// ParseKeyValue parses a space-separated "name value" kind of cgroup
|
||||
// parameter and returns its key as a string, and its value as uint64
|
||||
// (ParseUint is used to convert the value). For example,
|
||||
// "io_service_bytes 1234" will be returned as "io_service_bytes", 1234.
|
||||
func ParseKeyValue(t string) (string, uint64, error) {
|
||||
parts := strings.SplitN(t, " ", 3)
|
||||
if len(parts) != 2 {
|
||||
return "", 0, fmt.Errorf("line %q is not in key value format", t)
|
||||
}
|
||||
|
||||
value, err := ParseUint(parts[1], 10, 64)
|
||||
if err != nil {
|
||||
return "", 0, err
|
||||
}
|
||||
|
||||
return parts[0], value, nil
|
||||
}
|
||||
|
||||
// GetValueByKey reads a key-value pairs from the specified cgroup file,
|
||||
// and returns a value of the specified key. ParseUint is used for value
|
||||
// conversion.
|
||||
func GetValueByKey(path, file, key string) (uint64, error) {
|
||||
content, err := cgroups.ReadFile(path, file)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
lines := strings.Split(content, "\n")
|
||||
for _, line := range lines {
|
||||
arr := strings.Split(line, " ")
|
||||
if len(arr) == 2 && arr[0] == key {
|
||||
val, err := ParseUint(arr[1], 10, 64)
|
||||
if err != nil {
|
||||
err = &ParseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
return val, err
|
||||
}
|
||||
}
|
||||
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// GetCgroupParamUint reads a single uint64 value from the specified cgroup file.
|
||||
// If the value read is "max", the math.MaxUint64 is returned.
|
||||
func GetCgroupParamUint(path, file string) (uint64, error) {
|
||||
contents, err := GetCgroupParamString(path, file)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
contents = strings.TrimSpace(contents)
|
||||
if contents == "max" {
|
||||
return math.MaxUint64, nil
|
||||
}
|
||||
|
||||
res, err := ParseUint(contents, 10, 64)
|
||||
if err != nil {
|
||||
return res, &ParseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// GetCgroupParamInt reads a single int64 value from specified cgroup file.
|
||||
// If the value read is "max", the math.MaxInt64 is returned.
|
||||
func GetCgroupParamInt(path, file string) (int64, error) {
|
||||
contents, err := cgroups.ReadFile(path, file)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
contents = strings.TrimSpace(contents)
|
||||
if contents == "max" {
|
||||
return math.MaxInt64, nil
|
||||
}
|
||||
|
||||
res, err := strconv.ParseInt(contents, 10, 64)
|
||||
if err != nil {
|
||||
return res, &ParseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// GetCgroupParamString reads a string from the specified cgroup file.
|
||||
func GetCgroupParamString(path, file string) (string, error) {
|
||||
contents, err := cgroups.ReadFile(path, file)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return strings.TrimSpace(contents), nil
|
||||
}
|
27
vendor/github.com/opencontainers/runc/libcontainer/cgroups/getallpids.go
generated
vendored
27
vendor/github.com/opencontainers/runc/libcontainer/cgroups/getallpids.go
generated
vendored
@ -1,27 +0,0 @@
|
||||
package cgroups
|
||||
|
||||
import (
|
||||
"io/fs"
|
||||
"path/filepath"
|
||||
)
|
||||
|
||||
// GetAllPids returns all pids from the cgroup identified by path, and all its
|
||||
// sub-cgroups.
|
||||
func GetAllPids(path string) ([]int, error) {
|
||||
var pids []int
|
||||
err := filepath.WalkDir(path, func(p string, d fs.DirEntry, iErr error) error {
|
||||
if iErr != nil {
|
||||
return iErr
|
||||
}
|
||||
if !d.IsDir() {
|
||||
return nil
|
||||
}
|
||||
cPids, err := readProcsFile(p)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
pids = append(pids, cPids...)
|
||||
return nil
|
||||
})
|
||||
return pids, err
|
||||
}
|
173
vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go
generated
vendored
173
vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go
generated
vendored
@ -1,173 +0,0 @@
|
||||
package cgroups
|
||||
|
||||
type ThrottlingData struct {
|
||||
// Number of periods with throttling active
|
||||
Periods uint64 `json:"periods,omitempty"`
|
||||
// Number of periods when the container hit its throttling limit.
|
||||
ThrottledPeriods uint64 `json:"throttled_periods,omitempty"`
|
||||
// Aggregate time the container was throttled for in nanoseconds.
|
||||
ThrottledTime uint64 `json:"throttled_time,omitempty"`
|
||||
}
|
||||
|
||||
// CpuUsage denotes the usage of a CPU.
|
||||
// All CPU stats are aggregate since container inception.
|
||||
type CpuUsage struct {
|
||||
// Total CPU time consumed.
|
||||
// Units: nanoseconds.
|
||||
TotalUsage uint64 `json:"total_usage,omitempty"`
|
||||
// Total CPU time consumed per core.
|
||||
// Units: nanoseconds.
|
||||
PercpuUsage []uint64 `json:"percpu_usage,omitempty"`
|
||||
// CPU time consumed per core in kernel mode
|
||||
// Units: nanoseconds.
|
||||
PercpuUsageInKernelmode []uint64 `json:"percpu_usage_in_kernelmode"`
|
||||
// CPU time consumed per core in user mode
|
||||
// Units: nanoseconds.
|
||||
PercpuUsageInUsermode []uint64 `json:"percpu_usage_in_usermode"`
|
||||
// Time spent by tasks of the cgroup in kernel mode.
|
||||
// Units: nanoseconds.
|
||||
UsageInKernelmode uint64 `json:"usage_in_kernelmode"`
|
||||
// Time spent by tasks of the cgroup in user mode.
|
||||
// Units: nanoseconds.
|
||||
UsageInUsermode uint64 `json:"usage_in_usermode"`
|
||||
}
|
||||
|
||||
type CpuStats struct {
|
||||
CpuUsage CpuUsage `json:"cpu_usage,omitempty"`
|
||||
ThrottlingData ThrottlingData `json:"throttling_data,omitempty"`
|
||||
}
|
||||
|
||||
type CPUSetStats struct {
|
||||
// List of the physical numbers of the CPUs on which processes
|
||||
// in that cpuset are allowed to execute
|
||||
CPUs []uint16 `json:"cpus,omitempty"`
|
||||
// cpu_exclusive flag
|
||||
CPUExclusive uint64 `json:"cpu_exclusive"`
|
||||
// List of memory nodes on which processes in that cpuset
|
||||
// are allowed to allocate memory
|
||||
Mems []uint16 `json:"mems,omitempty"`
|
||||
// mem_hardwall flag
|
||||
MemHardwall uint64 `json:"mem_hardwall"`
|
||||
// mem_exclusive flag
|
||||
MemExclusive uint64 `json:"mem_exclusive"`
|
||||
// memory_migrate flag
|
||||
MemoryMigrate uint64 `json:"memory_migrate"`
|
||||
// memory_spread page flag
|
||||
MemorySpreadPage uint64 `json:"memory_spread_page"`
|
||||
// memory_spread slab flag
|
||||
MemorySpreadSlab uint64 `json:"memory_spread_slab"`
|
||||
// memory_pressure
|
||||
MemoryPressure uint64 `json:"memory_pressure"`
|
||||
// sched_load balance flag
|
||||
SchedLoadBalance uint64 `json:"sched_load_balance"`
|
||||
// sched_relax_domain_level
|
||||
SchedRelaxDomainLevel int64 `json:"sched_relax_domain_level"`
|
||||
}
|
||||
|
||||
type MemoryData struct {
|
||||
Usage uint64 `json:"usage,omitempty"`
|
||||
MaxUsage uint64 `json:"max_usage,omitempty"`
|
||||
Failcnt uint64 `json:"failcnt"`
|
||||
Limit uint64 `json:"limit"`
|
||||
}
|
||||
|
||||
type MemoryStats struct {
|
||||
// memory used for cache
|
||||
Cache uint64 `json:"cache,omitempty"`
|
||||
// usage of memory
|
||||
Usage MemoryData `json:"usage,omitempty"`
|
||||
// usage of memory + swap
|
||||
SwapUsage MemoryData `json:"swap_usage,omitempty"`
|
||||
// usage of kernel memory
|
||||
KernelUsage MemoryData `json:"kernel_usage,omitempty"`
|
||||
// usage of kernel TCP memory
|
||||
KernelTCPUsage MemoryData `json:"kernel_tcp_usage,omitempty"`
|
||||
// usage of memory pages by NUMA node
|
||||
// see chapter 5.6 of memory controller documentation
|
||||
PageUsageByNUMA PageUsageByNUMA `json:"page_usage_by_numa,omitempty"`
|
||||
// if true, memory usage is accounted for throughout a hierarchy of cgroups.
|
||||
UseHierarchy bool `json:"use_hierarchy"`
|
||||
|
||||
Stats map[string]uint64 `json:"stats,omitempty"`
|
||||
}
|
||||
|
||||
type PageUsageByNUMA struct {
|
||||
// Embedding is used as types can't be recursive.
|
||||
PageUsageByNUMAInner
|
||||
Hierarchical PageUsageByNUMAInner `json:"hierarchical,omitempty"`
|
||||
}
|
||||
|
||||
type PageUsageByNUMAInner struct {
|
||||
Total PageStats `json:"total,omitempty"`
|
||||
File PageStats `json:"file,omitempty"`
|
||||
Anon PageStats `json:"anon,omitempty"`
|
||||
Unevictable PageStats `json:"unevictable,omitempty"`
|
||||
}
|
||||
|
||||
type PageStats struct {
|
||||
Total uint64 `json:"total,omitempty"`
|
||||
Nodes map[uint8]uint64 `json:"nodes,omitempty"`
|
||||
}
|
||||
|
||||
type PidsStats struct {
|
||||
// number of pids in the cgroup
|
||||
Current uint64 `json:"current,omitempty"`
|
||||
// active pids hard limit
|
||||
Limit uint64 `json:"limit,omitempty"`
|
||||
}
|
||||
|
||||
type BlkioStatEntry struct {
|
||||
Major uint64 `json:"major,omitempty"`
|
||||
Minor uint64 `json:"minor,omitempty"`
|
||||
Op string `json:"op,omitempty"`
|
||||
Value uint64 `json:"value,omitempty"`
|
||||
}
|
||||
|
||||
type BlkioStats struct {
|
||||
// number of bytes transferred to and from the block device
|
||||
IoServiceBytesRecursive []BlkioStatEntry `json:"io_service_bytes_recursive,omitempty"`
|
||||
IoServicedRecursive []BlkioStatEntry `json:"io_serviced_recursive,omitempty"`
|
||||
IoQueuedRecursive []BlkioStatEntry `json:"io_queue_recursive,omitempty"`
|
||||
IoServiceTimeRecursive []BlkioStatEntry `json:"io_service_time_recursive,omitempty"`
|
||||
IoWaitTimeRecursive []BlkioStatEntry `json:"io_wait_time_recursive,omitempty"`
|
||||
IoMergedRecursive []BlkioStatEntry `json:"io_merged_recursive,omitempty"`
|
||||
IoTimeRecursive []BlkioStatEntry `json:"io_time_recursive,omitempty"`
|
||||
SectorsRecursive []BlkioStatEntry `json:"sectors_recursive,omitempty"`
|
||||
}
|
||||
|
||||
type HugetlbStats struct {
|
||||
// current res_counter usage for hugetlb
|
||||
Usage uint64 `json:"usage,omitempty"`
|
||||
// maximum usage ever recorded.
|
||||
MaxUsage uint64 `json:"max_usage,omitempty"`
|
||||
// number of times hugetlb usage allocation failure.
|
||||
Failcnt uint64 `json:"failcnt"`
|
||||
}
|
||||
|
||||
type RdmaEntry struct {
|
||||
Device string `json:"device,omitempty"`
|
||||
HcaHandles uint32 `json:"hca_handles,omitempty"`
|
||||
HcaObjects uint32 `json:"hca_objects,omitempty"`
|
||||
}
|
||||
|
||||
type RdmaStats struct {
|
||||
RdmaLimit []RdmaEntry `json:"rdma_limit,omitempty"`
|
||||
RdmaCurrent []RdmaEntry `json:"rdma_current,omitempty"`
|
||||
}
|
||||
|
||||
type Stats struct {
|
||||
CpuStats CpuStats `json:"cpu_stats,omitempty"`
|
||||
CPUSetStats CPUSetStats `json:"cpuset_stats,omitempty"`
|
||||
MemoryStats MemoryStats `json:"memory_stats,omitempty"`
|
||||
PidsStats PidsStats `json:"pids_stats,omitempty"`
|
||||
BlkioStats BlkioStats `json:"blkio_stats,omitempty"`
|
||||
// the map is in the format "size of hugepage: stats of the hugepage"
|
||||
HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"`
|
||||
RdmaStats RdmaStats `json:"rdma_stats,omitempty"`
|
||||
}
|
||||
|
||||
func NewStats() *Stats {
|
||||
memoryStats := MemoryStats{Stats: make(map[string]uint64)}
|
||||
hugetlbStats := make(map[string]HugetlbStats)
|
||||
return &Stats{MemoryStats: memoryStats, HugetlbStats: hugetlbStats}
|
||||
}
|
564
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/common.go
generated
vendored
564
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/common.go
generated
vendored
@ -1,564 +0,0 @@
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||
dbus "github.com/godbus/dbus/v5"
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
cgroupdevices "github.com/opencontainers/runc/libcontainer/cgroups/devices"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/devices"
|
||||
)
|
||||
|
||||
const (
|
||||
// Default kernel value for cpu quota period is 100000 us (100 ms), same for v1 and v2.
|
||||
// v1: https://www.kernel.org/doc/html/latest/scheduler/sched-bwc.html and
|
||||
// v2: https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html
|
||||
defCPUQuotaPeriod = uint64(100000)
|
||||
)
|
||||
|
||||
var (
|
||||
versionOnce sync.Once
|
||||
version int
|
||||
|
||||
isRunningSystemdOnce sync.Once
|
||||
isRunningSystemd bool
|
||||
)
|
||||
|
||||
// NOTE: This function comes from package github.com/coreos/go-systemd/util
|
||||
// It was borrowed here to avoid a dependency on cgo.
|
||||
//
|
||||
// IsRunningSystemd checks whether the host was booted with systemd as its init
|
||||
// system. This functions similarly to systemd's `sd_booted(3)`: internally, it
|
||||
// checks whether /run/systemd/system/ exists and is a directory.
|
||||
// http://www.freedesktop.org/software/systemd/man/sd_booted.html
|
||||
func IsRunningSystemd() bool {
|
||||
isRunningSystemdOnce.Do(func() {
|
||||
fi, err := os.Lstat("/run/systemd/system")
|
||||
isRunningSystemd = err == nil && fi.IsDir()
|
||||
})
|
||||
return isRunningSystemd
|
||||
}
|
||||
|
||||
// systemd represents slice hierarchy using `-`, so we need to follow suit when
|
||||
// generating the path of slice. Essentially, test-a-b.slice becomes
|
||||
// /test.slice/test-a.slice/test-a-b.slice.
|
||||
func ExpandSlice(slice string) (string, error) {
|
||||
suffix := ".slice"
|
||||
// Name has to end with ".slice", but can't be just ".slice".
|
||||
if len(slice) < len(suffix) || !strings.HasSuffix(slice, suffix) {
|
||||
return "", fmt.Errorf("invalid slice name: %s", slice)
|
||||
}
|
||||
|
||||
// Path-separators are not allowed.
|
||||
if strings.Contains(slice, "/") {
|
||||
return "", fmt.Errorf("invalid slice name: %s", slice)
|
||||
}
|
||||
|
||||
var path, prefix string
|
||||
sliceName := strings.TrimSuffix(slice, suffix)
|
||||
// if input was -.slice, we should just return root now
|
||||
if sliceName == "-" {
|
||||
return "/", nil
|
||||
}
|
||||
for _, component := range strings.Split(sliceName, "-") {
|
||||
// test--a.slice isn't permitted, nor is -test.slice.
|
||||
if component == "" {
|
||||
return "", fmt.Errorf("invalid slice name: %s", slice)
|
||||
}
|
||||
|
||||
// Append the component to the path and to the prefix.
|
||||
path += "/" + prefix + component + suffix
|
||||
prefix += component + "-"
|
||||
}
|
||||
return path, nil
|
||||
}
|
||||
|
||||
func groupPrefix(ruleType devices.Type) (string, error) {
|
||||
switch ruleType {
|
||||
case devices.BlockDevice:
|
||||
return "block-", nil
|
||||
case devices.CharDevice:
|
||||
return "char-", nil
|
||||
default:
|
||||
return "", fmt.Errorf("device type %v has no group prefix", ruleType)
|
||||
}
|
||||
}
|
||||
|
||||
// findDeviceGroup tries to find the device group name (as listed in
|
||||
// /proc/devices) with the type prefixed as required for DeviceAllow, for a
|
||||
// given (type, major) combination. If more than one device group exists, an
|
||||
// arbitrary one is chosen.
|
||||
func findDeviceGroup(ruleType devices.Type, ruleMajor int64) (string, error) {
|
||||
fh, err := os.Open("/proc/devices")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer fh.Close()
|
||||
|
||||
prefix, err := groupPrefix(ruleType)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
scanner := bufio.NewScanner(fh)
|
||||
var currentType devices.Type
|
||||
for scanner.Scan() {
|
||||
// We need to strip spaces because the first number is column-aligned.
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
|
||||
// Handle the "header" lines.
|
||||
switch line {
|
||||
case "Block devices:":
|
||||
currentType = devices.BlockDevice
|
||||
continue
|
||||
case "Character devices:":
|
||||
currentType = devices.CharDevice
|
||||
continue
|
||||
case "":
|
||||
continue
|
||||
}
|
||||
|
||||
// Skip lines unrelated to our type.
|
||||
if currentType != ruleType {
|
||||
continue
|
||||
}
|
||||
|
||||
// Parse out the (major, name).
|
||||
var (
|
||||
currMajor int64
|
||||
currName string
|
||||
)
|
||||
if n, err := fmt.Sscanf(line, "%d %s", &currMajor, &currName); err != nil || n != 2 {
|
||||
if err == nil {
|
||||
err = errors.New("wrong number of fields")
|
||||
}
|
||||
return "", fmt.Errorf("scan /proc/devices line %q: %w", line, err)
|
||||
}
|
||||
|
||||
if currMajor == ruleMajor {
|
||||
return prefix + currName, nil
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return "", fmt.Errorf("reading /proc/devices: %w", err)
|
||||
}
|
||||
// Couldn't find the device group.
|
||||
return "", nil
|
||||
}
|
||||
|
||||
// DeviceAllow is the dbus type "a(ss)" which means we need a struct
|
||||
// to represent it in Go.
|
||||
type deviceAllowEntry struct {
|
||||
Path string
|
||||
Perms string
|
||||
}
|
||||
|
||||
func allowAllDevices() []systemdDbus.Property {
|
||||
// Setting mode to auto and removing all DeviceAllow rules
|
||||
// results in allowing access to all devices.
|
||||
return []systemdDbus.Property{
|
||||
newProp("DevicePolicy", "auto"),
|
||||
newProp("DeviceAllow", []deviceAllowEntry{}),
|
||||
}
|
||||
}
|
||||
|
||||
// generateDeviceProperties takes the configured device rules and generates a
|
||||
// corresponding set of systemd properties to configure the devices correctly.
|
||||
func generateDeviceProperties(r *configs.Resources, sdVer int) ([]systemdDbus.Property, error) {
|
||||
if r.SkipDevices {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
properties := []systemdDbus.Property{
|
||||
// Always run in the strictest white-list mode.
|
||||
newProp("DevicePolicy", "strict"),
|
||||
// Empty the DeviceAllow array before filling it.
|
||||
newProp("DeviceAllow", []deviceAllowEntry{}),
|
||||
}
|
||||
|
||||
// Figure out the set of rules.
|
||||
configEmu := &cgroupdevices.Emulator{}
|
||||
for _, rule := range r.Devices {
|
||||
if err := configEmu.Apply(*rule); err != nil {
|
||||
return nil, fmt.Errorf("unable to apply rule for systemd: %w", err)
|
||||
}
|
||||
}
|
||||
// systemd doesn't support blacklists. So we log a warning, and tell
|
||||
// systemd to act as a deny-all whitelist. This ruleset will be replaced
|
||||
// with our normal fallback code. This may result in spurious errors, but
|
||||
// the only other option is to error out here.
|
||||
if configEmu.IsBlacklist() {
|
||||
// However, if we're dealing with an allow-all rule then we can do it.
|
||||
if configEmu.IsAllowAll() {
|
||||
return allowAllDevices(), nil
|
||||
}
|
||||
logrus.Warn("systemd doesn't support blacklist device rules -- applying temporary deny-all rule")
|
||||
return properties, nil
|
||||
}
|
||||
|
||||
// Now generate the set of rules we actually need to apply. Unlike the
|
||||
// normal devices cgroup, in "strict" mode systemd defaults to a deny-all
|
||||
// whitelist which is the default for devices.Emulator.
|
||||
finalRules, err := configEmu.Rules()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to get simplified rules for systemd: %w", err)
|
||||
}
|
||||
var deviceAllowList []deviceAllowEntry
|
||||
for _, rule := range finalRules {
|
||||
if !rule.Allow {
|
||||
// Should never happen.
|
||||
return nil, fmt.Errorf("[internal error] cannot add deny rule to systemd DeviceAllow list: %v", *rule)
|
||||
}
|
||||
switch rule.Type {
|
||||
case devices.BlockDevice, devices.CharDevice:
|
||||
default:
|
||||
// Should never happen.
|
||||
return nil, fmt.Errorf("invalid device type for DeviceAllow: %v", rule.Type)
|
||||
}
|
||||
|
||||
entry := deviceAllowEntry{
|
||||
Perms: string(rule.Permissions),
|
||||
}
|
||||
|
||||
// systemd has a fairly odd (though understandable) syntax here, and
|
||||
// because of the OCI configuration format we have to do quite a bit of
|
||||
// trickery to convert things:
|
||||
//
|
||||
// * Concrete rules with non-wildcard major/minor numbers have to use
|
||||
// /dev/{block,char}/MAJOR:minor paths. Before v240, systemd uses
|
||||
// stat(2) on such paths to look up device properties, meaning we
|
||||
// cannot add whitelist rules for devices that don't exist. Since v240,
|
||||
// device properties are parsed from the path string.
|
||||
//
|
||||
// However, path globbing is not support for path-based rules so we
|
||||
// need to handle wildcards in some other manner.
|
||||
//
|
||||
// * Wildcard-minor rules have to specify a "device group name" (the
|
||||
// second column in /proc/devices).
|
||||
//
|
||||
// * Wildcard (major and minor) rules can just specify a glob with the
|
||||
// type ("char-*" or "block-*").
|
||||
//
|
||||
// The only type of rule we can't handle is wildcard-major rules, and
|
||||
// so we'll give a warning in that case (note that the fallback code
|
||||
// will insert any rules systemd couldn't handle). What amazing fun.
|
||||
|
||||
if rule.Major == devices.Wildcard {
|
||||
// "_ *:n _" rules aren't supported by systemd.
|
||||
if rule.Minor != devices.Wildcard {
|
||||
logrus.Warnf("systemd doesn't support '*:n' device rules -- temporarily ignoring rule: %v", *rule)
|
||||
continue
|
||||
}
|
||||
|
||||
// "_ *:* _" rules just wildcard everything.
|
||||
prefix, err := groupPrefix(rule.Type)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
entry.Path = prefix + "*"
|
||||
} else if rule.Minor == devices.Wildcard {
|
||||
// "_ n:* _" rules require a device group from /proc/devices.
|
||||
group, err := findDeviceGroup(rule.Type, rule.Major)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to find device '%v/%d': %w", rule.Type, rule.Major, err)
|
||||
}
|
||||
if group == "" {
|
||||
// Couldn't find a group.
|
||||
logrus.Warnf("could not find device group for '%v/%d' in /proc/devices -- temporarily ignoring rule: %v", rule.Type, rule.Major, *rule)
|
||||
continue
|
||||
}
|
||||
entry.Path = group
|
||||
} else {
|
||||
// "_ n:m _" rules are just a path in /dev/{block,char}/.
|
||||
switch rule.Type {
|
||||
case devices.BlockDevice:
|
||||
entry.Path = fmt.Sprintf("/dev/block/%d:%d", rule.Major, rule.Minor)
|
||||
case devices.CharDevice:
|
||||
entry.Path = fmt.Sprintf("/dev/char/%d:%d", rule.Major, rule.Minor)
|
||||
}
|
||||
if sdVer < 240 {
|
||||
// Old systemd versions use stat(2) on path to find out device major:minor
|
||||
// numbers and type. If the path doesn't exist, it will not add the rule,
|
||||
// emitting a warning instead.
|
||||
// Since all of this logic is best-effort anyway (we manually set these
|
||||
// rules separately to systemd) we can safely skip entries that don't
|
||||
// have a corresponding path.
|
||||
if _, err := os.Stat(entry.Path); err != nil {
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
deviceAllowList = append(deviceAllowList, entry)
|
||||
}
|
||||
|
||||
properties = append(properties, newProp("DeviceAllow", deviceAllowList))
|
||||
return properties, nil
|
||||
}
|
||||
|
||||
func newProp(name string, units interface{}) systemdDbus.Property {
|
||||
return systemdDbus.Property{
|
||||
Name: name,
|
||||
Value: dbus.MakeVariant(units),
|
||||
}
|
||||
}
|
||||
|
||||
func getUnitName(c *configs.Cgroup) string {
|
||||
// by default, we create a scope unless the user explicitly asks for a slice.
|
||||
if !strings.HasSuffix(c.Name, ".slice") {
|
||||
return c.ScopePrefix + "-" + c.Name + ".scope"
|
||||
}
|
||||
return c.Name
|
||||
}
|
||||
|
||||
// This code should be in sync with getUnitName.
|
||||
func getUnitType(unitName string) string {
|
||||
if strings.HasSuffix(unitName, ".slice") {
|
||||
return "Slice"
|
||||
}
|
||||
return "Scope"
|
||||
}
|
||||
|
||||
// isDbusError returns true if the error is a specific dbus error.
|
||||
func isDbusError(err error, name string) bool {
|
||||
if err != nil {
|
||||
var derr dbus.Error
|
||||
if errors.As(err, &derr) {
|
||||
return strings.Contains(derr.Name, name)
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// isUnitExists returns true if the error is that a systemd unit already exists.
|
||||
func isUnitExists(err error) bool {
|
||||
return isDbusError(err, "org.freedesktop.systemd1.UnitExists")
|
||||
}
|
||||
|
||||
func startUnit(cm *dbusConnManager, unitName string, properties []systemdDbus.Property, ignoreExist bool) error {
|
||||
statusChan := make(chan string, 1)
|
||||
retry := true
|
||||
|
||||
retry:
|
||||
err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
|
||||
_, err := c.StartTransientUnitContext(context.TODO(), unitName, "replace", properties, statusChan)
|
||||
return err
|
||||
})
|
||||
if err != nil {
|
||||
if !isUnitExists(err) {
|
||||
return err
|
||||
}
|
||||
if ignoreExist {
|
||||
// TODO: remove this hack.
|
||||
// This is kubelet making sure a slice exists (see
|
||||
// https://github.com/opencontainers/runc/pull/1124).
|
||||
return nil
|
||||
}
|
||||
if retry {
|
||||
// In case a unit with the same name exists, this may
|
||||
// be a leftover failed unit. Reset it, so systemd can
|
||||
// remove it, and retry once.
|
||||
err = resetFailedUnit(cm, unitName)
|
||||
if err != nil {
|
||||
logrus.Warnf("unable to reset failed unit: %v", err)
|
||||
}
|
||||
retry = false
|
||||
goto retry
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
timeout := time.NewTimer(30 * time.Second)
|
||||
defer timeout.Stop()
|
||||
|
||||
select {
|
||||
case s := <-statusChan:
|
||||
close(statusChan)
|
||||
// Please refer to https://pkg.go.dev/github.com/coreos/go-systemd/v22/dbus#Conn.StartUnit
|
||||
if s != "done" {
|
||||
_ = resetFailedUnit(cm, unitName)
|
||||
return fmt.Errorf("error creating systemd unit `%s`: got `%s`", unitName, s)
|
||||
}
|
||||
case <-timeout.C:
|
||||
_ = resetFailedUnit(cm, unitName)
|
||||
return errors.New("Timeout waiting for systemd to create " + unitName)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func stopUnit(cm *dbusConnManager, unitName string) error {
|
||||
statusChan := make(chan string, 1)
|
||||
err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
|
||||
_, err := c.StopUnitContext(context.TODO(), unitName, "replace", statusChan)
|
||||
return err
|
||||
})
|
||||
if err == nil {
|
||||
timeout := time.NewTimer(30 * time.Second)
|
||||
defer timeout.Stop()
|
||||
|
||||
select {
|
||||
case s := <-statusChan:
|
||||
close(statusChan)
|
||||
// Please refer to https://godoc.org/github.com/coreos/go-systemd/v22/dbus#Conn.StartUnit
|
||||
if s != "done" {
|
||||
logrus.Warnf("error removing unit `%s`: got `%s`. Continuing...", unitName, s)
|
||||
}
|
||||
case <-timeout.C:
|
||||
return errors.New("Timed out while waiting for systemd to remove " + unitName)
|
||||
}
|
||||
}
|
||||
|
||||
// In case of a failed unit, let systemd remove it.
|
||||
_ = resetFailedUnit(cm, unitName)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func resetFailedUnit(cm *dbusConnManager, name string) error {
|
||||
return cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
|
||||
return c.ResetFailedUnitContext(context.TODO(), name)
|
||||
})
|
||||
}
|
||||
|
||||
func getUnitTypeProperty(cm *dbusConnManager, unitName string, unitType string, propertyName string) (*systemdDbus.Property, error) {
|
||||
var prop *systemdDbus.Property
|
||||
err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) (Err error) {
|
||||
prop, Err = c.GetUnitTypePropertyContext(context.TODO(), unitName, unitType, propertyName)
|
||||
return Err
|
||||
})
|
||||
return prop, err
|
||||
}
|
||||
|
||||
func setUnitProperties(cm *dbusConnManager, name string, properties ...systemdDbus.Property) error {
|
||||
return cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
|
||||
return c.SetUnitPropertiesContext(context.TODO(), name, true, properties...)
|
||||
})
|
||||
}
|
||||
|
||||
func getManagerProperty(cm *dbusConnManager, name string) (string, error) {
|
||||
str := ""
|
||||
err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
|
||||
var err error
|
||||
str, err = c.GetManagerProperty(name)
|
||||
return err
|
||||
})
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return strconv.Unquote(str)
|
||||
}
|
||||
|
||||
func systemdVersion(cm *dbusConnManager) int {
|
||||
versionOnce.Do(func() {
|
||||
version = -1
|
||||
verStr, err := getManagerProperty(cm, "Version")
|
||||
if err == nil {
|
||||
version, err = systemdVersionAtoi(verStr)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
logrus.WithError(err).Error("unable to get systemd version")
|
||||
}
|
||||
})
|
||||
|
||||
return version
|
||||
}
|
||||
|
||||
func systemdVersionAtoi(verStr string) (int, error) {
|
||||
// verStr should be of the form:
|
||||
// "v245.4-1.fc32", "245", "v245-1.fc32", "245-1.fc32" (without quotes).
|
||||
// The result for all of the above should be 245.
|
||||
// Thus, we unconditionally remove the "v" prefix
|
||||
// and then match on the first integer we can grab.
|
||||
re := regexp.MustCompile(`v?([0-9]+)`)
|
||||
matches := re.FindStringSubmatch(verStr)
|
||||
if len(matches) < 2 {
|
||||
return 0, fmt.Errorf("can't parse version %s: incorrect number of matches %v", verStr, matches)
|
||||
}
|
||||
ver, err := strconv.Atoi(matches[1])
|
||||
if err != nil {
|
||||
return -1, fmt.Errorf("can't parse version: %w", err)
|
||||
}
|
||||
return ver, nil
|
||||
}
|
||||
|
||||
func addCpuQuota(cm *dbusConnManager, properties *[]systemdDbus.Property, quota int64, period uint64) {
|
||||
if period != 0 {
|
||||
// systemd only supports CPUQuotaPeriodUSec since v242
|
||||
sdVer := systemdVersion(cm)
|
||||
if sdVer >= 242 {
|
||||
*properties = append(*properties,
|
||||
newProp("CPUQuotaPeriodUSec", period))
|
||||
} else {
|
||||
logrus.Debugf("systemd v%d is too old to support CPUQuotaPeriodSec "+
|
||||
" (setting will still be applied to cgroupfs)", sdVer)
|
||||
}
|
||||
}
|
||||
if quota != 0 || period != 0 {
|
||||
// corresponds to USEC_INFINITY in systemd
|
||||
cpuQuotaPerSecUSec := uint64(math.MaxUint64)
|
||||
if quota > 0 {
|
||||
if period == 0 {
|
||||
// assume the default
|
||||
period = defCPUQuotaPeriod
|
||||
}
|
||||
// systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota
|
||||
// (integer percentage of CPU) internally. This means that if a fractional percent of
|
||||
// CPU is indicated by Resources.CpuQuota, we need to round up to the nearest
|
||||
// 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect.
|
||||
cpuQuotaPerSecUSec = uint64(quota*1000000) / period
|
||||
if cpuQuotaPerSecUSec%10000 != 0 {
|
||||
cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
|
||||
}
|
||||
}
|
||||
*properties = append(*properties,
|
||||
newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
|
||||
}
|
||||
}
|
||||
|
||||
func addCpuset(cm *dbusConnManager, props *[]systemdDbus.Property, cpus, mems string) error {
|
||||
if cpus == "" && mems == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
// systemd only supports AllowedCPUs/AllowedMemoryNodes since v244
|
||||
sdVer := systemdVersion(cm)
|
||||
if sdVer < 244 {
|
||||
logrus.Debugf("systemd v%d is too old to support AllowedCPUs/AllowedMemoryNodes"+
|
||||
" (settings will still be applied to cgroupfs)", sdVer)
|
||||
return nil
|
||||
}
|
||||
|
||||
if cpus != "" {
|
||||
bits, err := RangeToBits(cpus)
|
||||
if err != nil {
|
||||
return fmt.Errorf("resources.CPU.Cpus=%q conversion error: %w",
|
||||
cpus, err)
|
||||
}
|
||||
*props = append(*props,
|
||||
newProp("AllowedCPUs", bits))
|
||||
}
|
||||
if mems != "" {
|
||||
bits, err := RangeToBits(mems)
|
||||
if err != nil {
|
||||
return fmt.Errorf("resources.CPU.Mems=%q conversion error: %w",
|
||||
mems, err)
|
||||
}
|
||||
*props = append(*props,
|
||||
newProp("AllowedMemoryNodes", bits))
|
||||
}
|
||||
return nil
|
||||
}
|
60
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/cpuset.go
generated
vendored
60
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/cpuset.go
generated
vendored
@ -1,60 +0,0 @@
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"math/big"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// RangeToBits converts a text representation of a CPU mask (as written to
|
||||
// or read from cgroups' cpuset.* files, e.g. "1,3-5") to a slice of bytes
|
||||
// with the corresponding bits set (as consumed by systemd over dbus as
|
||||
// AllowedCPUs/AllowedMemoryNodes unit property value).
|
||||
func RangeToBits(str string) ([]byte, error) {
|
||||
bits := new(big.Int)
|
||||
|
||||
for _, r := range strings.Split(str, ",") {
|
||||
// allow extra spaces around
|
||||
r = strings.TrimSpace(r)
|
||||
// allow empty elements (extra commas)
|
||||
if r == "" {
|
||||
continue
|
||||
}
|
||||
ranges := strings.SplitN(r, "-", 2)
|
||||
if len(ranges) > 1 {
|
||||
start, err := strconv.ParseUint(ranges[0], 10, 32)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
end, err := strconv.ParseUint(ranges[1], 10, 32)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if start > end {
|
||||
return nil, errors.New("invalid range: " + r)
|
||||
}
|
||||
for i := start; i <= end; i++ {
|
||||
bits.SetBit(bits, int(i), 1)
|
||||
}
|
||||
} else {
|
||||
val, err := strconv.ParseUint(ranges[0], 10, 32)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
bits.SetBit(bits, int(val), 1)
|
||||
}
|
||||
}
|
||||
|
||||
ret := bits.Bytes()
|
||||
if len(ret) == 0 {
|
||||
// do not allow empty values
|
||||
return nil, errors.New("empty value")
|
||||
}
|
||||
|
||||
// fit cpuset parsing order in systemd
|
||||
for l, r := 0, len(ret)-1; l < r; l, r = l+1, r-1 {
|
||||
ret[l], ret[r] = ret[r], ret[l]
|
||||
}
|
||||
return ret, nil
|
||||
}
|
102
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/dbus.go
generated
vendored
102
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/dbus.go
generated
vendored
@ -1,102 +0,0 @@
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||
dbus "github.com/godbus/dbus/v5"
|
||||
)
|
||||
|
||||
var (
|
||||
dbusC *systemdDbus.Conn
|
||||
dbusMu sync.RWMutex
|
||||
dbusInited bool
|
||||
dbusRootless bool
|
||||
)
|
||||
|
||||
type dbusConnManager struct{}
|
||||
|
||||
// newDbusConnManager initializes systemd dbus connection manager.
|
||||
func newDbusConnManager(rootless bool) *dbusConnManager {
|
||||
dbusMu.Lock()
|
||||
defer dbusMu.Unlock()
|
||||
if dbusInited && rootless != dbusRootless {
|
||||
panic("can't have both root and rootless dbus")
|
||||
}
|
||||
dbusInited = true
|
||||
dbusRootless = rootless
|
||||
return &dbusConnManager{}
|
||||
}
|
||||
|
||||
// getConnection lazily initializes and returns systemd dbus connection.
|
||||
func (d *dbusConnManager) getConnection() (*systemdDbus.Conn, error) {
|
||||
// In the case where dbusC != nil
|
||||
// Use the read lock the first time to ensure
|
||||
// that Conn can be acquired at the same time.
|
||||
dbusMu.RLock()
|
||||
if conn := dbusC; conn != nil {
|
||||
dbusMu.RUnlock()
|
||||
return conn, nil
|
||||
}
|
||||
dbusMu.RUnlock()
|
||||
|
||||
// In the case where dbusC == nil
|
||||
// Use write lock to ensure that only one
|
||||
// will be created
|
||||
dbusMu.Lock()
|
||||
defer dbusMu.Unlock()
|
||||
if conn := dbusC; conn != nil {
|
||||
return conn, nil
|
||||
}
|
||||
|
||||
conn, err := d.newConnection()
|
||||
if err != nil {
|
||||
// When dbus-user-session is not installed, we can't detect whether we should try to connect to user dbus or system dbus, so d.dbusRootless is set to false.
|
||||
// This may fail with a cryptic error "read unix @->/run/systemd/private: read: connection reset by peer: unknown."
|
||||
// https://github.com/moby/moby/issues/42793
|
||||
return nil, fmt.Errorf("failed to connect to dbus (hint: for rootless containers, maybe you need to install dbus-user-session package, see https://github.com/opencontainers/runc/blob/master/docs/cgroup-v2.md): %w", err)
|
||||
}
|
||||
dbusC = conn
|
||||
return conn, nil
|
||||
}
|
||||
|
||||
func (d *dbusConnManager) newConnection() (*systemdDbus.Conn, error) {
|
||||
if dbusRootless {
|
||||
return newUserSystemdDbus()
|
||||
}
|
||||
return systemdDbus.NewWithContext(context.TODO())
|
||||
}
|
||||
|
||||
// resetConnection resets the connection to its initial state
|
||||
// (so it can be reconnected if necessary).
|
||||
func (d *dbusConnManager) resetConnection(conn *systemdDbus.Conn) {
|
||||
dbusMu.Lock()
|
||||
defer dbusMu.Unlock()
|
||||
if dbusC != nil && dbusC == conn {
|
||||
dbusC.Close()
|
||||
dbusC = nil
|
||||
}
|
||||
}
|
||||
|
||||
// retryOnDisconnect calls op, and if the error it returns is about closed dbus
|
||||
// connection, the connection is re-established and the op is retried. This helps
|
||||
// with the situation when dbus is restarted and we have a stale connection.
|
||||
func (d *dbusConnManager) retryOnDisconnect(op func(*systemdDbus.Conn) error) error {
|
||||
for {
|
||||
conn, err := d.getConnection()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = op(conn)
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
if !errors.Is(err, dbus.ErrClosed) {
|
||||
return err
|
||||
}
|
||||
d.resetConnection(conn)
|
||||
}
|
||||
}
|
106
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/user.go
generated
vendored
106
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/user.go
generated
vendored
@ -1,106 +0,0 @@
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||
dbus "github.com/godbus/dbus/v5"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/userns"
|
||||
)
|
||||
|
||||
// newUserSystemdDbus creates a connection for systemd user-instance.
|
||||
func newUserSystemdDbus() (*systemdDbus.Conn, error) {
|
||||
addr, err := DetectUserDbusSessionBusAddress()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
uid, err := DetectUID()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return systemdDbus.NewConnection(func() (*dbus.Conn, error) {
|
||||
conn, err := dbus.Dial(addr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error while dialing %q: %w", addr, err)
|
||||
}
|
||||
methods := []dbus.Auth{dbus.AuthExternal(strconv.Itoa(uid))}
|
||||
err = conn.Auth(methods)
|
||||
if err != nil {
|
||||
conn.Close()
|
||||
return nil, fmt.Errorf("error while authenticating connection (address=%q, UID=%d): %w", addr, uid, err)
|
||||
}
|
||||
if err = conn.Hello(); err != nil {
|
||||
conn.Close()
|
||||
return nil, fmt.Errorf("error while sending Hello message (address=%q, UID=%d): %w", addr, uid, err)
|
||||
}
|
||||
return conn, nil
|
||||
})
|
||||
}
|
||||
|
||||
// DetectUID detects UID from the OwnerUID field of `busctl --user status`
|
||||
// if running in userNS. The value corresponds to sd_bus_creds_get_owner_uid(3) .
|
||||
//
|
||||
// Otherwise returns os.Getuid() .
|
||||
func DetectUID() (int, error) {
|
||||
if !userns.RunningInUserNS() {
|
||||
return os.Getuid(), nil
|
||||
}
|
||||
b, err := exec.Command("busctl", "--user", "--no-pager", "status").CombinedOutput()
|
||||
if err != nil {
|
||||
return -1, fmt.Errorf("could not execute `busctl --user --no-pager status` (output: %q): %w", string(b), err)
|
||||
}
|
||||
scanner := bufio.NewScanner(bytes.NewReader(b))
|
||||
for scanner.Scan() {
|
||||
s := strings.TrimSpace(scanner.Text())
|
||||
if strings.HasPrefix(s, "OwnerUID=") {
|
||||
uidStr := strings.TrimPrefix(s, "OwnerUID=")
|
||||
i, err := strconv.Atoi(uidStr)
|
||||
if err != nil {
|
||||
return -1, fmt.Errorf("could not detect the OwnerUID: %w", err)
|
||||
}
|
||||
return i, nil
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return -1, err
|
||||
}
|
||||
return -1, errors.New("could not detect the OwnerUID")
|
||||
}
|
||||
|
||||
// DetectUserDbusSessionBusAddress returns $DBUS_SESSION_BUS_ADDRESS if set.
|
||||
// Otherwise returns "unix:path=$XDG_RUNTIME_DIR/bus" if $XDG_RUNTIME_DIR/bus exists.
|
||||
// Otherwise parses the value from `systemctl --user show-environment` .
|
||||
func DetectUserDbusSessionBusAddress() (string, error) {
|
||||
if env := os.Getenv("DBUS_SESSION_BUS_ADDRESS"); env != "" {
|
||||
return env, nil
|
||||
}
|
||||
if xdr := os.Getenv("XDG_RUNTIME_DIR"); xdr != "" {
|
||||
busPath := filepath.Join(xdr, "bus")
|
||||
if _, err := os.Stat(busPath); err == nil {
|
||||
busAddress := "unix:path=" + busPath
|
||||
return busAddress, nil
|
||||
}
|
||||
}
|
||||
b, err := exec.Command("systemctl", "--user", "--no-pager", "show-environment").CombinedOutput()
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("could not execute `systemctl --user --no-pager show-environment` (output=%q): %w", string(b), err)
|
||||
}
|
||||
scanner := bufio.NewScanner(bytes.NewReader(b))
|
||||
for scanner.Scan() {
|
||||
s := strings.TrimSpace(scanner.Text())
|
||||
if strings.HasPrefix(s, "DBUS_SESSION_BUS_ADDRESS=") {
|
||||
return strings.TrimPrefix(s, "DBUS_SESSION_BUS_ADDRESS="), nil
|
||||
}
|
||||
}
|
||||
return "", errors.New("could not detect DBUS_SESSION_BUS_ADDRESS from `systemctl --user --no-pager show-environment`. Make sure you have installed the dbus-user-session or dbus-daemon package and then run: `systemctl --user start dbus`")
|
||||
}
|
480
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v1.go
generated
vendored
480
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v1.go
generated
vendored
@ -1,480 +0,0 @@
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||
"github.com/godbus/dbus/v5"
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type legacyManager struct {
|
||||
mu sync.Mutex
|
||||
cgroups *configs.Cgroup
|
||||
paths map[string]string
|
||||
dbus *dbusConnManager
|
||||
}
|
||||
|
||||
func NewLegacyManager(cg *configs.Cgroup, paths map[string]string) (cgroups.Manager, error) {
|
||||
if cg.Rootless {
|
||||
return nil, errors.New("cannot use rootless systemd cgroups manager on cgroup v1")
|
||||
}
|
||||
if cg.Resources != nil && cg.Resources.Unified != nil {
|
||||
return nil, cgroups.ErrV1NoUnified
|
||||
}
|
||||
if paths == nil {
|
||||
var err error
|
||||
paths, err = initPaths(cg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return &legacyManager{
|
||||
cgroups: cg,
|
||||
paths: paths,
|
||||
dbus: newDbusConnManager(false),
|
||||
}, nil
|
||||
}
|
||||
|
||||
type subsystem interface {
|
||||
// Name returns the name of the subsystem.
|
||||
Name() string
|
||||
// Returns the stats, as 'stats', corresponding to the cgroup under 'path'.
|
||||
GetStats(path string, stats *cgroups.Stats) error
|
||||
// Set sets cgroup resource limits.
|
||||
Set(path string, r *configs.Resources) error
|
||||
}
|
||||
|
||||
var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist")
|
||||
|
||||
var legacySubsystems = []subsystem{
|
||||
&fs.CpusetGroup{},
|
||||
&fs.DevicesGroup{},
|
||||
&fs.MemoryGroup{},
|
||||
&fs.CpuGroup{},
|
||||
&fs.CpuacctGroup{},
|
||||
&fs.PidsGroup{},
|
||||
&fs.BlkioGroup{},
|
||||
&fs.HugetlbGroup{},
|
||||
&fs.PerfEventGroup{},
|
||||
&fs.FreezerGroup{},
|
||||
&fs.NetPrioGroup{},
|
||||
&fs.NetClsGroup{},
|
||||
&fs.NameGroup{GroupName: "name=systemd"},
|
||||
&fs.RdmaGroup{},
|
||||
&fs.NameGroup{GroupName: "misc"},
|
||||
}
|
||||
|
||||
func genV1ResourcesProperties(r *configs.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) {
|
||||
var properties []systemdDbus.Property
|
||||
|
||||
deviceProperties, err := generateDeviceProperties(r, systemdVersion(cm))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
properties = append(properties, deviceProperties...)
|
||||
|
||||
if r.Memory != 0 {
|
||||
properties = append(properties,
|
||||
newProp("MemoryLimit", uint64(r.Memory)))
|
||||
}
|
||||
|
||||
if r.CpuShares != 0 {
|
||||
properties = append(properties,
|
||||
newProp("CPUShares", r.CpuShares))
|
||||
}
|
||||
|
||||
addCpuQuota(cm, &properties, r.CpuQuota, r.CpuPeriod)
|
||||
|
||||
if r.BlkioWeight != 0 {
|
||||
properties = append(properties,
|
||||
newProp("BlockIOWeight", uint64(r.BlkioWeight)))
|
||||
}
|
||||
|
||||
if r.PidsLimit > 0 || r.PidsLimit == -1 {
|
||||
properties = append(properties,
|
||||
newProp("TasksMax", uint64(r.PidsLimit)))
|
||||
}
|
||||
|
||||
err = addCpuset(cm, &properties, r.CpusetCpus, r.CpusetMems)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return properties, nil
|
||||
}
|
||||
|
||||
// initPaths figures out and returns paths to cgroups.
|
||||
func initPaths(c *configs.Cgroup) (map[string]string, error) {
|
||||
slice := "system.slice"
|
||||
if c.Parent != "" {
|
||||
var err error
|
||||
slice, err = ExpandSlice(c.Parent)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
unit := getUnitName(c)
|
||||
|
||||
paths := make(map[string]string)
|
||||
for _, s := range legacySubsystems {
|
||||
subsystemPath, err := getSubsystemPath(slice, unit, s.Name())
|
||||
if err != nil {
|
||||
// Even if it's `not found` error, we'll return err
|
||||
// because devices cgroup is hard requirement for
|
||||
// container security.
|
||||
if s.Name() == "devices" {
|
||||
return nil, err
|
||||
}
|
||||
// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
|
||||
if cgroups.IsNotFound(err) {
|
||||
continue
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
paths[s.Name()] = subsystemPath
|
||||
}
|
||||
|
||||
// If systemd is using cgroups-hybrid mode then add the slice path of
|
||||
// this container to the paths so the following process executed with
|
||||
// "runc exec" joins that cgroup as well.
|
||||
if cgroups.IsCgroup2HybridMode() {
|
||||
// "" means cgroup-hybrid path
|
||||
cgroupsHybridPath, err := getSubsystemPath(slice, unit, "")
|
||||
if err != nil && cgroups.IsNotFound(err) {
|
||||
return nil, err
|
||||
}
|
||||
paths[""] = cgroupsHybridPath
|
||||
}
|
||||
|
||||
return paths, nil
|
||||
}
|
||||
|
||||
func (m *legacyManager) Apply(pid int) error {
|
||||
var (
|
||||
c = m.cgroups
|
||||
unitName = getUnitName(c)
|
||||
slice = "system.slice"
|
||||
properties []systemdDbus.Property
|
||||
)
|
||||
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
if c.Parent != "" {
|
||||
slice = c.Parent
|
||||
}
|
||||
|
||||
properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name))
|
||||
|
||||
if strings.HasSuffix(unitName, ".slice") {
|
||||
// If we create a slice, the parent is defined via a Wants=.
|
||||
properties = append(properties, systemdDbus.PropWants(slice))
|
||||
} else {
|
||||
// Otherwise it's a scope, which we put into a Slice=.
|
||||
properties = append(properties, systemdDbus.PropSlice(slice))
|
||||
// Assume scopes always support delegation (supported since systemd v218).
|
||||
properties = append(properties, newProp("Delegate", true))
|
||||
}
|
||||
|
||||
// only add pid if its valid, -1 is used w/ general slice creation.
|
||||
if pid != -1 {
|
||||
properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
|
||||
}
|
||||
|
||||
// Always enable accounting, this gets us the same behaviour as the fs implementation,
|
||||
// plus the kernel has some problems with joining the memory cgroup at a later time.
|
||||
properties = append(properties,
|
||||
newProp("MemoryAccounting", true),
|
||||
newProp("CPUAccounting", true),
|
||||
newProp("BlockIOAccounting", true),
|
||||
newProp("TasksAccounting", true),
|
||||
)
|
||||
|
||||
// Assume DefaultDependencies= will always work (the check for it was previously broken.)
|
||||
properties = append(properties,
|
||||
newProp("DefaultDependencies", false))
|
||||
|
||||
properties = append(properties, c.SystemdProps...)
|
||||
|
||||
if err := startUnit(m.dbus, unitName, properties, pid == -1); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := m.joinCgroups(pid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *legacyManager) Destroy() error {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
stopErr := stopUnit(m.dbus, getUnitName(m.cgroups))
|
||||
|
||||
// Both on success and on error, cleanup all the cgroups
|
||||
// we are aware of, as some of them were created directly
|
||||
// by Apply() and are not managed by systemd.
|
||||
if err := cgroups.RemovePaths(m.paths); err != nil && stopErr == nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return stopErr
|
||||
}
|
||||
|
||||
func (m *legacyManager) Path(subsys string) string {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
return m.paths[subsys]
|
||||
}
|
||||
|
||||
func (m *legacyManager) joinCgroups(pid int) error {
|
||||
for _, sys := range legacySubsystems {
|
||||
name := sys.Name()
|
||||
switch name {
|
||||
case "name=systemd":
|
||||
// let systemd handle this
|
||||
case "cpuset":
|
||||
if path, ok := m.paths[name]; ok {
|
||||
s := &fs.CpusetGroup{}
|
||||
if err := s.ApplyDir(path, m.cgroups.Resources, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
default:
|
||||
if path, ok := m.paths[name]; ok {
|
||||
if err := os.MkdirAll(path, 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := cgroups.WriteCgroupProc(path, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func getSubsystemPath(slice, unit, subsystem string) (string, error) {
|
||||
mountpoint, err := cgroups.FindCgroupMountpoint("", subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return filepath.Join(mountpoint, slice, unit), nil
|
||||
}
|
||||
|
||||
func (m *legacyManager) Freeze(state configs.FreezerState) error {
|
||||
err := m.doFreeze(state)
|
||||
if err == nil {
|
||||
m.cgroups.Resources.Freezer = state
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// doFreeze is the same as Freeze but without
|
||||
// changing the m.cgroups.Resources.Frozen field.
|
||||
func (m *legacyManager) doFreeze(state configs.FreezerState) error {
|
||||
path, ok := m.paths["freezer"]
|
||||
if !ok {
|
||||
return errSubsystemDoesNotExist
|
||||
}
|
||||
freezer := &fs.FreezerGroup{}
|
||||
resources := &configs.Resources{Freezer: state}
|
||||
return freezer.Set(path, resources)
|
||||
}
|
||||
|
||||
func (m *legacyManager) GetPids() ([]int, error) {
|
||||
path, ok := m.paths["devices"]
|
||||
if !ok {
|
||||
return nil, errSubsystemDoesNotExist
|
||||
}
|
||||
return cgroups.GetPids(path)
|
||||
}
|
||||
|
||||
func (m *legacyManager) GetAllPids() ([]int, error) {
|
||||
path, ok := m.paths["devices"]
|
||||
if !ok {
|
||||
return nil, errSubsystemDoesNotExist
|
||||
}
|
||||
return cgroups.GetAllPids(path)
|
||||
}
|
||||
|
||||
func (m *legacyManager) GetStats() (*cgroups.Stats, error) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
stats := cgroups.NewStats()
|
||||
for _, sys := range legacySubsystems {
|
||||
path := m.paths[sys.Name()]
|
||||
if path == "" {
|
||||
continue
|
||||
}
|
||||
if err := sys.GetStats(path, stats); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
// freezeBeforeSet answers whether there is a need to freeze the cgroup before
|
||||
// applying its systemd unit properties, and thaw after, while avoiding
|
||||
// unnecessary freezer state changes.
|
||||
//
|
||||
// The reason why we have to freeze is that systemd's application of device
|
||||
// rules is done disruptively, resulting in spurious errors to common devices
|
||||
// (unlike our fs driver, they will happily write deny-all rules to running
|
||||
// containers). So we have to freeze the container to avoid the container get
|
||||
// an occasional "permission denied" error.
|
||||
func (m *legacyManager) freezeBeforeSet(unitName string, r *configs.Resources) (needsFreeze, needsThaw bool, err error) {
|
||||
// Special case for SkipDevices, as used by Kubernetes to create pod
|
||||
// cgroups with allow-all device policy).
|
||||
if r.SkipDevices {
|
||||
if r.SkipFreezeOnSet {
|
||||
// Both needsFreeze and needsThaw are false.
|
||||
return
|
||||
}
|
||||
|
||||
// No need to freeze if SkipDevices is set, and either
|
||||
// (1) systemd unit does not (yet) exist, or
|
||||
// (2) it has DevicePolicy=auto and empty DeviceAllow list.
|
||||
//
|
||||
// Interestingly, (1) and (2) are the same here because
|
||||
// a non-existent unit returns default properties,
|
||||
// and settings in (2) are the defaults.
|
||||
//
|
||||
// Do not return errors from getUnitTypeProperty, as they alone
|
||||
// should not prevent Set from working.
|
||||
|
||||
unitType := getUnitType(unitName)
|
||||
|
||||
devPolicy, e := getUnitTypeProperty(m.dbus, unitName, unitType, "DevicePolicy")
|
||||
if e == nil && devPolicy.Value == dbus.MakeVariant("auto") {
|
||||
devAllow, e := getUnitTypeProperty(m.dbus, unitName, unitType, "DeviceAllow")
|
||||
if e == nil {
|
||||
if rv := reflect.ValueOf(devAllow.Value.Value()); rv.Kind() == reflect.Slice && rv.Len() == 0 {
|
||||
needsFreeze = false
|
||||
needsThaw = false
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
needsFreeze = true
|
||||
needsThaw = true
|
||||
|
||||
// Check the current freezer state.
|
||||
freezerState, err := m.GetFreezerState()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if freezerState == configs.Frozen {
|
||||
// Already frozen, and should stay frozen.
|
||||
needsFreeze = false
|
||||
needsThaw = false
|
||||
}
|
||||
|
||||
if r.Freezer == configs.Frozen {
|
||||
// Will be frozen anyway -- no need to thaw.
|
||||
needsThaw = false
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (m *legacyManager) Set(r *configs.Resources) error {
|
||||
if r == nil {
|
||||
return nil
|
||||
}
|
||||
if r.Unified != nil {
|
||||
return cgroups.ErrV1NoUnified
|
||||
}
|
||||
properties, err := genV1ResourcesProperties(r, m.dbus)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
unitName := getUnitName(m.cgroups)
|
||||
needsFreeze, needsThaw, err := m.freezeBeforeSet(unitName, r)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if needsFreeze {
|
||||
if err := m.doFreeze(configs.Frozen); err != nil {
|
||||
// If freezer cgroup isn't supported, we just warn about it.
|
||||
logrus.Infof("freeze container before SetUnitProperties failed: %v", err)
|
||||
// skip update the cgroup while frozen failed. #3803
|
||||
if !errors.Is(err, errSubsystemDoesNotExist) {
|
||||
if needsThaw {
|
||||
if thawErr := m.doFreeze(configs.Thawed); thawErr != nil {
|
||||
logrus.Infof("thaw container after doFreeze failed: %v", thawErr)
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
setErr := setUnitProperties(m.dbus, unitName, properties...)
|
||||
if needsThaw {
|
||||
if err := m.doFreeze(configs.Thawed); err != nil {
|
||||
logrus.Infof("thaw container after SetUnitProperties failed: %v", err)
|
||||
}
|
||||
}
|
||||
if setErr != nil {
|
||||
return setErr
|
||||
}
|
||||
|
||||
for _, sys := range legacySubsystems {
|
||||
// Get the subsystem path, but don't error out for not found cgroups.
|
||||
path, ok := m.paths[sys.Name()]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if err := sys.Set(path, r); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *legacyManager) GetPaths() map[string]string {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
return m.paths
|
||||
}
|
||||
|
||||
func (m *legacyManager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.cgroups, nil
|
||||
}
|
||||
|
||||
func (m *legacyManager) GetFreezerState() (configs.FreezerState, error) {
|
||||
path, ok := m.paths["freezer"]
|
||||
if !ok {
|
||||
return configs.Undefined, nil
|
||||
}
|
||||
freezer := &fs.FreezerGroup{}
|
||||
return freezer.GetState(path)
|
||||
}
|
||||
|
||||
func (m *legacyManager) Exists() bool {
|
||||
return cgroups.PathExists(m.Path("devices"))
|
||||
}
|
||||
|
||||
func (m *legacyManager) OOMKillCount() (uint64, error) {
|
||||
return fs.OOMKillCount(m.Path("memory"))
|
||||
}
|
472
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v2.go
generated
vendored
472
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v2.go
generated
vendored
@ -1,472 +0,0 @@
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||
securejoin "github.com/cyphar/filepath-securejoin"
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type unifiedManager struct {
|
||||
mu sync.Mutex
|
||||
cgroups *configs.Cgroup
|
||||
// path is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope"
|
||||
path string
|
||||
dbus *dbusConnManager
|
||||
fsMgr cgroups.Manager
|
||||
}
|
||||
|
||||
func NewUnifiedManager(config *configs.Cgroup, path string) (cgroups.Manager, error) {
|
||||
m := &unifiedManager{
|
||||
cgroups: config,
|
||||
path: path,
|
||||
dbus: newDbusConnManager(config.Rootless),
|
||||
}
|
||||
if err := m.initPath(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
fsMgr, err := fs2.NewManager(config, m.path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
m.fsMgr = fsMgr
|
||||
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// unifiedResToSystemdProps tries to convert from Cgroup.Resources.Unified
|
||||
// key/value map (where key is cgroupfs file name) to systemd unit properties.
|
||||
// This is on a best-effort basis, so the properties that are not known
|
||||
// (to this function and/or systemd) are ignored (but logged with "debug"
|
||||
// log level).
|
||||
//
|
||||
// For the list of keys, see https://www.kernel.org/doc/Documentation/cgroup-v2.txt
|
||||
//
|
||||
// For the list of systemd unit properties, see systemd.resource-control(5).
|
||||
func unifiedResToSystemdProps(cm *dbusConnManager, res map[string]string) (props []systemdDbus.Property, _ error) {
|
||||
var err error
|
||||
|
||||
for k, v := range res {
|
||||
if strings.Contains(k, "/") {
|
||||
return nil, fmt.Errorf("unified resource %q must be a file name (no slashes)", k)
|
||||
}
|
||||
sk := strings.SplitN(k, ".", 2)
|
||||
if len(sk) != 2 {
|
||||
return nil, fmt.Errorf("unified resource %q must be in the form CONTROLLER.PARAMETER", k)
|
||||
}
|
||||
// Kernel is quite forgiving to extra whitespace
|
||||
// around the value, and so should we.
|
||||
v = strings.TrimSpace(v)
|
||||
// Please keep cases in alphabetical order.
|
||||
switch k {
|
||||
case "cpu.max":
|
||||
// value: quota [period]
|
||||
quota := int64(0) // 0 means "unlimited" for addCpuQuota, if period is set
|
||||
period := defCPUQuotaPeriod
|
||||
sv := strings.Fields(v)
|
||||
if len(sv) < 1 || len(sv) > 2 {
|
||||
return nil, fmt.Errorf("unified resource %q value invalid: %q", k, v)
|
||||
}
|
||||
// quota
|
||||
if sv[0] != "max" {
|
||||
quota, err = strconv.ParseInt(sv[0], 10, 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unified resource %q period value conversion error: %w", k, err)
|
||||
}
|
||||
}
|
||||
// period
|
||||
if len(sv) == 2 {
|
||||
period, err = strconv.ParseUint(sv[1], 10, 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unified resource %q quota value conversion error: %w", k, err)
|
||||
}
|
||||
}
|
||||
addCpuQuota(cm, &props, quota, period)
|
||||
|
||||
case "cpu.weight":
|
||||
num, err := strconv.ParseUint(v, 10, 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unified resource %q value conversion error: %w", k, err)
|
||||
}
|
||||
props = append(props,
|
||||
newProp("CPUWeight", num))
|
||||
|
||||
case "cpuset.cpus", "cpuset.mems":
|
||||
bits, err := RangeToBits(v)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unified resource %q=%q conversion error: %w", k, v, err)
|
||||
}
|
||||
m := map[string]string{
|
||||
"cpuset.cpus": "AllowedCPUs",
|
||||
"cpuset.mems": "AllowedMemoryNodes",
|
||||
}
|
||||
// systemd only supports these properties since v244
|
||||
sdVer := systemdVersion(cm)
|
||||
if sdVer >= 244 {
|
||||
props = append(props,
|
||||
newProp(m[k], bits))
|
||||
} else {
|
||||
logrus.Debugf("systemd v%d is too old to support %s"+
|
||||
" (setting will still be applied to cgroupfs)",
|
||||
sdVer, m[k])
|
||||
}
|
||||
|
||||
case "memory.high", "memory.low", "memory.min", "memory.max", "memory.swap.max":
|
||||
num := uint64(math.MaxUint64)
|
||||
if v != "max" {
|
||||
num, err = strconv.ParseUint(v, 10, 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unified resource %q value conversion error: %w", k, err)
|
||||
}
|
||||
}
|
||||
m := map[string]string{
|
||||
"memory.high": "MemoryHigh",
|
||||
"memory.low": "MemoryLow",
|
||||
"memory.min": "MemoryMin",
|
||||
"memory.max": "MemoryMax",
|
||||
"memory.swap.max": "MemorySwapMax",
|
||||
}
|
||||
props = append(props,
|
||||
newProp(m[k], num))
|
||||
|
||||
case "pids.max":
|
||||
num := uint64(math.MaxUint64)
|
||||
if v != "max" {
|
||||
var err error
|
||||
num, err = strconv.ParseUint(v, 10, 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unified resource %q value conversion error: %w", k, err)
|
||||
}
|
||||
}
|
||||
props = append(props,
|
||||
newProp("TasksMax", num))
|
||||
|
||||
case "memory.oom.group":
|
||||
// Setting this to 1 is roughly equivalent to OOMPolicy=kill
|
||||
// (as per systemd.service(5) and
|
||||
// https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html),
|
||||
// but it's not clear what to do if it is unset or set
|
||||
// to 0 in runc update, as there are two other possible
|
||||
// values for OOMPolicy (continue/stop).
|
||||
fallthrough
|
||||
|
||||
default:
|
||||
// Ignore the unknown resource here -- will still be
|
||||
// applied in Set which calls fs2.Set.
|
||||
logrus.Debugf("don't know how to convert unified resource %q=%q to systemd unit property; skipping (will still be applied to cgroupfs)", k, v)
|
||||
}
|
||||
}
|
||||
|
||||
return props, nil
|
||||
}
|
||||
|
||||
func genV2ResourcesProperties(r *configs.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) {
|
||||
var properties []systemdDbus.Property
|
||||
|
||||
// NOTE: This is of questionable correctness because we insert our own
|
||||
// devices eBPF program later. Two programs with identical rules
|
||||
// aren't the end of the world, but it is a bit concerning. However
|
||||
// it's unclear if systemd removes all eBPF programs attached when
|
||||
// doing SetUnitProperties...
|
||||
deviceProperties, err := generateDeviceProperties(r, systemdVersion(cm))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
properties = append(properties, deviceProperties...)
|
||||
|
||||
if r.Memory != 0 {
|
||||
properties = append(properties,
|
||||
newProp("MemoryMax", uint64(r.Memory)))
|
||||
}
|
||||
if r.MemoryReservation != 0 {
|
||||
properties = append(properties,
|
||||
newProp("MemoryLow", uint64(r.MemoryReservation)))
|
||||
}
|
||||
|
||||
swap, err := cgroups.ConvertMemorySwapToCgroupV2Value(r.MemorySwap, r.Memory)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if swap != 0 {
|
||||
properties = append(properties,
|
||||
newProp("MemorySwapMax", uint64(swap)))
|
||||
}
|
||||
|
||||
if r.CpuWeight != 0 {
|
||||
properties = append(properties,
|
||||
newProp("CPUWeight", r.CpuWeight))
|
||||
}
|
||||
|
||||
addCpuQuota(cm, &properties, r.CpuQuota, r.CpuPeriod)
|
||||
|
||||
if r.PidsLimit > 0 || r.PidsLimit == -1 {
|
||||
properties = append(properties,
|
||||
newProp("TasksMax", uint64(r.PidsLimit)))
|
||||
}
|
||||
|
||||
err = addCpuset(cm, &properties, r.CpusetCpus, r.CpusetMems)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// ignore r.KernelMemory
|
||||
|
||||
// convert Resources.Unified map to systemd properties
|
||||
if r.Unified != nil {
|
||||
unifiedProps, err := unifiedResToSystemdProps(cm, r.Unified)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
properties = append(properties, unifiedProps...)
|
||||
}
|
||||
|
||||
return properties, nil
|
||||
}
|
||||
|
||||
func (m *unifiedManager) Apply(pid int) error {
|
||||
var (
|
||||
c = m.cgroups
|
||||
unitName = getUnitName(c)
|
||||
properties []systemdDbus.Property
|
||||
)
|
||||
|
||||
slice := "system.slice"
|
||||
if m.cgroups.Rootless {
|
||||
slice = "user.slice"
|
||||
}
|
||||
if c.Parent != "" {
|
||||
slice = c.Parent
|
||||
}
|
||||
|
||||
properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name))
|
||||
|
||||
if strings.HasSuffix(unitName, ".slice") {
|
||||
// If we create a slice, the parent is defined via a Wants=.
|
||||
properties = append(properties, systemdDbus.PropWants(slice))
|
||||
} else {
|
||||
// Otherwise it's a scope, which we put into a Slice=.
|
||||
properties = append(properties, systemdDbus.PropSlice(slice))
|
||||
// Assume scopes always support delegation (supported since systemd v218).
|
||||
properties = append(properties, newProp("Delegate", true))
|
||||
}
|
||||
|
||||
// only add pid if its valid, -1 is used w/ general slice creation.
|
||||
if pid != -1 {
|
||||
properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
|
||||
}
|
||||
|
||||
// Always enable accounting, this gets us the same behaviour as the fs implementation,
|
||||
// plus the kernel has some problems with joining the memory cgroup at a later time.
|
||||
properties = append(properties,
|
||||
newProp("MemoryAccounting", true),
|
||||
newProp("CPUAccounting", true),
|
||||
newProp("IOAccounting", true),
|
||||
newProp("TasksAccounting", true),
|
||||
)
|
||||
|
||||
// Assume DefaultDependencies= will always work (the check for it was previously broken.)
|
||||
properties = append(properties,
|
||||
newProp("DefaultDependencies", false))
|
||||
|
||||
properties = append(properties, c.SystemdProps...)
|
||||
|
||||
if err := startUnit(m.dbus, unitName, properties, pid == -1); err != nil {
|
||||
return fmt.Errorf("unable to start unit %q (properties %+v): %w", unitName, properties, err)
|
||||
}
|
||||
|
||||
if err := fs2.CreateCgroupPath(m.path, m.cgroups); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if c.OwnerUID != nil {
|
||||
// The directory itself must be chowned.
|
||||
err := os.Chown(m.path, *c.OwnerUID, -1)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
filesToChown, err := cgroupFilesToChown()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, v := range filesToChown {
|
||||
err := os.Chown(m.path+"/"+v, *c.OwnerUID, -1)
|
||||
// Some files might not be present.
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// The kernel exposes a list of files that should be chowned to the delegate
|
||||
// uid in /sys/kernel/cgroup/delegate. If the file is not present
|
||||
// (Linux < 4.15), use the initial values mentioned in cgroups(7).
|
||||
func cgroupFilesToChown() ([]string, error) {
|
||||
const cgroupDelegateFile = "/sys/kernel/cgroup/delegate"
|
||||
|
||||
f, err := os.Open(cgroupDelegateFile)
|
||||
if err != nil {
|
||||
return []string{"cgroup.procs", "cgroup.subtree_control", "cgroup.threads"}, nil
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
filesToChown := []string{}
|
||||
scanner := bufio.NewScanner(f)
|
||||
for scanner.Scan() {
|
||||
filesToChown = append(filesToChown, scanner.Text())
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return nil, fmt.Errorf("error reading %s: %w", cgroupDelegateFile, err)
|
||||
}
|
||||
|
||||
return filesToChown, nil
|
||||
}
|
||||
|
||||
func (m *unifiedManager) Destroy() error {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
unitName := getUnitName(m.cgroups)
|
||||
if err := stopUnit(m.dbus, unitName); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// systemd 239 do not remove sub-cgroups.
|
||||
err := m.fsMgr.Destroy()
|
||||
// fsMgr.Destroy has handled ErrNotExist
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *unifiedManager) Path(_ string) string {
|
||||
return m.path
|
||||
}
|
||||
|
||||
// getSliceFull value is used in initPath.
|
||||
// The value is incompatible with systemdDbus.PropSlice.
|
||||
func (m *unifiedManager) getSliceFull() (string, error) {
|
||||
c := m.cgroups
|
||||
slice := "system.slice"
|
||||
if c.Rootless {
|
||||
slice = "user.slice"
|
||||
}
|
||||
if c.Parent != "" {
|
||||
var err error
|
||||
slice, err = ExpandSlice(c.Parent)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
|
||||
if c.Rootless {
|
||||
// managerCG is typically "/user.slice/user-${uid}.slice/user@${uid}.service".
|
||||
managerCG, err := getManagerProperty(m.dbus, "ControlGroup")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
slice = filepath.Join(managerCG, slice)
|
||||
}
|
||||
|
||||
// an example of the final slice in rootless: "/user.slice/user-1001.slice/user@1001.service/user.slice"
|
||||
// NOTE: systemdDbus.PropSlice requires the "/user.slice/user-1001.slice/user@1001.service/" prefix NOT to be specified.
|
||||
return slice, nil
|
||||
}
|
||||
|
||||
func (m *unifiedManager) initPath() error {
|
||||
if m.path != "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
sliceFull, err := m.getSliceFull()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
c := m.cgroups
|
||||
path := filepath.Join(sliceFull, getUnitName(c))
|
||||
path, err = securejoin.SecureJoin(fs2.UnifiedMountpoint, path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// an example of the final path in rootless:
|
||||
// "/sys/fs/cgroup/user.slice/user-1001.slice/user@1001.service/user.slice/libpod-132ff0d72245e6f13a3bbc6cdc5376886897b60ac59eaa8dea1df7ab959cbf1c.scope"
|
||||
m.path = path
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *unifiedManager) Freeze(state configs.FreezerState) error {
|
||||
return m.fsMgr.Freeze(state)
|
||||
}
|
||||
|
||||
func (m *unifiedManager) GetPids() ([]int, error) {
|
||||
return cgroups.GetPids(m.path)
|
||||
}
|
||||
|
||||
func (m *unifiedManager) GetAllPids() ([]int, error) {
|
||||
return cgroups.GetAllPids(m.path)
|
||||
}
|
||||
|
||||
func (m *unifiedManager) GetStats() (*cgroups.Stats, error) {
|
||||
return m.fsMgr.GetStats()
|
||||
}
|
||||
|
||||
func (m *unifiedManager) Set(r *configs.Resources) error {
|
||||
if r == nil {
|
||||
return nil
|
||||
}
|
||||
properties, err := genV2ResourcesProperties(r, m.dbus)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := setUnitProperties(m.dbus, getUnitName(m.cgroups), properties...); err != nil {
|
||||
return fmt.Errorf("unable to set unit properties: %w", err)
|
||||
}
|
||||
|
||||
return m.fsMgr.Set(r)
|
||||
}
|
||||
|
||||
func (m *unifiedManager) GetPaths() map[string]string {
|
||||
paths := make(map[string]string, 1)
|
||||
paths[""] = m.path
|
||||
return paths
|
||||
}
|
||||
|
||||
func (m *unifiedManager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.cgroups, nil
|
||||
}
|
||||
|
||||
func (m *unifiedManager) GetFreezerState() (configs.FreezerState, error) {
|
||||
return m.fsMgr.GetFreezerState()
|
||||
}
|
||||
|
||||
func (m *unifiedManager) Exists() bool {
|
||||
return cgroups.PathExists(m.path)
|
||||
}
|
||||
|
||||
func (m *unifiedManager) OOMKillCount() (uint64, error) {
|
||||
return m.fsMgr.OOMKillCount()
|
||||
}
|
469
vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
generated
vendored
469
vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
generated
vendored
@ -1,469 +0,0 @@
|
||||
package cgroups
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/userns"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const (
|
||||
CgroupProcesses = "cgroup.procs"
|
||||
unifiedMountpoint = "/sys/fs/cgroup"
|
||||
hybridMountpoint = "/sys/fs/cgroup/unified"
|
||||
)
|
||||
|
||||
var (
|
||||
isUnifiedOnce sync.Once
|
||||
isUnified bool
|
||||
isHybridOnce sync.Once
|
||||
isHybrid bool
|
||||
)
|
||||
|
||||
// IsCgroup2UnifiedMode returns whether we are running in cgroup v2 unified mode.
|
||||
func IsCgroup2UnifiedMode() bool {
|
||||
isUnifiedOnce.Do(func() {
|
||||
var st unix.Statfs_t
|
||||
err := unix.Statfs(unifiedMountpoint, &st)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) && userns.RunningInUserNS() {
|
||||
// ignore the "not found" error if running in userns
|
||||
logrus.WithError(err).Debugf("%s missing, assuming cgroup v1", unifiedMountpoint)
|
||||
isUnified = false
|
||||
return
|
||||
}
|
||||
panic(fmt.Sprintf("cannot statfs cgroup root: %s", err))
|
||||
}
|
||||
isUnified = st.Type == unix.CGROUP2_SUPER_MAGIC
|
||||
})
|
||||
return isUnified
|
||||
}
|
||||
|
||||
// IsCgroup2HybridMode returns whether we are running in cgroup v2 hybrid mode.
|
||||
func IsCgroup2HybridMode() bool {
|
||||
isHybridOnce.Do(func() {
|
||||
var st unix.Statfs_t
|
||||
err := unix.Statfs(hybridMountpoint, &st)
|
||||
if err != nil {
|
||||
isHybrid = false
|
||||
if !os.IsNotExist(err) {
|
||||
// Report unexpected errors.
|
||||
logrus.WithError(err).Debugf("statfs(%q) failed", hybridMountpoint)
|
||||
}
|
||||
return
|
||||
}
|
||||
isHybrid = st.Type == unix.CGROUP2_SUPER_MAGIC
|
||||
})
|
||||
return isHybrid
|
||||
}
|
||||
|
||||
type Mount struct {
|
||||
Mountpoint string
|
||||
Root string
|
||||
Subsystems []string
|
||||
}
|
||||
|
||||
// GetCgroupMounts returns the mounts for the cgroup subsystems.
|
||||
// all indicates whether to return just the first instance or all the mounts.
|
||||
// This function should not be used from cgroupv2 code, as in this case
|
||||
// all the controllers are available under the constant unifiedMountpoint.
|
||||
func GetCgroupMounts(all bool) ([]Mount, error) {
|
||||
if IsCgroup2UnifiedMode() {
|
||||
// TODO: remove cgroupv2 case once all external users are converted
|
||||
availableControllers, err := GetAllSubsystems()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
m := Mount{
|
||||
Mountpoint: unifiedMountpoint,
|
||||
Root: unifiedMountpoint,
|
||||
Subsystems: availableControllers,
|
||||
}
|
||||
return []Mount{m}, nil
|
||||
}
|
||||
|
||||
return getCgroupMountsV1(all)
|
||||
}
|
||||
|
||||
// GetAllSubsystems returns all the cgroup subsystems supported by the kernel
|
||||
func GetAllSubsystems() ([]string, error) {
|
||||
// /proc/cgroups is meaningless for v2
|
||||
// https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#deprecated-v1-core-features
|
||||
if IsCgroup2UnifiedMode() {
|
||||
// "pseudo" controllers do not appear in /sys/fs/cgroup/cgroup.controllers.
|
||||
// - devices: implemented in kernel 4.15
|
||||
// - freezer: implemented in kernel 5.2
|
||||
// We assume these are always available, as it is hard to detect availability.
|
||||
pseudo := []string{"devices", "freezer"}
|
||||
data, err := ReadFile("/sys/fs/cgroup", "cgroup.controllers")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
subsystems := append(pseudo, strings.Fields(data)...)
|
||||
return subsystems, nil
|
||||
}
|
||||
f, err := os.Open("/proc/cgroups")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
subsystems := []string{}
|
||||
|
||||
s := bufio.NewScanner(f)
|
||||
for s.Scan() {
|
||||
text := s.Text()
|
||||
if text[0] != '#' {
|
||||
parts := strings.Fields(text)
|
||||
if len(parts) >= 4 && parts[3] != "0" {
|
||||
subsystems = append(subsystems, parts[0])
|
||||
}
|
||||
}
|
||||
}
|
||||
if err := s.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return subsystems, nil
|
||||
}
|
||||
|
||||
func readProcsFile(dir string) ([]int, error) {
|
||||
f, err := OpenFile(dir, CgroupProcesses, os.O_RDONLY)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var (
|
||||
s = bufio.NewScanner(f)
|
||||
out = []int{}
|
||||
)
|
||||
|
||||
for s.Scan() {
|
||||
if t := s.Text(); t != "" {
|
||||
pid, err := strconv.Atoi(t)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out = append(out, pid)
|
||||
}
|
||||
}
|
||||
return out, s.Err()
|
||||
}
|
||||
|
||||
// ParseCgroupFile parses the given cgroup file, typically /proc/self/cgroup
|
||||
// or /proc/<pid>/cgroup, into a map of subsystems to cgroup paths, e.g.
|
||||
//
|
||||
// "cpu": "/user.slice/user-1000.slice"
|
||||
// "pids": "/user.slice/user-1000.slice"
|
||||
//
|
||||
// etc.
|
||||
//
|
||||
// Note that for cgroup v2 unified hierarchy, there are no per-controller
|
||||
// cgroup paths, so the resulting map will have a single element where the key
|
||||
// is empty string ("") and the value is the cgroup path the <pid> is in.
|
||||
func ParseCgroupFile(path string) (map[string]string, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
return parseCgroupFromReader(f)
|
||||
}
|
||||
|
||||
// helper function for ParseCgroupFile to make testing easier
|
||||
func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
|
||||
s := bufio.NewScanner(r)
|
||||
cgroups := make(map[string]string)
|
||||
|
||||
for s.Scan() {
|
||||
text := s.Text()
|
||||
// from cgroups(7):
|
||||
// /proc/[pid]/cgroup
|
||||
// ...
|
||||
// For each cgroup hierarchy ... there is one entry
|
||||
// containing three colon-separated fields of the form:
|
||||
// hierarchy-ID:subsystem-list:cgroup-path
|
||||
parts := strings.SplitN(text, ":", 3)
|
||||
if len(parts) < 3 {
|
||||
return nil, fmt.Errorf("invalid cgroup entry: must contain at least two colons: %v", text)
|
||||
}
|
||||
|
||||
for _, subs := range strings.Split(parts[1], ",") {
|
||||
cgroups[subs] = parts[2]
|
||||
}
|
||||
}
|
||||
if err := s.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return cgroups, nil
|
||||
}
|
||||
|
||||
func PathExists(path string) bool {
|
||||
if _, err := os.Stat(path); err != nil {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func EnterPid(cgroupPaths map[string]string, pid int) error {
|
||||
for _, path := range cgroupPaths {
|
||||
if PathExists(path) {
|
||||
if err := WriteCgroupProc(path, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func rmdir(path string) error {
|
||||
err := unix.Rmdir(path)
|
||||
if err == nil || err == unix.ENOENT { //nolint:errorlint // unix errors are bare
|
||||
return nil
|
||||
}
|
||||
return &os.PathError{Op: "rmdir", Path: path, Err: err}
|
||||
}
|
||||
|
||||
// RemovePath aims to remove cgroup path. It does so recursively,
|
||||
// by removing any subdirectories (sub-cgroups) first.
|
||||
func RemovePath(path string) error {
|
||||
// try the fast path first
|
||||
if err := rmdir(path); err == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
infos, err := os.ReadDir(path)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
err = nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
for _, info := range infos {
|
||||
if info.IsDir() {
|
||||
// We should remove subcgroups dir first
|
||||
if err = RemovePath(filepath.Join(path, info.Name())); err != nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if err == nil {
|
||||
err = rmdir(path)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// RemovePaths iterates over the provided paths removing them.
|
||||
// We trying to remove all paths five times with increasing delay between tries.
|
||||
// If after all there are not removed cgroups - appropriate error will be
|
||||
// returned.
|
||||
func RemovePaths(paths map[string]string) (err error) {
|
||||
const retries = 5
|
||||
delay := 10 * time.Millisecond
|
||||
for i := 0; i < retries; i++ {
|
||||
if i != 0 {
|
||||
time.Sleep(delay)
|
||||
delay *= 2
|
||||
}
|
||||
for s, p := range paths {
|
||||
if err := RemovePath(p); err != nil {
|
||||
// do not log intermediate iterations
|
||||
switch i {
|
||||
case 0:
|
||||
logrus.WithError(err).Warnf("Failed to remove cgroup (will retry)")
|
||||
case retries - 1:
|
||||
logrus.WithError(err).Error("Failed to remove cgroup")
|
||||
}
|
||||
}
|
||||
_, err := os.Stat(p)
|
||||
// We need this strange way of checking cgroups existence because
|
||||
// RemoveAll almost always returns error, even on already removed
|
||||
// cgroups
|
||||
if os.IsNotExist(err) {
|
||||
delete(paths, s)
|
||||
}
|
||||
}
|
||||
if len(paths) == 0 {
|
||||
//nolint:ineffassign,staticcheck // done to help garbage collecting: opencontainers/runc#2506
|
||||
paths = make(map[string]string)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("Failed to remove paths: %v", paths)
|
||||
}
|
||||
|
||||
var (
|
||||
hugePageSizes []string
|
||||
initHPSOnce sync.Once
|
||||
)
|
||||
|
||||
func HugePageSizes() []string {
|
||||
initHPSOnce.Do(func() {
|
||||
dir, err := os.OpenFile("/sys/kernel/mm/hugepages", unix.O_DIRECTORY|unix.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
files, err := dir.Readdirnames(0)
|
||||
dir.Close()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
hugePageSizes, err = getHugePageSizeFromFilenames(files)
|
||||
if err != nil {
|
||||
logrus.Warn("HugePageSizes: ", err)
|
||||
}
|
||||
})
|
||||
|
||||
return hugePageSizes
|
||||
}
|
||||
|
||||
func getHugePageSizeFromFilenames(fileNames []string) ([]string, error) {
|
||||
pageSizes := make([]string, 0, len(fileNames))
|
||||
var warn error
|
||||
|
||||
for _, file := range fileNames {
|
||||
// example: hugepages-1048576kB
|
||||
val := strings.TrimPrefix(file, "hugepages-")
|
||||
if len(val) == len(file) {
|
||||
// Unexpected file name: no prefix found, ignore it.
|
||||
continue
|
||||
}
|
||||
// The suffix is always "kB" (as of Linux 5.13). If we find
|
||||
// something else, produce an error but keep going.
|
||||
eLen := len(val) - 2
|
||||
val = strings.TrimSuffix(val, "kB")
|
||||
if len(val) != eLen {
|
||||
// Highly unlikely.
|
||||
if warn == nil {
|
||||
warn = errors.New(file + `: invalid suffix (expected "kB")`)
|
||||
}
|
||||
continue
|
||||
}
|
||||
size, err := strconv.Atoi(val)
|
||||
if err != nil {
|
||||
// Highly unlikely.
|
||||
if warn == nil {
|
||||
warn = fmt.Errorf("%s: %w", file, err)
|
||||
}
|
||||
continue
|
||||
}
|
||||
// Model after https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/mm/hugetlb_cgroup.c?id=eff48ddeab782e35e58ccc8853f7386bbae9dec4#n574
|
||||
// but in our case the size is in KB already.
|
||||
if size >= (1 << 20) {
|
||||
val = strconv.Itoa(size>>20) + "GB"
|
||||
} else if size >= (1 << 10) {
|
||||
val = strconv.Itoa(size>>10) + "MB"
|
||||
} else {
|
||||
val += "KB"
|
||||
}
|
||||
pageSizes = append(pageSizes, val)
|
||||
}
|
||||
|
||||
return pageSizes, warn
|
||||
}
|
||||
|
||||
// GetPids returns all pids, that were added to cgroup at path.
|
||||
func GetPids(dir string) ([]int, error) {
|
||||
return readProcsFile(dir)
|
||||
}
|
||||
|
||||
// WriteCgroupProc writes the specified pid into the cgroup's cgroup.procs file
|
||||
func WriteCgroupProc(dir string, pid int) error {
|
||||
// Normally dir should not be empty, one case is that cgroup subsystem
|
||||
// is not mounted, we will get empty dir, and we want it fail here.
|
||||
if dir == "" {
|
||||
return fmt.Errorf("no such directory for %s", CgroupProcesses)
|
||||
}
|
||||
|
||||
// Dont attach any pid to the cgroup if -1 is specified as a pid
|
||||
if pid == -1 {
|
||||
return nil
|
||||
}
|
||||
|
||||
file, err := OpenFile(dir, CgroupProcesses, os.O_WRONLY)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to write %v: %w", pid, err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
for i := 0; i < 5; i++ {
|
||||
_, err = file.WriteString(strconv.Itoa(pid))
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// EINVAL might mean that the task being added to cgroup.procs is in state
|
||||
// TASK_NEW. We should attempt to do so again.
|
||||
if errors.Is(err, unix.EINVAL) {
|
||||
time.Sleep(30 * time.Millisecond)
|
||||
continue
|
||||
}
|
||||
|
||||
return fmt.Errorf("failed to write %v: %w", pid, err)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// Since the OCI spec is designed for cgroup v1, in some cases
|
||||
// there is need to convert from the cgroup v1 configuration to cgroup v2
|
||||
// the formula for cpuShares is y = (1 + ((x - 2) * 9999) / 262142)
|
||||
// convert from [2-262144] to [1-10000]
|
||||
// 262144 comes from Linux kernel definition "#define MAX_SHARES (1UL << 18)"
|
||||
func ConvertCPUSharesToCgroupV2Value(cpuShares uint64) uint64 {
|
||||
if cpuShares == 0 {
|
||||
return 0
|
||||
}
|
||||
return (1 + ((cpuShares-2)*9999)/262142)
|
||||
}
|
||||
|
||||
// ConvertMemorySwapToCgroupV2Value converts MemorySwap value from OCI spec
|
||||
// for use by cgroup v2 drivers. A conversion is needed since Resources.MemorySwap
|
||||
// is defined as memory+swap combined, while in cgroup v2 swap is a separate value.
|
||||
func ConvertMemorySwapToCgroupV2Value(memorySwap, memory int64) (int64, error) {
|
||||
// for compatibility with cgroup1 controller, set swap to unlimited in
|
||||
// case the memory is set to unlimited, and swap is not explicitly set,
|
||||
// treating the request as "set both memory and swap to unlimited".
|
||||
if memory == -1 && memorySwap == 0 {
|
||||
return -1, nil
|
||||
}
|
||||
if memorySwap == -1 || memorySwap == 0 {
|
||||
// -1 is "max", 0 is "unset", so treat as is
|
||||
return memorySwap, nil
|
||||
}
|
||||
// sanity checks
|
||||
if memory == 0 || memory == -1 {
|
||||
return 0, errors.New("unable to set swap limit without memory limit")
|
||||
}
|
||||
if memory < 0 {
|
||||
return 0, fmt.Errorf("invalid memory value: %d", memory)
|
||||
}
|
||||
if memorySwap < memory {
|
||||
return 0, errors.New("memory+swap limit should be >= memory limit")
|
||||
}
|
||||
|
||||
return memorySwap - memory, nil
|
||||
}
|
||||
|
||||
// Since the OCI spec is designed for cgroup v1, in some cases
|
||||
// there is need to convert from the cgroup v1 configuration to cgroup v2
|
||||
// the formula for BlkIOWeight to IOWeight is y = (1 + (x - 10) * 9999 / 990)
|
||||
// convert linearly from [10-1000] to [1-10000]
|
||||
func ConvertBlkIOToIOWeightValue(blkIoWeight uint16) uint64 {
|
||||
if blkIoWeight == 0 {
|
||||
return 0
|
||||
}
|
||||
return 1 + (uint64(blkIoWeight)-10)*9999/990
|
||||
}
|
290
vendor/github.com/opencontainers/runc/libcontainer/cgroups/v1_utils.go
generated
vendored
290
vendor/github.com/opencontainers/runc/libcontainer/cgroups/v1_utils.go
generated
vendored
@ -1,290 +0,0 @@
|
||||
package cgroups
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
|
||||
securejoin "github.com/cyphar/filepath-securejoin"
|
||||
"github.com/moby/sys/mountinfo"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// Code in this source file are specific to cgroup v1,
|
||||
// and must not be used from any cgroup v2 code.
|
||||
|
||||
const (
|
||||
CgroupNamePrefix = "name="
|
||||
defaultPrefix = "/sys/fs/cgroup"
|
||||
)
|
||||
|
||||
var (
|
||||
errUnified = errors.New("not implemented for cgroup v2 unified hierarchy")
|
||||
ErrV1NoUnified = errors.New("invalid configuration: cannot use unified on cgroup v1")
|
||||
|
||||
readMountinfoOnce sync.Once
|
||||
readMountinfoErr error
|
||||
cgroupMountinfo []*mountinfo.Info
|
||||
)
|
||||
|
||||
type NotFoundError struct {
|
||||
Subsystem string
|
||||
}
|
||||
|
||||
func (e *NotFoundError) Error() string {
|
||||
return fmt.Sprintf("mountpoint for %s not found", e.Subsystem)
|
||||
}
|
||||
|
||||
func NewNotFoundError(sub string) error {
|
||||
return &NotFoundError{
|
||||
Subsystem: sub,
|
||||
}
|
||||
}
|
||||
|
||||
func IsNotFound(err error) bool {
|
||||
var nfErr *NotFoundError
|
||||
return errors.As(err, &nfErr)
|
||||
}
|
||||
|
||||
func tryDefaultPath(cgroupPath, subsystem string) string {
|
||||
if !strings.HasPrefix(defaultPrefix, cgroupPath) {
|
||||
return ""
|
||||
}
|
||||
|
||||
// remove possible prefix
|
||||
subsystem = strings.TrimPrefix(subsystem, CgroupNamePrefix)
|
||||
|
||||
// Make sure we're still under defaultPrefix, and resolve
|
||||
// a possible symlink (like cpu -> cpu,cpuacct).
|
||||
path, err := securejoin.SecureJoin(defaultPrefix, subsystem)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
// (1) path should be a directory.
|
||||
st, err := os.Lstat(path)
|
||||
if err != nil || !st.IsDir() {
|
||||
return ""
|
||||
}
|
||||
|
||||
// (2) path should be a mount point.
|
||||
pst, err := os.Lstat(filepath.Dir(path))
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
if st.Sys().(*syscall.Stat_t).Dev == pst.Sys().(*syscall.Stat_t).Dev {
|
||||
// parent dir has the same dev -- path is not a mount point
|
||||
return ""
|
||||
}
|
||||
|
||||
// (3) path should have 'cgroup' fs type.
|
||||
fst := unix.Statfs_t{}
|
||||
err = unix.Statfs(path, &fst)
|
||||
if err != nil || fst.Type != unix.CGROUP_SUPER_MAGIC {
|
||||
return ""
|
||||
}
|
||||
|
||||
return path
|
||||
}
|
||||
|
||||
// readCgroupMountinfo returns a list of cgroup v1 mounts (i.e. the ones
|
||||
// with fstype of "cgroup") for the current running process.
|
||||
//
|
||||
// The results are cached (to avoid re-reading mountinfo which is relatively
|
||||
// expensive), so it is assumed that cgroup mounts are not being changed.
|
||||
func readCgroupMountinfo() ([]*mountinfo.Info, error) {
|
||||
readMountinfoOnce.Do(func() {
|
||||
cgroupMountinfo, readMountinfoErr = mountinfo.GetMounts(
|
||||
mountinfo.FSTypeFilter("cgroup"),
|
||||
)
|
||||
})
|
||||
|
||||
return cgroupMountinfo, readMountinfoErr
|
||||
}
|
||||
|
||||
// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
|
||||
func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) {
|
||||
if IsCgroup2UnifiedMode() {
|
||||
return "", errUnified
|
||||
}
|
||||
|
||||
// If subsystem is empty, we look for the cgroupv2 hybrid path.
|
||||
if len(subsystem) == 0 {
|
||||
return hybridMountpoint, nil
|
||||
}
|
||||
|
||||
// Avoid parsing mountinfo by trying the default path first, if possible.
|
||||
if path := tryDefaultPath(cgroupPath, subsystem); path != "" {
|
||||
return path, nil
|
||||
}
|
||||
|
||||
mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem)
|
||||
return mnt, err
|
||||
}
|
||||
|
||||
func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, error) {
|
||||
if IsCgroup2UnifiedMode() {
|
||||
return "", "", errUnified
|
||||
}
|
||||
|
||||
mi, err := readCgroupMountinfo()
|
||||
if err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
|
||||
return findCgroupMountpointAndRootFromMI(mi, cgroupPath, subsystem)
|
||||
}
|
||||
|
||||
func findCgroupMountpointAndRootFromMI(mounts []*mountinfo.Info, cgroupPath, subsystem string) (string, string, error) {
|
||||
for _, mi := range mounts {
|
||||
if strings.HasPrefix(mi.Mountpoint, cgroupPath) {
|
||||
for _, opt := range strings.Split(mi.VFSOptions, ",") {
|
||||
if opt == subsystem {
|
||||
return mi.Mountpoint, mi.Root, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return "", "", NewNotFoundError(subsystem)
|
||||
}
|
||||
|
||||
func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
|
||||
if len(m.Subsystems) == 0 {
|
||||
return "", errors.New("no subsystem for mount")
|
||||
}
|
||||
|
||||
return getControllerPath(m.Subsystems[0], cgroups)
|
||||
}
|
||||
|
||||
func getCgroupMountsHelper(ss map[string]bool, mounts []*mountinfo.Info, all bool) ([]Mount, error) {
|
||||
res := make([]Mount, 0, len(ss))
|
||||
numFound := 0
|
||||
for _, mi := range mounts {
|
||||
m := Mount{
|
||||
Mountpoint: mi.Mountpoint,
|
||||
Root: mi.Root,
|
||||
}
|
||||
for _, opt := range strings.Split(mi.VFSOptions, ",") {
|
||||
seen, known := ss[opt]
|
||||
if !known || (!all && seen) {
|
||||
continue
|
||||
}
|
||||
ss[opt] = true
|
||||
opt = strings.TrimPrefix(opt, CgroupNamePrefix)
|
||||
m.Subsystems = append(m.Subsystems, opt)
|
||||
numFound++
|
||||
}
|
||||
if len(m.Subsystems) > 0 || all {
|
||||
res = append(res, m)
|
||||
}
|
||||
if !all && numFound >= len(ss) {
|
||||
break
|
||||
}
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func getCgroupMountsV1(all bool) ([]Mount, error) {
|
||||
mi, err := readCgroupMountinfo()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
allSubsystems, err := ParseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
allMap := make(map[string]bool)
|
||||
for s := range allSubsystems {
|
||||
allMap[s] = false
|
||||
}
|
||||
|
||||
return getCgroupMountsHelper(allMap, mi, all)
|
||||
}
|
||||
|
||||
// GetOwnCgroup returns the relative path to the cgroup docker is running in.
|
||||
func GetOwnCgroup(subsystem string) (string, error) {
|
||||
if IsCgroup2UnifiedMode() {
|
||||
return "", errUnified
|
||||
}
|
||||
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return getControllerPath(subsystem, cgroups)
|
||||
}
|
||||
|
||||
func GetOwnCgroupPath(subsystem string) (string, error) {
|
||||
cgroup, err := GetOwnCgroup(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// If subsystem is empty, we look for the cgroupv2 hybrid path.
|
||||
if len(subsystem) == 0 {
|
||||
return hybridMountpoint, nil
|
||||
}
|
||||
|
||||
return getCgroupPathHelper(subsystem, cgroup)
|
||||
}
|
||||
|
||||
func GetInitCgroup(subsystem string) (string, error) {
|
||||
if IsCgroup2UnifiedMode() {
|
||||
return "", errUnified
|
||||
}
|
||||
cgroups, err := ParseCgroupFile("/proc/1/cgroup")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return getControllerPath(subsystem, cgroups)
|
||||
}
|
||||
|
||||
func GetInitCgroupPath(subsystem string) (string, error) {
|
||||
cgroup, err := GetInitCgroup(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return getCgroupPathHelper(subsystem, cgroup)
|
||||
}
|
||||
|
||||
func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
|
||||
mnt, root, err := FindCgroupMountpointAndRoot("", subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// This is needed for nested containers, because in /proc/self/cgroup we
|
||||
// see paths from host, which don't exist in container.
|
||||
relCgroup, err := filepath.Rel(root, cgroup)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return filepath.Join(mnt, relCgroup), nil
|
||||
}
|
||||
|
||||
func getControllerPath(subsystem string, cgroups map[string]string) (string, error) {
|
||||
if IsCgroup2UnifiedMode() {
|
||||
return "", errUnified
|
||||
}
|
||||
|
||||
if p, ok := cgroups[subsystem]; ok {
|
||||
return p, nil
|
||||
}
|
||||
|
||||
if p, ok := cgroups[CgroupNamePrefix+subsystem]; ok {
|
||||
return p, nil
|
||||
}
|
||||
|
||||
return "", NewNotFoundError(subsystem)
|
||||
}
|
66
vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go
generated
vendored
66
vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go
generated
vendored
@ -1,66 +0,0 @@
|
||||
package configs
|
||||
|
||||
import "fmt"
|
||||
|
||||
// blockIODevice holds major:minor format supported in blkio cgroup
|
||||
type blockIODevice struct {
|
||||
// Major is the device's major number
|
||||
Major int64 `json:"major"`
|
||||
// Minor is the device's minor number
|
||||
Minor int64 `json:"minor"`
|
||||
}
|
||||
|
||||
// WeightDevice struct holds a `major:minor weight`|`major:minor leaf_weight` pair
|
||||
type WeightDevice struct {
|
||||
blockIODevice
|
||||
// Weight is the bandwidth rate for the device, range is from 10 to 1000
|
||||
Weight uint16 `json:"weight"`
|
||||
// LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only
|
||||
LeafWeight uint16 `json:"leafWeight"`
|
||||
}
|
||||
|
||||
// NewWeightDevice returns a configured WeightDevice pointer
|
||||
func NewWeightDevice(major, minor int64, weight, leafWeight uint16) *WeightDevice {
|
||||
wd := &WeightDevice{}
|
||||
wd.Major = major
|
||||
wd.Minor = minor
|
||||
wd.Weight = weight
|
||||
wd.LeafWeight = leafWeight
|
||||
return wd
|
||||
}
|
||||
|
||||
// WeightString formats the struct to be writable to the cgroup specific file
|
||||
func (wd *WeightDevice) WeightString() string {
|
||||
return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.Weight)
|
||||
}
|
||||
|
||||
// LeafWeightString formats the struct to be writable to the cgroup specific file
|
||||
func (wd *WeightDevice) LeafWeightString() string {
|
||||
return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.LeafWeight)
|
||||
}
|
||||
|
||||
// ThrottleDevice struct holds a `major:minor rate_per_second` pair
|
||||
type ThrottleDevice struct {
|
||||
blockIODevice
|
||||
// Rate is the IO rate limit per cgroup per device
|
||||
Rate uint64 `json:"rate"`
|
||||
}
|
||||
|
||||
// NewThrottleDevice returns a configured ThrottleDevice pointer
|
||||
func NewThrottleDevice(major, minor int64, rate uint64) *ThrottleDevice {
|
||||
td := &ThrottleDevice{}
|
||||
td.Major = major
|
||||
td.Minor = minor
|
||||
td.Rate = rate
|
||||
return td
|
||||
}
|
||||
|
||||
// String formats the struct to be writable to the cgroup specific file
|
||||
func (td *ThrottleDevice) String() string {
|
||||
return fmt.Sprintf("%d:%d %d", td.Major, td.Minor, td.Rate)
|
||||
}
|
||||
|
||||
// StringName formats the struct to be writable to the cgroup specific file
|
||||
func (td *ThrottleDevice) StringName(name string) string {
|
||||
return fmt.Sprintf("%d:%d %s=%d", td.Major, td.Minor, name, td.Rate)
|
||||
}
|
158
vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go
generated
vendored
158
vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go
generated
vendored
@ -1,158 +0,0 @@
|
||||
package configs
|
||||
|
||||
import (
|
||||
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||
"github.com/opencontainers/runc/libcontainer/devices"
|
||||
)
|
||||
|
||||
type FreezerState string
|
||||
|
||||
const (
|
||||
Undefined FreezerState = ""
|
||||
Frozen FreezerState = "FROZEN"
|
||||
Thawed FreezerState = "THAWED"
|
||||
)
|
||||
|
||||
// Cgroup holds properties of a cgroup on Linux.
|
||||
type Cgroup struct {
|
||||
// Name specifies the name of the cgroup
|
||||
Name string `json:"name,omitempty"`
|
||||
|
||||
// Parent specifies the name of parent of cgroup or slice
|
||||
Parent string `json:"parent,omitempty"`
|
||||
|
||||
// Path specifies the path to cgroups that are created and/or joined by the container.
|
||||
// The path is assumed to be relative to the host system cgroup mountpoint.
|
||||
Path string `json:"path"`
|
||||
|
||||
// ScopePrefix describes prefix for the scope name
|
||||
ScopePrefix string `json:"scope_prefix"`
|
||||
|
||||
// Resources contains various cgroups settings to apply
|
||||
*Resources
|
||||
|
||||
// Systemd tells if systemd should be used to manage cgroups.
|
||||
Systemd bool
|
||||
|
||||
// SystemdProps are any additional properties for systemd,
|
||||
// derived from org.systemd.property.xxx annotations.
|
||||
// Ignored unless systemd is used for managing cgroups.
|
||||
SystemdProps []systemdDbus.Property `json:"-"`
|
||||
|
||||
// Rootless tells if rootless cgroups should be used.
|
||||
Rootless bool
|
||||
|
||||
// The host UID that should own the cgroup, or nil to accept
|
||||
// the default ownership. This should only be set when the
|
||||
// cgroupfs is to be mounted read/write.
|
||||
// Not all cgroup manager implementations support changing
|
||||
// the ownership.
|
||||
OwnerUID *int `json:"owner_uid,omitempty"`
|
||||
}
|
||||
|
||||
type Resources struct {
|
||||
// Devices is the set of access rules for devices in the container.
|
||||
Devices []*devices.Rule `json:"devices"`
|
||||
|
||||
// Memory limit (in bytes)
|
||||
Memory int64 `json:"memory"`
|
||||
|
||||
// Memory reservation or soft_limit (in bytes)
|
||||
MemoryReservation int64 `json:"memory_reservation"`
|
||||
|
||||
// Total memory usage (memory + swap); set `-1` to enable unlimited swap
|
||||
MemorySwap int64 `json:"memory_swap"`
|
||||
|
||||
// CPU shares (relative weight vs. other containers)
|
||||
CpuShares uint64 `json:"cpu_shares"`
|
||||
|
||||
// CPU hardcap limit (in usecs). Allowed cpu time in a given period.
|
||||
CpuQuota int64 `json:"cpu_quota"`
|
||||
|
||||
// CPU period to be used for hardcapping (in usecs). 0 to use system default.
|
||||
CpuPeriod uint64 `json:"cpu_period"`
|
||||
|
||||
// How many time CPU will use in realtime scheduling (in usecs).
|
||||
CpuRtRuntime int64 `json:"cpu_rt_quota"`
|
||||
|
||||
// CPU period to be used for realtime scheduling (in usecs).
|
||||
CpuRtPeriod uint64 `json:"cpu_rt_period"`
|
||||
|
||||
// CPU to use
|
||||
CpusetCpus string `json:"cpuset_cpus"`
|
||||
|
||||
// MEM to use
|
||||
CpusetMems string `json:"cpuset_mems"`
|
||||
|
||||
// Process limit; set <= `0' to disable limit.
|
||||
PidsLimit int64 `json:"pids_limit"`
|
||||
|
||||
// Specifies per cgroup weight, range is from 10 to 1000.
|
||||
BlkioWeight uint16 `json:"blkio_weight"`
|
||||
|
||||
// Specifies tasks' weight in the given cgroup while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only
|
||||
BlkioLeafWeight uint16 `json:"blkio_leaf_weight"`
|
||||
|
||||
// Weight per cgroup per device, can override BlkioWeight.
|
||||
BlkioWeightDevice []*WeightDevice `json:"blkio_weight_device"`
|
||||
|
||||
// IO read rate limit per cgroup per device, bytes per second.
|
||||
BlkioThrottleReadBpsDevice []*ThrottleDevice `json:"blkio_throttle_read_bps_device"`
|
||||
|
||||
// IO write rate limit per cgroup per device, bytes per second.
|
||||
BlkioThrottleWriteBpsDevice []*ThrottleDevice `json:"blkio_throttle_write_bps_device"`
|
||||
|
||||
// IO read rate limit per cgroup per device, IO per second.
|
||||
BlkioThrottleReadIOPSDevice []*ThrottleDevice `json:"blkio_throttle_read_iops_device"`
|
||||
|
||||
// IO write rate limit per cgroup per device, IO per second.
|
||||
BlkioThrottleWriteIOPSDevice []*ThrottleDevice `json:"blkio_throttle_write_iops_device"`
|
||||
|
||||
// set the freeze value for the process
|
||||
Freezer FreezerState `json:"freezer"`
|
||||
|
||||
// Hugetlb limit (in bytes)
|
||||
HugetlbLimit []*HugepageLimit `json:"hugetlb_limit"`
|
||||
|
||||
// Whether to disable OOM Killer
|
||||
OomKillDisable bool `json:"oom_kill_disable"`
|
||||
|
||||
// Tuning swappiness behaviour per cgroup
|
||||
MemorySwappiness *uint64 `json:"memory_swappiness"`
|
||||
|
||||
// Set priority of network traffic for container
|
||||
NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap"`
|
||||
|
||||
// Set class identifier for container's network packets
|
||||
NetClsClassid uint32 `json:"net_cls_classid_u"`
|
||||
|
||||
// Rdma resource restriction configuration
|
||||
Rdma map[string]LinuxRdma `json:"rdma"`
|
||||
|
||||
// Used on cgroups v2:
|
||||
|
||||
// CpuWeight sets a proportional bandwidth limit.
|
||||
CpuWeight uint64 `json:"cpu_weight"`
|
||||
|
||||
// Unified is cgroupv2-only key-value map.
|
||||
Unified map[string]string `json:"unified"`
|
||||
|
||||
// SkipDevices allows to skip configuring device permissions.
|
||||
// Used by e.g. kubelet while creating a parent cgroup (kubepods)
|
||||
// common for many containers, and by runc update.
|
||||
//
|
||||
// NOTE it is impossible to start a container which has this flag set.
|
||||
SkipDevices bool `json:"-"`
|
||||
|
||||
// SkipFreezeOnSet is a flag for cgroup manager to skip the cgroup
|
||||
// freeze when setting resources. Only applicable to systemd legacy
|
||||
// (i.e. cgroup v1) manager (which uses freeze by default to avoid
|
||||
// spurious permission errors caused by systemd inability to update
|
||||
// device rules in a non-disruptive manner).
|
||||
//
|
||||
// If not set, a few methods (such as looking into cgroup's
|
||||
// devices.list and querying the systemd unit properties) are used
|
||||
// during Set() to figure out whether the freeze is required. Those
|
||||
// methods may be relatively slow, thus this flag.
|
||||
SkipFreezeOnSet bool `json:"-"`
|
||||
}
|
9
vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go
generated
vendored
9
vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go
generated
vendored
@ -1,9 +0,0 @@
|
||||
//go:build !linux
|
||||
// +build !linux
|
||||
|
||||
package configs
|
||||
|
||||
// Cgroup holds properties of a cgroup on Linux
|
||||
// TODO Windows: This can ultimately be entirely factored out on Windows as
|
||||
// cgroups are a Unix-specific construct.
|
||||
type Cgroup struct{}
|
414
vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
generated
vendored
414
vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
generated
vendored
@ -1,414 +0,0 @@
|
||||
package configs
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os/exec"
|
||||
"time"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/devices"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
)
|
||||
|
||||
type Rlimit struct {
|
||||
Type int `json:"type"`
|
||||
Hard uint64 `json:"hard"`
|
||||
Soft uint64 `json:"soft"`
|
||||
}
|
||||
|
||||
// IDMap represents UID/GID Mappings for User Namespaces.
|
||||
type IDMap struct {
|
||||
ContainerID int `json:"container_id"`
|
||||
HostID int `json:"host_id"`
|
||||
Size int `json:"size"`
|
||||
}
|
||||
|
||||
// Seccomp represents syscall restrictions
|
||||
// By default, only the native architecture of the kernel is allowed to be used
|
||||
// for syscalls. Additional architectures can be added by specifying them in
|
||||
// Architectures.
|
||||
type Seccomp struct {
|
||||
DefaultAction Action `json:"default_action"`
|
||||
Architectures []string `json:"architectures"`
|
||||
Syscalls []*Syscall `json:"syscalls"`
|
||||
DefaultErrnoRet *uint `json:"default_errno_ret"`
|
||||
ListenerPath string `json:"listener_path,omitempty"`
|
||||
ListenerMetadata string `json:"listener_metadata,omitempty"`
|
||||
}
|
||||
|
||||
// Action is taken upon rule match in Seccomp
|
||||
type Action int
|
||||
|
||||
const (
|
||||
Kill Action = iota + 1
|
||||
Errno
|
||||
Trap
|
||||
Allow
|
||||
Trace
|
||||
Log
|
||||
Notify
|
||||
KillThread
|
||||
KillProcess
|
||||
)
|
||||
|
||||
// Operator is a comparison operator to be used when matching syscall arguments in Seccomp
|
||||
type Operator int
|
||||
|
||||
const (
|
||||
EqualTo Operator = iota + 1
|
||||
NotEqualTo
|
||||
GreaterThan
|
||||
GreaterThanOrEqualTo
|
||||
LessThan
|
||||
LessThanOrEqualTo
|
||||
MaskEqualTo
|
||||
)
|
||||
|
||||
// Arg is a rule to match a specific syscall argument in Seccomp
|
||||
type Arg struct {
|
||||
Index uint `json:"index"`
|
||||
Value uint64 `json:"value"`
|
||||
ValueTwo uint64 `json:"value_two"`
|
||||
Op Operator `json:"op"`
|
||||
}
|
||||
|
||||
// Syscall is a rule to match a syscall in Seccomp
|
||||
type Syscall struct {
|
||||
Name string `json:"name"`
|
||||
Action Action `json:"action"`
|
||||
ErrnoRet *uint `json:"errnoRet"`
|
||||
Args []*Arg `json:"args"`
|
||||
}
|
||||
|
||||
// TODO Windows. Many of these fields should be factored out into those parts
|
||||
// which are common across platforms, and those which are platform specific.
|
||||
|
||||
// Config defines configuration options for executing a process inside a contained environment.
|
||||
type Config struct {
|
||||
// NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs
|
||||
// This is a common option when the container is running in ramdisk
|
||||
NoPivotRoot bool `json:"no_pivot_root"`
|
||||
|
||||
// ParentDeathSignal specifies the signal that is sent to the container's process in the case
|
||||
// that the parent process dies.
|
||||
ParentDeathSignal int `json:"parent_death_signal"`
|
||||
|
||||
// Path to a directory containing the container's root filesystem.
|
||||
Rootfs string `json:"rootfs"`
|
||||
|
||||
// Umask is the umask to use inside of the container.
|
||||
Umask *uint32 `json:"umask"`
|
||||
|
||||
// Readonlyfs will remount the container's rootfs as readonly where only externally mounted
|
||||
// bind mounts are writtable.
|
||||
Readonlyfs bool `json:"readonlyfs"`
|
||||
|
||||
// Specifies the mount propagation flags to be applied to /.
|
||||
RootPropagation int `json:"rootPropagation"`
|
||||
|
||||
// Mounts specify additional source and destination paths that will be mounted inside the container's
|
||||
// rootfs and mount namespace if specified
|
||||
Mounts []*Mount `json:"mounts"`
|
||||
|
||||
// The device nodes that should be automatically created within the container upon container start. Note, make sure that the node is marked as allowed in the cgroup as well!
|
||||
Devices []*devices.Device `json:"devices"`
|
||||
|
||||
MountLabel string `json:"mount_label"`
|
||||
|
||||
// Hostname optionally sets the container's hostname if provided
|
||||
Hostname string `json:"hostname"`
|
||||
|
||||
// Namespaces specifies the container's namespaces that it should setup when cloning the init process
|
||||
// If a namespace is not provided that namespace is shared from the container's parent process
|
||||
Namespaces Namespaces `json:"namespaces"`
|
||||
|
||||
// Capabilities specify the capabilities to keep when executing the process inside the container
|
||||
// All capabilities not specified will be dropped from the processes capability mask
|
||||
Capabilities *Capabilities `json:"capabilities"`
|
||||
|
||||
// Networks specifies the container's network setup to be created
|
||||
Networks []*Network `json:"networks"`
|
||||
|
||||
// Routes can be specified to create entries in the route table as the container is started
|
||||
Routes []*Route `json:"routes"`
|
||||
|
||||
// Cgroups specifies specific cgroup settings for the various subsystems that the container is
|
||||
// placed into to limit the resources the container has available
|
||||
Cgroups *Cgroup `json:"cgroups"`
|
||||
|
||||
// AppArmorProfile specifies the profile to apply to the process running in the container and is
|
||||
// change at the time the process is execed
|
||||
AppArmorProfile string `json:"apparmor_profile,omitempty"`
|
||||
|
||||
// ProcessLabel specifies the label to apply to the process running in the container. It is
|
||||
// commonly used by selinux
|
||||
ProcessLabel string `json:"process_label,omitempty"`
|
||||
|
||||
// Rlimits specifies the resource limits, such as max open files, to set in the container
|
||||
// If Rlimits are not set, the container will inherit rlimits from the parent process
|
||||
Rlimits []Rlimit `json:"rlimits,omitempty"`
|
||||
|
||||
// OomScoreAdj specifies the adjustment to be made by the kernel when calculating oom scores
|
||||
// for a process. Valid values are between the range [-1000, '1000'], where processes with
|
||||
// higher scores are preferred for being killed. If it is unset then we don't touch the current
|
||||
// value.
|
||||
// More information about kernel oom score calculation here: https://lwn.net/Articles/317814/
|
||||
OomScoreAdj *int `json:"oom_score_adj,omitempty"`
|
||||
|
||||
// UidMappings is an array of User ID mappings for User Namespaces
|
||||
UidMappings []IDMap `json:"uid_mappings"`
|
||||
|
||||
// GidMappings is an array of Group ID mappings for User Namespaces
|
||||
GidMappings []IDMap `json:"gid_mappings"`
|
||||
|
||||
// MaskPaths specifies paths within the container's rootfs to mask over with a bind
|
||||
// mount pointing to /dev/null as to prevent reads of the file.
|
||||
MaskPaths []string `json:"mask_paths"`
|
||||
|
||||
// ReadonlyPaths specifies paths within the container's rootfs to remount as read-only
|
||||
// so that these files prevent any writes.
|
||||
ReadonlyPaths []string `json:"readonly_paths"`
|
||||
|
||||
// Sysctl is a map of properties and their values. It is the equivalent of using
|
||||
// sysctl -w my.property.name value in Linux.
|
||||
Sysctl map[string]string `json:"sysctl"`
|
||||
|
||||
// Seccomp allows actions to be taken whenever a syscall is made within the container.
|
||||
// A number of rules are given, each having an action to be taken if a syscall matches it.
|
||||
// A default action to be taken if no rules match is also given.
|
||||
Seccomp *Seccomp `json:"seccomp"`
|
||||
|
||||
// NoNewPrivileges controls whether processes in the container can gain additional privileges.
|
||||
NoNewPrivileges bool `json:"no_new_privileges,omitempty"`
|
||||
|
||||
// Hooks are a collection of actions to perform at various container lifecycle events.
|
||||
// CommandHooks are serialized to JSON, but other hooks are not.
|
||||
Hooks Hooks
|
||||
|
||||
// Version is the version of opencontainer specification that is supported.
|
||||
Version string `json:"version"`
|
||||
|
||||
// Labels are user defined metadata that is stored in the config and populated on the state
|
||||
Labels []string `json:"labels"`
|
||||
|
||||
// NoNewKeyring will not allocated a new session keyring for the container. It will use the
|
||||
// callers keyring in this case.
|
||||
NoNewKeyring bool `json:"no_new_keyring"`
|
||||
|
||||
// IntelRdt specifies settings for Intel RDT group that the container is placed into
|
||||
// to limit the resources (e.g., L3 cache, memory bandwidth) the container has available
|
||||
IntelRdt *IntelRdt `json:"intel_rdt,omitempty"`
|
||||
|
||||
// RootlessEUID is set when the runc was launched with non-zero EUID.
|
||||
// Note that RootlessEUID is set to false when launched with EUID=0 in userns.
|
||||
// When RootlessEUID is set, runc creates a new userns for the container.
|
||||
// (config.json needs to contain userns settings)
|
||||
RootlessEUID bool `json:"rootless_euid,omitempty"`
|
||||
|
||||
// RootlessCgroups is set when unlikely to have the full access to cgroups.
|
||||
// When RootlessCgroups is set, cgroups errors are ignored.
|
||||
RootlessCgroups bool `json:"rootless_cgroups,omitempty"`
|
||||
}
|
||||
|
||||
type (
|
||||
HookName string
|
||||
HookList []Hook
|
||||
Hooks map[HookName]HookList
|
||||
)
|
||||
|
||||
const (
|
||||
// Prestart commands are executed after the container namespaces are created,
|
||||
// but before the user supplied command is executed from init.
|
||||
// Note: This hook is now deprecated
|
||||
// Prestart commands are called in the Runtime namespace.
|
||||
Prestart HookName = "prestart"
|
||||
|
||||
// CreateRuntime commands MUST be called as part of the create operation after
|
||||
// the runtime environment has been created but before the pivot_root has been executed.
|
||||
// CreateRuntime is called immediately after the deprecated Prestart hook.
|
||||
// CreateRuntime commands are called in the Runtime Namespace.
|
||||
CreateRuntime HookName = "createRuntime"
|
||||
|
||||
// CreateContainer commands MUST be called as part of the create operation after
|
||||
// the runtime environment has been created but before the pivot_root has been executed.
|
||||
// CreateContainer commands are called in the Container namespace.
|
||||
CreateContainer HookName = "createContainer"
|
||||
|
||||
// StartContainer commands MUST be called as part of the start operation and before
|
||||
// the container process is started.
|
||||
// StartContainer commands are called in the Container namespace.
|
||||
StartContainer HookName = "startContainer"
|
||||
|
||||
// Poststart commands are executed after the container init process starts.
|
||||
// Poststart commands are called in the Runtime Namespace.
|
||||
Poststart HookName = "poststart"
|
||||
|
||||
// Poststop commands are executed after the container init process exits.
|
||||
// Poststop commands are called in the Runtime Namespace.
|
||||
Poststop HookName = "poststop"
|
||||
)
|
||||
|
||||
// KnownHookNames returns the known hook names.
|
||||
// Used by `runc features`.
|
||||
func KnownHookNames() []string {
|
||||
return []string{
|
||||
string(Prestart), // deprecated
|
||||
string(CreateRuntime),
|
||||
string(CreateContainer),
|
||||
string(StartContainer),
|
||||
string(Poststart),
|
||||
string(Poststop),
|
||||
}
|
||||
}
|
||||
|
||||
type Capabilities struct {
|
||||
// Bounding is the set of capabilities checked by the kernel.
|
||||
Bounding []string
|
||||
// Effective is the set of capabilities checked by the kernel.
|
||||
Effective []string
|
||||
// Inheritable is the capabilities preserved across execve.
|
||||
Inheritable []string
|
||||
// Permitted is the limiting superset for effective capabilities.
|
||||
Permitted []string
|
||||
// Ambient is the ambient set of capabilities that are kept.
|
||||
Ambient []string
|
||||
}
|
||||
|
||||
func (hooks HookList) RunHooks(state *specs.State) error {
|
||||
for i, h := range hooks {
|
||||
if err := h.Run(state); err != nil {
|
||||
return fmt.Errorf("error running hook #%d: %w", i, err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (hooks *Hooks) UnmarshalJSON(b []byte) error {
|
||||
var state map[HookName][]CommandHook
|
||||
|
||||
if err := json.Unmarshal(b, &state); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
*hooks = Hooks{}
|
||||
for n, commandHooks := range state {
|
||||
if len(commandHooks) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
(*hooks)[n] = HookList{}
|
||||
for _, h := range commandHooks {
|
||||
(*hooks)[n] = append((*hooks)[n], h)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (hooks *Hooks) MarshalJSON() ([]byte, error) {
|
||||
serialize := func(hooks []Hook) (serializableHooks []CommandHook) {
|
||||
for _, hook := range hooks {
|
||||
switch chook := hook.(type) {
|
||||
case CommandHook:
|
||||
serializableHooks = append(serializableHooks, chook)
|
||||
default:
|
||||
logrus.Warnf("cannot serialize hook of type %T, skipping", hook)
|
||||
}
|
||||
}
|
||||
|
||||
return serializableHooks
|
||||
}
|
||||
|
||||
return json.Marshal(map[string]interface{}{
|
||||
"prestart": serialize((*hooks)[Prestart]),
|
||||
"createRuntime": serialize((*hooks)[CreateRuntime]),
|
||||
"createContainer": serialize((*hooks)[CreateContainer]),
|
||||
"startContainer": serialize((*hooks)[StartContainer]),
|
||||
"poststart": serialize((*hooks)[Poststart]),
|
||||
"poststop": serialize((*hooks)[Poststop]),
|
||||
})
|
||||
}
|
||||
|
||||
type Hook interface {
|
||||
// Run executes the hook with the provided state.
|
||||
Run(*specs.State) error
|
||||
}
|
||||
|
||||
// NewFunctionHook will call the provided function when the hook is run.
|
||||
func NewFunctionHook(f func(*specs.State) error) FuncHook {
|
||||
return FuncHook{
|
||||
run: f,
|
||||
}
|
||||
}
|
||||
|
||||
type FuncHook struct {
|
||||
run func(*specs.State) error
|
||||
}
|
||||
|
||||
func (f FuncHook) Run(s *specs.State) error {
|
||||
return f.run(s)
|
||||
}
|
||||
|
||||
type Command struct {
|
||||
Path string `json:"path"`
|
||||
Args []string `json:"args"`
|
||||
Env []string `json:"env"`
|
||||
Dir string `json:"dir"`
|
||||
Timeout *time.Duration `json:"timeout"`
|
||||
}
|
||||
|
||||
// NewCommandHook will execute the provided command when the hook is run.
|
||||
func NewCommandHook(cmd Command) CommandHook {
|
||||
return CommandHook{
|
||||
Command: cmd,
|
||||
}
|
||||
}
|
||||
|
||||
type CommandHook struct {
|
||||
Command
|
||||
}
|
||||
|
||||
func (c Command) Run(s *specs.State) error {
|
||||
b, err := json.Marshal(s)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var stdout, stderr bytes.Buffer
|
||||
cmd := exec.Cmd{
|
||||
Path: c.Path,
|
||||
Args: c.Args,
|
||||
Env: c.Env,
|
||||
Stdin: bytes.NewReader(b),
|
||||
Stdout: &stdout,
|
||||
Stderr: &stderr,
|
||||
}
|
||||
if err := cmd.Start(); err != nil {
|
||||
return err
|
||||
}
|
||||
errC := make(chan error, 1)
|
||||
go func() {
|
||||
err := cmd.Wait()
|
||||
if err != nil {
|
||||
err = fmt.Errorf("error running hook: %w, stdout: %s, stderr: %s", err, stdout.String(), stderr.String())
|
||||
}
|
||||
errC <- err
|
||||
}()
|
||||
var timerCh <-chan time.Time
|
||||
if c.Timeout != nil {
|
||||
timer := time.NewTimer(*c.Timeout)
|
||||
defer timer.Stop()
|
||||
timerCh = timer.C
|
||||
}
|
||||
select {
|
||||
case err := <-errC:
|
||||
return err
|
||||
case <-timerCh:
|
||||
_ = cmd.Process.Kill()
|
||||
<-errC
|
||||
return fmt.Errorf("hook ran past specified timeout of %.1fs", c.Timeout.Seconds())
|
||||
}
|
||||
}
|
68
vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go
generated
vendored
68
vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go
generated
vendored
@ -1,68 +0,0 @@
|
||||
package configs
|
||||
|
||||
import "errors"
|
||||
|
||||
var (
|
||||
errNoUIDMap = errors.New("User namespaces enabled, but no uid mappings found.")
|
||||
errNoUserMap = errors.New("User namespaces enabled, but no user mapping found.")
|
||||
errNoGIDMap = errors.New("User namespaces enabled, but no gid mappings found.")
|
||||
errNoGroupMap = errors.New("User namespaces enabled, but no group mapping found.")
|
||||
)
|
||||
|
||||
// HostUID gets the translated uid for the process on host which could be
|
||||
// different when user namespaces are enabled.
|
||||
func (c Config) HostUID(containerId int) (int, error) {
|
||||
if c.Namespaces.Contains(NEWUSER) {
|
||||
if c.UidMappings == nil {
|
||||
return -1, errNoUIDMap
|
||||
}
|
||||
id, found := c.hostIDFromMapping(containerId, c.UidMappings)
|
||||
if !found {
|
||||
return -1, errNoUserMap
|
||||
}
|
||||
return id, nil
|
||||
}
|
||||
// Return unchanged id.
|
||||
return containerId, nil
|
||||
}
|
||||
|
||||
// HostRootUID gets the root uid for the process on host which could be non-zero
|
||||
// when user namespaces are enabled.
|
||||
func (c Config) HostRootUID() (int, error) {
|
||||
return c.HostUID(0)
|
||||
}
|
||||
|
||||
// HostGID gets the translated gid for the process on host which could be
|
||||
// different when user namespaces are enabled.
|
||||
func (c Config) HostGID(containerId int) (int, error) {
|
||||
if c.Namespaces.Contains(NEWUSER) {
|
||||
if c.GidMappings == nil {
|
||||
return -1, errNoGIDMap
|
||||
}
|
||||
id, found := c.hostIDFromMapping(containerId, c.GidMappings)
|
||||
if !found {
|
||||
return -1, errNoGroupMap
|
||||
}
|
||||
return id, nil
|
||||
}
|
||||
// Return unchanged id.
|
||||
return containerId, nil
|
||||
}
|
||||
|
||||
// HostRootGID gets the root gid for the process on host which could be non-zero
|
||||
// when user namespaces are enabled.
|
||||
func (c Config) HostRootGID() (int, error) {
|
||||
return c.HostGID(0)
|
||||
}
|
||||
|
||||
// Utility function that gets a host ID for a container ID from user namespace map
|
||||
// if that ID is present in the map.
|
||||
func (c Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) {
|
||||
for _, m := range uMap {
|
||||
if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) {
|
||||
hostID := m.HostID + (containerID - m.ContainerID)
|
||||
return hostID, true
|
||||
}
|
||||
}
|
||||
return -1, false
|
||||
}
|
10
vendor/github.com/opencontainers/runc/libcontainer/configs/configs_fuzzer.go
generated
vendored
10
vendor/github.com/opencontainers/runc/libcontainer/configs/configs_fuzzer.go
generated
vendored
@ -1,10 +0,0 @@
|
||||
//go:build gofuzz
|
||||
// +build gofuzz
|
||||
|
||||
package configs
|
||||
|
||||
func FuzzUnmarshalJSON(data []byte) int {
|
||||
hooks := Hooks{}
|
||||
_ = hooks.UnmarshalJSON(data)
|
||||
return 1
|
||||
}
|
9
vendor/github.com/opencontainers/runc/libcontainer/configs/hugepage_limit.go
generated
vendored
9
vendor/github.com/opencontainers/runc/libcontainer/configs/hugepage_limit.go
generated
vendored
@ -1,9 +0,0 @@
|
||||
package configs
|
||||
|
||||
type HugepageLimit struct {
|
||||
// which type of hugepage to limit.
|
||||
Pagesize string `json:"page_size"`
|
||||
|
||||
// usage limit for hugepage.
|
||||
Limit uint64 `json:"limit"`
|
||||
}
|
16
vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go
generated
vendored
16
vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go
generated
vendored
@ -1,16 +0,0 @@
|
||||
package configs
|
||||
|
||||
type IntelRdt struct {
|
||||
// The identity for RDT Class of Service
|
||||
ClosID string `json:"closID,omitempty"`
|
||||
|
||||
// The schema for L3 cache id and capacity bitmask (CBM)
|
||||
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
|
||||
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
|
||||
|
||||
// The schema of memory bandwidth per L3 cache id
|
||||
// Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
|
||||
// The unit of memory bandwidth is specified in "percentages" by
|
||||
// default, and in "MBps" if MBA Software Controller is enabled.
|
||||
MemBwSchema string `json:"memBwSchema,omitempty"`
|
||||
}
|
14
vendor/github.com/opencontainers/runc/libcontainer/configs/interface_priority_map.go
generated
vendored
14
vendor/github.com/opencontainers/runc/libcontainer/configs/interface_priority_map.go
generated
vendored
@ -1,14 +0,0 @@
|
||||
package configs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
type IfPrioMap struct {
|
||||
Interface string `json:"interface"`
|
||||
Priority int64 `json:"priority"`
|
||||
}
|
||||
|
||||
func (i *IfPrioMap) CgroupString() string {
|
||||
return fmt.Sprintf("%s %d", i.Interface, i.Priority)
|
||||
}
|
48
vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go
generated
vendored
48
vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go
generated
vendored
@ -1,48 +0,0 @@
|
||||
package configs
|
||||
|
||||
import "golang.org/x/sys/unix"
|
||||
|
||||
const (
|
||||
// EXT_COPYUP is a directive to copy up the contents of a directory when
|
||||
// a tmpfs is mounted over it.
|
||||
EXT_COPYUP = 1 << iota //nolint:golint // ignore "don't use ALL_CAPS" warning
|
||||
)
|
||||
|
||||
type Mount struct {
|
||||
// Source path for the mount.
|
||||
Source string `json:"source"`
|
||||
|
||||
// Destination path for the mount inside the container.
|
||||
Destination string `json:"destination"`
|
||||
|
||||
// Device the mount is for.
|
||||
Device string `json:"device"`
|
||||
|
||||
// Mount flags.
|
||||
Flags int `json:"flags"`
|
||||
|
||||
// Propagation Flags
|
||||
PropagationFlags []int `json:"propagation_flags"`
|
||||
|
||||
// Mount data applied to the mount.
|
||||
Data string `json:"data"`
|
||||
|
||||
// Relabel source if set, "z" indicates shared, "Z" indicates unshared.
|
||||
Relabel string `json:"relabel"`
|
||||
|
||||
// RecAttr represents mount properties to be applied recursively (AT_RECURSIVE), see mount_setattr(2).
|
||||
RecAttr *unix.MountAttr `json:"rec_attr"`
|
||||
|
||||
// Extensions are additional flags that are specific to runc.
|
||||
Extensions int `json:"extensions"`
|
||||
|
||||
// Optional Command to be run before Source is mounted.
|
||||
PremountCmds []Command `json:"premount_cmds"`
|
||||
|
||||
// Optional Command to be run after Source is mounted.
|
||||
PostmountCmds []Command `json:"postmount_cmds"`
|
||||
}
|
||||
|
||||
func (m *Mount) IsBind() bool {
|
||||
return m.Flags&unix.MS_BIND != 0
|
||||
}
|
5
vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces.go
generated
vendored
5
vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces.go
generated
vendored
@ -1,5 +0,0 @@
|
||||
package configs
|
||||
|
||||
type NamespaceType string
|
||||
|
||||
type Namespaces []Namespace
|
126
vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go
generated
vendored
126
vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go
generated
vendored
@ -1,126 +0,0 @@
|
||||
package configs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"sync"
|
||||
)
|
||||
|
||||
const (
|
||||
NEWNET NamespaceType = "NEWNET"
|
||||
NEWPID NamespaceType = "NEWPID"
|
||||
NEWNS NamespaceType = "NEWNS"
|
||||
NEWUTS NamespaceType = "NEWUTS"
|
||||
NEWIPC NamespaceType = "NEWIPC"
|
||||
NEWUSER NamespaceType = "NEWUSER"
|
||||
NEWCGROUP NamespaceType = "NEWCGROUP"
|
||||
)
|
||||
|
||||
var (
|
||||
nsLock sync.Mutex
|
||||
supportedNamespaces = make(map[NamespaceType]bool)
|
||||
)
|
||||
|
||||
// NsName converts the namespace type to its filename
|
||||
func NsName(ns NamespaceType) string {
|
||||
switch ns {
|
||||
case NEWNET:
|
||||
return "net"
|
||||
case NEWNS:
|
||||
return "mnt"
|
||||
case NEWPID:
|
||||
return "pid"
|
||||
case NEWIPC:
|
||||
return "ipc"
|
||||
case NEWUSER:
|
||||
return "user"
|
||||
case NEWUTS:
|
||||
return "uts"
|
||||
case NEWCGROUP:
|
||||
return "cgroup"
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// IsNamespaceSupported returns whether a namespace is available or
|
||||
// not
|
||||
func IsNamespaceSupported(ns NamespaceType) bool {
|
||||
nsLock.Lock()
|
||||
defer nsLock.Unlock()
|
||||
supported, ok := supportedNamespaces[ns]
|
||||
if ok {
|
||||
return supported
|
||||
}
|
||||
nsFile := NsName(ns)
|
||||
// if the namespace type is unknown, just return false
|
||||
if nsFile == "" {
|
||||
return false
|
||||
}
|
||||
_, err := os.Stat("/proc/self/ns/" + nsFile)
|
||||
// a namespace is supported if it exists and we have permissions to read it
|
||||
supported = err == nil
|
||||
supportedNamespaces[ns] = supported
|
||||
return supported
|
||||
}
|
||||
|
||||
func NamespaceTypes() []NamespaceType {
|
||||
return []NamespaceType{
|
||||
NEWUSER, // Keep user NS always first, don't move it.
|
||||
NEWIPC,
|
||||
NEWUTS,
|
||||
NEWNET,
|
||||
NEWPID,
|
||||
NEWNS,
|
||||
NEWCGROUP,
|
||||
}
|
||||
}
|
||||
|
||||
// Namespace defines configuration for each namespace. It specifies an
|
||||
// alternate path that is able to be joined via setns.
|
||||
type Namespace struct {
|
||||
Type NamespaceType `json:"type"`
|
||||
Path string `json:"path"`
|
||||
}
|
||||
|
||||
func (n *Namespace) GetPath(pid int) string {
|
||||
return fmt.Sprintf("/proc/%d/ns/%s", pid, NsName(n.Type))
|
||||
}
|
||||
|
||||
func (n *Namespaces) Remove(t NamespaceType) bool {
|
||||
i := n.index(t)
|
||||
if i == -1 {
|
||||
return false
|
||||
}
|
||||
*n = append((*n)[:i], (*n)[i+1:]...)
|
||||
return true
|
||||
}
|
||||
|
||||
func (n *Namespaces) Add(t NamespaceType, path string) {
|
||||
i := n.index(t)
|
||||
if i == -1 {
|
||||
*n = append(*n, Namespace{Type: t, Path: path})
|
||||
return
|
||||
}
|
||||
(*n)[i].Path = path
|
||||
}
|
||||
|
||||
func (n *Namespaces) index(t NamespaceType) int {
|
||||
for i, ns := range *n {
|
||||
if ns.Type == t {
|
||||
return i
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
func (n *Namespaces) Contains(t NamespaceType) bool {
|
||||
return n.index(t) != -1
|
||||
}
|
||||
|
||||
func (n *Namespaces) PathOf(t NamespaceType) string {
|
||||
i := n.index(t)
|
||||
if i == -1 {
|
||||
return ""
|
||||
}
|
||||
return (*n)[i].Path
|
||||
}
|
33
vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go
generated
vendored
33
vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go
generated
vendored
@ -1,33 +0,0 @@
|
||||
//go:build linux
|
||||
// +build linux
|
||||
|
||||
package configs
|
||||
|
||||
import "golang.org/x/sys/unix"
|
||||
|
||||
func (n *Namespace) Syscall() int {
|
||||
return namespaceInfo[n.Type]
|
||||
}
|
||||
|
||||
var namespaceInfo = map[NamespaceType]int{
|
||||
NEWNET: unix.CLONE_NEWNET,
|
||||
NEWNS: unix.CLONE_NEWNS,
|
||||
NEWUSER: unix.CLONE_NEWUSER,
|
||||
NEWIPC: unix.CLONE_NEWIPC,
|
||||
NEWUTS: unix.CLONE_NEWUTS,
|
||||
NEWPID: unix.CLONE_NEWPID,
|
||||
NEWCGROUP: unix.CLONE_NEWCGROUP,
|
||||
}
|
||||
|
||||
// CloneFlags parses the container's Namespaces options to set the correct
|
||||
// flags on clone, unshare. This function returns flags only for new namespaces.
|
||||
func (n *Namespaces) CloneFlags() uintptr {
|
||||
var flag int
|
||||
for _, v := range *n {
|
||||
if v.Path != "" {
|
||||
continue
|
||||
}
|
||||
flag |= namespaceInfo[v.Type]
|
||||
}
|
||||
return uintptr(flag)
|
||||
}
|
@ -1,14 +0,0 @@
|
||||
//go:build !linux && !windows
|
||||
// +build !linux,!windows
|
||||
|
||||
package configs
|
||||
|
||||
func (n *Namespace) Syscall() int {
|
||||
panic("No namespace syscall support")
|
||||
}
|
||||
|
||||
// CloneFlags parses the container's Namespaces options to set the correct
|
||||
// flags on clone, unshare. This function returns flags only for new namespaces.
|
||||
func (n *Namespaces) CloneFlags() uintptr {
|
||||
panic("No namespace syscall support")
|
||||
}
|
8
vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go
generated
vendored
8
vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go
generated
vendored
@ -1,8 +0,0 @@
|
||||
//go:build !linux
|
||||
// +build !linux
|
||||
|
||||
package configs
|
||||
|
||||
// Namespace defines configuration for each namespace. It specifies an
|
||||
// alternate path that is able to be joined via setns.
|
||||
type Namespace struct{}
|
75
vendor/github.com/opencontainers/runc/libcontainer/configs/network.go
generated
vendored
75
vendor/github.com/opencontainers/runc/libcontainer/configs/network.go
generated
vendored
@ -1,75 +0,0 @@
|
||||
package configs
|
||||
|
||||
// Network defines configuration for a container's networking stack
|
||||
//
|
||||
// The network configuration can be omitted from a container causing the
|
||||
// container to be setup with the host's networking stack
|
||||
type Network struct {
|
||||
// Type sets the networks type, commonly veth and loopback
|
||||
Type string `json:"type"`
|
||||
|
||||
// Name of the network interface
|
||||
Name string `json:"name"`
|
||||
|
||||
// The bridge to use.
|
||||
Bridge string `json:"bridge"`
|
||||
|
||||
// MacAddress contains the MAC address to set on the network interface
|
||||
MacAddress string `json:"mac_address"`
|
||||
|
||||
// Address contains the IPv4 and mask to set on the network interface
|
||||
Address string `json:"address"`
|
||||
|
||||
// Gateway sets the gateway address that is used as the default for the interface
|
||||
Gateway string `json:"gateway"`
|
||||
|
||||
// IPv6Address contains the IPv6 and mask to set on the network interface
|
||||
IPv6Address string `json:"ipv6_address"`
|
||||
|
||||
// IPv6Gateway sets the ipv6 gateway address that is used as the default for the interface
|
||||
IPv6Gateway string `json:"ipv6_gateway"`
|
||||
|
||||
// Mtu sets the mtu value for the interface and will be mirrored on both the host and
|
||||
// container's interfaces if a pair is created, specifically in the case of type veth
|
||||
// Note: This does not apply to loopback interfaces.
|
||||
Mtu int `json:"mtu"`
|
||||
|
||||
// TxQueueLen sets the tx_queuelen value for the interface and will be mirrored on both the host and
|
||||
// container's interfaces if a pair is created, specifically in the case of type veth
|
||||
// Note: This does not apply to loopback interfaces.
|
||||
TxQueueLen int `json:"txqueuelen"`
|
||||
|
||||
// HostInterfaceName is a unique name of a veth pair that resides on in the host interface of the
|
||||
// container.
|
||||
HostInterfaceName string `json:"host_interface_name"`
|
||||
|
||||
// HairpinMode specifies if hairpin NAT should be enabled on the virtual interface
|
||||
// bridge port in the case of type veth
|
||||
// Note: This is unsupported on some systems.
|
||||
// Note: This does not apply to loopback interfaces.
|
||||
HairpinMode bool `json:"hairpin_mode"`
|
||||
}
|
||||
|
||||
// Route defines a routing table entry.
|
||||
//
|
||||
// Routes can be specified to create entries in the routing table as the container
|
||||
// is started.
|
||||
//
|
||||
// All of destination, source, and gateway should be either IPv4 or IPv6.
|
||||
// One of the three options must be present, and omitted entries will use their
|
||||
// IP family default for the route table. For IPv4 for example, setting the
|
||||
// gateway to 1.2.3.4 and the interface to eth0 will set up a standard
|
||||
// destination of 0.0.0.0(or *) when viewed in the route table.
|
||||
type Route struct {
|
||||
// Destination specifies the destination IP address and mask in the CIDR form.
|
||||
Destination string `json:"destination"`
|
||||
|
||||
// Source specifies the source IP address and mask in the CIDR form.
|
||||
Source string `json:"source"`
|
||||
|
||||
// Gateway specifies the gateway IP address.
|
||||
Gateway string `json:"gateway"`
|
||||
|
||||
// InterfaceName specifies the device to set this route up for, for example eth0.
|
||||
InterfaceName string `json:"interface_name"`
|
||||
}
|
9
vendor/github.com/opencontainers/runc/libcontainer/configs/rdma.go
generated
vendored
9
vendor/github.com/opencontainers/runc/libcontainer/configs/rdma.go
generated
vendored
@ -1,9 +0,0 @@
|
||||
package configs
|
||||
|
||||
// LinuxRdma for Linux cgroup 'rdma' resource management (Linux 4.11)
|
||||
type LinuxRdma struct {
|
||||
// Maximum number of HCA handles that can be opened. Default is "no limit".
|
||||
HcaHandles *uint32 `json:"hca_handles,omitempty"`
|
||||
// Maximum number of HCA objects that can be created. Default is "no limit".
|
||||
HcaObjects *uint32 `json:"hca_objects,omitempty"`
|
||||
}
|
5
vendor/github.com/opencontainers/runc/libcontainer/userns/userns.go
generated
vendored
5
vendor/github.com/opencontainers/runc/libcontainer/userns/userns.go
generated
vendored
@ -1,5 +0,0 @@
|
||||
package userns
|
||||
|
||||
// RunningInUserNS detects whether we are currently running in a user namespace.
|
||||
// Originally copied from github.com/lxc/lxd/shared/util.go
|
||||
var RunningInUserNS = runningInUserNS
|
16
vendor/github.com/opencontainers/runc/libcontainer/userns/userns_fuzzer.go
generated
vendored
16
vendor/github.com/opencontainers/runc/libcontainer/userns/userns_fuzzer.go
generated
vendored
@ -1,16 +0,0 @@
|
||||
//go:build gofuzz
|
||||
// +build gofuzz
|
||||
|
||||
package userns
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/user"
|
||||
)
|
||||
|
||||
func FuzzUIDMap(data []byte) int {
|
||||
uidmap, _ := user.ParseIDMap(strings.NewReader(string(data)))
|
||||
_ = uidMapInUserNS(uidmap)
|
||||
return 1
|
||||
}
|
37
vendor/github.com/opencontainers/runc/libcontainer/userns/userns_linux.go
generated
vendored
37
vendor/github.com/opencontainers/runc/libcontainer/userns/userns_linux.go
generated
vendored
@ -1,37 +0,0 @@
|
||||
package userns
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/user"
|
||||
)
|
||||
|
||||
var (
|
||||
inUserNS bool
|
||||
nsOnce sync.Once
|
||||
)
|
||||
|
||||
// runningInUserNS detects whether we are currently running in a user namespace.
|
||||
// Originally copied from github.com/lxc/lxd/shared/util.go
|
||||
func runningInUserNS() bool {
|
||||
nsOnce.Do(func() {
|
||||
uidmap, err := user.CurrentProcessUIDMap()
|
||||
if err != nil {
|
||||
// This kernel-provided file only exists if user namespaces are supported
|
||||
return
|
||||
}
|
||||
inUserNS = uidMapInUserNS(uidmap)
|
||||
})
|
||||
return inUserNS
|
||||
}
|
||||
|
||||
func uidMapInUserNS(uidmap []user.IDMap) bool {
|
||||
/*
|
||||
* We assume we are in the initial user namespace if we have a full
|
||||
* range - 4294967295 uids starting at uid 0.
|
||||
*/
|
||||
if len(uidmap) == 1 && uidmap[0].ID == 0 && uidmap[0].ParentID == 0 && uidmap[0].Count == 4294967295 {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
18
vendor/github.com/opencontainers/runc/libcontainer/userns/userns_unsupported.go
generated
vendored
18
vendor/github.com/opencontainers/runc/libcontainer/userns/userns_unsupported.go
generated
vendored
@ -1,18 +0,0 @@
|
||||
//go:build !linux
|
||||
// +build !linux
|
||||
|
||||
package userns
|
||||
|
||||
import "github.com/opencontainers/runc/libcontainer/user"
|
||||
|
||||
// runningInUserNS is a stub for non-Linux systems
|
||||
// Always returns false
|
||||
func runningInUserNS() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// uidMapInUserNS is a stub for non-Linux systems
|
||||
// Always returns false
|
||||
func uidMapInUserNS(uidmap []user.IDMap) bool {
|
||||
return false
|
||||
}
|
96
vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go
generated
vendored
96
vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go
generated
vendored
@ -1,96 +0,0 @@
|
||||
package utils
|
||||
|
||||
/*
|
||||
* Copyright 2016, 2017 SUSE LLC
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// MaxSendfdLen is the maximum length of the name of a file descriptor being
|
||||
// sent using SendFd. The name of the file handle returned by RecvFd will never
|
||||
// be larger than this value.
|
||||
const MaxNameLen = 4096
|
||||
|
||||
// oobSpace is the size of the oob slice required to store a single FD. Note
|
||||
// that unix.UnixRights appears to make the assumption that fd is always int32,
|
||||
// so sizeof(fd) = 4.
|
||||
var oobSpace = unix.CmsgSpace(4)
|
||||
|
||||
// RecvFd waits for a file descriptor to be sent over the given AF_UNIX
|
||||
// socket. The file name of the remote file descriptor will be recreated
|
||||
// locally (it is sent as non-auxiliary data in the same payload).
|
||||
func RecvFd(socket *os.File) (*os.File, error) {
|
||||
// For some reason, unix.Recvmsg uses the length rather than the capacity
|
||||
// when passing the msg_controllen and other attributes to recvmsg. So we
|
||||
// have to actually set the length.
|
||||
name := make([]byte, MaxNameLen)
|
||||
oob := make([]byte, oobSpace)
|
||||
|
||||
sockfd := socket.Fd()
|
||||
n, oobn, _, _, err := unix.Recvmsg(int(sockfd), name, oob, 0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if n >= MaxNameLen || oobn != oobSpace {
|
||||
return nil, fmt.Errorf("recvfd: incorrect number of bytes read (n=%d oobn=%d)", n, oobn)
|
||||
}
|
||||
|
||||
// Truncate.
|
||||
name = name[:n]
|
||||
oob = oob[:oobn]
|
||||
|
||||
scms, err := unix.ParseSocketControlMessage(oob)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(scms) != 1 {
|
||||
return nil, fmt.Errorf("recvfd: number of SCMs is not 1: %d", len(scms))
|
||||
}
|
||||
scm := scms[0]
|
||||
|
||||
fds, err := unix.ParseUnixRights(&scm)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(fds) != 1 {
|
||||
return nil, fmt.Errorf("recvfd: number of fds is not 1: %d", len(fds))
|
||||
}
|
||||
fd := uintptr(fds[0])
|
||||
|
||||
return os.NewFile(fd, string(name)), nil
|
||||
}
|
||||
|
||||
// SendFd sends a file descriptor over the given AF_UNIX socket. In
|
||||
// addition, the file.Name() of the given file will also be sent as
|
||||
// non-auxiliary data in the same payload (allowing to send contextual
|
||||
// information for a file descriptor).
|
||||
func SendFd(socket *os.File, name string, fd uintptr) error {
|
||||
if len(name) >= MaxNameLen {
|
||||
return fmt.Errorf("sendfd: filename too long: %s", name)
|
||||
}
|
||||
return SendFds(socket, []byte(name), int(fd))
|
||||
}
|
||||
|
||||
// SendFds sends a list of files descriptor and msg over the given AF_UNIX socket.
|
||||
func SendFds(socket *os.File, msg []byte, fds ...int) error {
|
||||
oob := unix.UnixRights(fds...)
|
||||
return unix.Sendmsg(int(socket.Fd()), msg, oob, nil, 0)
|
||||
}
|
167
vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go
generated
vendored
167
vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go
generated
vendored
@ -1,167 +0,0 @@
|
||||
package utils
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unsafe"
|
||||
|
||||
securejoin "github.com/cyphar/filepath-securejoin"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const (
|
||||
exitSignalOffset = 128
|
||||
)
|
||||
|
||||
// NativeEndian is the native byte order of the host system.
|
||||
var NativeEndian binary.ByteOrder
|
||||
|
||||
func init() {
|
||||
// Copied from <golang.org/x/net/internal/socket/sys.go>.
|
||||
i := uint32(1)
|
||||
b := (*[4]byte)(unsafe.Pointer(&i))
|
||||
if b[0] == 1 {
|
||||
NativeEndian = binary.LittleEndian
|
||||
} else {
|
||||
NativeEndian = binary.BigEndian
|
||||
}
|
||||
}
|
||||
|
||||
// ExitStatus returns the correct exit status for a process based on if it
|
||||
// was signaled or exited cleanly
|
||||
func ExitStatus(status unix.WaitStatus) int {
|
||||
if status.Signaled() {
|
||||
return exitSignalOffset + int(status.Signal())
|
||||
}
|
||||
return status.ExitStatus()
|
||||
}
|
||||
|
||||
// WriteJSON writes the provided struct v to w using standard json marshaling
|
||||
func WriteJSON(w io.Writer, v interface{}) error {
|
||||
data, err := json.Marshal(v)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = w.Write(data)
|
||||
return err
|
||||
}
|
||||
|
||||
// CleanPath makes a path safe for use with filepath.Join. This is done by not
|
||||
// only cleaning the path, but also (if the path is relative) adding a leading
|
||||
// '/' and cleaning it (then removing the leading '/'). This ensures that a
|
||||
// path resulting from prepending another path will always resolve to lexically
|
||||
// be a subdirectory of the prefixed path. This is all done lexically, so paths
|
||||
// that include symlinks won't be safe as a result of using CleanPath.
|
||||
func CleanPath(path string) string {
|
||||
// Deal with empty strings nicely.
|
||||
if path == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
// Ensure that all paths are cleaned (especially problematic ones like
|
||||
// "/../../../../../" which can cause lots of issues).
|
||||
path = filepath.Clean(path)
|
||||
|
||||
// If the path isn't absolute, we need to do more processing to fix paths
|
||||
// such as "../../../../<etc>/some/path". We also shouldn't convert absolute
|
||||
// paths to relative ones.
|
||||
if !filepath.IsAbs(path) {
|
||||
path = filepath.Clean(string(os.PathSeparator) + path)
|
||||
// This can't fail, as (by definition) all paths are relative to root.
|
||||
path, _ = filepath.Rel(string(os.PathSeparator), path)
|
||||
}
|
||||
|
||||
// Clean the path again for good measure.
|
||||
return filepath.Clean(path)
|
||||
}
|
||||
|
||||
// stripRoot returns the passed path, stripping the root path if it was
|
||||
// (lexicially) inside it. Note that both passed paths will always be treated
|
||||
// as absolute, and the returned path will also always be absolute. In
|
||||
// addition, the paths are cleaned before stripping the root.
|
||||
func stripRoot(root, path string) string {
|
||||
// Make the paths clean and absolute.
|
||||
root, path = CleanPath("/"+root), CleanPath("/"+path)
|
||||
switch {
|
||||
case path == root:
|
||||
path = "/"
|
||||
case root == "/":
|
||||
// do nothing
|
||||
case strings.HasPrefix(path, root+"/"):
|
||||
path = strings.TrimPrefix(path, root+"/")
|
||||
}
|
||||
return CleanPath("/" + path)
|
||||
}
|
||||
|
||||
// WithProcfd runs the passed closure with a procfd path (/proc/self/fd/...)
|
||||
// corresponding to the unsafePath resolved within the root. Before passing the
|
||||
// fd, this path is verified to have been inside the root -- so operating on it
|
||||
// through the passed fdpath should be safe. Do not access this path through
|
||||
// the original path strings, and do not attempt to use the pathname outside of
|
||||
// the passed closure (the file handle will be freed once the closure returns).
|
||||
func WithProcfd(root, unsafePath string, fn func(procfd string) error) error {
|
||||
// Remove the root then forcefully resolve inside the root.
|
||||
unsafePath = stripRoot(root, unsafePath)
|
||||
path, err := securejoin.SecureJoin(root, unsafePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("resolving path inside rootfs failed: %w", err)
|
||||
}
|
||||
|
||||
// Open the target path.
|
||||
fh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("open o_path procfd: %w", err)
|
||||
}
|
||||
defer fh.Close()
|
||||
|
||||
// Double-check the path is the one we expected.
|
||||
procfd := "/proc/self/fd/" + strconv.Itoa(int(fh.Fd()))
|
||||
if realpath, err := os.Readlink(procfd); err != nil {
|
||||
return fmt.Errorf("procfd verification failed: %w", err)
|
||||
} else if realpath != path {
|
||||
return fmt.Errorf("possibly malicious path detected -- refusing to operate on %s", realpath)
|
||||
}
|
||||
|
||||
// Run the closure.
|
||||
return fn(procfd)
|
||||
}
|
||||
|
||||
// SearchLabels searches a list of key-value pairs for the provided key and
|
||||
// returns the corresponding value. The pairs must be separated with '='.
|
||||
func SearchLabels(labels []string, query string) string {
|
||||
for _, l := range labels {
|
||||
parts := strings.SplitN(l, "=", 2)
|
||||
if len(parts) < 2 {
|
||||
continue
|
||||
}
|
||||
if parts[0] == query {
|
||||
return parts[1]
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// Annotations returns the bundle path and user defined annotations from the
|
||||
// libcontainer state. We need to remove the bundle because that is a label
|
||||
// added by libcontainer.
|
||||
func Annotations(labels []string) (bundle string, userAnnotations map[string]string) {
|
||||
userAnnotations = make(map[string]string)
|
||||
for _, l := range labels {
|
||||
parts := strings.SplitN(l, "=", 2)
|
||||
if len(parts) < 2 {
|
||||
continue
|
||||
}
|
||||
if parts[0] == "bundle" {
|
||||
bundle = parts[1]
|
||||
} else {
|
||||
userAnnotations[parts[0]] = parts[1]
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
69
vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go
generated
vendored
69
vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go
generated
vendored
@ -1,69 +0,0 @@
|
||||
//go:build !windows
|
||||
// +build !windows
|
||||
|
||||
package utils
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// EnsureProcHandle returns whether or not the given file handle is on procfs.
|
||||
func EnsureProcHandle(fh *os.File) error {
|
||||
var buf unix.Statfs_t
|
||||
if err := unix.Fstatfs(int(fh.Fd()), &buf); err != nil {
|
||||
return fmt.Errorf("ensure %s is on procfs: %w", fh.Name(), err)
|
||||
}
|
||||
if buf.Type != unix.PROC_SUPER_MAGIC {
|
||||
return fmt.Errorf("%s is not on procfs", fh.Name())
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// CloseExecFrom applies O_CLOEXEC to all file descriptors currently open for
|
||||
// the process (except for those below the given fd value).
|
||||
func CloseExecFrom(minFd int) error {
|
||||
fdDir, err := os.Open("/proc/self/fd")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer fdDir.Close()
|
||||
|
||||
if err := EnsureProcHandle(fdDir); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
fdList, err := fdDir.Readdirnames(-1)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, fdStr := range fdList {
|
||||
fd, err := strconv.Atoi(fdStr)
|
||||
// Ignore non-numeric file names.
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
// Ignore descriptors lower than our specified minimum.
|
||||
if fd < minFd {
|
||||
continue
|
||||
}
|
||||
// Intentionally ignore errors from unix.CloseOnExec -- the cases where
|
||||
// this might fail are basically file descriptors that have already
|
||||
// been closed (including and especially the one that was created when
|
||||
// os.ReadDir did the "opendir" syscall).
|
||||
unix.CloseOnExec(fd)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// NewSockPair returns a new unix socket pair
|
||||
func NewSockPair(name string) (parent *os.File, child *os.File, err error) {
|
||||
fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
return os.NewFile(uintptr(fds[1]), name+"-p"), os.NewFile(uintptr(fds[0]), name+"-c"), nil
|
||||
}
|
14
vendor/modules.txt
vendored
14
vendor/modules.txt
vendored
@ -190,9 +190,6 @@ github.com/coreos/go-systemd/v22/dbus
|
||||
# github.com/cpuguy83/go-md2man/v2 v2.0.2
|
||||
## explicit; go 1.11
|
||||
github.com/cpuguy83/go-md2man/v2/md2man
|
||||
# github.com/cyphar/filepath-securejoin v0.2.3
|
||||
## explicit; go 1.13
|
||||
github.com/cyphar/filepath-securejoin
|
||||
# github.com/davecgh/go-spew v1.1.1
|
||||
## explicit
|
||||
github.com/davecgh/go-spew/spew
|
||||
@ -342,19 +339,8 @@ github.com/opencontainers/image-spec/specs-go
|
||||
github.com/opencontainers/image-spec/specs-go/v1
|
||||
# github.com/opencontainers/runc v1.1.9
|
||||
## explicit; go 1.17
|
||||
github.com/opencontainers/runc/libcontainer/cgroups
|
||||
github.com/opencontainers/runc/libcontainer/cgroups/devices
|
||||
github.com/opencontainers/runc/libcontainer/cgroups/ebpf
|
||||
github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter
|
||||
github.com/opencontainers/runc/libcontainer/cgroups/fs
|
||||
github.com/opencontainers/runc/libcontainer/cgroups/fs2
|
||||
github.com/opencontainers/runc/libcontainer/cgroups/fscommon
|
||||
github.com/opencontainers/runc/libcontainer/cgroups/systemd
|
||||
github.com/opencontainers/runc/libcontainer/configs
|
||||
github.com/opencontainers/runc/libcontainer/devices
|
||||
github.com/opencontainers/runc/libcontainer/user
|
||||
github.com/opencontainers/runc/libcontainer/userns
|
||||
github.com/opencontainers/runc/libcontainer/utils
|
||||
# github.com/opencontainers/runtime-spec v1.1.0
|
||||
## explicit
|
||||
github.com/opencontainers/runtime-spec/specs-go
|
||||
|
Loading…
Reference in New Issue
Block a user