Merge pull request #8722 from marquiz/devel/cgroup-driver-autoconfig
cri: implement RuntimeConfig rpc
This commit is contained in:
commit
2b2195c36b
@ -32,6 +32,22 @@ cgroupDriver: "systemd"
|
|||||||
|
|
||||||
kubeadm users should also see [the kubeadm documentation](https://kubernetes.io/docs/tasks/administer-cluster/kubeadm/configure-cgroup-driver/).
|
kubeadm users should also see [the kubeadm documentation](https://kubernetes.io/docs/tasks/administer-cluster/kubeadm/configure-cgroup-driver/).
|
||||||
|
|
||||||
|
> Note: Kubernetes v1.28 supports automatic detection of the cgroup driver as
|
||||||
|
> an alpha feature. With the `KubeletCgroupDriverFromCRI` kubelet feature gate
|
||||||
|
> enabled, the kubelet automatically detects the cgroup driver from the CRI
|
||||||
|
> runtime and the `KubeletConfiguration` configuration step above is not
|
||||||
|
> needed.
|
||||||
|
>
|
||||||
|
> When determining the cgroup driver, containerd uses the `SystemdCgroup`
|
||||||
|
> setting from runc-based runtime classes, starting from the default runtime
|
||||||
|
> class. If no runc-based runtime classes have been configured, containerd
|
||||||
|
> relies on auto-detection based on determining if systemd is running.
|
||||||
|
> Note that all runc-based runtime classes should be configured to have the
|
||||||
|
> same `SystemdCgroup` setting in order to avoid unexpected behavior.
|
||||||
|
>
|
||||||
|
> Automatic cgroup driver configuration for the kubelet is supported in
|
||||||
|
> containerd v2.0 and later.
|
||||||
|
|
||||||
### Snapshotter
|
### Snapshotter
|
||||||
|
|
||||||
The default snapshotter is set to `overlayfs` (akin to Docker's `overlay2` storage driver):
|
The default snapshotter is set to `overlayfs` (akin to Docker's `overlay2` storage driver):
|
||||||
|
2
go.mod
2
go.mod
@ -75,7 +75,7 @@ require (
|
|||||||
k8s.io/apiserver v0.26.2
|
k8s.io/apiserver v0.26.2
|
||||||
k8s.io/client-go v0.26.2
|
k8s.io/client-go v0.26.2
|
||||||
k8s.io/component-base v0.26.2
|
k8s.io/component-base v0.26.2
|
||||||
k8s.io/cri-api v0.27.1
|
k8s.io/cri-api v0.28.0-beta.0
|
||||||
k8s.io/klog/v2 v2.90.1
|
k8s.io/klog/v2 v2.90.1
|
||||||
k8s.io/utils v0.0.0-20230220204549-a5ecb0141aa5
|
k8s.io/utils v0.0.0-20230220204549-a5ecb0141aa5
|
||||||
)
|
)
|
||||||
|
7
go.sum
7
go.sum
@ -874,7 +874,7 @@ github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40T
|
|||||||
github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg=
|
github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg=
|
||||||
github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
|
github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
|
||||||
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
|
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
|
||||||
github.com/rogpeppe/go-internal v1.6.1 h1:/FiVV8dS/e+YqF2JvO3yXRFbBLTIuSDkuC7aBOAvL+k=
|
github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
|
||||||
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||||
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
|
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
|
||||||
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||||
@ -887,6 +887,7 @@ github.com/sclevine/spec v1.2.0/go.mod h1:W4J29eT/Kzv7/b9IWLB055Z+qvVC9vt0Arko24
|
|||||||
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=
|
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=
|
||||||
github.com/seccomp/libseccomp-golang v0.9.1/go.mod h1:GbW5+tmTXfcxTToHLXlScSlAvWlF4P2Ca7zGrPiEpWo=
|
github.com/seccomp/libseccomp-golang v0.9.1/go.mod h1:GbW5+tmTXfcxTToHLXlScSlAvWlF4P2Ca7zGrPiEpWo=
|
||||||
github.com/seccomp/libseccomp-golang v0.9.2-0.20210429002308-3879420cc921/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg=
|
github.com/seccomp/libseccomp-golang v0.9.2-0.20210429002308-3879420cc921/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg=
|
||||||
|
github.com/seccomp/libseccomp-golang v0.9.2-0.20220502022130-f33da4d89646 h1:RpforrEYXWkmGwJHIGnLZ3tTWStkjVVstwzNGqxX2Ds=
|
||||||
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
|
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
|
||||||
github.com/sirupsen/logrus v1.0.4-0.20170822132746-89742aefa4b2/go.mod h1:pMByvHTf9Beacp5x1UXfOR9xyW/9antXMhjMPG0dEzc=
|
github.com/sirupsen/logrus v1.0.4-0.20170822132746-89742aefa4b2/go.mod h1:pMByvHTf9Beacp5x1UXfOR9xyW/9antXMhjMPG0dEzc=
|
||||||
github.com/sirupsen/logrus v1.0.6/go.mod h1:pMByvHTf9Beacp5x1UXfOR9xyW/9antXMhjMPG0dEzc=
|
github.com/sirupsen/logrus v1.0.6/go.mod h1:pMByvHTf9Beacp5x1UXfOR9xyW/9antXMhjMPG0dEzc=
|
||||||
@ -1634,8 +1635,8 @@ k8s.io/cri-api v0.20.1/go.mod h1:2JRbKt+BFLTjtrILYVqQK5jqhI+XNdF6UiGMgczeBCI=
|
|||||||
k8s.io/cri-api v0.20.4/go.mod h1:2JRbKt+BFLTjtrILYVqQK5jqhI+XNdF6UiGMgczeBCI=
|
k8s.io/cri-api v0.20.4/go.mod h1:2JRbKt+BFLTjtrILYVqQK5jqhI+XNdF6UiGMgczeBCI=
|
||||||
k8s.io/cri-api v0.20.6/go.mod h1:ew44AjNXwyn1s0U4xCKGodU7J1HzBeZ1MpGrpa5r8Yc=
|
k8s.io/cri-api v0.20.6/go.mod h1:ew44AjNXwyn1s0U4xCKGodU7J1HzBeZ1MpGrpa5r8Yc=
|
||||||
k8s.io/cri-api v0.23.1/go.mod h1:REJE3PSU0h/LOV1APBrupxrEJqnoxZC8KWzkBUHwrK4=
|
k8s.io/cri-api v0.23.1/go.mod h1:REJE3PSU0h/LOV1APBrupxrEJqnoxZC8KWzkBUHwrK4=
|
||||||
k8s.io/cri-api v0.27.1 h1:KWO+U8MfI9drXB/P4oU9VchaWYOlwDglJZVHWMpTT3Q=
|
k8s.io/cri-api v0.28.0-beta.0 h1:JGtnKV4s7/1Pl2dWJX5s/Cl2074Fgry5TGLpDYkEapE=
|
||||||
k8s.io/cri-api v0.27.1/go.mod h1:+Ts/AVYbIo04S86XbTD73UPp/DkTiYxtsFeOFEu32L0=
|
k8s.io/cri-api v0.28.0-beta.0/go.mod h1:PgM+VelU7VKINUeaNLdE4fElKXfORIfTRNRM5wFBRCw=
|
||||||
k8s.io/gengo v0.0.0-20200413195148-3a45101e95ac/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8IAqLxYwwyPxAX1Pzy0ii0=
|
k8s.io/gengo v0.0.0-20200413195148-3a45101e95ac/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8IAqLxYwwyPxAX1Pzy0ii0=
|
||||||
k8s.io/gengo v0.0.0-20200428234225-8167cfdcfc14/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8IAqLxYwwyPxAX1Pzy0ii0=
|
k8s.io/gengo v0.0.0-20200428234225-8167cfdcfc14/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8IAqLxYwwyPxAX1Pzy0ii0=
|
||||||
k8s.io/gengo v0.0.0-20201113003025-83324d819ded/go.mod h1:FiNAH4ZV3gBg2Kwh89tzAEV2be7d5xI0vBa/VySYy3E=
|
k8s.io/gengo v0.0.0-20201113003025-83324d819ded/go.mod h1:FiNAH4ZV3gBg2Kwh89tzAEV2be7d5xI0vBa/VySYy3E=
|
||||||
|
@ -1539,8 +1539,9 @@ github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6So
|
|||||||
github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
|
github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
|
||||||
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
|
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
|
||||||
github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc=
|
github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc=
|
||||||
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
|
|
||||||
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
|
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
|
||||||
|
github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
|
||||||
|
github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=
|
||||||
github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g=
|
github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g=
|
||||||
github.com/russross/blackfriday v1.6.0/go.mod h1:ti0ldHuxg49ri4ksnFxlkCfN+hvslNlmVHqNRXXJNAY=
|
github.com/russross/blackfriday v1.6.0/go.mod h1:ti0ldHuxg49ri4ksnFxlkCfN+hvslNlmVHqNRXXJNAY=
|
||||||
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||||
@ -2572,7 +2573,7 @@ k8s.io/code-generator v0.19.7/go.mod h1:lwEq3YnLYb/7uVXLorOJfxg+cUu2oihFhHZ0n9NI
|
|||||||
k8s.io/component-base v0.26.2/go.mod h1:DxbuIe9M3IZPRxPIzhch2m1eT7uFrSBJUBuVCQEBivs=
|
k8s.io/component-base v0.26.2/go.mod h1:DxbuIe9M3IZPRxPIzhch2m1eT7uFrSBJUBuVCQEBivs=
|
||||||
k8s.io/cri-api v0.20.1/go.mod h1:2JRbKt+BFLTjtrILYVqQK5jqhI+XNdF6UiGMgczeBCI=
|
k8s.io/cri-api v0.20.1/go.mod h1:2JRbKt+BFLTjtrILYVqQK5jqhI+XNdF6UiGMgczeBCI=
|
||||||
k8s.io/cri-api v0.25.3/go.mod h1:riC/P0yOGUf2K1735wW+CXs1aY2ctBgePtnnoFLd0dU=
|
k8s.io/cri-api v0.25.3/go.mod h1:riC/P0yOGUf2K1735wW+CXs1aY2ctBgePtnnoFLd0dU=
|
||||||
k8s.io/cri-api v0.27.1/go.mod h1:+Ts/AVYbIo04S86XbTD73UPp/DkTiYxtsFeOFEu32L0=
|
k8s.io/cri-api v0.28.0-beta.0/go.mod h1:PgM+VelU7VKINUeaNLdE4fElKXfORIfTRNRM5wFBRCw=
|
||||||
k8s.io/gengo v0.0.0-20200413195148-3a45101e95ac/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8IAqLxYwwyPxAX1Pzy0ii0=
|
k8s.io/gengo v0.0.0-20200413195148-3a45101e95ac/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8IAqLxYwwyPxAX1Pzy0ii0=
|
||||||
k8s.io/gengo v0.0.0-20200428234225-8167cfdcfc14/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8IAqLxYwwyPxAX1Pzy0ii0=
|
k8s.io/gengo v0.0.0-20200428234225-8167cfdcfc14/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8IAqLxYwwyPxAX1Pzy0ii0=
|
||||||
k8s.io/gengo v0.0.0-20201113003025-83324d819ded/go.mod h1:FiNAH4ZV3gBg2Kwh89tzAEV2be7d5xI0vBa/VySYy3E=
|
k8s.io/gengo v0.0.0-20201113003025-83324d819ded/go.mod h1:FiNAH4ZV3gBg2Kwh89tzAEV2be7d5xI0vBa/VySYy3E=
|
||||||
|
@ -621,3 +621,19 @@ func (in *instrumentedService) ListPodSandboxMetrics(ctx context.Context, r *run
|
|||||||
res, err = in.c.ListPodSandboxMetrics(ctx, r)
|
res, err = in.c.ListPodSandboxMetrics(ctx, r)
|
||||||
return res, errdefs.ToGRPC(err)
|
return res, errdefs.ToGRPC(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (in *instrumentedService) RuntimeConfig(ctx context.Context, r *runtime.RuntimeConfigRequest) (res *runtime.RuntimeConfigResponse, err error) {
|
||||||
|
if err := in.checkInitialized(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
log.G(ctx).Tracef("RuntimeConfig")
|
||||||
|
defer func() {
|
||||||
|
if err != nil {
|
||||||
|
log.G(ctx).WithError(err).Error("RuntimeConfig failed")
|
||||||
|
} else {
|
||||||
|
log.G(ctx).Tracef("RuntimeConfig returns config %+v", res)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
res, err = in.c.RuntimeConfig(ctx, r)
|
||||||
|
return res, errdefs.ToGRPC(err)
|
||||||
|
}
|
||||||
|
31
pkg/cri/sbserver/runtime_config.go
Normal file
31
pkg/cri/sbserver/runtime_config.go
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
/*
|
||||||
|
Copyright The containerd Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package sbserver
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
|
||||||
|
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||||
|
)
|
||||||
|
|
||||||
|
// RuntimeConfig returns configuration information of the runtime.
|
||||||
|
func (c *criService) RuntimeConfig(ctx context.Context, r *runtime.RuntimeConfigRequest) (*runtime.RuntimeConfigResponse, error) {
|
||||||
|
resp := &runtime.RuntimeConfigResponse{
|
||||||
|
Linux: c.getLinuxRuntimeConfig(ctx),
|
||||||
|
}
|
||||||
|
return resp, nil
|
||||||
|
}
|
81
pkg/cri/sbserver/runtime_config_linux.go
Normal file
81
pkg/cri/sbserver/runtime_config_linux.go
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
/*
|
||||||
|
Copyright The containerd Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package sbserver
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"sort"
|
||||||
|
|
||||||
|
"github.com/containerd/containerd/log"
|
||||||
|
runcoptions "github.com/containerd/containerd/runtime/v2/runc/options"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
|
||||||
|
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||||
|
)
|
||||||
|
|
||||||
|
func (c *criService) getLinuxRuntimeConfig(ctx context.Context) *runtime.LinuxRuntimeConfiguration {
|
||||||
|
return &runtime.LinuxRuntimeConfiguration{CgroupDriver: c.getCgroupDriver(ctx)}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *criService) getCgroupDriver(ctx context.Context) runtime.CgroupDriver {
|
||||||
|
// Go through the runtime handlers in a predictable order, starting from the
|
||||||
|
// default handler, others sorted in alphabetical order
|
||||||
|
handlerNames := make([]string, 0, len(c.config.ContainerdConfig.Runtimes))
|
||||||
|
for n := range c.config.ContainerdConfig.Runtimes {
|
||||||
|
handlerNames = append(handlerNames, n)
|
||||||
|
}
|
||||||
|
sort.Slice(handlerNames, func(i, j int) bool {
|
||||||
|
if handlerNames[i] == c.config.ContainerdConfig.DefaultRuntimeName {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if handlerNames[j] == c.config.ContainerdConfig.DefaultRuntimeName {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return handlerNames[i] < handlerNames[j]
|
||||||
|
})
|
||||||
|
|
||||||
|
for _, handler := range handlerNames {
|
||||||
|
opts, err := generateRuntimeOptions(c.config.ContainerdConfig.Runtimes[handler])
|
||||||
|
if err != nil {
|
||||||
|
log.G(ctx).Debugf("failed to parse runtime handler options for %q", handler)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if d, ok := getCgroupDriverFromRuntimeHandlerOpts(opts); ok {
|
||||||
|
return d
|
||||||
|
}
|
||||||
|
log.G(ctx).Debugf("runtime handler %q does not provide cgroup driver information", handler)
|
||||||
|
}
|
||||||
|
|
||||||
|
// If no runtime handlers have a setting, detect if systemd is running
|
||||||
|
d := runtime.CgroupDriver_CGROUPFS
|
||||||
|
if systemd.IsRunningSystemd() {
|
||||||
|
d = runtime.CgroupDriver_SYSTEMD
|
||||||
|
}
|
||||||
|
log.G(ctx).Debugf("no runtime handler provided cgroup driver setting, using auto-detected %s", runtime.CgroupDriver_name[int32(d)])
|
||||||
|
return d
|
||||||
|
}
|
||||||
|
|
||||||
|
func getCgroupDriverFromRuntimeHandlerOpts(opts interface{}) (runtime.CgroupDriver, bool) {
|
||||||
|
switch v := opts.(type) {
|
||||||
|
case *runcoptions.Options:
|
||||||
|
systemdCgroup := v.SystemdCgroup
|
||||||
|
if systemdCgroup {
|
||||||
|
return runtime.CgroupDriver_SYSTEMD, true
|
||||||
|
}
|
||||||
|
return runtime.CgroupDriver_CGROUPFS, true
|
||||||
|
}
|
||||||
|
return runtime.CgroupDriver_SYSTEMD, false
|
||||||
|
}
|
105
pkg/cri/sbserver/runtime_config_linux_test.go
Normal file
105
pkg/cri/sbserver/runtime_config_linux_test.go
Normal file
@ -0,0 +1,105 @@
|
|||||||
|
/*
|
||||||
|
Copyright The containerd Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package sbserver
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
criconfig "github.com/containerd/containerd/pkg/cri/config"
|
||||||
|
"github.com/containerd/containerd/plugin"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||||
|
)
|
||||||
|
|
||||||
|
func newFakeRuntimeConfig(runcV2, systemdCgroup bool) criconfig.Runtime {
|
||||||
|
r := criconfig.Runtime{Type: "default", Options: map[string]interface{}{}}
|
||||||
|
if runcV2 {
|
||||||
|
r.Type = plugin.RuntimeRuncV2
|
||||||
|
if systemdCgroup {
|
||||||
|
r.Options["SystemdCgroup"] = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRuntimeConfig(t *testing.T) {
|
||||||
|
autoDetected := runtime.CgroupDriver_CGROUPFS
|
||||||
|
if systemd.IsRunningSystemd() {
|
||||||
|
autoDetected = runtime.CgroupDriver_SYSTEMD
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, test := range []struct {
|
||||||
|
desc string
|
||||||
|
defaultRuntime string
|
||||||
|
runtimes map[string]criconfig.Runtime
|
||||||
|
expectedCgroupDriver runtime.CgroupDriver
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
desc: "no runtimes",
|
||||||
|
expectedCgroupDriver: autoDetected,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
desc: "non-runc runtime",
|
||||||
|
defaultRuntime: "non-runc",
|
||||||
|
runtimes: map[string]criconfig.Runtime{"non-runc": newFakeRuntimeConfig(false, false)},
|
||||||
|
expectedCgroupDriver: autoDetected,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
desc: "no default, pick first in alphabetical order",
|
||||||
|
runtimes: map[string]criconfig.Runtime{
|
||||||
|
"non-runc": newFakeRuntimeConfig(false, false),
|
||||||
|
"runc-2": newFakeRuntimeConfig(true, true),
|
||||||
|
"runc": newFakeRuntimeConfig(true, false),
|
||||||
|
"non-runc-2": newFakeRuntimeConfig(false, false),
|
||||||
|
},
|
||||||
|
expectedCgroupDriver: runtime.CgroupDriver_CGROUPFS,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
desc: "pick default, cgroupfs",
|
||||||
|
defaultRuntime: "runc-2",
|
||||||
|
runtimes: map[string]criconfig.Runtime{
|
||||||
|
"non-runc": newFakeRuntimeConfig(false, false),
|
||||||
|
"runc": newFakeRuntimeConfig(true, true),
|
||||||
|
"runc-2": newFakeRuntimeConfig(true, false),
|
||||||
|
},
|
||||||
|
expectedCgroupDriver: runtime.CgroupDriver_CGROUPFS,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
desc: "pick default, systemd",
|
||||||
|
defaultRuntime: "runc-2",
|
||||||
|
runtimes: map[string]criconfig.Runtime{
|
||||||
|
"non-runc": newFakeRuntimeConfig(false, false),
|
||||||
|
"runc": newFakeRuntimeConfig(true, false),
|
||||||
|
"runc-2": newFakeRuntimeConfig(true, true),
|
||||||
|
},
|
||||||
|
expectedCgroupDriver: runtime.CgroupDriver_SYSTEMD,
|
||||||
|
},
|
||||||
|
} {
|
||||||
|
test := test
|
||||||
|
t.Run(test.desc, func(t *testing.T) {
|
||||||
|
c := newTestCRIService()
|
||||||
|
c.config.PluginConfig.ContainerdConfig.DefaultRuntimeName = test.defaultRuntime
|
||||||
|
c.config.PluginConfig.ContainerdConfig.Runtimes = test.runtimes
|
||||||
|
|
||||||
|
resp, err := c.RuntimeConfig(context.TODO(), &runtime.RuntimeConfigRequest{})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, test.expectedCgroupDriver, resp.Linux.CgroupDriver, "got unexpected cgroup driver")
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
29
pkg/cri/sbserver/runtime_config_other.go
Normal file
29
pkg/cri/sbserver/runtime_config_other.go
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
//go:build !linux
|
||||||
|
|
||||||
|
/*
|
||||||
|
Copyright The containerd Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package sbserver
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
|
||||||
|
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||||
|
)
|
||||||
|
|
||||||
|
func (c *criService) getLinuxRuntimeConfig(ctx context.Context) *runtime.LinuxRuntimeConfiguration {
|
||||||
|
return nil
|
||||||
|
}
|
31
pkg/cri/server/runtime_config.go
Normal file
31
pkg/cri/server/runtime_config.go
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
/*
|
||||||
|
Copyright The containerd Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package server
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
|
||||||
|
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||||
|
)
|
||||||
|
|
||||||
|
// RuntimeConfig returns configuration information of the runtime.
|
||||||
|
func (c *criService) RuntimeConfig(ctx context.Context, r *runtime.RuntimeConfigRequest) (*runtime.RuntimeConfigResponse, error) {
|
||||||
|
resp := &runtime.RuntimeConfigResponse{
|
||||||
|
Linux: c.getLinuxRuntimeConfig(ctx),
|
||||||
|
}
|
||||||
|
return resp, nil
|
||||||
|
}
|
81
pkg/cri/server/runtime_config_linux.go
Normal file
81
pkg/cri/server/runtime_config_linux.go
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
/*
|
||||||
|
Copyright The containerd Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package server
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"sort"
|
||||||
|
|
||||||
|
"github.com/containerd/containerd/log"
|
||||||
|
runcoptions "github.com/containerd/containerd/runtime/v2/runc/options"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
|
||||||
|
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||||
|
)
|
||||||
|
|
||||||
|
func (c *criService) getLinuxRuntimeConfig(ctx context.Context) *runtime.LinuxRuntimeConfiguration {
|
||||||
|
return &runtime.LinuxRuntimeConfiguration{CgroupDriver: c.getCgroupDriver(ctx)}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *criService) getCgroupDriver(ctx context.Context) runtime.CgroupDriver {
|
||||||
|
// Go through the runtime handlers in a predictable order, starting from the
|
||||||
|
// default handler, others sorted in alphabetical order
|
||||||
|
handlerNames := make([]string, 0, len(c.config.ContainerdConfig.Runtimes))
|
||||||
|
for n := range c.config.ContainerdConfig.Runtimes {
|
||||||
|
handlerNames = append(handlerNames, n)
|
||||||
|
}
|
||||||
|
sort.Slice(handlerNames, func(i, j int) bool {
|
||||||
|
if handlerNames[i] == c.config.ContainerdConfig.DefaultRuntimeName {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if handlerNames[j] == c.config.ContainerdConfig.DefaultRuntimeName {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return handlerNames[i] < handlerNames[j]
|
||||||
|
})
|
||||||
|
|
||||||
|
for _, handler := range handlerNames {
|
||||||
|
opts, err := generateRuntimeOptions(c.config.ContainerdConfig.Runtimes[handler])
|
||||||
|
if err != nil {
|
||||||
|
log.G(ctx).Debugf("failed to parse runtime handler options for %q", handler)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if d, ok := getCgroupDriverFromRuntimeHandlerOpts(opts); ok {
|
||||||
|
return d
|
||||||
|
}
|
||||||
|
log.G(ctx).Debugf("runtime handler %q does not provide cgroup driver information", handler)
|
||||||
|
}
|
||||||
|
|
||||||
|
// If no runtime handlers have a setting, detect if systemd is running
|
||||||
|
d := runtime.CgroupDriver_CGROUPFS
|
||||||
|
if systemd.IsRunningSystemd() {
|
||||||
|
d = runtime.CgroupDriver_SYSTEMD
|
||||||
|
}
|
||||||
|
log.G(ctx).Debugf("no runtime handler provided cgroup driver setting, using auto-detected %s", runtime.CgroupDriver_name[int32(d)])
|
||||||
|
return d
|
||||||
|
}
|
||||||
|
|
||||||
|
func getCgroupDriverFromRuntimeHandlerOpts(opts interface{}) (runtime.CgroupDriver, bool) {
|
||||||
|
switch v := opts.(type) {
|
||||||
|
case *runcoptions.Options:
|
||||||
|
systemdCgroup := v.SystemdCgroup
|
||||||
|
if systemdCgroup {
|
||||||
|
return runtime.CgroupDriver_SYSTEMD, true
|
||||||
|
}
|
||||||
|
return runtime.CgroupDriver_CGROUPFS, true
|
||||||
|
}
|
||||||
|
return runtime.CgroupDriver_SYSTEMD, false
|
||||||
|
}
|
105
pkg/cri/server/runtime_config_linux_test.go
Normal file
105
pkg/cri/server/runtime_config_linux_test.go
Normal file
@ -0,0 +1,105 @@
|
|||||||
|
/*
|
||||||
|
Copyright The containerd Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package server
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
criconfig "github.com/containerd/containerd/pkg/cri/config"
|
||||||
|
"github.com/containerd/containerd/plugin"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||||
|
)
|
||||||
|
|
||||||
|
func newFakeRuntimeConfig(runcV2, systemdCgroup bool) criconfig.Runtime {
|
||||||
|
r := criconfig.Runtime{Type: "default", Options: map[string]interface{}{}}
|
||||||
|
if runcV2 {
|
||||||
|
r.Type = plugin.RuntimeRuncV2
|
||||||
|
if systemdCgroup {
|
||||||
|
r.Options["SystemdCgroup"] = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRuntimeConfig(t *testing.T) {
|
||||||
|
autoDetected := runtime.CgroupDriver_CGROUPFS
|
||||||
|
if systemd.IsRunningSystemd() {
|
||||||
|
autoDetected = runtime.CgroupDriver_SYSTEMD
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, test := range []struct {
|
||||||
|
desc string
|
||||||
|
defaultRuntime string
|
||||||
|
runtimes map[string]criconfig.Runtime
|
||||||
|
expectedCgroupDriver runtime.CgroupDriver
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
desc: "no runtimes",
|
||||||
|
expectedCgroupDriver: autoDetected,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
desc: "non-runc runtime",
|
||||||
|
defaultRuntime: "non-runc",
|
||||||
|
runtimes: map[string]criconfig.Runtime{"non-runc": newFakeRuntimeConfig(false, false)},
|
||||||
|
expectedCgroupDriver: autoDetected,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
desc: "no default, pick first in alphabetical order",
|
||||||
|
runtimes: map[string]criconfig.Runtime{
|
||||||
|
"non-runc": newFakeRuntimeConfig(false, false),
|
||||||
|
"runc-2": newFakeRuntimeConfig(true, true),
|
||||||
|
"runc": newFakeRuntimeConfig(true, false),
|
||||||
|
"non-runc-2": newFakeRuntimeConfig(false, false),
|
||||||
|
},
|
||||||
|
expectedCgroupDriver: runtime.CgroupDriver_CGROUPFS,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
desc: "pick default, cgroupfs",
|
||||||
|
defaultRuntime: "runc-2",
|
||||||
|
runtimes: map[string]criconfig.Runtime{
|
||||||
|
"non-runc": newFakeRuntimeConfig(false, false),
|
||||||
|
"runc": newFakeRuntimeConfig(true, true),
|
||||||
|
"runc-2": newFakeRuntimeConfig(true, false),
|
||||||
|
},
|
||||||
|
expectedCgroupDriver: runtime.CgroupDriver_CGROUPFS,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
desc: "pick default, systemd",
|
||||||
|
defaultRuntime: "runc-2",
|
||||||
|
runtimes: map[string]criconfig.Runtime{
|
||||||
|
"non-runc": newFakeRuntimeConfig(false, false),
|
||||||
|
"runc": newFakeRuntimeConfig(true, false),
|
||||||
|
"runc-2": newFakeRuntimeConfig(true, true),
|
||||||
|
},
|
||||||
|
expectedCgroupDriver: runtime.CgroupDriver_SYSTEMD,
|
||||||
|
},
|
||||||
|
} {
|
||||||
|
test := test
|
||||||
|
t.Run(test.desc, func(t *testing.T) {
|
||||||
|
c := newTestCRIService()
|
||||||
|
c.config.PluginConfig.ContainerdConfig.DefaultRuntimeName = test.defaultRuntime
|
||||||
|
c.config.PluginConfig.ContainerdConfig.Runtimes = test.runtimes
|
||||||
|
|
||||||
|
resp, err := c.RuntimeConfig(context.TODO(), &runtime.RuntimeConfigRequest{})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, test.expectedCgroupDriver, resp.Linux.CgroupDriver, "got unexpected cgroup driver")
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
29
pkg/cri/server/runtime_config_other.go
Normal file
29
pkg/cri/server/runtime_config_other.go
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
//go:build !linux
|
||||||
|
|
||||||
|
/*
|
||||||
|
Copyright The containerd Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package server
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
|
||||||
|
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||||
|
)
|
||||||
|
|
||||||
|
// getLinuxRuntimeConfig is the variant compiled for non-Linux targets (this
// file carries a "!linux" build constraint). It does not inspect ctx or the
// service configuration and always returns nil.
func (c *criService) getLinuxRuntimeConfig(ctx context.Context) *runtime.LinuxRuntimeConfiguration {
|
||||||
|
return nil
|
||||||
|
}
|
59
vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go
generated
vendored
Normal file
59
vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go
generated
vendored
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
package cgroups
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Manager is the set of operations available on a single cgroup: creating it
// and adding processes, listing its PIDs, reading statistics, freezing,
// updating resource limits, querying its paths and destroying it.
type Manager interface {
|
||||||
|
// Apply creates a cgroup, if not yet created, and adds a process
|
||||||
|
// with the specified pid into that cgroup. A special value of -1
|
||||||
|
// can be used to merely create a cgroup.
|
||||||
|
Apply(pid int) error
|
||||||
|
|
||||||
|
// GetPids returns the PIDs of all processes inside the cgroup.
|
||||||
|
GetPids() ([]int, error)
|
||||||
|
|
||||||
|
// GetAllPids returns the PIDs of all processes inside the cgroup
|
||||||
|
// and all its sub-cgroups.
|
||||||
|
GetAllPids() ([]int, error)
|
||||||
|
|
||||||
|
// GetStats returns cgroups statistics.
|
||||||
|
GetStats() (*Stats, error)
|
||||||
|
|
||||||
|
// Freeze sets the freezer cgroup to the specified state.
|
||||||
|
Freeze(state configs.FreezerState) error
|
||||||
|
|
||||||
|
// Destroy removes cgroup.
|
||||||
|
Destroy() error
|
||||||
|
|
||||||
|
// Path returns a cgroup path to the specified controller/subsystem.
|
||||||
|
// For cgroupv2, the argument is unused and can be empty.
|
||||||
|
Path(string) string
|
||||||
|
|
||||||
|
// Set sets cgroup resources parameters/limits. If the argument is nil,
|
||||||
|
// the resources specified during Manager creation (or the previous call
|
||||||
|
// to Set) are used.
|
||||||
|
Set(r *configs.Resources) error
|
||||||
|
|
||||||
|
// GetPaths returns cgroup path(s) to save in a state file in order to
|
||||||
|
// restore later.
|
||||||
|
//
|
||||||
|
// For cgroup v1, a key is cgroup subsystem name, and the value is the
|
||||||
|
// path to the cgroup for this subsystem.
|
||||||
|
//
|
||||||
|
// For cgroup v2 unified hierarchy, a key is "", and the value is the
|
||||||
|
// unified path.
|
||||||
|
GetPaths() map[string]string
|
||||||
|
|
||||||
|
// GetCgroups returns the cgroup data as configured.
|
||||||
|
GetCgroups() (*configs.Cgroup, error)
|
||||||
|
|
||||||
|
// GetFreezerState retrieves the current FreezerState of the cgroup.
|
||||||
|
GetFreezerState() (configs.FreezerState, error)
|
||||||
|
|
||||||
|
// Exists returns whether the cgroup path exists or not.
|
||||||
|
Exists() bool
|
||||||
|
|
||||||
|
// OOMKillCount reports OOM kill count for the cgroup.
|
||||||
|
OOMKillCount() (uint64, error)
|
||||||
|
}
|
386
vendor/github.com/opencontainers/runc/libcontainer/cgroups/devices/devices_emulator.go
generated
vendored
Normal file
386
vendor/github.com/opencontainers/runc/libcontainer/cgroups/devices/devices_emulator.go
generated
vendored
Normal file
@ -0,0 +1,386 @@
|
|||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2020 Aleksa Sarai <cyphar@cyphar.com>
|
||||||
|
* Copyright (C) 2020 SUSE LLC
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package devices
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"sort"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/devices"
|
||||||
|
)
|
||||||
|
|
||||||
|
// deviceMeta is a Rule without the Allow or Permissions fields, and no
|
||||||
|
// wildcard-type support. It's effectively the "match" portion of a metadata
|
||||||
|
// rule, for the purposes of our emulation.
|
||||||
|
type deviceMeta struct {
|
||||||
|
// node is the device type; parseLine sets it to devices.BlockDevice or
// devices.CharDevice.
node devices.Type
|
||||||
|
// major is the device major number, or devices.Wildcard when the rule
// applies to every major number.
major int64
|
||||||
|
// minor is the device minor number, or devices.Wildcard when the rule
// applies to every minor number.
minor int64
|
||||||
|
}
|
||||||
|
|
||||||
|
// deviceRule is effectively the tuple (deviceMeta, Permissions).
|
||||||
|
type deviceRule struct {
|
||||||
|
// meta identifies which device(s) the rule matches.
meta deviceMeta
|
||||||
|
// perms is the set of access permissions attached to meta.
perms devices.Permissions
|
||||||
|
}
|
||||||
|
|
||||||
|
// deviceRules is a mapping of device metadata rules to the associated
|
||||||
|
// permissions in the ruleset.
|
||||||
|
type deviceRules map[deviceMeta]devices.Permissions
|
||||||
|
|
||||||
|
func (r deviceRules) orderedEntries() []deviceRule {
|
||||||
|
var rules []deviceRule
|
||||||
|
for meta, perms := range r {
|
||||||
|
rules = append(rules, deviceRule{meta: meta, perms: perms})
|
||||||
|
}
|
||||||
|
sort.Slice(rules, func(i, j int) bool {
|
||||||
|
// Sort by (major, minor, type).
|
||||||
|
a, b := rules[i].meta, rules[j].meta
|
||||||
|
return a.major < b.major ||
|
||||||
|
(a.major == b.major && a.minor < b.minor) ||
|
||||||
|
(a.major == b.major && a.minor == b.minor && a.node < b.node)
|
||||||
|
})
|
||||||
|
return rules
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emulator is an in-memory model of a devices cgroup's filter state: a default
// policy plus the set of exception rules to that policy.
type Emulator struct {
|
||||||
|
// defaultAllow is true when the cgroup is in black-list mode (access is
// allowed unless a rule denies it) and false in white-list mode.
defaultAllow bool
|
||||||
|
// rules holds the exceptions to the default policy, keyed by device.
rules deviceRules
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsBlacklist reports whether the emulated cgroup's default policy is to
// allow access (i.e. its rules are deny exceptions).
func (e *Emulator) IsBlacklist() bool {
|
||||||
|
return e.defaultAllow
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *Emulator) IsAllowAll() bool {
|
||||||
|
return e.IsBlacklist() && len(e.rules) == 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseLine(line string) (*deviceRule, error) {
|
||||||
|
// Input: node major:minor perms.
|
||||||
|
fields := strings.FieldsFunc(line, func(r rune) bool {
|
||||||
|
return r == ' ' || r == ':'
|
||||||
|
})
|
||||||
|
if len(fields) != 4 {
|
||||||
|
return nil, fmt.Errorf("malformed devices.list rule %s", line)
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
rule deviceRule
|
||||||
|
node = fields[0]
|
||||||
|
major = fields[1]
|
||||||
|
minor = fields[2]
|
||||||
|
perms = fields[3]
|
||||||
|
)
|
||||||
|
|
||||||
|
// Parse the node type.
|
||||||
|
switch node {
|
||||||
|
case "a":
|
||||||
|
// Super-special case -- "a" always means every device with every
|
||||||
|
// access mode. In fact, for devices.list this actually indicates that
|
||||||
|
// the cgroup is in black-list mode.
|
||||||
|
// TODO: Double-check that the entire file is "a *:* rwm".
|
||||||
|
return nil, nil
|
||||||
|
case "b":
|
||||||
|
rule.meta.node = devices.BlockDevice
|
||||||
|
case "c":
|
||||||
|
rule.meta.node = devices.CharDevice
|
||||||
|
default:
|
||||||
|
return nil, fmt.Errorf("unknown device type %q", node)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse the major number.
|
||||||
|
if major == "*" {
|
||||||
|
rule.meta.major = devices.Wildcard
|
||||||
|
} else {
|
||||||
|
val, err := strconv.ParseUint(major, 10, 32)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("invalid major number: %w", err)
|
||||||
|
}
|
||||||
|
rule.meta.major = int64(val)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse the minor number.
|
||||||
|
if minor == "*" {
|
||||||
|
rule.meta.minor = devices.Wildcard
|
||||||
|
} else {
|
||||||
|
val, err := strconv.ParseUint(minor, 10, 32)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("invalid minor number: %w", err)
|
||||||
|
}
|
||||||
|
rule.meta.minor = int64(val)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse the access permissions.
|
||||||
|
rule.perms = devices.Permissions(perms)
|
||||||
|
if !rule.perms.IsValid() || rule.perms.IsEmpty() {
|
||||||
|
return nil, fmt.Errorf("parse access mode: contained unknown modes or is empty: %q", perms)
|
||||||
|
}
|
||||||
|
return &rule, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *Emulator) addRule(rule deviceRule) error { //nolint:unparam
|
||||||
|
if e.rules == nil {
|
||||||
|
e.rules = make(map[deviceMeta]devices.Permissions)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Merge with any pre-existing permissions.
|
||||||
|
oldPerms := e.rules[rule.meta]
|
||||||
|
newPerms := rule.perms.Union(oldPerms)
|
||||||
|
e.rules[rule.meta] = newPerms
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *Emulator) rmRule(rule deviceRule) error {
|
||||||
|
// Give an error if any of the permissions requested to be removed are
|
||||||
|
// present in a partially-matching wildcard rule, because such rules will
|
||||||
|
// be ignored by cgroupv1.
|
||||||
|
//
|
||||||
|
// This is a diversion from cgroupv1, but is necessary to avoid leading
|
||||||
|
// users into a false sense of security. cgroupv1 will silently(!) ignore
|
||||||
|
// requests to remove partial exceptions, but we really shouldn't do that.
|
||||||
|
//
|
||||||
|
// It may seem like we could just "split" wildcard rules which hit this
|
||||||
|
// issue, but unfortunately there are 2^32 possible major and minor
|
||||||
|
// numbers, which would exhaust kernel memory quickly if we did this. Not
|
||||||
|
// to mention it'd be really slow (the kernel side is implemented as a
|
||||||
|
// linked-list of exceptions).
|
||||||
|
for _, partialMeta := range []deviceMeta{
|
||||||
|
{node: rule.meta.node, major: devices.Wildcard, minor: rule.meta.minor},
|
||||||
|
{node: rule.meta.node, major: rule.meta.major, minor: devices.Wildcard},
|
||||||
|
{node: rule.meta.node, major: devices.Wildcard, minor: devices.Wildcard},
|
||||||
|
} {
|
||||||
|
// This wildcard rule is equivalent to the requested rule, so skip it.
|
||||||
|
if rule.meta == partialMeta {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Only give an error if the set of permissions overlap.
|
||||||
|
partialPerms := e.rules[partialMeta]
|
||||||
|
if !partialPerms.Intersection(rule.perms).IsEmpty() {
|
||||||
|
return fmt.Errorf("requested rule [%v %v] not supported by devices cgroupv1 (cannot punch hole in existing wildcard rule [%v %v])", rule.meta, rule.perms, partialMeta, partialPerms)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Subtract all of the permissions listed from the full match rule. If the
|
||||||
|
// rule didn't exist, all of this is a no-op.
|
||||||
|
newPerms := e.rules[rule.meta].Difference(rule.perms)
|
||||||
|
if newPerms.IsEmpty() {
|
||||||
|
delete(e.rules, rule.meta)
|
||||||
|
} else {
|
||||||
|
e.rules[rule.meta] = newPerms
|
||||||
|
}
|
||||||
|
// TODO: The actual cgroup code doesn't care if an exception didn't exist
|
||||||
|
// during removal, so not erroring out here is /accurate/ but quite
|
||||||
|
// worrying. Maybe we should do additional validation, but again we
|
||||||
|
// have to worry about backwards-compatibility.
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *Emulator) allow(rule *deviceRule) error {
|
||||||
|
// This cgroup is configured as a black-list. Reset the entire emulator,
|
||||||
|
// and put is into black-list mode.
|
||||||
|
if rule == nil || rule.meta.node == devices.WildcardDevice {
|
||||||
|
*e = Emulator{
|
||||||
|
defaultAllow: true,
|
||||||
|
rules: nil,
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var err error
|
||||||
|
if e.defaultAllow {
|
||||||
|
err = wrapErr(e.rmRule(*rule), "unable to remove 'deny' exception")
|
||||||
|
} else {
|
||||||
|
err = wrapErr(e.addRule(*rule), "unable to add 'allow' exception")
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *Emulator) deny(rule *deviceRule) error {
|
||||||
|
// This cgroup is configured as a white-list. Reset the entire emulator,
|
||||||
|
// and put is into white-list mode.
|
||||||
|
if rule == nil || rule.meta.node == devices.WildcardDevice {
|
||||||
|
*e = Emulator{
|
||||||
|
defaultAllow: false,
|
||||||
|
rules: nil,
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var err error
|
||||||
|
if e.defaultAllow {
|
||||||
|
err = wrapErr(e.addRule(*rule), "unable to add 'deny' exception")
|
||||||
|
} else {
|
||||||
|
err = wrapErr(e.rmRule(*rule), "unable to remove 'allow' exception")
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *Emulator) Apply(rule devices.Rule) error {
|
||||||
|
if !rule.Type.CanCgroup() {
|
||||||
|
return fmt.Errorf("cannot add rule [%#v] with non-cgroup type %q", rule, rule.Type)
|
||||||
|
}
|
||||||
|
|
||||||
|
innerRule := &deviceRule{
|
||||||
|
meta: deviceMeta{
|
||||||
|
node: rule.Type,
|
||||||
|
major: rule.Major,
|
||||||
|
minor: rule.Minor,
|
||||||
|
},
|
||||||
|
perms: rule.Permissions,
|
||||||
|
}
|
||||||
|
if innerRule.meta.node == devices.WildcardDevice {
|
||||||
|
innerRule = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if rule.Allow {
|
||||||
|
return e.allow(innerRule)
|
||||||
|
}
|
||||||
|
|
||||||
|
return e.deny(innerRule)
|
||||||
|
}
|
||||||
|
|
||||||
|
// EmulatorFromList takes a reader to a "devices.list"-like source, and returns
|
||||||
|
// a new Emulator that represents the state of the devices cgroup. Note that
|
||||||
|
// black-list devices cgroups cannot be fully reconstructed, due to limitations
|
||||||
|
// in the devices cgroup API. Instead, such cgroups are always treated as
|
||||||
|
// "allow all" cgroups.
|
||||||
|
func EmulatorFromList(list io.Reader) (*Emulator, error) {
|
||||||
|
// Normally cgroups are in black-list mode by default, but the way we
|
||||||
|
// figure out the current mode is whether or not devices.list has an
|
||||||
|
// allow-all rule. So we default to a white-list, and the existence of an
|
||||||
|
// "a *:* rwm" entry will tell us otherwise.
|
||||||
|
e := &Emulator{
|
||||||
|
defaultAllow: false,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse the "devices.list".
|
||||||
|
s := bufio.NewScanner(list)
|
||||||
|
for s.Scan() {
|
||||||
|
line := s.Text()
|
||||||
|
deviceRule, err := parseLine(line)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("error parsing line %q: %w", line, err)
|
||||||
|
}
|
||||||
|
// "devices.list" is an allow list. Note that this means that in
|
||||||
|
// black-list mode, we have no idea what rules are in play. As a
|
||||||
|
// result, we need to be very careful in Transition().
|
||||||
|
if err := e.allow(deviceRule); err != nil {
|
||||||
|
return nil, fmt.Errorf("error adding devices.list rule: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if err := s.Err(); err != nil {
|
||||||
|
return nil, fmt.Errorf("error reading devices.list lines: %w", err)
|
||||||
|
}
|
||||||
|
return e, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Transition calculates the minimally-disruptive set of rules that need to
|
||||||
|
// be applied to a devices cgroup in order to transition to the given target.
|
||||||
|
// This means that any already-existing rules will not be applied, and
|
||||||
|
// disruptive rules (like denying all device access) will only be applied if
|
||||||
|
// necessary.
|
||||||
|
//
|
||||||
|
// This function is the sole reason for all of Emulator -- to allow us
|
||||||
|
// to figure out how to update a container's cgroups without causing spurious
|
||||||
|
// device errors (if possible).
|
||||||
|
func (source *Emulator) Transition(target *Emulator) ([]*devices.Rule, error) {
|
||||||
|
var transitionRules []*devices.Rule
|
||||||
|
oldRules := source.rules
|
||||||
|
|
||||||
|
// If the default policy doesn't match, we need to include a "disruptive"
|
||||||
|
// rule (either allow-all or deny-all) in order to switch the cgroup to the
|
||||||
|
// correct default policy.
|
||||||
|
//
|
||||||
|
// However, due to a limitation in "devices.list" we cannot be sure what
|
||||||
|
// deny rules are in place in a black-list cgroup. Thus if the source is a
|
||||||
|
// black-list we also have to include a disruptive rule.
|
||||||
|
if source.IsBlacklist() || source.defaultAllow != target.defaultAllow {
|
||||||
|
transitionRules = append(transitionRules, &devices.Rule{
|
||||||
|
Type: 'a',
|
||||||
|
Major: -1,
|
||||||
|
Minor: -1,
|
||||||
|
Permissions: devices.Permissions("rwm"),
|
||||||
|
Allow: target.defaultAllow,
|
||||||
|
})
|
||||||
|
// The old rules are only relevant if we aren't starting out with a
|
||||||
|
// disruptive rule.
|
||||||
|
oldRules = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// NOTE: We traverse through the rules in a sorted order so we always write
|
||||||
|
// the same set of rules (this is to aid testing).
|
||||||
|
|
||||||
|
// First, we create inverse rules for any old rules not in the new set.
|
||||||
|
// This includes partial-inverse rules for specific permissions. This is a
|
||||||
|
// no-op if we added a disruptive rule, since oldRules will be empty.
|
||||||
|
for _, rule := range oldRules.orderedEntries() {
|
||||||
|
meta, oldPerms := rule.meta, rule.perms
|
||||||
|
newPerms := target.rules[meta]
|
||||||
|
droppedPerms := oldPerms.Difference(newPerms)
|
||||||
|
if !droppedPerms.IsEmpty() {
|
||||||
|
transitionRules = append(transitionRules, &devices.Rule{
|
||||||
|
Type: meta.node,
|
||||||
|
Major: meta.major,
|
||||||
|
Minor: meta.minor,
|
||||||
|
Permissions: droppedPerms,
|
||||||
|
Allow: target.defaultAllow,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add any additional rules which weren't in the old set. We happen to
|
||||||
|
// filter out rules which are present in both sets, though this isn't
|
||||||
|
// strictly necessary.
|
||||||
|
for _, rule := range target.rules.orderedEntries() {
|
||||||
|
meta, newPerms := rule.meta, rule.perms
|
||||||
|
oldPerms := oldRules[meta]
|
||||||
|
gainedPerms := newPerms.Difference(oldPerms)
|
||||||
|
if !gainedPerms.IsEmpty() {
|
||||||
|
transitionRules = append(transitionRules, &devices.Rule{
|
||||||
|
Type: meta.node,
|
||||||
|
Major: meta.major,
|
||||||
|
Minor: meta.minor,
|
||||||
|
Permissions: gainedPerms,
|
||||||
|
Allow: !target.defaultAllow,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return transitionRules, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Rules returns the minimum set of rules necessary to convert a *deny-all*
|
||||||
|
// cgroup to the emulated filter state (note that this is not the same as a
|
||||||
|
// default cgroupv1 cgroup -- which is allow-all). This is effectively just a
|
||||||
|
// wrapper around Transition() with the source emulator being an empty cgroup.
|
||||||
|
func (e *Emulator) Rules() ([]*devices.Rule, error) {
|
||||||
|
defaultCgroup := &Emulator{defaultAllow: false}
|
||||||
|
return defaultCgroup.Transition(e)
|
||||||
|
}
|
||||||
|
|
||||||
|
// wrapErr prefixes err with text, producing "<text>: <err>" while keeping err
// reachable through errors.Is / errors.As / Unwrap. A nil err yields nil, so
// the result of a call can be passed through unconditionally.
//
// text is treated as literal text, never as a format string, so a '%' in it
// cannot corrupt the resulting message.
func wrapErr(err error, text string) error {
	if err == nil {
		return nil
	}
	return fmt.Errorf("%s: %w", text, err)
}
|
208
vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go
generated
vendored
Normal file
208
vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go
generated
vendored
Normal file
@ -0,0 +1,208 @@
|
|||||||
|
// Package devicefilter contains eBPF device filter program
|
||||||
|
//
|
||||||
|
// The implementation is based on https://github.com/containers/crun/blob/0.10.2/src/libcrun/ebpf.c
|
||||||
|
//
|
||||||
|
// Although ebpf.c is originally licensed under LGPL-3.0-or-later, the author (Giuseppe Scrivano)
|
||||||
|
// agreed to relicense the file in Apache License 2.0: https://github.com/opencontainers/runc/issues/2144#issuecomment-543116397
|
||||||
|
package devicefilter
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"math"
|
||||||
|
"strconv"
|
||||||
|
|
||||||
|
"github.com/cilium/ebpf/asm"
|
||||||
|
devicesemulator "github.com/opencontainers/runc/libcontainer/cgroups/devices"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/devices"
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
// license is the license string DeviceFilter returns alongside the
// generated program; its format is the same as the kernel MODULE_LICENSE
// macro.
|
||||||
|
license = "Apache"
|
||||||
|
)
|
||||||
|
|
||||||
|
// DeviceFilter returns eBPF device filter program and its license string
|
||||||
|
func DeviceFilter(rules []*devices.Rule) (asm.Instructions, string, error) {
|
||||||
|
// Generate the minimum ruleset for the device rules we are given. While we
|
||||||
|
// don't care about minimum transitions in cgroupv2, using the emulator
|
||||||
|
// gives us a guarantee that the behaviour of devices filtering is the same
|
||||||
|
// as cgroupv1, including security hardenings to avoid misconfiguration
|
||||||
|
// (such as punching holes in wildcard rules).
|
||||||
|
emu := new(devicesemulator.Emulator)
|
||||||
|
for _, rule := range rules {
|
||||||
|
if err := emu.Apply(*rule); err != nil {
|
||||||
|
return nil, "", err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cleanRules, err := emu.Rules()
|
||||||
|
if err != nil {
|
||||||
|
return nil, "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
p := &program{
|
||||||
|
defaultAllow: emu.IsBlacklist(),
|
||||||
|
}
|
||||||
|
p.init()
|
||||||
|
|
||||||
|
for idx, rule := range cleanRules {
|
||||||
|
if rule.Type == devices.WildcardDevice {
|
||||||
|
// We can safely skip over wildcard entries because there should
|
||||||
|
// only be one (at most) at the very start to instruct cgroupv1 to
|
||||||
|
// go into allow-list mode. However we do double-check this here.
|
||||||
|
if idx != 0 || rule.Allow != emu.IsBlacklist() {
|
||||||
|
return nil, "", fmt.Errorf("[internal error] emulated cgroupv2 devices ruleset had bad wildcard at idx %v (%s)", idx, rule.CgroupString())
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if rule.Allow == p.defaultAllow {
|
||||||
|
// There should be no rules which have an action equal to the
|
||||||
|
// default action, the emulator removes those.
|
||||||
|
return nil, "", fmt.Errorf("[internal error] emulated cgroupv2 devices ruleset had no-op rule at idx %v (%s)", idx, rule.CgroupString())
|
||||||
|
}
|
||||||
|
if err := p.appendRule(rule); err != nil {
|
||||||
|
return nil, "", err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return p.finalize(), license, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// program accumulates the eBPF instructions of a device filter while it is
// being built.
type program struct {
|
||||||
|
// insts is the instruction stream built so far.
insts asm.Instructions
|
||||||
|
// defaultAllow selects the value finalize() makes the program return when
// no rule block matched (1 when true, 0 when false).
defaultAllow bool
|
||||||
|
// blockID is the index of the next rule block; it is used to build the
// "block-N" jump labels and is set to -1 once the program is finalized.
blockID int
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *program) init() {
|
||||||
|
// struct bpf_cgroup_dev_ctx: https://elixir.bootlin.com/linux/v5.3.6/source/include/uapi/linux/bpf.h#L3423
|
||||||
|
/*
|
||||||
|
u32 access_type
|
||||||
|
u32 major
|
||||||
|
u32 minor
|
||||||
|
*/
|
||||||
|
// R2 <- type (lower 16 bit of u32 access_type at R1[0])
|
||||||
|
p.insts = append(p.insts,
|
||||||
|
asm.LoadMem(asm.R2, asm.R1, 0, asm.Word),
|
||||||
|
asm.And.Imm32(asm.R2, 0xFFFF))
|
||||||
|
|
||||||
|
// R3 <- access (upper 16 bit of u32 access_type at R1[0])
|
||||||
|
p.insts = append(p.insts,
|
||||||
|
asm.LoadMem(asm.R3, asm.R1, 0, asm.Word),
|
||||||
|
// RSh: bitwise shift right
|
||||||
|
asm.RSh.Imm32(asm.R3, 16))
|
||||||
|
|
||||||
|
// R4 <- major (u32 major at R1[4])
|
||||||
|
p.insts = append(p.insts,
|
||||||
|
asm.LoadMem(asm.R4, asm.R1, 4, asm.Word))
|
||||||
|
|
||||||
|
// R5 <- minor (u32 minor at R1[8])
|
||||||
|
p.insts = append(p.insts,
|
||||||
|
asm.LoadMem(asm.R5, asm.R1, 8, asm.Word))
|
||||||
|
}
|
||||||
|
|
||||||
|
// appendRule converts an OCI rule to the relevant eBPF block and adds it
|
||||||
|
// to the in-progress filter program. In order to operate properly, it must be
|
||||||
|
// called with a "clean" rule list (generated by devices.Emulator.Rules() --
|
||||||
|
// with any "a" rules removed).
|
||||||
|
func (p *program) appendRule(rule *devices.Rule) error {
|
||||||
|
if p.blockID < 0 {
|
||||||
|
return errors.New("the program is finalized")
|
||||||
|
}
|
||||||
|
|
||||||
|
var bpfType int32
|
||||||
|
switch rule.Type {
|
||||||
|
case devices.CharDevice:
|
||||||
|
bpfType = int32(unix.BPF_DEVCG_DEV_CHAR)
|
||||||
|
case devices.BlockDevice:
|
||||||
|
bpfType = int32(unix.BPF_DEVCG_DEV_BLOCK)
|
||||||
|
default:
|
||||||
|
// We do not permit 'a', nor any other types we don't know about.
|
||||||
|
return fmt.Errorf("invalid type %q", string(rule.Type))
|
||||||
|
}
|
||||||
|
if rule.Major > math.MaxUint32 {
|
||||||
|
return fmt.Errorf("invalid major %d", rule.Major)
|
||||||
|
}
|
||||||
|
if rule.Minor > math.MaxUint32 {
|
||||||
|
return fmt.Errorf("invalid minor %d", rule.Major)
|
||||||
|
}
|
||||||
|
hasMajor := rule.Major >= 0 // if not specified in OCI json, major is set to -1
|
||||||
|
hasMinor := rule.Minor >= 0
|
||||||
|
bpfAccess := int32(0)
|
||||||
|
for _, r := range rule.Permissions {
|
||||||
|
switch r {
|
||||||
|
case 'r':
|
||||||
|
bpfAccess |= unix.BPF_DEVCG_ACC_READ
|
||||||
|
case 'w':
|
||||||
|
bpfAccess |= unix.BPF_DEVCG_ACC_WRITE
|
||||||
|
case 'm':
|
||||||
|
bpfAccess |= unix.BPF_DEVCG_ACC_MKNOD
|
||||||
|
default:
|
||||||
|
return fmt.Errorf("unknown device access %v", r)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// If the access is rwm, skip the check.
|
||||||
|
hasAccess := bpfAccess != (unix.BPF_DEVCG_ACC_READ | unix.BPF_DEVCG_ACC_WRITE | unix.BPF_DEVCG_ACC_MKNOD)
|
||||||
|
|
||||||
|
var (
|
||||||
|
blockSym = "block-" + strconv.Itoa(p.blockID)
|
||||||
|
nextBlockSym = "block-" + strconv.Itoa(p.blockID+1)
|
||||||
|
prevBlockLastIdx = len(p.insts) - 1
|
||||||
|
)
|
||||||
|
p.insts = append(p.insts,
|
||||||
|
// if (R2 != bpfType) goto next
|
||||||
|
asm.JNE.Imm(asm.R2, bpfType, nextBlockSym),
|
||||||
|
)
|
||||||
|
if hasAccess {
|
||||||
|
p.insts = append(p.insts,
|
||||||
|
// if (R3 & bpfAccess != R3 /* use R1 as a temp var */) goto next
|
||||||
|
asm.Mov.Reg32(asm.R1, asm.R3),
|
||||||
|
asm.And.Imm32(asm.R1, bpfAccess),
|
||||||
|
asm.JNE.Reg(asm.R1, asm.R3, nextBlockSym),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
if hasMajor {
|
||||||
|
p.insts = append(p.insts,
|
||||||
|
// if (R4 != major) goto next
|
||||||
|
asm.JNE.Imm(asm.R4, int32(rule.Major), nextBlockSym),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
if hasMinor {
|
||||||
|
p.insts = append(p.insts,
|
||||||
|
// if (R5 != minor) goto next
|
||||||
|
asm.JNE.Imm(asm.R5, int32(rule.Minor), nextBlockSym),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
p.insts = append(p.insts, acceptBlock(rule.Allow)...)
|
||||||
|
// set blockSym to the first instruction we added in this iteration
|
||||||
|
p.insts[prevBlockLastIdx+1] = p.insts[prevBlockLastIdx+1].Sym(blockSym)
|
||||||
|
p.blockID++
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *program) finalize() asm.Instructions {
|
||||||
|
var v int32
|
||||||
|
if p.defaultAllow {
|
||||||
|
v = 1
|
||||||
|
}
|
||||||
|
blockSym := "block-" + strconv.Itoa(p.blockID)
|
||||||
|
p.insts = append(p.insts,
|
||||||
|
// R0 <- v
|
||||||
|
asm.Mov.Imm32(asm.R0, v).Sym(blockSym),
|
||||||
|
asm.Return(),
|
||||||
|
)
|
||||||
|
p.blockID = -1
|
||||||
|
return p.insts
|
||||||
|
}
|
||||||
|
|
||||||
|
func acceptBlock(accept bool) asm.Instructions {
|
||||||
|
var v int32
|
||||||
|
if accept {
|
||||||
|
v = 1
|
||||||
|
}
|
||||||
|
return []asm.Instruction{
|
||||||
|
// R0 <- v
|
||||||
|
asm.Mov.Imm32(asm.R0, v),
|
||||||
|
asm.Return(),
|
||||||
|
}
|
||||||
|
}
|
253
vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/ebpf_linux.go
generated
vendored
Normal file
253
vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/ebpf_linux.go
generated
vendored
Normal file
@ -0,0 +1,253 @@
|
|||||||
|
package ebpf
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"runtime"
|
||||||
|
"sync"
|
||||||
|
"unsafe"
|
||||||
|
|
||||||
|
"github.com/cilium/ebpf"
|
||||||
|
"github.com/cilium/ebpf/asm"
|
||||||
|
"github.com/cilium/ebpf/link"
|
||||||
|
"github.com/sirupsen/logrus"
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
)
|
||||||
|
|
||||||
|
// nilCloser does nothing and always returns nil.
// NOTE(review): presumably used as a placeholder "close" function by code
// outside this view -- confirm against its callers.
func nilCloser() error {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func findAttachedCgroupDeviceFilters(dirFd int) ([]*ebpf.Program, error) {
|
||||||
|
type bpfAttrQuery struct {
|
||||||
|
TargetFd uint32
|
||||||
|
AttachType uint32
|
||||||
|
QueryType uint32
|
||||||
|
AttachFlags uint32
|
||||||
|
ProgIds uint64 // __aligned_u64
|
||||||
|
ProgCnt uint32
|
||||||
|
}
|
||||||
|
|
||||||
|
// Currently you can only have 64 eBPF programs attached to a cgroup.
|
||||||
|
size := 64
|
||||||
|
retries := 0
|
||||||
|
for retries < 10 {
|
||||||
|
progIds := make([]uint32, size)
|
||||||
|
query := bpfAttrQuery{
|
||||||
|
TargetFd: uint32(dirFd),
|
||||||
|
AttachType: uint32(unix.BPF_CGROUP_DEVICE),
|
||||||
|
ProgIds: uint64(uintptr(unsafe.Pointer(&progIds[0]))),
|
||||||
|
ProgCnt: uint32(len(progIds)),
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fetch the list of program ids.
|
||||||
|
_, _, errno := unix.Syscall(unix.SYS_BPF,
|
||||||
|
uintptr(unix.BPF_PROG_QUERY),
|
||||||
|
uintptr(unsafe.Pointer(&query)),
|
||||||
|
unsafe.Sizeof(query))
|
||||||
|
size = int(query.ProgCnt)
|
||||||
|
runtime.KeepAlive(query)
|
||||||
|
if errno != 0 {
|
||||||
|
// On ENOSPC we get the correct number of programs.
|
||||||
|
if errno == unix.ENOSPC {
|
||||||
|
retries++
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("bpf_prog_query(BPF_CGROUP_DEVICE) failed: %w", errno)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert the ids to program handles.
|
||||||
|
progIds = progIds[:size]
|
||||||
|
programs := make([]*ebpf.Program, 0, len(progIds))
|
||||||
|
for _, progId := range progIds {
|
||||||
|
program, err := ebpf.NewProgramFromID(ebpf.ProgramID(progId))
|
||||||
|
if err != nil {
|
||||||
|
// We skip over programs that give us -EACCES or -EPERM. This
|
||||||
|
// is necessary because there may be BPF programs that have
|
||||||
|
// been attached (such as with --systemd-cgroup) which have an
|
||||||
|
// LSM label that blocks us from interacting with the program.
|
||||||
|
//
|
||||||
|
// Because additional BPF_CGROUP_DEVICE programs only can add
|
||||||
|
// restrictions, there's no real issue with just ignoring these
|
||||||
|
// programs (and stops runc from breaking on distributions with
|
||||||
|
// very strict SELinux policies).
|
||||||
|
if errors.Is(err, os.ErrPermission) {
|
||||||
|
logrus.Debugf("ignoring existing CGROUP_DEVICE program (prog_id=%v) which cannot be accessed by runc -- likely due to LSM policy: %v", progId, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("cannot fetch program from id: %w", err)
|
||||||
|
}
|
||||||
|
programs = append(programs, program)
|
||||||
|
}
|
||||||
|
runtime.KeepAlive(progIds)
|
||||||
|
return programs, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil, errors.New("could not get complete list of CGROUP_DEVICE programs")
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
// haveBpfProgReplaceBool caches the result of the BPF_F_REPLACE support
// probe performed by haveBpfProgReplace.
haveBpfProgReplaceBool bool
|
||||||
|
// haveBpfProgReplaceOnce ensures that probe runs at most once.
haveBpfProgReplaceOnce sync.Once
|
||||||
|
)
|
||||||
|
|
||||||
|
// Loosely based on the BPF_F_REPLACE support check in
|
||||||
|
// https://github.com/cilium/ebpf/blob/v0.6.0/link/syscalls.go.
|
||||||
|
//
|
||||||
|
// TODO: move this logic to cilium/ebpf
|
||||||
|
func haveBpfProgReplace() bool {
|
||||||
|
haveBpfProgReplaceOnce.Do(func() {
|
||||||
|
prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{
|
||||||
|
Type: ebpf.CGroupDevice,
|
||||||
|
License: "MIT",
|
||||||
|
Instructions: asm.Instructions{
|
||||||
|
asm.Mov.Imm(asm.R0, 0),
|
||||||
|
asm.Return(),
|
||||||
|
},
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
logrus.Debugf("checking for BPF_F_REPLACE support: ebpf.NewProgram failed: %v", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer prog.Close()
|
||||||
|
|
||||||
|
devnull, err := os.Open("/dev/null")
|
||||||
|
if err != nil {
|
||||||
|
logrus.Debugf("checking for BPF_F_REPLACE support: open dummy target fd: %v", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer devnull.Close()
|
||||||
|
|
||||||
|
// We know that we have BPF_PROG_ATTACH since we can load
|
||||||
|
// BPF_CGROUP_DEVICE programs. If passing BPF_F_REPLACE gives us EINVAL
|
||||||
|
// we know that the feature isn't present.
|
||||||
|
err = link.RawAttachProgram(link.RawAttachProgramOptions{
|
||||||
|
// We rely on this fd being checked after attachFlags.
|
||||||
|
Target: int(devnull.Fd()),
|
||||||
|
// Attempt to "replace" bad fds with this program.
|
||||||
|
Program: prog,
|
||||||
|
Attach: ebpf.AttachCGroupDevice,
|
||||||
|
Flags: unix.BPF_F_ALLOW_MULTI | unix.BPF_F_REPLACE,
|
||||||
|
})
|
||||||
|
if errors.Is(err, unix.EINVAL) {
|
||||||
|
// not supported
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// attach_flags test succeeded.
|
||||||
|
if !errors.Is(err, unix.EBADF) {
|
||||||
|
logrus.Debugf("checking for BPF_F_REPLACE: got unexpected (not EBADF or EINVAL) error: %v", err)
|
||||||
|
}
|
||||||
|
haveBpfProgReplaceBool = true
|
||||||
|
})
|
||||||
|
return haveBpfProgReplaceBool
|
||||||
|
}
|
||||||
|
|
||||||
|
// LoadAttachCgroupDeviceFilter installs eBPF device filter program to /sys/fs/cgroup/<foo> directory.
|
||||||
|
//
|
||||||
|
// Requires the system to be running in cgroup2 unified-mode with kernel >= 4.15 .
|
||||||
|
//
|
||||||
|
// https://github.com/torvalds/linux/commit/ebc614f687369f9df99828572b1d85a7c2de3d92
|
||||||
|
func LoadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFd int) (func() error, error) {
|
||||||
|
// Increase `ulimit -l` limit to avoid BPF_PROG_LOAD error (#2167).
|
||||||
|
// This limit is not inherited into the container.
|
||||||
|
memlockLimit := &unix.Rlimit{
|
||||||
|
Cur: unix.RLIM_INFINITY,
|
||||||
|
Max: unix.RLIM_INFINITY,
|
||||||
|
}
|
||||||
|
_ = unix.Setrlimit(unix.RLIMIT_MEMLOCK, memlockLimit)
|
||||||
|
|
||||||
|
// Get the list of existing programs.
|
||||||
|
oldProgs, err := findAttachedCgroupDeviceFilters(dirFd)
|
||||||
|
if err != nil {
|
||||||
|
return nilCloser, err
|
||||||
|
}
|
||||||
|
useReplaceProg := haveBpfProgReplace() && len(oldProgs) == 1
|
||||||
|
|
||||||
|
// Generate new program.
|
||||||
|
spec := &ebpf.ProgramSpec{
|
||||||
|
Type: ebpf.CGroupDevice,
|
||||||
|
Instructions: insts,
|
||||||
|
License: license,
|
||||||
|
}
|
||||||
|
prog, err := ebpf.NewProgram(spec)
|
||||||
|
if err != nil {
|
||||||
|
return nilCloser, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// If there is only one old program, we can just replace it directly.
|
||||||
|
var (
|
||||||
|
replaceProg *ebpf.Program
|
||||||
|
attachFlags uint32 = unix.BPF_F_ALLOW_MULTI
|
||||||
|
)
|
||||||
|
if useReplaceProg {
|
||||||
|
replaceProg = oldProgs[0]
|
||||||
|
attachFlags |= unix.BPF_F_REPLACE
|
||||||
|
}
|
||||||
|
err = link.RawAttachProgram(link.RawAttachProgramOptions{
|
||||||
|
Target: dirFd,
|
||||||
|
Program: prog,
|
||||||
|
Replace: replaceProg,
|
||||||
|
Attach: ebpf.AttachCGroupDevice,
|
||||||
|
Flags: attachFlags,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return nilCloser, fmt.Errorf("failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI): %w", err)
|
||||||
|
}
|
||||||
|
closer := func() error {
|
||||||
|
err = link.RawDetachProgram(link.RawDetachProgramOptions{
|
||||||
|
Target: dirFd,
|
||||||
|
Program: prog,
|
||||||
|
Attach: ebpf.AttachCGroupDevice,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE): %w", err)
|
||||||
|
}
|
||||||
|
// TODO: Should we attach the old filters back in this case? Otherwise
|
||||||
|
// we fail-open on a security feature, which is a bit scary.
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if !useReplaceProg {
|
||||||
|
logLevel := logrus.DebugLevel
|
||||||
|
// If there was more than one old program, give a warning (since this
|
||||||
|
// really shouldn't happen with runc-managed cgroups) and then detach
|
||||||
|
// all the old programs.
|
||||||
|
if len(oldProgs) > 1 {
|
||||||
|
// NOTE: Ideally this should be a warning but it turns out that
|
||||||
|
// systemd-managed cgroups trigger this warning (apparently
|
||||||
|
// systemd doesn't delete old non-systemd programs when
|
||||||
|
// setting properties).
|
||||||
|
logrus.Infof("found more than one filter (%d) attached to a cgroup -- removing extra filters!", len(oldProgs))
|
||||||
|
logLevel = logrus.InfoLevel
|
||||||
|
}
|
||||||
|
for idx, oldProg := range oldProgs {
|
||||||
|
// Output some extra debug info.
|
||||||
|
if info, err := oldProg.Info(); err == nil {
|
||||||
|
fields := logrus.Fields{
|
||||||
|
"type": info.Type.String(),
|
||||||
|
"tag": info.Tag,
|
||||||
|
"name": info.Name,
|
||||||
|
}
|
||||||
|
if id, ok := info.ID(); ok {
|
||||||
|
fields["id"] = id
|
||||||
|
}
|
||||||
|
if runCount, ok := info.RunCount(); ok {
|
||||||
|
fields["run_count"] = runCount
|
||||||
|
}
|
||||||
|
if runtime, ok := info.Runtime(); ok {
|
||||||
|
fields["runtime"] = runtime.String()
|
||||||
|
}
|
||||||
|
logrus.WithFields(fields).Logf(logLevel, "removing old filter %d from cgroup", idx)
|
||||||
|
}
|
||||||
|
err = link.RawDetachProgram(link.RawDetachProgramOptions{
|
||||||
|
Target: dirFd,
|
||||||
|
Program: oldProg,
|
||||||
|
Attach: ebpf.AttachCGroupDevice,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return closer, fmt.Errorf("failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE) on old filter program: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return closer, nil
|
||||||
|
}
|
190
vendor/github.com/opencontainers/runc/libcontainer/cgroups/file.go
generated
vendored
Normal file
190
vendor/github.com/opencontainers/runc/libcontainer/cgroups/file.go
generated
vendored
Normal file
@ -0,0 +1,190 @@
|
|||||||
|
package cgroups
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"github.com/sirupsen/logrus"
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
)
|
||||||
|
|
||||||
|
// OpenFile opens a cgroup file in a given dir with given flags.
|
||||||
|
// It is supposed to be used for cgroup files only, and returns
|
||||||
|
// an error if the file is not a cgroup file.
|
||||||
|
//
|
||||||
|
// Arguments dir and file are joined together to form an absolute path
|
||||||
|
// to a file being opened.
|
||||||
|
func OpenFile(dir, file string, flags int) (*os.File, error) {
|
||||||
|
if dir == "" {
|
||||||
|
return nil, fmt.Errorf("no directory specified for %s", file)
|
||||||
|
}
|
||||||
|
return openFile(dir, file, flags)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ReadFile reads data from a cgroup file in dir.
|
||||||
|
// It is supposed to be used for cgroup files only.
|
||||||
|
func ReadFile(dir, file string) (string, error) {
|
||||||
|
fd, err := OpenFile(dir, file, unix.O_RDONLY)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
defer fd.Close()
|
||||||
|
var buf bytes.Buffer
|
||||||
|
|
||||||
|
_, err = buf.ReadFrom(fd)
|
||||||
|
return buf.String(), err
|
||||||
|
}
|
||||||
|
|
||||||
|
// WriteFile writes data to a cgroup file in dir.
|
||||||
|
// It is supposed to be used for cgroup files only.
|
||||||
|
func WriteFile(dir, file, data string) error {
|
||||||
|
fd, err := OpenFile(dir, file, unix.O_WRONLY)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer fd.Close()
|
||||||
|
if err := retryingWriteFile(fd, data); err != nil {
|
||||||
|
// Having data in the error message helps in debugging.
|
||||||
|
return fmt.Errorf("failed to write %q: %w", data, err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func retryingWriteFile(fd *os.File, data string) error {
|
||||||
|
for {
|
||||||
|
_, err := fd.Write([]byte(data))
|
||||||
|
if errors.Is(err, unix.EINTR) {
|
||||||
|
logrus.Infof("interrupted while writing %s to %s", data, fd.Name())
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const (
	// cgroupfsDir is the directory that prepareOpenat2 opens and that
	// openFile resolves cgroup paths against.
	cgroupfsDir = "/sys/fs/cgroup"
	// cgroupfsPrefix is cgroupfsDir with a trailing slash; openFile strips
	// it from absolute paths to obtain a path relative to cgroupfsDir.
	cgroupfsPrefix = cgroupfsDir + "/"
)
|
||||||
|
|
||||||
|
var (
	// TestMode is set to true by unit tests that need "fake" cgroupfs.
	TestMode bool

	// cgroupFd is the descriptor for cgroupfsDir stored by prepareOpenat2
	// once its setup succeeds; it stays -1 until then.
	cgroupFd int = -1
	// prepOnce makes the body of prepareOpenat2 run a single time.
	prepOnce sync.Once
	// prepErr is the setup error recorded by prepareOpenat2, if any.
	prepErr error
	// resolveFlags is the openat2 Resolve value chosen by prepareOpenat2
	// and used by openFile.
	resolveFlags uint64
)
|
||||||
|
|
||||||
|
func prepareOpenat2() error {
|
||||||
|
prepOnce.Do(func() {
|
||||||
|
fd, err := unix.Openat2(-1, cgroupfsDir, &unix.OpenHow{
|
||||||
|
Flags: unix.O_DIRECTORY | unix.O_PATH,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
prepErr = &os.PathError{Op: "openat2", Path: cgroupfsDir, Err: err}
|
||||||
|
if err != unix.ENOSYS { //nolint:errorlint // unix errors are bare
|
||||||
|
logrus.Warnf("falling back to securejoin: %s", prepErr)
|
||||||
|
} else {
|
||||||
|
logrus.Debug("openat2 not available, falling back to securejoin")
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
var st unix.Statfs_t
|
||||||
|
if err = unix.Fstatfs(fd, &st); err != nil {
|
||||||
|
prepErr = &os.PathError{Op: "statfs", Path: cgroupfsDir, Err: err}
|
||||||
|
logrus.Warnf("falling back to securejoin: %s", prepErr)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
cgroupFd = fd
|
||||||
|
|
||||||
|
resolveFlags = unix.RESOLVE_BENEATH | unix.RESOLVE_NO_MAGICLINKS
|
||||||
|
if st.Type == unix.CGROUP2_SUPER_MAGIC {
|
||||||
|
// cgroupv2 has a single mountpoint and no "cpu,cpuacct" symlinks
|
||||||
|
resolveFlags |= unix.RESOLVE_NO_XDEV | unix.RESOLVE_NO_SYMLINKS
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
return prepErr
|
||||||
|
}
|
||||||
|
|
||||||
|
func openFile(dir, file string, flags int) (*os.File, error) {
|
||||||
|
mode := os.FileMode(0)
|
||||||
|
if TestMode && flags&os.O_WRONLY != 0 {
|
||||||
|
// "emulate" cgroup fs for unit tests
|
||||||
|
flags |= os.O_TRUNC | os.O_CREATE
|
||||||
|
mode = 0o600
|
||||||
|
}
|
||||||
|
path := path.Join(dir, file)
|
||||||
|
if prepareOpenat2() != nil {
|
||||||
|
return openFallback(path, flags, mode)
|
||||||
|
}
|
||||||
|
relPath := strings.TrimPrefix(path, cgroupfsPrefix)
|
||||||
|
if len(relPath) == len(path) { // non-standard path, old system?
|
||||||
|
return openFallback(path, flags, mode)
|
||||||
|
}
|
||||||
|
|
||||||
|
fd, err := unix.Openat2(cgroupFd, relPath,
|
||||||
|
&unix.OpenHow{
|
||||||
|
Resolve: resolveFlags,
|
||||||
|
Flags: uint64(flags) | unix.O_CLOEXEC,
|
||||||
|
Mode: uint64(mode),
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
err = &os.PathError{Op: "openat2", Path: path, Err: err}
|
||||||
|
// Check if cgroupFd is still opened to cgroupfsDir
|
||||||
|
// (happens when this package is incorrectly used
|
||||||
|
// across the chroot/pivot_root/mntns boundary, or
|
||||||
|
// when /sys/fs/cgroup is remounted).
|
||||||
|
//
|
||||||
|
// TODO: if such usage will ever be common, amend this
|
||||||
|
// to reopen cgroupFd and retry openat2.
|
||||||
|
fdStr := strconv.Itoa(cgroupFd)
|
||||||
|
fdDest, _ := os.Readlink("/proc/self/fd/" + fdStr)
|
||||||
|
if fdDest != cgroupfsDir {
|
||||||
|
// Wrap the error so it is clear that cgroupFd
|
||||||
|
// is opened to an unexpected/wrong directory.
|
||||||
|
err = fmt.Errorf("cgroupFd %s unexpectedly opened to %s != %s: %w",
|
||||||
|
fdStr, fdDest, cgroupfsDir, err)
|
||||||
|
}
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return os.NewFile(uintptr(fd), path), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var errNotCgroupfs = errors.New("not a cgroup file")
|
||||||
|
|
||||||
|
// Can be changed by unit tests.
|
||||||
|
var openFallback = openAndCheck
|
||||||
|
|
||||||
|
// openAndCheck is used when openat2(2) is not available. It checks the opened
|
||||||
|
// file is on cgroupfs, returning an error otherwise.
|
||||||
|
func openAndCheck(path string, flags int, mode os.FileMode) (*os.File, error) {
|
||||||
|
fd, err := os.OpenFile(path, flags, mode)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if TestMode {
|
||||||
|
return fd, nil
|
||||||
|
}
|
||||||
|
// Check this is a cgroupfs file.
|
||||||
|
var st unix.Statfs_t
|
||||||
|
if err := unix.Fstatfs(int(fd.Fd()), &st); err != nil {
|
||||||
|
_ = fd.Close()
|
||||||
|
return nil, &os.PathError{Op: "statfs", Path: path, Err: err}
|
||||||
|
}
|
||||||
|
if st.Type != unix.CGROUP_SUPER_MAGIC && st.Type != unix.CGROUP2_SUPER_MAGIC {
|
||||||
|
_ = fd.Close()
|
||||||
|
return nil, &os.PathError{Op: "open", Path: path, Err: errNotCgroupfs}
|
||||||
|
}
|
||||||
|
|
||||||
|
return fd, nil
|
||||||
|
}
|
311
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go
generated
vendored
Normal file
311
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go
generated
vendored
Normal file
@ -0,0 +1,311 @@
|
|||||||
|
package fs
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
)
|
||||||
|
|
||||||
|
// BlkioGroup handles the "blkio" cgroup controller.
type BlkioGroup struct {
	// weightFilename and weightDeviceFilename name the files used for
	// weight settings; both are filled in by detectWeightFilenames.
	weightFilename       string
	weightDeviceFilename string
}

// Name returns the controller name, "blkio".
func (s *BlkioGroup) Name() string {
	const name = "blkio"
	return name
}
|
||||||
|
|
||||||
|
func (s *BlkioGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||||
|
return apply(path, pid)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *BlkioGroup) Set(path string, r *configs.Resources) error {
|
||||||
|
s.detectWeightFilenames(path)
|
||||||
|
if r.BlkioWeight != 0 {
|
||||||
|
if err := cgroups.WriteFile(path, s.weightFilename, strconv.FormatUint(uint64(r.BlkioWeight), 10)); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if r.BlkioLeafWeight != 0 {
|
||||||
|
if err := cgroups.WriteFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(r.BlkioLeafWeight), 10)); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, wd := range r.BlkioWeightDevice {
|
||||||
|
if wd.Weight != 0 {
|
||||||
|
if err := cgroups.WriteFile(path, s.weightDeviceFilename, wd.WeightString()); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if wd.LeafWeight != 0 {
|
||||||
|
if err := cgroups.WriteFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, td := range r.BlkioThrottleReadBpsDevice {
|
||||||
|
if err := cgroups.WriteFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, td := range r.BlkioThrottleWriteBpsDevice {
|
||||||
|
if err := cgroups.WriteFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, td := range r.BlkioThrottleReadIOPSDevice {
|
||||||
|
if err := cgroups.WriteFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, td := range r.BlkioThrottleWriteIOPSDevice {
|
||||||
|
if err := cgroups.WriteFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
examples:
|
||||||
|
|
||||||
|
blkio.sectors
|
||||||
|
8:0 6792
|
||||||
|
|
||||||
|
blkio.io_service_bytes
|
||||||
|
8:0 Read 1282048
|
||||||
|
8:0 Write 2195456
|
||||||
|
8:0 Sync 2195456
|
||||||
|
8:0 Async 1282048
|
||||||
|
8:0 Total 3477504
|
||||||
|
Total 3477504
|
||||||
|
|
||||||
|
blkio.io_serviced
|
||||||
|
8:0 Read 124
|
||||||
|
8:0 Write 104
|
||||||
|
8:0 Sync 104
|
||||||
|
8:0 Async 124
|
||||||
|
8:0 Total 228
|
||||||
|
Total 228
|
||||||
|
|
||||||
|
blkio.io_queued
|
||||||
|
8:0 Read 0
|
||||||
|
8:0 Write 0
|
||||||
|
8:0 Sync 0
|
||||||
|
8:0 Async 0
|
||||||
|
8:0 Total 0
|
||||||
|
Total 0
|
||||||
|
*/
|
||||||
|
|
||||||
|
// splitBlkioStatLine reports whether r separates fields in a blkio stat
// line, i.e. whether it is a space or a colon.
func splitBlkioStatLine(r rune) bool {
	switch r {
	case ' ', ':':
		return true
	default:
		return false
	}
}
|
||||||
|
|
||||||
|
func getBlkioStat(dir, file string) ([]cgroups.BlkioStatEntry, error) {
|
||||||
|
var blkioStats []cgroups.BlkioStatEntry
|
||||||
|
f, err := cgroups.OpenFile(dir, file, os.O_RDONLY)
|
||||||
|
if err != nil {
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
return blkioStats, nil
|
||||||
|
}
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
sc := bufio.NewScanner(f)
|
||||||
|
for sc.Scan() {
|
||||||
|
// format: dev type amount
|
||||||
|
fields := strings.FieldsFunc(sc.Text(), splitBlkioStatLine)
|
||||||
|
if len(fields) < 3 {
|
||||||
|
if len(fields) == 2 && fields[0] == "Total" {
|
||||||
|
// skip total line
|
||||||
|
continue
|
||||||
|
} else {
|
||||||
|
return nil, malformedLine(dir, file, sc.Text())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
v, err := strconv.ParseUint(fields[0], 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
return nil, &parseError{Path: dir, File: file, Err: err}
|
||||||
|
}
|
||||||
|
major := v
|
||||||
|
|
||||||
|
v, err = strconv.ParseUint(fields[1], 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
return nil, &parseError{Path: dir, File: file, Err: err}
|
||||||
|
}
|
||||||
|
minor := v
|
||||||
|
|
||||||
|
op := ""
|
||||||
|
valueField := 2
|
||||||
|
if len(fields) == 4 {
|
||||||
|
op = fields[2]
|
||||||
|
valueField = 3
|
||||||
|
}
|
||||||
|
v, err = strconv.ParseUint(fields[valueField], 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
return nil, &parseError{Path: dir, File: file, Err: err}
|
||||||
|
}
|
||||||
|
blkioStats = append(blkioStats, cgroups.BlkioStatEntry{Major: major, Minor: minor, Op: op, Value: v})
|
||||||
|
}
|
||||||
|
if err := sc.Err(); err != nil {
|
||||||
|
return nil, &parseError{Path: dir, File: file, Err: err}
|
||||||
|
}
|
||||||
|
|
||||||
|
return blkioStats, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||||
|
type blkioStatInfo struct {
|
||||||
|
filename string
|
||||||
|
blkioStatEntriesPtr *[]cgroups.BlkioStatEntry
|
||||||
|
}
|
||||||
|
bfqDebugStats := []blkioStatInfo{
|
||||||
|
{
|
||||||
|
filename: "blkio.bfq.sectors_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.SectorsRecursive,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filename: "blkio.bfq.io_service_time_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceTimeRecursive,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filename: "blkio.bfq.io_wait_time_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoWaitTimeRecursive,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filename: "blkio.bfq.io_merged_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoMergedRecursive,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filename: "blkio.bfq.io_queued_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoQueuedRecursive,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filename: "blkio.bfq.time_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoTimeRecursive,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filename: "blkio.bfq.io_serviced_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filename: "blkio.bfq.io_service_bytes_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
bfqStats := []blkioStatInfo{
|
||||||
|
{
|
||||||
|
filename: "blkio.bfq.io_serviced_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filename: "blkio.bfq.io_service_bytes_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
cfqStats := []blkioStatInfo{
|
||||||
|
{
|
||||||
|
filename: "blkio.sectors_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.SectorsRecursive,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filename: "blkio.io_service_time_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceTimeRecursive,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filename: "blkio.io_wait_time_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoWaitTimeRecursive,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filename: "blkio.io_merged_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoMergedRecursive,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filename: "blkio.io_queued_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoQueuedRecursive,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filename: "blkio.time_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoTimeRecursive,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filename: "blkio.io_serviced_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filename: "blkio.io_service_bytes_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
throttleRecursiveStats := []blkioStatInfo{
|
||||||
|
{
|
||||||
|
filename: "blkio.throttle.io_serviced_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filename: "blkio.throttle.io_service_bytes_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
baseStats := []blkioStatInfo{
|
||||||
|
{
|
||||||
|
filename: "blkio.throttle.io_serviced",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filename: "blkio.throttle.io_service_bytes",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
orderedStats := [][]blkioStatInfo{
|
||||||
|
bfqDebugStats,
|
||||||
|
bfqStats,
|
||||||
|
cfqStats,
|
||||||
|
throttleRecursiveStats,
|
||||||
|
baseStats,
|
||||||
|
}
|
||||||
|
|
||||||
|
var blkioStats []cgroups.BlkioStatEntry
|
||||||
|
var err error
|
||||||
|
|
||||||
|
for _, statGroup := range orderedStats {
|
||||||
|
for i, statInfo := range statGroup {
|
||||||
|
if blkioStats, err = getBlkioStat(path, statInfo.filename); err != nil || blkioStats == nil {
|
||||||
|
// if error occurs on first file, move to next group
|
||||||
|
if i == 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
*statInfo.blkioStatEntriesPtr = blkioStats
|
||||||
|
// finish if all stats are gathered
|
||||||
|
if i == len(statGroup)-1 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *BlkioGroup) detectWeightFilenames(path string) {
|
||||||
|
if s.weightFilename != "" {
|
||||||
|
// Already detected.
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if cgroups.PathExists(filepath.Join(path, "blkio.weight")) {
|
||||||
|
s.weightFilename = "blkio.weight"
|
||||||
|
s.weightDeviceFilename = "blkio.weight_device"
|
||||||
|
} else {
|
||||||
|
s.weightFilename = "blkio.bfq.weight"
|
||||||
|
s.weightDeviceFilename = "blkio.bfq.weight_device"
|
||||||
|
}
|
||||||
|
}
|
129
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go
generated
vendored
Normal file
129
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go
generated
vendored
Normal file
@ -0,0 +1,129 @@
|
|||||||
|
package fs
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"strconv"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
)
|
||||||
|
|
||||||
|
// CpuGroup handles the "cpu" cgroup controller.
type CpuGroup struct{}

// Name returns the controller name, "cpu".
func (s *CpuGroup) Name() string {
	const name = "cpu"
	return name
}
|
||||||
|
|
||||||
|
func (s *CpuGroup) Apply(path string, r *configs.Resources, pid int) error {
|
||||||
|
if err := os.MkdirAll(path, 0o755); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// We should set the real-Time group scheduling settings before moving
|
||||||
|
// in the process because if the process is already in SCHED_RR mode
|
||||||
|
// and no RT bandwidth is set, adding it will fail.
|
||||||
|
if err := s.SetRtSched(path, r); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// Since we are not using apply(), we need to place the pid
|
||||||
|
// into the procs file.
|
||||||
|
return cgroups.WriteCgroupProc(path, pid)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *CpuGroup) SetRtSched(path string, r *configs.Resources) error {
|
||||||
|
if r.CpuRtPeriod != 0 {
|
||||||
|
if err := cgroups.WriteFile(path, "cpu.rt_period_us", strconv.FormatUint(r.CpuRtPeriod, 10)); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if r.CpuRtRuntime != 0 {
|
||||||
|
if err := cgroups.WriteFile(path, "cpu.rt_runtime_us", strconv.FormatInt(r.CpuRtRuntime, 10)); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *CpuGroup) Set(path string, r *configs.Resources) error {
|
||||||
|
if r.CpuShares != 0 {
|
||||||
|
shares := r.CpuShares
|
||||||
|
if err := cgroups.WriteFile(path, "cpu.shares", strconv.FormatUint(shares, 10)); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// read it back
|
||||||
|
sharesRead, err := fscommon.GetCgroupParamUint(path, "cpu.shares")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// ... and check
|
||||||
|
if shares > sharesRead {
|
||||||
|
return fmt.Errorf("the maximum allowed cpu-shares is %d", sharesRead)
|
||||||
|
} else if shares < sharesRead {
|
||||||
|
return fmt.Errorf("the minimum allowed cpu-shares is %d", sharesRead)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var period string
|
||||||
|
if r.CpuPeriod != 0 {
|
||||||
|
period = strconv.FormatUint(r.CpuPeriod, 10)
|
||||||
|
if err := cgroups.WriteFile(path, "cpu.cfs_period_us", period); err != nil {
|
||||||
|
// Sometimes when the period to be set is smaller
|
||||||
|
// than the current one, it is rejected by the kernel
|
||||||
|
// (EINVAL) as old_quota/new_period exceeds the parent
|
||||||
|
// cgroup quota limit. If this happens and the quota is
|
||||||
|
// going to be set, ignore the error for now and retry
|
||||||
|
// after setting the quota.
|
||||||
|
if !errors.Is(err, unix.EINVAL) || r.CpuQuota == 0 {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
period = ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if r.CpuQuota != 0 {
|
||||||
|
if err := cgroups.WriteFile(path, "cpu.cfs_quota_us", strconv.FormatInt(r.CpuQuota, 10)); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if period != "" {
|
||||||
|
if err := cgroups.WriteFile(path, "cpu.cfs_period_us", period); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return s.SetRtSched(path, r)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||||
|
const file = "cpu.stat"
|
||||||
|
f, err := cgroups.OpenFile(path, file, os.O_RDONLY)
|
||||||
|
if err != nil {
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
sc := bufio.NewScanner(f)
|
||||||
|
for sc.Scan() {
|
||||||
|
t, v, err := fscommon.ParseKeyValue(sc.Text())
|
||||||
|
if err != nil {
|
||||||
|
return &parseError{Path: path, File: file, Err: err}
|
||||||
|
}
|
||||||
|
switch t {
|
||||||
|
case "nr_periods":
|
||||||
|
stats.CpuStats.ThrottlingData.Periods = v
|
||||||
|
|
||||||
|
case "nr_throttled":
|
||||||
|
stats.CpuStats.ThrottlingData.ThrottledPeriods = v
|
||||||
|
|
||||||
|
case "throttled_time":
|
||||||
|
stats.CpuStats.ThrottlingData.ThrottledTime = v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
166
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go
generated
vendored
Normal file
166
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go
generated
vendored
Normal file
@ -0,0 +1,166 @@
|
|||||||
|
package fs
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"os"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
	// Names of cpuacct files.
	cgroupCpuacctStat     = "cpuacct.stat"
	cgroupCpuacctUsageAll = "cpuacct.usage_all"

	// nanosecondsInSecond is used when converting clock ticks to nanoseconds.
	nanosecondsInSecond = 1_000_000_000

	// NOTE(review): presumably column positions and the column count of a
	// cpuacct.usage_all line; the code that uses them is not visible here.
	userModeColumn              = 1
	kernelModeColumn            = 2
	cuacctUsageAllColumnsNumber = 3

	// The value comes from `C.sysconf(C._SC_CLK_TCK)`, and
	// on Linux it's a constant which is safe to be hard coded,
	// so we can avoid using cgo here. For details, see:
	// https://github.com/containerd/cgroups/pull/12
	clockTicks uint64 = 100
)
|
||||||
|
|
||||||
|
// CpuacctGroup handles the "cpuacct" cgroup controller.
type CpuacctGroup struct{}

// Name returns the controller name, "cpuacct".
func (s *CpuacctGroup) Name() string {
	const name = "cpuacct"
	return name
}
|
||||||
|
|
||||||
|
func (s *CpuacctGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||||
|
return apply(path, pid)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *CpuacctGroup) Set(_ string, _ *configs.Resources) error {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *CpuacctGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||||
|
if !cgroups.PathExists(path) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
userModeUsage, kernelModeUsage, err := getCpuUsageBreakdown(path)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
totalUsage, err := fscommon.GetCgroupParamUint(path, "cpuacct.usage")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
percpuUsage, err := getPercpuUsage(path)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
percpuUsageInKernelmode, percpuUsageInUsermode, err := getPercpuUsageInModes(path)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
stats.CpuStats.CpuUsage.TotalUsage = totalUsage
|
||||||
|
stats.CpuStats.CpuUsage.PercpuUsage = percpuUsage
|
||||||
|
stats.CpuStats.CpuUsage.PercpuUsageInKernelmode = percpuUsageInKernelmode
|
||||||
|
stats.CpuStats.CpuUsage.PercpuUsageInUsermode = percpuUsageInUsermode
|
||||||
|
stats.CpuStats.CpuUsage.UsageInUsermode = userModeUsage
|
||||||
|
stats.CpuStats.CpuUsage.UsageInKernelmode = kernelModeUsage
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns user and kernel usage breakdown in nanoseconds.
|
||||||
|
func getCpuUsageBreakdown(path string) (uint64, uint64, error) {
|
||||||
|
var userModeUsage, kernelModeUsage uint64
|
||||||
|
const (
|
||||||
|
userField = "user"
|
||||||
|
systemField = "system"
|
||||||
|
file = cgroupCpuacctStat
|
||||||
|
)
|
||||||
|
|
||||||
|
// Expected format:
|
||||||
|
// user <usage in ticks>
|
||||||
|
// system <usage in ticks>
|
||||||
|
data, err := cgroups.ReadFile(path, file)
|
||||||
|
if err != nil {
|
||||||
|
return 0, 0, err
|
||||||
|
}
|
||||||
|
// TODO: use strings.SplitN instead.
|
||||||
|
fields := strings.Fields(data)
|
||||||
|
if len(fields) < 4 || fields[0] != userField || fields[2] != systemField {
|
||||||
|
return 0, 0, malformedLine(path, file, data)
|
||||||
|
}
|
||||||
|
if userModeUsage, err = strconv.ParseUint(fields[1], 10, 64); err != nil {
|
||||||
|
return 0, 0, &parseError{Path: path, File: file, Err: err}
|
||||||
|
}
|
||||||
|
if kernelModeUsage, err = strconv.ParseUint(fields[3], 10, 64); err != nil {
|
||||||
|
return 0, 0, &parseError{Path: path, File: file, Err: err}
|
||||||
|
}
|
||||||
|
|
||||||
|
return (userModeUsage * nanosecondsInSecond) / clockTicks, (kernelModeUsage * nanosecondsInSecond) / clockTicks, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func getPercpuUsage(path string) ([]uint64, error) {
|
||||||
|
const file = "cpuacct.usage_percpu"
|
||||||
|
percpuUsage := []uint64{}
|
||||||
|
data, err := cgroups.ReadFile(path, file)
|
||||||
|
if err != nil {
|
||||||
|
return percpuUsage, err
|
||||||
|
}
|
||||||
|
// TODO: use strings.SplitN instead.
|
||||||
|
for _, value := range strings.Fields(data) {
|
||||||
|
value, err := strconv.ParseUint(value, 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
return percpuUsage, &parseError{Path: path, File: file, Err: err}
|
||||||
|
}
|
||||||
|
percpuUsage = append(percpuUsage, value)
|
||||||
|
}
|
||||||
|
return percpuUsage, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func getPercpuUsageInModes(path string) ([]uint64, []uint64, error) {
|
||||||
|
usageKernelMode := []uint64{}
|
||||||
|
usageUserMode := []uint64{}
|
||||||
|
const file = cgroupCpuacctUsageAll
|
||||||
|
|
||||||
|
fd, err := cgroups.OpenFile(path, file, os.O_RDONLY)
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
return usageKernelMode, usageUserMode, nil
|
||||||
|
} else if err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
defer fd.Close()
|
||||||
|
|
||||||
|
scanner := bufio.NewScanner(fd)
|
||||||
|
scanner.Scan() // skipping header line
|
||||||
|
|
||||||
|
for scanner.Scan() {
|
||||||
|
lineFields := strings.SplitN(scanner.Text(), " ", cuacctUsageAllColumnsNumber+1)
|
||||||
|
if len(lineFields) != cuacctUsageAllColumnsNumber {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
usageInKernelMode, err := strconv.ParseUint(lineFields[kernelModeColumn], 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, &parseError{Path: path, File: file, Err: err}
|
||||||
|
}
|
||||||
|
usageKernelMode = append(usageKernelMode, usageInKernelMode)
|
||||||
|
|
||||||
|
usageInUserMode, err := strconv.ParseUint(lineFields[userModeColumn], 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, &parseError{Path: path, File: file, Err: err}
|
||||||
|
}
|
||||||
|
usageUserMode = append(usageUserMode, usageInUserMode)
|
||||||
|
}
|
||||||
|
if err := scanner.Err(); err != nil {
|
||||||
|
return nil, nil, &parseError{Path: path, File: file, Err: err}
|
||||||
|
}
|
||||||
|
|
||||||
|
return usageKernelMode, usageUserMode, nil
|
||||||
|
}
|
245
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go
generated
vendored
Normal file
245
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go
generated
vendored
Normal file
@ -0,0 +1,245 @@
|
|||||||
|
package fs
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
)
|
||||||
|
|
||||||
|
type CpusetGroup struct{}
|
||||||
|
|
||||||
|
func (s *CpusetGroup) Name() string {
|
||||||
|
return "cpuset"
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *CpusetGroup) Apply(path string, r *configs.Resources, pid int) error {
|
||||||
|
return s.ApplyDir(path, r, pid)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *CpusetGroup) Set(path string, r *configs.Resources) error {
|
||||||
|
if r.CpusetCpus != "" {
|
||||||
|
if err := cgroups.WriteFile(path, "cpuset.cpus", r.CpusetCpus); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if r.CpusetMems != "" {
|
||||||
|
if err := cgroups.WriteFile(path, "cpuset.mems", r.CpusetMems); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func getCpusetStat(path string, file string) ([]uint16, error) {
|
||||||
|
var extracted []uint16
|
||||||
|
fileContent, err := fscommon.GetCgroupParamString(path, file)
|
||||||
|
if err != nil {
|
||||||
|
return extracted, err
|
||||||
|
}
|
||||||
|
if len(fileContent) == 0 {
|
||||||
|
return extracted, &parseError{Path: path, File: file, Err: errors.New("empty file")}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, s := range strings.Split(fileContent, ",") {
|
||||||
|
sp := strings.SplitN(s, "-", 3)
|
||||||
|
switch len(sp) {
|
||||||
|
case 3:
|
||||||
|
return extracted, &parseError{Path: path, File: file, Err: errors.New("extra dash")}
|
||||||
|
case 2:
|
||||||
|
min, err := strconv.ParseUint(sp[0], 10, 16)
|
||||||
|
if err != nil {
|
||||||
|
return extracted, &parseError{Path: path, File: file, Err: err}
|
||||||
|
}
|
||||||
|
max, err := strconv.ParseUint(sp[1], 10, 16)
|
||||||
|
if err != nil {
|
||||||
|
return extracted, &parseError{Path: path, File: file, Err: err}
|
||||||
|
}
|
||||||
|
if min > max {
|
||||||
|
return extracted, &parseError{Path: path, File: file, Err: errors.New("invalid values, min > max")}
|
||||||
|
}
|
||||||
|
for i := min; i <= max; i++ {
|
||||||
|
extracted = append(extracted, uint16(i))
|
||||||
|
}
|
||||||
|
case 1:
|
||||||
|
value, err := strconv.ParseUint(s, 10, 16)
|
||||||
|
if err != nil {
|
||||||
|
return extracted, &parseError{Path: path, File: file, Err: err}
|
||||||
|
}
|
||||||
|
extracted = append(extracted, uint16(value))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return extracted, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||||
|
var err error
|
||||||
|
|
||||||
|
stats.CPUSetStats.CPUs, err = getCpusetStat(path, "cpuset.cpus")
|
||||||
|
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
stats.CPUSetStats.CPUExclusive, err = fscommon.GetCgroupParamUint(path, "cpuset.cpu_exclusive")
|
||||||
|
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
stats.CPUSetStats.Mems, err = getCpusetStat(path, "cpuset.mems")
|
||||||
|
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
stats.CPUSetStats.MemHardwall, err = fscommon.GetCgroupParamUint(path, "cpuset.mem_hardwall")
|
||||||
|
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
stats.CPUSetStats.MemExclusive, err = fscommon.GetCgroupParamUint(path, "cpuset.mem_exclusive")
|
||||||
|
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
stats.CPUSetStats.MemoryMigrate, err = fscommon.GetCgroupParamUint(path, "cpuset.memory_migrate")
|
||||||
|
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
stats.CPUSetStats.MemorySpreadPage, err = fscommon.GetCgroupParamUint(path, "cpuset.memory_spread_page")
|
||||||
|
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
stats.CPUSetStats.MemorySpreadSlab, err = fscommon.GetCgroupParamUint(path, "cpuset.memory_spread_slab")
|
||||||
|
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
stats.CPUSetStats.MemoryPressure, err = fscommon.GetCgroupParamUint(path, "cpuset.memory_pressure")
|
||||||
|
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
stats.CPUSetStats.SchedLoadBalance, err = fscommon.GetCgroupParamUint(path, "cpuset.sched_load_balance")
|
||||||
|
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
stats.CPUSetStats.SchedRelaxDomainLevel, err = fscommon.GetCgroupParamInt(path, "cpuset.sched_relax_domain_level")
|
||||||
|
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *CpusetGroup) ApplyDir(dir string, r *configs.Resources, pid int) error {
|
||||||
|
// This might happen if we have no cpuset cgroup mounted.
|
||||||
|
// Just do nothing and don't fail.
|
||||||
|
if dir == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
// 'ensureParent' start with parent because we don't want to
|
||||||
|
// explicitly inherit from parent, it could conflict with
|
||||||
|
// 'cpuset.cpu_exclusive'.
|
||||||
|
if err := cpusetEnsureParent(filepath.Dir(dir)); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := os.Mkdir(dir, 0o755); err != nil && !os.IsExist(err) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// We didn't inherit cpuset configs from parent, but we have
|
||||||
|
// to ensure cpuset configs are set before moving task into the
|
||||||
|
// cgroup.
|
||||||
|
// The logic is, if user specified cpuset configs, use these
|
||||||
|
// specified configs, otherwise, inherit from parent. This makes
|
||||||
|
// cpuset configs work correctly with 'cpuset.cpu_exclusive', and
|
||||||
|
// keep backward compatibility.
|
||||||
|
if err := s.ensureCpusAndMems(dir, r); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// Since we are not using apply(), we need to place the pid
|
||||||
|
// into the procs file.
|
||||||
|
return cgroups.WriteCgroupProc(dir, pid)
|
||||||
|
}
|
||||||
|
|
||||||
|
func getCpusetSubsystemSettings(parent string) (cpus, mems string, err error) {
|
||||||
|
if cpus, err = cgroups.ReadFile(parent, "cpuset.cpus"); err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if mems, err = cgroups.ReadFile(parent, "cpuset.mems"); err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
return cpus, mems, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// cpusetEnsureParent makes sure that the parent directories of current
|
||||||
|
// are created and populated with the proper cpus and mems files copied
|
||||||
|
// from their respective parent. It does that recursively, starting from
|
||||||
|
// the top of the cpuset hierarchy (i.e. cpuset cgroup mount point).
|
||||||
|
func cpusetEnsureParent(current string) error {
|
||||||
|
var st unix.Statfs_t
|
||||||
|
|
||||||
|
parent := filepath.Dir(current)
|
||||||
|
err := unix.Statfs(parent, &st)
|
||||||
|
if err == nil && st.Type != unix.CGROUP_SUPER_MAGIC {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
// Treat non-existing directory as cgroupfs as it will be created,
|
||||||
|
// and the root cpuset directory obviously exists.
|
||||||
|
if err != nil && err != unix.ENOENT { //nolint:errorlint // unix errors are bare
|
||||||
|
return &os.PathError{Op: "statfs", Path: parent, Err: err}
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := cpusetEnsureParent(parent); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := os.Mkdir(current, 0o755); err != nil && !os.IsExist(err) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return cpusetCopyIfNeeded(current, parent)
|
||||||
|
}
|
||||||
|
|
||||||
|
// cpusetCopyIfNeeded copies the cpuset.cpus and cpuset.mems from the parent
|
||||||
|
// directory to the current directory if the file's contents are 0
|
||||||
|
func cpusetCopyIfNeeded(current, parent string) error {
|
||||||
|
currentCpus, currentMems, err := getCpusetSubsystemSettings(current)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
parentCpus, parentMems, err := getCpusetSubsystemSettings(parent)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if isEmptyCpuset(currentCpus) {
|
||||||
|
if err := cgroups.WriteFile(current, "cpuset.cpus", parentCpus); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if isEmptyCpuset(currentMems) {
|
||||||
|
if err := cgroups.WriteFile(current, "cpuset.mems", parentMems); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// isEmptyCpuset reports whether str, the raw content of a cpuset file,
// represents an unset value: either the empty string or a lone newline.
func isEmptyCpuset(str string) bool {
	switch str {
	case "", "\n":
		return true
	default:
		return false
	}
}
|
||||||
|
|
||||||
|
func (s *CpusetGroup) ensureCpusAndMems(path string, r *configs.Resources) error {
|
||||||
|
if err := s.Set(path, r); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return cpusetCopyIfNeeded(path, filepath.Dir(path))
|
||||||
|
}
|
109
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go
generated
vendored
Normal file
109
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go
generated
vendored
Normal file
@ -0,0 +1,109 @@
|
|||||||
|
package fs
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"errors"
|
||||||
|
"reflect"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
cgroupdevices "github.com/opencontainers/runc/libcontainer/cgroups/devices"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/devices"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/userns"
|
||||||
|
)
|
||||||
|
|
||||||
|
type DevicesGroup struct {
|
||||||
|
TestingSkipFinalCheck bool
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *DevicesGroup) Name() string {
|
||||||
|
return "devices"
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *DevicesGroup) Apply(path string, r *configs.Resources, pid int) error {
|
||||||
|
if r.SkipDevices {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if path == "" {
|
||||||
|
// Return error here, since devices cgroup
|
||||||
|
// is a hard requirement for container's security.
|
||||||
|
return errSubsystemDoesNotExist
|
||||||
|
}
|
||||||
|
|
||||||
|
return apply(path, pid)
|
||||||
|
}
|
||||||
|
|
||||||
|
func loadEmulator(path string) (*cgroupdevices.Emulator, error) {
|
||||||
|
list, err := cgroups.ReadFile(path, "devices.list")
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return cgroupdevices.EmulatorFromList(bytes.NewBufferString(list))
|
||||||
|
}
|
||||||
|
|
||||||
|
func buildEmulator(rules []*devices.Rule) (*cgroupdevices.Emulator, error) {
|
||||||
|
// This defaults to a white-list -- which is what we want!
|
||||||
|
emu := &cgroupdevices.Emulator{}
|
||||||
|
for _, rule := range rules {
|
||||||
|
if err := emu.Apply(*rule); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return emu, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *DevicesGroup) Set(path string, r *configs.Resources) error {
|
||||||
|
if userns.RunningInUserNS() || r.SkipDevices {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Generate two emulators, one for the current state of the cgroup and one
|
||||||
|
// for the requested state by the user.
|
||||||
|
current, err := loadEmulator(path)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
target, err := buildEmulator(r.Devices)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compute the minimal set of transition rules needed to achieve the
|
||||||
|
// requested state.
|
||||||
|
transitionRules, err := current.Transition(target)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
for _, rule := range transitionRules {
|
||||||
|
file := "devices.deny"
|
||||||
|
if rule.Allow {
|
||||||
|
file = "devices.allow"
|
||||||
|
}
|
||||||
|
if err := cgroups.WriteFile(path, file, rule.CgroupString()); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Final safety check -- ensure that the resulting state is what was
|
||||||
|
// requested. This is only really correct for white-lists, but for
|
||||||
|
// black-lists we can at least check that the cgroup is in the right mode.
|
||||||
|
//
|
||||||
|
// This safety-check is skipped for the unit tests because we cannot
|
||||||
|
// currently mock devices.list correctly.
|
||||||
|
if !s.TestingSkipFinalCheck {
|
||||||
|
currentAfter, err := loadEmulator(path)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if !target.IsBlacklist() && !reflect.DeepEqual(currentAfter, target) {
|
||||||
|
return errors.New("resulting devices cgroup doesn't precisely match target")
|
||||||
|
} else if target.IsBlacklist() != currentAfter.IsBlacklist() {
|
||||||
|
return errors.New("resulting devices cgroup doesn't match target mode")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *DevicesGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||||
|
return nil
|
||||||
|
}
|
15
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/error.go
generated
vendored
Normal file
15
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/error.go
generated
vendored
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
package fs
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||||
|
)
|
||||||
|
|
||||||
|
// parseError is a package-local alias for fscommon.ParseError.
type parseError = fscommon.ParseError
|
||||||
|
|
||||||
|
// malformedLine is used by all cgroupfs file parsers that expect a line
|
||||||
|
// in a particular format but get some garbage instead.
|
||||||
|
func malformedLine(path, file, line string) error {
|
||||||
|
return &parseError{Path: path, File: file, Err: fmt.Errorf("malformed line: %s", line)}
|
||||||
|
}
|
158
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go
generated
vendored
Normal file
158
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go
generated
vendored
Normal file
@ -0,0 +1,158 @@
|
|||||||
|
package fs
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
"github.com/sirupsen/logrus"
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
)
|
||||||
|
|
||||||
|
type FreezerGroup struct{}
|
||||||
|
|
||||||
|
func (s *FreezerGroup) Name() string {
|
||||||
|
return "freezer"
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *FreezerGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||||
|
return apply(path, pid)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *FreezerGroup) Set(path string, r *configs.Resources) (Err error) {
|
||||||
|
switch r.Freezer {
|
||||||
|
case configs.Frozen:
|
||||||
|
defer func() {
|
||||||
|
if Err != nil {
|
||||||
|
// Freezing failed, and it is bad and dangerous
|
||||||
|
// to leave the cgroup in FROZEN or FREEZING
|
||||||
|
// state, so (try to) thaw it back.
|
||||||
|
_ = cgroups.WriteFile(path, "freezer.state", string(configs.Thawed))
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
// As per older kernel docs (freezer-subsystem.txt before
|
||||||
|
// kernel commit ef9fe980c6fcc1821), if FREEZING is seen,
|
||||||
|
// userspace should either retry or thaw. While current
|
||||||
|
// kernel cgroup v1 docs no longer mention a need to retry,
|
||||||
|
// even a recent kernel (v5.4, Ubuntu 20.04) can't reliably
|
||||||
|
// freeze a cgroup v1 while new processes keep appearing in it
|
||||||
|
// (either via fork/clone or by writing new PIDs to
|
||||||
|
// cgroup.procs).
|
||||||
|
//
|
||||||
|
// The numbers below are empirically chosen to have a decent
|
||||||
|
// chance to succeed in various scenarios ("runc pause/unpause
|
||||||
|
// with parallel runc exec" and "bare freeze/unfreeze on a very
|
||||||
|
// slow system"), tested on RHEL7 and Ubuntu 20.04 kernels.
|
||||||
|
//
|
||||||
|
// Adding any amount of sleep in between retries did not
|
||||||
|
// increase the chances of successful freeze in "pause/unpause
|
||||||
|
// with parallel exec" reproducer. OTOH, adding an occasional
|
||||||
|
// sleep helped for the case where the system is extremely slow
|
||||||
|
// (CentOS 7 VM on GHA CI).
|
||||||
|
//
|
||||||
|
// Alas, this is still a game of chances, since the real fix
|
||||||
|
// belong to the kernel (cgroup v2 do not have this bug).
|
||||||
|
|
||||||
|
for i := 0; i < 1000; i++ {
|
||||||
|
if i%50 == 49 {
|
||||||
|
// Occasional thaw and sleep improves
|
||||||
|
// the chances to succeed in freezing
|
||||||
|
// in case new processes keep appearing
|
||||||
|
// in the cgroup.
|
||||||
|
_ = cgroups.WriteFile(path, "freezer.state", string(configs.Thawed))
|
||||||
|
time.Sleep(10 * time.Millisecond)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := cgroups.WriteFile(path, "freezer.state", string(configs.Frozen)); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if i%25 == 24 {
|
||||||
|
// Occasional short sleep before reading
|
||||||
|
// the state back also improves the chances to
|
||||||
|
// succeed in freezing in case of a very slow
|
||||||
|
// system.
|
||||||
|
time.Sleep(10 * time.Microsecond)
|
||||||
|
}
|
||||||
|
state, err := cgroups.ReadFile(path, "freezer.state")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
state = strings.TrimSpace(state)
|
||||||
|
switch state {
|
||||||
|
case "FREEZING":
|
||||||
|
continue
|
||||||
|
case string(configs.Frozen):
|
||||||
|
if i > 1 {
|
||||||
|
logrus.Debugf("frozen after %d retries", i)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
default:
|
||||||
|
// should never happen
|
||||||
|
return fmt.Errorf("unexpected state %s while freezing", strings.TrimSpace(state))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Despite our best efforts, it got stuck in FREEZING.
|
||||||
|
return errors.New("unable to freeze")
|
||||||
|
case configs.Thawed:
|
||||||
|
return cgroups.WriteFile(path, "freezer.state", string(configs.Thawed))
|
||||||
|
case configs.Undefined:
|
||||||
|
return nil
|
||||||
|
default:
|
||||||
|
return fmt.Errorf("Invalid argument '%s' to freezer.state", string(r.Freezer))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *FreezerGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *FreezerGroup) GetState(path string) (configs.FreezerState, error) {
|
||||||
|
for {
|
||||||
|
state, err := cgroups.ReadFile(path, "freezer.state")
|
||||||
|
if err != nil {
|
||||||
|
// If the kernel is too old, then we just treat the freezer as
|
||||||
|
// being in an "undefined" state.
|
||||||
|
if os.IsNotExist(err) || errors.Is(err, unix.ENODEV) {
|
||||||
|
err = nil
|
||||||
|
}
|
||||||
|
return configs.Undefined, err
|
||||||
|
}
|
||||||
|
switch strings.TrimSpace(state) {
|
||||||
|
case "THAWED":
|
||||||
|
return configs.Thawed, nil
|
||||||
|
case "FROZEN":
|
||||||
|
// Find out whether the cgroup is frozen directly,
|
||||||
|
// or indirectly via an ancestor.
|
||||||
|
self, err := cgroups.ReadFile(path, "freezer.self_freezing")
|
||||||
|
if err != nil {
|
||||||
|
// If the kernel is too old, then we just treat
|
||||||
|
// it as being frozen.
|
||||||
|
if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.ENODEV) {
|
||||||
|
err = nil
|
||||||
|
}
|
||||||
|
return configs.Frozen, err
|
||||||
|
}
|
||||||
|
switch self {
|
||||||
|
case "0\n":
|
||||||
|
return configs.Thawed, nil
|
||||||
|
case "1\n":
|
||||||
|
return configs.Frozen, nil
|
||||||
|
default:
|
||||||
|
return configs.Undefined, fmt.Errorf(`unknown "freezer.self_freezing" state: %q`, self)
|
||||||
|
}
|
||||||
|
case "FREEZING":
|
||||||
|
// Make sure we get a stable freezer state, so retry if the cgroup
|
||||||
|
// is still undergoing freezing. This should be a temporary delay.
|
||||||
|
time.Sleep(1 * time.Millisecond)
|
||||||
|
continue
|
||||||
|
default:
|
||||||
|
return configs.Undefined, fmt.Errorf("unknown freezer.state %q", state)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
265
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs.go
generated
vendored
Normal file
265
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs.go
generated
vendored
Normal file
@ -0,0 +1,265 @@
|
|||||||
|
package fs
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
)
|
||||||
|
|
||||||
|
var subsystems = []subsystem{
|
||||||
|
&CpusetGroup{},
|
||||||
|
&DevicesGroup{},
|
||||||
|
&MemoryGroup{},
|
||||||
|
&CpuGroup{},
|
||||||
|
&CpuacctGroup{},
|
||||||
|
&PidsGroup{},
|
||||||
|
&BlkioGroup{},
|
||||||
|
&HugetlbGroup{},
|
||||||
|
&NetClsGroup{},
|
||||||
|
&NetPrioGroup{},
|
||||||
|
&PerfEventGroup{},
|
||||||
|
&FreezerGroup{},
|
||||||
|
&RdmaGroup{},
|
||||||
|
&NameGroup{GroupName: "name=systemd", Join: true},
|
||||||
|
&NameGroup{GroupName: "misc", Join: true},
|
||||||
|
}
|
||||||
|
|
||||||
|
// errSubsystemDoesNotExist is returned by DevicesGroup.Apply when it is
// called with an empty cgroup path.
var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist")
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
// If using cgroups-hybrid mode then add a "" controller indicating
|
||||||
|
// it should join the cgroups v2.
|
||||||
|
if cgroups.IsCgroup2HybridMode() {
|
||||||
|
subsystems = append(subsystems, &NameGroup{GroupName: "", Join: true})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type subsystem interface {
|
||||||
|
// Name returns the name of the subsystem.
|
||||||
|
Name() string
|
||||||
|
// GetStats fills in the stats for the subsystem.
|
||||||
|
GetStats(path string, stats *cgroups.Stats) error
|
||||||
|
// Apply creates and joins a cgroup, adding pid into it. Some
|
||||||
|
// subsystems use resources to pre-configure the cgroup parents
|
||||||
|
// before creating or joining it.
|
||||||
|
Apply(path string, r *configs.Resources, pid int) error
|
||||||
|
// Set sets the cgroup resources.
|
||||||
|
Set(path string, r *configs.Resources) error
|
||||||
|
}
|
||||||
|
|
||||||
|
type manager struct {
|
||||||
|
mu sync.Mutex
|
||||||
|
cgroups *configs.Cgroup
|
||||||
|
paths map[string]string
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewManager(cg *configs.Cgroup, paths map[string]string) (cgroups.Manager, error) {
|
||||||
|
// Some v1 controllers (cpu, cpuset, and devices) expect
|
||||||
|
// cgroups.Resources to not be nil in Apply.
|
||||||
|
if cg.Resources == nil {
|
||||||
|
return nil, errors.New("cgroup v1 manager needs configs.Resources to be set during manager creation")
|
||||||
|
}
|
||||||
|
if cg.Resources.Unified != nil {
|
||||||
|
return nil, cgroups.ErrV1NoUnified
|
||||||
|
}
|
||||||
|
|
||||||
|
if paths == nil {
|
||||||
|
var err error
|
||||||
|
paths, err = initPaths(cg)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return &manager{
|
||||||
|
cgroups: cg,
|
||||||
|
paths: paths,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// isIgnorableError returns whether err is a permission error (in the loose
|
||||||
|
// sense of the word). This includes EROFS (which for an unprivileged user is
|
||||||
|
// basically a permission error) and EACCES (for similar reasons) as well as
|
||||||
|
// the normal EPERM.
|
||||||
|
func isIgnorableError(rootless bool, err error) bool {
|
||||||
|
// We do not ignore errors if we are root.
|
||||||
|
if !rootless {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
// Is it an ordinary EPERM?
|
||||||
|
if errors.Is(err, os.ErrPermission) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
// Handle some specific syscall errors.
|
||||||
|
var errno unix.Errno
|
||||||
|
if errors.As(err, &errno) {
|
||||||
|
return errno == unix.EROFS || errno == unix.EPERM || errno == unix.EACCES
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) Apply(pid int) (err error) {
|
||||||
|
m.mu.Lock()
|
||||||
|
defer m.mu.Unlock()
|
||||||
|
|
||||||
|
c := m.cgroups
|
||||||
|
|
||||||
|
for _, sys := range subsystems {
|
||||||
|
name := sys.Name()
|
||||||
|
p, ok := m.paths[name]
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := sys.Apply(p, c.Resources, pid); err != nil {
|
||||||
|
// In the case of rootless (including euid=0 in userns), where an
|
||||||
|
// explicit cgroup path hasn't been set, we don't bail on error in
|
||||||
|
// case of permission problems here, but do delete the path from
|
||||||
|
// the m.paths map, since it is either non-existent and could not
|
||||||
|
// be created, or the pid could not be added to it.
|
||||||
|
//
|
||||||
|
// Cases where limits for the subsystem have been set are handled
|
||||||
|
// later by Set, which fails with a friendly error (see
|
||||||
|
// if path == "" in Set).
|
||||||
|
if isIgnorableError(c.Rootless, err) && c.Path == "" {
|
||||||
|
delete(m.paths, name)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) Destroy() error {
|
||||||
|
m.mu.Lock()
|
||||||
|
defer m.mu.Unlock()
|
||||||
|
return cgroups.RemovePaths(m.paths)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) Path(subsys string) string {
|
||||||
|
m.mu.Lock()
|
||||||
|
defer m.mu.Unlock()
|
||||||
|
return m.paths[subsys]
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) GetStats() (*cgroups.Stats, error) {
|
||||||
|
m.mu.Lock()
|
||||||
|
defer m.mu.Unlock()
|
||||||
|
stats := cgroups.NewStats()
|
||||||
|
for _, sys := range subsystems {
|
||||||
|
path := m.paths[sys.Name()]
|
||||||
|
if path == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if err := sys.GetStats(path, stats); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return stats, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) Set(r *configs.Resources) error {
|
||||||
|
if r == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if r.Unified != nil {
|
||||||
|
return cgroups.ErrV1NoUnified
|
||||||
|
}
|
||||||
|
|
||||||
|
m.mu.Lock()
|
||||||
|
defer m.mu.Unlock()
|
||||||
|
for _, sys := range subsystems {
|
||||||
|
path := m.paths[sys.Name()]
|
||||||
|
if err := sys.Set(path, r); err != nil {
|
||||||
|
// When rootless is true, errors from the device subsystem
|
||||||
|
// are ignored, as it is really not expected to work.
|
||||||
|
if m.cgroups.Rootless && sys.Name() == "devices" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// However, errors from other subsystems are not ignored.
|
||||||
|
// see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
|
||||||
|
if path == "" {
|
||||||
|
// We never created a path for this cgroup, so we cannot set
|
||||||
|
// limits for it (though we have already tried at this point).
|
||||||
|
return fmt.Errorf("cannot set %s limit: container could not join or create cgroup", sys.Name())
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Freeze toggles the container's freezer cgroup depending on the state
|
||||||
|
// provided
|
||||||
|
func (m *manager) Freeze(state configs.FreezerState) error {
|
||||||
|
path := m.Path("freezer")
|
||||||
|
if path == "" {
|
||||||
|
return errors.New("cannot toggle freezer: cgroups not configured for container")
|
||||||
|
}
|
||||||
|
|
||||||
|
prevState := m.cgroups.Resources.Freezer
|
||||||
|
m.cgroups.Resources.Freezer = state
|
||||||
|
freezer := &FreezerGroup{}
|
||||||
|
if err := freezer.Set(path, m.cgroups.Resources); err != nil {
|
||||||
|
m.cgroups.Resources.Freezer = prevState
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) GetPids() ([]int, error) {
|
||||||
|
return cgroups.GetPids(m.Path("devices"))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) GetAllPids() ([]int, error) {
|
||||||
|
return cgroups.GetAllPids(m.Path("devices"))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) GetPaths() map[string]string {
|
||||||
|
m.mu.Lock()
|
||||||
|
defer m.mu.Unlock()
|
||||||
|
return m.paths
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) GetCgroups() (*configs.Cgroup, error) {
|
||||||
|
return m.cgroups, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) GetFreezerState() (configs.FreezerState, error) {
|
||||||
|
dir := m.Path("freezer")
|
||||||
|
// If the container doesn't have the freezer cgroup, say it's undefined.
|
||||||
|
if dir == "" {
|
||||||
|
return configs.Undefined, nil
|
||||||
|
}
|
||||||
|
freezer := &FreezerGroup{}
|
||||||
|
return freezer.GetState(dir)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) Exists() bool {
|
||||||
|
return cgroups.PathExists(m.Path("devices"))
|
||||||
|
}
|
||||||
|
|
||||||
|
func OOMKillCount(path string) (uint64, error) {
|
||||||
|
return fscommon.GetValueByKey(path, "memory.oom_control", "oom_kill")
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) OOMKillCount() (uint64, error) {
|
||||||
|
c, err := OOMKillCount(m.Path("memory"))
|
||||||
|
// Ignore ENOENT when rootless as it couldn't create cgroup.
|
||||||
|
if err != nil && m.cgroups.Rootless && os.IsNotExist(err) {
|
||||||
|
err = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return c, err
|
||||||
|
}
|
62
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go
generated
vendored
Normal file
62
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go
generated
vendored
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
package fs
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strconv"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
)
|
||||||
|
|
||||||
|
// HugetlbGroup implements the subsystem methods for the "hugetlb" cgroup
// controller.
type HugetlbGroup struct{}

// Name returns the subsystem name, "hugetlb".
func (s *HugetlbGroup) Name() string {
	const subsystemName = "hugetlb"
	return subsystemName
}
|
||||||
|
|
||||||
|
func (s *HugetlbGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||||
|
return apply(path, pid)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *HugetlbGroup) Set(path string, r *configs.Resources) error {
|
||||||
|
for _, hugetlb := range r.HugetlbLimit {
|
||||||
|
if err := cgroups.WriteFile(path, "hugetlb."+hugetlb.Pagesize+".limit_in_bytes", strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *HugetlbGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||||
|
if !cgroups.PathExists(path) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
hugetlbStats := cgroups.HugetlbStats{}
|
||||||
|
for _, pageSize := range cgroups.HugePageSizes() {
|
||||||
|
usage := "hugetlb." + pageSize + ".usage_in_bytes"
|
||||||
|
value, err := fscommon.GetCgroupParamUint(path, usage)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
hugetlbStats.Usage = value
|
||||||
|
|
||||||
|
maxUsage := "hugetlb." + pageSize + ".max_usage_in_bytes"
|
||||||
|
value, err = fscommon.GetCgroupParamUint(path, maxUsage)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
hugetlbStats.MaxUsage = value
|
||||||
|
|
||||||
|
failcnt := "hugetlb." + pageSize + ".failcnt"
|
||||||
|
value, err = fscommon.GetCgroupParamUint(path, failcnt)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
hugetlbStats.Failcnt = value
|
||||||
|
|
||||||
|
stats.HugetlbStats[pageSize] = hugetlbStats
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
348
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go
generated
vendored
Normal file
348
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go
generated
vendored
Normal file
@ -0,0 +1,348 @@
|
|||||||
|
package fs
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"math"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
)
|
||||||
|
|
||||||
|
// File names of the memory controller files read and written by the
// helpers in this file.
const (
	// Limit files.
	cgroupMemoryLimit     = "memory.limit_in_bytes"
	cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes"

	// Usage files.
	cgroupMemoryUsage    = "memory.usage_in_bytes"
	cgroupMemoryMaxUsage = "memory.max_usage_in_bytes"
)
|
||||||
|
|
||||||
|
// MemoryGroup implements the subsystem methods for the "memory" cgroup
// controller.
type MemoryGroup struct{}

// Name returns the subsystem name, "memory".
func (s *MemoryGroup) Name() string {
	const subsystemName = "memory"
	return subsystemName
}
|
||||||
|
|
||||||
|
func (s *MemoryGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||||
|
return apply(path, pid)
|
||||||
|
}
|
||||||
|
|
||||||
|
func setMemory(path string, val int64) error {
|
||||||
|
if val == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
err := cgroups.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(val, 10))
|
||||||
|
if !errors.Is(err, unix.EBUSY) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// EBUSY means the kernel can't set new limit as it's too low
|
||||||
|
// (lower than the current usage). Return more specific error.
|
||||||
|
usage, err := fscommon.GetCgroupParamUint(path, cgroupMemoryUsage)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
max, err := fscommon.GetCgroupParamUint(path, cgroupMemoryMaxUsage)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return fmt.Errorf("unable to set memory limit to %d (current usage: %d, peak usage: %d)", val, usage, max)
|
||||||
|
}
|
||||||
|
|
||||||
|
func setSwap(path string, val int64) error {
|
||||||
|
if val == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return cgroups.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(val, 10))
|
||||||
|
}
|
||||||
|
|
||||||
|
func setMemoryAndSwap(path string, r *configs.Resources) error {
|
||||||
|
// If the memory update is set to -1 and the swap is not explicitly
|
||||||
|
// set, we should also set swap to -1, it means unlimited memory.
|
||||||
|
if r.Memory == -1 && r.MemorySwap == 0 {
|
||||||
|
// Only set swap if it's enabled in kernel
|
||||||
|
if cgroups.PathExists(filepath.Join(path, cgroupMemorySwapLimit)) {
|
||||||
|
r.MemorySwap = -1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// When memory and swap memory are both set, we need to handle the cases
|
||||||
|
// for updating container.
|
||||||
|
if r.Memory != 0 && r.MemorySwap != 0 {
|
||||||
|
curLimit, err := fscommon.GetCgroupParamUint(path, cgroupMemoryLimit)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// When update memory limit, we should adapt the write sequence
|
||||||
|
// for memory and swap memory, so it won't fail because the new
|
||||||
|
// value and the old value don't fit kernel's validation.
|
||||||
|
if r.MemorySwap == -1 || curLimit < uint64(r.MemorySwap) {
|
||||||
|
if err := setSwap(path, r.MemorySwap); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := setMemory(path, r.Memory); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := setMemory(path, r.Memory); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := setSwap(path, r.MemorySwap); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *MemoryGroup) Set(path string, r *configs.Resources) error {
|
||||||
|
if err := setMemoryAndSwap(path, r); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// ignore KernelMemory and KernelMemoryTCP
|
||||||
|
|
||||||
|
if r.MemoryReservation != 0 {
|
||||||
|
if err := cgroups.WriteFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(r.MemoryReservation, 10)); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if r.OomKillDisable {
|
||||||
|
if err := cgroups.WriteFile(path, "memory.oom_control", "1"); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if r.MemorySwappiness == nil || int64(*r.MemorySwappiness) == -1 {
|
||||||
|
return nil
|
||||||
|
} else if *r.MemorySwappiness <= 100 {
|
||||||
|
if err := cgroups.WriteFile(path, "memory.swappiness", strconv.FormatUint(*r.MemorySwappiness, 10)); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return fmt.Errorf("invalid memory swappiness value: %d (valid range is 0-100)", *r.MemorySwappiness)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||||
|
const file = "memory.stat"
|
||||||
|
statsFile, err := cgroups.OpenFile(path, file, os.O_RDONLY)
|
||||||
|
if err != nil {
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer statsFile.Close()
|
||||||
|
|
||||||
|
sc := bufio.NewScanner(statsFile)
|
||||||
|
for sc.Scan() {
|
||||||
|
t, v, err := fscommon.ParseKeyValue(sc.Text())
|
||||||
|
if err != nil {
|
||||||
|
return &parseError{Path: path, File: file, Err: err}
|
||||||
|
}
|
||||||
|
stats.MemoryStats.Stats[t] = v
|
||||||
|
}
|
||||||
|
stats.MemoryStats.Cache = stats.MemoryStats.Stats["cache"]
|
||||||
|
|
||||||
|
memoryUsage, err := getMemoryData(path, "")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
stats.MemoryStats.Usage = memoryUsage
|
||||||
|
swapUsage, err := getMemoryData(path, "memsw")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
stats.MemoryStats.SwapUsage = swapUsage
|
||||||
|
kernelUsage, err := getMemoryData(path, "kmem")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
stats.MemoryStats.KernelUsage = kernelUsage
|
||||||
|
kernelTCPUsage, err := getMemoryData(path, "kmem.tcp")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
stats.MemoryStats.KernelTCPUsage = kernelTCPUsage
|
||||||
|
|
||||||
|
value, err := fscommon.GetCgroupParamUint(path, "memory.use_hierarchy")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if value == 1 {
|
||||||
|
stats.MemoryStats.UseHierarchy = true
|
||||||
|
}
|
||||||
|
|
||||||
|
pagesByNUMA, err := getPageUsageByNUMA(path)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
stats.MemoryStats.PageUsageByNUMA = pagesByNUMA
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func getMemoryData(path, name string) (cgroups.MemoryData, error) {
|
||||||
|
memoryData := cgroups.MemoryData{}
|
||||||
|
|
||||||
|
moduleName := "memory"
|
||||||
|
if name != "" {
|
||||||
|
moduleName = "memory." + name
|
||||||
|
}
|
||||||
|
var (
|
||||||
|
usage = moduleName + ".usage_in_bytes"
|
||||||
|
maxUsage = moduleName + ".max_usage_in_bytes"
|
||||||
|
failcnt = moduleName + ".failcnt"
|
||||||
|
limit = moduleName + ".limit_in_bytes"
|
||||||
|
)
|
||||||
|
|
||||||
|
value, err := fscommon.GetCgroupParamUint(path, usage)
|
||||||
|
if err != nil {
|
||||||
|
if name != "" && os.IsNotExist(err) {
|
||||||
|
// Ignore ENOENT as swap and kmem controllers
|
||||||
|
// are optional in the kernel.
|
||||||
|
return cgroups.MemoryData{}, nil
|
||||||
|
}
|
||||||
|
return cgroups.MemoryData{}, err
|
||||||
|
}
|
||||||
|
memoryData.Usage = value
|
||||||
|
value, err = fscommon.GetCgroupParamUint(path, maxUsage)
|
||||||
|
if err != nil {
|
||||||
|
return cgroups.MemoryData{}, err
|
||||||
|
}
|
||||||
|
memoryData.MaxUsage = value
|
||||||
|
value, err = fscommon.GetCgroupParamUint(path, failcnt)
|
||||||
|
if err != nil {
|
||||||
|
return cgroups.MemoryData{}, err
|
||||||
|
}
|
||||||
|
memoryData.Failcnt = value
|
||||||
|
value, err = fscommon.GetCgroupParamUint(path, limit)
|
||||||
|
if err != nil {
|
||||||
|
return cgroups.MemoryData{}, err
|
||||||
|
}
|
||||||
|
memoryData.Limit = value
|
||||||
|
|
||||||
|
return memoryData, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func getPageUsageByNUMA(path string) (cgroups.PageUsageByNUMA, error) {
|
||||||
|
const (
|
||||||
|
maxColumns = math.MaxUint8 + 1
|
||||||
|
file = "memory.numa_stat"
|
||||||
|
)
|
||||||
|
stats := cgroups.PageUsageByNUMA{}
|
||||||
|
|
||||||
|
fd, err := cgroups.OpenFile(path, file, os.O_RDONLY)
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
return stats, nil
|
||||||
|
} else if err != nil {
|
||||||
|
return stats, err
|
||||||
|
}
|
||||||
|
defer fd.Close()
|
||||||
|
|
||||||
|
// File format is documented in linux/Documentation/cgroup-v1/memory.txt
|
||||||
|
// and it looks like this:
|
||||||
|
//
|
||||||
|
// total=<total pages> N0=<node 0 pages> N1=<node 1 pages> ...
|
||||||
|
// file=<total file pages> N0=<node 0 pages> N1=<node 1 pages> ...
|
||||||
|
// anon=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
|
||||||
|
// unevictable=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
|
||||||
|
// hierarchical_<counter>=<counter pages> N0=<node 0 pages> N1=<node 1 pages> ...
|
||||||
|
|
||||||
|
scanner := bufio.NewScanner(fd)
|
||||||
|
for scanner.Scan() {
|
||||||
|
var field *cgroups.PageStats
|
||||||
|
|
||||||
|
line := scanner.Text()
|
||||||
|
columns := strings.SplitN(line, " ", maxColumns)
|
||||||
|
for i, column := range columns {
|
||||||
|
byNode := strings.SplitN(column, "=", 2)
|
||||||
|
// Some custom kernels have non-standard fields, like
|
||||||
|
// numa_locality 0 0 0 0 0 0 0 0 0 0
|
||||||
|
// numa_exectime 0
|
||||||
|
if len(byNode) < 2 {
|
||||||
|
if i == 0 {
|
||||||
|
// Ignore/skip those.
|
||||||
|
break
|
||||||
|
} else {
|
||||||
|
// The first column was already validated,
|
||||||
|
// so be strict to the rest.
|
||||||
|
return stats, malformedLine(path, file, line)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
key, val := byNode[0], byNode[1]
|
||||||
|
if i == 0 { // First column: key is name, val is total.
|
||||||
|
field = getNUMAField(&stats, key)
|
||||||
|
if field == nil { // unknown field (new kernel?)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
field.Total, err = strconv.ParseUint(val, 0, 64)
|
||||||
|
if err != nil {
|
||||||
|
return stats, &parseError{Path: path, File: file, Err: err}
|
||||||
|
}
|
||||||
|
field.Nodes = map[uint8]uint64{}
|
||||||
|
} else { // Subsequent columns: key is N<id>, val is usage.
|
||||||
|
if len(key) < 2 || key[0] != 'N' {
|
||||||
|
// This is definitely an error.
|
||||||
|
return stats, malformedLine(path, file, line)
|
||||||
|
}
|
||||||
|
|
||||||
|
n, err := strconv.ParseUint(key[1:], 10, 8)
|
||||||
|
if err != nil {
|
||||||
|
return stats, &parseError{Path: path, File: file, Err: err}
|
||||||
|
}
|
||||||
|
|
||||||
|
usage, err := strconv.ParseUint(val, 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
return stats, &parseError{Path: path, File: file, Err: err}
|
||||||
|
}
|
||||||
|
|
||||||
|
field.Nodes[uint8(n)] = usage
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if err := scanner.Err(); err != nil {
|
||||||
|
return cgroups.PageUsageByNUMA{}, &parseError{Path: path, File: file, Err: err}
|
||||||
|
}
|
||||||
|
|
||||||
|
return stats, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func getNUMAField(stats *cgroups.PageUsageByNUMA, name string) *cgroups.PageStats {
|
||||||
|
switch name {
|
||||||
|
case "total":
|
||||||
|
return &stats.Total
|
||||||
|
case "file":
|
||||||
|
return &stats.File
|
||||||
|
case "anon":
|
||||||
|
return &stats.Anon
|
||||||
|
case "unevictable":
|
||||||
|
return &stats.Unevictable
|
||||||
|
case "hierarchical_total":
|
||||||
|
return &stats.Hierarchical.Total
|
||||||
|
case "hierarchical_file":
|
||||||
|
return &stats.Hierarchical.File
|
||||||
|
case "hierarchical_anon":
|
||||||
|
return &stats.Hierarchical.Anon
|
||||||
|
case "hierarchical_unevictable":
|
||||||
|
return &stats.Hierarchical.Unevictable
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
31
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go
generated
vendored
Normal file
31
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go
generated
vendored
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
package fs
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
)
|
||||||
|
|
||||||
|
// NameGroup is a subsystem identified by a configurable name.
type NameGroup struct {
	// GroupName is the value returned by Name.
	GroupName string
	// Join controls whether Apply attempts to add the process to the cgroup.
	Join bool
}

// Name returns the configured GroupName.
func (s *NameGroup) Name() string {
	name := s.GroupName
	return name
}
|
||||||
|
|
||||||
|
func (s *NameGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||||
|
if s.Join {
|
||||||
|
// Ignore errors if the named cgroup does not exist.
|
||||||
|
_ = apply(path, pid)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *NameGroup) Set(_ string, _ *configs.Resources) error {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *NameGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||||
|
return nil
|
||||||
|
}
|
32
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go
generated
vendored
Normal file
32
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go
generated
vendored
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
package fs
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strconv"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
)
|
||||||
|
|
||||||
|
// NetClsGroup implements the subsystem methods for the "net_cls" cgroup
// controller.
type NetClsGroup struct{}

// Name returns the subsystem name, "net_cls".
func (s *NetClsGroup) Name() string {
	const subsystemName = "net_cls"
	return subsystemName
}
|
||||||
|
|
||||||
|
func (s *NetClsGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||||
|
return apply(path, pid)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *NetClsGroup) Set(path string, r *configs.Resources) error {
|
||||||
|
if r.NetClsClassid != 0 {
|
||||||
|
if err := cgroups.WriteFile(path, "net_cls.classid", strconv.FormatUint(uint64(r.NetClsClassid), 10)); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *NetClsGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||||
|
return nil
|
||||||
|
}
|
30
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go
generated
vendored
Normal file
30
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go
generated
vendored
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
package fs
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
)
|
||||||
|
|
||||||
|
// NetPrioGroup implements the subsystem methods for the "net_prio" cgroup
// controller.
type NetPrioGroup struct{}

// Name returns the subsystem name, "net_prio".
func (s *NetPrioGroup) Name() string {
	const subsystemName = "net_prio"
	return subsystemName
}
|
||||||
|
|
||||||
|
func (s *NetPrioGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||||
|
return apply(path, pid)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *NetPrioGroup) Set(path string, r *configs.Resources) error {
|
||||||
|
for _, prioMap := range r.NetPrioIfpriomap {
|
||||||
|
if err := cgroups.WriteFile(path, "net_prio.ifpriomap", prioMap.CgroupString()); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *NetPrioGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||||
|
return nil
|
||||||
|
}
|
186
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/paths.go
generated
vendored
Normal file
186
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/paths.go
generated
vendored
Normal file
@ -0,0 +1,186 @@
|
|||||||
|
package fs
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/utils"
|
||||||
|
)
|
||||||
|
|
||||||
|
// defaultCgroupRoot is the location probed first when looking for the root
// of the cgroup hierarchies.
const defaultCgroupRoot = "/sys/fs/cgroup"

// The absolute path to the root of the cgroup hierarchies.
var (
	// cgroupRootLock guards cgroupRoot.
	cgroupRootLock sync.Mutex
	// cgroupRoot caches the discovered root; empty until first resolved.
	cgroupRoot string
)
|
||||||
|
|
||||||
|
func initPaths(cg *configs.Cgroup) (map[string]string, error) {
|
||||||
|
root, err := rootPath()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
inner, err := innerPath(cg)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
paths := make(map[string]string)
|
||||||
|
for _, sys := range subsystems {
|
||||||
|
name := sys.Name()
|
||||||
|
path, err := subsysPath(root, inner, name)
|
||||||
|
if err != nil {
|
||||||
|
// The non-presence of the devices subsystem
|
||||||
|
// is considered fatal for security reasons.
|
||||||
|
if cgroups.IsNotFound(err) && (cg.SkipDevices || name != "devices") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
paths[name] = path
|
||||||
|
}
|
||||||
|
|
||||||
|
return paths, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func tryDefaultCgroupRoot() string {
|
||||||
|
var st, pst unix.Stat_t
|
||||||
|
|
||||||
|
// (1) it should be a directory...
|
||||||
|
err := unix.Lstat(defaultCgroupRoot, &st)
|
||||||
|
if err != nil || st.Mode&unix.S_IFDIR == 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// (2) ... and a mount point ...
|
||||||
|
err = unix.Lstat(filepath.Dir(defaultCgroupRoot), &pst)
|
||||||
|
if err != nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
if st.Dev == pst.Dev {
|
||||||
|
// parent dir has the same dev -- not a mount point
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// (3) ... of 'tmpfs' fs type.
|
||||||
|
var fst unix.Statfs_t
|
||||||
|
err = unix.Statfs(defaultCgroupRoot, &fst)
|
||||||
|
if err != nil || fst.Type != unix.TMPFS_MAGIC {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// (4) it should have at least 1 entry ...
|
||||||
|
dir, err := os.Open(defaultCgroupRoot)
|
||||||
|
if err != nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
names, err := dir.Readdirnames(1)
|
||||||
|
if err != nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
if len(names) < 1 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
// ... which is a cgroup mount point.
|
||||||
|
err = unix.Statfs(filepath.Join(defaultCgroupRoot, names[0]), &fst)
|
||||||
|
if err != nil || fst.Type != unix.CGROUP_SUPER_MAGIC {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
return defaultCgroupRoot
|
||||||
|
}
|
||||||
|
|
||||||
|
// rootPath finds and returns path to the root of the cgroup hierarchies.
|
||||||
|
func rootPath() (string, error) {
|
||||||
|
cgroupRootLock.Lock()
|
||||||
|
defer cgroupRootLock.Unlock()
|
||||||
|
|
||||||
|
if cgroupRoot != "" {
|
||||||
|
return cgroupRoot, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// fast path
|
||||||
|
cgroupRoot = tryDefaultCgroupRoot()
|
||||||
|
if cgroupRoot != "" {
|
||||||
|
return cgroupRoot, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// slow path: parse mountinfo
|
||||||
|
mi, err := cgroups.GetCgroupMounts(false)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
if len(mi) < 1 {
|
||||||
|
return "", errors.New("no cgroup mount found in mountinfo")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the first cgroup mount (e.g. "/sys/fs/cgroup/memory"),
|
||||||
|
// use its parent directory.
|
||||||
|
root := filepath.Dir(mi[0].Mountpoint)
|
||||||
|
|
||||||
|
if _, err := os.Stat(root); err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
cgroupRoot = root
|
||||||
|
return cgroupRoot, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func innerPath(c *configs.Cgroup) (string, error) {
|
||||||
|
if (c.Name != "" || c.Parent != "") && c.Path != "" {
|
||||||
|
return "", errors.New("cgroup: either Path or Name and Parent should be used")
|
||||||
|
}
|
||||||
|
|
||||||
|
// XXX: Do not remove CleanPath. Path safety is important! -- cyphar
|
||||||
|
innerPath := utils.CleanPath(c.Path)
|
||||||
|
if innerPath == "" {
|
||||||
|
cgParent := utils.CleanPath(c.Parent)
|
||||||
|
cgName := utils.CleanPath(c.Name)
|
||||||
|
innerPath = filepath.Join(cgParent, cgName)
|
||||||
|
}
|
||||||
|
|
||||||
|
return innerPath, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func subsysPath(root, inner, subsystem string) (string, error) {
|
||||||
|
// If the cgroup name/path is absolute do not look relative to the cgroup of the init process.
|
||||||
|
if filepath.IsAbs(inner) {
|
||||||
|
mnt, err := cgroups.FindCgroupMountpoint(root, subsystem)
|
||||||
|
// If we didn't mount the subsystem, there is no point we make the path.
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sometimes subsystems can be mounted together as 'cpu,cpuacct'.
|
||||||
|
return filepath.Join(root, filepath.Base(mnt), inner), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use GetOwnCgroupPath instead of GetInitCgroupPath, because the creating
|
||||||
|
// process could in container and shared pid namespace with host, and
|
||||||
|
// /proc/1/cgroup could point to whole other world of cgroups.
|
||||||
|
parentPath, err := cgroups.GetOwnCgroupPath(subsystem)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
return filepath.Join(parentPath, inner), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func apply(path string, pid int) error {
|
||||||
|
if path == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if err := os.MkdirAll(path, 0o755); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return cgroups.WriteCgroupProc(path, pid)
|
||||||
|
}
|
24
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go
generated
vendored
Normal file
24
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go
generated
vendored
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
package fs
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
)
|
||||||
|
|
||||||
|
// PerfEventGroup implements the subsystem methods for the "perf_event"
// cgroup controller.
type PerfEventGroup struct{}

// Name returns the subsystem name, "perf_event".
func (s *PerfEventGroup) Name() string {
	const subsystemName = "perf_event"
	return subsystemName
}
|
||||||
|
|
||||||
|
func (s *PerfEventGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||||
|
return apply(path, pid)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *PerfEventGroup) Set(_ string, _ *configs.Resources) error {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *PerfEventGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||||
|
return nil
|
||||||
|
}
|
62
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go
generated
vendored
Normal file
62
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go
generated
vendored
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
package fs
|
||||||
|
|
||||||
|
import (
|
||||||
|
"math"
|
||||||
|
"strconv"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
)
|
||||||
|
|
||||||
|
// PidsGroup implements the subsystem methods for the "pids" cgroup
// controller.
type PidsGroup struct{}

// Name returns the subsystem name, "pids".
func (s *PidsGroup) Name() string {
	const subsystemName = "pids"
	return subsystemName
}
|
||||||
|
|
||||||
|
func (s *PidsGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||||
|
return apply(path, pid)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *PidsGroup) Set(path string, r *configs.Resources) error {
|
||||||
|
if r.PidsLimit != 0 {
|
||||||
|
// "max" is the fallback value.
|
||||||
|
limit := "max"
|
||||||
|
|
||||||
|
if r.PidsLimit > 0 {
|
||||||
|
limit = strconv.FormatInt(r.PidsLimit, 10)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := cgroups.WriteFile(path, "pids.max", limit); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *PidsGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||||
|
if !cgroups.PathExists(path) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
current, err := fscommon.GetCgroupParamUint(path, "pids.current")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
max, err := fscommon.GetCgroupParamUint(path, "pids.max")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// If no limit is set, read from pids.max returns "max", which is
|
||||||
|
// converted to MaxUint64 by GetCgroupParamUint. Historically, we
|
||||||
|
// represent "no limit" for pids as 0, thus this conversion.
|
||||||
|
if max == math.MaxUint64 {
|
||||||
|
max = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
stats.PidsStats.Current = current
|
||||||
|
stats.PidsStats.Limit = max
|
||||||
|
return nil
|
||||||
|
}
|
25
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/rdma.go
generated
vendored
Normal file
25
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/rdma.go
generated
vendored
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
package fs
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
)
|
||||||
|
|
||||||
|
// RdmaGroup implements the subsystem methods for the "rdma" cgroup
// controller.
type RdmaGroup struct{}

// Name returns the subsystem name, "rdma".
func (s *RdmaGroup) Name() string {
	const subsystemName = "rdma"
	return subsystemName
}
|
||||||
|
|
||||||
|
func (s *RdmaGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||||
|
return apply(path, pid)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *RdmaGroup) Set(path string, r *configs.Resources) error {
|
||||||
|
return fscommon.RdmaSet(path, r)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *RdmaGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||||
|
return fscommon.RdmaGetStats(path, stats)
|
||||||
|
}
|
87
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpu.go
generated
vendored
Normal file
87
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpu.go
generated
vendored
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
package fs2
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"os"
|
||||||
|
"strconv"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
)
|
||||||
|
|
||||||
|
func isCpuSet(r *configs.Resources) bool {
|
||||||
|
return r.CpuWeight != 0 || r.CpuQuota != 0 || r.CpuPeriod != 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func setCpu(dirPath string, r *configs.Resources) error {
|
||||||
|
if !isCpuSet(r) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// NOTE: .CpuShares is not used here. Conversion is the caller's responsibility.
|
||||||
|
if r.CpuWeight != 0 {
|
||||||
|
if err := cgroups.WriteFile(dirPath, "cpu.weight", strconv.FormatUint(r.CpuWeight, 10)); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if r.CpuQuota != 0 || r.CpuPeriod != 0 {
|
||||||
|
str := "max"
|
||||||
|
if r.CpuQuota > 0 {
|
||||||
|
str = strconv.FormatInt(r.CpuQuota, 10)
|
||||||
|
}
|
||||||
|
period := r.CpuPeriod
|
||||||
|
if period == 0 {
|
||||||
|
// This default value is documented in
|
||||||
|
// https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html
|
||||||
|
period = 100000
|
||||||
|
}
|
||||||
|
str += " " + strconv.FormatUint(period, 10)
|
||||||
|
if err := cgroups.WriteFile(dirPath, "cpu.max", str); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func statCpu(dirPath string, stats *cgroups.Stats) error {
|
||||||
|
const file = "cpu.stat"
|
||||||
|
f, err := cgroups.OpenFile(dirPath, file, os.O_RDONLY)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
sc := bufio.NewScanner(f)
|
||||||
|
for sc.Scan() {
|
||||||
|
t, v, err := fscommon.ParseKeyValue(sc.Text())
|
||||||
|
if err != nil {
|
||||||
|
return &parseError{Path: dirPath, File: file, Err: err}
|
||||||
|
}
|
||||||
|
switch t {
|
||||||
|
case "usage_usec":
|
||||||
|
stats.CpuStats.CpuUsage.TotalUsage = v * 1000
|
||||||
|
|
||||||
|
case "user_usec":
|
||||||
|
stats.CpuStats.CpuUsage.UsageInUsermode = v * 1000
|
||||||
|
|
||||||
|
case "system_usec":
|
||||||
|
stats.CpuStats.CpuUsage.UsageInKernelmode = v * 1000
|
||||||
|
|
||||||
|
case "nr_periods":
|
||||||
|
stats.CpuStats.ThrottlingData.Periods = v
|
||||||
|
|
||||||
|
case "nr_throttled":
|
||||||
|
stats.CpuStats.ThrottlingData.ThrottledPeriods = v
|
||||||
|
|
||||||
|
case "throttled_usec":
|
||||||
|
stats.CpuStats.ThrottlingData.ThrottledTime = v * 1000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if err := sc.Err(); err != nil {
|
||||||
|
return &parseError{Path: dirPath, File: file, Err: err}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
28
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpuset.go
generated
vendored
Normal file
28
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpuset.go
generated
vendored
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
package fs2
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
)
|
||||||
|
|
||||||
|
func isCpusetSet(r *configs.Resources) bool {
|
||||||
|
return r.CpusetCpus != "" || r.CpusetMems != ""
|
||||||
|
}
|
||||||
|
|
||||||
|
func setCpuset(dirPath string, r *configs.Resources) error {
|
||||||
|
if !isCpusetSet(r) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if r.CpusetCpus != "" {
|
||||||
|
if err := cgroups.WriteFile(dirPath, "cpuset.cpus", r.CpusetCpus); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if r.CpusetMems != "" {
|
||||||
|
if err := cgroups.WriteFile(dirPath, "cpuset.mems", r.CpusetMems); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
152
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/create.go
generated
vendored
Normal file
152
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/create.go
generated
vendored
Normal file
@ -0,0 +1,152 @@
|
|||||||
|
package fs2
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
)
|
||||||
|
|
||||||
|
func supportedControllers() (string, error) {
|
||||||
|
return cgroups.ReadFile(UnifiedMountpoint, "/cgroup.controllers")
|
||||||
|
}
|
||||||
|
|
||||||
|
// needAnyControllers returns whether we enable some supported controllers or not,
|
||||||
|
// based on (1) controllers available and (2) resources that are being set.
|
||||||
|
// We don't check "pseudo" controllers such as
|
||||||
|
// "freezer" and "devices".
|
||||||
|
func needAnyControllers(r *configs.Resources) (bool, error) {
|
||||||
|
if r == nil {
|
||||||
|
return false, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// list of all available controllers
|
||||||
|
content, err := supportedControllers()
|
||||||
|
if err != nil {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
avail := make(map[string]struct{})
|
||||||
|
for _, ctr := range strings.Fields(content) {
|
||||||
|
avail[ctr] = struct{}{}
|
||||||
|
}
|
||||||
|
|
||||||
|
// check whether the controller if available or not
|
||||||
|
have := func(controller string) bool {
|
||||||
|
_, ok := avail[controller]
|
||||||
|
return ok
|
||||||
|
}
|
||||||
|
|
||||||
|
if isPidsSet(r) && have("pids") {
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
|
if isMemorySet(r) && have("memory") {
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
|
if isIoSet(r) && have("io") {
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
|
if isCpuSet(r) && have("cpu") {
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
|
if isCpusetSet(r) && have("cpuset") {
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
|
if isHugeTlbSet(r) && have("hugetlb") {
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return false, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// containsDomainController returns whether the current config contains domain controller or not.
|
||||||
|
// Refer to: http://man7.org/linux/man-pages/man7/cgroups.7.html
|
||||||
|
// As at Linux 4.19, the following controllers are threaded: cpu, perf_event, and pids.
|
||||||
|
func containsDomainController(r *configs.Resources) bool {
|
||||||
|
return isMemorySet(r) || isIoSet(r) || isCpuSet(r) || isHugeTlbSet(r)
|
||||||
|
}
|
||||||
|
|
||||||
|
// CreateCgroupPath creates cgroupv2 path, enabling all the supported controllers.
|
||||||
|
func CreateCgroupPath(path string, c *configs.Cgroup) (Err error) {
|
||||||
|
if !strings.HasPrefix(path, UnifiedMountpoint) {
|
||||||
|
return fmt.Errorf("invalid cgroup path %s", path)
|
||||||
|
}
|
||||||
|
|
||||||
|
content, err := supportedControllers()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
const (
|
||||||
|
cgTypeFile = "cgroup.type"
|
||||||
|
cgStCtlFile = "cgroup.subtree_control"
|
||||||
|
)
|
||||||
|
ctrs := strings.Fields(content)
|
||||||
|
res := "+" + strings.Join(ctrs, " +")
|
||||||
|
|
||||||
|
elements := strings.Split(path, "/")
|
||||||
|
elements = elements[3:]
|
||||||
|
current := "/sys/fs"
|
||||||
|
for i, e := range elements {
|
||||||
|
current = filepath.Join(current, e)
|
||||||
|
if i > 0 {
|
||||||
|
if err := os.Mkdir(current, 0o755); err != nil {
|
||||||
|
if !os.IsExist(err) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// If the directory was created, be sure it is not left around on errors.
|
||||||
|
current := current
|
||||||
|
defer func() {
|
||||||
|
if Err != nil {
|
||||||
|
os.Remove(current)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
cgType, _ := cgroups.ReadFile(current, cgTypeFile)
|
||||||
|
cgType = strings.TrimSpace(cgType)
|
||||||
|
switch cgType {
|
||||||
|
// If the cgroup is in an invalid mode (usually this means there's an internal
|
||||||
|
// process in the cgroup tree, because we created a cgroup under an
|
||||||
|
// already-populated-by-other-processes cgroup), then we have to error out if
|
||||||
|
// the user requested controllers which are not thread-aware. However, if all
|
||||||
|
// the controllers requested are thread-aware we can simply put the cgroup into
|
||||||
|
// threaded mode.
|
||||||
|
case "domain invalid":
|
||||||
|
if containsDomainController(c.Resources) {
|
||||||
|
return fmt.Errorf("cannot enter cgroupv2 %q with domain controllers -- it is in an invalid state", current)
|
||||||
|
} else {
|
||||||
|
// Not entirely correct (in theory we'd always want to be a domain --
|
||||||
|
// since that means we're a properly delegated cgroup subtree) but in
|
||||||
|
// this case there's not much we can do and it's better than giving an
|
||||||
|
// error.
|
||||||
|
_ = cgroups.WriteFile(current, cgTypeFile, "threaded")
|
||||||
|
}
|
||||||
|
// If the cgroup is in (threaded) or (domain threaded) mode, we can only use thread-aware controllers
|
||||||
|
// (and you cannot usually take a cgroup out of threaded mode).
|
||||||
|
case "domain threaded":
|
||||||
|
fallthrough
|
||||||
|
case "threaded":
|
||||||
|
if containsDomainController(c.Resources) {
|
||||||
|
return fmt.Errorf("cannot enter cgroupv2 %q with domain controllers -- it is in %s mode", current, cgType)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// enable all supported controllers
|
||||||
|
if i < len(elements)-1 {
|
||||||
|
if err := cgroups.WriteFile(current, cgStCtlFile, res); err != nil {
|
||||||
|
// try write one by one
|
||||||
|
allCtrs := strings.Split(res, " ")
|
||||||
|
for _, ctr := range allCtrs {
|
||||||
|
_ = cgroups.WriteFile(current, cgStCtlFile, ctr)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Some controllers might not be enabled when rootless or containerized,
|
||||||
|
// but we don't catch the error here. (Caught in setXXX() functions.)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
99
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/defaultpath.go
generated
vendored
Normal file
99
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/defaultpath.go
generated
vendored
Normal file
@ -0,0 +1,99 @@
|
|||||||
|
/*
|
||||||
|
Copyright The containerd Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package fs2
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/utils"
|
||||||
|
)
|
||||||
|
|
||||||
|
// UnifiedMountpoint is the directory this package treats as the root of the
// cgroup hierarchy: default cgroup paths are built beneath it, and paths
// handed to CreateCgroupPath must start with it.
const UnifiedMountpoint = "/sys/fs/cgroup"
|
||||||
|
|
||||||
|
func defaultDirPath(c *configs.Cgroup) (string, error) {
|
||||||
|
if (c.Name != "" || c.Parent != "") && c.Path != "" {
|
||||||
|
return "", fmt.Errorf("cgroup: either Path or Name and Parent should be used, got %+v", c)
|
||||||
|
}
|
||||||
|
|
||||||
|
return _defaultDirPath(UnifiedMountpoint, c.Path, c.Parent, c.Name)
|
||||||
|
}
|
||||||
|
|
||||||
|
func _defaultDirPath(root, cgPath, cgParent, cgName string) (string, error) {
|
||||||
|
if (cgName != "" || cgParent != "") && cgPath != "" {
|
||||||
|
return "", errors.New("cgroup: either Path or Name and Parent should be used")
|
||||||
|
}
|
||||||
|
|
||||||
|
// XXX: Do not remove CleanPath. Path safety is important! -- cyphar
|
||||||
|
innerPath := utils.CleanPath(cgPath)
|
||||||
|
if innerPath == "" {
|
||||||
|
cgParent := utils.CleanPath(cgParent)
|
||||||
|
cgName := utils.CleanPath(cgName)
|
||||||
|
innerPath = filepath.Join(cgParent, cgName)
|
||||||
|
}
|
||||||
|
if filepath.IsAbs(innerPath) {
|
||||||
|
return filepath.Join(root, innerPath), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
ownCgroup, err := parseCgroupFile("/proc/self/cgroup")
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
// The current user scope most probably has tasks in it already,
|
||||||
|
// making it impossible to enable controllers for its sub-cgroup.
|
||||||
|
// A parent cgroup (with no tasks in it) is what we need.
|
||||||
|
ownCgroup = filepath.Dir(ownCgroup)
|
||||||
|
|
||||||
|
return filepath.Join(root, ownCgroup, innerPath), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseCgroupFile parses /proc/PID/cgroup file and return string
|
||||||
|
func parseCgroupFile(path string) (string, error) {
|
||||||
|
f, err := os.Open(path)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
return parseCgroupFromReader(f)
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseCgroupFromReader scans /proc/PID/cgroup-formatted lines from r and
// returns the path of the first entry whose hierarchy ID is "0" and whose
// controller list is empty, e.g.
// "0::/user.slice/user-1001.slice/session-1.scope".
//
// A line with fewer than three colon-separated fields is an error, as is
// reaching the end of the input without finding a matching entry.
func parseCgroupFromReader(r io.Reader) (string, error) {
	scanner := bufio.NewScanner(r)
	for scanner.Scan() {
		line := scanner.Text()
		fields := strings.SplitN(line, ":", 3)
		if len(fields) < 3 {
			return "", fmt.Errorf("invalid cgroup entry: %q", line)
		}

		hierarchyID, controllers, cgPath := fields[0], fields[1], fields[2]
		if hierarchyID != "0" || controllers != "" {
			continue
		}
		return cgPath, nil
	}
	if err := scanner.Err(); err != nil {
		return "", err
	}
	return "", errors.New("cgroup path not found")
}
|
75
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/devices.go
generated
vendored
Normal file
75
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/devices.go
generated
vendored
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
package fs2
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups/ebpf"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/devices"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/userns"
|
||||||
|
)
|
||||||
|
|
||||||
|
func isRWM(perms devices.Permissions) bool {
|
||||||
|
var r, w, m bool
|
||||||
|
for _, perm := range perms {
|
||||||
|
switch perm {
|
||||||
|
case 'r':
|
||||||
|
r = true
|
||||||
|
case 'w':
|
||||||
|
w = true
|
||||||
|
case 'm':
|
||||||
|
m = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return r && w && m
|
||||||
|
}
|
||||||
|
|
||||||
|
// This is similar to the logic applied in crun for handling errors from bpf(2)
|
||||||
|
// <https://github.com/containers/crun/blob/0.17/src/libcrun/cgroup.c#L2438-L2470>.
|
||||||
|
func canSkipEBPFError(r *configs.Resources) bool {
|
||||||
|
// If we're running in a user namespace we can ignore eBPF rules because we
|
||||||
|
// usually cannot use bpf(2), as well as rootless containers usually don't
|
||||||
|
// have the necessary privileges to mknod(2) device inodes or access
|
||||||
|
// host-level instances (though ideally we would be blocking device access
|
||||||
|
// for rootless containers anyway).
|
||||||
|
if userns.RunningInUserNS() {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// We cannot ignore an eBPF load error if any rule if is a block rule or it
|
||||||
|
// doesn't permit all access modes.
|
||||||
|
//
|
||||||
|
// NOTE: This will sometimes trigger in cases where access modes are split
|
||||||
|
// between different rules but to handle this correctly would require
|
||||||
|
// using ".../libcontainer/cgroup/devices".Emulator.
|
||||||
|
for _, dev := range r.Devices {
|
||||||
|
if !dev.Allow || !isRWM(dev.Permissions) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
func setDevices(dirPath string, r *configs.Resources) error {
|
||||||
|
if r.SkipDevices {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
insts, license, err := devicefilter.DeviceFilter(r.Devices)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
dirFD, err := unix.Open(dirPath, unix.O_DIRECTORY|unix.O_RDONLY, 0o600)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("cannot get dir FD for %s", dirPath)
|
||||||
|
}
|
||||||
|
defer unix.Close(dirFD)
|
||||||
|
if _, err := ebpf.LoadAttachCgroupDeviceFilter(insts, license, dirFD); err != nil {
|
||||||
|
if !canSkipEBPFError(r) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
127
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/freezer.go
generated
vendored
Normal file
127
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/freezer.go
generated
vendored
Normal file
@ -0,0 +1,127 @@
|
|||||||
|
package fs2
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
)
|
||||||
|
|
||||||
|
func setFreezer(dirPath string, state configs.FreezerState) error {
|
||||||
|
var stateStr string
|
||||||
|
switch state {
|
||||||
|
case configs.Undefined:
|
||||||
|
return nil
|
||||||
|
case configs.Frozen:
|
||||||
|
stateStr = "1"
|
||||||
|
case configs.Thawed:
|
||||||
|
stateStr = "0"
|
||||||
|
default:
|
||||||
|
return fmt.Errorf("invalid freezer state %q requested", state)
|
||||||
|
}
|
||||||
|
|
||||||
|
fd, err := cgroups.OpenFile(dirPath, "cgroup.freeze", unix.O_RDWR)
|
||||||
|
if err != nil {
|
||||||
|
// We can ignore this request as long as the user didn't ask us to
|
||||||
|
// freeze the container (since without the freezer cgroup, that's a
|
||||||
|
// no-op).
|
||||||
|
if state != configs.Frozen {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return fmt.Errorf("freezer not supported: %w", err)
|
||||||
|
}
|
||||||
|
defer fd.Close()
|
||||||
|
|
||||||
|
if _, err := fd.WriteString(stateStr); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// Confirm that the cgroup did actually change states.
|
||||||
|
if actualState, err := readFreezer(dirPath, fd); err != nil {
|
||||||
|
return err
|
||||||
|
} else if actualState != state {
|
||||||
|
return fmt.Errorf(`expected "cgroup.freeze" to be in state %q but was in %q`, state, actualState)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func getFreezer(dirPath string) (configs.FreezerState, error) {
|
||||||
|
fd, err := cgroups.OpenFile(dirPath, "cgroup.freeze", unix.O_RDONLY)
|
||||||
|
if err != nil {
|
||||||
|
// If the kernel is too old, then we just treat the freezer as being in
|
||||||
|
// an "undefined" state.
|
||||||
|
if os.IsNotExist(err) || errors.Is(err, unix.ENODEV) {
|
||||||
|
err = nil
|
||||||
|
}
|
||||||
|
return configs.Undefined, err
|
||||||
|
}
|
||||||
|
defer fd.Close()
|
||||||
|
|
||||||
|
return readFreezer(dirPath, fd)
|
||||||
|
}
|
||||||
|
|
||||||
|
func readFreezer(dirPath string, fd *os.File) (configs.FreezerState, error) {
|
||||||
|
if _, err := fd.Seek(0, 0); err != nil {
|
||||||
|
return configs.Undefined, err
|
||||||
|
}
|
||||||
|
state := make([]byte, 2)
|
||||||
|
if _, err := fd.Read(state); err != nil {
|
||||||
|
return configs.Undefined, err
|
||||||
|
}
|
||||||
|
switch string(state) {
|
||||||
|
case "0\n":
|
||||||
|
return configs.Thawed, nil
|
||||||
|
case "1\n":
|
||||||
|
return waitFrozen(dirPath)
|
||||||
|
default:
|
||||||
|
return configs.Undefined, fmt.Errorf(`unknown "cgroup.freeze" state: %q`, state)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// waitFrozen polls cgroup.events until it sees "frozen 1" in it.
|
||||||
|
func waitFrozen(dirPath string) (configs.FreezerState, error) {
|
||||||
|
fd, err := cgroups.OpenFile(dirPath, "cgroup.events", unix.O_RDONLY)
|
||||||
|
if err != nil {
|
||||||
|
return configs.Undefined, err
|
||||||
|
}
|
||||||
|
defer fd.Close()
|
||||||
|
|
||||||
|
// XXX: Simple wait/read/retry is used here. An implementation
|
||||||
|
// based on poll(2) or inotify(7) is possible, but it makes the code
|
||||||
|
// much more complicated. Maybe address this later.
|
||||||
|
const (
|
||||||
|
// Perform maxIter with waitTime in between iterations.
|
||||||
|
waitTime = 10 * time.Millisecond
|
||||||
|
maxIter = 1000
|
||||||
|
)
|
||||||
|
scanner := bufio.NewScanner(fd)
|
||||||
|
for i := 0; scanner.Scan(); {
|
||||||
|
if i == maxIter {
|
||||||
|
return configs.Undefined, fmt.Errorf("timeout of %s reached waiting for the cgroup to freeze", waitTime*maxIter)
|
||||||
|
}
|
||||||
|
line := scanner.Text()
|
||||||
|
val := strings.TrimPrefix(line, "frozen ")
|
||||||
|
if val != line { // got prefix
|
||||||
|
if val[0] == '1' {
|
||||||
|
return configs.Frozen, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
i++
|
||||||
|
// wait, then re-read
|
||||||
|
time.Sleep(waitTime)
|
||||||
|
_, err := fd.Seek(0, 0)
|
||||||
|
if err != nil {
|
||||||
|
return configs.Undefined, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Should only reach here either on read error,
|
||||||
|
// or if the file does not contain "frozen " line.
|
||||||
|
return configs.Undefined, scanner.Err()
|
||||||
|
}
|
259
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go
generated
vendored
Normal file
259
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go
generated
vendored
Normal file
@ -0,0 +1,259 @@
|
|||||||
|
package fs2
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
)
|
||||||
|
|
||||||
|
// parseError is a package-local alias for fscommon.ParseError, used when
// reporting failures to parse a cgroup file.
type parseError = fscommon.ParseError
|
||||||
|
|
||||||
|
type manager struct {
|
||||||
|
config *configs.Cgroup
|
||||||
|
// dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope"
|
||||||
|
dirPath string
|
||||||
|
// controllers is content of "cgroup.controllers" file.
|
||||||
|
// excludes pseudo-controllers ("devices" and "freezer").
|
||||||
|
controllers map[string]struct{}
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewManager creates a manager for cgroup v2 unified hierarchy.
|
||||||
|
// dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope".
|
||||||
|
// If dirPath is empty, it is automatically set using config.
|
||||||
|
func NewManager(config *configs.Cgroup, dirPath string) (cgroups.Manager, error) {
|
||||||
|
if dirPath == "" {
|
||||||
|
var err error
|
||||||
|
dirPath, err = defaultDirPath(config)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
m := &manager{
|
||||||
|
config: config,
|
||||||
|
dirPath: dirPath,
|
||||||
|
}
|
||||||
|
return m, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) getControllers() error {
|
||||||
|
if m.controllers != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
data, err := cgroups.ReadFile(m.dirPath, "cgroup.controllers")
|
||||||
|
if err != nil {
|
||||||
|
if m.config.Rootless && m.config.Path == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
fields := strings.Fields(data)
|
||||||
|
m.controllers = make(map[string]struct{}, len(fields))
|
||||||
|
for _, c := range fields {
|
||||||
|
m.controllers[c] = struct{}{}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) Apply(pid int) error {
|
||||||
|
if err := CreateCgroupPath(m.dirPath, m.config); err != nil {
|
||||||
|
// Related tests:
|
||||||
|
// - "runc create (no limits + no cgrouppath + no permission) succeeds"
|
||||||
|
// - "runc create (rootless + no limits + cgrouppath + no permission) fails with permission error"
|
||||||
|
// - "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
|
||||||
|
if m.config.Rootless {
|
||||||
|
if m.config.Path == "" {
|
||||||
|
if blNeed, nErr := needAnyControllers(m.config.Resources); nErr == nil && !blNeed {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return fmt.Errorf("rootless needs no limits + no cgrouppath when no permission is granted for cgroups: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := cgroups.WriteCgroupProc(m.dirPath, pid); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) GetPids() ([]int, error) {
|
||||||
|
return cgroups.GetPids(m.dirPath)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) GetAllPids() ([]int, error) {
|
||||||
|
return cgroups.GetAllPids(m.dirPath)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) GetStats() (*cgroups.Stats, error) {
|
||||||
|
var errs []error
|
||||||
|
|
||||||
|
st := cgroups.NewStats()
|
||||||
|
|
||||||
|
// pids (since kernel 4.5)
|
||||||
|
if err := statPids(m.dirPath, st); err != nil {
|
||||||
|
errs = append(errs, err)
|
||||||
|
}
|
||||||
|
// memory (since kernel 4.5)
|
||||||
|
if err := statMemory(m.dirPath, st); err != nil && !os.IsNotExist(err) {
|
||||||
|
errs = append(errs, err)
|
||||||
|
}
|
||||||
|
// io (since kernel 4.5)
|
||||||
|
if err := statIo(m.dirPath, st); err != nil && !os.IsNotExist(err) {
|
||||||
|
errs = append(errs, err)
|
||||||
|
}
|
||||||
|
// cpu (since kernel 4.15)
|
||||||
|
// Note cpu.stat is available even if the controller is not enabled.
|
||||||
|
if err := statCpu(m.dirPath, st); err != nil && !os.IsNotExist(err) {
|
||||||
|
errs = append(errs, err)
|
||||||
|
}
|
||||||
|
// hugetlb (since kernel 5.6)
|
||||||
|
if err := statHugeTlb(m.dirPath, st); err != nil && !os.IsNotExist(err) {
|
||||||
|
errs = append(errs, err)
|
||||||
|
}
|
||||||
|
// rdma (since kernel 4.11)
|
||||||
|
if err := fscommon.RdmaGetStats(m.dirPath, st); err != nil && !os.IsNotExist(err) {
|
||||||
|
errs = append(errs, err)
|
||||||
|
}
|
||||||
|
if len(errs) > 0 && !m.config.Rootless {
|
||||||
|
return st, fmt.Errorf("error while statting cgroup v2: %+v", errs)
|
||||||
|
}
|
||||||
|
return st, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) Freeze(state configs.FreezerState) error {
|
||||||
|
if m.config.Resources == nil {
|
||||||
|
return errors.New("cannot toggle freezer: cgroups not configured for container")
|
||||||
|
}
|
||||||
|
if err := setFreezer(m.dirPath, state); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
m.config.Resources.Freezer = state
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) Destroy() error {
|
||||||
|
return cgroups.RemovePath(m.dirPath)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) Path(_ string) string {
|
||||||
|
return m.dirPath
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) Set(r *configs.Resources) error {
|
||||||
|
if r == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if err := m.getControllers(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// pids (since kernel 4.5)
|
||||||
|
if err := setPids(m.dirPath, r); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// memory (since kernel 4.5)
|
||||||
|
if err := setMemory(m.dirPath, r); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// io (since kernel 4.5)
|
||||||
|
if err := setIo(m.dirPath, r); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// cpu (since kernel 4.15)
|
||||||
|
if err := setCpu(m.dirPath, r); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// devices (since kernel 4.15, pseudo-controller)
|
||||||
|
//
|
||||||
|
// When rootless is true, errors from the device subsystem are ignored because it is really not expected to work.
|
||||||
|
// However, errors from other subsystems are not ignored.
|
||||||
|
// see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
|
||||||
|
if err := setDevices(m.dirPath, r); err != nil && !m.config.Rootless {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// cpuset (since kernel 5.0)
|
||||||
|
if err := setCpuset(m.dirPath, r); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// hugetlb (since kernel 5.6)
|
||||||
|
if err := setHugeTlb(m.dirPath, r); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// rdma (since kernel 4.11)
|
||||||
|
if err := fscommon.RdmaSet(m.dirPath, r); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// freezer (since kernel 5.2, pseudo-controller)
|
||||||
|
if err := setFreezer(m.dirPath, r.Freezer); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := m.setUnified(r.Unified); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
m.config.Resources = r
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) setUnified(res map[string]string) error {
|
||||||
|
for k, v := range res {
|
||||||
|
if strings.Contains(k, "/") {
|
||||||
|
return fmt.Errorf("unified resource %q must be a file name (no slashes)", k)
|
||||||
|
}
|
||||||
|
if err := cgroups.WriteFile(m.dirPath, k, v); err != nil {
|
||||||
|
// Check for both EPERM and ENOENT since O_CREAT is used by WriteFile.
|
||||||
|
if errors.Is(err, os.ErrPermission) || errors.Is(err, os.ErrNotExist) {
|
||||||
|
// Check if a controller is available,
|
||||||
|
// to give more specific error if not.
|
||||||
|
sk := strings.SplitN(k, ".", 2)
|
||||||
|
if len(sk) != 2 {
|
||||||
|
return fmt.Errorf("unified resource %q must be in the form CONTROLLER.PARAMETER", k)
|
||||||
|
}
|
||||||
|
c := sk[0]
|
||||||
|
if _, ok := m.controllers[c]; !ok && c != "cgroup" {
|
||||||
|
return fmt.Errorf("unified resource %q can't be set: controller %q not available", k, c)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return fmt.Errorf("unable to set unified resource %q: %w", k, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) GetPaths() map[string]string {
|
||||||
|
paths := make(map[string]string, 1)
|
||||||
|
paths[""] = m.dirPath
|
||||||
|
return paths
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) GetCgroups() (*configs.Cgroup, error) {
|
||||||
|
return m.config, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) GetFreezerState() (configs.FreezerState, error) {
|
||||||
|
return getFreezer(m.dirPath)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) Exists() bool {
|
||||||
|
return cgroups.PathExists(m.dirPath)
|
||||||
|
}
|
||||||
|
|
||||||
|
func OOMKillCount(path string) (uint64, error) {
|
||||||
|
return fscommon.GetValueByKey(path, "memory.events", "oom_kill")
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) OOMKillCount() (uint64, error) {
|
||||||
|
c, err := OOMKillCount(m.dirPath)
|
||||||
|
if err != nil && m.config.Rootless && os.IsNotExist(err) {
|
||||||
|
err = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return c, err
|
||||||
|
}
|
48
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/hugetlb.go
generated
vendored
Normal file
48
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/hugetlb.go
generated
vendored
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
package fs2
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strconv"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
)
|
||||||
|
|
||||||
|
func isHugeTlbSet(r *configs.Resources) bool {
|
||||||
|
return len(r.HugetlbLimit) > 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func setHugeTlb(dirPath string, r *configs.Resources) error {
|
||||||
|
if !isHugeTlbSet(r) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
for _, hugetlb := range r.HugetlbLimit {
|
||||||
|
if err := cgroups.WriteFile(dirPath, "hugetlb."+hugetlb.Pagesize+".max", strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func statHugeTlb(dirPath string, stats *cgroups.Stats) error {
|
||||||
|
hugetlbStats := cgroups.HugetlbStats{}
|
||||||
|
for _, pagesize := range cgroups.HugePageSizes() {
|
||||||
|
value, err := fscommon.GetCgroupParamUint(dirPath, "hugetlb."+pagesize+".current")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
hugetlbStats.Usage = value
|
||||||
|
|
||||||
|
fileName := "hugetlb." + pagesize + ".events"
|
||||||
|
value, err = fscommon.GetValueByKey(dirPath, fileName, "max")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
hugetlbStats.Failcnt = value
|
||||||
|
|
||||||
|
stats.HugetlbStats[pagesize] = hugetlbStats
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
193
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/io.go
generated
vendored
Normal file
193
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/io.go
generated
vendored
Normal file
@ -0,0 +1,193 @@
|
|||||||
|
package fs2
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"bytes"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
)
|
||||||
|
|
||||||
|
func isIoSet(r *configs.Resources) bool {
|
||||||
|
return r.BlkioWeight != 0 ||
|
||||||
|
len(r.BlkioWeightDevice) > 0 ||
|
||||||
|
len(r.BlkioThrottleReadBpsDevice) > 0 ||
|
||||||
|
len(r.BlkioThrottleWriteBpsDevice) > 0 ||
|
||||||
|
len(r.BlkioThrottleReadIOPSDevice) > 0 ||
|
||||||
|
len(r.BlkioThrottleWriteIOPSDevice) > 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// bfqDeviceWeightSupported checks for per-device BFQ weight support (added
// in kernel v5.4, commit 795fe54c2a8) by reading from "io.bfq.weight".
//
// bfq is the already opened "io.bfq.weight" file, or nil if it is not
// available (in which case false is returned). The file is rewound before
// reading, so the current offset of bfq does not matter.
func bfqDeviceWeightSupported(bfq *os.File) bool {
	if bfq == nil {
		return false
	}
	// Best effort: a failed seek or read leaves nothing to parse, which is
	// treated the same as non-numeric contents.
	_, _ = bfq.Seek(0, 0)
	buf := make([]byte, 32)
	n, _ := bfq.Read(buf)
	// Only look at the bytes actually read: the unused tail of buf is
	// NUL-filled, NUL is not whitespace, and so parsing the whole buffer
	// would always fail and report support even on older kernels.
	//
	// If only a single number (default weight) is read back, we have older kernel.
	_, err := strconv.ParseInt(string(bytes.TrimSpace(buf[:n])), 10, 64)
	return err != nil
}
|
||||||
|
|
||||||
|
func setIo(dirPath string, r *configs.Resources) error {
|
||||||
|
if !isIoSet(r) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// If BFQ IO scheduler is available, use it.
|
||||||
|
var bfq *os.File
|
||||||
|
if r.BlkioWeight != 0 || len(r.BlkioWeightDevice) > 0 {
|
||||||
|
var err error
|
||||||
|
bfq, err = cgroups.OpenFile(dirPath, "io.bfq.weight", os.O_RDWR)
|
||||||
|
if err == nil {
|
||||||
|
defer bfq.Close()
|
||||||
|
} else if !os.IsNotExist(err) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if r.BlkioWeight != 0 {
|
||||||
|
if bfq != nil { // Use BFQ.
|
||||||
|
if _, err := bfq.WriteString(strconv.FormatUint(uint64(r.BlkioWeight), 10)); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Fallback to io.weight with a conversion scheme.
|
||||||
|
v := cgroups.ConvertBlkIOToIOWeightValue(r.BlkioWeight)
|
||||||
|
if err := cgroups.WriteFile(dirPath, "io.weight", strconv.FormatUint(v, 10)); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if bfqDeviceWeightSupported(bfq) {
|
||||||
|
for _, wd := range r.BlkioWeightDevice {
|
||||||
|
if _, err := bfq.WriteString(wd.WeightString() + "\n"); err != nil {
|
||||||
|
return fmt.Errorf("setting device weight %q: %w", wd.WeightString(), err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, td := range r.BlkioThrottleReadBpsDevice {
|
||||||
|
if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("rbps")); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, td := range r.BlkioThrottleWriteBpsDevice {
|
||||||
|
if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("wbps")); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, td := range r.BlkioThrottleReadIOPSDevice {
|
||||||
|
if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("riops")); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, td := range r.BlkioThrottleWriteIOPSDevice {
|
||||||
|
if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("wiops")); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func readCgroup2MapFile(dirPath string, name string) (map[string][]string, error) {
|
||||||
|
ret := map[string][]string{}
|
||||||
|
f, err := cgroups.OpenFile(dirPath, name, os.O_RDONLY)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
scanner := bufio.NewScanner(f)
|
||||||
|
for scanner.Scan() {
|
||||||
|
line := scanner.Text()
|
||||||
|
parts := strings.Fields(line)
|
||||||
|
if len(parts) < 2 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
ret[parts[0]] = parts[1:]
|
||||||
|
}
|
||||||
|
if err := scanner.Err(); err != nil {
|
||||||
|
return nil, &parseError{Path: dirPath, File: name, Err: err}
|
||||||
|
}
|
||||||
|
return ret, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func statIo(dirPath string, stats *cgroups.Stats) error {
|
||||||
|
const file = "io.stat"
|
||||||
|
values, err := readCgroup2MapFile(dirPath, file)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// more details on the io.stat file format: https://www.kernel.org/doc/Documentation/cgroup-v2.txt
|
||||||
|
var parsedStats cgroups.BlkioStats
|
||||||
|
for k, v := range values {
|
||||||
|
d := strings.Split(k, ":")
|
||||||
|
if len(d) != 2 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
major, err := strconv.ParseUint(d[0], 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
return &parseError{Path: dirPath, File: file, Err: err}
|
||||||
|
}
|
||||||
|
minor, err := strconv.ParseUint(d[1], 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
return &parseError{Path: dirPath, File: file, Err: err}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, item := range v {
|
||||||
|
d := strings.Split(item, "=")
|
||||||
|
if len(d) != 2 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
op := d[0]
|
||||||
|
|
||||||
|
// Map to the cgroupv1 naming and layout (in separate tables).
|
||||||
|
var targetTable *[]cgroups.BlkioStatEntry
|
||||||
|
switch op {
|
||||||
|
// Equivalent to cgroupv1's blkio.io_service_bytes.
|
||||||
|
case "rbytes":
|
||||||
|
op = "Read"
|
||||||
|
targetTable = &parsedStats.IoServiceBytesRecursive
|
||||||
|
case "wbytes":
|
||||||
|
op = "Write"
|
||||||
|
targetTable = &parsedStats.IoServiceBytesRecursive
|
||||||
|
// Equivalent to cgroupv1's blkio.io_serviced.
|
||||||
|
case "rios":
|
||||||
|
op = "Read"
|
||||||
|
targetTable = &parsedStats.IoServicedRecursive
|
||||||
|
case "wios":
|
||||||
|
op = "Write"
|
||||||
|
targetTable = &parsedStats.IoServicedRecursive
|
||||||
|
default:
|
||||||
|
// Skip over entries we cannot map to cgroupv1 stats for now.
|
||||||
|
// In the future we should expand the stats struct to include
|
||||||
|
// them.
|
||||||
|
logrus.Debugf("cgroupv2 io stats: skipping over unmappable %s entry", item)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
value, err := strconv.ParseUint(d[1], 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
return &parseError{Path: dirPath, File: file, Err: err}
|
||||||
|
}
|
||||||
|
|
||||||
|
entry := cgroups.BlkioStatEntry{
|
||||||
|
Op: op,
|
||||||
|
Major: major,
|
||||||
|
Minor: minor,
|
||||||
|
Value: value,
|
||||||
|
}
|
||||||
|
*targetTable = append(*targetTable, entry)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stats.BlkioStats = parsedStats
|
||||||
|
return nil
|
||||||
|
}
|
216
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go
generated
vendored
Normal file
216
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go
generated
vendored
Normal file
@ -0,0 +1,216 @@
|
|||||||
|
package fs2
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"errors"
|
||||||
|
"math"
|
||||||
|
"os"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
)
|
||||||
|
|
||||||
|
// numToStr renders an int64 as the string to be written to a cgroupv2
// file with a .min, .max, .low, or .high suffix.
//
// Zero yields the empty string. The value -1 yields "max", for cgroupv1
// compatibility (where -1 was written to remove the limit). Every other
// value is formatted in base 10.
func numToStr(value int64) (ret string) {
	if value == 0 {
		return ""
	}
	if value == -1 {
		return "max"
	}
	return strconv.FormatInt(value, 10)
}
|
||||||
|
|
||||||
|
func isMemorySet(r *configs.Resources) bool {
|
||||||
|
return r.MemoryReservation != 0 || r.Memory != 0 || r.MemorySwap != 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func setMemory(dirPath string, r *configs.Resources) error {
|
||||||
|
if !isMemorySet(r) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
swap, err := cgroups.ConvertMemorySwapToCgroupV2Value(r.MemorySwap, r.Memory)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
swapStr := numToStr(swap)
|
||||||
|
if swapStr == "" && swap == 0 && r.MemorySwap > 0 {
|
||||||
|
// memory and memorySwap set to the same value -- disable swap
|
||||||
|
swapStr = "0"
|
||||||
|
}
|
||||||
|
// never write empty string to `memory.swap.max`, it means set to 0.
|
||||||
|
if swapStr != "" {
|
||||||
|
if err := cgroups.WriteFile(dirPath, "memory.swap.max", swapStr); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if val := numToStr(r.Memory); val != "" {
|
||||||
|
if err := cgroups.WriteFile(dirPath, "memory.max", val); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// cgroup.Resources.KernelMemory is ignored
|
||||||
|
|
||||||
|
if val := numToStr(r.MemoryReservation); val != "" {
|
||||||
|
if err := cgroups.WriteFile(dirPath, "memory.low", val); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func statMemory(dirPath string, stats *cgroups.Stats) error {
|
||||||
|
const file = "memory.stat"
|
||||||
|
statsFile, err := cgroups.OpenFile(dirPath, file, os.O_RDONLY)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer statsFile.Close()
|
||||||
|
|
||||||
|
sc := bufio.NewScanner(statsFile)
|
||||||
|
for sc.Scan() {
|
||||||
|
t, v, err := fscommon.ParseKeyValue(sc.Text())
|
||||||
|
if err != nil {
|
||||||
|
return &parseError{Path: dirPath, File: file, Err: err}
|
||||||
|
}
|
||||||
|
stats.MemoryStats.Stats[t] = v
|
||||||
|
}
|
||||||
|
if err := sc.Err(); err != nil {
|
||||||
|
return &parseError{Path: dirPath, File: file, Err: err}
|
||||||
|
}
|
||||||
|
stats.MemoryStats.Cache = stats.MemoryStats.Stats["file"]
|
||||||
|
// Unlike cgroup v1 which has memory.use_hierarchy binary knob,
|
||||||
|
// cgroup v2 is always hierarchical.
|
||||||
|
stats.MemoryStats.UseHierarchy = true
|
||||||
|
|
||||||
|
memoryUsage, err := getMemoryDataV2(dirPath, "")
|
||||||
|
if err != nil {
|
||||||
|
if errors.Is(err, unix.ENOENT) && dirPath == UnifiedMountpoint {
|
||||||
|
// The root cgroup does not have memory.{current,max}
|
||||||
|
// so emulate those using data from /proc/meminfo.
|
||||||
|
return statsFromMeminfo(stats)
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
stats.MemoryStats.Usage = memoryUsage
|
||||||
|
swapUsage, err := getMemoryDataV2(dirPath, "swap")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// As cgroup v1 reports SwapUsage values as mem+swap combined,
|
||||||
|
// while in cgroup v2 swap values do not include memory,
|
||||||
|
// report combined mem+swap for v1 compatibility.
|
||||||
|
swapUsage.Usage += memoryUsage.Usage
|
||||||
|
if swapUsage.Limit != math.MaxUint64 {
|
||||||
|
swapUsage.Limit += memoryUsage.Limit
|
||||||
|
}
|
||||||
|
stats.MemoryStats.SwapUsage = swapUsage
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func getMemoryDataV2(path, name string) (cgroups.MemoryData, error) {
|
||||||
|
memoryData := cgroups.MemoryData{}
|
||||||
|
|
||||||
|
moduleName := "memory"
|
||||||
|
if name != "" {
|
||||||
|
moduleName = "memory." + name
|
||||||
|
}
|
||||||
|
usage := moduleName + ".current"
|
||||||
|
limit := moduleName + ".max"
|
||||||
|
|
||||||
|
value, err := fscommon.GetCgroupParamUint(path, usage)
|
||||||
|
if err != nil {
|
||||||
|
if name != "" && os.IsNotExist(err) {
|
||||||
|
// Ignore EEXIST as there's no swap accounting
|
||||||
|
// if kernel CONFIG_MEMCG_SWAP is not set or
|
||||||
|
// swapaccount=0 kernel boot parameter is given.
|
||||||
|
return cgroups.MemoryData{}, nil
|
||||||
|
}
|
||||||
|
return cgroups.MemoryData{}, err
|
||||||
|
}
|
||||||
|
memoryData.Usage = value
|
||||||
|
|
||||||
|
value, err = fscommon.GetCgroupParamUint(path, limit)
|
||||||
|
if err != nil {
|
||||||
|
return cgroups.MemoryData{}, err
|
||||||
|
}
|
||||||
|
memoryData.Limit = value
|
||||||
|
|
||||||
|
return memoryData, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func statsFromMeminfo(stats *cgroups.Stats) error {
|
||||||
|
const file = "/proc/meminfo"
|
||||||
|
f, err := os.Open(file)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
// Fields we are interested in.
|
||||||
|
var (
|
||||||
|
swap_free uint64
|
||||||
|
swap_total uint64
|
||||||
|
main_total uint64
|
||||||
|
main_free uint64
|
||||||
|
)
|
||||||
|
mem := map[string]*uint64{
|
||||||
|
"SwapFree": &swap_free,
|
||||||
|
"SwapTotal": &swap_total,
|
||||||
|
"MemTotal": &main_total,
|
||||||
|
"MemFree": &main_free,
|
||||||
|
}
|
||||||
|
|
||||||
|
found := 0
|
||||||
|
sc := bufio.NewScanner(f)
|
||||||
|
for sc.Scan() {
|
||||||
|
parts := strings.SplitN(sc.Text(), ":", 3)
|
||||||
|
if len(parts) != 2 {
|
||||||
|
// Should not happen.
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
k := parts[0]
|
||||||
|
p, ok := mem[k]
|
||||||
|
if !ok {
|
||||||
|
// Unknown field -- not interested.
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
vStr := strings.TrimSpace(strings.TrimSuffix(parts[1], " kB"))
|
||||||
|
*p, err = strconv.ParseUint(vStr, 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
return &parseError{File: file, Err: errors.New("bad value for " + k)}
|
||||||
|
}
|
||||||
|
|
||||||
|
found++
|
||||||
|
if found == len(mem) {
|
||||||
|
// Got everything we need -- skip the rest.
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if err := sc.Err(); err != nil {
|
||||||
|
return &parseError{Path: "", File: file, Err: err}
|
||||||
|
}
|
||||||
|
|
||||||
|
stats.MemoryStats.SwapUsage.Usage = (swap_total - swap_free) * 1024
|
||||||
|
stats.MemoryStats.SwapUsage.Limit = math.MaxUint64
|
||||||
|
|
||||||
|
stats.MemoryStats.Usage.Usage = (main_total - main_free) * 1024
|
||||||
|
stats.MemoryStats.Usage.Limit = math.MaxUint64
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
72
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/pids.go
generated
vendored
Normal file
72
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/pids.go
generated
vendored
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
package fs2
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"math"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
)
|
||||||
|
|
||||||
|
func isPidsSet(r *configs.Resources) bool {
|
||||||
|
return r.PidsLimit != 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func setPids(dirPath string, r *configs.Resources) error {
|
||||||
|
if !isPidsSet(r) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if val := numToStr(r.PidsLimit); val != "" {
|
||||||
|
if err := cgroups.WriteFile(dirPath, "pids.max", val); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func statPidsFromCgroupProcs(dirPath string, stats *cgroups.Stats) error {
|
||||||
|
// if the controller is not enabled, let's read PIDS from cgroups.procs
|
||||||
|
// (or threads if cgroup.threads is enabled)
|
||||||
|
contents, err := cgroups.ReadFile(dirPath, "cgroup.procs")
|
||||||
|
if errors.Is(err, unix.ENOTSUP) {
|
||||||
|
contents, err = cgroups.ReadFile(dirPath, "cgroup.threads")
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
pids := strings.Count(contents, "\n")
|
||||||
|
stats.PidsStats.Current = uint64(pids)
|
||||||
|
stats.PidsStats.Limit = 0
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func statPids(dirPath string, stats *cgroups.Stats) error {
|
||||||
|
current, err := fscommon.GetCgroupParamUint(dirPath, "pids.current")
|
||||||
|
if err != nil {
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
return statPidsFromCgroupProcs(dirPath, stats)
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
max, err := fscommon.GetCgroupParamUint(dirPath, "pids.max")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// If no limit is set, read from pids.max returns "max", which is
|
||||||
|
// converted to MaxUint64 by GetCgroupParamUint. Historically, we
|
||||||
|
// represent "no limit" for pids as 0, thus this conversion.
|
||||||
|
if max == math.MaxUint64 {
|
||||||
|
max = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
stats.PidsStats.Current = current
|
||||||
|
stats.PidsStats.Limit = max
|
||||||
|
return nil
|
||||||
|
}
|
121
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/rdma.go
generated
vendored
Normal file
121
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/rdma.go
generated
vendored
Normal file
@ -0,0 +1,121 @@
|
|||||||
|
package fscommon
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"errors"
|
||||||
|
"math"
|
||||||
|
"os"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
)
|
||||||
|
|
||||||
|
// parseRdmaKV parses raw string to RdmaEntry.
|
||||||
|
func parseRdmaKV(raw string, entry *cgroups.RdmaEntry) error {
|
||||||
|
var value uint32
|
||||||
|
|
||||||
|
parts := strings.SplitN(raw, "=", 3)
|
||||||
|
|
||||||
|
if len(parts) != 2 {
|
||||||
|
return errors.New("Unable to parse RDMA entry")
|
||||||
|
}
|
||||||
|
|
||||||
|
k, v := parts[0], parts[1]
|
||||||
|
|
||||||
|
if v == "max" {
|
||||||
|
value = math.MaxUint32
|
||||||
|
} else {
|
||||||
|
val64, err := strconv.ParseUint(v, 10, 32)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
value = uint32(val64)
|
||||||
|
}
|
||||||
|
if k == "hca_handle" {
|
||||||
|
entry.HcaHandles = value
|
||||||
|
} else if k == "hca_object" {
|
||||||
|
entry.HcaObjects = value
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// readRdmaEntries reads and converts array of rawstrings to RdmaEntries from file.
|
||||||
|
// example entry: mlx4_0 hca_handle=2 hca_object=2000
|
||||||
|
func readRdmaEntries(dir, file string) ([]cgroups.RdmaEntry, error) {
|
||||||
|
rdmaEntries := make([]cgroups.RdmaEntry, 0)
|
||||||
|
fd, err := cgroups.OpenFile(dir, file, unix.O_RDONLY)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer fd.Close() //nolint:errorlint
|
||||||
|
scanner := bufio.NewScanner(fd)
|
||||||
|
for scanner.Scan() {
|
||||||
|
parts := strings.SplitN(scanner.Text(), " ", 4)
|
||||||
|
if len(parts) == 3 {
|
||||||
|
entry := new(cgroups.RdmaEntry)
|
||||||
|
entry.Device = parts[0]
|
||||||
|
err = parseRdmaKV(parts[1], entry)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
err = parseRdmaKV(parts[2], entry)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
rdmaEntries = append(rdmaEntries, *entry)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return rdmaEntries, scanner.Err()
|
||||||
|
}
|
||||||
|
|
||||||
|
// RdmaGetStats returns rdma stats such as totalLimit and current entries.
|
||||||
|
func RdmaGetStats(path string, stats *cgroups.Stats) error {
|
||||||
|
currentEntries, err := readRdmaEntries(path, "rdma.current")
|
||||||
|
if err != nil {
|
||||||
|
if errors.Is(err, os.ErrNotExist) {
|
||||||
|
err = nil
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
maxEntries, err := readRdmaEntries(path, "rdma.max")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// If device got removed between reading two files, ignore returning stats.
|
||||||
|
if len(currentEntries) != len(maxEntries) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
stats.RdmaStats = cgroups.RdmaStats{
|
||||||
|
RdmaLimit: maxEntries,
|
||||||
|
RdmaCurrent: currentEntries,
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func createCmdString(device string, limits configs.LinuxRdma) string {
|
||||||
|
cmdString := device
|
||||||
|
if limits.HcaHandles != nil {
|
||||||
|
cmdString += " hca_handle=" + strconv.FormatUint(uint64(*limits.HcaHandles), 10)
|
||||||
|
}
|
||||||
|
if limits.HcaObjects != nil {
|
||||||
|
cmdString += " hca_object=" + strconv.FormatUint(uint64(*limits.HcaObjects), 10)
|
||||||
|
}
|
||||||
|
return cmdString
|
||||||
|
}
|
||||||
|
|
||||||
|
// RdmaSet sets RDMA resources.
|
||||||
|
func RdmaSet(path string, r *configs.Resources) error {
|
||||||
|
for device, limits := range r.Rdma {
|
||||||
|
if err := cgroups.WriteFile(path, "rdma.max", createCmdString(device, limits)); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
145
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/utils.go
generated
vendored
Normal file
145
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/utils.go
generated
vendored
Normal file
@ -0,0 +1,145 @@
|
|||||||
|
package fscommon
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"math"
|
||||||
|
"path"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
// Deprecated: use cgroups.OpenFile instead.
|
||||||
|
OpenFile = cgroups.OpenFile
|
||||||
|
// Deprecated: use cgroups.ReadFile instead.
|
||||||
|
ReadFile = cgroups.ReadFile
|
||||||
|
// Deprecated: use cgroups.WriteFile instead.
|
||||||
|
WriteFile = cgroups.WriteFile
|
||||||
|
)
|
||||||
|
|
||||||
|
// ParseError describes a failure to parse a file. Path and File are joined
// to form the location reported in the message; Err is the underlying
// cause.
type ParseError struct {
	Path string
	File string
	Err  error
}

// Error implements the error interface. The message has the form
// "unable to parse <Path/File>: <cause>".
func (e *ParseError) Error() string {
	location := path.Join(e.Path, e.File)
	cause := e.Err.Error()
	return "unable to parse " + location + ": " + cause
}

// Unwrap returns the underlying cause, for use with errors.Is and
// errors.As.
func (e *ParseError) Unwrap() error {
	return e.Err
}
|
||||||
|
|
||||||
|
// ParseUint converts a string to an uint64 integer.
//
// Negative values are reported as zero with a nil error because, due to
// kernel bugs, some of the memory cgroup stats can be negative. This
// covers negative values that fit into the requested bit size as well as
// those that are out of range. For any other failure, the value and
// error from strconv.ParseUint are returned unchanged.
func ParseUint(s string, base, bitSize int) (uint64, error) {
	value, err := strconv.ParseUint(s, base, bitSize)
	if err == nil {
		return value, nil
	}

	// Retry as a signed number to find out whether the input was negative.
	signed, signedErr := strconv.ParseInt(s, base, bitSize)
	if signed < 0 && (signedErr == nil || errors.Is(signedErr, strconv.ErrRange)) {
		return 0, nil
	}

	return value, err
}
|
||||||
|
|
||||||
|
// ParseKeyValue parses a space-separated "name value" kind of cgroup
|
||||||
|
// parameter and returns its key as a string, and its value as uint64
|
||||||
|
// (ParseUint is used to convert the value). For example,
|
||||||
|
// "io_service_bytes 1234" will be returned as "io_service_bytes", 1234.
|
||||||
|
func ParseKeyValue(t string) (string, uint64, error) {
|
||||||
|
parts := strings.SplitN(t, " ", 3)
|
||||||
|
if len(parts) != 2 {
|
||||||
|
return "", 0, fmt.Errorf("line %q is not in key value format", t)
|
||||||
|
}
|
||||||
|
|
||||||
|
value, err := ParseUint(parts[1], 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
return "", 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return parts[0], value, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetValueByKey reads a key-value pairs from the specified cgroup file,
|
||||||
|
// and returns a value of the specified key. ParseUint is used for value
|
||||||
|
// conversion.
|
||||||
|
func GetValueByKey(path, file, key string) (uint64, error) {
|
||||||
|
content, err := cgroups.ReadFile(path, file)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
lines := strings.Split(content, "\n")
|
||||||
|
for _, line := range lines {
|
||||||
|
arr := strings.Split(line, " ")
|
||||||
|
if len(arr) == 2 && arr[0] == key {
|
||||||
|
val, err := ParseUint(arr[1], 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
err = &ParseError{Path: path, File: file, Err: err}
|
||||||
|
}
|
||||||
|
return val, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetCgroupParamUint reads a single uint64 value from the specified cgroup file.
|
||||||
|
// If the value read is "max", the math.MaxUint64 is returned.
|
||||||
|
func GetCgroupParamUint(path, file string) (uint64, error) {
|
||||||
|
contents, err := GetCgroupParamString(path, file)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
contents = strings.TrimSpace(contents)
|
||||||
|
if contents == "max" {
|
||||||
|
return math.MaxUint64, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
res, err := ParseUint(contents, 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
return res, &ParseError{Path: path, File: file, Err: err}
|
||||||
|
}
|
||||||
|
return res, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetCgroupParamInt reads a single int64 value from specified cgroup file.
|
||||||
|
// If the value read is "max", the math.MaxInt64 is returned.
|
||||||
|
func GetCgroupParamInt(path, file string) (int64, error) {
|
||||||
|
contents, err := cgroups.ReadFile(path, file)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
contents = strings.TrimSpace(contents)
|
||||||
|
if contents == "max" {
|
||||||
|
return math.MaxInt64, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
res, err := strconv.ParseInt(contents, 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
return res, &ParseError{Path: path, File: file, Err: err}
|
||||||
|
}
|
||||||
|
return res, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetCgroupParamString reads a string from the specified cgroup file.
|
||||||
|
func GetCgroupParamString(path, file string) (string, error) {
|
||||||
|
contents, err := cgroups.ReadFile(path, file)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
return strings.TrimSpace(contents), nil
|
||||||
|
}
|
27
vendor/github.com/opencontainers/runc/libcontainer/cgroups/getallpids.go
generated
vendored
Normal file
27
vendor/github.com/opencontainers/runc/libcontainer/cgroups/getallpids.go
generated
vendored
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
package cgroups
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io/fs"
|
||||||
|
"path/filepath"
|
||||||
|
)
|
||||||
|
|
||||||
|
// GetAllPids returns all pids from the cgroup identified by path, and all its
|
||||||
|
// sub-cgroups.
|
||||||
|
func GetAllPids(path string) ([]int, error) {
|
||||||
|
var pids []int
|
||||||
|
err := filepath.WalkDir(path, func(p string, d fs.DirEntry, iErr error) error {
|
||||||
|
if iErr != nil {
|
||||||
|
return iErr
|
||||||
|
}
|
||||||
|
if !d.IsDir() {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
cPids, err := readProcsFile(p)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
pids = append(pids, cPids...)
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
return pids, err
|
||||||
|
}
|
173
vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go
generated
vendored
Normal file
173
vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go
generated
vendored
Normal file
@ -0,0 +1,173 @@
|
|||||||
|
package cgroups
|
||||||
|
|
||||||
|
// ThrottlingData holds CPU throttling counters. Zero-valued fields are
// omitted from the JSON encoding.
type ThrottlingData struct {
	// Periods is the number of periods with throttling active.
	Periods uint64 `json:"periods,omitempty"`
	// ThrottledPeriods is the number of periods when the container hit
	// its throttling limit.
	ThrottledPeriods uint64 `json:"throttled_periods,omitempty"`
	// ThrottledTime is the aggregate time the container was throttled
	// for, in nanoseconds.
	ThrottledTime uint64 `json:"throttled_time,omitempty"`
}
|
||||||
|
|
||||||
|
// CpuUsage denotes the usage of a CPU.
// All CPU stats are aggregate since container inception, and all times
// are expressed in nanoseconds.
type CpuUsage struct {
	// TotalUsage is the total CPU time consumed.
	// Units: nanoseconds.
	TotalUsage uint64 `json:"total_usage,omitempty"`
	// PercpuUsage is the total CPU time consumed per core.
	// Units: nanoseconds.
	PercpuUsage []uint64 `json:"percpu_usage,omitempty"`
	// PercpuUsageInKernelmode is the CPU time consumed per core in
	// kernel mode.
	// Units: nanoseconds.
	PercpuUsageInKernelmode []uint64 `json:"percpu_usage_in_kernelmode"`
	// PercpuUsageInUsermode is the CPU time consumed per core in user
	// mode.
	// Units: nanoseconds.
	PercpuUsageInUsermode []uint64 `json:"percpu_usage_in_usermode"`
	// UsageInKernelmode is the time spent by tasks of the cgroup in
	// kernel mode.
	// Units: nanoseconds.
	UsageInKernelmode uint64 `json:"usage_in_kernelmode"`
	// UsageInUsermode is the time spent by tasks of the cgroup in user
	// mode.
	// Units: nanoseconds.
	UsageInUsermode uint64 `json:"usage_in_usermode"`
}
|
||||||
|
|
||||||
|
type CpuStats struct {
|
||||||
|
CpuUsage CpuUsage `json:"cpu_usage,omitempty"`
|
||||||
|
ThrottlingData ThrottlingData `json:"throttling_data,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// CPUSetStats holds the cpuset settings and flags of a cgroup.
type CPUSetStats struct {
	// CPUs lists the physical numbers of the CPUs on which processes
	// in that cpuset are allowed to execute.
	CPUs []uint16 `json:"cpus,omitempty"`
	// CPUExclusive is the cpu_exclusive flag.
	CPUExclusive uint64 `json:"cpu_exclusive"`
	// Mems lists the memory nodes on which processes in that cpuset
	// are allowed to allocate memory.
	Mems []uint16 `json:"mems,omitempty"`
	// MemHardwall is the mem_hardwall flag.
	MemHardwall uint64 `json:"mem_hardwall"`
	// MemExclusive is the mem_exclusive flag.
	MemExclusive uint64 `json:"mem_exclusive"`
	// MemoryMigrate is the memory_migrate flag.
	MemoryMigrate uint64 `json:"memory_migrate"`
	// MemorySpreadPage is the memory_spread page flag.
	MemorySpreadPage uint64 `json:"memory_spread_page"`
	// MemorySpreadSlab is the memory_spread slab flag.
	MemorySpreadSlab uint64 `json:"memory_spread_slab"`
	// MemoryPressure is the memory_pressure value.
	MemoryPressure uint64 `json:"memory_pressure"`
	// SchedLoadBalance is the sched_load balance flag.
	SchedLoadBalance uint64 `json:"sched_load_balance"`
	// SchedRelaxDomainLevel is the sched_relax_domain_level value.
	SchedRelaxDomainLevel int64 `json:"sched_relax_domain_level"`
}
|
||||||
|
|
||||||
|
// MemoryData holds one set of memory counters (usage, maximum usage,
// failure count and limit). MemoryStats uses it for each memory category it
// reports.
type MemoryData struct {
	Usage    uint64 `json:"usage,omitempty"`
	MaxUsage uint64 `json:"max_usage,omitempty"`
	Failcnt  uint64 `json:"failcnt"`
	Limit    uint64 `json:"limit"`
}
|
||||||
|
|
||||||
|
type MemoryStats struct {
|
||||||
|
// memory used for cache
|
||||||
|
Cache uint64 `json:"cache,omitempty"`
|
||||||
|
// usage of memory
|
||||||
|
Usage MemoryData `json:"usage,omitempty"`
|
||||||
|
// usage of memory + swap
|
||||||
|
SwapUsage MemoryData `json:"swap_usage,omitempty"`
|
||||||
|
// usage of kernel memory
|
||||||
|
KernelUsage MemoryData `json:"kernel_usage,omitempty"`
|
||||||
|
// usage of kernel TCP memory
|
||||||
|
KernelTCPUsage MemoryData `json:"kernel_tcp_usage,omitempty"`
|
||||||
|
// usage of memory pages by NUMA node
|
||||||
|
// see chapter 5.6 of memory controller documentation
|
||||||
|
PageUsageByNUMA PageUsageByNUMA `json:"page_usage_by_numa,omitempty"`
|
||||||
|
// if true, memory usage is accounted for throughout a hierarchy of cgroups.
|
||||||
|
UseHierarchy bool `json:"use_hierarchy"`
|
||||||
|
|
||||||
|
Stats map[string]uint64 `json:"stats,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type PageUsageByNUMA struct {
|
||||||
|
// Embedding is used as types can't be recursive.
|
||||||
|
PageUsageByNUMAInner
|
||||||
|
Hierarchical PageUsageByNUMAInner `json:"hierarchical,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type PageUsageByNUMAInner struct {
|
||||||
|
Total PageStats `json:"total,omitempty"`
|
||||||
|
File PageStats `json:"file,omitempty"`
|
||||||
|
Anon PageStats `json:"anon,omitempty"`
|
||||||
|
Unevictable PageStats `json:"unevictable,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// PageStats holds a page count as an overall total plus a per-node
// breakdown keyed by a uint8 node number.
type PageStats struct {
	Total uint64           `json:"total,omitempty"`
	Nodes map[uint8]uint64 `json:"nodes,omitempty"`
}
|
||||||
|
|
||||||
|
// PidsStats holds the pids statistics of a cgroup.
type PidsStats struct {
	// number of pids in the cgroup
	Current uint64 `json:"current,omitempty"`
	// active pids hard limit
	Limit uint64 `json:"limit,omitempty"`
}
|
||||||
|
|
||||||
|
// BlkioStatEntry is a single block I/O counter: a value for one operation
// (Op) on the device identified by Major and Minor.
type BlkioStatEntry struct {
	Major uint64 `json:"major,omitempty"`
	Minor uint64 `json:"minor,omitempty"`
	Op    string `json:"op,omitempty"`
	Value uint64 `json:"value,omitempty"`
}
|
||||||
|
|
||||||
|
type BlkioStats struct {
|
||||||
|
// number of bytes transferred to and from the block device
|
||||||
|
IoServiceBytesRecursive []BlkioStatEntry `json:"io_service_bytes_recursive,omitempty"`
|
||||||
|
IoServicedRecursive []BlkioStatEntry `json:"io_serviced_recursive,omitempty"`
|
||||||
|
IoQueuedRecursive []BlkioStatEntry `json:"io_queue_recursive,omitempty"`
|
||||||
|
IoServiceTimeRecursive []BlkioStatEntry `json:"io_service_time_recursive,omitempty"`
|
||||||
|
IoWaitTimeRecursive []BlkioStatEntry `json:"io_wait_time_recursive,omitempty"`
|
||||||
|
IoMergedRecursive []BlkioStatEntry `json:"io_merged_recursive,omitempty"`
|
||||||
|
IoTimeRecursive []BlkioStatEntry `json:"io_time_recursive,omitempty"`
|
||||||
|
SectorsRecursive []BlkioStatEntry `json:"sectors_recursive,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// HugetlbStats holds the hugetlb counters for one huge page size.
type HugetlbStats struct {
	// current res_counter usage for hugetlb
	Usage uint64 `json:"usage,omitempty"`
	// maximum usage ever recorded.
	MaxUsage uint64 `json:"max_usage,omitempty"`
	// number of times hugetlb usage allocation failure.
	Failcnt uint64 `json:"failcnt"`
}
|
||||||
|
|
||||||
|
// RdmaEntry holds the RDMA handle and object counts for one device.
type RdmaEntry struct {
	Device     string `json:"device,omitempty"`
	HcaHandles uint32 `json:"hca_handles,omitempty"`
	HcaObjects uint32 `json:"hca_objects,omitempty"`
}
|
||||||
|
|
||||||
|
type RdmaStats struct {
|
||||||
|
RdmaLimit []RdmaEntry `json:"rdma_limit,omitempty"`
|
||||||
|
RdmaCurrent []RdmaEntry `json:"rdma_current,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type Stats struct {
|
||||||
|
CpuStats CpuStats `json:"cpu_stats,omitempty"`
|
||||||
|
CPUSetStats CPUSetStats `json:"cpuset_stats,omitempty"`
|
||||||
|
MemoryStats MemoryStats `json:"memory_stats,omitempty"`
|
||||||
|
PidsStats PidsStats `json:"pids_stats,omitempty"`
|
||||||
|
BlkioStats BlkioStats `json:"blkio_stats,omitempty"`
|
||||||
|
// the map is in the format "size of hugepage: stats of the hugepage"
|
||||||
|
HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"`
|
||||||
|
RdmaStats RdmaStats `json:"rdma_stats,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewStats() *Stats {
|
||||||
|
memoryStats := MemoryStats{Stats: make(map[string]uint64)}
|
||||||
|
hugetlbStats := make(map[string]HugetlbStats)
|
||||||
|
return &Stats{MemoryStats: memoryStats, HugetlbStats: hugetlbStats}
|
||||||
|
}
|
564
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/common.go
generated
vendored
Normal file
564
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/common.go
generated
vendored
Normal file
@ -0,0 +1,564 @@
|
|||||||
|
package systemd
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"math"
|
||||||
|
"os"
|
||||||
|
"regexp"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||||
|
dbus "github.com/godbus/dbus/v5"
|
||||||
|
"github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
cgroupdevices "github.com/opencontainers/runc/libcontainer/cgroups/devices"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/devices"
|
||||||
|
)
|
||||||
|
|
||||||
|
// defCPUQuotaPeriod is the default kernel value for the cpu quota period:
// 100000 us (100 ms), same for v1 and v2.
// v1: https://www.kernel.org/doc/html/latest/scheduler/sched-bwc.html and
// v2: https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html
const defCPUQuotaPeriod = uint64(100000)
|
||||||
|
|
||||||
|
// Cached systemd version: systemdVersion fills version exactly once,
// guarded by versionOnce.
var (
	versionOnce sync.Once
	version     int
)

// Cached result of the "is systemd running" probe: IsRunningSystemd fills
// isRunningSystemd exactly once, guarded by isRunningSystemdOnce.
var (
	isRunningSystemdOnce sync.Once
	isRunningSystemd     bool
)
|
||||||
|
|
||||||
|
// NOTE: This function comes from package github.com/coreos/go-systemd/util
|
||||||
|
// It was borrowed here to avoid a dependency on cgo.
|
||||||
|
//
|
||||||
|
// IsRunningSystemd checks whether the host was booted with systemd as its init
|
||||||
|
// system. This functions similarly to systemd's `sd_booted(3)`: internally, it
|
||||||
|
// checks whether /run/systemd/system/ exists and is a directory.
|
||||||
|
// http://www.freedesktop.org/software/systemd/man/sd_booted.html
|
||||||
|
func IsRunningSystemd() bool {
|
||||||
|
isRunningSystemdOnce.Do(func() {
|
||||||
|
fi, err := os.Lstat("/run/systemd/system")
|
||||||
|
isRunningSystemd = err == nil && fi.IsDir()
|
||||||
|
})
|
||||||
|
return isRunningSystemd
|
||||||
|
}
|
||||||
|
|
||||||
|
// ExpandSlice converts a systemd slice unit name into the cgroup path of
// that slice.
//
// systemd represents slice hierarchy using `-`, so we need to follow suit when
// generating the path of slice. Essentially, test-a-b.slice becomes
// /test.slice/test-a.slice/test-a-b.slice. The root slice "-.slice" expands
// to "/".
//
// An error is returned when the name does not end in ".slice", is just
// ".slice", contains a path separator, or has an empty component (such as
// test--a.slice or -test.slice).
func ExpandSlice(slice string) (string, error) {
	const suffix = ".slice"
	// Name has to end with ".slice", but can't be just ".slice".
	if len(slice) <= len(suffix) || !strings.HasSuffix(slice, suffix) {
		return "", fmt.Errorf("invalid slice name: %s", slice)
	}

	// Path-separators are not allowed.
	if strings.Contains(slice, "/") {
		return "", fmt.Errorf("invalid slice name: %s", slice)
	}

	sliceName := strings.TrimSuffix(slice, suffix)
	// if input was -.slice, we should just return root now
	if sliceName == "-" {
		return "/", nil
	}

	var (
		path   strings.Builder
		prefix string
	)
	for _, component := range strings.Split(sliceName, "-") {
		// test--a.slice isn't permitted, nor is -test.slice.
		if component == "" {
			return "", fmt.Errorf("invalid slice name: %s", slice)
		}

		// Append the component to the path and to the prefix.
		path.WriteString("/" + prefix + component + suffix)
		prefix += component + "-"
	}
	return path.String(), nil
}
|
||||||
|
|
||||||
|
func groupPrefix(ruleType devices.Type) (string, error) {
|
||||||
|
switch ruleType {
|
||||||
|
case devices.BlockDevice:
|
||||||
|
return "block-", nil
|
||||||
|
case devices.CharDevice:
|
||||||
|
return "char-", nil
|
||||||
|
default:
|
||||||
|
return "", fmt.Errorf("device type %v has no group prefix", ruleType)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// findDeviceGroup tries to find the device group name (as listed in
|
||||||
|
// /proc/devices) with the type prefixed as required for DeviceAllow, for a
|
||||||
|
// given (type, major) combination. If more than one device group exists, an
|
||||||
|
// arbitrary one is chosen.
|
||||||
|
func findDeviceGroup(ruleType devices.Type, ruleMajor int64) (string, error) {
|
||||||
|
fh, err := os.Open("/proc/devices")
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
defer fh.Close()
|
||||||
|
|
||||||
|
prefix, err := groupPrefix(ruleType)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
scanner := bufio.NewScanner(fh)
|
||||||
|
var currentType devices.Type
|
||||||
|
for scanner.Scan() {
|
||||||
|
// We need to strip spaces because the first number is column-aligned.
|
||||||
|
line := strings.TrimSpace(scanner.Text())
|
||||||
|
|
||||||
|
// Handle the "header" lines.
|
||||||
|
switch line {
|
||||||
|
case "Block devices:":
|
||||||
|
currentType = devices.BlockDevice
|
||||||
|
continue
|
||||||
|
case "Character devices:":
|
||||||
|
currentType = devices.CharDevice
|
||||||
|
continue
|
||||||
|
case "":
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip lines unrelated to our type.
|
||||||
|
if currentType != ruleType {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse out the (major, name).
|
||||||
|
var (
|
||||||
|
currMajor int64
|
||||||
|
currName string
|
||||||
|
)
|
||||||
|
if n, err := fmt.Sscanf(line, "%d %s", &currMajor, &currName); err != nil || n != 2 {
|
||||||
|
if err == nil {
|
||||||
|
err = errors.New("wrong number of fields")
|
||||||
|
}
|
||||||
|
return "", fmt.Errorf("scan /proc/devices line %q: %w", line, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if currMajor == ruleMajor {
|
||||||
|
return prefix + currName, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if err := scanner.Err(); err != nil {
|
||||||
|
return "", fmt.Errorf("reading /proc/devices: %w", err)
|
||||||
|
}
|
||||||
|
// Couldn't find the device group.
|
||||||
|
return "", nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// deviceAllowEntry is one element of the systemd DeviceAllow property.
// DeviceAllow is the dbus type "a(ss)" which means we need a struct
// to represent it in Go.
type deviceAllowEntry struct {
	// Path is the device node path or device group specifier.
	Path string
	// Perms is the permission string granted for Path.
	Perms string
}
|
||||||
|
|
||||||
|
func allowAllDevices() []systemdDbus.Property {
|
||||||
|
// Setting mode to auto and removing all DeviceAllow rules
|
||||||
|
// results in allowing access to all devices.
|
||||||
|
return []systemdDbus.Property{
|
||||||
|
newProp("DevicePolicy", "auto"),
|
||||||
|
newProp("DeviceAllow", []deviceAllowEntry{}),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// generateDeviceProperties takes the configured device rules and generates a
|
||||||
|
// corresponding set of systemd properties to configure the devices correctly.
|
||||||
|
func generateDeviceProperties(r *configs.Resources, sdVer int) ([]systemdDbus.Property, error) {
|
||||||
|
if r.SkipDevices {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
properties := []systemdDbus.Property{
|
||||||
|
// Always run in the strictest white-list mode.
|
||||||
|
newProp("DevicePolicy", "strict"),
|
||||||
|
// Empty the DeviceAllow array before filling it.
|
||||||
|
newProp("DeviceAllow", []deviceAllowEntry{}),
|
||||||
|
}
|
||||||
|
|
||||||
|
// Figure out the set of rules.
|
||||||
|
configEmu := &cgroupdevices.Emulator{}
|
||||||
|
for _, rule := range r.Devices {
|
||||||
|
if err := configEmu.Apply(*rule); err != nil {
|
||||||
|
return nil, fmt.Errorf("unable to apply rule for systemd: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// systemd doesn't support blacklists. So we log a warning, and tell
|
||||||
|
// systemd to act as a deny-all whitelist. This ruleset will be replaced
|
||||||
|
// with our normal fallback code. This may result in spurious errors, but
|
||||||
|
// the only other option is to error out here.
|
||||||
|
if configEmu.IsBlacklist() {
|
||||||
|
// However, if we're dealing with an allow-all rule then we can do it.
|
||||||
|
if configEmu.IsAllowAll() {
|
||||||
|
return allowAllDevices(), nil
|
||||||
|
}
|
||||||
|
logrus.Warn("systemd doesn't support blacklist device rules -- applying temporary deny-all rule")
|
||||||
|
return properties, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now generate the set of rules we actually need to apply. Unlike the
|
||||||
|
// normal devices cgroup, in "strict" mode systemd defaults to a deny-all
|
||||||
|
// whitelist which is the default for devices.Emulator.
|
||||||
|
finalRules, err := configEmu.Rules()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("unable to get simplified rules for systemd: %w", err)
|
||||||
|
}
|
||||||
|
var deviceAllowList []deviceAllowEntry
|
||||||
|
for _, rule := range finalRules {
|
||||||
|
if !rule.Allow {
|
||||||
|
// Should never happen.
|
||||||
|
return nil, fmt.Errorf("[internal error] cannot add deny rule to systemd DeviceAllow list: %v", *rule)
|
||||||
|
}
|
||||||
|
switch rule.Type {
|
||||||
|
case devices.BlockDevice, devices.CharDevice:
|
||||||
|
default:
|
||||||
|
// Should never happen.
|
||||||
|
return nil, fmt.Errorf("invalid device type for DeviceAllow: %v", rule.Type)
|
||||||
|
}
|
||||||
|
|
||||||
|
entry := deviceAllowEntry{
|
||||||
|
Perms: string(rule.Permissions),
|
||||||
|
}
|
||||||
|
|
||||||
|
// systemd has a fairly odd (though understandable) syntax here, and
|
||||||
|
// because of the OCI configuration format we have to do quite a bit of
|
||||||
|
// trickery to convert things:
|
||||||
|
//
|
||||||
|
// * Concrete rules with non-wildcard major/minor numbers have to use
|
||||||
|
// /dev/{block,char}/MAJOR:minor paths. Before v240, systemd uses
|
||||||
|
// stat(2) on such paths to look up device properties, meaning we
|
||||||
|
// cannot add whitelist rules for devices that don't exist. Since v240,
|
||||||
|
// device properties are parsed from the path string.
|
||||||
|
//
|
||||||
|
// However, path globbing is not support for path-based rules so we
|
||||||
|
// need to handle wildcards in some other manner.
|
||||||
|
//
|
||||||
|
// * Wildcard-minor rules have to specify a "device group name" (the
|
||||||
|
// second column in /proc/devices).
|
||||||
|
//
|
||||||
|
// * Wildcard (major and minor) rules can just specify a glob with the
|
||||||
|
// type ("char-*" or "block-*").
|
||||||
|
//
|
||||||
|
// The only type of rule we can't handle is wildcard-major rules, and
|
||||||
|
// so we'll give a warning in that case (note that the fallback code
|
||||||
|
// will insert any rules systemd couldn't handle). What amazing fun.
|
||||||
|
|
||||||
|
if rule.Major == devices.Wildcard {
|
||||||
|
// "_ *:n _" rules aren't supported by systemd.
|
||||||
|
if rule.Minor != devices.Wildcard {
|
||||||
|
logrus.Warnf("systemd doesn't support '*:n' device rules -- temporarily ignoring rule: %v", *rule)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// "_ *:* _" rules just wildcard everything.
|
||||||
|
prefix, err := groupPrefix(rule.Type)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
entry.Path = prefix + "*"
|
||||||
|
} else if rule.Minor == devices.Wildcard {
|
||||||
|
// "_ n:* _" rules require a device group from /proc/devices.
|
||||||
|
group, err := findDeviceGroup(rule.Type, rule.Major)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("unable to find device '%v/%d': %w", rule.Type, rule.Major, err)
|
||||||
|
}
|
||||||
|
if group == "" {
|
||||||
|
// Couldn't find a group.
|
||||||
|
logrus.Warnf("could not find device group for '%v/%d' in /proc/devices -- temporarily ignoring rule: %v", rule.Type, rule.Major, *rule)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
entry.Path = group
|
||||||
|
} else {
|
||||||
|
// "_ n:m _" rules are just a path in /dev/{block,char}/.
|
||||||
|
switch rule.Type {
|
||||||
|
case devices.BlockDevice:
|
||||||
|
entry.Path = fmt.Sprintf("/dev/block/%d:%d", rule.Major, rule.Minor)
|
||||||
|
case devices.CharDevice:
|
||||||
|
entry.Path = fmt.Sprintf("/dev/char/%d:%d", rule.Major, rule.Minor)
|
||||||
|
}
|
||||||
|
if sdVer < 240 {
|
||||||
|
// Old systemd versions use stat(2) on path to find out device major:minor
|
||||||
|
// numbers and type. If the path doesn't exist, it will not add the rule,
|
||||||
|
// emitting a warning instead.
|
||||||
|
// Since all of this logic is best-effort anyway (we manually set these
|
||||||
|
// rules separately to systemd) we can safely skip entries that don't
|
||||||
|
// have a corresponding path.
|
||||||
|
if _, err := os.Stat(entry.Path); err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
deviceAllowList = append(deviceAllowList, entry)
|
||||||
|
}
|
||||||
|
|
||||||
|
properties = append(properties, newProp("DeviceAllow", deviceAllowList))
|
||||||
|
return properties, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func newProp(name string, units interface{}) systemdDbus.Property {
|
||||||
|
return systemdDbus.Property{
|
||||||
|
Name: name,
|
||||||
|
Value: dbus.MakeVariant(units),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func getUnitName(c *configs.Cgroup) string {
|
||||||
|
// by default, we create a scope unless the user explicitly asks for a slice.
|
||||||
|
if !strings.HasSuffix(c.Name, ".slice") {
|
||||||
|
return c.ScopePrefix + "-" + c.Name + ".scope"
|
||||||
|
}
|
||||||
|
return c.Name
|
||||||
|
}
|
||||||
|
|
||||||
|
// getUnitType returns the systemd unit type for unitName: "Slice" when the
// name ends in ".slice", "Scope" otherwise.
//
// This code should be in sync with getUnitName.
func getUnitType(unitName string) string {
	unitType := "Scope"
	if strings.HasSuffix(unitName, ".slice") {
		unitType = "Slice"
	}
	return unitType
}
|
||||||
|
|
||||||
|
// isDbusError returns true if the error is a specific dbus error.
|
||||||
|
func isDbusError(err error, name string) bool {
|
||||||
|
if err != nil {
|
||||||
|
var derr dbus.Error
|
||||||
|
if errors.As(err, &derr) {
|
||||||
|
return strings.Contains(derr.Name, name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// isUnitExists returns true if the error is that a systemd unit already exists.
|
||||||
|
func isUnitExists(err error) bool {
|
||||||
|
return isDbusError(err, "org.freedesktop.systemd1.UnitExists")
|
||||||
|
}
|
||||||
|
|
||||||
|
func startUnit(cm *dbusConnManager, unitName string, properties []systemdDbus.Property, ignoreExist bool) error {
|
||||||
|
statusChan := make(chan string, 1)
|
||||||
|
retry := true
|
||||||
|
|
||||||
|
retry:
|
||||||
|
err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
|
||||||
|
_, err := c.StartTransientUnitContext(context.TODO(), unitName, "replace", properties, statusChan)
|
||||||
|
return err
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
if !isUnitExists(err) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if ignoreExist {
|
||||||
|
// TODO: remove this hack.
|
||||||
|
// This is kubelet making sure a slice exists (see
|
||||||
|
// https://github.com/opencontainers/runc/pull/1124).
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if retry {
|
||||||
|
// In case a unit with the same name exists, this may
|
||||||
|
// be a leftover failed unit. Reset it, so systemd can
|
||||||
|
// remove it, and retry once.
|
||||||
|
err = resetFailedUnit(cm, unitName)
|
||||||
|
if err != nil {
|
||||||
|
logrus.Warnf("unable to reset failed unit: %v", err)
|
||||||
|
}
|
||||||
|
retry = false
|
||||||
|
goto retry
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
timeout := time.NewTimer(30 * time.Second)
|
||||||
|
defer timeout.Stop()
|
||||||
|
|
||||||
|
select {
|
||||||
|
case s := <-statusChan:
|
||||||
|
close(statusChan)
|
||||||
|
// Please refer to https://pkg.go.dev/github.com/coreos/go-systemd/v22/dbus#Conn.StartUnit
|
||||||
|
if s != "done" {
|
||||||
|
_ = resetFailedUnit(cm, unitName)
|
||||||
|
return fmt.Errorf("error creating systemd unit `%s`: got `%s`", unitName, s)
|
||||||
|
}
|
||||||
|
case <-timeout.C:
|
||||||
|
_ = resetFailedUnit(cm, unitName)
|
||||||
|
return errors.New("Timeout waiting for systemd to create " + unitName)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func stopUnit(cm *dbusConnManager, unitName string) error {
|
||||||
|
statusChan := make(chan string, 1)
|
||||||
|
err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
|
||||||
|
_, err := c.StopUnitContext(context.TODO(), unitName, "replace", statusChan)
|
||||||
|
return err
|
||||||
|
})
|
||||||
|
if err == nil {
|
||||||
|
timeout := time.NewTimer(30 * time.Second)
|
||||||
|
defer timeout.Stop()
|
||||||
|
|
||||||
|
select {
|
||||||
|
case s := <-statusChan:
|
||||||
|
close(statusChan)
|
||||||
|
// Please refer to https://godoc.org/github.com/coreos/go-systemd/v22/dbus#Conn.StartUnit
|
||||||
|
if s != "done" {
|
||||||
|
logrus.Warnf("error removing unit `%s`: got `%s`. Continuing...", unitName, s)
|
||||||
|
}
|
||||||
|
case <-timeout.C:
|
||||||
|
return errors.New("Timed out while waiting for systemd to remove " + unitName)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// In case of a failed unit, let systemd remove it.
|
||||||
|
_ = resetFailedUnit(cm, unitName)
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func resetFailedUnit(cm *dbusConnManager, name string) error {
|
||||||
|
return cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
|
||||||
|
return c.ResetFailedUnitContext(context.TODO(), name)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func getUnitTypeProperty(cm *dbusConnManager, unitName string, unitType string, propertyName string) (*systemdDbus.Property, error) {
|
||||||
|
var prop *systemdDbus.Property
|
||||||
|
err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) (Err error) {
|
||||||
|
prop, Err = c.GetUnitTypePropertyContext(context.TODO(), unitName, unitType, propertyName)
|
||||||
|
return Err
|
||||||
|
})
|
||||||
|
return prop, err
|
||||||
|
}
|
||||||
|
|
||||||
|
func setUnitProperties(cm *dbusConnManager, name string, properties ...systemdDbus.Property) error {
|
||||||
|
return cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
|
||||||
|
return c.SetUnitPropertiesContext(context.TODO(), name, true, properties...)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func getManagerProperty(cm *dbusConnManager, name string) (string, error) {
|
||||||
|
str := ""
|
||||||
|
err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error {
|
||||||
|
var err error
|
||||||
|
str, err = c.GetManagerProperty(name)
|
||||||
|
return err
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
return strconv.Unquote(str)
|
||||||
|
}
|
||||||
|
|
||||||
|
func systemdVersion(cm *dbusConnManager) int {
|
||||||
|
versionOnce.Do(func() {
|
||||||
|
version = -1
|
||||||
|
verStr, err := getManagerProperty(cm, "Version")
|
||||||
|
if err == nil {
|
||||||
|
version, err = systemdVersionAtoi(verStr)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
logrus.WithError(err).Error("unable to get systemd version")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
return version
|
||||||
|
}
|
||||||
|
|
||||||
|
// systemdVersionRe matches an optional "v" prefix followed by the first run
// of digits in a version string. It is compiled once at package scope
// instead of on every systemdVersionAtoi call.
var systemdVersionRe = regexp.MustCompile(`v?([0-9]+)`)

// systemdVersionAtoi extracts the leading integer version from a systemd
// version string.
//
// verStr should be of the form:
// "v245.4-1.fc32", "245", "v245-1.fc32", "245-1.fc32" (without quotes).
// The result for all of the above should be 245.
// Thus, we unconditionally remove the "v" prefix
// and then match on the first integer we can grab.
//
// An error is returned when verStr contains no digits or when the digits do
// not fit in an int; the returned number is meaningless in that case.
func systemdVersionAtoi(verStr string) (int, error) {
	matches := systemdVersionRe.FindStringSubmatch(verStr)
	if len(matches) < 2 {
		return 0, fmt.Errorf("can't parse version %s: incorrect number of matches %v", verStr, matches)
	}
	ver, err := strconv.Atoi(matches[1])
	if err != nil {
		return -1, fmt.Errorf("can't parse version: %w", err)
	}
	return ver, nil
}
|
||||||
|
|
||||||
|
func addCpuQuota(cm *dbusConnManager, properties *[]systemdDbus.Property, quota int64, period uint64) {
|
||||||
|
if period != 0 {
|
||||||
|
// systemd only supports CPUQuotaPeriodUSec since v242
|
||||||
|
sdVer := systemdVersion(cm)
|
||||||
|
if sdVer >= 242 {
|
||||||
|
*properties = append(*properties,
|
||||||
|
newProp("CPUQuotaPeriodUSec", period))
|
||||||
|
} else {
|
||||||
|
logrus.Debugf("systemd v%d is too old to support CPUQuotaPeriodSec "+
|
||||||
|
" (setting will still be applied to cgroupfs)", sdVer)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if quota != 0 || period != 0 {
|
||||||
|
// corresponds to USEC_INFINITY in systemd
|
||||||
|
cpuQuotaPerSecUSec := uint64(math.MaxUint64)
|
||||||
|
if quota > 0 {
|
||||||
|
if period == 0 {
|
||||||
|
// assume the default
|
||||||
|
period = defCPUQuotaPeriod
|
||||||
|
}
|
||||||
|
// systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota
|
||||||
|
// (integer percentage of CPU) internally. This means that if a fractional percent of
|
||||||
|
// CPU is indicated by Resources.CpuQuota, we need to round up to the nearest
|
||||||
|
// 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect.
|
||||||
|
cpuQuotaPerSecUSec = uint64(quota*1000000) / period
|
||||||
|
if cpuQuotaPerSecUSec%10000 != 0 {
|
||||||
|
cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*properties = append(*properties,
|
||||||
|
newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func addCpuset(cm *dbusConnManager, props *[]systemdDbus.Property, cpus, mems string) error {
|
||||||
|
if cpus == "" && mems == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// systemd only supports AllowedCPUs/AllowedMemoryNodes since v244
|
||||||
|
sdVer := systemdVersion(cm)
|
||||||
|
if sdVer < 244 {
|
||||||
|
logrus.Debugf("systemd v%d is too old to support AllowedCPUs/AllowedMemoryNodes"+
|
||||||
|
" (settings will still be applied to cgroupfs)", sdVer)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if cpus != "" {
|
||||||
|
bits, err := RangeToBits(cpus)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("resources.CPU.Cpus=%q conversion error: %w",
|
||||||
|
cpus, err)
|
||||||
|
}
|
||||||
|
*props = append(*props,
|
||||||
|
newProp("AllowedCPUs", bits))
|
||||||
|
}
|
||||||
|
if mems != "" {
|
||||||
|
bits, err := RangeToBits(mems)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("resources.CPU.Mems=%q conversion error: %w",
|
||||||
|
mems, err)
|
||||||
|
}
|
||||||
|
*props = append(*props,
|
||||||
|
newProp("AllowedMemoryNodes", bits))
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
60
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/cpuset.go
generated
vendored
Normal file
60
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/cpuset.go
generated
vendored
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
package systemd
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"math/big"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// RangeToBits converts a text representation of a CPU mask (as written to
// or read from cgroups' cpuset.* files, e.g. "1,3-5") to a slice of bytes
// with the corresponding bits set (as consumed by systemd over dbus as
// AllowedCPUs/AllowedMemoryNodes unit property value).
//
// Elements are separated by commas; surrounding spaces and empty elements
// are ignored. The result holds the least significant byte first. An error
// is returned for an unparsable number, a range whose start exceeds its end,
// a value too large to be used as a bit index on this platform, or an input
// that sets no bits at all.
func RangeToBits(str string) ([]byte, error) {
	bits := new(big.Int)

	for _, r := range strings.Split(str, ",") {
		// allow extra spaces around
		r = strings.TrimSpace(r)
		// allow empty elements (extra commas)
		if r == "" {
			continue
		}
		// A lone value N is handled as the range N-N.
		ranges := strings.SplitN(r, "-", 2)
		start, err := strconv.ParseUint(ranges[0], 10, 32)
		if err != nil {
			return nil, err
		}
		end := start
		if len(ranges) > 1 {
			end, err = strconv.ParseUint(ranges[1], 10, 32)
			if err != nil {
				return nil, err
			}
			if start > end {
				return nil, errors.New("invalid range: " + r)
			}
		}
		// Where int is 32 bits wide, values of 2^31 and above wrap to a
		// negative int, and big.Int.SetBit panics on a negative index.
		// Report an error instead.
		if int(end) < 0 {
			return nil, errors.New("value out of range: " + r)
		}
		for i := start; i <= end; i++ {
			bits.SetBit(bits, int(i), 1)
		}
	}

	ret := bits.Bytes()
	if len(ret) == 0 {
		// do not allow empty values
		return nil, errors.New("empty value")
	}

	// fit cpuset parsing order in systemd: big.Int.Bytes is big-endian, so
	// reverse it in place to get the least significant byte first.
	for l, r := 0, len(ret)-1; l < r; l, r = l+1, r-1 {
		ret[l], ret[r] = ret[r], ret[l]
	}
	return ret, nil
}
|
102
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/dbus.go
generated
vendored
Normal file
102
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/dbus.go
generated
vendored
Normal file
@ -0,0 +1,102 @@
|
|||||||
|
package systemd
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||||
|
dbus "github.com/godbus/dbus/v5"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
dbusC *systemdDbus.Conn
|
||||||
|
dbusMu sync.RWMutex
|
||||||
|
dbusInited bool
|
||||||
|
dbusRootless bool
|
||||||
|
)
|
||||||
|
|
||||||
|
// dbusConnManager hands out the systemd dbus connection. It has no fields of
// its own: the connection and its settings are kept in package-level
// variables.
type dbusConnManager struct{}
|
||||||
|
|
||||||
|
// newDbusConnManager initializes systemd dbus connection manager.
|
||||||
|
func newDbusConnManager(rootless bool) *dbusConnManager {
|
||||||
|
dbusMu.Lock()
|
||||||
|
defer dbusMu.Unlock()
|
||||||
|
if dbusInited && rootless != dbusRootless {
|
||||||
|
panic("can't have both root and rootless dbus")
|
||||||
|
}
|
||||||
|
dbusInited = true
|
||||||
|
dbusRootless = rootless
|
||||||
|
return &dbusConnManager{}
|
||||||
|
}
|
||||||
|
|
||||||
|
// getConnection lazily initializes and returns systemd dbus connection.
|
||||||
|
func (d *dbusConnManager) getConnection() (*systemdDbus.Conn, error) {
|
||||||
|
// In the case where dbusC != nil
|
||||||
|
// Use the read lock the first time to ensure
|
||||||
|
// that Conn can be acquired at the same time.
|
||||||
|
dbusMu.RLock()
|
||||||
|
if conn := dbusC; conn != nil {
|
||||||
|
dbusMu.RUnlock()
|
||||||
|
return conn, nil
|
||||||
|
}
|
||||||
|
dbusMu.RUnlock()
|
||||||
|
|
||||||
|
// In the case where dbusC == nil
|
||||||
|
// Use write lock to ensure that only one
|
||||||
|
// will be created
|
||||||
|
dbusMu.Lock()
|
||||||
|
defer dbusMu.Unlock()
|
||||||
|
if conn := dbusC; conn != nil {
|
||||||
|
return conn, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
conn, err := d.newConnection()
|
||||||
|
if err != nil {
|
||||||
|
// When dbus-user-session is not installed, we can't detect whether we should try to connect to user dbus or system dbus, so d.dbusRootless is set to false.
|
||||||
|
// This may fail with a cryptic error "read unix @->/run/systemd/private: read: connection reset by peer: unknown."
|
||||||
|
// https://github.com/moby/moby/issues/42793
|
||||||
|
return nil, fmt.Errorf("failed to connect to dbus (hint: for rootless containers, maybe you need to install dbus-user-session package, see https://github.com/opencontainers/runc/blob/master/docs/cgroup-v2.md): %w", err)
|
||||||
|
}
|
||||||
|
dbusC = conn
|
||||||
|
return conn, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *dbusConnManager) newConnection() (*systemdDbus.Conn, error) {
|
||||||
|
if dbusRootless {
|
||||||
|
return newUserSystemdDbus()
|
||||||
|
}
|
||||||
|
return systemdDbus.NewWithContext(context.TODO())
|
||||||
|
}
|
||||||
|
|
||||||
|
// resetConnection resets the connection to its initial state
|
||||||
|
// (so it can be reconnected if necessary).
|
||||||
|
func (d *dbusConnManager) resetConnection(conn *systemdDbus.Conn) {
|
||||||
|
dbusMu.Lock()
|
||||||
|
defer dbusMu.Unlock()
|
||||||
|
if dbusC != nil && dbusC == conn {
|
||||||
|
dbusC.Close()
|
||||||
|
dbusC = nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// retryOnDisconnect calls op, and if the error it returns is about closed dbus
|
||||||
|
// connection, the connection is re-established and the op is retried. This helps
|
||||||
|
// with the situation when dbus is restarted and we have a stale connection.
|
||||||
|
func (d *dbusConnManager) retryOnDisconnect(op func(*systemdDbus.Conn) error) error {
|
||||||
|
for {
|
||||||
|
conn, err := d.getConnection()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
err = op(conn)
|
||||||
|
if err == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if !errors.Is(err, dbus.ErrClosed) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
d.resetConnection(conn)
|
||||||
|
}
|
||||||
|
}
|
106
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/user.go
generated
vendored
Normal file
106
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/user.go
generated
vendored
Normal file
@ -0,0 +1,106 @@
|
|||||||
|
package systemd
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"bytes"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||||
|
dbus "github.com/godbus/dbus/v5"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/userns"
|
||||||
|
)
|
||||||
|
|
||||||
|
// newUserSystemdDbus creates a connection for systemd user-instance.
|
||||||
|
func newUserSystemdDbus() (*systemdDbus.Conn, error) {
|
||||||
|
addr, err := DetectUserDbusSessionBusAddress()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
uid, err := DetectUID()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return systemdDbus.NewConnection(func() (*dbus.Conn, error) {
|
||||||
|
conn, err := dbus.Dial(addr)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("error while dialing %q: %w", addr, err)
|
||||||
|
}
|
||||||
|
methods := []dbus.Auth{dbus.AuthExternal(strconv.Itoa(uid))}
|
||||||
|
err = conn.Auth(methods)
|
||||||
|
if err != nil {
|
||||||
|
conn.Close()
|
||||||
|
return nil, fmt.Errorf("error while authenticating connection (address=%q, UID=%d): %w", addr, uid, err)
|
||||||
|
}
|
||||||
|
if err = conn.Hello(); err != nil {
|
||||||
|
conn.Close()
|
||||||
|
return nil, fmt.Errorf("error while sending Hello message (address=%q, UID=%d): %w", addr, uid, err)
|
||||||
|
}
|
||||||
|
return conn, nil
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// DetectUID detects UID from the OwnerUID field of `busctl --user status`
|
||||||
|
// if running in userNS. The value corresponds to sd_bus_creds_get_owner_uid(3) .
|
||||||
|
//
|
||||||
|
// Otherwise returns os.Getuid() .
|
||||||
|
func DetectUID() (int, error) {
|
||||||
|
if !userns.RunningInUserNS() {
|
||||||
|
return os.Getuid(), nil
|
||||||
|
}
|
||||||
|
b, err := exec.Command("busctl", "--user", "--no-pager", "status").CombinedOutput()
|
||||||
|
if err != nil {
|
||||||
|
return -1, fmt.Errorf("could not execute `busctl --user --no-pager status` (output: %q): %w", string(b), err)
|
||||||
|
}
|
||||||
|
scanner := bufio.NewScanner(bytes.NewReader(b))
|
||||||
|
for scanner.Scan() {
|
||||||
|
s := strings.TrimSpace(scanner.Text())
|
||||||
|
if strings.HasPrefix(s, "OwnerUID=") {
|
||||||
|
uidStr := strings.TrimPrefix(s, "OwnerUID=")
|
||||||
|
i, err := strconv.Atoi(uidStr)
|
||||||
|
if err != nil {
|
||||||
|
return -1, fmt.Errorf("could not detect the OwnerUID: %w", err)
|
||||||
|
}
|
||||||
|
return i, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if err := scanner.Err(); err != nil {
|
||||||
|
return -1, err
|
||||||
|
}
|
||||||
|
return -1, errors.New("could not detect the OwnerUID")
|
||||||
|
}
|
||||||
|
|
||||||
|
// DetectUserDbusSessionBusAddress returns the address of the user dbus
// session bus. The sources are tried in this order:
//
//  1. $DBUS_SESSION_BUS_ADDRESS, if set and non-empty;
//  2. "unix:path=$XDG_RUNTIME_DIR/bus", if $XDG_RUNTIME_DIR is set and that
//     path can be stat'ed;
//  3. the DBUS_SESSION_BUS_ADDRESS line in the output of
//     `systemctl --user show-environment`.
//
// An error is returned if systemctl cannot be executed, if its output cannot
// be scanned, or if the output has no DBUS_SESSION_BUS_ADDRESS line.
func DetectUserDbusSessionBusAddress() (string, error) {
	if env := os.Getenv("DBUS_SESSION_BUS_ADDRESS"); env != "" {
		return env, nil
	}
	if xdr := os.Getenv("XDG_RUNTIME_DIR"); xdr != "" {
		busPath := filepath.Join(xdr, "bus")
		if _, err := os.Stat(busPath); err == nil {
			return "unix:path=" + busPath, nil
		}
	}

	b, err := exec.Command("systemctl", "--user", "--no-pager", "show-environment").CombinedOutput()
	if err != nil {
		return "", fmt.Errorf("could not execute `systemctl --user --no-pager show-environment` (output=%q): %w", string(b), err)
	}

	const key = "DBUS_SESSION_BUS_ADDRESS="
	scanner := bufio.NewScanner(bytes.NewReader(b))
	for scanner.Scan() {
		s := strings.TrimSpace(scanner.Text())
		if strings.HasPrefix(s, key) {
			return strings.TrimPrefix(s, key), nil
		}
	}
	// Report a scan failure (e.g. an over-long line) as such, like DetectUID
	// does, instead of hiding it behind the "not found" hint below.
	if err := scanner.Err(); err != nil {
		return "", fmt.Errorf("could not parse the output of `systemctl --user --no-pager show-environment`: %w", err)
	}
	return "", errors.New("could not detect DBUS_SESSION_BUS_ADDRESS from `systemctl --user --no-pager show-environment`. Make sure you have installed the dbus-user-session or dbus-daemon package and then run: `systemctl --user start dbus`")
}
|
480
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v1.go
generated
vendored
Normal file
480
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v1.go
generated
vendored
Normal file
@ -0,0 +1,480 @@
|
|||||||
|
package systemd
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"reflect"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||||
|
"github.com/godbus/dbus/v5"
|
||||||
|
"github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
)
|
||||||
|
|
||||||
|
type legacyManager struct {
|
||||||
|
mu sync.Mutex
|
||||||
|
cgroups *configs.Cgroup
|
||||||
|
paths map[string]string
|
||||||
|
dbus *dbusConnManager
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewLegacyManager(cg *configs.Cgroup, paths map[string]string) (cgroups.Manager, error) {
|
||||||
|
if cg.Rootless {
|
||||||
|
return nil, errors.New("cannot use rootless systemd cgroups manager on cgroup v1")
|
||||||
|
}
|
||||||
|
if cg.Resources != nil && cg.Resources.Unified != nil {
|
||||||
|
return nil, cgroups.ErrV1NoUnified
|
||||||
|
}
|
||||||
|
if paths == nil {
|
||||||
|
var err error
|
||||||
|
paths, err = initPaths(cg)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return &legacyManager{
|
||||||
|
cgroups: cg,
|
||||||
|
paths: paths,
|
||||||
|
dbus: newDbusConnManager(false),
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
type subsystem interface {
|
||||||
|
// Name returns the name of the subsystem.
|
||||||
|
Name() string
|
||||||
|
// Returns the stats, as 'stats', corresponding to the cgroup under 'path'.
|
||||||
|
GetStats(path string, stats *cgroups.Stats) error
|
||||||
|
// Set sets cgroup resource limits.
|
||||||
|
Set(path string, r *configs.Resources) error
|
||||||
|
}
|
||||||
|
|
||||||
|
// errSubsystemDoesNotExist is returned when the manager has no path for the
// subsystem an operation needs (freezer for doFreeze, devices for GetPids and
// GetAllPids).
var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist")
|
||||||
|
|
||||||
|
var legacySubsystems = []subsystem{
|
||||||
|
&fs.CpusetGroup{},
|
||||||
|
&fs.DevicesGroup{},
|
||||||
|
&fs.MemoryGroup{},
|
||||||
|
&fs.CpuGroup{},
|
||||||
|
&fs.CpuacctGroup{},
|
||||||
|
&fs.PidsGroup{},
|
||||||
|
&fs.BlkioGroup{},
|
||||||
|
&fs.HugetlbGroup{},
|
||||||
|
&fs.PerfEventGroup{},
|
||||||
|
&fs.FreezerGroup{},
|
||||||
|
&fs.NetPrioGroup{},
|
||||||
|
&fs.NetClsGroup{},
|
||||||
|
&fs.NameGroup{GroupName: "name=systemd"},
|
||||||
|
&fs.RdmaGroup{},
|
||||||
|
&fs.NameGroup{GroupName: "misc"},
|
||||||
|
}
|
||||||
|
|
||||||
|
func genV1ResourcesProperties(r *configs.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) {
|
||||||
|
var properties []systemdDbus.Property
|
||||||
|
|
||||||
|
deviceProperties, err := generateDeviceProperties(r, systemdVersion(cm))
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
properties = append(properties, deviceProperties...)
|
||||||
|
|
||||||
|
if r.Memory != 0 {
|
||||||
|
properties = append(properties,
|
||||||
|
newProp("MemoryLimit", uint64(r.Memory)))
|
||||||
|
}
|
||||||
|
|
||||||
|
if r.CpuShares != 0 {
|
||||||
|
properties = append(properties,
|
||||||
|
newProp("CPUShares", r.CpuShares))
|
||||||
|
}
|
||||||
|
|
||||||
|
addCpuQuota(cm, &properties, r.CpuQuota, r.CpuPeriod)
|
||||||
|
|
||||||
|
if r.BlkioWeight != 0 {
|
||||||
|
properties = append(properties,
|
||||||
|
newProp("BlockIOWeight", uint64(r.BlkioWeight)))
|
||||||
|
}
|
||||||
|
|
||||||
|
if r.PidsLimit > 0 || r.PidsLimit == -1 {
|
||||||
|
properties = append(properties,
|
||||||
|
newProp("TasksMax", uint64(r.PidsLimit)))
|
||||||
|
}
|
||||||
|
|
||||||
|
err = addCpuset(cm, &properties, r.CpusetCpus, r.CpusetMems)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return properties, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// initPaths figures out and returns paths to cgroups.
|
||||||
|
func initPaths(c *configs.Cgroup) (map[string]string, error) {
|
||||||
|
slice := "system.slice"
|
||||||
|
if c.Parent != "" {
|
||||||
|
var err error
|
||||||
|
slice, err = ExpandSlice(c.Parent)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
unit := getUnitName(c)
|
||||||
|
|
||||||
|
paths := make(map[string]string)
|
||||||
|
for _, s := range legacySubsystems {
|
||||||
|
subsystemPath, err := getSubsystemPath(slice, unit, s.Name())
|
||||||
|
if err != nil {
|
||||||
|
// Even if it's `not found` error, we'll return err
|
||||||
|
// because devices cgroup is hard requirement for
|
||||||
|
// container security.
|
||||||
|
if s.Name() == "devices" {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
|
||||||
|
if cgroups.IsNotFound(err) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
paths[s.Name()] = subsystemPath
|
||||||
|
}
|
||||||
|
|
||||||
|
// If systemd is using cgroups-hybrid mode then add the slice path of
|
||||||
|
// this container to the paths so the following process executed with
|
||||||
|
// "runc exec" joins that cgroup as well.
|
||||||
|
if cgroups.IsCgroup2HybridMode() {
|
||||||
|
// "" means cgroup-hybrid path
|
||||||
|
cgroupsHybridPath, err := getSubsystemPath(slice, unit, "")
|
||||||
|
if err != nil && cgroups.IsNotFound(err) {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
paths[""] = cgroupsHybridPath
|
||||||
|
}
|
||||||
|
|
||||||
|
return paths, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *legacyManager) Apply(pid int) error {
|
||||||
|
var (
|
||||||
|
c = m.cgroups
|
||||||
|
unitName = getUnitName(c)
|
||||||
|
slice = "system.slice"
|
||||||
|
properties []systemdDbus.Property
|
||||||
|
)
|
||||||
|
|
||||||
|
m.mu.Lock()
|
||||||
|
defer m.mu.Unlock()
|
||||||
|
|
||||||
|
if c.Parent != "" {
|
||||||
|
slice = c.Parent
|
||||||
|
}
|
||||||
|
|
||||||
|
properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name))
|
||||||
|
|
||||||
|
if strings.HasSuffix(unitName, ".slice") {
|
||||||
|
// If we create a slice, the parent is defined via a Wants=.
|
||||||
|
properties = append(properties, systemdDbus.PropWants(slice))
|
||||||
|
} else {
|
||||||
|
// Otherwise it's a scope, which we put into a Slice=.
|
||||||
|
properties = append(properties, systemdDbus.PropSlice(slice))
|
||||||
|
// Assume scopes always support delegation (supported since systemd v218).
|
||||||
|
properties = append(properties, newProp("Delegate", true))
|
||||||
|
}
|
||||||
|
|
||||||
|
// only add pid if its valid, -1 is used w/ general slice creation.
|
||||||
|
if pid != -1 {
|
||||||
|
properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Always enable accounting, this gets us the same behaviour as the fs implementation,
|
||||||
|
// plus the kernel has some problems with joining the memory cgroup at a later time.
|
||||||
|
properties = append(properties,
|
||||||
|
newProp("MemoryAccounting", true),
|
||||||
|
newProp("CPUAccounting", true),
|
||||||
|
newProp("BlockIOAccounting", true),
|
||||||
|
newProp("TasksAccounting", true),
|
||||||
|
)
|
||||||
|
|
||||||
|
// Assume DefaultDependencies= will always work (the check for it was previously broken.)
|
||||||
|
properties = append(properties,
|
||||||
|
newProp("DefaultDependencies", false))
|
||||||
|
|
||||||
|
properties = append(properties, c.SystemdProps...)
|
||||||
|
|
||||||
|
if err := startUnit(m.dbus, unitName, properties, pid == -1); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := m.joinCgroups(pid); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *legacyManager) Destroy() error {
|
||||||
|
m.mu.Lock()
|
||||||
|
defer m.mu.Unlock()
|
||||||
|
|
||||||
|
stopErr := stopUnit(m.dbus, getUnitName(m.cgroups))
|
||||||
|
|
||||||
|
// Both on success and on error, cleanup all the cgroups
|
||||||
|
// we are aware of, as some of them were created directly
|
||||||
|
// by Apply() and are not managed by systemd.
|
||||||
|
if err := cgroups.RemovePaths(m.paths); err != nil && stopErr == nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return stopErr
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *legacyManager) Path(subsys string) string {
|
||||||
|
m.mu.Lock()
|
||||||
|
defer m.mu.Unlock()
|
||||||
|
return m.paths[subsys]
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *legacyManager) joinCgroups(pid int) error {
|
||||||
|
for _, sys := range legacySubsystems {
|
||||||
|
name := sys.Name()
|
||||||
|
switch name {
|
||||||
|
case "name=systemd":
|
||||||
|
// let systemd handle this
|
||||||
|
case "cpuset":
|
||||||
|
if path, ok := m.paths[name]; ok {
|
||||||
|
s := &fs.CpusetGroup{}
|
||||||
|
if err := s.ApplyDir(path, m.cgroups.Resources, pid); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
if path, ok := m.paths[name]; ok {
|
||||||
|
if err := os.MkdirAll(path, 0o755); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := cgroups.WriteCgroupProc(path, pid); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func getSubsystemPath(slice, unit, subsystem string) (string, error) {
|
||||||
|
mountpoint, err := cgroups.FindCgroupMountpoint("", subsystem)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
return filepath.Join(mountpoint, slice, unit), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *legacyManager) Freeze(state configs.FreezerState) error {
|
||||||
|
err := m.doFreeze(state)
|
||||||
|
if err == nil {
|
||||||
|
m.cgroups.Resources.Freezer = state
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// doFreeze is the same as Freeze but without
|
||||||
|
// changing the m.cgroups.Resources.Frozen field.
|
||||||
|
func (m *legacyManager) doFreeze(state configs.FreezerState) error {
|
||||||
|
path, ok := m.paths["freezer"]
|
||||||
|
if !ok {
|
||||||
|
return errSubsystemDoesNotExist
|
||||||
|
}
|
||||||
|
freezer := &fs.FreezerGroup{}
|
||||||
|
resources := &configs.Resources{Freezer: state}
|
||||||
|
return freezer.Set(path, resources)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *legacyManager) GetPids() ([]int, error) {
|
||||||
|
path, ok := m.paths["devices"]
|
||||||
|
if !ok {
|
||||||
|
return nil, errSubsystemDoesNotExist
|
||||||
|
}
|
||||||
|
return cgroups.GetPids(path)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *legacyManager) GetAllPids() ([]int, error) {
|
||||||
|
path, ok := m.paths["devices"]
|
||||||
|
if !ok {
|
||||||
|
return nil, errSubsystemDoesNotExist
|
||||||
|
}
|
||||||
|
return cgroups.GetAllPids(path)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *legacyManager) GetStats() (*cgroups.Stats, error) {
|
||||||
|
m.mu.Lock()
|
||||||
|
defer m.mu.Unlock()
|
||||||
|
stats := cgroups.NewStats()
|
||||||
|
for _, sys := range legacySubsystems {
|
||||||
|
path := m.paths[sys.Name()]
|
||||||
|
if path == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if err := sys.GetStats(path, stats); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return stats, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// freezeBeforeSet answers whether there is a need to freeze the cgroup before
|
||||||
|
// applying its systemd unit properties, and thaw after, while avoiding
|
||||||
|
// unnecessary freezer state changes.
|
||||||
|
//
|
||||||
|
// The reason why we have to freeze is that systemd's application of device
|
||||||
|
// rules is done disruptively, resulting in spurious errors to common devices
|
||||||
|
// (unlike our fs driver, they will happily write deny-all rules to running
|
||||||
|
// containers). So we have to freeze the container to avoid the container get
|
||||||
|
// an occasional "permission denied" error.
|
||||||
|
func (m *legacyManager) freezeBeforeSet(unitName string, r *configs.Resources) (needsFreeze, needsThaw bool, err error) {
|
||||||
|
// Special case for SkipDevices, as used by Kubernetes to create pod
|
||||||
|
// cgroups with allow-all device policy).
|
||||||
|
if r.SkipDevices {
|
||||||
|
if r.SkipFreezeOnSet {
|
||||||
|
// Both needsFreeze and needsThaw are false.
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// No need to freeze if SkipDevices is set, and either
|
||||||
|
// (1) systemd unit does not (yet) exist, or
|
||||||
|
// (2) it has DevicePolicy=auto and empty DeviceAllow list.
|
||||||
|
//
|
||||||
|
// Interestingly, (1) and (2) are the same here because
|
||||||
|
// a non-existent unit returns default properties,
|
||||||
|
// and settings in (2) are the defaults.
|
||||||
|
//
|
||||||
|
// Do not return errors from getUnitTypeProperty, as they alone
|
||||||
|
// should not prevent Set from working.
|
||||||
|
|
||||||
|
unitType := getUnitType(unitName)
|
||||||
|
|
||||||
|
devPolicy, e := getUnitTypeProperty(m.dbus, unitName, unitType, "DevicePolicy")
|
||||||
|
if e == nil && devPolicy.Value == dbus.MakeVariant("auto") {
|
||||||
|
devAllow, e := getUnitTypeProperty(m.dbus, unitName, unitType, "DeviceAllow")
|
||||||
|
if e == nil {
|
||||||
|
if rv := reflect.ValueOf(devAllow.Value.Value()); rv.Kind() == reflect.Slice && rv.Len() == 0 {
|
||||||
|
needsFreeze = false
|
||||||
|
needsThaw = false
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
needsFreeze = true
|
||||||
|
needsThaw = true
|
||||||
|
|
||||||
|
// Check the current freezer state.
|
||||||
|
freezerState, err := m.GetFreezerState()
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if freezerState == configs.Frozen {
|
||||||
|
// Already frozen, and should stay frozen.
|
||||||
|
needsFreeze = false
|
||||||
|
needsThaw = false
|
||||||
|
}
|
||||||
|
|
||||||
|
if r.Freezer == configs.Frozen {
|
||||||
|
// Will be frozen anyway -- no need to thaw.
|
||||||
|
needsThaw = false
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *legacyManager) Set(r *configs.Resources) error {
|
||||||
|
if r == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if r.Unified != nil {
|
||||||
|
return cgroups.ErrV1NoUnified
|
||||||
|
}
|
||||||
|
properties, err := genV1ResourcesProperties(r, m.dbus)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
unitName := getUnitName(m.cgroups)
|
||||||
|
needsFreeze, needsThaw, err := m.freezeBeforeSet(unitName, r)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if needsFreeze {
|
||||||
|
if err := m.doFreeze(configs.Frozen); err != nil {
|
||||||
|
// If freezer cgroup isn't supported, we just warn about it.
|
||||||
|
logrus.Infof("freeze container before SetUnitProperties failed: %v", err)
|
||||||
|
// skip update the cgroup while frozen failed. #3803
|
||||||
|
if !errors.Is(err, errSubsystemDoesNotExist) {
|
||||||
|
if needsThaw {
|
||||||
|
if thawErr := m.doFreeze(configs.Thawed); thawErr != nil {
|
||||||
|
logrus.Infof("thaw container after doFreeze failed: %v", thawErr)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
setErr := setUnitProperties(m.dbus, unitName, properties...)
|
||||||
|
if needsThaw {
|
||||||
|
if err := m.doFreeze(configs.Thawed); err != nil {
|
||||||
|
logrus.Infof("thaw container after SetUnitProperties failed: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if setErr != nil {
|
||||||
|
return setErr
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, sys := range legacySubsystems {
|
||||||
|
// Get the subsystem path, but don't error out for not found cgroups.
|
||||||
|
path, ok := m.paths[sys.Name()]
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if err := sys.Set(path, r); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *legacyManager) GetPaths() map[string]string {
|
||||||
|
m.mu.Lock()
|
||||||
|
defer m.mu.Unlock()
|
||||||
|
return m.paths
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *legacyManager) GetCgroups() (*configs.Cgroup, error) {
|
||||||
|
return m.cgroups, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *legacyManager) GetFreezerState() (configs.FreezerState, error) {
|
||||||
|
path, ok := m.paths["freezer"]
|
||||||
|
if !ok {
|
||||||
|
return configs.Undefined, nil
|
||||||
|
}
|
||||||
|
freezer := &fs.FreezerGroup{}
|
||||||
|
return freezer.GetState(path)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *legacyManager) Exists() bool {
|
||||||
|
return cgroups.PathExists(m.Path("devices"))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *legacyManager) OOMKillCount() (uint64, error) {
|
||||||
|
return fs.OOMKillCount(m.Path("memory"))
|
||||||
|
}
|
472
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v2.go
generated
vendored
Normal file
472
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v2.go
generated
vendored
Normal file
@ -0,0 +1,472 @@
|
|||||||
|
package systemd
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"math"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||||
|
securejoin "github.com/cyphar/filepath-securejoin"
|
||||||
|
"github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
)
|
||||||
|
|
||||||
|
type unifiedManager struct {
|
||||||
|
mu sync.Mutex
|
||||||
|
cgroups *configs.Cgroup
|
||||||
|
// path is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope"
|
||||||
|
path string
|
||||||
|
dbus *dbusConnManager
|
||||||
|
fsMgr cgroups.Manager
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewUnifiedManager(config *configs.Cgroup, path string) (cgroups.Manager, error) {
|
||||||
|
m := &unifiedManager{
|
||||||
|
cgroups: config,
|
||||||
|
path: path,
|
||||||
|
dbus: newDbusConnManager(config.Rootless),
|
||||||
|
}
|
||||||
|
if err := m.initPath(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
fsMgr, err := fs2.NewManager(config, m.path)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
m.fsMgr = fsMgr
|
||||||
|
|
||||||
|
return m, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// unifiedResToSystemdProps tries to convert from Cgroup.Resources.Unified
|
||||||
|
// key/value map (where key is cgroupfs file name) to systemd unit properties.
|
||||||
|
// This is on a best-effort basis, so the properties that are not known
|
||||||
|
// (to this function and/or systemd) are ignored (but logged with "debug"
|
||||||
|
// log level).
|
||||||
|
//
|
||||||
|
// For the list of keys, see https://www.kernel.org/doc/Documentation/cgroup-v2.txt
|
||||||
|
//
|
||||||
|
// For the list of systemd unit properties, see systemd.resource-control(5).
|
||||||
|
func unifiedResToSystemdProps(cm *dbusConnManager, res map[string]string) (props []systemdDbus.Property, _ error) {
|
||||||
|
var err error
|
||||||
|
|
||||||
|
for k, v := range res {
|
||||||
|
if strings.Contains(k, "/") {
|
||||||
|
return nil, fmt.Errorf("unified resource %q must be a file name (no slashes)", k)
|
||||||
|
}
|
||||||
|
sk := strings.SplitN(k, ".", 2)
|
||||||
|
if len(sk) != 2 {
|
||||||
|
return nil, fmt.Errorf("unified resource %q must be in the form CONTROLLER.PARAMETER", k)
|
||||||
|
}
|
||||||
|
// Kernel is quite forgiving to extra whitespace
|
||||||
|
// around the value, and so should we.
|
||||||
|
v = strings.TrimSpace(v)
|
||||||
|
// Please keep cases in alphabetical order.
|
||||||
|
switch k {
|
||||||
|
case "cpu.max":
|
||||||
|
// value: quota [period]
|
||||||
|
quota := int64(0) // 0 means "unlimited" for addCpuQuota, if period is set
|
||||||
|
period := defCPUQuotaPeriod
|
||||||
|
sv := strings.Fields(v)
|
||||||
|
if len(sv) < 1 || len(sv) > 2 {
|
||||||
|
return nil, fmt.Errorf("unified resource %q value invalid: %q", k, v)
|
||||||
|
}
|
||||||
|
// quota
|
||||||
|
if sv[0] != "max" {
|
||||||
|
quota, err = strconv.ParseInt(sv[0], 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("unified resource %q period value conversion error: %w", k, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// period
|
||||||
|
if len(sv) == 2 {
|
||||||
|
period, err = strconv.ParseUint(sv[1], 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("unified resource %q quota value conversion error: %w", k, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
addCpuQuota(cm, &props, quota, period)
|
||||||
|
|
||||||
|
case "cpu.weight":
|
||||||
|
num, err := strconv.ParseUint(v, 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("unified resource %q value conversion error: %w", k, err)
|
||||||
|
}
|
||||||
|
props = append(props,
|
||||||
|
newProp("CPUWeight", num))
|
||||||
|
|
||||||
|
case "cpuset.cpus", "cpuset.mems":
|
||||||
|
bits, err := RangeToBits(v)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("unified resource %q=%q conversion error: %w", k, v, err)
|
||||||
|
}
|
||||||
|
m := map[string]string{
|
||||||
|
"cpuset.cpus": "AllowedCPUs",
|
||||||
|
"cpuset.mems": "AllowedMemoryNodes",
|
||||||
|
}
|
||||||
|
// systemd only supports these properties since v244
|
||||||
|
sdVer := systemdVersion(cm)
|
||||||
|
if sdVer >= 244 {
|
||||||
|
props = append(props,
|
||||||
|
newProp(m[k], bits))
|
||||||
|
} else {
|
||||||
|
logrus.Debugf("systemd v%d is too old to support %s"+
|
||||||
|
" (setting will still be applied to cgroupfs)",
|
||||||
|
sdVer, m[k])
|
||||||
|
}
|
||||||
|
|
||||||
|
case "memory.high", "memory.low", "memory.min", "memory.max", "memory.swap.max":
|
||||||
|
num := uint64(math.MaxUint64)
|
||||||
|
if v != "max" {
|
||||||
|
num, err = strconv.ParseUint(v, 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("unified resource %q value conversion error: %w", k, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
m := map[string]string{
|
||||||
|
"memory.high": "MemoryHigh",
|
||||||
|
"memory.low": "MemoryLow",
|
||||||
|
"memory.min": "MemoryMin",
|
||||||
|
"memory.max": "MemoryMax",
|
||||||
|
"memory.swap.max": "MemorySwapMax",
|
||||||
|
}
|
||||||
|
props = append(props,
|
||||||
|
newProp(m[k], num))
|
||||||
|
|
||||||
|
case "pids.max":
|
||||||
|
num := uint64(math.MaxUint64)
|
||||||
|
if v != "max" {
|
||||||
|
var err error
|
||||||
|
num, err = strconv.ParseUint(v, 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("unified resource %q value conversion error: %w", k, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
props = append(props,
|
||||||
|
newProp("TasksMax", num))
|
||||||
|
|
||||||
|
case "memory.oom.group":
|
||||||
|
// Setting this to 1 is roughly equivalent to OOMPolicy=kill
|
||||||
|
// (as per systemd.service(5) and
|
||||||
|
// https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html),
|
||||||
|
// but it's not clear what to do if it is unset or set
|
||||||
|
// to 0 in runc update, as there are two other possible
|
||||||
|
// values for OOMPolicy (continue/stop).
|
||||||
|
fallthrough
|
||||||
|
|
||||||
|
default:
|
||||||
|
// Ignore the unknown resource here -- will still be
|
||||||
|
// applied in Set which calls fs2.Set.
|
||||||
|
logrus.Debugf("don't know how to convert unified resource %q=%q to systemd unit property; skipping (will still be applied to cgroupfs)", k, v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return props, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func genV2ResourcesProperties(r *configs.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) {
|
||||||
|
var properties []systemdDbus.Property
|
||||||
|
|
||||||
|
// NOTE: This is of questionable correctness because we insert our own
|
||||||
|
// devices eBPF program later. Two programs with identical rules
|
||||||
|
// aren't the end of the world, but it is a bit concerning. However
|
||||||
|
// it's unclear if systemd removes all eBPF programs attached when
|
||||||
|
// doing SetUnitProperties...
|
||||||
|
deviceProperties, err := generateDeviceProperties(r, systemdVersion(cm))
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
properties = append(properties, deviceProperties...)
|
||||||
|
|
||||||
|
if r.Memory != 0 {
|
||||||
|
properties = append(properties,
|
||||||
|
newProp("MemoryMax", uint64(r.Memory)))
|
||||||
|
}
|
||||||
|
if r.MemoryReservation != 0 {
|
||||||
|
properties = append(properties,
|
||||||
|
newProp("MemoryLow", uint64(r.MemoryReservation)))
|
||||||
|
}
|
||||||
|
|
||||||
|
swap, err := cgroups.ConvertMemorySwapToCgroupV2Value(r.MemorySwap, r.Memory)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if swap != 0 {
|
||||||
|
properties = append(properties,
|
||||||
|
newProp("MemorySwapMax", uint64(swap)))
|
||||||
|
}
|
||||||
|
|
||||||
|
if r.CpuWeight != 0 {
|
||||||
|
properties = append(properties,
|
||||||
|
newProp("CPUWeight", r.CpuWeight))
|
||||||
|
}
|
||||||
|
|
||||||
|
addCpuQuota(cm, &properties, r.CpuQuota, r.CpuPeriod)
|
||||||
|
|
||||||
|
if r.PidsLimit > 0 || r.PidsLimit == -1 {
|
||||||
|
properties = append(properties,
|
||||||
|
newProp("TasksMax", uint64(r.PidsLimit)))
|
||||||
|
}
|
||||||
|
|
||||||
|
err = addCpuset(cm, &properties, r.CpusetCpus, r.CpusetMems)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// ignore r.KernelMemory
|
||||||
|
|
||||||
|
// convert Resources.Unified map to systemd properties
|
||||||
|
if r.Unified != nil {
|
||||||
|
unifiedProps, err := unifiedResToSystemdProps(cm, r.Unified)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
properties = append(properties, unifiedProps...)
|
||||||
|
}
|
||||||
|
|
||||||
|
return properties, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *unifiedManager) Apply(pid int) error {
|
||||||
|
var (
|
||||||
|
c = m.cgroups
|
||||||
|
unitName = getUnitName(c)
|
||||||
|
properties []systemdDbus.Property
|
||||||
|
)
|
||||||
|
|
||||||
|
slice := "system.slice"
|
||||||
|
if m.cgroups.Rootless {
|
||||||
|
slice = "user.slice"
|
||||||
|
}
|
||||||
|
if c.Parent != "" {
|
||||||
|
slice = c.Parent
|
||||||
|
}
|
||||||
|
|
||||||
|
properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name))
|
||||||
|
|
||||||
|
if strings.HasSuffix(unitName, ".slice") {
|
||||||
|
// If we create a slice, the parent is defined via a Wants=.
|
||||||
|
properties = append(properties, systemdDbus.PropWants(slice))
|
||||||
|
} else {
|
||||||
|
// Otherwise it's a scope, which we put into a Slice=.
|
||||||
|
properties = append(properties, systemdDbus.PropSlice(slice))
|
||||||
|
// Assume scopes always support delegation (supported since systemd v218).
|
||||||
|
properties = append(properties, newProp("Delegate", true))
|
||||||
|
}
|
||||||
|
|
||||||
|
// only add pid if its valid, -1 is used w/ general slice creation.
|
||||||
|
if pid != -1 {
|
||||||
|
properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Always enable accounting, this gets us the same behaviour as the fs implementation,
|
||||||
|
// plus the kernel has some problems with joining the memory cgroup at a later time.
|
||||||
|
properties = append(properties,
|
||||||
|
newProp("MemoryAccounting", true),
|
||||||
|
newProp("CPUAccounting", true),
|
||||||
|
newProp("IOAccounting", true),
|
||||||
|
newProp("TasksAccounting", true),
|
||||||
|
)
|
||||||
|
|
||||||
|
// Assume DefaultDependencies= will always work (the check for it was previously broken.)
|
||||||
|
properties = append(properties,
|
||||||
|
newProp("DefaultDependencies", false))
|
||||||
|
|
||||||
|
properties = append(properties, c.SystemdProps...)
|
||||||
|
|
||||||
|
if err := startUnit(m.dbus, unitName, properties, pid == -1); err != nil {
|
||||||
|
return fmt.Errorf("unable to start unit %q (properties %+v): %w", unitName, properties, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := fs2.CreateCgroupPath(m.path, m.cgroups); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if c.OwnerUID != nil {
|
||||||
|
// The directory itself must be chowned.
|
||||||
|
err := os.Chown(m.path, *c.OwnerUID, -1)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
filesToChown, err := cgroupFilesToChown()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, v := range filesToChown {
|
||||||
|
err := os.Chown(m.path+"/"+v, *c.OwnerUID, -1)
|
||||||
|
// Some files might not be present.
|
||||||
|
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// cgroupFilesToChown returns the names of the cgroup files that should be
// chowned to the delegate uid.
//
// The kernel exposes that list in /sys/kernel/cgroup/delegate, one name per
// line. If the file cannot be opened (it is not present on Linux < 4.15),
// the initial values mentioned in cgroups(7) are returned instead. A read
// error while scanning the file is reported to the caller.
func cgroupFilesToChown() ([]string, error) {
	const cgroupDelegateFile = "/sys/kernel/cgroup/delegate"

	delegate, openErr := os.Open(cgroupDelegateFile)
	if openErr != nil {
		return []string{"cgroup.procs", "cgroup.subtree_control", "cgroup.threads"}, nil
	}
	defer delegate.Close()

	names := []string{}
	lines := bufio.NewScanner(delegate)
	for lines.Scan() {
		names = append(names, lines.Text())
	}
	if scanErr := lines.Err(); scanErr != nil {
		return nil, fmt.Errorf("error reading %s: %w", cgroupDelegateFile, scanErr)
	}

	return names, nil
}
|
||||||
|
|
||||||
|
func (m *unifiedManager) Destroy() error {
|
||||||
|
m.mu.Lock()
|
||||||
|
defer m.mu.Unlock()
|
||||||
|
|
||||||
|
unitName := getUnitName(m.cgroups)
|
||||||
|
if err := stopUnit(m.dbus, unitName); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// systemd 239 do not remove sub-cgroups.
|
||||||
|
err := m.fsMgr.Destroy()
|
||||||
|
// fsMgr.Destroy has handled ErrNotExist
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *unifiedManager) Path(_ string) string {
|
||||||
|
return m.path
|
||||||
|
}
|
||||||
|
|
||||||
|
// getSliceFull value is used in initPath.
|
||||||
|
// The value is incompatible with systemdDbus.PropSlice.
|
||||||
|
func (m *unifiedManager) getSliceFull() (string, error) {
|
||||||
|
c := m.cgroups
|
||||||
|
slice := "system.slice"
|
||||||
|
if c.Rootless {
|
||||||
|
slice = "user.slice"
|
||||||
|
}
|
||||||
|
if c.Parent != "" {
|
||||||
|
var err error
|
||||||
|
slice, err = ExpandSlice(c.Parent)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if c.Rootless {
|
||||||
|
// managerCG is typically "/user.slice/user-${uid}.slice/user@${uid}.service".
|
||||||
|
managerCG, err := getManagerProperty(m.dbus, "ControlGroup")
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
slice = filepath.Join(managerCG, slice)
|
||||||
|
}
|
||||||
|
|
||||||
|
// an example of the final slice in rootless: "/user.slice/user-1001.slice/user@1001.service/user.slice"
|
||||||
|
// NOTE: systemdDbus.PropSlice requires the "/user.slice/user-1001.slice/user@1001.service/" prefix NOT to be specified.
|
||||||
|
return slice, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *unifiedManager) initPath() error {
|
||||||
|
if m.path != "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
sliceFull, err := m.getSliceFull()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
c := m.cgroups
|
||||||
|
path := filepath.Join(sliceFull, getUnitName(c))
|
||||||
|
path, err = securejoin.SecureJoin(fs2.UnifiedMountpoint, path)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// an example of the final path in rootless:
|
||||||
|
// "/sys/fs/cgroup/user.slice/user-1001.slice/user@1001.service/user.slice/libpod-132ff0d72245e6f13a3bbc6cdc5376886897b60ac59eaa8dea1df7ab959cbf1c.scope"
|
||||||
|
m.path = path
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *unifiedManager) Freeze(state configs.FreezerState) error {
|
||||||
|
return m.fsMgr.Freeze(state)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *unifiedManager) GetPids() ([]int, error) {
|
||||||
|
return cgroups.GetPids(m.path)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *unifiedManager) GetAllPids() ([]int, error) {
|
||||||
|
return cgroups.GetAllPids(m.path)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *unifiedManager) GetStats() (*cgroups.Stats, error) {
|
||||||
|
return m.fsMgr.GetStats()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *unifiedManager) Set(r *configs.Resources) error {
|
||||||
|
if r == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
properties, err := genV2ResourcesProperties(r, m.dbus)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := setUnitProperties(m.dbus, getUnitName(m.cgroups), properties...); err != nil {
|
||||||
|
return fmt.Errorf("unable to set unit properties: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return m.fsMgr.Set(r)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *unifiedManager) GetPaths() map[string]string {
|
||||||
|
paths := make(map[string]string, 1)
|
||||||
|
paths[""] = m.path
|
||||||
|
return paths
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *unifiedManager) GetCgroups() (*configs.Cgroup, error) {
|
||||||
|
return m.cgroups, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *unifiedManager) GetFreezerState() (configs.FreezerState, error) {
|
||||||
|
return m.fsMgr.GetFreezerState()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *unifiedManager) Exists() bool {
|
||||||
|
return cgroups.PathExists(m.path)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *unifiedManager) OOMKillCount() (uint64, error) {
|
||||||
|
return m.fsMgr.OOMKillCount()
|
||||||
|
}
|
469
vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
generated
vendored
Normal file
469
vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
generated
vendored
Normal file
@ -0,0 +1,469 @@
|
|||||||
|
package cgroups
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/userns"
|
||||||
|
"github.com/sirupsen/logrus"
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
	// CgroupProcesses is the name of the per-cgroup file that pids are
	// read from and written to.
	CgroupProcesses = "cgroup.procs"

	// unifiedMountpoint is the path checked to detect cgroup v2 unified mode.
	unifiedMountpoint = "/sys/fs/cgroup"

	// hybridMountpoint is the path checked to detect cgroup v2 hybrid mode.
	hybridMountpoint = "/sys/fs/cgroup/unified"
)
|
||||||
|
|
||||||
|
// Cached results of the cgroup mode probes; each value is computed at most
// once, guarded by its sync.Once.
var (
	// Set by IsCgroup2UnifiedMode.
	isUnifiedOnce sync.Once
	isUnified     bool

	// Set by IsCgroup2HybridMode.
	isHybridOnce sync.Once
	isHybrid     bool
)
|
||||||
|
|
||||||
|
// IsCgroup2UnifiedMode returns whether we are running in cgroup v2 unified mode.
|
||||||
|
func IsCgroup2UnifiedMode() bool {
|
||||||
|
isUnifiedOnce.Do(func() {
|
||||||
|
var st unix.Statfs_t
|
||||||
|
err := unix.Statfs(unifiedMountpoint, &st)
|
||||||
|
if err != nil {
|
||||||
|
if os.IsNotExist(err) && userns.RunningInUserNS() {
|
||||||
|
// ignore the "not found" error if running in userns
|
||||||
|
logrus.WithError(err).Debugf("%s missing, assuming cgroup v1", unifiedMountpoint)
|
||||||
|
isUnified = false
|
||||||
|
return
|
||||||
|
}
|
||||||
|
panic(fmt.Sprintf("cannot statfs cgroup root: %s", err))
|
||||||
|
}
|
||||||
|
isUnified = st.Type == unix.CGROUP2_SUPER_MAGIC
|
||||||
|
})
|
||||||
|
return isUnified
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsCgroup2HybridMode returns whether we are running in cgroup v2 hybrid mode.
|
||||||
|
func IsCgroup2HybridMode() bool {
|
||||||
|
isHybridOnce.Do(func() {
|
||||||
|
var st unix.Statfs_t
|
||||||
|
err := unix.Statfs(hybridMountpoint, &st)
|
||||||
|
if err != nil {
|
||||||
|
isHybrid = false
|
||||||
|
if !os.IsNotExist(err) {
|
||||||
|
// Report unexpected errors.
|
||||||
|
logrus.WithError(err).Debugf("statfs(%q) failed", hybridMountpoint)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
isHybrid = st.Type == unix.CGROUP2_SUPER_MAGIC
|
||||||
|
})
|
||||||
|
return isHybrid
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mount describes a cgroup mount as returned by GetCgroupMounts.
type Mount struct {
	// Mountpoint is the path at which the cgroup filesystem is mounted.
	Mountpoint string
	// Root is the root recorded for the mount.
	Root string
	// Subsystems lists the cgroup subsystems (controllers) of the mount.
	Subsystems []string
}
|
||||||
|
|
||||||
|
// GetCgroupMounts returns the mounts for the cgroup subsystems.
|
||||||
|
// all indicates whether to return just the first instance or all the mounts.
|
||||||
|
// This function should not be used from cgroupv2 code, as in this case
|
||||||
|
// all the controllers are available under the constant unifiedMountpoint.
|
||||||
|
func GetCgroupMounts(all bool) ([]Mount, error) {
|
||||||
|
if IsCgroup2UnifiedMode() {
|
||||||
|
// TODO: remove cgroupv2 case once all external users are converted
|
||||||
|
availableControllers, err := GetAllSubsystems()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
m := Mount{
|
||||||
|
Mountpoint: unifiedMountpoint,
|
||||||
|
Root: unifiedMountpoint,
|
||||||
|
Subsystems: availableControllers,
|
||||||
|
}
|
||||||
|
return []Mount{m}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return getCgroupMountsV1(all)
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetAllSubsystems returns all the cgroup subsystems supported by the kernel
|
||||||
|
func GetAllSubsystems() ([]string, error) {
|
||||||
|
// /proc/cgroups is meaningless for v2
|
||||||
|
// https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#deprecated-v1-core-features
|
||||||
|
if IsCgroup2UnifiedMode() {
|
||||||
|
// "pseudo" controllers do not appear in /sys/fs/cgroup/cgroup.controllers.
|
||||||
|
// - devices: implemented in kernel 4.15
|
||||||
|
// - freezer: implemented in kernel 5.2
|
||||||
|
// We assume these are always available, as it is hard to detect availability.
|
||||||
|
pseudo := []string{"devices", "freezer"}
|
||||||
|
data, err := ReadFile("/sys/fs/cgroup", "cgroup.controllers")
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
subsystems := append(pseudo, strings.Fields(data)...)
|
||||||
|
return subsystems, nil
|
||||||
|
}
|
||||||
|
f, err := os.Open("/proc/cgroups")
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
subsystems := []string{}
|
||||||
|
|
||||||
|
s := bufio.NewScanner(f)
|
||||||
|
for s.Scan() {
|
||||||
|
text := s.Text()
|
||||||
|
if text[0] != '#' {
|
||||||
|
parts := strings.Fields(text)
|
||||||
|
if len(parts) >= 4 && parts[3] != "0" {
|
||||||
|
subsystems = append(subsystems, parts[0])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if err := s.Err(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return subsystems, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func readProcsFile(dir string) ([]int, error) {
|
||||||
|
f, err := OpenFile(dir, CgroupProcesses, os.O_RDONLY)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
var (
|
||||||
|
s = bufio.NewScanner(f)
|
||||||
|
out = []int{}
|
||||||
|
)
|
||||||
|
|
||||||
|
for s.Scan() {
|
||||||
|
if t := s.Text(); t != "" {
|
||||||
|
pid, err := strconv.Atoi(t)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
out = append(out, pid)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return out, s.Err()
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParseCgroupFile parses the given cgroup file, typically /proc/self/cgroup
|
||||||
|
// or /proc/<pid>/cgroup, into a map of subsystems to cgroup paths, e.g.
|
||||||
|
//
|
||||||
|
// "cpu": "/user.slice/user-1000.slice"
|
||||||
|
// "pids": "/user.slice/user-1000.slice"
|
||||||
|
//
|
||||||
|
// etc.
|
||||||
|
//
|
||||||
|
// Note that for cgroup v2 unified hierarchy, there are no per-controller
|
||||||
|
// cgroup paths, so the resulting map will have a single element where the key
|
||||||
|
// is empty string ("") and the value is the cgroup path the <pid> is in.
|
||||||
|
func ParseCgroupFile(path string) (map[string]string, error) {
|
||||||
|
f, err := os.Open(path)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
return parseCgroupFromReader(f)
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseCgroupFromReader does the work of ParseCgroupFile on an arbitrary
// reader, which makes it easier to test.
//
// From cgroups(7), /proc/[pid]/cgroup holds, for each cgroup hierarchy, one
// entry containing three colon-separated fields of the form:
//
//	hierarchy-ID:subsystem-list:cgroup-path
//
// Every subsystem of the comma-separated list is mapped to the cgroup path.
// A line with fewer than two colons is rejected with an error.
func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
	result := make(map[string]string)

	sc := bufio.NewScanner(r)
	for sc.Scan() {
		line := sc.Text()

		// Split at most twice so that colons in the path are preserved.
		fields := strings.SplitN(line, ":", 3)
		if len(fields) < 3 {
			return nil, fmt.Errorf("invalid cgroup entry: must contain at least two colons: %v", line)
		}

		subsystemList, cgroupPath := fields[1], fields[2]
		for _, subsystem := range strings.Split(subsystemList, ",") {
			result[subsystem] = cgroupPath
		}
	}
	if scanErr := sc.Err(); scanErr != nil {
		return nil, scanErr
	}

	return result, nil
}
|
||||||
|
|
||||||
|
// PathExists reports whether os.Stat succeeds on path. Any stat error,
// not only "not exist", yields false.
func PathExists(path string) bool {
	_, statErr := os.Stat(path)
	return statErr == nil
}
|
||||||
|
|
||||||
|
func EnterPid(cgroupPaths map[string]string, pid int) error {
|
||||||
|
for _, path := range cgroupPaths {
|
||||||
|
if PathExists(path) {
|
||||||
|
if err := WriteCgroupProc(path, pid); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func rmdir(path string) error {
|
||||||
|
err := unix.Rmdir(path)
|
||||||
|
if err == nil || err == unix.ENOENT { //nolint:errorlint // unix errors are bare
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return &os.PathError{Op: "rmdir", Path: path, Err: err}
|
||||||
|
}
|
||||||
|
|
||||||
|
// RemovePath aims to remove cgroup path. It does so recursively,
|
||||||
|
// by removing any subdirectories (sub-cgroups) first.
|
||||||
|
func RemovePath(path string) error {
|
||||||
|
// try the fast path first
|
||||||
|
if err := rmdir(path); err == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
infos, err := os.ReadDir(path)
|
||||||
|
if err != nil {
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
err = nil
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
for _, info := range infos {
|
||||||
|
if info.IsDir() {
|
||||||
|
// We should remove subcgroups dir first
|
||||||
|
if err = RemovePath(filepath.Join(path, info.Name())); err != nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if err == nil {
|
||||||
|
err = rmdir(path)
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// RemovePaths iterates over the provided paths removing them.
|
||||||
|
// We trying to remove all paths five times with increasing delay between tries.
|
||||||
|
// If after all there are not removed cgroups - appropriate error will be
|
||||||
|
// returned.
|
||||||
|
func RemovePaths(paths map[string]string) (err error) {
|
||||||
|
const retries = 5
|
||||||
|
delay := 10 * time.Millisecond
|
||||||
|
for i := 0; i < retries; i++ {
|
||||||
|
if i != 0 {
|
||||||
|
time.Sleep(delay)
|
||||||
|
delay *= 2
|
||||||
|
}
|
||||||
|
for s, p := range paths {
|
||||||
|
if err := RemovePath(p); err != nil {
|
||||||
|
// do not log intermediate iterations
|
||||||
|
switch i {
|
||||||
|
case 0:
|
||||||
|
logrus.WithError(err).Warnf("Failed to remove cgroup (will retry)")
|
||||||
|
case retries - 1:
|
||||||
|
logrus.WithError(err).Error("Failed to remove cgroup")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_, err := os.Stat(p)
|
||||||
|
// We need this strange way of checking cgroups existence because
|
||||||
|
// RemoveAll almost always returns error, even on already removed
|
||||||
|
// cgroups
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
delete(paths, s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(paths) == 0 {
|
||||||
|
//nolint:ineffassign,staticcheck // done to help garbage collecting: opencontainers/runc#2506
|
||||||
|
paths = make(map[string]string)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return fmt.Errorf("Failed to remove paths: %v", paths)
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
hugePageSizes []string
|
||||||
|
initHPSOnce sync.Once
|
||||||
|
)
|
||||||
|
|
||||||
|
func HugePageSizes() []string {
|
||||||
|
initHPSOnce.Do(func() {
|
||||||
|
dir, err := os.OpenFile("/sys/kernel/mm/hugepages", unix.O_DIRECTORY|unix.O_RDONLY, 0)
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
files, err := dir.Readdirnames(0)
|
||||||
|
dir.Close()
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
hugePageSizes, err = getHugePageSizeFromFilenames(files)
|
||||||
|
if err != nil {
|
||||||
|
logrus.Warn("HugePageSizes: ", err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
return hugePageSizes
|
||||||
|
}
|
||||||
|
|
||||||
|
// getHugePageSizeFromFilenames converts directory entry names such as
// "hugepages-1048576kB" into size strings such as "1GB".
//
// Names without the "hugepages-" prefix are silently ignored. Names with an
// unexpected suffix or a non-numeric size are skipped, and the first such
// problem is returned as the error alongside the sizes that could be parsed.
func getHugePageSizeFromFilenames(fileNames []string) ([]string, error) {
	const (
		prefix = "hugepages-"
		suffix = "kB"
	)

	sizes := make([]string, 0, len(fileNames))

	// Only the first problem encountered is reported.
	var firstWarn error
	noteWarn := func(w error) {
		if firstWarn == nil {
			firstWarn = w
		}
	}

	for _, name := range fileNames {
		// example: hugepages-1048576kB
		if !strings.HasPrefix(name, prefix) {
			// Unexpected file name: no prefix found, ignore it.
			continue
		}
		rest := name[len(prefix):]

		// The suffix is always "kB" (as of Linux 5.13). If something
		// else is found, produce an error but keep going.
		if !strings.HasSuffix(rest, suffix) {
			// Highly unlikely.
			noteWarn(errors.New(name + `: invalid suffix (expected "kB")`))
			continue
		}
		digits := strings.TrimSuffix(rest, suffix)

		kb, err := strconv.Atoi(digits)
		if err != nil {
			// Highly unlikely.
			noteWarn(fmt.Errorf("%s: %w", name, err))
			continue
		}

		// Modeled after https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/mm/hugetlb_cgroup.c?id=eff48ddeab782e35e58ccc8853f7386bbae9dec4#n574
		// but in our case the size is in KB already.
		switch {
		case kb >= 1<<20:
			sizes = append(sizes, strconv.Itoa(kb>>20)+"GB")
		case kb >= 1<<10:
			sizes = append(sizes, strconv.Itoa(kb>>10)+"MB")
		default:
			sizes = append(sizes, digits+"KB")
		}
	}

	return sizes, firstWarn
}
|
||||||
|
|
||||||
|
// GetPids returns all pids, that were added to cgroup at path.
|
||||||
|
func GetPids(dir string) ([]int, error) {
|
||||||
|
return readProcsFile(dir)
|
||||||
|
}
|
||||||
|
|
||||||
|
// WriteCgroupProc writes the specified pid into the cgroup's cgroup.procs file
|
||||||
|
func WriteCgroupProc(dir string, pid int) error {
|
||||||
|
// Normally dir should not be empty, one case is that cgroup subsystem
|
||||||
|
// is not mounted, we will get empty dir, and we want it fail here.
|
||||||
|
if dir == "" {
|
||||||
|
return fmt.Errorf("no such directory for %s", CgroupProcesses)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Dont attach any pid to the cgroup if -1 is specified as a pid
|
||||||
|
if pid == -1 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
file, err := OpenFile(dir, CgroupProcesses, os.O_WRONLY)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to write %v: %w", pid, err)
|
||||||
|
}
|
||||||
|
defer file.Close()
|
||||||
|
|
||||||
|
for i := 0; i < 5; i++ {
|
||||||
|
_, err = file.WriteString(strconv.Itoa(pid))
|
||||||
|
if err == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// EINVAL might mean that the task being added to cgroup.procs is in state
|
||||||
|
// TASK_NEW. We should attempt to do so again.
|
||||||
|
if errors.Is(err, unix.EINVAL) {
|
||||||
|
time.Sleep(30 * time.Millisecond)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
return fmt.Errorf("failed to write %v: %w", pid, err)
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// ConvertCPUSharesToCgroupV2Value converts a cgroup v1 cpu shares value to
// the cgroup v2 weight scale.
//
// Since the OCI spec is designed for cgroup v1, in some cases there is need
// to convert from the cgroup v1 configuration to cgroup v2. The formula for
// cpuShares is y = (1 + ((x - 2) * 9999) / 262142), which converts from
// [2-262144] to [1-10000]. 262144 comes from the Linux kernel definition
// "#define MAX_SHARES (1UL << 18)".
//
// A value of 0 means "unset" and is returned as 0. Inputs outside of
// [2-262144] are clamped to that range before conversion: without the clamp,
// the unsigned subtraction would wrap around for an input of 1 and the
// multiplication could overflow for very large inputs.
func ConvertCPUSharesToCgroupV2Value(cpuShares uint64) uint64 {
	const (
		minShares = 2
		maxShares = 262144
		minWeight = 1
		maxWeight = 10000
	)

	switch {
	case cpuShares == 0:
		return 0
	case cpuShares <= minShares:
		return minWeight
	case cpuShares >= maxShares:
		return maxWeight
	}
	return minWeight + ((cpuShares-minShares)*(maxWeight-minWeight))/(maxShares-minShares)
}
|
||||||
|
|
||||||
|
// ConvertMemorySwapToCgroupV2Value converts MemorySwap value from OCI spec
// for use by cgroup v2 drivers. A conversion is needed since Resources.MemorySwap
// is defined as memory+swap combined, while in cgroup v2 swap is a separate value.
//
// -1 stands for "max" and 0 for "unset". An error is returned when a swap
// limit is requested without a usable memory limit, or when the combined
// limit is smaller than the memory limit.
func ConvertMemorySwapToCgroupV2Value(memorySwap, memory int64) (int64, error) {
	switch {
	case memory == -1 && memorySwap == 0:
		// For compatibility with cgroup1 controller, set swap to
		// unlimited in case the memory is set to unlimited, and swap is
		// not explicitly set, treating the request as "set both memory
		// and swap to unlimited".
		return -1, nil
	case memorySwap == -1, memorySwap == 0:
		// -1 is "max", 0 is "unset", so treat as is.
		return memorySwap, nil

	// Sanity checks.
	case memory == 0, memory == -1:
		return 0, errors.New("unable to set swap limit without memory limit")
	case memory < 0:
		return 0, fmt.Errorf("invalid memory value: %d", memory)
	case memorySwap < memory:
		return 0, errors.New("memory+swap limit should be >= memory limit")
	}

	return memorySwap - memory, nil
}
|
||||||
|
|
||||||
|
// ConvertBlkIOToIOWeightValue converts a cgroup v1 blkio weight to the
// cgroup v2 IO weight scale.
//
// Since the OCI spec is designed for cgroup v1, in some cases there is need
// to convert from the cgroup v1 configuration to cgroup v2. The formula for
// BlkIOWeight to IOWeight is y = (1 + (x - 10) * 9999 / 990), which converts
// linearly from [10-1000] to [1-10000].
//
// A value of 0 means "unset" and is returned as 0. Weights below 10 are
// mapped to 1: without that guard, the unsigned subtraction would wrap
// around and yield a meaningless, huge value.
func ConvertBlkIOToIOWeightValue(blkIoWeight uint16) uint64 {
	const minBlkIOWeight = 10

	switch {
	case blkIoWeight == 0:
		return 0
	case blkIoWeight < minBlkIOWeight:
		return 1
	}
	return 1 + (uint64(blkIoWeight)-minBlkIOWeight)*9999/990
}
|
290
vendor/github.com/opencontainers/runc/libcontainer/cgroups/v1_utils.go
generated
vendored
Normal file
290
vendor/github.com/opencontainers/runc/libcontainer/cgroups/v1_utils.go
generated
vendored
Normal file
@ -0,0 +1,290 @@
|
|||||||
|
package cgroups
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"syscall"
|
||||||
|
|
||||||
|
securejoin "github.com/cyphar/filepath-securejoin"
|
||||||
|
"github.com/moby/sys/mountinfo"
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
)
|
||||||
|
|
||||||
|
// The code in this source file is specific to cgroup v1
// and must not be used from any cgroup v2 code.
|
||||||
|
|
||||||
|
const (
	// CgroupNamePrefix is the prefix stripped from a subsystem name
	// before it is looked up under defaultPrefix.
	CgroupNamePrefix = "name="

	// defaultPrefix is the directory under which cgroup v1 subsystem
	// mounts are looked for first.
	defaultPrefix = "/sys/fs/cgroup"
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
errUnified = errors.New("not implemented for cgroup v2 unified hierarchy")
|
||||||
|
ErrV1NoUnified = errors.New("invalid configuration: cannot use unified on cgroup v1")
|
||||||
|
|
||||||
|
readMountinfoOnce sync.Once
|
||||||
|
readMountinfoErr error
|
||||||
|
cgroupMountinfo []*mountinfo.Info
|
||||||
|
)
|
||||||
|
|
||||||
|
// NotFoundError reports that no mountpoint was found for a subsystem.
type NotFoundError struct {
	// Subsystem is the name of the subsystem that was looked for.
	Subsystem string
}

// Error implements the error interface.
func (e *NotFoundError) Error() string {
	const format = "mountpoint for %s not found"
	return fmt.Sprintf(format, e.Subsystem)
}

// NewNotFoundError returns a *NotFoundError for the subsystem sub.
func NewNotFoundError(sub string) error {
	notFound := new(NotFoundError)
	notFound.Subsystem = sub
	return notFound
}

// IsNotFound reports whether err is, or wraps, a *NotFoundError.
func IsNotFound(err error) bool {
	var target *NotFoundError
	return errors.As(err, &target)
}
|
||||||
|
|
||||||
|
func tryDefaultPath(cgroupPath, subsystem string) string {
|
||||||
|
if !strings.HasPrefix(defaultPrefix, cgroupPath) {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// remove possible prefix
|
||||||
|
subsystem = strings.TrimPrefix(subsystem, CgroupNamePrefix)
|
||||||
|
|
||||||
|
// Make sure we're still under defaultPrefix, and resolve
|
||||||
|
// a possible symlink (like cpu -> cpu,cpuacct).
|
||||||
|
path, err := securejoin.SecureJoin(defaultPrefix, subsystem)
|
||||||
|
if err != nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// (1) path should be a directory.
|
||||||
|
st, err := os.Lstat(path)
|
||||||
|
if err != nil || !st.IsDir() {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// (2) path should be a mount point.
|
||||||
|
pst, err := os.Lstat(filepath.Dir(path))
|
||||||
|
if err != nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
if st.Sys().(*syscall.Stat_t).Dev == pst.Sys().(*syscall.Stat_t).Dev {
|
||||||
|
// parent dir has the same dev -- path is not a mount point
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// (3) path should have 'cgroup' fs type.
|
||||||
|
fst := unix.Statfs_t{}
|
||||||
|
err = unix.Statfs(path, &fst)
|
||||||
|
if err != nil || fst.Type != unix.CGROUP_SUPER_MAGIC {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
return path
|
||||||
|
}
|
||||||
|
|
||||||
|
// readCgroupMountinfo returns a list of cgroup v1 mounts (i.e. the ones
|
||||||
|
// with fstype of "cgroup") for the current running process.
|
||||||
|
//
|
||||||
|
// The results are cached (to avoid re-reading mountinfo which is relatively
|
||||||
|
// expensive), so it is assumed that cgroup mounts are not being changed.
|
||||||
|
func readCgroupMountinfo() ([]*mountinfo.Info, error) {
|
||||||
|
readMountinfoOnce.Do(func() {
|
||||||
|
cgroupMountinfo, readMountinfoErr = mountinfo.GetMounts(
|
||||||
|
mountinfo.FSTypeFilter("cgroup"),
|
||||||
|
)
|
||||||
|
})
|
||||||
|
|
||||||
|
return cgroupMountinfo, readMountinfoErr
|
||||||
|
}
|
||||||
|
|
||||||
|
// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
|
||||||
|
func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) {
|
||||||
|
if IsCgroup2UnifiedMode() {
|
||||||
|
return "", errUnified
|
||||||
|
}
|
||||||
|
|
||||||
|
// If subsystem is empty, we look for the cgroupv2 hybrid path.
|
||||||
|
if len(subsystem) == 0 {
|
||||||
|
return hybridMountpoint, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Avoid parsing mountinfo by trying the default path first, if possible.
|
||||||
|
if path := tryDefaultPath(cgroupPath, subsystem); path != "" {
|
||||||
|
return path, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem)
|
||||||
|
return mnt, err
|
||||||
|
}
|
||||||
|
|
||||||
|
func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, error) {
|
||||||
|
if IsCgroup2UnifiedMode() {
|
||||||
|
return "", "", errUnified
|
||||||
|
}
|
||||||
|
|
||||||
|
mi, err := readCgroupMountinfo()
|
||||||
|
if err != nil {
|
||||||
|
return "", "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
return findCgroupMountpointAndRootFromMI(mi, cgroupPath, subsystem)
|
||||||
|
}
|
||||||
|
|
||||||
|
func findCgroupMountpointAndRootFromMI(mounts []*mountinfo.Info, cgroupPath, subsystem string) (string, string, error) {
|
||||||
|
for _, mi := range mounts {
|
||||||
|
if strings.HasPrefix(mi.Mountpoint, cgroupPath) {
|
||||||
|
for _, opt := range strings.Split(mi.VFSOptions, ",") {
|
||||||
|
if opt == subsystem {
|
||||||
|
return mi.Mountpoint, mi.Root, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return "", "", NewNotFoundError(subsystem)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
|
||||||
|
if len(m.Subsystems) == 0 {
|
||||||
|
return "", errors.New("no subsystem for mount")
|
||||||
|
}
|
||||||
|
|
||||||
|
return getControllerPath(m.Subsystems[0], cgroups)
|
||||||
|
}
|
||||||
|
|
||||||
|
func getCgroupMountsHelper(ss map[string]bool, mounts []*mountinfo.Info, all bool) ([]Mount, error) {
|
||||||
|
res := make([]Mount, 0, len(ss))
|
||||||
|
numFound := 0
|
||||||
|
for _, mi := range mounts {
|
||||||
|
m := Mount{
|
||||||
|
Mountpoint: mi.Mountpoint,
|
||||||
|
Root: mi.Root,
|
||||||
|
}
|
||||||
|
for _, opt := range strings.Split(mi.VFSOptions, ",") {
|
||||||
|
seen, known := ss[opt]
|
||||||
|
if !known || (!all && seen) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
ss[opt] = true
|
||||||
|
opt = strings.TrimPrefix(opt, CgroupNamePrefix)
|
||||||
|
m.Subsystems = append(m.Subsystems, opt)
|
||||||
|
numFound++
|
||||||
|
}
|
||||||
|
if len(m.Subsystems) > 0 || all {
|
||||||
|
res = append(res, m)
|
||||||
|
}
|
||||||
|
if !all && numFound >= len(ss) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return res, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func getCgroupMountsV1(all bool) ([]Mount, error) {
|
||||||
|
mi, err := readCgroupMountinfo()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
allSubsystems, err := ParseCgroupFile("/proc/self/cgroup")
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
allMap := make(map[string]bool)
|
||||||
|
for s := range allSubsystems {
|
||||||
|
allMap[s] = false
|
||||||
|
}
|
||||||
|
|
||||||
|
return getCgroupMountsHelper(allMap, mi, all)
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetOwnCgroup returns the relative path to the cgroup docker is running in.
|
||||||
|
func GetOwnCgroup(subsystem string) (string, error) {
|
||||||
|
if IsCgroup2UnifiedMode() {
|
||||||
|
return "", errUnified
|
||||||
|
}
|
||||||
|
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
return getControllerPath(subsystem, cgroups)
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetOwnCgroupPath(subsystem string) (string, error) {
|
||||||
|
cgroup, err := GetOwnCgroup(subsystem)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
// If subsystem is empty, we look for the cgroupv2 hybrid path.
|
||||||
|
if len(subsystem) == 0 {
|
||||||
|
return hybridMountpoint, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return getCgroupPathHelper(subsystem, cgroup)
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetInitCgroup(subsystem string) (string, error) {
|
||||||
|
if IsCgroup2UnifiedMode() {
|
||||||
|
return "", errUnified
|
||||||
|
}
|
||||||
|
cgroups, err := ParseCgroupFile("/proc/1/cgroup")
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
return getControllerPath(subsystem, cgroups)
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetInitCgroupPath(subsystem string) (string, error) {
|
||||||
|
cgroup, err := GetInitCgroup(subsystem)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
return getCgroupPathHelper(subsystem, cgroup)
|
||||||
|
}
|
||||||
|
|
||||||
|
func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
|
||||||
|
mnt, root, err := FindCgroupMountpointAndRoot("", subsystem)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
// This is needed for nested containers, because in /proc/self/cgroup we
|
||||||
|
// see paths from host, which don't exist in container.
|
||||||
|
relCgroup, err := filepath.Rel(root, cgroup)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
return filepath.Join(mnt, relCgroup), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func getControllerPath(subsystem string, cgroups map[string]string) (string, error) {
|
||||||
|
if IsCgroup2UnifiedMode() {
|
||||||
|
return "", errUnified
|
||||||
|
}
|
||||||
|
|
||||||
|
if p, ok := cgroups[subsystem]; ok {
|
||||||
|
return p, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if p, ok := cgroups[CgroupNamePrefix+subsystem]; ok {
|
||||||
|
return p, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return "", NewNotFoundError(subsystem)
|
||||||
|
}
|
66
vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go
generated
vendored
Normal file
66
vendor/github.com/opencontainers/runc/libcontainer/configs/blkio_device.go
generated
vendored
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
package configs
|
||||||
|
|
||||||
|
import "fmt"
|
||||||
|
|
||||||
|
// blockIODevice identifies a block device by the major:minor pair used in
// the blkio cgroup files.
type blockIODevice struct {
	// Major is the device's major number.
	Major int64 `json:"major"`
	// Minor is the device's minor number.
	Minor int64 `json:"minor"`
}

// WeightDevice holds a `major:minor weight` / `major:minor leaf_weight` pair
// for one device.
type WeightDevice struct {
	blockIODevice
	// Weight is the bandwidth rate for the device; range is 10 to 1000.
	Weight uint16 `json:"weight"`
	// LeafWeight is the bandwidth rate for the device while competing with
	// the cgroup's child cgroups; range is 10 to 1000, cfq scheduler only.
	LeafWeight uint16 `json:"leafWeight"`
}

// NewWeightDevice returns a pointer to a WeightDevice populated with the
// given device numbers and weights.
func NewWeightDevice(major, minor int64, weight, leafWeight uint16) *WeightDevice {
	return &WeightDevice{
		blockIODevice: blockIODevice{Major: major, Minor: minor},
		Weight:        weight,
		LeafWeight:    leafWeight,
	}
}

// WeightString renders the device and its weight as "major:minor weight",
// the form written to the cgroup file.
func (wd *WeightDevice) WeightString() string {
	major, minor := wd.Major, wd.Minor
	return fmt.Sprintf("%d:%d %d", major, minor, wd.Weight)
}

// LeafWeightString renders the device and its leaf weight as
// "major:minor leaf_weight", the form written to the cgroup file.
func (wd *WeightDevice) LeafWeightString() string {
	major, minor := wd.Major, wd.Minor
	return fmt.Sprintf("%d:%d %d", major, minor, wd.LeafWeight)
}

// ThrottleDevice holds a `major:minor rate_per_second` pair for one device.
type ThrottleDevice struct {
	blockIODevice
	// Rate is the IO rate limit per cgroup per device.
	Rate uint64 `json:"rate"`
}

// NewThrottleDevice returns a pointer to a ThrottleDevice populated with the
// given device numbers and rate.
func NewThrottleDevice(major, minor int64, rate uint64) *ThrottleDevice {
	return &ThrottleDevice{
		blockIODevice: blockIODevice{Major: major, Minor: minor},
		Rate:          rate,
	}
}

// String renders the device and its rate as "major:minor rate", the form
// written to the cgroup file.
func (td *ThrottleDevice) String() string {
	major, minor := td.Major, td.Minor
	return fmt.Sprintf("%d:%d %d", major, minor, td.Rate)
}

// StringName renders the device and its rate as "major:minor name=rate",
// the form written to the cgroup file.
func (td *ThrottleDevice) StringName(name string) string {
	major, minor := td.Major, td.Minor
	return fmt.Sprintf("%d:%d %s=%d", major, minor, name, td.Rate)
}
|
158
vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go
generated
vendored
Normal file
158
vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go
generated
vendored
Normal file
@ -0,0 +1,158 @@
|
|||||||
|
package configs
|
||||||
|
|
||||||
|
import (
|
||||||
|
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/devices"
|
||||||
|
)
|
||||||
|
|
||||||
|
// FreezerState is the string value used for a cgroup's freezer setting.
type FreezerState string

// Freezer states that can be assigned to Resources.Freezer.
const (
	// Undefined is the empty (unset) freezer state.
	Undefined FreezerState = ""
	// Frozen is the "FROZEN" freezer state.
	Frozen FreezerState = "FROZEN"
	// Thawed is the "THAWED" freezer state.
	Thawed FreezerState = "THAWED"
)
|
||||||
|
|
||||||
|
// Cgroup holds properties of a cgroup on Linux.
|
||||||
|
type Cgroup struct {
|
||||||
|
// Name specifies the name of the cgroup
|
||||||
|
Name string `json:"name,omitempty"`
|
||||||
|
|
||||||
|
// Parent specifies the name of parent of cgroup or slice
|
||||||
|
Parent string `json:"parent,omitempty"`
|
||||||
|
|
||||||
|
// Path specifies the path to cgroups that are created and/or joined by the container.
|
||||||
|
// The path is assumed to be relative to the host system cgroup mountpoint.
|
||||||
|
Path string `json:"path"`
|
||||||
|
|
||||||
|
// ScopePrefix describes prefix for the scope name
|
||||||
|
ScopePrefix string `json:"scope_prefix"`
|
||||||
|
|
||||||
|
// Resources contains various cgroups settings to apply
|
||||||
|
*Resources
|
||||||
|
|
||||||
|
// Systemd tells if systemd should be used to manage cgroups.
|
||||||
|
Systemd bool
|
||||||
|
|
||||||
|
// SystemdProps are any additional properties for systemd,
|
||||||
|
// derived from org.systemd.property.xxx annotations.
|
||||||
|
// Ignored unless systemd is used for managing cgroups.
|
||||||
|
SystemdProps []systemdDbus.Property `json:"-"`
|
||||||
|
|
||||||
|
// Rootless tells if rootless cgroups should be used.
|
||||||
|
Rootless bool
|
||||||
|
|
||||||
|
// The host UID that should own the cgroup, or nil to accept
|
||||||
|
// the default ownership. This should only be set when the
|
||||||
|
// cgroupfs is to be mounted read/write.
|
||||||
|
// Not all cgroup manager implementations support changing
|
||||||
|
// the ownership.
|
||||||
|
OwnerUID *int `json:"owner_uid,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type Resources struct {
|
||||||
|
// Devices is the set of access rules for devices in the container.
|
||||||
|
Devices []*devices.Rule `json:"devices"`
|
||||||
|
|
||||||
|
// Memory limit (in bytes)
|
||||||
|
Memory int64 `json:"memory"`
|
||||||
|
|
||||||
|
// Memory reservation or soft_limit (in bytes)
|
||||||
|
MemoryReservation int64 `json:"memory_reservation"`
|
||||||
|
|
||||||
|
// Total memory usage (memory + swap); set `-1` to enable unlimited swap
|
||||||
|
MemorySwap int64 `json:"memory_swap"`
|
||||||
|
|
||||||
|
// CPU shares (relative weight vs. other containers)
|
||||||
|
CpuShares uint64 `json:"cpu_shares"`
|
||||||
|
|
||||||
|
// CPU hardcap limit (in usecs). Allowed cpu time in a given period.
|
||||||
|
CpuQuota int64 `json:"cpu_quota"`
|
||||||
|
|
||||||
|
// CPU period to be used for hardcapping (in usecs). 0 to use system default.
|
||||||
|
CpuPeriod uint64 `json:"cpu_period"`
|
||||||
|
|
||||||
|
// How many time CPU will use in realtime scheduling (in usecs).
|
||||||
|
CpuRtRuntime int64 `json:"cpu_rt_quota"`
|
||||||
|
|
||||||
|
// CPU period to be used for realtime scheduling (in usecs).
|
||||||
|
CpuRtPeriod uint64 `json:"cpu_rt_period"`
|
||||||
|
|
||||||
|
// CPU to use
|
||||||
|
CpusetCpus string `json:"cpuset_cpus"`
|
||||||
|
|
||||||
|
// MEM to use
|
||||||
|
CpusetMems string `json:"cpuset_mems"`
|
||||||
|
|
||||||
|
// Process limit; set <= `0' to disable limit.
|
||||||
|
PidsLimit int64 `json:"pids_limit"`
|
||||||
|
|
||||||
|
// Specifies per cgroup weight, range is from 10 to 1000.
|
||||||
|
BlkioWeight uint16 `json:"blkio_weight"`
|
||||||
|
|
||||||
|
// Specifies tasks' weight in the given cgroup while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only
|
||||||
|
BlkioLeafWeight uint16 `json:"blkio_leaf_weight"`
|
||||||
|
|
||||||
|
// Weight per cgroup per device, can override BlkioWeight.
|
||||||
|
BlkioWeightDevice []*WeightDevice `json:"blkio_weight_device"`
|
||||||
|
|
||||||
|
// IO read rate limit per cgroup per device, bytes per second.
|
||||||
|
BlkioThrottleReadBpsDevice []*ThrottleDevice `json:"blkio_throttle_read_bps_device"`
|
||||||
|
|
||||||
|
// IO write rate limit per cgroup per device, bytes per second.
|
||||||
|
BlkioThrottleWriteBpsDevice []*ThrottleDevice `json:"blkio_throttle_write_bps_device"`
|
||||||
|
|
||||||
|
// IO read rate limit per cgroup per device, IO per second.
|
||||||
|
BlkioThrottleReadIOPSDevice []*ThrottleDevice `json:"blkio_throttle_read_iops_device"`
|
||||||
|
|
||||||
|
// IO write rate limit per cgroup per device, IO per second.
|
||||||
|
BlkioThrottleWriteIOPSDevice []*ThrottleDevice `json:"blkio_throttle_write_iops_device"`
|
||||||
|
|
||||||
|
// set the freeze value for the process
|
||||||
|
Freezer FreezerState `json:"freezer"`
|
||||||
|
|
||||||
|
// Hugetlb limit (in bytes)
|
||||||
|
HugetlbLimit []*HugepageLimit `json:"hugetlb_limit"`
|
||||||
|
|
||||||
|
// Whether to disable OOM Killer
|
||||||
|
OomKillDisable bool `json:"oom_kill_disable"`
|
||||||
|
|
||||||
|
// Tuning swappiness behaviour per cgroup
|
||||||
|
MemorySwappiness *uint64 `json:"memory_swappiness"`
|
||||||
|
|
||||||
|
// Set priority of network traffic for container
|
||||||
|
NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap"`
|
||||||
|
|
||||||
|
// Set class identifier for container's network packets
|
||||||
|
NetClsClassid uint32 `json:"net_cls_classid_u"`
|
||||||
|
|
||||||
|
// Rdma resource restriction configuration
|
||||||
|
Rdma map[string]LinuxRdma `json:"rdma"`
|
||||||
|
|
||||||
|
// Used on cgroups v2:
|
||||||
|
|
||||||
|
// CpuWeight sets a proportional bandwidth limit.
|
||||||
|
CpuWeight uint64 `json:"cpu_weight"`
|
||||||
|
|
||||||
|
// Unified is cgroupv2-only key-value map.
|
||||||
|
Unified map[string]string `json:"unified"`
|
||||||
|
|
||||||
|
// SkipDevices allows to skip configuring device permissions.
|
||||||
|
// Used by e.g. kubelet while creating a parent cgroup (kubepods)
|
||||||
|
// common for many containers, and by runc update.
|
||||||
|
//
|
||||||
|
// NOTE it is impossible to start a container which has this flag set.
|
||||||
|
SkipDevices bool `json:"-"`
|
||||||
|
|
||||||
|
// SkipFreezeOnSet is a flag for cgroup manager to skip the cgroup
|
||||||
|
// freeze when setting resources. Only applicable to systemd legacy
|
||||||
|
// (i.e. cgroup v1) manager (which uses freeze by default to avoid
|
||||||
|
// spurious permission errors caused by systemd inability to update
|
||||||
|
// device rules in a non-disruptive manner).
|
||||||
|
//
|
||||||
|
// If not set, a few methods (such as looking into cgroup's
|
||||||
|
// devices.list and querying the systemd unit properties) are used
|
||||||
|
// during Set() to figure out whether the freeze is required. Those
|
||||||
|
// methods may be relatively slow, thus this flag.
|
||||||
|
SkipFreezeOnSet bool `json:"-"`
|
||||||
|
}
|
9
vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go
generated
vendored
Normal file
9
vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go
generated
vendored
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
//go:build !linux
|
||||||
|
// +build !linux
|
||||||
|
|
||||||
|
package configs
|
||||||
|
|
||||||
|
// Cgroup is an empty stand-in, on non-Linux builds, for the type that holds cgroup properties on Linux.
|
||||||
|
// TODO Windows: This can ultimately be entirely factored out on Windows as
|
||||||
|
// cgroups are a Unix-specific construct.
|
||||||
|
type Cgroup struct{}
|
414
vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
generated
vendored
Normal file
414
vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
generated
vendored
Normal file
@ -0,0 +1,414 @@
|
|||||||
|
package configs
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"os/exec"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/devices"
|
||||||
|
"github.com/opencontainers/runtime-spec/specs-go"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Rlimit is a single resource limit entry: a numeric limit type together
// with its hard and soft values.
type Rlimit struct {
	Type int    `json:"type"`
	Hard uint64 `json:"hard"`
	Soft uint64 `json:"soft"`
}
|
||||||
|
|
||||||
|
// IDMap is one UID/GID mapping entry for user namespaces: a container ID,
// a host ID, and a size.
type IDMap struct {
	ContainerID int `json:"container_id"`
	HostID      int `json:"host_id"`
	Size        int `json:"size"`
}
|
||||||
|
|
||||||
|
// Seccomp describes syscall restrictions.
//
// By default only the native architecture of the kernel may be used for
// syscalls; further architectures can be allowed by listing them in
// Architectures.
type Seccomp struct {
	DefaultAction    Action     `json:"default_action"`
	Architectures    []string   `json:"architectures"`
	Syscalls         []*Syscall `json:"syscalls"`
	DefaultErrnoRet  *uint      `json:"default_errno_ret"`
	ListenerPath     string     `json:"listener_path,omitempty"`
	ListenerMetadata string     `json:"listener_metadata,omitempty"`
}

// Action is what Seccomp does when a rule matches.
type Action int

// Seccomp actions; the numbering starts at 1.
const (
	Kill Action = iota + 1
	Errno
	Trap
	Allow
	Trace
	Log
	Notify
	KillThread
	KillProcess
)

// Operator is the comparison operator used when matching syscall arguments
// in Seccomp.
type Operator int

// Seccomp argument comparison operators; the numbering starts at 1.
const (
	EqualTo Operator = iota + 1
	NotEqualTo
	GreaterThan
	GreaterThanOrEqualTo
	LessThan
	LessThanOrEqualTo
	MaskEqualTo
)

// Arg is a rule that matches one specific syscall argument in Seccomp.
type Arg struct {
	Index    uint     `json:"index"`
	Value    uint64   `json:"value"`
	ValueTwo uint64   `json:"value_two"`
	Op       Operator `json:"op"`
}

// Syscall is a rule that matches a syscall in Seccomp.
type Syscall struct {
	Name     string `json:"name"`
	Action   Action `json:"action"`
	ErrnoRet *uint  `json:"errnoRet"`
	Args     []*Arg `json:"args"`
}
|
||||||
|
|
||||||
|
// TODO Windows. Many of these fields should be factored out into those parts
|
||||||
|
// which are common across platforms, and those which are platform specific.
|
||||||
|
|
||||||
|
// Config defines configuration options for executing a process inside a contained environment.
|
||||||
|
type Config struct {
|
||||||
|
// NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs
|
||||||
|
// This is a common option when the container is running in ramdisk
|
||||||
|
NoPivotRoot bool `json:"no_pivot_root"`
|
||||||
|
|
||||||
|
// ParentDeathSignal specifies the signal that is sent to the container's process in the case
|
||||||
|
// that the parent process dies.
|
||||||
|
ParentDeathSignal int `json:"parent_death_signal"`
|
||||||
|
|
||||||
|
// Path to a directory containing the container's root filesystem.
|
||||||
|
Rootfs string `json:"rootfs"`
|
||||||
|
|
||||||
|
// Umask is the umask to use inside of the container.
|
||||||
|
Umask *uint32 `json:"umask"`
|
||||||
|
|
||||||
|
// Readonlyfs will remount the container's rootfs as readonly where only externally mounted
|
||||||
|
// bind mounts are writtable.
|
||||||
|
Readonlyfs bool `json:"readonlyfs"`
|
||||||
|
|
||||||
|
// Specifies the mount propagation flags to be applied to /.
|
||||||
|
RootPropagation int `json:"rootPropagation"`
|
||||||
|
|
||||||
|
// Mounts specify additional source and destination paths that will be mounted inside the container's
|
||||||
|
// rootfs and mount namespace if specified
|
||||||
|
Mounts []*Mount `json:"mounts"`
|
||||||
|
|
||||||
|
// The device nodes that should be automatically created within the container upon container start. Note, make sure that the node is marked as allowed in the cgroup as well!
|
||||||
|
Devices []*devices.Device `json:"devices"`
|
||||||
|
|
||||||
|
MountLabel string `json:"mount_label"`
|
||||||
|
|
||||||
|
// Hostname optionally sets the container's hostname if provided
|
||||||
|
Hostname string `json:"hostname"`
|
||||||
|
|
||||||
|
// Namespaces specifies the container's namespaces that it should setup when cloning the init process
|
||||||
|
// If a namespace is not provided that namespace is shared from the container's parent process
|
||||||
|
Namespaces Namespaces `json:"namespaces"`
|
||||||
|
|
||||||
|
// Capabilities specify the capabilities to keep when executing the process inside the container
|
||||||
|
// All capabilities not specified will be dropped from the processes capability mask
|
||||||
|
Capabilities *Capabilities `json:"capabilities"`
|
||||||
|
|
||||||
|
// Networks specifies the container's network setup to be created
|
||||||
|
Networks []*Network `json:"networks"`
|
||||||
|
|
||||||
|
// Routes can be specified to create entries in the route table as the container is started
|
||||||
|
Routes []*Route `json:"routes"`
|
||||||
|
|
||||||
|
// Cgroups specifies specific cgroup settings for the various subsystems that the container is
|
||||||
|
// placed into to limit the resources the container has available
|
||||||
|
Cgroups *Cgroup `json:"cgroups"`
|
||||||
|
|
||||||
|
// AppArmorProfile specifies the profile to apply to the process running in the container and is
|
||||||
|
// change at the time the process is execed
|
||||||
|
AppArmorProfile string `json:"apparmor_profile,omitempty"`
|
||||||
|
|
||||||
|
// ProcessLabel specifies the label to apply to the process running in the container. It is
|
||||||
|
// commonly used by selinux
|
||||||
|
ProcessLabel string `json:"process_label,omitempty"`
|
||||||
|
|
||||||
|
// Rlimits specifies the resource limits, such as max open files, to set in the container
|
||||||
|
// If Rlimits are not set, the container will inherit rlimits from the parent process
|
||||||
|
Rlimits []Rlimit `json:"rlimits,omitempty"`
|
||||||
|
|
||||||
|
// OomScoreAdj specifies the adjustment to be made by the kernel when calculating oom scores
|
||||||
|
// for a process. Valid values are between the range [-1000, '1000'], where processes with
|
||||||
|
// higher scores are preferred for being killed. If it is unset then we don't touch the current
|
||||||
|
// value.
|
||||||
|
// More information about kernel oom score calculation here: https://lwn.net/Articles/317814/
|
||||||
|
OomScoreAdj *int `json:"oom_score_adj,omitempty"`
|
||||||
|
|
||||||
|
// UidMappings is an array of User ID mappings for User Namespaces
|
||||||
|
UidMappings []IDMap `json:"uid_mappings"`
|
||||||
|
|
||||||
|
// GidMappings is an array of Group ID mappings for User Namespaces
|
||||||
|
GidMappings []IDMap `json:"gid_mappings"`
|
||||||
|
|
||||||
|
// MaskPaths specifies paths within the container's rootfs to mask over with a bind
|
||||||
|
// mount pointing to /dev/null as to prevent reads of the file.
|
||||||
|
MaskPaths []string `json:"mask_paths"`
|
||||||
|
|
||||||
|
// ReadonlyPaths specifies paths within the container's rootfs to remount as read-only
|
||||||
|
// so that these files prevent any writes.
|
||||||
|
ReadonlyPaths []string `json:"readonly_paths"`
|
||||||
|
|
||||||
|
// Sysctl is a map of properties and their values. It is the equivalent of using
|
||||||
|
// sysctl -w my.property.name value in Linux.
|
||||||
|
Sysctl map[string]string `json:"sysctl"`
|
||||||
|
|
||||||
|
// Seccomp allows actions to be taken whenever a syscall is made within the container.
|
||||||
|
// A number of rules are given, each having an action to be taken if a syscall matches it.
|
||||||
|
// A default action to be taken if no rules match is also given.
|
||||||
|
Seccomp *Seccomp `json:"seccomp"`
|
||||||
|
|
||||||
|
// NoNewPrivileges controls whether processes in the container can gain additional privileges.
|
||||||
|
NoNewPrivileges bool `json:"no_new_privileges,omitempty"`
|
||||||
|
|
||||||
|
// Hooks are a collection of actions to perform at various container lifecycle events.
|
||||||
|
// CommandHooks are serialized to JSON, but other hooks are not.
|
||||||
|
Hooks Hooks
|
||||||
|
|
||||||
|
// Version is the version of opencontainer specification that is supported.
|
||||||
|
Version string `json:"version"`
|
||||||
|
|
||||||
|
// Labels are user defined metadata that is stored in the config and populated on the state
|
||||||
|
Labels []string `json:"labels"`
|
||||||
|
|
||||||
|
// NoNewKeyring will not allocated a new session keyring for the container. It will use the
|
||||||
|
// callers keyring in this case.
|
||||||
|
NoNewKeyring bool `json:"no_new_keyring"`
|
||||||
|
|
||||||
|
// IntelRdt specifies settings for Intel RDT group that the container is placed into
|
||||||
|
// to limit the resources (e.g., L3 cache, memory bandwidth) the container has available
|
||||||
|
IntelRdt *IntelRdt `json:"intel_rdt,omitempty"`
|
||||||
|
|
||||||
|
// RootlessEUID is set when the runc was launched with non-zero EUID.
|
||||||
|
// Note that RootlessEUID is set to false when launched with EUID=0 in userns.
|
||||||
|
// When RootlessEUID is set, runc creates a new userns for the container.
|
||||||
|
// (config.json needs to contain userns settings)
|
||||||
|
RootlessEUID bool `json:"rootless_euid,omitempty"`
|
||||||
|
|
||||||
|
// RootlessCgroups is set when unlikely to have the full access to cgroups.
|
||||||
|
// When RootlessCgroups is set, cgroups errors are ignored.
|
||||||
|
RootlessCgroups bool `json:"rootless_cgroups,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type (
|
||||||
|
HookName string
|
||||||
|
HookList []Hook
|
||||||
|
Hooks map[HookName]HookList
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
// Prestart commands are executed after the container namespaces are created,
|
||||||
|
// but before the user supplied command is executed from init.
|
||||||
|
// Note: This hook is now deprecated
|
||||||
|
// Prestart commands are called in the Runtime namespace.
|
||||||
|
Prestart HookName = "prestart"
|
||||||
|
|
||||||
|
// CreateRuntime commands MUST be called as part of the create operation after
|
||||||
|
// the runtime environment has been created but before the pivot_root has been executed.
|
||||||
|
// CreateRuntime is called immediately after the deprecated Prestart hook.
|
||||||
|
// CreateRuntime commands are called in the Runtime Namespace.
|
||||||
|
CreateRuntime HookName = "createRuntime"
|
||||||
|
|
||||||
|
// CreateContainer commands MUST be called as part of the create operation after
|
||||||
|
// the runtime environment has been created but before the pivot_root has been executed.
|
||||||
|
// CreateContainer commands are called in the Container namespace.
|
||||||
|
CreateContainer HookName = "createContainer"
|
||||||
|
|
||||||
|
// StartContainer commands MUST be called as part of the start operation and before
|
||||||
|
// the container process is started.
|
||||||
|
// StartContainer commands are called in the Container namespace.
|
||||||
|
StartContainer HookName = "startContainer"
|
||||||
|
|
||||||
|
// Poststart commands are executed after the container init process starts.
|
||||||
|
// Poststart commands are called in the Runtime Namespace.
|
||||||
|
Poststart HookName = "poststart"
|
||||||
|
|
||||||
|
// Poststop commands are executed after the container init process exits.
|
||||||
|
// Poststop commands are called in the Runtime Namespace.
|
||||||
|
Poststop HookName = "poststop"
|
||||||
|
)
|
||||||
|
|
||||||
|
// KnownHookNames returns the known hook names.
|
||||||
|
// Used by `runc features`.
|
||||||
|
func KnownHookNames() []string {
|
||||||
|
return []string{
|
||||||
|
string(Prestart), // deprecated
|
||||||
|
string(CreateRuntime),
|
||||||
|
string(CreateContainer),
|
||||||
|
string(StartContainer),
|
||||||
|
string(Poststart),
|
||||||
|
string(Poststop),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Capabilities groups the capability sets of a process; each set is a list
// of capability names.
type Capabilities struct {
	// Bounding is the set of capabilities checked by the kernel.
	Bounding []string
	// Effective is the set of capabilities checked by the kernel.
	Effective []string
	// Inheritable is the set of capabilities preserved across execve.
	Inheritable []string
	// Permitted is the limiting superset for the effective capabilities.
	Permitted []string
	// Ambient is the ambient set of capabilities that are kept.
	Ambient []string
}
|
||||||
|
|
||||||
|
func (hooks HookList) RunHooks(state *specs.State) error {
|
||||||
|
for i, h := range hooks {
|
||||||
|
if err := h.Run(state); err != nil {
|
||||||
|
return fmt.Errorf("error running hook #%d: %w", i, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (hooks *Hooks) UnmarshalJSON(b []byte) error {
|
||||||
|
var state map[HookName][]CommandHook
|
||||||
|
|
||||||
|
if err := json.Unmarshal(b, &state); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
*hooks = Hooks{}
|
||||||
|
for n, commandHooks := range state {
|
||||||
|
if len(commandHooks) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
(*hooks)[n] = HookList{}
|
||||||
|
for _, h := range commandHooks {
|
||||||
|
(*hooks)[n] = append((*hooks)[n], h)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (hooks *Hooks) MarshalJSON() ([]byte, error) {
|
||||||
|
serialize := func(hooks []Hook) (serializableHooks []CommandHook) {
|
||||||
|
for _, hook := range hooks {
|
||||||
|
switch chook := hook.(type) {
|
||||||
|
case CommandHook:
|
||||||
|
serializableHooks = append(serializableHooks, chook)
|
||||||
|
default:
|
||||||
|
logrus.Warnf("cannot serialize hook of type %T, skipping", hook)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return serializableHooks
|
||||||
|
}
|
||||||
|
|
||||||
|
return json.Marshal(map[string]interface{}{
|
||||||
|
"prestart": serialize((*hooks)[Prestart]),
|
||||||
|
"createRuntime": serialize((*hooks)[CreateRuntime]),
|
||||||
|
"createContainer": serialize((*hooks)[CreateContainer]),
|
||||||
|
"startContainer": serialize((*hooks)[StartContainer]),
|
||||||
|
"poststart": serialize((*hooks)[Poststart]),
|
||||||
|
"poststop": serialize((*hooks)[Poststop]),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
type Hook interface {
|
||||||
|
// Run executes the hook with the provided state.
|
||||||
|
Run(*specs.State) error
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewFunctionHook will call the provided function when the hook is run.
|
||||||
|
func NewFunctionHook(f func(*specs.State) error) FuncHook {
|
||||||
|
return FuncHook{
|
||||||
|
run: f,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type FuncHook struct {
|
||||||
|
run func(*specs.State) error
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f FuncHook) Run(s *specs.State) error {
|
||||||
|
return f.run(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Command describes an external program to execute.
type Command struct {
	// Path is the path of the program to run.
	Path string `json:"path"`
	// Args is the argument vector handed to the program.
	Args []string `json:"args"`
	// Env is the environment handed to the program.
	Env []string `json:"env"`
	// Dir is serialized with the command.
	// NOTE(review): presumably the working directory; confirm it is honored
	// by whatever executes the command.
	Dir string `json:"dir"`
	// Timeout, when non-nil, bounds how long the command may run.
	Timeout *time.Duration `json:"timeout"`
}
|
||||||
|
|
||||||
|
// NewCommandHook will execute the provided command when the hook is run.
|
||||||
|
func NewCommandHook(cmd Command) CommandHook {
|
||||||
|
return CommandHook{
|
||||||
|
Command: cmd,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type CommandHook struct {
|
||||||
|
Command
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c Command) Run(s *specs.State) error {
|
||||||
|
b, err := json.Marshal(s)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
var stdout, stderr bytes.Buffer
|
||||||
|
cmd := exec.Cmd{
|
||||||
|
Path: c.Path,
|
||||||
|
Args: c.Args,
|
||||||
|
Env: c.Env,
|
||||||
|
Stdin: bytes.NewReader(b),
|
||||||
|
Stdout: &stdout,
|
||||||
|
Stderr: &stderr,
|
||||||
|
}
|
||||||
|
if err := cmd.Start(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
errC := make(chan error, 1)
|
||||||
|
go func() {
|
||||||
|
err := cmd.Wait()
|
||||||
|
if err != nil {
|
||||||
|
err = fmt.Errorf("error running hook: %w, stdout: %s, stderr: %s", err, stdout.String(), stderr.String())
|
||||||
|
}
|
||||||
|
errC <- err
|
||||||
|
}()
|
||||||
|
var timerCh <-chan time.Time
|
||||||
|
if c.Timeout != nil {
|
||||||
|
timer := time.NewTimer(*c.Timeout)
|
||||||
|
defer timer.Stop()
|
||||||
|
timerCh = timer.C
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case err := <-errC:
|
||||||
|
return err
|
||||||
|
case <-timerCh:
|
||||||
|
_ = cmd.Process.Kill()
|
||||||
|
<-errC
|
||||||
|
return fmt.Errorf("hook ran past specified timeout of %.1fs", c.Timeout.Seconds())
|
||||||
|
}
|
||||||
|
}
|
68
vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go
generated
vendored
Normal file
68
vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go
generated
vendored
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
package configs
|
||||||
|
|
||||||
|
import "errors"
|
||||||
|
|
||||||
|
// Errors returned when translating container IDs to host IDs with user
// namespaces enabled.
var (
	// errNoUIDMap: no uid mappings are configured at all.
	errNoUIDMap = errors.New("User namespaces enabled, but no uid mappings found.")
	// errNoGIDMap: no gid mappings are configured at all.
	errNoGIDMap = errors.New("User namespaces enabled, but no gid mappings found.")
	// errNoUserMap: uid mappings exist but none covers the requested uid.
	errNoUserMap = errors.New("User namespaces enabled, but no user mapping found.")
	// errNoGroupMap: gid mappings exist but none covers the requested gid.
	errNoGroupMap = errors.New("User namespaces enabled, but no group mapping found.")
)
|
||||||
|
|
||||||
|
// HostUID gets the translated uid for the process on host which could be
|
||||||
|
// different when user namespaces are enabled.
|
||||||
|
func (c Config) HostUID(containerId int) (int, error) {
|
||||||
|
if c.Namespaces.Contains(NEWUSER) {
|
||||||
|
if c.UidMappings == nil {
|
||||||
|
return -1, errNoUIDMap
|
||||||
|
}
|
||||||
|
id, found := c.hostIDFromMapping(containerId, c.UidMappings)
|
||||||
|
if !found {
|
||||||
|
return -1, errNoUserMap
|
||||||
|
}
|
||||||
|
return id, nil
|
||||||
|
}
|
||||||
|
// Return unchanged id.
|
||||||
|
return containerId, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// HostRootUID gets the root uid for the process on host which could be non-zero
|
||||||
|
// when user namespaces are enabled.
|
||||||
|
func (c Config) HostRootUID() (int, error) {
|
||||||
|
return c.HostUID(0)
|
||||||
|
}
|
||||||
|
|
||||||
|
// HostGID gets the translated gid for the process on host which could be
|
||||||
|
// different when user namespaces are enabled.
|
||||||
|
func (c Config) HostGID(containerId int) (int, error) {
|
||||||
|
if c.Namespaces.Contains(NEWUSER) {
|
||||||
|
if c.GidMappings == nil {
|
||||||
|
return -1, errNoGIDMap
|
||||||
|
}
|
||||||
|
id, found := c.hostIDFromMapping(containerId, c.GidMappings)
|
||||||
|
if !found {
|
||||||
|
return -1, errNoGroupMap
|
||||||
|
}
|
||||||
|
return id, nil
|
||||||
|
}
|
||||||
|
// Return unchanged id.
|
||||||
|
return containerId, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// HostRootGID gets the root gid for the process on host which could be non-zero
|
||||||
|
// when user namespaces are enabled.
|
||||||
|
func (c Config) HostRootGID() (int, error) {
|
||||||
|
return c.HostGID(0)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Utility function that gets a host ID for a container ID from user namespace map
|
||||||
|
// if that ID is present in the map.
|
||||||
|
func (c Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) {
|
||||||
|
for _, m := range uMap {
|
||||||
|
if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) {
|
||||||
|
hostID := m.HostID + (containerID - m.ContainerID)
|
||||||
|
return hostID, true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1, false
|
||||||
|
}
|
10
vendor/github.com/opencontainers/runc/libcontainer/configs/configs_fuzzer.go
generated
vendored
Normal file
10
vendor/github.com/opencontainers/runc/libcontainer/configs/configs_fuzzer.go
generated
vendored
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
//go:build gofuzz
|
||||||
|
// +build gofuzz
|
||||||
|
|
||||||
|
package configs
|
||||||
|
|
||||||
|
func FuzzUnmarshalJSON(data []byte) int {
|
||||||
|
hooks := Hooks{}
|
||||||
|
_ = hooks.UnmarshalJSON(data)
|
||||||
|
return 1
|
||||||
|
}
|
9
vendor/github.com/opencontainers/runc/libcontainer/configs/hugepage_limit.go
generated
vendored
Normal file
9
vendor/github.com/opencontainers/runc/libcontainer/configs/hugepage_limit.go
generated
vendored
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
package configs
|
||||||
|
|
||||||
|
// HugepageLimit pairs a hugepage size with a usage limit for it.
type HugepageLimit struct {
	// Pagesize selects which type of hugepage the limit applies to.
	Pagesize string `json:"page_size"`

	// Limit is the usage limit for that hugepage type.
	Limit uint64 `json:"limit"`
}
|
16
vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go
generated
vendored
Normal file
16
vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go
generated
vendored
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
package configs
|
||||||
|
|
||||||
|
// IntelRdt holds Intel RDT settings. Every field is omitted from the JSON
// encoding when empty.
type IntelRdt struct {
	// ClosID is the identity for the RDT Class of Service.
	ClosID string `json:"closID,omitempty"`

	// L3CacheSchema is the schema for L3 cache id and capacity bitmask (CBM).
	// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
	L3CacheSchema string `json:"l3_cache_schema,omitempty"`

	// MemBwSchema is the schema of memory bandwidth per L3 cache id.
	// Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
	// Bandwidth is given in "percentages" by default, and in "MBps" if the
	// MBA Software Controller is enabled.
	MemBwSchema string `json:"memBwSchema,omitempty"`
}
|
14
vendor/github.com/opencontainers/runc/libcontainer/configs/interface_priority_map.go
generated
vendored
Normal file
14
vendor/github.com/opencontainers/runc/libcontainer/configs/interface_priority_map.go
generated
vendored
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
package configs
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
)
|
||||||
|
|
||||||
|
// IfPrioMap assigns a priority to a network interface.
type IfPrioMap struct {
	// Interface is the name of the network interface.
	Interface string `json:"interface"`
	// Priority is the priority assigned to that interface.
	Priority int64 `json:"priority"`
}

// CgroupString renders the entry as "<interface> <priority>".
func (i *IfPrioMap) CgroupString() string {
	name, prio := i.Interface, i.Priority
	return fmt.Sprintf("%s %d", name, prio)
}
|
48
vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go
generated
vendored
Normal file
48
vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go
generated
vendored
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
package configs
|
||||||
|
|
||||||
|
import "golang.org/x/sys/unix"
|
||||||
|
|
||||||
|
// Extension flags, one bit each, numbered from bit 0 in declaration order.
const (
	// EXT_COPYUP directs that the contents of a directory are copied up when
	// a tmpfs is mounted over it.
	EXT_COPYUP = 1 << iota //nolint:golint // ignore "don't use ALL_CAPS" warning
)
|
||||||
|
|
||||||
|
type Mount struct {
|
||||||
|
// Source path for the mount.
|
||||||
|
Source string `json:"source"`
|
||||||
|
|
||||||
|
// Destination path for the mount inside the container.
|
||||||
|
Destination string `json:"destination"`
|
||||||
|
|
||||||
|
// Device the mount is for.
|
||||||
|
Device string `json:"device"`
|
||||||
|
|
||||||
|
// Mount flags.
|
||||||
|
Flags int `json:"flags"`
|
||||||
|
|
||||||
|
// Propagation Flags
|
||||||
|
PropagationFlags []int `json:"propagation_flags"`
|
||||||
|
|
||||||
|
// Mount data applied to the mount.
|
||||||
|
Data string `json:"data"`
|
||||||
|
|
||||||
|
// Relabel source if set, "z" indicates shared, "Z" indicates unshared.
|
||||||
|
Relabel string `json:"relabel"`
|
||||||
|
|
||||||
|
// RecAttr represents mount properties to be applied recursively (AT_RECURSIVE), see mount_setattr(2).
|
||||||
|
RecAttr *unix.MountAttr `json:"rec_attr"`
|
||||||
|
|
||||||
|
// Extensions are additional flags that are specific to runc.
|
||||||
|
Extensions int `json:"extensions"`
|
||||||
|
|
||||||
|
// Optional Command to be run before Source is mounted.
|
||||||
|
PremountCmds []Command `json:"premount_cmds"`
|
||||||
|
|
||||||
|
// Optional Command to be run after Source is mounted.
|
||||||
|
PostmountCmds []Command `json:"postmount_cmds"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *Mount) IsBind() bool {
|
||||||
|
return m.Flags&unix.MS_BIND != 0
|
||||||
|
}
|
5
vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces.go
generated
vendored
Normal file
5
vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces.go
generated
vendored
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
package configs
|
||||||
|
|
||||||
|
type NamespaceType string
|
||||||
|
|
||||||
|
type Namespaces []Namespace
|
126
vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go
generated
vendored
Normal file
126
vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go
generated
vendored
Normal file
@ -0,0 +1,126 @@
|
|||||||
|
package configs
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"sync"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
NEWNET NamespaceType = "NEWNET"
|
||||||
|
NEWPID NamespaceType = "NEWPID"
|
||||||
|
NEWNS NamespaceType = "NEWNS"
|
||||||
|
NEWUTS NamespaceType = "NEWUTS"
|
||||||
|
NEWIPC NamespaceType = "NEWIPC"
|
||||||
|
NEWUSER NamespaceType = "NEWUSER"
|
||||||
|
NEWCGROUP NamespaceType = "NEWCGROUP"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
nsLock sync.Mutex
|
||||||
|
supportedNamespaces = make(map[NamespaceType]bool)
|
||||||
|
)
|
||||||
|
|
||||||
|
// NsName converts the namespace type to its filename
|
||||||
|
func NsName(ns NamespaceType) string {
|
||||||
|
switch ns {
|
||||||
|
case NEWNET:
|
||||||
|
return "net"
|
||||||
|
case NEWNS:
|
||||||
|
return "mnt"
|
||||||
|
case NEWPID:
|
||||||
|
return "pid"
|
||||||
|
case NEWIPC:
|
||||||
|
return "ipc"
|
||||||
|
case NEWUSER:
|
||||||
|
return "user"
|
||||||
|
case NEWUTS:
|
||||||
|
return "uts"
|
||||||
|
case NEWCGROUP:
|
||||||
|
return "cgroup"
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsNamespaceSupported returns whether a namespace is available or
|
||||||
|
// not
|
||||||
|
func IsNamespaceSupported(ns NamespaceType) bool {
|
||||||
|
nsLock.Lock()
|
||||||
|
defer nsLock.Unlock()
|
||||||
|
supported, ok := supportedNamespaces[ns]
|
||||||
|
if ok {
|
||||||
|
return supported
|
||||||
|
}
|
||||||
|
nsFile := NsName(ns)
|
||||||
|
// if the namespace type is unknown, just return false
|
||||||
|
if nsFile == "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
_, err := os.Stat("/proc/self/ns/" + nsFile)
|
||||||
|
// a namespace is supported if it exists and we have permissions to read it
|
||||||
|
supported = err == nil
|
||||||
|
supportedNamespaces[ns] = supported
|
||||||
|
return supported
|
||||||
|
}
|
||||||
|
|
||||||
|
func NamespaceTypes() []NamespaceType {
|
||||||
|
return []NamespaceType{
|
||||||
|
NEWUSER, // Keep user NS always first, don't move it.
|
||||||
|
NEWIPC,
|
||||||
|
NEWUTS,
|
||||||
|
NEWNET,
|
||||||
|
NEWPID,
|
||||||
|
NEWNS,
|
||||||
|
NEWCGROUP,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Namespace defines configuration for each namespace. It specifies an
|
||||||
|
// alternate path that is able to be joined via setns.
|
||||||
|
type Namespace struct {
|
||||||
|
Type NamespaceType `json:"type"`
|
||||||
|
Path string `json:"path"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func (n *Namespace) GetPath(pid int) string {
|
||||||
|
return fmt.Sprintf("/proc/%d/ns/%s", pid, NsName(n.Type))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (n *Namespaces) Remove(t NamespaceType) bool {
|
||||||
|
i := n.index(t)
|
||||||
|
if i == -1 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
*n = append((*n)[:i], (*n)[i+1:]...)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
func (n *Namespaces) Add(t NamespaceType, path string) {
|
||||||
|
i := n.index(t)
|
||||||
|
if i == -1 {
|
||||||
|
*n = append(*n, Namespace{Type: t, Path: path})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
(*n)[i].Path = path
|
||||||
|
}
|
||||||
|
|
||||||
|
func (n *Namespaces) index(t NamespaceType) int {
|
||||||
|
for i, ns := range *n {
|
||||||
|
if ns.Type == t {
|
||||||
|
return i
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
|
||||||
|
func (n *Namespaces) Contains(t NamespaceType) bool {
|
||||||
|
return n.index(t) != -1
|
||||||
|
}
|
||||||
|
|
||||||
|
func (n *Namespaces) PathOf(t NamespaceType) string {
|
||||||
|
i := n.index(t)
|
||||||
|
if i == -1 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
return (*n)[i].Path
|
||||||
|
}
|
33
vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go
generated
vendored
Normal file
33
vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go
generated
vendored
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
//go:build linux
|
||||||
|
// +build linux
|
||||||
|
|
||||||
|
package configs
|
||||||
|
|
||||||
|
import "golang.org/x/sys/unix"
|
||||||
|
|
||||||
|
func (n *Namespace) Syscall() int {
|
||||||
|
return namespaceInfo[n.Type]
|
||||||
|
}
|
||||||
|
|
||||||
|
var namespaceInfo = map[NamespaceType]int{
|
||||||
|
NEWNET: unix.CLONE_NEWNET,
|
||||||
|
NEWNS: unix.CLONE_NEWNS,
|
||||||
|
NEWUSER: unix.CLONE_NEWUSER,
|
||||||
|
NEWIPC: unix.CLONE_NEWIPC,
|
||||||
|
NEWUTS: unix.CLONE_NEWUTS,
|
||||||
|
NEWPID: unix.CLONE_NEWPID,
|
||||||
|
NEWCGROUP: unix.CLONE_NEWCGROUP,
|
||||||
|
}
|
||||||
|
|
||||||
|
// CloneFlags parses the container's Namespaces options to set the correct
|
||||||
|
// flags on clone, unshare. This function returns flags only for new namespaces.
|
||||||
|
func (n *Namespaces) CloneFlags() uintptr {
|
||||||
|
var flag int
|
||||||
|
for _, v := range *n {
|
||||||
|
if v.Path != "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
flag |= namespaceInfo[v.Type]
|
||||||
|
}
|
||||||
|
return uintptr(flag)
|
||||||
|
}
|
14
vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go
generated
vendored
Normal file
14
vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go
generated
vendored
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
//go:build !linux && !windows
|
||||||
|
// +build !linux,!windows
|
||||||
|
|
||||||
|
package configs
|
||||||
|
|
||||||
|
func (n *Namespace) Syscall() int {
|
||||||
|
panic("No namespace syscall support")
|
||||||
|
}
|
||||||
|
|
||||||
|
// CloneFlags parses the container's Namespaces options to set the correct
|
||||||
|
// flags on clone, unshare. This function returns flags only for new namespaces.
|
||||||
|
func (n *Namespaces) CloneFlags() uintptr {
|
||||||
|
panic("No namespace syscall support")
|
||||||
|
}
|
8
vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go
generated
vendored
Normal file
8
vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go
generated
vendored
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
//go:build !linux
|
||||||
|
// +build !linux
|
||||||
|
|
||||||
|
package configs
|
||||||
|
|
||||||
|
// Namespace is the non-Linux placeholder for a namespace configuration. It
// carries no fields on these platforms.
type Namespace struct{}
|
75
vendor/github.com/opencontainers/runc/libcontainer/configs/network.go
generated
vendored
Normal file
75
vendor/github.com/opencontainers/runc/libcontainer/configs/network.go
generated
vendored
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
package configs
|
||||||
|
|
||||||
|
// Network is the configuration of one network for a container's networking
// stack.
//
// When no network configuration is given, the container is set up with the
// host's networking stack.
type Network struct {
	// Type is the network type, commonly veth and loopback.
	Type string `json:"type"`

	// Name is the name of the network interface.
	Name string `json:"name"`

	// Bridge is the bridge to use.
	Bridge string `json:"bridge"`

	// MacAddress is the MAC address to set on the network interface.
	MacAddress string `json:"mac_address"`

	// Address is the IPv4 address and mask to set on the network interface.
	Address string `json:"address"`

	// Gateway is the gateway address used as the default for the interface.
	Gateway string `json:"gateway"`

	// IPv6Address is the IPv6 address and mask to set on the network interface.
	IPv6Address string `json:"ipv6_address"`

	// IPv6Gateway is the IPv6 gateway address used as the default for the
	// interface.
	IPv6Gateway string `json:"ipv6_gateway"`

	// Mtu is the MTU for the interface. If a pair is created (as with type
	// veth) it is mirrored on both the host and the container interface.
	// Note: This does not apply to loopback interfaces.
	Mtu int `json:"mtu"`

	// TxQueueLen is the tx_queuelen for the interface. If a pair is created
	// (as with type veth) it is mirrored on both the host and the container
	// interface.
	// Note: This does not apply to loopback interfaces.
	TxQueueLen int `json:"txqueuelen"`

	// HostInterfaceName is a unique name for the host-side interface of the
	// container's veth pair.
	HostInterfaceName string `json:"host_interface_name"`

	// HairpinMode specifies whether hairpin NAT should be enabled on the
	// virtual interface bridge port in the case of type veth.
	// Note: This is unsupported on some systems.
	// Note: This does not apply to loopback interfaces.
	HairpinMode bool `json:"hairpin_mode"`
}
|
||||||
|
|
||||||
|
// Route is one routing table entry, created as the container is started.
//
// Destination, source, and gateway should all be either IPv4 or IPv6. At
// least one of the three must be present; omitted entries use their IP family
// default for the route table. For IPv4, for example, setting the gateway to
// 1.2.3.4 and the interface to eth0 sets up a standard destination of
// 0.0.0.0 (or *) when viewed in the route table.
type Route struct {
	// Destination is the destination IP address and mask in CIDR form.
	Destination string `json:"destination"`

	// Source is the source IP address and mask in CIDR form.
	Source string `json:"source"`

	// Gateway is the gateway IP address.
	Gateway string `json:"gateway"`

	// InterfaceName is the device to set this route up for, for example eth0.
	InterfaceName string `json:"interface_name"`
}
|
9
vendor/github.com/opencontainers/runc/libcontainer/configs/rdma.go
generated
vendored
Normal file
9
vendor/github.com/opencontainers/runc/libcontainer/configs/rdma.go
generated
vendored
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
package configs
|
||||||
|
|
||||||
|
// LinuxRdma holds limits for the Linux cgroup 'rdma' resource controller
// (Linux 4.11). Unset (nil) fields are omitted from the JSON encoding.
type LinuxRdma struct {
	// HcaHandles is the maximum number of HCA handles that can be opened.
	// Default is "no limit".
	HcaHandles *uint32 `json:"hca_handles,omitempty"`
	// HcaObjects is the maximum number of HCA objects that can be created.
	// Default is "no limit".
	HcaObjects *uint32 `json:"hca_objects,omitempty"`
}
|
5
vendor/github.com/opencontainers/runc/libcontainer/userns/userns.go
generated
vendored
Normal file
5
vendor/github.com/opencontainers/runc/libcontainer/userns/userns.go
generated
vendored
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
package userns
|
||||||
|
|
||||||
|
// RunningInUserNS detects whether we are currently running in a user namespace.
|
||||||
|
// Originally copied from github.com/lxc/lxd/shared/util.go
|
||||||
|
var RunningInUserNS = runningInUserNS
|
16
vendor/github.com/opencontainers/runc/libcontainer/userns/userns_fuzzer.go
generated
vendored
Normal file
16
vendor/github.com/opencontainers/runc/libcontainer/userns/userns_fuzzer.go
generated
vendored
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
//go:build gofuzz
|
||||||
|
// +build gofuzz
|
||||||
|
|
||||||
|
package userns
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/user"
|
||||||
|
)
|
||||||
|
|
||||||
|
func FuzzUIDMap(data []byte) int {
|
||||||
|
uidmap, _ := user.ParseIDMap(strings.NewReader(string(data)))
|
||||||
|
_ = uidMapInUserNS(uidmap)
|
||||||
|
return 1
|
||||||
|
}
|
37
vendor/github.com/opencontainers/runc/libcontainer/userns/userns_linux.go
generated
vendored
Normal file
37
vendor/github.com/opencontainers/runc/libcontainer/userns/userns_linux.go
generated
vendored
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
package userns
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/user"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
	// nsOnce ensures the detection in runningInUserNS runs only once.
	nsOnce sync.Once
	// inUserNS caches the detection result; it stays false when the
	// detection could not read the uid map.
	inUserNS bool
)
|
||||||
|
|
||||||
|
// runningInUserNS detects whether we are currently running in a user namespace.
|
||||||
|
// Originally copied from github.com/lxc/lxd/shared/util.go
|
||||||
|
func runningInUserNS() bool {
|
||||||
|
nsOnce.Do(func() {
|
||||||
|
uidmap, err := user.CurrentProcessUIDMap()
|
||||||
|
if err != nil {
|
||||||
|
// This kernel-provided file only exists if user namespaces are supported
|
||||||
|
return
|
||||||
|
}
|
||||||
|
inUserNS = uidMapInUserNS(uidmap)
|
||||||
|
})
|
||||||
|
return inUserNS
|
||||||
|
}
|
||||||
|
|
||||||
|
func uidMapInUserNS(uidmap []user.IDMap) bool {
|
||||||
|
/*
|
||||||
|
* We assume we are in the initial user namespace if we have a full
|
||||||
|
* range - 4294967295 uids starting at uid 0.
|
||||||
|
*/
|
||||||
|
if len(uidmap) == 1 && uidmap[0].ID == 0 && uidmap[0].ParentID == 0 && uidmap[0].Count == 4294967295 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
18
vendor/github.com/opencontainers/runc/libcontainer/userns/userns_unsupported.go
generated
vendored
Normal file
18
vendor/github.com/opencontainers/runc/libcontainer/userns/userns_unsupported.go
generated
vendored
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
//go:build !linux
|
||||||
|
// +build !linux
|
||||||
|
|
||||||
|
package userns
|
||||||
|
|
||||||
|
import "github.com/opencontainers/runc/libcontainer/user"
|
||||||
|
|
||||||
|
// runningInUserNS is the non-Linux stub; it performs no detection and always
// returns false.
func runningInUserNS() bool {
	const detected = false
	return detected
}
|
||||||
|
|
||||||
|
// uidMapInUserNS is a stub for non-Linux systems
|
||||||
|
// Always returns false
|
||||||
|
func uidMapInUserNS(uidmap []user.IDMap) bool {
|
||||||
|
return false
|
||||||
|
}
|
96
vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go
generated
vendored
Normal file
96
vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go
generated
vendored
Normal file
@ -0,0 +1,96 @@
|
|||||||
|
package utils
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright 2016, 2017 SUSE LLC
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
)
|
||||||
|
|
||||||
|
// MaxNameLen is the maximum length of the name of a file descriptor being
// sent using SendFd. The name of the file handle returned by RecvFd will never
// be larger than this value.
const MaxNameLen = 4096
|
||||||
|
|
||||||
|
// oobSpace is the size of the oob slice required to store a single FD. Note
|
||||||
|
// that unix.UnixRights appears to make the assumption that fd is always int32,
|
||||||
|
// so sizeof(fd) = 4.
|
||||||
|
var oobSpace = unix.CmsgSpace(4)
|
||||||
|
|
||||||
|
// RecvFd waits for a file descriptor to be sent over the given AF_UNIX
|
||||||
|
// socket. The file name of the remote file descriptor will be recreated
|
||||||
|
// locally (it is sent as non-auxiliary data in the same payload).
|
||||||
|
func RecvFd(socket *os.File) (*os.File, error) {
|
||||||
|
// For some reason, unix.Recvmsg uses the length rather than the capacity
|
||||||
|
// when passing the msg_controllen and other attributes to recvmsg. So we
|
||||||
|
// have to actually set the length.
|
||||||
|
name := make([]byte, MaxNameLen)
|
||||||
|
oob := make([]byte, oobSpace)
|
||||||
|
|
||||||
|
sockfd := socket.Fd()
|
||||||
|
n, oobn, _, _, err := unix.Recvmsg(int(sockfd), name, oob, 0)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if n >= MaxNameLen || oobn != oobSpace {
|
||||||
|
return nil, fmt.Errorf("recvfd: incorrect number of bytes read (n=%d oobn=%d)", n, oobn)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Truncate.
|
||||||
|
name = name[:n]
|
||||||
|
oob = oob[:oobn]
|
||||||
|
|
||||||
|
scms, err := unix.ParseSocketControlMessage(oob)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if len(scms) != 1 {
|
||||||
|
return nil, fmt.Errorf("recvfd: number of SCMs is not 1: %d", len(scms))
|
||||||
|
}
|
||||||
|
scm := scms[0]
|
||||||
|
|
||||||
|
fds, err := unix.ParseUnixRights(&scm)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if len(fds) != 1 {
|
||||||
|
return nil, fmt.Errorf("recvfd: number of fds is not 1: %d", len(fds))
|
||||||
|
}
|
||||||
|
fd := uintptr(fds[0])
|
||||||
|
|
||||||
|
return os.NewFile(fd, string(name)), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// SendFd sends a file descriptor over the given AF_UNIX socket. In
|
||||||
|
// addition, the file.Name() of the given file will also be sent as
|
||||||
|
// non-auxiliary data in the same payload (allowing to send contextual
|
||||||
|
// information for a file descriptor).
|
||||||
|
func SendFd(socket *os.File, name string, fd uintptr) error {
|
||||||
|
if len(name) >= MaxNameLen {
|
||||||
|
return fmt.Errorf("sendfd: filename too long: %s", name)
|
||||||
|
}
|
||||||
|
return SendFds(socket, []byte(name), int(fd))
|
||||||
|
}
|
||||||
|
|
||||||
|
// SendFds sends a list of files descriptor and msg over the given AF_UNIX socket.
|
||||||
|
func SendFds(socket *os.File, msg []byte, fds ...int) error {
|
||||||
|
oob := unix.UnixRights(fds...)
|
||||||
|
return unix.Sendmsg(int(socket.Fd()), msg, oob, nil, 0)
|
||||||
|
}
|
167
vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go
generated
vendored
Normal file
167
vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go
generated
vendored
Normal file
@ -0,0 +1,167 @@
|
|||||||
|
package utils
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/binary"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"unsafe"
|
||||||
|
|
||||||
|
securejoin "github.com/cyphar/filepath-securejoin"
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
)
|
||||||
|
|
||||||
|
// exitSignalOffset is added to the signal number to form the exit status of
// a process that was terminated by a signal.
const exitSignalOffset = 128
|
||||||
|
|
||||||
|
// NativeEndian is the native byte order of the host system. It is set once
// during package initialization.
var NativeEndian binary.ByteOrder

// init detects the host byte order by looking at the first byte in memory of
// a uint32 holding the value 1.
func init() {
	// Copied from <golang.org/x/net/internal/socket/sys.go>.
	probe := uint32(1)
	firstByte := (*[4]byte)(unsafe.Pointer(&probe))[0]
	switch firstByte {
	case 1:
		NativeEndian = binary.LittleEndian
	default:
		NativeEndian = binary.BigEndian
	}
}
|
||||||
|
|
||||||
|
// ExitStatus returns the correct exit status for a process based on if it
|
||||||
|
// was signaled or exited cleanly
|
||||||
|
func ExitStatus(status unix.WaitStatus) int {
|
||||||
|
if status.Signaled() {
|
||||||
|
return exitSignalOffset + int(status.Signal())
|
||||||
|
}
|
||||||
|
return status.ExitStatus()
|
||||||
|
}
|
||||||
|
|
||||||
|
// WriteJSON marshals v with the standard JSON encoder and writes the result
// to w. Nothing is written when marshaling fails; the marshal or write error
// is returned.
func WriteJSON(w io.Writer, v interface{}) error {
	encoded, err := json.Marshal(v)
	if err == nil {
		_, err = w.Write(encoded)
	}
	return err
}
|
||||||
|
|
||||||
|
// CleanPath makes a path safe for use with filepath.Join. Besides cleaning
// the path, a relative path is temporarily rooted at '/', cleaned, and made
// relative again, which discards any leading ".." components. A path built by
// prepending another path therefore always resolves lexically to a
// subdirectory of that prefix. All of this is purely lexical, so paths that
// include symlinks are not made safe by CleanPath.
func CleanPath(path string) string {
	// The empty string is passed through unchanged.
	if path == "" {
		return ""
	}

	// Clean first, to collapse problematic inputs such as "/../../../../../".
	cleaned := filepath.Clean(path)

	// Absolute paths stay absolute; only relative ones need the extra work
	// to neutralise inputs like "../../../../<etc>/some/path".
	if !filepath.IsAbs(cleaned) {
		sep := string(os.PathSeparator)
		rooted := filepath.Clean(sep + cleaned)
		// This can't fail, as (by definition) all paths are relative to root.
		cleaned, _ = filepath.Rel(sep, rooted)
	}

	// Clean once more for good measure.
	return filepath.Clean(cleaned)
}
|
||||||
|
|
||||||
|
// stripRoot returns the passed path, stripping the root path if it was
|
||||||
|
// (lexicially) inside it. Note that both passed paths will always be treated
|
||||||
|
// as absolute, and the returned path will also always be absolute. In
|
||||||
|
// addition, the paths are cleaned before stripping the root.
|
||||||
|
func stripRoot(root, path string) string {
|
||||||
|
// Make the paths clean and absolute.
|
||||||
|
root, path = CleanPath("/"+root), CleanPath("/"+path)
|
||||||
|
switch {
|
||||||
|
case path == root:
|
||||||
|
path = "/"
|
||||||
|
case root == "/":
|
||||||
|
// do nothing
|
||||||
|
case strings.HasPrefix(path, root+"/"):
|
||||||
|
path = strings.TrimPrefix(path, root+"/")
|
||||||
|
}
|
||||||
|
return CleanPath("/" + path)
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithProcfd runs the passed closure with a procfd path (/proc/self/fd/...)
|
||||||
|
// corresponding to the unsafePath resolved within the root. Before passing the
|
||||||
|
// fd, this path is verified to have been inside the root -- so operating on it
|
||||||
|
// through the passed fdpath should be safe. Do not access this path through
|
||||||
|
// the original path strings, and do not attempt to use the pathname outside of
|
||||||
|
// the passed closure (the file handle will be freed once the closure returns).
|
||||||
|
func WithProcfd(root, unsafePath string, fn func(procfd string) error) error {
|
||||||
|
// Remove the root then forcefully resolve inside the root.
|
||||||
|
unsafePath = stripRoot(root, unsafePath)
|
||||||
|
path, err := securejoin.SecureJoin(root, unsafePath)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("resolving path inside rootfs failed: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Open the target path.
|
||||||
|
fh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("open o_path procfd: %w", err)
|
||||||
|
}
|
||||||
|
defer fh.Close()
|
||||||
|
|
||||||
|
// Double-check the path is the one we expected.
|
||||||
|
procfd := "/proc/self/fd/" + strconv.Itoa(int(fh.Fd()))
|
||||||
|
if realpath, err := os.Readlink(procfd); err != nil {
|
||||||
|
return fmt.Errorf("procfd verification failed: %w", err)
|
||||||
|
} else if realpath != path {
|
||||||
|
return fmt.Errorf("possibly malicious path detected -- refusing to operate on %s", realpath)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run the closure.
|
||||||
|
return fn(procfd)
|
||||||
|
}
|
||||||
|
|
||||||
|
// SearchLabels looks through labels, a list of "key=value" strings, and
// returns the value of the first entry whose key equals query. Each entry is
// split at its first '='; entries without '=' are ignored. The empty string is
// returned when no entry matches.
func SearchLabels(labels []string, query string) string {
	for _, label := range labels {
		sep := strings.IndexByte(label, '=')
		if sep < 0 {
			// Not a key=value pair.
			continue
		}
		if label[:sep] == query {
			return label[sep+1:]
		}
	}
	return ""
}
|
||||||
|
|
||||||
|
// Annotations splits libcontainer state labels ("key=value" strings) into the
// bundle path and the remaining user-defined annotations. The "bundle" label
// is added by libcontainer, so it is reported separately rather than being
// placed in the returned map. Entries without '=' are skipped; when a key
// appears more than once, the last occurrence wins. The returned map is never
// nil.
func Annotations(labels []string) (bundle string, userAnnotations map[string]string) {
	userAnnotations = map[string]string{}
	for _, label := range labels {
		sep := strings.IndexByte(label, '=')
		if sep < 0 {
			// Not a key=value pair.
			continue
		}
		key, value := label[:sep], label[sep+1:]
		switch key {
		case "bundle":
			bundle = value
		default:
			userAnnotations[key] = value
		}
	}
	return bundle, userAnnotations
}
|
69
vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go
generated
vendored
Normal file
69
vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go
generated
vendored
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
//go:build !windows
|
||||||
|
// +build !windows
|
||||||
|
|
||||||
|
package utils
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"strconv"
|
||||||
|
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
)
|
||||||
|
|
||||||
|
// EnsureProcHandle returns whether or not the given file handle is on procfs.
|
||||||
|
func EnsureProcHandle(fh *os.File) error {
|
||||||
|
var buf unix.Statfs_t
|
||||||
|
if err := unix.Fstatfs(int(fh.Fd()), &buf); err != nil {
|
||||||
|
return fmt.Errorf("ensure %s is on procfs: %w", fh.Name(), err)
|
||||||
|
}
|
||||||
|
if buf.Type != unix.PROC_SUPER_MAGIC {
|
||||||
|
return fmt.Errorf("%s is not on procfs", fh.Name())
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// CloseExecFrom applies O_CLOEXEC to all file descriptors currently open for
|
||||||
|
// the process (except for those below the given fd value).
|
||||||
|
func CloseExecFrom(minFd int) error {
|
||||||
|
fdDir, err := os.Open("/proc/self/fd")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer fdDir.Close()
|
||||||
|
|
||||||
|
if err := EnsureProcHandle(fdDir); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
fdList, err := fdDir.Readdirnames(-1)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
for _, fdStr := range fdList {
|
||||||
|
fd, err := strconv.Atoi(fdStr)
|
||||||
|
// Ignore non-numeric file names.
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Ignore descriptors lower than our specified minimum.
|
||||||
|
if fd < minFd {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Intentionally ignore errors from unix.CloseOnExec -- the cases where
|
||||||
|
// this might fail are basically file descriptors that have already
|
||||||
|
// been closed (including and especially the one that was created when
|
||||||
|
// os.ReadDir did the "opendir" syscall).
|
||||||
|
unix.CloseOnExec(fd)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewSockPair returns a new unix socket pair
|
||||||
|
func NewSockPair(name string) (parent *os.File, child *os.File, err error) {
|
||||||
|
fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
return os.NewFile(uintptr(fds[1]), name+"-p"), os.NewFile(uintptr(fds[0]), name+"-c"), nil
|
||||||
|
}
|
1928
vendor/k8s.io/cri-api/pkg/apis/runtime/v1/api.pb.go
generated
vendored
1928
vendor/k8s.io/cri-api/pkg/apis/runtime/v1/api.pb.go
generated
vendored
File diff suppressed because it is too large
Load Diff
55
vendor/k8s.io/cri-api/pkg/apis/runtime/v1/api.proto
generated
vendored
55
vendor/k8s.io/cri-api/pkg/apis/runtime/v1/api.proto
generated
vendored
@ -131,6 +131,15 @@ service RuntimeService {
|
|||||||
|
|
||||||
// ListPodSandboxMetrics gets pod sandbox metrics from CRI Runtime
|
// ListPodSandboxMetrics gets pod sandbox metrics from CRI Runtime
|
||||||
rpc ListPodSandboxMetrics(ListPodSandboxMetricsRequest) returns (ListPodSandboxMetricsResponse) {}
|
rpc ListPodSandboxMetrics(ListPodSandboxMetricsRequest) returns (ListPodSandboxMetricsResponse) {}
|
||||||
|
|
||||||
|
// RuntimeConfig returns configuration information of the runtime.
|
||||||
|
// A couple of notes:
|
||||||
|
// - The RuntimeConfigRequest object is not to be confused with the contents of UpdateRuntimeConfigRequest.
|
||||||
|
// The former is for having runtime tell Kubelet what to do, the latter vice versa.
|
||||||
|
// - It is the expectation of the Kubelet that these fields are static for the lifecycle of the Kubelet.
|
||||||
|
// The Kubelet will not re-request the RuntimeConfiguration after startup, and CRI implementations should
|
||||||
|
// avoid updating them without a full node reboot.
|
||||||
|
rpc RuntimeConfig(RuntimeConfigRequest) returns (RuntimeConfigResponse) {}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ImageService defines the public APIs for managing images.
|
// ImageService defines the public APIs for managing images.
|
||||||
@ -199,7 +208,7 @@ message PortMapping {
|
|||||||
}
|
}
|
||||||
|
|
||||||
enum MountPropagation {
|
enum MountPropagation {
|
||||||
// No mount propagation ("private" in Linux terminology).
|
// No mount propagation ("rprivate" in Linux terminology).
|
||||||
PROPAGATION_PRIVATE = 0;
|
PROPAGATION_PRIVATE = 0;
|
||||||
// Mounts get propagated from the host to the container ("rslave" in Linux).
|
// Mounts get propagated from the host to the container ("rslave" in Linux).
|
||||||
PROPAGATION_HOST_TO_CONTAINER = 1;
|
PROPAGATION_HOST_TO_CONTAINER = 1;
|
||||||
@ -770,6 +779,9 @@ message ImageSpec {
|
|||||||
// ImageSpec Annotations can be used to help the runtime target specific
|
// ImageSpec Annotations can be used to help the runtime target specific
|
||||||
// images in multi-arch images.
|
// images in multi-arch images.
|
||||||
map<string, string> annotations = 2;
|
map<string, string> annotations = 2;
|
||||||
|
// The container image reference specified by the user (e.g. image[:tag] or digest).
|
||||||
|
// Only set if available within the RPC context.
|
||||||
|
string user_specified_image = 18;
|
||||||
}
|
}
|
||||||
|
|
||||||
message KeyValue {
|
message KeyValue {
|
||||||
@ -1627,6 +1639,8 @@ message ContainerStats {
|
|||||||
MemoryUsage memory = 3;
|
MemoryUsage memory = 3;
|
||||||
// Usage of the writable layer.
|
// Usage of the writable layer.
|
||||||
FilesystemUsage writable_layer = 4;
|
FilesystemUsage writable_layer = 4;
|
||||||
|
// Swap usage gathered from the container.
|
||||||
|
SwapUsage swap = 5;
|
||||||
}
|
}
|
||||||
|
|
||||||
// WindowsContainerStats provides the resource usage statistics for a container specific for Windows
|
// WindowsContainerStats provides the resource usage statistics for a container specific for Windows
|
||||||
@ -1681,16 +1695,27 @@ message MemoryUsage {
|
|||||||
UInt64Value major_page_faults = 7;
|
UInt64Value major_page_faults = 7;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
message SwapUsage {
|
||||||
|
// Timestamp in nanoseconds at which the information was collected. Must be > 0.
|
||||||
|
int64 timestamp = 1;
|
||||||
|
// Available swap for use. This is defined as the swap limit - swapUsageBytes.
|
||||||
|
UInt64Value swap_available_bytes = 2;
|
||||||
|
// Total swap memory in use. This includes all swap memory regardless of when it was accessed.
|
||||||
|
UInt64Value swap_usage_bytes = 3;
|
||||||
|
}
|
||||||
|
|
||||||
// WindowsMemoryUsage provides the memory usage information specific to Windows
|
// WindowsMemoryUsage provides the memory usage information specific to Windows
|
||||||
message WindowsMemoryUsage {
|
message WindowsMemoryUsage {
|
||||||
// Timestamp in nanoseconds at which the information were collected. Must be > 0.
|
// Timestamp in nanoseconds at which the information were collected. Must be > 0.
|
||||||
int64 timestamp = 1;
|
int64 timestamp = 1;
|
||||||
// The amount of working set memory in bytes.
|
// The amount of working set memory in bytes.
|
||||||
UInt64Value working_set_bytes = 2;
|
UInt64Value working_set_bytes = 2;
|
||||||
// Available memory for use. This is defined as the memory limit - workingSetBytes.
|
// Available memory for use. This is defined as the memory limit - commit_memory_bytes.
|
||||||
UInt64Value available_bytes = 3;
|
UInt64Value available_bytes = 3;
|
||||||
// Cumulative number of page faults.
|
// Cumulative number of page faults.
|
||||||
UInt64Value page_faults = 4;
|
UInt64Value page_faults = 4;
|
||||||
|
// Total commit memory in use. Commit memory is total of physical and virtual memory in use.
|
||||||
|
UInt64Value commit_memory_bytes = 5;
|
||||||
}
|
}
|
||||||
|
|
||||||
message ReopenContainerLogRequest {
|
message ReopenContainerLogRequest {
|
||||||
@ -1801,3 +1826,29 @@ enum MetricType {
|
|||||||
COUNTER = 0;
|
COUNTER = 0;
|
||||||
GAUGE = 1;
|
GAUGE = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
message RuntimeConfigRequest {}
|
||||||
|
|
||||||
|
message RuntimeConfigResponse {
|
||||||
|
// Configuration information for Linux-based runtimes. This field contains
|
||||||
|
// global runtime configuration options that are not specific to runtime
|
||||||
|
// handlers.
|
||||||
|
LinuxRuntimeConfiguration linux = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
message LinuxRuntimeConfiguration {
|
||||||
|
// Cgroup driver to use
|
||||||
|
// Note: this field should not change for the lifecycle of the Kubelet,
|
||||||
|
// or while there are running containers.
|
||||||
|
// The Kubelet will not re-request this after startup, and will construct the cgroup
|
||||||
|
// hierarchy assuming it is static.
|
||||||
|
// If the runtime wishes to change this value, it must be accompanied by removal of
|
||||||
|
// all pods, and a restart of the Kubelet. The easiest way to do this is with a full node reboot.
|
||||||
|
CgroupDriver cgroup_driver = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
enum CgroupDriver {
|
||||||
|
SYSTEMD = 0;
|
||||||
|
CGROUPFS = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
13
vendor/modules.txt
vendored
13
vendor/modules.txt
vendored
@ -350,8 +350,19 @@ github.com/opencontainers/image-spec/specs-go
|
|||||||
github.com/opencontainers/image-spec/specs-go/v1
|
github.com/opencontainers/image-spec/specs-go/v1
|
||||||
# github.com/opencontainers/runc v1.1.8
|
# github.com/opencontainers/runc v1.1.8
|
||||||
## explicit; go 1.17
|
## explicit; go 1.17
|
||||||
|
github.com/opencontainers/runc/libcontainer/cgroups
|
||||||
|
github.com/opencontainers/runc/libcontainer/cgroups/devices
|
||||||
|
github.com/opencontainers/runc/libcontainer/cgroups/ebpf
|
||||||
|
github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter
|
||||||
|
github.com/opencontainers/runc/libcontainer/cgroups/fs
|
||||||
|
github.com/opencontainers/runc/libcontainer/cgroups/fs2
|
||||||
|
github.com/opencontainers/runc/libcontainer/cgroups/fscommon
|
||||||
|
github.com/opencontainers/runc/libcontainer/cgroups/systemd
|
||||||
|
github.com/opencontainers/runc/libcontainer/configs
|
||||||
github.com/opencontainers/runc/libcontainer/devices
|
github.com/opencontainers/runc/libcontainer/devices
|
||||||
github.com/opencontainers/runc/libcontainer/user
|
github.com/opencontainers/runc/libcontainer/user
|
||||||
|
github.com/opencontainers/runc/libcontainer/userns
|
||||||
|
github.com/opencontainers/runc/libcontainer/utils
|
||||||
# github.com/opencontainers/runtime-spec v1.1.0
|
# github.com/opencontainers/runtime-spec v1.1.0
|
||||||
## explicit
|
## explicit
|
||||||
github.com/opencontainers/runtime-spec/specs-go
|
github.com/opencontainers/runtime-spec/specs-go
|
||||||
@ -800,7 +811,7 @@ k8s.io/component-base/metrics/legacyregistry
|
|||||||
k8s.io/component-base/metrics/prometheus/feature
|
k8s.io/component-base/metrics/prometheus/feature
|
||||||
k8s.io/component-base/metrics/prometheusextension
|
k8s.io/component-base/metrics/prometheusextension
|
||||||
k8s.io/component-base/version
|
k8s.io/component-base/version
|
||||||
# k8s.io/cri-api v0.27.1
|
# k8s.io/cri-api v0.28.0-beta.0
|
||||||
## explicit; go 1.20
|
## explicit; go 1.20
|
||||||
k8s.io/cri-api/pkg/apis/runtime/v1
|
k8s.io/cri-api/pkg/apis/runtime/v1
|
||||||
# k8s.io/klog/v2 v2.90.1
|
# k8s.io/klog/v2 v2.90.1
|
||||||
|
Loading…
Reference in New Issue
Block a user