Merge pull request #5490 from askervin/5Bu_blockio

Support for cgroups blockio
This commit is contained in:
Mike Brown
2022-04-29 10:07:56 -05:00
committed by GitHub
27 changed files with 1898 additions and 7 deletions

View File

@@ -189,6 +189,14 @@ var (
Name: "apparmor-profile",
Usage: "enable AppArmor with an existing custom profile",
},
cli.StringFlag{
Name: "blockio-config-file",
Usage: "file path to blockio class definitions. By default class definitions are not loaded.",
},
cli.StringFlag{
Name: "blockio-class",
Usage: "name of the blockio class to associate the container with",
},
cli.StringFlag{
Name: "rdt-class",
Usage: "name of the RDT class to associate the container with. Specifies a Class of Service (CLOS) for cache and memory bandwidth management.",

View File

@@ -39,6 +39,7 @@ import (
"github.com/containerd/containerd/platforms"
"github.com/containerd/containerd/runtime/v2/runc/options"
"github.com/containerd/containerd/snapshots"
"github.com/intel/goresctrl/pkg/blockio"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
"github.com/urfave/cli"
@@ -329,6 +330,19 @@ func NewContainer(ctx gocontext.Context, client *containerd.Client, context *cli
})
}
if c := context.String("blockio-config-file"); c != "" {
if err := blockio.SetConfigFromFile(c, false); err != nil {
return nil, fmt.Errorf("blockio-config-file error: %w", err)
}
}
if c := context.String("blockio-class"); c != "" {
if linuxBlockIO, err := blockio.OciLinuxBlockIO(c); err == nil {
opts = append(opts, oci.WithBlockIO(linuxBlockIO))
} else {
return nil, fmt.Errorf("blockio-class error: %w", err)
}
}
if c := context.String("rdt-class"); c != "" {
opts = append(opts, oci.WithRdt(c, "", ""))
}

View File

@@ -248,6 +248,14 @@ version = 2
# default_runtime_name is the default runtime name to use.
default_runtime_name = "runc"
# ignore_blockio_not_enabled_errors disables blockio related
# errors when blockio support has not been enabled. By default,
# trying to set the blockio class of a container via annotations
# produces an error if blockio hasn't been enabled. This config
# option practically enables a "soft" mode for blockio where these
# errors are ignored and the container gets no blockio class.
ignore_blockio_not_enabled_errors = false
# ignore_rdt_not_enabled_errors disables RDT related errors when RDT
# support has not been enabled. Intel RDT is a technology for cache and
# memory bandwidth management. By default, trying to set the RDT class of

View File

@@ -100,12 +100,16 @@ documentation.
- **sched_core** Core scheduling is a feature that allows only trusted tasks
to run concurrently on cpus sharing compute resources (eg: hyperthreads on
a core). (Default: **false**)
- **[plugins."io.containerd.service.v1.tasks-service"]** has one option:
- **rdt_config_file** (Linux only) specifies path to a configuration used for
configuring RDT (Default: **""**). Enables support for Intel RDT, a
technology for cache and memory bandwidth management.
See https://github.com/intel/goresctrl/blob/v0.2.0/doc/rdt.md#configuration
for details of the configuration file format.
- **[plugins."io.containerd.service.v1.tasks-service"]** has performance options:
- **blockio_config_file** (Linux only) specifies path to blockio class definitions
(Default: **""**). Controls I/O scheduler priority and bandwidth throttling.
See [blockio configuration](https://github.com/intel/goresctrl/blob/main/doc/blockio.md#configuration)
for details of the file format.
- **rdt_config_file** (Linux only) specifies path to a configuration used for configuring
RDT (Default: **""**). Enables support for Intel RDT, a technology
for cache and memory bandwidth management.
See [RDT configuration](https://github.com/intel/goresctrl/blob/main/doc/rdt.md#configuration)
for details of the file format.
**oom_score**
: The out of memory (OOM) score applied to the containerd daemon process (Default: 0)
@@ -192,7 +196,8 @@ imports = ["/etc/containerd/runtime_*.toml", "./debug.toml"]
platforms = ["linux/amd64"]
sched_core = true
[plugins."io.containerd.service.v1.tasks-service"]
rdt_config_file = "/etc/rdt-config.yaml"
blockio_config_file = ""
rdt_config_file = ""
```
## BUGS

View File

@@ -87,6 +87,15 @@ func WithPidsLimit(limit int64) SpecOpts {
}
}
// WithBlockIO sets the container's blkio parameters
func WithBlockIO(blockio *specs.LinuxBlockIO) SpecOpts {
return func(ctx context.Context, _ Client, c *containers.Container, s *Spec) error {
setResources(s)
s.Linux.Resources.BlockIO = blockio
return nil
}
}
// WithCPUShares sets the container's cpu shares
func WithCPUShares(shares uint64) SpecOpts {
return func(ctx context.Context, _ Client, c *containers.Container, s *Spec) error {

View File

@@ -39,6 +39,14 @@ var WithAllKnownCapabilities = func(ctx context.Context, client Client, c *conta
return WithCapabilities(nil)(ctx, client, c, s)
}
// WithBlockIO sets the container's blkio parameters
//nolint: deadcode, unused
func WithBlockIO(blockio interface{}) SpecOpts {
return func(ctx context.Context, _ Client, c *containers.Container, s *Spec) error {
return errors.New("blkio not supported")
}
}
// WithCPUShares sets the container's cpu shares
//nolint: deadcode, unused
func WithCPUShares(shares uint64) SpecOpts {

View File

@@ -103,6 +103,11 @@ type ContainerdConfig struct {
// layers to the snapshotter.
DiscardUnpackedLayers bool `toml:"discard_unpacked_layers" json:"discardUnpackedLayers"`
// IgnoreBlockIONotEnabledErrors is a boolean flag to ignore
// blockio related errors when blockio support has not been
// enabled.
IgnoreBlockIONotEnabledErrors bool `toml:"ignore_blockio_not_enabled_errors" json:"ignoreBlockIONotEnabledErrors"`
// IgnoreRdtNotEnabledErrors is a boolean flag to ignore RDT related errors
// when RDT support has not been enabled.
IgnoreRdtNotEnabledErrors bool `toml:"ignore_rdt_not_enabled_errors" json:"ignoreRdtNotEnabledErrors"`

View File

@@ -0,0 +1,54 @@
//go:build linux
// +build linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"fmt"
"github.com/containerd/containerd/services/tasks"
"github.com/intel/goresctrl/pkg/blockio"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
)
// blockIOClassFromAnnotations examines container and pod annotations of a
// container and returns its effective blockio class.
func (c *criService) blockIOClassFromAnnotations(containerName string, containerAnnotations, podAnnotations map[string]string) (string, error) {
cls, err := blockio.ContainerClassFromAnnotations(containerName, containerAnnotations, podAnnotations)
if err != nil {
return "", err
}
if cls != "" && !tasks.BlockIOEnabled() {
if c.config.ContainerdConfig.IgnoreBlockIONotEnabledErrors {
cls = ""
logrus.Debugf("continuing create container %s, ignoring blockio not enabled (%v)", containerName, err)
} else {
return "", fmt.Errorf("blockio disabled, refusing to set blockio class of container %q to %q", containerName, cls)
}
}
return cls, nil
}
// blockIOToLinuxOci converts blockio class name into the LinuxBlockIO
// structure in the OCI runtime spec.
func blockIOToLinuxOci(className string) (*runtimespec.LinuxBlockIO, error) {
return blockio.OciLinuxBlockIO(className)
}

View File

@@ -0,0 +1,32 @@
//go:build !linux
// +build !linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
)
func (c *criService) blockIOClassFromAnnotations(containerName string, containerAnnotations, podAnnotations map[string]string) (string, error) {
return "", nil
}
func blockIOToLinuxOci(className string) (*runtimespec.LinuxBlockIO, error) {
return nil, nil
}

View File

@@ -263,6 +263,19 @@ func (c *criService) containerSpec(
supplementalGroups := securityContext.GetSupplementalGroups()
// Get blockio class
blockIOClass, err := c.blockIOClassFromAnnotations(config.GetMetadata().GetName(), config.Annotations, sandboxConfig.Annotations)
if err != nil {
return nil, fmt.Errorf("failed to set blockio class: %w", err)
}
if blockIOClass != "" {
if linuxBlockIO, err := blockIOToLinuxOci(blockIOClass); err == nil {
specOpts = append(specOpts, oci.WithBlockIO(linuxBlockIO))
} else {
return nil, err
}
}
// Get RDT class
rdtClass, err := c.rdtClassFromAnnotations(config.GetMetadata().GetName(), config.Annotations, sandboxConfig.Annotations)
if err != nil {

View File

@@ -0,0 +1,24 @@
//go:build !linux
// +build !linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package tasks
func BlockIOEnabled() bool { return false }
func initBlockIO(configFilePath string) error { return nil }

View File

@@ -0,0 +1,50 @@
//go:build linux
// +build linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package tasks
import (
"fmt"
"github.com/containerd/containerd/log"
"github.com/intel/goresctrl/pkg/blockio"
)
var blockIOEnabled bool
func BlockIOEnabled() bool { return blockIOEnabled }
func initBlockIO(configFilePath string) error {
blockIOEnabled = false
if configFilePath == "" {
log.L.Debug("No blockio config file specified, blockio not configured")
return nil
}
if err := blockio.SetConfigFromFile(configFilePath, true); err != nil {
return fmt.Errorf("blockio not enabled: %w", err)
}
blockIOEnabled = true
return nil
}

View File

@@ -67,6 +67,8 @@ const (
// Config for the tasks service plugin
type Config struct {
// BlockIOConfigFile specifies the path to blockio configuration file
BlockIOConfigFile string `toml:"blockio_config_file" json:"blockioConfigFile"`
// RdtConfigFile specifies the path to RDT configuration file
RdtConfigFile string `toml:"rdt_config_file" json:"rdtConfigFile"`
}
@@ -139,6 +141,9 @@ func initFunc(ic *plugin.InitContext) (interface{}, error) {
l.monitor.Monitor(t, nil)
}
if err := initBlockIO(config.BlockIOConfigFile); err != nil {
log.G(ic.Context).WithError(err).Errorf("blockio initialization failed")
}
if err := initRdt(config.RdtConfigFile); err != nil {
log.G(ic.Context).WithError(err).Errorf("RDT initialization failed")
}

View File

@@ -0,0 +1,456 @@
/*
Copyright 2019-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package blockio implements class-based cgroup blockio controller
// management for containers.
//
// Input: configuration of classes with blockio controller parameters
// (weights, throttling) for sets of block devices.
//
// Outputs:
// Option 1: Write blockio parameters of a class to a cgroup directory.
// Option 2: Return blockio parameters of a class in a OCI LinuxBlockIO
// structure, that can be passed to OCI-compliant container
// runtime.
//
// Notes:
// - Using Weight requires bfq or cfq I/O scheduler to be
// effective for the block devices where Weight is used.
//
// Configuration example:
//
// Classes:
//
// # Define a blockio class "LowPrioThrottled".
// # Containers in this class will be throttled and handled as
// # low priority in the I/O scheduler.
//
// LowPrioThrottled:
//
// # Weight without a Devices list specifies the default
// # I/O scheduler weight for all devices
// # that are not explicitly mentioned in following items.
// # This will be written to cgroups(.bfq).weight.
// # Weights range from 10 to 1000, the default is 100.
//
// - Weight: 80
//
// # Set all parameters for all /dev/sd* and /dev/vd* block
// # devices.
//
// - Devices:
// - /dev/sd[a-z]
// - /dev/vd[a-z]
// ThrottleReadBps: 50M # max read bytes per second
// ThrottleWriteBps: 10M # max write bytes per second
// ThrottleReadIOPS: 10k # max read io operations per second
// ThrottleWriteIOPS: 5k # max write io operations per second
// Weight: 50 # I/O scheduler (cfq/bfq) weight for
// # these devices will be written to
// # cgroups(.bfq).weight_device
//
// # Set parameters particularly for SSD devices.
// # This configuration overrides above configurations for those
// # /dev/sd* and /dev/vd* devices whose disk id contains "SSD".
//
// - Devices:
// - /dev/disk/by-id/*SSD*
// ThrottleReadBps: 100M
// ThrottleWriteBps: 40M
// # Not mentioning Throttle*IOPS means no I/O operations
// # throttling on matching devices.
// Weight: 50
//
// # Define a blockio class "HighPrioFullSpeed".
// # There is no throttling on these containers, and
// # they will be prioritized by the I/O scheduler.
//
// HighPrioFullSpeed:
// - Weight: 400
//
// Usage example:
// blockio.SetLogger(logrus.New())
// if err := blockio.SetConfigFromFile("/etc/containers/blockio.yaml", false); err != nil {
// return err
// }
// // Output option 1: write directly to cgroup "/mytestgroup"
// if err := blockio.SetCgroupClass("/mytestgroup", "LowPrioThrottled"); err != nil {
// return err
// }
// // Output option 2: OCI LinuxBlockIO of a blockio class
// if lbio, err := blockio.OciLinuxBlockIO("LowPrioThrottled"); err != nil {
// return err
// } else {
// fmt.Printf("OCI LinuxBlockIO for LowPrioThrottled:\n%+v\n", lbio)
// }
package blockio
import (
"fmt"
"io/ioutil"
stdlog "log"
"os"
"path/filepath"
"sort"
"strings"
"syscall"
"golang.org/x/sys/unix"
"k8s.io/apimachinery/pkg/api/resource"
"sigs.k8s.io/yaml"
"github.com/hashicorp/go-multierror"
"github.com/intel/goresctrl/pkg/cgroups"
grclog "github.com/intel/goresctrl/pkg/log"
)
const (
// sysfsBlockDeviceIOSchedulerPaths expands (with glob) to block device scheduler files.
// If modified, check how to parse device node from expanded paths.
sysfsBlockDeviceIOSchedulerPaths = "/sys/block/*/queue/scheduler"
)
// tBlockDeviceInfo holds information on a block device to be configured.
// As users can specify block devices using wildcards ("/dev/disk/by-id/*SSD*")
// tBlockDeviceInfo.Origin is maintained for traceability: why this
// block device is included in configuration.
// tBlockDeviceInfo.DevNode contains resolved device node, like "/dev/sda".
type tBlockDeviceInfo struct {
Major int64
Minor int64
DevNode string
Origin string
}
// Our logger instance.
var log grclog.Logger = grclog.NewLoggerWrapper(stdlog.New(os.Stderr, "[ blockio ] ", 0))
// classBlockIO connects user-defined block I/O classes to
// corresponding cgroups blockio controller parameters.
var classBlockIO = map[string]cgroups.BlockIOParameters{}
// SetLogger sets the logger instance to be used by the package.
// Examples:
// // Log to standard logger:
// stdlog := log.New(os.Stderr, "blockio:", 0)
// blockio.SetLogger(goresctrllog.NewLoggerWrapper(stdlog))
// // Log to logrus:
// blockio.SetLogger(logrus.New())
func SetLogger(l grclog.Logger) {
log = l
}
// SetConfigFromFile reads and applies blockio configuration from the
// filesystem.
func SetConfigFromFile(filename string, force bool) error {
if data, err := ioutil.ReadFile(filename); err == nil {
if err = SetConfigFromData(data, force); err != nil {
return fmt.Errorf("failed to set configuration from file %q: %s", filename, err)
}
return nil
} else {
return fmt.Errorf("failed to read config file %q: %v", filename, err)
}
}
// SetConfigFromData parses and applies configuration from data.
func SetConfigFromData(data []byte, force bool) error {
config := &Config{}
if err := yaml.Unmarshal(data, &config); err != nil {
return err
}
return SetConfig(config, force)
}
// SetConfig scans available block devices and applies new configuration.
func SetConfig(opt *Config, force bool) error {
if opt == nil {
// Setting nil configuration clears current configuration.
// SetConfigFromData([]byte(""), dontcare) arrives here.
classBlockIO = map[string]cgroups.BlockIOParameters{}
return nil
}
currentIOSchedulers, ioSchedulerDetectionError := getCurrentIOSchedulers()
if ioSchedulerDetectionError != nil {
log.Warnf("configuration validation partly disabled due to I/O scheduler detection error %#v", ioSchedulerDetectionError.Error())
}
classBlockIO = map[string]cgroups.BlockIOParameters{}
// Create cgroup blockio parameters for each blockio class
for class := range opt.Classes {
cgBlockIO, err := devicesParametersToCgBlockIO(opt.Classes[class], currentIOSchedulers)
if err != nil {
if force {
log.Warnf("ignoring: %v", err)
} else {
return err
}
}
classBlockIO[class] = cgBlockIO
}
return nil
}
// GetClasses returns block I/O class names
func GetClasses() []string {
classNames := make([]string, 0, len(classBlockIO))
for name := range classBlockIO {
classNames = append(classNames, name)
}
sort.Strings(classNames)
return classNames
}
// SetCgroupClass sets cgroup blkio controller parameters to match
// blockio class. "group" is the cgroup directory of the container
// without mountpoint and controller (blkio) directories:
// "/kubepods/burstable/POD_ID/CONTAINER_ID".
func SetCgroupClass(group string, class string) error {
cgBlockIO, ok := classBlockIO[class]
if !ok {
return fmt.Errorf("no BlockIO parameters for class %#v", class)
}
err := cgroups.ResetBlkioParameters(group, cgBlockIO)
if err != nil {
return fmt.Errorf("assigning container in cgroup %q to class %#v failed: %w", group, class, err)
}
return nil
}
// getCurrentIOSchedulers returns currently active I/O scheduler used for each block device in the system.
// Returns schedulers in a map: {"/dev/sda": "bfq"}
func getCurrentIOSchedulers() (map[string]string, error) {
var ios = map[string]string{}
schedulerFiles, err := filepath.Glob(sysfsBlockDeviceIOSchedulerPaths)
if err != nil {
return ios, fmt.Errorf("error in I/O scheduler wildcards %#v: %w", sysfsBlockDeviceIOSchedulerPaths, err)
}
for _, schedulerFile := range schedulerFiles {
devName := strings.SplitN(schedulerFile, "/", 5)[3]
schedulerDataB, err := ioutil.ReadFile(schedulerFile)
if err != nil {
// A block device may be disconnected.
log.Errorf("failed to read current I/O scheduler %#v: %v\n", schedulerFile, err)
continue
}
schedulerData := strings.Trim(string(schedulerDataB), "\n")
currentScheduler := ""
if strings.IndexByte(schedulerData, ' ') == -1 {
currentScheduler = schedulerData
} else {
openB := strings.Index(schedulerData, "[")
closeB := strings.Index(schedulerData, "]")
if -1 < openB && openB < closeB {
currentScheduler = schedulerData[openB+1 : closeB]
}
}
if currentScheduler == "" {
log.Errorf("could not parse current scheduler in %#v\n", schedulerFile)
continue
}
ios["/dev/"+devName] = currentScheduler
}
return ios, nil
}
// deviceParametersToCgBlockIO converts single blockio class parameters into cgroups blkio format.
func devicesParametersToCgBlockIO(dps []DevicesParameters, currentIOSchedulers map[string]string) (cgroups.BlockIOParameters, error) {
var errors *multierror.Error
blkio := cgroups.NewBlockIOParameters()
for _, dp := range dps {
var err error
var weight, throttleReadBps, throttleWriteBps, throttleReadIOPS, throttleWriteIOPS int64
weight, err = parseAndValidateQuantity("Weight", dp.Weight, -1, 10, 1000)
errors = multierror.Append(errors, err)
throttleReadBps, err = parseAndValidateQuantity("ThrottleReadBps", dp.ThrottleReadBps, -1, 0, -1)
errors = multierror.Append(errors, err)
throttleWriteBps, err = parseAndValidateQuantity("ThrottleWriteBps", dp.ThrottleWriteBps, -1, 0, -1)
errors = multierror.Append(errors, err)
throttleReadIOPS, err = parseAndValidateQuantity("ThrottleReadIOPS", dp.ThrottleReadIOPS, -1, 0, -1)
errors = multierror.Append(errors, err)
throttleWriteIOPS, err = parseAndValidateQuantity("ThrottleWriteIOPS", dp.ThrottleWriteIOPS, -1, 0, -1)
errors = multierror.Append(errors, err)
if dp.Devices == nil {
if weight > -1 {
blkio.Weight = weight
}
if throttleReadBps > -1 || throttleWriteBps > -1 || throttleReadIOPS > -1 || throttleWriteIOPS > -1 {
errors = multierror.Append(errors, fmt.Errorf("ignoring throttling (rbps=%#v wbps=%#v riops=%#v wiops=%#v): Devices not listed",
dp.ThrottleReadBps, dp.ThrottleWriteBps, dp.ThrottleReadIOPS, dp.ThrottleWriteIOPS))
}
} else {
blockDevices, err := currentPlatform.configurableBlockDevices(dp.Devices)
if err != nil {
// Problems in matching block device wildcards and resolving symlinks
// are worth reporting, but must not block configuring blkio where possible.
log.Warnf("%v", err)
}
if len(blockDevices) == 0 {
log.Warnf("no matches on any of Devices: %v, parameters ignored", dp.Devices)
}
for _, blockDeviceInfo := range blockDevices {
if weight != -1 {
if ios, found := currentIOSchedulers[blockDeviceInfo.DevNode]; found {
if ios != "bfq" && ios != "cfq" {
log.Warnf("weight has no effect on device %#v due to "+
"incompatible I/O scheduler %#v (bfq or cfq required)", blockDeviceInfo.DevNode, ios)
}
}
blkio.WeightDevice.Update(blockDeviceInfo.Major, blockDeviceInfo.Minor, weight)
}
if throttleReadBps != -1 {
blkio.ThrottleReadBpsDevice.Update(blockDeviceInfo.Major, blockDeviceInfo.Minor, throttleReadBps)
}
if throttleWriteBps != -1 {
blkio.ThrottleWriteBpsDevice.Update(blockDeviceInfo.Major, blockDeviceInfo.Minor, throttleWriteBps)
}
if throttleReadIOPS != -1 {
blkio.ThrottleReadIOPSDevice.Update(blockDeviceInfo.Major, blockDeviceInfo.Minor, throttleReadIOPS)
}
if throttleWriteIOPS != -1 {
blkio.ThrottleWriteIOPSDevice.Update(blockDeviceInfo.Major, blockDeviceInfo.Minor, throttleWriteIOPS)
}
}
}
}
return blkio, errors.ErrorOrNil()
}
// parseAndValidateQuantity parses quantities, like "64 M", and validates that they are in given range.
func parseAndValidateQuantity(fieldName string, fieldContent string,
defaultValue int64, min int64, max int64) (int64, error) {
// Returns field content
if fieldContent == "" {
return defaultValue, nil
}
qty, err := resource.ParseQuantity(fieldContent)
if err != nil {
return defaultValue, fmt.Errorf("syntax error in %#v (%#v)", fieldName, fieldContent)
}
value := qty.Value()
if min != -1 && min > value {
return defaultValue, fmt.Errorf("value of %#v (%#v) smaller than minimum (%#v)", fieldName, value, min)
}
if max != -1 && value > max {
return defaultValue, fmt.Errorf("value of %#v (%#v) bigger than maximum (%#v)", fieldName, value, max)
}
return value, nil
}
// platformInterface includes functions that access the system. Enables mocking the system.
type platformInterface interface {
configurableBlockDevices(devWildcards []string) ([]tBlockDeviceInfo, error)
}
// defaultPlatform versions of platformInterface functions access the underlying system.
type defaultPlatform struct{}
// currentPlatform defines which platformInterface is used: defaultPlatform or a mock, for instance.
var currentPlatform platformInterface = defaultPlatform{}
// configurableBlockDevices finds major:minor numbers for device filenames. Wildcards are allowed in filenames.
func (dpm defaultPlatform) configurableBlockDevices(devWildcards []string) ([]tBlockDeviceInfo, error) {
// Return map {devNode: tBlockDeviceInfo}
// Example: {"/dev/sda": {Major:8, Minor:0, Origin:"from symlink /dev/disk/by-id/ata-VendorXSSD from wildcard /dev/disk/by-id/*SSD*"}}
var errors *multierror.Error
blockDevices := []tBlockDeviceInfo{}
var origin string
// 1. Expand wildcards to device filenames (may be symlinks)
// Example: devMatches["/dev/disk/by-id/ata-VendorSSD"] == "from wildcard \"dev/disk/by-id/*SSD*\""
devMatches := map[string]string{} // {devNodeOrSymlink: origin}
for _, devWildcard := range devWildcards {
devWildcardMatches, err := filepath.Glob(devWildcard)
if err != nil {
errors = multierror.Append(errors, fmt.Errorf("bad device wildcard %#v: %w", devWildcard, err))
continue
}
if len(devWildcardMatches) == 0 {
errors = multierror.Append(errors, fmt.Errorf("device wildcard %#v does not match any device nodes", devWildcard))
continue
}
for _, devMatch := range devWildcardMatches {
if devMatch != devWildcard {
origin = fmt.Sprintf("from wildcard %#v", devWildcard)
} else {
origin = ""
}
devMatches[devMatch] = strings.TrimSpace(fmt.Sprintf("%v %v", devMatches[devMatch], origin))
}
}
// 2. Find out real device nodes behind symlinks
// Example: devRealPaths["/dev/sda"] == "from symlink \"/dev/disk/by-id/ata-VendorSSD\""
devRealpaths := map[string]string{} // {devNode: origin}
for devMatch, devOrigin := range devMatches {
realDevNode, err := filepath.EvalSymlinks(devMatch)
if err != nil {
errors = multierror.Append(errors, fmt.Errorf("cannot filepath.EvalSymlinks(%#v): %w", devMatch, err))
continue
}
if realDevNode != devMatch {
origin = fmt.Sprintf("from symlink %#v %v", devMatch, devOrigin)
} else {
origin = devOrigin
}
devRealpaths[realDevNode] = strings.TrimSpace(fmt.Sprintf("%v %v", devRealpaths[realDevNode], origin))
}
// 3. Filter out everything but block devices that are not partitions
// Example: blockDevices[0] == {Major: 8, Minor: 0, DevNode: "/dev/sda", Origin: "..."}
for devRealpath, devOrigin := range devRealpaths {
origin := ""
if devOrigin != "" {
origin = fmt.Sprintf(" (origin: %s)", devOrigin)
}
fileInfo, err := os.Stat(devRealpath)
if err != nil {
errors = multierror.Append(errors, fmt.Errorf("cannot os.Stat(%#v): %w%s", devRealpath, err, origin))
continue
}
fileMode := fileInfo.Mode()
if fileMode&os.ModeDevice == 0 {
errors = multierror.Append(errors, fmt.Errorf("file %#v is not a device%s", devRealpath, origin))
continue
}
if fileMode&os.ModeCharDevice != 0 {
errors = multierror.Append(errors, fmt.Errorf("file %#v is a character device%s", devRealpath, origin))
continue
}
sys, ok := fileInfo.Sys().(*syscall.Stat_t)
major := unix.Major(uint64(sys.Rdev))
minor := unix.Minor(uint64(sys.Rdev))
if !ok {
errors = multierror.Append(errors, fmt.Errorf("cannot get syscall stat_t from %#v: %w%s", devRealpath, err, origin))
continue
}
if minor&0xf != 0 {
errors = multierror.Append(errors, fmt.Errorf("skipping %#v: cannot weight/throttle partitions%s", devRealpath, origin))
continue
}
blockDevices = append(blockDevices, tBlockDeviceInfo{
Major: int64(major),
Minor: int64(minor),
DevNode: devRealpath,
Origin: devOrigin,
})
}
return blockDevices, errors.ErrorOrNil()
}

View File

@@ -0,0 +1,33 @@
/*
Copyright 2019-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package blockio
// Config contains a blockio configuration.
type Config struct {
// Classes define weights and throttling parameters for sets of devices.
Classes map[string][]DevicesParameters `json:",omitempty"`
}
// DevicesParameters defines Block IO parameters for a set of devices.
type DevicesParameters struct {
Devices []string `json:",omitempty"`
ThrottleReadBps string `json:",omitempty"`
ThrottleWriteBps string `json:",omitempty"`
ThrottleReadIOPS string `json:",omitempty"`
ThrottleWriteIOPS string `json:",omitempty"`
Weight string `json:",omitempty"`
}

View File

@@ -0,0 +1,47 @@
/*
Copyright 2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package blockio
import (
"github.com/intel/goresctrl/pkg/kubernetes"
)
const (
// BlockioContainerAnnotation is the CRI level container annotation for setting
// the blockio class of a container
BlockioContainerAnnotation = "io.kubernetes.cri.blockio-class"
// BlockioPodAnnotation is a Pod annotation for setting the blockio class of
// all containers of the pod
BlockioPodAnnotation = "blockio.resources.beta.kubernetes.io/pod"
// BlockioPodAnnotationContainerPrefix is prefix for per-container Pod annotation
// for setting the blockio class of one container of the pod
BlockioPodAnnotationContainerPrefix = "blockio.resources.beta.kubernetes.io/container."
)
// ContainerClassFromAnnotations determines the effective blockio
// class of a container from the Pod annotations and CRI level
// container annotations of a container. If the class is not specified
// by any annotation, returns empty class name. Returned error is
// reserved (nil).
func ContainerClassFromAnnotations(containerName string, containerAnnotations, podAnnotations map[string]string) (string, error) {
clsName, _ := kubernetes.ContainerClassFromAnnotations(
BlockioContainerAnnotation, BlockioPodAnnotation, BlockioPodAnnotationContainerPrefix,
containerName, containerAnnotations, podAnnotations)
return clsName, nil
}

71
vendor/github.com/intel/goresctrl/pkg/blockio/oci.go generated vendored Normal file
View File

@@ -0,0 +1,71 @@
/*
Copyright 2019-2021 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package blockio
import (
"fmt"
oci "github.com/opencontainers/runtime-spec/specs-go"
"github.com/intel/goresctrl/pkg/cgroups"
)
// OciLinuxBlockIO returns OCI LinuxBlockIO structure corresponding to the class.
func OciLinuxBlockIO(class string) (*oci.LinuxBlockIO, error) {
blockio, ok := classBlockIO[class]
if !ok {
return nil, fmt.Errorf("no OCI BlockIO parameters for class %#v", class)
}
ociBlockio := oci.LinuxBlockIO{}
if blockio.Weight != -1 {
w := uint16(blockio.Weight)
ociBlockio.Weight = &w
}
ociBlockio.WeightDevice = ociLinuxWeightDevices(blockio.WeightDevice)
ociBlockio.ThrottleReadBpsDevice = ociLinuxThrottleDevices(blockio.ThrottleReadBpsDevice)
ociBlockio.ThrottleWriteBpsDevice = ociLinuxThrottleDevices(blockio.ThrottleWriteBpsDevice)
ociBlockio.ThrottleReadIOPSDevice = ociLinuxThrottleDevices(blockio.ThrottleReadIOPSDevice)
ociBlockio.ThrottleWriteIOPSDevice = ociLinuxThrottleDevices(blockio.ThrottleWriteIOPSDevice)
return &ociBlockio, nil
}
func ociLinuxWeightDevices(dws cgroups.DeviceWeights) []oci.LinuxWeightDevice {
if dws == nil || len(dws) == 0 {
return nil
}
olwds := make([]oci.LinuxWeightDevice, len(dws))
for i, wd := range dws {
w := uint16(wd.Weight)
olwds[i].Major = wd.Major
olwds[i].Minor = wd.Minor
olwds[i].Weight = &w
}
return olwds
}
func ociLinuxThrottleDevices(drs cgroups.DeviceRates) []oci.LinuxThrottleDevice {
if drs == nil || len(drs) == 0 {
return nil
}
oltds := make([]oci.LinuxThrottleDevice, len(drs))
for i, dr := range drs {
oltds[i].Major = dr.Major
oltds[i].Minor = dr.Minor
oltds[i].Rate = uint64(dr.Rate)
}
return oltds
}

View File

@@ -0,0 +1,312 @@
// Copyright 2020-2021 Intel Corporation. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cgroups
import (
"fmt"
"strconv"
"strings"
"github.com/hashicorp/go-multierror"
)
// cgroups blkio parameter filenames.
var blkioWeightFiles = []string{"blkio.bfq.weight", "blkio.weight"}
var blkioWeightDeviceFiles = []string{"blkio.bfq.weight_device", "blkio.weight_device"}
var blkioThrottleReadBpsFiles = []string{"blkio.throttle.read_bps_device"}
var blkioThrottleWriteBpsFiles = []string{"blkio.throttle.write_bps_device"}
var blkioThrottleReadIOPSFiles = []string{"blkio.throttle.read_iops_device"}
var blkioThrottleWriteIOPSFiles = []string{"blkio.throttle.write_iops_device"}
// BlockIOParameters contains cgroups blockio controller parameters.
//
// Effects of Weight and Rate values in SetBlkioParameters():
// Value | Effect
// -------+-------------------------------------------------------------------
// -1 | Do not write to cgroups, value is missing.
// 0 | Write to cgroups, will clear the setting as specified in cgroups blkio interface.
// other | Write to cgroups, sets the value.
type BlockIOParameters struct {
Weight int64
WeightDevice DeviceWeights
ThrottleReadBpsDevice DeviceRates
ThrottleWriteBpsDevice DeviceRates
ThrottleReadIOPSDevice DeviceRates
ThrottleWriteIOPSDevice DeviceRates
}
// DeviceWeight contains values for
// - blkio.[io-scheduler].weight
type DeviceWeight struct {
Major int64
Minor int64
Weight int64
}
// DeviceRate contains values for
// - blkio.throttle.read_bps_device
// - blkio.throttle.write_bps_device
// - blkio.throttle.read_iops_device
// - blkio.throttle.write_iops_device
type DeviceRate struct {
Major int64
Minor int64
Rate int64
}
// DeviceWeights contains weights for devices.
type DeviceWeights []DeviceWeight
// DeviceRates contains throttling rates for devices.
type DeviceRates []DeviceRate
// DeviceParameters interface provides functions common to DeviceWeights and DeviceRates.
type DeviceParameters interface {
Append(maj, min, val int64)
Update(maj, min, val int64)
}
// Append appends (major, minor, value) to DeviceWeights slice.
func (w *DeviceWeights) Append(maj, min, val int64) {
*w = append(*w, DeviceWeight{Major: maj, Minor: min, Weight: val})
}
// Append appends (major, minor, value) to DeviceRates slice.
func (r *DeviceRates) Append(maj, min, val int64) {
*r = append(*r, DeviceRate{Major: maj, Minor: min, Rate: val})
}
// Update updates device weight in DeviceWeights slice, or appends it if not found.
func (w *DeviceWeights) Update(maj, min, val int64) {
for index, devWeight := range *w {
if devWeight.Major == maj && devWeight.Minor == min {
(*w)[index].Weight = val
return
}
}
w.Append(maj, min, val)
}
// Update updates device rate in DeviceRates slice, or appends it if not found.
func (r *DeviceRates) Update(maj, min, val int64) {
for index, devRate := range *r {
if devRate.Major == maj && devRate.Minor == min {
(*r)[index].Rate = val
return
}
}
r.Append(maj, min, val)
}
// NewBlockIOParameters creates new BlockIOParameters instance.
func NewBlockIOParameters() BlockIOParameters {
return BlockIOParameters{
Weight: -1,
}
}
// NewDeviceWeight creates new DeviceWeight instance.
func NewDeviceWeight() DeviceWeight {
return DeviceWeight{
Major: -1,
Minor: -1,
Weight: -1,
}
}
// NewDeviceRate creates new DeviceRate instance.
func NewDeviceRate() DeviceRate {
return DeviceRate{
Major: -1,
Minor: -1,
Rate: -1,
}
}
type devMajMin struct {
Major int64
Minor int64
}
// ResetBlkioParameters adds new, changes existing and removes missing blockIO parameters in cgroupsDir.
func ResetBlkioParameters(groupDir string, blockIO BlockIOParameters) error {
var errors *multierror.Error
oldBlockIO, _ := GetBlkioParameters(groupDir)
newBlockIO := NewBlockIOParameters()
newBlockIO.Weight = blockIO.Weight
newBlockIO.WeightDevice = resetDevWeights(oldBlockIO.WeightDevice, blockIO.WeightDevice)
newBlockIO.ThrottleReadBpsDevice = resetDevRates(oldBlockIO.ThrottleReadBpsDevice, blockIO.ThrottleReadBpsDevice)
newBlockIO.ThrottleWriteBpsDevice = resetDevRates(oldBlockIO.ThrottleWriteBpsDevice, blockIO.ThrottleWriteBpsDevice)
newBlockIO.ThrottleReadIOPSDevice = resetDevRates(oldBlockIO.ThrottleReadIOPSDevice, blockIO.ThrottleReadIOPSDevice)
newBlockIO.ThrottleWriteIOPSDevice = resetDevRates(oldBlockIO.ThrottleWriteIOPSDevice, blockIO.ThrottleWriteIOPSDevice)
errors = multierror.Append(errors, SetBlkioParameters(groupDir, newBlockIO))
return errors.ErrorOrNil()
}
// resetDevWeights adds wanted weight parameters to new and resets unwanted weights.
func resetDevWeights(old, wanted []DeviceWeight) []DeviceWeight {
new := []DeviceWeight{}
seenDev := map[devMajMin]bool{}
for _, wdp := range wanted {
seenDev[devMajMin{wdp.Major, wdp.Minor}] = true
new = append(new, wdp)
}
for _, wdp := range old {
if !seenDev[devMajMin{wdp.Major, wdp.Minor}] {
new = append(new, DeviceWeight{wdp.Major, wdp.Minor, 0})
}
}
return new
}
// resetDevRates adds wanted rate parameters to new and resets unwanted rates.
func resetDevRates(old, wanted []DeviceRate) []DeviceRate {
new := []DeviceRate{}
seenDev := map[devMajMin]bool{}
for _, rdp := range wanted {
new = append(new, rdp)
seenDev[devMajMin{rdp.Major, rdp.Minor}] = true
}
for _, rdp := range old {
if !seenDev[devMajMin{rdp.Major, rdp.Minor}] {
new = append(new, DeviceRate{rdp.Major, rdp.Minor, 0})
}
}
return new
}
// GetBlkioParameters returns BlockIO parameters from files in cgroups blkio controller directory.
func GetBlkioParameters(group string) (BlockIOParameters, error) {
var errors *multierror.Error
blockIO := NewBlockIOParameters()
errors = multierror.Append(errors, readWeight(group, blkioWeightFiles, &blockIO.Weight))
errors = multierror.Append(errors, readDeviceParameters(group, blkioWeightDeviceFiles, &blockIO.WeightDevice))
errors = multierror.Append(errors, readDeviceParameters(group, blkioThrottleReadBpsFiles, &blockIO.ThrottleReadBpsDevice))
errors = multierror.Append(errors, readDeviceParameters(group, blkioThrottleWriteBpsFiles, &blockIO.ThrottleWriteBpsDevice))
errors = multierror.Append(errors, readDeviceParameters(group, blkioThrottleReadIOPSFiles, &blockIO.ThrottleReadIOPSDevice))
errors = multierror.Append(errors, readDeviceParameters(group, blkioThrottleWriteIOPSFiles, &blockIO.ThrottleWriteIOPSDevice))
return blockIO, errors.ErrorOrNil()
}
// readWeight parses int64 from a cgroups entry.
func readWeight(groupDir string, filenames []string, rv *int64) error {
contents, err := readFirstFile(groupDir, filenames)
if err != nil {
return err
}
parsed, err := strconv.ParseInt(strings.TrimSuffix(contents, "\n"), 10, 64)
if err != nil {
return fmt.Errorf("parsing weight from %#v found in %v failed: %w", contents, filenames, err)
}
*rv = parsed
return nil
}
// readDeviceParameters parses device lines used for weights and throttling rates.
func readDeviceParameters(groupDir string, filenames []string, params DeviceParameters) error {
var errors *multierror.Error
contents, err := readFirstFile(groupDir, filenames)
if err != nil {
return err
}
for _, line := range strings.Split(contents, "\n") {
// Device weight files may have "default NNN" line at the beginning. Skip it.
if line == "" || strings.HasPrefix(line, "default ") {
continue
}
// Expect syntax MAJOR:MINOR VALUE
devVal := strings.Split(line, " ")
if len(devVal) != 2 {
errors = multierror.Append(errors, fmt.Errorf("invalid line %q, single space expected", line))
continue
}
majMin := strings.Split(devVal[0], ":")
if len(majMin) != 2 {
errors = multierror.Append(errors, fmt.Errorf("invalid line %q, single colon expected before space", line))
continue
}
major, majErr := strconv.ParseInt(majMin[0], 10, 64)
minor, minErr := strconv.ParseInt(majMin[1], 10, 64)
value, valErr := strconv.ParseInt(devVal[1], 10, 64)
if majErr != nil || minErr != nil || valErr != nil {
errors = multierror.Append(errors, fmt.Errorf("invalid number when parsing \"major:minor value\" from \"%s:%s %s\"", majMin[0], majMin[1], devVal[1]))
continue
}
params.Append(major, minor, value)
}
return errors.ErrorOrNil()
}
// readFirstFile returns contents of the first successfully read entry.
func readFirstFile(groupDir string, filenames []string) (string, error) {
var errors *multierror.Error
// If reading all the files fails, return list of read errors.
for _, filename := range filenames {
content, err := Blkio.Group(groupDir).Read(filename)
if err == nil {
return content, nil
}
errors = multierror.Append(errors, err)
}
err := errors.ErrorOrNil()
if err != nil {
return "", fmt.Errorf("could not read any of files %q: %w", filenames, err)
}
return "", nil
}
// SetBlkioParameters writes BlockIO parameters to files in cgroups blkio contoller directory.
func SetBlkioParameters(group string, blockIO BlockIOParameters) error {
var errors *multierror.Error
if blockIO.Weight >= 0 {
errors = multierror.Append(errors, writeFirstFile(group, blkioWeightFiles, "%d", blockIO.Weight))
}
for _, wd := range blockIO.WeightDevice {
errors = multierror.Append(errors, writeFirstFile(group, blkioWeightDeviceFiles, "%d:%d %d", wd.Major, wd.Minor, wd.Weight))
}
for _, rd := range blockIO.ThrottleReadBpsDevice {
errors = multierror.Append(errors, writeFirstFile(group, blkioThrottleReadBpsFiles, "%d:%d %d", rd.Major, rd.Minor, rd.Rate))
}
for _, rd := range blockIO.ThrottleWriteBpsDevice {
errors = multierror.Append(errors, writeFirstFile(group, blkioThrottleWriteBpsFiles, "%d:%d %d", rd.Major, rd.Minor, rd.Rate))
}
for _, rd := range blockIO.ThrottleReadIOPSDevice {
errors = multierror.Append(errors, writeFirstFile(group, blkioThrottleReadIOPSFiles, "%d:%d %d", rd.Major, rd.Minor, rd.Rate))
}
for _, rd := range blockIO.ThrottleWriteIOPSDevice {
errors = multierror.Append(errors, writeFirstFile(group, blkioThrottleWriteIOPSFiles, "%d:%d %d", rd.Major, rd.Minor, rd.Rate))
}
return errors.ErrorOrNil()
}
// writeFirstFile writes content to the first existing file in the list under groupDir.
func writeFirstFile(groupDir string, filenames []string, format string, args ...interface{}) error {
var errors *multierror.Error
// Returns list of errors from writes, list of single error due to all filenames missing or nil on success.
for _, filename := range filenames {
if err := Blkio.Group(groupDir).Write(filename, format, args...); err != nil {
errors = multierror.Append(errors, err)
continue
}
return nil
}
err := errors.ErrorOrNil()
if err != nil {
data := fmt.Sprintf(format, args...)
return fmt.Errorf("writing all files %v failed, errors: %w, content %q", filenames, err, data)
}
return nil
}

View File

@@ -0,0 +1,237 @@
// Copyright 2020-2021 Intel Corporation. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cgroups
import (
"bufio"
"bytes"
"errors"
"fmt"
"os"
"path"
"strings"
"syscall"
)
// Controller is our enumerated type for cgroup controllers.
type Controller int
// Group represents a control group.
type Group string
//nolint
const (
// UnkownController represents a controller of unknown type.
UnknownController Controller = iota
// blkio cgroup controller.
Blkio
// cpu cgroup controller.
Cpu
// cpuacct cgroup controller.
Cpuacct
// cpuset cgroup controller.
Cpuset
// devices cgroup controller.
Devices
// freezer cgroup controller.
Freezer
// hugetlb cgroup controller.
Hugetlb
// memory cgroup controller.
Memory
// net_cls cgroup controller.
NetCls
// net_prio cgroup controller.
NetPrio
// per_event cgroup controller.
PerfEvent
// pids cgroup controller.
Pids
)
var (
// controllerNames maps controllers to names/relative paths.
controllerNames = map[Controller]string{
Blkio: "blkio",
Cpu: "cpu",
Cpuacct: "cpuacct",
Cpuset: "cpuset",
Devices: "devices",
Freezer: "freezer",
Hugetlb: "hugetlb",
Memory: "memory",
NetCls: "net_cls",
NetPrio: "net_prio",
PerfEvent: "perf_event",
Pids: "pids",
}
// controllerNames maps controllers to names/relative paths.
controllerDirs = map[string]Controller{
"blkio": Blkio,
"cpu": Cpu,
"cpuacct": Cpuacct,
"cpuset": Cpuset,
"devices": Devices,
"freezer": Freezer,
"hugetlb": Hugetlb,
"memory": Memory,
"net_cls": NetCls,
"net_prio": NetPrio,
"perf_event": PerfEvent,
"pids": Pids,
}
)
// String returns the name of the given controller.
func (c Controller) String() string {
if name, ok := controllerNames[c]; ok {
return name
}
return "unknown"
}
// Path returns the absolute path of the given controller.
func (c Controller) Path() string {
return path.Join(mountDir, c.String())
}
// RelPath returns the relative path of the given controller.
func (c Controller) RelPath() string {
return c.String()
}
// Group returns the given group for the controller.
func (c Controller) Group(group string) Group {
return Group(path.Join(mountDir, c.String(), group))
}
// AsGroup returns the group for the given absolute directory path.
func AsGroup(absDir string) Group {
return Group(absDir)
}
// Controller returns the controller for the group.
func (g Group) Controller() Controller {
relPath := strings.TrimPrefix(string(g), mountDir+"/")
split := strings.SplitN(relPath, "/", 2)
if len(split) > 0 {
return controllerDirs[split[0]]
}
return UnknownController
}
// GetTasks reads the pids of threads currently assigned to the group.
func (g Group) GetTasks() ([]string, error) {
return g.readPids(Tasks)
}
// GetProcesses reads the pids of processes currently assigned to the group.
func (g Group) GetProcesses() ([]string, error) {
return g.readPids(Procs)
}
// AddTasks writes the given thread pids to the group.
func (g Group) AddTasks(pids ...string) error {
return g.writePids(Tasks, pids...)
}
// AddProcesses writes the given process pids to the group.
func (g Group) AddProcesses(pids ...string) error {
return g.writePids(Procs, pids...)
}
// Write writes the formatted data to the groups entry.
func (g Group) Write(entry, format string, args ...interface{}) error {
entryPath := path.Join(string(g), entry)
f, err := fsi.OpenFile(entryPath, os.O_WRONLY, 0644)
if err != nil {
return g.errorf("%q: failed to open for writing: %v", entry, err)
}
defer f.Close()
data := fmt.Sprintf(format, args...)
if _, err := f.Write([]byte(data)); err != nil {
return g.errorf("%q: failed to write %q: %v", entry, data, err)
}
return nil
}
// Read the groups entry and return contents in a string
func (g Group) Read(entry string) (string, error) {
var buf bytes.Buffer
entryPath := path.Join(string(g), entry)
f, err := fsi.OpenFile(entryPath, os.O_RDONLY, 0644)
if err != nil {
return "", g.errorf("%q: failed to open for reading: %v", entry, err)
}
defer f.Close()
if _, err := buf.ReadFrom(f); err != nil {
return "", err
}
return buf.String(), nil
}
// readPids reads pids from a cgroup's tasks or procs entry.
func (g Group) readPids(entry string) ([]string, error) {
var pids []string
pidFile := path.Join(string(g), entry)
f, err := fsi.OpenFile(pidFile, os.O_RDONLY, 0644)
if err != nil {
return nil, g.errorf("failed to open %q: %v", entry, err)
}
defer f.Close()
s := bufio.NewScanner(f)
for s.Scan() {
pids = append(pids, s.Text())
}
if s.Err() != nil {
return nil, g.errorf("failed to read %q: %v", entry, err)
}
return pids, nil
}
// writePids writes pids to a cgroup's tasks or procs entry.
func (g Group) writePids(entry string, pids ...string) error {
pidFile := path.Join(string(g), entry)
f, err := fsi.OpenFile(pidFile, os.O_WRONLY, 0644)
if err != nil {
return g.errorf("failed to write pids to %q: %v", pidFile, err)
}
defer f.Close()
for _, pid := range pids {
if _, err := f.Write([]byte(pid)); err != nil {
if !errors.Is(err, syscall.ESRCH) {
return g.errorf("failed to write pid %s to %q: %v",
pidFile, pid, err)
}
}
}
return nil
}
// error returns a formatted group-specific error.
func (g Group) errorf(format string, args ...interface{}) error {
name := strings.TrimPrefix(string(g), mountDir+"/")
return fmt.Errorf("cgroup "+name+": "+format, args...)
}

View File

@@ -0,0 +1,65 @@
package cgroups
import (
"fmt"
"os"
"path/filepath"
"sync"
)
// CgroupID implements mapping kernel cgroup IDs to cgroupfs paths with transparent caching.
type CgroupID struct {
root string
cache map[uint64]string
sync.Mutex
}
// NewCgroupID creates a new CgroupID map/cache.
func NewCgroupID(root string) *CgroupID {
return &CgroupID{
root: root,
cache: make(map[uint64]string),
}
}
// Find finds the path for the given cgroup id.
func (cgid *CgroupID) Find(id uint64) (string, error) {
found := false
var p string
cgid.Lock()
defer cgid.Unlock()
if path, ok := cgid.cache[id]; ok {
return path, nil
}
err := fsi.Walk(cgid.root, func(path string, info os.FileInfo, err error) error {
if err != nil {
if os.IsNotExist(err) {
return nil
}
return err
}
if found {
return filepath.SkipDir
}
if info.IsDir() && id == getID(path) {
found = true
p = path
return filepath.SkipDir
}
return nil
})
if err != nil {
return "", err
} else if !found {
return "", fmt.Errorf("cgroupid %v not found", id)
} else {
cgid.cache[id] = p
return p, nil
}
}

View File

@@ -0,0 +1,18 @@
//go:build linux
// +build linux
package cgroups
import (
"encoding/binary"
"golang.org/x/sys/unix"
)
func getID(path string) uint64 {
h, _, err := unix.NameToHandleAt(unix.AT_FDCWD, path, 0)
if err != nil {
return 0
}
return binary.LittleEndian.Uint64(h.Bytes())
}

View File

@@ -0,0 +1,8 @@
//go:build !linux
// +build !linux
package cgroups
func getID(path string) uint64 {
return 0
}

View File

@@ -0,0 +1,75 @@
// Copyright 2020 Intel Corporation. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cgroups
import (
"path"
"path/filepath"
)
//nolint
const (
// Tasks is a cgroup's "tasks" entry.
Tasks = "tasks"
// Procs is cgroup's "cgroup.procs" entry.
Procs = "cgroup.procs"
// CpuShares is the cpu controller's "cpu.shares" entry.
CpuShares = "cpu.shares"
// CpuPeriod is the cpu controller's "cpu.cfs_period_us" entry.
CpuPeriod = "cpu.cfs_period_us"
// CpuQuota is the cpu controller's "cpu.cfs_quota_us" entry.
CpuQuota = "cpu.cfs_quota_us"
// CpusetCpus is the cpuset controller's cpuset.cpus entry.
CpusetCpus = "cpuset.cpus"
// CpusetMems is the cpuset controller's cpuset.mems entry.
CpusetMems = "cpuset.mems"
)
var (
// mount is the parent directory for per-controller cgroupfs mounts.
mountDir = "/sys/fs/cgroup"
// v2Dir is the parent directory for per-controller cgroupfs mounts.
v2Dir = path.Join(mountDir, "unified")
// KubeletRoot is the --cgroup-root option the kubelet is running with.
KubeletRoot = ""
)
// GetMountDir returns the common mount point for cgroup v1 controllers.
func GetMountDir() string {
return mountDir
}
// SetMountDir sets the common mount point for the cgroup v1 controllers.
func SetMountDir(dir string) {
v2, _ := filepath.Rel(mountDir, v2Dir)
mountDir = dir
if v2 != "" {
v2Dir = path.Join(mountDir, v2)
}
}
// GetV2Dir returns the cgroup v2 unified mount directory.
func GetV2Dir() string {
return v2Dir
}
// SetV2Dir sets the unified cgroup v2 mount directory.
func SetV2Dir(dir string) {
if dir[0] == '/' {
v2Dir = dir
} else {
v2Dir = path.Join(mountDir, v2Dir)
}
}

39
vendor/github.com/intel/goresctrl/pkg/cgroups/fsi.go generated vendored Normal file
View File

@@ -0,0 +1,39 @@
// Copyright 2021 Intel Corporation. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This module defines filesystem interface (fsi) through which
// cgroups package accesses files.
package cgroups
import (
"os"
"path/filepath"
)
type fsiIface interface {
Open(name string) (fileIface, error)
OpenFile(name string, flag int, perm os.FileMode) (fileIface, error)
Walk(string, filepath.WalkFunc) error
Lstat(path string) (os.FileInfo, error)
}
type fileIface interface {
Close() error
Read(p []byte) (n int, err error)
Write(b []byte) (n int, err error)
}
// Set the default filesystem interface
var fsi fsiIface = newFsiOS()

View File

@@ -0,0 +1,230 @@
// Copyright 2021 Intel Corporation. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This module implements a mock filesystem that can be used as a
// replacement for the native filesystem interface (fsi).
package cgroups
import (
"fmt"
"io"
"os"
"path/filepath"
"strings"
"time"
)
type fsMock struct {
files map[string]*mockFile // filesystem contents
}
type mockFile struct {
// User-defined file properties
data []byte // contents of the file
// User/fsimock-defined properties
info *mockFileInfo
// File-specific user-overrides for the default file behavior
open func(string) (fileIface, error)
read func([]byte) (int, error)
write func([]byte) (int, error)
// fsimock-defined properties
fs *fsMock
filename string
handle *mockFileHandle
writeHistory [][]byte
}
type mockFileHandle struct {
pos int
}
type mockFileInfo struct {
mode os.FileMode
name string
mf *mockFile
}
func NewFsiMock(files map[string]mockFile) fsiIface {
mfs := fsMock{}
mfs.files = map[string]*mockFile{}
for filename, usermf := range files {
mf := usermf
if mf.info == nil {
mf.info = &mockFileInfo{}
}
if mf.info.name == "" {
mf.info.name = filepath.Base(filename)
}
mf.filename = filename
mf.info.mf = &mf
mf.fs = &mfs
mfs.files[filename] = &mf
}
return &mfs
}
func (mfs fsMock) OpenFile(name string, flag int, perm os.FileMode) (fileIface, error) {
fsmockLog("OpenFile(%q, %d, %d)", name, flag, perm)
if mf, ok := mfs.files[name]; ok {
mf.handle = &mockFileHandle{}
if mf.open != nil {
return mf.open(name)
}
return *mf, nil
}
return nil, fsmockErrorf("%q: file not found", name)
}
func (mfs fsMock) Open(name string) (fileIface, error) {
return mfs.OpenFile(name, 0, 0)
}
func (mfs fsMock) Walk(path string, walkFn filepath.WalkFunc) error {
dirPath := strings.TrimSuffix(path, "/")
info, err := mfs.Lstat(dirPath)
if err != nil {
err = walkFn(path, nil, err)
return err
}
if !info.IsDir() {
return walkFn(path, info, nil)
}
err = walkFn(path, info, nil)
if err != nil {
return err
}
for _, name := range mfs.dirContents(dirPath) {
if err = mfs.Walk(dirPath+"/"+name, walkFn); err != nil && err != filepath.SkipDir {
return err
}
}
return nil
}
func (mfs fsMock) dirContents(path string) []string {
dirPathS := strings.TrimSuffix(path, "/") + "/"
contentSet := map[string]struct{}{}
for filename := range mfs.files {
if !strings.HasPrefix(filename, dirPathS) {
continue
}
relToDirPath := strings.TrimPrefix(filename, dirPathS)
names := strings.SplitN(relToDirPath, "/", 2)
contentSet[names[0]] = struct{}{}
}
contents := make([]string, 0, len(contentSet))
for name := range contentSet {
contents = append(contents, name)
}
return contents
}
func (mfs fsMock) Lstat(path string) (os.FileInfo, error) {
if mf, ok := mfs.files[path]; ok {
return *mf.info, nil
}
if len(mfs.dirContents(path)) > 0 {
return mockFileInfo{
name: filepath.Base(path),
mode: os.ModeDir,
}, nil
}
return mockFileInfo{}, fsmockErrorf("%q: file not found", path)
}
func (mfi mockFileInfo) Name() string {
return mfi.name
}
func (mfi mockFileInfo) Size() int64 {
if mfi.mf != nil {
return int64(len(mfi.mf.data))
}
return 0
}
func (mfi mockFileInfo) Mode() os.FileMode {
return mfi.mode
}
func (mfi mockFileInfo) ModTime() time.Time {
return time.Time{}
}
func (mfi mockFileInfo) IsDir() bool {
return mfi.mode&os.ModeDir != 0
}
func (mfi mockFileInfo) Sys() interface{} {
return nil
}
func (mf mockFile) Write(b []byte) (n int, err error) {
pos := mf.handle.pos
if mf.write != nil {
n, err = mf.write(b)
if err == nil {
mf.fs.files[mf.filename].writeHistory = append(mf.fs.files[mf.filename].writeHistory, b)
}
} else {
newpos := pos + len(b)
if newpos > cap(mf.data) {
newdata := make([]byte, newpos)
copy(newdata, mf.data)
mf.data = newdata
}
copy(mf.data[pos:newpos], b)
mf.handle.pos = newpos
if f, ok := mf.fs.files[mf.filename]; ok {
f.data = mf.data
}
mf.fs.files[mf.filename].writeHistory = append(mf.fs.files[mf.filename].writeHistory, b)
}
fsmockLog("{%q, pos=%d}.Write([%d]byte(%q)) = (%d, %v) %q", mf.filename, pos, len(b), string(b), n, err, mf.fs.files[mf.filename].data)
return n, err
}
func (mf mockFile) Read(b []byte) (n int, err error) {
pos := mf.handle.pos
if mf.read != nil {
n, err = mf.read(b)
} else {
n = len(mf.data) - pos
err = nil
if n <= 0 {
err = io.EOF
}
if n > cap(b) {
n = cap(b)
}
copy(b, mf.data[pos:pos+n])
mf.handle.pos += n
}
fsmockLog("{%q, pos=%d}.Read([%d]byte) = (%d, %v)\n", mf.filename, pos, len(b), n, err)
return
}
func (mf mockFile) Close() error {
return nil
}
func fsmockLog(format string, args ...interface{}) {
fmt.Printf("fsmock: "+format+"\n", args...)
}
func fsmockErrorf(format string, args ...interface{}) error {
return fmt.Errorf("fsmock: "+format, args...)
}

63
vendor/github.com/intel/goresctrl/pkg/cgroups/fsios.go generated vendored Normal file
View File

@@ -0,0 +1,63 @@
// Copyright 2021 Intel Corporation. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This module provides the native implementation of the filesystem
// interface (fsi).
package cgroups
import (
"os"
"path/filepath"
)
type fsOs struct{}
type osFile struct {
file *os.File
}
func newFsiOS() fsiIface {
return fsOs{}
}
func (fsOs) OpenFile(name string, flag int, perm os.FileMode) (fileIface, error) {
f, err := os.OpenFile(name, flag, perm)
return osFile{f}, err
}
func (fsOs) Open(name string) (fileIface, error) {
f, err := os.Open(name)
return osFile{f}, err
}
func (fsOs) Lstat(name string) (os.FileInfo, error) {
return os.Lstat(name)
}
func (fsOs) Walk(root string, walkFn filepath.WalkFunc) error {
return filepath.Walk(root, walkFn)
}
func (osf osFile) Write(b []byte) (n int, err error) {
return osf.file.Write(b)
}
func (osf osFile) Read(b []byte) (n int, err error) {
return osf.file.Read(b)
}
func (osf osFile) Close() error {
return osf.file.Close()
}

2
vendor/modules.txt vendored
View File

@@ -253,6 +253,8 @@ github.com/hashicorp/go-multierror
github.com/imdario/mergo
# github.com/intel/goresctrl v0.2.0
## explicit; go 1.16
github.com/intel/goresctrl/pkg/blockio
github.com/intel/goresctrl/pkg/cgroups
github.com/intel/goresctrl/pkg/kubernetes
github.com/intel/goresctrl/pkg/log
github.com/intel/goresctrl/pkg/rdt