vendor: update google/cadvisor and opencontainers/runc

Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
Giuseppe Scrivano
2020-06-24 10:56:34 +02:00
parent 78d295d168
commit a6a3bf2eb4
632 changed files with 36493 additions and 89280 deletions

View File

@@ -7,6 +7,7 @@ go_library(
importpath = "github.com/google/cadvisor/accelerators",
visibility = ["//visibility:public"],
deps = [
"//vendor/github.com/google/cadvisor/container:go_default_library",
"//vendor/github.com/google/cadvisor/info/v1:go_default_library",
"//vendor/github.com/google/cadvisor/stats:go_default_library",
"//vendor/github.com/mindprince/gonvml:go_default_library",

View File

@@ -24,6 +24,7 @@ import (
"sync"
"time"
"github.com/google/cadvisor/container"
info "github.com/google/cadvisor/info/v1"
"github.com/google/cadvisor/stats"
@@ -48,18 +49,23 @@ var sysFsPCIDevicesPath = "/sys/bus/pci/devices/"
const nvidiaVendorID = "0x10de"
func NewNvidiaManager() stats.Manager {
func NewNvidiaManager(includedMetrics container.MetricSet) stats.Manager {
if !includedMetrics.Has(container.AcceleratorUsageMetrics) {
klog.V(2).Info("NVIDIA GPU metrics disabled")
return &stats.NoopManager{}
}
manager := &nvidiaManager{}
err := manager.setup()
if err != nil {
klog.Warningf("NVidia GPU metrics will not be available: %s", err)
klog.Warningf("NVIDIA GPU metrics will not be available: %s", err)
manager.Destroy()
return &stats.NoopManager{}
}
return manager
}
// setup initializes NVML if nvidia devices are present on the node.
// setup initializes NVML if NVIDIA devices are present on the node.
func (nm *nvidiaManager) setup() error {
if !detectDevices(nvidiaVendorID) {
return fmt.Errorf("no NVIDIA devices found")
@@ -104,21 +110,21 @@ var initializeNVML = func(nm *nvidiaManager) error {
nm.nvmlInitialized = true
numDevices, err := gonvml.DeviceCount()
if err != nil {
return fmt.Errorf("GPU metrics would not be available. Failed to get the number of nvidia devices: %v", err)
return fmt.Errorf("GPU metrics would not be available. Failed to get the number of NVIDIA devices: %v", err)
}
if numDevices == 0 {
return nil
}
klog.V(1).Infof("NVML initialized. Number of nvidia devices: %v", numDevices)
klog.V(1).Infof("NVML initialized. Number of NVIDIA devices: %v", numDevices)
nm.nvidiaDevices = make(map[int]gonvml.Device, numDevices)
for i := 0; i < int(numDevices); i++ {
device, err := gonvml.DeviceHandleByIndex(uint(i))
if err != nil {
return fmt.Errorf("Failed to get nvidia device handle %d: %v", i, err)
return fmt.Errorf("Failed to get NVIDIA device handle %d: %v", i, err)
}
minorNumber, err := device.MinorNumber()
if err != nil {
return fmt.Errorf("Failed to get nvidia device minor number: %v", err)
return fmt.Errorf("Failed to get NVIDIA device minor number: %v", err)
}
nm.nvidiaDevices[int(minorNumber)] = device
}
@@ -135,7 +141,7 @@ func (nm *nvidiaManager) Destroy() {
}
}
// GetCollector returns a collector that can fetch nvidia gpu metrics for nvidia devices
// GetCollector returns a collector that can fetch NVIDIA gpu metrics for NVIDIA devices
// present in the devices.list file in the given devicesCgroupPath.
func (nm *nvidiaManager) GetCollector(devicesCgroupPath string) (stats.Collector, error) {
nc := &nvidiaCollector{}
@@ -165,7 +171,7 @@ func (nm *nvidiaManager) GetCollector(devicesCgroupPath string) (stats.Collector
for _, minor := range nvidiaMinorNumbers {
device, ok := nm.nvidiaDevices[minor]
if !ok {
return &stats.NoopCollector{}, fmt.Errorf("nvidia device minor number %d not found in cached devices", minor)
return &stats.NoopCollector{}, fmt.Errorf("NVIDIA device minor number %d not found in cached devices", minor)
}
nc.devices = append(nc.devices, device)
}

View File

@@ -27,8 +27,6 @@ go_library(
"//vendor/github.com/google/cadvisor/fs:go_default_library",
"//vendor/github.com/google/cadvisor/info/v1:go_default_library",
"//vendor/github.com/google/cadvisor/watcher:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
"//vendor/github.com/opencontainers/runtime-spec/specs-go:go_default_library",
"//vendor/golang.org/x/net/context:go_default_library",
"//vendor/google.golang.org/grpc:go_default_library",

View File

@@ -22,8 +22,6 @@ import (
"time"
"github.com/containerd/containerd/errdefs"
cgroupfs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
libcontainerconfigs "github.com/opencontainers/runc/libcontainer/configs"
"golang.org/x/net/context"
"github.com/google/cadvisor/container"
@@ -69,11 +67,9 @@ func newContainerdContainerHandler(
cgroupPaths := common.MakeCgroupPaths(cgroupSubsystems.MountPoints, name)
// Generate the equivalent cgroup manager for this container.
cgroupManager := &cgroupfs.Manager{
Cgroups: &libcontainerconfigs.Cgroup{
Name: name,
},
Paths: cgroupPaths,
cgroupManager, err := containerlibcontainer.NewCgroupManager(name, cgroupPaths)
if err != nil {
return nil, err
}
id := ContainerNameToContainerdID(name)

View File

@@ -18,8 +18,7 @@ go_library(
"//vendor/github.com/google/cadvisor/fs:go_default_library",
"//vendor/github.com/google/cadvisor/info/v1:go_default_library",
"//vendor/github.com/google/cadvisor/watcher:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
],
)

View File

@@ -26,9 +26,7 @@ import (
containerlibcontainer "github.com/google/cadvisor/container/libcontainer"
"github.com/google/cadvisor/fs"
info "github.com/google/cadvisor/info/v1"
cgroupfs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
libcontainerconfigs "github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/cgroups"
)
type crioContainerHandler struct {
@@ -70,7 +68,7 @@ type crioContainerHandler struct {
reference info.ContainerReference
libcontainerHandler *containerlibcontainer.Handler
cgroupManager *cgroupfs.Manager
cgroupManager cgroups.Manager
rootFs string
pidKnown bool
}
@@ -94,11 +92,9 @@ func newCrioContainerHandler(
cgroupPaths := common.MakeCgroupPaths(cgroupSubsystems.MountPoints, name)
// Generate the equivalent cgroup manager for this container.
cgroupManager := &cgroupfs.Manager{
Cgroups: &libcontainerconfigs.Cgroup{
Name: name,
},
Paths: cgroupPaths,
cgroupManager, err := containerlibcontainer.NewCgroupManager(name, cgroupPaths)
if err != nil {
return nil, err
}
rootFs := "/"

View File

@@ -28,8 +28,6 @@ go_library(
"//vendor/github.com/google/cadvisor/machine:go_default_library",
"//vendor/github.com/google/cadvisor/watcher:go_default_library",
"//vendor/github.com/google/cadvisor/zfs:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
"//vendor/golang.org/x/net/context:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
],

View File

@@ -34,8 +34,6 @@ import (
dockercontainer "github.com/docker/docker/api/types/container"
docker "github.com/docker/docker/client"
cgroupfs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
libcontainerconfigs "github.com/opencontainers/runc/libcontainer/configs"
"golang.org/x/net/context"
"k8s.io/klog/v2"
)
@@ -136,11 +134,9 @@ func newDockerContainerHandler(
cgroupPaths := common.MakeCgroupPaths(cgroupSubsystems.MountPoints, name)
// Generate the equivalent cgroup manager for this container.
cgroupManager := &cgroupfs.Manager{
Cgroups: &libcontainerconfigs.Cgroup{
Name: name,
},
Paths: cgroupPaths,
cgroupManager, err := containerlibcontainer.NewCgroupManager(name, cgroupPaths)
if err != nil {
return nil, err
}
rootFs := "/"

View File

@@ -61,6 +61,7 @@ const (
PerfMetrics MetricKind = "perf_event"
ReferencedMemoryMetrics MetricKind = "referenced_memory"
CPUTopologyMetrics MetricKind = "cpu_topology"
ResctrlMetrics MetricKind = "resctrl"
)
// AllMetrics represents all kinds of metrics that cAdvisor supports.
@@ -83,6 +84,7 @@ var AllMetrics = MetricSet{
PerfMetrics: struct{}{},
ReferencedMemoryMetrics: struct{}{},
CPUTopologyMetrics: struct{}{},
ResctrlMetrics: struct{}{},
}
func (mk MetricKind) String() string {

View File

@@ -14,6 +14,9 @@ go_library(
"//vendor/github.com/google/cadvisor/info/v1:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
"//vendor/golang.org/x/sys/unix:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
],

View File

@@ -16,6 +16,7 @@ package libcontainer
import (
"bufio"
"bytes"
"encoding/json"
"flag"
"fmt"
@@ -32,10 +33,9 @@ import (
info "github.com/google/cadvisor/info/v1"
"golang.org/x/sys/unix"
"bytes"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/cgroups"
fs2 "github.com/opencontainers/runc/libcontainer/cgroups/fs2"
"k8s.io/klog/v2"
)
@@ -71,9 +71,21 @@ func NewHandler(cgroupManager cgroups.Manager, rootFs string, pid int, includedM
// Get cgroup and networking stats of the specified container
func (h *Handler) GetStats() (*info.ContainerStats, error) {
cgroupStats, err := h.cgroupManager.GetStats()
if err != nil {
return nil, err
var cgroupStats *cgroups.Stats
readCgroupStats := true
if cgroups.IsCgroup2UnifiedMode() {
// On cgroup v2 there are no stats at the root cgroup,
// so skip collecting them when this is the root.
if h.cgroupManager.Path("") == fs2.UnifiedMountpoint {
readCgroupStats = false
}
}
var err error
if readCgroupStats {
cgroupStats, err = h.cgroupManager.GetStats()
if err != nil {
return nil, err
}
}
libcontainerStats := &libcontainer.Stats{
CgroupStats: cgroupStats,
@@ -838,8 +850,13 @@ func setMemoryStats(s *cgroups.Stats, ret *info.ContainerStats) {
ret.Memory.HierarchicalData.Pgmajfault = v
}
inactiveFileKeyName := "total_inactive_file"
if cgroups.IsCgroup2UnifiedMode() {
inactiveFileKeyName = "inactive_file"
}
workingSet := ret.Memory.Usage
if v, ok := s.MemoryStats.Stats["total_inactive_file"]; ok {
if v, ok := s.MemoryStats.Stats[inactiveFileKeyName]; ok {
if workingSet < v {
workingSet = 0
} else {
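
The hunk ends mid-branch, but the whole change reduces to a clamped subtraction with a version-dependent stat key. A minimal sketch of the computation (illustrative, not part of the vendored code):

// workingSet is memory usage minus inactive file-backed memory, clamped at zero.
// The stat key is "total_inactive_file" on cgroup v1 and "inactive_file" on cgroup v2.
func workingSet(usage, inactiveFile uint64) uint64 {
	if usage < inactiveFile {
		return 0
	}
	return usage - inactiveFile
}

// e.g. workingSet(100<<20, 30<<20) == 70<<20, while workingSet(10, 20) == 0.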

View File

@@ -21,6 +21,10 @@ import (
"github.com/google/cadvisor/container"
"github.com/opencontainers/runc/libcontainer/cgroups"
fs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
fs2 "github.com/opencontainers/runc/libcontainer/cgroups/fs2"
configs "github.com/opencontainers/runc/libcontainer/configs"
"k8s.io/klog/v2"
)
@@ -165,3 +169,16 @@ func DiskStatsCopy(blkioStats []cgroups.BlkioStatEntry) (stat []info.PerDiskStat
}
return DiskStatsCopy1(diskStat)
}
func NewCgroupManager(name string, paths map[string]string) (cgroups.Manager, error) {
if cgroups.IsCgroup2UnifiedMode() {
path := paths["cpu"]
return fs2.NewManager(nil, path, false)
}
config := configs.Cgroup{
Name: name,
}
return fs.NewManager(&config, paths, false), nil
}
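
This new helper centralizes the cgroup v1/v2 split that each runtime handler open-coded before this commit. A hedged sketch of the call pattern the handlers above now share (import alias as in the handler files):

// On cgroup v2 only the unified hierarchy path matters, taken from the
// "cpu" entry of the paths map; on v1 a named fs.Manager wraps the
// per-subsystem paths. Either way the caller gets a cgroups.Manager.
cgroupManager, err := containerlibcontainer.NewCgroupManager(name, cgroupPaths)
if err != nil {
	return nil, err
}
cgroupStats, err := cgroupManager.GetStats() // same interface on both hierarchies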

View File

@@ -18,8 +18,6 @@ go_library(
"//vendor/github.com/google/cadvisor/info/v1:go_default_library",
"//vendor/github.com/google/cadvisor/machine:go_default_library",
"//vendor/github.com/google/cadvisor/watcher:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
"//vendor/k8s.io/utils/inotify:go_default_library",
],

View File

@@ -25,8 +25,6 @@ import (
info "github.com/google/cadvisor/info/v1"
"github.com/google/cadvisor/machine"
cgroupfs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/configs"
"k8s.io/klog/v2"
)
@@ -51,19 +49,16 @@ func isRootCgroup(name string) bool {
}
func newRawContainerHandler(name string, cgroupSubsystems *libcontainer.CgroupSubsystems, machineInfoFactory info.MachineInfoFactory, fsInfo fs.FsInfo, watcher *common.InotifyWatcher, rootFs string, includedMetrics container.MetricSet) (container.ContainerHandler, error) {
cgroupPaths := common.MakeCgroupPaths(cgroupSubsystems.MountPoints, name)
cHints, err := common.GetContainerHintsFromFile(*common.ArgContainerHints)
if err != nil {
return nil, err
}
// Generate the equivalent cgroup manager for this container.
cgroupManager := &cgroupfs.Manager{
Cgroups: &configs.Cgroup{
Name: name,
},
Paths: cgroupPaths,
cgroupPaths := common.MakeCgroupPaths(cgroupSubsystems.MountPoints, name)
cgroupManager, err := libcontainer.NewCgroupManager(name, cgroupPaths)
if err != nil {
return nil, err
}
var externalMounts []common.Mount
@@ -192,13 +187,18 @@ func fsToFsStats(fs *fs.Fs) info.FsStats {
func (h *rawContainerHandler) getFsStats(stats *info.ContainerStats) error {
var filesystems []fs.Fs
var err error
// Early exit if no disk metrics are to be collected.
if !h.includedMetrics.Has(container.DiskUsageMetrics) && !h.includedMetrics.Has(container.DiskIOMetrics) {
return nil
}
// Get Filesystem information only for the root cgroup.
if isRootCgroup(h.name) {
filesystems, err = h.fsInfo.GetGlobalFsInfo()
if err != nil {
return err
}
} else if h.includedMetrics.Has(container.DiskUsageMetrics) || h.includedMetrics.Has(container.DiskIOMetrics) {
} else {
if len(h.externalMounts) > 0 {
mountSet := make(map[string]struct{})
for _, mount := range h.externalMounts {
@@ -211,14 +211,14 @@ func (h *rawContainerHandler) getFsStats(stats *info.ContainerStats) error {
}
}
if isRootCgroup(h.name) || h.includedMetrics.Has(container.DiskUsageMetrics) {
if h.includedMetrics.Has(container.DiskUsageMetrics) {
for i := range filesystems {
fs := filesystems[i]
stats.Filesystem = append(stats.Filesystem, fsToFsStats(&fs))
}
}
if isRootCgroup(h.name) || h.includedMetrics.Has(container.DiskIOMetrics) {
if h.includedMetrics.Has(container.DiskIOMetrics) {
common.AssignDeviceNamesToDiskStats(&fsNamer{fs: filesystems, factory: h.machineInfoFactory}, &stats.DiskIo)
}

View File

@@ -329,7 +329,8 @@ func (i *RealFsInfo) updateContainerImagesPath(label string, mounts []mount.Moun
for _, m := range mounts {
if _, ok := containerImagePaths[m.MountPoint]; ok {
if useMount == nil || (len(useMount.MountPoint) < len(m.MountPoint)) {
useMount = &m
useMount = new(mount.MountInfo)
*useMount = m
}
}
}
@@ -418,7 +419,17 @@ func (i *RealFsInfo) GetFsInfoForPath(mountSet map[string]struct{}) ([]Fs, error
Major: uint(partition.major),
Minor: uint(partition.minor),
}
fs.DiskStats = diskStatsMap[device]
if val, ok := diskStatsMap[device]; ok {
fs.DiskStats = val
} else {
for k, v := range diskStatsMap {
if v.MajorNum == uint64(partition.major) && v.MinorNum == uint64(partition.minor) {
fs.DiskStats = diskStatsMap[k]
break
}
}
}
filesystems = append(filesystems, fs)
}
}
@@ -450,13 +461,22 @@ func getDiskStatsMap(diskStatsFile string) (map[string]DiskStats, error) {
}
// 8 50 sdd2 40 0 280 223 7 0 22 108 0 330 330
deviceName := path.Join("/dev", words[2])
var error error
devInfo := make([]uint64, 2)
for i := 0; i < len(devInfo); i++ {
devInfo[i], error = strconv.ParseUint(words[i], 10, 64)
if error != nil {
return nil, error
}
}
wordLength := len(words)
offset := 3
var stats = make([]uint64, wordLength-offset)
if len(stats) < 11 {
return nil, fmt.Errorf("could not parse all 11 columns of /proc/diskstats")
}
var error error
for i := offset; i < wordLength; i++ {
stats[i-offset], error = strconv.ParseUint(words[i], 10, 64)
if error != nil {
@@ -464,6 +484,8 @@ func getDiskStatsMap(diskStatsFile string) (map[string]DiskStats, error) {
}
}
diskStats := DiskStats{
MajorNum: devInfo[0],
MinorNum: devInfo[1],
ReadsCompleted: stats[0],
ReadsMerged: stats[1],
SectorsRead: stats[2],
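
Applied to the sample line in the comment above, the new fields resolve as follows (illustrative walkthrough):

// "8 50 sdd2 40 0 280 223 7 0 22 108 0 330 330"
// words[0]=8, words[1]=50  -> MajorNum=8, MinorNum=50 (the new fields)
// words[2]="sdd2"          -> deviceName="/dev/sdd2"
// words[3:] (offset 3)     -> ReadsCompleted=40, ReadsMerged=0, SectorsRead=280, ...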

View File

@@ -64,6 +64,8 @@ type Fs struct {
}
type DiskStats struct {
MajorNum uint64
MinorNum uint64
ReadsCompleted uint64
ReadsMerged uint64
SectorsRead uint64

View File

@@ -848,6 +848,58 @@ type PerfStat struct {
Cpu int `json:"cpu"`
}
// MemoryBandwidthStats corresponds to MBM (Memory Bandwidth Monitoring).
// See: https://01.org/cache-monitoring-technology
// See: https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt
type MemoryBandwidthStats struct {
// The 'mbm_total_bytes'.
TotalBytes uint64 `json:"mbm_total_bytes,omitempty"`
// The 'mbm_local_bytes'.
LocalBytes uint64 `json:"mbm_local_bytes,omitempty"`
}
// CacheStats corresponds to CMT (Cache Monitoring Technology).
// See: https://01.org/cache-monitoring-technology
// See: https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt
type CacheStats struct {
// The 'llc_occupancy'.
LLCOccupancy uint64 `json:"llc_occupancy,omitempty"`
}
// ResctrlStats corresponds to statistics from Resource Control.
type ResctrlStats struct {
// Each element in the array corresponds to one NUMA node's statistics.
MemoryBandwidth []MemoryBandwidthStats `json:"memory_bandwidth,omitempty"`
Cache []CacheStats `json:"cache,omitempty"`
}
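
For reference, a ResctrlStats value for a two-NUMA-node machine marshals to JSON along these lines (values borrowed from the test fixture later in this commit):

"resctrl": {
  "memory_bandwidth": [
    {"mbm_total_bytes": 4512312, "mbm_local_bytes": 2390393},
    {"mbm_total_bytes": 2173713, "mbm_local_bytes": 1231233}
  ],
  "cache": [
    {"llc_occupancy": 162626},
    {"llc_occupancy": 213777}
  ]
}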
// PerfUncoreStat represents value of a single monitored perf uncore event.
type PerfUncoreStat struct {
// Indicates scaling ratio for an event: time_running/time_enabled
// (amount of time that event was being measured divided by
// amount of time that event was enabled for).
// A value of 1.0 indicates that no multiplexing occurred. A value close
// to 0 indicates that the event was measured for only a short time and
// its value might be inaccurate.
// See: https://lwn.net/Articles/324756/
ScalingRatio float64 `json:"scaling_ratio"`
// Value represents value of perf event retrieved from OS. It is
// normalized against ScalingRatio and takes multiplexing into
// consideration.
Value uint64 `json:"value"`
// Name is human readable name of an event.
Name string `json:"name"`
// Socket that perf event was measured on.
Socket int `json:"socket"`
// PMU is Performance Monitoring Unit which collected these stats.
PMU string `json:"pmu"`
}
type UlimitSpec struct {
Name string `json:"name"`
SoftLimit int64 `json:"soft_limit"`
@@ -900,8 +952,15 @@ type ContainerStats struct {
// Statistics originating from perf events
PerfStats []PerfStat `json:"perf_stats,omitempty"`
// Statistics originating from perf uncore events.
// Applies only for root container.
PerfUncoreStats []PerfUncoreStat `json:"perf_uncore_stats,omitempty"`
// Referenced memory
ReferencedMemory uint64 `json:"referenced_memory,omitempty"`
// Resource Control (resctrl) statistics
Resctrl ResctrlStats `json:"resctrl,omitempty"`
}
func timeEq(t1, t2 time.Time, tolerance time.Duration) bool {

View File

@@ -47,9 +47,10 @@ type Node struct {
}
type Core struct {
Id int `json:"core_id"`
Threads []int `json:"thread_ids"`
Caches []Cache `json:"caches"`
Id int `json:"core_id"`
Threads []int `json:"thread_ids"`
Caches []Cache `json:"caches"`
SocketID int `json:"socket_id"`
}
type Cache struct {
@@ -70,6 +71,19 @@ func (n *Node) FindCore(id int) (bool, int) {
return false, -1
}
// FindCoreByThread returns true and the index of the Core in the Node's Cores array
// whose threads include the provided thread ID. If no such core is found, it returns false and -1.
func (n *Node) FindCoreByThread(thread int) (bool, int) {
for i, core := range n.Cores {
for _, t := range core.Threads {
if t == thread {
return true, i
}
}
}
return false, -1
}
func (n *Node) AddThread(thread int, core int) {
var coreIdx int
if core == -1 {

View File

@@ -120,7 +120,8 @@ type DeprecatedContainerStats struct {
HasMemory bool `json:"has_memory"`
Memory v1.MemoryStats `json:"memory,omitempty"`
// Hugepage statistics
HasHugetlb bool `json:"has_hugetlb"`
HasHugetlb bool `json:"has_hugetlb"`
Hugetlb map[string]v1.HugetlbStats `json:"hugetlb,omitempty"`
// Network statistics
HasNetwork bool `json:"has_network"`
Network NetworkStats `json:"network,omitempty"`
@@ -136,8 +137,15 @@ type DeprecatedContainerStats struct {
// Custom Metrics
HasCustomMetrics bool `json:"has_custom_metrics"`
CustomMetrics map[string][]v1.MetricVal `json:"custom_metrics,omitempty"`
// Perf events counters
PerfStats []v1.PerfStat `json:"perf_stats,omitempty"`
// Statistics originating from perf uncore events.
// Applies only for root container.
PerfUncoreStats []v1.PerfUncoreStat `json:"perf_uncore_stats,omitempty"`
// Referenced memory
ReferencedMemory uint64 `json:"referenced_memory,omitempty"`
// Resource Control (resctrl) statistics
Resctrl v1.ResctrlStats `json:"resctrl,omitempty"`
}
type ContainerStats struct {
@@ -168,8 +176,13 @@ type ContainerStats struct {
CustomMetrics map[string][]v1.MetricVal `json:"custom_metrics,omitempty"`
// Perf events counters
PerfStats []v1.PerfStat `json:"perf_stats,omitempty"`
// Statistics originating from perf uncore events.
// Applies only for root container.
PerfUncoreStats []v1.PerfUncoreStat `json:"perf_uncore_stats,omitempty"`
// Referenced memory
ReferencedMemory uint64 `json:"referenced_memory,omitempty"`
// Resource Control (resctrl) statistics
Resctrl v1.ResctrlStats `json:"resctrl,omitempty"`
}
type Percentiles struct {
@@ -275,6 +288,7 @@ type ProcessInfo struct {
CgroupPath string `json:"cgroup_path"`
Cmd string `json:"cmd"`
FdCount int `json:"fd_count"`
Psr int `json:"psr"`
}
type TcpStat struct {

View File

@@ -155,6 +155,12 @@ func ContainerStatsFromV1(containerName string, spec *v1.ContainerSpec, stats []
if len(val.PerfStats) > 0 {
stat.PerfStats = val.PerfStats
}
if len(val.PerfUncoreStats) > 0 {
stat.PerfUncoreStats = val.PerfUncoreStats
}
if len(val.Resctrl.MemoryBandwidth) > 0 || len(val.Resctrl.Cache) > 0 {
stat.Resctrl = val.Resctrl
}
// TODO(rjnagal): Handle load stats.
newStats = append(newStats, stat)
}
@@ -169,6 +175,7 @@ func DeprecatedStatsFromV1(cont *v1.ContainerInfo) []DeprecatedContainerStats {
Timestamp: val.Timestamp,
HasCpu: cont.Spec.HasCpu,
HasMemory: cont.Spec.HasMemory,
HasHugetlb: cont.Spec.HasHugetlb,
HasNetwork: cont.Spec.HasNetwork,
HasFilesystem: cont.Spec.HasFilesystem,
HasDiskIo: cont.Spec.HasDiskIo,
@@ -188,6 +195,9 @@ func DeprecatedStatsFromV1(cont *v1.ContainerInfo) []DeprecatedContainerStats {
if stat.HasMemory {
stat.Memory = val.Memory
}
if stat.HasHugetlb {
stat.Hugetlb = val.Hugetlb
}
if stat.HasNetwork {
stat.Network.Interfaces = val.Network.Interfaces
}
@@ -203,6 +213,15 @@ func DeprecatedStatsFromV1(cont *v1.ContainerInfo) []DeprecatedContainerStats {
if stat.HasCustomMetrics {
stat.CustomMetrics = val.CustomMetrics
}
if len(val.PerfStats) > 0 {
stat.PerfStats = val.PerfStats
}
if len(val.PerfUncoreStats) > 0 {
stat.PerfUncoreStats = val.PerfUncoreStats
}
if len(val.Resctrl.MemoryBandwidth) > 0 || len(val.Resctrl.Cache) > 0 {
stat.Resctrl = val.Resctrl
}
// TODO(rjnagal): Handle load stats.
stats = append(stats, stat)
}

View File

@@ -22,6 +22,8 @@ import (
"strings"
"time"
"golang.org/x/sys/unix"
"github.com/google/cadvisor/fs"
info "github.com/google/cadvisor/info/v1"
"github.com/google/cadvisor/nvm"
@@ -30,8 +32,6 @@ import (
"github.com/google/cadvisor/utils/sysinfo"
"k8s.io/klog/v2"
"golang.org/x/sys/unix"
)
const hugepagesDirectory = "/sys/kernel/mm/hugepages/"

View File

@@ -16,6 +16,7 @@
package machine
import (
"bytes"
"fmt"
"io/ioutil"
"os"
@@ -246,6 +247,17 @@ func getUniqueCPUPropertyCount(cpuBusPath string, propertyName string) int {
}
uniques := make(map[string]bool)
for _, sysCPUPath := range sysCPUPaths {
onlinePath := filepath.Join(sysCPUPath, "online")
onlineVal, err := ioutil.ReadFile(onlinePath)
if err != nil {
klog.Warningf("Cannot determine CPU %s online state, skipping", sysCPUPath)
continue
}
onlineVal = bytes.TrimSpace(onlineVal)
if len(onlineVal) == 0 || onlineVal[0] != '1' { // sysfs reports "1" for online CPUs
klog.Warningf("CPU %s is offline, skipping", sysCPUPath)
continue
}
propertyPath := filepath.Join(sysCPUPath, sysFsCPUTopology, propertyName)
propertyVal, err := ioutil.ReadFile(propertyPath)
if err != nil {

View File

@@ -24,6 +24,7 @@ go_library(
"//vendor/github.com/google/cadvisor/machine:go_default_library",
"//vendor/github.com/google/cadvisor/nvm:go_default_library",
"//vendor/github.com/google/cadvisor/perf:go_default_library",
"//vendor/github.com/google/cadvisor/resctrl:go_default_library",
"//vendor/github.com/google/cadvisor/stats:go_default_library",
"//vendor/github.com/google/cadvisor/summary:go_default_library",
"//vendor/github.com/google/cadvisor/utils/cpuload:go_default_library",
@@ -32,6 +33,8 @@ go_library(
"//vendor/github.com/google/cadvisor/version:go_default_library",
"//vendor/github.com/google/cadvisor/watcher:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/intelrdt:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
"//vendor/k8s.io/utils/clock:go_default_library",
],

View File

@@ -94,6 +94,9 @@ type containerData struct {
// perfCollector updates stats for perf_event cgroup controller.
perfCollector stats.Collector
// resctrlCollector updates stats for resctrl controller.
resctrlCollector stats.Collector
}
// jitter returns a time.Duration between duration and duration + maxFactor * duration,
@@ -159,7 +162,7 @@ func (cd *containerData) notifyOnDemand() {
func (cd *containerData) GetInfo(shouldUpdateSubcontainers bool) (*containerInfo, error) {
// Get spec and subcontainers.
if cd.clock.Since(cd.infoLastUpdatedTime) > 5*time.Second {
if cd.clock.Since(cd.infoLastUpdatedTime) > 5*time.Second || shouldUpdateSubcontainers {
err := cd.updateSpec()
if err != nil {
return nil, err
@@ -286,12 +289,12 @@ func (cd *containerData) GetProcessList(cadvisorContainer string, inHostNamespac
if !inHostNamespace {
rootfs = "/rootfs"
}
format := "user,pid,ppid,stime,pcpu,pmem,rss,vsz,stat,time,comm,cgroup"
format := "user,pid,ppid,stime,pcpu,pmem,rss,vsz,stat,time,comm,psr,cgroup"
out, err := cd.getPsOutput(inHostNamespace, format)
if err != nil {
return nil, err
}
expectedFields := 12
expectedFields := 13
processes := []v2.ProcessInfo{}
lines := strings.Split(string(out), "\n")
for _, line := range lines[1:] {
@@ -330,7 +333,12 @@ func (cd *containerData) GetProcessList(cadvisorContainer string, inHostNamespac
}
// convert to bytes
vs *= 1024
cgroup, err := cd.getCgroupPath(fields[11])
psr, err := strconv.Atoi(fields[11])
if err != nil {
return nil, fmt.Errorf("invalid pid %q: %v", fields[1], err)
}
cgroup, err := cd.getCgroupPath(fields[12])
if err != nil {
return nil, fmt.Errorf("could not parse cgroup path from %q: %v", fields[11], err)
}
@@ -368,6 +376,7 @@ func (cd *containerData) GetProcessList(cadvisorContainer string, inHostNamespac
Cmd: fields[10],
CgroupPath: cgroupPath,
FdCount: fdCount,
Psr: psr,
})
}
}
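
With the extended format string, each ps output line now carries 13 fields, psr at index 11 and the cgroup last. A hypothetical line and its mapping:

// user pid ppid stime %cpu %mem rss   vsz    stat time     comm    psr cgroup
// root 1   0    Jun20 0.1  0.2  10240 225280 Ss   00:01:05 systemd 3   1:name=systemd:/init.scope
// fields[11] == "3"                          -> ProcessInfo.Psr
// fields[12] == "1:name=systemd:/init.scope" -> resolved via getCgroupPath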
@@ -400,6 +409,7 @@ func newContainerData(containerName string, memoryCache *memory.InMemoryCache, h
clock: clock,
perfCollector: &stats.NoopCollector{},
nvidiaCollector: &stats.NoopCollector{},
resctrlCollector: &stats.NoopCollector{},
}
cont.info.ContainerReference = ref
@@ -641,6 +651,8 @@ func (cd *containerData) updateStats() error {
perfStatsErr := cd.perfCollector.UpdateStats(stats)
resctrlStatsErr := cd.resctrlCollector.UpdateStats(stats)
ref, err := cd.handler.ContainerReference()
if err != nil {
// Ignore errors if the container is dead.
@@ -669,6 +681,10 @@ func (cd *containerData) updateStats() error {
klog.Errorf("error occurred while collecting perf stats for container %s: %s", cInfo.Name, err)
return perfStatsErr
}
if resctrlStatsErr != nil {
klog.Errorf("error occurred while collecting resctrl stats for container %s: %s", cInfo.Name, err)
return resctrlStatsErr
}
return customStatsErr
}

View File

@@ -18,6 +18,7 @@ package manager
import (
"flag"
"fmt"
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
"net/http"
"os"
"path"
@@ -39,6 +40,7 @@ import (
"github.com/google/cadvisor/machine"
"github.com/google/cadvisor/nvm"
"github.com/google/cadvisor/perf"
"github.com/google/cadvisor/resctrl"
"github.com/google/cadvisor/stats"
"github.com/google/cadvisor/utils/oomparser"
"github.com/google/cadvisor/utils/sysfs"
@@ -46,6 +48,8 @@ import (
"github.com/google/cadvisor/watcher"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/intelrdt"
"k8s.io/klog/v2"
"k8s.io/utils/clock"
)
@@ -148,11 +152,18 @@ func New(memoryCache *memory.InMemoryCache, sysfs sysfs.SysFs, houskeepingConfig
}
// Detect the container we are running on.
selfContainer, err := cgroups.GetOwnCgroupPath("cpu")
if err != nil {
return nil, err
selfContainer := "/"
var err error
// Avoid using GetOwnCgroupPath on cgroup v2 as it is not supported by libcontainer
if cgroups.IsCgroup2UnifiedMode() {
klog.Warningf("Cannot detect current cgroup on cgroup v2")
} else {
selfContainer, err = cgroups.GetOwnCgroupPath("cpu")
if err != nil {
return nil, err
}
klog.V(2).Infof("cAdvisor running in container: %q", selfContainer)
}
klog.V(2).Infof("cAdvisor running in container: %q", selfContainer)
context := fs.Context{}
@@ -190,7 +201,7 @@ func New(memoryCache *memory.InMemoryCache, sysfs sysfs.SysFs, houskeepingConfig
containerWatchers: []watcher.ContainerWatcher{},
eventsChannel: eventsChannel,
collectorHTTPClient: collectorHTTPClient,
nvidiaManager: accelerators.NewNvidiaManager(),
nvidiaManager: accelerators.NewNvidiaManager(includedMetricsSet),
rawContainerCgroupPathPrefixWhiteList: rawContainerCgroupPathPrefixWhiteList,
}
@@ -201,11 +212,16 @@ func New(memoryCache *memory.InMemoryCache, sysfs sysfs.SysFs, houskeepingConfig
newManager.machineInfo = *machineInfo
klog.V(1).Infof("Machine: %+v", newManager.machineInfo)
newManager.perfManager, err = perf.NewManager(perfEventsFile, machineInfo.NumCores)
newManager.perfManager, err = perf.NewManager(perfEventsFile, machineInfo.NumCores, machineInfo.Topology)
if err != nil {
return nil, err
}
newManager.resctrlManager, err = resctrl.NewManager(selfContainer)
if err != nil {
klog.V(4).Infof("Cannot gather resctrl metrics: %v", err)
}
versionInfo, err := getVersionInfo()
if err != nil {
return nil, err
@@ -246,6 +262,7 @@ type manager struct {
collectorHTTPClient *http.Client
nvidiaManager stats.Manager
perfManager stats.Manager
resctrlManager stats.Manager
// List of raw container cgroup path prefix whitelist.
rawContainerCgroupPathPrefixWhiteList []string
}
@@ -545,6 +562,9 @@ func (m *manager) getSubcontainers(containerName string) map[string]*containerDa
// Get all the unique subcontainers of the specified container
matchedName := path.Join(containerName, "/")
for i := range m.containers {
if m.containers[i] == nil {
continue
}
name := m.containers[i].info.Name
if name == containerName || strings.HasPrefix(name, matchedName) {
containersMap[m.containers[i].info.Name] = m.containers[i]
@@ -650,6 +670,7 @@ func (m *manager) containerDataSliceToContainerInfoSlice(containers []*container
cinfo, err := m.containerDataToContainerInfo(containers[i], query)
if err != nil {
// Skip containers with errors, we try to degrade gracefully.
klog.V(4).Infof("convert container data to container info failed with error %s", err.Error())
continue
}
output = append(output, cinfo)
@@ -906,7 +927,14 @@ func (m *manager) createContainerLocked(containerName string, watchSource watche
if err != nil {
return err
}
if !cgroups.IsCgroup2UnifiedMode() {
if cgroups.IsCgroup2UnifiedMode() {
perfCgroupPath := path.Join(fs2.UnifiedMountpoint, containerName)
cont.perfCollector, err = m.perfManager.GetCollector(perfCgroupPath)
if err != nil {
klog.Infof("perf_event metrics will not be available for container %s: %s", containerName, err)
}
} else {
devicesCgroupPath, err := handler.GetCgroupPath("devices")
if err != nil {
klog.Warningf("Error getting devices cgroup path: %v", err)
@@ -916,18 +944,27 @@ func (m *manager) createContainerLocked(containerName string, watchSource watche
klog.V(4).Infof("GPU metrics may be unavailable/incomplete for container %s: %s", cont.info.Name, err)
}
}
perfCgroupPath, err := handler.GetCgroupPath("perf_event")
if err != nil {
klog.Warningf("Error getting perf_event cgroup path: %q", err)
} else {
cont.perfCollector, err = m.perfManager.GetCollector(perfCgroupPath)
if err != nil {
klog.Infof("perf_event metrics will not be available for container %s: %s", cont.info.Name, err)
klog.Infof("perf_event metrics will not be available for container %s: %s", containerName, err)
}
}
}
resctrlPath, err := intelrdt.GetIntelRdtPath(containerName)
if err != nil {
klog.Warningf("Error getting resctrl path: %q", err)
} else {
cont.resctrlCollector, err = m.resctrlManager.GetCollector(resctrlPath)
if err != nil {
klog.Infof("resctrl metrics will not be available for container %s: %s", cont.info.Name, err)
}
}
// Add collectors
labels := handler.GetContainerLabels()
collectorConfigs := collector.GetCollectorConfigs(labels)

View File

@@ -1545,11 +1545,11 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetri
},
}...)
}
if c.includedMetrics.Has(container.PerfMetrics) {
if includedMetrics.Has(container.PerfMetrics) {
c.containerMetrics = append(c.containerMetrics, []containerMetric{
{
name: "container_perf_metric",
help: "Perf event metric",
name: "container_perf_events_total",
help: "Perf event metric.",
valueType: prometheus.CounterValue,
extraLabels: []string{"cpu", "event"},
getValues: func(s *info.ContainerStats) metricValues {
@@ -1565,8 +1565,8 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetri
},
},
{
name: "container_perf_metric_scaling_ratio",
help: "Perf event metric scaling ratio",
name: "container_perf_events_scaling_ratio",
help: "Perf event metric scaling ratio.",
valueType: prometheus.GaugeValue,
extraLabels: []string{"cpu", "event"},
getValues: func(s *info.ContainerStats) metricValues {
@@ -1581,6 +1581,40 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetri
return values
},
},
{
name: "container_perf_uncore_events_total",
help: "Perf uncore event metric.",
valueType: prometheus.CounterValue,
extraLabels: []string{"socket", "event", "pmu"},
getValues: func(s *info.ContainerStats) metricValues {
values := make(metricValues, 0, len(s.PerfUncoreStats))
for _, metric := range s.PerfUncoreStats {
values = append(values, metricValue{
value: float64(metric.Value),
labels: []string{strconv.Itoa(metric.Socket), metric.Name, metric.PMU},
timestamp: s.Timestamp,
})
}
return values
},
},
{
name: "container_perf_uncore_events_scaling_ratio",
help: "Perf uncore event metric scaling ratio.",
valueType: prometheus.GaugeValue,
extraLabels: []string{"socket", "event", "pmu"},
getValues: func(s *info.ContainerStats) metricValues {
values := make(metricValues, 0, len(s.PerfUncoreStats))
for _, metric := range s.PerfUncoreStats {
values = append(values, metricValue{
value: metric.ScalingRatio,
labels: []string{strconv.Itoa(metric.Socket), metric.Name, metric.PMU},
timestamp: s.Timestamp,
})
}
return values
},
},
}...)
}
if includedMetrics.Has(container.ReferencedMemoryMetrics) {
@@ -1595,6 +1629,64 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetri
},
}...)
}
if includedMetrics.Has(container.ResctrlMetrics) {
c.containerMetrics = append(c.containerMetrics, []containerMetric{
{
name: "container_memory_bandwidth_bytes",
help: "Total memory bandwidth usage statistics for container counted with RDT Memory Bandwidth Monitoring (MBM).",
valueType: prometheus.GaugeValue,
extraLabels: []string{prometheusNodeLabelName},
getValues: func(s *info.ContainerStats) metricValues {
numberOfNUMANodes := len(s.Resctrl.MemoryBandwidth)
metrics := make(metricValues, numberOfNUMANodes)
for numaNode, stats := range s.Resctrl.MemoryBandwidth {
metrics[numaNode] = metricValue{
value: float64(stats.TotalBytes),
timestamp: s.Timestamp,
labels: []string{strconv.Itoa(numaNode)},
}
}
return metrics
},
},
{
name: "container_memory_bandwidth_local_bytes",
help: "Local memory bandwidth usage statistics for container counted with RDT Memory Bandwidth Monitoring (MBM).",
valueType: prometheus.GaugeValue,
extraLabels: []string{prometheusNodeLabelName},
getValues: func(s *info.ContainerStats) metricValues {
numberOfNUMANodes := len(s.Resctrl.MemoryBandwidth)
metrics := make(metricValues, numberOfNUMANodes)
for numaNode, stats := range s.Resctrl.MemoryBandwidth {
metrics[numaNode] = metricValue{
value: float64(stats.LocalBytes),
timestamp: s.Timestamp,
labels: []string{strconv.Itoa(numaNode)},
}
}
return metrics
},
},
{
name: "container_llc_occupancy_bytes",
help: "Last level cache usage statistics for container counted with RDT Memory Bandwidth Monitoring (MBM).",
valueType: prometheus.GaugeValue,
extraLabels: []string{prometheusNodeLabelName},
getValues: func(s *info.ContainerStats) metricValues {
numberOfNUMANodes := len(s.Resctrl.Cache)
metrics := make(metricValues, numberOfNUMANodes)
for numaNode, stats := range s.Resctrl.Cache {
metrics[numaNode] = metricValue{
value: float64(stats.LLCOccupancy),
timestamp: s.Timestamp,
labels: []string{strconv.Itoa(numaNode)},
}
}
return metrics
},
},
}...)
}
return c
}
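
With ResctrlMetrics enabled, the three collectors above emit one sample per NUMA node. Illustrative exposition output, using values from the test fixture below and assuming prometheusNodeLabelName resolves to "node":

container_memory_bandwidth_bytes{node="0"} 4512312
container_memory_bandwidth_local_bytes{node="0"} 2390393
container_llc_occupancy_bytes{node="0"} 162626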

View File

@@ -648,7 +648,43 @@ func (p testSubcontainersInfoProvider) SubcontainersInfo(string, *info.Container
Cpu: 1,
},
},
PerfUncoreStats: []info.PerfUncoreStat{
{
ScalingRatio: 1.0,
Value: 1231231512.0,
Name: "cas_count_read",
Socket: 0,
PMU: "uncore_imc_0",
},
{
ScalingRatio: 1.0,
Value: 1111231331.0,
Name: "cas_count_read",
Socket: 1,
PMU: "uncore_imc_0",
},
},
ReferencedMemory: 1234,
Resctrl: info.ResctrlStats{
MemoryBandwidth: []info.MemoryBandwidthStats{
{
TotalBytes: 4512312,
LocalBytes: 2390393,
},
{
TotalBytes: 2173713,
LocalBytes: 1231233,
},
},
Cache: []info.CacheStats{
{
LLCOccupancy: 162626,
},
{
LLCOccupancy: 213777,
},
},
},
},
},
},

View File

@@ -12,6 +12,7 @@ go_library(
importpath = "github.com/google/cadvisor/perf",
visibility = ["//visibility:public"],
deps = [
"//vendor/github.com/google/cadvisor/info/v1:go_default_library",
"//vendor/github.com/google/cadvisor/stats:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
],

View File

@@ -31,18 +31,21 @@ import (
"sync"
"unsafe"
info "github.com/google/cadvisor/info/v1"
"golang.org/x/sys/unix"
"k8s.io/klog/v2"
info "github.com/google/cadvisor/info/v1"
"github.com/google/cadvisor/stats"
)
type collector struct {
cgroupPath string
events Events
events PerfEvents
cpuFiles map[string]map[int]readerCloser
cpuFilesLock sync.Mutex
numCores int
eventToCustomEvent map[Event]*CustomEvent
uncore stats.Collector
}
var (
@@ -61,50 +64,68 @@ func init() {
isLibpfmInitialized = true
}
func newCollector(cgroupPath string, events Events, numCores int) *collector {
collector := &collector{cgroupPath: cgroupPath, events: events, cpuFiles: map[string]map[int]readerCloser{}, numCores: numCores}
func newCollector(cgroupPath string, events PerfEvents, numCores int, topology []info.Node) *collector {
collector := &collector{cgroupPath: cgroupPath, events: events, cpuFiles: map[string]map[int]readerCloser{}, numCores: numCores, uncore: NewUncoreCollector(cgroupPath, events, topology)}
mapEventsToCustomEvents(collector)
return collector
}
func (c *collector) UpdateStats(stats *info.ContainerStats) error {
err := c.uncore.UpdateStats(stats)
if err != nil {
klog.Errorf("Failed to get uncore perf event stats: %v", err)
}
c.cpuFilesLock.Lock()
defer c.cpuFilesLock.Unlock()
stats.PerfStats = []info.PerfStat{}
klog.V(5).Infof("Attempting to update perf_event stats from cgroup %q", c.cgroupPath)
for name, files := range c.cpuFiles {
for cpu, file := range files {
buf := make([]byte, 32)
_, err := file.Read(buf)
for name, cpus := range c.cpuFiles {
for cpu, file := range cpus {
stat, err := readPerfStat(file, name, cpu)
if err != nil {
klog.Warningf("Unable to read from perf_event file (event: %q, CPU: %d) for %q", name, cpu, c.cgroupPath)
klog.Warningf("Unable to read from perf_event_file (event: %q, CPU: %d) for %q: %q", name, cpu, c.cgroupPath, err.Error())
continue
}
perfData := &ReadFormat{}
reader := bytes.NewReader(buf)
err = binary.Read(reader, binary.LittleEndian, perfData)
if err != nil {
klog.Warningf("Unable to decode from binary format read from perf_event file (event: %q, CPU: %d) for %q", name, cpu, c.cgroupPath)
continue
}
klog.V(5).Infof("Read metric for event %q for cpu %d from cgroup %q: %d", name, cpu, c.cgroupPath, perfData.Value)
scalingRatio := 1.0
if perfData.TimeEnabled != 0 {
scalingRatio = float64(perfData.TimeRunning) / float64(perfData.TimeEnabled)
}
stat := info.PerfStat{
Value: uint64(float64(perfData.Value) / scalingRatio),
Name: name,
ScalingRatio: scalingRatio,
Cpu: cpu,
}
stats.PerfStats = append(stats.PerfStats, stat)
klog.V(5).Infof("Read perf event (event: %q, CPU: %d) for %q: %d", name, cpu, c.cgroupPath, stat.Value)
stats.PerfStats = append(stats.PerfStats, *stat)
}
}
return nil
}
func readPerfStat(file readerCloser, name string, cpu int) (*info.PerfStat, error) {
buf := make([]byte, 32)
_, err := file.Read(buf)
if err != nil {
return nil, err
}
perfData := &ReadFormat{}
reader := bytes.NewReader(buf)
err = binary.Read(reader, binary.LittleEndian, perfData)
if err != nil {
return nil, err
}
scalingRatio := 1.0
if perfData.TimeEnabled != 0 {
scalingRatio = float64(perfData.TimeRunning) / float64(perfData.TimeEnabled)
}
stat := info.PerfStat{
Value: uint64(float64(perfData.Value) / scalingRatio),
Name: name,
ScalingRatio: scalingRatio,
Cpu: cpu,
}
return &stat, nil
}
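
A quick sanity check of the normalization in readPerfStat (illustrative numbers): if an event was enabled for 200ms but actually measured for only 50ms, the kernel multiplexed it, and the raw count is extrapolated:

scalingRatio := float64(50) / float64(200)    // 0.25: measured 25% of the enabled time
value := uint64(float64(1000) / scalingRatio) // raw count 1000 -> estimated 4000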
func (c *collector) setup() error {
cgroup, err := os.Open(c.cgroupPath)
if err != nil {
@@ -115,7 +136,7 @@ func (c *collector) setup() error {
c.cpuFilesLock.Lock()
defer c.cpuFilesLock.Unlock()
cgroupFd := int(cgroup.Fd())
for _, group := range c.events.Events {
for _, group := range c.events.Core.Events {
customEvent, ok := c.eventToCustomEvent[group[0]]
var err error
if ok {
@@ -127,6 +148,7 @@ func (c *collector) setup() error {
return err
}
}
return nil
}
@@ -141,10 +163,10 @@ func (c *collector) setupRawNonGrouped(event *CustomEvent, cgroup int) error {
return nil
}
func (c *collector) registerEvent(config *unix.PerfEventAttr, name string, cgroup int) error {
func (c *collector) registerEvent(config *unix.PerfEventAttr, name string, pid int) error {
var cpu int
for cpu = 0; cpu < c.numCores; cpu++ {
pid, groupFd, flags := cgroup, -1, unix.PERF_FLAG_FD_CLOEXEC|unix.PERF_FLAG_PID_CGROUP
groupFd, flags := -1, unix.PERF_FLAG_FD_CLOEXEC|unix.PERF_FLAG_PID_CGROUP
fd, err := unix.PerfEventOpen(config, pid, cpu, groupFd, flags)
if err != nil {
return fmt.Errorf("setting up perf event %#v failed: %q", config, err)
@@ -164,35 +186,18 @@ func (c *collector) addEventFile(name string, cpu int, perfFile *os.File) {
if !ok {
c.cpuFiles[name] = map[int]readerCloser{}
}
c.cpuFiles[name][cpu] = perfFile
}
func (c *collector) setupNonGrouped(name string, cgroup int) error {
if !isLibpfmInitialized {
return fmt.Errorf("libpfm4 is not initialized, cannot proceed with setting perf events up")
perfEventAttr, err := getPerfEventAttr(name)
if err != nil {
return err
}
defer C.free(unsafe.Pointer(perfEventAttr))
klog.V(5).Infof("Setting up non-grouped perf event %s", name)
perfEventAttrMemory := C.malloc(C.ulong(unsafe.Sizeof(unix.PerfEventAttr{})))
defer C.free(perfEventAttrMemory)
event := pfmPerfEncodeArgT{}
perfEventAttr := (*unix.PerfEventAttr)(perfEventAttrMemory)
fstr := C.CString("")
event.fstr = unsafe.Pointer(fstr)
event.attr = perfEventAttrMemory
event.size = C.ulong(unsafe.Sizeof(event))
cSafeName := C.CString(name)
pErr := C.pfm_get_os_event_encoding(cSafeName, C.PFM_PLM0|C.PFM_PLM3, C.PFM_OS_PERF_EVENT, unsafe.Pointer(&event))
if pErr != C.PFM_SUCCESS {
return fmt.Errorf("unable to transform event name %s to perf_event_attr: %d", name, int(pErr))
}
klog.V(5).Infof("perf_event_attr: %#v", perfEventAttr)
setAttributes(perfEventAttr)
return c.registerEvent(perfEventAttr, string(name), cgroup)
return c.registerEvent(perfEventAttr, name, cgroup)
}
func createPerfEventAttr(event CustomEvent) *unix.PerfEventAttr {
@@ -214,6 +219,34 @@ func createPerfEventAttr(event CustomEvent) *unix.PerfEventAttr {
return config
}
func getPerfEventAttr(name string) (*unix.PerfEventAttr, error) {
if !isLibpfmInitialized {
return nil, fmt.Errorf("libpfm4 is not initialized, cannot proceed with setting perf events up")
}
perfEventAttrMemory := C.malloc(C.ulong(unsafe.Sizeof(unix.PerfEventAttr{})))
event := pfmPerfEncodeArgT{}
perfEventAttr := (*unix.PerfEventAttr)(perfEventAttrMemory)
fstr := C.CString("")
event.fstr = unsafe.Pointer(fstr)
event.attr = perfEventAttrMemory
event.size = C.ulong(unsafe.Sizeof(event))
cSafeName := C.CString(name)
pErr := C.pfm_get_os_event_encoding(cSafeName, C.PFM_PLM0|C.PFM_PLM3, C.PFM_OS_PERF_EVENT, unsafe.Pointer(&event))
if pErr != C.PFM_SUCCESS {
return nil, fmt.Errorf("unable to transform event name %s to perf_event_attr: %v", name, int(pErr))
}
klog.V(5).Infof("perf_event_attr: %#v", perfEventAttr)
setAttributes(perfEventAttr)
return perfEventAttr, nil
}
func setAttributes(config *unix.PerfEventAttr) {
config.Sample_type = perfSampleIdentifier
config.Read_format = unix.PERF_FORMAT_TOTAL_TIME_ENABLED | unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_ID
@@ -222,6 +255,7 @@ func setAttributes(config *unix.PerfEventAttr) {
}
func (c *collector) Destroy() {
c.uncore.Destroy()
c.cpuFilesLock.Lock()
defer c.cpuFilesLock.Unlock()
@@ -233,7 +267,6 @@ func (c *collector) Destroy() {
klog.Warningf("Unable to close perf_event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu)
}
}
delete(c.cpuFiles, name)
}
}
@@ -255,7 +288,7 @@ func Finalize() {
func mapEventsToCustomEvents(collector *collector) {
collector.eventToCustomEvent = map[Event]*CustomEvent{}
for key, event := range collector.events.CustomEvents {
collector.eventToCustomEvent[event.Name] = &collector.events.CustomEvents[key]
for key, event := range collector.events.Core.CustomEvents {
collector.eventToCustomEvent[event.Name] = &collector.events.Core.CustomEvents[key]
}
}

View File

@@ -24,9 +24,16 @@ import (
"k8s.io/klog/v2"
)
type PerfEvents struct {
// Core perf events to be measured.
Core Events `json:"core,omitempty"`
// Uncore perf events to be measured.
Uncore Events `json:"uncore,omitempty"`
}
type Events struct {
// List of perf events' names to be measured. Any value found in
// output of perf list can be used.
// List of perf events' names to be measured.
Events [][]Event `json:"events"`
// List of custom perf events' to be measured. It is impossible to
@@ -40,7 +47,7 @@ type Event string
type CustomEvent struct {
// Type of the event. See perf_event_attr documentation
// at man perf_event_open.
Type uint32 `json:"type"`
Type uint32 `json:"type,omitempty"`
// Symbolically formed event like:
// pmu/config=PerfEvent.Config[0],config1=PerfEvent.Config[1],config2=PerfEvent.Config[2]
@@ -73,11 +80,11 @@ func (c *Config) UnmarshalJSON(b []byte) error {
return nil
}
func parseConfig(file *os.File) (events Events, err error) {
func parseConfig(file *os.File) (events PerfEvents, err error) {
decoder := json.NewDecoder(file)
err = decoder.Decode(&events)
if err != nil {
err = fmt.Errorf("unable to load perf events cofiguration from %q: %q", file.Name(), err)
err = fmt.Errorf("unable to load perf events configuration from %q: %q", file.Name(), err)
return
}
return
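
Under the new PerfEvents layout, a configuration file splits into core and uncore sections, e.g. (a hypothetical config; event names are examples only):

{
  "core": {
    "events": [["instructions"], ["cycles"]]
  },
  "uncore": {
    "events": [["uncore_imc_0/cas_count_read"]]
  }
}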

View File

@@ -21,16 +21,18 @@ import (
"fmt"
"os"
info "github.com/google/cadvisor/info/v1"
"github.com/google/cadvisor/stats"
)
type manager struct {
events Events
events PerfEvents
numCores int
topology []info.Node
stats.NoopDestroy
}
func NewManager(configFile string, numCores int) (stats.Manager, error) {
func NewManager(configFile string, numCores int, topology []info.Node) (stats.Manager, error) {
if configFile == "" {
return &stats.NoopManager{}, nil
}
@@ -49,11 +51,11 @@ func NewManager(configFile string, numCores int) (stats.Manager, error) {
return nil, fmt.Errorf("event grouping is not supported you must modify config file at %s", configFile)
}
return &manager{events: config, numCores: numCores}, nil
return &manager{events: config, numCores: numCores, topology: topology}, nil
}
func areGroupedEventsUsed(events Events) bool {
for _, group := range events.Events {
func areGroupedEventsUsed(events PerfEvents) bool {
for _, group := range events.Core.Events {
if len(group) > 1 {
return true
}
@@ -62,7 +64,7 @@ func areGroupedEventsUsed(events Events) bool {
}
func (m *manager) GetCollector(cgroupPath string) (stats.Collector, error) {
collector := newCollector(cgroupPath, m.events, m.numCores)
collector := newCollector(cgroupPath, m.events, m.numCores, m.topology)
err := collector.setup()
if err != nil {
collector.Destroy()

View File

@@ -18,12 +18,13 @@
package perf
import (
info "github.com/google/cadvisor/info/v1"
"github.com/google/cadvisor/stats"
"k8s.io/klog/v2"
)
func NewManager(configFile string, numCores int) (stats.Manager, error) {
func NewManager(configFile string, numCores int, topology []info.Node) (stats.Manager, error) {
klog.V(1).Info("cAdvisor is build without cgo and/or libpfm support. Perf event counters are not available.")
return &stats.NoopManager{}, nil
}

vendor/github.com/google/cadvisor/perf/uncore_libpfm.go (generated, vendored, new file: +392 lines)
View File

@@ -0,0 +1,392 @@
// +build libpfm,cgo
// Copyright 2020 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Uncore perf events logic.
package perf
// #cgo CFLAGS: -I/usr/include
// #cgo LDFLAGS: -lpfm
// #include <perfmon/pfmlib.h>
// #include <stdlib.h>
import "C"
import (
"bytes"
"encoding/binary"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
"sync"
"unsafe"
"golang.org/x/sys/unix"
"k8s.io/klog/v2"
info "github.com/google/cadvisor/info/v1"
"github.com/google/cadvisor/stats"
"github.com/google/cadvisor/utils/sysinfo"
)
type pmu struct {
name string
typeOf uint32
cpus []uint32
}
const (
uncorePMUPrefix = "uncore"
pmuTypeFilename = "type"
pmuCpumaskFilename = "cpumask"
systemDevicesPath = "/sys/devices"
rootPerfEventPath = "/sys/fs/cgroup/perf_event"
)
func getPMU(pmus []pmu, gotType uint32) (*pmu, error) {
for _, pmu := range pmus {
if pmu.typeOf == gotType {
return &pmu, nil
}
}
return nil, fmt.Errorf("there is no pmu with event type: %#v", gotType)
}
type uncorePMUs map[string]pmu
func readUncorePMU(path string, name string, cpumaskRegexp *regexp.Regexp) (*pmu, error) {
buf, err := ioutil.ReadFile(filepath.Join(path, pmuTypeFilename))
if err != nil {
return nil, err
}
typeString := strings.TrimSpace(string(buf))
eventType, err := strconv.ParseUint(typeString, 0, 32)
if err != nil {
return nil, err
}
buf, err = ioutil.ReadFile(filepath.Join(path, pmuCpumaskFilename))
if err != nil {
return nil, err
}
var cpus []uint32
cpumask := strings.TrimSpace(string(buf))
for _, cpu := range cpumaskRegexp.Split(cpumask, -1) {
parsedCPU, err := strconv.ParseUint(cpu, 0, 32)
if err != nil {
return nil, err
}
cpus = append(cpus, uint32(parsedCPU))
}
return &pmu{name: name, typeOf: uint32(eventType), cpus: cpus}, nil
}
func getUncorePMUs(devicesPath string) (uncorePMUs, error) {
pmus := make(uncorePMUs, 0)
// Depending on the platform, the cpumask can take a form like "0-1" or "0,1".
cpumaskRegexp := regexp.MustCompile("[-,\n]")
err := filepath.Walk(devicesPath, func(path string, info os.FileInfo, err error) error {
// Skip root path.
if path == devicesPath {
return nil
}
if info.IsDir() {
if strings.HasPrefix(info.Name(), uncorePMUPrefix) {
pmu, err := readUncorePMU(path, info.Name(), cpumaskRegexp)
if err != nil {
return err
}
pmus[info.Name()] = *pmu
}
}
return nil
})
if err != nil {
return nil, err
}
return pmus, nil
}
type uncoreCollector struct {
cpuFiles map[string]map[string]map[int]readerCloser
cpuFilesLock sync.Mutex
events [][]Event
eventToCustomEvent map[Event]*CustomEvent
topology []info.Node
// Handle for mocking purposes.
perfEventOpen func(attr *unix.PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error)
}
func NewUncoreCollector(cgroupPath string, events PerfEvents, topology []info.Node) stats.Collector {
if cgroupPath != rootPerfEventPath {
// Uncore metrics don't exist for cgroups, only for the entire platform.
return &stats.NoopCollector{}
}
collector := &uncoreCollector{topology: topology}
// Default implementation of Linux perf_event_open function.
collector.perfEventOpen = unix.PerfEventOpen
err := collector.setup(events, systemDevicesPath)
if err != nil {
formattedError := fmt.Errorf("unable to setup uncore perf event collector: %v", err)
klog.V(5).Infof("Perf uncore metrics will not be available: %s", formattedError)
return &stats.NoopCollector{}
}
return collector
}
func (c *uncoreCollector) setup(events PerfEvents, devicesPath string) error {
var err error
readUncorePMUs, err := getUncorePMUs(devicesPath)
if err != nil {
return err
}
// Mapping from event name, then PMU name, then CPU.
c.cpuFiles = make(map[string]map[string]map[int]readerCloser)
c.events = events.Uncore.Events
c.eventToCustomEvent = parseUncoreEvents(events.Uncore)
c.cpuFilesLock.Lock()
defer c.cpuFilesLock.Unlock()
for _, group := range c.events {
if len(group) > 1 {
klog.Warning("grouping uncore perf events is not supported!")
continue
}
eventName, pmuPrefix := parseEventName(string(group[0]))
var err error
customEvent, ok := c.eventToCustomEvent[group[0]]
if ok {
if customEvent.Type != 0 {
pmus := obtainPMUs("uncore", readUncorePMUs)
err = c.setupRawNonGroupedUncore(customEvent, pmus)
} else {
pmus := obtainPMUs(pmuPrefix, readUncorePMUs)
err = c.setupRawNonGroupedUncore(customEvent, pmus)
}
} else {
pmus := obtainPMUs(pmuPrefix, readUncorePMUs)
err = c.setupNonGroupedUncore(eventName, pmus)
}
if err != nil {
return err
}
}
return nil
}
func parseEventName(eventName string) (string, string) {
// The first "/" separates the PMU prefix from the event name,
// e.g. "uncore_imc_0/cas_count_read" -> "uncore_imc_0" and "cas_count_read".
splittedEvent := strings.SplitN(eventName, "/", 2)
var pmuPrefix = ""
if len(splittedEvent) == 2 {
pmuPrefix = splittedEvent[0]
eventName = splittedEvent[1]
}
return eventName, pmuPrefix
}
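
For example, matching the comment above:

name, prefix := parseEventName("uncore_imc_0/cas_count_read")
// name == "cas_count_read", prefix == "uncore_imc_0"
name, prefix = parseEventName("cas_count_read")
// name == "cas_count_read", prefix == "" (no PMU prefix present)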
func obtainPMUs(want string, gotPMUs uncorePMUs) []pmu {
var pmus []pmu
if want == "" {
return pmus
}
for _, pmu := range gotPMUs {
if strings.HasPrefix(pmu.name, want) {
pmus = append(pmus, pmu)
}
}
return pmus
}
func parseUncoreEvents(events Events) map[Event]*CustomEvent {
eventToCustomEvent := map[Event]*CustomEvent{}
for _, uncoreEvent := range events.Events {
for idx, customEvent := range events.CustomEvents {
if uncoreEvent[0] == customEvent.Name {
eventToCustomEvent[customEvent.Name] = &events.CustomEvents[idx]
break
}
}
}
return eventToCustomEvent
}
func (c *uncoreCollector) Destroy() {
c.cpuFilesLock.Lock()
defer c.cpuFilesLock.Unlock()
for name, pmus := range c.cpuFiles {
for pmu, cpus := range pmus {
for cpu, file := range cpus {
klog.V(5).Infof("Closing uncore perf_event file descriptor for event %q, PMU %s and CPU %d", name, pmu, cpu)
err := file.Close()
if err != nil {
klog.Warningf("Unable to close perf_event file descriptor for event %q, PMU %s and CPU %d", name, pmu, cpu)
}
}
delete(pmus, pmu)
}
delete(c.cpuFiles, name)
}
}
func (c *uncoreCollector) UpdateStats(stats *info.ContainerStats) error {
klog.V(5).Info("Attempting to update uncore perf_event stats")
for name, pmus := range c.cpuFiles {
for pmu, cpus := range pmus {
for cpu, file := range cpus {
stat, err := readPerfUncoreStat(file, name, cpu, pmu, c.topology)
if err != nil {
return fmt.Errorf("unable to read from uncore perf_event_file (event: %q, CPU: %d, PMU: %s): %q", name, cpu, pmu, err.Error())
}
klog.V(5).Infof("Read uncore perf event (event: %q, CPU: %d, PMU: %s): %d", name, cpu, pmu, stat.Value)
stats.PerfUncoreStats = append(stats.PerfUncoreStats, *stat)
}
}
}
return nil
}
func (c *uncoreCollector) setupRawNonGroupedUncore(event *CustomEvent, pmus []pmu) error {
klog.V(5).Infof("Setting up non-grouped raw perf uncore event %#v", event)
if event.Type == 0 {
// PMU isn't set. Register event for all PMUs.
for _, pmu := range pmus {
newEvent := CustomEvent{
Type: pmu.typeOf,
Config: event.Config,
Name: event.Name,
}
config := createPerfEventAttr(newEvent)
err := c.registerUncoreEvent(config, string(newEvent.Name), pmu.cpus, pmu.name)
if err != nil {
return err
}
}
return nil
} else {
// Register event for the PMU.
config := createPerfEventAttr(*event)
pmu, err := getPMU(pmus, event.Type)
if err != nil {
return err
}
return c.registerUncoreEvent(config, string(event.Name), pmu.cpus, pmu.name)
}
}
func (c *uncoreCollector) setupNonGroupedUncore(name string, pmus []pmu) error {
perfEventAttr, err := getPerfEventAttr(name)
if err != nil {
return err
}
defer C.free(unsafe.Pointer(perfEventAttr))
klog.V(5).Infof("Setting up non-grouped uncore perf event %s", name)
// Register the event for all matching PMUs (typically the memory controllers).
for _, pmu := range pmus {
perfEventAttr.Type = pmu.typeOf
err = c.registerUncoreEvent(perfEventAttr, name, pmu.cpus, pmu.name)
if err != nil {
return err
}
}
return nil
}
func (c *uncoreCollector) registerUncoreEvent(config *unix.PerfEventAttr, name string, cpus []uint32, pmu string) error {
for _, cpu := range cpus {
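// perf_event_open semantics: pid == -1 with a concrete cpu counts events on
// that CPU across all tasks; uncore PMUs are per-package, so attaching to a
// single process would not be meaningful here.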
groupFd, pid, flags := -1, -1, 0
fd, err := c.perfEventOpen(config, pid, int(cpu), groupFd, flags)
if err != nil {
return fmt.Errorf("setting up perf event %#v failed: %q", config, err)
}
perfFile := os.NewFile(uintptr(fd), name)
if perfFile == nil {
return fmt.Errorf("unable to create os.File from file descriptor %#v", fd)
}
c.addEventFile(name, pmu, int(cpu), perfFile)
}
return nil
}
func (c *uncoreCollector) addEventFile(name string, pmu string, cpu int, perfFile *os.File) {
_, ok := c.cpuFiles[name]
if !ok {
c.cpuFiles[name] = map[string]map[int]readerCloser{}
}
_, ok = c.cpuFiles[name][pmu]
if !ok {
c.cpuFiles[name][pmu] = map[int]readerCloser{}
}
c.cpuFiles[name][pmu][cpu] = perfFile
}
func readPerfUncoreStat(file readerCloser, name string, cpu int, pmu string, topology []info.Node) (*info.PerfUncoreStat, error) {
buf := make([]byte, 32)
_, err := file.Read(buf)
if err != nil {
return nil, err
}
perfData := &ReadFormat{}
reader := bytes.NewReader(buf)
err = binary.Read(reader, binary.LittleEndian, perfData)
if err != nil {
return nil, err
}
scalingRatio := 1.0
if perfData.TimeEnabled != 0 {
scalingRatio = float64(perfData.TimeRunning) / float64(perfData.TimeEnabled)
}
stat := info.PerfUncoreStat{
Value: uint64(float64(perfData.Value) / scalingRatio),
Name: name,
ScalingRatio: scalingRatio,
Socket: sysinfo.GetSocketFromCPU(topology, cpu),
PMU: pmu,
}
return &stat, nil
}
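// Worked example of the multiplexing correction above (numbers invented):
// with TimeEnabled == 2_000_000 ns and TimeRunning == 1_000_000 ns the
// scalingRatio is 0.5, so a raw perfData.Value of 1500 is extrapolated to
// uint64(1500/0.5) == 3000; with TimeEnabled == 0 the ratio stays 1.0 and
// the raw value is reported as-is.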

vendor/github.com/google/cadvisor/resctrl/BUILD generated vendored Normal file
View File

@@ -0,0 +1,41 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")
go_library(
name = "go_default_library",
srcs = [
"collector.go",
"manager.go",
],
importmap = "k8s.io/kubernetes/vendor/github.com/google/cadvisor/resctrl",
importpath = "github.com/google/cadvisor/resctrl",
visibility = ["//visibility:public"],
deps = select({
"@io_bazel_rules_go//go/platform:android": [
"//vendor/github.com/google/cadvisor/info/v1:go_default_library",
"//vendor/github.com/google/cadvisor/stats:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/intelrdt:go_default_library",
],
"@io_bazel_rules_go//go/platform:linux": [
"//vendor/github.com/google/cadvisor/info/v1:go_default_library",
"//vendor/github.com/google/cadvisor/stats:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/intelrdt:go_default_library",
],
"//conditions:default": [],
}),
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

vendor/github.com/google/cadvisor/resctrl/collector.go generated vendored Normal file
View File

@@ -0,0 +1,74 @@
// +build linux
// Copyright 2020 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Collector of resctrl for a container.
package resctrl
import (
info "github.com/google/cadvisor/info/v1"
"github.com/google/cadvisor/stats"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/intelrdt"
)
type collector struct {
resctrl intelrdt.IntelRdtManager
stats.NoopDestroy
}
func newCollector(id string, resctrlPath string) *collector {
collector := &collector{
resctrl: intelrdt.IntelRdtManager{
Config: &configs.Config{
IntelRdt: &configs.IntelRdt{},
},
Id: id,
Path: resctrlPath,
},
}
return collector
}
func (c *collector) UpdateStats(stats *info.ContainerStats) error {
stats.Resctrl = info.ResctrlStats{}
resctrlStats, err := c.resctrl.GetStats()
if err != nil {
return err
}
numberOfNUMANodes := len(*resctrlStats.MBMStats)
stats.Resctrl.MemoryBandwidth = make([]info.MemoryBandwidthStats, 0, numberOfNUMANodes)
stats.Resctrl.Cache = make([]info.CacheStats, 0, numberOfNUMANodes)
for _, numaNodeStats := range *resctrlStats.MBMStats {
stats.Resctrl.MemoryBandwidth = append(stats.Resctrl.MemoryBandwidth,
info.MemoryBandwidthStats{
TotalBytes: numaNodeStats.MBMTotalBytes,
LocalBytes: numaNodeStats.MBMLocalBytes,
})
}
for _, numaNodeStats := range *resctrlStats.CMTStats {
stats.Resctrl.Cache = append(stats.Resctrl.Cache,
info.CacheStats{LLCOccupancy: numaNodeStats.LLCOccupancy})
}
return nil
}

vendor/github.com/google/cadvisor/resctrl/manager.go generated vendored Normal file
View File

@@ -0,0 +1,43 @@
// +build linux
// Copyright 2020 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Manager of resctrl for containers.
package resctrl
import (
"github.com/google/cadvisor/stats"
"github.com/opencontainers/runc/libcontainer/intelrdt"
)
type manager struct {
id string
stats.NoopDestroy
}
func (m manager) GetCollector(resctrlPath string) (stats.Collector, error) {
collector := newCollector(m.id, resctrlPath)
return collector, nil
}
func NewManager(id string) (stats.Manager, error) {
if intelrdt.IsMBMEnabled() || intelrdt.IsCMTEnabled() {
return &manager{id: id}, nil
}
return &stats.NoopManager{}, nil
}
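A minimal caller-side sketch of the manager/collector pair above; the container ID and resctrl group path are hypothetical placeholders, error handling is abbreviated, and this is not part of the vendored source:
package main
import (
	"fmt"
	"log"
	info "github.com/google/cadvisor/info/v1"
	"github.com/google/cadvisor/resctrl"
)
func main() {
	// "abc123" and the path below are placeholders for a real container.
	m, err := resctrl.NewManager("abc123") // NoopManager when neither MBM nor CMT is enabled
	if err != nil {
		log.Fatal(err)
	}
	c, err := m.GetCollector("/sys/fs/resctrl/abc123")
	if err != nil {
		log.Fatal(err)
	}
	var stats info.ContainerStats
	if err := c.UpdateStats(&stats); err != nil {
		log.Fatal(err)
	}
	fmt.Printf("memory bandwidth per NUMA node: %+v\n", stats.Resctrl.MemoryBandwidth)
	fmt.Printf("LLC occupancy per NUMA node: %+v\n", stats.Resctrl.Cache)
}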

View File

@@ -6,6 +6,7 @@ go_library(
importmap = "k8s.io/kubernetes/vendor/github.com/google/cadvisor/utils/sysfs",
importpath = "github.com/google/cadvisor/utils/sysfs",
visibility = ["//visibility:public"],
deps = ["//vendor/k8s.io/klog/v2:go_default_library"],
)
filegroup(

View File

@@ -15,13 +15,18 @@
package sysfs
import (
"bytes"
"errors"
"fmt"
"io/ioutil"
"os"
"path"
"path/filepath"
"regexp"
"strconv"
"strings"
"k8s.io/klog/v2"
)
const (
@@ -95,6 +100,9 @@ type SysFs interface {
GetCacheInfo(cpu int, cache string) (CacheInfo, error)
GetSystemUUID() (string, error)
// IsCPUOnline determines whether the CPU is online from the kernel CPU hotplug mechanism standpoint.
// See: https://www.kernel.org/doc/html/latest/core-api/cpu_hotplug.html
IsCPUOnline(dir string) bool
}
type realSysFs struct{}
@@ -326,3 +334,26 @@ func (fs *realSysFs) GetSystemUUID() (string, error) {
return "", err
}
}
func (fs *realSysFs) IsCPUOnline(dir string) bool {
cpuPath := fmt.Sprintf("%s/online", dir)
content, err := ioutil.ReadFile(cpuPath)
if err != nil {
pathErr, ok := err.(*os.PathError)
if ok {
if errors.Is(pathErr.Unwrap(), os.ErrNotExist) && isZeroCPU(dir) {
return true
}
}
klog.Warningf("unable to read %s: %s", cpuPath, err.Error())
return false
}
trimmed := bytes.TrimSpace(content)
return len(trimmed) == 1 && trimmed[0] == '1'
}
func isZeroCPU(dir string) bool {
regex := regexp.MustCompile("cpu([0-9]*)")
matches := regex.FindStringSubmatch(dir)
return len(matches) == 2 && matches[1] == "0"
}
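// Spelled out (paths hypothetical): IsCPUOnline("/sys/devices/system/cpu/cpu0")
// returns true even when cpu0/online is absent, since the boot CPU often cannot
// be hotplugged and exposes no online file; for any other CPU the file must
// exist and contain "1", otherwise (read error, "0", other content) it is false.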

View File

@@ -16,6 +16,7 @@ package sysinfo
import (
"fmt"
"os"
"regexp"
"strconv"
"strings"
@@ -219,15 +220,16 @@ func GetNodesInfo(sysFs sysfs.SysFs) ([]info.Node, int, error) {
return nil, 0, err
}
node.Cores = cores
for _, core := range cores {
allLogicalCoresCount += len(core.Threads)
}
}
allLogicalCoresCount += len(cpuDirs)
// On some Linux platforms (such as an Arm64 guest kernel), cache info may not exist.
// So we should ignore the error here.
err = addCacheInfo(sysFs, &node)
if err != nil {
klog.Warningf("Found node without cache information, nodeDir: %s", nodeDir)
klog.V(1).Infof("Found node without cache information, nodeDir: %s", nodeDir)
}
node.Memory, err = getNodeMemInfo(sysFs, nodeDir)
@@ -265,6 +267,11 @@ func getCPUTopology(sysFs sysfs.SysFs) ([]info.Node, int, error) {
return nil, 0, err
}
if len(cpusByPhysicalPackageID) == 0 {
klog.Warningf("Cannot read any physical package id for any CPU")
return nil, cpusCount, nil
}
for physicalPackageID, cpus := range cpusByPhysicalPackageID {
node := info.Node{Id: physicalPackageID}
@@ -278,7 +285,7 @@ func getCPUTopology(sysFs sysfs.SysFs) ([]info.Node, int, error) {
// So we should ignore the error here.
err = addCacheInfo(sysFs, &node)
if err != nil {
klog.Warningf("Found cpu without cache information, cpuPath: %s", cpus)
klog.V(1).Infof("Found cpu without cache information, cpuPath: %s", cpus)
}
nodes = append(nodes, node)
}
@@ -290,7 +297,10 @@ func getCpusByPhysicalPackageID(sysFs sysfs.SysFs, cpusPaths []string) (map[int]
for _, cpuPath := range cpusPaths {
rawPhysicalPackageID, err := sysFs.GetCPUPhysicalPackageID(cpuPath)
if err != nil {
if os.IsNotExist(err) {
klog.Warningf("Cannot read physical package id for %s, physical_package_id file does not exist, err: %s", cpuPath, err)
continue
} else if err != nil {
return nil, err
}
@@ -375,9 +385,15 @@ func getCoresInfo(sysFs sysfs.SysFs, cpuDirs []string) ([]info.Core, error) {
if err != nil {
return nil, fmt.Errorf("Unexpected format of CPU directory, cpuDirRegExp %s, cpuDir: %s", cpuDirRegExp, cpuDir)
}
if !sysFs.IsCPUOnline(cpuDir) {
continue
}
rawPhysicalID, err := sysFs.GetCoreID(cpuDir)
if err != nil {
if os.IsNotExist(err) {
klog.Warningf("Cannot read core id for %s, core_id file does not exist, err: %s", cpuDir, err)
continue
} else if err != nil {
return nil, err
}
physicalID, err := strconv.Atoi(rawPhysicalID)
@@ -403,6 +419,20 @@ func getCoresInfo(sysFs sysfs.SysFs, cpuDirs []string) ([]info.Core, error) {
} else {
desiredCore.Threads = append(desiredCore.Threads, cpuID)
}
rawPhysicalPackageID, err := sysFs.GetCPUPhysicalPackageID(cpuDir)
if os.IsNotExist(err) {
klog.Warningf("Cannot read physical package id for %s, physical_package_id file does not exist, err: %s", cpuDir, err)
continue
} else if err != nil {
return nil, err
}
physicalPackageID, err := strconv.Atoi(rawPhysicalPackageID)
if err != nil {
return nil, err
}
desiredCore.SocketID = physicalPackageID
}
return cores, nil
}
@@ -482,3 +512,14 @@ func getMatchedInt(rgx *regexp.Regexp, str string) (int, error) {
}
return valInt, nil
}
// GetSocketFromCPU returns the socket ID of the given CPU. If it is not present, -1 is returned.
func GetSocketFromCPU(topology []info.Node, cpu int) int {
for _, node := range topology {
found, coreID := node.FindCoreByThread(cpu)
if found {
return node.Cores[coreID].SocketID
}
}
return -1
}
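A hedged usage sketch tying the two helpers together (assumes sysfs.NewRealSysFs() as the SysFs implementation; not part of the vendored source):
// topology, _, err := GetNodesInfo(sysfs.NewRealSysFs())
// if err != nil { /* handle the error */ }
// socket := GetSocketFromCPU(topology, 5) // -1 when CPU 5 is offline or not in the topology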