vendor: cadvisor v0.38.4

This commit is contained in:
David Porter
2020-11-13 19:52:57 +00:00
parent ec734aced7
commit 8af7405f17
396 changed files with 73154 additions and 18510 deletions

View File

@@ -21,6 +21,7 @@ package perf
// #cgo LDFLAGS: -lpfm
// #include <perfmon/pfmlib.h>
// #include <stdlib.h>
// #include <string.h>
import "C"
import (
@@ -41,33 +42,42 @@ import (
type collector struct {
cgroupPath string
events PerfEvents
cpuFiles map[string]map[int]readerCloser
cpuFiles map[int]group
cpuFilesLock sync.Mutex
numCores int
onlineCPUs []int
eventToCustomEvent map[Event]*CustomEvent
uncore stats.Collector
}
// group holds the open perf event files of one event group together with the
// bookkeeping needed to read and label its counters.
type group struct {
// cpuFiles maps event name -> CPU -> open perf event file.
cpuFiles map[string]map[int]readerCloser
// names lists the event names of the group in the order they were first
// registered; values read from the group are labelled by position in this list.
names []string
// leaderName is the name of the first event registered for the group; the
// whole group is read through the leader's files.
leaderName string
}
var (
isLibpfmInitialized = false
libpmfMutex = sync.Mutex{}
)
const (
groupLeaderFileDescriptor = -1
)
func init() {
libpmfMutex.Lock()
defer libpmfMutex.Unlock()
pErr := C.pfm_initialize()
if pErr != C.PFM_SUCCESS {
fmt.Printf("unable to initialize libpfm: %d", int(pErr))
klog.Errorf("unable to initialize libpfm: %d", int(pErr))
return
}
isLibpfmInitialized = true
}
func newCollector(cgroupPath string, events PerfEvents, numCores int, topology []info.Node) *collector {
collector := &collector{cgroupPath: cgroupPath, events: events, cpuFiles: map[string]map[int]readerCloser{}, numCores: numCores, uncore: NewUncoreCollector(cgroupPath, events, topology)}
func newCollector(cgroupPath string, events PerfEvents, onlineCPUs []int, cpuToSocket map[int]int) *collector {
collector := &collector{cgroupPath: cgroupPath, events: events, onlineCPUs: onlineCPUs, cpuFiles: map[int]group{}, uncore: NewUncoreCollector(cgroupPath, events, cpuToSocket)}
mapEventsToCustomEvents(collector)
return collector
}
@@ -82,48 +92,87 @@ func (c *collector) UpdateStats(stats *info.ContainerStats) error {
stats.PerfStats = []info.PerfStat{}
klog.V(5).Infof("Attempting to update perf_event stats from cgroup %q", c.cgroupPath)
for name, cpus := range c.cpuFiles {
for cpu, file := range cpus {
stat, err := readPerfStat(file, name, cpu)
for _, group := range c.cpuFiles {
for cpu, file := range group.cpuFiles[group.leaderName] {
stat, err := readGroupPerfStat(file, group, cpu, c.cgroupPath)
if err != nil {
klog.Warningf("Unable to read from perf_event_file (event: %q, CPU: %d) for %q: %q", name, cpu, c.cgroupPath, err.Error())
klog.Warningf("Unable to read from perf_event_file (event: %q, CPU: %d) for %q: %q", group.leaderName, cpu, c.cgroupPath, err.Error())
continue
}
klog.V(5).Infof("Read perf event (event: %q, CPU: %d) for %q: %d", name, cpu, c.cgroupPath, stat.Value)
stats.PerfStats = append(stats.PerfStats, *stat)
stats.PerfStats = append(stats.PerfStats, stat...)
}
}
return nil
}
func readPerfStat(file readerCloser, name string, cpu int) (*info.PerfStat, error) {
buf := make([]byte, 32)
_, err := file.Read(buf)
func readGroupPerfStat(file readerCloser, group group, cpu int, cgroupPath string) ([]info.PerfStat, error) {
values, err := getPerfValues(file, group)
if err != nil {
return nil, err
}
perfData := &ReadFormat{}
reader := bytes.NewReader(buf)
perfStats := make([]info.PerfStat, len(values))
for i, value := range values {
klog.V(5).Infof("Read metric for event %q for cpu %d from cgroup %q: %d", value.Name, cpu, cgroupPath, value.Value)
perfStats[i] = info.PerfStat{
PerfValue: value,
Cpu: cpu,
}
}
return perfStats, nil
}
func getPerfValues(file readerCloser, group group) ([]info.PerfValue, error) {
// 24 bytes of GroupReadFormat struct.
// 16 bytes of Values struct for each element in group.
// See https://man7.org/linux/man-pages/man2/perf_event_open.2.html section "Reading results" with PERF_FORMAT_GROUP specified.
buf := make([]byte, 24+16*len(group.names))
_, err := file.Read(buf)
if err != nil {
return []info.PerfValue{}, fmt.Errorf("unable to read perf event group ( leader = %s ): %w", group.leaderName, err)
}
perfData := &GroupReadFormat{}
reader := bytes.NewReader(buf[:24])
err = binary.Read(reader, binary.LittleEndian, perfData)
if err != nil {
return nil, err
return []info.PerfValue{}, fmt.Errorf("unable to decode perf event group ( leader = %s ): %w", group.leaderName, err)
}
values := make([]Values, perfData.Nr)
reader = bytes.NewReader(buf[24:])
err = binary.Read(reader, binary.LittleEndian, values)
if err != nil {
return []info.PerfValue{}, fmt.Errorf("unable to decode perf event group values ( leader = %s ): %w", group.leaderName, err)
}
scalingRatio := 1.0
if perfData.TimeEnabled != 0 {
if perfData.TimeRunning != 0 && perfData.TimeEnabled != 0 {
scalingRatio = float64(perfData.TimeRunning) / float64(perfData.TimeEnabled)
}
stat := info.PerfStat{
Value: uint64(float64(perfData.Value) / scalingRatio),
Name: name,
ScalingRatio: scalingRatio,
Cpu: cpu,
perfValues := make([]info.PerfValue, perfData.Nr)
if scalingRatio != float64(0) {
for i, name := range group.names {
perfValues[i] = info.PerfValue{
ScalingRatio: scalingRatio,
Value: uint64(float64(values[i].Value) / scalingRatio),
Name: name,
}
}
} else {
for i, name := range group.names {
perfValues[i] = info.PerfValue{
ScalingRatio: scalingRatio,
Value: values[i].Value,
Name: name,
}
}
}
return &stat, nil
return perfValues, nil
}
func (c *collector) setup() error {
@@ -136,68 +185,152 @@ func (c *collector) setup() error {
c.cpuFilesLock.Lock()
defer c.cpuFilesLock.Unlock()
cgroupFd := int(cgroup.Fd())
for _, group := range c.events.Core.Events {
customEvent, ok := c.eventToCustomEvent[group[0]]
var err error
if ok {
err = c.setupRawNonGrouped(customEvent, cgroupFd)
} else {
err = c.setupNonGrouped(string(group[0]), cgroupFd)
for i, group := range c.events.Core.Events {
// CPUs file descriptors of group leader needed for perf_event_open.
leaderFileDescriptors := make(map[int]int, len(c.onlineCPUs))
for _, cpu := range c.onlineCPUs {
leaderFileDescriptors[cpu] = groupLeaderFileDescriptor
}
if err != nil {
return err
for j, event := range group.events {
// First element is group leader.
isGroupLeader := j == 0
customEvent, ok := c.eventToCustomEvent[event]
if ok {
config := c.createConfigFromRawEvent(customEvent)
leaderFileDescriptors, err = c.registerEvent(eventInfo{string(customEvent.Name), config, cgroupFd, i, isGroupLeader}, leaderFileDescriptors)
if err != nil {
return err
}
} else {
config, err := c.createConfigFromEvent(event)
if err != nil {
return err
}
leaderFileDescriptors, err = c.registerEvent(eventInfo{string(event), config, cgroupFd, i, isGroupLeader}, leaderFileDescriptors)
if err != nil {
return err
}
// Clean memory allocated by C code.
C.free(unsafe.Pointer(config))
}
}
// Group is prepared so we should reset and enable counting.
for _, fd := range leaderFileDescriptors {
err = unix.IoctlSetInt(fd, unix.PERF_EVENT_IOC_RESET, 0)
if err != nil {
return err
}
err = unix.IoctlSetInt(fd, unix.PERF_EVENT_IOC_ENABLE, 0)
if err != nil {
return err
}
}
}
return nil
}
func (c *collector) setupRawNonGrouped(event *CustomEvent, cgroup int) error {
klog.V(5).Infof("Setting up non-grouped raw perf event %#v", event)
config := createPerfEventAttr(*event)
err := c.registerEvent(config, string(event.Name), cgroup)
func readPerfEventAttr(name string, pfmGetOsEventEncoding func(string, unsafe.Pointer) error) (*unix.PerfEventAttr, error) {
perfEventAttrMemory := C.malloc(C.ulong(unsafe.Sizeof(unix.PerfEventAttr{})))
// Fill memory with 0 values.
C.memset(perfEventAttrMemory, 0, C.ulong(unsafe.Sizeof(unix.PerfEventAttr{})))
err := pfmGetOsEventEncoding(name, unsafe.Pointer(perfEventAttrMemory))
if err != nil {
return err
return nil, err
}
return (*unix.PerfEventAttr)(perfEventAttrMemory), nil
}
// pfmGetOsEventEncoding asks libpfm4 to encode the event called name into the
// perf_event_attr structure located at perfEventAttrMemory.
//
// perfEventAttrMemory is passed to libpfm4 as the attr field of the encode
// argument and is filled in place; the caller keeps ownership of that memory.
// An error carrying the libpfm4 return code is returned when the encoding fails.
func pfmGetOsEventEncoding(name string, perfEventAttrMemory unsafe.Pointer) error {
	event := pfmPerfEncodeArgT{}

	// C.CString allocates with malloc; Go's garbage collector never reclaims
	// it, so both strings are released once libpfm4 has returned.
	fstr := C.CString("")
	defer C.free(unsafe.Pointer(fstr))
	cSafeName := C.CString(name)
	defer C.free(unsafe.Pointer(cSafeName))

	event.fstr = unsafe.Pointer(fstr)
	event.attr = perfEventAttrMemory
	event.size = C.ulong(unsafe.Sizeof(event))

	pErr := C.pfm_get_os_event_encoding(cSafeName, C.PFM_PLM0|C.PFM_PLM3, C.PFM_OS_PERF_EVENT, unsafe.Pointer(&event))
	if pErr != C.PFM_SUCCESS {
		return fmt.Errorf("unable to transform event name %s to perf_event_attr: %d", name, int(pErr))
	}
	return nil
}
func (c *collector) registerEvent(config *unix.PerfEventAttr, name string, pid int) error {
var cpu int
for cpu = 0; cpu < c.numCores; cpu++ {
groupFd, flags := -1, unix.PERF_FLAG_FD_CLOEXEC|unix.PERF_FLAG_PID_CGROUP
fd, err := unix.PerfEventOpen(config, pid, cpu, groupFd, flags)
// eventInfo bundles everything registerEvent needs to open one perf event.
type eventInfo struct {
// name is the event name; it names the os.File wrapping the descriptor and
// keys the stored files.
name string
// config is the perf_event_attr handed to perf_event_open.
config *unix.PerfEventAttr
// pid is the pid argument of perf_event_open; the core collector passes the
// cgroup file descriptor here, the uncore collector passes uncorePID.
pid int
// groupIndex is the position of the event's group in the configured events list.
groupIndex int
// isGroupLeader is true for the event that leads its group.
isGroupLeader bool
}
func (c *collector) registerEvent(event eventInfo, leaderFileDescriptors map[int]int) (map[int]int, error) {
newLeaderFileDescriptors := make(map[int]int, len(c.onlineCPUs))
var pid, flags int
if event.isGroupLeader {
pid = event.pid
flags = unix.PERF_FLAG_FD_CLOEXEC | unix.PERF_FLAG_PID_CGROUP
} else {
pid = -1
flags = unix.PERF_FLAG_FD_CLOEXEC
}
setAttributes(event.config, event.isGroupLeader)
for _, cpu := range c.onlineCPUs {
fd, err := unix.PerfEventOpen(event.config, pid, cpu, leaderFileDescriptors[cpu], flags)
if err != nil {
return fmt.Errorf("setting up perf event %#v failed: %q", config, err)
return nil, fmt.Errorf("setting up perf event %#v failed: %q", event.config, err)
}
perfFile := os.NewFile(uintptr(fd), name)
perfFile := os.NewFile(uintptr(fd), event.name)
if perfFile == nil {
return fmt.Errorf("unable to create os.File from file descriptor %#v", fd)
return nil, fmt.Errorf("unable to create os.File from file descriptor %#v", fd)
}
c.addEventFile(name, cpu, perfFile)
c.addEventFile(event.groupIndex, event.name, cpu, perfFile)
// If group leader, save fd for others.
if event.isGroupLeader {
newLeaderFileDescriptors[cpu] = fd
}
}
return nil
if event.isGroupLeader {
return newLeaderFileDescriptors, nil
}
return leaderFileDescriptors, nil
}
func (c *collector) addEventFile(name string, cpu int, perfFile *os.File) {
_, ok := c.cpuFiles[name]
func (c *collector) addEventFile(index int, name string, cpu int, perfFile *os.File) {
_, ok := c.cpuFiles[index]
if !ok {
c.cpuFiles[name] = map[int]readerCloser{}
c.cpuFiles[index] = group{
leaderName: name,
cpuFiles: map[string]map[int]readerCloser{},
}
}
c.cpuFiles[name][cpu] = perfFile
}
func (c *collector) setupNonGrouped(name string, cgroup int) error {
perfEventAttr, err := getPerfEventAttr(name)
if err != nil {
return err
_, ok = c.cpuFiles[index].cpuFiles[name]
if !ok {
c.cpuFiles[index].cpuFiles[name] = map[int]readerCloser{}
}
defer C.free(unsafe.Pointer(perfEventAttr))
return c.registerEvent(perfEventAttr, name, cgroup)
c.cpuFiles[index].cpuFiles[name][cpu] = perfFile
// Check if name is already stored.
for _, have := range c.cpuFiles[index].names {
if name == have {
return
}
}
// Otherwise save it.
c.cpuFiles[index] = group{
cpuFiles: c.cpuFiles[index].cpuFiles,
names: append(c.cpuFiles[index].names, name),
leaderName: c.cpuFiles[index].leaderName,
}
}
func createPerfEventAttr(event CustomEvent) *unix.PerfEventAttr {
@@ -214,43 +347,20 @@ func createPerfEventAttr(event CustomEvent) *unix.PerfEventAttr {
config.Ext2 = event.Config[2]
}
setAttributes(config)
klog.V(5).Infof("perf_event_attr struct prepared: %#v", config)
return config
}
func getPerfEventAttr(name string) (*unix.PerfEventAttr, error) {
if !isLibpfmInitialized {
return nil, fmt.Errorf("libpfm4 is not initialized, cannot proceed with setting perf events up")
func setAttributes(config *unix.PerfEventAttr, leader bool) {
config.Sample_type = unix.PERF_SAMPLE_IDENTIFIER
config.Read_format = unix.PERF_FORMAT_TOTAL_TIME_ENABLED | unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_GROUP | unix.PERF_FORMAT_ID
config.Bits = unix.PerfBitInherit
// The group leader has this flag set so that counting stays disabled until the whole group is prepared.
if leader {
config.Bits |= unix.PerfBitDisabled
}
perfEventAttrMemory := C.malloc(C.ulong(unsafe.Sizeof(unix.PerfEventAttr{})))
event := pfmPerfEncodeArgT{}
perfEventAttr := (*unix.PerfEventAttr)(perfEventAttrMemory)
fstr := C.CString("")
event.fstr = unsafe.Pointer(fstr)
event.attr = perfEventAttrMemory
event.size = C.ulong(unsafe.Sizeof(event))
cSafeName := C.CString(name)
pErr := C.pfm_get_os_event_encoding(cSafeName, C.PFM_PLM0|C.PFM_PLM3, C.PFM_OS_PERF_EVENT, unsafe.Pointer(&event))
if pErr != C.PFM_SUCCESS {
return nil, fmt.Errorf("unable to transform event name %s to perf_event_attr: %v", name, int(pErr))
}
klog.V(5).Infof("perf_event_attr: %#v", perfEventAttr)
setAttributes(perfEventAttr)
return perfEventAttr, nil
}
func setAttributes(config *unix.PerfEventAttr) {
config.Sample_type = perfSampleIdentifier
config.Read_format = unix.PERF_FORMAT_TOTAL_TIME_ENABLED | unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_ID
config.Bits = perfAttrBitsInherit | perfAttrBitsExcludeGuest
config.Size = uint32(unsafe.Sizeof(unix.PerfEventAttr{}))
}
@@ -259,15 +369,17 @@ func (c *collector) Destroy() {
c.cpuFilesLock.Lock()
defer c.cpuFilesLock.Unlock()
for name, files := range c.cpuFiles {
for cpu, file := range files {
klog.V(5).Infof("Closing perf_event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu)
err := file.Close()
if err != nil {
klog.Warningf("Unable to close perf_event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu)
for _, group := range c.cpuFiles {
for name, files := range group.cpuFiles {
for cpu, file := range files {
klog.V(5).Infof("Closing perf_event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu)
err := file.Close()
if err != nil {
klog.Warningf("Unable to close perf_event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu)
}
}
delete(group.cpuFiles, name)
}
delete(c.cpuFiles, name)
}
}
@@ -292,3 +404,27 @@ func mapEventsToCustomEvents(collector *collector) {
collector.eventToCustomEvent[event.Name] = &collector.events.Core.CustomEvents[key]
}
}
// createConfigFromRawEvent builds the perf_event_attr for a custom (raw) event
// through createPerfEventAttr, logging the input event and the resulting
// attribute at verbosity level 5.
func (c *collector) createConfigFromRawEvent(event *CustomEvent) *unix.PerfEventAttr {
	verbose := klog.V(5)
	verbose.Infof("Setting up raw perf event %#v", event)

	attr := createPerfEventAttr(*event)
	verbose.Infof("perf_event_attr: %#v", attr)

	return attr
}
// createConfigFromEvent builds the perf_event_attr for a named (non-custom)
// event by letting libpfm4 encode the event name via readPerfEventAttr.
//
// The returned attribute lives in memory allocated from C, so the caller must
// release it with C.free once the event has been registered.
//
// An error is returned when libpfm4 was not initialized or when the event name
// cannot be encoded.
func (c *collector) createConfigFromEvent(event Event) (*unix.PerfEventAttr, error) {
	// Same guard the uncore collector applies before calling into libpfm4:
	// without a successful pfm_initialize the encoding call cannot work.
	if !isLibpfmInitialized {
		return nil, fmt.Errorf("libpfm4 is not initialized, cannot proceed with setting perf events up")
	}

	klog.V(5).Infof("Setting up perf event %s", string(event))

	config, err := readPerfEventAttr(string(event), pfmGetOsEventEncoding)
	if err != nil {
		// readPerfEventAttr currently returns a nil attribute on error, which
		// makes this a no-op; kept so a non-nil result would not leak.
		C.free((unsafe.Pointer)(config))
		return nil, err
	}

	klog.V(5).Infof("perf_event_attr: %#v", config)

	return config, nil
}

View File

@@ -34,7 +34,7 @@ type PerfEvents struct {
type Events struct {
// List of perf events' names to be measured.
Events [][]Event `json:"events"`
Events []Group `json:"events"`
// List of custom perf events' to be measured. It is impossible to
// specify some events using their names and in such case you have
@@ -89,3 +89,39 @@ func parseConfig(file *os.File) (events PerfEvents, err error) {
}
return
}
// Group is one entry of the "events" list of the perf configuration: either a
// single event name or an array of event names.
type Group struct {
	// events holds the event names in the order given in the configuration.
	events []Event
	// array records whether the entry was written as a JSON array (true) or
	// as a bare JSON string (false).
	array bool
}

// UnmarshalJSON decodes a Group from either a JSON string (a single event) or
// a JSON array of strings (several events).
//
// It returns an error when b is not valid JSON, when an array element is not a
// string, or when the value is neither a string nor an array. On error the
// receiver is left untouched.
func (g *Group) UnmarshalJSON(b []byte) error {
	var jsonObj interface{}
	if err := json.Unmarshal(b, &jsonObj); err != nil {
		return err
	}

	switch obj := jsonObj.(type) {
	case string:
		*g = Group{
			events: []Event{Event(obj)},
			array:  false,
		}
		return nil
	case []interface{}:
		group := Group{
			events: make([]Event, 0, len(obj)),
			array:  true,
		}
		for _, v := range obj {
			value, ok := v.(string)
			if !ok {
				// Report the offending element itself: after a failed type
				// assertion value is always the empty string.
				return fmt.Errorf("cannot unmarshal %v", v)
			}
			group.events = append(group.events, Event(value))
		}
		*g = group
		return nil
	}
	return fmt.Errorf("unsupported type")
}

View File

@@ -23,48 +23,44 @@ import (
info "github.com/google/cadvisor/info/v1"
"github.com/google/cadvisor/stats"
"github.com/google/cadvisor/utils/sysinfo"
)
type manager struct {
events PerfEvents
numCores int
topology []info.Node
events PerfEvents
onlineCPUs []int
cpuToSocket map[int]int
stats.NoopDestroy
}
func NewManager(configFile string, numCores int, topology []info.Node) (stats.Manager, error) {
func NewManager(configFile string, topology []info.Node) (stats.Manager, error) {
if configFile == "" {
return &stats.NoopManager{}, nil
}
file, err := os.Open(configFile)
if err != nil {
return nil, fmt.Errorf("Unable to read configuration file %q: %q", configFile, err)
return nil, fmt.Errorf("unable to read configuration file %q: %w", configFile, err)
}
config, err := parseConfig(file)
if err != nil {
return nil, fmt.Errorf("Unable to read configuration file %q: %q", configFile, err)
return nil, fmt.Errorf("unable to parse configuration file %q: %w", configFile, err)
}
if areGroupedEventsUsed(config) {
return nil, fmt.Errorf("event grouping is not supported you must modify config file at %s", configFile)
onlineCPUs := sysinfo.GetOnlineCPUs(topology)
cpuToSocket := make(map[int]int)
for _, cpu := range onlineCPUs {
cpuToSocket[cpu] = sysinfo.GetSocketFromCPU(topology, cpu)
}
return &manager{events: config, numCores: numCores, topology: topology}, nil
}
func areGroupedEventsUsed(events PerfEvents) bool {
for _, group := range events.Core.Events {
if len(group) > 1 {
return true
}
}
return false
return &manager{events: config, onlineCPUs: onlineCPUs, cpuToSocket: cpuToSocket}, nil
}
func (m *manager) GetCollector(cgroupPath string) (stats.Collector, error) {
collector := newCollector(cgroupPath, m.events, m.numCores, m.topology)
collector := newCollector(cgroupPath, m.events, m.onlineCPUs, m.cpuToSocket)
err := collector.setup()
if err != nil {
collector.Destroy()

View File

@@ -24,7 +24,7 @@ import (
"k8s.io/klog/v2"
)
func NewManager(configFile string, numCores int, topology []info.Node) (stats.Manager, error) {
func NewManager(configFile string, topology []info.Node) (stats.Manager, error) {
klog.V(1).Info("cAdvisor is build without cgo and/or libpfm support. Perf event counters are not available.")
return &stats.NoopManager{}, nil
}

View File

@@ -23,18 +23,17 @@ import (
"unsafe"
)
const (
perfSampleIdentifier = 1 << 16
perfAttrBitsInherit = 1 << 1
perfAttrBitsExcludeGuest = 1 << 20
)
// ReadFormat allows to read perf event's value for non-grouped events
type ReadFormat struct {
Value uint64 /* The value of the event */
// GroupReadFormat allows to read perf event's values for grouped events.
// See https://man7.org/linux/man-pages/man2/perf_event_open.2.html section "Reading results" with PERF_FORMAT_GROUP specified.
type GroupReadFormat struct {
Nr uint64 /* The number of events */
TimeEnabled uint64 /* if PERF_FORMAT_TOTAL_TIME_ENABLED */
TimeRunning uint64 /* if PERF_FORMAT_TOTAL_TIME_RUNNING */
ID uint64 /* if PERF_FORMAT_ID */
}
// Values is one per-event record of a grouped perf read; the records follow
// the fixed-size group header and are decoded as little-endian uint64 pairs.
type Values struct {
Value uint64 /* The value of the event */
ID uint64 /* if PERF_FORMAT_ID */
}
// pfmPerfEncodeArgT represents structure that is used to parse perf event nam

View File

@@ -23,12 +23,11 @@ package perf
// #include <stdlib.h>
import "C"
import (
"bytes"
"encoding/binary"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"reflect"
"regexp"
"strconv"
"strings"
@@ -40,7 +39,6 @@ import (
info "github.com/google/cadvisor/info/v1"
"github.com/google/cadvisor/stats"
"github.com/google/cadvisor/utils/sysinfo"
)
type pmu struct {
@@ -55,9 +53,10 @@ const (
pmuCpumaskFilename = "cpumask"
systemDevicesPath = "/sys/devices"
rootPerfEventPath = "/sys/fs/cgroup/perf_event"
uncorePID = -1
)
func getPMU(pmus []pmu, gotType uint32) (*pmu, error) {
func getPMU(pmus uncorePMUs, gotType uint32) (*pmu, error) {
for _, pmu := range pmus {
if pmu.typeOf == gotType {
return &pmu, nil
@@ -98,7 +97,7 @@ func readUncorePMU(path string, name string, cpumaskRegexp *regexp.Regexp) (*pmu
}
func getUncorePMUs(devicesPath string) (uncorePMUs, error) {
pmus := make(uncorePMUs, 0)
pmus := make(uncorePMUs)
// Depends on platform, cpu mask could be for example in form "0-1" or "0,1".
cpumaskRegexp := regexp.MustCompile("[-,\n]")
@@ -126,32 +125,33 @@ func getUncorePMUs(devicesPath string) (uncorePMUs, error) {
}
type uncoreCollector struct {
cpuFiles map[string]map[string]map[int]readerCloser
cpuFilesLock sync.Mutex
events [][]Event
cpuFiles map[int]map[string]group
events []Group
eventToCustomEvent map[Event]*CustomEvent
topology []info.Node
cpuToSocket map[int]int
// Handle for mocking purposes.
perfEventOpen func(attr *unix.PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error)
ioctlSetInt func(fd int, req uint, value int) error
}
func NewUncoreCollector(cgroupPath string, events PerfEvents, topology []info.Node) stats.Collector {
func NewUncoreCollector(cgroupPath string, events PerfEvents, cpuToSocket map[int]int) stats.Collector {
if cgroupPath != rootPerfEventPath {
// Uncore metric doesn't exists for cgroups, only for entire platform.
return &stats.NoopCollector{}
}
collector := &uncoreCollector{topology: topology}
// Default implementation of Linux perf_event_open function.
collector.perfEventOpen = unix.PerfEventOpen
collector := &uncoreCollector{
cpuToSocket: cpuToSocket,
perfEventOpen: unix.PerfEventOpen,
ioctlSetInt: unix.IoctlSetInt,
}
err := collector.setup(events, systemDevicesPath)
if err != nil {
formatedError := fmt.Errorf("unable to setup uncore perf event collector: %v", err)
klog.V(5).Infof("Perf uncore metrics will not be available: %s", formatedError)
klog.Errorf("Perf uncore metrics will not be available: unable to setup uncore perf event collector: %v", err)
return &stats.NoopCollector{}
}
@@ -159,49 +159,100 @@ func NewUncoreCollector(cgroupPath string, events PerfEvents, topology []info.No
}
func (c *uncoreCollector) setup(events PerfEvents, devicesPath string) error {
var err error
readUncorePMUs, err := getUncorePMUs(devicesPath)
if err != nil {
return err
}
// Mapping from event name, pmu type, cpu.
c.cpuFiles = make(map[string]map[string]map[int]readerCloser)
c.cpuFiles = make(map[int]map[string]group)
c.events = events.Uncore.Events
c.eventToCustomEvent = parseUncoreEvents(events.Uncore)
c.cpuFilesLock.Lock()
defer c.cpuFilesLock.Unlock()
for _, group := range c.events {
if len(group) > 1 {
klog.Warning("grouping uncore perf events is not supported!")
continue
}
eventName, pmuPrefix := parseEventName(string(group[0]))
var err error
customEvent, ok := c.eventToCustomEvent[group[0]]
if ok {
if customEvent.Type != 0 {
pmus := obtainPMUs("uncore", readUncorePMUs)
err = c.setupRawNonGroupedUncore(customEvent, pmus)
} else {
pmus := obtainPMUs(pmuPrefix, readUncorePMUs)
err = c.setupRawNonGroupedUncore(customEvent, pmus)
}
} else {
pmus := obtainPMUs(pmuPrefix, readUncorePMUs)
err = c.setupNonGroupedUncore(eventName, pmus)
}
for i, group := range c.events {
// Check what PMUs are needed.
groupPMUs, err := parsePMUs(group, readUncorePMUs, c.eventToCustomEvent)
if err != nil {
return err
}
err = checkGroup(group, groupPMUs)
if err != nil {
return err
}
// CPUs file descriptors of group leader needed for perf_event_open.
leaderFileDescriptors := make(map[string]map[uint32]int)
for _, pmu := range readUncorePMUs {
leaderFileDescriptors[pmu.name] = make(map[uint32]int)
for _, cpu := range pmu.cpus {
leaderFileDescriptors[pmu.name][cpu] = groupLeaderFileDescriptor
}
}
for _, event := range group.events {
eventName, _ := parseEventName(string(event))
customEvent, ok := c.eventToCustomEvent[event]
if ok {
err = c.setupRawEvent(customEvent, groupPMUs[event], i, leaderFileDescriptors)
} else {
err = c.setupEvent(eventName, groupPMUs[event], i, leaderFileDescriptors)
}
if err != nil {
return err
}
}
// Group is prepared so we should reset and enable counting.
for _, pmuCPUs := range leaderFileDescriptors {
for _, fd := range pmuCPUs {
// Call only for used PMUs.
if fd != groupLeaderFileDescriptor {
err = c.ioctlSetInt(fd, unix.PERF_EVENT_IOC_RESET, 0)
if err != nil {
return err
}
err = c.ioctlSetInt(fd, unix.PERF_EVENT_IOC_ENABLE, 0)
if err != nil {
return err
}
}
}
}
}
return nil
}
// checkGroup validates the PMUs resolved for the events of one configured group.
//
// For an entry written as a single event, the event must resolve to at least
// one PMU. For an entry written as an array, no event may resolve to more than
// one PMU, and every event that resolves to exactly one PMU must resolve to
// the same PMU as the others; events that resolve to none are ignored.
func checkGroup(group Group, eventPMUs map[Event]uncorePMUs) error {
	if !group.array {
		only := group.events[0]
		if len(eventPMUs[only]) < 1 {
			return fmt.Errorf("the event %q don't have any PMU to count with", only)
		}
		return nil
	}

	// PMU set of the first event that resolved to exactly one PMU.
	var shared uncorePMUs
	for _, event := range group.events {
		candidates := eventPMUs[event]
		switch {
		case len(candidates) > 1:
			return fmt.Errorf("the events in group usually have to be from single PMU, try reorganizing the \"%v\" group", group.events)
		case len(candidates) == 0:
			// Nothing to compare for this event.
		case shared == nil:
			shared = candidates
		case !reflect.DeepEqual(shared, candidates):
			return fmt.Errorf("the events in group usually have to be from the same PMU, try reorganizing the \"%v\" group", group.events)
		}
	}
	return nil
}
func parseEventName(eventName string) (string, string) {
// First "/" separate pmu prefix and event name
// ex. "uncore_imc_0/cas_count_read" -> uncore_imc_0 and cas_count_read.
@@ -214,14 +265,35 @@ func parseEventName(eventName string) (string, string) {
return eventName, pmuPrefix
}
func obtainPMUs(want string, gotPMUs uncorePMUs) []pmu {
var pmus []pmu
// parsePMUs resolves, for every event of group, the set of PMUs it should be
// counted on.
//
// A custom event with a non-zero Type is bound to the single PMU of that type
// (looked up with getPMU; a lookup failure aborts with its error). Every other
// event is matched against pmus by the PMU prefix parsed from its name.
func parsePMUs(group Group, pmus uncorePMUs, customEvents map[Event]*CustomEvent) (map[Event]uncorePMUs, error) {
	resolved := make(map[Event]uncorePMUs)

	for _, event := range group.events {
		_, prefix := parseEventName(string(event))

		if custom, isCustom := customEvents[event]; isCustom && custom.Type != 0 {
			typed, err := getPMU(pmus, custom.Type)
			if err != nil {
				return nil, err
			}
			resolved[event] = uncorePMUs{typed.name: *typed}
			continue
		}

		resolved[event] = obtainPMUs(prefix, pmus)
	}

	return resolved, nil
}
func obtainPMUs(want string, gotPMUs uncorePMUs) uncorePMUs {
pmus := make(uncorePMUs)
if want == "" {
return pmus
}
for _, pmu := range gotPMUs {
if strings.HasPrefix(pmu.name, want) {
pmus = append(pmus, pmu)
pmus[pmu.name] = pmu
}
}
@@ -230,11 +302,13 @@ func obtainPMUs(want string, gotPMUs uncorePMUs) []pmu {
func parseUncoreEvents(events Events) map[Event]*CustomEvent {
eventToCustomEvent := map[Event]*CustomEvent{}
for _, uncoreEvent := range events.Events {
for _, customEvent := range events.CustomEvents {
if uncoreEvent[0] == customEvent.Name {
eventToCustomEvent[customEvent.Name] = &customEvent
break
for _, group := range events.Events {
for _, uncoreEvent := range group.events {
for _, customEvent := range events.CustomEvents {
if uncoreEvent == customEvent.Name {
eventToCustomEvent[customEvent.Name] = &customEvent
break
}
}
}
}
@@ -246,34 +320,37 @@ func (c *uncoreCollector) Destroy() {
c.cpuFilesLock.Lock()
defer c.cpuFilesLock.Unlock()
for name, pmus := range c.cpuFiles {
for pmu, cpus := range pmus {
for cpu, file := range cpus {
klog.V(5).Infof("Closing uncore perf_event file descriptor for event %q, PMU %s and CPU %d", name, pmu, cpu)
err := file.Close()
if err != nil {
klog.Warningf("Unable to close perf_event file descriptor for event %q, PMU %s and CPU %d", name, pmu, cpu)
for groupIndex, groupPMUs := range c.cpuFiles {
for pmu, group := range groupPMUs {
for name, cpus := range group.cpuFiles {
for cpu, file := range cpus {
klog.V(5).Infof("Closing uncore perf_event file descriptor for event %q, PMU %s and CPU %d", name, pmu, cpu)
err := file.Close()
if err != nil {
klog.Warningf("Unable to close perf_event file descriptor for event %q, PMU %s and CPU %d", name, pmu, cpu)
}
}
delete(group.cpuFiles, name)
}
delete(pmus, pmu)
delete(groupPMUs, pmu)
}
delete(c.cpuFiles, name)
delete(c.cpuFiles, groupIndex)
}
}
func (c *uncoreCollector) UpdateStats(stats *info.ContainerStats) error {
klog.V(5).Info("Attempting to update uncore perf_event stats")
for name, pmus := range c.cpuFiles {
for pmu, cpus := range pmus {
for cpu, file := range cpus {
stat, err := readPerfUncoreStat(file, name, cpu, pmu, c.topology)
for _, groupPMUs := range c.cpuFiles {
for pmu, group := range groupPMUs {
for cpu, file := range group.cpuFiles[group.leaderName] {
stat, err := readPerfUncoreStat(file, group, cpu, pmu, c.cpuToSocket)
if err != nil {
return fmt.Errorf("unable to read from uncore perf_event_file (event: %q, CPU: %d, PMU: %s): %q", name, cpu, pmu, err.Error())
klog.Warningf("Unable to read from perf_event_file (event: %q, CPU: %d) for %q: %q", group.leaderName, cpu, pmu, err.Error())
continue
}
klog.V(5).Infof("Read uncore perf event (event: %q, CPU: %d, PMU: %s): %d", name, cpu, pmu, stat.Value)
stats.PerfUncoreStats = append(stats.PerfUncoreStats, *stat)
stats.PerfUncoreStats = append(stats.PerfUncoreStats, stat...)
}
}
}
@@ -281,112 +358,144 @@ func (c *uncoreCollector) UpdateStats(stats *info.ContainerStats) error {
return nil
}
func (c *uncoreCollector) setupRawNonGroupedUncore(event *CustomEvent, pmus []pmu) error {
klog.V(5).Infof("Setting up non-grouped raw perf uncore event %#v", event)
if event.Type == 0 {
// PMU isn't set. Register event for all PMUs.
for _, pmu := range pmus {
newEvent := CustomEvent{
Type: pmu.typeOf,
Config: event.Config,
Name: event.Name,
}
config := createPerfEventAttr(newEvent)
err := c.registerUncoreEvent(config, string(newEvent.Name), pmu.cpus, pmu.name)
if err != nil {
return err
}
}
return nil
} else {
// Register event for the PMU.
config := createPerfEventAttr(*event)
pmu, err := getPMU(pmus, event.Type)
if err != nil {
return err
}
return c.registerUncoreEvent(config, string(event.Name), pmu.cpus, pmu.name)
func (c *uncoreCollector) setupEvent(name string, pmus uncorePMUs, groupIndex int, leaderFileDescriptors map[string]map[uint32]int) error {
if !isLibpfmInitialized {
return fmt.Errorf("libpfm4 is not initialized, cannot proceed with setting perf events up")
}
}
func (c *uncoreCollector) setupNonGroupedUncore(name string, pmus []pmu) error {
perfEventAttr, err := getPerfEventAttr(name)
klog.V(5).Infof("Setting up uncore perf event %s", name)
config, err := readPerfEventAttr(name, pfmGetOsEventEncoding)
if err != nil {
C.free((unsafe.Pointer)(config))
return err
}
defer C.free(unsafe.Pointer(perfEventAttr))
klog.V(5).Infof("Setting up non-grouped uncore perf event %s", name)
// Register event for all memory controllers.
for _, pmu := range pmus {
perfEventAttr.Type = pmu.typeOf
err = c.registerUncoreEvent(perfEventAttr, name, pmu.cpus, pmu.name)
config.Type = pmu.typeOf
isGroupLeader := leaderFileDescriptors[pmu.name][pmu.cpus[0]] == groupLeaderFileDescriptor
setAttributes(config, isGroupLeader)
leaderFileDescriptors[pmu.name], err = c.registerEvent(eventInfo{name, config, uncorePID, groupIndex, isGroupLeader}, pmu, leaderFileDescriptors[pmu.name])
if err != nil {
return err
}
}
// Clean memory allocated by C code.
C.free(unsafe.Pointer(config))
return nil
}
func (c *uncoreCollector) registerUncoreEvent(config *unix.PerfEventAttr, name string, cpus []uint32, pmu string) error {
for _, cpu := range cpus {
groupFd, pid, flags := -1, -1, 0
fd, err := c.perfEventOpen(config, pid, int(cpu), groupFd, flags)
func (c *uncoreCollector) registerEvent(eventInfo eventInfo, pmu pmu, leaderFileDescriptors map[uint32]int) (map[uint32]int, error) {
newLeaderFileDescriptors := make(map[uint32]int)
isGroupLeader := false
for _, cpu := range pmu.cpus {
groupFd, flags := leaderFileDescriptors[cpu], 0
fd, err := c.perfEventOpen(eventInfo.config, eventInfo.pid, int(cpu), groupFd, flags)
if err != nil {
return fmt.Errorf("setting up perf event %#v failed: %q", config, err)
return nil, fmt.Errorf("setting up perf event %#v failed: %q | (pmu: %q, groupFd: %d, cpu: %d)", eventInfo.config, err, pmu, groupFd, cpu)
}
perfFile := os.NewFile(uintptr(fd), name)
perfFile := os.NewFile(uintptr(fd), eventInfo.name)
if perfFile == nil {
return fmt.Errorf("unable to create os.File from file descriptor %#v", fd)
return nil, fmt.Errorf("unable to create os.File from file descriptor %#v", fd)
}
c.addEventFile(name, pmu, int(cpu), perfFile)
c.addEventFile(eventInfo.groupIndex, eventInfo.name, pmu.name, int(cpu), perfFile)
// If group leader, save fd for others.
if leaderFileDescriptors[cpu] == groupLeaderFileDescriptor {
newLeaderFileDescriptors[cpu] = fd
isGroupLeader = true
}
}
if isGroupLeader {
return newLeaderFileDescriptors, nil
}
return leaderFileDescriptors, nil
}
// addEventFile stores perfFile under group index -> PMU -> event name -> CPU,
// creating the intermediate entries on first use.
//
// The first event stored for a given (index, pmu) pair becomes that group's
// leaderName, and each event name is recorded once in the group's names list,
// in order of first registration.
func (c *uncoreCollector) addEventFile(index int, name string, pmu string, cpu int, perfFile *os.File) {
	byPMU, found := c.cpuFiles[index]
	if !found {
		byPMU = map[string]group{}
		c.cpuFiles[index] = byPMU
	}

	entry, found := byPMU[pmu]
	if !found {
		entry = group{
			cpuFiles:   map[string]map[int]readerCloser{},
			leaderName: name,
		}
		byPMU[pmu] = entry
	}

	files, found := entry.cpuFiles[name]
	if !found {
		files = map[int]readerCloser{}
		entry.cpuFiles[name] = files
	}
	files[cpu] = perfFile

	// Nothing more to do when the name is already recorded.
	for _, known := range entry.names {
		if known == name {
			return
		}
	}

	// group is stored by value, so write the extended copy back into the map.
	entry.names = append(entry.names, name)
	byPMU[pmu] = entry
}
// setupRawEvent registers a custom (raw) uncore event on every PMU in pmus.
//
// For each PMU a copy of the event is created with Type set to the PMU's type,
// turned into a perf_event_attr and registered through registerEvent. The
// event is treated as group leader on a PMU when the descriptor recorded for
// that PMU's first CPU is still groupLeaderFileDescriptor. The descriptors
// returned by registerEvent replace the PMU's entry in leaderFileDescriptors.
// The first registration error stops the loop and is returned.
func (c *uncoreCollector) setupRawEvent(event *CustomEvent, pmus uncorePMUs, groupIndex int, leaderFileDescriptors map[string]map[uint32]int) error {
	klog.V(5).Infof("Setting up raw perf uncore event %#v", event)

	for _, pmu := range pmus {
		perPMU := CustomEvent{
			Type:   pmu.typeOf,
			Config: event.Config,
			Name:   event.Name,
		}
		attr := createPerfEventAttr(perPMU)

		leads := leaderFileDescriptors[pmu.name][pmu.cpus[0]] == groupLeaderFileDescriptor
		setAttributes(attr, leads)

		info := eventInfo{
			name:          string(perPMU.Name),
			config:        attr,
			pid:           uncorePID,
			groupIndex:    groupIndex,
			isGroupLeader: leads,
		}
		descriptors, err := c.registerEvent(info, pmu, leaderFileDescriptors[pmu.name])
		// Stored before the error check, so the entry is overwritten on failure too.
		leaderFileDescriptors[pmu.name] = descriptors
		if err != nil {
			return err
		}
	}

	return nil
}
func (c *uncoreCollector) addEventFile(name string, pmu string, cpu int, perfFile *os.File) {
_, ok := c.cpuFiles[name]
if !ok {
c.cpuFiles[name] = map[string]map[int]readerCloser{}
}
_, ok = c.cpuFiles[name][pmu]
if !ok {
c.cpuFiles[name][pmu] = map[int]readerCloser{}
}
c.cpuFiles[name][pmu][cpu] = perfFile
}
func readPerfUncoreStat(file readerCloser, name string, cpu int, pmu string, topology []info.Node) (*info.PerfUncoreStat, error) {
buf := make([]byte, 32)
_, err := file.Read(buf)
if err != nil {
return nil, err
}
perfData := &ReadFormat{}
reader := bytes.NewReader(buf)
err = binary.Read(reader, binary.LittleEndian, perfData)
func readPerfUncoreStat(file readerCloser, group group, cpu int, pmu string, cpuToSocket map[int]int) ([]info.PerfUncoreStat, error) {
values, err := getPerfValues(file, group)
if err != nil {
return nil, err
}
scalingRatio := 1.0
if perfData.TimeEnabled != 0 {
scalingRatio = float64(perfData.TimeRunning) / float64(perfData.TimeEnabled)
socket, ok := cpuToSocket[cpu]
if !ok {
// Socket is unknown.
socket = -1
}
stat := info.PerfUncoreStat{
Value: uint64(float64(perfData.Value) / scalingRatio),
Name: name,
ScalingRatio: scalingRatio,
Socket: sysinfo.GetSocketFromCPU(topology, cpu),
PMU: pmu,
perfUncoreStats := make([]info.PerfUncoreStat, len(values))
for i, value := range values {
klog.V(5).Infof("Read metric for event %q for cpu %d from pmu %q: %d", value.Name, cpu, pmu, value.Value)
perfUncoreStats[i] = info.PerfUncoreStat{
PerfValue: value,
Socket: socket,
PMU: pmu,
}
}
return &stat, nil
return perfUncoreStats, nil
}