Updating dependency github.com/google/cadvisor to version 6a8d614

Signed-off-by: Davanum Srinivas <davanum@gmail.com>
Davanum Srinivas 2020-05-14 17:29:52 -04:00
parent 449810c785
commit 082578c22f
109 changed files with 3417 additions and 1312 deletions

go.mod

@@ -26,9 +26,6 @@ require (
github.com/clusterhq/flocker-go v0.0.0-20160920122132-2b8b7259d313
github.com/codegangsta/negroni v1.0.0 // indirect
github.com/container-storage-interface/spec v1.2.0
github.com/containerd/console v1.0.0 // indirect
github.com/containerd/ttrpc v1.0.0 // indirect
github.com/containerd/typeurl v1.0.0 // indirect
github.com/containernetworking/cni v0.7.1
github.com/coredns/corefile-migration v1.0.6
github.com/coreos/go-oidc v2.1.0+incompatible
@@ -55,7 +52,7 @@ require (
github.com/gogo/protobuf v1.3.1
github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef
github.com/golang/mock v1.3.1
github.com/google/cadvisor v0.36.1-0.20200323171535-8af10c683a96
github.com/google/cadvisor v0.36.1-0.20200513160741-6a8d61401ea9
github.com/google/go-cmp v0.4.0
github.com/google/gofuzz v1.1.0
github.com/google/uuid v1.1.1
@@ -80,7 +77,6 @@ require (
github.com/onsi/gomega v1.7.0
github.com/opencontainers/go-digest v1.0.0-rc1
github.com/opencontainers/runc v1.0.0-rc10
github.com/opencontainers/runtime-spec v1.0.2 // indirect
github.com/opencontainers/selinux v1.3.3
github.com/pkg/errors v0.9.1
github.com/pmezard/go-difflib v1.0.0
@@ -264,7 +260,7 @@ replace (
github.com/golangplus/fmt => github.com/golangplus/fmt v0.0.0-20150411045040-2a5d6d7d2995
github.com/golangplus/testing => github.com/golangplus/testing v0.0.0-20180327235837-af21d9c3145e
github.com/google/btree => github.com/google/btree v1.0.0
github.com/google/cadvisor => github.com/google/cadvisor v0.36.1-0.20200323171535-8af10c683a96
github.com/google/cadvisor => github.com/google/cadvisor v0.36.1-0.20200513160741-6a8d61401ea9
github.com/google/go-cmp => github.com/google/go-cmp v0.4.0
github.com/google/gofuzz => github.com/google/gofuzz v1.1.0
github.com/google/martian => github.com/google/martian v2.1.0+incompatible

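For context on the version strings above: a Go pseudo-version such as v0.36.1-0.20200513160741-6a8d61401ea9 encodes a base version (here a v0.36.1 pre-release), a UTC commit timestamp (2020-05-13 16:07:41), and a 12-character commit prefix, which is why the commit title says "version 6a8d614". A minimal, standard-library-only sketch that splits such a string into those parts (real tooling should use golang.org/x/mod; this does no validation):

package main

import (
	"fmt"
	"strings"
)

// splitPseudoVersion breaks "vX.Y.Z-<counter>.<timestamp>-<rev>" into its
// base version, commit timestamp, and commit-hash prefix. Illustrative only.
func splitPseudoVersion(v string) (base, stamp, rev string) {
	parts := strings.Split(v, "-")
	if len(parts) < 3 {
		return v, "", ""
	}
	rev = parts[len(parts)-1]
	ts := strings.Split(parts[len(parts)-2], ".")
	stamp = ts[len(ts)-1]
	base = strings.Join(parts[:len(parts)-2], "-")
	return base, stamp, rev
}

func main() {
	fmt.Println(splitPseudoVersion("v0.36.1-0.20200513160741-6a8d61401ea9"))
	// Prints: v0.36.1 20200513160741 6a8d61401ea9
}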
go.sum

@@ -208,8 +208,8 @@ github.com/golangplus/testing v0.0.0-20180327235837-af21d9c3145e h1:KhcknUwkWHKZ
github.com/golangplus/testing v0.0.0-20180327235837-af21d9c3145e/go.mod h1:0AA//k/eakGydO4jKRoRL2j92ZKSzTgj9tclaCrvXHk=
github.com/google/btree v1.0.0 h1:0udJVsspx3VBr5FwtLhQQtuAsVc79tTq0ocGIPAU6qo=
github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/cadvisor v0.36.1-0.20200323171535-8af10c683a96 h1:au7bcM+rjGXLBSfqjofcSONBre8tlIy94jEbp40BCOQ=
github.com/google/cadvisor v0.36.1-0.20200323171535-8af10c683a96/go.mod h1:OTAGfVYYYA6MI5yPZ1/81PGoh6gfXpcQIyb3mXX7C3M=
github.com/google/cadvisor v0.36.1-0.20200513160741-6a8d61401ea9 h1:VBfxIHDFErkDGv/tzgGzmoAH29xtytCrFJg7+h6h168=
github.com/google/cadvisor v0.36.1-0.20200513160741-6a8d61401ea9/go.mod h1:F+aV0KEeSUhGOVA6ADAi8/EJGMQgy6Cw/YWz1XpzDd0=
github.com/google/go-cmp v0.4.0 h1:xsAVV57WRhGj6kEIi8ReJzQlHHqcBYCElAvkovg3B/4=
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/gofuzz v1.1.0 h1:Hsa8mG0dQ46ij8Sl2AYJDUv1oA9/d6Vk+3LG99Oe02g=

vendor/BUILD

@@ -169,6 +169,8 @@ filegroup(
"//vendor/github.com/google/cadvisor/machine:all-srcs",
"//vendor/github.com/google/cadvisor/manager:all-srcs",
"//vendor/github.com/google/cadvisor/metrics:all-srcs",
"//vendor/github.com/google/cadvisor/nvm:all-srcs",
"//vendor/github.com/google/cadvisor/perf:all-srcs",
"//vendor/github.com/google/cadvisor/stats:all-srcs",
"//vendor/github.com/google/cadvisor/storage:all-srcs",
"//vendor/github.com/google/cadvisor/summary:all-srcs",

@@ -10,7 +10,7 @@ go_library(
"//vendor/github.com/google/cadvisor/info/v1:go_default_library",
"//vendor/github.com/google/cadvisor/stats:go_default_library",
"//vendor/github.com/mindprince/gonvml:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
],
)

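Most files in this commit carry the same one-line change as this BUILD rule: the vendored cAdvisor tree moves from k8s.io/klog to k8s.io/klog/v2. The v2 module keeps the package name and logging API, so only the import path changes; a small sketch:

package main

import (
	"errors"

	// Previously: import "k8s.io/klog". The v2 module still declares
	// package klog, so existing call sites compile unchanged.
	"k8s.io/klog/v2"
)

func main() {
	klog.V(4).Info("No NVIDIA devices found.")
	klog.Warningf("Error reading %q: %v", "/sys/bus/pci/devices/", errors.New("permission denied"))
	klog.Flush()
}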
@@ -28,7 +28,7 @@ import (
"github.com/google/cadvisor/stats"
"github.com/mindprince/gonvml"
"k8s.io/klog"
"k8s.io/klog/v2"
)
type nvidiaManager struct {
@@ -46,26 +46,32 @@ type nvidiaManager struct {
var sysFsPCIDevicesPath = "/sys/bus/pci/devices/"
const nvidiaVendorId = "0x10de"
const nvidiaVendorID = "0x10de"
func NewNvidiaManager() stats.Manager {
return &nvidiaManager{}
manager := &nvidiaManager{}
err := manager.setup()
if err != nil {
klog.Warningf("NVidia GPU metrics will not be available: %s", err)
manager.Destroy()
return &stats.NoopManager{}
}
return manager
}
// Setup initializes NVML if nvidia devices are present on the node.
func (nm *nvidiaManager) Setup() {
if !detectDevices(nvidiaVendorId) {
klog.V(4).Info("No NVIDIA devices found.")
return
// setup initializes NVML if nvidia devices are present on the node.
func (nm *nvidiaManager) setup() error {
if !detectDevices(nvidiaVendorID) {
return fmt.Errorf("no NVIDIA devices found")
}
nm.devicesPresent = true
initializeNVML(nm)
return initializeNVML(nm)
}
// detectDevices returns true if a device with given pci id is present on the node.
func detectDevices(vendorId string) bool {
func detectDevices(vendorID string) bool {
devices, err := ioutil.ReadDir(sysFsPCIDevicesPath)
if err != nil {
klog.Warningf("Error reading %q: %v", sysFsPCIDevicesPath, err)
@@ -79,8 +85,8 @@ func detectDevices(vendorId string) bool {
klog.V(4).Infof("Error while reading %q: %v", vendorPath, err)
continue
}
if strings.EqualFold(strings.TrimSpace(string(content)), vendorId) {
klog.V(3).Infof("Found device with vendorId %q", vendorId)
if strings.EqualFold(strings.TrimSpace(string(content)), vendorID) {
klog.V(3).Infof("Found device with vendorID %q", vendorID)
return true
}
}
@@ -89,40 +95,43 @@ func detectDevices(vendorId string) bool {
// initializeNVML initializes the NVML library and sets up the nvmlDevices map.
// This is defined as a variable to help in testing.
var initializeNVML = func(nm *nvidiaManager) {
var initializeNVML = func(nm *nvidiaManager) error {
if err := gonvml.Initialize(); err != nil {
// This is under a logging level because otherwise we may cause
// log spam if the drivers/nvml is not installed on the system.
klog.V(4).Infof("Could not initialize NVML: %v", err)
return
return fmt.Errorf("Could not initialize NVML: %v", err)
}
nm.nvmlInitialized = true
numDevices, err := gonvml.DeviceCount()
if err != nil {
klog.Warningf("GPU metrics would not be available. Failed to get the number of nvidia devices: %v", err)
return
return fmt.Errorf("GPU metrics would not be available. Failed to get the number of nvidia devices: %v", err)
}
if numDevices == 0 {
return nil
}
klog.V(1).Infof("NVML initialized. Number of nvidia devices: %v", numDevices)
nm.nvidiaDevices = make(map[int]gonvml.Device, numDevices)
for i := 0; i < int(numDevices); i++ {
device, err := gonvml.DeviceHandleByIndex(uint(i))
if err != nil {
klog.Warningf("Failed to get nvidia device handle %d: %v", i, err)
continue
return fmt.Errorf("Failed to get nvidia device handle %d: %v", i, err)
}
minorNumber, err := device.MinorNumber()
if err != nil {
klog.Warningf("Failed to get nvidia device minor number: %v", err)
continue
return fmt.Errorf("Failed to get nvidia device minor number: %v", err)
}
nm.nvidiaDevices[int(minorNumber)] = device
}
return nil
}
// Destroy shuts down NVML.
func (nm *nvidiaManager) Destroy() {
if nm.nvmlInitialized {
gonvml.Shutdown()
err := gonvml.Shutdown()
if err != nil {
klog.Warningf("nvml library shutdown failed: %s", err)
}
}
}
@@ -132,27 +141,31 @@ func (nm *nvidiaManager) GetCollector(devicesCgroupPath string) (stats.Collector
nc := &nvidiaCollector{}
if !nm.devicesPresent {
return nc, nil
return &stats.NoopCollector{}, nil
}
// Makes sure that we don't call initializeNVML() concurrently and
// that we only call initializeNVML() when it's not initialized.
nm.Lock()
if !nm.nvmlInitialized {
initializeNVML(nm)
}
if !nm.nvmlInitialized || len(nm.nvidiaDevices) == 0 {
nm.Unlock()
return nc, nil
err := initializeNVML(nm)
if err != nil {
nm.Unlock()
return &stats.NoopCollector{}, err
}
}
nm.Unlock()
if len(nm.nvidiaDevices) == 0 {
return &stats.NoopCollector{}, nil
}
nvidiaMinorNumbers, err := parseDevicesCgroup(devicesCgroupPath)
if err != nil {
return nc, err
return &stats.NoopCollector{}, err
}
for _, minor := range nvidiaMinorNumbers {
device, ok := nm.nvidiaDevices[minor]
if !ok {
return nc, fmt.Errorf("nvidia device minor number %d not found in cached devices", minor)
return &stats.NoopCollector{}, fmt.Errorf("nvidia device minor number %d not found in cached devices", minor)
}
nc.devices = append(nc.devices, device)
}
@@ -216,6 +229,8 @@ var parseDevicesCgroup = func(devicesCgroupPath string) ([]int, error) {
type nvidiaCollector struct {
// Exposed for testing
devices []gonvml.Device
stats.NoopDestroy
}
func NewNvidiaCollector(devices []gonvml.Device) stats.Collector {

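The constructor change above is the substantive part of this file: instead of an exported Setup() that only logged failures, NewNvidiaManager now runs setup itself and falls back to stats.NoopManager, so callers always receive a usable stats.Manager. A standalone sketch of that fallback pattern (the interface is reduced to one method here; names other than NoopManager are illustrative):

package main

import "fmt"

// Manager is the capability callers need; a no-op value is the safe default.
type Manager interface{ Name() string }

type NoopManager struct{}

func (NoopManager) Name() string { return "noop" }

type gpuManager struct{}

func (gpuManager) Name() string { return "gpu" }

// NewManager hides setup failures behind the no-op implementation, so
// callers never branch on "is GPU support available?".
func NewManager(setup func() error) Manager {
	if err := setup(); err != nil {
		fmt.Printf("GPU metrics will not be available: %v\n", err)
		return NoopManager{}
	}
	return gpuManager{}
}

func main() {
	m := NewManager(func() error { return fmt.Errorf("no NVIDIA devices found") })
	fmt.Println(m.Name()) // noop
}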
@@ -10,7 +10,7 @@ go_library(
"//vendor/github.com/google/cadvisor/info/v1:go_default_library",
"//vendor/github.com/google/cadvisor/storage:go_default_library",
"//vendor/github.com/google/cadvisor/utils:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
],
)

@@ -23,7 +23,7 @@ import (
"github.com/google/cadvisor/storage"
"github.com/google/cadvisor/utils"
"k8s.io/klog"
"k8s.io/klog/v2"
)
// ErrDataNotFound is the error resulting if failed to find a container in memory cache.
@@ -38,19 +38,19 @@ type containerCache struct {
lock sync.RWMutex
}
func (self *containerCache) AddStats(stats *info.ContainerStats) error {
self.lock.Lock()
defer self.lock.Unlock()
func (c *containerCache) AddStats(stats *info.ContainerStats) error {
c.lock.Lock()
defer c.lock.Unlock()
// Add the stat to storage.
self.recentStats.Add(stats.Timestamp, stats)
c.recentStats.Add(stats.Timestamp, stats)
return nil
}
func (self *containerCache) RecentStats(start, end time.Time, maxStats int) ([]*info.ContainerStats, error) {
self.lock.RLock()
defer self.lock.RUnlock()
result := self.recentStats.InTimeRange(start, end, maxStats)
func (c *containerCache) RecentStats(start, end time.Time, maxStats int) ([]*info.ContainerStats, error) {
c.lock.RLock()
defer c.lock.RUnlock()
result := c.recentStats.InTimeRange(start, end, maxStats)
converted := make([]*info.ContainerStats, len(result))
for i, el := range result {
converted[i] = el.(*info.ContainerStats)
@@ -73,20 +73,20 @@ type InMemoryCache struct {
backend []storage.StorageDriver
}
func (self *InMemoryCache) AddStats(cInfo *info.ContainerInfo, stats *info.ContainerStats) error {
func (c *InMemoryCache) AddStats(cInfo *info.ContainerInfo, stats *info.ContainerStats) error {
var cstore *containerCache
var ok bool
func() {
self.lock.Lock()
defer self.lock.Unlock()
if cstore, ok = self.containerCacheMap[cInfo.ContainerReference.Name]; !ok {
cstore = newContainerStore(cInfo.ContainerReference, self.maxAge)
self.containerCacheMap[cInfo.ContainerReference.Name] = cstore
c.lock.Lock()
defer c.lock.Unlock()
if cstore, ok = c.containerCacheMap[cInfo.ContainerReference.Name]; !ok {
cstore = newContainerStore(cInfo.ContainerReference, c.maxAge)
c.containerCacheMap[cInfo.ContainerReference.Name] = cstore
}
}()
for _, backend := range self.backend {
for _, backend := range c.backend {
// TODO(monnand): To deal with long delay write operations, we
// may want to start a pool of goroutines to do write
// operations.
@@ -97,13 +97,13 @@ func (self *InMemoryCache) AddStats(cInfo *info.ContainerInfo, stats *info.Conta
return cstore.AddStats(stats)
}
func (self *InMemoryCache) RecentStats(name string, start, end time.Time, maxStats int) ([]*info.ContainerStats, error) {
func (c *InMemoryCache) RecentStats(name string, start, end time.Time, maxStats int) ([]*info.ContainerStats, error) {
var cstore *containerCache
var ok bool
err := func() error {
self.lock.RLock()
defer self.lock.RUnlock()
if cstore, ok = self.containerCacheMap[name]; !ok {
c.lock.RLock()
defer c.lock.RUnlock()
if cstore, ok = c.containerCacheMap[name]; !ok {
return ErrDataNotFound
}
return nil
@@ -115,17 +115,17 @@ func (self *InMemoryCache) RecentStats(name string, start, end time.Time, maxSta
return cstore.RecentStats(start, end, maxStats)
}
func (self *InMemoryCache) Close() error {
self.lock.Lock()
self.containerCacheMap = make(map[string]*containerCache, 32)
self.lock.Unlock()
func (c *InMemoryCache) Close() error {
c.lock.Lock()
c.containerCacheMap = make(map[string]*containerCache, 32)
c.lock.Unlock()
return nil
}
func (self *InMemoryCache) RemoveContainer(containerName string) error {
self.lock.Lock()
delete(self.containerCacheMap, containerName)
self.lock.Unlock()
func (c *InMemoryCache) RemoveContainer(containerName string) error {
c.lock.Lock()
delete(c.containerCacheMap, containerName)
c.lock.Unlock()
return nil
}

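Besides renaming the receivers from self to the conventional short form c (Go style discourages self/this), this file is a good place to notice an idiom the diff preserves: AddStats takes its lock inside an immediately invoked closure, so the deferred Unlock fires before the backend writes run. A generic sketch of that scoped-lock idiom:

package main

import (
	"fmt"
	"sync"
)

type registry struct {
	mu    sync.Mutex
	items map[string]int
}

// getOrCreate holds the lock only inside the closure: the deferred Unlock
// runs when the closure returns, so the slower follow-up work below
// (e.g. writes to storage backends) executes without the lock held.
func (r *registry) getOrCreate(key string) int {
	var v int
	func() {
		r.mu.Lock()
		defer r.mu.Unlock()
		if _, ok := r.items[key]; !ok {
			r.items[key] = len(r.items)
		}
		v = r.items[key]
	}()
	// ... unlocked follow-up work would go here ...
	return v
}

func main() {
	r := &registry{items: map[string]int{}}
	fmt.Println(r.getOrCreate("a"), r.getOrCreate("b"), r.getOrCreate("a")) // 0 1 0
}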
@@ -32,7 +32,7 @@ import (
// Client represents the base URL for a cAdvisor client.
type Client struct {
baseUrl string
baseURL string
}
// NewClient returns a new client with the specified base URL.
@@ -42,17 +42,17 @@ func NewClient(url string) (*Client, error) {
}
return &Client{
baseUrl: fmt.Sprintf("%sapi/v2.1/", url),
baseURL: fmt.Sprintf("%sapi/v2.1/", url),
}, nil
}
// MachineInfo returns the JSON machine information for this client.
// A non-nil error result indicates a problem with obtaining
// the JSON machine information data.
func (self *Client) MachineInfo() (minfo *v1.MachineInfo, err error) {
u := self.machineInfoUrl()
func (c *Client) MachineInfo() (minfo *v1.MachineInfo, err error) {
u := c.machineInfoURL()
ret := new(v1.MachineInfo)
if err = self.httpGetJsonData(ret, nil, u, "machine info"); err != nil {
if err = c.httpGetJSONData(ret, nil, u, "machine info"); err != nil {
return
}
minfo = ret
@@ -62,25 +62,25 @@ func (self *Client) MachineInfo() (minfo *v1.MachineInfo, err error) {
// MachineStats returns the JSON machine statistics for this client.
// A non-nil error result indicates a problem with obtaining
// the JSON machine information data.
func (self *Client) MachineStats() ([]v2.MachineStats, error) {
func (c *Client) MachineStats() ([]v2.MachineStats, error) {
var ret []v2.MachineStats
u := self.machineStatsUrl()
err := self.httpGetJsonData(&ret, nil, u, "machine stats")
u := c.machineStatsURL()
err := c.httpGetJSONData(&ret, nil, u, "machine stats")
return ret, err
}
// VersionInfo returns the version info for cAdvisor.
func (self *Client) VersionInfo() (version string, err error) {
u := self.versionInfoUrl()
version, err = self.httpGetString(u, "version info")
func (c *Client) VersionInfo() (version string, err error) {
u := c.versionInfoURL()
version, err = c.httpGetString(u, "version info")
return
}
// Attributes returns hardware and software attributes of the machine.
func (self *Client) Attributes() (attr *v2.Attributes, err error) {
u := self.attributesUrl()
func (c *Client) Attributes() (attr *v2.Attributes, err error) {
u := c.attributesURL()
ret := new(v2.Attributes)
if err = self.httpGetJsonData(ret, nil, u, "attributes"); err != nil {
if err = c.httpGetJSONData(ret, nil, u, "attributes"); err != nil {
return
}
attr = ret
@@ -88,8 +88,8 @@ func (self *Client) Attributes() (attr *v2.Attributes, err error) {
}
// Stats returns stats for the requested container.
func (self *Client) Stats(name string, request *v2.RequestOptions) (map[string]v2.ContainerInfo, error) {
u := self.statsUrl(name)
func (c *Client) Stats(name string, request *v2.RequestOptions) (map[string]v2.ContainerInfo, error) {
u := c.statsURL(name)
ret := make(map[string]v2.ContainerInfo)
data := url.Values{
"type": []string{request.IdType},
@@ -98,33 +98,33 @@ func (self *Client) Stats(name string, request *v2.RequestOptions) (map[string]v
}
u = fmt.Sprintf("%s?%s", u, data.Encode())
if err := self.httpGetJsonData(&ret, nil, u, "stats"); err != nil {
if err := c.httpGetJSONData(&ret, nil, u, "stats"); err != nil {
return nil, err
}
return ret, nil
}
func (self *Client) machineInfoUrl() string {
return self.baseUrl + path.Join("machine")
func (c *Client) machineInfoURL() string {
return c.baseURL + path.Join("machine")
}
func (self *Client) machineStatsUrl() string {
return self.baseUrl + path.Join("machinestats")
func (c *Client) machineStatsURL() string {
return c.baseURL + path.Join("machinestats")
}
func (self *Client) versionInfoUrl() string {
return self.baseUrl + path.Join("version")
func (c *Client) versionInfoURL() string {
return c.baseURL + path.Join("version")
}
func (self *Client) attributesUrl() string {
return self.baseUrl + path.Join("attributes")
func (c *Client) attributesURL() string {
return c.baseURL + path.Join("attributes")
}
func (self *Client) statsUrl(name string) string {
return self.baseUrl + path.Join("stats", name)
func (c *Client) statsURL(name string) string {
return c.baseURL + path.Join("stats", name)
}
func (self *Client) httpGetResponse(postData interface{}, urlPath, infoName string) ([]byte, error) {
func (c *Client) httpGetResponse(postData interface{}, urlPath, infoName string) ([]byte, error) {
var resp *http.Response
var err error
@@ -155,16 +155,16 @@ func (self *Client) httpGetResponse(postData interface{}, urlPath, infoName stri
return body, nil
}
func (self *Client) httpGetString(url, infoName string) (string, error) {
body, err := self.httpGetResponse(nil, url, infoName)
func (c *Client) httpGetString(url, infoName string) (string, error) {
body, err := c.httpGetResponse(nil, url, infoName)
if err != nil {
return "", err
}
return string(body), nil
}
func (self *Client) httpGetJsonData(data, postData interface{}, url, infoName string) error {
body, err := self.httpGetResponse(postData, url, infoName)
func (c *Client) httpGetJSONData(data, postData interface{}, url, infoName string) error {
body, err := c.httpGetResponse(postData, url, infoName)
if err != nil {
return err
}

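This file also renames Url/Json identifiers to URL/JSON per Go initialism style; the exported surface keeps its shape. A hedged usage sketch of the v2 client after the change (assumes the package imports as v2 and a cAdvisor listening at the placeholder URL):

package main

import (
	"fmt"
	"log"

	v2 "github.com/google/cadvisor/client/v2"
)

func main() {
	// "http://localhost:8080/" is a placeholder endpoint.
	c, err := v2.NewClient("http://localhost:8080/")
	if err != nil {
		log.Fatal(err)
	}
	mi, err := c.MachineInfo() // GET <base>/api/v2.1/machine
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("cores:", mi.NumCores)
}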
@@ -209,7 +209,7 @@ func prometheusLabelSetToCadvisorLabel(promLabels model.Metric) string {
b.WriteString(l.GetValue())
}
return string(b.Bytes())
return b.String()
}
// Returns collected metrics and the next collection time of the collector

@@ -13,7 +13,7 @@ go_library(
"//vendor/github.com/google/cadvisor/fs:go_default_library",
"//vendor/github.com/google/cadvisor/info/v1:go_default_library",
"//vendor/github.com/google/cadvisor/watcher:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
],
)

@@ -19,7 +19,7 @@ go_library(
"//vendor/github.com/karrick/godirwalk:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
"//vendor/github.com/pkg/errors:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
"//vendor/k8s.io/utils/inotify:go_default_library",
],
)

@@ -27,7 +27,7 @@ import (
var ArgContainerHints = flag.String("container_hints", "/etc/cadvisor/container_hints.json", "location of the container hints file")
type containerHints struct {
type ContainerHints struct {
AllHosts []containerHint `json:"all_hosts,omitempty"`
}
@@ -47,12 +47,12 @@ type networkInterface struct {
VethChild string `json:"veth_child,omitempty"`
}
func GetContainerHintsFromFile(containerHintsFile string) (containerHints, error) {
func GetContainerHintsFromFile(containerHintsFile string) (ContainerHints, error) {
dat, err := ioutil.ReadFile(containerHintsFile)
if os.IsNotExist(err) {
return containerHints{}, nil
return ContainerHints{}, nil
}
var cHints containerHints
var cHints ContainerHints
if err == nil {
err = json.Unmarshal(dat, &cHints)
}

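The rename from containerHints to ContainerHints fixes an API wart: GetContainerHintsFromFile is exported but previously returned an unexported type. A hedged caller sketch (assuming the vendored package path github.com/google/cadvisor/container/common, where this file lives upstream):

package main

import (
	"fmt"
	"log"

	"github.com/google/cadvisor/container/common"
)

func main() {
	// Per the function above, a missing hints file yields an empty
	// ContainerHints value and a nil error.
	hints, err := common.GetContainerHintsFromFile("/etc/cadvisor/container_hints.json")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("%d hinted hosts\n", len(hints.AllHosts))
}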
@@ -22,7 +22,7 @@ import (
"github.com/google/cadvisor/fs"
"k8s.io/klog"
"k8s.io/klog/v2"
)
type FsHandler interface {

@@ -31,7 +31,7 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/pkg/errors"
"k8s.io/klog"
"k8s.io/klog/v2"
)
func DebugInfo(watches map[string][]string) map[string][]string {

@@ -32,7 +32,6 @@ type ContainerType int
const (
ContainerTypeRaw ContainerType = iota
ContainerTypeDocker
ContainerTypeSystemd
ContainerTypeCrio
ContainerTypeContainerd
ContainerTypeMesos

@@ -32,7 +32,8 @@ go_library(
"//vendor/github.com/opencontainers/runtime-spec/specs-go:go_default_library",
"//vendor/golang.org/x/net/context:go_default_library",
"//vendor/google.golang.org/grpc:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
"//vendor/google.golang.org/grpc/backoff:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
],
)

@@ -29,6 +29,7 @@ import (
"github.com/containerd/containerd/pkg/dialer"
ptypes "github.com/gogo/protobuf/types"
"google.golang.org/grpc"
"google.golang.org/grpc/backoff"
)
type client struct {
@@ -37,22 +38,23 @@ type client struct {
versionService versionapi.VersionClient
}
type containerdClient interface {
type ContainerdClient interface {
LoadContainer(ctx context.Context, id string) (*containers.Container, error)
TaskPid(ctx context.Context, id string) (uint32, error)
Version(ctx context.Context) (string, error)
}
var once sync.Once
var ctrdClient containerdClient = nil
var ctrdClient ContainerdClient = nil
const (
maxBackoffDelay = 3 * time.Second
baseBackoffDelay = 100 * time.Millisecond
connectionTimeout = 2 * time.Second
)
// Client creates a containerd client
func Client(address, namespace string) (containerdClient, error) {
func Client(address, namespace string) (ContainerdClient, error) {
var retErr error
once.Do(func() {
tryConn, err := net.DialTimeout("unix", address, connectionTimeout)
@@ -62,12 +64,17 @@ func Client(address, namespace string) (containerdClient, error) {
}
tryConn.Close()
connParams := grpc.ConnectParams{
Backoff: backoff.Config{
BaseDelay: baseBackoffDelay,
MaxDelay: maxBackoffDelay,
},
}
gopts := []grpc.DialOption{
grpc.WithInsecure(),
grpc.WithDialer(dialer.Dialer),
grpc.WithContextDialer(dialer.ContextDialer),
grpc.WithBlock(),
grpc.WithBackoffMaxDelay(maxBackoffDelay),
grpc.WithTimeout(connectionTimeout),
grpc.WithConnectParams(connParams),
}
unary, stream := newNSInterceptors(namespace)
gopts = append(gopts,
@@ -75,7 +82,9 @@ func Client(address, namespace string) (containerdClient, error) {
grpc.WithStreamInterceptor(stream),
)
conn, err := grpc.Dial(dialer.DialAddress(address), gopts...)
ctx, cancel := context.WithTimeout(context.Background(), connectionTimeout)
defer cancel()
conn, err := grpc.DialContext(ctx, dialer.DialAddress(address), gopts...)
if err != nil {
retErr = err
return

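The dial changes above swap deprecated grpc-go options for their modern equivalents: grpc.WithBackoffMaxDelay becomes grpc.WithConnectParams (which also exposes the base delay), grpc.WithTimeout becomes a context deadline passed to grpc.DialContext, and grpc.WithDialer becomes grpc.WithContextDialer. Condensed into a standalone sketch (generic target, containerd-specific dialer omitted):

package grpcdial

import (
	"context"
	"time"

	"google.golang.org/grpc"
	"google.golang.org/grpc/backoff"
)

// dial blocks until connected or the 2s deadline expires, retrying with
// backoff between 100ms and 3s, mirroring the values in the diff above.
func dial(target string) (*grpc.ClientConn, error) {
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
	defer cancel()
	return grpc.DialContext(ctx, target,
		grpc.WithInsecure(),
		grpc.WithBlock(),
		grpc.WithConnectParams(grpc.ConnectParams{
			Backoff: backoff.Config{
				BaseDelay: 100 * time.Millisecond,
				MaxDelay:  3 * time.Second,
			},
		}),
	)
}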
@@ -22,7 +22,7 @@ import (
"strings"
"golang.org/x/net/context"
"k8s.io/klog"
"k8s.io/klog/v2"
"github.com/google/cadvisor/container"
"github.com/google/cadvisor/container/libcontainer"
@@ -43,7 +43,7 @@ var containerdCgroupRegexp = regexp.MustCompile(`([a-z0-9]{64})`)
type containerdFactory struct {
machineInfoFactory info.MachineInfoFactory
client containerdClient
client ContainerdClient
version string
// Information about the mounted cgroup subsystems.
cgroupSubsystems libcontainer.CgroupSubsystems
@@ -52,11 +52,11 @@ type containerdFactory struct {
includedMetrics container.MetricSet
}
func (self *containerdFactory) String() string {
func (f *containerdFactory) String() string {
return k8sContainerdNamespace
}
func (self *containerdFactory) NewContainerHandler(name string, inHostNamespace bool) (handler container.ContainerHandler, err error) {
func (f *containerdFactory) NewContainerHandler(name string, inHostNamespace bool) (handler container.ContainerHandler, err error) {
client, err := Client(*ArgContainerdEndpoint, *ArgContainerdNamespace)
if err != nil {
return
@@ -66,12 +66,12 @@ func (self *containerdFactory) NewContainerHandler(name string, inHostNamespace
return newContainerdContainerHandler(
client,
name,
self.machineInfoFactory,
self.fsInfo,
&self.cgroupSubsystems,
f.machineInfoFactory,
f.fsInfo,
&f.cgroupSubsystems,
inHostNamespace,
metadataEnvs,
self.includedMetrics,
f.includedMetrics,
)
}
@@ -95,7 +95,7 @@ func isContainerName(name string) bool {
}
// Containerd can handle and accept all containerd created containers
func (self *containerdFactory) CanHandleAndAccept(name string) (bool, bool, error) {
func (f *containerdFactory) CanHandleAndAccept(name string) (bool, bool, error) {
// if the container is not associated with containerd, we can't handle it or accept it.
if !isContainerName(name) {
return false, false, nil
@@ -105,7 +105,7 @@ func (self *containerdFactory) CanHandleAndAccept(name string) (bool, bool, erro
// If container and task lookup in containerd fails then we assume
// that the container state is not known to containerd
ctx := context.Background()
_, err := self.client.LoadContainer(ctx, id)
_, err := f.client.LoadContainer(ctx, id)
if err != nil {
return false, false, fmt.Errorf("failed to load container: %v", err)
}
@@ -113,7 +113,7 @@ func (self *containerdFactory) CanHandleAndAccept(name string) (bool, bool, erro
return true, true, nil
}
func (self *containerdFactory) DebugInfo() map[string][]string {
func (f *containerdFactory) DebugInfo() map[string][]string {
return map[string][]string{}
}

@@ -56,7 +56,7 @@ var _ container.ContainerHandler = &containerdContainerHandler{}
// newContainerdContainerHandler returns a new container.ContainerHandler
func newContainerdContainerHandler(
client containerdClient,
client ContainerdClient,
name string,
machineInfoFactory info.MachineInfoFactory,
fsInfo fs.FsInfo,
@@ -149,47 +149,47 @@ func newContainerdContainerHandler(
return handler, nil
}
func (self *containerdContainerHandler) ContainerReference() (info.ContainerReference, error) {
return self.reference, nil
func (h *containerdContainerHandler) ContainerReference() (info.ContainerReference, error) {
return h.reference, nil
}
func (self *containerdContainerHandler) needNet() bool {
func (h *containerdContainerHandler) needNet() bool {
// Since containerd does not handle networking ideally we need to return based
// on includedMetrics list. Here the assumption is the presence of cri-containerd
// label
if self.includedMetrics.Has(container.NetworkUsageMetrics) {
if h.includedMetrics.Has(container.NetworkUsageMetrics) {
//TODO change it to exported cri-containerd constants
return self.labels["io.cri-containerd.kind"] == "sandbox"
return h.labels["io.cri-containerd.kind"] == "sandbox"
}
return false
}
func (self *containerdContainerHandler) GetSpec() (info.ContainerSpec, error) {
func (h *containerdContainerHandler) GetSpec() (info.ContainerSpec, error) {
// TODO: Since we dont collect disk usage stats for containerd, we set hasFilesystem
// to false. Revisit when we support disk usage stats for containerd
hasFilesystem := false
spec, err := common.GetSpec(self.cgroupPaths, self.machineInfoFactory, self.needNet(), hasFilesystem)
spec.Labels = self.labels
spec.Envs = self.envs
spec.Image = self.image
spec, err := common.GetSpec(h.cgroupPaths, h.machineInfoFactory, h.needNet(), hasFilesystem)
spec.Labels = h.labels
spec.Envs = h.envs
spec.Image = h.image
return spec, err
}
func (self *containerdContainerHandler) getFsStats(stats *info.ContainerStats) error {
mi, err := self.machineInfoFactory.GetMachineInfo()
func (h *containerdContainerHandler) getFsStats(stats *info.ContainerStats) error {
mi, err := h.machineInfoFactory.GetMachineInfo()
if err != nil {
return err
}
if self.includedMetrics.Has(container.DiskIOMetrics) {
if h.includedMetrics.Has(container.DiskIOMetrics) {
common.AssignDeviceNamesToDiskStats((*common.MachineInfoNamer)(mi), &stats.DiskIo)
}
return nil
}
func (self *containerdContainerHandler) GetStats() (*info.ContainerStats, error) {
stats, err := self.libcontainerHandler.GetStats()
func (h *containerdContainerHandler) GetStats() (*info.ContainerStats, error) {
stats, err := h.libcontainerHandler.GetStats()
if err != nil {
return stats, err
}
@@ -197,50 +197,50 @@ func (self *containerdContainerHandler) GetStats() (*info.ContainerStats, error)
// includes containers running in Kubernetes pods that use the network of the
// infrastructure container. This stops metrics being reported multiple times
// for each container in a pod.
if !self.needNet() {
if !h.needNet() {
stats.Network = info.NetworkStats{}
}
// Get filesystem stats.
err = self.getFsStats(stats)
err = h.getFsStats(stats)
return stats, err
}
func (self *containerdContainerHandler) ListContainers(listType container.ListType) ([]info.ContainerReference, error) {
func (h *containerdContainerHandler) ListContainers(listType container.ListType) ([]info.ContainerReference, error) {
return []info.ContainerReference{}, nil
}
func (self *containerdContainerHandler) GetCgroupPath(resource string) (string, error) {
path, ok := self.cgroupPaths[resource]
func (h *containerdContainerHandler) GetCgroupPath(resource string) (string, error) {
path, ok := h.cgroupPaths[resource]
if !ok {
return "", fmt.Errorf("could not find path for resource %q for container %q\n", resource, self.reference.Name)
return "", fmt.Errorf("could not find path for resource %q for container %q", resource, h.reference.Name)
}
return path, nil
}
func (self *containerdContainerHandler) GetContainerLabels() map[string]string {
return self.labels
func (h *containerdContainerHandler) GetContainerLabels() map[string]string {
return h.labels
}
func (self *containerdContainerHandler) ListProcesses(listType container.ListType) ([]int, error) {
return self.libcontainerHandler.GetProcesses()
func (h *containerdContainerHandler) ListProcesses(listType container.ListType) ([]int, error) {
return h.libcontainerHandler.GetProcesses()
}
func (self *containerdContainerHandler) Exists() bool {
return common.CgroupExists(self.cgroupPaths)
func (h *containerdContainerHandler) Exists() bool {
return common.CgroupExists(h.cgroupPaths)
}
func (self *containerdContainerHandler) Type() container.ContainerType {
func (h *containerdContainerHandler) Type() container.ContainerType {
return container.ContainerTypeContainerd
}
func (self *containerdContainerHandler) Start() {
func (h *containerdContainerHandler) Start() {
}
func (self *containerdContainerHandler) Cleanup() {
func (h *containerdContainerHandler) Cleanup() {
}
func (self *containerdContainerHandler) GetContainerIPAddress() string {
func (h *containerdContainerHandler) GetContainerIPAddress() string {
// containerd doesnt take care of networking.So it doesnt maintain networking states
return ""
}

@@ -9,7 +9,7 @@ go_library(
deps = [
"//vendor/github.com/google/cadvisor/container:go_default_library",
"//vendor/github.com/google/cadvisor/container/containerd:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
],
)

@@ -18,7 +18,7 @@ package install
import (
"github.com/google/cadvisor/container"
"github.com/google/cadvisor/container/containerd"
"k8s.io/klog"
"k8s.io/klog/v2"
)
func init() {

@@ -20,7 +20,7 @@ go_library(
"//vendor/github.com/google/cadvisor/watcher:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
],
)

@@ -15,10 +15,12 @@
package crio
import (
"context"
"encoding/json"
"fmt"
"net"
"net/http"
"sync"
"syscall"
"time"
)
@@ -28,6 +30,12 @@ const (
maxUnixSocketPathSize = len(syscall.RawSockaddrUnix{}.Path)
)
var (
theClient CrioClient
clientErr error
crioClientOnce sync.Once
)
// Info represents CRI-O information as sent by the CRI-O server
type Info struct {
StorageDriver string `json:"storage_driver"`
@@ -45,9 +53,10 @@ type ContainerInfo struct {
LogPath string `json:"log_path"`
Root string `json:"root"`
IP string `json:"ip_address"`
IPs []string `json:"ip_addresses"`
}
type crioClient interface {
type CrioClient interface {
Info() (Info, error)
ContainerInfo(string) (*ContainerInfo, error)
}
@@ -62,22 +71,27 @@ func configureUnixTransport(tr *http.Transport, proto, addr string) error {
}
// No need for compression in local communications.
tr.DisableCompression = true
tr.Dial = func(_, _ string) (net.Conn, error) {
tr.DialContext = func(_ context.Context, _, _ string) (net.Conn, error) {
return net.DialTimeout(proto, addr, 32*time.Second)
}
return nil
}
// Client returns a new configured CRI-O client
func Client() (crioClient, error) {
tr := new(http.Transport)
configureUnixTransport(tr, "unix", CrioSocket)
c := &http.Client{
Transport: tr,
}
return &crioClientImpl{
client: c,
}, nil
func Client() (CrioClient, error) {
crioClientOnce.Do(func() {
tr := new(http.Transport)
theClient = nil
if clientErr = configureUnixTransport(tr, "unix", CrioSocket); clientErr != nil {
return
}
theClient = &crioClientImpl{
client: &http.Client{
Transport: tr,
},
}
})
return theClient, clientErr
}
func getRequest(path string) (*http.Request, error) {
@@ -117,6 +131,7 @@ func (c *crioClientImpl) ContainerInfo(id string) (*ContainerInfo, error) {
if err != nil {
return nil, err
}
cInfo := ContainerInfo{}
resp, err := c.client.Do(req)
if err != nil {
return nil, err
@@ -129,9 +144,14 @@
return nil, fmt.Errorf("Error finding container %s: Status %d returned error %s", id, resp.StatusCode, resp.Body)
}
cInfo := ContainerInfo{}
if err := json.NewDecoder(resp.Body).Decode(&cInfo); err != nil {
return nil, err
}
if len(cInfo.IP) > 0 {
return &cInfo, nil
}
if len(cInfo.IPs) > 0 {
cInfo.IP = cInfo.IPs[0]
}
return &cInfo, nil
}

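Two behavioural fixes land in this client: Client() now memoizes one shared CRI-O HTTP client behind a sync.Once, surfacing the transport-configuration error instead of discarding it, and ContainerInfo falls back to the first entry of the new ip_addresses field when the legacy ip_address is empty. The memoization shape, reduced to a standalone sketch:

package crioclient

import "sync"

type Client struct{ /* configured HTTP client, elided */ }

var (
	theClient *Client
	clientErr error
	once      sync.Once
)

// Get builds the singleton on first use; every later call returns the same
// *Client (or the same construction error) without re-running setup.
func Get() (*Client, error) {
	once.Do(func() {
		if err := configure(); err != nil {
			clientErr = err
			return
		}
		theClient = &Client{}
	})
	return theClient, clientErr
}

// configure stands in for configureUnixTransport in the real code.
func configure() error { return nil }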
@@ -26,7 +26,7 @@ import (
info "github.com/google/cadvisor/info/v1"
"github.com/google/cadvisor/watcher"
"k8s.io/klog"
"k8s.io/klog/v2"
)
// The namespace under which crio aliases are unique.
@@ -57,14 +57,14 @@ type crioFactory struct {
includedMetrics container.MetricSet
client crioClient
client CrioClient
}
func (self *crioFactory) String() string {
func (f *crioFactory) String() string {
return CrioNamespace
}
func (self *crioFactory) NewContainerHandler(name string, inHostNamespace bool) (handler container.ContainerHandler, err error) {
func (f *crioFactory) NewContainerHandler(name string, inHostNamespace bool) (handler container.ContainerHandler, err error) {
client, err := Client()
if err != nil {
return
@@ -74,14 +74,14 @@ func (self *crioFactory) NewContainerHandler(name string, inHostNamespace bool)
handler, err = newCrioContainerHandler(
client,
name,
self.machineInfoFactory,
self.fsInfo,
self.storageDriver,
self.storageDir,
&self.cgroupSubsystems,
f.machineInfoFactory,
f.fsInfo,
f.storageDriver,
f.storageDir,
&f.cgroupSubsystems,
inHostNamespace,
metadataEnvs,
self.includedMetrics,
f.includedMetrics,
)
return
}
@@ -108,7 +108,7 @@ func isContainerName(name string) bool {
}
// crio handles all containers under /crio
func (self *crioFactory) CanHandleAndAccept(name string) (bool, bool, error) {
func (f *crioFactory) CanHandleAndAccept(name string) (bool, bool, error) {
if strings.HasPrefix(path.Base(name), "crio-conmon") {
// TODO(runcom): should we include crio-conmon cgroups?
return false, false, nil
@@ -123,18 +123,10 @@ func (self *crioFactory) CanHandleAndAccept(name string) (bool, bool, error) {
return true, true, nil
}
func (self *crioFactory) DebugInfo() map[string][]string {
func (f *crioFactory) DebugInfo() map[string][]string {
return map[string][]string{}
}
var (
// TODO(runcom): handle versioning in CRI-O
version_regexp_string = `(\d+)\.(\d+)\.(\d+)`
version_re = regexp.MustCompile(version_regexp_string)
apiversion_regexp_string = `(\d+)\.(\d+)`
apiversion_re = regexp.MustCompile(apiversion_regexp_string)
)
// Register root container before running this function!
func Register(factory info.MachineInfoFactory, fsInfo fs.FsInfo, includedMetrics container.MetricSet) error {
client, err := Client()

@@ -17,7 +17,6 @@ package crio
import (
"fmt"
"path"
"path/filepath"
"strconv"
"strings"
@@ -33,7 +32,7 @@
)
type crioContainerHandler struct {
client crioClient
client CrioClient
name string
machineInfoFactory info.MachineInfoFactory
@@ -80,7 +79,7 @@ var _ container.ContainerHandler = &crioContainerHandler{}
// newCrioContainerHandler returns a new container.ContainerHandler
func newCrioContainerHandler(
client crioClient,
client CrioClient,
name string,
machineInfoFactory info.MachineInfoFactory,
fsInfo fs.FsInfo,
@@ -105,7 +104,6 @@ func newCrioContainerHandler(
rootFs := "/"
if !inHostNamespace {
rootFs = "/rootfs"
storageDir = path.Join(rootFs, storageDir)
}
id := ContainerNameToCrioId(name)
@@ -199,59 +197,59 @@ func newCrioContainerHandler(
return handler, nil
}
func (self *crioContainerHandler) Start() {
if self.fsHandler != nil {
self.fsHandler.Start()
func (h *crioContainerHandler) Start() {
if h.fsHandler != nil {
h.fsHandler.Start()
}
}
func (self *crioContainerHandler) Cleanup() {
if self.fsHandler != nil {
self.fsHandler.Stop()
func (h *crioContainerHandler) Cleanup() {
if h.fsHandler != nil {
h.fsHandler.Stop()
}
}
func (self *crioContainerHandler) ContainerReference() (info.ContainerReference, error) {
return self.reference, nil
func (h *crioContainerHandler) ContainerReference() (info.ContainerReference, error) {
return h.reference, nil
}
func (self *crioContainerHandler) needNet() bool {
if self.includedMetrics.Has(container.NetworkUsageMetrics) {
return self.labels["io.kubernetes.container.name"] == "POD"
func (h *crioContainerHandler) needNet() bool {
if h.includedMetrics.Has(container.NetworkUsageMetrics) {
return h.labels["io.kubernetes.container.name"] == "POD"
}
return false
}
func (self *crioContainerHandler) GetSpec() (info.ContainerSpec, error) {
hasFilesystem := self.includedMetrics.Has(container.DiskUsageMetrics)
spec, err := common.GetSpec(self.cgroupPaths, self.machineInfoFactory, self.needNet(), hasFilesystem)
func (h *crioContainerHandler) GetSpec() (info.ContainerSpec, error) {
hasFilesystem := h.includedMetrics.Has(container.DiskUsageMetrics)
spec, err := common.GetSpec(h.cgroupPaths, h.machineInfoFactory, h.needNet(), hasFilesystem)
spec.Labels = self.labels
spec.Envs = self.envs
spec.Image = self.image
spec.Labels = h.labels
spec.Envs = h.envs
spec.Image = h.image
return spec, err
}
func (self *crioContainerHandler) getFsStats(stats *info.ContainerStats) error {
mi, err := self.machineInfoFactory.GetMachineInfo()
func (h *crioContainerHandler) getFsStats(stats *info.ContainerStats) error {
mi, err := h.machineInfoFactory.GetMachineInfo()
if err != nil {
return err
}
if self.includedMetrics.Has(container.DiskIOMetrics) {
if h.includedMetrics.Has(container.DiskIOMetrics) {
common.AssignDeviceNamesToDiskStats((*common.MachineInfoNamer)(mi), &stats.DiskIo)
}
if !self.includedMetrics.Has(container.DiskUsageMetrics) {
if !h.includedMetrics.Has(container.DiskUsageMetrics) {
return nil
}
var device string
switch self.storageDriver {
switch h.storageDriver {
case overlay2StorageDriver, overlayStorageDriver:
deviceInfo, err := self.fsInfo.GetDirFsDevice(self.rootfsStorageDir)
deviceInfo, err := h.fsInfo.GetDirFsDevice(h.rootfsStorageDir)
if err != nil {
return fmt.Errorf("unable to determine device info for dir: %v: %v", self.rootfsStorageDir, err)
return fmt.Errorf("unable to determine device info for dir: %v: %v", h.rootfsStorageDir, err)
}
device = deviceInfo.Device
default:
@@ -276,7 +274,7 @@ func (self *crioContainerHandler) getFsStats(stats *info.ContainerStats) error {
return fmt.Errorf("unable to determine fs type for device: %v", device)
}
fsStat := info.FsStats{Device: device, Type: fsType, Limit: limit}
usage := self.fsHandler.Usage()
usage := h.fsHandler.Usage()
fsStat.BaseUsage = usage.BaseUsageBytes
fsStat.Usage = usage.TotalUsageBytes
fsStat.Inodes = usage.InodeUsage
@@ -286,26 +284,26 @@ func (self *crioContainerHandler) getFsStats(stats *info.ContainerStats) error {
return nil
}
func (self *crioContainerHandler) getLibcontainerHandler() *containerlibcontainer.Handler {
if self.pidKnown {
return self.libcontainerHandler
func (h *crioContainerHandler) getLibcontainerHandler() *containerlibcontainer.Handler {
if h.pidKnown {
return h.libcontainerHandler
}
id := ContainerNameToCrioId(self.name)
id := ContainerNameToCrioId(h.name)
cInfo, err := self.client.ContainerInfo(id)
cInfo, err := h.client.ContainerInfo(id)
if err != nil || cInfo.Pid == 0 {
return self.libcontainerHandler
return h.libcontainerHandler
}
self.pidKnown = true
self.libcontainerHandler = containerlibcontainer.NewHandler(self.cgroupManager, self.rootFs, cInfo.Pid, self.includedMetrics)
h.pidKnown = true
h.libcontainerHandler = containerlibcontainer.NewHandler(h.cgroupManager, h.rootFs, cInfo.Pid, h.includedMetrics)
return self.libcontainerHandler
return h.libcontainerHandler
}
func (self *crioContainerHandler) GetStats() (*info.ContainerStats, error) {
libcontainerHandler := self.getLibcontainerHandler()
func (h *crioContainerHandler) GetStats() (*info.ContainerStats, error) {
libcontainerHandler := h.getLibcontainerHandler()
stats, err := libcontainerHandler.GetStats()
if err != nil {
return stats, err
@@ -314,12 +312,12 @@ func (self *crioContainerHandler) GetStats() (*info.ContainerStats, error) {
// includes containers running in Kubernetes pods that use the network of the
// infrastructure container. This stops metrics being reported multiple times
// for each container in a pod.
if !self.needNet() {
if !h.needNet() {
stats.Network = info.NetworkStats{}
}
// Get filesystem stats.
err = self.getFsStats(stats)
err = h.getFsStats(stats)
if err != nil {
return stats, err
}
@@ -327,35 +325,35 @@ func (self *crioContainerHandler) GetStats() (*info.ContainerStats, error) {
return stats, nil
}
func (self *crioContainerHandler) ListContainers(listType container.ListType) ([]info.ContainerReference, error) {
func (h *crioContainerHandler) ListContainers(listType container.ListType) ([]info.ContainerReference, error) {
// No-op for Docker driver.
return []info.ContainerReference{}, nil
}
func (self *crioContainerHandler) GetCgroupPath(resource string) (string, error) {
path, ok := self.cgroupPaths[resource]
func (h *crioContainerHandler) GetCgroupPath(resource string) (string, error) {
path, ok := h.cgroupPaths[resource]
if !ok {
return "", fmt.Errorf("could not find path for resource %q for container %q\n", resource, self.reference.Name)
return "", fmt.Errorf("could not find path for resource %q for container %q", resource, h.reference.Name)
}
return path, nil
}
func (self *crioContainerHandler) GetContainerLabels() map[string]string {
return self.labels
func (h *crioContainerHandler) GetContainerLabels() map[string]string {
return h.labels
}
func (self *crioContainerHandler) GetContainerIPAddress() string {
return self.ipAddress
func (h *crioContainerHandler) GetContainerIPAddress() string {
return h.ipAddress
}
func (self *crioContainerHandler) ListProcesses(listType container.ListType) ([]int, error) {
return self.libcontainerHandler.GetProcesses()
func (h *crioContainerHandler) ListProcesses(listType container.ListType) ([]int, error) {
return h.libcontainerHandler.GetProcesses()
}
func (self *crioContainerHandler) Exists() bool {
return common.CgroupExists(self.cgroupPaths)
func (h *crioContainerHandler) Exists() bool {
return common.CgroupExists(h.cgroupPaths)
}
func (self *crioContainerHandler) Type() container.ContainerType {
func (h *crioContainerHandler) Type() container.ContainerType {
return container.ContainerTypeCrio
}

@@ -9,7 +9,7 @@ go_library(
deps = [
"//vendor/github.com/google/cadvisor/container:go_default_library",
"//vendor/github.com/google/cadvisor/container/crio:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
],
)

@@ -18,7 +18,7 @@ package install
import (
"github.com/google/cadvisor/container"
"github.com/google/cadvisor/container/crio"
"k8s.io/klog"
"k8s.io/klog/v2"
)
func init() {

@@ -19,7 +19,7 @@ import (
"github.com/google/cadvisor/fs"
info "github.com/google/cadvisor/info/v1"
"github.com/google/cadvisor/watcher"
"k8s.io/klog"
"k8s.io/klog/v2"
)
// NewPlugin returns an implementation of container.Plugin suitable for passing to container.RegisterPlugin()

@@ -31,7 +31,7 @@ go_library(
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
"//vendor/golang.org/x/net/context:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
],
)

@@ -134,7 +134,7 @@ func ValidateInfo() (*dockertypes.Info, error) {
}
dockerInfo.ServerVersion = version.Version
}
version, err := parseVersion(dockerInfo.ServerVersion, version_re, 3)
version, err := parseVersion(dockerInfo.ServerVersion, versionRe, 3)
if err != nil {
return nil, err
}
@@ -150,59 +150,51 @@ func ValidateInfo() (*dockertypes.Info, error) {
return &dockerInfo, nil
}
func Version() ([]int, error) {
ver, err := VersionString()
if err != nil {
return nil, err
}
return parseVersion(ver, version_re, 3)
}
func APIVersion() ([]int, error) {
ver, err := APIVersionString()
if err != nil {
return nil, err
}
return parseVersion(ver, apiversion_re, 2)
return parseVersion(ver, apiVersionRe, 2)
}
func VersionString() (string, error) {
docker_version := "Unknown"
dockerVersion := "Unknown"
client, err := Client()
if err == nil {
version, err := client.ServerVersion(defaultContext())
if err == nil {
docker_version = version.Version
dockerVersion = version.Version
}
}
return docker_version, err
return dockerVersion, err
}
func APIVersionString() (string, error) {
docker_api_version := "Unknown"
apiVersion := "Unknown"
client, err := Client()
if err == nil {
version, err := client.ServerVersion(defaultContext())
if err == nil {
docker_api_version = version.APIVersion
apiVersion = version.APIVersion
}
}
return docker_api_version, err
return apiVersion, err
}
func parseVersion(version_string string, regex *regexp.Regexp, length int) ([]int, error) {
matches := regex.FindAllStringSubmatch(version_string, -1)
func parseVersion(versionString string, regex *regexp.Regexp, length int) ([]int, error) {
matches := regex.FindAllStringSubmatch(versionString, -1)
if len(matches) != 1 {
return nil, fmt.Errorf("version string \"%v\" doesn't match expected regular expression: \"%v\"", version_string, regex.String())
return nil, fmt.Errorf("version string \"%v\" doesn't match expected regular expression: \"%v\"", versionString, regex.String())
}
version_string_array := matches[0][1:]
version_array := make([]int, length)
for index, version_str := range version_string_array {
version, err := strconv.Atoi(version_str)
versionStringArray := matches[0][1:]
versionArray := make([]int, length)
for index, versionStr := range versionStringArray {
version, err := strconv.Atoi(versionStr)
if err != nil {
return nil, fmt.Errorf("error while parsing \"%v\" in \"%v\"", version_str, version_string)
return nil, fmt.Errorf("error while parsing \"%v\" in \"%v\"", versionStr, versionString)
}
version_array[index] = version
versionArray[index] = version
}
return version_array, nil
return versionArray, nil
}

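The underscore-to-camelCase sweep here (docker_version → dockerVersion, version_re → versionRe, and so on) is golint-driven and behaviour-preserving. For reference, the renamed parseVersion re-stated as a self-contained sketch with the same semantics as the code above:

package main

import (
	"fmt"
	"regexp"
	"strconv"
)

var versionRe = regexp.MustCompile(`(\d+)\.(\d+)\.(\d+)`)

// parseVersion requires exactly one regexp match and converts each capture
// group to an int, e.g. "19.03.8" -> [19 3 8].
func parseVersion(versionString string, regex *regexp.Regexp, length int) ([]int, error) {
	matches := regex.FindAllStringSubmatch(versionString, -1)
	if len(matches) != 1 {
		return nil, fmt.Errorf("version string %q doesn't match %q", versionString, regex.String())
	}
	out := make([]int, length)
	for i, s := range matches[0][1:] {
		v, err := strconv.Atoi(s)
		if err != nil {
			return nil, fmt.Errorf("error while parsing %q in %q", s, versionString)
		}
		out[i] = v
	}
	return out, nil
}

func main() {
	v, err := parseVersion("19.03.8", versionRe, 3)
	fmt.Println(v, err) // [19 3 8] <nil>
}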
@@ -38,7 +38,7 @@ import (
docker "github.com/docker/docker/client"
"golang.org/x/net/context"
"k8s.io/klog"
"k8s.io/klog/v2"
)
var ArgDockerEndpoint = flag.String("docker", "unix:///var/run/docker.sock", "docker endpoint")
@@ -103,6 +103,7 @@ const (
overlayStorageDriver storageDriver = "overlay"
overlay2StorageDriver storageDriver = "overlay2"
zfsStorageDriver storageDriver = "zfs"
vfsStorageDriver storageDriver = "vfs"
)
type dockerFactory struct {
@@ -131,11 +132,11 @@ type dockerFactory struct {
zfsWatcher *zfs.ZfsWatcher
}
func (self *dockerFactory) String() string {
func (f *dockerFactory) String() string {
return DockerNamespace
}
func (self *dockerFactory) NewContainerHandler(name string, inHostNamespace bool) (handler container.ContainerHandler, err error) {
func (f *dockerFactory) NewContainerHandler(name string, inHostNamespace bool) (handler container.ContainerHandler, err error) {
client, err := Client()
if err != nil {
return
@@ -146,18 +147,18 @@ func (self *dockerFactory) NewContainerHandler(name string, inHostNamespace bool
handler, err = newDockerContainerHandler(
client,
name,
self.machineInfoFactory,
self.fsInfo,
self.storageDriver,
self.storageDir,
&self.cgroupSubsystems,
f.machineInfoFactory,
f.fsInfo,
f.storageDriver,
f.storageDir,
&f.cgroupSubsystems,
inHostNamespace,
metadataEnvs,
self.dockerVersion,
self.includedMetrics,
self.thinPoolName,
self.thinPoolWatcher,
self.zfsWatcher,
f.dockerVersion,
f.includedMetrics,
f.thinPoolName,
f.thinPoolWatcher,
f.zfsWatcher,
)
return
}
@@ -184,7 +185,7 @@ func isContainerName(name string) bool {
}
// Docker handles all containers under /docker
func (self *dockerFactory) CanHandleAndAccept(name string) (bool, bool, error) {
func (f *dockerFactory) CanHandleAndAccept(name string) (bool, bool, error) {
// if the container is not associated with docker, we can't handle it or accept it.
if !isContainerName(name) {
return false, false, nil
@@ -194,7 +195,7 @@ func (self *dockerFactory) CanHandleAndAccept(name string) (bool, bool, error) {
id := ContainerNameToDockerId(name)
// We assume that if Inspect fails then the container is not known to docker.
ctnr, err := self.client.ContainerInspect(context.Background(), id)
ctnr, err := f.client.ContainerInspect(context.Background(), id)
if err != nil || !ctnr.State.Running {
return false, true, fmt.Errorf("error inspecting container: %v", err)
}
@@ -202,15 +203,15 @@ func (self *dockerFactory) CanHandleAndAccept(name string) (bool, bool, error) {
return true, true, nil
}
func (self *dockerFactory) DebugInfo() map[string][]string {
func (f *dockerFactory) DebugInfo() map[string][]string {
return map[string][]string{}
}
var (
version_regexp_string = `(\d+)\.(\d+)\.(\d+)`
version_re = regexp.MustCompile(version_regexp_string)
apiversion_regexp_string = `(\d+)\.(\d+)`
apiversion_re = regexp.MustCompile(apiversion_regexp_string)
versionRegexpString = `(\d+)\.(\d+)\.(\d+)`
versionRe = regexp.MustCompile(versionRegexpString)
apiVersionRegexpString = `(\d+)\.(\d+)`
apiVersionRe = regexp.MustCompile(apiVersionRegexpString)
)
func startThinPoolWatcher(dockerInfo *dockertypes.Info) (*devicemapper.ThinPoolWatcher, error) {
@@ -268,7 +269,7 @@ func ensureThinLsKernelVersion(kernelVersion string) error {
// thin_ls to work without corrupting the thin pool
minRhel7KernelVersion := semver.MustParse("3.10.0")
matches := version_re.FindStringSubmatch(kernelVersion)
matches := versionRe.FindStringSubmatch(kernelVersion)
if len(matches) < 4 {
return fmt.Errorf("error parsing kernel version: %q is not a semver", kernelVersion)
}
@@ -334,7 +335,7 @@ func Register(factory info.MachineInfoFactory, fsInfo fs.FsInfo, includedMetrics
}
// Version already validated above, assume no error here.
dockerVersion, _ := parseVersion(dockerInfo.ServerVersion, version_re, 3)
dockerVersion, _ := parseVersion(dockerInfo.ServerVersion, versionRe, 3)
dockerAPIVersion, _ := APIVersion()

@@ -37,7 +37,7 @@ import (
cgroupfs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
libcontainerconfigs "github.com/opencontainers/runc/libcontainer/configs"
"golang.org/x/net/context"
"k8s.io/klog"
"k8s.io/klog/v2"
)
const (
@@ -174,6 +174,8 @@ func newDockerContainerHandler(
rootfsStorageDir = path.Join(storageDir, string(storageDriver), rwLayerID, overlayRWLayer)
case overlay2StorageDriver:
rootfsStorageDir = path.Join(storageDir, string(storageDriver), rwLayerID, overlay2RWLayer)
case vfsStorageDriver:
rootfsStorageDir = path.Join(storageDir)
case zfsStorageDriver:
status, err := Status()
if err != nil {
@@ -230,8 +232,8 @@ func newDockerContainerHandler(
ipAddress := ctnr.NetworkSettings.IPAddress
networkMode := string(ctnr.HostConfig.NetworkMode)
if ipAddress == "" && strings.HasPrefix(networkMode, "container:") {
containerId := strings.TrimPrefix(networkMode, "container:")
c, err := client.ContainerInspect(context.Background(), containerId)
containerID := strings.TrimPrefix(networkMode, "container:")
c, err := client.ContainerInspect(context.Background(), containerID)
if err != nil {
return nil, fmt.Errorf("failed to inspect container %q: %v", id, err)
}
@@ -329,68 +331,68 @@ func (h *dockerFsHandler) Usage() common.FsUsage {
return usage
}
func (self *dockerContainerHandler) Start() {
if self.fsHandler != nil {
self.fsHandler.Start()
func (h *dockerContainerHandler) Start() {
if h.fsHandler != nil {
h.fsHandler.Start()
}
}
func (self *dockerContainerHandler) Cleanup() {
if self.fsHandler != nil {
self.fsHandler.Stop()
func (h *dockerContainerHandler) Cleanup() {
if h.fsHandler != nil {
h.fsHandler.Stop()
}
}
func (self *dockerContainerHandler) ContainerReference() (info.ContainerReference, error) {
return self.reference, nil
func (h *dockerContainerHandler) ContainerReference() (info.ContainerReference, error) {
return h.reference, nil
}
func (self *dockerContainerHandler) needNet() bool {
if self.includedMetrics.Has(container.NetworkUsageMetrics) {
return !self.networkMode.IsContainer()
func (h *dockerContainerHandler) needNet() bool {
if h.includedMetrics.Has(container.NetworkUsageMetrics) {
return !h.networkMode.IsContainer()
}
return false
}
func (self *dockerContainerHandler) GetSpec() (info.ContainerSpec, error) {
hasFilesystem := self.includedMetrics.Has(container.DiskUsageMetrics)
spec, err := common.GetSpec(self.cgroupPaths, self.machineInfoFactory, self.needNet(), hasFilesystem)
func (h *dockerContainerHandler) GetSpec() (info.ContainerSpec, error) {
hasFilesystem := h.includedMetrics.Has(container.DiskUsageMetrics)
spec, err := common.GetSpec(h.cgroupPaths, h.machineInfoFactory, h.needNet(), hasFilesystem)
spec.Labels = self.labels
spec.Envs = self.envs
spec.Image = self.image
spec.CreationTime = self.creationTime
spec.Labels = h.labels
spec.Envs = h.envs
spec.Image = h.image
spec.CreationTime = h.creationTime
return spec, err
}
func (self *dockerContainerHandler) getFsStats(stats *info.ContainerStats) error {
mi, err := self.machineInfoFactory.GetMachineInfo()
func (h *dockerContainerHandler) getFsStats(stats *info.ContainerStats) error {
mi, err := h.machineInfoFactory.GetMachineInfo()
if err != nil {
return err
}
if self.includedMetrics.Has(container.DiskIOMetrics) {
if h.includedMetrics.Has(container.DiskIOMetrics) {
common.AssignDeviceNamesToDiskStats((*common.MachineInfoNamer)(mi), &stats.DiskIo)
}
if !self.includedMetrics.Has(container.DiskUsageMetrics) {
if !h.includedMetrics.Has(container.DiskUsageMetrics) {
return nil
}
var device string
switch self.storageDriver {
switch h.storageDriver {
case devicemapperStorageDriver:
// Device has to be the pool name to correlate with the device name as
// set in the machine info filesystems.
device = self.poolName
case aufsStorageDriver, overlayStorageDriver, overlay2StorageDriver:
deviceInfo, err := self.fsInfo.GetDirFsDevice(self.rootfsStorageDir)
device = h.poolName
case aufsStorageDriver, overlayStorageDriver, overlay2StorageDriver, vfsStorageDriver:
deviceInfo, err := h.fsInfo.GetDirFsDevice(h.rootfsStorageDir)
if err != nil {
return fmt.Errorf("unable to determine device info for dir: %v: %v", self.rootfsStorageDir, err)
return fmt.Errorf("unable to determine device info for dir: %v: %v", h.rootfsStorageDir, err)
}
device = deviceInfo.Device
case zfsStorageDriver:
device = self.zfsParent
device = h.zfsParent
default:
return nil
}
@@ -410,7 +412,7 @@ func (self *dockerContainerHandler) getFsStats(stats *info.ContainerStats) error
}
fsStat := info.FsStats{Device: device, Type: fsType, Limit: limit}
usage := self.fsHandler.Usage()
usage := h.fsHandler.Usage()
fsStat.BaseUsage = usage.BaseUsageBytes
fsStat.Usage = usage.TotalUsageBytes
fsStat.Inodes = usage.InodeUsage
@@ -421,8 +423,8 @@ func (self *dockerContainerHandler) getFsStats(stats *info.ContainerStats) error
}
// TODO(vmarmol): Get from libcontainer API instead of cgroup manager when we don't have to support older Dockers.
func (self *dockerContainerHandler) GetStats() (*info.ContainerStats, error) {
stats, err := self.libcontainerHandler.GetStats()
func (h *dockerContainerHandler) GetStats() (*info.ContainerStats, error) {
stats, err := h.libcontainerHandler.GetStats()
if err != nil {
return stats, err
}
@@ -430,12 +432,12 @@ func (self *dockerContainerHandler) GetStats() (*info.ContainerStats, error) {
// includes containers running in Kubernetes pods that use the network of the
// infrastructure container. This stops metrics being reported multiple times
// for each container in a pod.
if !self.needNet() {
if !h.needNet() {
stats.Network = info.NetworkStats{}
}
// Get filesystem stats.
err = self.getFsStats(stats)
err = h.getFsStats(stats)
if err != nil {
return stats, err
}
@ -443,35 +445,35 @@ func (self *dockerContainerHandler) GetStats() (*info.ContainerStats, error) {
return stats, nil
}
func (self *dockerContainerHandler) ListContainers(listType container.ListType) ([]info.ContainerReference, error) {
func (h *dockerContainerHandler) ListContainers(listType container.ListType) ([]info.ContainerReference, error) {
// No-op for Docker driver.
return []info.ContainerReference{}, nil
}
func (self *dockerContainerHandler) GetCgroupPath(resource string) (string, error) {
path, ok := self.cgroupPaths[resource]
func (h *dockerContainerHandler) GetCgroupPath(resource string) (string, error) {
path, ok := h.cgroupPaths[resource]
if !ok {
return "", fmt.Errorf("could not find path for resource %q for container %q\n", resource, self.reference.Name)
return "", fmt.Errorf("could not find path for resource %q for container %q", resource, h.reference.Name)
}
return path, nil
}
func (self *dockerContainerHandler) GetContainerLabels() map[string]string {
return self.labels
func (h *dockerContainerHandler) GetContainerLabels() map[string]string {
return h.labels
}
func (self *dockerContainerHandler) GetContainerIPAddress() string {
return self.ipAddress
func (h *dockerContainerHandler) GetContainerIPAddress() string {
return h.ipAddress
}
func (self *dockerContainerHandler) ListProcesses(listType container.ListType) ([]int, error) {
return self.libcontainerHandler.GetProcesses()
func (h *dockerContainerHandler) ListProcesses(listType container.ListType) ([]int, error) {
return h.libcontainerHandler.GetProcesses()
}
func (self *dockerContainerHandler) Exists() bool {
return common.CgroupExists(self.cgroupPaths)
func (h *dockerContainerHandler) Exists() bool {
return common.CgroupExists(h.cgroupPaths)
}
func (self *dockerContainerHandler) Type() container.ContainerType {
func (h *dockerContainerHandler) Type() container.ContainerType {
return container.ContainerTypeDocker
}


@ -9,7 +9,7 @@ go_library(
deps = [
"//vendor/github.com/google/cadvisor/container:go_default_library",
"//vendor/github.com/google/cadvisor/container/docker:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
],
)


@ -18,7 +18,7 @@ package install
import (
"github.com/google/cadvisor/container"
"github.com/google/cadvisor/container/docker"
"k8s.io/klog"
"k8s.io/klog/v2"
)
func init() {


@ -22,7 +22,7 @@ import (
info "github.com/google/cadvisor/info/v1"
"github.com/google/cadvisor/watcher"
"golang.org/x/net/context"
"k8s.io/klog"
"k8s.io/klog/v2"
)
const dockerClientTimeout = 10 * time.Second


@ -23,10 +23,7 @@ import (
)
const (
DockerInfoDriver = "Driver"
DockerInfoDriverStatus = "DriverStatus"
DriverStatusPoolName = "Pool Name"
DriverStatusDataLoopFile = "Data loop file"
DriverStatusMetadataFile = "Metadata file"
DriverStatusParentDataset = "Parent Dataset"
)


@ -22,7 +22,7 @@ import (
info "github.com/google/cadvisor/info/v1"
"github.com/google/cadvisor/watcher"
"k8s.io/klog"
"k8s.io/klog/v2"
)
type ContainerHandlerFactory interface {
@ -58,6 +58,9 @@ const (
AppMetrics MetricKind = "app"
ProcessMetrics MetricKind = "process"
HugetlbUsageMetrics MetricKind = "hugetlb"
PerfMetrics MetricKind = "perf_event"
ReferencedMemoryMetrics MetricKind = "referenced_memory"
CPUTopologyMetrics MetricKind = "cpu_topology"
)
// AllMetrics represents all kinds of metrics that cAdvisor supported.
@ -77,6 +80,9 @@ var AllMetrics = MetricSet{
ProcessMetrics: struct{}{},
AppMetrics: struct{}{},
HugetlbUsageMetrics: struct{}{},
PerfMetrics: struct{}{},
ReferencedMemoryMetrics: struct{}{},
CPUTopologyMetrics: struct{}{},
}
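The handlers above gate optional collection on includedMetrics.Has checks against exactly these kinds. A minimal self-contained sketch of the set semantics; the Has method and constants below mirror the cAdvisor container package for illustration only and are not imports of it:

package main

import "fmt"

// MetricKind and MetricSet mirror the cAdvisor container package types
// so this sketch compiles on its own.
type MetricKind string

type MetricSet map[MetricKind]struct{}

// Has reports whether the set includes the given metric kind.
func (ms MetricSet) Has(mk MetricKind) bool {
	_, ok := ms[mk]
	return ok
}

const (
	PerfMetrics             MetricKind = "perf_event"
	ReferencedMemoryMetrics MetricKind = "referenced_memory"
	CPUTopologyMetrics      MetricKind = "cpu_topology"
)

func main() {
	included := MetricSet{
		PerfMetrics:             struct{}{},
		ReferencedMemoryMetrics: struct{}{},
	}
	fmt.Println(included.Has(PerfMetrics))        // true
	fmt.Println(included.Has(CPUTopologyMetrics)) // false: not opted in
}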
func (mk MetricKind) String() string {
@ -204,9 +210,8 @@ func NewContainerHandler(name string, watchType watcher.ContainerWatchSource, in
klog.V(3).Infof("Using factory %q for container %q", factory, name)
handle, err := factory.NewContainerHandler(name, inHostNamespace)
return handle, canAccept, err
} else {
klog.V(4).Infof("Factory %q was unable to handle container %q", factory, name)
}
klog.V(4).Infof("Factory %q was unable to handle container %q", factory, name)
}
return nil, false, fmt.Errorf("no known factory can handle creation of container")


@ -15,7 +15,7 @@ go_library(
"//vendor/github.com/opencontainers/runc/libcontainer:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
"//vendor/golang.org/x/sys/unix:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
],
)


@ -17,6 +17,7 @@ package libcontainer
import (
"bufio"
"encoding/json"
"flag"
"fmt"
"io"
"io/ioutil"
@ -35,7 +36,18 @@ import (
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/cgroups"
"k8s.io/klog"
"k8s.io/klog/v2"
)
var (
whitelistedUlimits = [...]string{"max_open_files"}
referencedResetInterval = flag.Uint64("referenced_reset_interval", 0,
"Reset interval for referenced bytes (container_referenced_bytes metric), number of measurement cycles after which referenced bytes are cleared, if set to 0 referenced bytes are never cleared (default: 0)")
smapsFilePathPattern = "/proc/%d/smaps"
clearRefsFilePathPattern = "/proc/%d/clear_refs"
referencedRegexp = regexp.MustCompile(`Referenced:\s*([0-9]+)\s*kB`)
)
type Handler struct {
@ -44,10 +56,9 @@ type Handler struct {
pid int
includedMetrics container.MetricSet
pidMetricsCache map[int]*info.CpuSchedstat
cycles uint64
}
var whitelistedUlimits = [...]string{"max_open_files"}
func NewHandler(cgroupManager cgroups.Manager, rootFs string, pid int, includedMetrics container.MetricSet) *Handler {
return &Handler{
cgroupManager: cgroupManager,
@ -81,6 +92,19 @@ func (h *Handler) GetStats() (*info.ContainerStats, error) {
}
}
if h.includedMetrics.Has(container.ReferencedMemoryMetrics) {
h.cycles++
pids, err := h.cgroupManager.GetPids()
if err != nil {
klog.V(4).Infof("Could not get PIDs for container %d: %v", h.pid, err)
} else {
stats.ReferencedMemory, err = referencedBytesStat(pids, h.cycles, *referencedResetInterval)
if err != nil {
klog.V(4).Infof("Unable to get referenced bytes: %v", err)
}
}
}
// If we know the pid then get network stats from /proc/<pid>/net/dev
if h.pid == 0 {
return stats, nil
@ -110,7 +134,7 @@ func (h *Handler) GetStats() (*info.ContainerStats, error) {
}
if h.includedMetrics.Has(container.NetworkAdvancedTcpUsageMetrics) {
ta, err := advancedTcpStatsFromProc(h.rootFs, h.pid, "net/netstat", "net/snmp")
ta, err := advancedTCPStatsFromProc(h.rootFs, h.pid, "net/netstat", "net/snmp")
if err != nil {
klog.V(4).Infof("Unable to get advanced tcp stats from pid %d: %v", h.pid, err)
} else {
@ -187,7 +211,7 @@ func processLimitsFile(fileData string) []info.UlimitSpec {
if strings.HasPrefix(lim, "Max") {
// Line format: Max open files 16384 16384 files
fields := regexp.MustCompile("[\\s]{2,}").Split(lim, -1)
fields := regexp.MustCompile(`[\s]{2,}`).Split(lim, -1)
name := strings.Replace(strings.ToLower(strings.TrimSpace(fields[0])), " ", "_", -1)
found := isUlimitWhitelisted(name)
@ -196,17 +220,17 @@ func processLimitsFile(fileData string) []info.UlimitSpec {
}
soft := strings.TrimSpace(fields[1])
soft_num, soft_err := parseUlimit(soft)
softNum, softErr := parseUlimit(soft)
hard := strings.TrimSpace(fields[2])
hard_num, hard_err := parseUlimit(hard)
hardNum, hardErr := parseUlimit(hard)
// Omit metric if there were any parsing errors
if soft_err == nil && hard_err == nil {
if softErr == nil && hardErr == nil {
ulimitSpec := info.UlimitSpec{
Name: name,
SoftLimit: int64(soft_num),
HardLimit: int64(hard_num),
SoftLimit: int64(softNum),
HardLimit: int64(hardNum),
}
ulimits = append(ulimits, ulimitSpec)
}
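To make the splitting above concrete, a standalone sketch of parsing one /proc/<pid>/limits line; the sample line is fabricated but follows the format noted in the comment above:

package main

import (
	"fmt"
	"regexp"
	"strings"
)

func main() {
	// Fabricated /proc/<pid>/limits line; columns are separated by runs
	// of two or more spaces, so the single spaces inside the limit name
	// ("Max open files") survive the split.
	lim := "Max open files            16384                16384                files"

	fields := regexp.MustCompile(`[\s]{2,}`).Split(lim, -1)
	name := strings.Replace(strings.ToLower(strings.TrimSpace(fields[0])), " ", "_", -1)

	fmt.Println(name)      // max_open_files: the whitelisted ulimit name
	fmt.Println(fields[1]) // 16384: soft limit
	fmt.Println(fields[2]) // 16384: hard limit
}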
@ -318,6 +342,92 @@ func schedulerStatsFromProcs(rootFs string, pids []int, pidMetricsCache map[int]
return schedstats, nil
}
// referencedBytesStat gets and clears referenced bytes
// see: https://github.com/brendangregg/wss#wsspl-referenced-page-flag
func referencedBytesStat(pids []int, cycles uint64, resetInterval uint64) (uint64, error) {
referencedKBytes, err := getReferencedKBytes(pids)
if err != nil {
return uint64(0), err
}
err = clearReferencedBytes(pids, cycles, resetInterval)
if err != nil {
return uint64(0), err
}
return referencedKBytes * 1024, nil
}
func getReferencedKBytes(pids []int) (uint64, error) {
referencedKBytes := uint64(0)
readSmapsContent := false
foundMatch := false
for _, pid := range pids {
smapsFilePath := fmt.Sprintf(smapsFilePathPattern, pid)
smapsContent, err := ioutil.ReadFile(smapsFilePath)
if err != nil {
klog.V(5).Infof("Cannot read %s file, err: %s", smapsFilePath, err)
if os.IsNotExist(err) {
continue // smaps file does not exist for all PIDs
}
return 0, err
}
readSmapsContent = true
allMatches := referencedRegexp.FindAllSubmatch(smapsContent, -1)
if len(allMatches) == 0 {
klog.V(5).Infof("Not found any information about referenced bytes in %s file", smapsFilePath)
continue // referenced bytes may not exist in smaps file
}
for _, matches := range allMatches {
if len(matches) != 2 {
return 0, fmt.Errorf("failed to match regexp in output: %s", string(smapsContent))
}
foundMatch = true
referenced, err := strconv.ParseUint(string(matches[1]), 10, 64)
if err != nil {
return 0, err
}
referencedKBytes += referenced
}
}
if len(pids) != 0 {
if !readSmapsContent {
klog.Warningf("Cannot read smaps files for any PID from %s", "CONTAINER")
} else if !foundMatch {
klog.Warningf("Not found any information about referenced bytes in smaps files for any PID from %s", "CONTAINER")
}
}
return referencedKBytes, nil
}
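The accounting above keys off the "Referenced:" lines that appear once per mapping in /proc/<pid>/smaps. A minimal sketch with a fabricated smaps excerpt, summing the per-mapping values the same way:

package main

import (
	"fmt"
	"regexp"
	"strconv"
)

var referencedRegexp = regexp.MustCompile(`Referenced:\s*([0-9]+)\s*kB`)

func main() {
	// Fabricated /proc/<pid>/smaps excerpt: one Referenced line per mapping.
	smaps := []byte("Rss:  2048 kB\nReferenced:  1024 kB\nRss:  512 kB\nReferenced:  256 kB\n")

	var referencedKBytes uint64
	for _, m := range referencedRegexp.FindAllSubmatch(smaps, -1) {
		v, err := strconv.ParseUint(string(m[1]), 10, 64)
		if err != nil {
			panic(err)
		}
		referencedKBytes += v
	}
	// referencedBytesStat reports bytes, hence the final *1024 scaling.
	fmt.Println(referencedKBytes * 1024) // 1310720
}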
func clearReferencedBytes(pids []int, cycles uint64, resetInterval uint64) error {
if resetInterval == 0 {
return nil
}
if cycles%resetInterval == 0 {
for _, pid := range pids {
clearRefsFilePath := fmt.Sprintf(clearRefsFilePathPattern, pid)
clearRefsFile, err := os.OpenFile(clearRefsFilePath, os.O_WRONLY, 0644)
if err != nil {
// clear_refs file may not exist for all PIDs
continue
}
_, err = clearRefsFile.WriteString("1\n")
if err != nil {
return err
}
err = clearRefsFile.Close()
if err != nil {
return err
}
}
}
return nil
}
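The cycles counter bumped in GetStats feeds the gating above: with --referenced_reset_interval=N, clear_refs is written on every Nth measurement cycle, and an interval of 0 disables clearing. A tiny sketch of that arithmetic with fabricated values:

package main

import "fmt"

func main() {
	// With a reset interval of 3, referenced bytes are cleared on
	// cycles 3, 6, ...; an interval of 0 would never clear.
	const resetInterval = 3
	for cycles := uint64(1); cycles <= 6; cycles++ {
		doClear := resetInterval != 0 && cycles%resetInterval == 0
		fmt.Printf("cycle %d: write clear_refs: %v\n", cycles, doClear)
	}
}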
func networkStatsFromProc(rootFs string, pid int) ([]info.InterfaceStats, error) {
netStatsFile := path.Join(rootFs, "proc", strconv.Itoa(pid), "/net/dev")
@ -410,7 +520,7 @@ func setInterfaceStatValues(fields []string, pointers []*uint64) error {
func tcpStatsFromProc(rootFs string, pid int, file string) (info.TcpStat, error) {
tcpStatsFile := path.Join(rootFs, "proc", strconv.Itoa(pid), file)
tcpStats, err := scanTcpStats(tcpStatsFile)
tcpStats, err := scanTCPStats(tcpStatsFile)
if err != nil {
return tcpStats, fmt.Errorf("couldn't read tcp stats: %v", err)
}
@ -418,18 +528,18 @@ func tcpStatsFromProc(rootFs string, pid int, file string) (info.TcpStat, error)
return tcpStats, nil
}
func advancedTcpStatsFromProc(rootFs string, pid int, file1, file2 string) (info.TcpAdvancedStat, error) {
func advancedTCPStatsFromProc(rootFs string, pid int, file1, file2 string) (info.TcpAdvancedStat, error) {
var advancedStats info.TcpAdvancedStat
var err error
netstatFile := path.Join(rootFs, "proc", strconv.Itoa(pid), file1)
err = scanAdvancedTcpStats(&advancedStats, netstatFile)
err = scanAdvancedTCPStats(&advancedStats, netstatFile)
if err != nil {
return advancedStats, err
}
snmpFile := path.Join(rootFs, "proc", strconv.Itoa(pid), file2)
err = scanAdvancedTcpStats(&advancedStats, snmpFile)
err = scanAdvancedTCPStats(&advancedStats, snmpFile)
if err != nil {
return advancedStats, err
}
@ -437,17 +547,17 @@ func advancedTcpStatsFromProc(rootFs string, pid int, file1, file2 string) (info
return advancedStats, nil
}
func scanAdvancedTcpStats(advancedStats *info.TcpAdvancedStat, advancedTcpStatsFile string) error {
data, err := ioutil.ReadFile(advancedTcpStatsFile)
func scanAdvancedTCPStats(advancedStats *info.TcpAdvancedStat, advancedTCPStatsFile string) error {
data, err := ioutil.ReadFile(advancedTCPStatsFile)
if err != nil {
return fmt.Errorf("failure opening %s: %v", advancedTcpStatsFile, err)
return fmt.Errorf("failure opening %s: %v", advancedTCPStatsFile, err)
}
reader := strings.NewReader(string(data))
scanner := bufio.NewScanner(reader)
scanner.Split(bufio.ScanLines)
advancedTcpStats := make(map[string]interface{})
advancedTCPStats := make(map[string]interface{})
for scanner.Scan() {
nameParts := strings.Split(scanner.Text(), " ")
scanner.Scan()
@ -459,7 +569,7 @@ func scanAdvancedTcpStats(advancedStats *info.TcpAdvancedStat, advancedTcpStatsF
}
if len(nameParts) != len(valueParts) {
return fmt.Errorf("mismatch field count mismatch in %s: %s",
advancedTcpStatsFile, protocol)
advancedTCPStatsFile, protocol)
}
for i := 1; i < len(nameParts); i++ {
if strings.Contains(valueParts[i], "-") {
@ -467,18 +577,18 @@ func scanAdvancedTcpStats(advancedStats *info.TcpAdvancedStat, advancedTcpStatsF
if err != nil {
return fmt.Errorf("decode value: %s to int64 error: %s", valueParts[i], err)
}
advancedTcpStats[nameParts[i]] = vInt64
advancedTCPStats[nameParts[i]] = vInt64
} else {
vUint64, err := strconv.ParseUint(valueParts[i], 10, 64)
if err != nil {
return fmt.Errorf("decode value: %s to uint64 error: %s", valueParts[i], err)
}
advancedTcpStats[nameParts[i]] = vUint64
advancedTCPStats[nameParts[i]] = vUint64
}
}
}
b, err := json.Marshal(advancedTcpStats)
b, err := json.Marshal(advancedTCPStats)
if err != nil {
return err
}
@ -492,7 +602,7 @@ func scanAdvancedTcpStats(advancedStats *info.TcpAdvancedStat, advancedTcpStatsF
}
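The scanner above relies on net/netstat and net/snmp emitting each protocol's stats as a header line of field names followed by a line of values, both prefixed with the protocol tag. A small sketch over a fabricated TcpExt record showing how the two lines zip together:

package main

import (
	"fmt"
	"strings"
)

func main() {
	// Fabricated two-line record in the /proc/net/netstat format.
	nameParts := strings.Split("TcpExt: SyncookiesSent SyncookiesRecv TCPPureAcks", " ")
	valueParts := strings.Split("TcpExt: 4 2 1337", " ")

	// Index 0 on both lines is the protocol tag, which is why the real
	// scan starts at 1 and requires equal field counts.
	for i := 1; i < len(nameParts); i++ {
		fmt.Printf("%s = %s\n", nameParts[i], valueParts[i])
	}
}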
func scanTcpStats(tcpStatsFile string) (info.TcpStat, error) {
func scanTCPStats(tcpStatsFile string) (info.TcpStat, error) {
var stats info.TcpStat
@ -567,7 +677,7 @@ func udpStatsFromProc(rootFs string, pid int, file string) (info.UdpStat, error)
return udpStats, fmt.Errorf("failure opening %s: %v", udpStatsFile, err)
}
udpStats, err = scanUdpStats(r)
udpStats, err = scanUDPStats(r)
if err != nil {
return udpStats, fmt.Errorf("couldn't read udp stats: %v", err)
}
@ -575,7 +685,7 @@ func udpStatsFromProc(rootFs string, pid int, file string) (info.UdpStat, error)
return udpStats, nil
}
func scanUdpStats(r io.Reader) (info.UdpStat, error) {
func scanUDPStats(r io.Reader) (info.UdpStat, error) {
var stats info.UdpStat
scanner := bufio.NewScanner(r)
@ -643,7 +753,7 @@ func minUint32(x, y uint32) uint32 {
var numCpusFunc = getNumberOnlineCPUs
// Convert libcontainer stats to info.ContainerStats.
func setCpuStats(s *cgroups.Stats, ret *info.ContainerStats, withPerCPU bool) {
func setCPUStats(s *cgroups.Stats, ret *info.ContainerStats, withPerCPU bool) {
ret.Cpu.Usage.User = s.CpuStats.CpuUsage.UsageInUsermode
ret.Cpu.Usage.System = s.CpuStats.CpuUsage.UsageInKernelmode
ret.Cpu.Usage.Total = s.CpuStats.CpuUsage.TotalUsage
@ -787,7 +897,7 @@ func newContainerStats(libcontainerStats *libcontainer.Stats, includedMetrics co
}
if s := libcontainerStats.CgroupStats; s != nil {
setCpuStats(s, ret, includedMetrics.Has(container.PerCpuUsageMetrics))
setCPUStats(s, ret, includedMetrics.Has(container.PerCpuUsageMetrics))
if includedMetrics.Has(container.DiskIOMetrics) {
setDiskIoStats(s, ret)
}


@ -21,7 +21,7 @@ import (
"github.com/google/cadvisor/container"
"github.com/opencontainers/runc/libcontainer/cgroups"
"k8s.io/klog"
"k8s.io/klog/v2"
)
type CgroupSubsystems struct {
@ -104,15 +104,16 @@ func getCgroupSubsystemsHelper(allCgroups []cgroups.Mount, disableCgroups map[st
// Cgroup subsystems we support listing (should be the minimal set we need stats from).
var supportedSubsystems map[string]struct{} = map[string]struct{}{
"cpu": {},
"cpuacct": {},
"memory": {},
"hugetlb": {},
"pids": {},
"cpuset": {},
"blkio": {},
"io": {},
"devices": {},
"cpu": {},
"cpuacct": {},
"memory": {},
"hugetlb": {},
"pids": {},
"cpuset": {},
"blkio": {},
"io": {},
"devices": {},
"perf_event": {},
}
func DiskStatsCopy0(major, minor uint64) *info.PerDiskStats {
@ -129,38 +130,38 @@ type DiskKey struct {
Minor uint64
}
func DiskStatsCopy1(disk_stat map[DiskKey]*info.PerDiskStats) []info.PerDiskStats {
func DiskStatsCopy1(diskStat map[DiskKey]*info.PerDiskStats) []info.PerDiskStats {
i := 0
stat := make([]info.PerDiskStats, len(disk_stat))
for _, disk := range disk_stat {
stat := make([]info.PerDiskStats, len(diskStat))
for _, disk := range diskStat {
stat[i] = *disk
i++
}
return stat
}
func DiskStatsCopy(blkio_stats []cgroups.BlkioStatEntry) (stat []info.PerDiskStats) {
if len(blkio_stats) == 0 {
func DiskStatsCopy(blkioStats []cgroups.BlkioStatEntry) (stat []info.PerDiskStats) {
if len(blkioStats) == 0 {
return
}
disk_stat := make(map[DiskKey]*info.PerDiskStats)
for i := range blkio_stats {
major := blkio_stats[i].Major
minor := blkio_stats[i].Minor
disk_key := DiskKey{
diskStat := make(map[DiskKey]*info.PerDiskStats)
for i := range blkioStats {
major := blkioStats[i].Major
minor := blkioStats[i].Minor
key := DiskKey{
Major: major,
Minor: minor,
}
diskp, ok := disk_stat[disk_key]
diskp, ok := diskStat[key]
if !ok {
diskp = DiskStatsCopy0(major, minor)
disk_stat[disk_key] = diskp
diskStat[key] = diskp
}
op := blkio_stats[i].Op
op := blkioStats[i].Op
if op == "" {
op = "Count"
}
diskp.Stats[op] = blkio_stats[i].Value
diskp.Stats[op] = blkioStats[i].Value
}
return DiskStatsCopy1(disk_stat)
return DiskStatsCopy1(diskStat)
}
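DiskStatsCopy above folds per-operation blkio entries into one stats map per (major, minor) device, defaulting an empty op to "Count". A hedged sketch of the same grouping over fabricated entries:

package main

import "fmt"

// blkioEntry stands in for cgroups.BlkioStatEntry in this sketch.
type blkioEntry struct {
	Major, Minor uint64
	Op           string
	Value        uint64
}

type diskKey struct{ Major, Minor uint64 }

func main() {
	entries := []blkioEntry{
		{Major: 8, Minor: 0, Op: "Read", Value: 100},
		{Major: 8, Minor: 0, Op: "Write", Value: 50},
		{Major: 8, Minor: 16, Op: "", Value: 7}, // empty op becomes "Count"
	}

	perDisk := make(map[diskKey]map[string]uint64)
	for _, e := range entries {
		key := diskKey{e.Major, e.Minor}
		if perDisk[key] == nil {
			perDisk[key] = make(map[string]uint64)
		}
		op := e.Op
		if op == "" {
			op = "Count"
		}
		perDisk[key][op] = e.Value
	}

	fmt.Println(perDisk[diskKey{8, 0}])  // map[Read:100 Write:50]
	fmt.Println(perDisk[diskKey{8, 16}]) // map[Count:7]
}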


@ -20,7 +20,7 @@ go_library(
"//vendor/github.com/google/cadvisor/watcher:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
"//vendor/k8s.io/utils/inotify:go_default_library",
],
)


@ -26,7 +26,7 @@ import (
info "github.com/google/cadvisor/info/v1"
watch "github.com/google/cadvisor/watcher"
"k8s.io/klog"
"k8s.io/klog/v2"
)
var dockerOnly = flag.Bool("docker_only", false, "Only report docker containers in addition to root stats")
@ -52,27 +52,27 @@ type rawFactory struct {
rawPrefixWhiteList []string
}
func (self *rawFactory) String() string {
func (f *rawFactory) String() string {
return "raw"
}
func (self *rawFactory) NewContainerHandler(name string, inHostNamespace bool) (container.ContainerHandler, error) {
func (f *rawFactory) NewContainerHandler(name string, inHostNamespace bool) (container.ContainerHandler, error) {
rootFs := "/"
if !inHostNamespace {
rootFs = "/rootfs"
}
return newRawContainerHandler(name, self.cgroupSubsystems, self.machineInfoFactory, self.fsInfo, self.watcher, rootFs, self.includedMetrics)
return newRawContainerHandler(name, f.cgroupSubsystems, f.machineInfoFactory, f.fsInfo, f.watcher, rootFs, f.includedMetrics)
}
// The raw factory can handle any container. If --docker_only is set to true, non-docker containers are ignored except for "/" and those whitelisted by the raw_cgroup_prefix_whitelist flag.
func (self *rawFactory) CanHandleAndAccept(name string) (bool, bool, error) {
func (f *rawFactory) CanHandleAndAccept(name string) (bool, bool, error) {
if name == "/" {
return true, true, nil
}
if *dockerOnly && self.rawPrefixWhiteList[0] == "" {
if *dockerOnly && f.rawPrefixWhiteList[0] == "" {
return true, false, nil
}
for _, prefix := range self.rawPrefixWhiteList {
for _, prefix := range f.rawPrefixWhiteList {
if strings.HasPrefix(name, prefix) {
return true, true, nil
}
@ -80,8 +80,8 @@ func (self *rawFactory) CanHandleAndAccept(name string) (bool, bool, error) {
return true, false, nil
}
func (self *rawFactory) DebugInfo() map[string][]string {
return common.DebugInfo(self.watcher.GetWatches())
func (f *rawFactory) DebugInfo() map[string][]string {
return common.DebugInfo(f.watcher.GetWatches())
}
func Register(machineInfoFactory info.MachineInfoFactory, fsInfo fs.FsInfo, includedMetrics map[container.MetricKind]struct{}, rawPrefixWhiteList []string) error {


@ -27,7 +27,7 @@ import (
cgroupfs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/configs"
"k8s.io/klog"
"k8s.io/klog/v2"
)
type rawContainerHandler struct {
@ -95,17 +95,17 @@ func newRawContainerHandler(name string, cgroupSubsystems *libcontainer.CgroupSu
}, nil
}
func (self *rawContainerHandler) ContainerReference() (info.ContainerReference, error) {
func (h *rawContainerHandler) ContainerReference() (info.ContainerReference, error) {
// We only know the container by its one name.
return info.ContainerReference{
Name: self.name,
Name: h.name,
}, nil
}
func (self *rawContainerHandler) GetRootNetworkDevices() ([]info.NetInfo, error) {
func (h *rawContainerHandler) GetRootNetworkDevices() ([]info.NetInfo, error) {
nd := []info.NetInfo{}
if isRootCgroup(self.name) {
mi, err := self.machineInfoFactory.GetMachineInfo()
if isRootCgroup(h.name) {
mi, err := h.machineInfoFactory.GetMachineInfo()
if err != nil {
return nd, err
}
@ -115,22 +115,22 @@ func (self *rawContainerHandler) GetRootNetworkDevices() ([]info.NetInfo, error)
}
// Nothing to start up.
func (self *rawContainerHandler) Start() {}
func (h *rawContainerHandler) Start() {}
// Nothing to clean up.
func (self *rawContainerHandler) Cleanup() {}
func (h *rawContainerHandler) Cleanup() {}
func (self *rawContainerHandler) GetSpec() (info.ContainerSpec, error) {
func (h *rawContainerHandler) GetSpec() (info.ContainerSpec, error) {
const hasNetwork = false
hasFilesystem := isRootCgroup(self.name) || len(self.externalMounts) > 0
spec, err := common.GetSpec(self.cgroupPaths, self.machineInfoFactory, hasNetwork, hasFilesystem)
hasFilesystem := isRootCgroup(h.name) || len(h.externalMounts) > 0
spec, err := common.GetSpec(h.cgroupPaths, h.machineInfoFactory, hasNetwork, hasFilesystem)
if err != nil {
return spec, err
}
if isRootCgroup(self.name) {
if isRootCgroup(h.name) {
// Check physical network devices for root container.
nd, err := self.GetRootNetworkDevices()
nd, err := h.GetRootNetworkDevices()
if err != nil {
return spec, err
}
@ -189,54 +189,53 @@ func fsToFsStats(fs *fs.Fs) info.FsStats {
}
}
func (self *rawContainerHandler) getFsStats(stats *info.ContainerStats) error {
func (h *rawContainerHandler) getFsStats(stats *info.ContainerStats) error {
var filesystems []fs.Fs
var err error
// Get Filesystem information only for the root cgroup.
if isRootCgroup(self.name) {
filesystems, err = self.fsInfo.GetGlobalFsInfo()
if isRootCgroup(h.name) {
filesystems, err = h.fsInfo.GetGlobalFsInfo()
if err != nil {
return err
}
} else if self.includedMetrics.Has(container.DiskUsageMetrics) || self.includedMetrics.Has(container.DiskIOMetrics) {
if len(self.externalMounts) > 0 {
var mountSet map[string]struct{}
mountSet = make(map[string]struct{})
for _, mount := range self.externalMounts {
} else if h.includedMetrics.Has(container.DiskUsageMetrics) || h.includedMetrics.Has(container.DiskIOMetrics) {
if len(h.externalMounts) > 0 {
mountSet := make(map[string]struct{})
for _, mount := range h.externalMounts {
mountSet[mount.HostDir] = struct{}{}
}
filesystems, err = self.fsInfo.GetFsInfoForPath(mountSet)
filesystems, err = h.fsInfo.GetFsInfoForPath(mountSet)
if err != nil {
return err
}
}
}
if isRootCgroup(self.name) || self.includedMetrics.Has(container.DiskUsageMetrics) {
if isRootCgroup(h.name) || h.includedMetrics.Has(container.DiskUsageMetrics) {
for i := range filesystems {
fs := filesystems[i]
stats.Filesystem = append(stats.Filesystem, fsToFsStats(&fs))
}
}
if isRootCgroup(self.name) || self.includedMetrics.Has(container.DiskIOMetrics) {
common.AssignDeviceNamesToDiskStats(&fsNamer{fs: filesystems, factory: self.machineInfoFactory}, &stats.DiskIo)
if isRootCgroup(h.name) || h.includedMetrics.Has(container.DiskIOMetrics) {
common.AssignDeviceNamesToDiskStats(&fsNamer{fs: filesystems, factory: h.machineInfoFactory}, &stats.DiskIo)
}
return nil
}
func (self *rawContainerHandler) GetStats() (*info.ContainerStats, error) {
if *disableRootCgroupStats && isRootCgroup(self.name) {
func (h *rawContainerHandler) GetStats() (*info.ContainerStats, error) {
if *disableRootCgroupStats && isRootCgroup(h.name) {
return nil, nil
}
stats, err := self.libcontainerHandler.GetStats()
stats, err := h.libcontainerHandler.GetStats()
if err != nil {
return stats, err
}
// Get filesystem stats.
err = self.getFsStats(stats)
err = h.getFsStats(stats)
if err != nil {
return stats, err
}
@ -244,36 +243,36 @@ func (self *rawContainerHandler) GetStats() (*info.ContainerStats, error) {
return stats, nil
}
func (self *rawContainerHandler) GetCgroupPath(resource string) (string, error) {
path, ok := self.cgroupPaths[resource]
func (h *rawContainerHandler) GetCgroupPath(resource string) (string, error) {
path, ok := h.cgroupPaths[resource]
if !ok {
return "", fmt.Errorf("could not find path for resource %q for container %q\n", resource, self.name)
return "", fmt.Errorf("could not find path for resource %q for container %q", resource, h.name)
}
return path, nil
}
func (self *rawContainerHandler) GetContainerLabels() map[string]string {
func (h *rawContainerHandler) GetContainerLabels() map[string]string {
return map[string]string{}
}
func (self *rawContainerHandler) GetContainerIPAddress() string {
func (h *rawContainerHandler) GetContainerIPAddress() string {
// The IP address for the raw container corresponds to the system IP address.
return "127.0.0.1"
}
func (self *rawContainerHandler) ListContainers(listType container.ListType) ([]info.ContainerReference, error) {
return common.ListContainers(self.name, self.cgroupPaths, listType)
func (h *rawContainerHandler) ListContainers(listType container.ListType) ([]info.ContainerReference, error) {
return common.ListContainers(h.name, h.cgroupPaths, listType)
}
func (self *rawContainerHandler) ListProcesses(listType container.ListType) ([]int, error) {
return self.libcontainerHandler.GetProcesses()
func (h *rawContainerHandler) ListProcesses(listType container.ListType) ([]int, error) {
return h.libcontainerHandler.GetProcesses()
}
func (self *rawContainerHandler) Exists() bool {
return common.CgroupExists(self.cgroupPaths)
func (h *rawContainerHandler) Exists() bool {
return common.CgroupExists(h.cgroupPaths)
}
func (self *rawContainerHandler) Type() container.ContainerType {
func (h *rawContainerHandler) Type() container.ContainerType {
return container.ContainerTypeRaw
}


@ -28,7 +28,7 @@ import (
"github.com/google/cadvisor/watcher"
inotify "k8s.io/utils/inotify"
"k8s.io/klog"
"k8s.io/klog/v2"
)
type rawContainerWatcher struct {
@ -68,10 +68,10 @@ func NewRawContainerWatcher() (watcher.ContainerWatcher, error) {
return rawWatcher, nil
}
func (self *rawContainerWatcher) Start(events chan watcher.ContainerEvent) error {
func (w *rawContainerWatcher) Start(events chan watcher.ContainerEvent) error {
// Watch this container (all its cgroups) and all subdirectories.
for _, cgroupPath := range self.cgroupPaths {
_, err := self.watchDirectory(events, cgroupPath, "/")
for _, cgroupPath := range w.cgroupPaths {
_, err := w.watchDirectory(events, cgroupPath, "/")
if err != nil {
return err
}
@ -81,17 +81,17 @@ func (self *rawContainerWatcher) Start(events chan watcher.ContainerEvent) error
go func() {
for {
select {
case event := <-self.watcher.Event():
err := self.processEvent(event, events)
case event := <-w.watcher.Event():
err := w.processEvent(event, events)
if err != nil {
klog.Warningf("Error while processing event (%+v): %v", event, err)
}
case err := <-self.watcher.Error():
case err := <-w.watcher.Error():
klog.Warningf("Error while watching %q: %v", "/", err)
case <-self.stopWatcher:
err := self.watcher.Close()
case <-w.stopWatcher:
err := w.watcher.Close()
if err == nil {
self.stopWatcher <- err
w.stopWatcher <- err
return
}
}
@ -101,21 +101,21 @@ func (self *rawContainerWatcher) Start(events chan watcher.ContainerEvent) error
return nil
}
func (self *rawContainerWatcher) Stop() error {
func (w *rawContainerWatcher) Stop() error {
// Rendezvous with the watcher thread.
self.stopWatcher <- nil
return <-self.stopWatcher
w.stopWatcher <- nil
return <-w.stopWatcher
}
// Watches the specified directory and all subdirectories. Returns whether the path was
// already being watched and an error (if any).
func (self *rawContainerWatcher) watchDirectory(events chan watcher.ContainerEvent, dir string, containerName string) (bool, error) {
func (w *rawContainerWatcher) watchDirectory(events chan watcher.ContainerEvent, dir string, containerName string) (bool, error) {
// Don't watch .mount cgroups because they never have containers as sub-cgroups. A single container
// can have many .mount cgroups associated with it which can quickly exhaust the inotify watches on a node.
if strings.HasSuffix(containerName, ".mount") {
return false, nil
}
alreadyWatching, err := self.watcher.AddWatch(containerName, dir)
alreadyWatching, err := w.watcher.AddWatch(containerName, dir)
if err != nil {
return alreadyWatching, err
}
@ -124,7 +124,7 @@ func (self *rawContainerWatcher) watchDirectory(events chan watcher.ContainerEve
cleanup := true
defer func() {
if cleanup {
_, err := self.watcher.RemoveWatch(containerName, dir)
_, err := w.watcher.RemoveWatch(containerName, dir)
if err != nil {
klog.Warningf("Failed to remove inotify watch for %q: %v", dir, err)
}
@ -141,7 +141,7 @@ func (self *rawContainerWatcher) watchDirectory(events chan watcher.ContainerEve
if entry.IsDir() {
entryPath := path.Join(dir, entry.Name())
subcontainerName := path.Join(containerName, entry.Name())
alreadyWatchingSubDir, err := self.watchDirectory(events, entryPath, subcontainerName)
alreadyWatchingSubDir, err := w.watchDirectory(events, entryPath, subcontainerName)
if err != nil {
klog.Errorf("Failed to watch directory %q: %v", entryPath, err)
if os.IsNotExist(err) {
@ -168,7 +168,7 @@ func (self *rawContainerWatcher) watchDirectory(events chan watcher.ContainerEve
return alreadyWatching, nil
}
func (self *rawContainerWatcher) processEvent(event *inotify.Event, events chan watcher.ContainerEvent) error {
func (w *rawContainerWatcher) processEvent(event *inotify.Event, events chan watcher.ContainerEvent) error {
// Convert the inotify event type to a container create or delete.
var eventType watcher.ContainerEventType
switch {
@ -187,7 +187,7 @@ func (self *rawContainerWatcher) processEvent(event *inotify.Event, events chan
// Derive the container name from the path name.
var containerName string
for _, mount := range self.cgroupSubsystems.Mounts {
for _, mount := range w.cgroupSubsystems.Mounts {
mountLocation := path.Clean(mount.Mountpoint) + "/"
if strings.HasPrefix(event.Name, mountLocation) {
containerName = event.Name[len(mountLocation)-1:]
@ -202,7 +202,7 @@ func (self *rawContainerWatcher) processEvent(event *inotify.Event, events chan
switch eventType {
case watcher.ContainerAdd:
// New container was created, watch it.
alreadyWatched, err := self.watchDirectory(events, event.Name, containerName)
alreadyWatched, err := w.watchDirectory(events, event.Name, containerName)
if err != nil {
return err
}
@ -213,7 +213,7 @@ func (self *rawContainerWatcher) processEvent(event *inotify.Event, events chan
}
case watcher.ContainerDelete:
// Container was deleted, stop watching for it.
lastWatched, err := self.watcher.RemoveWatch(containerName, event.Name)
lastWatched, err := w.watcher.RemoveWatch(containerName, event.Name)
if err != nil {
return err
}


@ -14,7 +14,7 @@ go_library(
"//vendor/github.com/google/cadvisor/fs:go_default_library",
"//vendor/github.com/google/cadvisor/info/v1:go_default_library",
"//vendor/github.com/google/cadvisor/watcher:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
],
)


@ -23,7 +23,7 @@ import (
info "github.com/google/cadvisor/info/v1"
"github.com/google/cadvisor/watcher"
"k8s.io/klog"
"k8s.io/klog/v2"
)
type systemdFactory struct{}


@ -9,7 +9,7 @@ go_library(
deps = [
"//vendor/github.com/google/cadvisor/container:go_default_library",
"//vendor/github.com/google/cadvisor/container/systemd:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
],
)


@ -18,7 +18,7 @@ package install
import (
"github.com/google/cadvisor/container"
"github.com/google/cadvisor/container/systemd"
"k8s.io/klog"
"k8s.io/klog/v2"
)
func init() {


@ -12,7 +12,7 @@ go_library(
importmap = "k8s.io/kubernetes/vendor/github.com/google/cadvisor/devicemapper",
importpath = "github.com/google/cadvisor/devicemapper",
visibility = ["//visibility:public"],
deps = ["//vendor/k8s.io/klog:go_default_library"],
deps = ["//vendor/k8s.io/klog/v2:go_default_library"],
)
filegroup(


@ -18,7 +18,7 @@ import (
"strconv"
"strings"
"k8s.io/klog"
"k8s.io/klog/v2"
)
// DmsetupClient is a low-level client for interacting with device mapper via


@ -21,7 +21,7 @@ import (
"strconv"
"strings"
"k8s.io/klog"
"k8s.io/klog/v2"
)
// thinLsClient knows how to run a thin_ls very specific to CoW usage for


@ -19,7 +19,7 @@ import (
"sync"
"time"
"k8s.io/klog"
"k8s.io/klog/v2"
)
// ThinPoolWatcher maintains a cache of device name -> usage stats for a
@ -85,13 +85,13 @@ func (w *ThinPoolWatcher) Stop() {
}
// GetUsage gets the cached usage value of the given device.
func (w *ThinPoolWatcher) GetUsage(deviceId string) (uint64, error) {
func (w *ThinPoolWatcher) GetUsage(deviceID string) (uint64, error) {
w.lock.RLock()
defer w.lock.RUnlock()
v, ok := w.cache[deviceId]
v, ok := w.cache[deviceID]
if !ok {
return 0, fmt.Errorf("no cached value for usage of device %v", deviceId)
return 0, fmt.Errorf("no cached value for usage of device %v", deviceID)
}
return v, nil
@ -129,13 +129,15 @@ func (w *ThinPoolWatcher) Refresh() error {
if output, err := w.dmsetup.Message(w.poolName, 0, reserveMetadataMessage); err != nil {
err = fmt.Errorf("error reserving metadata for thin-pool %v: %v output: %v", w.poolName, err, string(output))
return err
} else {
klog.V(5).Infof("reserved metadata snapshot for thin-pool %v", w.poolName)
}
klog.V(5).Infof("reserved metadata snapshot for thin-pool %v", w.poolName)
defer func() {
klog.V(5).Infof("releasing metadata snapshot for thin-pool %v", w.poolName)
w.dmsetup.Message(w.poolName, 0, releaseMetadataMessage)
_, err := w.dmsetup.Message(w.poolName, 0, releaseMetadataMessage)
if err != nil {
klog.Warningf("Unable to release metadata snapshot for thin-pool %v: %s", w.poolName, err)
}
}()
klog.V(5).Infof("running thin_ls on metadata device %v", w.metadataDevice)


@ -9,7 +9,7 @@ go_library(
deps = [
"//vendor/github.com/google/cadvisor/info/v1:go_default_library",
"//vendor/github.com/google/cadvisor/utils:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
],
)


@ -24,7 +24,7 @@ import (
info "github.com/google/cadvisor/info/v1"
"github.com/google/cadvisor/utils"
"k8s.io/klog"
"k8s.io/klog/v2"
)
type byTimestamp []*info.Event
@ -44,7 +44,7 @@ func (e byTimestamp) Less(i, j int) bool {
type EventChannel struct {
// Watch ID. Can be used by the caller to request cancellation of watch events.
watchId int
watchID int
// Channel on which the caller can receive watch events.
channel chan *info.Event
}
@ -84,9 +84,9 @@ type EventManager interface {
GetEvents(request *Request) ([]*info.Event, error)
// AddEvent allows the caller to add an event to an EventManager
// object
AddEvent(e *info.Event) error
AddEvent(event *info.Event) error
// Cancels a previously requested watch event.
StopWatch(watch_id int)
StopWatch(watchID int)
}
// events provides an implementation for the EventManager interface.
@ -100,7 +100,7 @@ type events struct {
// lock guarding watchers.
watcherLock sync.RWMutex
// last allocated watch id.
lastId int
lastID int
// Event storage policy.
storagePolicy StoragePolicy
}
@ -118,9 +118,9 @@ type watch struct {
eventChannel *EventChannel
}
func NewEventChannel(watchId int) *EventChannel {
func NewEventChannel(watchID int) *EventChannel {
return &EventChannel{
watchId: watchId,
watchID: watchID,
channel: make(chan *info.Event, 10),
}
}
@ -148,9 +148,9 @@ func DefaultStoragePolicy() StoragePolicy {
}
// NewEventManager returns an initialized EventManager.
func NewEventManager(storagePolicy StoragePolicy) *events {
func NewEventManager(storagePolicy StoragePolicy) EventManager {
return &events{
eventStore: make(map[info.EventType]*utils.TimedStore, 0),
eventStore: make(map[info.EventType]*utils.TimedStore),
watchers: make(map[int]*watch),
storagePolicy: storagePolicy,
}
@ -173,12 +173,12 @@ func newWatch(request *Request, eventChannel *EventChannel) *watch {
}
}
func (self *EventChannel) GetChannel() chan *info.Event {
return self.channel
func (ch *EventChannel) GetChannel() chan *info.Event {
return ch.channel
}
func (self *EventChannel) GetWatchId() int {
return self.watchId
func (ch *EventChannel) GetWatchId() int {
return ch.watchID
}
// sorts and returns up to the last MaxEventsReturned chronological elements
@ -195,8 +195,8 @@ func getMaxEventsReturned(request *Request, eSlice []*info.Event) []*info.Event
// container path is a prefix of the event container path. Otherwise,
// it checks that the container paths of the event and request are
// equivalent
func checkIfIsSubcontainer(request *Request, event *info.Event) bool {
if request.IncludeSubcontainers == true {
func isSubcontainer(request *Request, event *info.Event) bool {
if request.IncludeSubcontainers {
return request.ContainerName == "/" || strings.HasPrefix(event.ContainerName+"/", request.ContainerName+"/")
}
return event.ContainerName == request.ContainerName
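The trailing slash appended on both sides above is what keeps a request for "/docker" from matching a sibling such as "/dockerd". A self-contained sketch of the check with fabricated container names:

package main

import (
	"fmt"
	"strings"
)

// matches mirrors the subcontainer prefix test above.
func matches(requestName, eventName string) bool {
	return requestName == "/" || strings.HasPrefix(eventName+"/", requestName+"/")
}

func main() {
	fmt.Println(matches("/docker", "/docker/abc123")) // true: real subcontainer
	fmt.Println(matches("/docker", "/dockerd"))       // false: sibling, not a child
	fmt.Println(matches("/", "/kubepods/pod1"))       // true: root matches everything
}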
@ -221,7 +221,7 @@ func checkIfEventSatisfiesRequest(request *Request, event *info.Event) bool {
return false
}
if request.ContainerName != "" {
return checkIfIsSubcontainer(request, event)
return isSubcontainer(request, event)
}
return true
}
@ -231,15 +231,15 @@ func checkIfEventSatisfiesRequest(request *Request, event *info.Event) bool {
// adds it to a slice of *Event objects that is returned. If both MaxEventsReturned
// and StartTime/EndTime are specified in the request object, then only
// up to the most recent MaxEventsReturned events in that time range are returned.
func (self *events) GetEvents(request *Request) ([]*info.Event, error) {
func (e *events) GetEvents(request *Request) ([]*info.Event, error) {
returnEventList := []*info.Event{}
self.eventsLock.RLock()
defer self.eventsLock.RUnlock()
e.eventsLock.RLock()
defer e.eventsLock.RUnlock()
for eventType, fetch := range request.EventType {
if !fetch {
continue
}
evs, ok := self.eventStore[eventType]
evs, ok := e.eventStore[eventType]
if !ok {
continue
}
@ -261,50 +261,50 @@ func (self *events) GetEvents(request *Request) ([]*info.Event, error) {
// Request object it is fed to the channel. The StartTime and EndTime of the watch
// request should be uninitialized because the purpose is to watch indefinitely
// for events that will happen in the future
func (self *events) WatchEvents(request *Request) (*EventChannel, error) {
func (e *events) WatchEvents(request *Request) (*EventChannel, error) {
if !request.StartTime.IsZero() || !request.EndTime.IsZero() {
return nil, errors.New(
"for a call to watch, request.StartTime and request.EndTime must be uninitialized")
}
self.watcherLock.Lock()
defer self.watcherLock.Unlock()
new_id := self.lastId + 1
returnEventChannel := NewEventChannel(new_id)
e.watcherLock.Lock()
defer e.watcherLock.Unlock()
newID := e.lastID + 1
returnEventChannel := NewEventChannel(newID)
newWatcher := newWatch(request, returnEventChannel)
self.watchers[new_id] = newWatcher
self.lastId = new_id
e.watchers[newID] = newWatcher
e.lastID = newID
return returnEventChannel, nil
}
// helper function to update the event manager's eventStore
func (self *events) updateEventStore(e *info.Event) {
self.eventsLock.Lock()
defer self.eventsLock.Unlock()
if _, ok := self.eventStore[e.EventType]; !ok {
maxNumEvents := self.storagePolicy.DefaultMaxNumEvents
if numEvents, ok := self.storagePolicy.PerTypeMaxNumEvents[e.EventType]; ok {
func (e *events) updateEventStore(event *info.Event) {
e.eventsLock.Lock()
defer e.eventsLock.Unlock()
if _, ok := e.eventStore[event.EventType]; !ok {
maxNumEvents := e.storagePolicy.DefaultMaxNumEvents
if numEvents, ok := e.storagePolicy.PerTypeMaxNumEvents[event.EventType]; ok {
maxNumEvents = numEvents
}
if maxNumEvents == 0 {
// Event storage is disabled for e.EventType
// Event storage is disabled for event.EventType
return
}
maxAge := self.storagePolicy.DefaultMaxAge
if age, ok := self.storagePolicy.PerTypeMaxAge[e.EventType]; ok {
maxAge := e.storagePolicy.DefaultMaxAge
if age, ok := e.storagePolicy.PerTypeMaxAge[event.EventType]; ok {
maxAge = age
}
self.eventStore[e.EventType] = utils.NewTimedStore(maxAge, maxNumEvents)
e.eventStore[event.EventType] = utils.NewTimedStore(maxAge, maxNumEvents)
}
self.eventStore[e.EventType].Add(e.Timestamp, e)
e.eventStore[event.EventType].Add(event.Timestamp, event)
}
func (self *events) findValidWatchers(e *info.Event) []*watch {
func (e *events) findValidWatchers(event *info.Event) []*watch {
watchesToSend := make([]*watch, 0)
for _, watcher := range self.watchers {
for _, watcher := range e.watchers {
watchRequest := watcher.request
if checkIfEventSatisfiesRequest(watchRequest, e) {
if checkIfEventSatisfiesRequest(watchRequest, event) {
watchesToSend = append(watchesToSend, watcher)
}
}
@ -314,26 +314,26 @@ func (self *events) findValidWatchers(e *info.Event) []*watch {
// method of Events object that adds the argument Event object to the
// eventStore. It also feeds the event to a set of watch channels
// held by the manager if it satisfies the request keys of the channels
func (self *events) AddEvent(e *info.Event) error {
self.updateEventStore(e)
self.watcherLock.RLock()
defer self.watcherLock.RUnlock()
watchesToSend := self.findValidWatchers(e)
func (e *events) AddEvent(event *info.Event) error {
e.updateEventStore(event)
e.watcherLock.RLock()
defer e.watcherLock.RUnlock()
watchesToSend := e.findValidWatchers(event)
for _, watchObject := range watchesToSend {
watchObject.eventChannel.GetChannel() <- e
watchObject.eventChannel.GetChannel() <- event
}
klog.V(4).Infof("Added event %v", e)
klog.V(4).Infof("Added event %v", event)
return nil
}
// Removes a watch instance from the EventManager's watchers map
func (self *events) StopWatch(watchId int) {
self.watcherLock.Lock()
defer self.watcherLock.Unlock()
_, ok := self.watchers[watchId]
func (e *events) StopWatch(watchID int) {
e.watcherLock.Lock()
defer e.watcherLock.Unlock()
_, ok := e.watchers[watchID]
if !ok {
klog.Errorf("Could not find watcher instance %v", watchId)
klog.Errorf("Could not find watcher instance %v", watchID)
}
close(self.watchers[watchId].eventChannel.GetChannel())
delete(self.watchers, watchId)
close(e.watchers[watchID].eventChannel.GetChannel())
delete(e.watchers, watchID)
}
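Tying the watch plumbing above together, a simplified end-to-end sketch; the hub below is a fabricated stand-in that keeps only the channel fan-out and ID bookkeeping, without the event store or request filtering of the real events type:

package main

import "fmt"

type event struct{ ContainerName string }

// watchHub is a fabricated stand-in for the events manager above.
type watchHub struct {
	lastID   int
	watchers map[int]chan *event
}

func newHub() *watchHub { return &watchHub{watchers: map[int]chan *event{}} }

// WatchEvents returns a watch ID and its buffered channel, mirroring
// the EventChannel pairing.
func (h *watchHub) WatchEvents() (int, chan *event) {
	h.lastID++
	ch := make(chan *event, 10)
	h.watchers[h.lastID] = ch
	return h.lastID, ch
}

// AddEvent fans the event out to every registered watcher.
func (h *watchHub) AddEvent(e *event) {
	for _, ch := range h.watchers {
		ch <- e
	}
}

// StopWatch closes the channel and drops the watcher, as above.
func (h *watchHub) StopWatch(id int) {
	close(h.watchers[id])
	delete(h.watchers, id)
}

func main() {
	hub := newHub()
	id, ch := hub.WatchEvents()
	hub.AddEvent(&event{ContainerName: "/docker/abc123"})
	fmt.Println((<-ch).ContainerName) // /docker/abc123
	hub.StopWatch(id)
}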


@ -14,14 +14,14 @@ go_library(
"//vendor/github.com/google/cadvisor/devicemapper:go_default_library",
"//vendor/github.com/google/cadvisor/utils:go_default_library",
"//vendor/github.com/mistifyio/go-zfs:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
"//vendor/k8s.io/utils/mount:go_default_library",
],
"@io_bazel_rules_go//go/platform:linux": [
"//vendor/github.com/google/cadvisor/devicemapper:go_default_library",
"//vendor/github.com/google/cadvisor/utils:go_default_library",
"//vendor/github.com/mistifyio/go-zfs:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
"//vendor/k8s.io/utils/mount:go_default_library",
],
"//conditions:default": [],


@ -34,7 +34,7 @@ import (
"github.com/google/cadvisor/utils"
zfs "github.com/mistifyio/go-zfs"
"k8s.io/klog"
"k8s.io/klog/v2"
"k8s.io/utils/mount"
)
@ -109,8 +109,8 @@ func NewFsInfo(context Context) (FsInfo, error) {
excluded := []string{fmt.Sprintf("%s/devicemapper/mnt", context.Docker.Root)}
fsInfo := &RealFsInfo{
partitions: processMounts(mounts, excluded),
labels: make(map[string]string, 0),
mounts: make(map[string]mount.MountInfo, 0),
labels: make(map[string]string),
mounts: make(map[string]mount.MountInfo),
dmsetup: devicemapper.NewDmsetupClient(),
fsUUIDToDeviceName: fsUUIDToDeviceName,
}
@ -163,7 +163,7 @@ func getFsUUIDToDeviceNameMap() (map[string]string, error) {
}
func processMounts(mounts []mount.MountInfo, excludedMountpointPrefixes []string) map[string]partition {
partitions := make(map[string]partition, 0)
partitions := make(map[string]partition)
supportedFsType := map[string]bool{
// all ext systems are checked through prefix.
@ -232,7 +232,7 @@ func processMounts(mounts []mount.MountInfo, excludedMountpointPrefixes []string
// docker is using devicemapper for its storage driver. If a loopback device is being used, don't
// return any information or error, as we want to report based on the actual partition where the
// loopback file resides, instead of the loopback file itself.
func (self *RealFsInfo) getDockerDeviceMapperInfo(context DockerContext) (string, *partition, error) {
func (i *RealFsInfo) getDockerDeviceMapperInfo(context DockerContext) (string, *partition, error) {
if context.Driver != DeviceMapper.String() {
return "", nil, nil
}
@ -242,7 +242,7 @@ func (self *RealFsInfo) getDockerDeviceMapperInfo(context DockerContext) (string
return "", nil, nil
}
dev, major, minor, blockSize, err := dockerDMDevice(context.DriverStatus, self.dmsetup)
dev, major, minor, blockSize, err := dockerDMDevice(context.DriverStatus, i.dmsetup)
if err != nil {
return "", nil, err
}
@ -256,49 +256,49 @@ func (self *RealFsInfo) getDockerDeviceMapperInfo(context DockerContext) (string
}
// addSystemRootLabel attempts to determine which device contains the mount for /.
func (self *RealFsInfo) addSystemRootLabel(mounts []mount.MountInfo) {
func (i *RealFsInfo) addSystemRootLabel(mounts []mount.MountInfo) {
for _, m := range mounts {
if m.MountPoint == "/" {
self.partitions[m.Source] = partition{
i.partitions[m.Source] = partition{
fsType: m.FsType,
mountpoint: m.MountPoint,
major: uint(m.Major),
minor: uint(m.Minor),
}
self.labels[LabelSystemRoot] = m.Source
i.labels[LabelSystemRoot] = m.Source
return
}
}
}
// addDockerImagesLabel attempts to determine which device contains the mount for docker images.
func (self *RealFsInfo) addDockerImagesLabel(context Context, mounts []mount.MountInfo) {
dockerDev, dockerPartition, err := self.getDockerDeviceMapperInfo(context.Docker)
func (i *RealFsInfo) addDockerImagesLabel(context Context, mounts []mount.MountInfo) {
dockerDev, dockerPartition, err := i.getDockerDeviceMapperInfo(context.Docker)
if err != nil {
klog.Warningf("Could not get Docker devicemapper device: %v", err)
}
if len(dockerDev) > 0 && dockerPartition != nil {
self.partitions[dockerDev] = *dockerPartition
self.labels[LabelDockerImages] = dockerDev
i.partitions[dockerDev] = *dockerPartition
i.labels[LabelDockerImages] = dockerDev
} else {
self.updateContainerImagesPath(LabelDockerImages, mounts, getDockerImagePaths(context))
i.updateContainerImagesPath(LabelDockerImages, mounts, getDockerImagePaths(context))
}
}
func (self *RealFsInfo) addCrioImagesLabel(context Context, mounts []mount.MountInfo) {
func (i *RealFsInfo) addCrioImagesLabel(context Context, mounts []mount.MountInfo) {
if context.Crio.Root != "" {
crioPath := context.Crio.Root
crioImagePaths := map[string]struct{}{
"/": {},
}
for _, dir := range []string{"overlay", "overlay2"} {
for _, dir := range []string{"devicemapper", "btrfs", "aufs", "overlay", "zfs"} {
crioImagePaths[path.Join(crioPath, dir+"-images")] = struct{}{}
}
for crioPath != "/" && crioPath != "." {
crioImagePaths[crioPath] = struct{}{}
crioPath = filepath.Dir(crioPath)
}
self.updateContainerImagesPath(LabelCrioImages, mounts, crioImagePaths)
i.updateContainerImagesPath(LabelCrioImages, mounts, crioImagePaths)
}
}
@ -324,7 +324,7 @@ func getDockerImagePaths(context Context) map[string]struct{} {
// This method compares the mountpoints with possible container image mount points. If a match is found,
// the label is added to the partition.
func (self *RealFsInfo) updateContainerImagesPath(label string, mounts []mount.MountInfo, containerImagePaths map[string]struct{}) {
func (i *RealFsInfo) updateContainerImagesPath(label string, mounts []mount.MountInfo, containerImagePaths map[string]struct{}) {
var useMount *mount.MountInfo
for _, m := range mounts {
if _, ok := containerImagePaths[m.MountPoint]; ok {
@ -334,27 +334,27 @@ func (self *RealFsInfo) updateContainerImagesPath(label string, mounts []mount.M
}
}
if useMount != nil {
self.partitions[useMount.Source] = partition{
i.partitions[useMount.Source] = partition{
fsType: useMount.FsType,
mountpoint: useMount.MountPoint,
major: uint(useMount.Major),
minor: uint(useMount.Minor),
}
self.labels[label] = useMount.Source
i.labels[label] = useMount.Source
}
}
func (self *RealFsInfo) GetDeviceForLabel(label string) (string, error) {
dev, ok := self.labels[label]
func (i *RealFsInfo) GetDeviceForLabel(label string) (string, error) {
dev, ok := i.labels[label]
if !ok {
return "", fmt.Errorf("non-existent label %q", label)
}
return dev, nil
}
func (self *RealFsInfo) GetLabelsForDevice(device string) ([]string, error) {
func (i *RealFsInfo) GetLabelsForDevice(device string) ([]string, error) {
labels := []string{}
for label, dev := range self.labels {
for label, dev := range i.labels {
if dev == device {
labels = append(labels, label)
}
@ -362,22 +362,22 @@ func (self *RealFsInfo) GetLabelsForDevice(device string) ([]string, error) {
return labels, nil
}
func (self *RealFsInfo) GetMountpointForDevice(dev string) (string, error) {
p, ok := self.partitions[dev]
func (i *RealFsInfo) GetMountpointForDevice(dev string) (string, error) {
p, ok := i.partitions[dev]
if !ok {
return "", fmt.Errorf("no partition info for device %q", dev)
}
return p.mountpoint, nil
}
func (self *RealFsInfo) GetFsInfoForPath(mountSet map[string]struct{}) ([]Fs, error) {
func (i *RealFsInfo) GetFsInfoForPath(mountSet map[string]struct{}) ([]Fs, error) {
filesystems := make([]Fs, 0)
deviceSet := make(map[string]struct{})
diskStatsMap, err := getDiskStatsMap("/proc/diskstats")
if err != nil {
return nil, err
}
for device, partition := range self.partitions {
for device, partition := range i.partitions {
_, hasMount := mountSet[partition.mountpoint]
_, hasDevice := deviceSet[device]
if mountSet == nil || (hasMount && !hasDevice) {
@ -481,8 +481,8 @@ func getDiskStatsMap(diskStatsFile string) (map[string]DiskStats, error) {
return diskStatsMap, nil
}
func (self *RealFsInfo) GetGlobalFsInfo() ([]Fs, error) {
return self.GetFsInfoForPath(nil)
func (i *RealFsInfo) GetGlobalFsInfo() ([]Fs, error) {
return i.GetFsInfoForPath(nil)
}
func major(devNumber uint64) uint {
@ -493,19 +493,19 @@ func minor(devNumber uint64) uint {
return uint((devNumber & 0xff) | ((devNumber >> 12) & 0xfff00))
}
func (self *RealFsInfo) GetDeviceInfoByFsUUID(uuid string) (*DeviceInfo, error) {
deviceName, found := self.fsUUIDToDeviceName[uuid]
func (i *RealFsInfo) GetDeviceInfoByFsUUID(uuid string) (*DeviceInfo, error) {
deviceName, found := i.fsUUIDToDeviceName[uuid]
if !found {
return nil, ErrNoSuchDevice
}
p, found := self.partitions[deviceName]
p, found := i.partitions[deviceName]
if !found {
return nil, fmt.Errorf("cannot find device %q in partitions", deviceName)
}
return &DeviceInfo{deviceName, p.major, p.minor}, nil
}
func (self *RealFsInfo) GetDirFsDevice(dir string) (*DeviceInfo, error) {
func (i *RealFsInfo) GetDirFsDevice(dir string) (*DeviceInfo, error) {
buf := new(syscall.Stat_t)
err := syscall.Stat(dir, buf)
if err != nil {
@ -515,13 +515,13 @@ func (self *RealFsInfo) GetDirFsDevice(dir string) (*DeviceInfo, error) {
// The type Dev in Stat_t is 32bit on mips.
major := major(uint64(buf.Dev)) // nolint: unconvert
minor := minor(uint64(buf.Dev)) // nolint: unconvert
for device, partition := range self.partitions {
for device, partition := range i.partitions {
if partition.major == major && partition.minor == minor {
return &DeviceInfo{device, major, minor}, nil
}
}
mount, found := self.mounts[dir]
mount, found := i.mounts[dir]
// if not found, try the parent dir until we reach the root dir;
// this is an issue on btrfs systems where the directory is not
// the subvolume
@ -534,7 +534,7 @@ func (self *RealFsInfo) GetDirFsDevice(dir string) (*DeviceInfo, error) {
// trim "/" from the new parent path otherwise the next possible
// filepath.Split in the loop will not split the string any further
dir = strings.TrimSuffix(pathdir, "/")
mount, found = self.mounts[dir]
mount, found = i.mounts[dir]
}
if found && mount.FsType == "btrfs" && mount.Major == 0 && strings.HasPrefix(mount.Source, "/dev/") {
@ -565,7 +565,7 @@ func GetDirUsage(dir string) (UsageInfo, error) {
return usage, fmt.Errorf("unsuported fileinfo for getting inode usage of %q", dir)
}
rootDevId := rootStat.Dev
rootDevID := rootStat.Dev
// dedupedInode stores inodes that could be duplicates (nlink > 1)
dedupedInodes := make(map[uint64]struct{})
@ -589,7 +589,7 @@ func GetDirUsage(dir string) (UsageInfo, error) {
return fmt.Errorf("unsupported fileinfo; could not convert to stat_t")
}
if s.Dev != rootDevId {
if s.Dev != rootDevID {
// don't descend into directories on other devices
return filepath.SkipDir
}
@ -611,7 +611,7 @@ func GetDirUsage(dir string) (UsageInfo, error) {
return usage, err
}
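The dedupedInodes map above exists because a file with nlink > 1 is visited once per hard link during the walk; counting its blocks each time would inflate usage. A self-contained sketch of that bookkeeping over a fabricated file list:

package main

import "fmt"

// fileStat carries the syscall.Stat_t fields the walk cares about.
type fileStat struct {
	Inode  uint64
	Nlink  uint64
	Blocks int64 // 512-byte blocks, as in Stat_t
}

func main() {
	files := []fileStat{
		{Inode: 10, Nlink: 1, Blocks: 8},
		{Inode: 42, Nlink: 2, Blocks: 16}, // same file reached via two links
		{Inode: 42, Nlink: 2, Blocks: 16},
	}

	dedupedInodes := make(map[uint64]struct{})
	var bytes uint64
	for _, f := range files {
		if f.Nlink > 1 {
			if _, seen := dedupedInodes[f.Inode]; seen {
				continue // already counted through another hard link
			}
			dedupedInodes[f.Inode] = struct{}{}
		}
		bytes += uint64(f.Blocks) * 512
	}
	fmt.Println(bytes) // 12288: the 16-block file is counted once
}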
func (self *RealFsInfo) GetDirUsage(dir string) (UsageInfo, error) {
func (i *RealFsInfo) GetDirUsage(dir string) (UsageInfo, error) {
claimToken()
defer releaseToken()
return GetDirUsage(dir)
@ -727,14 +727,6 @@ func getZfstats(poolName string) (uint64, uint64, uint64, error) {
return total, dataset.Avail, dataset.Avail, nil
}
// Simple io.Writer implementation that counts how many bytes were written.
type byteCounter struct{ bytesWritten uint64 }
func (b *byteCounter) Write(p []byte) (int, error) {
b.bytesWritten += uint64(len(p))
return len(p), nil
}
// Get major and minor IDs for a mount point using btrfs as the filesystem.
func getBtrfsMajorMinorIds(mount *mount.MountInfo) (int, int, error) {
// btrfs fix: following workaround fixes wrong btrfs Major and Minor Ids reported in /proc/self/mountinfo.
@ -760,7 +752,6 @@ func getBtrfsMajorMinorIds(mount *mount.MountInfo) (int, int, error) {
klog.V(4).Infof("btrfs rdev major:minor %d:%d\n", int(major(uint64(buf.Rdev))), int(minor(uint64(buf.Rdev)))) // nolint: unconvert
return int(major(uint64(buf.Dev))), int(minor(uint64(buf.Dev))), nil // nolint: unconvert
} else {
return 0, 0, fmt.Errorf("%s is not a block device", mount.Source)
}
return 0, 0, fmt.Errorf("%s is not a block device", mount.Source)
}


@ -99,9 +99,9 @@ type ContainerReference struct {
// Sorts by container name.
type ContainerReferenceSlice []ContainerReference
func (self ContainerReferenceSlice) Len() int { return len(self) }
func (self ContainerReferenceSlice) Swap(i, j int) { self[i], self[j] = self[j], self[i] }
func (self ContainerReferenceSlice) Less(i, j int) bool { return self[i].Name < self[j].Name }
func (s ContainerReferenceSlice) Len() int { return len(s) }
func (s ContainerReferenceSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func (s ContainerReferenceSlice) Less(i, j int) bool { return s[i].Name < s[j].Name }
// ContainerInfoRequest is used when users check a container info from the REST API.
// It specifies how much data users want to get about a container
@ -126,10 +126,10 @@ func DefaultContainerInfoRequest() ContainerInfoRequest {
}
}
func (self *ContainerInfoRequest) Equals(other ContainerInfoRequest) bool {
return self.NumStats == other.NumStats &&
self.Start.Equal(other.Start) &&
self.End.Equal(other.End)
func (r *ContainerInfoRequest) Equals(other ContainerInfoRequest) bool {
return r.NumStats == other.NumStats &&
r.Start.Equal(other.Start) &&
r.End.Equal(other.End)
}
type ContainerInfo struct {
@ -151,30 +151,30 @@ type ContainerInfo struct {
// en/decoded. This will lead to small but acceptable differences between a
// ContainerInfo and its encode-then-decode version. Eq() is used to compare
// two ContainerInfo accepting small difference (<10ms) of Time fields.
func (self *ContainerInfo) Eq(b *ContainerInfo) bool {
func (ci *ContainerInfo) Eq(b *ContainerInfo) bool {
// If both self and b are nil, then Eq() returns true
if self == nil {
// If both ci and b are nil, then Eq() returns true
if ci == nil {
return b == nil
}
if b == nil {
return self == nil
return ci == nil
}
// For fields other than time.Time, we will compare them precisely.
// This would require that any slice should have same order.
if !reflect.DeepEqual(self.ContainerReference, b.ContainerReference) {
if !reflect.DeepEqual(ci.ContainerReference, b.ContainerReference) {
return false
}
if !reflect.DeepEqual(self.Subcontainers, b.Subcontainers) {
if !reflect.DeepEqual(ci.Subcontainers, b.Subcontainers) {
return false
}
if !self.Spec.Eq(&b.Spec) {
if !ci.Spec.Eq(&b.Spec) {
return false
}
for i, expectedStats := range b.Stats {
selfStats := self.Stats[i]
selfStats := ci.Stats[i]
if !expectedStats.Eq(selfStats) {
return false
}
@ -183,57 +183,66 @@ func (self *ContainerInfo) Eq(b *ContainerInfo) bool {
return true
}
func (self *ContainerSpec) Eq(b *ContainerSpec) bool {
func (s *ContainerSpec) Eq(b *ContainerSpec) bool {
// Creation within 1s of each other.
diff := self.CreationTime.Sub(b.CreationTime)
diff := s.CreationTime.Sub(b.CreationTime)
if (diff > time.Second) || (diff < -time.Second) {
return false
}
if self.HasCpu != b.HasCpu {
if s.HasCpu != b.HasCpu {
return false
}
if !reflect.DeepEqual(self.Cpu, b.Cpu) {
if !reflect.DeepEqual(s.Cpu, b.Cpu) {
return false
}
if self.HasMemory != b.HasMemory {
if s.HasMemory != b.HasMemory {
return false
}
if !reflect.DeepEqual(self.Memory, b.Memory) {
if !reflect.DeepEqual(s.Memory, b.Memory) {
return false
}
if self.HasNetwork != b.HasNetwork {
if s.HasHugetlb != b.HasHugetlb {
return false
}
if self.HasFilesystem != b.HasFilesystem {
if s.HasNetwork != b.HasNetwork {
return false
}
if self.HasDiskIo != b.HasDiskIo {
if s.HasProcesses != b.HasProcesses {
return false
}
if self.HasCustomMetrics != b.HasCustomMetrics {
if s.HasFilesystem != b.HasFilesystem {
return false
}
if s.HasDiskIo != b.HasDiskIo {
return false
}
if s.HasCustomMetrics != b.HasCustomMetrics {
return false
}
if s.Image != b.Image {
return false
}
return true
}
func (self *ContainerInfo) StatsAfter(ref time.Time) []*ContainerStats {
n := len(self.Stats) + 1
for i, s := range self.Stats {
func (ci *ContainerInfo) StatsAfter(ref time.Time) []*ContainerStats {
n := len(ci.Stats) + 1
for i, s := range ci.Stats {
if s.Timestamp.After(ref) {
n = i
break
}
}
if n > len(self.Stats) {
if n > len(ci.Stats) {
return nil
}
return self.Stats[n:]
return ci.Stats[n:]
}
func (self *ContainerInfo) StatsStartTime() time.Time {
func (ci *ContainerInfo) StatsStartTime() time.Time {
var ret time.Time
for _, s := range self.Stats {
for _, s := range ci.Stats {
if s.Timestamp.Before(ret) || ret.IsZero() {
ret = s.Timestamp
}
@ -241,10 +250,10 @@ func (self *ContainerInfo) StatsStartTime() time.Time {
return ret
}
func (self *ContainerInfo) StatsEndTime() time.Time {
func (ci *ContainerInfo) StatsEndTime() time.Time {
var ret time.Time
for i := len(self.Stats) - 1; i >= 0; i-- {
s := self.Stats[i]
for i := len(ci.Stats) - 1; i >= 0; i-- {
s := ci.Stats[i]
if s.Timestamp.After(ret) {
ret = s.Timestamp
}
@ -816,6 +825,29 @@ type AcceleratorStats struct {
DutyCycle uint64 `json:"duty_cycle"`
}
// PerfStat represents value of a single monitored perf event.
type PerfStat struct {
// Indicates the scaling ratio for an event: time_running/time_enabled
// (the amount of time the event was actually being measured divided by
// the amount of time it was enabled).
// A value of 1.0 indicates that no multiplexing occurred; a value close
// to 0 indicates the event was measured only briefly, so its reported
// value may be inaccurate.
// See: https://lwn.net/Articles/324756/
ScalingRatio float64 `json:"scaling_ratio"`
// Value is the value of the perf event as retrieved from the OS. It is
// normalized against ScalingRatio, so multiplexing is already taken
// into account.
Value uint64 `json:"value"`
// Name is the human-readable name of the event.
Name string `json:"name"`
// Cpu is the CPU that the perf event was measured on.
Cpu int `json:"cpu"`
}
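Given the documented semantics, Value is already extrapolated by the scaling ratio; a hedged sketch of recovering the approximate raw reading (the helper is hypothetical and assumes Value is roughly raw/ScalingRatio, as described above):

// rawPerfCount estimates the un-extrapolated counter reading.
func rawPerfCount(s PerfStat) uint64 {
	if s.ScalingRatio <= 0 {
		return 0 // event never ran; nothing was measured
	}
	return uint64(float64(s.Value) * s.ScalingRatio)
}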
type UlimitSpec struct {
Name string `json:"name"`
SoftLimit int64 `json:"soft_limit"`
@ -864,6 +896,12 @@ type ContainerStats struct {
// Custom metrics from all collectors
CustomMetrics map[string][]MetricVal `json:"custom_metrics,omitempty"`
// Statistics originating from perf events
PerfStats []PerfStat `json:"perf_stats,omitempty"`
// Referenced memory
ReferencedMemory uint64 `json:"referenced_memory,omitempty"`
}
func timeEq(t1, t2 time.Time, tolerance time.Duration) bool {
@ -872,10 +910,7 @@ func timeEq(t1, t2 time.Time, tolerance time.Duration) bool {
t1, t2 = t2, t1
}
diff := t2.Sub(t1)
if diff <= tolerance {
return true
}
return false
return diff <= tolerance
}
const (
@ -916,6 +951,15 @@ func (a *ContainerStats) StatsEq(b *ContainerStats) bool {
if !reflect.DeepEqual(a.Filesystem, b.Filesystem) {
return false
}
if !reflect.DeepEqual(a.TaskStats, b.TaskStats) {
return false
}
if !reflect.DeepEqual(a.Accelerators, b.Accelerators) {
return false
}
if !reflect.DeepEqual(a.CustomMetrics, b.CustomMetrics) {
return false
}
return true
}
@ -943,9 +987,9 @@ type EventType string
const (
EventOom EventType = "oom"
EventOomKill = "oomKill"
EventContainerCreation = "containerCreation"
EventContainerDeletion = "containerDeletion"
EventOomKill EventType = "oomKill"
EventContainerCreation EventType = "containerCreation"
EventContainerDeletion EventType = "containerDeletion"
)
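Previously only EventOom carried the EventType type; the remaining constants were untyped strings that merely converted implicitly. Spelling the type out on each line documents intent and keeps every constant independent of its neighbors; the CloudProvider and InstanceType constants further down receive the same treatment.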
// Extra information about an event. Only one type will be set.


@ -14,6 +14,8 @@
package v1
import "time"
type FsInfo struct {
// Block device associated with the filesystem.
Device string `json:"device"`
@ -59,8 +61,8 @@ type Cache struct {
Level int `json:"level"`
}
func (self *Node) FindCore(id int) (bool, int) {
for i, n := range self.Cores {
func (n *Node) FindCore(id int) (bool, int) {
for i, n := range n.Cores {
if n.Id == id {
return true, i
}
@ -68,30 +70,30 @@ func (self *Node) FindCore(id int) (bool, int) {
return false, -1
}
func (self *Node) AddThread(thread int, core int) {
func (n *Node) AddThread(thread int, core int) {
var coreIdx int
if core == -1 {
// Assume one hyperthread per core when topology data is missing.
core = thread
}
ok, coreIdx := self.FindCore(core)
ok, coreIdx := n.FindCore(core)
if !ok {
// New core
core := Core{Id: core}
self.Cores = append(self.Cores, core)
coreIdx = len(self.Cores) - 1
n.Cores = append(n.Cores, core)
coreIdx = len(n.Cores) - 1
}
self.Cores[coreIdx].Threads = append(self.Cores[coreIdx].Threads, thread)
n.Cores[coreIdx].Threads = append(n.Cores[coreIdx].Threads, thread)
}
func (self *Node) AddNodeCache(c Cache) {
self.Caches = append(self.Caches, c)
func (n *Node) AddNodeCache(c Cache) {
n.Caches = append(n.Caches, c)
}
func (self *Node) AddPerCoreCache(c Cache) {
for idx := range self.Cores {
self.Cores[idx].Caches = append(self.Cores[idx].Caches, c)
func (n *Node) AddPerCoreCache(c Cache) {
for idx := range n.Cores {
n.Cores[idx].Caches = append(n.Cores[idx].Caches, c)
}
}
@ -138,17 +140,15 @@ type CloudProvider string
const (
GCE CloudProvider = "GCE"
AWS = "AWS"
Azure = "Azure"
Baremetal = "Baremetal"
UnknownProvider = "Unknown"
AWS CloudProvider = "AWS"
Azure CloudProvider = "Azure"
UnknownProvider CloudProvider = "Unknown"
)
type InstanceType string
const (
NoInstance InstanceType = "None"
UnknownInstance = "Unknown"
)
type InstanceID string
@ -158,6 +158,9 @@ const (
)
type MachineInfo struct {
// The time of this information point.
Timestamp time.Time `json:"timestamp"`
// The number of cores in this machine.
NumCores int `json:"num_cores"`
@ -213,6 +216,45 @@ type MachineInfo struct {
InstanceID InstanceID `json:"instance_id"`
}
func (m *MachineInfo) Clone() *MachineInfo {
memoryByType := m.MemoryByType
if len(m.MemoryByType) > 0 {
memoryByType = make(map[string]*MemoryInfo)
for memoryType, memoryInfo := range m.MemoryByType {
memoryByType[memoryType] = memoryInfo
}
}
diskMap := m.DiskMap
if len(m.DiskMap) > 0 {
diskMap = make(map[string]DiskInfo)
for k, info := range m.DiskMap {
diskMap[k] = info
}
}
copy := MachineInfo{
Timestamp: m.Timestamp,
NumCores: m.NumCores,
NumPhysicalCores: m.NumPhysicalCores,
NumSockets: m.NumSockets,
CpuFrequency: m.CpuFrequency,
MemoryCapacity: m.MemoryCapacity,
MemoryByType: memoryByType,
NVMInfo: m.NVMInfo,
HugePages: m.HugePages,
MachineID: m.MachineID,
SystemUUID: m.SystemUUID,
BootID: m.BootID,
Filesystems: m.Filesystems,
DiskMap: diskMap,
NetworkDevices: m.NetworkDevices,
Topology: m.Topology,
CloudProvider: m.CloudProvider,
InstanceType: m.InstanceType,
InstanceID: m.InstanceID,
}
return &copy
}
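Note that Clone deep-copies only the MemoryByType and DiskMap maps; slice fields such as Filesystems, NetworkDevices, and Topology are assigned directly and still share backing arrays with the original, so the clone is safe only as long as those slices are treated as read-only.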
type MemoryInfo struct {
// The amount of memory (in bytes).
Capacity uint64 `json:"capacity"`


@ -27,9 +27,6 @@ const (
// A counter-like value that is only expected to increase.
MetricCumulative MetricType = "cumulative"
// Rate over a time period.
MetricDelta MetricType = "delta"
)
// DataType for metric being exported.


@ -12,7 +12,7 @@ go_library(
visibility = ["//visibility:public"],
deps = [
"//vendor/github.com/google/cadvisor/info/v1:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
],
)


@ -136,6 +136,8 @@ type DeprecatedContainerStats struct {
// Custom Metrics
HasCustomMetrics bool `json:"has_custom_metrics"`
CustomMetrics map[string][]v1.MetricVal `json:"custom_metrics,omitempty"`
// Referenced memory
ReferencedMemory uint64 `json:"referenced_memory,omitempty"`
}
type ContainerStats struct {
@ -164,6 +166,10 @@ type ContainerStats struct {
Accelerators []v1.AcceleratorStats `json:"accelerators,omitempty"`
// Custom Metrics
CustomMetrics map[string][]v1.MetricVal `json:"custom_metrics,omitempty"`
// Perf events counters
PerfStats []v1.PerfStat `json:"perf_stats,omitempty"`
// Referenced memory
ReferencedMemory uint64 `json:"referenced_memory,omitempty"`
}
type Percentiles struct {


@ -19,7 +19,7 @@ import (
"time"
"github.com/google/cadvisor/info/v1"
"k8s.io/klog"
"k8s.io/klog/v2"
)
func machineFsStatsFromV1(fsStats []v1.FsStats) []MachineFsStats {
@ -101,7 +101,8 @@ func ContainerStatsFromV1(containerName string, spec *v1.ContainerSpec, stats []
var last *v1.ContainerStats
for _, val := range stats {
stat := &ContainerStats{
Timestamp: val.Timestamp,
Timestamp: val.Timestamp,
ReferencedMemory: val.ReferencedMemory,
}
if spec.HasCpu {
stat.Cpu = &val.Cpu
@ -151,6 +152,9 @@ func ContainerStatsFromV1(containerName string, spec *v1.ContainerSpec, stats []
if len(val.Accelerators) > 0 {
stat.Accelerators = val.Accelerators
}
if len(val.PerfStats) > 0 {
stat.PerfStats = val.PerfStats
}
// TODO(rjnagal): Handle load stats.
newStats = append(newStats, stat)
}
@ -169,6 +173,7 @@ func DeprecatedStatsFromV1(cont *v1.ContainerInfo) []DeprecatedContainerStats {
HasFilesystem: cont.Spec.HasFilesystem,
HasDiskIo: cont.Spec.HasDiskIo,
HasCustomMetrics: cont.Spec.HasCustomMetrics,
ReferencedMemory: val.ReferencedMemory,
}
if stat.HasCpu {
stat.Cpu = val.Cpu


@ -5,23 +5,22 @@ go_library(
srcs = [
"info.go",
"machine.go",
"machine_no_libipmctl.go",
"operatingsystem_unix.go",
"operatingsystem_windows.go",
],
cgo = True,
importmap = "k8s.io/kubernetes/vendor/github.com/google/cadvisor/machine",
importpath = "github.com/google/cadvisor/machine",
visibility = ["//visibility:public"],
deps = [
"//vendor/github.com/google/cadvisor/fs:go_default_library",
"//vendor/github.com/google/cadvisor/info/v1:go_default_library",
"//vendor/github.com/google/cadvisor/nvm:go_default_library",
"//vendor/github.com/google/cadvisor/utils:go_default_library",
"//vendor/github.com/google/cadvisor/utils/cloudinfo:go_default_library",
"//vendor/github.com/google/cadvisor/utils/sysfs:go_default_library",
"//vendor/github.com/google/cadvisor/utils/sysinfo:go_default_library",
"//vendor/golang.org/x/sys/unix:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
] + select({
"@io_bazel_rules_go//go/platform:windows": [
"//vendor/golang.org/x/sys/windows/registry:go_default_library",


@ -20,14 +20,16 @@ import (
"io/ioutil"
"path/filepath"
"strings"
"time"
"github.com/google/cadvisor/fs"
info "github.com/google/cadvisor/info/v1"
"github.com/google/cadvisor/nvm"
"github.com/google/cadvisor/utils/cloudinfo"
"github.com/google/cadvisor/utils/sysfs"
"github.com/google/cadvisor/utils/sysinfo"
"k8s.io/klog"
"k8s.io/klog/v2"
"golang.org/x/sys/unix"
)
@ -35,8 +37,8 @@ import (
const hugepagesDirectory = "/sys/kernel/mm/hugepages/"
const memoryControllerPath = "/sys/devices/system/edac/mc/"
var machineIdFilePath = flag.String("machine_id_file", "/etc/machine-id,/var/lib/dbus/machine-id", "Comma-separated list of files to check for machine-id. Use the first one that exists.")
var bootIdFilePath = flag.String("boot_id_file", "/proc/sys/kernel/random/boot_id", "Comma-separated list of files to check for boot-id. Use the first one that exists.")
var machineIDFilePath = flag.String("machine_id_file", "/etc/machine-id,/var/lib/dbus/machine-id", "Comma-separated list of files to check for machine-id. Use the first one that exists.")
var bootIDFilePath = flag.String("boot_id_file", "/proc/sys/kernel/random/boot_id", "Comma-separated list of files to check for boot-id. Use the first one that exists.")
func getInfoFromFiles(filePaths string) string {
if len(filePaths) == 0 {
@ -77,7 +79,7 @@ func Info(sysFs sysfs.SysFs, fsInfo fs.FsInfo, inHostNamespace bool) (*info.Mach
return nil, err
}
nvmInfo, err := GetNVMInfo()
nvmInfo, err := nvm.GetInfo()
if err != nil {
return nil, err
}
@ -118,6 +120,7 @@ func Info(sysFs sysfs.SysFs, fsInfo fs.FsInfo, inHostNamespace bool) (*info.Mach
instanceID := realCloudInfo.GetInstanceID()
machineInfo := &info.MachineInfo{
Timestamp: time.Now(),
NumCores: numCores,
NumPhysicalCores: GetPhysicalCores(cpuinfo),
NumSockets: GetSockets(cpuinfo),
@ -129,9 +132,9 @@ func Info(sysFs sysfs.SysFs, fsInfo fs.FsInfo, inHostNamespace bool) (*info.Mach
DiskMap: diskMap,
NetworkDevices: netDevices,
Topology: topology,
MachineID: getInfoFromFiles(filepath.Join(rootFs, *machineIdFilePath)),
MachineID: getInfoFromFiles(filepath.Join(rootFs, *machineIDFilePath)),
SystemUUID: systemUUID,
BootID: getInfoFromFiles(filepath.Join(rootFs, *bootIdFilePath)),
BootID: getInfoFromFiles(filepath.Join(rootFs, *bootIDFilePath)),
CloudProvider: cloudProvider,
InstanceType: instanceType,
InstanceID: instanceID,


@ -33,18 +33,16 @@ import (
"github.com/google/cadvisor/utils/sysfs"
"github.com/google/cadvisor/utils/sysinfo"
"k8s.io/klog"
"k8s.io/klog/v2"
"golang.org/x/sys/unix"
)
var (
cpuRegExp = regexp.MustCompile(`^processor\s*:\s*([0-9]+)$`)
coreRegExp = regexp.MustCompile(`(?m)^core id\s*:\s*([0-9]+)$`)
nodeRegExp = regexp.MustCompile(`(?m)^physical id\s*:\s*([0-9]+)$`)
nodeBusRegExp = regexp.MustCompile(`^node([0-9]+)$`)
coreRegExp = regexp.MustCompile(`(?m)^core id\s*:\s*([0-9]+)$`)
nodeRegExp = regexp.MustCompile(`(?m)^physical id\s*:\s*([0-9]+)$`)
// Power systems have a different format so cater for both
cpuClockSpeedMHz = regexp.MustCompile(`(?:cpu MHz|clock)\s*:\s*([0-9]+\.[0-9]+)(?:MHz)?`)
cpuClockSpeedMHz = regexp.MustCompile(`(?:cpu MHz|CPU MHz|clock)\s*:\s*([0-9]+\.[0-9]+)(?:MHz)?`)
memoryCapacityRegexp = regexp.MustCompile(`MemTotal:\s*([0-9]+) kB`)
swapCapacityRegexp = regexp.MustCompile(`SwapTotal:\s*([0-9]+) kB`)
@ -52,10 +50,9 @@ var (
isMemoryController = regexp.MustCompile("mc[0-9]+")
isDimm = regexp.MustCompile("dimm[0-9]+")
machineArch = getMachineArch()
maxFreqFile = "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq"
)
const maxFreqFile = "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq"
const nodePath = "/sys/devices/system/node"
const sysFsCPUCoreID = "core_id"
const sysFsCPUPhysicalPackageID = "physical_package_id"
const sysFsCPUTopology = "topology"
@ -171,10 +168,10 @@ func GetMachineMemoryByType(edacPath string) (map[string]*info.MemoryInfo, error
continue
}
memType, err := ioutil.ReadFile(path.Join(edacPath, controller, dimm, memTypeFileName))
readableMemType := strings.TrimSpace(string(memType))
if err != nil {
return map[string]*info.MemoryInfo{}, err
}
readableMemType := strings.TrimSpace(string(memType))
if _, exists := memory[readableMemType]; !exists {
memory[readableMemType] = &info.MemoryInfo{}
}
@ -260,18 +257,6 @@ func getUniqueCPUPropertyCount(cpuBusPath string, propertyName string) int {
return len(uniques)
}
func extractValue(s string, r *regexp.Regexp) (bool, int, error) {
matches := r.FindSubmatch([]byte(s))
if len(matches) == 2 {
val, err := strconv.ParseInt(string(matches[1]), 10, 32)
if err != nil {
return false, -1, err
}
return true, int(val), nil
}
return false, -1, nil
}
// getUniqueMatchesCount returns number of unique matches in given argument using provided regular expression
func getUniqueMatchesCount(s string, r *regexp.Regexp) int {
matches := r.FindAllString(s, -1)


@ -37,19 +37,18 @@ func getOperatingSystem() (string, error) {
return "", err
}
return string(osName), nil
} else {
bytes, err := ioutil.ReadFile("/etc/os-release")
if err != nil && os.IsNotExist(err) {
// /usr/lib/os-release in stateless systems like Clear Linux
bytes, err = ioutil.ReadFile("/usr/lib/os-release")
}
if err != nil {
return "", fmt.Errorf("error opening file : %v", err)
}
line := rex.FindAllStringSubmatch(string(bytes), -1)
if len(line) > 0 {
return strings.Trim(line[0][2], "\""), nil
}
return "Linux", nil
}
bytes, err := ioutil.ReadFile("/etc/os-release")
if err != nil && os.IsNotExist(err) {
// /usr/lib/os-release in stateless systems like Clear Linux
bytes, err = ioutil.ReadFile("/usr/lib/os-release")
}
if err != nil {
return "", fmt.Errorf("error opening file : %v", err)
}
line := rex.FindAllStringSubmatch(string(bytes), -1)
if len(line) > 0 {
return strings.Trim(line[0][2], "\""), nil
}
return "Linux", nil
}


@ -22,6 +22,8 @@ go_library(
"//vendor/github.com/google/cadvisor/info/v1:go_default_library",
"//vendor/github.com/google/cadvisor/info/v2:go_default_library",
"//vendor/github.com/google/cadvisor/machine:go_default_library",
"//vendor/github.com/google/cadvisor/nvm:go_default_library",
"//vendor/github.com/google/cadvisor/perf:go_default_library",
"//vendor/github.com/google/cadvisor/stats:go_default_library",
"//vendor/github.com/google/cadvisor/summary:go_default_library",
"//vendor/github.com/google/cadvisor/utils/cpuload:go_default_library",
@ -30,7 +32,7 @@ go_library(
"//vendor/github.com/google/cadvisor/version:go_default_library",
"//vendor/github.com/google/cadvisor/watcher:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
"//vendor/k8s.io/utils/clock:go_default_library",
],
)


@ -39,7 +39,7 @@ import (
"github.com/google/cadvisor/utils/cpuload"
units "github.com/docker/go-units"
"k8s.io/klog"
"k8s.io/klog/v2"
"k8s.io/utils/clock"
)
@ -91,6 +91,9 @@ type containerData struct {
// nvidiaCollector updates stats for Nvidia GPUs attached to the container.
nvidiaCollector stats.Collector
// perfCollector updates stats for perf_event cgroup controller.
perfCollector stats.Collector
}
// jitter returns a time.Duration between duration and duration + maxFactor * duration,
@ -104,23 +107,24 @@ func jitter(duration time.Duration, maxFactor float64) time.Duration {
return wait
}
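The hunk shows only the tail of jitter; for orientation, a minimal sketch consistent with the comment above, assuming math/rand and time are imported (not necessarily the exact vendored body):

func jitterSketch(duration time.Duration, maxFactor float64) time.Duration {
	if maxFactor <= 0.0 {
		maxFactor = 1.0
	}
	// Pick a wait uniformly in [duration, duration+maxFactor*duration).
	wait := duration + time.Duration(rand.Float64()*maxFactor*float64(duration))
	return wait
}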
func (c *containerData) Start() error {
go c.housekeeping()
func (cd *containerData) Start() error {
go cd.housekeeping()
return nil
}
func (c *containerData) Stop() error {
err := c.memoryCache.RemoveContainer(c.info.Name)
func (cd *containerData) Stop() error {
err := cd.memoryCache.RemoveContainer(cd.info.Name)
if err != nil {
return err
}
close(c.stop)
close(cd.stop)
cd.perfCollector.Destroy()
return nil
}
func (c *containerData) allowErrorLogging() bool {
if c.clock.Since(c.lastErrorTime) > time.Minute {
c.lastErrorTime = c.clock.Now()
func (cd *containerData) allowErrorLogging() bool {
if cd.clock.Since(cd.lastErrorTime) > time.Minute {
cd.lastErrorTime = cd.clock.Now()
return true
}
return false
@ -130,22 +134,22 @@ func (c *containerData) allowErrorLogging() bool {
// It is designed to be used in conjunction with periodic housekeeping, and will cause the timer for
// periodic housekeeping to reset. This should be used sparingly, as calling OnDemandHousekeeping frequently
// can have serious performance costs.
func (c *containerData) OnDemandHousekeeping(maxAge time.Duration) {
if c.clock.Since(c.statsLastUpdatedTime) > maxAge {
func (cd *containerData) OnDemandHousekeeping(maxAge time.Duration) {
if cd.clock.Since(cd.statsLastUpdatedTime) > maxAge {
housekeepingFinishedChan := make(chan struct{})
c.onDemandChan <- housekeepingFinishedChan
cd.onDemandChan <- housekeepingFinishedChan
select {
case <-c.stop:
case <-cd.stop:
case <-housekeepingFinishedChan:
}
}
}
// notifyOnDemand notifies all calls to OnDemandHousekeeping that housekeeping is finished
func (c *containerData) notifyOnDemand() {
func (cd *containerData) notifyOnDemand() {
for {
select {
case finishedChan := <-c.onDemandChan:
case finishedChan := <-cd.onDemandChan:
close(finishedChan)
default:
return
@ -153,35 +157,42 @@ func (c *containerData) notifyOnDemand() {
}
}
func (c *containerData) GetInfo(shouldUpdateSubcontainers bool) (*containerInfo, error) {
func (cd *containerData) GetInfo(shouldUpdateSubcontainers bool) (*containerInfo, error) {
// Get spec and subcontainers.
if c.clock.Since(c.infoLastUpdatedTime) > 5*time.Second {
err := c.updateSpec()
if cd.clock.Since(cd.infoLastUpdatedTime) > 5*time.Second {
err := cd.updateSpec()
if err != nil {
return nil, err
}
if shouldUpdateSubcontainers {
err = c.updateSubcontainers()
err = cd.updateSubcontainers()
if err != nil {
return nil, err
}
}
c.infoLastUpdatedTime = c.clock.Now()
cd.infoLastUpdatedTime = cd.clock.Now()
}
// Make a copy of the info for the user.
c.lock.Lock()
defer c.lock.Unlock()
return &c.info, nil
cd.lock.Lock()
defer cd.lock.Unlock()
cInfo := containerInfo{
Subcontainers: cd.info.Subcontainers,
Spec: cd.info.Spec,
}
cInfo.Id = cd.info.Id
cInfo.Name = cd.info.Name
cInfo.Aliases = cd.info.Aliases
cInfo.Namespace = cd.info.Namespace
return &cInfo, nil
}
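Returning a freshly built containerInfo, rather than a pointer into cd.info as before, means callers no longer alias state that housekeeping keeps mutating; the copy is assembled while cd.lock is held, so each caller gets a consistent snapshot.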
func (c *containerData) DerivedStats() (v2.DerivedStats, error) {
if c.summaryReader == nil {
return v2.DerivedStats{}, fmt.Errorf("derived stats not enabled for container %q", c.info.Name)
func (cd *containerData) DerivedStats() (v2.DerivedStats, error) {
if cd.summaryReader == nil {
return v2.DerivedStats{}, fmt.Errorf("derived stats not enabled for container %q", cd.info.Name)
}
return c.summaryReader.DerivedStats()
return cd.summaryReader.DerivedStats()
}
func (c *containerData) getCgroupPath(cgroups string) (string, error) {
func (cd *containerData) getCgroupPath(cgroups string) (string, error) {
if cgroups == "-" {
return "/", nil
}
@ -199,8 +210,8 @@ func (c *containerData) getCgroupPath(cgroups string) (string, error) {
// Returns contents of a file inside the container root.
// Takes in a path relative to container root.
func (c *containerData) ReadFile(filepath string, inHostNamespace bool) ([]byte, error) {
pids, err := c.getContainerPids(inHostNamespace)
func (cd *containerData) ReadFile(filepath string, inHostNamespace bool) ([]byte, error) {
pids, err := cd.getContainerPids(inHostNamespace)
if err != nil {
return nil, err
}
@ -218,11 +229,11 @@ func (c *containerData) ReadFile(filepath string, inHostNamespace bool) ([]byte,
}
}
// No process paths could be found. Declare config non-existent.
return nil, fmt.Errorf("file %q does not exist.", filepath)
return nil, fmt.Errorf("file %q does not exist", filepath)
}
// Return output for ps command in host /proc with specified format
func (c *containerData) getPsOutput(inHostNamespace bool, format string) ([]byte, error) {
func (cd *containerData) getPsOutput(inHostNamespace bool, format string) ([]byte, error) {
args := []string{}
command := "ps"
if !inHostNamespace {
@ -239,9 +250,9 @@ func (c *containerData) getPsOutput(inHostNamespace bool, format string) ([]byte
// Get pids of processes in this container.
// A slightly lighter-weight call than GetProcessList when other details are not required.
func (c *containerData) getContainerPids(inHostNamespace bool) ([]string, error) {
func (cd *containerData) getContainerPids(inHostNamespace bool) ([]string, error) {
format := "pid,cgroup"
out, err := c.getPsOutput(inHostNamespace, format)
out, err := cd.getPsOutput(inHostNamespace, format)
if err != nil {
return nil, err
}
@ -257,26 +268,26 @@ func (c *containerData) getContainerPids(inHostNamespace bool) ([]string, error)
return nil, fmt.Errorf("expected at least %d fields, found %d: output: %q", expectedFields, len(fields), line)
}
pid := fields[0]
cgroup, err := c.getCgroupPath(fields[1])
cgroup, err := cd.getCgroupPath(fields[1])
if err != nil {
return nil, fmt.Errorf("could not parse cgroup path from %q: %v", fields[1], err)
}
if c.info.Name == cgroup {
if cd.info.Name == cgroup {
pids = append(pids, pid)
}
}
return pids, nil
}
func (c *containerData) GetProcessList(cadvisorContainer string, inHostNamespace bool) ([]v2.ProcessInfo, error) {
func (cd *containerData) GetProcessList(cadvisorContainer string, inHostNamespace bool) ([]v2.ProcessInfo, error) {
// report all processes for root.
isRoot := c.info.Name == "/"
isRoot := cd.info.Name == "/"
rootfs := "/"
if !inHostNamespace {
rootfs = "/rootfs"
}
format := "user,pid,ppid,stime,pcpu,pmem,rss,vsz,stat,time,comm,cgroup"
out, err := c.getPsOutput(inHostNamespace, format)
out, err := cd.getPsOutput(inHostNamespace, format)
if err != nil {
return nil, err
}
@ -299,7 +310,7 @@ func (c *containerData) GetProcessList(cadvisorContainer string, inHostNamespace
if err != nil {
return nil, fmt.Errorf("invalid ppid %q: %v", fields[2], err)
}
percentCpu, err := strconv.ParseFloat(fields[4], 32)
percentCPU, err := strconv.ParseFloat(fields[4], 32)
if err != nil {
return nil, fmt.Errorf("invalid cpu percent %q: %v", fields[4], err)
}
@ -319,7 +330,7 @@ func (c *containerData) GetProcessList(cadvisorContainer string, inHostNamespace
}
// convert to bytes
vs *= 1024
cgroup, err := c.getCgroupPath(fields[11])
cgroup, err := cd.getCgroupPath(fields[11])
if err != nil {
return nil, fmt.Errorf("could not parse cgroup path from %q: %v", fields[11], err)
}
@ -342,13 +353,13 @@ func (c *containerData) GetProcessList(cadvisorContainer string, inHostNamespace
}
fdCount = len(fds)
if isRoot || c.info.Name == cgroup {
if isRoot || cd.info.Name == cgroup {
processes = append(processes, v2.ProcessInfo{
User: fields[0],
Pid: pid,
Ppid: ppid,
StartTime: fields[3],
PercentCpu: float32(percentCpu),
PercentCpu: float32(percentCPU),
PercentMemory: float32(percentMem),
RSS: rss,
VirtualSize: vs,
@ -387,6 +398,8 @@ func newContainerData(containerName string, memoryCache *memory.InMemoryCache, h
collectorManager: collectorManager,
onDemandChan: make(chan chan struct{}, 100),
clock: clock,
perfCollector: &stats.NoopCollector{},
nvidiaCollector: &stats.NoopCollector{},
}
cont.info.ContainerReference = ref
@ -409,52 +422,52 @@ func newContainerData(containerName string, memoryCache *memory.InMemoryCache, h
cont.summaryReader, err = summary.New(cont.info.Spec)
if err != nil {
cont.summaryReader = nil
klog.Warningf("Failed to create summary reader for %q: %v", ref.Name, err)
klog.V(5).Infof("Failed to create summary reader for %q: %v", ref.Name, err)
}
return cont, nil
}
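Initializing perfCollector and nvidiaCollector to stats.NoopCollector{} up front lets the container's Stop and updateStats paths call Destroy and UpdateStats on the perf collector unconditionally, with no nil checks, and makes the remaining nil check on the nvidia collector redundant.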
// Determine when the next housekeeping should occur.
func (self *containerData) nextHousekeepingInterval() time.Duration {
if self.allowDynamicHousekeeping {
func (cd *containerData) nextHousekeepingInterval() time.Duration {
if cd.allowDynamicHousekeeping {
var empty time.Time
stats, err := self.memoryCache.RecentStats(self.info.Name, empty, empty, 2)
stats, err := cd.memoryCache.RecentStats(cd.info.Name, empty, empty, 2)
if err != nil {
if self.allowErrorLogging() {
klog.Warningf("Failed to get RecentStats(%q) while determining the next housekeeping: %v", self.info.Name, err)
if cd.allowErrorLogging() {
klog.Warningf("Failed to get RecentStats(%q) while determining the next housekeeping: %v", cd.info.Name, err)
}
} else if len(stats) == 2 {
// TODO(vishnuk): Use no processes as a signal.
// Raise the interval if usage hasn't changed in the last housekeeping.
if stats[0].StatsEq(stats[1]) && (self.housekeepingInterval < self.maxHousekeepingInterval) {
self.housekeepingInterval *= 2
if self.housekeepingInterval > self.maxHousekeepingInterval {
self.housekeepingInterval = self.maxHousekeepingInterval
if stats[0].StatsEq(stats[1]) && (cd.housekeepingInterval < cd.maxHousekeepingInterval) {
cd.housekeepingInterval *= 2
if cd.housekeepingInterval > cd.maxHousekeepingInterval {
cd.housekeepingInterval = cd.maxHousekeepingInterval
}
} else if self.housekeepingInterval != *HousekeepingInterval {
} else if cd.housekeepingInterval != *HousekeepingInterval {
// Lower interval back to the baseline.
self.housekeepingInterval = *HousekeepingInterval
cd.housekeepingInterval = *HousekeepingInterval
}
}
}
return jitter(self.housekeepingInterval, 1.0)
return jitter(cd.housekeepingInterval, 1.0)
}
// TODO(vmarmol): Implement stats collecting as a custom collector.
func (c *containerData) housekeeping() {
// Start any background goroutines - must be cleaned up in c.handler.Cleanup().
c.handler.Start()
defer c.handler.Cleanup()
func (cd *containerData) housekeeping() {
// Start any background goroutines - must be cleaned up in cd.handler.Cleanup().
cd.handler.Start()
defer cd.handler.Cleanup()
// Initialize cpuload reader - must be cleaned up in c.loadReader.Stop()
if c.loadReader != nil {
err := c.loadReader.Start()
// Initialize cpuload reader - must be cleaned up in cd.loadReader.Stop()
if cd.loadReader != nil {
err := cd.loadReader.Start()
if err != nil {
klog.Warningf("Could not start cpu load stat collector for %q: %s", c.info.Name, err)
klog.Warningf("Could not start cpu load stat collector for %q: %s", cd.info.Name, err)
}
defer c.loadReader.Stop()
defer cd.loadReader.Stop()
}
// Long housekeeping is either 100ms or half of the housekeeping interval.
@ -464,11 +477,11 @@ func (c *containerData) housekeeping() {
}
// Housekeep every second.
klog.V(3).Infof("Start housekeeping for container %q\n", c.info.Name)
houseKeepingTimer := c.clock.NewTimer(0 * time.Second)
klog.V(3).Infof("Start housekeeping for container %q\n", cd.info.Name)
houseKeepingTimer := cd.clock.NewTimer(0 * time.Second)
defer houseKeepingTimer.Stop()
for {
if !c.housekeepingTick(houseKeepingTimer.C(), longHousekeeping) {
if !cd.housekeepingTick(houseKeepingTimer.C(), longHousekeeping) {
return
}
// Stop and drain the timer so that it is safe to reset it
@ -479,74 +492,74 @@ func (c *containerData) housekeeping() {
}
}
// Log usage if asked to do so.
if c.logUsage {
if cd.logUsage {
const numSamples = 60
var empty time.Time
stats, err := c.memoryCache.RecentStats(c.info.Name, empty, empty, numSamples)
stats, err := cd.memoryCache.RecentStats(cd.info.Name, empty, empty, numSamples)
if err != nil {
if c.allowErrorLogging() {
klog.Warningf("[%s] Failed to get recent stats for logging usage: %v", c.info.Name, err)
if cd.allowErrorLogging() {
klog.Warningf("[%s] Failed to get recent stats for logging usage: %v", cd.info.Name, err)
}
} else if len(stats) < numSamples {
// Ignore, not enough stats yet.
} else {
usageCpuNs := uint64(0)
usageCPUNs := uint64(0)
for i := range stats {
if i > 0 {
usageCpuNs += (stats[i].Cpu.Usage.Total - stats[i-1].Cpu.Usage.Total)
usageCPUNs += (stats[i].Cpu.Usage.Total - stats[i-1].Cpu.Usage.Total)
}
}
usageMemory := stats[numSamples-1].Memory.Usage
instantUsageInCores := float64(stats[numSamples-1].Cpu.Usage.Total-stats[numSamples-2].Cpu.Usage.Total) / float64(stats[numSamples-1].Timestamp.Sub(stats[numSamples-2].Timestamp).Nanoseconds())
usageInCores := float64(usageCpuNs) / float64(stats[numSamples-1].Timestamp.Sub(stats[0].Timestamp).Nanoseconds())
usageInCores := float64(usageCPUNs) / float64(stats[numSamples-1].Timestamp.Sub(stats[0].Timestamp).Nanoseconds())
usageInHuman := units.HumanSize(float64(usageMemory))
// Don't set verbosity since this is already protected by the logUsage flag.
klog.Infof("[%s] %.3f cores (average: %.3f cores), %s of memory", c.info.Name, instantUsageInCores, usageInCores, usageInHuman)
klog.Infof("[%s] %.3f cores (average: %.3f cores), %s of memory", cd.info.Name, instantUsageInCores, usageInCores, usageInHuman)
}
}
houseKeepingTimer.Reset(c.nextHousekeepingInterval())
houseKeepingTimer.Reset(cd.nextHousekeepingInterval())
}
}
func (c *containerData) housekeepingTick(timer <-chan time.Time, longHousekeeping time.Duration) bool {
func (cd *containerData) housekeepingTick(timer <-chan time.Time, longHousekeeping time.Duration) bool {
select {
case <-c.stop:
case <-cd.stop:
// Stop housekeeping when signaled.
return false
case finishedChan := <-c.onDemandChan:
case finishedChan := <-cd.onDemandChan:
// notify the calling function once housekeeping has completed
defer close(finishedChan)
case <-timer:
}
start := c.clock.Now()
err := c.updateStats()
start := cd.clock.Now()
err := cd.updateStats()
if err != nil {
if c.allowErrorLogging() {
klog.Warningf("Failed to update stats for container \"%s\": %s", c.info.Name, err)
if cd.allowErrorLogging() {
klog.Warningf("Failed to update stats for container \"%s\": %s", cd.info.Name, err)
}
}
// Log if housekeeping took too long.
duration := c.clock.Since(start)
duration := cd.clock.Since(start)
if duration >= longHousekeeping {
klog.V(3).Infof("[%s] Housekeeping took %s", c.info.Name, duration)
klog.V(3).Infof("[%s] Housekeeping took %s", cd.info.Name, duration)
}
c.notifyOnDemand()
c.statsLastUpdatedTime = c.clock.Now()
cd.notifyOnDemand()
cd.statsLastUpdatedTime = cd.clock.Now()
return true
}
func (c *containerData) updateSpec() error {
spec, err := c.handler.GetSpec()
func (cd *containerData) updateSpec() error {
spec, err := cd.handler.GetSpec()
if err != nil {
// Ignore errors if the container is dead.
if !c.handler.Exists() {
if !cd.handler.Exists() {
return nil
}
return err
}
customMetrics, err := c.collectorManager.GetSpec()
customMetrics, err := cd.collectorManager.GetSpec()
if err != nil {
return err
}
@ -554,28 +567,28 @@ func (c *containerData) updateSpec() error {
spec.HasCustomMetrics = true
spec.CustomMetrics = customMetrics
}
c.lock.Lock()
defer c.lock.Unlock()
c.info.Spec = spec
cd.lock.Lock()
defer cd.lock.Unlock()
cd.info.Spec = spec
return nil
}
// Calculate new smoothed load average using the new sample of runnable threads.
// The decay used ensures that the load will stabilize on a new constant value within
// 10 seconds.
func (c *containerData) updateLoad(newLoad uint64) {
if c.loadAvg < 0 {
c.loadAvg = float64(newLoad) // initialize to the first seen sample for faster stabilization.
func (cd *containerData) updateLoad(newLoad uint64) {
if cd.loadAvg < 0 {
cd.loadAvg = float64(newLoad) // initialize to the first seen sample for faster stabilization.
} else {
c.loadAvg = c.loadAvg*c.loadDecay + float64(newLoad)*(1.0-c.loadDecay)
cd.loadAvg = cd.loadAvg*cd.loadDecay + float64(newLoad)*(1.0-cd.loadDecay)
}
}
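As a quick worked example of the smoothing (numbers are illustrative): with loadDecay = 0.6, a previous average of 0, and a constant sample of 10 runnable threads, successive updates of loadAvg = loadAvg*0.6 + 10*0.4 give 4.0, then 6.4, then 7.84, converging toward 10 as housekeeping passes accumulate.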
func (c *containerData) updateStats() error {
stats, statsErr := c.handler.GetStats()
func (cd *containerData) updateStats() error {
stats, statsErr := cd.handler.GetStats()
if statsErr != nil {
// Ignore errors if the container is dead.
if !c.handler.Exists() {
if !cd.handler.Exists() {
return nil
}
@ -585,32 +598,32 @@ func (c *containerData) updateStats() error {
if stats == nil {
return statsErr
}
if c.loadReader != nil {
if cd.loadReader != nil {
// TODO(vmarmol): Cache this path.
path, err := c.handler.GetCgroupPath("cpu")
path, err := cd.handler.GetCgroupPath("cpu")
if err == nil {
loadStats, err := c.loadReader.GetCpuLoad(c.info.Name, path)
loadStats, err := cd.loadReader.GetCpuLoad(cd.info.Name, path)
if err != nil {
return fmt.Errorf("failed to get load stat for %q - path %q, error %s", c.info.Name, path, err)
return fmt.Errorf("failed to get load stat for %q - path %q, error %s", cd.info.Name, path, err)
}
stats.TaskStats = loadStats
c.updateLoad(loadStats.NrRunning)
cd.updateLoad(loadStats.NrRunning)
// convert to 'milliLoad' to avoid floats and preserve precision.
stats.Cpu.LoadAverage = int32(c.loadAvg * 1000)
stats.Cpu.LoadAverage = int32(cd.loadAvg * 1000)
}
}
if c.summaryReader != nil {
err := c.summaryReader.AddSample(*stats)
if cd.summaryReader != nil {
err := cd.summaryReader.AddSample(*stats)
if err != nil {
// Ignore summary errors for now.
klog.V(2).Infof("Failed to add summary stats for %q: %v", c.info.Name, err)
klog.V(2).Infof("Failed to add summary stats for %q: %v", cd.info.Name, err)
}
}
var customStatsErr error
cm := c.collectorManager.(*collector.GenericCollectorManager)
cm := cd.collectorManager.(*collector.GenericCollectorManager)
if len(cm.Collectors) > 0 {
if cm.NextCollectionTime.Before(c.clock.Now()) {
customStats, err := c.updateCustomStats()
if cm.NextCollectionTime.Before(cd.clock.Now()) {
customStats, err := cd.updateCustomStats()
if customStats != nil {
stats.CustomMetrics = customStats
}
@ -621,15 +634,17 @@ func (c *containerData) updateStats() error {
}
var nvidiaStatsErr error
if c.nvidiaCollector != nil {
if cd.nvidiaCollector != nil {
// This updates the Accelerators field of the stats struct
nvidiaStatsErr = c.nvidiaCollector.UpdateStats(stats)
nvidiaStatsErr = cd.nvidiaCollector.UpdateStats(stats)
}
ref, err := c.handler.ContainerReference()
perfStatsErr := cd.perfCollector.UpdateStats(stats)
ref, err := cd.handler.ContainerReference()
if err != nil {
// Ignore errors if the container is dead.
if !c.handler.Exists() {
if !cd.handler.Exists() {
return nil
}
return err
@ -639,7 +654,7 @@ func (c *containerData) updateStats() error {
ContainerReference: ref,
}
err = c.memoryCache.AddStats(&cInfo, stats)
err = cd.memoryCache.AddStats(&cInfo, stats)
if err != nil {
return err
}
@ -647,15 +662,20 @@ func (c *containerData) updateStats() error {
return statsErr
}
if nvidiaStatsErr != nil {
klog.Errorf("error occurred while collecting nvidia stats for container %s: %s", cInfo.Name, err)
return nvidiaStatsErr
}
if perfStatsErr != nil {
klog.Errorf("error occurred while collecting perf stats for container %s: %s", cInfo.Name, err)
return perfStatsErr
}
return customStatsErr
}
func (c *containerData) updateCustomStats() (map[string][]info.MetricVal, error) {
_, customStats, customStatsErr := c.collectorManager.Collect()
func (cd *containerData) updateCustomStats() (map[string][]info.MetricVal, error) {
_, customStats, customStatsErr := cd.collectorManager.Collect()
if customStatsErr != nil {
if !c.handler.Exists() {
if !cd.handler.Exists() {
return customStats, nil
}
customStatsErr = fmt.Errorf("%v, continuing to push custom stats", customStatsErr)
@ -663,19 +683,19 @@ func (c *containerData) updateCustomStats() (map[string][]info.MetricVal, error)
return customStats, customStatsErr
}
func (c *containerData) updateSubcontainers() error {
func (cd *containerData) updateSubcontainers() error {
var subcontainers info.ContainerReferenceSlice
subcontainers, err := c.handler.ListContainers(container.ListSelf)
subcontainers, err := cd.handler.ListContainers(container.ListSelf)
if err != nil {
// Ignore errors if the container is dead.
if !c.handler.Exists() {
if !cd.handler.Exists() {
return nil
}
return err
}
sort.Sort(subcontainers)
c.lock.Lock()
defer c.lock.Unlock()
c.info.Subcontainers = subcontainers
cd.lock.Lock()
defer cd.lock.Unlock()
cd.info.Subcontainers = subcontainers
return nil
}


@ -37,6 +37,8 @@ import (
info "github.com/google/cadvisor/info/v1"
"github.com/google/cadvisor/info/v2"
"github.com/google/cadvisor/machine"
"github.com/google/cadvisor/nvm"
"github.com/google/cadvisor/perf"
"github.com/google/cadvisor/stats"
"github.com/google/cadvisor/utils/oomparser"
"github.com/google/cadvisor/utils/sysfs"
@ -44,7 +46,7 @@ import (
"github.com/google/cadvisor/watcher"
"github.com/opencontainers/runc/libcontainer/cgroups"
"k8s.io/klog"
"k8s.io/klog/v2"
"k8s.io/utils/clock"
)
@ -121,7 +123,7 @@ type Manager interface {
// Get past events that have been detected and that fit the request.
GetPastEvents(request *events.Request) ([]*info.Event, error)
CloseEventChannel(watch_id int)
CloseEventChannel(watchID int)
// Get status information about docker.
DockerInfo() (info.DockerStatus, error)
@ -133,8 +135,14 @@ type Manager interface {
DebugInfo() map[string][]string
}
// Housekeeping configuration for the manager
type HouskeepingConfig = struct {
Interval *time.Duration
AllowDynamic *bool
}
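A hedged sketch of populating the new struct for the updated New signature below (caller-side names are illustrative; note that both fields are pointers):

interval := 10 * time.Second
allowDynamic := true
housekeepingConfig := HouskeepingConfig{
	Interval:     &interval,
	AllowDynamic: &allowDynamic,
}
// Passed to New in place of the former maxHousekeepingInterval and
// allowDynamicHousekeeping arguments.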
// New takes a memory storage and returns a new manager.
func New(memoryCache *memory.InMemoryCache, sysfs sysfs.SysFs, maxHousekeepingInterval time.Duration, allowDynamicHousekeeping bool, includedMetricsSet container.MetricSet, collectorHttpClient *http.Client, rawContainerCgroupPathPrefixWhiteList []string) (Manager, error) {
func New(memoryCache *memory.InMemoryCache, sysfs sysfs.SysFs, houskeepingConfig HouskeepingConfig, includedMetricsSet container.MetricSet, collectorHTTPClient *http.Client, rawContainerCgroupPathPrefixWhiteList []string, perfEventsFile string) (Manager, error) {
if memoryCache == nil {
return nil, fmt.Errorf("manager requires memory storage")
}
@ -176,12 +184,12 @@ func New(memoryCache *memory.InMemoryCache, sysfs sysfs.SysFs, maxHousekeepingIn
cadvisorContainer: selfContainer,
inHostNamespace: inHostNamespace,
startupTime: time.Now(),
maxHousekeepingInterval: maxHousekeepingInterval,
allowDynamicHousekeeping: allowDynamicHousekeeping,
maxHousekeepingInterval: *houskeepingConfig.Interval,
allowDynamicHousekeeping: *houskeepingConfig.AllowDynamic,
includedMetrics: includedMetricsSet,
containerWatchers: []watcher.ContainerWatcher{},
eventsChannel: eventsChannel,
collectorHttpClient: collectorHttpClient,
collectorHTTPClient: collectorHTTPClient,
nvidiaManager: accelerators.NewNvidiaManager(),
rawContainerCgroupPathPrefixWhiteList: rawContainerCgroupPathPrefixWhiteList,
}
@ -193,6 +201,11 @@ func New(memoryCache *memory.InMemoryCache, sysfs sysfs.SysFs, maxHousekeepingIn
newManager.machineInfo = *machineInfo
klog.V(1).Infof("Machine: %+v", newManager.machineInfo)
newManager.perfManager, err = perf.NewManager(perfEventsFile, machineInfo.NumCores)
if err != nil {
return nil, err
}
versionInfo, err := getVersionInfo()
if err != nil {
return nil, err
@ -230,17 +243,18 @@ type manager struct {
includedMetrics container.MetricSet
containerWatchers []watcher.ContainerWatcher
eventsChannel chan watcher.ContainerEvent
collectorHttpClient *http.Client
collectorHTTPClient *http.Client
nvidiaManager stats.Manager
perfManager stats.Manager
// Whitelist of raw container cgroup path prefixes.
rawContainerCgroupPathPrefixWhiteList []string
}
// Start the container manager.
func (self *manager) Start() error {
self.containerWatchers = container.InitializePlugins(self, self.fsInfo, self.includedMetrics)
func (m *manager) Start() error {
m.containerWatchers = container.InitializePlugins(m, m.fsInfo, m.includedMetrics)
err := raw.Register(self, self.fsInfo, self.includedMetrics, self.rawContainerCgroupPathPrefixWhiteList)
err := raw.Register(m, m.fsInfo, m.includedMetrics, m.rawContainerCgroupPathPrefixWhiteList)
if err != nil {
klog.Errorf("Registration of the raw container factory failed: %v", err)
}
@ -249,10 +263,10 @@ func (self *manager) Start() error {
if err != nil {
return err
}
self.containerWatchers = append(self.containerWatchers, rawWatcher)
m.containerWatchers = append(m.containerWatchers, rawWatcher)
// Watch for OOMs.
err = self.watchForNewOoms()
err = m.watchForNewOoms()
if err != nil {
klog.Warningf("Could not configure a source for OOM detection, disabling OOM events: %v", err)
}
@ -262,16 +276,13 @@ func (self *manager) Start() error {
return nil
}
// Setup collection of nvidia GPU metrics if any of them are attached to the machine.
self.nvidiaManager.Setup()
// Create root and then recover all containers.
err = self.createContainer("/", watcher.Raw)
err = m.createContainer("/", watcher.Raw)
if err != nil {
return err
}
klog.V(2).Infof("Starting recovery of all containers")
err = self.detectSubcontainers("/")
err = m.detectSubcontainers("/")
if err != nil {
return err
}
@ -279,54 +290,63 @@ func (self *manager) Start() error {
// Watch for new container.
quitWatcher := make(chan error)
err = self.watchForNewContainers(quitWatcher)
err = m.watchForNewContainers(quitWatcher)
if err != nil {
return err
}
self.quitChannels = append(self.quitChannels, quitWatcher)
m.quitChannels = append(m.quitChannels, quitWatcher)
// Look for new containers in the main housekeeping thread.
quitGlobalHousekeeping := make(chan error)
self.quitChannels = append(self.quitChannels, quitGlobalHousekeeping)
go self.globalHousekeeping(quitGlobalHousekeeping)
m.quitChannels = append(m.quitChannels, quitGlobalHousekeeping)
go m.globalHousekeeping(quitGlobalHousekeeping)
quitUpdateMachineInfo := make(chan error)
self.quitChannels = append(self.quitChannels, quitUpdateMachineInfo)
go self.updateMachineInfo(quitUpdateMachineInfo)
m.quitChannels = append(m.quitChannels, quitUpdateMachineInfo)
go m.updateMachineInfo(quitUpdateMachineInfo)
return nil
}
func (self *manager) Stop() error {
defer self.nvidiaManager.Destroy()
func (m *manager) Stop() error {
defer m.nvidiaManager.Destroy()
defer m.destroyPerfCollectors()
// Stop and wait on all quit channels.
for i, c := range self.quitChannels {
for i, c := range m.quitChannels {
// Send the exit signal and wait on the thread to exit (by closing the channel).
c <- nil
err := <-c
if err != nil {
// Remove the channels that quit successfully.
self.quitChannels = self.quitChannels[i:]
m.quitChannels = m.quitChannels[i:]
return err
}
}
self.quitChannels = make([]chan error, 0, 2)
m.quitChannels = make([]chan error, 0, 2)
nvm.Finalize()
perf.Finalize()
return nil
}
func (self *manager) updateMachineInfo(quit chan error) {
func (m *manager) destroyPerfCollectors() {
for _, container := range m.containers {
container.perfCollector.Destroy()
}
}
func (m *manager) updateMachineInfo(quit chan error) {
ticker := time.NewTicker(*updateMachineInfoInterval)
for {
select {
case <-ticker.C:
info, err := machine.Info(self.sysFs, self.fsInfo, self.inHostNamespace)
info, err := machine.Info(m.sysFs, m.fsInfo, m.inHostNamespace)
if err != nil {
klog.Errorf("Could not get machine info: %v", err)
break
}
self.machineMu.Lock()
self.machineInfo = *info
self.machineMu.Unlock()
m.machineMu.Lock()
m.machineInfo = *info
m.machineMu.Unlock()
klog.V(5).Infof("Update machine info: %+v", *info)
case <-quit:
ticker.Stop()
@ -336,21 +356,21 @@ func (self *manager) updateMachineInfo(quit chan error) {
}
}
func (self *manager) globalHousekeeping(quit chan error) {
func (m *manager) globalHousekeeping(quit chan error) {
// Long housekeeping is either 100ms or half of the housekeeping interval.
longHousekeeping := 100 * time.Millisecond
if *globalHousekeepingInterval/2 < longHousekeeping {
longHousekeeping = *globalHousekeepingInterval / 2
}
ticker := time.Tick(*globalHousekeepingInterval)
ticker := time.NewTicker(*globalHousekeepingInterval)
for {
select {
case t := <-ticker:
case t := <-ticker.C:
start := time.Now()
// Check for new containers.
err := self.detectSubcontainers("/")
err := m.detectSubcontainers("/")
if err != nil {
klog.Errorf("Failed to detect containers: %s", err)
}
@ -369,15 +389,15 @@ func (self *manager) globalHousekeeping(quit chan error) {
}
}
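The move from time.Tick to time.NewTicker here is not just stylistic: time.Tick gives no handle for stopping the underlying Ticker, so it leaks when the goroutine returns on quit, whereas a NewTicker can be stopped the way updateMachineInfo above stops its own.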
func (self *manager) getContainerData(containerName string) (*containerData, error) {
func (m *manager) getContainerData(containerName string) (*containerData, error) {
var cont *containerData
var ok bool
func() {
self.containersLock.RLock()
defer self.containersLock.RUnlock()
m.containersLock.RLock()
defer m.containersLock.RUnlock()
// Ensure we have the container.
cont, ok = self.containers[namespacedContainerName{
cont, ok = m.containers[namespacedContainerName{
Name: containerName,
}]
}()
@ -387,8 +407,8 @@ func (self *manager) getContainerData(containerName string) (*containerData, err
return cont, nil
}
func (self *manager) GetDerivedStats(containerName string, options v2.RequestOptions) (map[string]v2.DerivedStats, error) {
conts, err := self.getRequestedContainers(containerName, options)
func (m *manager) GetDerivedStats(containerName string, options v2.RequestOptions) (map[string]v2.DerivedStats, error) {
conts, err := m.getRequestedContainers(containerName, options)
if err != nil {
return nil, err
}
@ -404,8 +424,8 @@ func (self *manager) GetDerivedStats(containerName string, options v2.RequestOpt
return stats, errs.OrNil()
}
func (self *manager) GetContainerSpec(containerName string, options v2.RequestOptions) (map[string]v2.ContainerSpec, error) {
conts, err := self.getRequestedContainers(containerName, options)
func (m *manager) GetContainerSpec(containerName string, options v2.RequestOptions) (map[string]v2.ContainerSpec, error) {
conts, err := m.getRequestedContainers(containerName, options)
if err != nil {
return nil, err
}
@ -416,43 +436,43 @@ func (self *manager) GetContainerSpec(containerName string, options v2.RequestOp
if err != nil {
errs.append(name, "GetInfo", err)
}
spec := self.getV2Spec(cinfo)
spec := m.getV2Spec(cinfo)
specs[name] = spec
}
return specs, errs.OrNil()
}
// Get V2 container spec from v1 container info.
func (self *manager) getV2Spec(cinfo *containerInfo) v2.ContainerSpec {
spec := self.getAdjustedSpec(cinfo)
func (m *manager) getV2Spec(cinfo *containerInfo) v2.ContainerSpec {
spec := m.getAdjustedSpec(cinfo)
return v2.ContainerSpecFromV1(&spec, cinfo.Aliases, cinfo.Namespace)
}
func (self *manager) getAdjustedSpec(cinfo *containerInfo) info.ContainerSpec {
func (m *manager) getAdjustedSpec(cinfo *containerInfo) info.ContainerSpec {
spec := cinfo.Spec
// Set default value to an actual value
if spec.HasMemory {
// Memory.Limit is 0 means there's no limit
if spec.Memory.Limit == 0 {
self.machineMu.RLock()
spec.Memory.Limit = uint64(self.machineInfo.MemoryCapacity)
self.machineMu.RUnlock()
m.machineMu.RLock()
spec.Memory.Limit = uint64(m.machineInfo.MemoryCapacity)
m.machineMu.RUnlock()
}
}
return spec
}
func (self *manager) GetContainerInfo(containerName string, query *info.ContainerInfoRequest) (*info.ContainerInfo, error) {
cont, err := self.getContainerData(containerName)
func (m *manager) GetContainerInfo(containerName string, query *info.ContainerInfoRequest) (*info.ContainerInfo, error) {
cont, err := m.getContainerData(containerName)
if err != nil {
return nil, err
}
return self.containerDataToContainerInfo(cont, query)
return m.containerDataToContainerInfo(cont, query)
}
func (self *manager) GetContainerInfoV2(containerName string, options v2.RequestOptions) (map[string]v2.ContainerInfo, error) {
containers, err := self.getRequestedContainers(containerName, options)
func (m *manager) GetContainerInfoV2(containerName string, options v2.RequestOptions) (map[string]v2.ContainerInfo, error) {
containers, err := m.getRequestedContainers(containerName, options)
if err != nil {
return nil, err
}
@ -469,9 +489,9 @@ func (self *manager) GetContainerInfoV2(containerName string, options v2.Request
infos[name] = result
continue
}
result.Spec = self.getV2Spec(cinfo)
result.Spec = m.getV2Spec(cinfo)
stats, err := self.memoryCache.RecentStats(name, nilTime, nilTime, options.Count)
stats, err := m.memoryCache.RecentStats(name, nilTime, nilTime, options.Count)
if err != nil {
errs.append(name, "RecentStats", err)
infos[name] = result
@ -485,14 +505,14 @@ func (self *manager) GetContainerInfoV2(containerName string, options v2.Request
return infos, errs.OrNil()
}
func (self *manager) containerDataToContainerInfo(cont *containerData, query *info.ContainerInfoRequest) (*info.ContainerInfo, error) {
func (m *manager) containerDataToContainerInfo(cont *containerData, query *info.ContainerInfoRequest) (*info.ContainerInfo, error) {
// Get the info from the container.
cinfo, err := cont.GetInfo(true)
if err != nil {
return nil, err
}
stats, err := self.memoryCache.RecentStats(cinfo.Name, query.Start, query.End, query.NumStats)
stats, err := m.memoryCache.RecentStats(cinfo.Name, query.Start, query.End, query.NumStats)
if err != nil {
return nil, err
}
@ -501,55 +521,55 @@ func (self *manager) containerDataToContainerInfo(cont *containerData, query *in
ret := &info.ContainerInfo{
ContainerReference: cinfo.ContainerReference,
Subcontainers: cinfo.Subcontainers,
Spec: self.getAdjustedSpec(cinfo),
Spec: m.getAdjustedSpec(cinfo),
Stats: stats,
}
return ret, nil
}
func (self *manager) getContainer(containerName string) (*containerData, error) {
self.containersLock.RLock()
defer self.containersLock.RUnlock()
cont, ok := self.containers[namespacedContainerName{Name: containerName}]
func (m *manager) getContainer(containerName string) (*containerData, error) {
m.containersLock.RLock()
defer m.containersLock.RUnlock()
cont, ok := m.containers[namespacedContainerName{Name: containerName}]
if !ok {
return nil, fmt.Errorf("unknown container %q", containerName)
}
return cont, nil
}
func (self *manager) getSubcontainers(containerName string) map[string]*containerData {
self.containersLock.RLock()
defer self.containersLock.RUnlock()
containersMap := make(map[string]*containerData, len(self.containers))
func (m *manager) getSubcontainers(containerName string) map[string]*containerData {
m.containersLock.RLock()
defer m.containersLock.RUnlock()
containersMap := make(map[string]*containerData, len(m.containers))
// Get all the unique subcontainers of the specified container
matchedName := path.Join(containerName, "/")
for i := range self.containers {
name := self.containers[i].info.Name
for i := range m.containers {
name := m.containers[i].info.Name
if name == containerName || strings.HasPrefix(name, matchedName) {
containersMap[self.containers[i].info.Name] = self.containers[i]
containersMap[m.containers[i].info.Name] = m.containers[i]
}
}
return containersMap
}
func (self *manager) SubcontainersInfo(containerName string, query *info.ContainerInfoRequest) ([]*info.ContainerInfo, error) {
containersMap := self.getSubcontainers(containerName)
func (m *manager) SubcontainersInfo(containerName string, query *info.ContainerInfoRequest) ([]*info.ContainerInfo, error) {
containersMap := m.getSubcontainers(containerName)
containers := make([]*containerData, 0, len(containersMap))
for _, cont := range containersMap {
containers = append(containers, cont)
}
return self.containerDataSliceToContainerInfoSlice(containers, query)
return m.containerDataSliceToContainerInfoSlice(containers, query)
}
func (self *manager) getAllDockerContainers() map[string]*containerData {
self.containersLock.RLock()
defer self.containersLock.RUnlock()
containers := make(map[string]*containerData, len(self.containers))
func (m *manager) getAllDockerContainers() map[string]*containerData {
m.containersLock.RLock()
defer m.containersLock.RUnlock()
containers := make(map[string]*containerData, len(m.containers))
// Get containers in the Docker namespace.
for name, cont := range self.containers {
for name, cont := range m.containers {
if name.Namespace == docker.DockerNamespace {
containers[cont.info.Name] = cont
}
@ -557,12 +577,12 @@ func (self *manager) getAllDockerContainers() map[string]*containerData {
return containers
}
func (self *manager) AllDockerContainers(query *info.ContainerInfoRequest) (map[string]info.ContainerInfo, error) {
containers := self.getAllDockerContainers()
func (m *manager) AllDockerContainers(query *info.ContainerInfoRequest) (map[string]info.ContainerInfo, error) {
containers := m.getAllDockerContainers()
output := make(map[string]info.ContainerInfo, len(containers))
for name, cont := range containers {
inf, err := self.containerDataToContainerInfo(cont, query)
inf, err := m.containerDataToContainerInfo(cont, query)
if err != nil {
// Ignore the error because of a race condition and return a best-effort result.
if err == memory.ErrDataNotFound {
@ -576,19 +596,19 @@ func (self *manager) AllDockerContainers(query *info.ContainerInfoRequest) (map[
return output, nil
}
func (self *manager) getDockerContainer(containerName string) (*containerData, error) {
self.containersLock.RLock()
defer self.containersLock.RUnlock()
func (m *manager) getDockerContainer(containerName string) (*containerData, error) {
m.containersLock.RLock()
defer m.containersLock.RUnlock()
// Check for the container in the Docker container namespace.
cont, ok := self.containers[namespacedContainerName{
cont, ok := m.containers[namespacedContainerName{
Namespace: docker.DockerNamespace,
Name: containerName,
}]
// Look for container by short prefix name if no exact match found.
if !ok {
for contName, c := range self.containers {
for contName, c := range m.containers {
if contName.Namespace == docker.DockerNamespace && strings.HasPrefix(contName.Name, containerName) {
if cont == nil {
cont = c
@ -606,20 +626,20 @@ func (self *manager) getDockerContainer(containerName string) (*containerData, e
return cont, nil
}
func (self *manager) DockerContainer(containerName string, query *info.ContainerInfoRequest) (info.ContainerInfo, error) {
container, err := self.getDockerContainer(containerName)
func (m *manager) DockerContainer(containerName string, query *info.ContainerInfoRequest) (info.ContainerInfo, error) {
container, err := m.getDockerContainer(containerName)
if err != nil {
return info.ContainerInfo{}, err
}
inf, err := self.containerDataToContainerInfo(container, query)
inf, err := m.containerDataToContainerInfo(container, query)
if err != nil {
return info.ContainerInfo{}, err
}
return *inf, nil
}
func (self *manager) containerDataSliceToContainerInfoSlice(containers []*containerData, query *info.ContainerInfoRequest) ([]*info.ContainerInfo, error) {
func (m *manager) containerDataSliceToContainerInfoSlice(containers []*containerData, query *info.ContainerInfoRequest) ([]*info.ContainerInfo, error) {
if len(containers) == 0 {
return nil, fmt.Errorf("no containers found")
}
@ -627,7 +647,7 @@ func (self *manager) containerDataSliceToContainerInfoSlice(containers []*contai
// Get the info for each container.
output := make([]*info.ContainerInfo, 0, len(containers))
for i := range containers {
cinfo, err := self.containerDataToContainerInfo(containers[i], query)
cinfo, err := m.containerDataToContainerInfo(containers[i], query)
if err != nil {
// Skip containers with errors, we try to degrade gracefully.
continue
@ -638,8 +658,8 @@ func (self *manager) containerDataSliceToContainerInfoSlice(containers []*contai
return output, nil
}
func (self *manager) GetRequestedContainersInfo(containerName string, options v2.RequestOptions) (map[string]*info.ContainerInfo, error) {
containers, err := self.getRequestedContainers(containerName, options)
func (m *manager) GetRequestedContainersInfo(containerName string, options v2.RequestOptions) (map[string]*info.ContainerInfo, error) {
containers, err := m.getRequestedContainers(containerName, options)
if err != nil {
return nil, err
}
@ -649,7 +669,7 @@ func (self *manager) GetRequestedContainersInfo(containerName string, options v2
NumStats: options.Count,
}
for name, data := range containers {
info, err := self.containerDataToContainerInfo(data, &query)
info, err := m.containerDataToContainerInfo(data, &query)
if err != nil {
errs.append(name, "containerDataToContainerInfo", err)
}
@ -658,26 +678,26 @@ func (self *manager) GetRequestedContainersInfo(containerName string, options v2
return containersMap, errs.OrNil()
}
func (self *manager) getRequestedContainers(containerName string, options v2.RequestOptions) (map[string]*containerData, error) {
func (m *manager) getRequestedContainers(containerName string, options v2.RequestOptions) (map[string]*containerData, error) {
containersMap := make(map[string]*containerData)
switch options.IdType {
case v2.TypeName:
if options.Recursive == false {
cont, err := self.getContainer(containerName)
if !options.Recursive {
cont, err := m.getContainer(containerName)
if err != nil {
return containersMap, err
}
containersMap[cont.info.Name] = cont
} else {
containersMap = self.getSubcontainers(containerName)
containersMap = m.getSubcontainers(containerName)
if len(containersMap) == 0 {
return containersMap, fmt.Errorf("unknown container: %q", containerName)
}
}
case v2.TypeDocker:
if options.Recursive == false {
if !options.Recursive {
containerName = strings.TrimPrefix(containerName, "/")
cont, err := self.getDockerContainer(containerName)
cont, err := m.getDockerContainer(containerName)
if err != nil {
return containersMap, err
}
@ -686,7 +706,7 @@ func (self *manager) getRequestedContainers(containerName string, options v2.Req
if containerName != "/" {
return containersMap, fmt.Errorf("invalid request for docker container %q with subcontainers", containerName)
}
containersMap = self.getAllDockerContainers()
containersMap = m.getAllDockerContainers()
}
default:
return containersMap, fmt.Errorf("invalid request type %q", options.IdType)
@ -706,32 +726,32 @@ func (self *manager) getRequestedContainers(containerName string, options v2.Req
return containersMap, nil
}
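A hypothetical caller sketch (not part of this diff) showing how v2.RequestOptions drives the paths above: TypeName with Recursive walks the whole cgroup subtree, while TypeDocker with Recursive lists every Docker container. It assumes the exported manager.Manager interface exposes GetRequestedContainersInfo, as the method set here suggests.
package example
import (
"fmt"
v2 "github.com/google/cadvisor/info/v2"
"github.com/google/cadvisor/manager"
)
// dumpContainers requests one stats sample for every container under the
// root cgroup, addressed by name, recursively.
func dumpContainers(m manager.Manager) error {
opts := v2.RequestOptions{
IdType:    v2.TypeName,
Count:     1,
Recursive: true,
}
infos, err := m.GetRequestedContainersInfo("/", opts)
if err != nil {
return err
}
for name := range infos {
fmt.Println(name)
}
return nil
}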
func (self *manager) GetDirFsInfo(dir string) (v2.FsInfo, error) {
device, err := self.fsInfo.GetDirFsDevice(dir)
func (m *manager) GetDirFsInfo(dir string) (v2.FsInfo, error) {
device, err := m.fsInfo.GetDirFsDevice(dir)
if err != nil {
return v2.FsInfo{}, fmt.Errorf("failed to get device for dir %q: %v", dir, err)
}
return self.getFsInfoByDeviceName(device.Device)
return m.getFsInfoByDeviceName(device.Device)
}
func (self *manager) GetFsInfoByFsUUID(uuid string) (v2.FsInfo, error) {
device, err := self.fsInfo.GetDeviceInfoByFsUUID(uuid)
func (m *manager) GetFsInfoByFsUUID(uuid string) (v2.FsInfo, error) {
device, err := m.fsInfo.GetDeviceInfoByFsUUID(uuid)
if err != nil {
return v2.FsInfo{}, err
}
return self.getFsInfoByDeviceName(device.Device)
return m.getFsInfoByDeviceName(device.Device)
}
func (self *manager) GetFsInfo(label string) ([]v2.FsInfo, error) {
func (m *manager) GetFsInfo(label string) ([]v2.FsInfo, error) {
var empty time.Time
// Get latest data from filesystems hanging off root container.
stats, err := self.memoryCache.RecentStats("/", empty, empty, 1)
stats, err := m.memoryCache.RecentStats("/", empty, empty, 1)
if err != nil {
return nil, err
}
dev := ""
if len(label) != 0 {
dev, err = self.fsInfo.GetDeviceForLabel(label)
dev, err = m.fsInfo.GetDeviceForLabel(label)
if err != nil {
return nil, err
}
@ -742,11 +762,11 @@ func (self *manager) GetFsInfo(label string) ([]v2.FsInfo, error) {
if len(label) != 0 && fs.Device != dev {
continue
}
mountpoint, err := self.fsInfo.GetMountpointForDevice(fs.Device)
mountpoint, err := m.fsInfo.GetMountpointForDevice(fs.Device)
if err != nil {
return nil, err
}
labels, err := self.fsInfo.GetLabelsForDevice(fs.Device)
labels, err := m.fsInfo.GetLabelsForDevice(fs.Device)
if err != nil {
return nil, err
}
@ -772,8 +792,7 @@ func (self *manager) GetFsInfo(label string) ([]v2.FsInfo, error) {
func (m *manager) GetMachineInfo() (*info.MachineInfo, error) {
m.machineMu.RLock()
defer m.machineMu.RUnlock()
// Copy and return the MachineInfo.
return &m.machineInfo, nil
return m.machineInfo.Clone(), nil
}
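Returning m.machineInfo.Clone() instead of &m.machineInfo matters because the manager updates machineInfo in place under machineMu; handing out the interior pointer would let callers read it concurrently with later updates. A minimal sketch of the pattern, with illustrative names (holder and Config are not cadvisor types):
package main
import (
"fmt"
"sync"
)
// Config stands in for a struct that is periodically rewritten in place,
// the way machineInfo is.
type Config struct {
Cores int
}
// Clone returns a copy, so callers never share memory with the guarded
// original.
func (c *Config) Clone() *Config {
out := *c
return &out
}
type holder struct {
mu  sync.RWMutex
cfg Config
}
// Get snapshots under the read lock; the caller can use the result
// without racing against Update.
func (h *holder) Get() *Config {
h.mu.RLock()
defer h.mu.RUnlock()
return h.cfg.Clone()
}
func (h *holder) Update(cores int) {
h.mu.Lock()
defer h.mu.Unlock()
h.cfg.Cores = cores
}
func main() {
h := &holder{}
h.Update(4)
snapshot := h.Get()
h.Update(8) // does not affect the snapshot
fmt.Println(snapshot.Cores) // prints 4
}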
func (m *manager) GetVersionInfo() (*info.VersionInfo, error) {
@ -785,18 +804,15 @@ func (m *manager) GetVersionInfo() (*info.VersionInfo, error) {
}
func (m *manager) Exists(containerName string) bool {
m.containersLock.Lock()
defer m.containersLock.Unlock()
m.containersLock.RLock()
defer m.containersLock.RUnlock()
namespacedName := namespacedContainerName{
Name: containerName,
}
_, ok := m.containers[namespacedName]
if ok {
return true
}
return false
return ok
}
func (m *manager) GetProcessList(containerName string, options v2.RequestOptions) ([]v2.ProcessInfo, error) {
@ -831,7 +847,7 @@ func (m *manager) registerCollectors(collectorConfigs map[string]string, cont *c
klog.V(4).Infof("Got config from %q: %q", v, configFile)
if strings.HasPrefix(k, "prometheus") || strings.HasPrefix(k, "Prometheus") {
newCollector, err := collector.NewPrometheusCollector(k, configFile, *applicationMetricsCountLimit, cont.handler, m.collectorHttpClient)
newCollector, err := collector.NewPrometheusCollector(k, configFile, *applicationMetricsCountLimit, cont.handler, m.collectorHTTPClient)
if err != nil {
return fmt.Errorf("failed to create collector for container %q, config %q: %v", cont.info.Name, k, err)
}
@ -840,7 +856,7 @@ func (m *manager) registerCollectors(collectorConfigs map[string]string, cont *c
return fmt.Errorf("failed to register collector for container %q, config %q: %v", cont.info.Name, k, err)
}
} else {
newCollector, err := collector.NewCollector(k, configFile, *applicationMetricsCountLimit, cont.handler, m.collectorHttpClient)
newCollector, err := collector.NewCollector(k, configFile, *applicationMetricsCountLimit, cont.handler, m.collectorHTTPClient)
if err != nil {
return fmt.Errorf("failed to create collector for container %q, config %q: %v", cont.info.Name, k, err)
}
@ -853,35 +869,6 @@ func (m *manager) registerCollectors(collectorConfigs map[string]string, cont *c
return nil
}
// Enables overwriting an existing containerData/Handler object for a given containerName.
// Can't use createContainer as it just returns if a given containerName has a handler already.
// Ex: rkt handler will want to take priority over the raw handler, but the raw handler might be created first.
// Only allow the raw handler to be overridden.
func (m *manager) overrideContainer(containerName string, watchSource watcher.ContainerWatchSource) error {
m.containersLock.Lock()
defer m.containersLock.Unlock()
namespacedName := namespacedContainerName{
Name: containerName,
}
if _, ok := m.containers[namespacedName]; ok {
containerData := m.containers[namespacedName]
if containerData.handler.Type() != container.ContainerTypeRaw {
return nil
}
err := m.destroyContainerLocked(containerName)
if err != nil {
return fmt.Errorf("overrideContainer: failed to destroy containerData/handler for %v: %v", containerName, err)
}
}
return m.createContainerLocked(containerName, watchSource)
}
// Create a container.
func (m *manager) createContainer(containerName string, watchSource watcher.ContainerWatchSource) error {
m.containersLock.Lock()
@ -926,7 +913,17 @@ func (m *manager) createContainerLocked(containerName string, watchSource watche
} else {
cont.nvidiaCollector, err = m.nvidiaManager.GetCollector(devicesCgroupPath)
if err != nil {
klog.V(4).Infof("GPU metrics may be unavailable/incomplete for container %q: %v", cont.info.Name, err)
klog.V(4).Infof("GPU metrics may be unavailable/incomplete for container %s: %s", cont.info.Name, err)
}
}
perfCgroupPath, err := handler.GetCgroupPath("perf_event")
if err != nil {
klog.Warningf("Error getting perf_event cgroup path: %q", err)
} else {
cont.perfCollector, err = m.perfManager.GetCollector(perfCgroupPath)
if err != nil {
klog.Infof("perf_event metrics will not be available for container %s: %s", cont.info.Name, err)
}
}
}
@ -1100,16 +1097,16 @@ func (m *manager) detectSubcontainers(containerName string) error {
}
// Watches for new containers started in the system. Runs forever unless there is a setup error.
func (self *manager) watchForNewContainers(quit chan error) error {
for _, watcher := range self.containerWatchers {
err := watcher.Start(self.eventsChannel)
func (m *manager) watchForNewContainers(quit chan error) error {
for _, watcher := range m.containerWatchers {
err := watcher.Start(m.eventsChannel)
if err != nil {
return err
}
}
// There is a race between starting the watch and new container creation, so we run a detection pass before reading new containers.
err := self.detectSubcontainers("/")
err := m.detectSubcontainers("/")
if err != nil {
return err
}
@ -1118,15 +1115,15 @@ func (self *manager) watchForNewContainers(quit chan error) error {
go func() {
for {
select {
case event := <-self.eventsChannel:
case event := <-m.eventsChannel:
switch {
case event.EventType == watcher.ContainerAdd:
switch event.WatchSource {
default:
err = self.createContainer(event.Name, event.WatchSource)
err = m.createContainer(event.Name, event.WatchSource)
}
case event.EventType == watcher.ContainerDelete:
err = self.destroyContainer(event.Name)
err = m.destroyContainer(event.Name)
}
if err != nil {
klog.Warningf("Failed to process watch event %+v: %v", event, err)
@ -1135,7 +1132,7 @@ func (self *manager) watchForNewContainers(quit chan error) error {
var errs partialFailure
// Stop processing events if asked to quit.
for i, watcher := range self.containerWatchers {
for i, watcher := range m.containerWatchers {
err := watcher.Stop()
if err != nil {
errs.append(fmt.Sprintf("watcher %d", i), "Stop", err)
@ -1155,7 +1152,7 @@ func (self *manager) watchForNewContainers(quit chan error) error {
return nil
}
func (self *manager) watchForNewOoms() error {
func (m *manager) watchForNewOoms() error {
klog.V(2).Infof("Started watching for new ooms in manager")
outStream := make(chan *oomparser.OomInstance, 10)
oomLog, err := oomparser.New()
@ -1172,7 +1169,7 @@ func (self *manager) watchForNewOoms() error {
Timestamp: oomInstance.TimeOfDeath,
EventType: info.EventOom,
}
err := self.eventHandler.AddEvent(newEvent)
err := m.eventHandler.AddEvent(newEvent)
if err != nil {
klog.Errorf("failed to add OOM event for %q: %v", oomInstance.ContainerName, err)
}
@ -1189,7 +1186,7 @@ func (self *manager) watchForNewOoms() error {
},
},
}
err = self.eventHandler.AddEvent(newEvent)
err = m.eventHandler.AddEvent(newEvent)
if err != nil {
klog.Errorf("failed to add OOM kill event for %q: %v", oomInstance.ContainerName, err)
}
@ -1199,18 +1196,18 @@ func (self *manager) watchForNewOoms() error {
}
// Can be called by the API; matching events are delivered on the returned channel.
func (self *manager) WatchForEvents(request *events.Request) (*events.EventChannel, error) {
return self.eventHandler.WatchEvents(request)
func (m *manager) WatchForEvents(request *events.Request) (*events.EventChannel, error) {
return m.eventHandler.WatchEvents(request)
}
// Can be called by the API; returns all past events satisfying the request.
func (self *manager) GetPastEvents(request *events.Request) ([]*info.Event, error) {
return self.eventHandler.GetEvents(request)
func (m *manager) GetPastEvents(request *events.Request) ([]*info.Event, error) {
return m.eventHandler.GetEvents(request)
}
// Called by the API when a client is no longer listening on the channel.
func (self *manager) CloseEventChannel(watch_id int) {
self.eventHandler.StopWatch(watch_id)
func (m *manager) CloseEventChannel(watchID int) {
m.eventHandler.StopWatch(watchID)
}
// Parses the events StoragePolicy from the flags.
@ -1303,12 +1300,12 @@ func (m *manager) DebugInfo() map[string][]string {
return debugInfo
}
func (self *manager) getFsInfoByDeviceName(deviceName string) (v2.FsInfo, error) {
mountPoint, err := self.fsInfo.GetMountpointForDevice(deviceName)
func (m *manager) getFsInfoByDeviceName(deviceName string) (v2.FsInfo, error) {
mountPoint, err := m.fsInfo.GetMountpointForDevice(deviceName)
if err != nil {
return v2.FsInfo{}, fmt.Errorf("failed to get mount point for device %q: %v", deviceName, err)
}
infos, err := self.GetFsInfo("")
infos, err := m.GetFsInfo("")
if err != nil {
return v2.FsInfo{}, err
}
@ -1322,22 +1319,22 @@ func (self *manager) getFsInfoByDeviceName(deviceName string) (v2.FsInfo, error)
func getVersionInfo() (*info.VersionInfo, error) {
kernel_version := machine.KernelVersion()
container_os := machine.ContainerOsVersion()
docker_version, err := docker.VersionString()
kernelVersion := machine.KernelVersion()
osVersion := machine.ContainerOsVersion()
dockerVersion, err := docker.VersionString()
if err != nil {
return nil, err
}
docker_api_version, err := docker.APIVersionString()
dockerAPIVersion, err := docker.APIVersionString()
if err != nil {
return nil, err
}
return &info.VersionInfo{
KernelVersion: kernel_version,
ContainerOsVersion: container_os,
DockerVersion: docker_version,
DockerAPIVersion: docker_api_version,
KernelVersion: kernelVersion,
ContainerOsVersion: osVersion,
DockerVersion: dockerVersion,
DockerAPIVersion: dockerAPIVersion,
CadvisorVersion: version.Info["version"],
CadvisorRevision: version.Info["revision"],
}, nil


@ -2,7 +2,12 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library")
go_library(
name = "go_default_library",
srcs = ["prometheus.go"],
srcs = [
"metrics.go",
"prometheus.go",
"prometheus_fake.go",
"prometheus_machine.go",
],
importmap = "k8s.io/kubernetes/vendor/github.com/google/cadvisor/metrics",
importpath = "github.com/google/cadvisor/metrics",
visibility = ["//visibility:public"],
@ -10,7 +15,8 @@ go_library(
"//vendor/github.com/google/cadvisor/container:go_default_library",
"//vendor/github.com/google/cadvisor/info/v1:go_default_library",
"//vendor/github.com/prometheus/client_golang/prometheus:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
"//vendor/k8s.io/utils/clock:go_default_library",
],
)

vendor/github.com/google/cadvisor/metrics/metrics.go generated vendored Normal file

@ -0,0 +1,42 @@
// Copyright 2020 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package metrics
import (
"time"
info "github.com/google/cadvisor/info/v1"
)
// metricValue describes a single metric value for a given set of label values
// within a parent containerMetric.
type metricValue struct {
value float64
labels []string
timestamp time.Time
}
type metricValues []metricValue
// infoProvider will usually be manager.Manager, but can be swapped out for testing.
type infoProvider interface {
// SubcontainersInfo provides information about all subcontainers of the
// specified container including itself.
SubcontainersInfo(containerName string, query *info.ContainerInfoRequest) ([]*info.ContainerInfo, error)
// GetVersionInfo provides information about the version.
GetVersionInfo() (*info.VersionInfo, error)
// GetMachineInfo provides information about the machine.
GetMachineInfo() (*info.MachineInfo, error)
}


@ -17,35 +17,17 @@ package metrics
import (
"fmt"
"regexp"
"strconv"
"time"
"github.com/google/cadvisor/container"
info "github.com/google/cadvisor/info/v1"
"github.com/prometheus/client_golang/prometheus"
"k8s.io/klog"
"k8s.io/klog/v2"
"k8s.io/utils/clock"
)
// infoProvider will usually be manager.Manager, but can be swapped out for testing.
type infoProvider interface {
// SubcontainersInfo provides information about all subcontainers of the
// specified container including itself.
SubcontainersInfo(containerName string, query *info.ContainerInfoRequest) ([]*info.ContainerInfo, error)
// GetVersionInfo provides information about the version.
GetVersionInfo() (*info.VersionInfo, error)
// GetMachineInfo provides information about the machine.
GetMachineInfo() (*info.MachineInfo, error)
}
// metricValue describes a single metric value for a given set of label values
// within a parent containerMetric.
type metricValue struct {
value float64
labels []string
timestamp time.Time
}
type metricValues []metricValue
// asFloat64 converts a uint64 into a float64.
func asFloat64(v uint64) float64 { return float64(v) }
@ -121,7 +103,7 @@ type PrometheusCollector struct {
// ContainerLabelsFunc specifies which base labels will be attached to all
// exported metrics. If left to nil, the DefaultContainerLabels function
// will be used instead.
func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetrics container.MetricSet) *PrometheusCollector {
func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetrics container.MetricSet, now clock.Clock) *PrometheusCollector {
if f == nil {
f = DefaultContainerLabels
}
@ -140,8 +122,8 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetri
valueType: prometheus.GaugeValue,
getValues: func(s *info.ContainerStats) metricValues {
return metricValues{{
value: float64(time.Now().Unix()),
timestamp: time.Now(),
value: float64(now.Now().Unix()),
timestamp: now.Now(),
}}
},
},
@ -1562,16 +1544,66 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetri
},
},
}...)
}
if c.includedMetrics.Has(container.PerfMetrics) {
c.containerMetrics = append(c.containerMetrics, []containerMetric{
{
name: "container_perf_metric",
help: "Perf event metric",
valueType: prometheus.CounterValue,
extraLabels: []string{"cpu", "event"},
getValues: func(s *info.ContainerStats) metricValues {
values := make(metricValues, 0, len(s.PerfStats))
for _, metric := range s.PerfStats {
values = append(values, metricValue{
value: float64(metric.Value),
labels: []string{strconv.Itoa(metric.Cpu), metric.Name},
timestamp: s.Timestamp,
})
}
return values
},
},
{
name: "container_perf_metric_scaling_ratio",
help: "Perf event metric scaling ratio",
valueType: prometheus.GaugeValue,
extraLabels: []string{"cpu", "event"},
getValues: func(s *info.ContainerStats) metricValues {
values := make(metricValues, 0, len(s.PerfStats))
for _, metric := range s.PerfStats {
values = append(values, metricValue{
value: metric.ScalingRatio,
labels: []string{strconv.Itoa(metric.Cpu), metric.Name},
timestamp: s.Timestamp,
})
}
return values
},
},
}...)
}
if includedMetrics.Has(container.ReferencedMemoryMetrics) {
c.containerMetrics = append(c.containerMetrics, []containerMetric{
{
name: "container_referenced_bytes",
help: "Container referenced bytes during last measurements cycle",
valueType: prometheus.GaugeValue,
getValues: func(s *info.ContainerStats) metricValues {
return metricValues{{value: float64(s.ReferencedMemory), timestamp: s.Timestamp}}
},
},
}...)
}
return c
}
var (
versionInfoDesc = prometheus.NewDesc("cadvisor_version_info", "A metric with a constant '1' value labeled by kernel version, OS version, docker version, cadvisor version & cadvisor revision.", []string{"kernelVersion", "osVersion", "dockerVersion", "cadvisorVersion", "cadvisorRevision"}, nil)
machineInfoCoresDesc = prometheus.NewDesc("machine_cpu_cores", "Number of CPU cores on the machine.", nil, nil)
machineInfoMemoryDesc = prometheus.NewDesc("machine_memory_bytes", "Amount of memory installed on the machine.", nil, nil)
versionInfoDesc = prometheus.NewDesc("cadvisor_version_info", "A metric with a constant '1' value labeled by kernel version, OS version, docker version, cadvisor version & cadvisor revision.", []string{"kernelVersion", "osVersion", "dockerVersion", "cadvisorVersion", "cadvisorRevision"}, nil)
startTimeDesc = prometheus.NewDesc("container_start_time_seconds", "Start time of the container since unix epoch in seconds.", nil, nil)
cpuPeriodDesc = prometheus.NewDesc("container_spec_cpu_period", "CPU period of the container.", nil, nil)
cpuQuotaDesc = prometheus.NewDesc("container_spec_cpu_quota", "CPU quota of the container.", nil, nil)
cpuSharesDesc = prometheus.NewDesc("container_spec_cpu_shares", "CPU share of the container.", nil, nil)
)
// Describe describes all the metrics ever exported by cadvisor. It
@ -1581,16 +1613,17 @@ func (c *PrometheusCollector) Describe(ch chan<- *prometheus.Desc) {
for _, cm := range c.containerMetrics {
ch <- cm.desc([]string{})
}
ch <- startTimeDesc
ch <- cpuPeriodDesc
ch <- cpuQuotaDesc
ch <- cpuSharesDesc
ch <- versionInfoDesc
ch <- machineInfoCoresDesc
ch <- machineInfoMemoryDesc
}
// Collect fetches the stats from all containers and delivers them as
// Prometheus metrics. It implements prometheus.PrometheusCollector.
func (c *PrometheusCollector) Collect(ch chan<- prometheus.Metric) {
c.errors.Set(0)
c.collectMachineInfo(ch)
c.collectVersionInfo(ch)
c.collectContainersInfo(ch)
c.errors.Collect(ch)
@ -1745,7 +1778,6 @@ func (c *PrometheusCollector) collectContainersInfo(ch chan<- prometheus.Metric)
}
}
}
}
func (c *PrometheusCollector) collectVersionInfo(ch chan<- prometheus.Metric) {
@ -1758,17 +1790,6 @@ func (c *PrometheusCollector) collectVersionInfo(ch chan<- prometheus.Metric) {
ch <- prometheus.MustNewConstMetric(versionInfoDesc, prometheus.GaugeValue, 1, []string{versionInfo.KernelVersion, versionInfo.ContainerOsVersion, versionInfo.DockerVersion, versionInfo.CadvisorVersion, versionInfo.CadvisorRevision}...)
}
func (c *PrometheusCollector) collectMachineInfo(ch chan<- prometheus.Metric) {
machineInfo, err := c.infoProvider.GetMachineInfo()
if err != nil {
c.errors.Set(1)
klog.Warningf("Couldn't get machine info: %s", err)
return
}
ch <- prometheus.MustNewConstMetric(machineInfoCoresDesc, prometheus.GaugeValue, float64(machineInfo.NumCores))
ch <- prometheus.MustNewConstMetric(machineInfoMemoryDesc, prometheus.GaugeValue, float64(machineInfo.MemoryCapacity))
}
// Size after which we consider memory to be "unlimited". This is not
// MaxInt64 due to rounding by the kernel.
const maxMemorySize = uint64(1 << 62)
@ -1780,10 +1801,10 @@ func specMemoryValue(v uint64) float64 {
return float64(v)
}
var invalidLabelCharRE = regexp.MustCompile(`[^a-zA-Z0-9_]`)
var invalidNameCharRE = regexp.MustCompile(`[^a-zA-Z0-9_]`)
// sanitizeLabelName replaces anything that doesn't match
// client_label.LabelNameRE with an underscore.
func sanitizeLabelName(name string) string {
return invalidLabelCharRE.ReplaceAllString(name, "_")
return invalidNameCharRE.ReplaceAllString(name, "_")
}
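For reference, the rename from invalidLabelCharRE to invalidNameCharRE changes nothing behaviorally: anything outside [a-zA-Z0-9_] still becomes an underscore, which is how container label and env keys such as the "foo.label"/"foo+env" fixtures elsewhere in this change turn into valid Prometheus names. A standalone copy to illustrate:
package main
import (
"fmt"
"regexp"
)
// Local copy of the rule above, for illustration only.
var invalidNameCharRE = regexp.MustCompile(`[^a-zA-Z0-9_]`)
func sanitizeLabelName(name string) string {
return invalidNameCharRE.ReplaceAllString(name, "_")
}
func main() {
fmt.Println(sanitizeLabelName("foo.label")) // foo_label
fmt.Println(sanitizeLabelName("foo+env"))   // foo_env
}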


@ -0,0 +1,683 @@
// Copyright 2020 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package metrics
import (
"errors"
"time"
info "github.com/google/cadvisor/info/v1"
)
type testSubcontainersInfoProvider struct{}
func (p testSubcontainersInfoProvider) GetVersionInfo() (*info.VersionInfo, error) {
return &info.VersionInfo{
KernelVersion: "4.1.6-200.fc22.x86_64",
ContainerOsVersion: "Fedora 22 (Twenty Two)",
DockerVersion: "1.8.1",
CadvisorVersion: "0.16.0",
CadvisorRevision: "abcdef",
}, nil
}
func (p testSubcontainersInfoProvider) GetMachineInfo() (*info.MachineInfo, error) {
return &info.MachineInfo{
Timestamp: time.Unix(1395066363, 0),
NumCores: 4,
NumPhysicalCores: 1,
NumSockets: 1,
MemoryCapacity: 1024,
MemoryByType: map[string]*info.MemoryInfo{
"Non-volatile-RAM": {Capacity: 2168421613568, DimmCount: 8},
"Unbuffered-DDR4": {Capacity: 412316860416, DimmCount: 12},
},
NVMInfo: info.NVMInfo{
MemoryModeCapacity: 429496729600,
AppDirectModeCapacity: 1735166787584,
},
MachineID: "machine-id-test",
SystemUUID: "system-uuid-test",
BootID: "boot-id-test",
Topology: []info.Node{
{
Id: 0,
Memory: 33604804608,
HugePages: []info.HugePagesInfo{
{
PageSize: uint64(1048576),
NumPages: uint64(0),
},
{
PageSize: uint64(2048),
NumPages: uint64(0),
},
},
Cores: []info.Core{
{
Id: 0,
Threads: []int{0, 1},
Caches: []info.Cache{
{
Size: 32768,
Type: "Data",
Level: 1,
},
{
Size: 32768,
Type: "Instruction",
Level: 1,
},
{
Size: 262144,
Type: "Unified",
Level: 2,
},
},
},
{
Id: 1,
Threads: []int{2, 3},
Caches: []info.Cache{
{
Size: 32764,
Type: "Data",
Level: 1,
},
{
Size: 32764,
Type: "Instruction",
Level: 1,
},
{
Size: 262148,
Type: "Unified",
Level: 2,
},
},
},
{
Id: 2,
Threads: []int{4, 5},
Caches: []info.Cache{
{
Size: 32768,
Type: "Data",
Level: 1,
},
{
Size: 32768,
Type: "Instruction",
Level: 1,
},
{
Size: 262144,
Type: "Unified",
Level: 2,
},
},
},
{
Id: 3,
Threads: []int{6, 7},
Caches: []info.Cache{
{
Size: 32764,
Type: "Data",
Level: 1,
},
{
Size: 32764,
Type: "Instruction",
Level: 1,
},
{
Size: 262148,
Type: "Unified",
Level: 2,
},
},
},
},
},
{
Id: 1,
Memory: 33604804606,
HugePages: []info.HugePagesInfo{
{
PageSize: uint64(1048576),
NumPages: uint64(2),
},
{
PageSize: uint64(2048),
NumPages: uint64(4),
},
},
Cores: []info.Core{
{
Id: 4,
Threads: []int{8, 9},
Caches: []info.Cache{
{
Size: 32768,
Type: "Data",
Level: 1,
},
{
Size: 32768,
Type: "Instruction",
Level: 1,
},
{
Size: 262144,
Type: "Unified",
Level: 2,
},
},
},
{
Id: 5,
Threads: []int{10, 11},
Caches: []info.Cache{
{
Size: 32764,
Type: "Data",
Level: 1,
},
{
Size: 32764,
Type: "Instruction",
Level: 1,
},
{
Size: 262148,
Type: "Unified",
Level: 2,
},
},
},
{
Id: 6,
Threads: []int{12, 13},
Caches: []info.Cache{
{
Size: 32768,
Type: "Data",
Level: 1,
},
{
Size: 32768,
Type: "Instruction",
Level: 1,
},
{
Size: 262144,
Type: "Unified",
Level: 2,
},
},
},
{
Id: 7,
Threads: []int{14, 15},
Caches: []info.Cache{
{
Size: 32764,
Type: "Data",
Level: 1,
},
{
Size: 32764,
Type: "Instruction",
Level: 1,
},
{
Size: 262148,
Type: "Unified",
Level: 2,
},
},
},
},
Caches: []info.Cache{
{
Size: 8388608,
Type: "Unified",
Level: 3,
},
},
},
},
}, nil
}
func (p testSubcontainersInfoProvider) SubcontainersInfo(string, *info.ContainerInfoRequest) ([]*info.ContainerInfo, error) {
return []*info.ContainerInfo{
{
ContainerReference: info.ContainerReference{
Name: "testcontainer",
Aliases: []string{"testcontaineralias"},
},
Spec: info.ContainerSpec{
Image: "test",
HasCpu: true,
Cpu: info.CpuSpec{
Limit: 1000,
Period: 100000,
Quota: 10000,
},
Memory: info.MemorySpec{
Limit: 2048,
Reservation: 1024,
SwapLimit: 4096,
},
HasHugetlb: true,
HasProcesses: true,
Processes: info.ProcessSpec{
Limit: 100,
},
CreationTime: time.Unix(1257894000, 0),
Labels: map[string]string{
"foo.label": "bar",
},
Envs: map[string]string{
"foo+env": "prod",
},
},
Stats: []*info.ContainerStats{
{
Timestamp: time.Unix(1395066363, 0),
Cpu: info.CpuStats{
Usage: info.CpuUsage{
Total: 1,
PerCpu: []uint64{2, 3, 4, 5},
User: 6,
System: 7,
},
CFS: info.CpuCFS{
Periods: 723,
ThrottledPeriods: 18,
ThrottledTime: 1724314000,
},
Schedstat: info.CpuSchedstat{
RunTime: 53643567,
RunqueueTime: 479424566378,
RunPeriods: 984285,
},
LoadAverage: 2,
},
Memory: info.MemoryStats{
Usage: 8,
MaxUsage: 8,
WorkingSet: 9,
ContainerData: info.MemoryStatsMemoryData{
Pgfault: 10,
Pgmajfault: 11,
},
HierarchicalData: info.MemoryStatsMemoryData{
Pgfault: 12,
Pgmajfault: 13,
},
Cache: 14,
RSS: 15,
MappedFile: 16,
Swap: 8192,
},
Hugetlb: map[string]info.HugetlbStats{
"2Mi": {
Usage: 4,
MaxUsage: 10,
Failcnt: 1,
},
"1Gi": {
Usage: 0,
MaxUsage: 0,
Failcnt: 0,
},
},
Network: info.NetworkStats{
InterfaceStats: info.InterfaceStats{
Name: "eth0",
RxBytes: 14,
RxPackets: 15,
RxErrors: 16,
RxDropped: 17,
TxBytes: 18,
TxPackets: 19,
TxErrors: 20,
TxDropped: 21,
},
Interfaces: []info.InterfaceStats{
{
Name: "eth0",
RxBytes: 14,
RxPackets: 15,
RxErrors: 16,
RxDropped: 17,
TxBytes: 18,
TxPackets: 19,
TxErrors: 20,
TxDropped: 21,
},
},
Tcp: info.TcpStat{
Established: 13,
SynSent: 0,
SynRecv: 0,
FinWait1: 0,
FinWait2: 0,
TimeWait: 0,
Close: 0,
CloseWait: 0,
LastAck: 0,
Listen: 3,
Closing: 0,
},
Tcp6: info.TcpStat{
Established: 11,
SynSent: 0,
SynRecv: 0,
FinWait1: 0,
FinWait2: 0,
TimeWait: 0,
Close: 0,
CloseWait: 0,
LastAck: 0,
Listen: 3,
Closing: 0,
},
TcpAdvanced: info.TcpAdvancedStat{
TCPFullUndo: 2361,
TCPMD5NotFound: 0,
TCPDSACKRecv: 83680,
TCPSackShifted: 2,
TCPSackShiftFallback: 298,
PFMemallocDrop: 0,
EstabResets: 37,
InSegs: 140370590,
TCPPureAcks: 24251339,
TCPDSACKOldSent: 15633,
IPReversePathFilter: 0,
TCPFastOpenPassiveFail: 0,
InCsumErrors: 0,
TCPRenoFailures: 43414,
TCPMemoryPressuresChrono: 0,
TCPDeferAcceptDrop: 0,
TW: 10436427,
TCPSpuriousRTOs: 0,
TCPDSACKIgnoredNoUndo: 71885,
RtoMax: 120000,
ActiveOpens: 11038621,
EmbryonicRsts: 0,
RcvPruned: 0,
TCPLossProbeRecovery: 401,
TCPHPHits: 56096478,
TCPPartialUndo: 3,
TCPAbortOnMemory: 0,
AttemptFails: 48997,
RetransSegs: 462961,
SyncookiesFailed: 0,
OfoPruned: 0,
TCPAbortOnLinger: 0,
TCPAbortFailed: 0,
TCPRenoReorder: 839,
TCPRcvCollapsed: 0,
TCPDSACKIgnoredOld: 0,
TCPReqQFullDrop: 0,
OutOfWindowIcmps: 0,
TWKilled: 0,
TCPLossProbes: 88648,
TCPRenoRecoveryFail: 394,
TCPFastOpenCookieReqd: 0,
TCPHPAcks: 21490641,
TCPSACKReneging: 0,
TCPTSReorder: 3,
TCPSlowStartRetrans: 290832,
MaxConn: -1,
SyncookiesRecv: 0,
TCPSackFailures: 60,
DelayedACKLocked: 90,
TCPDSACKOfoSent: 1,
TCPSynRetrans: 988,
TCPDSACKOfoRecv: 10,
TCPSACKDiscard: 0,
TCPMD5Unexpected: 0,
TCPSackMerged: 6,
RtoMin: 200,
CurrEstab: 22,
TCPTimeWaitOverflow: 0,
ListenOverflows: 0,
DelayedACKs: 503975,
TCPLossUndo: 61374,
TCPOrigDataSent: 130698387,
TCPBacklogDrop: 0,
TCPReqQFullDoCookies: 0,
TCPFastOpenPassive: 0,
PAWSActive: 0,
OutRsts: 91699,
TCPSackRecoveryFail: 2,
DelayedACKLost: 18843,
TCPAbortOnData: 8,
TCPMinTTLDrop: 0,
PruneCalled: 0,
TWRecycled: 0,
ListenDrops: 0,
TCPAbortOnTimeout: 0,
SyncookiesSent: 0,
TCPSACKReorder: 11,
TCPDSACKUndo: 33,
TCPMD5Failure: 0,
TCPLostRetransmit: 0,
TCPAbortOnClose: 7,
TCPFastOpenListenOverflow: 0,
OutSegs: 211580512,
InErrs: 31,
TCPTimeouts: 27422,
TCPLossFailures: 729,
TCPSackRecovery: 159,
RtoAlgorithm: 1,
PassiveOpens: 59,
LockDroppedIcmps: 0,
TCPRenoRecovery: 3519,
TCPFACKReorder: 0,
TCPFastRetrans: 11794,
TCPRetransFail: 0,
TCPMemoryPressures: 0,
TCPFastOpenActive: 0,
TCPFastOpenActiveFail: 0,
PAWSEstab: 0,
},
Udp: info.UdpStat{
Listen: 0,
Dropped: 0,
RxQueued: 0,
TxQueued: 0,
},
Udp6: info.UdpStat{
Listen: 0,
Dropped: 0,
RxQueued: 0,
TxQueued: 0,
},
},
Filesystem: []info.FsStats{
{
Device: "sda1",
InodesFree: 524288,
Inodes: 2097152,
Limit: 22,
Usage: 23,
ReadsCompleted: 24,
ReadsMerged: 25,
SectorsRead: 26,
ReadTime: 27,
WritesCompleted: 28,
WritesMerged: 39,
SectorsWritten: 40,
WriteTime: 41,
IoInProgress: 42,
IoTime: 43,
WeightedIoTime: 44,
},
{
Device: "sda2",
InodesFree: 262144,
Inodes: 2097152,
Limit: 37,
Usage: 38,
ReadsCompleted: 39,
ReadsMerged: 40,
SectorsRead: 41,
ReadTime: 42,
WritesCompleted: 43,
WritesMerged: 44,
SectorsWritten: 45,
WriteTime: 46,
IoInProgress: 47,
IoTime: 48,
WeightedIoTime: 49,
},
},
Accelerators: []info.AcceleratorStats{
{
Make: "nvidia",
Model: "tesla-p100",
ID: "GPU-deadbeef-1234-5678-90ab-feedfacecafe",
MemoryTotal: 20304050607,
MemoryUsed: 2030405060,
DutyCycle: 12,
},
{
Make: "nvidia",
Model: "tesla-k80",
ID: "GPU-deadbeef-0123-4567-89ab-feedfacecafe",
MemoryTotal: 10203040506,
MemoryUsed: 1020304050,
DutyCycle: 6,
},
},
Processes: info.ProcessStats{
ProcessCount: 1,
FdCount: 5,
SocketCount: 3,
ThreadsCurrent: 5,
ThreadsMax: 100,
Ulimits: []info.UlimitSpec{
{
Name: "max_open_files",
SoftLimit: 16384,
HardLimit: 16384,
},
},
},
TaskStats: info.LoadStats{
NrSleeping: 50,
NrRunning: 51,
NrStopped: 52,
NrUninterruptible: 53,
NrIoWait: 54,
},
CustomMetrics: map[string][]info.MetricVal{
"container_custom_app_metric_1": {
{
FloatValue: float64(1.1),
Timestamp: time.Now(),
Label: "testlabel_1_1_1",
Labels: map[string]string{"test_label": "1_1", "test_label_2": "2_1"},
},
{
FloatValue: float64(1.2),
Timestamp: time.Now(),
Label: "testlabel_1_1_2",
Labels: map[string]string{"test_label": "1_2", "test_label_2": "2_2"},
},
},
"container_custom_app_metric_2": {
{
FloatValue: float64(2),
Timestamp: time.Now(),
Label: "testlabel2",
Labels: map[string]string{"test_label": "test_value"},
},
},
"container_custom_app_metric_3": {
{
FloatValue: float64(3),
Timestamp: time.Now(),
Label: "testlabel3",
Labels: map[string]string{"test_label": "test_value"},
},
},
},
PerfStats: []info.PerfStat{
{
ScalingRatio: 1.0,
Value: 123,
Name: "instructions",
Cpu: 0,
},
{
ScalingRatio: 0.5,
Value: 456,
Name: "instructions",
Cpu: 1,
},
{
ScalingRatio: 0.66666666666,
Value: 321,
Name: "instructions_retired",
Cpu: 0,
},
{
ScalingRatio: 0.33333333333,
Value: 789,
Name: "instructions_retired",
Cpu: 1,
},
},
ReferencedMemory: 1234,
},
},
},
}, nil
}
type erroringSubcontainersInfoProvider struct {
successfulProvider testSubcontainersInfoProvider
shouldFail bool
}
func (p *erroringSubcontainersInfoProvider) GetVersionInfo() (*info.VersionInfo, error) {
if p.shouldFail {
return nil, errors.New("Oops 1")
}
return p.successfulProvider.GetVersionInfo()
}
func (p *erroringSubcontainersInfoProvider) GetMachineInfo() (*info.MachineInfo, error) {
if p.shouldFail {
return nil, errors.New("Oops 2")
}
return p.successfulProvider.GetMachineInfo()
}
func (p *erroringSubcontainersInfoProvider) SubcontainersInfo(
a string, r *info.ContainerInfoRequest) ([]*info.ContainerInfo, error) {
if p.shouldFail {
return []*info.ContainerInfo{}, errors.New("Oops 3")
}
return p.successfulProvider.SubcontainersInfo(a, r)
}


@ -0,0 +1,349 @@
// Copyright 2020 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package metrics
import (
"strconv"
"github.com/google/cadvisor/container"
info "github.com/google/cadvisor/info/v1"
"github.com/prometheus/client_golang/prometheus"
"k8s.io/klog/v2"
)
var baseLabelsNames = []string{"machine_id", "system_uuid", "boot_id"}
const (
prometheusModeLabelName = "mode"
prometheusTypeLabelName = "type"
prometheusLevelLabelName = "level"
prometheusNodeLabelName = "node_id"
prometheusCoreLabelName = "core_id"
prometheusThreadLabelName = "thread_id"
prometheusPageSizeLabelName = "page_size"
nvmMemoryMode = "memory_mode"
nvmAppDirectMode = "app_direct_mode"
memoryByTypeDimmCountKey = "DimmCount"
memoryByTypeDimmCapacityKey = "Capacity"
emptyLabelValue = ""
)
// machineMetric describes a multi-dimensional metric used for exposing a
// certain type of machine statistic.
type machineMetric struct {
name string
help string
valueType prometheus.ValueType
extraLabels []string
condition func(machineInfo *info.MachineInfo) bool
getValues func(machineInfo *info.MachineInfo) metricValues
}
func (metric *machineMetric) desc(baseLabels []string) *prometheus.Desc {
return prometheus.NewDesc(metric.name, metric.help, append(baseLabels, metric.extraLabels...), nil)
}
// PrometheusMachineCollector implements prometheus.Collector.
type PrometheusMachineCollector struct {
infoProvider infoProvider
errors prometheus.Gauge
machineMetrics []machineMetric
}
// NewPrometheusMachineCollector returns a new PrometheusCollector.
func NewPrometheusMachineCollector(i infoProvider, includedMetrics container.MetricSet) *PrometheusMachineCollector {
c := &PrometheusMachineCollector{
infoProvider: i,
errors: prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "machine",
Name: "scrape_error",
Help: "1 if there was an error while getting machine metrics, 0 otherwise.",
}),
machineMetrics: []machineMetric{
{
name: "machine_cpu_physical_cores",
help: "Number of physical CPU cores.",
valueType: prometheus.GaugeValue,
getValues: func(machineInfo *info.MachineInfo) metricValues {
return metricValues{{value: float64(machineInfo.NumPhysicalCores), timestamp: machineInfo.Timestamp}}
},
},
{
name: "machine_cpu_cores",
help: "Number of logical CPU cores.",
valueType: prometheus.GaugeValue,
getValues: func(machineInfo *info.MachineInfo) metricValues {
return metricValues{{value: float64(machineInfo.NumCores), timestamp: machineInfo.Timestamp}}
},
},
{
name: "machine_cpu_sockets",
help: "Number of CPU sockets.",
valueType: prometheus.GaugeValue,
getValues: func(machineInfo *info.MachineInfo) metricValues {
return metricValues{{value: float64(machineInfo.NumSockets), timestamp: machineInfo.Timestamp}}
},
},
{
name: "machine_memory_bytes",
help: "Amount of memory installed on the machine.",
valueType: prometheus.GaugeValue,
getValues: func(machineInfo *info.MachineInfo) metricValues {
return metricValues{{value: float64(machineInfo.MemoryCapacity), timestamp: machineInfo.Timestamp}}
},
},
{
name: "machine_dimm_count",
help: "Number of RAM DIMM (all types memory modules) value labeled by dimm type.",
valueType: prometheus.GaugeValue,
extraLabels: []string{prometheusTypeLabelName},
condition: func(machineInfo *info.MachineInfo) bool { return len(machineInfo.MemoryByType) != 0 },
getValues: func(machineInfo *info.MachineInfo) metricValues {
return getMemoryByType(machineInfo, memoryByTypeDimmCountKey)
},
},
{
name: "machine_dimm_capacity_bytes",
help: "Total RAM DIMM capacity (all types memory modules) value labeled by dimm type.",
valueType: prometheus.GaugeValue,
extraLabels: []string{prometheusTypeLabelName},
condition: func(machineInfo *info.MachineInfo) bool { return len(machineInfo.MemoryByType) != 0 },
getValues: func(machineInfo *info.MachineInfo) metricValues {
return getMemoryByType(machineInfo, memoryByTypeDimmCapacityKey)
},
},
{
name: "machine_nvm_capacity",
help: "NVM capacity value labeled by NVM mode (memory mode or app direct mode).",
valueType: prometheus.GaugeValue,
extraLabels: []string{prometheusModeLabelName},
getValues: func(machineInfo *info.MachineInfo) metricValues {
return metricValues{
{value: float64(machineInfo.NVMInfo.MemoryModeCapacity), labels: []string{nvmMemoryMode}, timestamp: machineInfo.Timestamp},
{value: float64(machineInfo.NVMInfo.AppDirectModeCapacity), labels: []string{nvmAppDirectMode}, timestamp: machineInfo.Timestamp},
}
},
},
{
name: "machine_nvm_avg_power_budget_watts",
help: "NVM power budget.",
valueType: prometheus.GaugeValue,
getValues: func(machineInfo *info.MachineInfo) metricValues {
return metricValues{{value: float64(machineInfo.NVMInfo.AvgPowerBudget), timestamp: machineInfo.Timestamp}}
},
},
},
}
if includedMetrics.Has(container.CPUTopologyMetrics) {
c.machineMetrics = append(c.machineMetrics, []machineMetric{
{
name: "machine_cpu_cache_capacity_bytes",
help: "Cache size in bytes assigned to NUMA node and CPU core.",
valueType: prometheus.GaugeValue,
extraLabels: []string{prometheusNodeLabelName, prometheusCoreLabelName, prometheusTypeLabelName, prometheusLevelLabelName},
getValues: func(machineInfo *info.MachineInfo) metricValues {
return getCaches(machineInfo)
},
},
{
name: "machine_thread_siblings_count",
help: "Number of CPU thread siblings.",
valueType: prometheus.GaugeValue,
extraLabels: []string{prometheusNodeLabelName, prometheusCoreLabelName, prometheusThreadLabelName},
getValues: func(machineInfo *info.MachineInfo) metricValues {
return getThreadsSiblingsCount(machineInfo)
},
},
{
name: "machine_node_memory_capacity_bytes",
help: "Amount of memory assigned to NUMA node.",
valueType: prometheus.GaugeValue,
extraLabels: []string{prometheusNodeLabelName},
getValues: func(machineInfo *info.MachineInfo) metricValues {
return getNodeMemory(machineInfo)
},
},
{
name: "machine_node_hugepages_count",
help: "Numer of hugepages assigned to NUMA node.",
valueType: prometheus.GaugeValue,
extraLabels: []string{prometheusNodeLabelName, prometheusPageSizeLabelName},
getValues: func(machineInfo *info.MachineInfo) metricValues {
return getHugePagesCount(machineInfo)
},
},
}...)
}
return c
}
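Because PrometheusMachineCollector implements prometheus.Collector, wiring it up is a single MustRegister call. A minimal sketch, assuming only what this file shows: the unexported infoProvider interface is satisfied structurally, so any type with the three methods can be passed, and an empty MetricSet simply skips the CPUTopologyMetrics block above. stubProvider is a hypothetical stand-in for the real manager:
package main
import (
"net/http"
"github.com/google/cadvisor/container"
info "github.com/google/cadvisor/info/v1"
"github.com/google/cadvisor/metrics"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
)
// stubProvider satisfies infoProvider with canned data.
type stubProvider struct{}
func (stubProvider) SubcontainersInfo(string, *info.ContainerInfoRequest) ([]*info.ContainerInfo, error) {
return nil, nil
}
func (stubProvider) GetVersionInfo() (*info.VersionInfo, error) {
return &info.VersionInfo{}, nil
}
func (stubProvider) GetMachineInfo() (*info.MachineInfo, error) {
return &info.MachineInfo{NumCores: 4, MemoryCapacity: 1 << 30}, nil
}
func main() {
reg := prometheus.NewRegistry()
reg.MustRegister(metrics.NewPrometheusMachineCollector(stubProvider{}, container.MetricSet{}))
http.Handle("/metrics", promhttp.HandlerFor(reg, promhttp.HandlerOpts{}))
_ = http.ListenAndServe(":8080", nil)
}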
// Describe describes all the machine metrics ever exported by cadvisor. It
// implements prometheus.PrometheusCollector.
func (collector *PrometheusMachineCollector) Describe(ch chan<- *prometheus.Desc) {
collector.errors.Describe(ch)
for _, metric := range collector.machineMetrics {
ch <- metric.desc([]string{})
}
}
// Collect fetches information about machine and delivers them as
// Prometheus metrics. It implements prometheus.PrometheusCollector.
func (collector *PrometheusMachineCollector) Collect(ch chan<- prometheus.Metric) {
collector.errors.Set(0)
collector.collectMachineInfo(ch)
collector.errors.Collect(ch)
}
func (collector *PrometheusMachineCollector) collectMachineInfo(ch chan<- prometheus.Metric) {
machineInfo, err := collector.infoProvider.GetMachineInfo()
if err != nil {
collector.errors.Set(1)
klog.Warningf("Couldn't get machine info: %s", err)
return
}
baseLabelsValues := []string{machineInfo.MachineID, machineInfo.SystemUUID, machineInfo.BootID}
for _, metric := range collector.machineMetrics {
if metric.condition != nil && !metric.condition(machineInfo) {
continue
}
for _, metricValue := range metric.getValues(machineInfo) {
labelValues := make([]string, len(baseLabelsValues))
copy(labelValues, baseLabelsValues)
if len(metric.extraLabels) != 0 {
labelValues = append(labelValues, metricValue.labels...)
}
prometheusMetric := prometheus.MustNewConstMetric(metric.desc(baseLabelsNames),
metric.valueType, metricValue.value, labelValues...)
if metricValue.timestamp.IsZero() {
ch <- prometheusMetric
} else {
ch <- prometheus.NewMetricWithTimestamp(metricValue.timestamp, prometheusMetric)
}
}
}
}
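Note that label values are positional: collectMachineInfo always emits the three base labels first, and metricValue.labels must then appear in the same order as the metric's extraLabels. A compact illustration using the hugepages metric, with values drawn from the test fixture in this change:
package main
import "fmt"
func main() {
// machine_node_hugepages_count declares extraLabels
// {node_id, page_size}; each metricValue appends its values in that
// same order after the base labels.
names := []string{"machine_id", "system_uuid", "boot_id", "node_id", "page_size"}
values := []string{"machine-id-test", "system-uuid-test", "boot-id-test", "1", "2048"}
for i := range names {
fmt.Printf("%s=%q\n", names[i], values[i])
}
}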
func getMemoryByType(machineInfo *info.MachineInfo, property string) metricValues {
mValues := make(metricValues, 0, len(machineInfo.MemoryByType))
for memoryType, memoryInfo := range machineInfo.MemoryByType {
propertyValue := 0.0
switch property {
case memoryByTypeDimmCapacityKey:
propertyValue = float64(memoryInfo.Capacity)
case memoryByTypeDimmCountKey:
propertyValue = float64(memoryInfo.DimmCount)
default:
klog.Warningf("Incorrect propery name for MemoryByType, property %s", property)
return metricValues{}
}
mValues = append(mValues, metricValue{value: propertyValue, labels: []string{memoryType}, timestamp: machineInfo.Timestamp})
}
return mValues
}
func getThreadsSiblingsCount(machineInfo *info.MachineInfo) metricValues {
mValues := make(metricValues, 0, machineInfo.NumCores)
for _, node := range machineInfo.Topology {
nodeID := strconv.Itoa(node.Id)
for _, core := range node.Cores {
coreID := strconv.Itoa(core.Id)
siblingsCount := len(core.Threads)
for _, thread := range core.Threads {
mValues = append(mValues,
metricValue{
value: float64(siblingsCount),
labels: []string{nodeID, coreID, strconv.Itoa(thread)},
timestamp: machineInfo.Timestamp,
})
}
}
}
return mValues
}
func getNodeMemory(machineInfo *info.MachineInfo) metricValues {
mValues := make(metricValues, 0, len(machineInfo.Topology))
for _, node := range machineInfo.Topology {
nodeID := strconv.Itoa(node.Id)
mValues = append(mValues,
metricValue{
value: float64(node.Memory),
labels: []string{nodeID},
timestamp: machineInfo.Timestamp,
})
}
return mValues
}
func getHugePagesCount(machineInfo *info.MachineInfo) metricValues {
mValues := make(metricValues, 0)
for _, node := range machineInfo.Topology {
nodeID := strconv.Itoa(node.Id)
for _, hugePage := range node.HugePages {
mValues = append(mValues,
metricValue{
value: float64(hugePage.NumPages),
labels: []string{nodeID, strconv.FormatUint(hugePage.PageSize, 10)},
timestamp: machineInfo.Timestamp,
})
}
}
return mValues
}
func getCaches(machineInfo *info.MachineInfo) metricValues {
mValues := make(metricValues, 0)
for _, node := range machineInfo.Topology {
nodeID := strconv.Itoa(node.Id)
for _, core := range node.Cores {
coreID := strconv.Itoa(core.Id)
for _, cache := range core.Caches {
mValues = append(mValues,
metricValue{
value: float64(cache.Size),
labels: []string{nodeID, coreID, cache.Type, strconv.Itoa(cache.Level)},
timestamp: machineInfo.Timestamp,
})
}
}
for _, cache := range node.Caches {
mValues = append(mValues,
metricValue{
value: float64(cache.Size),
labels: []string{nodeID, emptyLabelValue, cache.Type, strconv.Itoa(cache.Level)},
timestamp: machineInfo.Timestamp,
})
}
}
return mValues
}

vendor/github.com/google/cadvisor/nvm/BUILD generated vendored Normal file

@ -0,0 +1,28 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")
go_library(
name = "go_default_library",
srcs = ["machine_no_libipmctl.go"],
cgo = True,
importmap = "k8s.io/kubernetes/vendor/github.com/google/cadvisor/nvm",
importpath = "github.com/google/cadvisor/nvm",
visibility = ["//visibility:public"],
deps = [
"//vendor/github.com/google/cadvisor/info/v1:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)


@ -14,26 +14,43 @@
// See the License for the specific language governing permissions and
// limitations under the License.
package machine
package nvm
// #cgo pkg-config: libipmctl
// #include <nvm_management.h>
import "C"
import (
"fmt"
info "github.com/google/cadvisor/info/v1"
"sync"
"k8s.io/klog"
"k8s.io/klog/v2"
)
// getNVMAvgPowerBudget retrieves configured power budget
var (
isNVMLibInitialized = false
nvmLibMutex = sync.Mutex{}
)
func init() {
nvmLibMutex.Lock()
defer nvmLibMutex.Unlock()
cErr := C.nvm_init()
if cErr != C.NVM_SUCCESS {
// Unfortunately klog does not seem to work here. I believe it's better to
// output information using fmt rather than let it disappear silently.
fmt.Printf("libipmctl initialization failed with status %d\n", cErr)
}
isNVMLibInitialized = true
}
// getAvgPowerBudget retrieves configured power budget
// (in watts) for NVM devices. When libipmctl is not available
// zero is returned.
func getNVMAvgPowerBudget() (uint, error) {
func getAvgPowerBudget() (uint, error) {
// Get number of devices on the platform
// see: https://github.com/intel/ipmctl/blob/v01.00.00.3497/src/os/nvm_api/nvm_management.h#L1478
var count C.uint
count := C.uint(0)
err := C.nvm_get_number_of_devices(&count)
if err != C.NVM_SUCCESS {
klog.Warningf("Unable to get number of NVM devices. Status code: %d", err)
@ -42,7 +59,7 @@ func getNVMAvgPowerBudget() (uint, error) {
// Load basic device information for all the devices
// to obtain UID of the first one.
var devices = make([]C.struct_device_discovery, count)
devices := make([]C.struct_device_discovery, count)
err = C.nvm_get_devices(&devices[0], C.uchar(count))
if err != C.NVM_SUCCESS {
klog.Warningf("Unable to get all NVM devices. Status code: %d", err)
@ -51,7 +68,7 @@ func getNVMAvgPowerBudget() (uint, error) {
// Power budget is same for all the devices
// so we can rely on any of them.
var device C.struct_device_details
device := C.struct_device_details{}
err = C.nvm_get_device_details(&devices[0].uid[0], &device)
if err != C.NVM_SUCCESS {
uid := C.GoString(&devices[0].uid[0])
@ -62,9 +79,9 @@ func getNVMAvgPowerBudget() (uint, error) {
return uint(device.avg_power_budget / 1000), nil
}
// getNVMCapacities retrieves the total NVM capacity in bytes for memory mode and app direct mode
func getNVMCapacities() (uint64, uint64, error) {
var caps C.struct_device_capacities
// getCapacities retrieves the total NVM capacity in bytes for memory mode and app direct mode
func getCapacities() (uint64, uint64, error) {
caps := C.struct_device_capacities{}
err := C.nvm_get_nvm_capacities(&caps)
if err != C.NVM_SUCCESS {
klog.Warningf("Unable to get NVM capacity. Status code: %d", err)
@ -73,26 +90,41 @@ func getNVMCapacities() (uint64, uint64, error) {
return uint64(caps.memory_capacity), uint64(caps.app_direct_capacity), nil
}
// GetNVMInfo returns information specific for non-volatile memory modules
func GetNVMInfo() (info.NVMInfo, error) {
// GetInfo returns information specific for non-volatile memory modules
func GetInfo() (info.NVMInfo, error) {
nvmLibMutex.Lock()
defer nvmLibMutex.Unlock()
nvmInfo := info.NVMInfo{}
// Initialize libipmctl library.
cErr := C.nvm_init()
if cErr != C.NVM_SUCCESS {
klog.Warningf("libipmctl initialization failed with status %d", cErr)
return info.NVMInfo{}, fmt.Errorf("libipmctl initialization failed with status %d", cErr)
if !isNVMLibInitialized {
klog.V(1).Info("libimpctl has not been initialized. NVM information will not be available")
return nvmInfo, nil
}
defer C.nvm_uninit()
var err error
nvmInfo.MemoryModeCapacity, nvmInfo.AppDirectModeCapacity, err = getNVMCapacities()
nvmInfo.MemoryModeCapacity, nvmInfo.AppDirectModeCapacity, err = getCapacities()
if err != nil {
return info.NVMInfo{}, fmt.Errorf("Unable to get NVM capacities, err: %s", err)
}
nvmInfo.AvgPowerBudget, err = getNVMAvgPowerBudget()
nvmInfo.AvgPowerBudget, err = getAvgPowerBudget()
if err != nil {
return info.NVMInfo{}, fmt.Errorf("Unable to get NVM average power budget, err: %s", err)
}
return nvmInfo, nil
}
// Finalize un-initializes libipmctl. See https://github.com/google/cadvisor/issues/2457.
func Finalize() {
nvmLibMutex.Lock()
defer nvmLibMutex.Unlock()
klog.V(1).Info("Attempting to un-initialize libipmctl")
if !isNVMLibInitialized {
klog.V(1).Info("libipmctl has not been initialized; not un-initializing.")
return
}
C.nvm_uninit()
isNVMLibInitialized = false
}
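The intended lifecycle, then: nvm_init runs once in the package init above, GetInfo can be called repeatedly under the mutex, and a long-running binary un-initializes the library exactly once on shutdown via Finalize (see cadvisor issue #2457). A sketch of a caller, assuming nothing beyond the exported functions in this file:
package main
import (
"fmt"
"github.com/google/cadvisor/nvm"
)
func main() {
// Pair the implicit init() with an explicit Finalize on exit.
defer nvm.Finalize()
nvmInfo, err := nvm.GetInfo()
if err != nil {
fmt.Println("NVM information unavailable:", err)
return
}
fmt.Println("memory mode capacity:", nvmInfo.MemoryModeCapacity)
}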


@ -0,0 +1,34 @@
// +build !libipmctl !cgo
// Copyright 2020 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package nvm
import (
info "github.com/google/cadvisor/info/v1"
"k8s.io/klog/v2"
)
// GetInfo returns information specific for non-volatile memory modules.
// When libipmctl is not available zero value is returned.
func GetInfo() (info.NVMInfo, error) {
return info.NVMInfo{}, nil
}
// Finalize un-initializes libipmctl. See https://github.com/google/cadvisor/issues/2457.
// When libipmctl is not available it just logs that it's being called.
func Finalize() {
klog.V(4).Info("libimpctl not available, doing nothing.")
}

vendor/github.com/google/cadvisor/perf/BUILD generated vendored Normal file

@ -0,0 +1,32 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")
go_library(
name = "go_default_library",
srcs = [
"collector_no_libpfm.go",
"config.go",
"manager_no_libpfm.go",
],
cgo = True,
importmap = "k8s.io/kubernetes/vendor/github.com/google/cadvisor/perf",
importpath = "github.com/google/cadvisor/perf",
visibility = ["//visibility:public"],
deps = [
"//vendor/github.com/google/cadvisor/stats:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)


@ -0,0 +1,261 @@
// +build libpfm,cgo
// Copyright 2020 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Collector of perf events for a container.
package perf
// #cgo CFLAGS: -I/usr/include
// #cgo LDFLAGS: -lpfm
// #include <perfmon/pfmlib.h>
// #include <stdlib.h>
import "C"
import (
"bytes"
"encoding/binary"
"fmt"
"os"
"sync"
"unsafe"
info "github.com/google/cadvisor/info/v1"
"golang.org/x/sys/unix"
"k8s.io/klog/v2"
)
type collector struct {
cgroupPath string
events Events
cpuFiles map[string]map[int]readerCloser
cpuFilesLock sync.Mutex
numCores int
eventToCustomEvent map[Event]*CustomEvent
}
var (
isLibpfmInitialized = false
libpmfMutex = sync.Mutex{}
)
func init() {
libpmfMutex.Lock()
defer libpmfMutex.Unlock()
pErr := C.pfm_initialize()
if pErr != C.PFM_SUCCESS {
fmt.Printf("unable to initialize libpfm: %d", int(pErr))
return
}
isLibpfmInitialized = true
}
func newCollector(cgroupPath string, events Events, numCores int) *collector {
collector := &collector{cgroupPath: cgroupPath, events: events, cpuFiles: map[string]map[int]readerCloser{}, numCores: numCores}
mapEventsToCustomEvents(collector)
return collector
}
func (c *collector) UpdateStats(stats *info.ContainerStats) error {
c.cpuFilesLock.Lock()
defer c.cpuFilesLock.Unlock()
stats.PerfStats = []info.PerfStat{}
klog.V(5).Infof("Attempting to update perf_event stats from cgroup %q", c.cgroupPath)
for name, files := range c.cpuFiles {
for cpu, file := range files {
buf := make([]byte, 32)
_, err := file.Read(buf)
if err != nil {
klog.Warningf("Unable to read from perf_event file (event: %q, CPU: %d) for %q", name, cpu, c.cgroupPath)
continue
}
perfData := &ReadFormat{}
reader := bytes.NewReader(buf)
err = binary.Read(reader, binary.LittleEndian, perfData)
if err != nil {
klog.Warningf("Unable to decode from binary format read from perf_event file (event: %q, CPU: %d) for %q", name, cpu, c.cgroupPath)
continue
}
klog.V(5).Infof("Read metric for event %q for cpu %d from cgroup %q: %d", name, cpu, c.cgroupPath, perfData.Value)
scalingRatio := 1.0
if perfData.TimeEnabled != 0 {
scalingRatio = float64(perfData.TimeRunning) / float64(perfData.TimeEnabled)
}
stat := info.PerfStat{
Value: uint64(float64(perfData.Value) / scalingRatio),
Name: name,
ScalingRatio: scalingRatio,
Cpu: cpu,
}
stats.PerfStats = append(stats.PerfStats, stat)
}
}
return nil
}
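The scaling ratio above compensates for PMU counter multiplexing: when the kernel could only run the event for part of the time it was enabled, TimeRunning < TimeEnabled, and the raw count is extrapolated by dividing by the ratio, exactly as UpdateStats does. A worked example with hypothetical numbers:
package main
import "fmt"
func main() {
// The counter was enabled for 100ms but only running for 50ms,
// during which it counted 456 raw events.
var value, timeEnabled, timeRunning uint64 = 456, 100_000_000, 50_000_000
scalingRatio := float64(timeRunning) / float64(timeEnabled) // 0.5
extrapolated := uint64(float64(value) / scalingRatio)       // 912
fmt.Println(scalingRatio, extrapolated)
}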
func (c *collector) setup() error {
cgroup, err := os.Open(c.cgroupPath)
if err != nil {
return fmt.Errorf("unable to open cgroup directory %s: %s", c.cgroupPath, err)
}
defer cgroup.Close()
c.cpuFilesLock.Lock()
defer c.cpuFilesLock.Unlock()
cgroupFd := int(cgroup.Fd())
for _, group := range c.events.Events {
customEvent, ok := c.eventToCustomEvent[group[0]]
var err error
if ok {
err = c.setupRawNonGrouped(customEvent, cgroupFd)
} else {
err = c.setupNonGrouped(string(group[0]), cgroupFd)
}
if err != nil {
return err
}
}
return nil
}
func (c *collector) setupRawNonGrouped(event *CustomEvent, cgroup int) error {
klog.V(5).Infof("Setting up non-grouped raw perf event %#v", event)
config := createPerfEventAttr(*event)
return c.registerEvent(config, string(event.Name), cgroup)
}
func (c *collector) registerEvent(config *unix.PerfEventAttr, name string, cgroup int) error {
for cpu := 0; cpu < c.numCores; cpu++ {
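// With PERF_FLAG_PID_CGROUP, perf_event_open(2) interprets pid as a cgroup
// directory fd and requires cpu >= 0, hence one fd per (event, CPU) pair.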
pid, groupFd, flags := cgroup, -1, unix.PERF_FLAG_FD_CLOEXEC|unix.PERF_FLAG_PID_CGROUP
fd, err := unix.PerfEventOpen(config, pid, cpu, groupFd, flags)
if err != nil {
return fmt.Errorf("setting up perf event %#v failed: %q", config, err)
}
perfFile := os.NewFile(uintptr(fd), name)
if perfFile == nil {
return fmt.Errorf("unable to create os.File from file descriptor %#v", fd)
}
c.addEventFile(name, cpu, perfFile)
}
return nil
}
func (c *collector) addEventFile(name string, cpu int, perfFile *os.File) {
_, ok := c.cpuFiles[name]
if !ok {
c.cpuFiles[name] = map[int]readerCloser{}
}
c.cpuFiles[name][cpu] = perfFile
}
func (c *collector) setupNonGrouped(name string, cgroup int) error {
if !isLibpfmInitialized {
return fmt.Errorf("libpfm4 is not initialized, cannot proceed with setting perf events up")
}
klog.V(5).Infof("Setting up non-grouped perf event %s", name)
perfEventAttrMemory := C.malloc(C.ulong(unsafe.Sizeof(unix.PerfEventAttr{})))
defer C.free(perfEventAttrMemory)
event := pfmPerfEncodeArgT{}
perfEventAttr := (*unix.PerfEventAttr)(perfEventAttrMemory)
fstr := C.CString("")
event.fstr = unsafe.Pointer(fstr)
event.attr = perfEventAttrMemory
event.size = C.ulong(unsafe.Sizeof(event))
cSafeName := C.CString(name)
pErr := C.pfm_get_os_event_encoding(cSafeName, C.PFM_PLM0|C.PFM_PLM3, C.PFM_OS_PERF_EVENT, unsafe.Pointer(&event))
if pErr != C.PFM_SUCCESS {
return fmt.Errorf("unable to transform event name %s to perf_event_attr: %d", name, int(pErr))
}
klog.V(5).Infof("perf_event_attr: %#v", perfEventAttr)
setAttributes(perfEventAttr)
return c.registerEvent(perfEventAttr, name, cgroup)
}
func createPerfEventAttr(event CustomEvent) *unix.PerfEventAttr {
length := len(event.Config)
config := &unix.PerfEventAttr{
Type: event.Type,
Config: event.Config[0],
}
if length >= 2 {
config.Ext1 = event.Config[1]
}
if length == 3 {
config.Ext2 = event.Config[2]
}
setAttributes(config)
klog.V(5).Infof("perf_event_attr struct prepared: %#v", config)
return config
}
func setAttributes(config *unix.PerfEventAttr) {
config.Sample_type = perfSampleIdentifier
config.Read_format = unix.PERF_FORMAT_TOTAL_TIME_ENABLED | unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_ID
config.Bits = perfAttrBitsInherit | perfAttrBitsExcludeGuest
config.Size = uint32(unsafe.Sizeof(unix.PerfEventAttr{}))
}
func (c *collector) Destroy() {
c.cpuFilesLock.Lock()
defer c.cpuFilesLock.Unlock()
for name, files := range c.cpuFiles {
for cpu, file := range files {
klog.V(5).Infof("Closing perf_event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu)
err := file.Close()
if err != nil {
klog.Warningf("Unable to close perf_event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu)
}
}
delete(c.cpuFiles, name)
}
}
// Finalize terminates libpfm4 to free resources.
func Finalize() {
libpfmMutex.Lock()
defer libpfmMutex.Unlock()
klog.V(1).Info("Attempting to terminate libpfm4")
if !isLibpfmInitialized {
klog.V(1).Info("libpfm4 has not been initialized; not terminating.")
return
}
C.pfm_terminate()
isLibpfmInitialized = false
}
func mapEventsToCustomEvents(collector *collector) {
collector.eventToCustomEvent = map[Event]*CustomEvent{}
for key, event := range collector.events.CustomEvents {
collector.eventToCustomEvent[event.Name] = &collector.events.CustomEvents[key]
}
}


@ -0,0 +1,33 @@
// +build !libpfm !cgo
// Copyright 2020 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Collector of perf events for a container.
package perf
import (
"github.com/google/cadvisor/stats"
"k8s.io/klog/v2"
)
func NewCollector(cgroupPath string, events Events, numCores int) stats.Collector {
return &stats.NoopCollector{}
}
// Finalize terminates libpfm4 to free resources.
func Finalize() {
klog.V(1).Info("cAdvisor is build without cgo and/or libpfm support. Nothing to be finalized")
}

vendor/github.com/google/cadvisor/perf/config.go

@ -0,0 +1,84 @@
// Copyright 2020 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Configuration for perf event manager.
package perf
import (
"encoding/json"
"fmt"
"os"
"strconv"
"k8s.io/klog/v2"
)
type Events struct {
// List of perf event names to be measured. Any value found in the
// output of perf list can be used.
Events [][]Event `json:"events"`
// List of custom perf events to be measured. Some events cannot be
// specified by name; in such cases the lower-level configuration
// must be provided here.
CustomEvents []CustomEvent `json:"custom_events"`
}
type Event string
type CustomEvent struct {
// Type of the event. See perf_event_attr documentation
// at man perf_event_open.
Type uint32 `json:"type"`
// Symbolically formed event like:
// pmu/config=PerfEvent.Config[0],config1=PerfEvent.Config[1],config2=PerfEvent.Config[2]
// as described in man perf-stat.
Config Config `json:"config"`
// Human readable name of metric that will be created from the event.
Name Event `json:"name"`
}
type Config []uint64
func (c *Config) UnmarshalJSON(b []byte) error {
config := []string{}
err := json.Unmarshal(b, &config)
if err != nil {
klog.Errorf("Unmarshalling %s into slice of strings failed: %q", b, err)
return fmt.Errorf("unmarshalling %s into slice of strings failed: %q", b, err)
}
intermediate := []uint64{}
for _, v := range config {
uintValue, err := strconv.ParseUint(v, 0, 64)
if err != nil {
klog.Errorf("Parsing %#v into uint64 failed: %q", v, err)
return fmt.Errorf("parsing %#v into uint64 failed: %q", v, err)
}
intermediate = append(intermediate, uintValue)
}
*c = intermediate
return nil
}
func parseConfig(file *os.File) (events Events, err error) {
decoder := json.NewDecoder(file)
err = decoder.Decode(&events)
if err != nil {
err = fmt.Errorf("unable to load perf events cofiguration from %q: %q", file.Name(), err)
return
}
return
}
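
For illustration, a minimal configuration that parseConfig would accept; the event names and raw values here are made up. Note that Config entries are JSON strings parsed with strconv.ParseUint(v, 0, 64), so hex literals work:

{
  "events": [["instructions"], ["cache-misses"]],
  "custom_events": [
    {"type": 4, "config": ["0x5301c7"], "name": "example_raw_event"}
  ]
}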


@ -0,0 +1,72 @@
// +build libpfm,cgo
// Copyright 2020 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Manager of perf events for containers.
package perf
import (
"fmt"
"os"
"github.com/google/cadvisor/stats"
)
type manager struct {
events Events
numCores int
stats.NoopDestroy
}
func NewManager(configFile string, numCores int) (stats.Manager, error) {
if configFile == "" {
return &stats.NoopManager{}, nil
}
file, err := os.Open(configFile)
if err != nil {
return nil, fmt.Errorf("Unable to read configuration file %q: %q", configFile, err)
}
config, err := parseConfig(file)
if err != nil {
return nil, fmt.Errorf("Unable to read configuration file %q: %q", configFile, err)
}
if areGroupedEventsUsed(config) {
return nil, fmt.Errorf("event grouping is not supported you must modify config file at %s", configFile)
}
return &manager{events: config, numCores: numCores}, nil
}
func areGroupedEventsUsed(events Events) bool {
for _, group := range events.Events {
if len(group) > 1 {
return true
}
}
return false
}
func (m *manager) GetCollector(cgroupPath string) (stats.Collector, error) {
collector := newCollector(cgroupPath, m.events, m.numCores)
err := collector.setup()
if err != nil {
collector.Destroy()
return &stats.NoopCollector{}, err
}
return collector, nil
}


@ -1,4 +1,4 @@
// +build !libipmctl !cgo
// +build !libpfm !cgo
// Copyright 2020 Google Inc. All Rights Reserved.
//
@ -14,12 +14,16 @@
// See the License for the specific language governing permissions and
// limitations under the License.
package machine
// Manager of perf events for containers.
package perf
import info "github.com/google/cadvisor/info/v1"
import (
"github.com/google/cadvisor/stats"
// GetNVMInfo returns information specific for non-volatile memory modules.
// When libipmct is not available zero value is returned.
func GetNVMInfo() (info.NVMInfo, error) {
return info.NVMInfo{}, nil
"k8s.io/klog/v2"
)
func NewManager(configFile string, numCores int) (stats.Manager, error) {
klog.V(1).Info("cAdvisor is build without cgo and/or libpfm support. Perf event counters are not available.")
return &stats.NoopManager{}, nil
}

vendor/github.com/google/cadvisor/perf/types_libpfm.go

@ -0,0 +1,54 @@
// +build libpfm,cgo
// Copyright 2020 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Types related to handling perf events that are missing from the unix package.
package perf
import "C"
import (
"io"
"unsafe"
)
const (
perfSampleIdentifier = 1 << 16
perfAttrBitsInherit = 1 << 1
perfAttrBitsExcludeGuest = 1 << 20
)
// ReadFormat allows reading a perf event's value for non-grouped events.
type ReadFormat struct {
Value uint64 /* The value of the event */
TimeEnabled uint64 /* if PERF_FORMAT_TOTAL_TIME_ENABLED */
TimeRunning uint64 /* if PERF_FORMAT_TOTAL_TIME_RUNNING */
ID uint64 /* if PERF_FORMAT_ID */
}
// pfmPerfEncodeArgT represents the structure used to parse a perf event name
// into perf_event_attr using libpfm.
type pfmPerfEncodeArgT struct {
attr unsafe.Pointer
fstr unsafe.Pointer
size C.size_t
_ C.int // idx
_ C.int // cpu
_ C.int // flags
}
type readerCloser interface {
io.Reader
io.Closer
}
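
The 32-byte buffer read in UpdateStats maps onto these four little-endian uint64 fields, matching the Read_format flags set in setAttributes. A self-contained decoding sketch using a fabricated buffer:

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

// readFormat mirrors the ReadFormat struct above.
type readFormat struct {
	Value, TimeEnabled, TimeRunning, ID uint64
}

func main() {
	// Fabricated 32 bytes standing in for a read(2) from a perf event fd.
	buf := new(bytes.Buffer)
	for _, v := range []uint64{1000, 200, 100, 42} {
		_ = binary.Write(buf, binary.LittleEndian, v)
	}
	var rf readFormat
	_ = binary.Read(buf, binary.LittleEndian, &rf)
	fmt.Printf("%+v\n", rf) // {Value:1000 TimeEnabled:200 TimeRunning:100 ID:42}
}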


@ -2,11 +2,17 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library")
go_library(
name = "go_default_library",
srcs = ["types.go"],
srcs = [
"noop.go",
"types.go",
],
importmap = "k8s.io/kubernetes/vendor/github.com/google/cadvisor/stats",
importpath = "github.com/google/cadvisor/stats",
visibility = ["//visibility:public"],
deps = ["//vendor/github.com/google/cadvisor/info/v1:go_default_library"],
deps = [
"//vendor/github.com/google/cadvisor/info/v1:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
],
)
filegroup(

vendor/github.com/google/cadvisor/stats/noop.go

@ -0,0 +1,43 @@
// Copyright 2020 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Noop perf Manager and Collector.
package stats
import (
v1 "github.com/google/cadvisor/info/v1"
"k8s.io/klog/v2"
)
type NoopManager struct {
NoopDestroy
}
type NoopDestroy struct{}
func (nsd NoopDestroy) Destroy() {
klog.V(5).Info("No-op Destroy function called")
}
func (m *NoopManager) GetCollector(cgroup string) (Collector, error) {
return &NoopCollector{}, nil
}
type NoopCollector struct {
NoopDestroy
}
func (c *NoopCollector) UpdateStats(stats *v1.ContainerStats) error {
return nil
}


@ -18,18 +18,18 @@ package stats
import info "github.com/google/cadvisor/info/v1"
// This is supposed to store global state about a cAdvisor metrics collector.
// cAdvisor manager will call Setup() when it starts and Destroy() when it stops.
// cAdvisor manager will call Destroy() when it stops.
// For each container detected by the cAdvisor manager, it will call
// GetCollector() with the devices cgroup path for that container.
// GetCollector() is supposed to return an object that can update
// accelerator stats for that container.
type Manager interface {
Setup()
Destroy()
GetCollector(deviceCgroup string) (Collector, error)
}
// Collector can update ContainerStats by adding more metrics.
type Collector interface {
Destroy()
UpdateStats(*info.ContainerStats) error
}


@ -30,26 +30,26 @@ const secondsToNanoSeconds = secondsToMilliSeconds * milliSecondsToNanoSeconds
type Uint64Slice []uint64
func (a Uint64Slice) Len() int { return len(a) }
func (a Uint64Slice) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a Uint64Slice) Less(i, j int) bool { return a[i] < a[j] }
func (s Uint64Slice) Len() int { return len(s) }
func (s Uint64Slice) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func (s Uint64Slice) Less(i, j int) bool { return s[i] < s[j] }
// Get percentile of the provided samples. Round to integer.
func (self Uint64Slice) GetPercentile(d float64) uint64 {
func (s Uint64Slice) GetPercentile(d float64) uint64 {
if d < 0.0 || d > 1.0 {
return 0
}
count := self.Len()
count := s.Len()
if count == 0 {
return 0
}
sort.Sort(self)
sort.Sort(s)
n := float64(d * (float64(count) + 1))
idx, frac := math.Modf(n)
index := int(idx)
percentile := float64(self[index-1])
percentile := float64(s[index-1])
if index > 1 && index < count {
percentile += frac * float64(self[index]-self[index-1])
percentile += frac * float64(s[index]-s[index-1])
}
return uint64(percentile)
}
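
A worked case of the interpolation above, assuming this file's Uint64Slice and hypothetical samples:

s := Uint64Slice{10, 20, 30, 40, 50, 60, 70, 80, 90, 100}
// d = 0.9: n = 0.9*(10+1) = 9.9, so index = 9 and frac = 0.9;
// result = s[8] + 0.9*(s[9]-s[8]) = 90 + 9 = 99
fmt.Println(s.GetPercentile(0.9)) // 99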
@ -61,15 +61,21 @@ type mean struct {
Mean float64
}
func (self *mean) Add(value uint64) {
self.count++
if self.count == 1 {
self.Mean = float64(value)
func (m *mean) Add(value uint64) {
m.count++
if m.count == 1 {
m.Mean = float64(value)
return
}
c := float64(self.count)
c := float64(m.count)
v := float64(value)
self.Mean = (self.Mean*(c-1) + v) / c
m.Mean = (m.Mean*(c-1) + v) / c
}
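
The update above is the standard incremental mean, Mean_c = (Mean_{c-1}*(c-1) + v) / c. For example, adding 10, 20, 30 in sequence:

var m mean
for _, v := range []uint64{10, 20, 30} {
	m.Add(v)
}
// m.Mean after each Add: 10, then (10*1+20)/2 = 15, then (15*2+30)/3 = 20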
type Percentile interface {
Add(info.Percentiles)
AddSample(uint64)
GetAllPercentiles() info.Percentiles
}
type resource struct {
@ -82,20 +88,20 @@ type resource struct {
}
// Adds a new percentile sample.
func (self *resource) Add(p info.Percentiles) {
func (r *resource) Add(p info.Percentiles) {
if !p.Present {
return
}
if p.Max > self.max {
self.max = p.Max
if p.Max > r.max {
r.max = p.Max
}
self.mean.Add(p.Mean)
r.mean.Add(p.Mean)
// Selecting 90p of 90p :(
self.samples = append(self.samples, p.Ninety)
r.samples = append(r.samples, p.Ninety)
}
// Add a single sample. Internally, we convert it to a fake percentile sample.
func (self *resource) AddSample(val uint64) {
func (r *resource) AddSample(val uint64) {
sample := info.Percentiles{
Present: true,
Mean: val,
@ -104,22 +110,22 @@ func (self *resource) AddSample(val uint64) {
Ninety: val,
NinetyFive: val,
}
self.Add(sample)
r.Add(sample)
}
// Get max, average, and 90p from existing samples.
func (self *resource) GetAllPercentiles() info.Percentiles {
func (r *resource) GetAllPercentiles() info.Percentiles {
p := info.Percentiles{}
p.Mean = uint64(self.mean.Mean)
p.Max = self.max
p.Fifty = self.samples.GetPercentile(0.5)
p.Ninety = self.samples.GetPercentile(0.9)
p.NinetyFive = self.samples.GetPercentile(0.95)
p.Mean = uint64(r.mean.Mean)
p.Max = r.max
p.Fifty = r.samples.GetPercentile(0.5)
p.Ninety = r.samples.GetPercentile(0.9)
p.NinetyFive = r.samples.GetPercentile(0.95)
p.Present = true
return p
}
func NewResource(size int) *resource {
func NewResource(size int) Percentile {
return &resource{
samples: make(Uint64Slice, 0, size),
mean: mean{count: 0, Mean: 0},
@ -155,9 +161,8 @@ func getPercentComplete(stats []*secondSample) (percent int32) {
}
// Calculate cpurate from two consecutive total cpu usage samples.
func getCpuRate(latest, previous secondSample) (uint64, error) {
var elapsed int64
elapsed = latest.Timestamp.Sub(previous.Timestamp).Nanoseconds()
func getCPURate(latest, previous secondSample) (uint64, error) {
elapsed := latest.Timestamp.Sub(previous.Timestamp).Nanoseconds()
if elapsed < 10*milliSecondsToNanoSeconds {
return 0, fmt.Errorf("elapsed time too small: %d ns: time now %s last %s", elapsed, latest.Timestamp.String(), previous.Timestamp.String())
}
@ -176,7 +181,7 @@ func GetMinutePercentiles(stats []*secondSample) info.Usage {
memory := NewResource(len(stats))
for _, stat := range stats {
if !lastSample.Timestamp.IsZero() {
cpuRate, err := getCpuRate(*stat, lastSample)
cpuRate, err := getCPURate(*stat, lastSample)
if err != nil {
continue
}


@ -103,7 +103,7 @@ func (s *StatsSummary) updateLatestUsage() {
usage.Memory = latest.Memory
if numStats > 1 {
previous := s.secondSamples[numStats-2]
cpu, err := getCpuRate(*latest, *previous)
cpu, err := getCPURate(*latest, *previous)
if err == nil {
usage.Cpu = cpu
}
@ -113,7 +113,6 @@ func (s *StatsSummary) updateLatestUsage() {
defer s.dataLock.Unlock()
s.derivedStats.LatestUsage = usage
s.derivedStats.Timestamp = latest.Timestamp
return
}
// Generate new derived stats based on current minute stats samples.
@ -152,7 +151,7 @@ func (s *StatsSummary) getDerivedUsage(n int) (info.Usage, error) {
samples := s.minuteSamples.RecentStats(n)
numSamples := len(samples)
if numSamples < 1 {
return info.Usage{}, fmt.Errorf("failed to retrieve any minute stats.")
return info.Usage{}, fmt.Errorf("failed to retrieve any minute stats")
}
// We generate derived stats even with partial data.
usage := GetDerivedPercentiles(samples)
@ -178,7 +177,7 @@ func New(spec v1.ContainerSpec) (*StatsSummary, error) {
summary.available.Memory = true
}
if !summary.available.Cpu && !summary.available.Memory {
return nil, fmt.Errorf("none of the resources are being tracked.")
return nil, fmt.Errorf("none of the resources are being tracked")
}
summary.minuteSamples = NewSamplesBuffer(60 /* one hour */)
return &summary, nil


@ -8,7 +8,7 @@ go_library(
visibility = ["//visibility:public"],
deps = [
"//vendor/github.com/google/cadvisor/info/v1:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
],
)


@ -57,7 +57,11 @@ func fileContainsAmazonIdentifier(filename string) bool {
}
func getAwsMetadata(name string) string {
client := ec2metadata.New(session.New(&aws.Config{}))
sess, err := session.NewSession(&aws.Config{})
if err != nil {
return info.UnknownInstance
}
client := ec2metadata.New(sess)
data, err := client.GetMetadata(name)
if err != nil {
return info.UnknownInstance


@ -18,7 +18,7 @@ package cloudinfo
import (
info "github.com/google/cadvisor/info/v1"
"k8s.io/klog"
"k8s.io/klog/v2"
)
type CloudInfo interface {
@ -75,14 +75,14 @@ func NewRealCloudInfo() CloudInfo {
}
}
func (self *realCloudInfo) GetCloudProvider() info.CloudProvider {
return self.cloudProvider
func (i *realCloudInfo) GetCloudProvider() info.CloudProvider {
return i.cloudProvider
}
func (self *realCloudInfo) GetInstanceType() info.InstanceType {
return self.instanceType
func (i *realCloudInfo) GetInstanceType() info.InstanceType {
return i.instanceType
}
func (self *realCloudInfo) GetInstanceID() info.InstanceID {
return self.instanceID
func (i *realCloudInfo) GetInstanceID() info.InstanceID {
return i.instanceID
}


@ -10,7 +10,7 @@ go_library(
"//vendor/cloud.google.com/go/compute/metadata:go_default_library",
"//vendor/github.com/google/cadvisor/info/v1:go_default_library",
"//vendor/github.com/google/cadvisor/utils/cloudinfo:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
],
)


@ -22,7 +22,7 @@ import (
"github.com/google/cadvisor/utils/cloudinfo"
"cloud.google.com/go/compute/metadata"
"k8s.io/klog"
"k8s.io/klog/v2"
)
const (


@ -9,7 +9,7 @@ go_library(
deps = [
"//vendor/github.com/google/cadvisor/info/v1:go_default_library",
"//vendor/github.com/google/cadvisor/utils/cpuload/netlink:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
],
)


@ -20,7 +20,7 @@ import (
info "github.com/google/cadvisor/info/v1"
"github.com/google/cadvisor/utils/cpuload/netlink"
"k8s.io/klog"
"k8s.io/klog/v2"
)
type CpuLoadReader interface {


@ -13,7 +13,7 @@ go_library(
deps = [
"//vendor/github.com/google/cadvisor/info/v1:go_default_library",
"//vendor/golang.org/x/sys/unix:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
],
)


@ -55,41 +55,44 @@ func newConnection() (*Connection, error) {
return conn, err
}
func (self *Connection) Read(b []byte) (n int, err error) {
n, _, err = syscall.Recvfrom(self.fd, b, 0)
func (c *Connection) Read(b []byte) (n int, err error) {
n, _, err = syscall.Recvfrom(c.fd, b, 0)
return n, err
}
func (self *Connection) Write(b []byte) (n int, err error) {
err = syscall.Sendto(self.fd, b, 0, &self.addr)
func (c *Connection) Write(b []byte) (n int, err error) {
err = syscall.Sendto(c.fd, b, 0, &c.addr)
return len(b), err
}
func (self *Connection) Close() error {
return syscall.Close(self.fd)
func (c *Connection) Close() error {
return syscall.Close(c.fd)
}
func (self *Connection) WriteMessage(msg syscall.NetlinkMessage) error {
func (c *Connection) WriteMessage(msg syscall.NetlinkMessage) error {
w := bytes.NewBuffer(nil)
msg.Header.Len = uint32(syscall.NLMSG_HDRLEN + len(msg.Data))
msg.Header.Seq = self.seq
self.seq++
msg.Header.Pid = self.pid
binary.Write(w, binary.LittleEndian, msg.Header)
_, err := w.Write(msg.Data)
msg.Header.Seq = c.seq
c.seq++
msg.Header.Pid = c.pid
err := binary.Write(w, binary.LittleEndian, msg.Header)
if err != nil {
return err
}
_, err = self.Write(w.Bytes())
_, err = w.Write(msg.Data)
if err != nil {
return err
}
_, err = c.Write(w.Bytes())
return err
}
func (self *Connection) ReadMessage() (msg syscall.NetlinkMessage, err error) {
err = binary.Read(self.rbuf, binary.LittleEndian, &msg.Header)
func (c *Connection) ReadMessage() (msg syscall.NetlinkMessage, err error) {
err = binary.Read(c.rbuf, binary.LittleEndian, &msg.Header)
if err != nil {
return msg, err
}
msg.Data = make([]byte, msg.Header.Len-syscall.NLMSG_HDRLEN)
_, err = self.rbuf.Read(msg.Data)
_, err = c.rbuf.Read(msg.Data)
return msg, err
}


@ -27,6 +27,7 @@ import (
var (
// TODO(rjnagal): Verify and fix for other architectures.
Endian = binary.LittleEndian
)
@ -42,11 +43,11 @@ type netlinkMessage struct {
Data []byte
}
func (self netlinkMessage) toRawMsg() (rawmsg syscall.NetlinkMessage) {
rawmsg.Header = self.Header
func (m netlinkMessage) toRawMsg() (rawmsg syscall.NetlinkMessage) {
rawmsg.Header = m.Header
w := bytes.NewBuffer([]byte{})
binary.Write(w, Endian, self.GenHeader)
w.Write(self.Data)
binary.Write(w, Endian, m.GenHeader)
w.Write(m.Data)
rawmsg.Data = w.Bytes()
return rawmsg
}
@ -64,9 +65,12 @@ func padding(size int, alignment int) int {
}
// Get family id for taskstats subsystem.
func getFamilyId(conn *Connection) (uint16, error) {
func getFamilyID(conn *Connection) (uint16, error) {
msg := prepareFamilyMessage()
conn.WriteMessage(msg.toRawMsg())
err := conn.WriteMessage(msg.toRawMsg())
if err != nil {
return 0, err
}
resp, err := conn.ReadMessage()
if err != nil {
@ -164,7 +168,7 @@ func parseFamilyResp(msg syscall.NetlinkMessage) (uint16, error) {
return 0, err
}
}
return 0, fmt.Errorf("family id not found in the response.")
return 0, fmt.Errorf("family id not found in the response")
}
// Extract task stats from response returned by kernel.
@ -203,7 +207,10 @@ func verifyHeader(msg syscall.NetlinkMessage) error {
case syscall.NLMSG_ERROR:
buf := bytes.NewBuffer(msg.Data)
var errno int32
binary.Read(buf, Endian, errno)
err := binary.Read(buf, Endian, &errno)
if err != nil {
return err
}
return fmt.Errorf("netlink request failed with error %s", syscall.Errno(-errno))
}
return nil


@ -20,11 +20,11 @@ import (
info "github.com/google/cadvisor/info/v1"
"k8s.io/klog"
"k8s.io/klog/v2"
)
type NetlinkReader struct {
familyId uint16
familyID uint16
conn *Connection
}
@ -34,24 +34,24 @@ func New() (*NetlinkReader, error) {
return nil, fmt.Errorf("failed to create a new connection: %s", err)
}
id, err := getFamilyId(conn)
id, err := getFamilyID(conn)
if err != nil {
return nil, fmt.Errorf("failed to get netlink family id for task stats: %s", err)
}
klog.V(4).Infof("Family id for taskstats: %d", id)
return &NetlinkReader{
familyId: id,
familyID: id,
conn: conn,
}, nil
}
func (self *NetlinkReader) Stop() {
if self.conn != nil {
self.conn.Close()
func (r *NetlinkReader) Stop() {
if r.conn != nil {
r.conn.Close()
}
}
func (self *NetlinkReader) Start() error {
func (r *NetlinkReader) Start() error {
// We do the start setup for netlink in New(). Nothing to do here.
return nil
}
@ -60,9 +60,9 @@ func (self *NetlinkReader) Start() error {
// Caller can use historical data to calculate cpu load.
// path is an absolute filesystem path for a container under the CPU cgroup hierarchy.
// NOTE: non-hierarchical load is returned. It does not include load for subcontainers.
func (self *NetlinkReader) GetCpuLoad(name string, path string) (info.LoadStats, error) {
func (r *NetlinkReader) GetCpuLoad(name string, path string) (info.LoadStats, error) {
if len(path) == 0 {
return info.LoadStats{}, fmt.Errorf("cgroup path can not be empty!")
return info.LoadStats{}, fmt.Errorf("cgroup path can not be empty")
}
cfd, err := os.Open(path)
@ -71,7 +71,7 @@ func (self *NetlinkReader) GetCpuLoad(name string, path string) (info.LoadStats,
}
defer cfd.Close()
stats, err := getLoadStats(self.familyId, cfd, self.conn)
stats, err := getLoadStats(r.familyID, cfd, r.conn)
if err != nil {
return info.LoadStats{}, err
}

Some files were not shown because too many files have changed in this diff.