Bump cAdvisor to v0.43.0
Bumping cAdvisor from v0.39.2 -> v0.43.0 * Also pin transitive dependencies * containerd v1.4.9 -> v1.4.11 * docker v20.10.2+incompatible -> v20.10.7+incompatible Signed-off-by: David Porter <david@porter.me>
This commit is contained in:
9
vendor/github.com/google/cadvisor/manager/container.go
generated
vendored
9
vendor/github.com/google/cadvisor/manager/container.go
generated
vendored
@@ -27,6 +27,7 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/google/cadvisor/cache/memory"
|
||||
@@ -102,6 +103,8 @@ type containerData struct {
|
||||
|
||||
// resctrlCollector updates stats for resctrl controller.
|
||||
resctrlCollector stats.Collector
|
||||
|
||||
oomEvents uint64
|
||||
}
|
||||
|
||||
// jitter returns a time.Duration between duration and duration + maxFactor * duration,
|
||||
@@ -127,6 +130,7 @@ func (cd *containerData) Stop() error {
|
||||
}
|
||||
close(cd.stop)
|
||||
cd.perfCollector.Destroy()
|
||||
cd.resctrlCollector.Destroy()
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -668,6 +672,9 @@ func (cd *containerData) updateStats() error {
|
||||
klog.V(2).Infof("Failed to add summary stats for %q: %v", cd.info.Name, err)
|
||||
}
|
||||
}
|
||||
|
||||
stats.OOMEvents = atomic.LoadUint64(&cd.oomEvents)
|
||||
|
||||
var customStatsErr error
|
||||
cm := cd.collectorManager.(*collector.GenericCollectorManager)
|
||||
if len(cm.Collectors) > 0 {
|
||||
@@ -721,7 +728,7 @@ func (cd *containerData) updateStats() error {
|
||||
return perfStatsErr
|
||||
}
|
||||
if resctrlStatsErr != nil {
|
||||
klog.Errorf("error occurred while collecting resctrl stats for container %s: %s", cInfo.Name, err)
|
||||
klog.Errorf("error occurred while collecting resctrl stats for container %s: %s", cInfo.Name, resctrlStatsErr)
|
||||
return resctrlStatsErr
|
||||
}
|
||||
return customStatsErr
|
||||
|
104
vendor/github.com/google/cadvisor/manager/manager.go
generated
vendored
104
vendor/github.com/google/cadvisor/manager/manager.go
generated
vendored
@@ -24,18 +24,18 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/google/cadvisor/accelerators"
|
||||
"github.com/google/cadvisor/cache/memory"
|
||||
"github.com/google/cadvisor/collector"
|
||||
"github.com/google/cadvisor/container"
|
||||
"github.com/google/cadvisor/container/docker"
|
||||
"github.com/google/cadvisor/container/raw"
|
||||
"github.com/google/cadvisor/events"
|
||||
"github.com/google/cadvisor/fs"
|
||||
info "github.com/google/cadvisor/info/v1"
|
||||
"github.com/google/cadvisor/info/v2"
|
||||
v2 "github.com/google/cadvisor/info/v2"
|
||||
"github.com/google/cadvisor/machine"
|
||||
"github.com/google/cadvisor/nvm"
|
||||
"github.com/google/cadvisor/perf"
|
||||
@@ -47,8 +47,6 @@ import (
|
||||
"github.com/google/cadvisor/watcher"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
|
||||
"github.com/opencontainers/runc/libcontainer/intelrdt"
|
||||
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/utils/clock"
|
||||
@@ -61,6 +59,14 @@ var eventStorageAgeLimit = flag.String("event_storage_age_limit", "default=24h",
|
||||
var eventStorageEventLimit = flag.String("event_storage_event_limit", "default=100000", "Max number of events to store (per type). Value is a comma separated list of key values, where the keys are event types (e.g.: creation, oom) or \"default\" and the value is an integer. Default is applied to all non-specified event types")
|
||||
var applicationMetricsCountLimit = flag.Int("application_metrics_count_limit", 100, "Max number of application metrics to store (per container)")
|
||||
|
||||
// The namespace under which Docker aliases are unique.
|
||||
const DockerNamespace = "docker"
|
||||
|
||||
var HousekeepingConfigFlags = HouskeepingConfig{
|
||||
flag.Duration("max_housekeeping_interval", 60*time.Second, "Largest interval to allow between container housekeepings"),
|
||||
flag.Bool("allow_dynamic_housekeeping", true, "Whether to allow the housekeeping interval to be dynamic"),
|
||||
}
|
||||
|
||||
// The Manager interface defines operations for starting a manager and getting
|
||||
// container and machine information.
|
||||
type Manager interface {
|
||||
@@ -129,12 +135,6 @@ type Manager interface {
|
||||
|
||||
CloseEventChannel(watchID int)
|
||||
|
||||
// Get status information about docker.
|
||||
DockerInfo() (info.DockerStatus, error)
|
||||
|
||||
// Get details about interesting docker images.
|
||||
DockerImages() ([]info.DockerImage, error)
|
||||
|
||||
// Returns debugging information. Map of lines per category.
|
||||
DebugInfo() map[string][]string
|
||||
}
|
||||
@@ -146,7 +146,7 @@ type HouskeepingConfig = struct {
|
||||
}
|
||||
|
||||
// New takes a memory storage and returns a new manager.
|
||||
func New(memoryCache *memory.InMemoryCache, sysfs sysfs.SysFs, houskeepingConfig HouskeepingConfig, includedMetricsSet container.MetricSet, collectorHTTPClient *http.Client, rawContainerCgroupPathPrefixWhiteList []string, perfEventsFile string) (Manager, error) {
|
||||
func New(memoryCache *memory.InMemoryCache, sysfs sysfs.SysFs, houskeepingConfig HouskeepingConfig, includedMetricsSet container.MetricSet, collectorHTTPClient *http.Client, rawContainerCgroupPathPrefixWhiteList, containerEnvMetadataWhiteList []string, perfEventsFile string, resctrlInterval time.Duration) (Manager, error) {
|
||||
if memoryCache == nil {
|
||||
return nil, fmt.Errorf("manager requires memory storage")
|
||||
}
|
||||
@@ -203,6 +203,7 @@ func New(memoryCache *memory.InMemoryCache, sysfs sysfs.SysFs, houskeepingConfig
|
||||
collectorHTTPClient: collectorHTTPClient,
|
||||
nvidiaManager: accelerators.NewNvidiaManager(includedMetricsSet),
|
||||
rawContainerCgroupPathPrefixWhiteList: rawContainerCgroupPathPrefixWhiteList,
|
||||
containerEnvMetadataWhiteList: containerEnvMetadataWhiteList,
|
||||
}
|
||||
|
||||
machineInfo, err := machine.Info(sysfs, fsInfo, inHostNamespace)
|
||||
@@ -217,7 +218,7 @@ func New(memoryCache *memory.InMemoryCache, sysfs sysfs.SysFs, houskeepingConfig
|
||||
return nil, err
|
||||
}
|
||||
|
||||
newManager.resctrlManager, err = resctrl.NewManager(selfContainer)
|
||||
newManager.resctrlManager, err = resctrl.NewManager(resctrlInterval, resctrl.Setup, machineInfo.CPUVendorID, inHostNamespace)
|
||||
if err != nil {
|
||||
klog.V(4).Infof("Cannot gather resctrl metrics: %v", err)
|
||||
}
|
||||
@@ -262,9 +263,11 @@ type manager struct {
|
||||
collectorHTTPClient *http.Client
|
||||
nvidiaManager stats.Manager
|
||||
perfManager stats.Manager
|
||||
resctrlManager stats.Manager
|
||||
resctrlManager resctrl.Manager
|
||||
// List of raw container cgroup path prefix whitelist.
|
||||
rawContainerCgroupPathPrefixWhiteList []string
|
||||
// List of container env prefix whitelist, the matched container envs would be collected into metrics as extra labels.
|
||||
containerEnvMetadataWhiteList []string
|
||||
}
|
||||
|
||||
// Start the container manager.
|
||||
@@ -327,7 +330,7 @@ func (m *manager) Start() error {
|
||||
|
||||
func (m *manager) Stop() error {
|
||||
defer m.nvidiaManager.Destroy()
|
||||
defer m.destroyPerfCollectors()
|
||||
defer m.destroyCollectors()
|
||||
// Stop and wait on all quit channels.
|
||||
for i, c := range m.quitChannels {
|
||||
// Send the exit signal and wait on the thread to exit (by closing the channel).
|
||||
@@ -345,9 +348,10 @@ func (m *manager) Stop() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *manager) destroyPerfCollectors() {
|
||||
func (m *manager) destroyCollectors() {
|
||||
for _, container := range m.containers {
|
||||
container.perfCollector.Destroy()
|
||||
container.resctrlCollector.Destroy()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -590,7 +594,7 @@ func (m *manager) getAllDockerContainers() map[string]*containerData {
|
||||
|
||||
// Get containers in the Docker namespace.
|
||||
for name, cont := range m.containers {
|
||||
if name.Namespace == docker.DockerNamespace {
|
||||
if name.Namespace == DockerNamespace {
|
||||
containers[cont.info.Name] = cont
|
||||
}
|
||||
}
|
||||
@@ -622,14 +626,14 @@ func (m *manager) getDockerContainer(containerName string) (*containerData, erro
|
||||
|
||||
// Check for the container in the Docker container namespace.
|
||||
cont, ok := m.containers[namespacedContainerName{
|
||||
Namespace: docker.DockerNamespace,
|
||||
Namespace: DockerNamespace,
|
||||
Name: containerName,
|
||||
}]
|
||||
|
||||
// Look for container by short prefix name if no exact match found.
|
||||
if !ok {
|
||||
for contName, c := range m.containers {
|
||||
if contName.Namespace == docker.DockerNamespace && strings.HasPrefix(contName.Name, containerName) {
|
||||
if contName.Namespace == DockerNamespace && strings.HasPrefix(contName.Name, containerName) {
|
||||
if cont == nil {
|
||||
cont = c
|
||||
} else {
|
||||
@@ -692,6 +696,10 @@ func (m *manager) GetRequestedContainersInfo(containerName string, options v2.Re
|
||||
for name, data := range containers {
|
||||
info, err := m.containerDataToContainerInfo(data, &query)
|
||||
if err != nil {
|
||||
if err == memory.ErrDataNotFound {
|
||||
klog.Warningf("Error getting data for container %s because of race condition", name)
|
||||
continue
|
||||
}
|
||||
errs.append(name, "containerDataToContainerInfo", err)
|
||||
}
|
||||
containersMap[name] = info
|
||||
@@ -908,7 +916,7 @@ func (m *manager) createContainerLocked(containerName string, watchSource watche
|
||||
return nil
|
||||
}
|
||||
|
||||
handler, accept, err := container.NewContainerHandler(containerName, watchSource, m.inHostNamespace)
|
||||
handler, accept, err := container.NewContainerHandler(containerName, watchSource, m.containerEnvMetadataWhiteList, m.inHostNamespace)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -928,13 +936,7 @@ func (m *manager) createContainerLocked(containerName string, watchSource watche
|
||||
return err
|
||||
}
|
||||
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
perfCgroupPath := path.Join(fs2.UnifiedMountpoint, containerName)
|
||||
cont.perfCollector, err = m.perfManager.GetCollector(perfCgroupPath)
|
||||
if err != nil {
|
||||
klog.Errorf("Perf event metrics will not be available for container %q: %v", containerName, err)
|
||||
}
|
||||
} else {
|
||||
if !cgroups.IsCgroup2UnifiedMode() {
|
||||
devicesCgroupPath, err := handler.GetCgroupPath("devices")
|
||||
if err != nil {
|
||||
klog.Warningf("Error getting devices cgroup path: %v", err)
|
||||
@@ -944,6 +946,8 @@ func (m *manager) createContainerLocked(containerName string, watchSource watche
|
||||
klog.V(4).Infof("GPU metrics may be unavailable/incomplete for container %s: %s", cont.info.Name, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
if m.includedMetrics.Has(container.PerfMetrics) {
|
||||
perfCgroupPath, err := handler.GetCgroupPath("perf_event")
|
||||
if err != nil {
|
||||
klog.Warningf("Error getting perf_event cgroup path: %q", err)
|
||||
@@ -956,14 +960,11 @@ func (m *manager) createContainerLocked(containerName string, watchSource watche
|
||||
}
|
||||
|
||||
if m.includedMetrics.Has(container.ResctrlMetrics) {
|
||||
resctrlPath, err := intelrdt.GetIntelRdtPath(containerName)
|
||||
cont.resctrlCollector, err = m.resctrlManager.GetCollector(containerName, func() ([]string, error) {
|
||||
return cont.getContainerPids(m.inHostNamespace)
|
||||
}, len(m.machineInfo.Topology))
|
||||
if err != nil {
|
||||
klog.V(4).Infof("Error getting resctrl path: %q", err)
|
||||
} else {
|
||||
cont.resctrlCollector, err = m.resctrlManager.GetCollector(resctrlPath)
|
||||
if err != nil {
|
||||
klog.V(4).Infof("resctrl metrics will not be available for container %s: %s", cont.info.Name, err)
|
||||
}
|
||||
klog.V(4).Infof("resctrl metrics will not be available for container %s: %s", cont.info.Name, err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1005,7 +1006,6 @@ func (m *manager) createContainerLocked(containerName string, watchSource watche
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Start the container's housekeeping.
|
||||
return cont.Start()
|
||||
}
|
||||
@@ -1237,6 +1237,24 @@ func (m *manager) watchForNewOoms() error {
|
||||
if err != nil {
|
||||
klog.Errorf("failed to add OOM kill event for %q: %v", oomInstance.ContainerName, err)
|
||||
}
|
||||
|
||||
// Count OOM events for later collection by prometheus
|
||||
request := v2.RequestOptions{
|
||||
IdType: v2.TypeName,
|
||||
Count: 1,
|
||||
}
|
||||
conts, err := m.getRequestedContainers(oomInstance.ContainerName, request)
|
||||
if err != nil {
|
||||
klog.V(2).Infof("failed getting container info for %q: %v", oomInstance.ContainerName, err)
|
||||
continue
|
||||
}
|
||||
if len(conts) != 1 {
|
||||
klog.V(2).Info("Expected the request to match only one container")
|
||||
continue
|
||||
}
|
||||
for _, cont := range conts {
|
||||
atomic.AddUint64(&cont.oomEvents, 1)
|
||||
}
|
||||
}
|
||||
}()
|
||||
return nil
|
||||
@@ -1304,14 +1322,6 @@ func parseEventsStoragePolicy() events.StoragePolicy {
|
||||
return policy
|
||||
}
|
||||
|
||||
func (m *manager) DockerImages() ([]info.DockerImage, error) {
|
||||
return docker.Images()
|
||||
}
|
||||
|
||||
func (m *manager) DockerInfo() (info.DockerStatus, error) {
|
||||
return docker.Status()
|
||||
}
|
||||
|
||||
func (m *manager) DebugInfo() map[string][]string {
|
||||
debugInfo := container.DebugInfo()
|
||||
|
||||
@@ -1368,20 +1378,10 @@ func getVersionInfo() (*info.VersionInfo, error) {
|
||||
|
||||
kernelVersion := machine.KernelVersion()
|
||||
osVersion := machine.ContainerOsVersion()
|
||||
dockerVersion, err := docker.VersionString()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
dockerAPIVersion, err := docker.APIVersionString()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &info.VersionInfo{
|
||||
KernelVersion: kernelVersion,
|
||||
ContainerOsVersion: osVersion,
|
||||
DockerVersion: dockerVersion,
|
||||
DockerAPIVersion: dockerAPIVersion,
|
||||
CadvisorVersion: version.Info["version"],
|
||||
CadvisorRevision: version.Info["revision"],
|
||||
}, nil
|
||||
|
Reference in New Issue
Block a user