Disable collecting accelerator metrics and exposing them for containerd
This commit is contained in:
parent
204ff6caeb
commit
e8ae653c1d
@ -88,10 +88,15 @@ func New(imageFsInfoProvider ImageFsInfoProvider, rootPath string, cgroupRoots [
|
|||||||
cadvisormetrics.CpuLoadMetrics: struct{}{},
|
cadvisormetrics.CpuLoadMetrics: struct{}{},
|
||||||
cadvisormetrics.DiskIOMetrics: struct{}{},
|
cadvisormetrics.DiskIOMetrics: struct{}{},
|
||||||
cadvisormetrics.NetworkUsageMetrics: struct{}{},
|
cadvisormetrics.NetworkUsageMetrics: struct{}{},
|
||||||
cadvisormetrics.AcceleratorUsageMetrics: struct{}{},
|
|
||||||
cadvisormetrics.AppMetrics: struct{}{},
|
cadvisormetrics.AppMetrics: struct{}{},
|
||||||
cadvisormetrics.ProcessMetrics: struct{}{},
|
cadvisormetrics.ProcessMetrics: struct{}{},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Only add the Accelerator metrics if the feature is inactive
|
||||||
|
if !utilfeature.DefaultFeatureGate.Enabled(kubefeatures.DisableAcceleratorUsageMetrics) {
|
||||||
|
includedMetrics[cadvisormetrics.AcceleratorUsageMetrics] = struct{}{}
|
||||||
|
}
|
||||||
|
|
||||||
if usingLegacyStats || utilfeature.DefaultFeatureGate.Enabled(kubefeatures.LocalStorageCapacityIsolation) {
|
if usingLegacyStats || utilfeature.DefaultFeatureGate.Enabled(kubefeatures.LocalStorageCapacityIsolation) {
|
||||||
includedMetrics[cadvisormetrics.DiskUsageMetrics] = struct{}{}
|
includedMetrics[cadvisormetrics.DiskUsageMetrics] = struct{}{}
|
||||||
}
|
}
|
||||||
|
@ -679,7 +679,8 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
|
|||||||
klet.runtimeCache,
|
klet.runtimeCache,
|
||||||
kubeDeps.RemoteRuntimeService,
|
kubeDeps.RemoteRuntimeService,
|
||||||
kubeDeps.RemoteImageService,
|
kubeDeps.RemoteImageService,
|
||||||
hostStatsProvider)
|
hostStatsProvider,
|
||||||
|
utilfeature.DefaultFeatureGate.Enabled(features.DisableAcceleratorUsageMetrics))
|
||||||
}
|
}
|
||||||
|
|
||||||
klet.pleg = pleg.NewGenericPLEG(klet.containerRuntime, plegChannelCapacity, plegRelistPeriod, klet.podCache, clock.RealClock{})
|
klet.pleg = pleg.NewGenericPLEG(klet.containerRuntime, plegChannelCapacity, plegRelistPeriod, klet.podCache, clock.RealClock{})
|
||||||
|
@ -69,6 +69,7 @@ type criStatsProvider struct {
|
|||||||
// cpuUsageCache caches the cpu usage for containers.
|
// cpuUsageCache caches the cpu usage for containers.
|
||||||
cpuUsageCache map[string]*cpuUsageRecord
|
cpuUsageCache map[string]*cpuUsageRecord
|
||||||
mutex sync.RWMutex
|
mutex sync.RWMutex
|
||||||
|
disableAcceleratorUsageMetrics bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// newCRIStatsProvider returns a containerStatsProvider implementation that
|
// newCRIStatsProvider returns a containerStatsProvider implementation that
|
||||||
@ -79,6 +80,7 @@ func newCRIStatsProvider(
|
|||||||
runtimeService internalapi.RuntimeService,
|
runtimeService internalapi.RuntimeService,
|
||||||
imageService internalapi.ImageManagerService,
|
imageService internalapi.ImageManagerService,
|
||||||
hostStatsProvider HostStatsProvider,
|
hostStatsProvider HostStatsProvider,
|
||||||
|
disableAcceleratorUsageMetrics bool,
|
||||||
) containerStatsProvider {
|
) containerStatsProvider {
|
||||||
return &criStatsProvider{
|
return &criStatsProvider{
|
||||||
cadvisor: cadvisor,
|
cadvisor: cadvisor,
|
||||||
@ -87,6 +89,7 @@ func newCRIStatsProvider(
|
|||||||
imageService: imageService,
|
imageService: imageService,
|
||||||
hostStatsProvider: hostStatsProvider,
|
hostStatsProvider: hostStatsProvider,
|
||||||
cpuUsageCache: make(map[string]*cpuUsageRecord),
|
cpuUsageCache: make(map[string]*cpuUsageRecord),
|
||||||
|
disableAcceleratorUsageMetrics: disableAcceleratorUsageMetrics,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -784,9 +787,12 @@ func (p *criStatsProvider) addCadvisorContainerStats(
|
|||||||
if memory != nil {
|
if memory != nil {
|
||||||
cs.Memory = memory
|
cs.Memory = memory
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if !p.disableAcceleratorUsageMetrics {
|
||||||
accelerators := cadvisorInfoToAcceleratorStats(caPodStats)
|
accelerators := cadvisorInfoToAcceleratorStats(caPodStats)
|
||||||
cs.Accelerators = accelerators
|
cs.Accelerators = accelerators
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (p *criStatsProvider) addCadvisorContainerCPUAndMemoryStats(
|
func (p *criStatsProvider) addCadvisorContainerCPUAndMemoryStats(
|
||||||
cs *statsapi.ContainerStats,
|
cs *statsapi.ContainerStats,
|
||||||
|
@ -231,6 +231,7 @@ func TestCRIListPodStats(t *testing.T) {
|
|||||||
fakeRuntimeService,
|
fakeRuntimeService,
|
||||||
fakeImageService,
|
fakeImageService,
|
||||||
NewFakeHostStatsProviderWithData(fakeStats, fakeOS),
|
NewFakeHostStatsProviderWithData(fakeStats, fakeOS),
|
||||||
|
false,
|
||||||
)
|
)
|
||||||
|
|
||||||
stats, err := provider.ListPodStats()
|
stats, err := provider.ListPodStats()
|
||||||
@ -319,6 +320,113 @@ func TestCRIListPodStats(t *testing.T) {
|
|||||||
mockCadvisor.AssertExpectations(t)
|
mockCadvisor.AssertExpectations(t)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestAcceleratorUsageStatsCanBeDisabled(t *testing.T) {
|
||||||
|
var (
|
||||||
|
imageFsMountpoint = "/test/mount/point"
|
||||||
|
unknownMountpoint = "/unknown/mount/point"
|
||||||
|
imageFsInfo = getTestFsInfo(2000)
|
||||||
|
rootFsInfo = getTestFsInfo(1000)
|
||||||
|
|
||||||
|
sandbox0 = makeFakePodSandbox("sandbox0-name", "sandbox0-uid", "sandbox0-ns", false)
|
||||||
|
sandbox0Cgroup = "/" + cm.GetPodCgroupNameSuffix(types.UID(sandbox0.PodSandboxStatus.Metadata.Uid))
|
||||||
|
container0 = makeFakeContainer(sandbox0, cName0, 0, false)
|
||||||
|
containerStats0 = makeFakeContainerStats(container0, imageFsMountpoint)
|
||||||
|
container1 = makeFakeContainer(sandbox0, cName1, 0, false)
|
||||||
|
containerStats1 = makeFakeContainerStats(container1, unknownMountpoint)
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
mockCadvisor = new(cadvisortest.Mock)
|
||||||
|
mockRuntimeCache = new(kubecontainertest.MockRuntimeCache)
|
||||||
|
mockPodManager = new(kubepodtest.MockManager)
|
||||||
|
resourceAnalyzer = new(fakeResourceAnalyzer)
|
||||||
|
fakeRuntimeService = critest.NewFakeRuntimeService()
|
||||||
|
fakeImageService = critest.NewFakeImageService()
|
||||||
|
)
|
||||||
|
|
||||||
|
infos := map[string]cadvisorapiv2.ContainerInfo{
|
||||||
|
"/": getTestContainerInfo(seedRoot, "", "", ""),
|
||||||
|
"/kubelet": getTestContainerInfo(seedKubelet, "", "", ""),
|
||||||
|
"/system": getTestContainerInfo(seedMisc, "", "", ""),
|
||||||
|
sandbox0.PodSandboxStatus.Id: getTestContainerInfo(seedSandbox0, pName0, sandbox0.PodSandboxStatus.Metadata.Namespace, leaky.PodInfraContainerName),
|
||||||
|
sandbox0Cgroup: getTestContainerInfo(seedSandbox0, "", "", ""),
|
||||||
|
container0.ContainerStatus.Id: getTestContainerInfo(seedContainer0, pName0, sandbox0.PodSandboxStatus.Metadata.Namespace, cName0),
|
||||||
|
container1.ContainerStatus.Id: getTestContainerInfo(seedContainer1, pName0, sandbox0.PodSandboxStatus.Metadata.Namespace, cName1),
|
||||||
|
}
|
||||||
|
|
||||||
|
options := cadvisorapiv2.RequestOptions{
|
||||||
|
IdType: cadvisorapiv2.TypeName,
|
||||||
|
Count: 2,
|
||||||
|
Recursive: true,
|
||||||
|
}
|
||||||
|
|
||||||
|
mockCadvisor.
|
||||||
|
On("ContainerInfoV2", "/", options).Return(infos, nil).
|
||||||
|
On("RootFsInfo").Return(rootFsInfo, nil).
|
||||||
|
On("GetDirFsInfo", imageFsMountpoint).Return(imageFsInfo, nil).
|
||||||
|
On("GetDirFsInfo", unknownMountpoint).Return(cadvisorapiv2.FsInfo{}, cadvisorfs.ErrNoSuchDevice)
|
||||||
|
fakeRuntimeService.SetFakeSandboxes([]*critest.FakePodSandbox{
|
||||||
|
sandbox0,
|
||||||
|
})
|
||||||
|
fakeRuntimeService.SetFakeContainers([]*critest.FakeContainer{
|
||||||
|
container0, container1,
|
||||||
|
})
|
||||||
|
fakeRuntimeService.SetFakeContainerStats([]*runtimeapi.ContainerStats{
|
||||||
|
containerStats0, containerStats1,
|
||||||
|
})
|
||||||
|
|
||||||
|
ephemeralVolumes := makeFakeVolumeStats([]string{"ephVolume1, ephVolumes2"})
|
||||||
|
persistentVolumes := makeFakeVolumeStats([]string{"persisVolume1, persisVolumes2"})
|
||||||
|
resourceAnalyzer.podVolumeStats = serverstats.PodVolumeStats{
|
||||||
|
EphemeralVolumes: ephemeralVolumes,
|
||||||
|
PersistentVolumes: persistentVolumes,
|
||||||
|
}
|
||||||
|
|
||||||
|
provider := NewCRIStatsProvider(
|
||||||
|
mockCadvisor,
|
||||||
|
resourceAnalyzer,
|
||||||
|
mockPodManager,
|
||||||
|
mockRuntimeCache,
|
||||||
|
fakeRuntimeService,
|
||||||
|
fakeImageService,
|
||||||
|
NewFakeHostStatsProvider(),
|
||||||
|
true, // this is what the test is actually testing
|
||||||
|
)
|
||||||
|
|
||||||
|
stats, err := provider.ListPodStats()
|
||||||
|
assert := assert.New(t)
|
||||||
|
assert.NoError(err)
|
||||||
|
assert.Equal(1, len(stats))
|
||||||
|
|
||||||
|
podStatsMap := make(map[statsapi.PodReference]statsapi.PodStats)
|
||||||
|
for _, s := range stats {
|
||||||
|
podStatsMap[s.PodRef] = s
|
||||||
|
}
|
||||||
|
|
||||||
|
p0 := podStatsMap[statsapi.PodReference{Name: "sandbox0-name", UID: "sandbox0-uid", Namespace: "sandbox0-ns"}]
|
||||||
|
assert.Equal(sandbox0.CreatedAt, p0.StartTime.UnixNano())
|
||||||
|
assert.Equal(2, len(p0.Containers))
|
||||||
|
|
||||||
|
containerStatsMap := make(map[string]statsapi.ContainerStats)
|
||||||
|
for _, s := range p0.Containers {
|
||||||
|
containerStatsMap[s.Name] = s
|
||||||
|
}
|
||||||
|
|
||||||
|
c0 := containerStatsMap[cName0]
|
||||||
|
assert.Equal(container0.CreatedAt, c0.StartTime.UnixNano())
|
||||||
|
checkCRICPUAndMemoryStats(assert, c0, infos[container0.ContainerStatus.Id].Stats[0])
|
||||||
|
assert.Nil(c0.Accelerators)
|
||||||
|
|
||||||
|
c1 := containerStatsMap[cName1]
|
||||||
|
assert.Equal(container1.CreatedAt, c1.StartTime.UnixNano())
|
||||||
|
checkCRICPUAndMemoryStats(assert, c1, infos[container1.ContainerStatus.Id].Stats[0])
|
||||||
|
assert.Nil(c1.Accelerators)
|
||||||
|
|
||||||
|
checkCRIPodCPUAndMemoryStats(assert, p0, infos[sandbox0Cgroup].Stats[0])
|
||||||
|
|
||||||
|
mockCadvisor.AssertExpectations(t)
|
||||||
|
}
|
||||||
|
|
||||||
func TestCRIListPodCPUAndMemoryStats(t *testing.T) {
|
func TestCRIListPodCPUAndMemoryStats(t *testing.T) {
|
||||||
|
|
||||||
var (
|
var (
|
||||||
@ -426,6 +534,7 @@ func TestCRIListPodCPUAndMemoryStats(t *testing.T) {
|
|||||||
fakeRuntimeService,
|
fakeRuntimeService,
|
||||||
nil,
|
nil,
|
||||||
NewFakeHostStatsProvider(),
|
NewFakeHostStatsProvider(),
|
||||||
|
false,
|
||||||
)
|
)
|
||||||
|
|
||||||
stats, err := provider.ListPodCPUAndMemoryStats()
|
stats, err := provider.ListPodCPUAndMemoryStats()
|
||||||
@ -554,6 +663,7 @@ func TestCRIImagesFsStats(t *testing.T) {
|
|||||||
fakeRuntimeService,
|
fakeRuntimeService,
|
||||||
fakeImageService,
|
fakeImageService,
|
||||||
NewFakeHostStatsProvider(),
|
NewFakeHostStatsProvider(),
|
||||||
|
false,
|
||||||
)
|
)
|
||||||
|
|
||||||
stats, err := provider.ImageFsStats()
|
stats, err := provider.ImageFsStats()
|
||||||
|
@ -42,9 +42,10 @@ func NewCRIStatsProvider(
|
|||||||
runtimeService internalapi.RuntimeService,
|
runtimeService internalapi.RuntimeService,
|
||||||
imageService internalapi.ImageManagerService,
|
imageService internalapi.ImageManagerService,
|
||||||
hostStatsProvider HostStatsProvider,
|
hostStatsProvider HostStatsProvider,
|
||||||
|
disableAcceleratorUsageMetrics bool,
|
||||||
) *Provider {
|
) *Provider {
|
||||||
return newStatsProvider(cadvisor, podManager, runtimeCache, newCRIStatsProvider(cadvisor, resourceAnalyzer,
|
return newStatsProvider(cadvisor, podManager, runtimeCache, newCRIStatsProvider(cadvisor, resourceAnalyzer,
|
||||||
runtimeService, imageService, hostStatsProvider))
|
runtimeService, imageService, hostStatsProvider, disableAcceleratorUsageMetrics))
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewCadvisorStatsProvider returns a containerStatsProvider that provides both
|
// NewCadvisorStatsProvider returns a containerStatsProvider that provides both
|
||||||
|
Loading…
Reference in New Issue
Block a user