Merge pull request #111930 from azylinski/new-histogram-pod_start_sli_duration_seconds

New histogram: Pod start SLI duration
Kubernetes Prow Robot 2022-11-04 07:28:14 -07:00 committed by GitHub
commit 1bf4af4584
20 changed files with 660 additions and 54 deletions

View File

@ -99,6 +99,7 @@ import (
kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
"k8s.io/kubernetes/pkg/kubelet/server"
"k8s.io/kubernetes/pkg/kubelet/stats/pidlimit"
kubeletutil "k8s.io/kubernetes/pkg/kubelet/util"
utilfs "k8s.io/kubernetes/pkg/util/filesystem"
"k8s.io/kubernetes/pkg/util/flock"
nodeutil "k8s.io/kubernetes/pkg/util/node"
@ -751,6 +752,10 @@ func run(ctx context.Context, s *options.KubeletServer, kubeDeps *kubelet.Depend
}
}
if kubeDeps.PodStartupLatencyTracker == nil {
kubeDeps.PodStartupLatencyTracker = kubeletutil.NewPodStartupLatencyTracker()
}
// TODO(vmarmol): Do this through container config.
oomAdjuster := kubeDeps.OOMAdjuster
if err := oomAdjuster.ApplyOOMScoreAdj(0, int(s.OOMScoreAdj)); err != nil {

View File

@ -21,6 +21,7 @@ import (
"fmt"
"reflect"
"sync"
"time"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
@ -51,6 +52,10 @@ const (
PodConfigNotificationIncremental
)
type podStartupSLIObserver interface {
ObservedPodOnWatch(pod *v1.Pod, when time.Time)
}
// PodConfig is a configuration mux that merges many sources of pod configuration into a single
// consistent structure, and then delivers incremental change notifications to listeners
// in order.
@ -68,9 +73,9 @@ type PodConfig struct {
// NewPodConfig creates an object that can merge many configuration sources into a stream
// of normalized updates to a pod configuration.
func NewPodConfig(mode PodConfigNotificationMode, recorder record.EventRecorder) *PodConfig {
func NewPodConfig(mode PodConfigNotificationMode, recorder record.EventRecorder, startupSLIObserver podStartupSLIObserver) *PodConfig {
updates := make(chan kubetypes.PodUpdate, 50)
storage := newPodStorage(updates, mode, recorder)
storage := newPodStorage(updates, mode, recorder, startupSLIObserver)
podConfig := &PodConfig{
pods: storage,
mux: config.NewMux(storage),
@ -132,18 +137,21 @@ type podStorage struct {
// the EventRecorder to use
recorder record.EventRecorder
startupSLIObserver podStartupSLIObserver
}
// TODO: PodConfigNotificationMode could be handled by a listener to the updates channel
// in the future, especially with multiple listeners.
// TODO: allow initialization of the current state of the store with snapshotted version.
func newPodStorage(updates chan<- kubetypes.PodUpdate, mode PodConfigNotificationMode, recorder record.EventRecorder) *podStorage {
func newPodStorage(updates chan<- kubetypes.PodUpdate, mode PodConfigNotificationMode, recorder record.EventRecorder, startupSLIObserver podStartupSLIObserver) *podStorage {
return &podStorage{
pods: make(map[string]map[types.UID]*v1.Pod),
mode: mode,
updates: updates,
sourcesSeen: sets.String{},
recorder: recorder,
pods: make(map[string]map[types.UID]*v1.Pod),
mode: mode,
updates: updates,
sourcesSeen: sets.String{},
recorder: recorder,
startupSLIObserver: startupSLIObserver,
}
}
@ -235,6 +243,10 @@ func (s *podStorage) merge(source string, change interface{}) (adds, updates, de
ref.Annotations = make(map[string]string)
}
ref.Annotations[kubetypes.ConfigSourceAnnotationKey] = source
// ignore static pods
if !kubetypes.IsStaticPod(ref) {
s.startupSLIObserver.ObservedPodOnWatch(ref, time.Now())
}
if existing, found := oldPods[ref.UID]; found {
pods[ref.UID] = existing
needUpdate, needReconcile, needGracefulDelete := checkAndUpdatePod(existing, ref)
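
Note on the hook above: merge() now reports every non-static pod to the observer with the wall-clock time of the watch event. The mux only needs the one-method podStartupSLIObserver interface, so callers outside the kubelet can pass a stub. A minimal sketch, assuming the v1 and time imports already in this file (noopSLIObserver is invented for illustration):

// noopSLIObserver satisfies podStartupSLIObserver without recording anything.
type noopSLIObserver struct{}

func (noopSLIObserver) ObservedPodOnWatch(pod *v1.Pod, when time.Time) {}

// usage, mirroring the new constructor signature:
// cfg := NewPodConfig(PodConfigNotificationIncremental, recorder, noopSLIObserver{})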

View File

@ -61,6 +61,10 @@ func (s sortedPods) Less(i, j int) bool {
return s[i].Namespace < s[j].Namespace
}
type mockPodStartupSLIObserver struct{}
func (m *mockPodStartupSLIObserver) ObservedPodOnWatch(pod *v1.Pod, when time.Time) {}
func CreateValidPod(name, namespace string) *v1.Pod {
return &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
@ -90,7 +94,7 @@ func CreatePodUpdate(op kubetypes.PodOperation, source string, pods ...*v1.Pod)
func createPodConfigTester(ctx context.Context, mode PodConfigNotificationMode) (chan<- interface{}, <-chan kubetypes.PodUpdate, *PodConfig) {
eventBroadcaster := record.NewBroadcaster()
config := NewPodConfig(mode, eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "kubelet"}))
config := NewPodConfig(mode, eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "kubelet"}), &mockPodStartupSLIObserver{})
channel := config.Channel(ctx, TestSource)
ch := config.Updates()
return channel, ch, config
@ -461,7 +465,7 @@ func TestPodUpdateLabels(t *testing.T) {
func TestPodConfigRace(t *testing.T) {
eventBroadcaster := record.NewBroadcaster()
config := NewPodConfig(PodConfigNotificationIncremental, eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "kubelet"}))
config := NewPodConfig(PodConfigNotificationIncremental, eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "kubelet"}), &mockPodStartupSLIObserver{})
seenSources := sets.NewString(TestSource)
var wg sync.WaitGroup
const iterations = 100

View File

@ -22,6 +22,7 @@ import (
dockerref "github.com/docker/distribution/reference"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/tools/record"
"k8s.io/client-go/util/flowcontrol"
"k8s.io/klog/v2"
@ -31,6 +32,11 @@ import (
"k8s.io/kubernetes/pkg/kubelet/events"
)
type ImagePodPullingTimeRecorder interface {
RecordImageStartedPulling(podUID types.UID)
RecordImageFinishedPulling(podUID types.UID)
}
// imageManager provides the functionalities for image pulling.
type imageManager struct {
recorder record.EventRecorder
@ -38,12 +44,14 @@ type imageManager struct {
backOff *flowcontrol.Backoff
// It will check the presence of the image, and report the 'image pulling' and 'image pulled' events correspondingly.
puller imagePuller
podPullingTimeRecorder ImagePodPullingTimeRecorder
}
var _ ImageManager = &imageManager{}
// NewImageManager instantiates a new ImageManager object.
func NewImageManager(recorder record.EventRecorder, imageService kubecontainer.ImageService, imageBackOff *flowcontrol.Backoff, serialized bool, qps float32, burst int) ImageManager {
func NewImageManager(recorder record.EventRecorder, imageService kubecontainer.ImageService, imageBackOff *flowcontrol.Backoff, serialized bool, qps float32, burst int, podPullingTimeRecorder ImagePodPullingTimeRecorder) ImageManager {
imageService = throttleImagePulling(imageService, qps, burst)
var puller imagePuller
@ -53,10 +61,11 @@ func NewImageManager(recorder record.EventRecorder, imageService kubecontainer.I
puller = newParallelImagePuller(imageService)
}
return &imageManager{
recorder: recorder,
imageService: imageService,
backOff: imageBackOff,
puller: puller,
recorder: recorder,
imageService: imageService,
backOff: imageBackOff,
puller: puller,
podPullingTimeRecorder: podPullingTimeRecorder,
}
}
@ -138,6 +147,7 @@ func (m *imageManager) EnsureImageExists(pod *v1.Pod, container *v1.Container, p
m.logIt(ref, v1.EventTypeNormal, events.BackOffPullImage, logPrefix, msg, klog.Info)
return "", msg, ErrImagePullBackOff
}
m.podPullingTimeRecorder.RecordImageStartedPulling(pod.UID)
m.logIt(ref, v1.EventTypeNormal, events.PullingImage, logPrefix, fmt.Sprintf("Pulling image %q", container.Image), klog.Info)
startTime := time.Now()
pullChan := make(chan pullResult)
@ -153,6 +163,7 @@ func (m *imageManager) EnsureImageExists(pod *v1.Pod, container *v1.Container, p
return "", imagePullResult.err.Error(), ErrImagePull
}
m.podPullingTimeRecorder.RecordImageFinishedPulling(pod.UID)
m.logIt(ref, v1.EventTypeNormal, events.PulledImage, logPrefix, fmt.Sprintf("Successfully pulled image %q in %v", container.Image, time.Since(startTime)), klog.Info)
m.backOff.GC()
return imagePullResult.imageRef, "", nil
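
The two recorder calls bracket the actual pull: RecordImageStartedPulling fires just before the pull starts, RecordImageFinishedPulling only after it succeeds, and a pull skipped because the image is already present records nothing. A self-contained sketch of a recorder that just measures per-pod pull wall time (stopwatchRecorder is hypothetical, not part of this change):

package main

import (
	"fmt"
	"sync"
	"time"

	"k8s.io/apimachinery/pkg/types"
)

// stopwatchRecorder logs how long a pod spent pulling images,
// from the first recorded start to the last recorded finish.
type stopwatchRecorder struct {
	mu     sync.Mutex
	starts map[types.UID]time.Time
}

func (r *stopwatchRecorder) RecordImageStartedPulling(podUID types.UID) {
	r.mu.Lock()
	defer r.mu.Unlock()
	// keep only the first start, matching the tracker's firstStartedPulling semantics
	if _, seen := r.starts[podUID]; !seen {
		r.starts[podUID] = time.Now()
	}
}

func (r *stopwatchRecorder) RecordImageFinishedPulling(podUID types.UID) {
	r.mu.Lock()
	defer r.mu.Unlock()
	if start, seen := r.starts[podUID]; seen {
		fmt.Printf("pod %s: images pulled over %v\n", podUID, time.Since(start))
	}
}

func main() {
	r := &stopwatchRecorder{starts: map[types.UID]time.Time{}}
	r.RecordImageStartedPulling("uid-1")
	time.Sleep(50 * time.Millisecond)
	r.RecordImageFinishedPulling("uid-1")
}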

View File

@ -24,6 +24,7 @@ import (
"github.com/stretchr/testify/assert"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/tools/record"
"k8s.io/client-go/util/flowcontrol"
. "k8s.io/kubernetes/pkg/kubelet/container"
@ -158,6 +159,12 @@ func pullerTestCases() []pullerTestCase {
}
}
type mockPodPullingTimeRecorder struct{}
func (m *mockPodPullingTimeRecorder) RecordImageStartedPulling(podUID types.UID) {}
func (m *mockPodPullingTimeRecorder) RecordImageFinishedPulling(podUID types.UID) {}
func pullerTestEnv(c pullerTestCase, serialized bool) (puller ImageManager, fakeClock *testingclock.FakeClock, fakeRuntime *ctest.FakeRuntime, container *v1.Container) {
container = &v1.Container{
Name: "container_name",
@ -176,7 +183,7 @@ func pullerTestEnv(c pullerTestCase, serialized bool) (puller ImageManager, fake
fakeRuntime.Err = c.pullerErr
fakeRuntime.InspectErr = c.inspectErr
puller = NewImageManager(fakeRecorder, fakeRuntime, backOff, serialized, c.qps, c.burst)
puller = NewImageManager(fakeRecorder, fakeRuntime, backOff, serialized, c.qps, c.burst, &mockPodPullingTimeRecorder{})
return
}

View File

@ -222,28 +222,29 @@ type Dependencies struct {
Options []Option
// Injected Dependencies
Auth server.AuthInterface
CAdvisorInterface cadvisor.Interface
Cloud cloudprovider.Interface
ContainerManager cm.ContainerManager
EventClient v1core.EventsGetter
HeartbeatClient clientset.Interface
OnHeartbeatFailure func()
KubeClient clientset.Interface
Mounter mount.Interface
HostUtil hostutil.HostUtils
OOMAdjuster *oom.OOMAdjuster
OSInterface kubecontainer.OSInterface
PodConfig *config.PodConfig
ProbeManager prober.Manager
Recorder record.EventRecorder
Subpather subpath.Interface
TracerProvider trace.TracerProvider
VolumePlugins []volume.VolumePlugin
DynamicPluginProber volume.DynamicPluginProber
TLSOptions *server.TLSOptions
RemoteRuntimeService internalapi.RuntimeService
RemoteImageService internalapi.ImageManagerService
Auth server.AuthInterface
CAdvisorInterface cadvisor.Interface
Cloud cloudprovider.Interface
ContainerManager cm.ContainerManager
EventClient v1core.EventsGetter
HeartbeatClient clientset.Interface
OnHeartbeatFailure func()
KubeClient clientset.Interface
Mounter mount.Interface
HostUtil hostutil.HostUtils
OOMAdjuster *oom.OOMAdjuster
OSInterface kubecontainer.OSInterface
PodConfig *config.PodConfig
ProbeManager prober.Manager
Recorder record.EventRecorder
Subpather subpath.Interface
TracerProvider trace.TracerProvider
VolumePlugins []volume.VolumePlugin
DynamicPluginProber volume.DynamicPluginProber
TLSOptions *server.TLSOptions
RemoteRuntimeService internalapi.RuntimeService
RemoteImageService internalapi.ImageManagerService
PodStartupLatencyTracker util.PodStartupLatencyTracker
// remove it after cadvisor.UsingLegacyCadvisorStats is dropped.
useLegacyCadvisorStats bool
}
@ -261,7 +262,7 @@ func makePodSourceConfig(kubeCfg *kubeletconfiginternal.KubeletConfiguration, ku
}
// source of all configuration
cfg := config.NewPodConfig(config.PodConfigNotificationIncremental, kubeDeps.Recorder)
cfg := config.NewPodConfig(config.PodConfigNotificationIncremental, kubeDeps.Recorder, kubeDeps.PodStartupLatencyTracker)
// TODO: it needs to be replaced by a proper context in the future
ctx := context.TODO()
@ -593,7 +594,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
mirrorPodClient := kubepod.NewBasicMirrorClient(klet.kubeClient, string(nodeName), nodeLister)
klet.podManager = kubepod.NewBasicPodManager(mirrorPodClient, secretManager, configMapManager)
klet.statusManager = status.NewManager(klet.kubeClient, klet.podManager, klet)
klet.statusManager = status.NewManager(klet.kubeClient, klet.podManager, klet, kubeDeps.PodStartupLatencyTracker)
klet.resourceAnalyzer = serverstats.NewResourceAnalyzer(klet, kubeCfg.VolumeStatsAggPeriod.Duration, kubeDeps.Recorder)
@ -657,6 +658,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
kubeCfg.MemorySwap.SwapBehavior,
kubeDeps.ContainerManager.GetNodeAllocatableAbsolute,
*kubeCfg.MemoryThrottlingFactor,
kubeDeps.PodStartupLatencyTracker,
)
if err != nil {
return nil, err

View File

@ -72,6 +72,7 @@ import (
statustest "k8s.io/kubernetes/pkg/kubelet/status/testing"
"k8s.io/kubernetes/pkg/kubelet/token"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
kubeletutil "k8s.io/kubernetes/pkg/kubelet/util"
"k8s.io/kubernetes/pkg/kubelet/util/queue"
kubeletvolume "k8s.io/kubernetes/pkg/kubelet/volumemanager"
schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
@ -245,7 +246,8 @@ func newTestKubeletWithImageList(
configMapManager := configmap.NewSimpleConfigMapManager(kubelet.kubeClient)
kubelet.configMapManager = configMapManager
kubelet.podManager = kubepod.NewBasicPodManager(fakeMirrorClient, kubelet.secretManager, kubelet.configMapManager)
kubelet.statusManager = status.NewManager(fakeKubeClient, kubelet.podManager, &statustest.FakePodDeletionSafetyProvider{})
podStartupLatencyTracker := kubeletutil.NewPodStartupLatencyTracker()
kubelet.statusManager = status.NewManager(fakeKubeClient, kubelet.podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker)
kubelet.containerRuntime = fakeRuntime
kubelet.runtimeCache = containertest.NewFakeRuntimeCache(kubelet.containerRuntime)

View File

@ -82,6 +82,12 @@ func (f *fakePodStateProvider) ShouldPodContentBeRemoved(uid types.UID) bool {
return found
}
type fakePodPullingTimeRecorder struct{}
func (f *fakePodPullingTimeRecorder) RecordImageStartedPulling(podUID types.UID) {}
func (f *fakePodPullingTimeRecorder) RecordImageFinishedPulling(podUID types.UID) {}
func newFakeKubeRuntimeManager(runtimeService internalapi.RuntimeService, imageService internalapi.ImageManagerService, machineInfo *cadvisorapi.MachineInfo, osInterface kubecontainer.OSInterface, runtimeHelper kubecontainer.RuntimeHelper, keyring credentialprovider.DockerKeyring) (*kubeGenericRuntimeManager, error) {
recorder := &record.FakeRecorder{}
logManager, err := logs.NewContainerLogManager(runtimeService, osInterface, "1", 2)
@ -123,6 +129,7 @@ func newFakeKubeRuntimeManager(runtimeService internalapi.RuntimeService, imageS
false,
0, // Disable image pull throttling by setting QPS to 0,
0,
&fakePodPullingTimeRecorder{},
)
kubeRuntimeManager.runner = lifecycle.NewHandlerRunner(
&fakeHTTP{},

View File

@ -194,6 +194,7 @@ func NewKubeGenericRuntimeManager(
memorySwapBehavior string,
getNodeAllocatable func() v1.ResourceList,
memoryThrottlingFactor float64,
podPullingTimeRecorder images.ImagePodPullingTimeRecorder,
) (KubeGenericRuntime, error) {
runtimeService = newInstrumentedRuntimeService(runtimeService)
imageService = newInstrumentedImageManagerService(imageService)
@ -264,7 +265,8 @@ func NewKubeGenericRuntimeManager(
imageBackOff,
serializeImagePulls,
imagePullQPS,
imagePullBurst)
imagePullBurst,
podPullingTimeRecorder)
kubeRuntimeManager.runner = lifecycle.NewHandlerRunner(insecureContainerLifecycleHTTPClient, kubeRuntimeManager, kubeRuntimeManager, recorder)
kubeRuntimeManager.containerGC = newContainerGC(runtimeService, podStateProvider, kubeRuntimeManager)
kubeRuntimeManager.podStateProvider = podStateProvider

View File

@ -35,6 +35,7 @@ const (
NodeLabelKey = "node"
PodWorkerDurationKey = "pod_worker_duration_seconds"
PodStartDurationKey = "pod_start_duration_seconds"
PodStartSLIDurationKey = "pod_start_sli_duration_seconds"
CgroupManagerOperationsKey = "cgroup_manager_duration_seconds"
PodWorkerStartDurationKey = "pod_worker_start_duration_seconds"
PodStatusSyncDurationKey = "pod_status_sync_duration_seconds"
@ -140,6 +141,24 @@ var (
StabilityLevel: metrics.ALPHA,
},
)
// PodStartSLIDuration is a Histogram that tracks the duration (in seconds) it takes for a single pod to start running,
// excluding the time spent pulling images. This metric should reflect the "Pod startup latency SLI" definition
// ref: https://github.com/kubernetes/community/blob/master/sig-scalability/slos/pod_startup_latency.md
//
// The histogram bucket boundaries for pod startup latency metrics, measured in seconds. These are hand-picked
// so as to be roughly exponential but still round numbers in everyday units. This is to minimise the number
// of buckets while allowing accurate measurement of thresholds which might be used in SLOs
// e.g. x% of pods start up within 30 seconds, or 15 minutes, etc.
PodStartSLIDuration = metrics.NewHistogramVec(
&metrics.HistogramOpts{
Subsystem: KubeletSubsystem,
Name: PodStartSLIDurationKey,
Help: "Duration in seconds to start a pod, excluding time to pull images and run init containers, measured from pod creation timestamp to when all its containers are reported as started and observed via watch",
Buckets: []float64{0.5, 1, 2, 3, 4, 5, 6, 8, 10, 20, 30, 45, 60, 120, 180, 240, 300, 360, 480, 600, 900, 1200, 1800, 2700, 3600},
StabilityLevel: metrics.ALPHA,
},
[]string{},
)
// CgroupManagerDuration is a Histogram that tracks the duration (in seconds) it takes for cgroup manager operations to complete.
// Broken down by method.
CgroupManagerDuration = metrics.NewHistogramVec(
@ -541,6 +560,7 @@ func Register(collectors ...metrics.StableCollector) {
legacyregistry.MustRegister(NodeName)
legacyregistry.MustRegister(PodWorkerDuration)
legacyregistry.MustRegister(PodStartDuration)
legacyregistry.MustRegister(PodStartSLIDuration)
legacyregistry.MustRegister(CgroupManagerDuration)
legacyregistry.MustRegister(PodWorkerStartDuration)
legacyregistry.MustRegister(PodStatusSyncDuration)
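
A subtlety in the definition above: the vector is declared with an empty label list ([]string{}), so every observation lands on a single implicit series and callers use WithLabelValues() with no arguments (as the tracker below does). The same pattern in standalone, runnable form — a sketch using plain client_golang rather than the component-base wrappers, with a made-up metric name:

package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	// same bucket layout as PodStartSLIDuration, declared with no labels
	h := prometheus.NewHistogramVec(prometheus.HistogramOpts{
		Name:    "pod_start_sli_duration_seconds_demo", // hypothetical name
		Help:    "demo histogram with the pod start SLI bucket layout",
		Buckets: []float64{0.5, 1, 2, 3, 4, 5, 6, 8, 10, 20, 30, 45, 60, 120, 180, 240, 300, 360, 480, 600, 900, 1200, 1800, 2700, 3600},
	}, []string{})

	// no labels declared, so WithLabelValues takes no arguments
	h.WithLabelValues().Observe(2.9)
	fmt.Println("observed a 2.9s pod start")
}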

View File

@ -29,6 +29,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/prober/results"
"k8s.io/kubernetes/pkg/kubelet/status"
statustest "k8s.io/kubernetes/pkg/kubelet/status/testing"
kubeletutil "k8s.io/kubernetes/pkg/kubelet/util"
"k8s.io/kubernetes/pkg/probe"
"k8s.io/utils/exec"
)
@ -105,10 +106,11 @@ func setTestProbe(pod *v1.Pod, probeType probeType, probeSpec v1.Probe) {
func newTestManager() *manager {
podManager := kubepod.NewBasicPodManager(nil, nil, nil)
podStartupLatencyTracker := kubeletutil.NewPodStartupLatencyTracker()
// Add test pod to pod manager, so that status manager can get the pod from pod manager if needed.
podManager.AddPod(getTestPod())
m := NewManager(
status.NewManager(&fake.Clientset{}, podManager, &statustest.FakePodDeletionSafetyProvider{}),
status.NewManager(&fake.Clientset{}, podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker),
results.NewManager(),
results.NewManager(),
results.NewManager(),

View File

@ -30,6 +30,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/prober/results"
"k8s.io/kubernetes/pkg/kubelet/status"
statustest "k8s.io/kubernetes/pkg/kubelet/status/testing"
kubeletutil "k8s.io/kubernetes/pkg/kubelet/util"
"k8s.io/kubernetes/pkg/probe"
)
@ -150,7 +151,7 @@ func TestDoProbe(t *testing.T) {
}
// Clean up.
m.statusManager = status.NewManager(&fake.Clientset{}, kubepod.NewBasicPodManager(nil, nil, nil), &statustest.FakePodDeletionSafetyProvider{})
m.statusManager = status.NewManager(&fake.Clientset{}, kubepod.NewBasicPodManager(nil, nil, nil), &statustest.FakePodDeletionSafetyProvider{}, kubeletutil.NewPodStartupLatencyTracker())
resultsManager(m, probeType).Remove(testContainerID)
}
}

View File

@ -44,6 +44,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/server/stats"
"k8s.io/kubernetes/pkg/kubelet/status"
statustest "k8s.io/kubernetes/pkg/kubelet/status/testing"
kubeletutil "k8s.io/kubernetes/pkg/kubelet/util"
"k8s.io/kubernetes/pkg/kubelet/volumemanager"
"k8s.io/kubernetes/pkg/volume"
volumetest "k8s.io/kubernetes/pkg/volume/testing"
@ -71,6 +72,7 @@ func TestRunOnce(t *testing.T) {
podManager := kubepod.NewBasicPodManager(
podtest.NewFakeMirrorClient(), fakeSecretManager, fakeConfigMapManager)
fakeRuntime := &containertest.FakeRuntime{}
podStartupLatencyTracker := kubeletutil.NewPodStartupLatencyTracker()
basePath, err := utiltesting.MkTmpdir("kubelet")
if err != nil {
t.Fatalf("can't make a temp rootdir %v", err)
@ -81,7 +83,7 @@ func TestRunOnce(t *testing.T) {
recorder: &record.FakeRecorder{},
cadvisor: cadvisor,
nodeLister: testNodeLister{},
statusManager: status.NewManager(nil, podManager, &statustest.FakePodDeletionSafetyProvider{}),
statusManager: status.NewManager(nil, podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker),
podManager: podManager,
podWorkers: &fakePodWorkers{},
os: &containertest.FakeOS{},

View File

@ -75,6 +75,8 @@ type manager struct {
// apiStatusVersions must only be accessed from the sync thread.
apiStatusVersions map[kubetypes.MirrorPodUID]uint64
podDeletionSafety PodDeletionSafetyProvider
podStartupLatencyHelper PodStartupLatencyStateHelper
}
// PodStatusProvider knows how to provide status for a pod. It's intended to be used by other components
@ -93,6 +95,11 @@ type PodDeletionSafetyProvider interface {
PodCouldHaveRunningContainers(pod *v1.Pod) bool
}
type PodStartupLatencyStateHelper interface {
RecordStatusUpdated(pod *v1.Pod)
DeletePodStartupState(podUID types.UID)
}
// Manager is the Source of truth for kubelet pod status, and should be kept up-to-date with
// the latest v1.PodStatus. It also syncs updates back to the API server.
type Manager interface {
@ -124,14 +131,15 @@ type Manager interface {
const syncPeriod = 10 * time.Second
// NewManager returns a functional Manager.
func NewManager(kubeClient clientset.Interface, podManager kubepod.Manager, podDeletionSafety PodDeletionSafetyProvider) Manager {
func NewManager(kubeClient clientset.Interface, podManager kubepod.Manager, podDeletionSafety PodDeletionSafetyProvider, podStartupLatencyHelper PodStartupLatencyStateHelper) Manager {
return &manager{
kubeClient: kubeClient,
podManager: podManager,
podStatuses: make(map[types.UID]versionedPodStatus),
podStatusChannel: make(chan podStatusSyncRequest, 1000), // Buffer up to 1000 statuses
apiStatusVersions: make(map[kubetypes.MirrorPodUID]uint64),
podDeletionSafety: podDeletionSafety,
kubeClient: kubeClient,
podManager: podManager,
podStatuses: make(map[types.UID]versionedPodStatus),
podStatusChannel: make(chan podStatusSyncRequest, 1000), // Buffer up to 1000 statuses
apiStatusVersions: make(map[kubetypes.MirrorPodUID]uint64),
podDeletionSafety: podDeletionSafety,
podStartupLatencyHelper: podStartupLatencyHelper,
}
}
@ -599,6 +607,7 @@ func (m *manager) deletePodStatus(uid types.UID) {
m.podStatusesLock.Lock()
defer m.podStatusesLock.Unlock()
delete(m.podStatuses, uid)
m.podStartupLatencyHelper.DeletePodStartupState(uid)
}
// TODO(filipg): It'd be cleaner if we can do this without signal from user.
@ -710,6 +719,8 @@ func (m *manager) syncPod(uid types.UID, status versionedPodStatus) {
} else {
klog.V(3).InfoS("Status for pod updated successfully", "pod", klog.KObj(pod), "statusVersion", status.version, "status", mergedStatus)
pod = newPod
// We pass the new object (the result of the API call, which contains the updated ResourceVersion)
m.podStartupLatencyHelper.RecordStatusUpdated(pod)
}
// measure how long the status update took to propagate from generation to update on the server
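
NewManager now takes a PodStartupLatencyStateHelper, which it calls on every successful status write (RecordStatusUpdated) and on status deletion (DeletePodStartupState). The updated tests below satisfy it with a real tracker; a caller that doesn't care about the SLI could equally pass a stub. A sketch, assuming this file's v1 and types imports (noopStartupHelper is invented for illustration):

// noopStartupHelper satisfies PodStartupLatencyStateHelper without tracking anything.
type noopStartupHelper struct{}

func (noopStartupHelper) RecordStatusUpdated(pod *v1.Pod)        {}
func (noopStartupHelper) DeletePodStartupState(podUID types.UID) {}

// e.g.: m := NewManager(kubeClient, podManager, deletionSafety, noopStartupHelper{})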

View File

@ -46,6 +46,7 @@ import (
kubesecret "k8s.io/kubernetes/pkg/kubelet/secret"
statustest "k8s.io/kubernetes/pkg/kubelet/status/testing"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
"k8s.io/kubernetes/pkg/kubelet/util"
)
// Generate new instance of test pod with the same initial value.
@ -84,7 +85,8 @@ func (m *manager) testSyncBatch() {
func newTestManager(kubeClient clientset.Interface) *manager {
podManager := kubepod.NewBasicPodManager(podtest.NewFakeMirrorClient(), kubesecret.NewFakeManager(), kubeconfigmap.NewFakeManager())
podManager.AddPod(getTestPod())
return NewManager(kubeClient, podManager, &statustest.FakePodDeletionSafetyProvider{}).(*manager)
podStartupLatencyTracker := util.NewPodStartupLatencyTracker()
return NewManager(kubeClient, podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker).(*manager)
}
func generateRandomMessage() string {
@ -958,7 +960,8 @@ func TestTerminatePod_DefaultUnknownStatus(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
podManager := kubepod.NewBasicPodManager(podtest.NewFakeMirrorClient(), kubesecret.NewFakeManager(), kubeconfigmap.NewFakeManager())
syncer := NewManager(&fake.Clientset{}, podManager, &statustest.FakePodDeletionSafetyProvider{}).(*manager)
podStartupLatencyTracker := util.NewPodStartupLatencyTracker()
syncer := NewManager(&fake.Clientset{}, podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker).(*manager)
original := tc.pod.DeepCopy()
syncer.SetPodStatus(original, original.Status)

View File

@ -118,6 +118,53 @@ func (mr *MockPodDeletionSafetyProviderMockRecorder) PodResourcesAreReclaimed(po
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "PodResourcesAreReclaimed", reflect.TypeOf((*MockPodDeletionSafetyProvider)(nil).PodResourcesAreReclaimed), pod, status)
}
// MockPodStartupLatencyStateHelper is a mock of PodStartupLatencyStateHelper interface.
type MockPodStartupLatencyStateHelper struct {
ctrl *gomock.Controller
recorder *MockPodStartupLatencyStateHelperMockRecorder
}
// MockPodStartupLatencyStateHelperMockRecorder is the mock recorder for MockPodStartupLatencyStateHelper.
type MockPodStartupLatencyStateHelperMockRecorder struct {
mock *MockPodStartupLatencyStateHelper
}
// NewMockPodStartupLatencyStateHelper creates a new mock instance.
func NewMockPodStartupLatencyStateHelper(ctrl *gomock.Controller) *MockPodStartupLatencyStateHelper {
mock := &MockPodStartupLatencyStateHelper{ctrl: ctrl}
mock.recorder = &MockPodStartupLatencyStateHelperMockRecorder{mock}
return mock
}
// EXPECT returns an object that allows the caller to indicate expected use.
func (m *MockPodStartupLatencyStateHelper) EXPECT() *MockPodStartupLatencyStateHelperMockRecorder {
return m.recorder
}
// DeletePodStartupState mocks base method.
func (m *MockPodStartupLatencyStateHelper) DeletePodStartupState(podUID types.UID) {
m.ctrl.T.Helper()
m.ctrl.Call(m, "DeletePodStartupState", podUID)
}
// DeletePodStartupState indicates an expected call of DeletePodStartupState.
func (mr *MockPodStartupLatencyStateHelperMockRecorder) DeletePodStartupState(podUID interface{}) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeletePodStartupState", reflect.TypeOf((*MockPodStartupLatencyStateHelper)(nil).DeletePodStartupState), podUID)
}
// RecordStatusUpdated mocks base method.
func (m *MockPodStartupLatencyStateHelper) RecordStatusUpdated(pod *v1.Pod) {
m.ctrl.T.Helper()
m.ctrl.Call(m, "RecordStatusUpdated", pod)
}
// RecordStatusUpdated indicates an expected call of RecordStatusUpdated.
func (mr *MockPodStartupLatencyStateHelperMockRecorder) RecordStatusUpdated(pod interface{}) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RecordStatusUpdated", reflect.TypeOf((*MockPodStartupLatencyStateHelper)(nil).RecordStatusUpdated), pod)
}
// MockManager is a mock of Manager interface.
type MockManager struct {
ctrl *gomock.Controller

View File

@ -0,0 +1,186 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"sync"
"time"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/kubelet/metrics"
"k8s.io/utils/clock"
)
// PodStartupLatencyTracker records key moments for startup latency calculation,
// e.g. image pulling or the pod being observed as running on watch.
type PodStartupLatencyTracker interface {
ObservedPodOnWatch(pod *v1.Pod, when time.Time)
RecordImageStartedPulling(podUID types.UID)
RecordImageFinishedPulling(podUID types.UID)
RecordStatusUpdated(pod *v1.Pod)
DeletePodStartupState(podUID types.UID)
}
type basicPodStartupLatencyTracker struct {
// protect against concurrent read and write on pods map
lock sync.Mutex
pods map[types.UID]*perPodState
// For testability
clock clock.Clock
}
type perPodState struct {
firstStartedPulling time.Time
lastFinishedPulling time.Time
// first time the pod status changed to Running
observedRunningTime time.Time
// tracks whether the pod's startup latency metric was already recorded
metricRecorded bool
}
// NewPodStartupLatencyTracker creates an instance of PodStartupLatencyTracker
func NewPodStartupLatencyTracker() PodStartupLatencyTracker {
return &basicPodStartupLatencyTracker{
pods: map[types.UID]*perPodState{},
clock: clock.RealClock{},
}
}
func (p *basicPodStartupLatencyTracker) ObservedPodOnWatch(pod *v1.Pod, when time.Time) {
p.lock.Lock()
defer p.lock.Unlock()
// if the pod is terminal, we do not have to track it anymore for startup
if pod.Status.Phase == v1.PodFailed || pod.Status.Phase == v1.PodSucceeded {
delete(p.pods, pod.UID)
return
}
state := p.pods[pod.UID]
if state == nil {
// create a new record for the pod only if it has not yet been acknowledged by the Kubelet;
// this is required because we want to record the metric only for pods that were scheduled
// after the Kubelet started
if pod.Status.StartTime.IsZero() {
p.pods[pod.UID] = &perPodState{}
}
return
}
if state.observedRunningTime.IsZero() {
// skip, pod didn't start yet
return
}
if state.metricRecorded {
// skip, pod's latency already recorded
return
}
if hasPodStartedSLO(pod) {
podStartingDuration := when.Sub(pod.CreationTimestamp.Time)
imagePullingDuration := state.lastFinishedPulling.Sub(state.firstStartedPulling)
podStartSLOduration := (podStartingDuration - imagePullingDuration).Seconds()
klog.InfoS("Observed pod startup duration",
"pod", klog.KObj(pod),
"podStartSLOduration", podStartSLOduration,
"pod.CreationTimestamp", pod.CreationTimestamp.Time,
"firstStartedPulling", state.firstStartedPulling,
"lastFinishedPulling", state.lastFinishedPulling,
"observedRunningTime", state.observedRunningTime,
"watchObservedRunningTime", when)
metrics.PodStartSLIDuration.WithLabelValues().Observe(podStartSLOduration)
state.metricRecorded = true
}
}
func (p *basicPodStartupLatencyTracker) RecordImageStartedPulling(podUID types.UID) {
p.lock.Lock()
defer p.lock.Unlock()
state := p.pods[podUID]
if state == nil {
return
}
if state.firstStartedPulling.IsZero() {
state.firstStartedPulling = p.clock.Now()
}
}
func (p *basicPodStartupLatencyTracker) RecordImageFinishedPulling(podUID types.UID) {
p.lock.Lock()
defer p.lock.Unlock()
state := p.pods[podUID]
if state == nil {
return
}
state.lastFinishedPulling = p.clock.Now() // Now() is always greater than any value recorded in the past.
}
func (p *basicPodStartupLatencyTracker) RecordStatusUpdated(pod *v1.Pod) {
p.lock.Lock()
defer p.lock.Unlock()
state := p.pods[pod.UID]
if state == nil {
return
}
if state.metricRecorded {
// skip, pod latency already recorded
return
}
if !state.observedRunningTime.IsZero() {
// skip, pod already started
return
}
if hasPodStartedSLO(pod) {
klog.V(3).InfoS("Mark when the pod was running for the first time", "pod", klog.KObj(pod), "rv", pod.ResourceVersion)
state.observedRunningTime = p.clock.Now()
}
}
// hasPodStartedSLO reports whether each container of the given pod has been started at least once
//
// This should reflect the "Pod startup latency SLI" definition
// ref: https://github.com/kubernetes/community/blob/master/sig-scalability/slos/pod_startup_latency.md
func hasPodStartedSLO(pod *v1.Pod) bool {
for _, cs := range pod.Status.ContainerStatuses {
if cs.State.Running == nil || cs.State.Running.StartedAt.IsZero() {
return false
}
}
return true
}
func (p *basicPodStartupLatencyTracker) DeletePodStartupState(podUID types.UID) {
p.lock.Lock()
defer p.lock.Unlock()
delete(p.pods, podUID)
}
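
Putting the pieces together: the recorded value is (watch-observed running time − pod creation time) − (last image pull finish − first image pull start). A runnable sketch of that arithmetic with made-up timestamps, chosen to match the 20s expectation in the multi-pull test below:

package main

import (
	"fmt"
	"time"
)

func main() {
	created := time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC)
	firstStartedPulling := created.Add(10 * time.Second) // first pull begins
	lastFinishedPulling := created.Add(20 * time.Second) // last pull ends
	watchObserved := created.Add(30 * time.Second)       // pod seen running on watch

	podStartingDuration := watchObserved.Sub(created)                    // 30s total
	imagePullingDuration := lastFinishedPulling.Sub(firstStartedPulling) // 10s pulling
	podStartSLOduration := (podStartingDuration - imagePullingDuration).Seconds()

	fmt.Printf("pod_start_sli_duration_seconds observation: %vs\n", podStartSLOduration) // 20s
}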

View File

@ -0,0 +1,278 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"strings"
"testing"
"time"
"github.com/stretchr/testify/assert"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/component-base/metrics/testutil"
"k8s.io/kubernetes/pkg/kubelet/metrics"
testingclock "k8s.io/utils/clock/testing"
)
var frozenTime = time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC)
const (
uid = "3c1df8a9-11a8-4791-aeae-184c18cca686"
metricsName = "kubelet_pod_start_sli_duration_seconds"
)
func TestNoEvents(t *testing.T) {
t.Run("metrics registered; no incoming events", func(t *testing.T) {
// expects no metrics in the output
wants := ""
metrics.Register()
tracker := &basicPodStartupLatencyTracker{
pods: map[types.UID]*perPodState{},
}
if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil {
t.Fatal(err)
}
assert.Empty(t, tracker.pods)
metrics.PodStartSLIDuration.Reset()
})
}
func TestPodsRunningBeforeKubeletStarted(t *testing.T) {
t.Run("pod was running for 10m before kubelet started", func(t *testing.T) {
// expects no metrics in the output
wants := ""
metrics.Register()
tracker := &basicPodStartupLatencyTracker{
pods: map[types.UID]*perPodState{},
}
if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil {
t.Fatal(err)
}
podStarted := &corev1.Pod{
Status: corev1.PodStatus{
StartTime: &metav1.Time{Time: frozenTime.Add(-10 * time.Minute)},
},
}
tracker.ObservedPodOnWatch(podStarted, frozenTime)
assert.Empty(t, tracker.pods)
metrics.PodStartSLIDuration.Reset()
})
}
func TestSinglePodOneImageDownloadRecorded(t *testing.T) {
t.Run("single pod; started in 3s, image pulling 100ms", func(t *testing.T) {
wants := `
# HELP kubelet_pod_start_sli_duration_seconds [ALPHA] Duration in seconds to start a pod, excluding time to pull images and run init containers, measured from pod creation timestamp to when all its containers are reported as started and observed via watch
# TYPE kubelet_pod_start_sli_duration_seconds histogram
kubelet_pod_start_sli_duration_seconds_bucket{le="0.5"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="1"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="2"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="3"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="4"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="5"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="6"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="8"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="10"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="20"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="30"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="45"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="60"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="120"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="180"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="240"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="300"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="360"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="480"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="600"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="900"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="1200"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="1800"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="2700"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="3600"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="+Inf"} 1
kubelet_pod_start_sli_duration_seconds_sum 2.9
kubelet_pod_start_sli_duration_seconds_count 1
`
fakeClock := testingclock.NewFakeClock(frozenTime)
metrics.Register()
tracker := &basicPodStartupLatencyTracker{
pods: map[types.UID]*perPodState{},
clock: fakeClock,
}
podInit := buildInitializingPod()
tracker.ObservedPodOnWatch(podInit, frozenTime)
// image pulling took 100ms
tracker.RecordImageStartedPulling(podInit.UID)
fakeClock.Step(time.Millisecond * 100)
tracker.RecordImageFinishedPulling(podInit.UID)
podStarted := buildRunningPod()
tracker.RecordStatusUpdated(podStarted)
// 3s later, observe the same pod on watch
tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*3))
if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil {
t.Fatal(err)
}
// cleanup
tracker.DeletePodStartupState(podStarted.UID)
assert.Empty(t, tracker.pods)
metrics.PodStartSLIDuration.Reset()
})
}
func TestSinglePodMultipleDownloadsAndRestartsRecorded(t *testing.T) {
t.Run("single pod; started in 30s, image pulling between 10th and 20th seconds", func(t *testing.T) {
wants := `
# HELP kubelet_pod_start_sli_duration_seconds [ALPHA] Duration in seconds to start a pod, excluding time to pull images and run init containers, measured from pod creation timestamp to when all its containers are reported as started and observed via watch
# TYPE kubelet_pod_start_sli_duration_seconds histogram
kubelet_pod_start_sli_duration_seconds_bucket{le="0.5"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="1"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="2"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="3"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="4"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="5"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="6"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="8"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="10"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="20"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="30"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="45"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="60"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="120"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="180"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="240"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="300"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="360"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="480"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="600"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="900"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="1200"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="1800"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="2700"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="3600"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="+Inf"} 1
kubelet_pod_start_sli_duration_seconds_sum 20
kubelet_pod_start_sli_duration_seconds_count 1
`
fakeClock := testingclock.NewFakeClock(frozenTime)
metrics.Register()
tracker := &basicPodStartupLatencyTracker{
pods: map[types.UID]*perPodState{},
clock: fakeClock,
}
podInitializing := buildInitializingPod()
tracker.ObservedPodOnWatch(podInitializing, frozenTime)
// image pulling started at 10s and the last one finished at 20s
// first image starts pulling at 10s
fakeClock.SetTime(frozenTime.Add(time.Second * 10))
tracker.RecordImageStartedPulling(podInitializing.UID)
// second image starts pulling at 11s
fakeClock.SetTime(frozenTime.Add(time.Second * 11))
tracker.RecordImageStartedPulling(podInitializing.UID)
// third image starts pulling at 14s
fakeClock.SetTime(frozenTime.Add(time.Second * 14))
tracker.RecordImageStartedPulling(podInitializing.UID)
// first image finished pulling at 18s
fakeClock.SetTime(frozenTime.Add(time.Second * 18))
tracker.RecordImageFinishedPulling(podInitializing.UID)
// second and third finished pulling at 20s
fakeClock.SetTime(frozenTime.Add(time.Second * 20))
tracker.RecordImageFinishedPulling(podInitializing.UID)
// pod started
podStarted := buildRunningPod()
tracker.RecordStatusUpdated(podStarted)
// at 30s observe the same pod on watch
tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*30))
if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil {
t.Fatal(err)
}
// any new pod observations should not impact the metrics, as the pod should be recorded only once
tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*150))
tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*200))
tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*250))
if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil {
t.Fatal(err)
}
// cleanup
tracker.DeletePodStartupState(podStarted.UID)
assert.Empty(t, tracker.pods)
metrics.PodStartSLIDuration.Reset()
})
}
func buildInitializingPod() *corev1.Pod {
return buildPodWithStatus([]corev1.ContainerStatus{
{State: corev1.ContainerState{Waiting: &corev1.ContainerStateWaiting{Reason: "PodInitializing"}}},
})
}
func buildRunningPod() *corev1.Pod {
return buildPodWithStatus([]corev1.ContainerStatus{
{State: corev1.ContainerState{Running: &corev1.ContainerStateRunning{StartedAt: metav1.NewTime(frozenTime)}}},
})
}
func buildPodWithStatus(cs []corev1.ContainerStatus) *corev1.Pod {
return &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: types.UID(uid),
CreationTimestamp: metav1.NewTime(frozenTime),
},
Status: corev1.PodStatus{
ContainerStatuses: cs,
},
}
}
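
One property of these helpers worth spelling out: hasPodStartedSLO requires every container status to be Running with a non-zero StartedAt, so a pod with any container still waiting never gets its running time marked. A hypothetical extra test in this package (not part of the change) illustrating that:

func TestHasPodStartedSLOMixedContainers(t *testing.T) {
	// one running and one still-initializing container: the waiting container
	// blocks the SLI start mark, so hasPodStartedSLO must return false
	mixed := buildPodWithStatus([]corev1.ContainerStatus{
		{State: corev1.ContainerState{Running: &corev1.ContainerStateRunning{StartedAt: metav1.NewTime(frozenTime)}}},
		{State: corev1.ContainerState{Waiting: &corev1.ContainerStateWaiting{Reason: "PodInitializing"}}},
	})
	assert.False(t, hasPodStartedSLO(mixed))
}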

View File

@ -46,6 +46,7 @@ var interestingKubeletMetrics = []string{
"kubelet_docker_operations_errors_total",
"kubelet_docker_operations_duration_seconds",
"kubelet_pod_start_duration_seconds",
"kubelet_pod_start_sli_duration_seconds",
"kubelet_pod_worker_duration_seconds",
"kubelet_pod_worker_start_duration_seconds",
}

View File

@ -44,6 +44,8 @@ const (
// Taken from k8s.io/kubernetes/pkg/kubelet/metrics
podStartDurationKey = "pod_start_duration_seconds"
// Taken from k8s.io/kubernetes/pkg/kubelet/metrics
PodStartSLIDurationKey = "pod_start_sli_duration_seconds"
// Taken from k8s.io/kubernetes/pkg/kubelet/metrics
cgroupManagerOperationsKey = "cgroup_manager_duration_seconds"
// Taken from k8s.io/kubernetes/pkg/kubelet/metrics
podWorkerStartDurationKey = "pod_worker_start_duration_seconds"
@ -175,6 +177,7 @@ func GetDefaultKubeletLatencyMetrics(ms KubeletMetrics) KubeletLatencyMetrics {
podWorkerDurationKey,
podWorkerStartDurationKey,
podStartDurationKey,
PodStartSLIDurationKey,
cgroupManagerOperationsKey,
dockerOperationsLatencyKey,
podWorkerStartDurationKey,