# kubernetes/test/instrumentation/documentation/documentation-list.yaml
#
# 4160 lines
# 92 KiB
# YAML

- name: version_info
namespace: etcd
help: Etcd server's binary version
type: Gauge
stabilityLevel: ALPHA
labels:
- binary_version
- name: certificate_manager_client_ttl_seconds
subsystem: kubelet
help: Gauge of the TTL (time-to-live) of the Kubelet's client certificate. The value
is in seconds until certificate expiry (negative if already expired). If client
certificate is invalid or unused, the value will be +INF.
type: Gauge
stabilityLevel: ALPHA
- name: changes
subsystem: endpoint_slice_controller
help: Number of EndpointSlice changes
type: Counter
stabilityLevel: ALPHA
labels:
- operation
- name: desired_endpoint_slices
subsystem: endpoint_slice_controller
help: Number of EndpointSlices that would exist with perfect endpoint allocation
type: Gauge
stabilityLevel: ALPHA
- name: endpoints_added_per_sync
subsystem: endpoint_slice_controller
help: Number of endpoints added on each Service sync
type: Histogram
stabilityLevel: ALPHA
buckets:
- 2
- 4
- 8
- 16
- 32
- 64
- 128
- 256
- 512
- 1024
- 2048
- 4096
- 8192
- 16384
- 32768
- name: endpoints_desired
subsystem: endpoint_slice_controller
help: Number of endpoints desired
type: Gauge
stabilityLevel: ALPHA
- name: endpoints_removed_per_sync
subsystem: endpoint_slice_controller
help: Number of endpoints removed on each Service sync
type: Histogram
stabilityLevel: ALPHA
buckets:
- 2
- 4
- 8
- 16
- 32
- 64
- 128
- 256
- 512
- 1024
- 2048
- 4096
- 8192
- 16384
- 32768
- name: endpointslices_changed_per_sync
subsystem: endpoint_slice_controller
help: Number of EndpointSlices changed on each Service sync
type: Histogram
stabilityLevel: ALPHA
labels:
- topology
- name: num_endpoint_slices
subsystem: endpoint_slice_controller
help: Number of EndpointSlices
type: Gauge
stabilityLevel: ALPHA
- name: syncs
subsystem: endpoint_slice_controller
help: Number of EndpointSlice syncs
type: Counter
stabilityLevel: ALPHA
labels:
- result
- name: addresses_skipped_per_sync
subsystem: endpoint_slice_mirroring_controller
help: Number of addresses skipped on each Endpoints sync due to being invalid or
exceeding MaxEndpointsPerSubset
type: Histogram
stabilityLevel: ALPHA
buckets:
- 2
- 4
- 8
- 16
- 32
- 64
- 128
- 256
- 512
- 1024
- 2048
- 4096
- 8192
- 16384
- 32768
- name: changes
subsystem: endpoint_slice_mirroring_controller
help: Number of EndpointSlice changes
type: Counter
stabilityLevel: ALPHA
labels:
- operation
- name: desired_endpoint_slices
subsystem: endpoint_slice_mirroring_controller
help: Number of EndpointSlices that would exist with perfect endpoint allocation
type: Gauge
stabilityLevel: ALPHA
- name: endpoints_added_per_sync
subsystem: endpoint_slice_mirroring_controller
help: Number of endpoints added on each Endpoints sync
type: Histogram
stabilityLevel: ALPHA
buckets:
- 2
- 4
- 8
- 16
- 32
- 64
- 128
- 256
- 512
- 1024
- 2048
- 4096
- 8192
- 16384
- 32768
- name: endpoints_desired
subsystem: endpoint_slice_mirroring_controller
help: Number of endpoints desired
type: Gauge
stabilityLevel: ALPHA
- name: endpoints_removed_per_sync
subsystem: endpoint_slice_mirroring_controller
help: Number of endpoints removed on each Endpoints sync
type: Histogram
stabilityLevel: ALPHA
buckets:
- 2
- 4
- 8
- 16
- 32
- 64
- 128
- 256
- 512
- 1024
- 2048
- 4096
- 8192
- 16384
- 32768
- name: endpoints_sync_duration
subsystem: endpoint_slice_mirroring_controller
help: Duration of syncEndpoints() in seconds
type: Histogram
stabilityLevel: ALPHA
buckets:
- 0.001
- 0.002
- 0.004
- 0.008
- 0.016
- 0.032
- 0.064
- 0.128
- 0.256
- 0.512
- 1.024
- 2.048
- 4.096
- 8.192
- 16.384
- name: endpoints_updated_per_sync
subsystem: endpoint_slice_mirroring_controller
help: Number of endpoints updated on each Endpoints sync
type: Histogram
stabilityLevel: ALPHA
buckets:
- 2
- 4
- 8
- 16
- 32
- 64
- 128
- 256
- 512
- 1024
- 2048
- 4096
- 8192
- 16384
- 32768
- name: num_endpoint_slices
subsystem: endpoint_slice_mirroring_controller
help: Number of EndpointSlices
type: Gauge
stabilityLevel: ALPHA
- name: resources_sync_error_total
subsystem: garbagecollector_controller
help: Number of garbage collector resources sync errors
type: Counter
stabilityLevel: ALPHA
- name: sync_duration_seconds
subsystem: root_ca_cert_publisher
help: Duration in seconds of namespace syncs in root ca cert publisher.
type: Histogram
stabilityLevel: ALPHA
labels:
- code
buckets:
- 0.001
- 0.002
- 0.004
- 0.008
- 0.016
- 0.032
- 0.064
- 0.128
- 0.256
- 0.512
- 1.024
- 2.048
- 4.096
- 8.192
- 16.384
- name: sync_total
subsystem: root_ca_cert_publisher
help: Number of namespace syncs happened in root ca cert publisher.
type: Counter
stabilityLevel: ALPHA
labels:
- code
- name: job_creation_skew_duration_seconds
subsystem: cronjob_controller
help: Time between when a cronjob is scheduled to be run, and when the corresponding
job is created
type: Histogram
stabilityLevel: STABLE
buckets:
- 1
- 2
- 4
- 8
- 16
- 32
- 64
- 128
- 256
- 512
- name: pod_failures_handled_by_failure_policy_total
subsystem: job_controller
help: "The number of failed Pods handled by failure policy with respect to the
failure policy action applied based on the matched rule. Possible values of the
action label correspond to the possible values for the failure policy rule action,
which are: \"FailJob\", \"Ignore\" and \"Count\"."
type: Counter
stabilityLevel: ALPHA
labels:
- action
- name: terminated_pods_tracking_finalizer_total
subsystem: job_controller
help: |-
The number of terminated pods (phase=Failed|Succeeded)
that have the finalizer batch.kubernetes.io/job-tracking.
The event label can be "add" or "delete".
type: Counter
stabilityLevel: ALPHA
labels:
- event
- name: evictions_number
subsystem: node_collector
help: Number of Node evictions that happened since current instance of NodeController
started. This metric is replaced by node_collector_evictions_total.
type: Counter
deprecatedVersion: 1.24.0
stabilityLevel: ALPHA
labels:
- zone
- name: unhealthy_nodes_in_zone
subsystem: node_collector
help: Gauge measuring number of not Ready Nodes per zone.
type: Gauge
stabilityLevel: ALPHA
labels:
- zone
- name: update_all_nodes_health_duration_seconds
subsystem: node_collector
help: Duration in seconds for NodeController to update the health of all nodes.
type: Histogram
stabilityLevel: ALPHA
buckets:
- 0.01
- 0.04
- 0.16
- 0.64
- 2.56
- 10.24
- 40.96
- 163.84
- name: update_node_health_duration_seconds
subsystem: node_collector
help: Duration in seconds for NodeController to update the health of a single node.
type: Histogram
stabilityLevel: ALPHA
buckets:
- 0.001
- 0.004
- 0.016
- 0.064
- 0.256
- 1.024
- 4.096
- 16.384
- name: zone_health
subsystem: node_collector
help: Gauge measuring percentage of healthy nodes per zone.
type: Gauge
stabilityLevel: ALPHA
labels:
- zone
- name: zone_size
subsystem: node_collector
help: Gauge measuring number of registered Nodes per zone.
type: Gauge
stabilityLevel: ALPHA
labels:
- zone
- name: cidrset_allocation_tries_per_request
subsystem: node_ipam_controller
help: Histogram measuring CIDR allocation tries per request.
type: Histogram
stabilityLevel: ALPHA
labels:
- clusterCIDR
buckets:
- 1
- 5
- 25
- 125
- 625
- name: cidrset_cidrs_allocations_total
subsystem: node_ipam_controller
help: Counter measuring total number of CIDR allocations.
type: Counter
stabilityLevel: ALPHA
labels:
- clusterCIDR
- name: cidrset_cidrs_releases_total
subsystem: node_ipam_controller
help: Counter measuring total number of CIDR releases.
type: Counter
stabilityLevel: ALPHA
labels:
- clusterCIDR
- name: cidrset_usage_cidrs
subsystem: node_ipam_controller
help: Gauge measuring percentage of allocated CIDRs.
type: Gauge
stabilityLevel: ALPHA
labels:
- clusterCIDR
- name: cirdset_max_cidrs
subsystem: node_ipam_controller
help: Maximum number of CIDRs that can be allocated.
type: Gauge
stabilityLevel: ALPHA
labels:
- clusterCIDR
- name: multicidrset_allocation_tries_per_request
subsystem: node_ipam_controller
help: Histogram measuring CIDR allocation tries per request.
type: Histogram
stabilityLevel: ALPHA
labels:
- clusterCIDR
buckets:
- 1
- 5
- 25
- 125
- 625
- name: multicidrset_cidrs_allocations_total
subsystem: node_ipam_controller
help: Counter measuring total number of CIDR allocations.
type: Counter
stabilityLevel: ALPHA
labels:
- clusterCIDR
- name: multicidrset_cidrs_releases_total
subsystem: node_ipam_controller
help: Counter measuring total number of CIDR releases.
type: Counter
stabilityLevel: ALPHA
labels:
- clusterCIDR
- name: multicidrset_usage_cidrs
subsystem: node_ipam_controller
help: Gauge measuring percentage of allocated CIDRs.
type: Gauge
stabilityLevel: ALPHA
labels:
- clusterCIDR
- name: multicirdset_max_cidrs
subsystem: node_ipam_controller
help: Maximum number of CIDRs that can be allocated.
type: Gauge
stabilityLevel: ALPHA
labels:
- clusterCIDR
- name: force_delete_pod_errors_total
subsystem: pod_gc_collector
help: Number of errors encountered when forcefully deleting the pods since the Pod
GC Controller started.
type: Counter
stabilityLevel: ALPHA
- name: force_delete_pods_total
subsystem: pod_gc_collector
help: Number of pods that are being forcefully deleted since the Pod GC Controller
started.
type: Counter
stabilityLevel: ALPHA
- name: sorting_deletion_age_ratio
subsystem: replicaset_controller
help: The ratio of chosen deleted pod's ages to the current youngest pod's age (at
the time). Should be <2. The intent of this metric is to measure the rough efficacy
of the LogarithmicScaleDown feature gate's effect on the sorting (and deletion)
of pods when a replicaset scales down. This only considers Ready pods when calculating
and reporting.
type: Histogram
stabilityLevel: ALPHA
buckets:
- 0.25
- 0.5
- 1
- 2
- 4
- 8
- name: create_attempts_total
subsystem: resourceclaim_controller
help: Number of ResourceClaims creation requests
type: Counter
stabilityLevel: ALPHA
- name: create_failures_total
subsystem: resourceclaim_controller
help: Number of ResourceClaims creation request failures
type: Counter
stabilityLevel: ALPHA
- name: job_deletion_duration_seconds
subsystem: ttl_after_finished_controller
help: The time it took to delete the job since it became eligible for deletion
type: Histogram
stabilityLevel: ALPHA
buckets:
- 0.1
- 0.2
- 0.4
- 0.8
- 1.6
- 3.2
- 6.4
- 12.8
- 25.6
- 51.2
- 102.4
- 204.8
- 409.6
- 819.2
- name: job_pods_finished_total
subsystem: job_controller
help: The number of finished Pods that are fully tracked
type: Counter
stabilityLevel: STABLE
labels:
- completion_mode
- result
- name: job_sync_duration_seconds
subsystem: job_controller
help: The time it took to sync a job
type: Histogram
stabilityLevel: STABLE
labels:
- action
- completion_mode
- result
buckets:
- 0.001
- 0.002
- 0.004
- 0.008
- 0.016
- 0.032
- 0.064
- 0.128
- 0.256
- 0.512
- 1.024
- 2.048
- 4.096
- 8.192
- 16.384
- name: job_syncs_total
subsystem: job_controller
help: The number of job syncs
type: Counter
stabilityLevel: STABLE
labels:
- action
- completion_mode
- result
- name: jobs_finished_total
subsystem: job_controller
help: The number of finished jobs
type: Counter
stabilityLevel: STABLE
labels:
- completion_mode
- reason
- result
- name: evictions_total
subsystem: node_collector
help: Number of Node evictions that happened since current instance of NodeController
started.
type: Counter
stabilityLevel: STABLE
labels:
- zone
- name: attachdetach_controller_forced_detaches
help: Number of times the A/D Controller performed a forced detach
type: Counter
stabilityLevel: ALPHA
- name: attachdetach_controller_total_volumes
help: Number of volumes in A/D Controller
type: Custom
stabilityLevel: ALPHA
labels:
- plugin_name
- state
- name: create_failures_total
subsystem: ephemeral_volume_controller
help: Number of PersistentVolumeClaims creation request failures
type: Counter
stabilityLevel: ALPHA
- name: create_total
subsystem: ephemeral_volume_controller
help: Number of PersistentVolumeClaims creation requests
type: Counter
stabilityLevel: ALPHA
- name: client_expiration_renew_errors
subsystem: certificate_manager
namespace: kubelet
help: Counter of certificate renewal errors.
type: Counter
stabilityLevel: ALPHA
- name: certificate_manager_server_rotation_seconds
subsystem: kubelet
help: Histogram of the number of seconds the previous certificate lived before being
rotated.
type: Histogram
stabilityLevel: ALPHA
buckets:
- 60
- 3600
- 14400
- 86400
- 604800
- 2.592e+06
- 7.776e+06
- 1.5552e+07
- 3.1104e+07
- 1.24416e+08
- name: certificate_manager_server_ttl_seconds
subsystem: kubelet
help: Gauge of the shortest TTL (time-to-live) of the Kubelet's serving certificate.
The value is in seconds until certificate expiry (negative if already expired).
If serving certificate is invalid or unused, the value will be +INF.
type: Gauge
stabilityLevel: ALPHA
- name: credential_provider_plugin_duration
subsystem: kubelet
help: Duration of execution in seconds for credential provider plugin
type: Histogram
stabilityLevel: ALPHA
labels:
- plugin_name
buckets:
- 0.005
- 0.01
- 0.025
- 0.05
- 0.1
- 0.25
- 0.5
- 1
- 2.5
- 5
- 10
- name: credential_provider_plugin_errors
subsystem: kubelet
help: Number of errors from credential provider plugin
type: Counter
stabilityLevel: ALPHA
labels:
- plugin_name
- name: server_expiration_renew_errors
subsystem: kubelet
help: Counter of certificate renewal errors.
type: Counter
stabilityLevel: ALPHA
- name: pv_collector_bound_pv_count
help: Gauge measuring number of persistent volume currently bound
type: Custom
stabilityLevel: ALPHA
labels:
- storage_class
- name: pv_collector_bound_pvc_count
help: Gauge measuring number of persistent volume claim currently bound
type: Custom
stabilityLevel: ALPHA
labels:
- namespace
- name: pv_collector_total_pv_count
help: Gauge measuring total number of persistent volumes
type: Custom
stabilityLevel: ALPHA
labels:
- plugin_name
- volume_mode
- name: pv_collector_unbound_pv_count
help: Gauge measuring number of persistent volume currently unbound
type: Custom
stabilityLevel: ALPHA
labels:
- storage_class
- name: pv_collector_unbound_pvc_count
help: Gauge measuring number of persistent volume claim currently unbound
type: Custom
stabilityLevel: ALPHA
labels:
- namespace
- name: retroactive_storageclass_errors_total
help: Total number of failed retroactive StorageClass assignments to persistent
volume claim
type: Counter
stabilityLevel: ALPHA
- name: retroactive_storageclass_total
help: Total number of retroactive StorageClass assignments to persistent volume
claim
type: Counter
stabilityLevel: ALPHA
- name: storage_count_attachable_volumes_in_use
help: Measure number of volumes in use
type: Custom
stabilityLevel: ALPHA
labels:
- node
- volume_plugin
- name: volume_operation_total_errors
help: Total volume operation errors
type: Counter
stabilityLevel: ALPHA
labels:
- operation_name
- plugin_name
- name: container_cpu_usage_seconds_total
help: Cumulative cpu time consumed by the container in core-seconds
type: Custom
stabilityLevel: ALPHA
labels:
- container
- pod
- namespace
- name: container_memory_working_set_bytes
help: Current working set of the container in bytes
type: Custom
stabilityLevel: ALPHA
labels:
- container
- pod
- namespace
- name: container_start_time_seconds
help: Start time of the container since unix epoch in seconds
type: Custom
stabilityLevel: ALPHA
labels:
- container
- pod
- namespace
- name: cgroup_manager_duration_seconds
subsystem: kubelet
help: Duration in seconds for cgroup manager operations. Broken down by method.
type: Histogram
stabilityLevel: ALPHA
labels:
- operation_type
buckets:
- 0.005
- 0.01
- 0.025
- 0.05
- 0.1
- 0.25
- 0.5
- 1
- 2.5
- 5
- 10
- name: kubelet_container_log_filesystem_used_bytes
help: Bytes used by the container's logs on the filesystem.
type: Custom
stabilityLevel: ALPHA
labels:
- uid
- namespace
- pod
- container
- name: containers_per_pod_count
subsystem: kubelet
help: The number of containers per pod.
type: Histogram
stabilityLevel: ALPHA
buckets:
- 1
- 2
- 4
- 8
- 16
- name: cpu_manager_pinning_errors_total
subsystem: kubelet
help: The number of cpu core allocations which required pinning and failed.
type: Counter
stabilityLevel: ALPHA
- name: cpu_manager_pinning_requests_total
subsystem: kubelet
help: The number of cpu core allocations which required pinning.
type: Counter
stabilityLevel: ALPHA
- name: device_plugin_alloc_duration_seconds
subsystem: kubelet
help: Duration in seconds to serve a device plugin Allocation request. Broken down
by resource name.
type: Histogram
stabilityLevel: ALPHA
labels:
- resource_name
buckets:
- 0.005
- 0.01
- 0.025
- 0.05
- 0.1
- 0.25
- 0.5
- 1
- 2.5
- 5
- 10
- name: device_plugin_registration_total
subsystem: kubelet
help: Cumulative number of device plugin registrations. Broken down by resource
name.
type: Counter
stabilityLevel: ALPHA
labels:
- resource_name
- name: eviction_stats_age_seconds
subsystem: kubelet
help: Time between when stats are collected, and when pod is evicted based on those
stats by eviction signal
type: Histogram
stabilityLevel: ALPHA
labels:
- eviction_signal
buckets:
- 0.005
- 0.01
- 0.025
- 0.05
- 0.1
- 0.25
- 0.5
- 1
- 2.5
- 5
- 10
- name: evictions
subsystem: kubelet
help: Cumulative number of pod evictions by eviction signal
type: Counter
stabilityLevel: ALPHA
labels:
- eviction_signal
- name: graceful_shutdown_end_time_seconds
subsystem: kubelet
help: Last graceful shutdown end time since unix epoch in seconds
type: Gauge
stabilityLevel: ALPHA
- name: graceful_shutdown_start_time_seconds
subsystem: kubelet
help: Last graceful shutdown start time since unix epoch in seconds
type: Gauge
stabilityLevel: ALPHA
- name: lifecycle_handler_http_fallbacks_total
subsystem: kubelet
help: The number of times lifecycle handlers successfully fell back to http from
https.
type: Counter
stabilityLevel: ALPHA
- name: managed_ephemeral_containers
subsystem: kubelet
help: Current number of ephemeral containers in pods managed by this kubelet.
type: Gauge
stabilityLevel: ALPHA
- name: node_name
subsystem: kubelet
help: The node's name. The count is always 1.
type: Gauge
stabilityLevel: ALPHA
labels:
- node
- name: pleg_discard_events
subsystem: kubelet
help: The number of discard events in PLEG.
type: Counter
stabilityLevel: ALPHA
- name: pleg_last_seen_seconds
subsystem: kubelet
help: Timestamp in seconds when PLEG was last seen active.
type: Gauge
stabilityLevel: ALPHA
- name: pleg_relist_duration_seconds
subsystem: kubelet
help: Duration in seconds for relisting pods in PLEG.
type: Histogram
stabilityLevel: ALPHA
buckets:
- 0.005
- 0.01
- 0.025
- 0.05
- 0.1
- 0.25
- 0.5
- 1
- 2.5
- 5
- 10
- name: pleg_relist_interval_seconds
subsystem: kubelet
help: Interval in seconds between relisting in PLEG.
type: Histogram
stabilityLevel: ALPHA
buckets:
- 0.005
- 0.01
- 0.025
- 0.05
- 0.1
- 0.25
- 0.5
- 1
- 2.5
- 5
- 10
- name: pod_resources_endpoint_errors_get_allocatable
subsystem: kubelet
help: Number of requests to the PodResource GetAllocatableResources endpoint which
returned error. Broken down by server api version.
type: Counter
stabilityLevel: ALPHA
labels:
- server_api_version
- name: pod_resources_endpoint_errors_list
subsystem: kubelet
help: Number of requests to the PodResource List endpoint which returned error.
Broken down by server api version.
type: Counter
stabilityLevel: ALPHA
labels:
- server_api_version
- name: pod_resources_endpoint_requests_get_allocatable
subsystem: kubelet
help: Number of requests to the PodResource GetAllocatableResources endpoint. Broken
down by server api version.
type: Counter
stabilityLevel: ALPHA
labels:
- server_api_version
- name: pod_resources_endpoint_requests_list
subsystem: kubelet
help: Number of requests to the PodResource List endpoint. Broken down by server
api version.
type: Counter
stabilityLevel: ALPHA
labels:
- server_api_version
- name: pod_resources_endpoint_requests_total
subsystem: kubelet
help: Cumulative number of requests to the PodResource endpoint. Broken down by
server api version.
type: Counter
stabilityLevel: ALPHA
labels:
- server_api_version
- name: pod_start_duration_seconds
subsystem: kubelet
help: Duration in seconds from kubelet seeing a pod for the first time to the pod
starting to run
type: Histogram
stabilityLevel: ALPHA
buckets:
- 0.005
- 0.01
- 0.025
- 0.05
- 0.1
- 0.25
- 0.5
- 1
- 2.5
- 5
- 10
- name: pod_start_sli_duration_seconds
subsystem: kubelet
help: Duration in seconds to start a pod, excluding time to pull images and run
init containers, measured from pod creation timestamp to when all its containers
are reported as started and observed via watch
type: Histogram
stabilityLevel: ALPHA
buckets:
- 0.5
- 1
- 2
- 3
- 4
- 5
- 6
- 8
- 10
- 20
- 30
- 45
- 60
- 120
- 180
- 240
- 300
- 360
- 480
- 600
- 900
- 1200
- 1800
- 2700
- 3600
- name: pod_status_sync_duration_seconds
subsystem: kubelet
help: Duration in seconds to sync a pod status update. Measures time from detection
of a change to pod status until the API is successfully updated for that pod,
even if multiple intervening changes to pod status occur.
type: Histogram
stabilityLevel: ALPHA
buckets:
- 0.01
- 0.05
- 0.1
- 0.5
- 1
- 5
- 10
- 20
- 30
- 45
- 60
- name: pod_worker_duration_seconds
subsystem: kubelet
help: 'Duration in seconds to sync a single pod. Broken down by operation type:
create, update, or sync'
type: Histogram
stabilityLevel: ALPHA
labels:
- operation_type
buckets:
- 0.005
- 0.01
- 0.025
- 0.05
- 0.1
- 0.25
- 0.5
- 1
- 2.5
- 5
- 10
- name: pod_worker_start_duration_seconds
subsystem: kubelet
help: Duration in seconds from kubelet seeing a pod to starting a worker.
type: Histogram
stabilityLevel: ALPHA
buckets:
- 0.005
- 0.01
- 0.025
- 0.05
- 0.1
- 0.25
- 0.5
- 1
- 2.5
- 5
- 10
- name: preemptions
subsystem: kubelet
help: Cumulative number of pod preemptions by preemption resource
type: Counter
stabilityLevel: ALPHA
labels:
- preemption_signal
- name: run_podsandbox_duration_seconds
subsystem: kubelet
help: Duration in seconds of the run_podsandbox operations. Broken down by RuntimeClass.Handler.
type: Histogram
stabilityLevel: ALPHA
labels:
- runtime_handler
buckets:
- 0.005
- 0.01
- 0.025
- 0.05
- 0.1
- 0.25
- 0.5
- 1
- 2.5
- 5
- 10
- name: run_podsandbox_errors_total
subsystem: kubelet
help: Cumulative number of the run_podsandbox operation errors by RuntimeClass.Handler.
type: Counter
stabilityLevel: ALPHA
labels:
- runtime_handler
- name: running_containers
subsystem: kubelet
help: Number of containers currently running
type: Gauge
stabilityLevel: ALPHA
labels:
- container_state
- name: running_pods
subsystem: kubelet
help: Number of pods that have a running pod sandbox
type: Gauge
stabilityLevel: ALPHA
- name: runtime_operations_duration_seconds
subsystem: kubelet
help: Duration in seconds of runtime operations. Broken down by operation type.
type: Histogram
stabilityLevel: ALPHA
labels:
- operation_type
buckets:
- 0.005
- 0.0125
- 0.03125
- 0.078125
- 0.1953125
- 0.48828125
- 1.220703125
- 3.0517578125
- 7.62939453125
- 19.073486328125
- 47.6837158203125
- 119.20928955078125
- 298.0232238769531
- 745.0580596923828
- name: runtime_operations_errors_total
subsystem: kubelet
help: Cumulative number of runtime operation errors by operation type.
type: Counter
stabilityLevel: ALPHA
labels:
- operation_type
- name: runtime_operations_total
subsystem: kubelet
help: Cumulative number of runtime operations by operation type.
type: Counter
stabilityLevel: ALPHA
labels:
- operation_type
- name: started_containers_errors_total
subsystem: kubelet
help: Cumulative number of errors when starting containers
type: Counter
stabilityLevel: ALPHA
labels:
- code
- container_type
- name: started_containers_total
subsystem: kubelet
help: Cumulative number of containers started
type: Counter
stabilityLevel: ALPHA
labels:
- container_type
- name: started_host_process_containers_errors_total
subsystem: kubelet
help: Cumulative number of errors when starting hostprocess containers. This metric
will only be collected on Windows and requires WindowsHostProcessContainers feature
gate to be enabled.
type: Counter
stabilityLevel: ALPHA
labels:
- code
- container_type
- name: started_host_process_containers_total
subsystem: kubelet
help: Cumulative number of hostprocess containers started. This metric will only
be collected on Windows and requires WindowsHostProcessContainers feature gate
to be enabled.
type: Counter
stabilityLevel: ALPHA
labels:
- container_type
- name: started_pods_errors_total
subsystem: kubelet
help: Cumulative number of errors when starting pods
type: Counter
stabilityLevel: ALPHA
- name: started_pods_total
subsystem: kubelet
help: Cumulative number of pods started
type: Counter
stabilityLevel: ALPHA
- name: topology_manager_admission_duration_ms
subsystem: kubelet
help: Duration in milliseconds to serve a pod admission request.
type: Histogram
stabilityLevel: ALPHA
buckets:
- 0.05
- 0.1
- 0.2
- 0.4
- 0.8
- 1.6
- 3.2
- 6.4
- 12.8
- 25.6
- 51.2
- 102.4
- 204.8
- 409.6
- 819.2
- name: topology_manager_admission_errors_total
subsystem: kubelet
help: The number of admission request failures where resources could not be aligned.
type: Counter
stabilityLevel: ALPHA
- name: topology_manager_admission_requests_total
subsystem: kubelet
help: The number of admission requests where resources have to be aligned.
type: Counter
stabilityLevel: ALPHA
- name: kubelet_volume_stats_available_bytes
help: Number of available bytes in the volume
type: Custom
stabilityLevel: ALPHA
labels:
- namespace
- persistentvolumeclaim
- name: kubelet_volume_stats_capacity_bytes
help: Capacity in bytes of the volume
type: Custom
stabilityLevel: ALPHA
labels:
- namespace
- persistentvolumeclaim
- name: kubelet_volume_stats_health_status_abnormal
help: Abnormal volume health status. The count is either 1 or 0. 1 indicates the
volume is unhealthy, 0 indicates volume is healthy
type: Custom
stabilityLevel: ALPHA
labels:
- namespace
- persistentvolumeclaim
- name: kubelet_volume_stats_inodes
help: Maximum number of inodes in the volume
type: Custom
stabilityLevel: ALPHA
labels:
- namespace
- persistentvolumeclaim
- name: kubelet_volume_stats_inodes_free
help: Number of free inodes in the volume
type: Custom
stabilityLevel: ALPHA
labels:
- namespace
- persistentvolumeclaim
- name: kubelet_volume_stats_inodes_used
help: Number of used inodes in the volume
type: Custom
stabilityLevel: ALPHA
labels:
- namespace
- persistentvolumeclaim
- name: kubelet_volume_stats_used_bytes
help: Number of used bytes in the volume
type: Custom
stabilityLevel: ALPHA
labels:
- namespace
- persistentvolumeclaim
- name: node_cpu_usage_seconds_total
help: Cumulative cpu time consumed by the node in core-seconds
type: Custom
stabilityLevel: ALPHA
- name: node_memory_working_set_bytes
help: Current working set of the node in bytes
type: Custom
stabilityLevel: ALPHA
- name: plugin_manager_total_plugins
help: Number of plugins in Plugin Manager
type: Custom
stabilityLevel: ALPHA
labels:
- socket_path
- state
- name: pod_cpu_usage_seconds_total
help: Cumulative cpu time consumed by the pod in core-seconds
type: Custom
stabilityLevel: ALPHA
labels:
- pod
- namespace
- name: pod_memory_working_set_bytes
help: Current working set of the pod in bytes
type: Custom
stabilityLevel: ALPHA
labels:
- pod
- namespace
- name: scrape_error
help: 1 if there was an error while getting container metrics, 0 otherwise
type: Custom
stabilityLevel: ALPHA
- name: http_inflight_requests
subsystem: kubelet
help: Number of the inflight http requests
type: Gauge
stabilityLevel: ALPHA
labels:
- long_running
- method
- path
- server_type
- name: http_requests_duration_seconds
subsystem: kubelet
help: Duration in seconds to serve http requests
type: Histogram
stabilityLevel: ALPHA
labels:
- long_running
- method
- path
- server_type
buckets:
- 0.005
- 0.01
- 0.025
- 0.05
- 0.1
- 0.25
- 0.5
- 1
- 2.5
- 5
- 10
- name: http_requests_total
subsystem: kubelet
help: Number of the http requests received since the server started
type: Counter
stabilityLevel: ALPHA
labels:
- long_running
- method
- path
- server_type
- name: volume_metric_collection_duration_seconds
subsystem: kubelet
help: Duration in seconds to calculate volume stats
type: Histogram
stabilityLevel: ALPHA
labels:
- metric_source
buckets:
- 0.005
- 0.01
- 0.025
- 0.05
- 0.1
- 0.25
- 0.5
- 1
- 2.5
- 5
- 10
- name: network_programming_duration_seconds
subsystem: kubeproxy
help: In Cluster Network Programming Latency in seconds
type: Histogram
stabilityLevel: ALPHA
buckets:
- 0.25
- 0.5
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
- 60
- 65
- 70
- 75
- 80
- 85
- 90
- 95
- 100
- 105
- 110
- 115
- 120
- 150
- 180
- 210
- 240
- 270
- 300
- name: sync_proxy_rules_duration_seconds
subsystem: kubeproxy
help: SyncProxyRules latency in seconds
type: Histogram
stabilityLevel: ALPHA
buckets:
- 0.001
- 0.002
- 0.004
- 0.008
- 0.016
- 0.032
- 0.064
- 0.128
- 0.256
- 0.512
- 1.024
- 2.048
- 4.096
- 8.192
- 16.384
- name: sync_proxy_rules_endpoint_changes_pending
subsystem: kubeproxy
help: Pending proxy rules Endpoint changes
type: Gauge
stabilityLevel: ALPHA
- name: sync_proxy_rules_endpoint_changes_total
subsystem: kubeproxy
help: Cumulative proxy rules Endpoint changes
type: Counter
stabilityLevel: ALPHA
- name: sync_proxy_rules_iptables_partial_restore_failures_total
subsystem: kubeproxy
help: Cumulative proxy iptables partial restore failures
type: Counter
stabilityLevel: ALPHA
- name: sync_proxy_rules_iptables_restore_failures_total
subsystem: kubeproxy
help: Cumulative proxy iptables restore failures
type: Counter
stabilityLevel: ALPHA
- name: sync_proxy_rules_iptables_total
subsystem: kubeproxy
help: Number of proxy iptables rules programmed
type: Gauge
stabilityLevel: ALPHA
labels:
- table
- name: sync_proxy_rules_last_queued_timestamp_seconds
subsystem: kubeproxy
help: The last time a sync of proxy rules was queued
type: Gauge
stabilityLevel: ALPHA
- name: sync_proxy_rules_last_timestamp_seconds
subsystem: kubeproxy
help: The last time proxy rules were successfully synced
type: Gauge
stabilityLevel: ALPHA
- name: sync_proxy_rules_no_local_endpoints_total
subsystem: kubeproxy
help: Number of services with a Local traffic policy and no endpoints
type: Gauge
stabilityLevel: ALPHA
labels:
- traffic_policy
- name: sync_proxy_rules_service_changes_pending
subsystem: kubeproxy
help: Pending proxy rules Service changes
type: Gauge
stabilityLevel: ALPHA
- name: sync_proxy_rules_service_changes_total
subsystem: kubeproxy
help: Cumulative proxy rules Service changes
type: Counter
stabilityLevel: ALPHA
- name: probe_duration_seconds
subsystem: prober
help: Duration in seconds for a probe response.
type: Histogram
stabilityLevel: ALPHA
labels:
- container
- namespace
- pod
- probe_type
- name: probe_total
subsystem: prober
help: Cumulative number of liveness, readiness or startup probes for a container
by result.
type: Counter
stabilityLevel: ALPHA
labels:
- container
- namespace
- pod
- pod_uid
- probe_type
- result
- name: volume_manager_selinux_container_errors_total
help: Number of errors when kubelet cannot compute SELinux context for a container.
Kubelet can't start such a Pod then and it will retry, therefore value of this
metric may not represent the actual nr. of containers.
type: Gauge
stabilityLevel: ALPHA
- name: volume_manager_selinux_container_warnings_total
help: Number of errors when kubelet cannot compute SELinux context for a container
that are ignored. They will become real errors when SELinuxMountReadWriteOncePod
feature is expanded to all volume access modes.
type: Gauge
stabilityLevel: ALPHA
- name: volume_manager_selinux_pod_context_mismatch_errors_total
help: Number of errors when a Pod defines different SELinux contexts for its containers
that use the same volume. Kubelet can't start such a Pod then and it will retry,
therefore value of this metric may not represent the actual nr. of Pods.
type: Gauge
stabilityLevel: ALPHA
- name: volume_manager_selinux_pod_context_mismatch_warnings_total
help: Number of errors when a Pod defines different SELinux contexts for its containers
that use the same volume. They are not errors yet, but they will become real errors
when SELinuxMountReadWriteOncePod feature is expanded to all volume access modes.
type: Gauge
stabilityLevel: ALPHA
- name: volume_manager_selinux_volume_context_mismatch_errors_total
help: Number of errors when a Pod uses a volume that is already mounted with a different
SELinux context than the Pod needs. Kubelet can't start such a Pod then and it
will retry, therefore value of this metric may not represent the actual nr. of
Pods.
type: Gauge
stabilityLevel: ALPHA
- name: volume_manager_selinux_volume_context_mismatch_warnings_total
help: Number of errors when a Pod uses a volume that is already mounted with a different
SELinux context than the Pod needs. They are not errors yet, but they will become
real errors when SELinuxMountReadWriteOncePod feature is expanded to all volume
access modes.
type: Gauge
stabilityLevel: ALPHA
- name: volume_manager_selinux_volumes_admitted_total
help: Number of volumes whose SELinux context was fine and will be mounted with
mount -o context option.
type: Gauge
stabilityLevel: ALPHA
- name: volume_manager_total_volumes
help: Number of volumes in Volume Manager
type: Custom
stabilityLevel: ALPHA
labels:
- plugin_name
- state
- name: csr_honored_duration_total
subsystem: certificates_registry
namespace: apiserver
help: Total number of issued CSRs with a requested duration that was honored, sliced
by signer (only kubernetes.io signer names are specifically identified)
type: Counter
stabilityLevel: ALPHA
labels:
- signerName
- name: csr_requested_duration_total
subsystem: certificates_registry
namespace: apiserver
help: Total number of issued CSRs with a requested duration, sliced by signer (only
kubernetes.io signer names are specifically identified)
type: Counter
stabilityLevel: ALPHA
labels:
- signerName
- name: allocated_ips
subsystem: clusterip_allocator
namespace: kube_apiserver
help: Gauge measuring the number of allocated IPs for Services
type: Gauge
stabilityLevel: ALPHA
labels:
- cidr
- name: allocation_errors_total
subsystem: clusterip_allocator
namespace: kube_apiserver
help: Number of errors trying to allocate Cluster IPs
type: Counter
stabilityLevel: ALPHA
labels:
- cidr
- scope
- name: allocation_total
subsystem: clusterip_allocator
namespace: kube_apiserver
help: Number of Cluster IPs allocations
type: Counter
stabilityLevel: ALPHA
labels:
- cidr
- scope
- name: available_ips
subsystem: clusterip_allocator
namespace: kube_apiserver
help: Gauge measuring the number of available IPs for Services
type: Gauge
stabilityLevel: ALPHA
labels:
- cidr
- name: allocated_ports
subsystem: nodeport_allocator
namespace: kube_apiserver
help: Gauge measuring the number of allocated NodePorts for Services
type: Gauge
stabilityLevel: ALPHA
- name: allocation_errors_total
subsystem: nodeport_allocator
namespace: kube_apiserver
help: Number of errors trying to allocate NodePort
type: Counter
stabilityLevel: ALPHA
labels:
- scope
- name: allocation_total
subsystem: nodeport_allocator
namespace: kube_apiserver
help: Number of NodePort allocations
type: Counter
stabilityLevel: ALPHA
labels:
- scope
- name: available_ports
subsystem: nodeport_allocator
namespace: kube_apiserver
help: Gauge measuring the number of available NodePorts for Services
type: Gauge
stabilityLevel: ALPHA
- name: pods_logs_backend_tls_failure_total
subsystem: pod_logs
namespace: kube_apiserver
help: Total number of requests for pods/logs that failed due to kubelet server TLS
verification
type: Counter
stabilityLevel: ALPHA
- name: pods_logs_insecure_backend_total
subsystem: pod_logs
namespace: kube_apiserver
help: 'Total number of requests for pods/logs sliced by usage type: enforce_tls,
skip_tls_allowed, skip_tls_denied'
type: Counter
stabilityLevel: ALPHA
labels:
- usage
- name: e2e_scheduling_duration_seconds
subsystem: scheduler
help: E2e scheduling latency in seconds (scheduling algorithm + binding). This metric
is replaced by scheduling_attempt_duration_seconds.
type: Histogram
deprecatedVersion: 1.23.0
stabilityLevel: ALPHA
labels:
- profile
- result
buckets:
- 0.001
- 0.002
- 0.004
- 0.008
- 0.016
- 0.032
- 0.064
- 0.128
- 0.256
- 0.512
- 1.024
- 2.048
- 4.096
- 8.192
- 16.384
- name: goroutines
subsystem: scheduler
help: Number of running goroutines split by the work they do such as binding.
type: Gauge
stabilityLevel: ALPHA
labels:
- operation
- name: permit_wait_duration_seconds
subsystem: scheduler
help: Duration of waiting on permit.
type: Histogram
stabilityLevel: ALPHA
labels:
- result
buckets:
- 0.001
- 0.002
- 0.004
- 0.008
- 0.016
- 0.032
- 0.064
- 0.128
- 0.256
- 0.512
- 1.024
- 2.048
- 4.096
- 8.192
- 16.384
- name: plugin_execution_duration_seconds
subsystem: scheduler
help: Duration for running a plugin at a specific extension point.
type: Histogram
stabilityLevel: ALPHA
labels:
- extension_point
- plugin
- status
buckets:
- 1e-05
- 1.5000000000000002e-05
- 2.2500000000000005e-05
- 3.375000000000001e-05
- 5.062500000000001e-05
- 7.593750000000002e-05
- 0.00011390625000000003
- 0.00017085937500000006
- 0.0002562890625000001
- 0.00038443359375000017
- 0.0005766503906250003
- 0.0008649755859375004
- 0.0012974633789062506
- 0.0019461950683593758
- 0.0029192926025390638
- 0.004378938903808595
- 0.006568408355712893
- 0.009852612533569338
- 0.014778918800354007
- 0.02216837820053101
- name: scheduler_cache_size
subsystem: scheduler
help: Number of nodes, pods, and assumed (bound) pods in the scheduler cache.
type: Gauge
stabilityLevel: ALPHA
labels:
- type
- name: scheduler_goroutines
subsystem: scheduler
help: Number of running goroutines split by the work they do such as binding. This
metric is replaced by the "goroutines" metric.
type: Gauge
deprecatedVersion: 1.26.0
stabilityLevel: ALPHA
labels:
- work
- name: scheduling_algorithm_duration_seconds
subsystem: scheduler
help: Scheduling algorithm latency in seconds
type: Histogram
stabilityLevel: ALPHA
buckets:
- 0.001
- 0.002
- 0.004
- 0.008
- 0.016
- 0.032
- 0.064
- 0.128
- 0.256
- 0.512
- 1.024
- 2.048
- 4.096
- 8.192
- 16.384
- name: unschedulable_pods
subsystem: scheduler
help: The number of unschedulable pods broken down by plugin name. A pod will increment
the gauge for all plugins that caused it to not schedule and so this metric has
meaning only when broken down by plugin.
type: Gauge
stabilityLevel: ALPHA
labels:
- plugin
- profile
- name: binder_cache_requests_total
subsystem: scheduler_volume
help: Total number for request volume binding cache
type: Counter
stabilityLevel: ALPHA
labels:
- operation
- name: scheduling_stage_error_total
subsystem: scheduler_volume
help: Volume scheduling stage error count
type: Counter
stabilityLevel: ALPHA
labels:
- operation
- name: legacy_tokens_total
subsystem: serviceaccount
help: Cumulative legacy service account tokens used
type: Counter
stabilityLevel: ALPHA
- name: stale_tokens_total
subsystem: serviceaccount
help: Cumulative stale projected service account tokens used
type: Counter
stabilityLevel: ALPHA
- name: valid_tokens_total
subsystem: serviceaccount
help: Cumulative valid projected service account tokens used
type: Counter
stabilityLevel: ALPHA
- name: kube_pod_resource_limit
help: Resources limit for workloads on the cluster, broken down by pod. This shows
the resource usage the scheduler and kubelet expect per pod for resources along
with the unit for the resource if any.
type: Custom
stabilityLevel: STABLE
labels:
- namespace
- pod
- node
- scheduler
- priority
- resource
- unit
- name: kube_pod_resource_request
help: Resources requested by workloads on the cluster, broken down by pod. This
shows the resource usage the scheduler and kubelet expect per pod for resources
along with the unit for the resource if any.
type: Custom
stabilityLevel: STABLE
labels:
- namespace
- pod
- node
- scheduler
- priority
- resource
- unit
- name: framework_extension_point_duration_seconds
subsystem: scheduler
help: Latency for running all plugins of a specific extension point.
type: Histogram
stabilityLevel: STABLE
labels:
- extension_point
- profile
- status
buckets:
- 0.0001
- 0.0002
- 0.0004
- 0.0008
- 0.0016
- 0.0032
- 0.0064
- 0.0128
- 0.0256
- 0.0512
- 0.1024
- 0.2048
- name: pending_pods
subsystem: scheduler
help: Number of pending pods, by the queue type. 'active' means number of pods in
activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number
of pods in unschedulablePods that the scheduler attempted to schedule and failed;
'gated' is the number of unschedulable pods that the scheduler never attempted
to schedule because they are gated.
type: Gauge
stabilityLevel: STABLE
labels:
- queue
- name: pod_scheduling_attempts
subsystem: scheduler
help: Number of attempts to successfully schedule a pod.
type: Histogram
stabilityLevel: STABLE
buckets:
- 1
- 2
- 4
- 8
- 16
- name: pod_scheduling_duration_seconds
subsystem: scheduler
help: E2e latency for a pod being scheduled which may include multiple scheduling
attempts.
type: Histogram
stabilityLevel: STABLE
labels:
- attempts
buckets:
- 0.01
- 0.02
- 0.04
- 0.08
- 0.16
- 0.32
- 0.64
- 1.28
- 2.56
- 5.12
- 10.24
- 20.48
- 40.96
- 81.92
- 163.84
- 327.68
- 655.36
- 1310.72
- 2621.44
- 5242.88
- name: preemption_attempts_total
subsystem: scheduler
help: Total preemption attempts in the cluster till now
type: Counter
stabilityLevel: STABLE
- name: preemption_victims
subsystem: scheduler
help: Number of selected preemption victims
type: Histogram
stabilityLevel: STABLE
buckets:
- 1
- 2
- 4
- 8
- 16
- 32
- 64
- name: queue_incoming_pods_total
subsystem: scheduler
help: Number of pods added to scheduling queues by event and queue type.
type: Counter
stabilityLevel: STABLE
labels:
- event
- queue
- name: schedule_attempts_total
subsystem: scheduler
help: Number of attempts to schedule pods, by the result. 'unschedulable' means
a pod could not be scheduled, while 'error' means an internal scheduler problem.
type: Counter
stabilityLevel: STABLE
labels:
- profile
- result
- name: scheduling_attempt_duration_seconds
subsystem: scheduler
help: Scheduling attempt latency in seconds (scheduling algorithm + binding)
type: Histogram
stabilityLevel: STABLE
labels:
- profile
- result
buckets:
- 0.001
- 0.002
- 0.004
- 0.008
- 0.016
- 0.032
- 0.064
- 0.128
- 0.256
- 0.512
- 1.024
- 2.048
- 4.096
- 8.192
- 16.384
- name: operations_seconds
subsystem: csi
help: Container Storage Interface operation duration with gRPC error code status
total
type: Histogram
stabilityLevel: ALPHA
labels:
- driver_name
- grpc_status_code
- method_name
- migrated
buckets:
- 0.1
- 0.25
- 0.5
- 1
- 2.5
- 5
- 10
- 15
- 25
- 50
- 120
- 300
- 600
- name: storage_operation_duration_seconds
help: Storage operation duration
type: Histogram
stabilityLevel: ALPHA
labels:
- migrated
- operation_name
- status
- volume_plugin
buckets:
- 0.1
- 0.25
- 0.5
- 1
- 2.5
- 5
- 10
- 15
- 25
- 50
- 120
- 300
- 600
- name: volume_operation_total_seconds
help: Storage operation end to end duration in seconds
type: Histogram
stabilityLevel: ALPHA
labels:
- operation_name
- plugin_name
buckets:
- 0.1
- 0.25
- 0.5
- 1
- 2.5
- 5
- 10
- 15
- 25
- 50
- 120
- 300
- 600
- name: graph_actions_duration_seconds
subsystem: node_authorizer
help: Histogram of duration of graph actions in node authorizer.
type: Histogram
stabilityLevel: ALPHA
labels:
- operation
buckets:
- 0.0001
- 0.0002
- 0.0004
- 0.0008
- 0.0016
- 0.0032
- 0.0064
- 0.0128
- 0.0256
- 0.0512
- 0.1024
- 0.2048
- name: apiextensions_openapi_v2_regeneration_count
help: Counter of OpenAPI v2 spec regeneration count broken down by causing CRD name
and reason.
type: Counter
stabilityLevel: ALPHA
labels:
- crd
- reason
- name: apiextensions_openapi_v3_regeneration_count
help: Counter of OpenAPI v3 spec regeneration count broken down by group, version,
causing CRD and reason.
type: Counter
stabilityLevel: ALPHA
labels:
- crd
- group
- reason
- version
- name: apiserver_crd_webhook_conversion_duration_seconds
help: CRD webhook conversion duration in seconds
type: Histogram
stabilityLevel: ALPHA
labels:
- crd_name
- from_version
- succeeded
- to_version
buckets:
- 0.001
- 0.002
- 0.004
- 0.008
- 0.016
- 0.032
- 0.064
- 0.128
- 0.256
- 0.512
- 1.024
- 2.048
- 4.096
- 8.192
- 16.384
- name: step_admission_duration_seconds_summary
subsystem: admission
namespace: apiserver
help: Admission sub-step latency summary in seconds, broken out for each operation
and API resource and step type (validate or admit).
type: Summary
stabilityLevel: ALPHA
labels:
- operation
- rejected
- type
maxAge: 18000000000000
- name: webhook_fail_open_count
subsystem: admission
namespace: apiserver
help: Admission webhook fail open count, identified by name and broken out for each
admission type (validating or mutating).
type: Counter
stabilityLevel: ALPHA
labels:
- name
- type
- name: webhook_rejection_count
subsystem: admission
namespace: apiserver
help: Admission webhook rejection count, identified by name and broken out for each
admission type (validating or admit) and operation. Additional labels specify
an error type (calling_webhook_error or apiserver_internal_error if an error occurred;
no_error otherwise) and optionally a non-zero rejection code if the webhook rejects
the request with an HTTP status code (honored by the apiserver when the code is
greater or equal to 400). Codes greater than 600 are truncated to 600, to keep
the metrics cardinality bounded.
type: Counter
stabilityLevel: ALPHA
labels:
- error_type
- name
- operation
- rejection_code
- type
- name: webhook_request_total
subsystem: admission
namespace: apiserver
help: Admission webhook request total, identified by name and broken out for each
admission type (validating or mutating) and operation. Additional labels specify
whether the request was rejected or not and an HTTP status code. Codes greater
than 600 are truncated to 600, to keep the metrics cardinality bounded.
type: Counter
stabilityLevel: ALPHA
labels:
- code
- name
- operation
- rejected
- type
- name: check_duration_seconds
subsystem: validating_admission_policy
namespace: apiserver
help: Validation admission latency for individual validation expressions in seconds,
labeled by policy and further including binding, state and enforcement action
taken.
type: Histogram
stabilityLevel: ALPHA
labels:
- enforcement_action
- policy
- policy_binding
- state
buckets:
- 5e-07
- 0.001
- 0.01
- 0.1
- 1
- name: check_total
subsystem: validating_admission_policy
namespace: apiserver
help: Validation admission policy check total, labeled by policy and further identified
by binding, enforcement action taken, and state.
type: Counter
stabilityLevel: ALPHA
labels:
- enforcement_action
- policy
- policy_binding
- state
- name: definition_total
subsystem: validating_admission_policy
namespace: apiserver
help: Validation admission policy count total, labeled by state and enforcement
action.
type: Counter
stabilityLevel: ALPHA
labels:
- enforcement_action
- state
- name: controller_admission_duration_seconds
subsystem: admission
namespace: apiserver
help: Admission controller latency histogram in seconds, identified by name and
broken out for each operation and API resource and type (validate or admit).
type: Histogram
stabilityLevel: STABLE
labels:
- name
- operation
- rejected
- type
buckets:
- 0.005
- 0.025
- 0.1
- 0.5
- 1
- 2.5
- name: step_admission_duration_seconds
subsystem: admission
namespace: apiserver
help: Admission sub-step latency histogram in seconds, broken out for each operation
and API resource and step type (validate or admit).
type: Histogram
stabilityLevel: STABLE
labels:
- operation
- rejected
- type
buckets:
- 0.005
- 0.025
- 0.1
- 0.5
- 1
- 2.5
- name: webhook_admission_duration_seconds
subsystem: admission
namespace: apiserver
help: Admission webhook latency histogram in seconds, identified by name and broken
out for each operation and API resource and type (validate or admit).
type: Histogram
stabilityLevel: STABLE
labels:
- name
- operation
- rejected
- type
buckets:
- 0.005
- 0.025
- 0.1
- 0.5
- 1
- 2.5
- 10
- 25
- name: error_total
subsystem: apiserver_audit
help: Counter of audit events that failed to be audited properly. Plugin identifies
the plugin affected by the error.
type: Counter
stabilityLevel: ALPHA
labels:
- plugin
- name: event_total
subsystem: apiserver_audit
help: Counter of audit events generated and sent to the audit backend.
type: Counter
stabilityLevel: ALPHA
- name: level_total
subsystem: apiserver_audit
help: Counter of policy levels for audit events (1 per request).
type: Counter
stabilityLevel: ALPHA
labels:
- level
- name: requests_rejected_total
subsystem: apiserver_audit
help: Counter of apiserver requests rejected due to an error in audit logging backend.
type: Counter
stabilityLevel: ALPHA
- name: compilation_duration_seconds
subsystem: cel
namespace: apiserver
help: CEL compilation time in seconds.
type: Histogram
stabilityLevel: ALPHA
- name: evaluation_duration_seconds
subsystem: cel
namespace: apiserver
help: CEL evaluation time in seconds.
type: Histogram
stabilityLevel: ALPHA
- name: certificate_expiration_seconds
subsystem: client
namespace: apiserver
help: Distribution of the remaining lifetime on the certificate used to authenticate
a request.
type: Histogram
stabilityLevel: ALPHA
buckets:
- 0
- 1800
- 3600
- 7200
- 21600
- 43200
- 86400
- 172800
- 345600
- 604800
- 2.592e+06
- 7.776e+06
- 1.5552e+07
- 3.1104e+07
- name: current_inqueue_requests
subsystem: apiserver
help: Maximal number of queued requests in this apiserver per request kind in last
second.
type: Gauge
stabilityLevel: ALPHA
labels:
- request_kind
- name: apiserver_delegated_authn_request_duration_seconds
help: Request latency in seconds. Broken down by status code.
type: Histogram
stabilityLevel: ALPHA
labels:
- code
buckets:
- 0.25
- 0.5
- 0.7
- 1
- 1.5
- 3
- 5
- 10
- name: apiserver_delegated_authn_request_total
help: Number of HTTP requests partitioned by status code.
type: Counter
stabilityLevel: ALPHA
labels:
- code
- name: apiserver_delegated_authz_request_duration_seconds
help: Request latency in seconds. Broken down by status code.
type: Histogram
stabilityLevel: ALPHA
labels:
- code
buckets:
- 0.25
- 0.5
- 0.7
- 1
- 1.5
- 3
- 5
- 10
- name: apiserver_delegated_authz_request_total
help: Number of HTTP requests partitioned by status code.
type: Counter
stabilityLevel: ALPHA
labels:
- code
- name: request_aborts_total
subsystem: apiserver
help: Number of requests which apiserver aborted possibly due to a timeout, for
each group, version, verb, resource, subresource and scope
type: Counter
stabilityLevel: ALPHA
labels:
- group
- resource
- scope
- subresource
- verb
- version
- name: request_body_sizes
subsystem: apiserver
help: Apiserver request body sizes broken out by size.
type: Histogram
stabilityLevel: ALPHA
labels:
- resource
- verb
buckets:
- 50000
- 150000
- 250000
- 350000
- 450000
- 550000
- 650000
- 750000
- 850000
- 950000
- 1.05e+06
- 1.15e+06
- 1.25e+06
- 1.35e+06
- 1.45e+06
- 1.55e+06
- 1.65e+06
- 1.75e+06
- 1.85e+06
- 1.95e+06
- 2.05e+06
- 2.15e+06
- 2.25e+06
- 2.35e+06
- 2.45e+06
- 2.55e+06
- 2.65e+06
- 2.75e+06
- 2.85e+06
- 2.95e+06
- 3.05e+06
- name: request_filter_duration_seconds
subsystem: apiserver
help: Request filter latency distribution in seconds, for each filter type
type: Histogram
stabilityLevel: ALPHA
labels:
- filter
buckets:
- 0.0001
- 0.0003
- 0.001
- 0.003
- 0.01
- 0.03
- 0.1
- 0.3
- 1
- 5
- name: request_post_timeout_total
subsystem: apiserver
help: Tracks the activity of the request handlers after the associated requests
have been timed out by the apiserver
type: Counter
stabilityLevel: ALPHA
labels:
- source
- status
- name: request_sli_duration_seconds
subsystem: apiserver
help: Response latency distribution (not counting webhook duration) in seconds for
each verb, group, version, resource, subresource, scope and component.
type: Histogram
stabilityLevel: ALPHA
labels:
- component
- group
- resource
- scope
- subresource
- verb
- version
buckets:
- 0.05
- 0.1
- 0.2
- 0.4
- 0.6
- 0.8
- 1
- 1.25
- 1.5
- 2
- 3
- 4
- 5
- 6
- 8
- 10
- 15
- 20
- 30
- 45
- 60
- name: request_slo_duration_seconds
subsystem: apiserver
help: Response latency distribution (not counting webhook duration) in seconds for
each verb, group, version, resource, subresource, scope and component.
type: Histogram
deprecatedVersion: 1.27.0
stabilityLevel: ALPHA
labels:
- component
- group
- resource
- scope
- subresource
- verb
- version
buckets:
- 0.05
- 0.1
- 0.2
- 0.4
- 0.6
- 0.8
- 1
- 1.25
- 1.5
- 2
- 3
- 4
- 5
- 6
- 8
- 10
- 15
- 20
- 30
- 45
- 60
- name: request_terminations_total
subsystem: apiserver
help: Number of requests which apiserver terminated in self-defense.
type: Counter
stabilityLevel: ALPHA
labels:
- code
- component
- group
- resource
- scope
- subresource
- verb
- version
- name: request_timestamp_comparison_time
subsystem: apiserver
help: Time taken for comparison of old vs new objects in UPDATE or PATCH requests
type: Histogram
stabilityLevel: ALPHA
labels:
- code_path
buckets:
- 0.0001
- 0.0003
- 0.001
- 0.003
- 0.01
- 0.03
- 0.1
- 0.3
- 1
- 5
- name: selfrequest_total
subsystem: apiserver
help: Counter of apiserver self-requests broken out for each verb, API resource
and subresource.
type: Counter
stabilityLevel: ALPHA
labels:
- resource
- subresource
- verb
- name: tls_handshake_errors_total
subsystem: apiserver
help: Number of requests dropped with 'TLS handshake error from' error
type: Counter
stabilityLevel: ALPHA
- name: watch_events_sizes
subsystem: apiserver
help: Watch event size distribution in bytes
type: Histogram
stabilityLevel: ALPHA
labels:
- group
- kind
- version
buckets:
- 1024
- 2048
- 4096
- 8192
- 16384
- 32768
- 65536
- 131072
- name: watch_events_total
subsystem: apiserver
help: Number of events sent to watch clients
type: Counter
stabilityLevel: ALPHA
labels:
- group
- kind
- version
- name: authenticated_user_requests
help: Counter of authenticated requests broken out by username.
type: Counter
stabilityLevel: ALPHA
labels:
- username
- name: authentication_attempts
help: Counter of authentication attempts.
type: Counter
stabilityLevel: ALPHA
labels:
- result
- name: authentication_duration_seconds
help: Authentication duration in seconds broken out by result.
type: Histogram
stabilityLevel: ALPHA
labels:
- result
buckets:
- 0.001
- 0.002
- 0.004
- 0.008
- 0.016
- 0.032
- 0.064
- 0.128
- 0.256
- 0.512
- 1.024
- 2.048
- 4.096
- 8.192
- 16.384
- name: active_fetch_count
subsystem: token_cache
namespace: authentication
type: Gauge
stabilityLevel: ALPHA
labels:
- status
- name: fetch_total
subsystem: token_cache
namespace: authentication
type: Counter
stabilityLevel: ALPHA
labels:
- status
- name: request_duration_seconds
subsystem: token_cache
namespace: authentication
type: Histogram
stabilityLevel: ALPHA
labels:
- status
- name: request_total
subsystem: token_cache
namespace: authentication
type: Counter
stabilityLevel: ALPHA
labels:
- status
- name: field_validation_request_duration_seconds
help: Response latency distribution in seconds for each field validation value
type: Histogram
stabilityLevel: ALPHA
labels:
- field_validation
buckets:
- 0.05
- 0.1
- 0.2
- 0.4
- 0.6
- 0.8
- 1
- 1.25
- 1.5
- 2
- 3
- 4
- 5
- 6
- 8
- 10
- 15
- 20
- 30
- 45
- 60
- name: current_inflight_requests
subsystem: apiserver
help: Maximal number of currently used inflight request limit of this apiserver
per request kind in last second.
type: Gauge
stabilityLevel: STABLE
labels:
- request_kind
- name: longrunning_requests
subsystem: apiserver
help: Gauge of all active long-running apiserver requests broken out by verb, group,
version, resource, scope and component. Not all requests are tracked this way.
type: Gauge
stabilityLevel: STABLE
labels:
- component
- group
- resource
- scope
- subresource
- verb
- version
- name: request_duration_seconds
subsystem: apiserver
help: Response latency distribution in seconds for each verb, dry run value, group,
version, resource, subresource, scope and component.
type: Histogram
stabilityLevel: STABLE
labels:
- component
- dry_run
- group
- resource
- scope
- subresource
- verb
- version
buckets:
- 0.005
- 0.025
- 0.05
- 0.1
- 0.2
- 0.4
- 0.6
- 0.8
- 1
- 1.25
- 1.5
- 2
- 3
- 4
- 5
- 6
- 8
- 10
- 15
- 20
- 30
- 45
- 60
- name: request_total
subsystem: apiserver
help: Counter of apiserver requests broken out for each verb, dry run value, group,
version, resource, scope, component, and HTTP response code.
type: Counter
stabilityLevel: STABLE
labels:
- code
- component
- dry_run
- group
- resource
- scope
- subresource
- verb
- version
- name: requested_deprecated_apis
subsystem: apiserver
help: Gauge of deprecated APIs that have been requested, broken out by API group,
version, resource, subresource, and removed_release.
type: Gauge
stabilityLevel: STABLE
labels:
- group
- removed_release
- resource
- subresource
- version
- name: response_sizes
subsystem: apiserver
help: Response size distribution in bytes for each group, version, verb, resource,
subresource, scope and component.
type: Histogram
stabilityLevel: STABLE
labels:
- component
- group
- resource
- scope
- subresource
- verb
- version
buckets:
- 1000
- 10000
- 100000
- 1e+06
- 1e+07
- 1e+08
- 1e+09
- name: cache_list_fetched_objects_total
namespace: apiserver
help: Number of objects read from watch cache in the course of serving a LIST request
type: Counter
stabilityLevel: ALPHA
labels:
- index
- resource_prefix
- name: cache_list_returned_objects_total
namespace: apiserver
help: Number of objects returned for a LIST request from watch cache
type: Counter
stabilityLevel: ALPHA
labels:
- resource_prefix
- name: cache_list_total
namespace: apiserver
help: Number of LIST requests served from watch cache
type: Counter
stabilityLevel: ALPHA
labels:
- index
- resource_prefix
- name: dial_duration_seconds
subsystem: egress_dialer
namespace: apiserver
help: Dial latency histogram in seconds, labeled by the protocol (http-connect or
grpc), transport (tcp or uds)
type: Histogram
stabilityLevel: ALPHA
labels:
- protocol
- transport
buckets:
- 0.005
- 0.025
- 0.1
- 0.5
- 2.5
- 12.5
- name: dial_failure_count
subsystem: egress_dialer
namespace: apiserver
help: Dial failure count, labeled by the protocol (http-connect or grpc), transport
(tcp or uds), and stage (connect or proxy). The stage indicates at which stage
the dial failed
type: Counter
stabilityLevel: ALPHA
labels:
- protocol
- stage
- transport
- name: dial_start_total
subsystem: egress_dialer
namespace: apiserver
help: Dial starts, labeled by the protocol (http-connect or grpc) and transport
(tcp or uds).
type: Counter
stabilityLevel: ALPHA
labels:
- protocol
- transport
- name: dek_cache_fill_percent
subsystem: envelope_encryption
namespace: apiserver
help: Percent of the cache slots currently occupied by cached DEKs.
type: Gauge
stabilityLevel: ALPHA
- name: dek_cache_inter_arrival_time_seconds
subsystem: envelope_encryption
namespace: apiserver
help: Time (in seconds) of inter arrival of transformation requests.
type: Histogram
stabilityLevel: ALPHA
labels:
- transformation_type
buckets:
- 60
- 120
- 240
- 480
- 960
- 1920
- 3840
- 7680
- 15360
- 30720
- name: invalid_key_id_from_status_total
subsystem: envelope_encryption
namespace: apiserver
help: Number of times an invalid keyID is returned by the Status RPC call split
by error.
type: Counter
stabilityLevel: ALPHA
labels:
- error
- provider_name
- name: key_id_hash_last_timestamp_seconds
subsystem: envelope_encryption
namespace: apiserver
help: The last time in seconds when a keyID was used.
type: Gauge
stabilityLevel: ALPHA
labels:
- key_id_hash
- provider_name
- transformation_type
- name: key_id_hash_status_last_timestamp_seconds
subsystem: envelope_encryption
namespace: apiserver
help: The last time in seconds when a keyID was returned by the Status RPC call.
type: Gauge
stabilityLevel: ALPHA
labels:
- key_id_hash
- provider_name
- name: key_id_hash_total
subsystem: envelope_encryption
namespace: apiserver
help: Number of times a keyID is used split by transformation type and provider.
type: Counter
stabilityLevel: ALPHA
labels:
- key_id_hash
- provider_name
- transformation_type
- name: kms_operations_latency_seconds
subsystem: envelope_encryption
namespace: apiserver
help: KMS operation duration with gRPC error code status total.
type: Histogram
stabilityLevel: ALPHA
labels:
- grpc_status_code
- method_name
- provider_name
buckets:
- 0.0001
- 0.0002
- 0.0004
- 0.0008
- 0.0016
- 0.0032
- 0.0064
- 0.0128
- 0.0256
- 0.0512
- 0.1024
- 0.2048
- 0.4096
- 0.8192
- 1.6384
- 3.2768
- 6.5536
- 13.1072
- 26.2144
- 52.4288
- name: current_executing_requests
subsystem: flowcontrol
namespace: apiserver
help: Number of requests in initial (for a WATCH) or any (for a non-WATCH) execution
stage in the API Priority and Fairness subsystem
type: Gauge
stabilityLevel: ALPHA
labels:
- flow_schema
- priority_level
- name: current_inqueue_requests
subsystem: flowcontrol
namespace: apiserver
help: Number of requests currently pending in queues of the API Priority and Fairness
subsystem
type: Gauge
stabilityLevel: ALPHA
labels:
- flow_schema
- priority_level
- name: current_limit_seats
subsystem: flowcontrol
namespace: apiserver
help: Current derived number of execution seats available to each priority level
type: Gauge
stabilityLevel: ALPHA
labels:
- priority_level
- name: current_r
subsystem: flowcontrol
namespace: apiserver
help: R(time of last change)
type: Gauge
stabilityLevel: ALPHA
labels:
- priority_level
- name: demand_seats
subsystem: flowcontrol
namespace: apiserver
help: Observations, at the end of every nanosecond, of (the number of seats each
priority level could use) / (nominal number of seats for that level)
type: TimingRatioHistogram
stabilityLevel: ALPHA
labels:
- priority_level
buckets:
- 0.2
- 0.4
- 0.6
- 0.8
- 1
- 1.2
- 1.4
- 1.7
- 2
- 2.8
- 4
- 6
- name: demand_seats_average
subsystem: flowcontrol
namespace: apiserver
help: Time-weighted average, over last adjustment period, of demand_seats
type: Gauge
stabilityLevel: ALPHA
labels:
- priority_level
- name: demand_seats_high_watermark
subsystem: flowcontrol
namespace: apiserver
help: High watermark, over last adjustment period, of demand_seats
type: Gauge
stabilityLevel: ALPHA
labels:
- priority_level
- name: demand_seats_smoothed
subsystem: flowcontrol
namespace: apiserver
help: Smoothed seat demands
type: Gauge
stabilityLevel: ALPHA
labels:
- priority_level
- name: demand_seats_stdev
subsystem: flowcontrol
namespace: apiserver
help: Time-weighted standard deviation, over last adjustment period, of demand_seats
type: Gauge
stabilityLevel: ALPHA
labels:
- priority_level
- name: dispatch_r
subsystem: flowcontrol
namespace: apiserver
help: R(time of last dispatch)
type: Gauge
stabilityLevel: ALPHA
labels:
- priority_level
- name: dispatched_requests_total
subsystem: flowcontrol
namespace: apiserver
help: Number of requests executed by API Priority and Fairness subsystem
type: Counter
stabilityLevel: ALPHA
labels:
- flow_schema
- priority_level
- name: epoch_advance_total
subsystem: flowcontrol
namespace: apiserver
help: Number of times the queueset's progress meter jumped backward
type: Counter
stabilityLevel: ALPHA
labels:
- priority_level
- success
- name: latest_s
subsystem: flowcontrol
namespace: apiserver
help: S(most recently dispatched request)
type: Gauge
stabilityLevel: ALPHA
labels:
- priority_level
- name: lower_limit_seats
subsystem: flowcontrol
namespace: apiserver
help: Configured lower bound on number of execution seats available to each priority
level
type: Gauge
stabilityLevel: ALPHA
labels:
- priority_level
- name: next_discounted_s_bounds
subsystem: flowcontrol
namespace: apiserver
help: min and max, over queues, of S(oldest waiting request in queue) - estimated
work in progress
type: Gauge
stabilityLevel: ALPHA
labels:
- bound
- priority_level
- name: next_s_bounds
subsystem: flowcontrol
namespace: apiserver
help: min and max, over queues, of S(oldest waiting request in queue)
type: Gauge
stabilityLevel: ALPHA
labels:
- bound
- priority_level
- name: nominal_limit_seats
subsystem: flowcontrol
namespace: apiserver
help: Nominal number of execution seats configured for each priority level
type: Gauge
stabilityLevel: ALPHA
labels:
- priority_level
- name: priority_level_request_utilization
subsystem: flowcontrol
namespace: apiserver
help: Observations, at the end of every nanosecond, of number of requests (as a
fraction of the relevant limit) waiting or in any stage of execution (but only
initial stage for WATCHes)
type: TimingRatioHistogram
stabilityLevel: ALPHA
labels:
- phase
- priority_level
buckets:
- 0
- 0.001
- 0.003
- 0.01
- 0.03
- 0.1
- 0.25
- 0.5
- 0.75
- 1
- name: priority_level_seat_utilization
subsystem: flowcontrol
namespace: apiserver
help: Observations, at the end of every nanosecond, of utilization of seats for
any stage of execution (but only initial stage for WATCHes)
type: TimingRatioHistogram
stabilityLevel: ALPHA
labels:
- priority_level
buckets:
- 0
- 0.1
- 0.2
- 0.3
- 0.4
- 0.5
- 0.6
- 0.7
- 0.8
- 0.9
- 0.95
- 0.99
- 1
constLabels:
phase: executing
- name: read_vs_write_current_requests
subsystem: flowcontrol
namespace: apiserver
help: Observations, at the end of every nanosecond, of the number of requests (as
a fraction of the relevant limit) waiting or in regular stage of execution
type: TimingRatioHistogram
stabilityLevel: ALPHA
labels:
- phase
- request_kind
buckets:
- 0
- 0.001
- 0.01
- 0.1
- 0.2
- 0.3
- 0.4
- 0.5
- 0.6
- 0.7
- 0.8
- 0.9
- 0.95
- 0.99
- 1
- name: rejected_requests_total
subsystem: flowcontrol
namespace: apiserver
help: Number of requests rejected by API Priority and Fairness subsystem
type: Counter
stabilityLevel: ALPHA
labels:
- flow_schema
- priority_level
- reason
- name: request_concurrency_in_use
subsystem: flowcontrol
namespace: apiserver
help: Concurrency (number of seats) occupied by the currently executing (initial
stage for a WATCH, any stage otherwise) requests in the API Priority and Fairness
subsystem
type: Gauge
stabilityLevel: ALPHA
labels:
- flow_schema
- priority_level
- name: request_concurrency_limit
subsystem: flowcontrol
namespace: apiserver
help: Shared concurrency limit in the API Priority and Fairness subsystem
type: Gauge
stabilityLevel: ALPHA
labels:
- priority_level
- name: request_dispatch_no_accommodation_total
subsystem: flowcontrol
namespace: apiserver
help: Number of times a dispatch attempt resulted in a non accommodation due to
lack of available seats
type: Counter
stabilityLevel: ALPHA
labels:
- flow_schema
- priority_level
- name: request_execution_seconds
subsystem: flowcontrol
namespace: apiserver
help: Duration of initial stage (for a WATCH) or any (for a non-WATCH) stage of
request execution in the API Priority and Fairness subsystem
type: Histogram
stabilityLevel: ALPHA
labels:
- flow_schema
- priority_level
- type
buckets:
- 0
- 0.005
- 0.02
- 0.05
- 0.1
- 0.2
- 0.5
- 1
- 2
- 5
- 10
- 30
- name: request_queue_length_after_enqueue
subsystem: flowcontrol
namespace: apiserver
help: Length of queue in the API Priority and Fairness subsystem, as seen by each
request after it is enqueued
type: Histogram
stabilityLevel: ALPHA
labels:
- flow_schema
- priority_level
buckets:
- 0
- 10
- 25
- 50
- 100
- 250
- 500
- 1000
- name: request_wait_duration_seconds
subsystem: flowcontrol
namespace: apiserver
help: Length of time a request spent waiting in its queue
type: Histogram
stabilityLevel: ALPHA
labels:
- execute
- flow_schema
- priority_level
buckets:
- 0
- 0.005
- 0.02
- 0.05
- 0.1
- 0.2
- 0.5
- 1
- 2
- 5
- 10
- 30
- name: seat_fair_frac
subsystem: flowcontrol
namespace: apiserver
help: Fair fraction of server's concurrency to allocate to each priority level that
can use it
type: Gauge
stabilityLevel: ALPHA
- name: target_seats
subsystem: flowcontrol
namespace: apiserver
help: Seat allocation targets
type: Gauge
stabilityLevel: ALPHA
labels:
- priority_level
- name: upper_limit_seats
subsystem: flowcontrol
namespace: apiserver
help: Configured upper bound on number of execution seats available to each priority
level
type: Gauge
stabilityLevel: ALPHA
labels:
- priority_level
- name: watch_count_samples
subsystem: flowcontrol
namespace: apiserver
help: count of watchers for mutating requests in API Priority and Fairness
type: Histogram
stabilityLevel: ALPHA
labels:
- flow_schema
- priority_level
buckets:
- 0
- 1
- 10
- 100
- 1000
- 10000
- name: work_estimated_seats
subsystem: flowcontrol
namespace: apiserver
help: Number of estimated seats (maximum of initial and final seats) associated
with requests in API Priority and Fairness
type: Histogram
stabilityLevel: ALPHA
labels:
- flow_schema
- priority_level
buckets:
- 1
- 2
- 4
- 10
- name: init_events_total
namespace: apiserver
help: Counter of init events processed in watch cache broken down by resource type.
type: Counter
stabilityLevel: ALPHA
labels:
- resource
- name: data_key_generation_duration_seconds
subsystem: storage
namespace: apiserver
help: Latencies in seconds of data encryption key(DEK) generation operations.
type: Histogram
stabilityLevel: ALPHA
buckets:
- 5e-06
- 1e-05
- 2e-05
- 4e-05
- 8e-05
- 0.00016
- 0.00032
- 0.00064
- 0.00128
- 0.00256
- 0.00512
- 0.01024
- 0.02048
- 0.04096
- name: data_key_generation_failures_total
subsystem: storage
namespace: apiserver
help: Total number of failed data encryption key(DEK) generation operations.
type: Counter
stabilityLevel: ALPHA
- name: storage_db_total_size_in_bytes
subsystem: apiserver
help: Total size of the storage database file physically allocated in bytes.
type: Gauge
stabilityLevel: ALPHA
labels:
- endpoint
- name: storage_decode_errors_total
namespace: apiserver
help: Number of stored object decode errors split by object type
type: Counter
stabilityLevel: ALPHA
labels:
- resource
- name: envelope_transformation_cache_misses_total
subsystem: storage
namespace: apiserver
help: Total number of cache misses while accessing key decryption key(KEK).
type: Counter
stabilityLevel: ALPHA
- name: storage_events_received_total
subsystem: apiserver
help: Number of etcd events received split by kind.
type: Counter
stabilityLevel: ALPHA
labels:
- resource
- name: apiserver_storage_list_evaluated_objects_total
help: Number of objects tested in the course of serving a LIST request from storage
type: Counter
stabilityLevel: ALPHA
labels:
- resource
- name: apiserver_storage_list_fetched_objects_total
help: Number of objects read from storage in the course of serving a LIST request
type: Counter
stabilityLevel: ALPHA
labels:
- resource
- name: apiserver_storage_list_returned_objects_total
help: Number of objects returned for a LIST request from storage
type: Counter
stabilityLevel: ALPHA
labels:
- resource
- name: apiserver_storage_list_total
help: Number of LIST requests served from storage
type: Counter
stabilityLevel: ALPHA
labels:
- resource
- name: transformation_duration_seconds
subsystem: storage
namespace: apiserver
help: Latencies in seconds of value transformation operations.
type: Histogram
stabilityLevel: ALPHA
labels:
- transformation_type
- transformer_prefix
buckets:
- 5e-06
- 1e-05
- 2e-05
- 4e-05
- 8e-05
- 0.00016
- 0.00032
- 0.00064
- 0.00128
- 0.00256
- 0.00512
- 0.01024
- 0.02048
- 0.04096
- 0.08192
- 0.16384
- 0.32768
- 0.65536
- 1.31072
- 2.62144
- 5.24288
- 10.48576
- 20.97152
- 41.94304
- 83.88608
- name: transformation_operations_total
subsystem: storage
namespace: apiserver
help: Total number of transformations.
type: Counter
stabilityLevel: ALPHA
labels:
- status
- transformation_type
- transformer_prefix
- name: terminated_watchers_total
namespace: apiserver
help: Counter of watchers closed due to unresponsiveness broken down by resource type.
type: Counter
stabilityLevel: ALPHA
labels:
- resource
- name: events_dispatched_total
subsystem: watch_cache
namespace: apiserver
help: Counter of events dispatched in watch cache broken down by resource type.
type: Counter
stabilityLevel: ALPHA
labels:
- resource
- name: events_received_total
subsystem: watch_cache
namespace: apiserver
help: Counter of events received in watch cache broken down by resource type.
type: Counter
stabilityLevel: ALPHA
labels:
- resource
- name: initializations_total
subsystem: watch_cache
namespace: apiserver
help: Counter of watch cache initializations broken down by resource type.
type: Counter
stabilityLevel: ALPHA
labels:
- resource
- name: etcd_bookmark_counts
help: Number of etcd bookmarks (progress notify events) split by kind.
type: Gauge
stabilityLevel: ALPHA
labels:
- resource
- name: etcd_lease_object_counts
help: Number of objects attached to a single etcd lease.
type: Histogram
stabilityLevel: ALPHA
buckets:
- 10
- 50
- 100
- 500
- 1000
- 2500
- 5000
- name: etcd_request_duration_seconds
help: Etcd request latency in seconds for each operation and object type.
type: Histogram
stabilityLevel: ALPHA
labels:
- operation
- type
buckets:
- 0.005
- 0.025
- 0.05
- 0.1
- 0.2
- 0.4
- 0.6
- 0.8
- 1
- 1.25
- 1.5
- 2
- 3
- 4
- 5
- 6
- 8
- 10
- 15
- 20
- 30
- 45
- 60
- name: capacity
subsystem: watch_cache
help: Total capacity of watch cache broken down by resource type.
type: Gauge
stabilityLevel: ALPHA
labels:
- resource
- name: capacity_decrease_total
subsystem: watch_cache
help: Total number of watch cache capacity decrease events broken down by resource type.
type: Counter
stabilityLevel: ALPHA
labels:
- resource
- name: capacity_increase_total
subsystem: watch_cache
help: Total number of watch cache capacity increase events broken down by resource type.
type: Counter
stabilityLevel: ALPHA
labels:
- resource
- name: apiserver_storage_objects
help: Number of stored objects at the time of last check split by kind.
type: Gauge
stabilityLevel: STABLE
labels:
- resource
- name: x509_insecure_sha1_total
subsystem: webhooks
namespace: apiserver
help: Counts the number of requests to servers with insecure SHA1 signatures in
their serving certificate OR the number of connection failures due to the insecure
SHA1 signatures (either/or, based on the runtime environment)
type: Counter
stabilityLevel: ALPHA
- name: x509_missing_san_total
subsystem: webhooks
namespace: apiserver
help: Counts the number of requests to servers missing SAN extension in their serving
certificate OR the number of connection failures due to the lack of x509 certificate
SAN extension (either/or, based on the runtime environment)
type: Counter
stabilityLevel: ALPHA
- name: nodesync_latency_seconds
subsystem: service_controller
help: A metric measuring the latency for nodesync which updates loadbalancer hosts
on cluster node updates.
type: Histogram
stabilityLevel: ALPHA
buckets:
- 1
- 2
- 4
- 8
- 16
- 32
- 64
- 128
- 256
- 512
- 1024
- 2048
- 4096
- 8192
- 16384
- name: update_loadbalancer_host_latency_seconds
subsystem: service_controller
help: A metric measuring the latency for updating each load balancer's hosts.
type: Histogram
stabilityLevel: ALPHA
buckets:
- 1
- 2
- 4
- 8
- 16
- 32
- 64
- 128
- 256
- 512
- 1024
- 2048
- 4096
- 8192
- 16384
- name: kubernetes_build_info
help: A metric with a constant '1' value labeled by major, minor, git version, git
commit, git tree state, build date, Go version, and compiler from which Kubernetes
was built, and platform on which it is running.
type: Gauge
stabilityLevel: ALPHA
labels:
- build_date
- compiler
- git_commit
- git_tree_state
- git_version
- go_version
- major
- minor
- platform
- name: feature_enabled
namespace: kubernetes
help: This metric records the data about the stage and enablement of a k8s feature.
type: Gauge
stabilityLevel: ALPHA
labels:
- name
- stage
- name: healthcheck
namespace: kubernetes
help: This metric records the result of a single healthcheck.
type: Gauge
stabilityLevel: ALPHA
labels:
- name
- type
- name: healthchecks_total
namespace: kubernetes
help: This metric records the results of all healthchecks.
type: Counter
stabilityLevel: ALPHA
labels:
- name
- status
- type
- name: leader_election_master_status
help: Gauge of whether the reporting system is master of the relevant lease, 0 indicates
backup, 1 indicates master. 'name' is the string used to identify the lease. Please
make sure to group by name.
type: Gauge
stabilityLevel: ALPHA
labels:
- name
- name: rest_client_exec_plugin_call_total
help: Number of calls to an exec plugin, partitioned by the type of event encountered
(no_error, plugin_execution_error, plugin_not_found_error, client_internal_error)
and an optional exit code. The exit code will be set to 0 if and only if the plugin
call was successful.
type: Counter
stabilityLevel: ALPHA
labels:
- call_status
- code
- name: rest_client_exec_plugin_certificate_rotation_age
help: Histogram of the number of seconds the last auth exec plugin client certificate
lived before being rotated. If auth exec plugin client certificates are unused,
histogram will contain no data.
type: Histogram
stabilityLevel: ALPHA
buckets:
- 600
- 1800
- 3600
- 14400
- 86400
- 604800
- 2.592e+06
- 7.776e+06
- 1.5552e+07
- 3.1104e+07
- 1.24416e+08
- name: rest_client_exec_plugin_ttl_seconds
help: Gauge of the shortest TTL (time-to-live) of the client certificate(s) managed
by the auth exec plugin. The value is in seconds until certificate expiry (negative
if already expired). If auth exec plugins are unused or manage no TLS certificates,
the value will be +INF.
type: Gauge
stabilityLevel: ALPHA
- name: rest_client_rate_limiter_duration_seconds
help: Client side rate limiter latency in seconds. Broken down by verb, and host.
type: Histogram
stabilityLevel: ALPHA
labels:
- host
- verb
buckets:
- 0.005
- 0.025
- 0.1
- 0.25
- 0.5
- 1
- 2
- 4
- 8
- 15
- 30
- 60
- name: rest_client_request_duration_seconds
help: Request latency in seconds. Broken down by verb, and host.
type: Histogram
stabilityLevel: ALPHA
labels:
- host
- verb
buckets:
- 0.005
- 0.025
- 0.1
- 0.25
- 0.5
- 1
- 2
- 4
- 8
- 15
- 30
- 60
- name: rest_client_request_retries_total
help: Number of request retries, partitioned by status code, verb, and host.
type: Counter
stabilityLevel: ALPHA
labels:
- code
- host
- verb
- name: rest_client_request_size_bytes
help: Request size in bytes. Broken down by verb and host.
type: Histogram
stabilityLevel: ALPHA
labels:
- host
- verb
buckets:
- 64
- 256
- 512
- 1024
- 4096
- 16384
- 65536
- 262144
- 1.048576e+06
- 4.194304e+06
- 1.6777216e+07
- name: rest_client_requests_total
help: Number of HTTP requests, partitioned by status code, method, and host.
type: Counter
stabilityLevel: ALPHA
labels:
- code
- host
- method
- name: rest_client_response_size_bytes
help: Response size in bytes. Broken down by verb and host.
type: Histogram
stabilityLevel: ALPHA
labels:
- host
- verb
buckets:
- 64
- 256
- 512
- 1024
- 4096
- 16384
- 65536
- 262144
- 1.048576e+06
- 4.194304e+06
- 1.6777216e+07
- name: running_managed_controllers
help: Indicates where instances of a controller are currently running
type: Gauge
stabilityLevel: ALPHA
labels:
- manager
- name
- name: adds_total
subsystem: workqueue
help: Total number of adds handled by workqueue
type: Counter
stabilityLevel: ALPHA
labels:
- name
- name: depth
subsystem: workqueue
help: Current depth of workqueue
type: Gauge
stabilityLevel: ALPHA
labels:
- name
- name: longest_running_processor_seconds
subsystem: workqueue
help: How many seconds has the longest running processor for workqueue been running.
type: Gauge
stabilityLevel: ALPHA
labels:
- name
- name: queue_duration_seconds
subsystem: workqueue
help: How long in seconds an item stays in workqueue before being requested.
type: Histogram
stabilityLevel: ALPHA
labels:
- name
buckets:
- 1e-08
- 1e-07
- 1e-06
- 9.999999999999999e-06
- 9.999999999999999e-05
- 0.001
- 0.01
- 0.1
- 1
- 10
- name: retries_total
subsystem: workqueue
help: Total number of retries handled by workqueue
type: Counter
stabilityLevel: ALPHA
labels:
- name
- name: unfinished_work_seconds
subsystem: workqueue
help: How many seconds of work has been done that is in progress and hasn't been observed
by work_duration. Large values indicate stuck threads. One can deduce the number
of stuck threads by observing the rate at which this increases.
type: Gauge
stabilityLevel: ALPHA
labels:
- name
- name: work_duration_seconds
subsystem: workqueue
help: How long in seconds processing an item from workqueue takes.
type: Histogram
stabilityLevel: ALPHA
labels:
- name
buckets:
- 1e-08
- 1e-07
- 1e-06
- 9.999999999999999e-06
- 9.999999999999999e-05
- 0.001
- 0.01
- 0.1
- 1
- 10
- name: aggregator_openapi_v2_regeneration_count
help: Counter of OpenAPI v2 spec regeneration count broken down by causing APIService
name and reason.
type: Counter
stabilityLevel: ALPHA
labels:
- apiservice
- reason
- name: aggregator_openapi_v2_regeneration_duration
help: Gauge of OpenAPI v2 spec regeneration duration in seconds.
type: Gauge
stabilityLevel: ALPHA
labels:
- reason
- name: aggregator_unavailable_apiservice
help: Gauge of APIServices which are marked as unavailable broken down by APIService
name.
type: Custom
stabilityLevel: ALPHA
labels:
- name
- name: aggregator_unavailable_apiservice_total
help: Counter of APIServices which are marked as unavailable broken down by APIService
name and reason.
type: Counter
stabilityLevel: ALPHA
labels:
- name
- reason
- name: x509_insecure_sha1_total
subsystem: kube_aggregator
namespace: apiserver
help: Counts the number of requests to servers with insecure SHA1 signatures in
their serving certificate OR the number of connection failures due to the insecure
SHA1 signatures (either/or, based on the runtime environment)
type: Counter
stabilityLevel: ALPHA
- name: x509_missing_san_total
subsystem: kube_aggregator
namespace: apiserver
help: Counts the number of requests to servers missing SAN extension in their serving
certificate OR the number of connection failures due to the lack of x509 certificate
SAN extension (either/or, based on the runtime environment)
type: Counter
stabilityLevel: ALPHA
- name: cloudprovider_aws_api_request_duration_seconds
help: Latency of AWS API calls
type: Histogram
stabilityLevel: ALPHA
labels:
- request
- name: cloudprovider_aws_api_request_errors
help: AWS API errors
type: Counter
stabilityLevel: ALPHA
labels:
- request
- name: cloudprovider_aws_api_throttled_requests_total
help: AWS API throttled requests
type: Counter
stabilityLevel: ALPHA
labels:
- operation_name
- name: api_request_duration_seconds
namespace: cloudprovider_azure
help: Latency of an Azure API call
type: Histogram
stabilityLevel: ALPHA
labels:
- request
- resource_group
- source
- subscription_id
buckets:
- 0.1
- 0.25
- 0.5
- 1
- 2.5
- 5
- 10
- 15
- 25
- 50
- 120
- 300
- 600
- 1200
- name: api_request_errors
namespace: cloudprovider_azure
help: Number of errors for an Azure API call
type: Counter
stabilityLevel: ALPHA
labels:
- request
- resource_group
- source
- subscription_id
- name: api_request_ratelimited_count
namespace: cloudprovider_azure
help: Number of rate limited Azure API calls
type: Counter
stabilityLevel: ALPHA
labels:
- request
- resource_group
- source
- subscription_id
- name: api_request_throttled_count
namespace: cloudprovider_azure
help: Number of throttled Azure API calls
type: Counter
stabilityLevel: ALPHA
labels:
- request
- resource_group
- source
- subscription_id
- name: op_duration_seconds
namespace: cloudprovider_azure
help: Latency of an Azure service operation
type: Histogram
stabilityLevel: ALPHA
labels:
- request
- resource_group
- source
- subscription_id
buckets:
- 0.1
- 0.2
- 0.5
- 1
- 10
- 20
- 30
- 40
- 50
- 60
- 100
- 200
- 300
- name: op_failure_count
namespace: cloudprovider_azure
help: Number of failed Azure service operations
type: Counter
stabilityLevel: ALPHA
labels:
- request
- resource_group
- source
- subscription_id
- name: number_of_l4_ilbs
help: Number of L4 ILBs
type: Gauge
stabilityLevel: ALPHA
labels:
- feature
- name: cloudprovider_gce_api_request_duration_seconds
help: Latency of a GCE API call
type: Histogram
stabilityLevel: ALPHA
labels:
- region
- request
- version
- zone
- name: cloudprovider_gce_api_request_errors
help: Number of errors for an API call
type: Counter
stabilityLevel: ALPHA
labels:
- region
- request
- version
- zone
- name: cloudprovider_vsphere_api_request_duration_seconds
help: Latency of vsphere api call
type: Histogram
stabilityLevel: ALPHA
labels:
- request
- name: cloudprovider_vsphere_api_request_errors
help: vsphere Api errors
type: Counter
stabilityLevel: ALPHA
labels:
- request
- name: cloudprovider_vsphere_operation_duration_seconds
help: Latency of vsphere operation call
type: Histogram
stabilityLevel: ALPHA
labels:
- operation
- name: cloudprovider_vsphere_operation_errors
help: vsphere operation errors
type: Counter
stabilityLevel: ALPHA
labels:
- operation
- name: cloudprovider_vsphere_vcenter_versions
help: Versions for connected vSphere vCenters
type: Custom
stabilityLevel: ALPHA
labels:
- hostname
- version
- build
- name: get_token_count
help: Counter of total Token() requests to the alternate token source
type: Counter
stabilityLevel: ALPHA
- name: get_token_fail_count
help: Counter of failed Token() requests to the alternate token source
type: Counter
stabilityLevel: ALPHA
- name: pod_security_errors_total
help: Number of errors preventing normal evaluation. Non-fatal errors may result
in the latest restricted profile being used for evaluation.
type: Counter
stabilityLevel: ALPHA
labels:
- fatal
- request_operation
- resource
- subresource
- name: pod_security_evaluations_total
help: Number of policy evaluations that occurred, not counting ignored or exempt
requests.
type: Counter
stabilityLevel: ALPHA
labels:
- decision
- mode
- policy_level
- policy_version
- request_operation
- resource
- subresource
- name: pod_security_exemptions_total
help: Number of exempt requests, not counting ignored or out of scope requests.
type: Counter
stabilityLevel: ALPHA
labels:
- request_operation
- resource
- subresource