4160 lines
92 KiB
YAML
4160 lines
92 KiB
YAML
- name: version_info
|
|
namespace: etcd
|
|
help: Etcd server's binary version
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- binary_version
|
|
- name: certificate_manager_client_ttl_seconds
|
|
subsystem: kubelet
|
|
help: Gauge of the TTL (time-to-live) of the Kubelet's client certificate. The value
|
|
is in seconds until certificate expiry (negative if already expired). If client
|
|
certificate is invalid or unused, the value will be +INF.
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
- name: changes
|
|
subsystem: endpoint_slice_controller
|
|
help: Number of EndpointSlice changes
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- operation
|
|
- name: desired_endpoint_slices
|
|
subsystem: endpoint_slice_controller
|
|
help: Number of EndpointSlices that would exist with perfect endpoint allocation
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
- name: endpoints_added_per_sync
|
|
subsystem: endpoint_slice_controller
|
|
help: Number of endpoints added on each Service sync
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
buckets:
|
|
- 2
|
|
- 4
|
|
- 8
|
|
- 16
|
|
- 32
|
|
- 64
|
|
- 128
|
|
- 256
|
|
- 512
|
|
- 1024
|
|
- 2048
|
|
- 4096
|
|
- 8192
|
|
- 16384
|
|
- 32768
|
|
- name: endpoints_desired
|
|
subsystem: endpoint_slice_controller
|
|
help: Number of endpoints desired
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
- name: endpoints_removed_per_sync
|
|
subsystem: endpoint_slice_controller
|
|
help: Number of endpoints removed on each Service sync
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
buckets:
|
|
- 2
|
|
- 4
|
|
- 8
|
|
- 16
|
|
- 32
|
|
- 64
|
|
- 128
|
|
- 256
|
|
- 512
|
|
- 1024
|
|
- 2048
|
|
- 4096
|
|
- 8192
|
|
- 16384
|
|
- 32768
|
|
- name: endpointslices_changed_per_sync
|
|
subsystem: endpoint_slice_controller
|
|
help: Number of EndpointSlices changed on each Service sync
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- topology
|
|
- name: num_endpoint_slices
|
|
subsystem: endpoint_slice_controller
|
|
help: Number of EndpointSlices
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
- name: syncs
|
|
subsystem: endpoint_slice_controller
|
|
help: Number of EndpointSlice syncs
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- result
|
|
- name: addresses_skipped_per_sync
|
|
subsystem: endpoint_slice_mirroring_controller
|
|
help: Number of addresses skipped on each Endpoints sync due to being invalid or
|
|
exceeding MaxEndpointsPerSubset
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
buckets:
|
|
- 2
|
|
- 4
|
|
- 8
|
|
- 16
|
|
- 32
|
|
- 64
|
|
- 128
|
|
- 256
|
|
- 512
|
|
- 1024
|
|
- 2048
|
|
- 4096
|
|
- 8192
|
|
- 16384
|
|
- 32768
|
|
- name: changes
|
|
subsystem: endpoint_slice_mirroring_controller
|
|
help: Number of EndpointSlice changes
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- operation
|
|
- name: desired_endpoint_slices
|
|
subsystem: endpoint_slice_mirroring_controller
|
|
help: Number of EndpointSlices that would exist with perfect endpoint allocation
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
- name: endpoints_added_per_sync
|
|
subsystem: endpoint_slice_mirroring_controller
|
|
help: Number of endpoints added on each Endpoints sync
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
buckets:
|
|
- 2
|
|
- 4
|
|
- 8
|
|
- 16
|
|
- 32
|
|
- 64
|
|
- 128
|
|
- 256
|
|
- 512
|
|
- 1024
|
|
- 2048
|
|
- 4096
|
|
- 8192
|
|
- 16384
|
|
- 32768
|
|
- name: endpoints_desired
|
|
subsystem: endpoint_slice_mirroring_controller
|
|
help: Number of endpoints desired
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
- name: endpoints_removed_per_sync
|
|
subsystem: endpoint_slice_mirroring_controller
|
|
help: Number of endpoints removed on each Endpoints sync
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
buckets:
|
|
- 2
|
|
- 4
|
|
- 8
|
|
- 16
|
|
- 32
|
|
- 64
|
|
- 128
|
|
- 256
|
|
- 512
|
|
- 1024
|
|
- 2048
|
|
- 4096
|
|
- 8192
|
|
- 16384
|
|
- 32768
|
|
- name: endpoints_sync_duration
|
|
subsystem: endpoint_slice_mirroring_controller
|
|
help: Duration of syncEndpoints() in seconds
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
buckets:
|
|
- 0.001
|
|
- 0.002
|
|
- 0.004
|
|
- 0.008
|
|
- 0.016
|
|
- 0.032
|
|
- 0.064
|
|
- 0.128
|
|
- 0.256
|
|
- 0.512
|
|
- 1.024
|
|
- 2.048
|
|
- 4.096
|
|
- 8.192
|
|
- 16.384
|
|
- name: endpoints_updated_per_sync
|
|
subsystem: endpoint_slice_mirroring_controller
|
|
help: Number of endpoints updated on each Endpoints sync
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
buckets:
|
|
- 2
|
|
- 4
|
|
- 8
|
|
- 16
|
|
- 32
|
|
- 64
|
|
- 128
|
|
- 256
|
|
- 512
|
|
- 1024
|
|
- 2048
|
|
- 4096
|
|
- 8192
|
|
- 16384
|
|
- 32768
|
|
- name: num_endpoint_slices
|
|
subsystem: endpoint_slice_mirroring_controller
|
|
help: Number of EndpointSlices
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
- name: resources_sync_error_total
|
|
subsystem: garbagecollector_controller
|
|
help: Number of garbage collector resources sync errors
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: sync_duration_seconds
|
|
subsystem: root_ca_cert_publisher
|
|
help: Duration in seconds of namespace syncs in root ca cert publisher.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- code
|
|
buckets:
|
|
- 0.001
|
|
- 0.002
|
|
- 0.004
|
|
- 0.008
|
|
- 0.016
|
|
- 0.032
|
|
- 0.064
|
|
- 0.128
|
|
- 0.256
|
|
- 0.512
|
|
- 1.024
|
|
- 2.048
|
|
- 4.096
|
|
- 8.192
|
|
- 16.384
|
|
- name: sync_total
|
|
subsystem: root_ca_cert_publisher
|
|
help: Number of namespace syncs happened in root ca cert publisher.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- code
|
|
- name: job_creation_skew_duration_seconds
|
|
subsystem: cronjob_controller
|
|
help: Time between when a cronjob is scheduled to be run, and when the corresponding
|
|
job is created
|
|
type: Histogram
|
|
stabilityLevel: STABLE
|
|
buckets:
|
|
- 1
|
|
- 2
|
|
- 4
|
|
- 8
|
|
- 16
|
|
- 32
|
|
- 64
|
|
- 128
|
|
- 256
|
|
- 512
|
|
- name: pod_failures_handled_by_failure_policy_total
|
|
subsystem: job_controller
|
|
help: "`The number of failed Pods handled by failure policy with\n\t\t\trespect
|
|
to the failure policy action applied based on the matched\n\t\t\trule. Possible
|
|
values of the action label correspond to the\n\t\t\tpossible values for the failure
|
|
policy rule action, which are:\n\t\t\t\"FailJob\", \"Ignore\" and \"Count\".`"
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- action
|
|
- name: terminated_pods_tracking_finalizer_total
|
|
subsystem: job_controller
|
|
help: |-
|
|
`The number of terminated pods (phase=Failed|Succeeded)
|
|
that have the finalizer batch.kubernetes.io/job-tracking
|
|
The event label can be "add" or "delete".`
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- event
|
|
- name: evictions_number
|
|
subsystem: node_collector
|
|
help: Number of Node evictions that happened since current instance of NodeController
|
|
started. This metric is replaced by node_collector_evictions_total.
|
|
type: Counter
|
|
deprecatedVersion: 1.24.0
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- zone
|
|
- name: unhealthy_nodes_in_zone
|
|
subsystem: node_collector
|
|
help: Gauge measuring number of not Ready Nodes per zone.
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- zone
|
|
- name: update_all_nodes_health_duration_seconds
|
|
subsystem: node_collector
|
|
help: Duration in seconds for NodeController to update the health of all nodes.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
buckets:
|
|
- 0.01
|
|
- 0.04
|
|
- 0.16
|
|
- 0.64
|
|
- 2.56
|
|
- 10.24
|
|
- 40.96
|
|
- 163.84
|
|
- name: update_node_health_duration_seconds
|
|
subsystem: node_collector
|
|
help: Duration in seconds for NodeController to update the health of a single node.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
buckets:
|
|
- 0.001
|
|
- 0.004
|
|
- 0.016
|
|
- 0.064
|
|
- 0.256
|
|
- 1.024
|
|
- 4.096
|
|
- 16.384
|
|
- name: zone_health
|
|
subsystem: node_collector
|
|
help: Gauge measuring percentage of healthy nodes per zone.
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- zone
|
|
- name: zone_size
|
|
subsystem: node_collector
|
|
help: Gauge measuring number of registered Nodes per zone.
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- zone
|
|
- name: cidrset_allocation_tries_per_request
|
|
subsystem: node_ipam_controller
|
|
help: Histogram measuring CIDR allocation tries per request.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- clusterCIDR
|
|
buckets:
|
|
- 1
|
|
- 5
|
|
- 25
|
|
- 125
|
|
- 625
|
|
- name: cidrset_cidrs_allocations_total
|
|
subsystem: node_ipam_controller
|
|
help: Counter measuring total number of CIDR allocations.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- clusterCIDR
|
|
- name: cidrset_cidrs_releases_total
|
|
subsystem: node_ipam_controller
|
|
help: Counter measuring total number of CIDR releases.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- clusterCIDR
|
|
- name: cidrset_usage_cidrs
|
|
subsystem: node_ipam_controller
|
|
help: Gauge measuring percentage of allocated CIDRs.
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- clusterCIDR
|
|
- name: cirdset_max_cidrs
|
|
subsystem: node_ipam_controller
|
|
help: Maximum number of CIDRs that can be allocated.
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- clusterCIDR
|
|
- name: multicidrset_allocation_tries_per_request
|
|
subsystem: node_ipam_controller
|
|
help: Histogram measuring CIDR allocation tries per request.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- clusterCIDR
|
|
buckets:
|
|
- 1
|
|
- 5
|
|
- 25
|
|
- 125
|
|
- 625
|
|
- name: multicidrset_cidrs_allocations_total
|
|
subsystem: node_ipam_controller
|
|
help: Counter measuring total number of CIDR allocations.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- clusterCIDR
|
|
- name: multicidrset_cidrs_releases_total
|
|
subsystem: node_ipam_controller
|
|
help: Counter measuring total number of CIDR releases.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- clusterCIDR
|
|
- name: multicidrset_usage_cidrs
|
|
subsystem: node_ipam_controller
|
|
help: Gauge measuring percentage of allocated CIDRs.
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- clusterCIDR
|
|
- name: multicirdset_max_cidrs
|
|
subsystem: node_ipam_controller
|
|
help: Maximum number of CIDRs that can be allocated.
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- clusterCIDR
|
|
- name: force_delete_pod_errors_total
|
|
subsystem: pod_gc_collector
|
|
help: Number of errors encountered when forcefully deleting the pods since the Pod
|
|
GC Controller started.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: force_delete_pods_total
|
|
subsystem: pod_gc_collector
|
|
help: Number of pods that are being forcefully deleted since the Pod GC Controller
|
|
started.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: sorting_deletion_age_ratio
|
|
subsystem: replicaset_controller
|
|
help: The ratio of chosen deleted pod's ages to the current youngest pod's age (at
|
|
the time). Should be <2. The intent of this metric is to measure the rough efficacy
|
|
of the LogarithmicScaleDown feature gate's effect on the sorting (and deletion)
|
|
of pods when a replicaset scales down. This only considers Ready pods when calculating
|
|
and reporting.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
buckets:
|
|
- 0.25
|
|
- 0.5
|
|
- 1
|
|
- 2
|
|
- 4
|
|
- 8
|
|
- name: create_attempts_total
|
|
subsystem: resourceclaim_controller
|
|
help: Number of ResourceClaims creation requests
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: create_failures_total
|
|
subsystem: resourceclaim_controller
|
|
help: Number of ResourceClaims creation request failures
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: job_deletion_duration_seconds
|
|
subsystem: ttl_after_finished_controller
|
|
help: The time it took to delete the job since it became eligible for deletion
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
buckets:
|
|
- 0.1
|
|
- 0.2
|
|
- 0.4
|
|
- 0.8
|
|
- 1.6
|
|
- 3.2
|
|
- 6.4
|
|
- 12.8
|
|
- 25.6
|
|
- 51.2
|
|
- 102.4
|
|
- 204.8
|
|
- 409.6
|
|
- 819.2
|
|
- name: job_pods_finished_total
|
|
subsystem: job_controller
|
|
help: The number of finished Pods that are fully tracked
|
|
type: Counter
|
|
stabilityLevel: STABLE
|
|
labels:
|
|
- completion_mode
|
|
- result
|
|
- name: job_sync_duration_seconds
|
|
subsystem: job_controller
|
|
help: The time it took to sync a job
|
|
type: Histogram
|
|
stabilityLevel: STABLE
|
|
labels:
|
|
- action
|
|
- completion_mode
|
|
- result
|
|
buckets:
|
|
- 0.001
|
|
- 0.002
|
|
- 0.004
|
|
- 0.008
|
|
- 0.016
|
|
- 0.032
|
|
- 0.064
|
|
- 0.128
|
|
- 0.256
|
|
- 0.512
|
|
- 1.024
|
|
- 2.048
|
|
- 4.096
|
|
- 8.192
|
|
- 16.384
|
|
- name: job_syncs_total
|
|
subsystem: job_controller
|
|
help: The number of job syncs
|
|
type: Counter
|
|
stabilityLevel: STABLE
|
|
labels:
|
|
- action
|
|
- completion_mode
|
|
- result
|
|
- name: jobs_finished_total
|
|
subsystem: job_controller
|
|
help: The number of finished jobs
|
|
type: Counter
|
|
stabilityLevel: STABLE
|
|
labels:
|
|
- completion_mode
|
|
- reason
|
|
- result
|
|
- name: evictions_total
|
|
subsystem: node_collector
|
|
help: Number of Node evictions that happened since current instance of NodeController
|
|
started.
|
|
type: Counter
|
|
stabilityLevel: STABLE
|
|
labels:
|
|
- zone
|
|
- name: attachdetach_controller_forced_detaches
|
|
help: Number of times the A/D Controller performed a forced detach
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: attachdetach_controller_total_volumes
|
|
help: Number of volumes in A/D Controller
|
|
type: Custom
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- plugin_name
|
|
- state
|
|
- name: create_failures_total
|
|
subsystem: ephemeral_volume_controller
|
|
help: Number of PersistentVolumeClaims creation request failures
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: create_total
|
|
subsystem: ephemeral_volume_controller
|
|
help: Number of PersistentVolumeClaims creation requests
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: client_expiration_renew_errors
|
|
subsystem: certificate_manager
|
|
namespace: kubelet
|
|
help: Counter of certificate renewal errors.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: certificate_manager_server_rotation_seconds
|
|
subsystem: kubelet
|
|
help: Histogram of the number of seconds the previous certificate lived before being
|
|
rotated.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
buckets:
|
|
- 60
|
|
- 3600
|
|
- 14400
|
|
- 86400
|
|
- 604800
|
|
- 2.592e+06
|
|
- 7.776e+06
|
|
- 1.5552e+07
|
|
- 3.1104e+07
|
|
- 1.24416e+08
|
|
- name: certificate_manager_server_ttl_seconds
|
|
subsystem: kubelet
|
|
help: Gauge of the shortest TTL (time-to-live) of the Kubelet's serving certificate.
|
|
The value is in seconds until certificate expiry (negative if already expired).
|
|
If serving certificate is invalid or unused, the value will be +INF.
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
- name: credential_provider_plugin_duration
|
|
subsystem: kubelet
|
|
help: Duration of execution in seconds for credential provider plugin
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- plugin_name
|
|
buckets:
|
|
- 0.005
|
|
- 0.01
|
|
- 0.025
|
|
- 0.05
|
|
- 0.1
|
|
- 0.25
|
|
- 0.5
|
|
- 1
|
|
- 2.5
|
|
- 5
|
|
- 10
|
|
- name: credential_provider_plugin_errors
|
|
subsystem: kubelet
|
|
help: Number of errors from credential provider plugin
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- plugin_name
|
|
- name: server_expiration_renew_errors
|
|
subsystem: kubelet
|
|
help: Counter of certificate renewal errors.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: pv_collector_bound_pv_count
|
|
help: Gauge measuring number of persistent volume currently bound
|
|
type: Custom
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- storage_class
|
|
- name: pv_collector_bound_pvc_count
|
|
help: Gauge measuring number of persistent volume claim currently bound
|
|
type: Custom
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- namespace
|
|
- name: pv_collector_total_pv_count
|
|
help: Gauge measuring total number of persistent volumes
|
|
type: Custom
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- plugin_name
|
|
- volume_mode
|
|
- name: pv_collector_unbound_pv_count
|
|
help: Gauge measuring number of persistent volume currently unbound
|
|
type: Custom
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- storage_class
|
|
- name: pv_collector_unbound_pvc_count
|
|
help: Gauge measuring number of persistent volume claim currently unbound
|
|
type: Custom
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- namespace
|
|
- name: retroactive_storageclass_errors_total
|
|
help: Total number of failed retroactive StorageClass assignments to persistent
|
|
volume claim
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: retroactive_storageclass_total
|
|
help: Total number of retroactive StorageClass assignments to persistent volume
|
|
claim
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: storage_count_attachable_volumes_in_use
|
|
help: Measure number of volumes in use
|
|
type: Custom
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- node
|
|
- volume_plugin
|
|
- name: volume_operation_total_errors
|
|
help: Total volume operation errors
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- operation_name
|
|
- plugin_name
|
|
- name: container_cpu_usage_seconds_total
|
|
help: Cumulative cpu time consumed by the container in core-seconds
|
|
type: Custom
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- container
|
|
- pod
|
|
- namespace
|
|
- name: container_memory_working_set_bytes
|
|
help: Current working set of the container in bytes
|
|
type: Custom
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- container
|
|
- pod
|
|
- namespace
|
|
- name: container_start_time_seconds
|
|
help: Start time of the container since unix epoch in seconds
|
|
type: Custom
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- container
|
|
- pod
|
|
- namespace
|
|
- name: cgroup_manager_duration_seconds
|
|
subsystem: kubelet
|
|
help: Duration in seconds for cgroup manager operations. Broken down by method.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- operation_type
|
|
buckets:
|
|
- 0.005
|
|
- 0.01
|
|
- 0.025
|
|
- 0.05
|
|
- 0.1
|
|
- 0.25
|
|
- 0.5
|
|
- 1
|
|
- 2.5
|
|
- 5
|
|
- 10
|
|
- name: kubelet_container_log_filesystem_used_bytes
|
|
help: Bytes used by the container's logs on the filesystem.
|
|
type: Custom
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- uid
|
|
- namespace
|
|
- pod
|
|
- container
|
|
- name: containers_per_pod_count
|
|
subsystem: kubelet
|
|
help: The number of containers per pod.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
buckets:
|
|
- 1
|
|
- 2
|
|
- 4
|
|
- 8
|
|
- 16
|
|
- name: cpu_manager_pinning_errors_total
|
|
subsystem: kubelet
|
|
help: The number of cpu core allocations which required pinning and failed.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: cpu_manager_pinning_requests_total
|
|
subsystem: kubelet
|
|
help: The number of cpu core allocations which required pinning.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: device_plugin_alloc_duration_seconds
|
|
subsystem: kubelet
|
|
help: Duration in seconds to serve a device plugin Allocation request. Broken down
|
|
by resource name.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- resource_name
|
|
buckets:
|
|
- 0.005
|
|
- 0.01
|
|
- 0.025
|
|
- 0.05
|
|
- 0.1
|
|
- 0.25
|
|
- 0.5
|
|
- 1
|
|
- 2.5
|
|
- 5
|
|
- 10
|
|
- name: device_plugin_registration_total
|
|
subsystem: kubelet
|
|
help: Cumulative number of device plugin registrations. Broken down by resource
|
|
name.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- resource_name
|
|
- name: eviction_stats_age_seconds
|
|
subsystem: kubelet
|
|
help: Time between when stats are collected, and when pod is evicted based on those
|
|
stats by eviction signal
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- eviction_signal
|
|
buckets:
|
|
- 0.005
|
|
- 0.01
|
|
- 0.025
|
|
- 0.05
|
|
- 0.1
|
|
- 0.25
|
|
- 0.5
|
|
- 1
|
|
- 2.5
|
|
- 5
|
|
- 10
|
|
- name: evictions
|
|
subsystem: kubelet
|
|
help: Cumulative number of pod evictions by eviction signal
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- eviction_signal
|
|
- name: graceful_shutdown_end_time_seconds
|
|
subsystem: kubelet
|
|
help: Last graceful shutdown end time since unix epoch in seconds
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
- name: graceful_shutdown_start_time_seconds
|
|
subsystem: kubelet
|
|
help: Last graceful shutdown start time since unix epoch in seconds
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
- name: lifecycle_handler_http_fallbacks_total
|
|
subsystem: kubelet
|
|
help: The number of times lifecycle handlers successfully fell back to http from
|
|
https.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: managed_ephemeral_containers
|
|
subsystem: kubelet
|
|
help: Current number of ephemeral containers in pods managed by this kubelet.
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
- name: node_name
|
|
subsystem: kubelet
|
|
help: The node's name. The count is always 1.
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- node
|
|
- name: pleg_discard_events
|
|
subsystem: kubelet
|
|
help: The number of discard events in PLEG.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: pleg_last_seen_seconds
|
|
subsystem: kubelet
|
|
help: Timestamp in seconds when PLEG was last seen active.
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
- name: pleg_relist_duration_seconds
|
|
subsystem: kubelet
|
|
help: Duration in seconds for relisting pods in PLEG.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
buckets:
|
|
- 0.005
|
|
- 0.01
|
|
- 0.025
|
|
- 0.05
|
|
- 0.1
|
|
- 0.25
|
|
- 0.5
|
|
- 1
|
|
- 2.5
|
|
- 5
|
|
- 10
|
|
- name: pleg_relist_interval_seconds
|
|
subsystem: kubelet
|
|
help: Interval in seconds between relisting in PLEG.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
buckets:
|
|
- 0.005
|
|
- 0.01
|
|
- 0.025
|
|
- 0.05
|
|
- 0.1
|
|
- 0.25
|
|
- 0.5
|
|
- 1
|
|
- 2.5
|
|
- 5
|
|
- 10
|
|
- name: pod_resources_endpoint_errors_get_allocatable
|
|
subsystem: kubelet
|
|
help: Number of requests to the PodResource GetAllocatableResources endpoint which
|
|
returned error. Broken down by server api version.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- server_api_version
|
|
- name: pod_resources_endpoint_errors_list
|
|
subsystem: kubelet
|
|
help: Number of requests to the PodResource List endpoint which returned error.
|
|
Broken down by server api version.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- server_api_version
|
|
- name: pod_resources_endpoint_requests_get_allocatable
|
|
subsystem: kubelet
|
|
help: Number of requests to the PodResource GetAllocatableResources endpoint. Broken
|
|
down by server api version.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- server_api_version
|
|
- name: pod_resources_endpoint_requests_list
|
|
subsystem: kubelet
|
|
help: Number of requests to the PodResource List endpoint. Broken down by server
|
|
api version.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- server_api_version
|
|
- name: pod_resources_endpoint_requests_total
|
|
subsystem: kubelet
|
|
help: Cumulative number of requests to the PodResource endpoint. Broken down by
|
|
server api version.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- server_api_version
|
|
- name: pod_start_duration_seconds
|
|
subsystem: kubelet
|
|
help: Duration in seconds from kubelet seeing a pod for the first time to the pod
|
|
starting to run
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
buckets:
|
|
- 0.005
|
|
- 0.01
|
|
- 0.025
|
|
- 0.05
|
|
- 0.1
|
|
- 0.25
|
|
- 0.5
|
|
- 1
|
|
- 2.5
|
|
- 5
|
|
- 10
|
|
- name: pod_start_sli_duration_seconds
|
|
subsystem: kubelet
|
|
help: Duration in seconds to start a pod, excluding time to pull images and run
|
|
init containers, measured from pod creation timestamp to when all its containers
|
|
are reported as started and observed via watch
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
buckets:
|
|
- 0.5
|
|
- 1
|
|
- 2
|
|
- 3
|
|
- 4
|
|
- 5
|
|
- 6
|
|
- 8
|
|
- 10
|
|
- 20
|
|
- 30
|
|
- 45
|
|
- 60
|
|
- 120
|
|
- 180
|
|
- 240
|
|
- 300
|
|
- 360
|
|
- 480
|
|
- 600
|
|
- 900
|
|
- 1200
|
|
- 1800
|
|
- 2700
|
|
- 3600
|
|
- name: pod_status_sync_duration_seconds
|
|
subsystem: kubelet
|
|
help: Duration in seconds to sync a pod status update. Measures time from detection
|
|
of a change to pod status until the API is successfully updated for that pod,
|
|
even if multiple intervening changes to pod status occur.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
buckets:
|
|
- 0.01
|
|
- 0.05
|
|
- 0.1
|
|
- 0.5
|
|
- 1
|
|
- 5
|
|
- 10
|
|
- 20
|
|
- 30
|
|
- 45
|
|
- 60
|
|
- name: pod_worker_duration_seconds
|
|
subsystem: kubelet
|
|
help: 'Duration in seconds to sync a single pod. Broken down by operation type:
|
|
create, update, or sync'
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- operation_type
|
|
buckets:
|
|
- 0.005
|
|
- 0.01
|
|
- 0.025
|
|
- 0.05
|
|
- 0.1
|
|
- 0.25
|
|
- 0.5
|
|
- 1
|
|
- 2.5
|
|
- 5
|
|
- 10
|
|
- name: pod_worker_start_duration_seconds
|
|
subsystem: kubelet
|
|
help: Duration in seconds from kubelet seeing a pod to starting a worker.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
buckets:
|
|
- 0.005
|
|
- 0.01
|
|
- 0.025
|
|
- 0.05
|
|
- 0.1
|
|
- 0.25
|
|
- 0.5
|
|
- 1
|
|
- 2.5
|
|
- 5
|
|
- 10
|
|
- name: preemptions
|
|
subsystem: kubelet
|
|
help: Cumulative number of pod preemptions by preemption resource
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- preemption_signal
|
|
- name: run_podsandbox_duration_seconds
|
|
subsystem: kubelet
|
|
help: Duration in seconds of the run_podsandbox operations. Broken down by RuntimeClass.Handler.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- runtime_handler
|
|
buckets:
|
|
- 0.005
|
|
- 0.01
|
|
- 0.025
|
|
- 0.05
|
|
- 0.1
|
|
- 0.25
|
|
- 0.5
|
|
- 1
|
|
- 2.5
|
|
- 5
|
|
- 10
|
|
- name: run_podsandbox_errors_total
|
|
subsystem: kubelet
|
|
help: Cumulative number of the run_podsandbox operation errors by RuntimeClass.Handler.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- runtime_handler
|
|
- name: running_containers
|
|
subsystem: kubelet
|
|
help: Number of containers currently running
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- container_state
|
|
- name: running_pods
|
|
subsystem: kubelet
|
|
help: Number of pods that have a running pod sandbox
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
- name: runtime_operations_duration_seconds
|
|
subsystem: kubelet
|
|
help: Duration in seconds of runtime operations. Broken down by operation type.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- operation_type
|
|
buckets:
|
|
- 0.005
|
|
- 0.0125
|
|
- 0.03125
|
|
- 0.078125
|
|
- 0.1953125
|
|
- 0.48828125
|
|
- 1.220703125
|
|
- 3.0517578125
|
|
- 7.62939453125
|
|
- 19.073486328125
|
|
- 47.6837158203125
|
|
- 119.20928955078125
|
|
- 298.0232238769531
|
|
- 745.0580596923828
|
|
- name: runtime_operations_errors_total
|
|
subsystem: kubelet
|
|
help: Cumulative number of runtime operation errors by operation type.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- operation_type
|
|
- name: runtime_operations_total
|
|
subsystem: kubelet
|
|
help: Cumulative number of runtime operations by operation type.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- operation_type
|
|
- name: started_containers_errors_total
|
|
subsystem: kubelet
|
|
help: Cumulative number of errors when starting containers
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- code
|
|
- container_type
|
|
- name: started_containers_total
|
|
subsystem: kubelet
|
|
help: Cumulative number of containers started
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- container_type
|
|
- name: started_host_process_containers_errors_total
|
|
subsystem: kubelet
|
|
help: Cumulative number of errors when starting hostprocess containers. This metric
|
|
will only be collected on Windows and requires WindowsHostProcessContainers feature
|
|
gate to be enabled.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- code
|
|
- container_type
|
|
- name: started_host_process_containers_total
|
|
subsystem: kubelet
|
|
help: Cumulative number of hostprocess containers started. This metric will only
|
|
be collected on Windows and requires WindowsHostProcessContainers feature gate
|
|
to be enabled.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- container_type
|
|
- name: started_pods_errors_total
|
|
subsystem: kubelet
|
|
help: Cumulative number of errors when starting pods
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: started_pods_total
|
|
subsystem: kubelet
|
|
help: Cumulative number of pods started
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: topology_manager_admission_duration_ms
|
|
subsystem: kubelet
|
|
help: Duration in milliseconds to serve a pod admission request.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
buckets:
|
|
- 0.05
|
|
- 0.1
|
|
- 0.2
|
|
- 0.4
|
|
- 0.8
|
|
- 1.6
|
|
- 3.2
|
|
- 6.4
|
|
- 12.8
|
|
- 25.6
|
|
- 51.2
|
|
- 102.4
|
|
- 204.8
|
|
- 409.6
|
|
- 819.2
|
|
- name: topology_manager_admission_errors_total
|
|
subsystem: kubelet
|
|
help: The number of admission request failures where resources could not be aligned.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: topology_manager_admission_requests_total
|
|
subsystem: kubelet
|
|
help: The number of admission requests where resources have to be aligned.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: kubelet_volume_stats_available_bytes
|
|
help: Number of available bytes in the volume
|
|
type: Custom
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- namespace
|
|
- persistentvolumeclaim
|
|
- name: kubelet_volume_stats_capacity_bytes
|
|
help: Capacity in bytes of the volume
|
|
type: Custom
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- namespace
|
|
- persistentvolumeclaim
|
|
- name: kubelet_volume_stats_health_status_abnormal
|
|
help: Abnormal volume health status. The count is either 1 or 0. 1 indicates the
|
|
volume is unhealthy, 0 indicates volume is healthy
|
|
type: Custom
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- namespace
|
|
- persistentvolumeclaim
|
|
- name: kubelet_volume_stats_inodes
|
|
help: Maximum number of inodes in the volume
|
|
type: Custom
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- namespace
|
|
- persistentvolumeclaim
|
|
- name: kubelet_volume_stats_inodes_free
|
|
help: Number of free inodes in the volume
|
|
type: Custom
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- namespace
|
|
- persistentvolumeclaim
|
|
- name: kubelet_volume_stats_inodes_used
|
|
help: Number of used inodes in the volume
|
|
type: Custom
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- namespace
|
|
- persistentvolumeclaim
|
|
- name: kubelet_volume_stats_used_bytes
|
|
help: Number of used bytes in the volume
|
|
type: Custom
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- namespace
|
|
- persistentvolumeclaim
|
|
- name: node_cpu_usage_seconds_total
|
|
help: Cumulative cpu time consumed by the node in core-seconds
|
|
type: Custom
|
|
stabilityLevel: ALPHA
|
|
- name: node_memory_working_set_bytes
|
|
help: Current working set of the node in bytes
|
|
type: Custom
|
|
stabilityLevel: ALPHA
|
|
- name: plugin_manager_total_plugins
|
|
help: Number of plugins in Plugin Manager
|
|
type: Custom
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- socket_path
|
|
- state
|
|
- name: pod_cpu_usage_seconds_total
|
|
help: Cumulative cpu time consumed by the pod in core-seconds
|
|
type: Custom
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- pod
|
|
- namespace
|
|
- name: pod_memory_working_set_bytes
|
|
help: Current working set of the pod in bytes
|
|
type: Custom
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- pod
|
|
- namespace
|
|
- name: scrape_error
|
|
help: 1 if there was an error while getting container metrics, 0 otherwise
|
|
type: Custom
|
|
stabilityLevel: ALPHA
|
|
- name: http_inflight_requests
|
|
subsystem: kubelet
|
|
help: Number of the inflight http requests
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- long_running
|
|
- method
|
|
- path
|
|
- server_type
|
|
- name: http_requests_duration_seconds
|
|
subsystem: kubelet
|
|
help: Duration in seconds to serve http requests
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- long_running
|
|
- method
|
|
- path
|
|
- server_type
|
|
buckets:
|
|
- 0.005
|
|
- 0.01
|
|
- 0.025
|
|
- 0.05
|
|
- 0.1
|
|
- 0.25
|
|
- 0.5
|
|
- 1
|
|
- 2.5
|
|
- 5
|
|
- 10
|
|
- name: http_requests_total
|
|
subsystem: kubelet
|
|
help: Number of the http requests received since the server started
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- long_running
|
|
- method
|
|
- path
|
|
- server_type
|
|
- name: volume_metric_collection_duration_seconds
|
|
subsystem: kubelet
|
|
help: Duration in seconds to calculate volume stats
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- metric_source
|
|
buckets:
|
|
- 0.005
|
|
- 0.01
|
|
- 0.025
|
|
- 0.05
|
|
- 0.1
|
|
- 0.25
|
|
- 0.5
|
|
- 1
|
|
- 2.5
|
|
- 5
|
|
- 10
|
|
- name: network_programming_duration_seconds
|
|
subsystem: kubeproxy
|
|
help: In Cluster Network Programming Latency in seconds
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
buckets:
|
|
- 0.25
|
|
- 0.5
|
|
- 1
|
|
- 2
|
|
- 3
|
|
- 4
|
|
- 5
|
|
- 6
|
|
- 7
|
|
- 8
|
|
- 9
|
|
- 10
|
|
- 11
|
|
- 12
|
|
- 13
|
|
- 14
|
|
- 15
|
|
- 16
|
|
- 17
|
|
- 18
|
|
- 19
|
|
- 20
|
|
- 21
|
|
- 22
|
|
- 23
|
|
- 24
|
|
- 25
|
|
- 26
|
|
- 27
|
|
- 28
|
|
- 29
|
|
- 30
|
|
- 31
|
|
- 32
|
|
- 33
|
|
- 34
|
|
- 35
|
|
- 36
|
|
- 37
|
|
- 38
|
|
- 39
|
|
- 40
|
|
- 41
|
|
- 42
|
|
- 43
|
|
- 44
|
|
- 45
|
|
- 46
|
|
- 47
|
|
- 48
|
|
- 49
|
|
- 50
|
|
- 51
|
|
- 52
|
|
- 53
|
|
- 54
|
|
- 55
|
|
- 56
|
|
- 57
|
|
- 58
|
|
- 59
|
|
- 60
|
|
- 65
|
|
- 70
|
|
- 75
|
|
- 80
|
|
- 85
|
|
- 90
|
|
- 95
|
|
- 100
|
|
- 105
|
|
- 110
|
|
- 115
|
|
- 120
|
|
- 150
|
|
- 180
|
|
- 210
|
|
- 240
|
|
- 270
|
|
- 300
|
|
- name: sync_proxy_rules_duration_seconds
|
|
subsystem: kubeproxy
|
|
help: SyncProxyRules latency in seconds
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
buckets:
|
|
- 0.001
|
|
- 0.002
|
|
- 0.004
|
|
- 0.008
|
|
- 0.016
|
|
- 0.032
|
|
- 0.064
|
|
- 0.128
|
|
- 0.256
|
|
- 0.512
|
|
- 1.024
|
|
- 2.048
|
|
- 4.096
|
|
- 8.192
|
|
- 16.384
|
|
- name: sync_proxy_rules_endpoint_changes_pending
|
|
subsystem: kubeproxy
|
|
help: Pending proxy rules Endpoint changes
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
- name: sync_proxy_rules_endpoint_changes_total
|
|
subsystem: kubeproxy
|
|
help: Cumulative proxy rules Endpoint changes
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: sync_proxy_rules_iptables_partial_restore_failures_total
|
|
subsystem: kubeproxy
|
|
help: Cumulative proxy iptables partial restore failures
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: sync_proxy_rules_iptables_restore_failures_total
|
|
subsystem: kubeproxy
|
|
help: Cumulative proxy iptables restore failures
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: sync_proxy_rules_iptables_total
|
|
subsystem: kubeproxy
|
|
help: Number of proxy iptables rules programmed
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- table
|
|
- name: sync_proxy_rules_last_queued_timestamp_seconds
|
|
subsystem: kubeproxy
|
|
help: The last time a sync of proxy rules was queued
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
- name: sync_proxy_rules_last_timestamp_seconds
|
|
subsystem: kubeproxy
|
|
help: The last time proxy rules were successfully synced
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
- name: sync_proxy_rules_no_local_endpoints_total
|
|
subsystem: kubeproxy
|
|
help: Number of services with a Local traffic policy and no endpoints
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- traffic_policy
|
|
- name: sync_proxy_rules_service_changes_pending
|
|
subsystem: kubeproxy
|
|
help: Pending proxy rules Service changes
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
- name: sync_proxy_rules_service_changes_total
|
|
subsystem: kubeproxy
|
|
help: Cumulative proxy rules Service changes
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: probe_duration_seconds
|
|
subsystem: prober
|
|
help: Duration in seconds for a probe response.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- container
|
|
- namespace
|
|
- pod
|
|
- probe_type
|
|
- name: probe_total
|
|
subsystem: prober
|
|
help: Cumulative number of a liveness, readiness or startup probe for a container
|
|
by result.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- container
|
|
- namespace
|
|
- pod
|
|
- pod_uid
|
|
- probe_type
|
|
- result
|
|
- name: volume_manager_selinux_container_errors_total
|
|
help: Number of errors when kubelet cannot compute SELinux context for a container.
|
|
Kubelet can't start such a Pod then and it will retry, therefore value of this
|
|
metric may not represent the actual nr. of containers.
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
- name: volume_manager_selinux_container_warnings_total
|
|
help: Number of errors when kubelet cannot compute SELinux context for a container
|
|
that are ignored. They will become real errors when SELinuxMountReadWriteOncePod
|
|
feature is expanded to all volume access modes.
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
- name: volume_manager_selinux_pod_context_mismatch_errors_total
|
|
help: Number of errors when a Pod defines different SELinux contexts for its containers
|
|
that use the same volume. Kubelet can't start such a Pod then and it will retry,
|
|
therefore value of this metric may not represent the actual nr. of Pods.
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
- name: volume_manager_selinux_pod_context_mismatch_warnings_total
|
|
help: Number of errors when a Pod defines different SELinux contexts for its containers
|
|
that use the same volume. They are not errors yet, but they will become real errors
|
|
when SELinuxMountReadWriteOncePod feature is expanded to all volume access modes.
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
- name: volume_manager_selinux_volume_context_mismatch_errors_total
|
|
help: Number of errors when a Pod uses a volume that is already mounted with a different
|
|
SELinux context than the Pod needs. Kubelet can't start such a Pod then and it
|
|
will retry, therefore value of this metric may not represent the actual nr. of
|
|
Pods.
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
- name: volume_manager_selinux_volume_context_mismatch_warnings_total
|
|
help: Number of errors when a Pod uses a volume that is already mounted with a different
|
|
SELinux context than the Pod needs. They are not errors yet, but they will become
|
|
real errors when SELinuxMountReadWriteOncePod feature is expanded to all volume
|
|
access modes.
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
- name: volume_manager_selinux_volumes_admitted_total
|
|
help: Number of volumes whose SELinux context was fine and will be mounted with
|
|
mount -o context option.
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
- name: volume_manager_total_volumes
|
|
help: Number of volumes in Volume Manager
|
|
type: Custom
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- plugin_name
|
|
- state
|
|
- name: csr_honored_duration_total
|
|
subsystem: certificates_registry
|
|
namespace: apiserver
|
|
help: Total number of issued CSRs with a requested duration that was honored, sliced
|
|
by signer (only kubernetes.io signer names are specifically identified)
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- signerName
|
|
- name: csr_requested_duration_total
|
|
subsystem: certificates_registry
|
|
namespace: apiserver
|
|
help: Total number of issued CSRs with a requested duration, sliced by signer (only
|
|
kubernetes.io signer names are specifically identified)
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- signerName
|
|
- name: allocated_ips
|
|
subsystem: clusterip_allocator
|
|
namespace: kube_apiserver
|
|
help: Gauge measuring the number of allocated IPs for Services
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- cidr
|
|
- name: allocation_errors_total
|
|
subsystem: clusterip_allocator
|
|
namespace: kube_apiserver
|
|
help: Number of errors trying to allocate Cluster IPs
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- cidr
|
|
- scope
|
|
- name: allocation_total
|
|
subsystem: clusterip_allocator
|
|
namespace: kube_apiserver
|
|
help: Number of Cluster IPs allocations
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- cidr
|
|
- scope
|
|
- name: available_ips
|
|
subsystem: clusterip_allocator
|
|
namespace: kube_apiserver
|
|
help: Gauge measuring the number of available IPs for Services
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- cidr
|
|
- name: allocated_ports
|
|
subsystem: nodeport_allocator
|
|
namespace: kube_apiserver
|
|
help: Gauge measuring the number of allocated NodePorts for Services
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
- name: allocation_errors_total
|
|
subsystem: nodeport_allocator
|
|
namespace: kube_apiserver
|
|
help: Number of errors trying to allocate NodePort
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- scope
|
|
- name: allocation_total
|
|
subsystem: nodeport_allocator
|
|
namespace: kube_apiserver
|
|
help: Number of NodePort allocations
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- scope
|
|
- name: available_ports
|
|
subsystem: nodeport_allocator
|
|
namespace: kube_apiserver
|
|
help: Gauge measuring the number of available NodePorts for Services
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
- name: pods_logs_backend_tls_failure_total
|
|
subsystem: pod_logs
|
|
namespace: kube_apiserver
|
|
help: Total number of requests for pods/logs that failed due to kubelet server TLS
|
|
verification
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: pods_logs_insecure_backend_total
|
|
subsystem: pod_logs
|
|
namespace: kube_apiserver
|
|
help: 'Total number of requests for pods/logs sliced by usage type: enforce_tls,
|
|
skip_tls_allowed, skip_tls_denied'
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- usage
|
|
- name: e2e_scheduling_duration_seconds
|
|
subsystem: scheduler
|
|
help: E2e scheduling latency in seconds (scheduling algorithm + binding). This metric
|
|
is replaced by scheduling_attempt_duration_seconds.
|
|
type: Histogram
|
|
deprecatedVersion: 1.23.0
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- profile
|
|
- result
|
|
buckets:
|
|
- 0.001
|
|
- 0.002
|
|
- 0.004
|
|
- 0.008
|
|
- 0.016
|
|
- 0.032
|
|
- 0.064
|
|
- 0.128
|
|
- 0.256
|
|
- 0.512
|
|
- 1.024
|
|
- 2.048
|
|
- 4.096
|
|
- 8.192
|
|
- 16.384
|
|
- name: goroutines
|
|
subsystem: scheduler
|
|
help: Number of running goroutines split by the work they do such as binding.
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- operation
|
|
- name: permit_wait_duration_seconds
|
|
subsystem: scheduler
|
|
help: Duration of waiting on permit.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- result
|
|
buckets:
|
|
- 0.001
|
|
- 0.002
|
|
- 0.004
|
|
- 0.008
|
|
- 0.016
|
|
- 0.032
|
|
- 0.064
|
|
- 0.128
|
|
- 0.256
|
|
- 0.512
|
|
- 1.024
|
|
- 2.048
|
|
- 4.096
|
|
- 8.192
|
|
- 16.384
|
|
- name: plugin_execution_duration_seconds
|
|
subsystem: scheduler
|
|
help: Duration for running a plugin at a specific extension point.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- extension_point
|
|
- plugin
|
|
- status
|
|
buckets:
|
|
- 1e-05
|
|
- 1.5000000000000002e-05
|
|
- 2.2500000000000005e-05
|
|
- 3.375000000000001e-05
|
|
- 5.062500000000001e-05
|
|
- 7.593750000000002e-05
|
|
- 0.00011390625000000003
|
|
- 0.00017085937500000006
|
|
- 0.0002562890625000001
|
|
- 0.00038443359375000017
|
|
- 0.0005766503906250003
|
|
- 0.0008649755859375004
|
|
- 0.0012974633789062506
|
|
- 0.0019461950683593758
|
|
- 0.0029192926025390638
|
|
- 0.004378938903808595
|
|
- 0.006568408355712893
|
|
- 0.009852612533569338
|
|
- 0.014778918800354007
|
|
- 0.02216837820053101
|
|
- name: scheduler_cache_size
|
|
subsystem: scheduler
|
|
help: Number of nodes, pods, and assumed (bound) pods in the scheduler cache.
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- type
|
|
- name: scheduler_goroutines
|
|
subsystem: scheduler
|
|
help: Number of running goroutines split by the work they do such as binding. This
|
|
metric is replaced by the \"goroutines\" metric.
|
|
type: Gauge
|
|
deprecatedVersion: 1.26.0
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- work
|
|
- name: scheduling_algorithm_duration_seconds
|
|
subsystem: scheduler
|
|
help: Scheduling algorithm latency in seconds
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
buckets:
|
|
- 0.001
|
|
- 0.002
|
|
- 0.004
|
|
- 0.008
|
|
- 0.016
|
|
- 0.032
|
|
- 0.064
|
|
- 0.128
|
|
- 0.256
|
|
- 0.512
|
|
- 1.024
|
|
- 2.048
|
|
- 4.096
|
|
- 8.192
|
|
- 16.384
|
|
- name: unschedulable_pods
|
|
subsystem: scheduler
|
|
help: The number of unschedulable pods broken down by plugin name. A pod will increment
|
|
the gauge for all plugins that caused it to not schedule and so this metric have
|
|
meaning only when broken down by plugin.
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- plugin
|
|
- profile
|
|
- name: binder_cache_requests_total
|
|
subsystem: scheduler_volume
|
|
help: Total number for request volume binding cache
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- operation
|
|
- name: scheduling_stage_error_total
|
|
subsystem: scheduler_volume
|
|
help: Volume scheduling stage error count
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- operation
|
|
- name: legacy_tokens_total
|
|
subsystem: serviceaccount
|
|
help: Cumulative legacy service account tokens used
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: stale_tokens_total
|
|
subsystem: serviceaccount
|
|
help: Cumulative stale projected service account tokens used
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: valid_tokens_total
|
|
subsystem: serviceaccount
|
|
help: Cumulative valid projected service account tokens used
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: kube_pod_resource_limit
|
|
help: Resources limit for workloads on the cluster, broken down by pod. This shows
|
|
the resource usage the scheduler and kubelet expect per pod for resources along
|
|
with the unit for the resource if any.
|
|
type: Custom
|
|
stabilityLevel: STABLE
|
|
labels:
|
|
- namespace
|
|
- pod
|
|
- node
|
|
- scheduler
|
|
- priority
|
|
- resource
|
|
- unit
|
|
- name: kube_pod_resource_request
|
|
help: Resources requested by workloads on the cluster, broken down by pod. This
|
|
shows the resource usage the scheduler and kubelet expect per pod for resources
|
|
along with the unit for the resource if any.
|
|
type: Custom
|
|
stabilityLevel: STABLE
|
|
labels:
|
|
- namespace
|
|
- pod
|
|
- node
|
|
- scheduler
|
|
- priority
|
|
- resource
|
|
- unit
|
|
- name: framework_extension_point_duration_seconds
|
|
subsystem: scheduler
|
|
help: Latency for running all plugins of a specific extension point.
|
|
type: Histogram
|
|
stabilityLevel: STABLE
|
|
labels:
|
|
- extension_point
|
|
- profile
|
|
- status
|
|
buckets:
|
|
- 0.0001
|
|
- 0.0002
|
|
- 0.0004
|
|
- 0.0008
|
|
- 0.0016
|
|
- 0.0032
|
|
- 0.0064
|
|
- 0.0128
|
|
- 0.0256
|
|
- 0.0512
|
|
- 0.1024
|
|
- 0.2048
|
|
- name: pending_pods
|
|
subsystem: scheduler
|
|
help: Number of pending pods, by the queue type. 'active' means number of pods in
|
|
activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number
|
|
of pods in unschedulablePods that the scheduler attempted to schedule and failed;
|
|
'gated' is the number of unschedulable pods that the scheduler never attempted
|
|
to schedule because they are gated.
|
|
type: Gauge
|
|
stabilityLevel: STABLE
|
|
labels:
|
|
- queue
|
|
- name: pod_scheduling_attempts
|
|
subsystem: scheduler
|
|
help: Number of attempts to successfully schedule a pod.
|
|
type: Histogram
|
|
stabilityLevel: STABLE
|
|
buckets:
|
|
- 1
|
|
- 2
|
|
- 4
|
|
- 8
|
|
- 16
|
|
- name: pod_scheduling_duration_seconds
|
|
subsystem: scheduler
|
|
help: E2e latency for a pod being scheduled which may include multiple scheduling
|
|
attempts.
|
|
type: Histogram
|
|
stabilityLevel: STABLE
|
|
labels:
|
|
- attempts
|
|
buckets:
|
|
- 0.01
|
|
- 0.02
|
|
- 0.04
|
|
- 0.08
|
|
- 0.16
|
|
- 0.32
|
|
- 0.64
|
|
- 1.28
|
|
- 2.56
|
|
- 5.12
|
|
- 10.24
|
|
- 20.48
|
|
- 40.96
|
|
- 81.92
|
|
- 163.84
|
|
- 327.68
|
|
- 655.36
|
|
- 1310.72
|
|
- 2621.44
|
|
- 5242.88
|
|
- name: preemption_attempts_total
|
|
subsystem: scheduler
|
|
help: Total preemption attempts in the cluster till now
|
|
type: Counter
|
|
stabilityLevel: STABLE
|
|
- name: preemption_victims
|
|
subsystem: scheduler
|
|
help: Number of selected preemption victims
|
|
type: Histogram
|
|
stabilityLevel: STABLE
|
|
buckets:
|
|
- 1
|
|
- 2
|
|
- 4
|
|
- 8
|
|
- 16
|
|
- 32
|
|
- 64
|
|
- name: queue_incoming_pods_total
|
|
subsystem: scheduler
|
|
help: Number of pods added to scheduling queues by event and queue type.
|
|
type: Counter
|
|
stabilityLevel: STABLE
|
|
labels:
|
|
- event
|
|
- queue
|
|
- name: schedule_attempts_total
|
|
subsystem: scheduler
|
|
help: Number of attempts to schedule pods, by the result. 'unschedulable' means
|
|
a pod could not be scheduled, while 'error' means an internal scheduler problem.
|
|
type: Counter
|
|
stabilityLevel: STABLE
|
|
labels:
|
|
- profile
|
|
- result
|
|
- name: scheduling_attempt_duration_seconds
|
|
subsystem: scheduler
|
|
help: Scheduling attempt latency in seconds (scheduling algorithm + binding)
|
|
type: Histogram
|
|
stabilityLevel: STABLE
|
|
labels:
|
|
- profile
|
|
- result
|
|
buckets:
|
|
- 0.001
|
|
- 0.002
|
|
- 0.004
|
|
- 0.008
|
|
- 0.016
|
|
- 0.032
|
|
- 0.064
|
|
- 0.128
|
|
- 0.256
|
|
- 0.512
|
|
- 1.024
|
|
- 2.048
|
|
- 4.096
|
|
- 8.192
|
|
- 16.384
|
|
- name: operations_seconds
|
|
subsystem: csi
|
|
help: Container Storage Interface operation duration with gRPC error code status
|
|
total
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- driver_name
|
|
- grpc_status_code
|
|
- method_name
|
|
- migrated
|
|
buckets:
|
|
- 0.1
|
|
- 0.25
|
|
- 0.5
|
|
- 1
|
|
- 2.5
|
|
- 5
|
|
- 10
|
|
- 15
|
|
- 25
|
|
- 50
|
|
- 120
|
|
- 300
|
|
- 600
|
|
- name: storage_operation_duration_seconds
|
|
help: Storage operation duration
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- migrated
|
|
- operation_name
|
|
- status
|
|
- volume_plugin
|
|
buckets:
|
|
- 0.1
|
|
- 0.25
|
|
- 0.5
|
|
- 1
|
|
- 2.5
|
|
- 5
|
|
- 10
|
|
- 15
|
|
- 25
|
|
- 50
|
|
- 120
|
|
- 300
|
|
- 600
|
|
- name: volume_operation_total_seconds
|
|
help: Storage operation end to end duration in seconds
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- operation_name
|
|
- plugin_name
|
|
buckets:
|
|
- 0.1
|
|
- 0.25
|
|
- 0.5
|
|
- 1
|
|
- 2.5
|
|
- 5
|
|
- 10
|
|
- 15
|
|
- 25
|
|
- 50
|
|
- 120
|
|
- 300
|
|
- 600
|
|
- name: graph_actions_duration_seconds
|
|
subsystem: node_authorizer
|
|
help: Histogram of duration of graph actions in node authorizer.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- operation
|
|
buckets:
|
|
- 0.0001
|
|
- 0.0002
|
|
- 0.0004
|
|
- 0.0008
|
|
- 0.0016
|
|
- 0.0032
|
|
- 0.0064
|
|
- 0.0128
|
|
- 0.0256
|
|
- 0.0512
|
|
- 0.1024
|
|
- 0.2048
|
|
- name: apiextensions_openapi_v2_regeneration_count
|
|
help: Counter of OpenAPI v2 spec regeneration count broken down by causing CRD name
|
|
and reason.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- crd
|
|
- reason
|
|
- name: apiextensions_openapi_v3_regeneration_count
|
|
help: Counter of OpenAPI v3 spec regeneration count broken down by group, version,
|
|
causing CRD and reason.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- crd
|
|
- group
|
|
- reason
|
|
- version
|
|
- name: apiserver_crd_webhook_conversion_duration_seconds
|
|
help: CRD webhook conversion duration in seconds
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- crd_name
|
|
- from_version
|
|
- succeeded
|
|
- to_version
|
|
buckets:
|
|
- 0.001
|
|
- 0.002
|
|
- 0.004
|
|
- 0.008
|
|
- 0.016
|
|
- 0.032
|
|
- 0.064
|
|
- 0.128
|
|
- 0.256
|
|
- 0.512
|
|
- 1.024
|
|
- 2.048
|
|
- 4.096
|
|
- 8.192
|
|
- 16.384
|
|
- name: step_admission_duration_seconds_summary
|
|
subsystem: admission
|
|
namespace: apiserver
|
|
help: Admission sub-step latency summary in seconds, broken out for each operation
|
|
and API resource and step type (validate or admit).
|
|
type: Summary
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- operation
|
|
- rejected
|
|
- type
|
|
maxAge: 18000000000000
|
|
- name: webhook_fail_open_count
|
|
subsystem: admission
|
|
namespace: apiserver
|
|
help: Admission webhook fail open count, identified by name and broken out for each
|
|
admission type (validating or mutating).
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- name
|
|
- type
|
|
- name: webhook_rejection_count
|
|
subsystem: admission
|
|
namespace: apiserver
|
|
help: Admission webhook rejection count, identified by name and broken out for each
|
|
admission type (validating or admit) and operation. Additional labels specify
|
|
an error type (calling_webhook_error or apiserver_internal_error if an error occurred;
|
|
no_error otherwise) and optionally a non-zero rejection code if the webhook rejects
|
|
the request with an HTTP status code (honored by the apiserver when the code is
|
|
greater or equal to 400). Codes greater than 600 are truncated to 600, to keep
|
|
the metrics cardinality bounded.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- error_type
|
|
- name
|
|
- operation
|
|
- rejection_code
|
|
- type
|
|
- name: webhook_request_total
|
|
subsystem: admission
|
|
namespace: apiserver
|
|
help: Admission webhook request total, identified by name and broken out for each
|
|
admission type (validating or mutating) and operation. Additional labels specify
|
|
whether the request was rejected or not and an HTTP status code. Codes greater
|
|
than 600 are truncated to 600, to keep the metrics cardinality bounded.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- code
|
|
- name
|
|
- operation
|
|
- rejected
|
|
- type
|
|
- name: check_duration_seconds
|
|
subsystem: validating_admission_policy
|
|
namespace: apiserver
|
|
help: Validation admission latency for individual validation expressions in seconds,
|
|
labeled by policy and further including binding, state and enforcement action
|
|
taken.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- enforcement_action
|
|
- policy
|
|
- policy_binding
|
|
- state
|
|
buckets:
|
|
- 5e-07
|
|
- 0.001
|
|
- 0.01
|
|
- 0.1
|
|
- 1
|
|
- name: check_total
|
|
subsystem: validating_admission_policy
|
|
namespace: apiserver
|
|
help: Validation admission policy check total, labeled by policy and further identified
|
|
by binding, enforcement action taken, and state.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- enforcement_action
|
|
- policy
|
|
- policy_binding
|
|
- state
|
|
- name: definition_total
|
|
subsystem: validating_admission_policy
|
|
namespace: apiserver
|
|
help: Validation admission policy count total, labeled by state and enforcement
|
|
action.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- enforcement_action
|
|
- state
|
|
- name: controller_admission_duration_seconds
|
|
subsystem: admission
|
|
namespace: apiserver
|
|
help: Admission controller latency histogram in seconds, identified by name and
|
|
broken out for each operation and API resource and type (validate or admit).
|
|
type: Histogram
|
|
stabilityLevel: STABLE
|
|
labels:
|
|
- name
|
|
- operation
|
|
- rejected
|
|
- type
|
|
buckets:
|
|
- 0.005
|
|
- 0.025
|
|
- 0.1
|
|
- 0.5
|
|
- 1
|
|
- 2.5
|
|
- name: step_admission_duration_seconds
|
|
subsystem: admission
|
|
namespace: apiserver
|
|
help: Admission sub-step latency histogram in seconds, broken out for each operation
|
|
and API resource and step type (validate or admit).
|
|
type: Histogram
|
|
stabilityLevel: STABLE
|
|
labels:
|
|
- operation
|
|
- rejected
|
|
- type
|
|
buckets:
|
|
- 0.005
|
|
- 0.025
|
|
- 0.1
|
|
- 0.5
|
|
- 1
|
|
- 2.5
|
|
- name: webhook_admission_duration_seconds
|
|
subsystem: admission
|
|
namespace: apiserver
|
|
help: Admission webhook latency histogram in seconds, identified by name and broken
|
|
out for each operation and API resource and type (validate or admit).
|
|
type: Histogram
|
|
stabilityLevel: STABLE
|
|
labels:
|
|
- name
|
|
- operation
|
|
- rejected
|
|
- type
|
|
buckets:
|
|
- 0.005
|
|
- 0.025
|
|
- 0.1
|
|
- 0.5
|
|
- 1
|
|
- 2.5
|
|
- 10
|
|
- 25
|
|
- name: error_total
|
|
subsystem: apiserver_audit
|
|
help: Counter of audit events that failed to be audited properly. Plugin identifies
|
|
the plugin affected by the error.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- plugin
|
|
- name: event_total
|
|
subsystem: apiserver_audit
|
|
help: Counter of audit events generated and sent to the audit backend.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: level_total
|
|
subsystem: apiserver_audit
|
|
help: Counter of policy levels for audit events (1 per request).
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- level
|
|
- name: requests_rejected_total
|
|
subsystem: apiserver_audit
|
|
help: Counter of apiserver requests rejected due to an error in audit logging backend.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: compilation_duration_seconds
|
|
subsystem: cel
|
|
namespace: apiserver
|
|
help: CEL compilation time in seconds.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
- name: evaluation_duration_seconds
|
|
subsystem: cel
|
|
namespace: apiserver
|
|
help: CEL evaluation time in seconds.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
- name: certificate_expiration_seconds
|
|
subsystem: client
|
|
namespace: apiserver
|
|
help: Distribution of the remaining lifetime on the certificate used to authenticate
|
|
a request.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
buckets:
|
|
- 0
|
|
- 1800
|
|
- 3600
|
|
- 7200
|
|
- 21600
|
|
- 43200
|
|
- 86400
|
|
- 172800
|
|
- 345600
|
|
- 604800
|
|
- 2.592e+06
|
|
- 7.776e+06
|
|
- 1.5552e+07
|
|
- 3.1104e+07
|
|
- name: current_inqueue_requests
|
|
subsystem: apiserver
|
|
help: Maximal number of queued requests in this apiserver per request kind in last
|
|
second.
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- request_kind
|
|
- name: apiserver_delegated_authn_request_duration_seconds
|
|
help: Request latency in seconds. Broken down by status code.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- code
|
|
buckets:
|
|
- 0.25
|
|
- 0.5
|
|
- 0.7
|
|
- 1
|
|
- 1.5
|
|
- 3
|
|
- 5
|
|
- 10
|
|
- name: apiserver_delegated_authn_request_total
|
|
help: Number of HTTP requests partitioned by status code.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- code
|
|
- name: apiserver_delegated_authz_request_duration_seconds
|
|
help: Request latency in seconds. Broken down by status code.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- code
|
|
buckets:
|
|
- 0.25
|
|
- 0.5
|
|
- 0.7
|
|
- 1
|
|
- 1.5
|
|
- 3
|
|
- 5
|
|
- 10
|
|
- name: apiserver_delegated_authz_request_total
|
|
help: Number of HTTP requests partitioned by status code.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- code
|
|
- name: request_aborts_total
|
|
subsystem: apiserver
|
|
help: Number of requests which apiserver aborted possibly due to a timeout, for
|
|
each group, version, verb, resource, subresource and scope
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- group
|
|
- resource
|
|
- scope
|
|
- subresource
|
|
- verb
|
|
- version
|
|
- name: request_body_sizes
|
|
subsystem: apiserver
|
|
help: Apiserver request body sizes broken out by size.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- resource
|
|
- verb
|
|
buckets:
|
|
- 50000
|
|
- 150000
|
|
- 250000
|
|
- 350000
|
|
- 450000
|
|
- 550000
|
|
- 650000
|
|
- 750000
|
|
- 850000
|
|
- 950000
|
|
- 1.05e+06
|
|
- 1.15e+06
|
|
- 1.25e+06
|
|
- 1.35e+06
|
|
- 1.45e+06
|
|
- 1.55e+06
|
|
- 1.65e+06
|
|
- 1.75e+06
|
|
- 1.85e+06
|
|
- 1.95e+06
|
|
- 2.05e+06
|
|
- 2.15e+06
|
|
- 2.25e+06
|
|
- 2.35e+06
|
|
- 2.45e+06
|
|
- 2.55e+06
|
|
- 2.65e+06
|
|
- 2.75e+06
|
|
- 2.85e+06
|
|
- 2.95e+06
|
|
- 3.05e+06
|
|
- name: request_filter_duration_seconds
|
|
subsystem: apiserver
|
|
help: Request filter latency distribution in seconds, for each filter type
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- filter
|
|
buckets:
|
|
- 0.0001
|
|
- 0.0003
|
|
- 0.001
|
|
- 0.003
|
|
- 0.01
|
|
- 0.03
|
|
- 0.1
|
|
- 0.3
|
|
- 1
|
|
- 5
|
|
- name: request_post_timeout_total
|
|
subsystem: apiserver
|
|
help: Tracks the activity of the request handlers after the associated requests
|
|
have been timed out by the apiserver
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- source
|
|
- status
|
|
- name: request_sli_duration_seconds
|
|
subsystem: apiserver
|
|
help: Response latency distribution (not counting webhook duration) in seconds for
|
|
each verb, group, version, resource, subresource, scope and component.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- component
|
|
- group
|
|
- resource
|
|
- scope
|
|
- subresource
|
|
- verb
|
|
- version
|
|
buckets:
|
|
- 0.05
|
|
- 0.1
|
|
- 0.2
|
|
- 0.4
|
|
- 0.6
|
|
- 0.8
|
|
- 1
|
|
- 1.25
|
|
- 1.5
|
|
- 2
|
|
- 3
|
|
- 4
|
|
- 5
|
|
- 6
|
|
- 8
|
|
- 10
|
|
- 15
|
|
- 20
|
|
- 30
|
|
- 45
|
|
- 60
|
|
- name: request_slo_duration_seconds
|
|
subsystem: apiserver
|
|
help: Response latency distribution (not counting webhook duration) in seconds for
|
|
each verb, group, version, resource, subresource, scope and component.
|
|
type: Histogram
|
|
deprecatedVersion: 1.27.0
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- component
|
|
- group
|
|
- resource
|
|
- scope
|
|
- subresource
|
|
- verb
|
|
- version
|
|
buckets:
|
|
- 0.05
|
|
- 0.1
|
|
- 0.2
|
|
- 0.4
|
|
- 0.6
|
|
- 0.8
|
|
- 1
|
|
- 1.25
|
|
- 1.5
|
|
- 2
|
|
- 3
|
|
- 4
|
|
- 5
|
|
- 6
|
|
- 8
|
|
- 10
|
|
- 15
|
|
- 20
|
|
- 30
|
|
- 45
|
|
- 60
|
|
- name: request_terminations_total
|
|
subsystem: apiserver
|
|
help: Number of requests which apiserver terminated in self-defense.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- code
|
|
- component
|
|
- group
|
|
- resource
|
|
- scope
|
|
- subresource
|
|
- verb
|
|
- version
|
|
- name: request_timestamp_comparison_time
|
|
subsystem: apiserver
|
|
help: Time taken for comparison of old vs new objects in UPDATE or PATCH requests
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- code_path
|
|
buckets:
|
|
- 0.0001
|
|
- 0.0003
|
|
- 0.001
|
|
- 0.003
|
|
- 0.01
|
|
- 0.03
|
|
- 0.1
|
|
- 0.3
|
|
- 1
|
|
- 5
|
|
- name: selfrequest_total
|
|
subsystem: apiserver
|
|
help: Counter of apiserver self-requests broken out for each verb, API resource
|
|
and subresource.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- resource
|
|
- subresource
|
|
- verb
|
|
- name: tls_handshake_errors_total
|
|
subsystem: apiserver
|
|
help: Number of requests dropped with 'TLS handshake error from' error
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: watch_events_sizes
|
|
subsystem: apiserver
|
|
help: Watch event size distribution in bytes
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- group
|
|
- kind
|
|
- version
|
|
buckets:
|
|
- 1024
|
|
- 2048
|
|
- 4096
|
|
- 8192
|
|
- 16384
|
|
- 32768
|
|
- 65536
|
|
- 131072
|
|
- name: watch_events_total
|
|
subsystem: apiserver
|
|
help: Number of events sent in watch clients
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- group
|
|
- kind
|
|
- version
|
|
- name: authenticated_user_requests
|
|
help: Counter of authenticated requests broken out by username.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- username
|
|
- name: authentication_attempts
|
|
help: Counter of authenticated attempts.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- result
|
|
- name: authentication_duration_seconds
|
|
help: Authentication duration in seconds broken out by result.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- result
|
|
buckets:
|
|
- 0.001
|
|
- 0.002
|
|
- 0.004
|
|
- 0.008
|
|
- 0.016
|
|
- 0.032
|
|
- 0.064
|
|
- 0.128
|
|
- 0.256
|
|
- 0.512
|
|
- 1.024
|
|
- 2.048
|
|
- 4.096
|
|
- 8.192
|
|
- 16.384
|
|
- name: active_fetch_count
|
|
subsystem: token_cache
|
|
namespace: authentication
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- status
|
|
- name: fetch_total
|
|
subsystem: token_cache
|
|
namespace: authentication
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- status
|
|
- name: request_duration_seconds
|
|
subsystem: token_cache
|
|
namespace: authentication
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- status
|
|
- name: request_total
|
|
subsystem: token_cache
|
|
namespace: authentication
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- status
|
|
- name: field_validation_request_duration_seconds
|
|
help: Response latency distribution in seconds for each field validation value
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- field_validation
|
|
buckets:
|
|
- 0.05
|
|
- 0.1
|
|
- 0.2
|
|
- 0.4
|
|
- 0.6
|
|
- 0.8
|
|
- 1
|
|
- 1.25
|
|
- 1.5
|
|
- 2
|
|
- 3
|
|
- 4
|
|
- 5
|
|
- 6
|
|
- 8
|
|
- 10
|
|
- 15
|
|
- 20
|
|
- 30
|
|
- 45
|
|
- 60
|
|
- name: current_inflight_requests
|
|
subsystem: apiserver
|
|
help: Maximal number of currently used inflight request limit of this apiserver
|
|
per request kind in last second.
|
|
type: Gauge
|
|
stabilityLevel: STABLE
|
|
labels:
|
|
- request_kind
|
|
- name: longrunning_requests
|
|
subsystem: apiserver
|
|
help: Gauge of all active long-running apiserver requests broken out by verb, group,
|
|
version, resource, scope and component. Not all requests are tracked this way.
|
|
type: Gauge
|
|
stabilityLevel: STABLE
|
|
labels:
|
|
- component
|
|
- group
|
|
- resource
|
|
- scope
|
|
- subresource
|
|
- verb
|
|
- version
|
|
- name: request_duration_seconds
|
|
subsystem: apiserver
|
|
help: Response latency distribution in seconds for each verb, dry run value, group,
|
|
version, resource, subresource, scope and component.
|
|
type: Histogram
|
|
stabilityLevel: STABLE
|
|
labels:
|
|
- component
|
|
- dry_run
|
|
- group
|
|
- resource
|
|
- scope
|
|
- subresource
|
|
- verb
|
|
- version
|
|
buckets:
|
|
- 0.005
|
|
- 0.025
|
|
- 0.05
|
|
- 0.1
|
|
- 0.2
|
|
- 0.4
|
|
- 0.6
|
|
- 0.8
|
|
- 1
|
|
- 1.25
|
|
- 1.5
|
|
- 2
|
|
- 3
|
|
- 4
|
|
- 5
|
|
- 6
|
|
- 8
|
|
- 10
|
|
- 15
|
|
- 20
|
|
- 30
|
|
- 45
|
|
- 60
|
|
- name: request_total
|
|
subsystem: apiserver
|
|
help: Counter of apiserver requests broken out for each verb, dry run value, group,
|
|
version, resource, scope, component, and HTTP response code.
|
|
type: Counter
|
|
stabilityLevel: STABLE
|
|
labels:
|
|
- code
|
|
- component
|
|
- dry_run
|
|
- group
|
|
- resource
|
|
- scope
|
|
- subresource
|
|
- verb
|
|
- version
|
|
- name: requested_deprecated_apis
|
|
subsystem: apiserver
|
|
help: Gauge of deprecated APIs that have been requested, broken out by API group,
|
|
version, resource, subresource, and removed_release.
|
|
type: Gauge
|
|
stabilityLevel: STABLE
|
|
labels:
|
|
- group
|
|
- removed_release
|
|
- resource
|
|
- subresource
|
|
- version
|
|
- name: response_sizes
|
|
subsystem: apiserver
|
|
help: Response size distribution in bytes for each group, version, verb, resource,
|
|
subresource, scope and component.
|
|
type: Histogram
|
|
stabilityLevel: STABLE
|
|
labels:
|
|
- component
|
|
- group
|
|
- resource
|
|
- scope
|
|
- subresource
|
|
- verb
|
|
- version
|
|
buckets:
|
|
- 1000
|
|
- 10000
|
|
- 100000
|
|
- 1e+06
|
|
- 1e+07
|
|
- 1e+08
|
|
- 1e+09
|
|
- name: cache_list_fetched_objects_total
|
|
namespace: apiserver
|
|
help: Number of objects read from watch cache in the course of serving a LIST request
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- index
|
|
- resource_prefix
|
|
- name: cache_list_returned_objects_total
|
|
namespace: apiserver
|
|
help: Number of objects returned for a LIST request from watch cache
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- resource_prefix
|
|
- name: cache_list_total
|
|
namespace: apiserver
|
|
help: Number of LIST requests served from watch cache
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- index
|
|
- resource_prefix
|
|
- name: dial_duration_seconds
|
|
subsystem: egress_dialer
|
|
namespace: apiserver
|
|
help: Dial latency histogram in seconds, labeled by the protocol (http-connect or
|
|
grpc), transport (tcp or uds)
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- protocol
|
|
- transport
|
|
buckets:
|
|
- 0.005
|
|
- 0.025
|
|
- 0.1
|
|
- 0.5
|
|
- 2.5
|
|
- 12.5
|
|
- name: dial_failure_count
|
|
subsystem: egress_dialer
|
|
namespace: apiserver
|
|
help: Dial failure count, labeled by the protocol (http-connect or grpc), transport
|
|
(tcp or uds), and stage (connect or proxy). The stage indicates at which stage
|
|
the dial failed
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- protocol
|
|
- stage
|
|
- transport
|
|
- name: dial_start_total
|
|
subsystem: egress_dialer
|
|
namespace: apiserver
|
|
help: Dial starts, labeled by the protocol (http-connect or grpc) and transport
|
|
(tcp or uds).
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- protocol
|
|
- transport
|
|
- name: dek_cache_fill_percent
|
|
subsystem: envelope_encryption
|
|
namespace: apiserver
|
|
help: Percent of the cache slots currently occupied by cached DEKs.
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
- name: dek_cache_inter_arrival_time_seconds
|
|
subsystem: envelope_encryption
|
|
namespace: apiserver
|
|
help: Time (in seconds) of inter arrival of transformation requests.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- transformation_type
|
|
buckets:
|
|
- 60
|
|
- 120
|
|
- 240
|
|
- 480
|
|
- 960
|
|
- 1920
|
|
- 3840
|
|
- 7680
|
|
- 15360
|
|
- 30720
|
|
- name: invalid_key_id_from_status_total
|
|
subsystem: envelope_encryption
|
|
namespace: apiserver
|
|
help: Number of times an invalid keyID is returned by the Status RPC call split
|
|
by error.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- error
|
|
- provider_name
|
|
- name: key_id_hash_last_timestamp_seconds
|
|
subsystem: envelope_encryption
|
|
namespace: apiserver
|
|
help: The last time in seconds when a keyID was used.
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- key_id_hash
|
|
- provider_name
|
|
- transformation_type
|
|
- name: key_id_hash_status_last_timestamp_seconds
|
|
subsystem: envelope_encryption
|
|
namespace: apiserver
|
|
help: The last time in seconds when a keyID was returned by the Status RPC call.
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- key_id_hash
|
|
- provider_name
|
|
- name: key_id_hash_total
|
|
subsystem: envelope_encryption
|
|
namespace: apiserver
|
|
help: Number of times a keyID is used split by transformation type and provider.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- key_id_hash
|
|
- provider_name
|
|
- transformation_type
|
|
- name: kms_operations_latency_seconds
|
|
subsystem: envelope_encryption
|
|
namespace: apiserver
|
|
help: KMS operation duration with gRPC error code status total.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- grpc_status_code
|
|
- method_name
|
|
- provider_name
|
|
buckets:
|
|
- 0.0001
|
|
- 0.0002
|
|
- 0.0004
|
|
- 0.0008
|
|
- 0.0016
|
|
- 0.0032
|
|
- 0.0064
|
|
- 0.0128
|
|
- 0.0256
|
|
- 0.0512
|
|
- 0.1024
|
|
- 0.2048
|
|
- 0.4096
|
|
- 0.8192
|
|
- 1.6384
|
|
- 3.2768
|
|
- 6.5536
|
|
- 13.1072
|
|
- 26.2144
|
|
- 52.4288
|
|
- name: current_executing_requests
|
|
subsystem: flowcontrol
|
|
namespace: apiserver
|
|
help: Number of requests in initial (for a WATCH) or any (for a non-WATCH) execution
|
|
stage in the API Priority and Fairness subsystem
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- flow_schema
|
|
- priority_level
|
|
- name: current_inqueue_requests
|
|
subsystem: flowcontrol
|
|
namespace: apiserver
|
|
help: Number of requests currently pending in queues of the API Priority and Fairness
|
|
subsystem
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- flow_schema
|
|
- priority_level
|
|
- name: current_limit_seats
|
|
subsystem: flowcontrol
|
|
namespace: apiserver
|
|
help: current derived number of execution seats available to each priority level
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- priority_level
|
|
- name: current_r
|
|
subsystem: flowcontrol
|
|
namespace: apiserver
|
|
help: R(time of last change)
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- priority_level
|
|
- name: demand_seats
|
|
subsystem: flowcontrol
|
|
namespace: apiserver
|
|
help: Observations, at the end of every nanosecond, of (the number of seats each
|
|
priority level could use) / (nominal number of seats for that level)
|
|
type: TimingRatioHistogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- priority_level
|
|
buckets:
|
|
- 0.2
|
|
- 0.4
|
|
- 0.6
|
|
- 0.8
|
|
- 1
|
|
- 1.2
|
|
- 1.4
|
|
- 1.7
|
|
- 2
|
|
- 2.8
|
|
- 4
|
|
- 6
|
|
- name: demand_seats_average
|
|
subsystem: flowcontrol
|
|
namespace: apiserver
|
|
help: Time-weighted average, over last adjustment period, of demand_seats
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- priority_level
|
|
- name: demand_seats_high_watermark
|
|
subsystem: flowcontrol
|
|
namespace: apiserver
|
|
help: High watermark, over last adjustment period, of demand_seats
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- priority_level
|
|
- name: demand_seats_smoothed
|
|
subsystem: flowcontrol
|
|
namespace: apiserver
|
|
help: Smoothed seat demands
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- priority_level
|
|
- name: demand_seats_stdev
|
|
subsystem: flowcontrol
|
|
namespace: apiserver
|
|
help: Time-weighted standard deviation, over last adjustment period, of demand_seats
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- priority_level
|
|
- name: dispatch_r
|
|
subsystem: flowcontrol
|
|
namespace: apiserver
|
|
help: R(time of last dispatch)
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- priority_level
|
|
- name: dispatched_requests_total
|
|
subsystem: flowcontrol
|
|
namespace: apiserver
|
|
help: Number of requests executed by API Priority and Fairness subsystem
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- flow_schema
|
|
- priority_level
|
|
- name: epoch_advance_total
|
|
subsystem: flowcontrol
|
|
namespace: apiserver
|
|
help: Number of times the queueset's progress meter jumped backward
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- priority_level
|
|
- success
|
|
- name: latest_s
|
|
subsystem: flowcontrol
|
|
namespace: apiserver
|
|
help: S(most recently dispatched request)
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- priority_level
|
|
- name: lower_limit_seats
|
|
subsystem: flowcontrol
|
|
namespace: apiserver
|
|
help: Configured lower bound on number of execution seats available to each priority
|
|
level
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- priority_level
|
|
- name: next_discounted_s_bounds
|
|
subsystem: flowcontrol
|
|
namespace: apiserver
|
|
help: min and max, over queues, of S(oldest waiting request in queue) - estimated
|
|
work in progress
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- bound
|
|
- priority_level
|
|
- name: next_s_bounds
|
|
subsystem: flowcontrol
|
|
namespace: apiserver
|
|
help: min and max, over queues, of S(oldest waiting request in queue)
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- bound
|
|
- priority_level
|
|
- name: nominal_limit_seats
|
|
subsystem: flowcontrol
|
|
namespace: apiserver
|
|
help: Nominal number of execution seats configured for each priority level
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- priority_level
|
|
- name: priority_level_request_utilization
|
|
subsystem: flowcontrol
|
|
namespace: apiserver
|
|
help: Observations, at the end of every nanosecond, of number of requests (as a
|
|
fraction of the relevant limit) waiting or in any stage of execution (but only
|
|
initial stage for WATCHes)
|
|
type: TimingRatioHistogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- phase
|
|
- priority_level
|
|
buckets:
|
|
- 0
|
|
- 0.001
|
|
- 0.003
|
|
- 0.01
|
|
- 0.03
|
|
- 0.1
|
|
- 0.25
|
|
- 0.5
|
|
- 0.75
|
|
- 1
|
|
- name: priority_level_seat_utilization
|
|
subsystem: flowcontrol
|
|
namespace: apiserver
|
|
help: Observations, at the end of every nanosecond, of utilization of seats for
|
|
any stage of execution (but only initial stage for WATCHes)
|
|
type: TimingRatioHistogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- priority_level
|
|
buckets:
|
|
- 0
|
|
- 0.1
|
|
- 0.2
|
|
- 0.3
|
|
- 0.4
|
|
- 0.5
|
|
- 0.6
|
|
- 0.7
|
|
- 0.8
|
|
- 0.9
|
|
- 0.95
|
|
- 0.99
|
|
- 1
|
|
constLabels:
|
|
phase: executing
|
|
- name: read_vs_write_current_requests
|
|
subsystem: flowcontrol
|
|
namespace: apiserver
|
|
help: Observations, at the end of every nanosecond, of the number of requests (as
|
|
a fraction of the relevant limit) waiting or in regular stage of execution
|
|
type: TimingRatioHistogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- phase
|
|
- request_kind
|
|
buckets:
|
|
- 0
|
|
- 0.001
|
|
- 0.01
|
|
- 0.1
|
|
- 0.2
|
|
- 0.3
|
|
- 0.4
|
|
- 0.5
|
|
- 0.6
|
|
- 0.7
|
|
- 0.8
|
|
- 0.9
|
|
- 0.95
|
|
- 0.99
|
|
- 1
|
|
- name: rejected_requests_total
|
|
subsystem: flowcontrol
|
|
namespace: apiserver
|
|
help: Number of requests rejected by API Priority and Fairness subsystem
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- flow_schema
|
|
- priority_level
|
|
- reason
|
|
- name: request_concurrency_in_use
|
|
subsystem: flowcontrol
|
|
namespace: apiserver
|
|
help: Concurrency (number of seats) occupied by the currently executing (initial
|
|
stage for a WATCH, any stage otherwise) requests in the API Priority and Fairness
|
|
subsystem
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- flow_schema
|
|
- priority_level
|
|
- name: request_concurrency_limit
|
|
subsystem: flowcontrol
|
|
namespace: apiserver
|
|
help: Shared concurrency limit in the API Priority and Fairness subsystem
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- priority_level
|
|
- name: request_dispatch_no_accommodation_total
|
|
subsystem: flowcontrol
|
|
namespace: apiserver
|
|
help: Number of times a dispatch attempt resulted in a non accommodation due to
|
|
lack of available seats
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- flow_schema
|
|
- priority_level
|
|
- name: request_execution_seconds
|
|
subsystem: flowcontrol
|
|
namespace: apiserver
|
|
help: Duration of initial stage (for a WATCH) or any (for a non-WATCH) stage of
|
|
request execution in the API Priority and Fairness subsystem
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- flow_schema
|
|
- priority_level
|
|
- type
|
|
buckets:
|
|
- 0
|
|
- 0.005
|
|
- 0.02
|
|
- 0.05
|
|
- 0.1
|
|
- 0.2
|
|
- 0.5
|
|
- 1
|
|
- 2
|
|
- 5
|
|
- 10
|
|
- 30
|
|
- name: request_queue_length_after_enqueue
|
|
subsystem: flowcontrol
|
|
namespace: apiserver
|
|
help: Length of queue in the API Priority and Fairness subsystem, as seen by each
|
|
request after it is enqueued
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- flow_schema
|
|
- priority_level
|
|
buckets:
|
|
- 0
|
|
- 10
|
|
- 25
|
|
- 50
|
|
- 100
|
|
- 250
|
|
- 500
|
|
- 1000
|
|
- name: request_wait_duration_seconds
|
|
subsystem: flowcontrol
|
|
namespace: apiserver
|
|
help: Length of time a request spent waiting in its queue
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- execute
|
|
- flow_schema
|
|
- priority_level
|
|
buckets:
|
|
- 0
|
|
- 0.005
|
|
- 0.02
|
|
- 0.05
|
|
- 0.1
|
|
- 0.2
|
|
- 0.5
|
|
- 1
|
|
- 2
|
|
- 5
|
|
- 10
|
|
- 30
|
|
- name: seat_fair_frac
|
|
subsystem: flowcontrol
|
|
namespace: apiserver
|
|
help: Fair fraction of server's concurrency to allocate to each priority level that
|
|
can use it
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
- name: target_seats
|
|
subsystem: flowcontrol
|
|
namespace: apiserver
|
|
help: Seat allocation targets
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- priority_level
|
|
- name: upper_limit_seats
|
|
subsystem: flowcontrol
|
|
namespace: apiserver
|
|
help: Configured upper bound on number of execution seats available to each priority
|
|
level
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- priority_level
|
|
- name: watch_count_samples
|
|
subsystem: flowcontrol
|
|
namespace: apiserver
|
|
help: count of watchers for mutating requests in API Priority and Fairness
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- flow_schema
|
|
- priority_level
|
|
buckets:
|
|
- 0
|
|
- 1
|
|
- 10
|
|
- 100
|
|
- 1000
|
|
- 10000
|
|
- name: work_estimated_seats
|
|
subsystem: flowcontrol
|
|
namespace: apiserver
|
|
help: Number of estimated seats (maximum of initial and final seats) associated
|
|
with requests in API Priority and Fairness
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- flow_schema
|
|
- priority_level
|
|
buckets:
|
|
- 1
|
|
- 2
|
|
- 4
|
|
- 10
|
|
- name: init_events_total
|
|
namespace: apiserver
|
|
help: Counter of init events processed in watch cache broken by resource type.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- resource
|
|
- name: data_key_generation_duration_seconds
|
|
subsystem: storage
|
|
namespace: apiserver
|
|
help: Latencies in seconds of data encryption key(DEK) generation operations.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
buckets:
|
|
- 5e-06
|
|
- 1e-05
|
|
- 2e-05
|
|
- 4e-05
|
|
- 8e-05
|
|
- 0.00016
|
|
- 0.00032
|
|
- 0.00064
|
|
- 0.00128
|
|
- 0.00256
|
|
- 0.00512
|
|
- 0.01024
|
|
- 0.02048
|
|
- 0.04096
|
|
- name: data_key_generation_failures_total
|
|
subsystem: storage
|
|
namespace: apiserver
|
|
help: Total number of failed data encryption key(DEK) generation operations.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: storage_db_total_size_in_bytes
|
|
subsystem: apiserver
|
|
help: Total size of the storage database file physically allocated in bytes.
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- endpoint
|
|
- name: storage_decode_errors_total
|
|
namespace: apiserver
|
|
help: Number of stored object decode errors split by object type
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- resource
|
|
- name: envelope_transformation_cache_misses_total
|
|
subsystem: storage
|
|
namespace: apiserver
|
|
help: Total number of cache misses while accessing key decryption key(KEK).
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: storage_events_received_total
|
|
subsystem: apiserver
|
|
help: Number of etcd events received split by kind.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- resource
|
|
- name: apiserver_storage_list_evaluated_objects_total
|
|
help: Number of objects tested in the course of serving a LIST request from storage
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- resource
|
|
- name: apiserver_storage_list_fetched_objects_total
|
|
help: Number of objects read from storage in the course of serving a LIST request
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- resource
|
|
- name: apiserver_storage_list_returned_objects_total
|
|
help: Number of objects returned for a LIST request from storage
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- resource
|
|
- name: apiserver_storage_list_total
|
|
help: Number of LIST requests served from storage
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- resource
|
|
- name: transformation_duration_seconds
|
|
subsystem: storage
|
|
namespace: apiserver
|
|
help: Latencies in seconds of value transformation operations.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- transformation_type
|
|
- transformer_prefix
|
|
buckets:
|
|
- 5e-06
|
|
- 1e-05
|
|
- 2e-05
|
|
- 4e-05
|
|
- 8e-05
|
|
- 0.00016
|
|
- 0.00032
|
|
- 0.00064
|
|
- 0.00128
|
|
- 0.00256
|
|
- 0.00512
|
|
- 0.01024
|
|
- 0.02048
|
|
- 0.04096
|
|
- 0.08192
|
|
- 0.16384
|
|
- 0.32768
|
|
- 0.65536
|
|
- 1.31072
|
|
- 2.62144
|
|
- 5.24288
|
|
- 10.48576
|
|
- 20.97152
|
|
- 41.94304
|
|
- 83.88608
|
|
- name: transformation_operations_total
|
|
subsystem: storage
|
|
namespace: apiserver
|
|
help: Total number of transformations.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- status
|
|
- transformation_type
|
|
- transformer_prefix
|
|
- name: terminated_watchers_total
|
|
namespace: apiserver
|
|
help: Counter of watchers closed due to unresponsiveness broken by resource type.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- resource
|
|
- name: events_dispatched_total
|
|
subsystem: watch_cache
|
|
namespace: apiserver
|
|
help: Counter of events dispatched in watch cache broken by resource type.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- resource
|
|
- name: events_received_total
|
|
subsystem: watch_cache
|
|
namespace: apiserver
|
|
help: Counter of events received in watch cache broken by resource type.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- resource
|
|
- name: initializations_total
|
|
subsystem: watch_cache
|
|
namespace: apiserver
|
|
help: Counter of watch cache initializations broken by resource type.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- resource
|
|
- name: etcd_bookmark_counts
|
|
help: Number of etcd bookmarks (progress notify events) split by kind.
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- resource
|
|
- name: etcd_lease_object_counts
|
|
help: Number of objects attached to a single etcd lease.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
buckets:
|
|
- 10
|
|
- 50
|
|
- 100
|
|
- 500
|
|
- 1000
|
|
- 2500
|
|
- 5000
|
|
- name: etcd_request_duration_seconds
|
|
help: Etcd request latency in seconds for each operation and object type.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- operation
|
|
- type
|
|
buckets:
|
|
- 0.005
|
|
- 0.025
|
|
- 0.05
|
|
- 0.1
|
|
- 0.2
|
|
- 0.4
|
|
- 0.6
|
|
- 0.8
|
|
- 1
|
|
- 1.25
|
|
- 1.5
|
|
- 2
|
|
- 3
|
|
- 4
|
|
- 5
|
|
- 6
|
|
- 8
|
|
- 10
|
|
- 15
|
|
- 20
|
|
- 30
|
|
- 45
|
|
- 60
|
|
- name: capacity
|
|
subsystem: watch_cache
|
|
help: Total capacity of watch cache broken by resource type.
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- resource
|
|
- name: capacity_decrease_total
|
|
subsystem: watch_cache
|
|
help: Total number of watch cache capacity decrease events broken by resource type.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- resource
|
|
- name: capacity_increase_total
|
|
subsystem: watch_cache
|
|
help: Total number of watch cache capacity increase events broken by resource type.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- resource
|
|
- name: apiserver_storage_objects
|
|
help: Number of stored objects at the time of last check split by kind.
|
|
type: Gauge
|
|
stabilityLevel: STABLE
|
|
labels:
|
|
- resource
|
|
- name: x509_insecure_sha1_total
|
|
subsystem: webhooks
|
|
namespace: apiserver
|
|
help: Counts the number of requests to servers with insecure SHA1 signatures in
|
|
their serving certificate OR the number of connection failures due to the insecure
|
|
SHA1 signatures (either/or, based on the runtime environment)
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: x509_missing_san_total
|
|
subsystem: webhooks
|
|
namespace: apiserver
|
|
help: Counts the number of requests to servers missing SAN extension in their serving
|
|
certificate OR the number of connection failures due to the lack of x509 certificate
|
|
SAN extension missing (either/or, based on the runtime environment)
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: nodesync_latency_seconds
|
|
subsystem: service_controller
|
|
help: A metric measuring the latency for nodesync which updates loadbalancer hosts
|
|
on cluster node updates.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
buckets:
|
|
- 1
|
|
- 2
|
|
- 4
|
|
- 8
|
|
- 16
|
|
- 32
|
|
- 64
|
|
- 128
|
|
- 256
|
|
- 512
|
|
- 1024
|
|
- 2048
|
|
- 4096
|
|
- 8192
|
|
- 16384
|
|
- name: update_loadbalancer_host_latency_seconds
|
|
subsystem: service_controller
|
|
help: A metric measuring the latency for updating each load balancer hosts.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
buckets:
|
|
- 1
|
|
- 2
|
|
- 4
|
|
- 8
|
|
- 16
|
|
- 32
|
|
- 64
|
|
- 128
|
|
- 256
|
|
- 512
|
|
- 1024
|
|
- 2048
|
|
- 4096
|
|
- 8192
|
|
- 16384
|
|
- name: kubernetes_build_info
|
|
help: A metric with a constant '1' value labeled by major, minor, git version, git
|
|
commit, git tree state, build date, Go version, and compiler from which Kubernetes
|
|
was built, and platform on which it is running.
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- build_date
|
|
- compiler
|
|
- git_commit
|
|
- git_tree_state
|
|
- git_version
|
|
- go_version
|
|
- major
|
|
- minor
|
|
- platform
|
|
- name: feature_enabled
|
|
namespace: kubernetes
|
|
help: This metric records the data about the stage and enablement of a k8s feature.
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- name
|
|
- stage
|
|
- name: healthcheck
|
|
namespace: kubernetes
|
|
help: This metric records the result of a single healthcheck.
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- name
|
|
- type
|
|
- name: healthchecks_total
|
|
namespace: kubernetes
|
|
help: This metric records the results of all healthcheck.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- name
|
|
- status
|
|
- type
|
|
- name: leader_election_master_status
|
|
help: Gauge of if the reporting system is master of the relevant lease, 0 indicates
|
|
backup, 1 indicates master. 'name' is the string used to identify the lease. Please
|
|
make sure to group by name.
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- name
|
|
- name: rest_client_exec_plugin_call_total
|
|
help: Number of calls to an exec plugin, partitioned by the type of event encountered
|
|
(no_error, plugin_execution_error, plugin_not_found_error, client_internal_error)
|
|
and an optional exit code. The exit code will be set to 0 if and only if the plugin
|
|
call was successful.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- call_status
|
|
- code
|
|
- name: rest_client_exec_plugin_certificate_rotation_age
|
|
help: Histogram of the number of seconds the last auth exec plugin client certificate
|
|
lived before being rotated. If auth exec plugin client certificates are unused,
|
|
histogram will contain no data.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
buckets:
|
|
- 600
|
|
- 1800
|
|
- 3600
|
|
- 14400
|
|
- 86400
|
|
- 604800
|
|
- 2.592e+06
|
|
- 7.776e+06
|
|
- 1.5552e+07
|
|
- 3.1104e+07
|
|
- 1.24416e+08
|
|
- name: rest_client_exec_plugin_ttl_seconds
|
|
help: Gauge of the shortest TTL (time-to-live) of the client certificate(s) managed
|
|
by the auth exec plugin. The value is in seconds until certificate expiry (negative
|
|
if already expired). If auth exec plugins are unused or manage no TLS certificates,
|
|
the value will be +INF.
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
- name: rest_client_rate_limiter_duration_seconds
|
|
help: Client side rate limiter latency in seconds. Broken down by verb, and host.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- host
|
|
- verb
|
|
buckets:
|
|
- 0.005
|
|
- 0.025
|
|
- 0.1
|
|
- 0.25
|
|
- 0.5
|
|
- 1
|
|
- 2
|
|
- 4
|
|
- 8
|
|
- 15
|
|
- 30
|
|
- 60
|
|
- name: rest_client_request_duration_seconds
|
|
help: Request latency in seconds. Broken down by verb, and host.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- host
|
|
- verb
|
|
buckets:
|
|
- 0.005
|
|
- 0.025
|
|
- 0.1
|
|
- 0.25
|
|
- 0.5
|
|
- 1
|
|
- 2
|
|
- 4
|
|
- 8
|
|
- 15
|
|
- 30
|
|
- 60
|
|
- name: rest_client_request_retries_total
|
|
help: Number of request retries, partitioned by status code, verb, and host.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- code
|
|
- host
|
|
- verb
|
|
- name: rest_client_request_size_bytes
|
|
help: Request size in bytes. Broken down by verb and host.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- host
|
|
- verb
|
|
buckets:
|
|
- 64
|
|
- 256
|
|
- 512
|
|
- 1024
|
|
- 4096
|
|
- 16384
|
|
- 65536
|
|
- 262144
|
|
- 1.048576e+06
|
|
- 4.194304e+06
|
|
- 1.6777216e+07
|
|
- name: rest_client_requests_total
|
|
help: Number of HTTP requests, partitioned by status code, method, and host.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- code
|
|
- host
|
|
- method
|
|
- name: rest_client_response_size_bytes
|
|
help: Response size in bytes. Broken down by verb and host.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- host
|
|
- verb
|
|
buckets:
|
|
- 64
|
|
- 256
|
|
- 512
|
|
- 1024
|
|
- 4096
|
|
- 16384
|
|
- 65536
|
|
- 262144
|
|
- 1.048576e+06
|
|
- 4.194304e+06
|
|
- 1.6777216e+07
|
|
- name: running_managed_controllers
|
|
help: Indicates where instances of a controller are currently running
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- manager
|
|
- name
|
|
- name: adds_total
|
|
subsystem: workqueue
|
|
help: Total number of adds handled by workqueue
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- name
|
|
- name: depth
|
|
subsystem: workqueue
|
|
help: Current depth of workqueue
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- name
|
|
- name: longest_running_processor_seconds
|
|
subsystem: workqueue
|
|
help: How many seconds has the longest running processor for workqueue been running.
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- name
|
|
- name: queue_duration_seconds
|
|
subsystem: workqueue
|
|
help: How long in seconds an item stays in workqueue before being requested.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- name
|
|
buckets:
|
|
- 1e-08
|
|
- 1e-07
|
|
- 1e-06
|
|
- 9.999999999999999e-06
|
|
- 9.999999999999999e-05
|
|
- 0.001
|
|
- 0.01
|
|
- 0.1
|
|
- 1
|
|
- 10
|
|
- name: retries_total
|
|
subsystem: workqueue
|
|
help: Total number of retries handled by workqueue
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- name
|
|
- name: unfinished_work_seconds
|
|
subsystem: workqueue
|
|
help: How many seconds of work has done that is in progress and hasn't been observed
|
|
by work_duration. Large values indicate stuck threads. One can deduce the number
|
|
of stuck threads by observing the rate at which this increases.
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- name
|
|
- name: work_duration_seconds
|
|
subsystem: workqueue
|
|
help: How long in seconds processing an item from workqueue takes.
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- name
|
|
buckets:
|
|
- 1e-08
|
|
- 1e-07
|
|
- 1e-06
|
|
- 9.999999999999999e-06
|
|
- 9.999999999999999e-05
|
|
- 0.001
|
|
- 0.01
|
|
- 0.1
|
|
- 1
|
|
- 10
|
|
- name: aggregator_openapi_v2_regeneration_count
|
|
help: Counter of OpenAPI v2 spec regeneration count broken down by causing APIService
|
|
name and reason.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- apiservice
|
|
- reason
|
|
- name: aggregator_openapi_v2_regeneration_duration
|
|
help: Gauge of OpenAPI v2 spec regeneration duration in seconds.
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- reason
|
|
- name: aggregator_unavailable_apiservice
|
|
help: Gauge of APIServices which are marked as unavailable broken down by APIService
|
|
name.
|
|
type: Custom
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- name
|
|
- name: aggregator_unavailable_apiservice_total
|
|
help: Counter of APIServices which are marked as unavailable broken down by APIService
|
|
name and reason.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- name
|
|
- reason
|
|
- name: x509_insecure_sha1_total
|
|
subsystem: kube_aggregator
|
|
namespace: apiserver
|
|
help: Counts the number of requests to servers with insecure SHA1 signatures in
|
|
their serving certificate OR the number of connection failures due to the insecure
|
|
SHA1 signatures (either/or, based on the runtime environment)
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: x509_missing_san_total
|
|
subsystem: kube_aggregator
|
|
namespace: apiserver
|
|
help: Counts the number of requests to servers missing SAN extension in their serving
|
|
certificate OR the number of connection failures due to the lack of x509 certificate
|
|
SAN extension missing (either/or, based on the runtime environment)
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: cloudprovider_aws_api_request_duration_seconds
|
|
help: Latency of AWS API calls
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- request
|
|
- name: cloudprovider_aws_api_request_errors
|
|
help: AWS API errors
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- request
|
|
- name: cloudprovider_aws_api_throttled_requests_total
|
|
help: AWS API throttled requests
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- operation_name
|
|
- name: api_request_duration_seconds
|
|
namespace: cloudprovider_azure
|
|
help: Latency of an Azure API call
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- request
|
|
- resource_group
|
|
- source
|
|
- subscription_id
|
|
buckets:
|
|
- 0.1
|
|
- 0.25
|
|
- 0.5
|
|
- 1
|
|
- 2.5
|
|
- 5
|
|
- 10
|
|
- 15
|
|
- 25
|
|
- 50
|
|
- 120
|
|
- 300
|
|
- 600
|
|
- 1200
|
|
- name: api_request_errors
|
|
namespace: cloudprovider_azure
|
|
help: Number of errors for an Azure API call
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- request
|
|
- resource_group
|
|
- source
|
|
- subscription_id
|
|
- name: api_request_ratelimited_count
|
|
namespace: cloudprovider_azure
|
|
help: Number of rate limited Azure API calls
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- request
|
|
- resource_group
|
|
- source
|
|
- subscription_id
|
|
- name: api_request_throttled_count
|
|
namespace: cloudprovider_azure
|
|
help: Number of throttled Azure API calls
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- request
|
|
- resource_group
|
|
- source
|
|
- subscription_id
|
|
- name: op_duration_seconds
|
|
namespace: cloudprovider_azure
|
|
help: Latency of an Azure service operation
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- request
|
|
- resource_group
|
|
- source
|
|
- subscription_id
|
|
buckets:
|
|
- 0.1
|
|
- 0.2
|
|
- 0.5
|
|
- 1
|
|
- 10
|
|
- 20
|
|
- 30
|
|
- 40
|
|
- 50
|
|
- 60
|
|
- 100
|
|
- 200
|
|
- 300
|
|
- name: op_failure_count
|
|
namespace: cloudprovider_azure
|
|
help: Number of failed Azure service operations
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- request
|
|
- resource_group
|
|
- source
|
|
- subscription_id
|
|
- name: number_of_l4_ilbs
|
|
help: Number of L4 ILBs
|
|
type: Gauge
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- feature
|
|
- name: cloudprovider_gce_api_request_duration_seconds
|
|
help: Latency of a GCE API call
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- region
|
|
- request
|
|
- version
|
|
- zone
|
|
- name: cloudprovider_gce_api_request_errors
|
|
help: Number of errors for an API call
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- region
|
|
- request
|
|
- version
|
|
- zone
|
|
- name: cloudprovider_vsphere_api_request_duration_seconds
|
|
help: Latency of vsphere api call
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- request
|
|
- name: cloudprovider_vsphere_api_request_errors
|
|
help: vsphere Api errors
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- request
|
|
- name: cloudprovider_vsphere_operation_duration_seconds
|
|
help: Latency of vsphere operation call
|
|
type: Histogram
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- operation
|
|
- name: cloudprovider_vsphere_operation_errors
|
|
help: vsphere operation errors
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- operation
|
|
- name: cloudprovider_vsphere_vcenter_versions
|
|
help: Versions for connected vSphere vCenters
|
|
type: Custom
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- hostname
|
|
- version
|
|
- build
|
|
- name: get_token_count
|
|
help: Counter of total Token() requests to the alternate token source
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: get_token_fail_count
|
|
help: Counter of failed Token() requests to the alternate token source
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
- name: pod_security_errors_total
|
|
help: Number of errors preventing normal evaluation. Non-fatal errors may result
|
|
in the latest restricted profile being used for evaluation.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- fatal
|
|
- request_operation
|
|
- resource
|
|
- subresource
|
|
- name: pod_security_evaluations_total
|
|
help: Number of policy evaluations that occurred, not counting ignored or exempt
|
|
requests.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- decision
|
|
- mode
|
|
- policy_level
|
|
- policy_version
|
|
- request_operation
|
|
- resource
|
|
- subresource
|
|
- name: pod_security_exemptions_total
|
|
help: Number of exempt requests, not counting ignored or out of scope requests.
|
|
type: Counter
|
|
stabilityLevel: ALPHA
|
|
labels:
|
|
- request_operation
|
|
- resource
|
|
- subresource
|