Merge pull request #83442 from serathius/remove-prometheus-addon
Remove prometheus addon
commit a8e819746d
@@ -1,13 +0,0 @@
# See the OWNERS docs at https://go.k8s.io/owners

approvers:
- kawych
- piosz
- serathius
- brancz
reviewers:
- kawych
- piosz
- serathius
- brancz
@@ -1,5 +0,0 @@
# Prometheus Add-on

This add-on is an experimental Prometheus-based monitoring configuration for Kubernetes, used in e2e tests.

For production use, check out more mature setups like [Prometheus Operator](https://github.com/coreos/prometheus-operator) and [kube-prometheus](https://github.com/coreos/prometheus-operator/tree/master/contrib/kube-prometheus).
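The add-on was not deployed by default; on GCE clusters it was toggled through the cluster scripts (see the `cluster/gce` changes below). A minimal sketch of how it was enabled, assuming a cluster brought up with `cluster/kube-up.sh`:

```sh
# Illustrative only; the exact invocation depends on your environment.
# KUBE_ENABLE_PROMETHEUS_MONITORING feeds ENABLE_PROMETHEUS_MONITORING in the
# cluster/gce configs, which makes configure-helper.sh install the
# "prometheus" addon manifests.
export KUBE_ENABLE_PROMETHEUS_MONITORING=true
cluster/kube-up.sh

# The matching e2e flag (also removed by this PR) told the test framework the
# separate Prometheus stack was available:
#   --prometheus-monitoring=true
```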
@@ -1,18 +0,0 @@
apiVersion: v1
kind: ConfigMap
metadata:
  name: alertmanager-config
  namespace: kube-system
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: EnsureExists
data:
  alertmanager.yml: |
    global: null
    receivers:
    - name: default-receiver
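    # Standard Alertmanager routing semantics: group_wait delays the first
    # notification for a new alert group, group_interval spaces out updates to
    # an existing group, and repeat_interval controls how often a still-firing
    # alert is re-sent.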
    route:
      group_interval: 5m
      group_wait: 10s
      receiver: default-receiver
      repeat_interval: 3h
@@ -1,76 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: alertmanager
  namespace: kube-system
  labels:
    k8s-app: alertmanager
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
    version: v0.14.0
spec:
  replicas: 1
  selector:
    matchLabels:
      k8s-app: alertmanager
      version: v0.14.0
  template:
    metadata:
      labels:
        k8s-app: alertmanager
        version: v0.14.0
    spec:
      priorityClassName: system-cluster-critical
      containers:
        - name: prometheus-alertmanager
          image: "prom/alertmanager:v0.14.0"
          imagePullPolicy: "IfNotPresent"
          args:
            - --config.file=/etc/config/alertmanager.yml
            - --storage.path=/data
            - --web.external-url=/
          ports:
            - containerPort: 9093
          readinessProbe:
            httpGet:
              path: /#/status
              port: 9093
            initialDelaySeconds: 30
            timeoutSeconds: 30
          volumeMounts:
            - name: config-volume
              mountPath: /etc/config
            - name: storage-volume
              mountPath: "/data"
              subPath: ""
          resources:
            limits:
              cpu: 10m
              memory: 50Mi
            requests:
              cpu: 10m
              memory: 50Mi
        - name: prometheus-alertmanager-configmap-reload
          image: "jimmidyson/configmap-reload:v0.1"
          imagePullPolicy: "IfNotPresent"
          args:
            - --volume-dir=/etc/config
            - --webhook-url=http://localhost:9093/-/reload
          volumeMounts:
            - name: config-volume
              mountPath: /etc/config
              readOnly: true
          resources:
            limits:
              cpu: 10m
              memory: 10Mi
            requests:
              cpu: 10m
              memory: 10Mi
      volumes:
        - name: config-volume
          configMap:
            name: alertmanager-config
        - name: storage-volume
          persistentVolumeClaim:
            claimName: alertmanager
@@ -1,15 +0,0 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: alertmanager
  namespace: kube-system
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: EnsureExists
spec:
  storageClassName: standard
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: "2Gi"
@@ -1,18 +0,0 @@
apiVersion: v1
kind: Service
metadata:
  name: alertmanager
  namespace: kube-system
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
    kubernetes.io/name: "Alertmanager"
spec:
  ports:
    - name: http
      port: 80
      protocol: TCP
      targetPort: 9093
  selector:
    k8s-app: alertmanager
  type: "ClusterIP"
@@ -1,89 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: kube-state-metrics
  namespace: kube-system
  labels:
    k8s-app: kube-state-metrics
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
    version: v1.3.0
spec:
  selector:
    matchLabels:
      k8s-app: kube-state-metrics
      version: v1.3.0
  replicas: 1
  template:
    metadata:
      labels:
        k8s-app: kube-state-metrics
        version: v1.3.0
    spec:
      priorityClassName: system-cluster-critical
      serviceAccountName: kube-state-metrics
      containers:
        - name: kube-state-metrics
          image: quay.io/coreos/kube-state-metrics:v1.3.0
          ports:
            - name: http-metrics
              containerPort: 8080
            - name: telemetry
              containerPort: 8081
          readinessProbe:
            httpGet:
              path: /healthz
              port: 8080
            initialDelaySeconds: 5
            timeoutSeconds: 5
        - name: addon-resizer
          image: k8s.gcr.io/addon-resizer:1.8.6
          resources:
            limits:
              cpu: 100m
              memory: 30Mi
            requests:
              cpu: 100m
              memory: 30Mi
          env:
            - name: MY_POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: MY_POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
          volumeMounts:
            - name: config-volume
              mountPath: /etc/config
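          # The pod nanny below scales kube-state-metrics with cluster size:
          # a base of 100m CPU / 100Mi memory plus 1m CPU and 2Mi memory per
          # node, resizing the deployment once the computed target deviates
          # from the current request by more than the 5% threshold.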
          command:
            - /pod_nanny
            - --config-dir=/etc/config
            - --container=kube-state-metrics
            - --cpu=100m
            - --extra-cpu=1m
            - --memory=100Mi
            - --extra-memory=2Mi
            - --threshold=5
            - --deployment=kube-state-metrics
      volumes:
        - name: config-volume
          configMap:
            name: kube-state-metrics-config
---
# Config map for resource configuration.
apiVersion: v1
kind: ConfigMap
metadata:
  name: kube-state-metrics-config
  namespace: kube-system
  labels:
    k8s-app: kube-state-metrics
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
data:
  NannyConfiguration: |-
    apiVersion: nannyconfig/v1alpha1
    kind: NannyConfiguration
@@ -1,103 +0,0 @@
apiVersion: v1
kind: ServiceAccount
metadata:
  name: kube-state-metrics
  namespace: kube-system
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kube-state-metrics
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
rules:
  - apiGroups: [""]
    resources:
      - configmaps
      - secrets
      - nodes
      - pods
      - services
      - resourcequotas
      - replicationcontrollers
      - limitranges
      - persistentvolumeclaims
      - persistentvolumes
      - namespaces
      - endpoints
    verbs: ["list", "watch"]
  - apiGroups: ["extensions"]
    resources:
      - daemonsets
      - deployments
      - replicasets
    verbs: ["list", "watch"]
  - apiGroups: ["apps"]
    resources:
      - statefulsets
    verbs: ["list", "watch"]
  - apiGroups: ["batch"]
    resources:
      - cronjobs
      - jobs
    verbs: ["list", "watch"]
  - apiGroups: ["autoscaling"]
    resources:
      - horizontalpodautoscalers
    verbs: ["list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: kube-state-metrics-resizer
  namespace: kube-system
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
rules:
  - apiGroups: [""]
    resources:
      - pods
    verbs: ["get"]
  - apiGroups: ["extensions"]
    resources:
      - deployments
    resourceNames: ["kube-state-metrics"]
    verbs: ["get", "update"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: kube-state-metrics
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kube-state-metrics
subjects:
  - kind: ServiceAccount
    name: kube-state-metrics
    namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: kube-state-metrics
  namespace: kube-system
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: kube-state-metrics-resizer
subjects:
  - kind: ServiceAccount
    name: kube-state-metrics
    namespace: kube-system
@@ -1,23 +0,0 @@
apiVersion: v1
kind: Service
metadata:
  name: kube-state-metrics
  namespace: kube-system
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
    kubernetes.io/name: "kube-state-metrics"
  annotations:
    prometheus.io/scrape: 'true'
spec:
  ports:
    - name: http-metrics
      port: 8080
      targetPort: http-metrics
      protocol: TCP
    - name: telemetry
      port: 8081
      targetPort: telemetry
      protocol: TCP
  selector:
    k8s-app: kube-state-metrics
@@ -1,57 +0,0 @@
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: node-exporter
  namespace: kube-system
  labels:
    k8s-app: node-exporter
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
    version: v0.15.2
spec:
  selector:
    matchLabels:
      k8s-app: node-exporter
      version: v0.15.2
  updateStrategy:
    type: OnDelete
  template:
    metadata:
      labels:
        k8s-app: node-exporter
        version: v0.15.2
    spec:
      priorityClassName: system-node-critical
      containers:
        - name: prometheus-node-exporter
          image: "prom/node-exporter:v0.15.2"
          imagePullPolicy: "IfNotPresent"
          args:
            - --path.procfs=/host/proc
            - --path.sysfs=/host/sys
          ports:
            - name: metrics
              containerPort: 9100
              hostPort: 9100
          volumeMounts:
            - name: proc
              mountPath: /host/proc
              readOnly: true
            - name: sys
              mountPath: /host/sys
              readOnly: true
          resources:
            limits:
              memory: 50Mi
            requests:
              cpu: 100m
              memory: 50Mi
      hostNetwork: true
      hostPID: true
      volumes:
        - name: proc
          hostPath:
            path: /proc
        - name: sys
          hostPath:
            path: /sys
@@ -1,20 +0,0 @@
apiVersion: v1
kind: Service
metadata:
  name: node-exporter
  namespace: kube-system
  annotations:
    prometheus.io/scrape: "true"
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
    kubernetes.io/name: "NodeExporter"
spec:
  clusterIP: None
  ports:
    - name: metrics
      port: 9100
      protocol: TCP
      targetPort: 9100
  selector:
    k8s-app: node-exporter
@@ -1,171 +0,0 @@
# Prometheus configuration format https://prometheus.io/docs/prometheus/latest/configuration/configuration/
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: kube-system
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: EnsureExists
data:
  prometheus.yml: |
    scrape_configs:
    - job_name: prometheus
      static_configs:
      - targets:
        - localhost:9090

    - job_name: kubernetes-apiservers
      kubernetes_sd_configs:
      - role: endpoints
      relabel_configs:
      - action: keep
        regex: default;kubernetes;https
        source_labels:
        - __meta_kubernetes_namespace
        - __meta_kubernetes_service_name
        - __meta_kubernetes_endpoint_port_name
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        insecure_skip_verify: true
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token

    - job_name: kubernetes-nodes-kubelet
      kubernetes_sd_configs:
      - role: node
      relabel_configs:
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        insecure_skip_verify: true
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token

    - job_name: kubernetes-nodes-cadvisor
      kubernetes_sd_configs:
      - role: node
      relabel_configs:
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)
      - target_label: __metrics_path__
        replacement: /metrics/cadvisor
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        insecure_skip_verify: true
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token

    - job_name: kubernetes-service-endpoints
      kubernetes_sd_configs:
      - role: endpoints
      relabel_configs:
      - action: keep
        regex: true
        source_labels:
        - __meta_kubernetes_service_annotation_prometheus_io_scrape
      - action: replace
        regex: (https?)
        source_labels:
        - __meta_kubernetes_service_annotation_prometheus_io_scheme
        target_label: __scheme__
      - action: replace
        regex: (.+)
        source_labels:
        - __meta_kubernetes_service_annotation_prometheus_io_path
        target_label: __metrics_path__
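      # Rewrite the scrape address: keep the discovered host (dropping any
      # existing port) and join it with the prometheus.io/port annotation
      # value as host:port ($1:$2).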
      - action: replace
        regex: ([^:]+)(?::\d+)?;(\d+)
        replacement: $1:$2
        source_labels:
        - __address__
        - __meta_kubernetes_service_annotation_prometheus_io_port
        target_label: __address__
      - action: labelmap
        regex: __meta_kubernetes_service_label_(.+)
      - action: replace
        source_labels:
        - __meta_kubernetes_namespace
        target_label: kubernetes_namespace
      - action: replace
        source_labels:
        - __meta_kubernetes_service_name
        target_label: kubernetes_name

    - job_name: kubernetes-services
      kubernetes_sd_configs:
      - role: service
      metrics_path: /probe
      params:
        module:
        - http_2xx
      relabel_configs:
      - action: keep
        regex: true
        source_labels:
        - __meta_kubernetes_service_annotation_prometheus_io_probe
      - source_labels:
        - __address__
        target_label: __param_target
      - replacement: blackbox
        target_label: __address__
      - source_labels:
        - __param_target
        target_label: instance
      - action: labelmap
        regex: __meta_kubernetes_service_label_(.+)
      - source_labels:
        - __meta_kubernetes_namespace
        target_label: kubernetes_namespace
      - source_labels:
        - __meta_kubernetes_service_name
        target_label: kubernetes_name

    - job_name: kubernetes-pods
      kubernetes_sd_configs:
      - role: pod
      relabel_configs:
      - action: keep
        regex: true
        source_labels:
        - __meta_kubernetes_pod_annotation_prometheus_io_scrape
      - action: replace
        regex: (.+)
        source_labels:
        - __meta_kubernetes_pod_annotation_prometheus_io_path
        target_label: __metrics_path__
      - action: replace
        regex: ([^:]+)(?::\d+)?;(\d+)
        replacement: $1:$2
        source_labels:
        - __address__
        - __meta_kubernetes_pod_annotation_prometheus_io_port
        target_label: __address__
      - action: labelmap
        regex: __meta_kubernetes_pod_label_(.+)
      - action: replace
        source_labels:
        - __meta_kubernetes_namespace
        target_label: kubernetes_namespace
      - action: replace
        source_labels:
        - __meta_kubernetes_pod_name
        target_label: kubernetes_pod_name
    alerting:
      alertmanagers:
      - kubernetes_sd_configs:
        - role: pod
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        relabel_configs:
        - source_labels: [__meta_kubernetes_namespace]
          regex: kube-system
          action: keep
        - source_labels: [__meta_kubernetes_pod_label_k8s_app]
          regex: alertmanager
          action: keep
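        # An empty regex matches only an empty label value, so this drops pod
        # targets that do not declare a container port number.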
        - source_labels: [__meta_kubernetes_pod_container_port_number]
          regex:
          action: drop
@@ -1,55 +0,0 @@
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus
  namespace: kube-system
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
rules:
  - apiGroups:
      - ""
    resources:
      - nodes
      - nodes/metrics
      - services
      - endpoints
      - pods
    verbs:
      - get
      - list
      - watch
  - apiGroups:
      - ""
    resources:
      - configmaps
    verbs:
      - get
  - nonResourceURLs:
      - "/metrics"
    verbs:
      - get
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
  - kind: ServiceAccount
    name: prometheus
    namespace: kube-system
@@ -1,17 +0,0 @@
kind: Service
apiVersion: v1
metadata:
  name: prometheus
  namespace: kube-system
  labels:
    kubernetes.io/name: "Prometheus"
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
spec:
  ports:
    - name: http
      port: 9090
      protocol: TCP
      targetPort: 9090
  selector:
    k8s-app: prometheus
@@ -1,107 +0,0 @@
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: prometheus
  namespace: kube-system
  labels:
    k8s-app: prometheus
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
    version: v2.2.1
spec:
  serviceName: "prometheus"
  replicas: 1
  podManagementPolicy: "Parallel"
  updateStrategy:
    type: "RollingUpdate"
  selector:
    matchLabels:
      k8s-app: prometheus
  template:
    metadata:
      labels:
        k8s-app: prometheus
    spec:
      priorityClassName: system-cluster-critical
      serviceAccountName: prometheus
      initContainers:
        - name: "init-chown-data"
          image: "busybox:latest"
          imagePullPolicy: "IfNotPresent"
          command: ["chown", "-R", "65534:65534", "/data"]
          volumeMounts:
            - name: prometheus-data
              mountPath: /data
              subPath: ""
      containers:
        - name: prometheus-server-configmap-reload
          image: "jimmidyson/configmap-reload:v0.1"
          imagePullPolicy: "IfNotPresent"
          args:
            - --volume-dir=/etc/config
            - --webhook-url=http://localhost:9090/-/reload
          volumeMounts:
            - name: config-volume
              mountPath: /etc/config
              readOnly: true
          resources:
            limits:
              cpu: 10m
              memory: 10Mi
            requests:
              cpu: 10m
              memory: 10Mi

        - name: prometheus-server
          image: "prom/prometheus:v2.2.1"
          imagePullPolicy: "IfNotPresent"
          args:
            - --config.file=/etc/config/prometheus.yml
            - --storage.tsdb.path=/data
            - --web.console.libraries=/etc/prometheus/console_libraries
            - --web.console.templates=/etc/prometheus/consoles
            - --web.enable-lifecycle
          ports:
            - containerPort: 9090
          readinessProbe:
            httpGet:
              path: /-/ready
              port: 9090
            initialDelaySeconds: 30
            timeoutSeconds: 30
          livenessProbe:
            httpGet:
              path: /-/healthy
              port: 9090
            initialDelaySeconds: 30
            timeoutSeconds: 30
          # based on 10 running nodes with 30 pods each
          resources:
            limits:
              cpu: 200m
              memory: 1000Mi
            requests:
              cpu: 200m
              memory: 1000Mi

          volumeMounts:
            - name: config-volume
              mountPath: /etc/config
            - name: prometheus-data
              mountPath: /data
              subPath: ""
      terminationGracePeriodSeconds: 300
      volumes:
        - name: config-volume
          configMap:
            name: prometheus-config
  volumeClaimTemplates:
    - metadata:
        name: prometheus-data
      spec:
        storageClassName: standard
        accessModes:
          - ReadWriteOnce
        resources:
          requests:
            storage: "16Gi"
@@ -160,9 +160,6 @@ ENABLE_L7_LOADBALANCING="${KUBE_ENABLE_L7_LOADBALANCING:-glbc}"
# standalone - Heapster only. Metrics available via Heapster REST API.
ENABLE_CLUSTER_MONITORING="${KUBE_ENABLE_CLUSTER_MONITORING:-standalone}"

# Optional: Enable deploying separate prometheus stack for monitoring kubernetes cluster
ENABLE_PROMETHEUS_MONITORING="${KUBE_ENABLE_PROMETHEUS_MONITORING:-false}"

# Optional: Enable Metrics Server. Metrics Server should be enabled everywhere,
# since it's a critical component, but in the first release we need a way to disable
# this in case of stability issues.
@@ -172,9 +172,6 @@ ENABLE_L7_LOADBALANCING="${KUBE_ENABLE_L7_LOADBALANCING:-glbc}"
# standalone - Heapster only. Metrics available via Heapster REST API.
ENABLE_CLUSTER_MONITORING="${KUBE_ENABLE_CLUSTER_MONITORING:-standalone}"

# Optional: Enable deploying separate prometheus stack for monitoring kubernetes cluster
ENABLE_PROMETHEUS_MONITORING="${KUBE_ENABLE_PROMETHEUS_MONITORING:-false}"

# Optional: Enable Metrics Server. Metrics Server should be enabled everywhere,
# since it's a critical component, but in the first release we need a way to disable
# this in case of stability issues.
@@ -2279,10 +2279,6 @@ EOF
    prepare-kube-proxy-manifest-variables "$src_dir/kube-proxy/kube-proxy-ds.yaml"
    setup-addon-manifests "addons" "kube-proxy"
  fi
  # Setup prometheus stack for monitoring kubernetes cluster
  if [[ "${ENABLE_PROMETHEUS_MONITORING:-}" == "true" ]]; then
    setup-addon-manifests "addons" "prometheus"
  fi
  # Setup cluster monitoring using heapster
  if [[ "${ENABLE_CLUSTER_MONITORING:-}" == "influxdb" ]] || \
     [[ "${ENABLE_CLUSTER_MONITORING:-}" == "google" ]] || \
@@ -1124,7 +1124,6 @@ SERVICE_CLUSTER_IP_RANGE: $(yaml-quote ${SERVICE_CLUSTER_IP_RANGE})
KUBERNETES_MASTER_NAME: $(yaml-quote ${KUBERNETES_MASTER_NAME})
ALLOCATE_NODE_CIDRS: $(yaml-quote ${ALLOCATE_NODE_CIDRS:-false})
ENABLE_CLUSTER_MONITORING: $(yaml-quote ${ENABLE_CLUSTER_MONITORING:-none})
ENABLE_PROMETHEUS_MONITORING: $(yaml-quote ${ENABLE_PROMETHEUS_MONITORING:-false})
ENABLE_METRICS_SERVER: $(yaml-quote ${ENABLE_METRICS_SERVER:-false})
ENABLE_METADATA_AGENT: $(yaml-quote ${ENABLE_METADATA_AGENT:-none})
METADATA_AGENT_CPU_REQUEST: $(yaml-quote ${METADATA_AGENT_CPU_REQUEST:-})
@@ -167,7 +167,6 @@ export PATH
  --node-tag="${NODE_TAG:-}" \
  --master-tag="${MASTER_TAG:-}" \
  --cluster-monitoring-mode="${KUBE_ENABLE_CLUSTER_MONITORING:-standalone}" \
  --prometheus-monitoring="${KUBE_ENABLE_PROMETHEUS_MONITORING:-false}" \
  --dns-domain="${KUBE_DNS_DOMAIN:-cluster.local}" \
  --ginkgo.slowSpecThreshold="${GINKGO_SLOW_SPEC_THRESHOLD:-300}" \
  ${KUBE_CONTAINER_RUNTIME:+"--container-runtime=${KUBE_CONTAINER_RUNTIME}"} \
@@ -108,13 +108,6 @@ func SkipIfMultizone(c clientset.Interface) {
	}
}

// SkipUnlessPrometheusMonitoringIsEnabled skips if the prometheus monitoring is not enabled.
func SkipUnlessPrometheusMonitoringIsEnabled(supportedMonitoring ...string) {
	if !TestContext.EnablePrometheusMonitoring {
		skipInternalf(1, "Skipped because prometheus monitoring is not enabled")
	}
}

// SkipUnlessMasterOSDistroIs skips if the master OS distro is not included in the supportedMasterOsDistros.
func SkipUnlessMasterOSDistroIs(supportedMasterOsDistros ...string) {
	if !MasterOSDistroIs(supportedMasterOsDistros...) {
@@ -151,8 +151,6 @@ type TestContextType struct {
	NodeTestContextType
	// Monitoring solution that is used in current cluster.
	ClusterMonitoringMode string
	// Separate Prometheus monitoring deployed in cluster
	EnablePrometheusMonitoring bool

	// Indicates what path the kubernetes-anywhere is installed on
	KubernetesAnywherePath string
@@ -313,7 +311,6 @@ func RegisterClusterFlags(flags *flag.FlagSet) {
	flags.StringVar(&TestContext.MasterOSDistro, "master-os-distro", "debian", "The OS distribution of cluster master (debian, ubuntu, gci, coreos, or custom).")
	flags.StringVar(&TestContext.NodeOSDistro, "node-os-distro", "debian", "The OS distribution of cluster VM instances (debian, ubuntu, gci, coreos, or custom).")
	flags.StringVar(&TestContext.ClusterMonitoringMode, "cluster-monitoring-mode", "standalone", "The monitoring solution that is used in the cluster.")
	flags.BoolVar(&TestContext.EnablePrometheusMonitoring, "prometheus-monitoring", false, "Separate Prometheus monitoring deployed in cluster.")
	flags.StringVar(&TestContext.ClusterDNSDomain, "dns-domain", "cluster.local", "The DNS Domain of the cluster.")

	// TODO: Flags per provider? Rename gce-project/gce-zone?
@@ -13,7 +13,6 @@ go_library(
        "custom_metrics_deployments.go",
        "custom_metrics_stackdriver.go",
        "metrics_grabber.go",
        "prometheus.go",
        "stackdriver.go",
        "stackdriver_metadata_agent.go",
    ],
@@ -46,7 +45,6 @@ go_library(
        "//test/utils/image:go_default_library",
        "//vendor/github.com/onsi/ginkgo:go_default_library",
        "//vendor/github.com/onsi/gomega:go_default_library",
        "//vendor/github.com/prometheus/common/model:go_default_library",
        "//vendor/golang.org/x/oauth2/google:go_default_library",
        "//vendor/google.golang.org/api/monitoring/v3:go_default_library",
    ],
@@ -1,388 +0,0 @@
/*
Copyright 2018 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package monitoring

import (
	"context"
	"encoding/json"
	"fmt"
	"math"
	"time"

	"github.com/prometheus/common/model"

	"github.com/onsi/ginkgo"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/kubernetes/test/e2e/common"
	"k8s.io/kubernetes/test/e2e/framework"
	instrumentation "k8s.io/kubernetes/test/e2e/instrumentation/common"
)

const (
	prometheusQueryStep                = time.Minute
	prometheusMetricErrorTolerance     = 0.25
	prometheusMetricValidationDuration = time.Minute * 2
	prometheusRate                     = time.Minute * 2
	prometheusRequiredNodesUpDuration  = time.Minute * 5
	prometheusService                  = "prometheus"
	prometheusSleepBetweenAttempts     = time.Second * 30
	prometheusTestTimeout              = time.Minute * 5
	customMetricValue                  = 1000
	targetCPUUsage                     = 0.1
)

var _ = instrumentation.SIGDescribe("[Feature:PrometheusMonitoring] Prometheus", func() {
	ginkgo.BeforeEach(func() {
		framework.SkipUnlessPrometheusMonitoringIsEnabled()
	})

	f := framework.NewDefaultFramework("prometheus-monitoring")
	ginkgo.It("should scrape container metrics from all nodes.", func() {
		expectedNodes, err := getAllNodes(f.ClientSet)
		framework.ExpectNoError(err)
		retryUntilSucceeds(func() error {
			return validateMetricAvailableForAllNodes(f.ClientSet, `container_cpu_usage_seconds_total`, expectedNodes)
		}, prometheusTestTimeout)
	})
	ginkgo.It("should successfully scrape all targets", func() {
		retryUntilSucceeds(func() error {
			return validateAllActiveTargetsAreHealthy(f.ClientSet)
		}, prometheusTestTimeout)
	})
	ginkgo.It("should contain correct container CPU metric.", func() {
		query := prometheusCPUQuery(f.Namespace.Name, "prometheus-cpu-consumer", prometheusRate)
		consumer := consumeCPUResources(f, "prometheus-cpu-consumer", targetCPUUsage*1000)
		defer consumer.CleanUp()
		retryUntilSucceeds(func() error {
			return validateQueryReturnsCorrectValues(f.ClientSet, query, targetCPUUsage, 3, prometheusMetricErrorTolerance)
		}, prometheusTestTimeout)
	})
	ginkgo.It("should scrape metrics from annotated pods.", func() {
		query := prometheusPodCustomMetricQuery(f.Namespace.Name, "prometheus-custom-pod-metric")
		consumer := exportCustomMetricFromPod(f, "prometheus-custom-pod-metric", customMetricValue)
		defer consumer.CleanUp()
		retryUntilSucceeds(func() error {
			return validateQueryReturnsCorrectValues(f.ClientSet, query, customMetricValue, 1, prometheusMetricErrorTolerance)
		}, prometheusTestTimeout)
	})
	ginkgo.It("should scrape metrics from annotated services.", func() {
		query := prometheusServiceCustomMetricQuery(f.Namespace.Name, "prometheus-custom-service-metric")
		consumer := exportCustomMetricFromService(f, "prometheus-custom-service-metric", customMetricValue)
		defer consumer.CleanUp()
		retryUntilSucceeds(func() error {
			return validateQueryReturnsCorrectValues(f.ClientSet, query, customMetricValue, 1, prometheusMetricErrorTolerance)
		}, prometheusTestTimeout)
	})
})

func prometheusCPUQuery(namespace, podNamePrefix string, rate time.Duration) string {
	return fmt.Sprintf(`sum(irate(container_cpu_usage_seconds_total{namespace="%v",pod=~"%v.*",image!=""}[%vm]))`,
		namespace, podNamePrefix, int64(rate.Minutes()))
}

func prometheusServiceCustomMetricQuery(namespace, service string) string {
	return fmt.Sprintf(`sum(QPS{kubernetes_namespace="%v",kubernetes_name="%v"})`, namespace, service)
}

func prometheusPodCustomMetricQuery(namespace, podNamePrefix string) string {
	return fmt.Sprintf(`sum(QPS{kubernetes_namespace="%s",kubernetes_pod_name=~"%s.*"})`, namespace, podNamePrefix)
}

func consumeCPUResources(f *framework.Framework, consumerName string, cpuUsage int) *common.ResourceConsumer {
	return common.NewDynamicResourceConsumer(consumerName, f.Namespace.Name, common.KindDeployment, 1, cpuUsage,
		memoryUsed, 0, int64(cpuUsage), memoryLimit, f.ClientSet, f.ScalesGetter)
}

func exportCustomMetricFromPod(f *framework.Framework, consumerName string, metricValue int) *common.ResourceConsumer {
	podAnnotations := map[string]string{
		"prometheus.io/scrape": "true",
		"prometheus.io/path":   "/metrics",
		"prometheus.io/port":   "8080",
	}
	return common.NewMetricExporter(consumerName, f.Namespace.Name, podAnnotations, nil, metricValue, f.ClientSet, f.ScalesGetter)
}

func exportCustomMetricFromService(f *framework.Framework, consumerName string, metricValue int) *common.ResourceConsumer {
	serviceAnnotations := map[string]string{
		"prometheus.io/scrape": "true",
		"prometheus.io/path":   "/metrics",
		"prometheus.io/port":   "8080",
	}
	return common.NewMetricExporter(consumerName, f.Namespace.Name, nil, serviceAnnotations, metricValue, f.ClientSet, f.ScalesGetter)
}

func validateMetricAvailableForAllNodes(c clientset.Interface, metric string, expectedNodesNames []string) error {
	instanceLabels, err := getInstanceLabelsAvailableForMetric(c, prometheusRequiredNodesUpDuration, metric)
	if err != nil {
		return err
	}
	nodesWithMetric := make(map[string]bool)
	for _, instance := range instanceLabels {
		nodesWithMetric[instance] = true
	}
	missedNodesCount := 0
	for _, nodeName := range expectedNodesNames {
		if _, found := nodesWithMetric[nodeName]; !found {
			missedNodesCount++
		}
	}
	if missedNodesCount > 0 {
		return fmt.Errorf("Metric not found for %v out of %v nodes", missedNodesCount, len(expectedNodesNames))
	}
	return nil
}

func validateAllActiveTargetsAreHealthy(c clientset.Interface) error {
	discovery, err := fetchPrometheusTargetDiscovery(c)
	if err != nil {
		return err
	}
	if len(discovery.ActiveTargets) == 0 {
		return fmt.Errorf("Prometheus is not scraping any targets, at least one target is required")
	}
	for _, target := range discovery.ActiveTargets {
		if target.Health != HealthGood {
			return fmt.Errorf("Target health not good. Target: %v", target)
		}
	}
	return nil
}

func validateQueryReturnsCorrectValues(c clientset.Interface, query string, expectedValue float64, minSamplesCount int, errorTolerance float64) error {
	samples, err := fetchQueryValues(c, query, prometheusMetricValidationDuration)
	if err != nil {
		return err
	}
	if len(samples) < minSamplesCount {
		return fmt.Errorf("Not enough samples for query '%v', got %v", query, samples)
	}
	framework.Logf("Executed query '%v' returned %v", query, samples)
	for _, value := range samples {
		error := math.Abs(value-expectedValue) / expectedValue
		if error >= errorTolerance {
			return fmt.Errorf("Query result values outside expected value tolerance. Expected error below %v, got %v", errorTolerance, error)
		}
	}
	return nil
}

func fetchQueryValues(c clientset.Interface, query string, duration time.Duration) ([]float64, error) {
	now := time.Now()
	response, err := queryPrometheus(c, query, now.Add(-duration), now, prometheusQueryStep)
	if err != nil {
		return nil, err
	}
	m, ok := response.(model.Matrix)
	if !ok {
		return nil, fmt.Errorf("Expected matrix response, got: %T", response)
	}
	values := make([]float64, 0)
	for _, stream := range m {
		for _, sample := range stream.Values {
			values = append(values, float64(sample.Value))
		}
	}
	return values, nil
}

func getInstanceLabelsAvailableForMetric(c clientset.Interface, duration time.Duration, metric string) ([]string, error) {
	var instance model.LabelValue
	now := time.Now()
	query := fmt.Sprintf(`sum(%v)by(instance)`, metric)
	result, err := queryPrometheus(c, query, now.Add(-duration), now, prometheusQueryStep)
	if err != nil {
		return nil, err
	}
	instanceLabels := make([]string, 0)
	m, ok := result.(model.Matrix)
	if !ok {
		framework.Failf("Expected matrix response for query '%v', got: %T", query, result)
		return instanceLabels, nil
	}
	for _, stream := range m {
		if instance, ok = stream.Metric["instance"]; !ok {
			continue
		}
		instanceLabels = append(instanceLabels, string(instance))
	}
	return instanceLabels, nil
}

func fetchPrometheusTargetDiscovery(c clientset.Interface) (TargetDiscovery, error) {
	ctx, cancel := context.WithTimeout(context.Background(), framework.SingleCallTimeout)
	defer cancel()

	response, err := c.CoreV1().RESTClient().Get().
		Context(ctx).
		Namespace("kube-system").
		Resource("services").
		Name(prometheusService+":9090").
		SubResource("proxy").
		Suffix("api", "v1", "targets").
		Do().
		Raw()
	var qres promTargetsResponse
	if err != nil {
		framework.Logf(string(response))
		return qres.Data, err
	}
	err = json.Unmarshal(response, &qres)

	return qres.Data, err
}

type promTargetsResponse struct {
	Status string          `json:"status"`
	Data   TargetDiscovery `json:"data"`
}

// TargetDiscovery has all the active targets.
type TargetDiscovery struct {
	ActiveTargets  []*Target        `json:"activeTargets"`
	DroppedTargets []*DroppedTarget `json:"droppedTargets"`
}

// Target has the information for one target.
type Target struct {
	DiscoveredLabels map[string]string `json:"discoveredLabels"`
	Labels           map[string]string `json:"labels"`

	ScrapeURL string `json:"scrapeUrl"`

	LastError  string       `json:"lastError"`
	LastScrape time.Time    `json:"lastScrape"`
	Health     TargetHealth `json:"health"`
}

// DroppedTarget has the information for one target that was dropped during relabelling.
type DroppedTarget struct {
	// Labels before any processing.
	DiscoveredLabels map[string]string `json:"discoveredLabels"`
}

// The possible health states of a target based on the last performed scrape.
const (
	HealthUnknown TargetHealth = "unknown"
	HealthGood    TargetHealth = "up"
	HealthBad     TargetHealth = "down"
)

// TargetHealth describes the health state of a target.
type TargetHealth string

func queryPrometheus(c clientset.Interface, query string, start, end time.Time, step time.Duration) (model.Value, error) {
	ctx, cancel := context.WithTimeout(context.Background(), framework.SingleCallTimeout)
	defer cancel()

	response, err := c.CoreV1().RESTClient().Get().
		Context(ctx).
		Namespace("kube-system").
		Resource("services").
		Name(prometheusService+":9090").
		SubResource("proxy").
		Suffix("api", "v1", "query_range").
		Param("query", query).
		Param("start", fmt.Sprintf("%v", start.Unix())).
		Param("end", fmt.Sprintf("%v", end.Unix())).
		Param("step", fmt.Sprintf("%vs", step.Seconds())).
		Do().
		Raw()
	if err != nil {
		framework.Logf(string(response))
		return nil, err
	}
	var qres promQueryResponse
	err = json.Unmarshal(response, &qres)

	return model.Value(qres.Data.v), err
}

type promQueryResponse struct {
	Status string       `json:"status"`
	Data   responseData `json:"data"`
}

type responseData struct {
	Type   model.ValueType `json:"resultType"`
	Result interface{}     `json:"result"`

	// The decoded value.
	v model.Value
}

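// UnmarshalJSON decodes a Prometheus /api/v1 query_range response in two
// passes: first into an envelope that exposes resultType, then into the
// concrete model type (scalar, vector, or matrix) that resultType names.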
func (qr *responseData) UnmarshalJSON(b []byte) error {
	v := struct {
		Type   model.ValueType `json:"resultType"`
		Result json.RawMessage `json:"result"`
	}{}

	err := json.Unmarshal(b, &v)
	if err != nil {
		return err
	}

	switch v.Type {
	case model.ValScalar:
		var sv model.Scalar
		err = json.Unmarshal(v.Result, &sv)
		qr.v = &sv

	case model.ValVector:
		var vv model.Vector
		err = json.Unmarshal(v.Result, &vv)
		qr.v = vv

	case model.ValMatrix:
		var mv model.Matrix
		err = json.Unmarshal(v.Result, &mv)
		qr.v = mv

	default:
		err = fmt.Errorf("unexpected value type %q", v.Type)
	}
	return err
}

func retryUntilSucceeds(validator func() error, timeout time.Duration) {
	startTime := time.Now()
	var err error
	for {
		err = validator()
		if err == nil {
			return
		}
		if time.Since(startTime) >= timeout {
			break
		}
		framework.Logf(err.Error())
		time.Sleep(prometheusSleepBetweenAttempts)
	}
	framework.Failf(err.Error())
}

func getAllNodes(c clientset.Interface) ([]string, error) {
	nodeList, err := c.CoreV1().Nodes().List(metav1.ListOptions{})
	if err != nil {
		return nil, err
	}
	result := []string{}
	for _, node := range nodeList.Items {
		result = append(result, node.Name)
	}
	return result, nil
}