From b13a651cfe979f117466811f334e9fc558b696c5 Mon Sep 17 00:00:00 2001 From: Dalton Hubble Date: Sat, 9 Feb 2019 16:47:19 -0800 Subject: [PATCH] Drop metrics that are unset, high cardinality, or extraneous * https://github.com/coreos/prometheus-operator/pull/2387 * https://github.com/coreos/prometheus-operator/pull/1959 --- CHANGES.md | 2 +- addons/prometheus/config.yaml | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index f89ff56f..e2cb82c1 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -10,7 +10,7 @@ Notable changes between versions. #### Addons * Raise nginx-ingress liveness/readiness timeout to 5 seconds -* Improve Prometheus metrics labels +* Improve Prometheus metrics labels and drop extraneous metrics ([#397](https://github.com/poseidon/typhoon/pull/397)) * Add `pod` name label to metrics discovered via service endpoints * Rename `kubernetes_namespace` label to `namespace` diff --git a/addons/prometheus/config.yaml b/addons/prometheus/config.yaml index 2408b3de..3ea5b65d 100644 --- a/addons/prometheus/config.yaml +++ b/addons/prometheus/config.yaml @@ -55,6 +55,17 @@ data: action: replace target_label: job + metric_relabel_configs: + - source_labels: [__name__] + action: drop + regex: etcd_(debugging|disk|request|server).* + - source_labels: [__name__] + action: drop + regex: apiserver_admission_controller_admission_latencies_seconds_.* + - source_labels: [__name__] + action: drop + regex: apiserver_admission_step_admission_latencies_seconds_.* + # Scrape config for node (i.e. kubelet) /metrics (e.g. 'kubelet_'). Explore # metrics from a node by scraping kubelet (127.0.0.1:10250/metrics). - job_name: 'kubelet' @@ -89,6 +100,13 @@ data: relabel_configs: - action: labelmap regex: __meta_kubernetes_node_label_(.+) + metric_relabel_configs: + - source_labels: [__name__, image] + action: drop + regex: container_([a-z_]+); + - source_labels: [__name__] + action: drop + regex: container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s) # Scrap etcd metrics from controllers via listen-metrics-urls @@ -151,6 +169,11 @@ data: - source_labels: [__meta_kubernetes_service_name] action: replace target_label: job + + metric_relabel_configs: + - source_labels: [__name__] + action: drop + regex: etcd_(debugging|disk|request|server).* # Example scrape config for probing services via the Blackbox Exporter. #