From 19de38b30dd9fddedf9573257ee00b7c65bfff3d Mon Sep 17 00:00:00 2001 From: Dalton Hubble Date: Thu, 3 Oct 2019 18:56:51 -0700 Subject: [PATCH] Fix Prometheus etcd metrics scraping * Prometheus was configured to use kubernetes discovery of etcd targets based on nodes matching the node label node-role.kubernetes.io/controller=true * Kubernetes v1.16 stopped permitting node role labels node-role.kubernetes.io/* so Typhoon renamed these labels (no longer any association with roles) to node.kubernetes.io/controller=true * As a result, Prometheus didn't discover etcd targets, etcd metrics were missing, etcd alerts were ineffective, and the etcd Grafana dashboard was empty * Introduced: https://github.com/poseidon/typhoon/pull/543 --- CHANGES.md | 2 ++ addons/prometheus/config.yaml | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 99eaebc6..de1bf06e 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -35,6 +35,8 @@ Notable changes between versions. #### Addons +* Fix Prometheus etcd target discovery and scraping ([#561](https://github.com/poseidon/typhoon/pull/561)) + * Fix node label matcher for etcd target discovery (regressed in v1.16.0) * Update kube-state-metrics from v1.7.2 to v1.8.0 * Update nginx-ingress from v0.25.1 to [v0.26.1](https://github.com/kubernetes/ingress-nginx/releases/tag/nginx-0.26.1) ([#555](https://github.com/poseidon/typhoon/pull/555)) * Add lifecycle hook to allow draining for up to 5 minutes diff --git a/addons/prometheus/config.yaml b/addons/prometheus/config.yaml index 3ea5b65d..298c4edc 100644 --- a/addons/prometheus/config.yaml +++ b/addons/prometheus/config.yaml @@ -115,7 +115,7 @@ data: - role: node scheme: http relabel_configs: - - source_labels: [__meta_kubernetes_node_label_node_role_kubernetes_io_controller] + - source_labels: [__meta_kubernetes_node_label_node_kubernetes_io_controller] action: keep regex: 'true' - action: labelmap