From c10edef336468acf21dc129bcff1184ef1915760 Mon Sep 17 00:00:00 2001 From: Philippe Caseiro Date: Tue, 5 Jun 2018 16:46:23 +0200 Subject: [PATCH 1/9] Adding alertmanager support --- dicos/70_prometheus.xml | 122 +++++++++++++++++++++++++++++++--------- tmpl/alertmanager.yml | 119 +++++++++++++++++++++++++++++++++++++++ tmpl/prometheus.yml | 1 + 3 files changed, 214 insertions(+), 28 deletions(-) create mode 100644 tmpl/alertmanager.yml diff --git a/dicos/70_prometheus.xml b/dicos/70_prometheus.xml index edc0de9..e686b6c 100644 --- a/dicos/70_prometheus.xml +++ b/dicos/70_prometheus.xml @@ -1,9 +1,12 @@ - - + + + + prometheus + alertmanager grafana-server 80 @@ -46,21 +49,21 @@ non - - - - - - Node Exporter - + + + + + + Node Exporter + non - - - - - + + + + + @@ -79,21 +82,84 @@ false + + + + + + + non + + + + + + + + + + + + + + + + + + + + + + + + + + + Configuration SMTP pour l'envois des alertes + Destinatires + Rêgles de distribution + Sous-rêgles de distribution + + - - prCliIP - prCliSonde - - - prOpenCliIP - prOpenCliPort - + + alReceiverEmail + prCliSonde + - - ['Node Exporter','Port'] - + + alRouteMatchSource + alRouteMatchValue + alRouteMatchReceiver + + + + alRouteMatchRegExpSource + alRouteMatchRegExp + alRouteMatchRegxpRecv + + + + alSubRouteMatchSource + alSubRouteMatchValue + alSubRouteMatchReceiver + + + + prCliIP + prCliSonde + + + + prOpenCliIP + prOpenCliPort + + + + ['Node Exporter','Port'] + non @@ -112,12 +178,12 @@ prCliIP prCliSonde - - non + + non prOpenCli prOpenCliIP prOpenCliPort - + diff --git a/tmpl/alertmanager.yml b/tmpl/alertmanager.yml new file mode 100644 index 0000000..9f716cc --- /dev/null +++ b/tmpl/alertmanager.yml @@ -0,0 +1,119 @@ +global: + # The smarthost and SMTP sender used for mail notifications. 
+ smtp_smarthost: '%%alSMTPHost:%%alSMTPPort' + smtp_from: '%%alFrom' +%if %%getVar('alSMTPAuth','non') == 'oui' + smtp_auth_username: '%%alSMTPUser' + smtp_auth_password: '%%alSMTPPass' +%end if + # The auth token for Hipchat. + #hipchat_auth_token: '1234556789' + # Alternative host for Hipchat. + #hipchat_api_url: 'https://hipchat.foobar.org/' + +# The directory from which notification templates are read. +templates: +- '/etc/alertmanager/template/*.tmpl' + +# The root route on which each incoming alert enters. +route: + # The labels by which incoming alerts are grouped together. For example, + # multiple alerts coming in for cluster=A and alertname=LatencyHigh would + # be batched into a single group. + group_by: ['alertname', 'cluster', 'service'] + + # When a new group of alerts is created by an incoming alert, wait at + # least 'group_wait' to send the initial notification. + # This way ensures that you get multiple alerts for the same group that start + # firing shortly after another are batched together on the first + # notification. + group_wait: 30s + + # When the first notification was sent, wait 'group_interval' to send a batch + # of new alerts that started firing for that group. + group_interval: 5m + + # If an alert has successfully been sent, wait 'repeat_interval' to + # resend them. + repeat_interval: 3h + + # A default receiver + receiver: %%alDefaultReceiver + + # All the above attributes are inherited by all child routes and can + # overwritten on each. + + # The child route trees. + routes: + # This routes performs a regular expression match on alert labels to + # catch alerts that are related to a list of services. 
+%for route in %%getVar('alRouteRegxp',[]) + - match_re: + %%{route.alRouteMatchRegExpSource}: %%{route.alRouteMatchRegExp} + receiver: %%route.alRouteMatchRegxpRecv + %if not is_empty('alSubRoute') + routes: + %for sroute in %%getVar('alSubRoute',[]) + # The service has a sub-route for critical alerts, any alerts + # that do not match, i.e. severity != critical, fall-back to the + # parent node and are sent to 'team-X-mails' + %if %%sroute == %%route + - match: + %%{sroute.alSubRouteMatchSource}: %%{sroute.alSubRouteMatchValue} + receiver: %%sroute.alSubRouteMatchReceiver + %end if + %end for + %end if +%end for +%for rt in %%getVar('alRoute',[]) + - match: + %%{rt.alRouteMatchSource}: %%{rt.alRouteMatchValue} + receiver: %%rt.alRouteMatchReceiver + + %if not is_empty('alSubRoute') + routes: + %for sroute in %%getVar('alSubRoute',[]) + %if %%sroute == %%rt + - match: + %%{rt.alSubRouteMatchReceiver}: %%{rt.alSubRouteMatchReceiver} + receiver: %%rt.alSubRouteMatchReceiver + %end if + %end for + %end if +%end for + +# # This route handles all alerts coming from a database service. If there's +# # no team to handle it, it defaults to the DB team. +# - match: +# service: database +# receiver: team-DB-pager +# # Also group alerts by affected database. +# group_by: [alertname, cluster, database] +# routes: +# - match: +# owner: team-X +# receiver: team-X-pager +# - match: +# owner: team-Y +# receiver: team-Y-pager + + +# Inhibition rules allow to mute a set of alerts given that another alert is +# firing. +# We use this to mute any warning-level notifications if the same alert is +# already critical. +inhibit_rules: +- source_match: + severity: 'critical' + target_match: + severity: 'warning' + # Apply inhibition if the alertname is the same. 
+ equal: ['alertname', 'cluster', 'service'] + + +receivers: +%for rcv in %%getVar('alReceiver',[]) +- name: '%%rcv' + email_configs: + - to: '%%rcv.alReceiverEmail' +%end for diff --git a/tmpl/prometheus.yml b/tmpl/prometheus.yml index 59363ed..31de0c7 100644 --- a/tmpl/prometheus.yml +++ b/tmpl/prometheus.yml @@ -40,6 +40,7 @@ scrape_configs: %end if ] %end if + #alerting: # alertmanagers: # - scheme: https From 5c16310e5db1b4b6270c95bc72772036c1c6fae8 Mon Sep 17 00:00:00 2001 From: Philippe Caseiro Date: Tue, 5 Jun 2018 16:53:11 +0200 Subject: [PATCH 2/9] Adding disable support for alert service --- dicos/70_prometheus.xml | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/dicos/70_prometheus.xml b/dicos/70_prometheus.xml index e686b6c..b76192f 100644 --- a/dicos/70_prometheus.xml +++ b/dicos/70_prometheus.xml @@ -1,9 +1,9 @@ - - - + + + prometheus alertmanager @@ -25,9 +25,12 @@ - oui + oui - + + oui + + prometheus @@ -115,6 +118,8 @@ + Services complèmentairse + Configuration du serveur Prometheus Configuration SMTP pour l'envois des alertes Destinatires Rêgles de distribution @@ -164,6 +169,7 @@ non prometheus + alertes prometheus prometheus activer_grafana @@ -172,6 +178,13 @@ grafana grafana + + + non + alertes prometheus + alertmanager + + non prCli From 0d87cec74a3c4c5e5a70e558168804d3a8fe788d Mon Sep 17 00:00:00 2001 From: Philippe Caseiro Date: Tue, 5 Jun 2018 16:55:03 +0200 Subject: [PATCH 3/9] true is not True --- dicos/70_prometheus.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dicos/70_prometheus.xml b/dicos/70_prometheus.xml index b76192f..d08cb67 100644 --- a/dicos/70_prometheus.xml +++ b/dicos/70_prometheus.xml @@ -101,12 +101,12 @@ - + - + From 44e3a5c0f72c371b84ddc771d5552090692ddcef Mon Sep 17 00:00:00 2001 From: Philippe Caseiro Date: Tue, 5 Jun 2018 16:55:45 +0200 Subject: [PATCH 4/9] Group master must be a multi --- dicos/70_prometheus.xml | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/dicos/70_prometheus.xml b/dicos/70_prometheus.xml index d08cb67..4ad2e1d 100644 --- a/dicos/70_prometheus.xml +++ b/dicos/70_prometheus.xml @@ -111,7 +111,7 @@ - + From b95d0894d9648f522a4f1bbe5302faae1b2d6091 Mon Sep 17 00:00:00 2001 From: Philippe Caseiro Date: Tue, 5 Jun 2018 16:56:48 +0200 Subject: [PATCH 5/9] This as to be a multi to --- dicos/70_prometheus.xml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dicos/70_prometheus.xml b/dicos/70_prometheus.xml index 4ad2e1d..27fddb9 100644 --- a/dicos/70_prometheus.xml +++ b/dicos/70_prometheus.xml @@ -98,7 +98,7 @@ - + @@ -131,7 +131,6 @@ alReceiverEmail - prCliSonde From 5ab3f207898e8ea4470e1c6fff394c56f640f638 Mon Sep 17 00:00:00 2001 From: Philippe Caseiro Date: Tue, 5 Jun 2018 17:05:51 +0200 Subject: [PATCH 6/9] Improving alert support --- dicos/70_prometheus.xml | 10 +++++++++- tmpl/alertmanager.yml | 4 ++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/dicos/70_prometheus.xml b/dicos/70_prometheus.xml index 27fddb9..39d96d2 100644 --- a/dicos/70_prometheus.xml +++ b/dicos/70_prometheus.xml @@ -122,7 +122,8 @@ Configuration du serveur Prometheus Configuration SMTP pour l'envois des alertes Destinatires - Rêgles de distribution + Rêgles de distribution simples + Rêgles de distribution regexp Sous-rêgles de distribution @@ -165,6 +166,12 @@ ['Node Exporter','Port'] + + non + alSMTPUser + alSMTPPass + + non prometheus @@ -172,6 +179,7 @@ prometheus activer_grafana + non grafana diff --git a/tmpl/alertmanager.yml b/tmpl/alertmanager.yml index 9f716cc..d230571 100644 --- a/tmpl/alertmanager.yml +++ b/tmpl/alertmanager.yml @@ -51,7 +51,7 @@ route: - match_re: %%{route.alRouteMatchRegExpSource}: %%{route.alRouteMatchRegExp} receiver: %%route.alRouteMatchRegxpRecv - %if not is_empty('alSubRoute') + %if not %%is_empty('alSubRoute') routes: %for sroute in %%getVar('alSubRoute',[]) # The service has a sub-route for critical alerts, any 
alerts @@ -70,7 +70,7 @@ route: %%{rt.alRouteMatchSource}: %%{rt.alRouteMatchValue} receiver: %%rt.alRouteMatchReceiver - %if not is_empty('alSubRoute') + %if not %%is_empty('alSubRoute') routes: %for sroute in %%getVar('alSubRoute',[]) %if %%sroute == %%rt From 9faff7988ad55572bf047d0556fc18299b542256 Mon Sep 17 00:00:00 2001 From: Philippe Caseiro Date: Tue, 5 Jun 2018 17:07:34 +0200 Subject: [PATCH 7/9] =?UTF-8?q?Fix=20template=C3=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tmpl/alertmanager.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tmpl/alertmanager.yml b/tmpl/alertmanager.yml index d230571..cfd57e8 100644 --- a/tmpl/alertmanager.yml +++ b/tmpl/alertmanager.yml @@ -75,8 +75,8 @@ route: %for sroute in %%getVar('alSubRoute',[]) %if %%sroute == %%rt - match: - %%{rt.alSubRouteMatchReceiver}: %%{rt.alSubRouteMatchReceiver} - receiver: %%rt.alSubRouteMatchReceiver + %%{rt.alSubRouteSource}: %%{rt.alSubRouteMatchValue} + receiver: %%rt.alSubRouteMatchRcv %end if %end for %end if From 73689be06c20b3418e93d93cfef5034c15a80a70 Mon Sep 17 00:00:00 2001 From: Philippe Caseiro Date: Tue, 5 Jun 2018 17:12:17 +0200 Subject: [PATCH 8/9] Fix bad variable name --- tmpl/alertmanager.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tmpl/alertmanager.yml b/tmpl/alertmanager.yml index cfd57e8..ed36ed8 100644 --- a/tmpl/alertmanager.yml +++ b/tmpl/alertmanager.yml @@ -75,8 +75,8 @@ route: %for sroute in %%getVar('alSubRoute',[]) %if %%sroute == %%rt - match: - %%{rt.alSubRouteSource}: %%{rt.alSubRouteMatchValue} - receiver: %%rt.alSubRouteMatchRcv + %%{sroute.alSubRouteMatchSource}: %%{sroute.alSubRouteMatchValue} + receiver: %%sroute.alSubRouteMatchReceiver %end if %end for %end if From 5f263995d0ed182ef7fbae8de3e7a775e692cf19 Mon Sep 17 00:00:00 2001 From: Philippe Caseiro Date: Tue, 5 Jun 2018 17:17:11 +0200 Subject: [PATCH 9/9] Fixing Variable order --- 
dicos/70_prometheus.xml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/dicos/70_prometheus.xml b/dicos/70_prometheus.xml index 39d96d2..79352e6 100644 --- a/dicos/70_prometheus.xml +++ b/dicos/70_prometheus.xml @@ -96,32 +96,33 @@ - + + - + - + - + Services complèmentairse Configuration du serveur Prometheus Configuration SMTP pour l'envois des alertes - Destinatires + Destinatires Rêgles de distribution simples Rêgles de distribution regexp Sous-rêgles de distribution