Adding alertmanager support

Este commit está contenido en:
Philippe Caseiro 2018-06-05 16:46:23 +02:00
padre 598c1d1807
commit c10edef336
Se han modificado 3 ficheros con 214 adiciones y 28 borrados

Ver fichero

@ -1,9 +1,12 @@
<?xml version="1.0" encoding="utf-8"?>
<creole>
<files>
<file filelist='prometheus' name='/etc/prometheus/prometheus.yml' source='prometheus.yml' mkdir='True' rm='True'/>
<file filelist='grafana' name='/etc/grafana/grafana.ini' source='grafana.ini' mkdir='True' rm='True'/>
<file filelist='prometheus' name='/etc/prometheus/prometheus.yml' mkdir='True' rm='True'/>
<file filelist='prometheus' name='/etc/prometheus/alertmanager.yml' mkdir='True' rm='True'/>
<file filelist='grafana' name='/etc/grafana/grafana.ini' mkdir='True' rm='True'/>
<service>prometheus</service>
<service>alertmanager</service>
<service>grafana-server</service>
<service_access service='prometheus'>
<port service_accesslist="saLemon">80</port>
@ -46,21 +49,21 @@
<family name='Clients prometheus'>
<variable name='ajout_client_prometheus' type='oui/non' description="Ajouter un nouveau client à Prometheus">
<value>non</value>
</variable>
<!-- Client standard -->
<variable name='prCli' type='string' description='Nom du client prometheus' multi='True'/>
<variable name='prCliIP' type='ip' description="Adresse IP du client prometheus"/>
<variable name='prCliSonde' type='string' description="Sonde a utiliser pour ce client">
<value>Node Exporter</value>
</variable>
</variable>
<!-- Client standard -->
<variable name='prCli' type='string' description='Nom du client prometheus' multi='True'/>
<variable name='prCliIP' type='ip' description="Adresse IP du client prometheus"/>
<variable name='prCliSonde' type='string' description="Sonde a utiliser pour ce client">
<value>Node Exporter</value>
</variable>
<variable name='addPrOpenCli' type='oui/non' description="Ajouter un client personnalisé">
<value>non</value>
</variable>
<!-- Client libre -->
<variable name='prOpenCli' type='string' description='Nom du client personnalisé prometheus' multi='True'/>
<variable name='prOpenCliIP' type='ip' description="Adresse IP"/>
<variable name='prOpenCliPort' type='number' description="Port d'écoute de la sonde"/>
</variable>
<!-- Client libre -->
<variable name='prOpenCli' type='string' description='Nom du client personnalisé prometheus' multi='True'/>
<variable name='prOpenCliIP' type='ip' description="Adresse IP"/>
<variable name='prOpenCliPort' type='number' description="Port d'écoute de la sonde"/>
</family>
<family name="grafana">
<variable name='grafana_domain' type='string' description="Nom de Domaine ou IP pour accèder à l'interface Grafana" mandatory='True'>
@ -79,21 +82,84 @@
<value>false</value>
</variable>
</family>
<family name="alertes prometheus">
  <!-- SMTP relay used by Alertmanager to send notification e-mails -->
  <variable name='alSMTPHost' type='string' description="Adresse du serveur SMTP pour l'envois des alertes"/>
  <variable name='alSMTPPort' type='string' description="Port d'écoute du serveur SMTP pour l'envois des alertes"/>
  <variable name='alFrom' type='string' description="Adresse d'origine des emails d'alerte"/>
  <variable name='alSMTPAuth' type='oui/non' description="Authentification requise sur le serveur SMTP ?">
    <value>non</value>
  </variable>
  <variable name='alSMTPUser' type='string' description="Utilisateur SMTP"/>
  <!-- NOTE(review): declared as a plain string; a dedicated password type may be preferable, to be confirmed against the Creole DTD -->
  <variable name='alSMTPPass' type='string' description="Mot de passe"/>
  <!-- Receivers: alReceiver is the master of the alReceiverEmail group and is
       iterated by the alertmanager.yml template, so it must be multi-valued -->
  <variable name='alDefaultReceiver' type='string' description='Nom du "receiver" par défaut'/>
  <variable name='alReceiver' type='string' description="Nom du destinataire" multi='True'/>
  <variable name='alReceiverEmail' type='string' description="Adresse email du destinataire"/>
  <!-- Exact-match routing rules (master: alRoute) -->
  <variable name='alRoute' type='string' description="Nom de la rêgle de distribution des alertes" multi='True'/>
  <variable name='alRouteMatchSource' type='string' description='Source de correspondance'/>
  <variable name='alRouteMatchValue' type='string' description='Valeur attendue'/>
  <variable name='alRouteMatchReceiver' type='string' description="Equipe destinataire de l'alerte"/>
  <!-- Regular-expression routing rules (master: alRouteRegxp) -->
  <variable name='alRouteRegxp' type='string' description="Rêgle de distribution des alertes" multi='True'/>
  <variable name='alRouteMatchRegExpSource' type='string' description='Source de correspondance'/>
  <variable name='alRouteMatchRegExp' type='string' description='Expression régulière'/>
  <variable name='alRouteMatchRegxpRecv' type='string' description="Equipe destinataire de l'alerte (regxp)"/>
  <!-- Sub-routes: alSubRoute holds the name of the parent rule; it is a group
       master and is iterated by the template, so it must be multi-valued -->
  <variable name='alSubRoute' type='string' description="Nom de la rêgle maitresse" multi='True'/>
  <variable name='alSubRouteMatchSource' type='string' description='Source de correspondance'/>
  <variable name='alSubRouteMatchValue' type='string' description='Valeur attendue'/>
  <variable name='alSubRouteMatchReceiver' type='string' description="Equipe destinataire de l'alerte"/>
</family>
<separators>
  <!-- Section headings, each attached to the variable named in the name attribute -->
  <separator name='alSMTPHost'>Configuration SMTP pour l'envoi des alertes</separator>
  <separator name='alDefaultReceiver'>Destinataires</separator>
  <separator name='alRoute'>Règles de distribution</separator>
  <separator name='alSubRoute'>Sous-règles de distribution</separator>
</separators>
</variables>
<constraints>
<group master='prCli'>
<slave>prCliIP</slave>
<slave>prCliSonde</slave>
</group>
<group master='prOpenCli'>
<slave>prOpenCliIP</slave>
<slave>prOpenCliPort</slave>
</group>
<!-- Each receiver name carries its own e-mail address.
     prCliSonde is not listed here: it already belongs to the prCli group. -->
<group master='alReceiver'>
  <slave>alReceiverEmail</slave>
</group>
<check name='valid_enum' target='prCliSonde'>
<param>['Node Exporter','Port']</param>
</check>
<!-- Exact-match routing rule: each alRoute entry has a label source, an expected value and a receiver -->
<group master='alRoute'>
<slave>alRouteMatchSource</slave>
<slave>alRouteMatchValue</slave>
<slave>alRouteMatchReceiver</slave>
</group>
<!-- Regular-expression routing rule: each alRouteRegxp entry has a label source, a pattern and a receiver -->
<group master='alRouteRegxp'>
<slave>alRouteMatchRegExpSource</slave>
<slave>alRouteMatchRegExp</slave>
<slave>alRouteMatchRegxpRecv</slave>
</group>
<!-- Sub-route: each alSubRoute entry has its own label source, expected value and receiver -->
<group master='alSubRoute'>
<slave>alSubRouteMatchSource</slave>
<slave>alSubRouteMatchValue</slave>
<slave>alSubRouteMatchReceiver</slave>
</group>
<group master='prCli'>
<slave>prCliIP</slave>
<slave>prCliSonde</slave>
</group>
<group master='prOpenCli'>
<slave>prOpenCliIP</slave>
<slave>prOpenCliPort</slave>
</group>
<check name='valid_enum' target='prCliSonde'>
<param>['Node Exporter','Port']</param>
</check>
<condition name='disabled_if_in' source='activer_prometheus'>
<param>non</param>
@ -112,12 +178,12 @@
<target type='variable'>prCliIP</target>
<target type='variable'>prCliSonde</target>
</condition>
<condition name='disabled_if_in' source='addPrOpenCli'>
<param>non</param>
<condition name='disabled_if_in' source='addPrOpenCli'>
<param>non</param>
<target type='variable'>prOpenCli</target>
<target type='variable'>prOpenCliIP</target>
<target type='variable'>prOpenCliPort</target>
</condition>
</condition>
</constraints>
<help>
</help>

119
tmpl/alertmanager.yml Archivo normal
Ver fichero

@ -0,0 +1,119 @@
global:
  # The smarthost and SMTP sender used for mail notifications.
  smtp_smarthost: '%%alSMTPHost:%%alSMTPPort'
  smtp_from: '%%alFrom'
%if %%getVar('alSMTPAuth','non') == 'oui'
  # Credentials are only written when SMTP authentication is enabled.
  smtp_auth_username: '%%alSMTPUser'
  smtp_auth_password: '%%alSMTPPass'
%end if
# The auth token for Hipchat.
#hipchat_auth_token: '1234556789'
# Alternative host for Hipchat.
#hipchat_api_url: 'https://hipchat.foobar.org/'
# The directory from which notification templates are read.
templates:
- '/etc/alertmanager/template/*.tmpl'
# The root route on which each incoming alert enters.
route:
  # The labels by which incoming alerts are grouped together. For example,
  # multiple alerts coming in for cluster=A and alertname=LatencyHigh would
  # be batched into a single group.
  group_by: ['alertname', 'cluster', 'service']
  # When a new group of alerts is created by an incoming alert, wait at
  # least 'group_wait' to send the initial notification.
  # This way ensures that you get multiple alerts for the same group that start
  # firing shortly after another are batched together on the first
  # notification.
  group_wait: 30s
  # When the first notification was sent, wait 'group_interval' to send a batch
  # of new alerts that started firing for that group.
  group_interval: 5m
  # If an alert has successfully been sent, wait 'repeat_interval' to
  # resend them.
  repeat_interval: 3h
  # A default receiver
  receiver: %%alDefaultReceiver
  # All the above attributes are inherited by all child routes and can
  # overwritten on each.
  # The child route trees.
  routes:
  # These routes perform a regular expression match on alert labels to
  # catch alerts that are related to a list of services.
  # A sub-route is attached to a route when its parent rule name equals the
  # route name; alerts matching no sub-route fall back to the parent receiver.
%for route in %%getVar('alRouteRegxp',[])
  - match_re:
      %%{route.alRouteMatchRegExpSource}: %%{route.alRouteMatchRegExp}
    receiver: %%route.alRouteMatchRegxpRecv
%if not %%is_empty(%%getVar('alSubRoute',[]))
    routes:
%for sroute in %%getVar('alSubRoute',[])
%if %%sroute == %%route
    - match:
        %%{sroute.alSubRouteMatchSource}: %%{sroute.alSubRouteMatchValue}
      receiver: %%sroute.alSubRouteMatchReceiver
%end if
%end for
%end if
%end for
  # These routes perform an exact match on alert labels.
%for rt in %%getVar('alRoute',[])
  - match:
      %%{rt.alRouteMatchSource}: %%{rt.alRouteMatchValue}
    receiver: %%rt.alRouteMatchReceiver
%if not %%is_empty(%%getVar('alSubRoute',[]))
    routes:
%for sroute in %%getVar('alSubRoute',[])
%if %%sroute == %%rt
    - match:
        %%{sroute.alSubRouteMatchSource}: %%{sroute.alSubRouteMatchValue}
      receiver: %%sroute.alSubRouteMatchReceiver
%end if
%end for
%end if
%end for
# # This route handles all alerts coming from a database service. If there's
# # no team to handle it, it defaults to the DB team.
# - match:
# service: database
# receiver: team-DB-pager
# # Also group alerts by affected database.
# group_by: [alertname, cluster, database]
# routes:
# - match:
# owner: team-X
# receiver: team-X-pager
# - match:
# owner: team-Y
# receiver: team-Y-pager
# Inhibition rules allow to mute a set of alerts given that another alert is
# firing.
# We use this to mute any warning-level notifications if the same alert is
# already critical.
inhibit_rules:
- source_match:
severity: 'critical'
target_match:
severity: 'warning'
# Apply inhibition if the alertname is the same.
equal: ['alertname', 'cluster', 'service']
# Notification targets: one entry is generated per configured receiver name,
# each with a single e-mail destination.
receivers:
%for rcv in %%getVar('alReceiver',[])
- name: '%%rcv'
email_configs:
- to: '%%rcv.alReceiverEmail'
%end for

Ver fichero

@ -40,6 +40,7 @@ scrape_configs:
%end if
]
%end if
#alerting:
# alertmanagers:
# - scheme: https