eole-prometheus/tmpl/alertmanager.yml

143 lines
4.2 KiB
YAML
Raw Permalink Normal View History

2018-06-05 16:46:23 +02:00
global:
  # SMTP relay and sender address used for mail notifications.
%if %%alSMTPUseSys == 'oui'
  # Relay taken from exim_relay_smtp; port and TLS policy depend on tls_smtp.
%if %%tls_smtp == "non"
  smtp_smarthost: '%%exim_relay_smtp:25'
  # Alertmanager requires TLS unless told otherwise, so it is switched off
  # explicitly when the relay is declared as not using TLS.
  smtp_require_tls: false
%elif %%tls_smtp == "port 25"
  smtp_smarthost: '%%exim_relay_smtp:25'
  smtp_require_tls: true
%else
  smtp_smarthost: '%%exim_relay_smtp:465'
  smtp_require_tls: true
%end if
  smtp_from: '%%system_mail_from'
%else
  # Explicitly configured SMTP server (alSMTPHost / alSMTPPort).
  smtp_smarthost: '%%alSMTPHost:%%alSMTPPort'
  smtp_from: '%%alFrom'
%if %%getVar('alSMTPAuth','non') == 'oui'
  # Credentials are only written when authentication is enabled.
  smtp_auth_username: '%%alSMTPUser'
  smtp_auth_password: '%%alSMTPPass'
%end if
%if %%getVar('alSMTPTLS','non') == 'oui'
  smtp_require_tls: true
%else
  smtp_require_tls: false
%end if
%end if
2018-06-06 14:39:33 +02:00
2018-06-05 16:46:23 +02:00
# The auth token for Hipchat.
#hipchat_auth_token: '1234556789'
# Alternative host for Hipchat.
#hipchat_api_url: 'https://hipchat.foobar.org/'
# Glob patterns locating the notification template files to load.
templates:
  - '/etc/alertmanager/template/*.tmpl'
2018-06-05 16:46:23 +02:00
# The root route on which each incoming alert enters.
route:
  # The labels by which incoming alerts are grouped together. For example,
  # multiple alerts coming in for cluster=A and alertname=LatencyHigh would
  # be batched into a single group.
  group_by: ['alertname', 'cluster', 'service']
  # When a new group of alerts is created by an incoming alert, wait at
  # least 'group_wait' to send the initial notification.
  # This ensures that multiple alerts for the same group that start firing
  # shortly after one another are batched together in the first notification.
  group_wait: 30s
  # After the first notification has been sent, wait 'group_interval' before
  # sending a batch of new alerts that started firing for that group.
  group_interval: 5m
  # If an alert has successfully been sent, wait 'repeat_interval' before
  # resending it.
  repeat_interval: 3h
  # A default receiver
  receiver: %%alDefaultReceiver
  # All the above attributes are inherited by all child routes and can be
  # overwritten on each.
  # The child route trees.
  routes:
    # Each of these routes performs a regular expression match on alert
    # labels to catch alerts that are related to a list of services.
%for route in %%getVar('alRouteRegxp',[])
    - match_re:
        %%{route.alRouteMatchRegExpSource}: %%{route.alRouteMatchRegExp}
      receiver: %%route.alRouteMatchRegxpRecv
%if not %%is_empty('alSubRoute')
      # Sub-routes attached to this route: alerts that match none of them
      # fall back to the parent route's receiver.
      routes:
%for sroute in %%getVar('alSubRoute',[])
%if %%sroute == %%route
        # Label, value and receiver come from the matching sub-route entry.
        - match:
            %%{sroute.alSubRouteMatchSource}: %%{sroute.alSubRouteMatchValue}
          receiver: %%sroute.alSubRouteMatchReceiver
          continue: true
%end if
%end for
%end if
%end for
    # Exact-match routes; 'continue: true' lets later sibling routes be
    # evaluated as well.
%for rt in %%getVar('alRoute',[])
    - match:
        %%{rt.alRouteMatchSource}: %%{rt.alRouteMatchValue}
      receiver: %%rt.alRouteMatchReceiver
      continue: true
%if not %%is_empty('alSubRoute')
%for sroute in %%getVar('alSubRoute',[])
%if %%sroute == %%rt
      # NOTE(review): 'routes:' is written once per matching sub-route, so
      # more than one sub-route for the same route would yield a duplicate
      # key. Confirm that at most one sub-route per route is expected.
      routes:
        - match:
            %%{sroute.alSubRouteMatchSource}: %%{sroute.alSubRouteMatchValue}
          receiver: %%sroute.alSubRouteMatchReceiver
          continue: true
%end if
%end for
%end if
%end for
# # This route handles all alerts coming from a database service. If there's
# # no team to handle it, it defaults to the DB team.
# - match:
# service: database
# receiver: team-DB-pager
# # Also group alerts by affected database.
# group_by: [alertname, cluster, database]
# routes:
# - match:
# owner: team-X
# receiver: team-X-pager
# - match:
# owner: team-Y
# receiver: team-Y-pager
# Inhibition rules allow muting a set of alerts while another alert is
# firing.
# We use this to mute any warning-level notifications if the same alert is
2018-06-05 16:46:23 +02:00
# already critical.
inhibit_rules:
  # Mute alerts with severity 'warning' while an alert with severity
  # 'critical' is firing.
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    # Apply inhibition only when all of these labels are equal on both alerts.
    equal: ['alertname', 'cluster', 'service']
# One e-mail receiver is generated per entry of alReceiver.
receivers:
%for rcv in %%getVar('alReceiver',[])
  - name: '%%rcv'
    email_configs:
      - to: '%%rcv.alReceiverEmail'
%end for