Merge branch 'master' into dist/eole/2.6.2/master

This commit is contained in:
Philippe Caseiro 2018-06-11 09:36:47 +02:00
commit 37d0dee412
6 changed files with 79 additions and 10 deletions

View File

@ -1,6 +1,7 @@
#!/bin/bash #!/bin/bash
if [ $(CreoleGet activer_grafana) = "oui" ];then if [[ $(CreoleGet activer_grafana) == "oui" ]]
then
. /usr/lib/eole/diagnose.sh . /usr/lib/eole/diagnose.sh
EchoGras "*** Accès au serveur grafana" EchoGras "*** Accès au serveur grafana"

View File

@ -1,6 +1,7 @@
#!/bin/bash #!/bin/bash
if [ $(CreoleGet activer_prometheus) = "oui" ];then if [[ $(CreoleGet activer_prometheus) == "oui" ]]
then
. /usr/lib/eole/diagnose.sh . /usr/lib/eole/diagnose.sh
EchoGras "*** Accès au serveur Prometheus" EchoGras "*** Accès au serveur Prometheus"

View File

@ -1,9 +1,10 @@
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<creole> <creole>
<files> <files>
<file filelist='prometheus' name='/etc/prometheus/prometheus.yml' mkdir='True' rm='True'/> <file filelist='prometheus' name='/etc/prometheus/prometheus.yml' mkdir='True' rm='True'/>
<file filelist='alertmanager' name='/etc/prometheus/alertmanager.yml' mkdir='True' rm='True'/> <file filelist='alertmanager' name='/etc/prometheus/alertmanager.yml' mkdir='True' rm='True'/>
<file filelist='grafana' name='/etc/grafana/grafana.ini' mkdir='True' rm='True'/> <file filelist='alertmanager' name='/etc/prometheus/rules.d/alert-rules.yml' mkdir='True' rm='True'/>
<file filelist='grafana' name='/etc/grafana/grafana.ini' mkdir='True' rm='True'/>
<service>prometheus</service> <service>prometheus</service>
<service>alertmanager</service> <service>alertmanager</service>
@ -102,6 +103,9 @@
</family> </family>
<family name="alertes prometheus"> <family name="alertes prometheus">
<variable name='alSMTPUseSys' type='oui/non' description="Utiliser la passerelle SMTP du système ?">
<value>non</value>
</variable>
<variable name='alSMTPHost' type='string' description="Adresse du serveur SMTP pour l'envois des alertes"/> <variable name='alSMTPHost' type='string' description="Adresse du serveur SMTP pour l'envois des alertes"/>
<variable name='alSMTPPort' type='string' description="Port d'écoute du serveur SMTP pour l'envois des alertes"/> <variable name='alSMTPPort' type='string' description="Port d'écoute du serveur SMTP pour l'envois des alertes"/>
<variable name='alFrom' type='string' description="Adresse d'origine des emails d'alerte"/> <variable name='alFrom' type='string' description="Adresse d'origine des emails d'alerte"/>
@ -210,6 +214,16 @@
<param>['','service','severity']</param> <param>['','service','severity']</param>
</check> </check>
<condition name='disabled_if_in' source='alSMTPUseSys'>
<param>oui</param>
<target type='variable'>alSMTPUser</target>
<target type='variable'>alSMTPPass</target>
<target type='variable'>alSMTPPort</target>
<target type='variable'>alSMTPTLS</target>
<target type='variable'>alSMTPHost</target>
<target type='variable'>alSMTPAuth</target>
</condition>
<condition name='disabled_if_in' source='alSMTPAuth'> <condition name='disabled_if_in' source='alSMTPAuth'>
<param>non</param> <param>non</param>
<target type='variable'>alSMTPUser</target> <target type='variable'>alSMTPUser</target>

41
tmpl/alert-rules.yml Normal file
View File

@ -0,0 +1,41 @@
#
# Alert rules (rule group "EoleRules"), picked up by Prometheus through its
# rule_files glob on /etc/prometheus/rules.d/.
#
# The job_name_node placeholder is filled in when this template is rendered;
# the double-curly expressions are kept verbatim so they can be expanded at
# alert time.
#
groups:
  - name: EoleRules
    rules:
      # Instance is down: the target has reported up == 0 for one minute.
      - alert: JobInstanceDown
        expr: up == 0
        for: 1m
        annotations:
          # Lower-case keys, consistent with every other rule in this group.
          description: "Job {{ $labels.job }} instance {{ $labels.instance }} is down."
          summary: "Job instance is down"

      # Heavy CPU usage: non-idle CPU share, averaged per instance, above 80 %.
      - alert: cpu_threshold_exceeded
        expr: >-
          (100 * (1 - avg by(instance) (irate(node_cpu{job="%%{job_name_node}",mode="idle"}[5m]))))
          > 80
        annotations:
          description: "This device's cpu usage has exceeded the threshold with a value of {{ $value }}."
          summary: "Instance {{ $labels.instance }} CPU usage is dangerously high"

      # Heavy memory usage: free + cached + buffers, divided by 1e+06, under 80.
      - alert: mem_threshold_exceeded
        expr: >-
          (node_memory_MemFree{job="%%{job_name_node}"} + node_memory_Cached{job="%%{job_name_node}"} + node_memory_Buffers{job="%%{job_name_node}"})
          / 1e+06 < 80
        annotations:
          # The value reported is what remains available, not what is used.
          description: "This device's available memory (free + cached + buffers) has dropped below the threshold with a value of {{ $value }}."
          summary: "Instance {{ $labels.instance }} memory usage is dangerously high"

      # Heavy "/" use: used share of the root filesystem above 90 %.
      # The expression yields the used percentage so that the reported value
      # matches the wording of the annotations.
      - alert: filesystem_threshold_exceeded
        expr: >-
          100 * (1 - node_filesystem_avail{job="%%{job_name_node}",mountpoint="/"}
          / node_filesystem_size{job="%%{job_name_node}",mountpoint="/"}) > 90
        annotations:
          description: "This device's filesystem usage has exceeded the threshold with a value of {{ $value }}."
          summary: "Instance {{ $labels.instance }} filesystem usage is dangerously high"

View File

@ -1,16 +1,28 @@
global: global:
# The smarthost and SMTP sender used for mail notifications. # The smarthost and SMTP sender used for mail notifications.
# SMTP transport for alert e-mails: either the system relay or a dedicated
# server, depending on alSMTPUseSys.
%if %%alSMTPUseSys == 'oui'
%if %%tls_smtp == "non"
  smtp_smarthost: '%%exim_relay_smtp:25'
  # Alertmanager requires TLS by default; the system relay is set up without it.
  smtp_require_tls: false
%elif %%tls_smtp == "port 25"
  smtp_smarthost: '%%exim_relay_smtp:25'
  smtp_require_tls: true
%else
  smtp_smarthost: '%%exim_relay_smtp:465'
  smtp_require_tls: true
%end if
%else
  smtp_smarthost: '%%alSMTPHost:%%alSMTPPort'
%if %%getVar('alSMTPAuth','non') == 'oui'
  smtp_auth_username: '%%alSMTPUser'
  # Render the configured password variable rather than its literal name.
  smtp_auth_password: '%%alSMTPPass'
%end if
%if %%getVar('alSMTPTLS','non') == 'oui'
  smtp_require_tls: true
%else
  smtp_require_tls: false
%end if
%end if
  # Sender address, emitted whichever relay was selected above.
  smtp_from: '%%alFrom'
# The auth token for Hipchat. # The auth token for Hipchat.

View File

@ -5,7 +5,7 @@ global:
scrape_timeout: %%prometheusScrapeTimeout scrape_timeout: %%prometheusScrapeTimeout
rule_files: rule_files:
- "/etc/prometheus/rules.d/*.yml" - "/etc/prometheus/rules.d/*.yml"
scrape_configs: scrape_configs:
- job_name: %%prometheusJobName - job_name: %%prometheusJobName