Compare commits

...

15 Commits

9 changed files with 129 additions and 29 deletions

View File

@ -1,6 +1,7 @@
#!/bin/bash
if [ $(CreoleGet activer_grafana) = "oui" ];then
if [[ $(CreoleGet activer_grafana) == "oui" ]]
then
. /usr/lib/eole/diagnose.sh
EchoGras "*** Accès au serveur grafana"

View File

@ -1,6 +1,7 @@
#!/bin/bash
if [ $(CreoleGet activer_prometheus) = "oui" ];then
if [[ $(CreoleGet activer_prometheus) == "oui" ]]
then
. /usr/lib/eole/diagnose.sh
EchoGras "*** Accès au serveur Prometheus"

View File

@ -1,9 +1,12 @@
<?xml version="1.0" encoding="utf-8"?>
<creole>
<files>
<file filelist='prometheus' name='/etc/prometheus/prometheus.yml' mkdir='True' rm='True'/>
<file filelist='alertmanager' name='/etc/prometheus/alertmanager.yml' mkdir='True' rm='True'/>
<file filelist='grafana' name='/etc/grafana/grafana.ini' mkdir='True' rm='True'/>
<file filelist='prometheus' name='/etc/default/prometheus' source='prometheus.defaults' mkdir='True' rm='True'/>
<file filelist='prometheus' name='/etc/prometheus/prometheus.yml' mkdir='True' rm='True'/>
<file filelist='alertmanager' name='/etc/prometheus/alertmanager.yml' mkdir='True' rm='True'/>
<file filelist='alertmanager' name='/etc/prometheus/rules.d/alert-rules.yml' mkdir='True' rm='True'/>
<file filelist='alertmanager' name='/etc/prometheus/rules.d/predict-rules.yml' mkdir='True' rm='True'/>
<file filelist='grafana' name='/etc/grafana/grafana.ini' mkdir='True' rm='True'/>
<service>prometheus</service>
<service>alertmanager</service>
@ -29,6 +32,9 @@
<variable name='activerAlertmanager' type='oui/non' description="Activer le service d'alertes">
<value>oui</value>
</variable>
<variable name='promStorageRetention' type='number' description='Durée de rétention des métriques (en heures)'>
<value>24</value>
</variable>
<variable name='prometheusJobName' type='string' description="Nom du job ajouté au label">
<value>prometheus</value>
</variable>
@ -53,7 +59,7 @@
<!-- Job standard -->
<variable name='prTarg' type='string' description='Nom de la cible prometheus' multi='True'/>
<variable name='prTargIP' type='ip' description="Adresse IP de la cible prometheus"/>
<variable name='prTargIP' type='string' description="Adresse IP ou nom de domaine de la cible prometheus"/>
<variable name='prTargSonde' type='string' description="Sonde à utiliser pour ce client">
<value>Node Exporter</value>
</variable>
@ -72,6 +78,9 @@
<variable name='scrpScheme' type='string' description="Protocole à utiliser pour l'interrogation de la sonde">
<value>http</value>
</variable>
<variable name='scrpMetricPath' type='string' description="Chemin d'accès de la ressource">
<value>/metrics</value>
</variable>
<variable name='addPrOpenTarg' type='oui/non' description="Ajouter des cibles statiques pour les jobs personnalisés">
<value>non</value>
@ -79,7 +88,7 @@
<!-- Job libre -->
<variable name='prOpenTarg' type='string' description='Nom de la cible personnalisée prometheus' multi='True'/>
<variable name='prOpenTargJob' type='string' description='Nom du job de rattachement de la cible'/>
<variable name='prOpenTargIP' type='ip' description="Adresse IP de la cible"/>
<variable name='prOpenTargIP' type='string' description="Adresse IP ou nom de domaine de la cible"/>
<variable name='prOpenTargPort' type='number' description="Port d'écoute de la sonde"/>
</family>
@ -99,9 +108,13 @@
<variable name='grafana_auth_anonymous' type='string' description="Activer l'accès aux utilisateurs non enregistrés">
<value>false</value>
</variable>
<variable name='grafanaRootURL' type='string' description='Url publique de grafana (avec http:// ou https://)' mode='expert'/>
</family>
<family name="alertes prometheus">
<variable name='alSMTPUseSys' type='oui/non' description="Utiliser la passerelle SMTP du système ?">
<value>non</value>
</variable>
<variable name='alSMTPHost' type='string' description="Adresse du serveur SMTP pour l'envoi des alertes"/>
<variable name='alSMTPPort' type='string' description="Port d'écoute du serveur SMTP pour l'envoi des alertes"/>
<variable name='alFrom' type='string' description="Adresse d'origine des emails d'alerte"/>
@ -139,7 +152,7 @@
<separators>
<separator name='activer_grafana'>Services complémentaires</separator>
<separator name='prometheusJobName'>Configuration du serveur Prometheus</separator>
<separator name='job_name_node'>Configuration des jobs standards</separator>
<separator name='job_name_node'>Configuration des jobs standards</separator>
<separator name='alSMTPHost'>Configuration SMTP pour l'envoi des alertes</separator>
<separator name='alReceiver'>Destinataires</separator>
<separator name='alRoute'>Règles de distribution simples</separator>
@ -158,7 +171,8 @@
<slave>scrpInterval</slave>
<slave>scrpTimeout</slave>
<slave>honorLabels</slave>
<slave>scrpScheme</slave>
<slave>scrpScheme</slave>
<slave>scrpMetricPath</slave>
</group>
<group master='alRoute'>
@ -210,6 +224,16 @@
<param>['','service','severity']</param>
</check>
<condition name='disabled_if_in' source='alSMTPUseSys'>
<param>oui</param>
<target type='variable'>alSMTPUser</target>
<target type='variable'>alSMTPPass</target>
<target type='variable'>alSMTPPort</target>
<target type='variable'>alSMTPTLS</target>
<target type='variable'>alSMTPHost</target>
<target type='variable'>alSMTPAuth</target>
</condition>
<condition name='disabled_if_in' source='alSMTPAuth'>
<param>non</param>
<target type='variable'>alSMTPUser</target>

41
tmpl/alert-rules.yml Normal file
View File

@ -0,0 +1,41 @@
#
# Alert Rules
#
# Prometheus alerting rules for the EOLE monitoring stack.
# The job selectors below are filled in from the job_name_node dictionary
# variable when this template is rendered; the {{ ... }} placeholders are
# presumably left untouched for Prometheus to expand when an alert fires.
#
groups:
  - name: EoleRules
    rules:
      # Instance is Down: the target's "up" metric has been 0 for one minute.
      - alert: JobInstanceDown
        expr: up == 0
        for: 1m
        annotations:
          description: Job {{ $labels.job }} instance {{ $labels.instance }} is down.
          summary: Job instance is down
      # Heavy CPU usage: non-idle CPU share, averaged per instance over 5m,
      # is above 80.
      - alert: cpu_threshold_exceeded
        expr: (100 * (1 - avg by(instance) (irate(node_cpu{job="%%{job_name_node}",mode="idle"}[5m]))))
          > 80
        annotations:
          description: This device's cpu usage has exceeded the threshold with a value
            of {{ $value }}.
          summary: Instance {{ $labels.instance }} CPU usage is dangerously high
      # Heavy Memory usage: free + cached + buffers, divided by 1e+06,
      # has dropped below 80.
      - alert: mem_threshold_exceeded
        expr: (node_memory_MemFree{job="%%{job_name_node}"} + node_memory_Cached{job="%%{job_name_node}"} + node_memory_Buffers{job="%%{job_name_node}"})
          / 1e+06 < 80
        annotations:
          description: This device's memory usage has exceeded the threshold with a value
            of {{ $value }}.
          summary: Instance {{ $labels.instance }} memory usage is dangerously high
      # Heavy "/" use: available space on the root mountpoint, as a share of
      # its size, has dropped below 20.
      - alert: filesystem_threshold_exceeded
        expr: node_filesystem_avail{job="%%{job_name_node}",mountpoint="/"} / node_filesystem_size{job="%%{job_name_node}"}
          * 100 < 20
        annotations:
          description: This device's filesystem usage has exceeded the threshold with
            a value of {{ $value }}.
          summary: Instance {{ $labels.instance }} filesystem usage is dangerously high

View File

@ -1,16 +1,29 @@
global:
# The smarthost and SMTP sender used for mail notifications.
%if %%alSMTPUseSys == 'oui'
%if %%tls_smtp == "non"
smtp_smarthost: '%%exim_relay_smtp:25'
%elif %%tls_smtp == "port 25"
smtp_smarthost: '%%exim_relay_smtp:25'
smtp_require_tls: true
%else
smtp_smarthost: '%%exim_relay_smtp:465'
smtp_require_tls: true
%end if
smtp_from: '%%system_mail_from'
%else
smtp_smarthost: '%%alSMTPHost:%%alSMTPPort'
smtp_from: '%%alFrom'
%if %%getVar('alSMTPAuth','non') == 'oui'
%if %%getVar('alSMTPAuth','non') == 'oui'
smtp_auth_username: '%%alSMTPUser'
# The password is taken from the alSMTPPass dictionary variable, in the same
# way the username is taken from alSMTPUser.
smtp_auth_password: '%%alSMTPPass'
%end if
%end if
%if %%getVar('alSMTPTLS','non') == 'oui'
%if %%getVar('alSMTPTLS','non') == 'oui'
smtp_require_tls: true
%else
%else
smtp_require_tls: false
%end if
%end if
# The auth token for Hipchat.
@ -19,8 +32,8 @@ global:
#hipchat_api_url: 'https://hipchat.foobar.org/'
# The directory from which notification templates are read.
templates:
- '/etc/alertmanager/template/*.tmpl'
templates:
- '/etc/alertmanager/template/*.tmpl'
# The root route on which each incoming alert enters.
route:
@ -42,12 +55,12 @@ route:
# If an alert has successfully been sent, wait 'repeat_interval' to
# resend them.
repeat_interval: 3h
repeat_interval: 3h
# A default receiver
receiver: %%alDefaultReceiver
# All the above attributes are inherited by all child routes and can
# All the above attributes are inherited by all child routes and can
# be overwritten on each.
# The child route trees.
@ -107,7 +120,7 @@ route:
# Inhibition rules allow to mute a set of alerts given that another alert is
# firing.
# We use this to mute any warning-level notifications if the same alert is
# We use this to mute any warning-level notifications if the same alert is
# already critical.
inhibit_rules:
- source_match:

View File

@ -40,11 +40,13 @@ domain = %%grafana_domain
# Redirect to correct domain if host header does not match domain
# Prevents DNS rebinding attacks
;enforce_domain = false
enforce_domain = true
# The full public facing url you use in browser, used for redirects and emails
# If you use reverse proxy and sub path specify full url (with sub path)
;root_url = http://localhost:3000
# Only override root_url when a public URL has been set in the dictionary;
# otherwise Grafana keeps its built-in default shown above.
%if not %%is_empty(%%grafanaRootURL)
root_url = %%grafanaRootURL
%end if
# Log web requests
;router_logging = false
@ -299,18 +301,20 @@ enabled = %%grafana_auth_anonymous
#################################### SMTP / Emailing ##########################
[smtp]
;enabled = false
;host = localhost:25
%if %%getVar('activer_exim_relay_smtp','non') == 'oui'
enabled = true
host = %%exim_relay_smtp:25
;user =
# If the password contains # or ; you have to wrap it with trippel quotes. Ex """#password;"""
;password =
;cert_file =
;key_file =
;skip_verify = false
;from_address = admin@grafana.localhost
;from_name = Grafana
skip_verify = true
from_address = %%system_mail_from
from_name = Grafana
# EHLO identity in SMTP dialog (defaults to instance_name)
;ehlo_identity = dashboard.example.com
%end if
[emails]
;welcome_email_on_sign_up = false

6
tmpl/predict-rules.yml Normal file
View File

@ -0,0 +1,6 @@
# Predictive alert rules.
# The job selector is filled in from the job_name_node dictionary variable
# when this template is rendered.
groups:
  - name: PredictRules
    rules:
      # Fires when a linear extrapolation of the last hour of free space on "/"
      # goes below zero 6 hours ahead (6 * 3600 seconds), and that prediction
      # has held for 5 minutes.
      - alert: disk_full_within_6_hours
        expr: predict_linear(node_filesystem_free{job="%%{job_name_node}",mountpoint="/"}[1h], 6 * 3600) < 0
        for: 5m

1
tmpl/prometheus.defaults Normal file
View File

@ -0,0 +1 @@
# Options for Prometheus: path of the configuration file and directory used
# for the on-disk TSDB storage. This template is installed as
# /etc/default/prometheus.
# NOTE(review): the dictionary declares promStorageRetention (retention in
# hours) but no retention option is passed here - confirm whether one is
# intended.
PROMETHEUS_OPTS='--config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/var/lib/prometheus/data'

View File

@ -5,7 +5,7 @@ global:
scrape_timeout: %%prometheusScrapeTimeout
rule_files:
- "/etc/prometheus/rules.d/*.yml"
- "/etc/prometheus/rules.d/*.yml"
scrape_configs:
- job_name: %%prometheusJobName
@ -22,7 +22,7 @@ scrape_configs:
- job_name: '%%job_name_node'
file_sd_configs:
- files: [ "%%job_file_config/*.yml" ]
# - files: [ "%%job_file_config/*.yml" ]
%if %%getVar('addTargetPrometheus','non') == 'oui'
static_configs:
- targets: [ "%%adresse_ip_eth0:9100"%slurp
@ -44,12 +44,21 @@ scrape_configs:
scrape_interval: %%{job.scrpInterval}s
scrape_timeout: %%{job.scrpTimeout}s
scheme: %%job.scrpScheme
%for %%target in %%getVar('prOpenTarg',[])
metrics_path: %%job.scrpMetricPath
%set first = True
static_configs:
- targets: [ %slurp
%for %%target in %%getVar('prOpenTarg',[])
%if %%target.prOpenTargJob == %%job
- targets: [ "%%target.prOpenTargIP:%%target.prOpenTargPort" ]
%if %%first
"%%target.prOpenTargIP:%%target.prOpenTargPort"%slurp
%set first = False
%else
, "%%target.prOpenTargIP:%%target.prOpenTargPort"%slurp
%end if
%end if
%end for
]
%end for
%if %%getVar('activerAlertmanager','non') == 'oui'