Compare commits

...

6 Commits

6 changed files with 60 additions and 2 deletions

View File

@ -14,7 +14,7 @@ L'exporter système (node-exporter) est dans la configuration par défaut (Prome
```
Mode expert > Dépot tiers > Ajouter un dépot
Dépôt officiel Grafana
Libellé du dépot = Cadoles
Libellé du dépot = Dépôt officiel Grafana
Déclaration du dépôt = deb https://packages.grafana.com/oss/deb stable main
Méthode de récupération de la clé = URL de la clé
URL de la clé = https://packages.grafana.com/gpg.key

View File

@ -5,6 +5,7 @@
<file filelist='prometheus' name='/etc/prometheus/prometheus.yml' mkdir='True' rm='True'/>
<file filelist='prometheus-alertmanager' name='/etc/prometheus/alertmanager.yml' mkdir='True' rm='True'/>
<file filelist='prometheus-alertmanager' name='/etc/prometheus/rules.d/alert-rules.yml' mkdir='True' rm='True'/>
<file filelist='prometheus-alertmanager' name='/etc/prometheus/rules.d/alert-rules-node-exporter.yml' mkdir='True' rm='True'/>
<file filelist='prometheus-alertmanager' name='/etc/prometheus/rules.d/predict-rules.yml' mkdir='True' rm='True'/>
<file filelist='grafana' name='/etc/grafana/grafana.ini' mkdir='True' rm='True'/>
<file filelist='grafana' name='/etc/grafana/provisioning/dashboards/eole.yml' source='grafana-dashboards.yml' mkdir='True' rm='True'/>

View File

@ -0,0 +1,35 @@
#
# Alert Rules
#
groups:
- name: GeneralNodeExporterRules
rules:
# TooMuch Data IN
- alert: TooMuchNetworkThroughputIn
expr: sum by (instance) (rate(node_network_receive_bytes_total[2m])) / 1024 / 1024 > 100
for: 5m
labels:
severity: warning
annotations:
summary: Host unusual network throughput in (instance {{ $labels.instance }})
description: "Host network interfaces are probably receiving too much data (> 100 MB/s)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: AlertInodes
expr: node_filesystem_files_free{mountpoint ="/"} / node_filesystem_files{mountpoint="/"} * 100 < 10 and ON (instance, device, mountpoint) node_filesystem_readonly{mountpoint="/"} == 0
for: 2m
labels:
severity: warning
annotations:
summary: Host out of inodes (instance {{ $labels.instance }})
description: "Disk is almost running out of available inodes (< 10% left)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: HostSwapIsFillingUp
expr: (1 - (node_memory_SwapFree / node_memory_SwapTotal)) * 100 > 80
for: 2m
labels:
severity: warning
annotations:
summary: Host swap is filling up (instance {{ $labels.instance }})
description: "Swap is filling up (>80%)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

View File

@ -34,8 +34,27 @@ groups:
- alert: filesystem_threshold_exceeded
expr: node_filesystem_avail{job="%%{job_name_node}",mountpoint="/"} / node_filesystem_size{job="%%{job_name_node}"}
* 100 < 20
for: 2m
annotations:
description: This device's filesystem usage has exceeded the threshold with
a value of {{ $value }}.
summary: Instance {{ $labels.instance }} filesystem usage is dangerously high
# Heavy "/var" use
- alert: var_filesystem_threshold_exceeded
expr: node_filesystem_avail{job="node",mountpoint="/var"} / node_filesystem_size{job="node"}
* 100 < 20
annotations:
description: This device's filesystem usage has exceeded the threshold with
a value of {{ $value }}.
summary: Instance {{ $labels.instance }} filesystem usage is dangerously high
# Heavy CPU temperature
- alert: cpu_temp_threshold_exceeded
expr: avg(node_hwmon_temp_celsius{job="node"}) BY (instance)
> 50
annotations:
description: This device's cpu temperature has exceeded the threshold with a value
of {{ $value }}.
summary: Instance {{ $labels.instance }} CPU temperature is dangerously high

View File

@ -81,6 +81,7 @@ route:
- match:
%%{sroute.alSubRouteMatchSource}: %%alSubRouteMatchValue
receiver: %%alSubRouteMatchReceiver
continue: true
%end if
%end for
%end if
@ -89,6 +90,7 @@ route:
- match:
%%{rt.alRouteMatchSource}: %%{rt.alRouteMatchValue}
receiver: %%rt.alRouteMatchReceiver
continue: true
%if not %%is_empty('alSubRoute')
routes:
@ -97,6 +99,7 @@ route:
- match:
%%{sroute.alSubRouteMatchSource}: %%{sroute.alSubRouteMatchValue}
receiver: %%sroute.alSubRouteMatchReceiver
continue: true
%end if
%end for
%end if

View File

@ -3,7 +3,7 @@ apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
access: direct
access: proxy
orgId: 1
url: http://%%adresse_ip_eth0:9090
isDefault: true