Compare commits
6 Commits
pkg/dev/eo
...
ajout_regl
Author | SHA1 | Date | |
---|---|---|---|
a3472eaea5 | |||
4ef331f6a1 | |||
43e34f8de8 | |||
16d7bfa7f2 | |||
d9b253b63d | |||
a5fb3de2c0 |
@ -14,7 +14,7 @@ L'exporter système (node-exporter) est dans la configuration par défaut (Prome
|
|||||||
```
|
```
|
||||||
Mode expert > Dépôt tiers > Ajouter un dépôt
|
Mode expert > Dépôt tiers > Ajouter un dépôt
|
||||||
Dépôt officiel Grafana
|
Dépôt officiel Grafana
|
||||||
Libellé du dépôt = Cadoles
|
Libellé du dépôt = Dépôt officiel Grafana
|
||||||
Déclaration du dépôt = deb https://packages.grafana.com/oss/deb stable main
|
Déclaration du dépôt = deb https://packages.grafana.com/oss/deb stable main
|
||||||
Méthode de récupération de la clé = URL de la clé
|
Méthode de récupération de la clé = URL de la clé
|
||||||
URL de la clé = https://packages.grafana.com/gpg.key
|
URL de la clé = https://packages.grafana.com/gpg.key
|
||||||
|
@ -5,6 +5,7 @@
|
|||||||
<file filelist='prometheus' name='/etc/prometheus/prometheus.yml' mkdir='True' rm='True'/>
|
<file filelist='prometheus' name='/etc/prometheus/prometheus.yml' mkdir='True' rm='True'/>
|
||||||
<file filelist='prometheus-alertmanager' name='/etc/prometheus/alertmanager.yml' mkdir='True' rm='True'/>
|
<file filelist='prometheus-alertmanager' name='/etc/prometheus/alertmanager.yml' mkdir='True' rm='True'/>
|
||||||
<file filelist='prometheus-alertmanager' name='/etc/prometheus/rules.d/alert-rules.yml' mkdir='True' rm='True'/>
|
<file filelist='prometheus-alertmanager' name='/etc/prometheus/rules.d/alert-rules.yml' mkdir='True' rm='True'/>
|
||||||
|
<file filelist='prometheus-alertmanager' name='/etc/prometheus/rules.d/alert-rules-node-exporter.yml' mkdir='True' rm='True'/>
|
||||||
<file filelist='prometheus-alertmanager' name='/etc/prometheus/rules.d/predict-rules.yml' mkdir='True' rm='True'/>
|
<file filelist='prometheus-alertmanager' name='/etc/prometheus/rules.d/predict-rules.yml' mkdir='True' rm='True'/>
|
||||||
<file filelist='grafana' name='/etc/grafana/grafana.ini' mkdir='True' rm='True'/>
|
<file filelist='grafana' name='/etc/grafana/grafana.ini' mkdir='True' rm='True'/>
|
||||||
<file filelist='grafana' name='/etc/grafana/provisioning/dashboards/eole.yml' source='grafana-dashboards.yml' mkdir='True' rm='True'/>
|
<file filelist='grafana' name='/etc/grafana/provisioning/dashboards/eole.yml' source='grafana-dashboards.yml' mkdir='True' rm='True'/>
|
||||||
|
35
tmpl/alert-rules-node-exporter.yml
Normal file
35
tmpl/alert-rules-node-exporter.yml
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
#
|
||||||
|
# Alert Rules
|
||||||
|
#
|
||||||
|
groups:
|
||||||
|
- name: GeneralNodeExporterRules
|
||||||
|
rules:
|
||||||
|
# TooMuch Data IN
|
||||||
|
- alert: TooMuchNetworkThroughputIn
|
||||||
|
expr: sum by (instance) (rate(node_network_receive_bytes_total[2m])) / 1024 / 1024 > 100
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: Host unusual network throughput in (instance {{ $labels.instance }})
|
||||||
|
description: "Host network interfaces are probably receiving too much data (> 100 MB/s)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||||
|
|
||||||
|
- alert: AlertInodes
|
||||||
|
expr: node_filesystem_files_free{mountpoint ="/"} / node_filesystem_files{mountpoint="/"} * 100 < 10 and ON (instance, device, mountpoint) node_filesystem_readonly{mountpoint="/"} == 0
|
||||||
|
for: 2m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: Host out of inodes (instance {{ $labels.instance }})
|
||||||
|
description: "Disk is almost running out of available inodes (< 10% left)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||||
|
|
||||||
|
- alert: HostSwapIsFillingUp
|
||||||
|
expr: (1 - (node_memory_SwapFree / node_memory_SwapTotal)) * 100 > 80
|
||||||
|
for: 2m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: Host swap is filling up (instance {{ $labels.instance }})
|
||||||
|
description: "Swap is filling up (>80%)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||||
|
|
||||||
|
|
@ -34,8 +34,27 @@ groups:
|
|||||||
- alert: filesystem_threshold_exceeded
|
- alert: filesystem_threshold_exceeded
|
||||||
expr: node_filesystem_avail{job="%%{job_name_node}",mountpoint="/"} / node_filesystem_size{job="%%{job_name_node}"}
|
expr: node_filesystem_avail{job="%%{job_name_node}",mountpoint="/"} / node_filesystem_size{job="%%{job_name_node}"}
|
||||||
* 100 < 20
|
* 100 < 20
|
||||||
|
for: 2m
|
||||||
annotations:
|
annotations:
|
||||||
description: This device's filesystem usage has exceeded the threshold with
|
description: This device's filesystem usage has exceeded the threshold with
|
||||||
a value of {{ $value }}.
|
a value of {{ $value }}.
|
||||||
summary: Instance {{ $labels.instance }} filesystem usage is dangerously high
|
summary: Instance {{ $labels.instance }} filesystem usage is dangerously high
|
||||||
|
|
||||||
|
# Heavy "/var" use
|
||||||
|
- alert: var_filesystem_threshold_exceeded
|
||||||
|
expr: node_filesystem_avail{job="node",mountpoint="/var"} / node_filesystem_size{job="node"}
|
||||||
|
* 100 < 20
|
||||||
|
annotations:
|
||||||
|
description: This device's filesystem usage has exceeded the threshold with
|
||||||
|
a value of {{ $value }}.
|
||||||
|
summary: Instance {{ $labels.instance }} filesystem usage is dangerously high
|
||||||
|
|
||||||
|
# Heavy CPU temperature
|
||||||
|
- alert: cpu_temp_threshold_exceeded
|
||||||
|
expr: avg(node_hwmon_temp_celsius{job="node"}) BY (instance)
|
||||||
|
> 50
|
||||||
|
annotations:
|
||||||
|
description: This device's cpu temperature has exceeded the threshold with a value
|
||||||
|
of {{ $value }}.
|
||||||
|
summary: Instance {{ $labels.instance }} CPU temperature is dangerously high
|
||||||
|
|
||||||
|
@ -81,6 +81,7 @@ route:
|
|||||||
- match:
|
- match:
|
||||||
%%{sroute.alSubRouteMatchSource}: %%alSubRouteMatchValue
|
%%{sroute.alSubRouteMatchSource}: %%alSubRouteMatchValue
|
||||||
receiver: %%alSubRouteMatchReceiver
|
receiver: %%alSubRouteMatchReceiver
|
||||||
|
continue: true
|
||||||
%end if
|
%end if
|
||||||
%end for
|
%end for
|
||||||
%end if
|
%end if
|
||||||
@ -89,6 +90,7 @@ route:
|
|||||||
- match:
|
- match:
|
||||||
%%{rt.alRouteMatchSource}: %%{rt.alRouteMatchValue}
|
%%{rt.alRouteMatchSource}: %%{rt.alRouteMatchValue}
|
||||||
receiver: %%rt.alRouteMatchReceiver
|
receiver: %%rt.alRouteMatchReceiver
|
||||||
|
continue: true
|
||||||
|
|
||||||
%if not %%is_empty('alSubRoute')
|
%if not %%is_empty('alSubRoute')
|
||||||
routes:
|
routes:
|
||||||
@ -97,6 +99,7 @@ route:
|
|||||||
- match:
|
- match:
|
||||||
%%{sroute.alSubRouteMatchSource}: %%{sroute.alSubRouteMatchValue}
|
%%{sroute.alSubRouteMatchSource}: %%{sroute.alSubRouteMatchValue}
|
||||||
receiver: %%sroute.alSubRouteMatchReceiver
|
receiver: %%sroute.alSubRouteMatchReceiver
|
||||||
|
continue: true
|
||||||
%end if
|
%end if
|
||||||
%end for
|
%end for
|
||||||
%end if
|
%end if
|
||||||
|
@ -3,7 +3,7 @@ apiVersion: 1
|
|||||||
datasources:
|
datasources:
|
||||||
- name: Prometheus
|
- name: Prometheus
|
||||||
type: prometheus
|
type: prometheus
|
||||||
access: direct
|
access: proxy
|
||||||
orgId: 1
|
orgId: 1
|
||||||
url: http://%%adresse_ip_eth0:9090
|
url: http://%%adresse_ip_eth0:9090
|
||||||
isDefault: true
|
isDefault: true
|
||||||
|
Reference in New Issue
Block a user