Adding alert rules file template
This commit is contained in:
parent
e95d6f9e1d
commit
1013775b1a
|
@ -1,9 +1,10 @@
|
||||||
<?xml version="1.0" encoding="utf-8"?>
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
<creole>
|
<creole>
|
||||||
<files>
|
<files>
|
||||||
<file filelist='prometheus' name='/etc/prometheus/prometheus.yml' mkdir='True' rm='True'/>
|
<file filelist='prometheus' name='/etc/prometheus/prometheus.yml' mkdir='True' rm='True'/>
|
||||||
<file filelist='alertmanager' name='/etc/prometheus/alertmanager.yml' mkdir='True' rm='True'/>
|
<file filelist='alertmanager' name='/etc/prometheus/alertmanager.yml' mkdir='True' rm='True'/>
|
||||||
<file filelist='grafana' name='/etc/grafana/grafana.ini' mkdir='True' rm='True'/>
|
<file filelist='alertmanager' name='/etc/prometheus/rules.d/alert-rules.yml' mkdir='True' rm='True'/>
|
||||||
|
<file filelist='grafana' name='/etc/grafana/grafana.ini' mkdir='True' rm='True'/>
|
||||||
|
|
||||||
<service>prometheus</service>
|
<service>prometheus</service>
|
||||||
<service>alertmanager</service>
|
<service>alertmanager</service>
|
||||||
|
|
|
@ -0,0 +1,41 @@
|
||||||
|
#
# Alert Rules
#
# Prometheus alerting rules, all collected in the single "EoleRules" group.
# Multi-line expressions and descriptions are written as folded block
# scalars (>-) so that each one is read as a single logical line.
#
groups:
- name: EoleRules
  rules:
  # Instance is Down: "up" has been equal to 0 for one minute.
  - alert: JobInstanceDown
    expr: up == 0
    for: 1m
    annotations:
      # Lower-case keys, identical to the ones used by every other rule in
      # this group (annotation names are case-sensitive).
      description: Job {{ $labels.job }} instance {{ $labels.instance }} is down.
      summary: Job instance is down

  # Heavy CPU usage: 100 * (1 - idle rate), averaged by instance, above 80.
  - alert: cpu_threshold_exceeded
    expr: >-
      (100 * (1 - avg by(instance) (irate(node_cpu{job="%%{job_name_node}",mode="idle"}[5m]))))
      > 80
    annotations:
      description: >-
        This device's cpu usage has exceeded the threshold with a value
        of {{ $value }}.
      summary: Instance {{ $labels.instance }} CPU usage is dangerously high

  # Heavy Memory usage: MemFree + Cached + Buffers, divided by 1e+06,
  # is below 80.
  # NOTE(review): the value reported in the description is therefore the
  # remaining amount, not the used amount - confirm the wording is intended.
  - alert: mem_threshold_exceeded
    expr: >-
      (node_memory_MemFree{job="%%{job_name_node}"} + node_memory_Cached{job="%%{job_name_node}"} + node_memory_Buffers{job="%%{job_name_node}"})
      / 1e+06 < 80
    annotations:
      description: >-
        This device's memory usage has exceeded the threshold with a value
        of {{ $value }}.
      summary: Instance {{ $labels.instance }} memory usage is dangerously high

  # Heavy "/" use: available / size * 100 for mountpoint "/" is below 90.
  # NOTE(review): as written this fires as soon as less than 90 percent of
  # "/" is available - confirm the threshold is the intended one.
  - alert: filesystem_threshold_exceeded
    expr: >-
      node_filesystem_avail{job="%%{job_name_node}",mountpoint="/"} / node_filesystem_size{job="%%{job_name_node}"}
      * 100 < 90
    annotations:
      description: >-
        This device's filesystem usage has exceeded the threshold with
        a value of {{ $value }}.
      summary: Instance {{ $labels.instance }} filesystem usage is dangerously high
|
||||||
|
|
|
@ -5,7 +5,7 @@ global:
|
||||||
scrape_timeout: %%prometheusScrapeTimeout
|
scrape_timeout: %%prometheusScrapeTimeout
|
||||||
|
|
||||||
rule_files:
|
rule_files:
|
||||||
- "/etc/prometheus/rules.d/*.yml"
|
- "/etc/prometheus/rules.d/*.yml"
|
||||||
|
|
||||||
scrape_configs:
|
scrape_configs:
|
||||||
- job_name: %%prometheusJobName
|
- job_name: %%prometheusJobName
|
||||||
|
|
Loading…
Reference in New Issue