diff --git a/tmpl/alert-rules.yml b/tmpl/alert-rules.yml
index 72d5c01..f7c78c7 100644
--- a/tmpl/alert-rules.yml
+++ b/tmpl/alert-rules.yml
@@ -34,11 +34,22 @@ groups:
   - alert: filesystem_threshold_exceeded
     expr: node_filesystem_avail{job="%%{job_name_node}",mountpoint="/"} / node_filesystem_size{job="%%{job_name_node}"}
       * 100 < 20
+    for: 2m
     annotations:
       description: This device's filesystem usage has exceeded the threshold with
         a value of {{ $value }}.
       summary: Instance {{ $labels.instance }} filesystem usage is dangerously high
 
+  # Heavy "/var" use: same threshold, job selector and hold time as the "/" rule above
+  - alert: var_filesystem_threshold_exceeded
+    expr: node_filesystem_avail{job="%%{job_name_node}",mountpoint="/var"} / node_filesystem_size{job="%%{job_name_node}"}
+      * 100 < 20
+    for: 2m
+    annotations:
+      description: This device's filesystem usage has exceeded the threshold with
+        a value of {{ $value }}.
+      summary: Instance {{ $labels.instance }} filesystem usage is dangerously high
+
   # Heavy CPU temperature
   - alert: cpu_temp_threshold_exceeded
     expr: avg(node_hwmon_temp_celsius{job="node"}) BY (instance)