From 4ef331f6a13ac41dac14dbf8c5fc026a95aedea5 Mon Sep 17 00:00:00 2001
From: vfebvre
Date: Thu, 21 Oct 2021 11:00:11 +0200
Subject: [PATCH] ajout surveillance /var

---
Review notes (text in this section is ignored when the patch is applied):
 * Adds the alert var_filesystem_threshold_exceeded, which fires when less
   than 20% of the "/var" filesystem is available, and adds a "for: 2m"
   hold-off to the existing "/" alert.
 * The new "/var" rule now also carries "for: 2m" and uses the
   %%{job_name_node} placeholder, so it behaves like the "/" rule it was
   copied from instead of firing instantly and only for a job named "node".
 * NOTE(review): the YAML indentation below was reconstructed from a
   whitespace-collapsed copy of this patch; confirm it against
   tmpl/alert-rules.yml or apply with "git apply --ignore-whitespace".
 * NOTE(review): the post-image blob id on the "index" line predates the two
   fixes above; regenerate the patch with git format-patch before archiving.

 tmpl/alert-rules.yml | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/tmpl/alert-rules.yml b/tmpl/alert-rules.yml
index 72d5c01..f7c78c7 100644
--- a/tmpl/alert-rules.yml
+++ b/tmpl/alert-rules.yml
@@ -34,11 +34,22 @@ groups:
   - alert: filesystem_threshold_exceeded
     expr: node_filesystem_avail{job="%%{job_name_node}",mountpoint="/"} / node_filesystem_size{job="%%{job_name_node}"}
       * 100 < 20
+    for: 2m
     annotations:
       description: This device's filesystem usage has exceeded the threshold with
         a value of {{ $value }}.
       summary: Instance {{ $labels.instance }} filesystem usage is dangerously high
 
+  # Heavy "/var" use
+  - alert: var_filesystem_threshold_exceeded
+    expr: node_filesystem_avail{job="%%{job_name_node}",mountpoint="/var"} / node_filesystem_size{job="%%{job_name_node}"}
+      * 100 < 20
+    for: 2m
+    annotations:
+      description: This device's filesystem usage has exceeded the threshold with
+        a value of {{ $value }}.
+      summary: Instance {{ $labels.instance }} filesystem usage is dangerously high
+
   # Heavy CPU temperature
   - alert: cpu_temp_threshold_exceeded
     expr: avg(node_hwmon_temp_celsius{job="node"}) BY (instance)