diff --git a/dicos/70_prometheus.xml b/dicos/70_prometheus.xml
index 1223e71..b6ada52 100644
--- a/dicos/70_prometheus.xml
+++ b/dicos/70_prometheus.xml
@@ -5,6 +5,7 @@
+
diff --git a/tmpl/alert-rules-node-exporter.yml b/tmpl/alert-rules-node-exporter.yml
new file mode 100644
index 0000000..29df57c
--- /dev/null
+++ b/tmpl/alert-rules-node-exporter.yml
@@ -0,0 +1,39 @@
+#
+# Alert Rules
+#
+groups:
+- name: GeneralNodeExporterRules
+  rules:
+  # Inbound traffic summed over all interfaces of an instance: 2m average
+  # rate converted from bytes/s to MiB/s, alerting above 100 for 5m.
+  - alert: TooMuchNetworkThroughputIn
+    expr: sum by (instance) (rate(node_network_receive_bytes_total[2m])) / 1024 / 1024 > 100
+    for: 5m
+    labels:
+      severity: warning
+    annotations:
+      summary: Host unusual network throughput in (instance {{ $labels.instance }})
+      description: "Host network interfaces are probably receiving too much data (> 100 MB/s)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
+
+  # Less than 10% of inodes free on the root filesystem; the `and` clause
+  # drops series whose root mount is read-only.
+  - alert: AlertInodes
+    expr: node_filesystem_files_free{mountpoint="/"} / node_filesystem_files{mountpoint="/"} * 100 < 10 and ON (instance, device, mountpoint) node_filesystem_readonly{mountpoint="/"} == 0
+    for: 2m
+    labels:
+      severity: warning
+    annotations:
+      summary: Host out of inodes (instance {{ $labels.instance }})
+      description: "Disk is almost running out of available inodes (< 10% left)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
+
+  # More than 80% of swap in use. The `_bytes`-suffixed memory metrics are the
+  # node_exporter >= 0.16 names, matching the `_total` network metric used
+  # above; the unsuffixed names match no series on those exporter versions.
+  - alert: HostSwapIsFillingUp
+    expr: (1 - (node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes)) * 100 > 80
+    for: 2m
+    labels:
+      severity: warning
+    annotations:
+      summary: Host swap is filling up (instance {{ $labels.instance }})
+      description: "Swap is filling up (>80%)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"