10
0

Commits vergleichen

..

3 Commits

Autor SHA1 Nachricht Datum
a3472eaea5 ajout de règles supplémentaires pour les alertes 2021-10-21 16:09:53 +02:00
4ef331f6a1 ajout surveillance /var 2021-10-21 11:00:11 +02:00
43e34f8de8 modification configuration d'alertmanager 2020-12-17 10:21:49 +01:00
12 geänderte Dateien mit 49 neuen und 73 gelöschten Zeilen

1
debian/compat vendored
Datei anzeigen

@ -1 +0,0 @@
7

11
debian/control vendored
Datei anzeigen

@ -1,11 +0,0 @@
Source: eole-prometheus
Section: web
Priority: optional
Maintainer: Cadoles <contact@cadoles.com>
Build-Depends: debhelper (>= 9)
Standards-Version: 3.9.3
Package: eole-prometheus
Architecture: amd64
Depends: ${misc:Depends}, prometheus, grafana, eole-node-exporter, prometheus-alertmanager, curl
Description: Eolisation de Prometheus

44
debian/copyright vendored
Datei anzeigen

@ -1,44 +0,0 @@
Format: http://dep.debian.net/deps/dep5
Upstream-Name: {PROJECT}
Source: {URL}
Files: *
Copyright: YEAR {UPSTREAM} {AUTHOR} <{MAIL}>
License: {UPSTREAM LICENSE}
Files: debian/*
Copyright: 2012 Équipe EOLE <eole@ac-dijon.fr>
License: CeCILL-2
License: {UPSTREAM LICENSE}
{TEXT OF THE LICENSE}
License: CeCILL-2
This software is governed by the CeCILL-2 license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/or redistribute the software under the terms of the CeCILL-2
license as circulated by CEA, CNRS and INRIA at the following URL
"http://www.cecill.info".
.
As a counterpart to the access to the source code and rights to copy,
modify and redistribute granted by the license, users are provided only
with a limited warranty and the software's author, the holder of the
economic rights, and the successive licensors have only limited
liability.
.
In this respect, the user's attention is drawn to the risks associated
with loading, using, modifying and/or developing or reproducing the
software by the user in light of its specific status of free software,
that may mean that it is complicated to manipulate, and that also
therefore means that it is reserved for developers and experienced
professionals having in-depth computer knowledge. Users are therefore
encouraged to load and test the software's suitability as regards their
requirements in conditions enabling the security of their systems and/or
data to be ensured and, more generally, to use and operate it in the
same conditions as regards security.
.
The fact that you are presently reading this means that you have had
knowledge of the CeCILL-2 license and that you accept its terms.
.
On Eole systems, the complete text of the CeCILL-2 License can be found
in '/usr/share/common-licenses/CeCILL-2-en'.

1
debian/dirs vendored
Datei anzeigen

@ -1 +0,0 @@
/var/lib/grafana/dashboards

Datei anzeigen

7
debian/postinst vendored
Datei anzeigen

@ -1,7 +0,0 @@
#!/bin/bash
case "$1" in
configure)
chown grafana:grafana /var/lib/grafana/dashboards
;;
esac

8
debian/rules vendored
Datei anzeigen

@ -1,8 +0,0 @@
#!/usr/bin/make -f
# -*- makefile -*-
# Uncomment this to turn on verbose mode.
# export DH_VERBOSE=1
%:
dh $@

Datei anzeigen

@ -1 +0,0 @@
3.0 (quilt)

Datei anzeigen

@ -5,6 +5,7 @@
<file filelist='prometheus' name='/etc/prometheus/prometheus.yml' mkdir='True' rm='True'/> <file filelist='prometheus' name='/etc/prometheus/prometheus.yml' mkdir='True' rm='True'/>
<file filelist='prometheus-alertmanager' name='/etc/prometheus/alertmanager.yml' mkdir='True' rm='True'/> <file filelist='prometheus-alertmanager' name='/etc/prometheus/alertmanager.yml' mkdir='True' rm='True'/>
<file filelist='prometheus-alertmanager' name='/etc/prometheus/rules.d/alert-rules.yml' mkdir='True' rm='True'/> <file filelist='prometheus-alertmanager' name='/etc/prometheus/rules.d/alert-rules.yml' mkdir='True' rm='True'/>
<file filelist='prometheus-alertmanager' name='/etc/prometheus/rules.d/alert-rules-node-exporter.yml' mkdir='True' rm='True'/>
<file filelist='prometheus-alertmanager' name='/etc/prometheus/rules.d/predict-rules.yml' mkdir='True' rm='True'/> <file filelist='prometheus-alertmanager' name='/etc/prometheus/rules.d/predict-rules.yml' mkdir='True' rm='True'/>
<file filelist='grafana' name='/etc/grafana/grafana.ini' mkdir='True' rm='True'/> <file filelist='grafana' name='/etc/grafana/grafana.ini' mkdir='True' rm='True'/>
<file filelist='grafana' name='/etc/grafana/provisioning/dashboards/eole.yml' source='grafana-dashboards.yml' mkdir='True' rm='True'/> <file filelist='grafana' name='/etc/grafana/provisioning/dashboards/eole.yml' source='grafana-dashboards.yml' mkdir='True' rm='True'/>

Datei anzeigen

@ -0,0 +1,35 @@
#
# Alert rules built on node_exporter metrics
#
# node_exporter 0.16.0 renamed the network and memory metrics in the same
# release: node_network_receive_bytes became node_network_receive_bytes_total
# and the node_memory_* gauges gained a _bytes suffix. The network and swap
# expressions below accept either spelling ("new or old") so that each alert
# can fire whichever exporter generation a host runs.
#
groups:
  - name: GeneralNodeExporterRules
    rules:
      # Inbound traffic, summed over all interfaces of an instance, stays
      # above 100 (bytes/s divided by 1024 twice) for 5 minutes.
      - alert: TooMuchNetworkThroughputIn
        expr: sum by (instance) (rate(node_network_receive_bytes_total[2m]) or rate(node_network_receive_bytes[2m])) / 1024 / 1024 > 100
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: Host unusual network throughput in (instance {{ $labels.instance }})
          description: "Host network interfaces are probably receiving too much data (> 100 MB/s)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
      # Less than 10 percent of the inodes on "/" are free; the "and" clause
      # keeps only filesystems whose readonly gauge is 0.
      - alert: AlertInodes
        expr: node_filesystem_files_free{mountpoint ="/"} / node_filesystem_files{mountpoint="/"} * 100 < 10 and ON (instance, device, mountpoint) node_filesystem_readonly{mountpoint="/"} == 0
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Host out of inodes (instance {{ $labels.instance }})
          description: "Disk is almost running out of available inodes (< 10% left)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
      # More than 80 percent of swap is in use. The _bytes names are tried
      # first; the suffix-less names are the fallback for older exporters.
      - alert: HostSwapIsFillingUp
        expr: (1 - ((node_memory_SwapFree_bytes or node_memory_SwapFree) / (node_memory_SwapTotal_bytes or node_memory_SwapTotal))) * 100 > 80
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Host swap is filling up (instance {{ $labels.instance }})
          description: "Swap is filling up (>80%)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

Datei anzeigen

@ -34,6 +34,16 @@ groups:
- alert: filesystem_threshold_exceeded - alert: filesystem_threshold_exceeded
expr: node_filesystem_avail{job="%%{job_name_node}",mountpoint="/"} / node_filesystem_size{job="%%{job_name_node}"} expr: node_filesystem_avail{job="%%{job_name_node}",mountpoint="/"} / node_filesystem_size{job="%%{job_name_node}"}
* 100 < 20 * 100 < 20
for: 2m
annotations:
description: This device's filesystem usage has exceeded the threshold with
a value of {{ $value }}.
summary: Instance {{ $labels.instance }} filesystem usage is dangerously high
# Heavy "/var" use
- alert: var_filesystem_threshold_exceeded
expr: node_filesystem_avail{job="node",mountpoint="/var"} / node_filesystem_size{job="node"}
* 100 < 20
annotations: annotations:
description: This device's filesystem usage has exceeded the threshold with description: This device's filesystem usage has exceeded the threshold with
a value of {{ $value }}. a value of {{ $value }}.

Datei anzeigen

@ -81,6 +81,7 @@ route:
- match: - match:
%%{sroute.alSubRouteMatchSource}: %%alSubRouteMatchValue %%{sroute.alSubRouteMatchSource}: %%alSubRouteMatchValue
receiver: %%alSubRouteMatchReceiver receiver: %%alSubRouteMatchReceiver
continue: true
%end if %end if
%end for %end for
%end if %end if
@ -89,6 +90,7 @@ route:
- match: - match:
%%{rt.alRouteMatchSource}: %%{rt.alRouteMatchValue} %%{rt.alRouteMatchSource}: %%{rt.alRouteMatchValue}
receiver: %%rt.alRouteMatchReceiver receiver: %%rt.alRouteMatchReceiver
continue: true
%if not %%is_empty('alSubRoute') %if not %%is_empty('alSubRoute')
routes: routes:
@ -97,6 +99,7 @@ route:
- match: - match:
%%{sroute.alSubRouteMatchSource}: %%{sroute.alSubRouteMatchValue} %%{sroute.alSubRouteMatchSource}: %%{sroute.alSubRouteMatchValue}
receiver: %%sroute.alSubRouteMatchReceiver receiver: %%sroute.alSubRouteMatchReceiver
continue: true
%end if %end if
%end for %end for
%end if %end if