Refresh Prometheus rules and Grafana dashboards

* Adds several network related alerts from upstream
This commit is contained in:
Dalton Hubble
2019-04-27 22:41:13 -07:00
parent 0e94708fd8
commit ec5aef5c92
3 changed files with 400 additions and 44 deletions

View File

@ -992,6 +992,60 @@ data:
}
]
},
{
"name": "node-time",
"rules": [
{
"alert": "ClockSkewDetected",
"annotations": {
"message": "Clock skew detected on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}. Ensure NTP is configured correctly on this host."
},
"expr": "abs(node_timex_offset_seconds{job=\"node-exporter\"}) > 0.03\n",
"for": "2m",
"labels": {
"severity": "warning"
}
}
]
},
{
"name": "node-network",
"rules": [
{
"alert": "NetworkReceiveErrors",
"annotations": {
"message": "Network interface \"{{ $labels.device }}\" showing receive errors on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}\""
},
"expr": "rate(node_network_receive_errs_total{job=\"node-exporter\",device!~\"veth.+|tunl.+\"}[2m]) > 0\n",
"for": "2m",
"labels": {
"severity": "warning"
}
},
{
"alert": "NetworkTransmitErrors",
"annotations": {
"message": "Network interface \"{{ $labels.device }}\" showing transmit errors on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}\""
},
"expr": "rate(node_network_transmit_errs_total{job=\"node-exporter\",device!~\"veth.+|tunl.+\"}[2m]) > 0\n",
"for": "2m",
"labels": {
"severity": "warning"
}
},
{
"alert": "NodeNetworkInterfaceFlapping",
"annotations": {
"message": "Network interface \"{{ $labels.device }}\" changing it's up status often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}\""
},
"expr": "changes(node_network_up{job=\"node-exporter\",device!~\"veth.+|tunl.+\"}[2m]) > 2\n",
"for": "2m",
"labels": {
"severity": "warning"
}
}
]
},
{
"name": "prometheus.rules",
"rules": [