Refresh Prometheus rules/alerts and Grafana dashboards
* Refresh upstream Prometheus rules and alerts and Grafana dashboards * All Loki recording rules for convenience
This commit is contained in:
parent
3c1be7b0e0
commit
d47d40b517
|
@ -15,6 +15,10 @@ Notable changes between versions.
|
||||||
* Fix Azure worker UDP outbound connections ([#691](https://github.com/poseidon/typhoon/pull/691))
|
* Fix Azure worker UDP outbound connections ([#691](https://github.com/poseidon/typhoon/pull/691))
|
||||||
* Fix Azure worker clock sync timeouts
|
* Fix Azure worker clock sync timeouts
|
||||||
|
|
||||||
|
#### Addons
|
||||||
|
|
||||||
|
* Refresh Prometheus rules and alerts
|
||||||
|
|
||||||
## v1.18.0
|
## v1.18.0
|
||||||
|
|
||||||
* Kubernetes [v1.18.0](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.18.md#v1180)
|
* Kubernetes [v1.18.0](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.18.md#v1180)
|
||||||
|
|
|
@ -59,7 +59,7 @@ data:
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\", cluster=\"$cluster\"}[1m]))",
|
"expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\", cluster=\"$cluster\"}[$__interval]))",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"instant": true,
|
"instant": true,
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
|
@ -1561,7 +1561,7 @@ data:
|
||||||
],
|
],
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)",
|
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"instant": true,
|
"instant": true,
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
|
@ -1570,7 +1570,7 @@ data:
|
||||||
"step": 10
|
"step": 10
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)",
|
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"instant": true,
|
"instant": true,
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
|
@ -1579,7 +1579,7 @@ data:
|
||||||
"step": 10
|
"step": 10
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)",
|
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"instant": true,
|
"instant": true,
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
|
@ -1588,7 +1588,7 @@ data:
|
||||||
"step": 10
|
"step": 10
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)",
|
"expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"instant": true,
|
"instant": true,
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
|
@ -1597,7 +1597,7 @@ data:
|
||||||
"step": 10
|
"step": 10
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)",
|
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"instant": true,
|
"instant": true,
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
|
@ -1606,7 +1606,7 @@ data:
|
||||||
"step": 10
|
"step": 10
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)",
|
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"instant": true,
|
"instant": true,
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
|
@ -1706,7 +1706,7 @@ data:
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)",
|
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{namespace}}",
|
"legendFormat": "{{namespace}}",
|
||||||
|
@ -1804,7 +1804,7 @@ data:
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)",
|
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{namespace}}",
|
"legendFormat": "{{namespace}}",
|
||||||
|
@ -1902,7 +1902,7 @@ data:
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)",
|
"expr": "avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{namespace}}",
|
"legendFormat": "{{namespace}}",
|
||||||
|
@ -2000,7 +2000,7 @@ data:
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)",
|
"expr": "avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{namespace}}",
|
"legendFormat": "{{namespace}}",
|
||||||
|
@ -2098,7 +2098,7 @@ data:
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)",
|
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{namespace}}",
|
"legendFormat": "{{namespace}}",
|
||||||
|
@ -2196,7 +2196,7 @@ data:
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)",
|
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{namespace}}",
|
"legendFormat": "{{namespace}}",
|
||||||
|
@ -2294,7 +2294,7 @@ data:
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)",
|
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{namespace}}",
|
"legendFormat": "{{namespace}}",
|
||||||
|
@ -2392,7 +2392,7 @@ data:
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)",
|
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{namespace}}",
|
"legendFormat": "{{namespace}}",
|
||||||
|
@ -2499,41 +2499,6 @@ data:
|
||||||
"type": "query",
|
"type": "query",
|
||||||
"useTags": false
|
"useTags": false
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"allValue": null,
|
|
||||||
"auto": false,
|
|
||||||
"auto_count": 30,
|
|
||||||
"auto_min": "10s",
|
|
||||||
"current": {
|
|
||||||
"text": "5m",
|
|
||||||
"value": "5m"
|
|
||||||
},
|
|
||||||
"datasource": "$datasource",
|
|
||||||
"hide": 2,
|
|
||||||
"includeAll": false,
|
|
||||||
"label": null,
|
|
||||||
"multi": false,
|
|
||||||
"name": "interval",
|
|
||||||
"options": [
|
|
||||||
{
|
|
||||||
"selected": true,
|
|
||||||
"text": "4h",
|
|
||||||
"value": "4h"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"query": "4h",
|
|
||||||
"refresh": 2,
|
|
||||||
"regex": "",
|
|
||||||
"skipUrlSync": false,
|
|
||||||
"sort": 1,
|
|
||||||
"tagValuesQuery": "",
|
|
||||||
"tags": [
|
|
||||||
|
|
||||||
],
|
|
||||||
"tagsQuery": "",
|
|
||||||
"type": "interval",
|
|
||||||
"useTags": false
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"allValue": null,
|
"allValue": null,
|
||||||
"current": {
|
"current": {
|
||||||
|
@ -4033,7 +3998,7 @@ data:
|
||||||
],
|
],
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])) by (pod)",
|
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"instant": true,
|
"instant": true,
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
|
@ -4042,7 +4007,7 @@ data:
|
||||||
"step": 10
|
"step": 10
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])) by (pod)",
|
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"instant": true,
|
"instant": true,
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
|
@ -4051,7 +4016,7 @@ data:
|
||||||
"step": 10
|
"step": 10
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])) by (pod)",
|
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"instant": true,
|
"instant": true,
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
|
@ -4060,7 +4025,7 @@ data:
|
||||||
"step": 10
|
"step": 10
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])) by (pod)",
|
"expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"instant": true,
|
"instant": true,
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
|
@ -4069,7 +4034,7 @@ data:
|
||||||
"step": 10
|
"step": 10
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])) by (pod)",
|
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"instant": true,
|
"instant": true,
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
|
@ -4078,7 +4043,7 @@ data:
|
||||||
"step": 10
|
"step": 10
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])) by (pod)",
|
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"instant": true,
|
"instant": true,
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
|
@ -4178,7 +4143,7 @@ data:
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])) by (pod)",
|
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
|
@ -4276,7 +4241,7 @@ data:
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])) by (pod)",
|
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
|
@ -4374,7 +4339,7 @@ data:
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])) by (pod)",
|
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
|
@ -4472,7 +4437,7 @@ data:
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])) by (pod)",
|
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
|
@ -4570,7 +4535,7 @@ data:
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])) by (pod)",
|
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
|
@ -4668,7 +4633,7 @@ data:
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])) by (pod)",
|
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
|
@ -4748,41 +4713,6 @@ data:
|
||||||
"regex": "",
|
"regex": "",
|
||||||
"type": "datasource"
|
"type": "datasource"
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"allValue": null,
|
|
||||||
"auto": false,
|
|
||||||
"auto_count": 30,
|
|
||||||
"auto_min": "10s",
|
|
||||||
"current": {
|
|
||||||
"text": "5m",
|
|
||||||
"value": "5m"
|
|
||||||
},
|
|
||||||
"datasource": "$datasource",
|
|
||||||
"hide": 2,
|
|
||||||
"includeAll": false,
|
|
||||||
"label": null,
|
|
||||||
"multi": false,
|
|
||||||
"name": "interval",
|
|
||||||
"options": [
|
|
||||||
{
|
|
||||||
"selected": true,
|
|
||||||
"text": "4h",
|
|
||||||
"value": "4h"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"query": "4h",
|
|
||||||
"refresh": 2,
|
|
||||||
"regex": "",
|
|
||||||
"skipUrlSync": false,
|
|
||||||
"sort": 1,
|
|
||||||
"tagValuesQuery": "",
|
|
||||||
"tags": [
|
|
||||||
|
|
||||||
],
|
|
||||||
"tagsQuery": "",
|
|
||||||
"type": "interval",
|
|
||||||
"useTags": false
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"allValue": null,
|
"allValue": null,
|
||||||
"current": {
|
"current": {
|
||||||
|
@ -5724,41 +5654,6 @@ data:
|
||||||
"regex": "",
|
"regex": "",
|
||||||
"type": "datasource"
|
"type": "datasource"
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"allValue": null,
|
|
||||||
"auto": false,
|
|
||||||
"auto_count": 30,
|
|
||||||
"auto_min": "10s",
|
|
||||||
"current": {
|
|
||||||
"text": "5m",
|
|
||||||
"value": "5m"
|
|
||||||
},
|
|
||||||
"datasource": "$datasource",
|
|
||||||
"hide": 2,
|
|
||||||
"includeAll": false,
|
|
||||||
"label": null,
|
|
||||||
"multi": false,
|
|
||||||
"name": "interval",
|
|
||||||
"options": [
|
|
||||||
{
|
|
||||||
"selected": true,
|
|
||||||
"text": "4h",
|
|
||||||
"value": "4h"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"query": "4h",
|
|
||||||
"refresh": 2,
|
|
||||||
"regex": "",
|
|
||||||
"skipUrlSync": false,
|
|
||||||
"sort": 1,
|
|
||||||
"tagValuesQuery": "",
|
|
||||||
"tags": [
|
|
||||||
|
|
||||||
],
|
|
||||||
"tagsQuery": "",
|
|
||||||
"type": "interval",
|
|
||||||
"useTags": false
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"allValue": null,
|
"allValue": null,
|
||||||
"current": {
|
"current": {
|
||||||
|
|
|
@ -1042,7 +1042,7 @@ data:
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval])) by (pod)",
|
"expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__interval])) by (pod)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
|
@ -1140,7 +1140,7 @@ data:
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval])) by (pod)",
|
"expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__interval])) by (pod)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
|
@ -1238,7 +1238,7 @@ data:
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval])) by (pod)",
|
"expr": "sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__interval])) by (pod)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
|
@ -1336,7 +1336,7 @@ data:
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval])) by (pod)",
|
"expr": "sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__interval])) by (pod)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
|
@ -1434,7 +1434,7 @@ data:
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval])) by (pod)",
|
"expr": "sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__interval])) by (pod)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
|
@ -1532,7 +1532,7 @@ data:
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval])) by (pod)",
|
"expr": "sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__interval])) by (pod)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
|
@ -1612,41 +1612,6 @@ data:
|
||||||
"regex": "",
|
"regex": "",
|
||||||
"type": "datasource"
|
"type": "datasource"
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"allValue": null,
|
|
||||||
"auto": false,
|
|
||||||
"auto_count": 30,
|
|
||||||
"auto_min": "10s",
|
|
||||||
"current": {
|
|
||||||
"text": "5m",
|
|
||||||
"value": "5m"
|
|
||||||
},
|
|
||||||
"datasource": "$datasource",
|
|
||||||
"hide": 2,
|
|
||||||
"includeAll": false,
|
|
||||||
"label": null,
|
|
||||||
"multi": false,
|
|
||||||
"name": "interval",
|
|
||||||
"options": [
|
|
||||||
{
|
|
||||||
"selected": true,
|
|
||||||
"text": "4h",
|
|
||||||
"value": "4h"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"query": "4h",
|
|
||||||
"refresh": 2,
|
|
||||||
"regex": "",
|
|
||||||
"skipUrlSync": false,
|
|
||||||
"sort": 1,
|
|
||||||
"tagValuesQuery": "",
|
|
||||||
"tags": [
|
|
||||||
|
|
||||||
],
|
|
||||||
"tagsQuery": "",
|
|
||||||
"type": "interval",
|
|
||||||
"useTags": false
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"allValue": null,
|
"allValue": null,
|
||||||
"current": {
|
"current": {
|
||||||
|
@ -2701,7 +2666,7 @@ data:
|
||||||
],
|
],
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"instant": true,
|
"instant": true,
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
|
@ -2710,7 +2675,7 @@ data:
|
||||||
"step": 10
|
"step": 10
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"instant": true,
|
"instant": true,
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
|
@ -2719,7 +2684,7 @@ data:
|
||||||
"step": 10
|
"step": 10
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"instant": true,
|
"instant": true,
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
|
@ -2728,7 +2693,7 @@ data:
|
||||||
"step": 10
|
"step": 10
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"instant": true,
|
"instant": true,
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
|
@ -2737,7 +2702,7 @@ data:
|
||||||
"step": 10
|
"step": 10
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"instant": true,
|
"instant": true,
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
|
@ -2746,7 +2711,7 @@ data:
|
||||||
"step": 10
|
"step": 10
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"instant": true,
|
"instant": true,
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
|
@ -2846,7 +2811,7 @@ data:
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
|
@ -2944,7 +2909,7 @@ data:
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
|
@ -3042,7 +3007,7 @@ data:
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
"expr": "(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
|
@ -3140,7 +3105,7 @@ data:
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
"expr": "(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
|
@ -3238,7 +3203,7 @@ data:
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
|
@ -3336,7 +3301,7 @@ data:
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
|
@ -3434,7 +3399,7 @@ data:
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
|
@ -3532,7 +3497,7 @@ data:
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod) \ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{pod}}",
|
"legendFormat": "{{pod}}",
|
||||||
|
@ -3612,41 +3577,6 @@ data:
|
||||||
"regex": "",
|
"regex": "",
|
||||||
"type": "datasource"
|
"type": "datasource"
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"allValue": null,
|
|
||||||
"auto": false,
|
|
||||||
"auto_count": 30,
|
|
||||||
"auto_min": "10s",
|
|
||||||
"current": {
|
|
||||||
"text": "5m",
|
|
||||||
"value": "5m"
|
|
||||||
},
|
|
||||||
"datasource": "$datasource",
|
|
||||||
"hide": 2,
|
|
||||||
"includeAll": false,
|
|
||||||
"label": null,
|
|
||||||
"multi": false,
|
|
||||||
"name": "interval",
|
|
||||||
"options": [
|
|
||||||
{
|
|
||||||
"selected": true,
|
|
||||||
"text": "4h",
|
|
||||||
"value": "4h"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"query": "4h",
|
|
||||||
"refresh": 2,
|
|
||||||
"regex": "",
|
|
||||||
"skipUrlSync": false,
|
|
||||||
"sort": 1,
|
|
||||||
"tagValuesQuery": "",
|
|
||||||
"tags": [
|
|
||||||
|
|
||||||
],
|
|
||||||
"tagsQuery": "",
|
|
||||||
"type": "interval",
|
|
||||||
"useTags": false
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"allValue": null,
|
"allValue": null,
|
||||||
"current": {
|
"current": {
|
||||||
|
@ -3708,7 +3638,7 @@ data:
|
||||||
"value": ""
|
"value": ""
|
||||||
},
|
},
|
||||||
"datasource": "$datasource",
|
"datasource": "$datasource",
|
||||||
"hide": 2,
|
"hide": 0,
|
||||||
"includeAll": false,
|
"includeAll": false,
|
||||||
"label": null,
|
"label": null,
|
||||||
"multi": false,
|
"multi": false,
|
||||||
|
@ -4906,7 +4836,7 @@ data:
|
||||||
],
|
],
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
|
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"instant": true,
|
"instant": true,
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
|
@ -4915,7 +4845,7 @@ data:
|
||||||
"step": 10
|
"step": 10
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
|
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"instant": true,
|
"instant": true,
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
|
@ -4924,7 +4854,7 @@ data:
|
||||||
"step": 10
|
"step": 10
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
|
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"instant": true,
|
"instant": true,
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
|
@ -4933,7 +4863,7 @@ data:
|
||||||
"step": 10
|
"step": 10
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
|
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"instant": true,
|
"instant": true,
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
|
@ -4942,7 +4872,7 @@ data:
|
||||||
"step": 10
|
"step": 10
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
|
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"instant": true,
|
"instant": true,
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
|
@ -4951,7 +4881,7 @@ data:
|
||||||
"step": 10
|
"step": 10
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
|
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"instant": true,
|
"instant": true,
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
|
@ -5051,7 +4981,7 @@ data:
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{workload}}",
|
"legendFormat": "{{workload}}",
|
||||||
|
@ -5149,7 +5079,7 @@ data:
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{workload}}",
|
"legendFormat": "{{workload}}",
|
||||||
|
@ -5247,7 +5177,7 @@ data:
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
"expr": "(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{workload}}",
|
"legendFormat": "{{workload}}",
|
||||||
|
@ -5345,7 +5275,7 @@ data:
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
"expr": "(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{workload}}",
|
"legendFormat": "{{workload}}",
|
||||||
|
@ -5443,7 +5373,7 @@ data:
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{workload}}",
|
"legendFormat": "{{workload}}",
|
||||||
|
@ -5541,7 +5471,7 @@ data:
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{workload}}",
|
"legendFormat": "{{workload}}",
|
||||||
|
@ -5639,7 +5569,7 @@ data:
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{workload}}",
|
"legendFormat": "{{workload}}",
|
||||||
|
@ -5737,7 +5667,7 @@ data:
|
||||||
"steppedLine": false,
|
"steppedLine": false,
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod) \ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod) \ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{workload}}",
|
"legendFormat": "{{workload}}",
|
||||||
|
@ -5817,41 +5747,6 @@ data:
|
||||||
"regex": "",
|
"regex": "",
|
||||||
"type": "datasource"
|
"type": "datasource"
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"allValue": null,
|
|
||||||
"auto": false,
|
|
||||||
"auto_count": 30,
|
|
||||||
"auto_min": "10s",
|
|
||||||
"current": {
|
|
||||||
"text": "5m",
|
|
||||||
"value": "5m"
|
|
||||||
},
|
|
||||||
"datasource": "$datasource",
|
|
||||||
"hide": 2,
|
|
||||||
"includeAll": false,
|
|
||||||
"label": null,
|
|
||||||
"multi": false,
|
|
||||||
"name": "interval",
|
|
||||||
"options": [
|
|
||||||
{
|
|
||||||
"selected": true,
|
|
||||||
"text": "4h",
|
|
||||||
"value": "4h"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"query": "4h",
|
|
||||||
"refresh": 2,
|
|
||||||
"regex": "",
|
|
||||||
"skipUrlSync": false,
|
|
||||||
"sort": 1,
|
|
||||||
"tagValuesQuery": "",
|
|
||||||
"tags": [
|
|
||||||
|
|
||||||
],
|
|
||||||
"tagsQuery": "",
|
|
||||||
"type": "interval",
|
|
||||||
"useTags": false
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"allValue": null,
|
"allValue": null,
|
||||||
"auto": false,
|
"auto": false,
|
||||||
|
|
|
@ -252,25 +252,25 @@ data:
|
||||||
"name": "kube-apiserver.rules",
|
"name": "kube-apiserver.rules",
|
||||||
"rules": [
|
"rules": [
|
||||||
{
|
{
|
||||||
"expr": "sum(rate(apiserver_request_duration_seconds_sum{subresource!=\"log\",verb!~\"LIST|WATCH|WATCHLIST|PROXY|CONNECT\"}[5m])) without(instance, pod)\n/\nsum(rate(apiserver_request_duration_seconds_count{subresource!=\"log\",verb!~\"LIST|WATCH|WATCHLIST|PROXY|CONNECT\"}[5m])) without(instance, pod)\n",
|
"expr": "sum(rate(apiserver_request_duration_seconds_sum{subresource!=\"log\",verb!~\"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT\"}[5m])) without(instance, pod)\n/\nsum(rate(apiserver_request_duration_seconds_count{subresource!=\"log\",verb!~\"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT\"}[5m])) without(instance, pod)\n",
|
||||||
"record": "cluster:apiserver_request_duration_seconds:mean5m"
|
"record": "cluster:apiserver_request_duration_seconds:mean5m"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",subresource!=\"log\",verb!~\"LIST|WATCH|WATCHLIST|PROXY|CONNECT\"}[5m])) without(instance, pod))\n",
|
"expr": "histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",subresource!=\"log\",verb!~\"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT\"}[5m])) without(instance, pod))\n",
|
||||||
"labels": {
|
"labels": {
|
||||||
"quantile": "0.99"
|
"quantile": "0.99"
|
||||||
},
|
},
|
||||||
"record": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile"
|
"record": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "histogram_quantile(0.9, sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",subresource!=\"log\",verb!~\"LIST|WATCH|WATCHLIST|PROXY|CONNECT\"}[5m])) without(instance, pod))\n",
|
"expr": "histogram_quantile(0.9, sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",subresource!=\"log\",verb!~\"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT\"}[5m])) without(instance, pod))\n",
|
||||||
"labels": {
|
"labels": {
|
||||||
"quantile": "0.9"
|
"quantile": "0.9"
|
||||||
},
|
},
|
||||||
"record": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile"
|
"record": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "histogram_quantile(0.5, sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",subresource!=\"log\",verb!~\"LIST|WATCH|WATCHLIST|PROXY|CONNECT\"}[5m])) without(instance, pod))\n",
|
"expr": "histogram_quantile(0.5, sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",subresource!=\"log\",verb!~\"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT\"}[5m])) without(instance, pod))\n",
|
||||||
"labels": {
|
"labels": {
|
||||||
"quantile": "0.5"
|
"quantile": "0.5"
|
||||||
},
|
},
|
||||||
|
@ -805,6 +805,7 @@ data:
|
||||||
{
|
{
|
||||||
"alert": "ErrorBudgetBurn",
|
"alert": "ErrorBudgetBurn",
|
||||||
"annotations": {
|
"annotations": {
|
||||||
|
"message": "High requests error budget burn for job=apiserver (current value: {{ $value }})",
|
||||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-errorbudgetburn"
|
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-errorbudgetburn"
|
||||||
},
|
},
|
||||||
"expr": "(\n status_class_5xx:apiserver_request_total:ratio_rate1h{job=\"apiserver\"} > (14.4*0.010000)\n and\n status_class_5xx:apiserver_request_total:ratio_rate5m{job=\"apiserver\"} > (14.4*0.010000)\n)\nor\n(\n status_class_5xx:apiserver_request_total:ratio_rate6h{job=\"apiserver\"} > (6*0.010000)\n and\n status_class_5xx:apiserver_request_total:ratio_rate30m{job=\"apiserver\"} > (6*0.010000)\n)\n",
|
"expr": "(\n status_class_5xx:apiserver_request_total:ratio_rate1h{job=\"apiserver\"} > (14.4*0.010000)\n and\n status_class_5xx:apiserver_request_total:ratio_rate5m{job=\"apiserver\"} > (14.4*0.010000)\n)\nor\n(\n status_class_5xx:apiserver_request_total:ratio_rate6h{job=\"apiserver\"} > (6*0.010000)\n and\n status_class_5xx:apiserver_request_total:ratio_rate30m{job=\"apiserver\"} > (6*0.010000)\n)\n",
|
||||||
|
@ -816,6 +817,7 @@ data:
|
||||||
{
|
{
|
||||||
"alert": "ErrorBudgetBurn",
|
"alert": "ErrorBudgetBurn",
|
||||||
"annotations": {
|
"annotations": {
|
||||||
|
"message": "High requests error budget burn for job=apiserver (current value: {{ $value }})",
|
||||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-errorbudgetburn"
|
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-errorbudgetburn"
|
||||||
},
|
},
|
||||||
"expr": "(\n status_class_5xx:apiserver_request_total:ratio_rate1d{job=\"apiserver\"} > (3*0.010000)\n and\n status_class_5xx:apiserver_request_total:ratio_rate2h{job=\"apiserver\"} > (3*0.010000)\n)\nor\n(\n status_class_5xx:apiserver_request_total:ratio_rate3d{job=\"apiserver\"} > (0.010000)\n and\n status_class_5xx:apiserver_request_total:ratio_rate6h{job=\"apiserver\"} > (0.010000)\n)\n",
|
"expr": "(\n status_class_5xx:apiserver_request_total:ratio_rate1d{job=\"apiserver\"} > (3*0.010000)\n and\n status_class_5xx:apiserver_request_total:ratio_rate2h{job=\"apiserver\"} > (3*0.010000)\n)\nor\n(\n status_class_5xx:apiserver_request_total:ratio_rate3d{job=\"apiserver\"} > (0.010000)\n and\n status_class_5xx:apiserver_request_total:ratio_rate6h{job=\"apiserver\"} > (0.010000)\n)\n",
|
||||||
|
@ -853,30 +855,6 @@ data:
|
||||||
"severity": "critical"
|
"severity": "critical"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"alert": "KubeAPIErrorsHigh",
|
|
||||||
"annotations": {
|
|
||||||
"message": "API server is returning errors for {{ $value | humanizePercentage }} of requests.",
|
|
||||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh"
|
|
||||||
},
|
|
||||||
"expr": "sum(rate(apiserver_request_total{job=\"apiserver\",code=~\"5..\"}[5m]))\n /\nsum(rate(apiserver_request_total{job=\"apiserver\"}[5m])) > 0.03\n",
|
|
||||||
"for": "10m",
|
|
||||||
"labels": {
|
|
||||||
"severity": "critical"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"alert": "KubeAPIErrorsHigh",
|
|
||||||
"annotations": {
|
|
||||||
"message": "API server is returning errors for {{ $value | humanizePercentage }} of requests.",
|
|
||||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh"
|
|
||||||
},
|
|
||||||
"expr": "sum(rate(apiserver_request_total{job=\"apiserver\",code=~\"5..\"}[5m]))\n /\nsum(rate(apiserver_request_total{job=\"apiserver\"}[5m])) > 0.01\n",
|
|
||||||
"for": "10m",
|
|
||||||
"labels": {
|
|
||||||
"severity": "warning"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"alert": "KubeAPIErrorsHigh",
|
"alert": "KubeAPIErrorsHigh",
|
||||||
"annotations": {
|
"annotations": {
|
||||||
|
@ -993,7 +971,7 @@ data:
|
||||||
"message": "Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage }} of its Pod capacity.",
|
"message": "Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage }} of its Pod capacity.",
|
||||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubelettoomanypods"
|
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubelettoomanypods"
|
||||||
},
|
},
|
||||||
"expr": "max(max(kubelet_running_pod_count{job=\"kubelet\"}) by(instance) * on(instance) group_left(node) kubelet_node_name{job=\"kubelet\"}) by(node) / max(kube_node_status_capacity_pods{job=\"kube-state-metrics\"}) by(node) > 0.95\n",
|
"expr": "max(max(kubelet_running_pod_count{job=\"kubelet\"}) by(instance) * on(instance) group_left(node) kubelet_node_name{job=\"kubelet\"}) by(node) / max(kube_node_status_capacity_pods{job=\"kube-state-metrics\"} != 1) by(node) > 0.95\n",
|
||||||
"for": "15m",
|
"for": "15m",
|
||||||
"labels": {
|
"labels": {
|
||||||
"severity": "warning"
|
"severity": "warning"
|
||||||
|
@ -1029,7 +1007,7 @@ data:
|
||||||
"message": "Kubelet Pod startup 99th percentile latency is {{ $value }} seconds on node {{ $labels.node }}.",
|
"message": "Kubelet Pod startup 99th percentile latency is {{ $value }} seconds on node {{ $labels.node }}.",
|
||||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletpodstartuplatencyhigh"
|
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletpodstartuplatencyhigh"
|
||||||
},
|
},
|
||||||
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job=\"kubelet\"}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name > 5\n",
|
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job=\"kubelet\"}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name > 60\n",
|
||||||
"for": "15m",
|
"for": "15m",
|
||||||
"labels": {
|
"labels": {
|
||||||
"severity": "warning"
|
"severity": "warning"
|
||||||
|
@ -1085,9 +1063,167 @@ data:
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
loki.yaml: |-
|
||||||
|
{
|
||||||
|
"groups": [
|
||||||
|
{
|
||||||
|
"name": "loki_rules",
|
||||||
|
"rules": [
|
||||||
|
{
|
||||||
|
"expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, job))",
|
||||||
|
"record": "job:loki_request_duration_seconds:99quantile"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, job))",
|
||||||
|
"record": "job:loki_request_duration_seconds:50quantile"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum(rate(loki_request_duration_seconds_sum[1m])) by (job) / sum(rate(loki_request_duration_seconds_count[1m])) by (job)",
|
||||||
|
"record": "job:loki_request_duration_seconds:avg"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, job)",
|
||||||
|
"record": "job:loki_request_duration_seconds_bucket:sum_rate"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum(rate(loki_request_duration_seconds_sum[1m])) by (job)",
|
||||||
|
"record": "job:loki_request_duration_seconds_sum:sum_rate"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum(rate(loki_request_duration_seconds_count[1m])) by (job)",
|
||||||
|
"record": "job:loki_request_duration_seconds_count:sum_rate"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, job, route))",
|
||||||
|
"record": "job_route:loki_request_duration_seconds:99quantile"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, job, route))",
|
||||||
|
"record": "job_route:loki_request_duration_seconds:50quantile"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum(rate(loki_request_duration_seconds_sum[1m])) by (job, route) / sum(rate(loki_request_duration_seconds_count[1m])) by (job, route)",
|
||||||
|
"record": "job_route:loki_request_duration_seconds:avg"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, job, route)",
|
||||||
|
"record": "job_route:loki_request_duration_seconds_bucket:sum_rate"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum(rate(loki_request_duration_seconds_sum[1m])) by (job, route)",
|
||||||
|
"record": "job_route:loki_request_duration_seconds_sum:sum_rate"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum(rate(loki_request_duration_seconds_count[1m])) by (job, route)",
|
||||||
|
"record": "job_route:loki_request_duration_seconds_count:sum_rate"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, namespace, job, route))",
|
||||||
|
"record": "namespace_job_route:loki_request_duration_seconds:99quantile"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, namespace, job, route))",
|
||||||
|
"record": "namespace_job_route:loki_request_duration_seconds:50quantile"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum(rate(loki_request_duration_seconds_sum[1m])) by (namespace, job, route) / sum(rate(loki_request_duration_seconds_count[1m])) by (namespace, job, route)",
|
||||||
|
"record": "namespace_job_route:loki_request_duration_seconds:avg"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, namespace, job, route)",
|
||||||
|
"record": "namespace_job_route:loki_request_duration_seconds_bucket:sum_rate"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum(rate(loki_request_duration_seconds_sum[1m])) by (namespace, job, route)",
|
||||||
|
"record": "namespace_job_route:loki_request_duration_seconds_sum:sum_rate"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum(rate(loki_request_duration_seconds_count[1m])) by (namespace, job, route)",
|
||||||
|
"record": "namespace_job_route:loki_request_duration_seconds_count:sum_rate"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "loki_alerts",
|
||||||
|
"rules": [
|
||||||
|
{
|
||||||
|
"alert": "LokiRequestErrors",
|
||||||
|
"annotations": {
|
||||||
|
"message": "{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf \"%.2f\" $value }}% errors.\n"
|
||||||
|
},
|
||||||
|
"expr": "100 * sum(rate(loki_request_duration_seconds_count{status_code=~\"5..\"}[1m])) by (namespace, job, route)\n /\nsum(rate(loki_request_duration_seconds_count[1m])) by (namespace, job, route)\n > 10\n",
|
||||||
|
"for": "15m",
|
||||||
|
"labels": {
|
||||||
|
"severity": "critical"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"alert": "LokiRequestLatency",
|
||||||
|
"annotations": {
|
||||||
|
"message": "{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf \"%.2f\" $value }}s 99th percentile latency.\n"
|
||||||
|
},
|
||||||
|
"expr": "namespace_job_route:loki_request_duration_seconds:99quantile{route!~\"(?i).*tail.*\"} > 1\n",
|
||||||
|
"for": "15m",
|
||||||
|
"labels": {
|
||||||
|
"severity": "critical"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
node-exporter.yaml: |-
|
node-exporter.yaml: |-
|
||||||
{
|
{
|
||||||
"groups": [
|
"groups": [
|
||||||
|
{
|
||||||
|
"name": "node-exporter.rules",
|
||||||
|
"rules": [
|
||||||
|
{
|
||||||
|
"expr": "count without (cpu) (\n count without (mode) (\n node_cpu_seconds_total{job=\"node-exporter\"}\n )\n)\n",
|
||||||
|
"record": "instance:node_num_cpu:sum"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "1 - avg without (cpu, mode) (\n rate(node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\"}[1m])\n)\n",
|
||||||
|
"record": "instance:node_cpu_utilisation:rate1m"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "(\n node_load1{job=\"node-exporter\"}\n/\n instance:node_num_cpu:sum{job=\"node-exporter\"}\n)\n",
|
||||||
|
"record": "instance:node_load1_per_cpu:ratio"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "1 - (\n node_memory_MemAvailable_bytes{job=\"node-exporter\"}\n/\n node_memory_MemTotal_bytes{job=\"node-exporter\"}\n)\n",
|
||||||
|
"record": "instance:node_memory_utilisation:ratio"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "rate(node_vmstat_pgmajfault{job=\"node-exporter\"}[1m])\n",
|
||||||
|
"record": "instance:node_vmstat_pgmajfault:rate1m"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "rate(node_disk_io_time_seconds_total{job=\"node-exporter\", device!~\"dm.*\"}[1m])\n",
|
||||||
|
"record": "instance_device:node_disk_io_time_seconds:rate1m"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "rate(node_disk_io_time_weighted_seconds_total{job=\"node-exporter\", device!~\"dm.*\"}[1m])\n",
|
||||||
|
"record": "instance_device:node_disk_io_time_weighted_seconds:rate1m"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum without (device) (\n rate(node_network_receive_bytes_total{job=\"node-exporter\", device!=\"lo\"}[1m])\n)\n",
|
||||||
|
"record": "instance:node_network_receive_bytes_excluding_lo:rate1m"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum without (device) (\n rate(node_network_transmit_bytes_total{job=\"node-exporter\", device!=\"lo\"}[1m])\n)\n",
|
||||||
|
"record": "instance:node_network_transmit_bytes_excluding_lo:rate1m"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum without (device) (\n rate(node_network_receive_drop_total{job=\"node-exporter\", device!=\"lo\"}[1m])\n)\n",
|
||||||
|
"record": "instance:node_network_receive_drop_excluding_lo:rate1m"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum without (device) (\n rate(node_network_transmit_drop_total{job=\"node-exporter\", device!=\"lo\"}[1m])\n)\n",
|
||||||
|
"record": "instance:node_network_transmit_drop_excluding_lo:rate1m"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "node-exporter",
|
"name": "node-exporter",
|
||||||
"rules": [
|
"rules": [
|
||||||
|
@ -1210,6 +1346,41 @@ data:
|
||||||
"labels": {
|
"labels": {
|
||||||
"severity": "warning"
|
"severity": "warning"
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"alert": "NodeHighNumberConntrackEntriesUsed",
|
||||||
|
"annotations": {
|
||||||
|
"description": "{{ $value | humanizePercentage }} of conntrack entries are used",
|
||||||
|
"summary": "Number of conntrack are getting close to the limit"
|
||||||
|
},
|
||||||
|
"expr": "(node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.75\n",
|
||||||
|
"labels": {
|
||||||
|
"severity": "warning"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"alert": "NodeClockSkewDetected",
|
||||||
|
"annotations": {
|
||||||
|
"message": "Clock on {{ $labels.instance }} is out of sync by more than 300s. Ensure NTP is configured correctly on this host.",
|
||||||
|
"summary": "Clock skew detected."
|
||||||
|
},
|
||||||
|
"expr": "(\n node_timex_offset_seconds > 0.05\nand\n deriv(node_timex_offset_seconds[5m]) >= 0\n)\nor\n(\n node_timex_offset_seconds < -0.05\nand\n deriv(node_timex_offset_seconds[5m]) <= 0\n)\n",
|
||||||
|
"for": "10m",
|
||||||
|
"labels": {
|
||||||
|
"severity": "warning"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"alert": "NodeClockNotSynchronising",
|
||||||
|
"annotations": {
|
||||||
|
"message": "Clock on {{ $labels.instance }} is not synchronising. Ensure NTP is configured on this host.",
|
||||||
|
"summary": "Clock not synchronising."
|
||||||
|
},
|
||||||
|
"expr": "min_over_time(node_timex_sync_status[5m]) == 0\n",
|
||||||
|
"for": "10m",
|
||||||
|
"labels": {
|
||||||
|
"severity": "warning"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue