Discard Prometheus etcd gRPC failure alert
* Kubernetes watch expiry is not a gRPC code we care about * Background: This rule is typically removed, but was added back in
This commit is contained in:
parent
1b3a0f6ebc
commit
f884de847e
|
@ -50,28 +50,6 @@ data:
|
|||
"severity": "warning"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "etcdHighNumberOfFailedGRPCRequests",
|
||||
"annotations": {
|
||||
"message": "etcd cluster \"{{ $labels.job }}\": {{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}."
|
||||
},
|
||||
"expr": "100 * sum(rate(grpc_server_handled_total{job=~\".*etcd.*\", grpc_code!=\"OK\"}[5m])) without (grpc_type, grpc_code)\n /\nsum(rate(grpc_server_handled_total{job=~\".*etcd.*\"}[5m])) without (grpc_type, grpc_code)\n > 1\n",
|
||||
"for": "10m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "etcdHighNumberOfFailedGRPCRequests",
|
||||
"annotations": {
|
||||
"message": "etcd cluster \"{{ $labels.job }}\": {{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}."
|
||||
},
|
||||
"expr": "100 * sum(rate(grpc_server_handled_total{job=~\".*etcd.*\", grpc_code!=\"OK\"}[5m])) without (grpc_type, grpc_code)\n /\nsum(rate(grpc_server_handled_total{job=~\".*etcd.*\"}[5m])) without (grpc_type, grpc_code)\n > 5\n",
|
||||
"for": "5m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "etcdGRPCRequestsSlow",
|
||||
"annotations": {
|
||||
|
|
Loading…
Reference in New Issue