From f884de847eb1c1eec42cc3fb99b957607678dca9 Mon Sep 17 00:00:00 2001 From: Dalton Hubble Date: Sat, 14 Nov 2020 13:17:56 -0800 Subject: [PATCH] Discard Prometheus etcd gRPC failure alert * Kubernetes watch expiry is not a gRPC code we care about * Background: This rule is typically removed, but was added back in --- addons/prometheus/rules.yaml | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/addons/prometheus/rules.yaml b/addons/prometheus/rules.yaml index 926af9e1..c7179aae 100644 --- a/addons/prometheus/rules.yaml +++ b/addons/prometheus/rules.yaml @@ -50,28 +50,6 @@ data: "severity": "warning" } }, - { - "alert": "etcdHighNumberOfFailedGRPCRequests", - "annotations": { - "message": "etcd cluster \"{{ $labels.job }}\": {{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}." - }, - "expr": "100 * sum(rate(grpc_server_handled_total{job=~\".*etcd.*\", grpc_code!=\"OK\"}[5m])) without (grpc_type, grpc_code)\n /\nsum(rate(grpc_server_handled_total{job=~\".*etcd.*\"}[5m])) without (grpc_type, grpc_code)\n > 1\n", - "for": "10m", - "labels": { - "severity": "warning" - } - }, - { - "alert": "etcdHighNumberOfFailedGRPCRequests", - "annotations": { - "message": "etcd cluster \"{{ $labels.job }}\": {{ $value }}% of requests for {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}." - }, - "expr": "100 * sum(rate(grpc_server_handled_total{job=~\".*etcd.*\", grpc_code!=\"OK\"}[5m])) without (grpc_type, grpc_code)\n /\nsum(rate(grpc_server_handled_total{job=~\".*etcd.*\"}[5m])) without (grpc_type, grpc_code)\n > 5\n", - "for": "5m", - "labels": { - "severity": "critical" - } - }, { "alert": "etcdGRPCRequestsSlow", "annotations": {