From 9307e97c465628138a19aecfe12a55b1cde8906c Mon Sep 17 00:00:00 2001 From: Dalton Hubble Date: Fri, 2 Mar 2018 18:47:37 -0800 Subject: [PATCH] addons: Update Prometheus from v2.1.0 to v2.2.0-rc.1 * Annotate Prometheus service to scrape metrics from Prometheus itself (enables Prometheus* alerts) * Update kube-state-metrics addon-resizer to 1.7 * Use port 8080 for kube-state-metrics * Add PrometheusNotIngestingSamples alert rule * Change K8SKubeletDown alert rule to fire when 10% of kubelets are down, not 1% * https://github.com/coreos/prometheus-operator/pull/1032 --- addons/prometheus/deployment.yaml | 2 +- .../exporters/kube-state-metrics/deployment.yaml | 8 ++++---- .../exporters/kube-state-metrics/service.yaml | 2 +- addons/prometheus/rules.yaml | 10 +++++++++- addons/prometheus/service.yaml | 2 ++ 5 files changed, 17 insertions(+), 7 deletions(-) diff --git a/addons/prometheus/deployment.yaml b/addons/prometheus/deployment.yaml index 2a2b7b15..1ba1565e 100644 --- a/addons/prometheus/deployment.yaml +++ b/addons/prometheus/deployment.yaml @@ -18,7 +18,7 @@ spec: serviceAccountName: prometheus containers: - name: prometheus - image: quay.io/prometheus/prometheus:v2.1.0 + image: quay.io/prometheus/prometheus:v2.2.0-rc.1 args: - '--config.file=/etc/prometheus/prometheus.yaml' ports: diff --git a/addons/prometheus/exporters/kube-state-metrics/deployment.yaml b/addons/prometheus/exporters/kube-state-metrics/deployment.yaml index 33ff72ee..8cdb0368 100644 --- a/addons/prometheus/exporters/kube-state-metrics/deployment.yaml +++ b/addons/prometheus/exporters/kube-state-metrics/deployment.yaml @@ -33,7 +33,7 @@ spec: initialDelaySeconds: 5 timeoutSeconds: 5 - name: addon-resizer - image: gcr.io/google_containers/addon-resizer:1.0 + image: gcr.io/google_containers/addon-resizer:1.7 resources: limits: cpu: 100m @@ -54,8 +54,8 @@ spec: - /pod_nanny - --container=kube-state-metrics - --cpu=100m - - --extra-cpu=2m - - --memory=150Mi - - --extra-memory=30Mi + - --extra-cpu=1m + - 
--memory=100Mi + - --extra-memory=2Mi - --threshold=5 - --deployment=kube-state-metrics diff --git a/addons/prometheus/exporters/kube-state-metrics/service.yaml b/addons/prometheus/exporters/kube-state-metrics/service.yaml index 43222273..fbdad789 100644 --- a/addons/prometheus/exporters/kube-state-metrics/service.yaml +++ b/addons/prometheus/exporters/kube-state-metrics/service.yaml @@ -15,5 +15,5 @@ spec: ports: - name: metrics protocol: TCP - port: 80 + port: 8080 targetPort: 8080 diff --git a/addons/prometheus/rules.yaml b/addons/prometheus/rules.yaml index b61ae4d4..4bdcde24 100644 --- a/addons/prometheus/rules.yaml +++ b/addons/prometheus/rules.yaml @@ -353,7 +353,7 @@ data: description: Prometheus failed to scrape {{ $value }}% of kubelets. - alert: K8SKubeletDown expr: (absent(up{job="kubelet"} == 1) or count(up{job="kubelet"} == 0) / count(up{job="kubelet"})) - * 100 > 1 + * 100 > 10 for: 1h labels: severity: critical @@ -588,3 +588,11 @@ data: description: '{{$labels.job}} at {{$labels.instance}} has a corrupted write-ahead log (WAL).' summary: Prometheus write-ahead log is corrupted + - alert: PrometheusNotIngestingSamples + expr: rate(prometheus_tsdb_head_samples_appended_total[5m]) <= 0 + for: 10m + labels: + severity: warning + annotations: + description: "Prometheus {{ $labels.namespace }}/{{ $labels.pod}} isn't ingesting samples." + summary: "Prometheus isn't ingesting samples" diff --git a/addons/prometheus/service.yaml b/addons/prometheus/service.yaml index 454977da..50014e25 100644 --- a/addons/prometheus/service.yaml +++ b/addons/prometheus/service.yaml @@ -3,6 +3,8 @@ kind: Service metadata: name: prometheus namespace: monitoring + annotations: + prometheus.io/scrape: 'true' spec: type: ClusterIP selector: