addons: Update Prometheus from v2.1.0 to v2.2.0-rc.1

* Annotate the Prometheus service so that Prometheus scrapes
  its own metrics (enables the Prometheus* alerts)
* Update kube-state-metrics addon-resizer to 1.7
* Use port 8080 for kube-state-metrics
* Add PrometheusNotIngestingSamples alert rule
* Change K8SKubeletDown alert rule to fire when more than 10%
  of kubelets are down, rather than more than 1%
  * https://github.com/coreos/prometheus-operator/pull/1032
This commit is contained in:
Dalton Hubble 2018-03-02 18:47:37 -08:00
parent c112ee3829
commit 9307e97c46
5 changed files with 17 additions and 7 deletions

View File

@ -18,7 +18,7 @@ spec:
serviceAccountName: prometheus
containers:
- name: prometheus
image: quay.io/prometheus/prometheus:v2.1.0
image: quay.io/prometheus/prometheus:v2.2.0-rc.1
args:
- '--config.file=/etc/prometheus/prometheus.yaml'
ports:

View File

@ -33,7 +33,7 @@ spec:
initialDelaySeconds: 5
timeoutSeconds: 5
- name: addon-resizer
image: gcr.io/google_containers/addon-resizer:1.0
image: gcr.io/google_containers/addon-resizer:1.7
resources:
limits:
cpu: 100m
@ -54,8 +54,8 @@ spec:
- /pod_nanny
- --container=kube-state-metrics
- --cpu=100m
- --extra-cpu=2m
- --memory=150Mi
- --extra-memory=30Mi
- --extra-cpu=1m
- --memory=100Mi
- --extra-memory=2Mi
- --threshold=5
- --deployment=kube-state-metrics

View File

@ -15,5 +15,5 @@ spec:
ports:
- name: metrics
protocol: TCP
port: 80
port: 8080
targetPort: 8080

View File

@ -353,7 +353,7 @@ data:
description: Prometheus failed to scrape {{ $value }}% of kubelets.
- alert: K8SKubeletDown
expr: (absent(up{job="kubelet"} == 1) or count(up{job="kubelet"} == 0) / count(up{job="kubelet"}))
* 100 > 1
* 100 > 10
for: 1h
labels:
severity: critical
@ -588,3 +588,11 @@ data:
description: '{{$labels.job}} at {{$labels.instance}} has a corrupted write-ahead
log (WAL).'
summary: Prometheus write-ahead log is corrupted
- alert: PrometheusNotIngestingSamples
expr: rate(prometheus_tsdb_head_samples_appended_total[5m]) <= 0
for: 10m
labels:
severity: warning
annotations:
description: "Prometheus {{ $labels.namespace }}/{{ $labels.pod}} isn't ingesting samples."
summary: "Prometheus isn't ingesting samples"

View File

@ -3,6 +3,8 @@ kind: Service
metadata:
name: prometheus
namespace: monitoring
annotations:
prometheus.io/scrape: 'true'
spec:
type: ClusterIP
selector: