addons: Update Prometheus from v2.1.0 to v2.2.0
* Annotate Prometheus service to scrape metrics from Prometheus itself (enables Prometheus* alerts)
* Update kube-state-metrics addon-resizer to 1.7
* Use port 8080 for kube-state-metrics
* Add PrometheusNotIngestingSamples alert rule
* Change K8SKubeletDown alert rule to fire when 10% of kubelets are down, not 1%
* https://github.com/coreos/prometheus-operator/pull/1032
This commit is contained in:
parent
c112ee3829
commit
9307e97c46
|
@ -18,7 +18,7 @@ spec:
|
||||||
serviceAccountName: prometheus
|
serviceAccountName: prometheus
|
||||||
containers:
|
containers:
|
||||||
- name: prometheus
|
- name: prometheus
|
||||||
image: quay.io/prometheus/prometheus:v2.1.0
|
image: quay.io/prometheus/prometheus:v2.2.0-rc.1
|
||||||
args:
|
args:
|
||||||
- '--config.file=/etc/prometheus/prometheus.yaml'
|
- '--config.file=/etc/prometheus/prometheus.yaml'
|
||||||
ports:
|
ports:
|
||||||
|
|
|
@ -33,7 +33,7 @@ spec:
|
||||||
initialDelaySeconds: 5
|
initialDelaySeconds: 5
|
||||||
timeoutSeconds: 5
|
timeoutSeconds: 5
|
||||||
- name: addon-resizer
|
- name: addon-resizer
|
||||||
image: gcr.io/google_containers/addon-resizer:1.0
|
image: gcr.io/google_containers/addon-resizer:1.7
|
||||||
resources:
|
resources:
|
||||||
limits:
|
limits:
|
||||||
cpu: 100m
|
cpu: 100m
|
||||||
|
@ -54,8 +54,8 @@ spec:
|
||||||
- /pod_nanny
|
- /pod_nanny
|
||||||
- --container=kube-state-metrics
|
- --container=kube-state-metrics
|
||||||
- --cpu=100m
|
- --cpu=100m
|
||||||
- --extra-cpu=2m
|
- --extra-cpu=1m
|
||||||
- --memory=150Mi
|
- --memory=100Mi
|
||||||
- --extra-memory=30Mi
|
- --extra-memory=2Mi
|
||||||
- --threshold=5
|
- --threshold=5
|
||||||
- --deployment=kube-state-metrics
|
- --deployment=kube-state-metrics
|
||||||
|
|
|
@ -15,5 +15,5 @@ spec:
|
||||||
ports:
|
ports:
|
||||||
- name: metrics
|
- name: metrics
|
||||||
protocol: TCP
|
protocol: TCP
|
||||||
port: 80
|
port: 8080
|
||||||
targetPort: 8080
|
targetPort: 8080
|
||||||
|
|
|
@ -353,7 +353,7 @@ data:
|
||||||
description: Prometheus failed to scrape {{ $value }}% of kubelets.
|
description: Prometheus failed to scrape {{ $value }}% of kubelets.
|
||||||
- alert: K8SKubeletDown
|
- alert: K8SKubeletDown
|
||||||
expr: (absent(up{job="kubelet"} == 1) or count(up{job="kubelet"} == 0) / count(up{job="kubelet"}))
|
expr: (absent(up{job="kubelet"} == 1) or count(up{job="kubelet"} == 0) / count(up{job="kubelet"}))
|
||||||
* 100 > 1
|
* 100 > 10
|
||||||
for: 1h
|
for: 1h
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: critical
|
||||||
|
@ -588,3 +588,11 @@ data:
|
||||||
description: '{{$labels.job}} at {{$labels.instance}} has a corrupted write-ahead
|
description: '{{$labels.job}} at {{$labels.instance}} has a corrupted write-ahead
|
||||||
log (WAL).'
|
log (WAL).'
|
||||||
summary: Prometheus write-ahead log is corrupted
|
summary: Prometheus write-ahead log is corrupted
|
||||||
|
- alert: PrometheusNotIngestingSamples
|
||||||
|
expr: rate(prometheus_tsdb_head_samples_appended_total[5m]) <= 0
|
||||||
|
for: 10m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
description: "Prometheus {{ $labels.namespace }}/{{ $labels.pod}} isn't ingesting samples."
|
||||||
|
summary: "Prometheus isn't ingesting samples"
|
||||||
|
|
|
@ -3,6 +3,8 @@ kind: Service
|
||||||
metadata:
|
metadata:
|
||||||
name: prometheus
|
name: prometheus
|
||||||
namespace: monitoring
|
namespace: monitoring
|
||||||
|
annotations:
|
||||||
|
prometheus.io/scrape: 'true'
|
||||||
spec:
|
spec:
|
||||||
type: ClusterIP
|
type: ClusterIP
|
||||||
selector:
|
selector:
|
||||||
|
|
Loading…
Reference in New Issue