mirror of
https://github.com/puppetmaster/typhoon.git
synced 2024-12-24 04:19:33 +01:00
addons: Update Prometheus from v2.1.0 to v2.2.0
* Annotate Prometheus service to scrape metrics from Prometheus itself (enables Prometheus* alerts)
* Update kube-state-metrics addon-resizer to 1.7
* Use port 8080 for kube-state-metrics
* Add PrometheusNotIngestingSamples alert rule
* Change K8SKubeletDown alert rule to fire when 10% of kubelets are down, not 1%
* https://github.com/coreos/prometheus-operator/pull/1032
This commit is contained in:
parent
c112ee3829
commit
9307e97c46
@ -18,7 +18,7 @@ spec:
|
||||
serviceAccountName: prometheus
|
||||
containers:
|
||||
- name: prometheus
|
||||
image: quay.io/prometheus/prometheus:v2.1.0
|
||||
image: quay.io/prometheus/prometheus:v2.2.0-rc.1
|
||||
args:
|
||||
- '--config.file=/etc/prometheus/prometheus.yaml'
|
||||
ports:
|
||||
|
@ -33,7 +33,7 @@ spec:
|
||||
initialDelaySeconds: 5
|
||||
timeoutSeconds: 5
|
||||
- name: addon-resizer
|
||||
image: gcr.io/google_containers/addon-resizer:1.0
|
||||
image: gcr.io/google_containers/addon-resizer:1.7
|
||||
resources:
|
||||
limits:
|
||||
cpu: 100m
|
||||
@ -54,8 +54,8 @@ spec:
|
||||
- /pod_nanny
|
||||
- --container=kube-state-metrics
|
||||
- --cpu=100m
|
||||
- --extra-cpu=2m
|
||||
- --memory=150Mi
|
||||
- --extra-memory=30Mi
|
||||
- --extra-cpu=1m
|
||||
- --memory=100Mi
|
||||
- --extra-memory=2Mi
|
||||
- --threshold=5
|
||||
- --deployment=kube-state-metrics
|
||||
|
@ -15,5 +15,5 @@ spec:
|
||||
ports:
|
||||
- name: metrics
|
||||
protocol: TCP
|
||||
port: 80
|
||||
port: 8080
|
||||
targetPort: 8080
|
||||
|
@ -353,7 +353,7 @@ data:
|
||||
description: Prometheus failed to scrape {{ $value }}% of kubelets.
|
||||
- alert: K8SKubeletDown
|
||||
expr: (absent(up{job="kubelet"} == 1) or count(up{job="kubelet"} == 0) / count(up{job="kubelet"}))
|
||||
* 100 > 1
|
||||
* 100 > 10
|
||||
for: 1h
|
||||
labels:
|
||||
severity: critical
|
||||
@ -588,3 +588,11 @@ data:
|
||||
description: '{{$labels.job}} at {{$labels.instance}} has a corrupted write-ahead
|
||||
log (WAL).'
|
||||
summary: Prometheus write-ahead log is corrupted
|
||||
- alert: PrometheusNotIngestingSamples
|
||||
expr: rate(prometheus_tsdb_head_samples_appended_total[5m]) <= 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
description: "Prometheus {{ $labels.namespace }}/{{ $labels.pod}} isn't ingesting samples."
|
||||
summary: "Prometheus isn't ingesting samples"
|
||||
|
@ -3,6 +3,8 @@ kind: Service
|
||||
metadata:
|
||||
name: prometheus
|
||||
namespace: monitoring
|
||||
annotations:
|
||||
prometheus.io/scrape: 'true'
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
|
Loading…
Reference in New Issue
Block a user