addons: Include Prometheus and node-exporter manifests

This commit is contained in:
Dalton Hubble 2017-10-22 17:00:41 -07:00
parent a73f57fe4e
commit d046d45769
14 changed files with 453 additions and 5 deletions

View File

@ -0,0 +1,228 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: prometheus-config
namespace: monitoring
data:
prometheus.yaml: |-
# Global config
global:
scrape_interval: 15s
# Scrape configs for running Prometheus on a Kubernetes cluster.
# This uses separate scrape configs for cluster components (i.e. API server, node)
# and services to allow each to use different authentication configs.
#
# Kubernetes labels will be added as Prometheus labels on metrics via the
# `labelmap` relabeling action.
scrape_configs:
# Scrape config for API servers.
#
# Kubernetes exposes API servers as endpoints to the default/kubernetes
# service so this uses `endpoints` role and uses relabelling to only keep
# the endpoints associated with the default/kubernetes service using the
# default named port `https`. This works for single API server deployments as
# well as HA API server deployments.
- job_name: 'kubernetes-apiservers'
kubernetes_sd_configs:
- role: endpoints
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
# Using endpoints to discover kube-apiserver targets finds the pod IP
# (host IP since apiserver is uses host network) which is not used in
# the server certificate.
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
# Keep only the default/kubernetes service endpoints for the https port. This
# will add targets for each API server which Kubernetes adds an endpoint to
# the default/kubernetes service.
relabel_configs:
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
action: keep
regex: default;kubernetes;https
# Scrape config for node (i.e. kubelet) /metrics (e.g. 'kubelet_'). Explore
# metrics from a node by scraping kubelet (127.0.0.1:10255/metrics).
#
# Rather than connecting directly to the node, the scrape is proxied though the
# Kubernetes apiserver. This means it will work if Prometheus is running out of
# cluster, or can't connect to nodes for some other reason (e.g. because of
# firewalling).
- job_name: 'kubernetes-nodes'
kubernetes_sd_configs:
- role: node
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics
# Scrape config for Kubelet cAdvisor. Explore metrics from a node by
# scraping kubelet (127.0.0.1:10255/metrics/cadvisor).
#
# This is required for Kubernetes 1.7.3 and later, where cAdvisor metrics
# (those whose names begin with 'container_') have been removed from the
# Kubelet metrics endpoint. This job scrapes the cAdvisor endpoint to
# retrieve those metrics.
#
# Rather than connecting directly to the node, the scrape is proxied though the
# Kubernetes apiserver. This means it will work if Prometheus is running out of
# cluster, or can't connect to nodes for some other reason (e.g. because of
# firewalling).
- job_name: 'kubernetes-cadvisor'
kubernetes_sd_configs:
- role: node
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
# Scrape config for node-exporter endpoints (e.g. 'node_'). Explore metrics
# from a host running a node-exporter by scraping (127.0.0.1:9100/metrics).
- job_name: 'node-exporters'
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
action: keep
regex: monitoring;node-exporter;metrics
# Scrape config for service endpoints.
#
# The relabeling allows the actual service scrape endpoint to be configured
# via the following annotations:
#
# * `prometheus.io/scrape`: Only scrape services that have a value of `true`
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
# to set this to `https` & most likely set the `tls_config` of the scrape config.
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
# * `prometheus.io/port`: If the metrics are exposed on a different port to the
# service then set this appropriately.
- job_name: 'kubernetes-service-endpoints'
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
action: keep
regex: true
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
action: replace
target_label: __scheme__
regex: (https?)
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
action: replace
target_label: __address__
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: kubernetes_name
# Example scrape config for probing services via the Blackbox Exporter.
#
# The relabeling allows the actual service scrape endpoint to be configured
# via the following annotations:
#
# * `prometheus.io/probe`: Only probe services that have a value of `true`
- job_name: 'kubernetes-services'
metrics_path: /probe
params:
module: [http_2xx]
kubernetes_sd_configs:
- role: service
relabel_configs:
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
action: keep
regex: true
- source_labels: [__address__]
target_label: __param_target
- target_label: __address__
replacement: blackbox
- source_labels: [__param_target]
target_label: instance
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_service_name]
target_label: kubernetes_name
# Example scrape config for pods
#
# The relabeling allows the actual pod scrape endpoint to be configured via the
# following annotations:
#
# * `prometheus.io/scrape`: Only scrape pods that have a value of `true`
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
# * `prometheus.io/port`: Scrape the pod on the indicated port instead of the
# pod's declared ports (default is a port-free target if none are declared).
- job_name: 'kubernetes-pods'
kubernetes_sd_configs:
- role: pod
relabel_configs:
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
action: keep
regex: true
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
action: replace
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
target_label: __address__
- action: labelmap
regex: __meta_kubernetes_pod_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: kubernetes_pod_name
# Rule files
rule_files:
- "/etc/prometheus/rules/*.rules"

View File

@ -0,0 +1,45 @@
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
name: prometheus
namespace: monitoring
spec:
replicas: 1
strategy:
rollingUpdate:
maxUnavailable: 1
template:
metadata:
labels:
name: prometheus
phase: prod
spec:
containers:
- name: prometheus
image: quay.io/prometheus/prometheus:v1.8.0
args:
- '-config.file=/etc/prometheus/prometheus.yaml'
- '-storage.local.retention=12h'
- '-storage.local.memory-chunks=500000'
ports:
- name: web
containerPort: 9090
volumeMounts:
- name: config
mountPath: /etc/prometheus
- name: rules
mountPath: /etc/prometheus/rules
- name: data
mountPath: /var/lib/prometheus
dnsPolicy: ClusterFirst
restartPolicy: Always
terminationGracePeriodSeconds: 30
volumes:
- name: config
configMap:
name: prometheus-config
- name: rules
configMap:
name: prometheus-rules
- name: data
emptyDir: {}

View File

@ -0,0 +1,53 @@
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
name: node-exporter
namespace: monitoring
spec:
updateStrategy:
type: RollingUpdate
rollingUpdate:
maxUnavailable: 1
template:
metadata:
labels:
name: node-exporter
phase: prod
spec:
hostNetwork: true
hostPID: true
containers:
- name: node-exporter
image: quay.io/prometheus/node-exporter:v0.15.0
args:
- "--path.procfs=/host/proc"
- "--path.sysfs=/host/sys"
ports:
- name: metrics
containerPort: 9100
hostPort: 9100
resources:
requests:
memory: 30Mi
cpu: 100m
limits:
memory: 50Mi
cpu: 200m
volumeMounts:
- name: proc
mountPath: /host/proc
readOnly: true
- name: sys
mountPath: /host/sys
readOnly: true
tolerations:
- key: node-role.kubernetes.io/master
operator: Exists
effect: NoSchedule
volumes:
- name: proc
hostPath:
path: /proc
- name: sys
hostPath:
path: /sys

View File

@ -0,0 +1,17 @@
apiVersion: v1
kind: Service
metadata:
name: node-exporter
namespace: monitoring
spec:
type: ClusterIP
# service is created to allow prometheus to scape endpoints
clusterIP: None
selector:
name: node-exporter
phase: prod
ports:
- name: metrics
protocol: TCP
port: 80
targetPort: 9100

View File

@ -0,0 +1,4 @@
apiVersion: v1
kind: Namespace
metadata:
name: monitoring

View File

@ -0,0 +1,12 @@
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
name: prometheus
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: prometheus
subjects:
- kind: ServiceAccount
name: default
namespace: monitoring

View File

@ -0,0 +1,15 @@
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
name: prometheus
rules:
- apiGroups: [""]
resources:
- nodes
- nodes/proxy
- services
- endpoints
- pods
verbs: ["get", "list", "watch"]
- nonResourceURLs: ["/metrics"]
verbs: ["get"]

View File

@ -0,0 +1,8 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: prometheus-rules
namespace: monitoring
data:
example.rule: |
job_service:rpc_durations_seconds_count:avg_rate5m = avg(rate(rpc_durations_seconds_count[5m])) by (job, service)

View File

@ -0,0 +1,15 @@
apiVersion: v1
kind: Service
metadata:
name: prometheus
namespace: monitoring
spec:
type: ClusterIP
selector:
name: prometheus
phase: prod
ports:
- name: web
protocol: TCP
port: 80
targetPort: 9090

View File

@ -2,10 +2,10 @@
Every Typhoon cluster is verified to work well with several post-install addons.
* [CLUO](cluo.md) (Container Linux only)
* Nginx [Ingress Controller](ingress.md)
* [Heapster](heapster.md)
* Kubernetes [Dashboard](dashboard.md)
* [CLUO](cluo.md) (Container Linux only)
* Prometheus
* [Prometheus](prometheus.md)
* Grafana

50
docs/addons/prometheus.md Normal file
View File

@ -0,0 +1,50 @@
# Prometheus
Prometheus collects metrics (e.g. `node_memory_usage_bytes`) from *targets* by scraping their HTTP metrics endpoints. Targets are organized into *jobs*, defined in the Prometheus config. Targets may expose counter, gauge, histogram, or summary metrics.
Here's a simple config from the Prometheus [tutorial](https://prometheus.io/docs/introduction/getting_started/).
```
global:
scrape_interval: 15s
scrape_configs:
- job_name: 'prometheus'
scrape_interval: 5s
static_configs:
- targets: ['localhost:9090']
```
On Kubernetes clusters, Prometheus is run as a Deployment, configured with a ConfigMap, and accessed via a Service or Ingress.
```
kubectl apply -f addons/prometheus -R
```
The ConfigMap configures Prometheus to target apiserver endpoints, node metrics, cAdvisor metrics, and exporters. By default, data is kept in an `emptyDir` so it is persisted until the pod is rescheduled.
### Exporters
Exporters expose metrics for 3rd-party systems that don't natively expose Prometheus metrics.
* [node_exporter](https://github.com/prometheus/node_exporter) - DaemonSet that exposes a machine's hardware and OS metrics
* [kube-state-metrics](https://github.com/kubernetes/kube-state-metrics) - Deployment that exposes Kubernetes object metrics
* [blackbox_exporter](https://github.com/prometheus/blackbox_exporter) - Scrapes HTTP, HTTPS, DNS, TCP, or ICMP endpoints and exposes availability as metrics
### Queries and Graphs
Prometheus provides a simplistic UI for querying and graphing metrics. Use `kubectl` to authenticate to the apiserver and create a local port-forward to the Prometheus pod.
```
kubectl get pods -n monitoring
kubectl port-forward prometheus-POD-ID 9090 -n monitoring
```
Visit [127.0.0.1:9090](http://127.0.0.1:9090) to query [expressions](http://127.0.0.1:9090/graph), view [targets](http://127.0.0.1:9090/targets), or check [alerts](http://127.0.0.1:9090/alerts).
![Prometheus Graph](/img/prometheus-graph.png)
<br/>
![Prometheus Targets](/img/prometheus-targets.png)
### Visualization
Grafana can be used to build dashboards and rich visualizations that use Prometheus as the datasource. Favor Grafana for these use cases and use the Prometheus for debugging or quickly checking available metrics.

Binary file not shown.

After

Width:  |  Height:  |  Size: 224 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 404 KiB

View File

@ -46,10 +46,11 @@ pages:
- 'Google Cloud': 'google-cloud.md'
- 'Addons':
- 'Overview': 'addons/overview.md'
- 'Nginx Ingress': 'addons/ingress.md'
- 'Heapster': 'addons/heapster.md'
- 'Dashboard': 'addons/dashboard.md'
- 'CLUO': 'addons/cluo.md'
- 'Heapster': 'addons/heapster.md'
- 'Nginx Ingress': 'addons/ingress.md'
- 'Prometheus': 'addons/prometheus.md'
- 'Dashboard': 'addons/dashboard.md'
- 'Topics':
- 'Hardware': 'topics/hardware.md'
- 'Security': 'topics/security.md'