From 064ce83f2539aa299d9f55573cd3cdc9c2441c1e Mon Sep 17 00:00:00 2001 From: Dalton Hubble Date: Sat, 27 Jan 2018 20:56:49 -0800 Subject: [PATCH] addons: Update Prometheus to v2.1.0 * Change service discovery to relabel jobs to align with rule expressions in upstream examples * Use a separate service account for prometheus instead of granting roles to the namespace's default * Use a separate service account for node-exporter * Update node-exporter and kube-state-metrics exporters --- addons/prometheus/config.yaml | 11 ++- addons/prometheus/deployment.yaml | 3 +- .../kube-state-metrics/cluster-role.yaml | 1 - .../kube-state-metrics/deployment.yaml | 6 +- .../exporters/node-exporter/daemonset.yaml | 9 +- .../node-exporter/service-account.yaml | 5 + .../prometheus/rbac/cluster-role-binding.yaml | 2 +- addons/prometheus/rules.yaml | 96 ++++++++++++++----- addons/prometheus/service-account.yaml | 5 + 9 files changed, 99 insertions(+), 39 deletions(-) create mode 100644 addons/prometheus/exporters/node-exporter/service-account.yaml create mode 100644 addons/prometheus/service-account.yaml diff --git a/addons/prometheus/config.yaml b/addons/prometheus/config.yaml index b6e282bb..ec04e772 100644 --- a/addons/prometheus/config.yaml +++ b/addons/prometheus/config.yaml @@ -39,7 +39,7 @@ data: tls_config: ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt # Using endpoints to discover kube-apiserver targets finds the pod IP - # (host IP since apiserver is uses host network) which is not used in + # (host IP since apiserver uses host network) which is not used in # the server certificate. insecure_skip_verify: true bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token @@ -51,6 +51,9 @@ data: - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] action: keep regex: default;kubernetes;https + - replacement: apiserver + action: replace + target_label: job # Scrape config for node (i.e. kubelet) /metrics (e.g. 'kubelet_'). Explore # metrics from a node by scraping kubelet (127.0.0.1:10255/metrics). @@ -59,7 +62,7 @@ data: # Kubernetes apiserver. This means it will work if Prometheus is running out of # cluster, or can't connect to nodes for some other reason (e.g. because of # firewalling). - - job_name: 'kubernetes-nodes' + - job_name: 'kubelet' kubernetes_sd_configs: - role: node @@ -149,7 +152,7 @@ data: target_label: kubernetes_namespace - source_labels: [__meta_kubernetes_service_name] action: replace - target_label: kubernetes_name + target_label: job # Example scrape config for probing services via the Blackbox Exporter. 
# @@ -181,7 +184,7 @@ data: - source_labels: [__meta_kubernetes_namespace] target_label: kubernetes_namespace - source_labels: [__meta_kubernetes_service_name] - target_label: kubernetes_name + target_label: job # Example scrape config for pods # diff --git a/addons/prometheus/deployment.yaml b/addons/prometheus/deployment.yaml index 37e056db..648fcc93 100644 --- a/addons/prometheus/deployment.yaml +++ b/addons/prometheus/deployment.yaml @@ -14,9 +14,10 @@ spec: name: prometheus phase: prod spec: + serviceAccountName: prometheus containers: - name: prometheus - image: quay.io/prometheus/prometheus:v2.0.0 + image: quay.io/prometheus/prometheus:v2.1.0 args: - '--config.file=/etc/prometheus/prometheus.yaml' ports: diff --git a/addons/prometheus/exporters/kube-state-metrics/cluster-role.yaml b/addons/prometheus/exporters/kube-state-metrics/cluster-role.yaml index 5e93b40d..4cef71cc 100644 --- a/addons/prometheus/exporters/kube-state-metrics/cluster-role.yaml +++ b/addons/prometheus/exporters/kube-state-metrics/cluster-role.yaml @@ -35,4 +35,3 @@ rules: resources: - horizontalpodautoscalers verbs: ["list", "watch"] - diff --git a/addons/prometheus/exporters/kube-state-metrics/deployment.yaml b/addons/prometheus/exporters/kube-state-metrics/deployment.yaml index 19a14952..118e648d 100644 --- a/addons/prometheus/exporters/kube-state-metrics/deployment.yaml +++ b/addons/prometheus/exporters/kube-state-metrics/deployment.yaml @@ -54,8 +54,8 @@ spec: - /pod_nanny - --container=kube-state-metrics - --cpu=100m - - --extra-cpu=1m - - --memory=100Mi - - --extra-memory=2Mi + - --extra-cpu=2m + - --memory=150Mi + - --extra-memory=30Mi - --threshold=5 - --deployment=kube-state-metrics diff --git a/addons/prometheus/exporters/node-exporter/daemonset.yaml b/addons/prometheus/exporters/node-exporter/daemonset.yaml index 9289f60f..279f37d0 100644 --- a/addons/prometheus/exporters/node-exporter/daemonset.yaml +++ b/addons/prometheus/exporters/node-exporter/daemonset.yaml @@ -18,11 +18,15 @@ spec: name: node-exporter phase: prod spec: + serviceAccountName: node-exporter + securityContext: + runAsNonRoot: true + runAsUser: 65534 hostNetwork: true hostPID: true containers: - name: node-exporter - image: quay.io/prometheus/node-exporter:v0.15.0 + image: quay.io/prometheus/node-exporter:v0.15.2 args: - "--path.procfs=/host/proc" - "--path.sysfs=/host/sys" @@ -45,9 +49,8 @@ spec: mountPath: /host/sys readOnly: true tolerations: - - key: node-role.kubernetes.io/master + - effect: NoSchedule operator: Exists - effect: NoSchedule volumes: - name: proc hostPath: diff --git a/addons/prometheus/exporters/node-exporter/service-account.yaml b/addons/prometheus/exporters/node-exporter/service-account.yaml new file mode 100644 index 00000000..8a03ac16 --- /dev/null +++ b/addons/prometheus/exporters/node-exporter/service-account.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: node-exporter + namespace: monitoring diff --git a/addons/prometheus/rbac/cluster-role-binding.yaml b/addons/prometheus/rbac/cluster-role-binding.yaml index 9ae6de5c..db4dd2a7 100644 --- a/addons/prometheus/rbac/cluster-role-binding.yaml +++ b/addons/prometheus/rbac/cluster-role-binding.yaml @@ -8,5 +8,5 @@ roleRef: name: prometheus subjects: - kind: ServiceAccount - name: default + name: prometheus namespace: monitoring diff --git a/addons/prometheus/rules.yaml b/addons/prometheus/rules.yaml index 6474e0fc..b61ae4d4 100644 --- a/addons/prometheus/rules.yaml +++ b/addons/prometheus/rules.yaml @@ -4,8 +4,7 @@ metadata: name: 
prometheus-rules namespace: monitoring data: - # Rules adapted from those provided by coreos/prometheus-operator and SoundCloud - alertmanager.rules.yaml: |+ + alertmanager.rules.yaml: | groups: - name: alertmanager.rules rules: @@ -36,7 +35,7 @@ data: annotations: description: Reloading Alertmanager's configuration has failed for {{ $labels.namespace }}/{{ $labels.pod}}. - etcd3.rules.yaml: |+ + etcd3.rules.yaml: | groups: - name: ./etcd3.rules rules: @@ -65,8 +64,8 @@ data: changes within the last hour summary: a high number of leader changes within the etcd cluster are happening - alert: HighNumberOfFailedGRPCRequests - expr: sum(rate(etcd_grpc_requests_failed_total{job="etcd"}[5m])) BY (grpc_method) - / sum(rate(etcd_grpc_total{job="etcd"}[5m])) BY (grpc_method) > 0.01 + expr: sum(rate(grpc_server_handled_total{grpc_code!="OK",job="etcd"}[5m])) BY (grpc_service, grpc_method) + / sum(rate(grpc_server_handled_total{job="etcd"}[5m])) BY (grpc_service, grpc_method) > 0.01 for: 10m labels: severity: warning @@ -75,8 +74,8 @@ data: on etcd instance {{ $labels.instance }}' summary: a high number of gRPC requests are failing - alert: HighNumberOfFailedGRPCRequests - expr: sum(rate(etcd_grpc_requests_failed_total{job="etcd"}[5m])) BY (grpc_method) - / sum(rate(etcd_grpc_total{job="etcd"}[5m])) BY (grpc_method) > 0.05 + expr: sum(rate(grpc_server_handled_total{grpc_code!="OK",job="etcd"}[5m])) BY (grpc_service, grpc_method) + / sum(rate(grpc_server_handled_total{job="etcd"}[5m])) BY (grpc_service, grpc_method) > 0.05 for: 5m labels: severity: critical @@ -85,7 +84,7 @@ data: on etcd instance {{ $labels.instance }}' summary: a high number of gRPC requests are failing - alert: GRPCRequestsSlow - expr: histogram_quantile(0.99, rate(etcd_grpc_unary_requests_duration_seconds_bucket[5m])) + expr: histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job="etcd",grpc_type="unary"}[5m])) by (grpc_service, grpc_method, le)) > 0.15 for: 10m labels: @@ -125,7 +124,7 @@ data: }} are slow summary: slow HTTP requests - alert: EtcdMemberCommunicationSlow - expr: histogram_quantile(0.99, rate(etcd_network_member_round_trip_time_seconds_bucket[5m])) + expr: histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket[5m])) > 0.15 for: 10m labels: @@ -160,7 +159,7 @@ data: annotations: description: etcd instance {{ $labels.instance }} commit durations are high summary: high commit durations - general.rules.yaml: |+ + general.rules.yaml: | groups: - name: general.rules rules: @@ -192,12 +191,12 @@ data: description: '{{ $labels.job }}: {{ $labels.namespace }}/{{ $labels.pod }} instance will exhaust in file/socket descriptors within the next hour' summary: file descriptors soon exhausted - kube-controller-manager.rules.yaml: |+ + kube-controller-manager.rules.yaml: | groups: - name: kube-controller-manager.rules rules: - alert: K8SControllerManagerDown - expr: absent(up{kubernetes_name="kube-controller-manager"} == 1) + expr: absent(up{job="kube-controller-manager"} == 1) for: 5m labels: severity: critical @@ -205,7 +204,7 @@ data: description: There is no running K8S controller manager. Deployments and replication controllers are not making progress. 
summary: Controller manager is down - kube-scheduler.rules.yaml: |+ + kube-scheduler.rules.yaml: | groups: - name: kube-scheduler.rules rules: @@ -255,7 +254,7 @@ data: labels: quantile: "0.5" - alert: K8SSchedulerDown - expr: absent(up{kubernetes_name="kube-scheduler"} == 1) + expr: absent(up{job="kube-scheduler"} == 1) for: 5m labels: severity: critical @@ -263,7 +262,7 @@ data: description: There is no running K8S scheduler. New pods are not being assigned to nodes. summary: Scheduler is down - kube-state-metrics.rules.yaml: |+ + kube-state-metrics.rules.yaml: | groups: - name: kube-state-metrics.rules rules: @@ -274,7 +273,8 @@ data: severity: warning annotations: description: Observed deployment generation does not match expected one for - deployment {{$labels.namespaces}}{{$labels.deployment}} + deployment {{$labels.namespaces}}/{{$labels.deployment}} + summary: Deployment is outdated - alert: DeploymentReplicasNotUpdated expr: ((kube_deployment_status_replicas_updated != kube_deployment_spec_replicas) or (kube_deployment_status_replicas_available != kube_deployment_spec_replicas)) @@ -284,8 +284,9 @@ data: severity: warning annotations: description: Replicas are not updated and available for deployment {{$labels.namespaces}}/{{$labels.deployment}} + summary: Deployment replicas are outdated - alert: DaemonSetRolloutStuck - expr: kube_daemonset_status_current_number_ready / kube_daemonset_status_desired_number_scheduled + expr: kube_daemonset_status_number_ready / kube_daemonset_status_desired_number_scheduled * 100 < 100 for: 15m labels: @@ -293,6 +294,7 @@ data: annotations: description: Only {{$value}}% of desired pods scheduled and ready for daemon set {{$labels.namespaces}}/{{$labels.daemonset}} + summary: DaemonSet is missing pods - alert: K8SDaemonSetsNotScheduled expr: kube_daemonset_status_desired_number_scheduled - kube_daemonset_status_current_number_scheduled > 0 @@ -312,14 +314,15 @@ data: to run. summary: Daemonsets are not scheduled correctly - alert: PodFrequentlyRestarting - expr: increase(kube_pod_container_status_restarts[1h]) > 5 + expr: increase(kube_pod_container_status_restarts_total[1h]) > 5 for: 10m labels: severity: warning annotations: description: Pod {{$labels.namespaces}}/{{$labels.pod}} is was restarted {{$value}} times within the last hour - kubelet.rules.yaml: |+ + summary: Pod is restarting frequently + kubelet.rules.yaml: | groups: - name: kubelet.rules rules: @@ -342,14 +345,14 @@ data: annotations: description: '{{ $value }}% of Kubernetes nodes are not ready' - alert: K8SKubeletDown - expr: count(up{job="kubernetes-nodes"} == 0) / count(up{job="kubernetes-nodes"}) * 100 > 3 + expr: count(up{job="kubelet"} == 0) / count(up{job="kubelet"}) * 100 > 3 for: 1h labels: severity: warning annotations: description: Prometheus failed to scrape {{ $value }}% of kubelets. 
- alert: K8SKubeletDown - expr: (absent(up{job="kubernetes-nodes"} == 1) or count(up{job="kubernetes-nodes"} == 0) / count(up{job="kubernetes-nodes"})) + expr: (absent(up{job="kubelet"} == 1) or count(up{job="kubelet"} == 0) / count(up{job="kubelet"})) * 100 > 1 for: 1h labels: @@ -367,7 +370,7 @@ data: description: Kubelet {{$labels.instance}} is running {{$value}} pods, close to the limit of 110 summary: Kubelet is close to pod limit - kubernetes.rules.yaml: |+ + kubernetes.rules.yaml: | groups: - name: kubernetes.rules rules: @@ -447,14 +450,28 @@ data: annotations: description: API server returns errors for {{ $value }}% of requests - alert: K8SApiserverDown - expr: absent(up{job="kubernetes-apiservers"} == 1) + expr: absent(up{job="apiserver"} == 1) for: 20m labels: severity: critical annotations: description: No API servers are reachable or all have disappeared from service discovery - node.rules.yaml: |+ + + - alert: K8sCertificateExpirationNotice + labels: + severity: warning + annotations: + description: Kubernetes API Certificate is expiring soon (less than 7 days) + expr: sum(apiserver_client_certificate_expiration_seconds_bucket{le="604800"}) > 0 + + - alert: K8sCertificateExpirationNotice + labels: + severity: critical + annotations: + description: Kubernetes API Certificate is expiring in less than 1 day + expr: sum(apiserver_client_certificate_expiration_seconds_bucket{le="86400"}) > 0 + node.rules.yaml: | groups: - name: node.rules rules: @@ -476,7 +493,7 @@ data: - record: cluster:node_cpu:ratio expr: cluster:node_cpu:rate5m / count(sum(node_cpu) BY (instance, cpu)) - alert: NodeExporterDown - expr: absent(up{kubernetes_name="node-exporter"} == 1) + expr: absent(up{job="node-exporter"} == 1) for: 10m labels: severity: warning @@ -499,7 +516,7 @@ data: annotations: description: device {{$labels.device}} on node {{$labels.instance}} is running full within the next 2 hours (mounted at {{$labels.mountpoint}}) - prometheus.rules.yaml: |+ + prometheus.rules.yaml: | groups: - name: prometheus.rules rules: @@ -544,3 +561,30 @@ data: annotations: description: Prometheus {{ $labels.namespace }}/{{ $labels.pod}} is not connected to any Alertmanagers + - alert: PrometheusTSDBReloadsFailing + expr: increase(prometheus_tsdb_reloads_failures_total[2h]) > 0 + for: 12h + labels: + severity: warning + annotations: + description: '{{$labels.job}} at {{$labels.instance}} had {{$value | humanize}} + reload failures over the last four hours.' + summary: Prometheus has issues reloading data blocks from disk + - alert: PrometheusTSDBCompactionsFailing + expr: increase(prometheus_tsdb_compactions_failed_total[2h]) > 0 + for: 12h + labels: + severity: warning + annotations: + description: '{{$labels.job}} at {{$labels.instance}} had {{$value | humanize}} + compaction failures over the last four hours.' + summary: Prometheus has issues compacting sample blocks + - alert: PrometheusTSDBWALCorruptions + expr: tsdb_wal_corruptions_total > 0 + for: 4h + labels: + severity: warning + annotations: + description: '{{$labels.job}} at {{$labels.instance}} has a corrupted write-ahead + log (WAL).' + summary: Prometheus write-ahead log is corrupted diff --git a/addons/prometheus/service-account.yaml b/addons/prometheus/service-account.yaml new file mode 100644 index 00000000..f4c5f206 --- /dev/null +++ b/addons/prometheus/service-account.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: prometheus + namespace: monitoring
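
Verification sketch (not part of the commit): since this patch relabels service-discovery targets to plain job names (apiserver, kubelet, kube-scheduler, kube-controller-manager, node-exporter, kube-state-metrics) so they match the rule expressions, one way to check the result is to query Prometheus for the job labels it actually scrapes. The `monitoring` namespace and the `name=prometheus` pod label come from the manifests above; the `addons/prometheus` path mirrors the diffstat; local port 9090 and the curl query are assumptions about a default Prometheus setup, not something this patch configures.

    # Apply the updated addon manifests (path per the diffstat in this patch).
    kubectl apply -R -f addons/prometheus

    # Port-forward the Prometheus pod (labelled name=prometheus in deployment.yaml)
    # on the default Prometheus port, then list the job labels currently up.
    POD=$(kubectl -n monitoring get pod -l name=prometheus -o name | head -n 1)
    kubectl -n monitoring port-forward "$POD" 9090 &
    curl -s 'http://127.0.0.1:9090/api/v1/query?query=count(up)%20by%20(job)'

If the relabeling took effect, the query result should include the job names referenced by the rules (e.g. `up{job="kubelet"}`, `up{job="apiserver"}`) rather than the old `kubernetes-nodes` / `kubernetes-apiservers` names.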