mirror of
https://github.com/puppetmaster/typhoon.git
synced 2025-08-27 14:18:28 +02:00
Compare commits
303 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 78c9fdc18f | |||
| 884c8b39dc | |||
| 0e71f7e565 | |||
| 8c4200d425 | |||
| 5be5b261e2 | |||
| f034ef90ae | |||
| 3bba1ba0dc | |||
| dbe7604b67 | |||
| 9b405a19b2 | |||
| bfa1a679eb | |||
| f1da0731d8 | |||
| d641a058fe | |||
| 99a6d5478b | |||
| bc750aec33 | |||
| d55bfd5589 | |||
| 0be4673e44 | |||
| 3b44972d78 | |||
| 0127ee82c1 | |||
| a10d6977b8 | |||
| 05fe923c14 | |||
| d10620fb58 | |||
| 9b6113a058 | |||
| 5eb4078d68 | |||
| 8f0d2b5db4 | |||
| 2e89e161e9 | |||
| 55bb4dfba6 | |||
| 43fe78a2cc | |||
| 5a283b6443 | |||
| ff0c271d7b | |||
| 89088b6a99 | |||
| ee569e3a59 | |||
| daeaec0832 | |||
| db36036c81 | |||
| 7653e511be | |||
| f8474d68c9 | |||
| 032a24133b | |||
| f5f442b658 | |||
| ad871dbfa9 | |||
| 5801be53ff | |||
| c1b1669cf8 | |||
| dc03f7a4a9 | |||
| 1b8234eb91 | |||
| 4ba090feb0 | |||
| 4882fe1053 | |||
| 019009e9ee | |||
| 991a5c6cee | |||
| c60ec642bc | |||
| 38b4ff4700 | |||
| 7eb09237f4 | |||
| e58b424882 | |||
| b8eeafe4f9 | |||
| bdf1e6986e | |||
| 99e3721181 | |||
| ea365b551a | |||
| bbf2c13eef | |||
| da5d2c5321 | |||
| bceec9fdf5 | |||
| 49a9dc9b8b | |||
| bec5250e73 | |||
| dbdc3fc850 | |||
| e00f97c578 | |||
| f7ebdf475d | |||
| 716dfe4d17 | |||
| edc250d62a | |||
| db64ce3312 | |||
| 7c327b8bf4 | |||
| e6720cf738 | |||
| 844f380b4e | |||
| 13beb13aab | |||
| 90c4a7483d | |||
| 4e7dfc115d | |||
| ec5ea51141 | |||
| d8d524d10b | |||
| 02cd8eb8d3 | |||
| 84d6cfe7b3 | |||
| 3352388fe6 | |||
| 915f89d3c8 | |||
| f40f60b83c | |||
| 6f958d7577 | |||
| ee31074679 | |||
| 97517fa7f3 | |||
| 18502d64d6 | |||
| a3349b5c68 | |||
| 74dc6b0bf9 | |||
| fd1de27aef | |||
| 93de7506ef | |||
| def445a344 | |||
| 8464b258d8 | |||
| 855aec5af3 | |||
| 0c4d59db87 | |||
| 2eaf04c68b | |||
| 0227014fa0 | |||
| fb6f40051f | |||
| 316f06df06 | |||
| f4d3059b00 | |||
| 6c5a1964aa | |||
| 6e64634748 | |||
| d5de41e07a | |||
| 05b99178ae | |||
| ed0b781296 | |||
| 51906bf398 | |||
| 18dd7ccc09 | |||
| 0764bd30b5 | |||
| 899424c94f | |||
| ca8c0a7ac0 | |||
| cbe646fba6 | |||
| c166b2ba33 | |||
| 6676484490 | |||
| 79260c48f6 | |||
| 589c3569b7 | |||
| 4d75ae1373 | |||
| d32e6797ae | |||
| 32a9a83190 | |||
| 6e968cd152 | |||
| 6a581ab577 | |||
| 4ac4d7cbaf | |||
| 4ea1fde9c5 | |||
| 1e2eec6487 | |||
| 28d0891729 | |||
| 2ae126bf68 | |||
| 714419342e | |||
| 3701c0b1fe | |||
| 0c3557e68e | |||
| adc6c6866d | |||
| 9ac7b0655f | |||
| 983489bb52 | |||
| c2b719dc75 | |||
| 37981f9fb1 | |||
| 5eb11f5104 | |||
| f2ee75ac98 | |||
| 8b8e364915 | |||
| fb88113523 | |||
| 1854f5c104 | |||
| 726b58b697 | |||
| a5916da0e2 | |||
| a54e3c0da1 | |||
| 9d4cbb38f6 | |||
| cc29530ba0 | |||
| 385584b712 | |||
| 731a6ec23a | |||
| e889430926 | |||
| d81a091756 | |||
| 32ddfa94e1 | |||
| 681450aa0d | |||
| fafa028052 | |||
| 86e5adf348 | |||
| a89f25e31a | |||
| 2e4bf4d7ae | |||
| b6a51d0b68 | |||
| 567e18f015 | |||
| 0a7fab56e2 | |||
| d784b0fca6 | |||
| cd913986df | |||
| af54efec28 | |||
| 7198b9016c | |||
| f36c890234 | |||
| 233ec6dcb0 | |||
| 3f2978821b | |||
| 9b88d4bbfd | |||
| 3dde4ba8ba | |||
| e148552220 | |||
| d8d1468f03 | |||
| 2b74aba564 | |||
| 24d230505a | |||
| cf22e70b46 | |||
| b3cf9508b6 | |||
| 5212684472 | |||
| f990473cde | |||
| 8523a086e2 | |||
| 19bc5aea9e | |||
| 8d7cfc1a45 | |||
| 9969c357da | |||
| 4e43b2ff48 | |||
| ddc75e99ac | |||
| b80a2eb8a0 | |||
| 3610da8b71 | |||
| 485586e5d8 | |||
| a54f76db2a | |||
| e0d9e9979c | |||
| ad2e4311d1 | |||
| 490b628e2d | |||
| 23a8156bdf | |||
| 9789881243 | |||
| 77c0a4cf2e | |||
| 5035d56db2 | |||
| 9bb3de5327 | |||
| c8eabc2af4 | |||
| 2eaf858c5c | |||
| b8656fd74b | |||
| d276fffcda | |||
| 6b08bde479 | |||
| f4b2396718 | |||
| b76126db93 | |||
| 7186aa46da | |||
| 18dbaf74ce | |||
| ce001e9d56 | |||
| d770393dbc | |||
| 642f7ec22f | |||
| 1cc043d1eb | |||
| f8e9bfb1c0 | |||
| b1e41dcb99 | |||
| de4d90750e | |||
| 7acd4931f6 | |||
| cfd603bea2 | |||
| fdb543e834 | |||
| 8d3d4220fd | |||
| ba9daf439e | |||
| 38adb14bd2 | |||
| e43cf9f608 | |||
| 455a4af27e | |||
| 39876e455f | |||
| da2be86e8c | |||
| 65a2751f77 | |||
| a04ef3919a | |||
| 851bc1a3f8 | |||
| 758c09fa5c | |||
| b1cdd361ef | |||
| 7f7bc960a6 | |||
| 29108fd99d | |||
| 18d08de898 | |||
| f3730b2bfa | |||
| 88aa9a46e5 | |||
| efa90d8b44 | |||
| 46226a8015 | |||
| 270d1ce357 | |||
| ab87b6cea3 | |||
| d621512dd6 | |||
| c59a9c66b1 | |||
| 21f2cef12f | |||
| 931e311786 | |||
| 2592a0aad4 | |||
| 6c5e287c29 | |||
| 2a4595eeee | |||
| 8e7e6b9f7f | |||
| 35f3b1b28c | |||
| 9fb1e1a0e2 | |||
| b61d6373c5 | |||
| 42708f9a70 | |||
| d54709f89c | |||
| 0e688ef05a | |||
| 9dcc255f8e | |||
| 9307e97c46 | |||
| c112ee3829 | |||
| 45b556c08f | |||
| da6aafe816 | |||
| cce4537487 | |||
| 73126eb7f8 | |||
| 160ae34e71 | |||
| 06d40c5b44 | |||
| 98985e5acd | |||
| ea6bf9c9fb | |||
| 486fdb6968 | |||
| a44cf0edbd | |||
| 983c7aa012 | |||
| 3d9683b6e8 | |||
| 0da7757ef4 | |||
| 04c6613ff3 | |||
| 92600efd11 | |||
| 66c64b4e45 | |||
| 13f3745093 | |||
| c4914c326b | |||
| 461fd46986 | |||
| ceb5555222 | |||
| 86420fd507 | |||
| 5c383f4184 | |||
| 22fa051002 | |||
| c8313751d7 | |||
| 195d902ab6 | |||
| c19a68b59b | |||
| de88fa5457 | |||
| d9a0183f3f | |||
| 7e24c67608 | |||
| a37aff7f35 | |||
| 03d23bfde7 | |||
| 2c10d24113 | |||
| 82a616c70b | |||
| 2fa7dac247 | |||
| a41691b222 | |||
| 9034203d7a | |||
| d42f6d6b5d | |||
| 2fa1840c30 | |||
| 8e0b8d7e40 | |||
| a0cf527ccf | |||
| 65321acad2 | |||
| 064ce83f25 | |||
| c3b0cdddf3 | |||
| 211ec94c75 | |||
| 8aca5a089e | |||
| 3e6e4ea339 | |||
| 103f1e16d7 | |||
| 50dd3e3b82 | |||
| 3dc755994b | |||
| ddbfb2eee1 | |||
| 868265988b | |||
| 6adffcb778 | |||
| bc967ddcd0 | |||
| ef18f19ec4 | |||
| f5efcc1ff8 | |||
| 996651c605 | |||
| 38fa7dff1a | |||
| bbe295a3f1 | |||
| d8db296932 | |||
| 388ac08492 |
10
.github/ISSUE_TEMPLATE.md
vendored
10
.github/ISSUE_TEMPLATE.md
vendored
@ -4,11 +4,11 @@
|
|||||||
|
|
||||||
### Environment
|
### Environment
|
||||||
|
|
||||||
* Platform: bare-metal, google-cloud, digital-ocean
|
* Platform: aws, azure, bare-metal, google-cloud, digital-ocean
|
||||||
* OS: container-linux, fedora-cloud
|
* OS: container-linux, fedora-atomic
|
||||||
* Terraform: `terraform version`
|
* Ref: Release version or Git SHA (reporting latest is **not** helpful)
|
||||||
* Plugins: Provider plugin versions
|
* Terraform: `terraform version` (reporting latest is **not** helpful)
|
||||||
* Ref: Git SHA (if applicable)
|
* Plugins: Provider plugin versions (reporting latest is **not** helpful)
|
||||||
|
|
||||||
### Problem
|
### Problem
|
||||||
|
|
||||||
|
|||||||
496
CHANGES.md
496
CHANGES.md
@ -4,12 +4,506 @@ Notable changes between versions.
|
|||||||
|
|
||||||
## Latest
|
## Latest
|
||||||
|
|
||||||
|
## v1.12.2
|
||||||
|
|
||||||
|
* Kubernetes [v1.12.2](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG-1.12.md#v1122)
|
||||||
|
* Update CoreDNS from 1.2.2 to [1.2.4](https://github.com/coredns/coredns/releases/tag/v1.2.4)
|
||||||
|
* Update Calico from v3.2.3 to [v3.3.0](https://docs.projectcalico.org/v3.3/releases/)
|
||||||
|
* Disable Kubelet read-only port ([#324](https://github.com/poseidon/typhoon/pull/324))
|
||||||
|
* Fix CoreDNS AntiAffinity spec to prefer spreading replicas
|
||||||
|
* Ignore controller node user-data changes ([#335](https://github.com/poseidon/typhoon/pull/335))
|
||||||
|
* Once all managed clusters use v1.12.2, it is possible to update `terraform-provider-ct`
|
||||||
|
|
||||||
|
#### AWS
|
||||||
|
|
||||||
|
* Add `disk_iops` variable for EBS volume IOPS ([#314](https://github.com/poseidon/typhoon/pull/314))
|
||||||
|
|
||||||
|
#### Azure
|
||||||
|
|
||||||
|
* Use new `azurerm_network_interface_backend_address_pool_association` ([#332](https://github.com/poseidon/typhoon/pull/332))
|
||||||
|
* Require `terraform-provider-azurerm` v1.17+ (action required)
|
||||||
|
* Add `primary` field to `ip_configuration` needed by v1.17+ ([#331](https://github.com/poseidon/typhoon/pull/331))
|
||||||
|
|
||||||
|
#### DigitalOcean
|
||||||
|
|
||||||
|
* Add AAAA DNS records resolving to worker nodes ([#333](https://github.com/poseidon/typhoon/pull/333))
|
||||||
|
* Hosting IPv6 apps requires editing nginx-ingress with `hostNetwork: true`
|
||||||
|
|
||||||
|
#### Google Cloud
|
||||||
|
|
||||||
|
* Add an IPv6 address and IPv6 forwarding rules for load balancing IPv6 Ingress ([#334](https://github.com/poseidon/typhoon/pull/334))
|
||||||
|
* Add `ingress_static_ipv6` output variable for use in AAAA DNS records
|
||||||
|
* Allow serving IPv6 applications via Kubernetes Ingress
|
||||||
|
|
||||||
|
#### Addons
|
||||||
|
|
||||||
|
* Configure Heapster to scrape Kubelets with bearer token auth ([#323](https://github.com/poseidon/typhoon/pull/323))
|
||||||
|
* Update Grafana from v5.3.1 to v5.3.2
|
||||||
|
|
||||||
|
## v1.12.1
|
||||||
|
|
||||||
|
* Kubernetes [v1.12.1](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG-1.12.md#v1121)
|
||||||
|
* Update etcd from v3.3.9 to [v3.3.10](https://github.com/etcd-io/etcd/blob/master/CHANGELOG-3.3.md#v3310-2018-10-10)
|
||||||
|
* Update CoreDNS from 1.1.3 to [1.2.2](https://github.com/coredns/coredns/releases/tag/v1.2.2)
|
||||||
|
* Update Calico from v3.2.1 to [v3.2.3](https://docs.projectcalico.org/v3.2/releases/)
|
||||||
|
* Raise scheduler and controller-manager replicas to the larger of 2 or the number of controller nodes ([#312](https://github.com/poseidon/typhoon/pull/312))
|
||||||
|
* Single-controller clusters continue to run 2 replicas as before
|
||||||
|
* Raise default CoreDNS replicas to the larger of 2 or the number of controller nodes ([#313](https://github.com/poseidon/typhoon/pull/313))
|
||||||
|
* Add AntiAffinity preferred rule to favor spreading CoreDNS pods
|
||||||
|
* Annotate control plane and addon containers to use the Docker runtime seccomp profile ([#319](https://github.com/poseidon/typhoon/pull/319))
|
||||||
|
* Override Kubernetes default behavior that starts containers with `seccomp=unconfined`
|
||||||
|
|
||||||
|
#### Azure
|
||||||
|
|
||||||
|
* Remove `admin_password` field (disabled) since it is now optional
|
||||||
|
* Require `terraform-provider-azurerm` v1.16+ (action required)
|
||||||
|
|
||||||
|
#### Bare-Metal
|
||||||
|
|
||||||
|
* Add support for `cached_install` mode with Flatcar Linux ([#315](https://github.com/poseidon/typhoon/pull/315))
|
||||||
|
|
||||||
|
#### DigitalOcean
|
||||||
|
|
||||||
|
* Require `terraform-provider-digitalocean` v1.0+ (action required)
|
||||||
|
|
||||||
|
#### Addons
|
||||||
|
|
||||||
|
* Update nginx-ingress from v0.19.0 to v0.20.0
|
||||||
|
* Update Prometheus from v2.3.2 to v2.4.3
|
||||||
|
* Update Grafana from v5.2.4 to v5.3.1
|
||||||
|
|
||||||
|
## v1.11.3
|
||||||
|
|
||||||
|
* Kubernetes [v1.11.3](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG-1.11.md#v1113)
|
||||||
|
* Introduce Typhoon for Azure as alpha ([#288](https://github.com/poseidon/typhoon/pull/288))
|
||||||
|
* Special thanks @justaugustus for an earlier variant
|
||||||
|
* Update Calico from v3.1.3 to v3.2.1 ([#278](https://github.com/poseidon/typhoon/pull/278))
|
||||||
|
|
||||||
|
#### AWS
|
||||||
|
|
||||||
|
* Remove firewall rule allowing ICMP packets to nodes ([#285](https://github.com/poseidon/typhoon/pull/285))
|
||||||
|
|
||||||
|
#### Bare-Metal
|
||||||
|
|
||||||
|
* Remove `controller_networkds` and `worker_networkds` variables. Use Container Linux Config snippets [#277](https://github.com/poseidon/typhoon/pull/277)
|
||||||
|
|
||||||
|
#### Google Cloud
|
||||||
|
|
||||||
|
* Fix firewall to allow etcd client port 2379 traffic between controller nodes ([#287](https://github.com/poseidon/typhoon/pull/287))
|
||||||
|
* kube-apiservers were only able to connect to their node's local etcd peer. While master node outages were tolerated, reaching a healthy peer took longer than neccessary in some cases
|
||||||
|
* Reduce time needed to bootstrap the cluster
|
||||||
|
* Remove firewall rule allowing workers to access Nginx Ingress health check ([#284](https://github.com/poseidon/typhoon/pull/284))
|
||||||
|
* Nginx Ingress addon no longer uses hostNetwork, Prometheus scrapes via CNI network
|
||||||
|
|
||||||
|
#### Addons
|
||||||
|
|
||||||
|
* Update nginx-ingress from 0.17.1 to 0.19.0
|
||||||
|
* Update kube-state-metrics from v1.3.1 to v1.4.0
|
||||||
|
* Update Grafana from 5.2.2 to 5.2.4
|
||||||
|
|
||||||
|
## v1.11.2
|
||||||
|
|
||||||
|
* Kubernetes [v1.11.2](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG-1.11.md#v1112)
|
||||||
|
* Update etcd from v3.3.8 to [v3.3.9](https://github.com/coreos/etcd/blob/master/CHANGELOG-3.3.md#v339-2018-07-24)
|
||||||
|
* Use kubernetes-incubator/bootkube v0.13.0
|
||||||
|
* Fix Fedora Atomic modules' Kubelet version ([#270](https://github.com/poseidon/typhoon/issues/270))
|
||||||
|
|
||||||
|
#### Bare-Metal
|
||||||
|
|
||||||
|
* Introduce [Container Linux Config snippets](https://typhoon.psdn.io/advanced/customization/#container-linux) on bare-metal
|
||||||
|
* Validate and additively merge custom Container Linux Configs during terraform plan
|
||||||
|
* Define files, systemd units, dropins, networkd configs, mounts, users, and more
|
||||||
|
* [Require](https://typhoon.psdn.io/cl/bare-metal/#terraform-setup) `terraform-provider-ct` plugin v0.2.1 (**action required!**)
|
||||||
|
|
||||||
|
#### Addons
|
||||||
|
|
||||||
|
* Update nginx-ingress from 0.16.2 to 0.17.1
|
||||||
|
* Add nginx-ingress manifests for bare-metal
|
||||||
|
* Update Grafana from 5.2.1 to 5.2.2
|
||||||
|
* Update heapster from v1.5.3 to v1.5.4
|
||||||
|
|
||||||
|
## v1.11.1
|
||||||
|
|
||||||
|
* Kubernetes [v1.11.1](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG-1.11.md#v1111)
|
||||||
|
|
||||||
|
#### Addons
|
||||||
|
|
||||||
|
* Update Prometheus from v2.3.1 to v2.3.2
|
||||||
|
|
||||||
|
#### Errata
|
||||||
|
|
||||||
|
* Fedora Atomic modules shipped with Kubelet v1.11.0, instead of v1.11.1. Fixed in [#270](https://github.com/poseidon/typhoon/issues/270).
|
||||||
|
|
||||||
|
## v1.11.0
|
||||||
|
|
||||||
|
* Kubernetes [v1.11.0](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG-1.11.md#v1110)
|
||||||
|
* Force apiserver to stop listening on `127.0.0.1:8080`
|
||||||
|
* Replace `kube-dns` with [CoreDNS](https://coredns.io/) ([#261](https://github.com/poseidon/typhoon/pull/261))
|
||||||
|
* Edit the `coredns` ConfigMap to [customize](https://coredns.io/plugins/)
|
||||||
|
* CoreDNS doesn't use a resizer. For large clusters, scaling may be required.
|
||||||
|
|
||||||
|
#### AWS
|
||||||
|
|
||||||
|
* Update from Fedora Atomic 27 to 28 ([#258](https://github.com/poseidon/typhoon/pull/258))
|
||||||
|
|
||||||
|
#### Bare-Metal
|
||||||
|
|
||||||
|
* Update from Fedora Atomic 27 to 28 ([#263](https://github.com/poseidon/typhoon/pull/263))
|
||||||
|
|
||||||
|
#### Google
|
||||||
|
|
||||||
|
* Promote Google Cloud to stable
|
||||||
|
* Update from Fedora Atomic 27 to 28 ([#259](https://github.com/poseidon/typhoon/pull/259))
|
||||||
|
* Remove `ingress_static_ip` module output. Use `ingress_static_ipv4`.
|
||||||
|
* Remove `controllers_ipv4_public` module output.
|
||||||
|
|
||||||
|
#### Addons
|
||||||
|
|
||||||
|
* Update nginx-ingress from 0.15.0 to 0.16.2
|
||||||
|
* Update Grafana from 5.1.4 to [5.2.1](http://docs.grafana.org/guides/whats-new-in-v5-2/)
|
||||||
|
* Update heapster from v1.5.2 to v1.5.3
|
||||||
|
|
||||||
|
## v1.10.5
|
||||||
|
|
||||||
|
* Kubernetes [v1.10.5](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG-1.10.md#v1105)
|
||||||
|
* Update etcd from v3.3.6 to v3.3.8 ([#243](https://github.com/poseidon/typhoon/pull/243), [#247](https://github.com/poseidon/typhoon/pull/247))
|
||||||
|
|
||||||
|
#### AWS
|
||||||
|
|
||||||
|
* Switch `kube-apiserver` port from 443 to 6443 ([#248](https://github.com/poseidon/typhoon/pull/248))
|
||||||
|
* Combine apiserver and ingress NLBs ([#249](https://github.com/poseidon/typhoon/pull/249))
|
||||||
|
* Reduce cost by ~$18/month per cluster. Typhoon AWS clusters now use one network load balancer.
|
||||||
|
* Ingress addon users may keep using CNAME records to the `ingress_dns_name` module output (few million RPS)
|
||||||
|
* Ingress users with heavy traffic (many million RPS) should create a separate NLB(s)
|
||||||
|
* Worker pools no longer include an extraneous load balancer. Remove worker module's `ingress_dns_name` output
|
||||||
|
* Disable detailed (paid) monitoring on worker nodes ([#251](https://github.com/poseidon/typhoon/pull/251))
|
||||||
|
* Favor Prometheus for cloud-agnostic metrics, aggregation, and alerting
|
||||||
|
* Add `worker_target_group_http` and `worker_target_group_https` module outputs to allow custom load balancing
|
||||||
|
* Add `target_group_http` and `target_group_https` worker module outputs to allow custom load balancing
|
||||||
|
|
||||||
|
#### Bare-Metal
|
||||||
|
|
||||||
|
* Switch `kube-apiserver` port from 443 to 6443 ([#248](https://github.com/poseidon/typhoon/pull/248))
|
||||||
|
* Users who exposed kube-apiserver on a WAN via their router/load-balancer will need to adjust its configuration (e.g. DNAT 6443). Most apiservers are on a LAN (internal, VPN-only, etc) so if you didn't specially configure network gear for 443, no change is needed. (possible action required)
|
||||||
|
* Fix possible deadlock when provisioning clusters larger than 10 nodes ([#244](https://github.com/poseidon/typhoon/pull/244))
|
||||||
|
|
||||||
|
#### DigitalOcean
|
||||||
|
|
||||||
|
* Switch `kube-apiserver` port from 443 to 6443 ([#248](https://github.com/poseidon/typhoon/pull/248))
|
||||||
|
* Update firewall rules and generated kubeconfig's
|
||||||
|
|
||||||
|
#### Google Cloud
|
||||||
|
|
||||||
|
* Use global HTTP and TCP proxy load balancing for Kubernetes Ingress ([#252](https://github.com/poseidon/typhoon/pull/252))
|
||||||
|
* Switch Ingress from regional network load balancers to global HTTP/TCP Proxy load balancing
|
||||||
|
* Reduce cost by ~$19/month per cluster. Google bills the first 5 global and regional forwarding rules separately. Typhoon clusters now use 3 global and 0 regional forwarding rules.
|
||||||
|
* Worker pools no longer include an extraneous load balancer. Remove worker module's `ingress_static_ip` output
|
||||||
|
* Allow using nginx-ingress addon on Fedora Atomic clusters ([#200](https://github.com/poseidon/typhoon/issues/200))
|
||||||
|
* Add `worker_instance_group` module output to allow custom global load balancing
|
||||||
|
* Add `instance_group` worker module output to allow custom global load balancing
|
||||||
|
* Deprecate `ingress_static_ip` module output. Add `ingress_static_ipv4` module output instead.
|
||||||
|
* Deprecate `controllers_ipv4_public` module output
|
||||||
|
|
||||||
|
#### Addons
|
||||||
|
|
||||||
|
* Update CLUO from v0.6.0 to v0.7.0 ([#242](https://github.com/poseidon/typhoon/pull/242))
|
||||||
|
* Update Prometheus from v2.3.0 to v2.3.1
|
||||||
|
* Update Grafana from 5.1.3 to 5.1.4
|
||||||
|
* Drop `hostNetwork` from nginx-ingress addon
|
||||||
|
* Both flannel and Calico support host port via `portmap`
|
||||||
|
* Allows writing NetworkPolicies that reference ingress pods in `from` or `to`. HostNetwork pods were difficult to write network policy for since they could circumvent the CNI network to communicate with pods on the same node.
|
||||||
|
|
||||||
|
## v1.10.4
|
||||||
|
|
||||||
|
* Kubernetes [v1.10.4](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG-1.10.md#v1104)
|
||||||
|
* Update etcd from v3.3.5 to v3.3.6
|
||||||
|
* Update Calico from v3.1.2 to v3.1.3
|
||||||
|
|
||||||
|
#### Addons
|
||||||
|
|
||||||
|
* Update Prometheus from v2.2.1 to v2.3.0
|
||||||
|
* Add Prometheus liveness and readiness probes
|
||||||
|
* Annotate Grafana service so Prometheus scrapes metrics
|
||||||
|
* Label namespaces to ease writing Network Policies
|
||||||
|
|
||||||
|
## v1.10.3
|
||||||
|
|
||||||
|
* Kubernetes [v1.10.3](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG-1.10.md#v1103)
|
||||||
|
* Add [Flatcar Linux](https://docs.flatcar-linux.org/) (Container Linux derivative) as an option for AWS and bare-metal (thanks @kinvolk folks)
|
||||||
|
* Allow bearer token authentication to the Kubelet ([#216](https://github.com/poseidon/typhoon/issues/216))
|
||||||
|
* Require Webhook authorization to the Kubelet
|
||||||
|
* Switch apiserver X509 client cert org to satisfy new authorization requirement
|
||||||
|
* Require Terraform v0.11.x and drop support for v0.10.x ([migration guide](https://typhoon.psdn.io/topics/maintenance/#terraform-v011x))
|
||||||
|
* Update etcd from v3.3.4 to v3.3.5 ([#213](https://github.com/poseidon/typhoon/pull/213))
|
||||||
|
* Update Calico from v3.1.1 to v3.1.2
|
||||||
|
|
||||||
|
#### AWS
|
||||||
|
|
||||||
|
* Allow Flatcar Linux by setting `os_image` to flatcar-stable (default), flatcar-beta, flatcar-alpha ([#211](https://github.com/poseidon/typhoon/pull/211))
|
||||||
|
* Replace `os_channel` variable with `os_image` to align naming across clouds
|
||||||
|
* Please change values stable, beta, or alpha to coreos-stable, coreos-beta, coreos-alpha (**action required!**)
|
||||||
|
* Allow preemptible workers via spot instances ([#202](https://github.com/poseidon/typhoon/pull/202))
|
||||||
|
* Add `worker_price` to allow worker spot instances. Default to empty string for the worker autoscaling group to use regular on-demand instances
|
||||||
|
* Add `spot_price` to internal `workers` module for spot [worker pools](https://typhoon.psdn.io/advanced/worker-pools/)
|
||||||
|
|
||||||
|
#### Bare-Metal
|
||||||
|
|
||||||
|
* Allow Flatcar Linux by setting `os_channel` to flatcar-stable, flatcar-beta, flatcar-alpha ([#220](https://github.com/poseidon/typhoon/pull/220))
|
||||||
|
* Replace `container_linux_channel` variable with `os_channel`
|
||||||
|
* Please change values stable, beta, or alpha to coreos-stable, coreos-beta, coreos-alpha (**action required!**)
|
||||||
|
* Replace `container_linux_version` variable with `os_version`
|
||||||
|
* Add `network_ip_autodetection_method` variable for Calico host IPv4 address detection
|
||||||
|
* Use Calico's default "first-found" to support single NIC and bonded NIC nodes
|
||||||
|
* Allow [alternative](https://docs.projectcalico.org/v3.1/reference/node/configuration#ip-autodetection-methods) methods for multi NIC nodes, like can-reach=IP or interface=REGEX
|
||||||
|
* Deprecate `container_linux_oem` variable
|
||||||
|
|
||||||
|
#### DigitalOcean
|
||||||
|
|
||||||
|
* Update Fedora Atomic module to use Fedora Atomic 28 ([#225](https://github.com/poseidon/typhoon/pull/225))
|
||||||
|
* Fedora Atomic 27 images disappeared from DigitalOcean and forced this early update
|
||||||
|
|
||||||
|
#### Addons
|
||||||
|
|
||||||
|
* Fix Prometheus data directory location ([#203](https://github.com/poseidon/typhoon/pull/203))
|
||||||
|
* Configure Prometheus to scrape Kubelets directly with bearer token auth instead of proxying through the apiserver ([#217](https://github.com/poseidon/typhoon/pull/217))
|
||||||
|
* Security improvement: Drop RBAC permission from `nodes/proxy` to `nodes/metrics`
|
||||||
|
* Scale: Remove per-node proxied scrape load from the apiserver
|
||||||
|
* Update Grafana from v5.04 to v5.1.3 ([#208](https://github.com/poseidon/typhoon/pull/208))
|
||||||
|
* Disable Grafana Google Analytics by default ([#214](https://github.com/poseidon/typhoon/issues/214))
|
||||||
|
* Update nginx-ingress from 0.14.0 to 0.15.0
|
||||||
|
* Annotate nginx-ingress service so Prometheus auto-discovers and scrapes service endpoints ([#222](https://github.com/poseidon/typhoon/pull/222))
|
||||||
|
|
||||||
|
## v1.10.2
|
||||||
|
|
||||||
|
* Kubernetes [v1.10.2](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG-1.10.md#v1102)
|
||||||
|
* [Introduce](https://typhoon.psdn.io/announce/#april-26-2018) Typhoon for Fedora Atomic ([#199](https://github.com/poseidon/typhoon/pull/199))
|
||||||
|
* Update Calico from v3.0.4 to v3.1.1 ([#197](https://github.com/poseidon/typhoon/pull/197))
|
||||||
|
* https://www.projectcalico.org/announcing-calico-v3-1/
|
||||||
|
* https://github.com/projectcalico/calico/releases/tag/v3.1.0
|
||||||
|
* Update etcd from v3.3.3 to v3.3.4
|
||||||
|
* Update kube-dns from v1.14.9 to v1.14.10
|
||||||
|
|
||||||
|
#### Google Cloud
|
||||||
|
|
||||||
|
* Add support for multi-controller clusters (i.e. multi-master) ([#54](https://github.com/poseidon/typhoon/issues/54), [#190](https://github.com/poseidon/typhoon/pull/190))
|
||||||
|
* Switch from Google Cloud network load balancer to a TCP proxy load balancer. Avoid a [bug](https://issuetracker.google.com/issues/67366622) in Google network load balancers that limited clusters to only bootstrapping one controller node.
|
||||||
|
* Add TCP health check for apiserver pods on controllers. Replace kubelet check approximation.
|
||||||
|
|
||||||
|
#### Addons
|
||||||
|
|
||||||
|
* Update nginx-ingress from 0.12.0 to 0.14.0
|
||||||
|
* Update kube-state-metrics from v1.3.0 to v1.3.1
|
||||||
|
|
||||||
|
## v1.10.1
|
||||||
|
|
||||||
|
* Kubernetes [v1.10.1](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG-1.10.md#v1101)
|
||||||
|
* Enable etcd v3.3 metrics endpoint ([#175](https://github.com/poseidon/typhoon/pull/175))
|
||||||
|
* Use `k8s.gcr.io` instead of `gcr.io/google_containers` ([#180](https://github.com/poseidon/typhoon/pull/180))
|
||||||
|
* Kubernetes [recommends](https://groups.google.com/forum/#!msg/kubernetes-dev/ytjk_rNrTa0/3EFUHvovCAAJ) using the alias to pull from the nearest regional mirror and to abstract the backing container registry
|
||||||
|
* Update etcd from v3.3.2 to v3.3.3
|
||||||
|
* Update kube-dns from v1.14.8 to v1.14.9
|
||||||
|
* Use kubernetes-incubator/bootkube v0.12.0
|
||||||
|
|
||||||
|
#### Bare-Metal
|
||||||
|
|
||||||
|
* Fix need for multiple `terraform apply` runs to create a cluster with Terraform v0.11.4 ([#181](https://github.com/poseidon/typhoon/pull/181))
|
||||||
|
* To SSH during a disk install for debugging, SSH as user "core" with port 2222
|
||||||
|
* Remove the old trick of using a user "debug" during disk install
|
||||||
|
|
||||||
|
#### Google Cloud
|
||||||
|
|
||||||
|
* Refactor out the `controller` internal module
|
||||||
|
|
||||||
|
#### Addons
|
||||||
|
|
||||||
|
* Add Prometheus discovery for etcd peers on controller nodes ([#175](https://github.com/poseidon/typhoon/pull/175))
|
||||||
|
* Scrape etcd v3.3 `--listen-metrics-urls` for metrics
|
||||||
|
* Enable etcd alerts and populate the etcd Grafana dashboard
|
||||||
|
* Update kube-state-metrics from v1.2.0 to v1.3.0
|
||||||
|
|
||||||
|
## v1.10.0
|
||||||
|
|
||||||
|
* Kubernetes [v1.10.0](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG-1.10.md#v1100)
|
||||||
|
* Remove unused, unmaintained `pxe-worker` internal module
|
||||||
|
|
||||||
|
#### AWS
|
||||||
|
|
||||||
|
* Add `disk_type` optional variable for setting the EBS volume type ([#176](https://github.com/poseidon/typhoon/pull/176))
|
||||||
|
* Change default type from `standard` to `gp2`. Prometheus etcd alerts are tuned for fast disks.
|
||||||
|
|
||||||
|
#### Digital Ocean
|
||||||
|
|
||||||
|
* Ensure etcd secrets are only distributed to controller hosts, not workers.
|
||||||
|
* Remove `networking` optional variable. Only flannel works on Digital Ocean.
|
||||||
|
|
||||||
|
#### Google Cloud
|
||||||
|
|
||||||
|
* Add `disk_size` optional variable for setting instance disk size in GB
|
||||||
|
* Add `controller_type` optional variable for setting machine type for controllers
|
||||||
|
* Add `worker_type` optional variable for setting machine type for workers
|
||||||
|
* Remove `machine_type` optional variable. Use `controller_type` and `worker_type`.
|
||||||
|
|
||||||
|
#### Addons
|
||||||
|
|
||||||
|
* Update Grafana from v4.6.3 to v5.0.4 ([#153](https://github.com/poseidon/typhoon/pull/153), [#174](https://github.com/poseidon/typhoon/pull/174))
|
||||||
|
* Restrict dashboard organization role to Viewer
|
||||||
|
|
||||||
|
## v1.9.6
|
||||||
|
|
||||||
|
* Kubernetes [v1.9.6](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG-1.9.md#v196)
|
||||||
|
* Update Calico from v3.0.3 to v3.0.4
|
||||||
|
|
||||||
|
#### Addons
|
||||||
|
|
||||||
|
* Update heapster from v1.5.1 to v1.5.2
|
||||||
|
|
||||||
|
## v1.9.5
|
||||||
|
|
||||||
|
* Kubernetes [v1.9.5](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG-1.9.md#v195)
|
||||||
|
* Fix `subPath` volume mounts regression ([kubernetes#61076](https://github.com/kubernetes/kubernetes/issues/61076))
|
||||||
|
* Introduce [Container Linux Config snippets](https://typhoon.psdn.io/advanced/customization/#container-linux) on cloud platforms ([#145](https://github.com/poseidon/typhoon/pull/145))
|
||||||
|
* Validate and additively merge custom Container Linux Configs during `terraform plan`
|
||||||
|
* Define files, systemd units, dropins, networkd configs, mounts, users, and more
|
||||||
|
* Require updating `terraform-provider-ct` plugin from v0.2.0 to v0.2.1
|
||||||
|
* Add `node-role.kubernetes.io/controller="true"` node label to controllers ([#160](https://github.com/poseidon/typhoon/pull/160))
|
||||||
|
|
||||||
|
#### AWS
|
||||||
|
|
||||||
|
* [Require](https://typhoon.psdn.io/topics/maintenance/#terraform-provider-ct-v021) updating `terraform-provider-ct` plugin from v0.2.0 to [v0.2.1](https://github.com/coreos/terraform-provider-ct/releases/tag/v0.2.1) (action required!)
|
||||||
|
|
||||||
|
#### Digital Ocean
|
||||||
|
|
||||||
|
* [Require](https://typhoon.psdn.io/topics/maintenance/#terraform-provider-ct-v021) updating `terraform-provider-ct` plugin from v0.2.0 to [v0.2.1](https://github.com/coreos/terraform-provider-ct/releases/tag/v0.2.1) (action required!)
|
||||||
|
|
||||||
|
#### Google Cloud
|
||||||
|
|
||||||
|
* [Require](https://typhoon.psdn.io/topics/maintenance/#terraform-provider-ct-v021) updating `terraform-provider-ct` plugin from v0.2.0 to [v0.2.1](https://github.com/coreos/terraform-provider-ct/releases/tag/v0.2.1) (action required!)
|
||||||
|
* Relax `os_image` to optional. Default to "coreos-stable".
|
||||||
|
|
||||||
|
#### Addons
|
||||||
|
|
||||||
|
* Update nginx-ingress from 0.11.0 to 0.12.0
|
||||||
|
* Update Prometheus from 2.2.0 to 2.2.1
|
||||||
|
|
||||||
|
## v1.9.4
|
||||||
|
|
||||||
|
* Kubernetes [v1.9.4](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG-1.9.md#v194)
|
||||||
|
* Secret, configMap, downward API, and projected volumes now read-only (breaking, [kubernetes#58720](https://github.com/kubernetes/kubernetes/pull/58720))
|
||||||
|
* Regressed `subPath` volume mounts (regression, [kubernetes#61076](https://github.com/kubernetes/kubernetes/issues/61076))
|
||||||
|
* Mitigated `subPath` [CVE-2017-1002101](https://github.com/kubernetes/kubernetes/issues/60813)
|
||||||
|
* Introduce [worker pools](https://typhoon.psdn.io/advanced/worker-pools/) for AWS and Google Cloud for joining heterogeneous workers to existing clusters.
|
||||||
|
* Use new Network Load Balancers and cross zone load balancing on AWS
|
||||||
|
* Allow flexvolume plugins to be used on any Typhoon cluster (not just bare-metal)
|
||||||
|
* Upgrade etcd from v3.2.15 to v3.3.2
|
||||||
|
* Update Calico from v3.0.2 to v3.0.3
|
||||||
|
* Use kubernetes-incubator/bootkube v0.11.0
|
||||||
|
* [Recommend](https://typhoon.psdn.io/topics/maintenance/#terraform-provider-ct-v021) updating `terraform-provider-ct` plugin from v0.2.0 to [v0.2.1](https://github.com/coreos/terraform-provider-ct/releases/tag/v0.2.1) (action recommended)
|
||||||
|
|
||||||
|
#### AWS
|
||||||
|
|
||||||
|
* Promote AWS platform to stable
|
||||||
|
* Allow groups of workers to be defined and joined to a cluster (i.e. worker pools) ([#150](https://github.com/poseidon/typhoon/pull/150))
|
||||||
|
* Replace the apiserver elastic load balancer with a network load balancer ([#136](https://github.com/poseidon/typhoon/pull/136))
|
||||||
|
* Replace the Ingress elastic load balancer with a network load balancer ([#141](https://github.com/poseidon/typhoon/pull/141))
|
||||||
|
* AWS [NLBs](https://aws.amazon.com/blogs/aws/new-network-load-balancer-effortless-scaling-to-millions-of-requests-per-second/) can handle millions of RPS with high throughput and low latency.
|
||||||
|
* Require `terraform-provider-aws` 1.7.0 or higher
|
||||||
|
* Enable NLB [cross-zone](https://aws.amazon.com/about-aws/whats-new/2018/02/network-load-balancer-now-supports-cross-zone-load-balancing/) load balancing ([#159](https://github.com/poseidon/typhoon/pull/159))
|
||||||
|
* Requests are automatically evenly distributed to targets regardless of AZ
|
||||||
|
* Require `terraform-provider-aws` 1.11.0 or higher
|
||||||
|
* Add kubelet `--volume-plugin-dir` flag to allow flexvolume plugins ([#142](https://github.com/poseidon/typhoon/pull/142))
|
||||||
|
* Fix controller and worker launch configs to ignore AMI changes ([#126](https://github.com/poseidon/typhoon/pull/126), [#158](https://github.com/poseidon/typhoon/pull/158))
|
||||||
|
|
||||||
|
#### Digital Ocean
|
||||||
|
|
||||||
|
* Add kubelet `--volume-plugin-dir` flag to allow flexvolume plugins ([#142](https://github.com/poseidon/typhoon/pull/142))
|
||||||
|
* Fix to pass `ssh_fingerprints` as a list to droplets ([#143](https://github.com/poseidon/typhoon/pull/143))
|
||||||
|
|
||||||
|
#### Google Cloud
|
||||||
|
|
||||||
|
* Allow groups of workers to be defined and joined to a cluster (i.e. worker pools) ([#148](https://github.com/poseidon/typhoon/pull/148))
|
||||||
|
* Add kubelet `--volume-plugin-dir` flag to allow flexvolume plugins ([#142](https://github.com/poseidon/typhoon/pull/142))
|
||||||
|
* Add `kubeconfig` variable to `controllers` and `workers` submodules ([#147](https://github.com/poseidon/typhoon/pull/147))
|
||||||
|
* Remove `kubeconfig_*` variables from `controllers` and `workers` submodules ([#147](https://github.com/poseidon/typhoon/pull/147))
|
||||||
|
* Allow initial experimentation with accelerators (i.e. GPUs) on workers ([#161](https://github.com/poseidon/typhoon/pull/161)) (unofficial)
|
||||||
|
* Require `terraform-provider-google` v1.6.0
|
||||||
|
|
||||||
|
#### Addons
|
||||||
|
|
||||||
|
* Update Prometheus from 2.1.0 to 2.2.0 ([#153](https://github.com/poseidon/typhoon/pull/153))
|
||||||
|
* Scrape Prometheus itself to enable alerts about Prometheus itself
|
||||||
|
* Adjust KubeletDown rule to fire when 10% of kubelets are down
|
||||||
|
* Update heapster from v1.5.0 to v1.5.1 ([#131](https://github.com/poseidon/typhoon/pull/131))
|
||||||
|
* Use separate service account
|
||||||
|
* Update nginx-ingress from 0.10.2 to 0.11.0
|
||||||
|
|
||||||
|
## v1.9.3
|
||||||
|
|
||||||
|
* Kubernetes [v1.9.3](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG-1.9.md#v193)
|
||||||
|
* Network improvements and fixes ([#104](https://github.com/poseidon/typhoon/pull/104))
|
||||||
|
* Switch from Calico v2.6.6 to v3.0.2
|
||||||
|
* Add Calico GlobalNetworkSet CRD
|
||||||
|
* Update flannel from v0.9.0 to v0.10.0
|
||||||
|
* Use separate service account for flannel
|
||||||
|
* Update etcd from v3.2.14 to v3.2.15
|
||||||
|
|
||||||
|
#### Digital Ocean
|
||||||
|
|
||||||
|
* Use new Droplet [types](https://developers.digitalocean.com/documentation/changelog/api-v2/new-size-slugs-for-droplet-plan-changes/) which offer more CPU/memory, at lower cost. ([#105](https://github.com/poseidon/typhoon/pull/105))
|
||||||
|
* A small Digital Ocean cluster costs less than $25 a month!
|
||||||
|
|
||||||
|
#### Addons
|
||||||
|
|
||||||
|
* Update Prometheus from v2.0.0 to v2.1.0 ([#113](https://github.com/poseidon/typhoon/pull/113))
|
||||||
|
* Improve alerting rules
|
||||||
|
* Relabel discovered kubelet, endpoint, service, and apiserver scrapes
|
||||||
|
* Use separate service accounts
|
||||||
|
* Update node-exporter and kube-state-metrics
|
||||||
|
* Include Grafana dashboards for Kubernetes admins ([#113](https://github.com/poseidon/typhoon/pull/113))
|
||||||
|
* Add grafana-watcher to load bundled upstream dashboards
|
||||||
|
* Update nginx-ingress from 0.9.0 to 0.10.2
|
||||||
|
* Update CLUO from v0.5.0 to v0.6.0
|
||||||
|
* Switch manifests to use `apps/v1` Deployments and Daemonsets ([#120](https://github.com/poseidon/typhoon/pull/120))
|
||||||
|
* Remove Kubernetes Dashboard manifests ([#121](https://github.com/poseidon/typhoon/pull/121))
|
||||||
|
|
||||||
|
## v1.9.2
|
||||||
|
|
||||||
|
* Kubernetes [v1.9.2](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG-1.9.md#v192)
|
||||||
|
* Add Terraform v0.11.x support
|
||||||
|
* Add explicit "providers" section to modules for Terraform v0.11.x
|
||||||
|
* Retain support for Terraform v0.10.4+
|
||||||
|
* Add [migration guide](https://typhoon.psdn.io/topics/maintenance/#terraform-v011x) from Terraform v0.10.x to v0.11.x (**action required!**)
|
||||||
|
* Update etcd from 3.2.13 to 3.2.14
|
||||||
|
* Update calico from 2.6.5 to 2.6.6
|
||||||
|
* Update kube-dns from v1.14.7 to v1.14.8
|
||||||
|
* Use separate service account for kube-dns
|
||||||
|
* Use kubernetes-incubator/bootkube v0.10.0
|
||||||
|
|
||||||
|
#### Bare-Metal
|
||||||
|
|
||||||
|
* Use per-node Container Linux install profiles ([#97](https://github.com/poseidon/typhoon/pull/97))
|
||||||
|
* Allow Container Linux channel/version to be chosen per-cluster
|
||||||
|
* Fix issue where cluster deletion could require `terraform apply` multiple times
|
||||||
|
|
||||||
|
#### Digital Ocean
|
||||||
|
|
||||||
|
* Relax `digitalocean` provider version constraint
|
||||||
|
* Fix bug with `terraform plan` always showing a firewall diff to be applied ([#3](https://github.com/poseidon/typhoon/issues/3))
|
||||||
|
|
||||||
|
#### Addons
|
||||||
|
|
||||||
|
* Update CLUO to v0.5.0 to fix compatibility with Kubernetes 1.9 (**important**)
|
||||||
|
* Earlier versions can't roll out Container Linux updates on Kubernetes 1.9 nodes ([cluo#163](https://github.com/coreos/container-linux-update-operator/issues/163))
|
||||||
|
* Update kube-state-metrics from v1.1.0 to v1.2.0
|
||||||
|
* Fix RBAC cluster role for kube-state-metrics
|
||||||
|
|
||||||
|
## v1.9.1
|
||||||
|
|
||||||
* Kubernetes [v1.9.1](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG-1.9.md#v191)
|
* Kubernetes [v1.9.1](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG-1.9.md#v191)
|
||||||
* Update kube-dns from 1.14.5 to v1.14.7
|
* Update kube-dns from 1.14.5 to v1.14.7
|
||||||
* Update etcd from 3.2.0 to 3.2.13
|
* Update etcd from 3.2.0 to 3.2.13
|
||||||
* Update Calico from v2.6.4 to v2.6.5
|
* Update Calico from v2.6.4 to v2.6.5
|
||||||
* Enable portmap to fix hostPort with Calico
|
* Enable portmap to fix hostPort with Calico
|
||||||
* Service account for controller-manager
|
* Use separate service account for controller-manager
|
||||||
|
|
||||||
## v1.8.6
|
## v1.8.6
|
||||||
|
|
||||||
|
|||||||
72
README.md
72
README.md
@ -11,10 +11,11 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster
|
|||||||
|
|
||||||
## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>
|
## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>
|
||||||
|
|
||||||
* Kubernetes v1.9.1 (upstream, via [kubernetes-incubator/bootkube](https://github.com/kubernetes-incubator/bootkube))
|
* Kubernetes v1.12.2 (upstream, via [kubernetes-incubator/bootkube](https://github.com/kubernetes-incubator/bootkube))
|
||||||
* Single or multi-master, workloads isolated on workers, [Calico](https://www.projectcalico.org/) or [flannel](https://github.com/coreos/flannel) networking
|
* Single or multi-master, workloads isolated on workers, [Calico](https://www.projectcalico.org/) or [flannel](https://github.com/coreos/flannel) networking
|
||||||
* On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/)
|
* On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/)
|
||||||
* Ready for Ingress, Dashboards, Metrics, and other optional [addons](https://typhoon.psdn.io/addons/overview/)
|
* Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/) and [preemption](https://typhoon.psdn.io/cl/google-cloud/#preemption) (varies by platform)
|
||||||
|
* Ready for Ingress, Prometheus, Grafana, and other optional [addons](https://typhoon.psdn.io/addons/overview/)
|
||||||
|
|
||||||
## Modules
|
## Modules
|
||||||
|
|
||||||
@ -22,54 +23,63 @@ Typhoon provides a Terraform Module for each supported operating system and plat
|
|||||||
|
|
||||||
| Platform | Operating System | Terraform Module | Status |
|
| Platform | Operating System | Terraform Module | Status |
|
||||||
|---------------|------------------|------------------|--------|
|
|---------------|------------------|------------------|--------|
|
||||||
| AWS | Container Linux | [aws/container-linux/kubernetes](aws/container-linux/kubernetes) | beta |
|
| AWS | Container Linux | [aws/container-linux/kubernetes](aws/container-linux/kubernetes) | stable |
|
||||||
|
| AWS | Fedora Atomic | [aws/fedora-atomic/kubernetes](aws/fedora-atomic/kubernetes) | alpha |
|
||||||
|
| Azure | Container Linux | [azure/container-linux/kubernetes](cl/azure.md) | alpha |
|
||||||
| Bare-Metal | Container Linux | [bare-metal/container-linux/kubernetes](bare-metal/container-linux/kubernetes) | stable |
|
| Bare-Metal | Container Linux | [bare-metal/container-linux/kubernetes](bare-metal/container-linux/kubernetes) | stable |
|
||||||
|
| Bare-Metal | Fedora Atomic | [bare-metal/fedora-atomic/kubernetes](bare-metal/fedora-atomic/kubernetes) | alpha |
|
||||||
| Digital Ocean | Container Linux | [digital-ocean/container-linux/kubernetes](digital-ocean/container-linux/kubernetes) | beta |
|
| Digital Ocean | Container Linux | [digital-ocean/container-linux/kubernetes](digital-ocean/container-linux/kubernetes) | beta |
|
||||||
| Google Cloud | Container Linux | [google-cloud/container-linux/kubernetes](google-cloud/container-linux/kubernetes) | beta |
|
| Digital Ocean | Fedora Atomic | [digital-ocean/fedora-atomic/kubernetes](digital-ocean/fedora-atomic/kubernetes) | alpha |
|
||||||
|
| Google Cloud | Container Linux | [google-cloud/container-linux/kubernetes](google-cloud/container-linux/kubernetes) | stable |
|
||||||
|
| Google Cloud | Fedora Atomic | [google-cloud/fedora-atomic/kubernetes](google-cloud/fedora-atomic/kubernetes) | alpha |
|
||||||
|
|
||||||
## Usage
|
The AWS and bare-metal `container-linux` modules allow picking Red Hat Container Linux (formerly CoreOS Container Linux) or Kinvolk's Flatcar Linux friendly fork.
|
||||||
|
|
||||||
|
## Documentation
|
||||||
|
|
||||||
* [Docs](https://typhoon.psdn.io)
|
* [Docs](https://typhoon.psdn.io)
|
||||||
* [Concepts](https://typhoon.psdn.io/concepts/)
|
* Architecture [concepts](https://typhoon.psdn.io/architecture/concepts/) and [operating systems](https://typhoon.psdn.io/architecture/operating-systems/)
|
||||||
* Tutorials
|
* Tutorials for [AWS](docs/cl/aws.md), [Azure](docs/cl/azure.md), [Bare-Metal](docs/cl/bare-metal.md), [Digital Ocean](docs/cl/digital-ocean.md), and [Google-Cloud](docs/cl/google-cloud.md)
|
||||||
* [AWS](https://typhoon.psdn.io/aws/)
|
|
||||||
* [Bare-Metal](https://typhoon.psdn.io/bare-metal/)
|
|
||||||
* [Digital Ocean](https://typhoon.psdn.io/digital-ocean/)
|
|
||||||
* [Google-Cloud](https://typhoon.psdn.io/google-cloud/)
|
|
||||||
|
|
||||||
## Example
|
## Usage
|
||||||
|
|
||||||
Define a Kubernetes cluster by using the Terraform module for your chosen platform and operating system. Here's a minimal example:
|
Define a Kubernetes cluster by using the Terraform module for your chosen platform and operating system. Here's a minimal example:
|
||||||
|
|
||||||
```tf
|
```tf
|
||||||
module "google-cloud-yavin" {
|
module "google-cloud-yavin" {
|
||||||
source = "git::https://github.com/poseidon/typhoon//google-cloud/container-linux/kubernetes"
|
source = "git::https://github.com/poseidon/typhoon//google-cloud/container-linux/kubernetes?ref=v1.12.2"
|
||||||
|
|
||||||
|
providers = {
|
||||||
|
google = "google.default"
|
||||||
|
local = "local.default"
|
||||||
|
null = "null.default"
|
||||||
|
template = "template.default"
|
||||||
|
tls = "tls.default"
|
||||||
|
}
|
||||||
|
|
||||||
# Google Cloud
|
# Google Cloud
|
||||||
|
cluster_name = "yavin"
|
||||||
region = "us-central1"
|
region = "us-central1"
|
||||||
dns_zone = "example.com"
|
dns_zone = "example.com"
|
||||||
dns_zone_name = "example-zone"
|
dns_zone_name = "example-zone"
|
||||||
os_image = "coreos-stable-1576-5-0-v20180105"
|
|
||||||
|
|
||||||
cluster_name = "yavin"
|
# configuration
|
||||||
controller_count = 1
|
|
||||||
worker_count = 2
|
|
||||||
ssh_authorized_key = "ssh-rsa AAAAB3Nz..."
|
ssh_authorized_key = "ssh-rsa AAAAB3Nz..."
|
||||||
|
|
||||||
# output assets dir
|
|
||||||
asset_dir = "/home/user/.secrets/clusters/yavin"
|
asset_dir = "/home/user/.secrets/clusters/yavin"
|
||||||
|
|
||||||
|
# optional
|
||||||
|
worker_count = 2
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
Fetch modules, plan the changes to be made, and apply the changes.
|
Initialize modules, plan the changes to be made, and apply the changes.
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
$ terraform init
|
$ terraform init
|
||||||
$ terraform get --update
|
|
||||||
$ terraform plan
|
$ terraform plan
|
||||||
Plan: 37 to add, 0 to change, 0 to destroy.
|
Plan: 64 to add, 0 to change, 0 to destroy.
|
||||||
$ terraform apply
|
$ terraform apply
|
||||||
Apply complete! Resources: 37 added, 0 changed, 0 destroyed.
|
Apply complete! Resources: 64 added, 0 changed, 0 destroyed.
|
||||||
```
|
```
|
||||||
|
|
||||||
In 4-8 minutes (varies by platform), the cluster will be ready. This Google Cloud example creates a `yavin.example.com` DNS record to resolve to a network load balancer across controller nodes.
|
In 4-8 minutes (varies by platform), the cluster will be ready. This Google Cloud example creates a `yavin.example.com` DNS record to resolve to a network load balancer across controller nodes.
|
||||||
@ -78,9 +88,9 @@ In 4-8 minutes (varies by platform), the cluster will be ready. This Google Clou
|
|||||||
$ export KUBECONFIG=/home/user/.secrets/clusters/yavin/auth/kubeconfig
|
$ export KUBECONFIG=/home/user/.secrets/clusters/yavin/auth/kubeconfig
|
||||||
$ kubectl get nodes
|
$ kubectl get nodes
|
||||||
NAME STATUS AGE VERSION
|
NAME STATUS AGE VERSION
|
||||||
yavin-controller-0.c.example-com.internal Ready 6m v1.9.1
|
yavin-controller-0.c.example-com.internal Ready 6m v1.12.2
|
||||||
yavin-worker-jrbf.c.example-com.internal Ready 5m v1.9.1
|
yavin-worker-jrbf.c.example-com.internal Ready 5m v1.12.2
|
||||||
yavin-worker-mzdm.c.example-com.internal Ready 5m v1.9.1
|
yavin-worker-mzdm.c.example-com.internal Ready 5m v1.12.2
|
||||||
```
|
```
|
||||||
|
|
||||||
List the pods.
|
List the pods.
|
||||||
@ -91,10 +101,10 @@ NAMESPACE NAME READY STATUS RESTART
|
|||||||
kube-system calico-node-1cs8z 2/2 Running 0 6m
|
kube-system calico-node-1cs8z 2/2 Running 0 6m
|
||||||
kube-system calico-node-d1l5b 2/2 Running 0 6m
|
kube-system calico-node-d1l5b 2/2 Running 0 6m
|
||||||
kube-system calico-node-sp9ps 2/2 Running 0 6m
|
kube-system calico-node-sp9ps 2/2 Running 0 6m
|
||||||
|
kube-system coredns-1187388186-zj5dl 1/1 Running 0 6m
|
||||||
kube-system kube-apiserver-zppls 1/1 Running 0 6m
|
kube-system kube-apiserver-zppls 1/1 Running 0 6m
|
||||||
kube-system kube-controller-manager-3271970485-gh9kt 1/1 Running 0 6m
|
kube-system kube-controller-manager-3271970485-gh9kt 1/1 Running 0 6m
|
||||||
kube-system kube-controller-manager-3271970485-h90v8 1/1 Running 1 6m
|
kube-system kube-controller-manager-3271970485-h90v8 1/1 Running 1 6m
|
||||||
kube-system kube-dns-1187388186-zj5dl 3/3 Running 0 6m
|
|
||||||
kube-system kube-proxy-117v6 1/1 Running 0 6m
|
kube-system kube-proxy-117v6 1/1 Running 0 6m
|
||||||
kube-system kube-proxy-9886n 1/1 Running 0 6m
|
kube-system kube-proxy-9886n 1/1 Running 0 6m
|
||||||
kube-system kube-proxy-njn47 1/1 Running 0 6m
|
kube-system kube-proxy-njn47 1/1 Running 0 6m
|
||||||
@ -115,11 +125,11 @@ Typhoon is strict about minimalism, maturity, and scope. These are not in scope:
|
|||||||
|
|
||||||
Ask questions on the IRC #typhoon channel on [freenode.net](http://freenode.net/).
|
Ask questions on the IRC #typhoon channel on [freenode.net](http://freenode.net/).
|
||||||
|
|
||||||
## Background
|
## Motivation
|
||||||
|
|
||||||
Typhoon powers the author's cloud and colocation clusters. The project has evolved through operational experience and Kubernetes changes. Typhoon is shared under a free license to allow others to use the work freely and contribute to its upkeep.
|
Typhoon powers the author's cloud and colocation clusters. The project has evolved through operational experience and Kubernetes changes. Typhoon is shared under a free license to allow others to use the work freely and contribute to its upkeep.
|
||||||
|
|
||||||
Typhoon addresses real world needs, which you may share. It is honest about limitations or areas that aren't mature yet. It avoids buzzword bingo and hype. It does not aim to be the one-solution-fits-all distro. An ecosystem of free (or enterprise) Kubernetes distros is healthy.
|
Typhoon addresses real world needs, which you may share. It is honest about limitations or areas that aren't mature yet. It avoids buzzword bingo and hype. It does not aim to be the one-solution-fits-all distro. An ecosystem of Kubernetes distributions is healthy.
|
||||||
|
|
||||||
## Social Contract
|
## Social Contract
|
||||||
|
|
||||||
@ -127,4 +137,6 @@ Typhoon is not a product, trial, or free-tier. It is not run by a company, does
|
|||||||
|
|
||||||
Typhoon clusters will contain only [free](https://www.debian.org/intro/free) components. Cluster components will not collect data on users without their permission.
|
Typhoon clusters will contain only [free](https://www.debian.org/intro/free) components. Cluster components will not collect data on users without their permission.
|
||||||
|
|
||||||
*Disclosure: The author works for CoreOS and previously wrote Matchbox and original Tectonic for bare-metal and AWS. This project is not associated with CoreOS.*
|
## Donations
|
||||||
|
|
||||||
|
Typhoon does not accept money donations. Instead, we encourage you to donate to one of [these organizations](https://github.com/poseidon/typhoon/wiki/Donations) to show your appreciation.
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: ClusterRoleBinding
|
kind: ClusterRoleBinding
|
||||||
apiVersion: rbac.authorization.k8s.io/v1beta1
|
|
||||||
metadata:
|
metadata:
|
||||||
name: reboot-coordinator
|
name: reboot-coordinator
|
||||||
roleRef:
|
roleRef:
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
apiVersion: rbac.authorization.k8s.io/v1beta1
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: ClusterRole
|
kind: ClusterRole
|
||||||
metadata:
|
metadata:
|
||||||
name: reboot-coordinator
|
name: reboot-coordinator
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
apiVersion: extensions/v1beta1
|
apiVersion: apps/v1
|
||||||
kind: DaemonSet
|
kind: DaemonSet
|
||||||
metadata:
|
metadata:
|
||||||
name: container-linux-update-agent
|
name: container-linux-update-agent
|
||||||
@ -8,14 +8,19 @@ spec:
|
|||||||
type: RollingUpdate
|
type: RollingUpdate
|
||||||
rollingUpdate:
|
rollingUpdate:
|
||||||
maxUnavailable: 1
|
maxUnavailable: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: container-linux-update-agent
|
||||||
template:
|
template:
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
app: container-linux-update-agent
|
app: container-linux-update-agent
|
||||||
|
annotations:
|
||||||
|
seccomp.security.alpha.kubernetes.io/pod: 'docker/default'
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: update-agent
|
- name: update-agent
|
||||||
image: quay.io/coreos/container-linux-update-operator:v0.4.1
|
image: quay.io/coreos/container-linux-update-operator:v0.7.0
|
||||||
command:
|
command:
|
||||||
- "/bin/update-agent"
|
- "/bin/update-agent"
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
|
|||||||
@ -1,18 +1,23 @@
|
|||||||
apiVersion: extensions/v1beta1
|
apiVersion: apps/v1
|
||||||
kind: Deployment
|
kind: Deployment
|
||||||
metadata:
|
metadata:
|
||||||
name: container-linux-update-operator
|
name: container-linux-update-operator
|
||||||
namespace: reboot-coordinator
|
namespace: reboot-coordinator
|
||||||
spec:
|
spec:
|
||||||
replicas: 1
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: container-linux-update-operator
|
||||||
template:
|
template:
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
app: container-linux-update-operator
|
app: container-linux-update-operator
|
||||||
|
annotations:
|
||||||
|
seccomp.security.alpha.kubernetes.io/pod: 'docker/default'
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: update-operator
|
- name: update-operator
|
||||||
image: quay.io/coreos/container-linux-update-operator:v0.4.1
|
image: quay.io/coreos/container-linux-update-operator:v0.7.0
|
||||||
command:
|
command:
|
||||||
- "/bin/update-operator"
|
- "/bin/update-operator"
|
||||||
env:
|
env:
|
||||||
|
|||||||
@ -1,32 +0,0 @@
|
|||||||
apiVersion: extensions/v1beta1
|
|
||||||
kind: Deployment
|
|
||||||
metadata:
|
|
||||||
name: kubernetes-dashboard
|
|
||||||
namespace: kube-system
|
|
||||||
spec:
|
|
||||||
replicas: 1
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
name: kubernetes-dashboard
|
|
||||||
phase: prod
|
|
||||||
spec:
|
|
||||||
containers:
|
|
||||||
- name: kubernetes-dashboard
|
|
||||||
image: gcr.io/google_containers/kubernetes-dashboard-amd64:v1.6.1
|
|
||||||
ports:
|
|
||||||
- name: http
|
|
||||||
containerPort: 9090
|
|
||||||
resources:
|
|
||||||
limits:
|
|
||||||
cpu: 100m
|
|
||||||
memory: 300Mi
|
|
||||||
requests:
|
|
||||||
cpu: 100m
|
|
||||||
memory: 100Mi
|
|
||||||
livenessProbe:
|
|
||||||
httpGet:
|
|
||||||
path: /
|
|
||||||
port: 9090
|
|
||||||
initialDelaySeconds: 30
|
|
||||||
timeoutSeconds: 30
|
|
||||||
15
addons/grafana/dashboard-providers.yaml
Normal file
15
addons/grafana/dashboard-providers.yaml
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: grafana-dashboard-providers
|
||||||
|
namespace: monitoring
|
||||||
|
data:
|
||||||
|
dashboard-providers.yaml: |+
|
||||||
|
apiVersion: 1
|
||||||
|
providers:
|
||||||
|
- name: 'default'
|
||||||
|
ordId: 1
|
||||||
|
folder: ''
|
||||||
|
type: file
|
||||||
|
options:
|
||||||
|
path: /var/lib/grafana/dashboards
|
||||||
7361
addons/grafana/dashboards.yaml
Normal file
7361
addons/grafana/dashboards.yaml
Normal file
@ -0,0 +1,7361 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: grafana-dashboards
|
||||||
|
namespace: monitoring
|
||||||
|
data:
|
||||||
|
deployment-dashboard.json: |+
|
||||||
|
{
|
||||||
|
"__inputs": [
|
||||||
|
{
|
||||||
|
"description": "",
|
||||||
|
"label": "prometheus",
|
||||||
|
"name": "prometheus",
|
||||||
|
"pluginId": "prometheus",
|
||||||
|
"pluginName": "Prometheus",
|
||||||
|
"type": "datasource"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"annotations": {
|
||||||
|
"list": []
|
||||||
|
},
|
||||||
|
"editable": false,
|
||||||
|
"graphTooltip": 1,
|
||||||
|
"hideControls": false,
|
||||||
|
"links": [],
|
||||||
|
"rows": [
|
||||||
|
{
|
||||||
|
"collapse": false,
|
||||||
|
"editable": false,
|
||||||
|
"height": "200px",
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": false,
|
||||||
|
"colors": [
|
||||||
|
"rgba(245, 54, 54, 0.9)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(50, 172, 45, 0.97)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "none",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": false,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"id": 8,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfix": "cores",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 4,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$deployment_namespace\",pod_name=~\"$deployment_name.*\"}[3m]))",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"refId": "A",
|
||||||
|
"step": 600
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "CPU",
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "110%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "N/A",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "avg"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": false,
|
||||||
|
"colors": [
|
||||||
|
"rgba(245, 54, 54, 0.9)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(50, 172, 45, 0.97)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "none",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": false,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"id": 9,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfix": "GB",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "80%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 4,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum(container_memory_usage_bytes{namespace=\"$deployment_namespace\",pod_name=~\"$deployment_name.*\"}) / 1024^3",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"refId": "A",
|
||||||
|
"step": 600
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Memory",
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "110%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "N/A",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "avg"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": false,
|
||||||
|
"colors": [
|
||||||
|
"rgba(245, 54, 54, 0.9)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(50, 172, 45, 0.97)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "Bps",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": false,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": false
|
||||||
|
},
|
||||||
|
"id": 7,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfix": "",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 4,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum(rate(container_network_transmit_bytes_total{namespace=\"$deployment_namespace\",pod_name=~\"$deployment_name.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{namespace=\"$deployment_namespace\",pod_name=~\"$deployment_name.*\"}[3m]))",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"refId": "A",
|
||||||
|
"step": 600
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Network",
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "80%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "N/A",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "avg"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"showTitle": false,
|
||||||
|
"title": "Dashboard Row",
|
||||||
|
"titleSize": "h6"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collapse": false,
|
||||||
|
"editable": false,
|
||||||
|
"height": "100px",
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": false,
|
||||||
|
"colors": [
|
||||||
|
"rgba(245, 54, 54, 0.9)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(50, 172, 45, 0.97)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "none",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": false,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": false
|
||||||
|
},
|
||||||
|
"id": 5,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 3,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": false
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "max(kube_deployment_spec_replicas{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}) without (instance, pod)",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"metric": "kube_deployment_spec_replicas",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 600
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Desired Replicas",
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "80%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "N/A",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "avg"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": false,
|
||||||
|
"colors": [
|
||||||
|
"rgba(245, 54, 54, 0.9)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(50, 172, 45, 0.97)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "none",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": false,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"id": 6,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 3,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": false
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "min(kube_deployment_status_replicas_available{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}) without (instance, pod)",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"refId": "A",
|
||||||
|
"step": 600
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Available Replicas",
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "80%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "N/A",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "avg"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": false,
|
||||||
|
"colors": [
|
||||||
|
"rgba(245, 54, 54, 0.9)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(50, 172, 45, 0.97)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "none",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": false,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"id": 3,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 3,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": false
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "max(kube_deployment_status_observed_generation{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}) without (instance, pod)",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"refId": "A",
|
||||||
|
"step": 600
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Observed Generation",
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "80%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "N/A",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "avg"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": false,
|
||||||
|
"colors": [
|
||||||
|
"rgba(245, 54, 54, 0.9)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(50, 172, 45, 0.97)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "none",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": false,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"id": 2,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 3,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": false
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "max(kube_deployment_metadata_generation{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}) without (instance, pod)",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"refId": "A",
|
||||||
|
"step": 600
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Metadata Generation",
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "80%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "N/A",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "avg"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"showTitle": false,
|
||||||
|
"title": "Dashboard Row",
|
||||||
|
"titleSize": "h6"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collapse": false,
|
||||||
|
"editable": false,
|
||||||
|
"height": "350px",
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"error": false,
|
||||||
|
"fill": 1,
|
||||||
|
"grid": {
|
||||||
|
"threshold1Color": "rgba(216, 200, 27, 0.27)",
|
||||||
|
"threshold2Color": "rgba(234, 112, 112, 0.22)"
|
||||||
|
},
|
||||||
|
"id": 1,
|
||||||
|
"isNew": true,
|
||||||
|
"legend": {
|
||||||
|
"alignAsTable": false,
|
||||||
|
"avg": false,
|
||||||
|
"current": false,
|
||||||
|
"hideEmpty": false,
|
||||||
|
"hideZero": false,
|
||||||
|
"max": false,
|
||||||
|
"min": false,
|
||||||
|
"rightSide": false,
|
||||||
|
"show": true,
|
||||||
|
"total": false
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 2,
|
||||||
|
"links": [],
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 5,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"span": 12,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "max(kube_deployment_status_replicas{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}) without (instance, pod)",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "current replicas",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 30
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "min(kube_deployment_status_replicas_available{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}) without (instance, pod)",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "available",
|
||||||
|
"refId": "B",
|
||||||
|
"step": 30
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "max(kube_deployment_status_replicas_unavailable{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}) without (instance, pod)",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "unavailable",
|
||||||
|
"refId": "C",
|
||||||
|
"step": 30
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "min(kube_deployment_status_replicas_updated{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}) without (instance, pod)",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "updated",
|
||||||
|
"refId": "D",
|
||||||
|
"step": 30
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "max(kube_deployment_spec_replicas{deployment=\"$deployment_name\",namespace=\"$deployment_namespace\"}) without (instance, pod)",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "desired",
|
||||||
|
"refId": "E",
|
||||||
|
"step": 30
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Replicas",
|
||||||
|
"tooltip": {
|
||||||
|
"msResolution": true,
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "cumulative"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"mode": "time",
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "none",
|
||||||
|
"label": "",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"label": "",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": false
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"showTitle": false,
|
||||||
|
"title": "Dashboard Row",
|
||||||
|
"titleSize": "h6"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"schemaVersion": 14,
|
||||||
|
"sharedCrosshair": false,
|
||||||
|
"style": "dark",
|
||||||
|
"tags": [],
|
||||||
|
"templating": {
|
||||||
|
"list": [
|
||||||
|
{
|
||||||
|
"allValue": ".*",
|
||||||
|
"current": {},
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"hide": 0,
|
||||||
|
"includeAll": false,
|
||||||
|
"label": "Namespace",
|
||||||
|
"multi": false,
|
||||||
|
"name": "deployment_namespace",
|
||||||
|
"options": [],
|
||||||
|
"query": "label_values(kube_deployment_metadata_generation, namespace)",
|
||||||
|
"refresh": 1,
|
||||||
|
"regex": "",
|
||||||
|
"sort": 0,
|
||||||
|
"tagValuesQuery": null,
|
||||||
|
"tags": [],
|
||||||
|
"tagsQuery": "",
|
||||||
|
"type": "query",
|
||||||
|
"useTags": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"allValue": null,
|
||||||
|
"current": {},
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"hide": 0,
|
||||||
|
"includeAll": false,
|
||||||
|
"label": "Deployment",
|
||||||
|
"multi": false,
|
||||||
|
"name": "deployment_name",
|
||||||
|
"options": [],
|
||||||
|
"query": "label_values(kube_deployment_metadata_generation{namespace=\"$deployment_namespace\"}, deployment)",
|
||||||
|
"refresh": 1,
|
||||||
|
"regex": "",
|
||||||
|
"sort": 0,
|
||||||
|
"tagValuesQuery": "",
|
||||||
|
"tags": [],
|
||||||
|
"tagsQuery": "deployment",
|
||||||
|
"type": "query",
|
||||||
|
"useTags": false
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"time": {
|
||||||
|
"from": "now-6h",
|
||||||
|
"to": "now"
|
||||||
|
},
|
||||||
|
"timepicker": {
|
||||||
|
"refresh_intervals": [
|
||||||
|
"5s",
|
||||||
|
"10s",
|
||||||
|
"30s",
|
||||||
|
"1m",
|
||||||
|
"5m",
|
||||||
|
"15m",
|
||||||
|
"30m",
|
||||||
|
"1h",
|
||||||
|
"2h",
|
||||||
|
"1d"
|
||||||
|
],
|
||||||
|
"time_options": [
|
||||||
|
"5m",
|
||||||
|
"15m",
|
||||||
|
"1h",
|
||||||
|
"6h",
|
||||||
|
"12h",
|
||||||
|
"24h",
|
||||||
|
"2d",
|
||||||
|
"7d",
|
||||||
|
"30d"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"timezone": "browser",
|
||||||
|
"title": "Deployment",
|
||||||
|
"version": 1
|
||||||
|
}
|
||||||
|
etcd-dashboard.json: |+
|
||||||
|
{
|
||||||
|
"__inputs": [
|
||||||
|
{
|
||||||
|
"name": "prometheus",
|
||||||
|
"label": "prometheus",
|
||||||
|
"description": "",
|
||||||
|
"type": "datasource",
|
||||||
|
"pluginId": "prometheus",
|
||||||
|
"pluginName": "Prometheus"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"__requires": [
|
||||||
|
{
|
||||||
|
"type": "grafana",
|
||||||
|
"id": "grafana",
|
||||||
|
"name": "Grafana",
|
||||||
|
"version": "4.5.2"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "panel",
|
||||||
|
"id": "graph",
|
||||||
|
"name": "Graph",
|
||||||
|
"version": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "datasource",
|
||||||
|
"id": "prometheus",
|
||||||
|
"name": "Prometheus",
|
||||||
|
"version": "1.0.0"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "panel",
|
||||||
|
"id": "singlestat",
|
||||||
|
"name": "Singlestat",
|
||||||
|
"version": ""
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"annotations": {
|
||||||
|
"list": []
|
||||||
|
},
|
||||||
|
"description": "etcd sample Grafana dashboard with Prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"gnetId": null,
|
||||||
|
"graphTooltip": 0,
|
||||||
|
"hideControls": false,
|
||||||
|
"id": null,
|
||||||
|
"links": [],
|
||||||
|
"refresh": false,
|
||||||
|
"rows": [
|
||||||
|
{
|
||||||
|
"collapse": false,
|
||||||
|
"height": "250px",
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"cacheTimeout": null,
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": false,
|
||||||
|
"colors": [
|
||||||
|
"rgba(245, 54, 54, 0.9)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(50, 172, 45, 0.97)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"error": false,
|
||||||
|
"format": "none",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": false,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"id": 28,
|
||||||
|
"interval": null,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"nullText": null,
|
||||||
|
"postfix": "",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 3,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": false
|
||||||
|
},
|
||||||
|
"tableColumn": "",
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum(etcd_server_has_leader)",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "",
|
||||||
|
"metric": "etcd_server_has_leader",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 20
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": "",
|
||||||
|
"title": "Up",
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "200%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "N/A",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "avg"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"error": false,
|
||||||
|
"fill": 0,
|
||||||
|
"id": 23,
|
||||||
|
"legend": {
|
||||||
|
"avg": false,
|
||||||
|
"current": false,
|
||||||
|
"max": false,
|
||||||
|
"min": false,
|
||||||
|
"show": false,
|
||||||
|
"total": false,
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 2,
|
||||||
|
"links": [],
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 5,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"span": 5,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum(rate(grpc_server_started_total{grpc_type=\"unary\"}[5m]))",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "RPC Rate",
|
||||||
|
"metric": "grpc_server_started_total",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 4
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum(rate(grpc_server_handled_total{grpc_type=\"unary\",grpc_code!=\"OK\"}[5m]))",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "RPC Failed Rate",
|
||||||
|
"metric": "grpc_server_handled_total",
|
||||||
|
"refId": "B",
|
||||||
|
"step": 4
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": [],
|
||||||
|
"timeFrom": null,
|
||||||
|
"timeShift": null,
|
||||||
|
"title": "RPC Rate",
|
||||||
|
"tooltip": {
|
||||||
|
"msResolution": false,
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "individual"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"buckets": null,
|
||||||
|
"mode": "time",
|
||||||
|
"name": null,
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "ops",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": null,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": null,
|
||||||
|
"show": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"error": false,
|
||||||
|
"fill": 0,
|
||||||
|
"id": 41,
|
||||||
|
"legend": {
|
||||||
|
"avg": false,
|
||||||
|
"current": false,
|
||||||
|
"max": false,
|
||||||
|
"min": false,
|
||||||
|
"show": false,
|
||||||
|
"total": false,
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 2,
|
||||||
|
"links": [],
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 5,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"span": 4,
|
||||||
|
"stack": true,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum(grpc_server_started_total{grpc_service=\"etcdserverpb.Watch\",grpc_type=\"bidi_stream\"}) - sum(grpc_server_handled_total{grpc_service=\"etcdserverpb.Watch\",grpc_type=\"bidi_stream\"})",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "Watch Streams",
|
||||||
|
"metric": "grpc_server_handled_total",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 4
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum(grpc_server_started_total{grpc_service=\"etcdserverpb.Lease\",grpc_type=\"bidi_stream\"}) - sum(grpc_server_handled_total{grpc_service=\"etcdserverpb.Lease\",grpc_type=\"bidi_stream\"})",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "Lease Streams",
|
||||||
|
"metric": "grpc_server_handled_total",
|
||||||
|
"refId": "B",
|
||||||
|
"step": 4
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": [],
|
||||||
|
"timeFrom": null,
|
||||||
|
"timeShift": null,
|
||||||
|
"title": "Active Streams",
|
||||||
|
"tooltip": {
|
||||||
|
"msResolution": false,
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "individual"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"buckets": null,
|
||||||
|
"mode": "time",
|
||||||
|
"name": null,
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"label": "",
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": null,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": null,
|
||||||
|
"show": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"repeat": null,
|
||||||
|
"repeatIteration": null,
|
||||||
|
"repeatRowId": null,
|
||||||
|
"showTitle": false,
|
||||||
|
"title": "Row",
|
||||||
|
"titleSize": "h6"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collapse": false,
|
||||||
|
"height": "250px",
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"decimals": null,
|
||||||
|
"editable": false,
|
||||||
|
"error": false,
|
||||||
|
"fill": 0,
|
||||||
|
"grid": {},
|
||||||
|
"id": 1,
|
||||||
|
"legend": {
|
||||||
|
"avg": false,
|
||||||
|
"current": false,
|
||||||
|
"max": false,
|
||||||
|
"min": false,
|
||||||
|
"show": false,
|
||||||
|
"total": false,
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 2,
|
||||||
|
"links": [],
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 5,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"span": 4,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "etcd_debugging_mvcc_db_total_size_in_bytes",
|
||||||
|
"format": "time_series",
|
||||||
|
"hide": false,
|
||||||
|
"interval": "",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "{{instance}} DB Size",
|
||||||
|
"metric": "",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 4
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": [],
|
||||||
|
"timeFrom": null,
|
||||||
|
"timeShift": null,
|
||||||
|
"title": "DB Size",
|
||||||
|
"tooltip": {
|
||||||
|
"msResolution": false,
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "cumulative"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"buckets": null,
|
||||||
|
"mode": "time",
|
||||||
|
"name": null,
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "bytes",
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": null,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": null,
|
||||||
|
"show": false
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"error": false,
|
||||||
|
"fill": 0,
|
||||||
|
"grid": {},
|
||||||
|
"id": 3,
|
||||||
|
"legend": {
|
||||||
|
"avg": false,
|
||||||
|
"current": false,
|
||||||
|
"max": false,
|
||||||
|
"min": false,
|
||||||
|
"show": false,
|
||||||
|
"total": false,
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 2,
|
||||||
|
"links": [],
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 1,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"span": 4,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": true,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "histogram_quantile(0.99, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket[5m])) by (instance, le))",
|
||||||
|
"format": "time_series",
|
||||||
|
"hide": false,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "{{instance}} WAL fsync",
|
||||||
|
"metric": "etcd_disk_wal_fsync_duration_seconds_bucket",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 4
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "histogram_quantile(0.99, sum(rate(etcd_disk_backend_commit_duration_seconds_bucket[5m])) by (instance, le))",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "{{instance}} DB fsync",
|
||||||
|
"metric": "etcd_disk_backend_commit_duration_seconds_bucket",
|
||||||
|
"refId": "B",
|
||||||
|
"step": 4
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": [],
|
||||||
|
"timeFrom": null,
|
||||||
|
"timeShift": null,
|
||||||
|
"title": "Disk Sync Duration",
|
||||||
|
"tooltip": {
|
||||||
|
"msResolution": false,
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "cumulative"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"buckets": null,
|
||||||
|
"mode": "time",
|
||||||
|
"name": null,
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "s",
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": null,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": null,
|
||||||
|
"show": false
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"error": false,
|
||||||
|
"fill": 0,
|
||||||
|
"id": 29,
|
||||||
|
"legend": {
|
||||||
|
"avg": false,
|
||||||
|
"current": false,
|
||||||
|
"max": false,
|
||||||
|
"min": false,
|
||||||
|
"show": false,
|
||||||
|
"total": false,
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 2,
|
||||||
|
"links": [],
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 5,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"span": 4,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "process_resident_memory_bytes",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "{{instance}} Resident Memory",
|
||||||
|
"metric": "process_resident_memory_bytes",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 4
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": [],
|
||||||
|
"timeFrom": null,
|
||||||
|
"timeShift": null,
|
||||||
|
"title": "Memory",
|
||||||
|
"tooltip": {
|
||||||
|
"msResolution": false,
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "individual"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"buckets": null,
|
||||||
|
"mode": "time",
|
||||||
|
"name": null,
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "bytes",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": null,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": null,
|
||||||
|
"show": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"repeat": null,
|
||||||
|
"repeatIteration": null,
|
||||||
|
"repeatRowId": null,
|
||||||
|
"showTitle": false,
|
||||||
|
"title": "New row",
|
||||||
|
"titleSize": "h6"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collapse": false,
|
||||||
|
"height": "250px",
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"error": false,
|
||||||
|
"fill": 5,
|
||||||
|
"id": 22,
|
||||||
|
"legend": {
|
||||||
|
"avg": false,
|
||||||
|
"current": false,
|
||||||
|
"max": false,
|
||||||
|
"min": false,
|
||||||
|
"show": false,
|
||||||
|
"total": false,
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 2,
|
||||||
|
"links": [],
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 5,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"span": 3,
|
||||||
|
"stack": true,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "rate(etcd_network_client_grpc_received_bytes_total[5m])",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "{{instance}} Client Traffic In",
|
||||||
|
"metric": "etcd_network_client_grpc_received_bytes_total",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 4
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": [],
|
||||||
|
"timeFrom": null,
|
||||||
|
"timeShift": null,
|
||||||
|
"title": "Client Traffic In",
|
||||||
|
"tooltip": {
|
||||||
|
"msResolution": false,
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "individual"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"buckets": null,
|
||||||
|
"mode": "time",
|
||||||
|
"name": null,
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "Bps",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": null,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": null,
|
||||||
|
"show": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"error": false,
|
||||||
|
"fill": 5,
|
||||||
|
"id": 21,
|
||||||
|
"legend": {
|
||||||
|
"avg": false,
|
||||||
|
"current": false,
|
||||||
|
"max": false,
|
||||||
|
"min": false,
|
||||||
|
"show": false,
|
||||||
|
"total": false,
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 2,
|
||||||
|
"links": [],
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 5,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"span": 3,
|
||||||
|
"stack": true,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "rate(etcd_network_client_grpc_sent_bytes_total[5m])",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "{{instance}} Client Traffic Out",
|
||||||
|
"metric": "etcd_network_client_grpc_sent_bytes_total",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 4
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": [],
|
||||||
|
"timeFrom": null,
|
||||||
|
"timeShift": null,
|
||||||
|
"title": "Client Traffic Out",
|
||||||
|
"tooltip": {
|
||||||
|
"msResolution": false,
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "individual"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"buckets": null,
|
||||||
|
"mode": "time",
|
||||||
|
"name": null,
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "Bps",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": null,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": null,
|
||||||
|
"show": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"error": false,
|
||||||
|
"fill": 0,
|
||||||
|
"id": 20,
|
||||||
|
"legend": {
|
||||||
|
"avg": false,
|
||||||
|
"current": false,
|
||||||
|
"max": false,
|
||||||
|
"min": false,
|
||||||
|
"show": false,
|
||||||
|
"total": false,
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 2,
|
||||||
|
"links": [],
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 5,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"span": 3,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum(rate(etcd_network_peer_received_bytes_total[5m])) by (instance)",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "{{instance}} Peer Traffic In",
|
||||||
|
"metric": "etcd_network_peer_received_bytes_total",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 4
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": [],
|
||||||
|
"timeFrom": null,
|
||||||
|
"timeShift": null,
|
||||||
|
"title": "Peer Traffic In",
|
||||||
|
"tooltip": {
|
||||||
|
"msResolution": false,
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "individual"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"buckets": null,
|
||||||
|
"mode": "time",
|
||||||
|
"name": null,
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "Bps",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": null,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": null,
|
||||||
|
"show": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"decimals": null,
|
||||||
|
"editable": false,
|
||||||
|
"error": false,
|
||||||
|
"fill": 0,
|
||||||
|
"grid": {},
|
||||||
|
"id": 16,
|
||||||
|
"legend": {
|
||||||
|
"avg": false,
|
||||||
|
"current": false,
|
||||||
|
"max": false,
|
||||||
|
"min": false,
|
||||||
|
"show": false,
|
||||||
|
"total": false,
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 2,
|
||||||
|
"links": [],
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 5,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"span": 3,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum(rate(etcd_network_peer_sent_bytes_total[5m])) by (instance)",
|
||||||
|
"format": "time_series",
|
||||||
|
"hide": false,
|
||||||
|
"interval": "",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "{{instance}} Peer Traffic Out",
|
||||||
|
"metric": "etcd_network_peer_sent_bytes_total",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 4
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": [],
|
||||||
|
"timeFrom": null,
|
||||||
|
"timeShift": null,
|
||||||
|
"title": "Peer Traffic Out",
|
||||||
|
"tooltip": {
|
||||||
|
"msResolution": false,
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "cumulative"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"buckets": null,
|
||||||
|
"mode": "time",
|
||||||
|
"name": null,
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "Bps",
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": null,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": null,
|
||||||
|
"show": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"repeat": null,
|
||||||
|
"repeatIteration": null,
|
||||||
|
"repeatRowId": null,
|
||||||
|
"showTitle": false,
|
||||||
|
"title": "New row",
|
||||||
|
"titleSize": "h6"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collapse": false,
|
||||||
|
"height": "250px",
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"error": false,
|
||||||
|
"fill": 0,
|
||||||
|
"id": 40,
|
||||||
|
"legend": {
|
||||||
|
"avg": false,
|
||||||
|
"current": false,
|
||||||
|
"max": false,
|
||||||
|
"min": false,
|
||||||
|
"show": false,
|
||||||
|
"total": false,
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 2,
|
||||||
|
"links": [],
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 5,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"span": 6,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum(rate(etcd_server_proposals_failed_total[5m]))",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "Proposal Failure Rate",
|
||||||
|
"metric": "etcd_server_proposals_failed_total",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum(etcd_server_proposals_pending)",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "Proposal Pending Total",
|
||||||
|
"metric": "etcd_server_proposals_pending",
|
||||||
|
"refId": "B",
|
||||||
|
"step": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum(rate(etcd_server_proposals_committed_total[5m]))",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "Proposal Commit Rate",
|
||||||
|
"metric": "etcd_server_proposals_committed_total",
|
||||||
|
"refId": "C",
|
||||||
|
"step": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum(rate(etcd_server_proposals_applied_total[5m]))",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "Proposal Apply Rate",
|
||||||
|
"refId": "D",
|
||||||
|
"step": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": [],
|
||||||
|
"timeFrom": null,
|
||||||
|
"timeShift": null,
|
||||||
|
"title": "Raft Proposals",
|
||||||
|
"tooltip": {
|
||||||
|
"msResolution": false,
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "individual"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"buckets": null,
|
||||||
|
"mode": "time",
|
||||||
|
"name": null,
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"label": "",
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": null,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": null,
|
||||||
|
"show": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"decimals": 0,
|
||||||
|
"editable": false,
|
||||||
|
"error": false,
|
||||||
|
"fill": 0,
|
||||||
|
"id": 19,
|
||||||
|
"legend": {
|
||||||
|
"alignAsTable": false,
|
||||||
|
"avg": false,
|
||||||
|
"current": false,
|
||||||
|
"max": false,
|
||||||
|
"min": false,
|
||||||
|
"rightSide": false,
|
||||||
|
"show": false,
|
||||||
|
"total": false,
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 2,
|
||||||
|
"links": [],
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 5,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"span": 6,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "changes(etcd_server_leader_changes_seen_total[1d])",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "{{instance}} Total Leader Elections Per Day",
|
||||||
|
"metric": "etcd_server_leader_changes_seen_total",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": [],
|
||||||
|
"timeFrom": null,
|
||||||
|
"timeShift": null,
|
||||||
|
"title": "Total Leader Elections Per Day",
|
||||||
|
"tooltip": {
|
||||||
|
"msResolution": false,
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "individual"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"buckets": null,
|
||||||
|
"mode": "time",
|
||||||
|
"name": null,
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": null,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"label": null,
|
||||||
|
"logBase": 1,
|
||||||
|
"max": null,
|
||||||
|
"min": null,
|
||||||
|
"show": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"repeat": null,
|
||||||
|
"repeatIteration": null,
|
||||||
|
"repeatRowId": null,
|
||||||
|
"showTitle": false,
|
||||||
|
"title": "New row",
|
||||||
|
"titleSize": "h6"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"schemaVersion": 14,
|
||||||
|
"style": "dark",
|
||||||
|
"tags": [],
|
||||||
|
"templating": {
|
||||||
|
"list": []
|
||||||
|
},
|
||||||
|
"time": {
|
||||||
|
"from": "now-15m",
|
||||||
|
"to": "now"
|
||||||
|
},
|
||||||
|
"timepicker": {
|
||||||
|
"now": true,
|
||||||
|
"refresh_intervals": [
|
||||||
|
"5s",
|
||||||
|
"10s",
|
||||||
|
"30s",
|
||||||
|
"1m",
|
||||||
|
"5m",
|
||||||
|
"15m",
|
||||||
|
"30m",
|
||||||
|
"1h",
|
||||||
|
"2h",
|
||||||
|
"1d"
|
||||||
|
],
|
||||||
|
"time_options": [
|
||||||
|
"5m",
|
||||||
|
"15m",
|
||||||
|
"1h",
|
||||||
|
"6h",
|
||||||
|
"12h",
|
||||||
|
"24h",
|
||||||
|
"2d",
|
||||||
|
"7d",
|
||||||
|
"30d"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"timezone": "browser",
|
||||||
|
"title": "etcd",
|
||||||
|
"version": 4
|
||||||
|
}
|
||||||
|
kubernetes-capacity-planning-dashboard.json: |+
|
||||||
|
{
|
||||||
|
"__inputs": [
|
||||||
|
{
|
||||||
|
"description": "",
|
||||||
|
"label": "prometheus",
|
||||||
|
"name": "prometheus",
|
||||||
|
"pluginId": "prometheus",
|
||||||
|
"pluginName": "Prometheus",
|
||||||
|
"type": "datasource"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"annotations": {
|
||||||
|
"list": []
|
||||||
|
},
|
||||||
|
"editable": false,
|
||||||
|
"gnetId": 22,
|
||||||
|
"graphTooltip": 0,
|
||||||
|
"hideControls": false,
|
||||||
|
"links": [],
|
||||||
|
"refresh": false,
|
||||||
|
"rows": [
|
||||||
|
{
|
||||||
|
"collapse": false,
|
||||||
|
"editable": false,
|
||||||
|
"height": "250px",
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"error": false,
|
||||||
|
"fill": 1,
|
||||||
|
"grid": {
|
||||||
|
"threshold1Color": "rgba(216, 200, 27, 0.27)",
|
||||||
|
"threshold2Color": "rgba(234, 112, 112, 0.22)"
|
||||||
|
},
|
||||||
|
"id": 3,
|
||||||
|
"isNew": false,
|
||||||
|
"legend": {
|
||||||
|
"alignAsTable": false,
|
||||||
|
"avg": false,
|
||||||
|
"current": false,
|
||||||
|
"hideEmpty": false,
|
||||||
|
"hideZero": false,
|
||||||
|
"max": false,
|
||||||
|
"min": false,
|
||||||
|
"rightSide": false,
|
||||||
|
"show": true,
|
||||||
|
"total": false
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 2,
|
||||||
|
"links": [],
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 5,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"span": 6,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum(rate(node_cpu{mode=\"idle\"}[2m])) * 100",
|
||||||
|
"hide": false,
|
||||||
|
"intervalFactor": 10,
|
||||||
|
"legendFormat": "",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 50
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Idle CPU",
|
||||||
|
"tooltip": {
|
||||||
|
"msResolution": false,
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "cumulative"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"mode": "time",
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "percent",
|
||||||
|
"label": "cpu usage",
|
||||||
|
"logBase": 1,
|
||||||
|
"min": 0,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"error": false,
|
||||||
|
"fill": 1,
|
||||||
|
"grid": {
|
||||||
|
"threshold1Color": "rgba(216, 200, 27, 0.27)",
|
||||||
|
"threshold2Color": "rgba(234, 112, 112, 0.22)"
|
||||||
|
},
|
||||||
|
"id": 9,
|
||||||
|
"isNew": false,
|
||||||
|
"legend": {
|
||||||
|
"alignAsTable": false,
|
||||||
|
"avg": false,
|
||||||
|
"current": false,
|
||||||
|
"hideEmpty": false,
|
||||||
|
"hideZero": false,
|
||||||
|
"max": false,
|
||||||
|
"min": false,
|
||||||
|
"rightSide": false,
|
||||||
|
"show": true,
|
||||||
|
"total": false
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 2,
|
||||||
|
"links": [],
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 5,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"span": 6,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum(node_load1)",
|
||||||
|
"intervalFactor": 4,
|
||||||
|
"legendFormat": "load 1m",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 20,
|
||||||
|
"target": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum(node_load5)",
|
||||||
|
"intervalFactor": 4,
|
||||||
|
"legendFormat": "load 5m",
|
||||||
|
"refId": "B",
|
||||||
|
"step": 20,
|
||||||
|
"target": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum(node_load15)",
|
||||||
|
"intervalFactor": 4,
|
||||||
|
"legendFormat": "load 15m",
|
||||||
|
"refId": "C",
|
||||||
|
"step": 20,
|
||||||
|
"target": ""
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "System Load",
|
||||||
|
"tooltip": {
|
||||||
|
"msResolution": false,
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "cumulative"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"mode": "time",
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "percentunit",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"showTitle": false,
|
||||||
|
"title": "New Row",
|
||||||
|
"titleSize": "h6"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collapse": false,
|
||||||
|
"editable": false,
|
||||||
|
"height": "250px",
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"error": false,
|
||||||
|
"fill": 1,
|
||||||
|
"grid": {
|
||||||
|
"threshold1Color": "rgba(216, 200, 27, 0.27)",
|
||||||
|
"threshold2Color": "rgba(234, 112, 112, 0.22)"
|
||||||
|
},
|
||||||
|
"id": 4,
|
||||||
|
"isNew": false,
|
||||||
|
"legend": {
|
||||||
|
"alignAsTable": false,
|
||||||
|
"avg": false,
|
||||||
|
"current": false,
|
||||||
|
"hideEmpty": false,
|
||||||
|
"hideZero": false,
|
||||||
|
"max": false,
|
||||||
|
"min": false,
|
||||||
|
"rightSide": false,
|
||||||
|
"show": true,
|
||||||
|
"total": false
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 2,
|
||||||
|
"links": [],
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 5,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [
|
||||||
|
{
|
||||||
|
"alias": "node_memory_SwapFree{instance=\"172.17.0.1:9100\",job=\"prometheus\"}",
|
||||||
|
"yaxis": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"span": 9,
|
||||||
|
"stack": true,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum(node_memory_MemTotal) - sum(node_memory_MemFree) - sum(node_memory_Buffers) - sum(node_memory_Cached)",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "memory usage",
|
||||||
|
"metric": "memo",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 10,
|
||||||
|
"target": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum(node_memory_Buffers)",
|
||||||
|
"interval": "",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "memory buffers",
|
||||||
|
"metric": "memo",
|
||||||
|
"refId": "B",
|
||||||
|
"step": 10,
|
||||||
|
"target": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum(node_memory_Cached)",
|
||||||
|
"interval": "",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "memory cached",
|
||||||
|
"metric": "memo",
|
||||||
|
"refId": "C",
|
||||||
|
"step": 10,
|
||||||
|
"target": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum(node_memory_MemFree)",
|
||||||
|
"interval": "",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "memory free",
|
||||||
|
"metric": "memo",
|
||||||
|
"refId": "D",
|
||||||
|
"step": 10,
|
||||||
|
"target": ""
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Memory Usage",
|
||||||
|
"tooltip": {
|
||||||
|
"msResolution": false,
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "individual"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"mode": "time",
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "bytes",
|
||||||
|
"logBase": 1,
|
||||||
|
"min": "0",
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": false,
|
||||||
|
"colors": [
|
||||||
|
"rgba(50, 172, 45, 0.97)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(245, 54, 54, 0.9)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "percent",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": true,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"hideTimeOverride": false,
|
||||||
|
"id": 5,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfix": "",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 3,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": false
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "((sum(node_memory_MemTotal) - sum(node_memory_MemFree) - sum(node_memory_Buffers) - sum(node_memory_Cached)) / sum(node_memory_MemTotal)) * 100",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"metric": "",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 60,
|
||||||
|
"target": ""
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": "80, 90",
|
||||||
|
"title": "Memory Usage",
|
||||||
|
"transparent": false,
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "80%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "N/A",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "avg"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"showTitle": false,
|
||||||
|
"title": "New Row",
|
||||||
|
"titleSize": "h6"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collapse": false,
|
||||||
|
"editable": false,
|
||||||
|
"height": "246px",
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"error": false,
|
||||||
|
"fill": 1,
|
||||||
|
"grid": {
|
||||||
|
"threshold1Color": "rgba(216, 200, 27, 0.27)",
|
||||||
|
"threshold2Color": "rgba(234, 112, 112, 0.22)"
|
||||||
|
},
|
||||||
|
"id": 6,
|
||||||
|
"isNew": false,
|
||||||
|
"legend": {
|
||||||
|
"alignAsTable": false,
|
||||||
|
"avg": false,
|
||||||
|
"current": false,
|
||||||
|
"hideEmpty": false,
|
||||||
|
"hideZero": false,
|
||||||
|
"max": false,
|
||||||
|
"min": false,
|
||||||
|
"rightSide": false,
|
||||||
|
"show": true,
|
||||||
|
"total": false
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 2,
|
||||||
|
"links": [],
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 5,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [
|
||||||
|
{
|
||||||
|
"alias": "read",
|
||||||
|
"yaxis": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"alias": "{instance=\"172.17.0.1:9100\"}",
|
||||||
|
"yaxis": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"alias": "io time",
|
||||||
|
"yaxis": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"span": 9,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum(rate(node_disk_bytes_read[5m]))",
|
||||||
|
"hide": false,
|
||||||
|
"intervalFactor": 4,
|
||||||
|
"legendFormat": "read",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 20,
|
||||||
|
"target": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum(rate(node_disk_bytes_written[5m]))",
|
||||||
|
"intervalFactor": 4,
|
||||||
|
"legendFormat": "written",
|
||||||
|
"refId": "B",
|
||||||
|
"step": 20
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum(rate(node_disk_io_time_ms[5m]))",
|
||||||
|
"intervalFactor": 4,
|
||||||
|
"legendFormat": "io time",
|
||||||
|
"refId": "C",
|
||||||
|
"step": 20
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Disk I/O",
|
||||||
|
"tooltip": {
|
||||||
|
"msResolution": false,
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "cumulative"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"mode": "time",
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "bytes",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "ms",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": false,
|
||||||
|
"colors": [
|
||||||
|
"rgba(50, 172, 45, 0.97)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(245, 54, 54, 0.9)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "percentunit",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 1,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": true,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"hideTimeOverride": false,
|
||||||
|
"id": 12,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfix": "",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 3,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": false
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "(sum(node_filesystem_size{device!=\"rootfs\"}) - sum(node_filesystem_free{device!=\"rootfs\"})) / sum(node_filesystem_size{device!=\"rootfs\"})",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"refId": "A",
|
||||||
|
"step": 60,
|
||||||
|
"target": ""
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": "0.75, 0.9",
|
||||||
|
"title": "Disk Space Usage",
|
||||||
|
"transparent": false,
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "80%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "N/A",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "current"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"showTitle": false,
|
||||||
|
"title": "New Row",
|
||||||
|
"titleSize": "h6"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collapse": false,
|
||||||
|
"editable": false,
|
||||||
|
"height": "250px",
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"error": false,
|
||||||
|
"fill": 1,
|
||||||
|
"grid": {
|
||||||
|
"threshold1Color": "rgba(216, 200, 27, 0.27)",
|
||||||
|
"threshold2Color": "rgba(234, 112, 112, 0.22)"
|
||||||
|
},
|
||||||
|
"id": 8,
|
||||||
|
"isNew": false,
|
||||||
|
"legend": {
|
||||||
|
"alignAsTable": false,
|
||||||
|
"avg": false,
|
||||||
|
"current": false,
|
||||||
|
"hideEmpty": false,
|
||||||
|
"hideZero": false,
|
||||||
|
"max": false,
|
||||||
|
"min": false,
|
||||||
|
"rightSide": false,
|
||||||
|
"show": true,
|
||||||
|
"total": false
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 2,
|
||||||
|
"links": [],
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 5,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [
|
||||||
|
{
|
||||||
|
"alias": "transmitted",
|
||||||
|
"yaxis": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"span": 6,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum(rate(node_network_receive_bytes{device!~\"lo\"}[5m]))",
|
||||||
|
"hide": false,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 10,
|
||||||
|
"target": ""
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Network Received",
|
||||||
|
"tooltip": {
|
||||||
|
"msResolution": false,
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "cumulative"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"mode": "time",
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "bytes",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "bytes",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"error": false,
|
||||||
|
"fill": 1,
|
||||||
|
"grid": {
|
||||||
|
"threshold1Color": "rgba(216, 200, 27, 0.27)",
|
||||||
|
"threshold2Color": "rgba(234, 112, 112, 0.22)"
|
||||||
|
},
|
||||||
|
"id": 10,
|
||||||
|
"isNew": false,
|
||||||
|
"legend": {
|
||||||
|
"alignAsTable": false,
|
||||||
|
"avg": false,
|
||||||
|
"current": false,
|
||||||
|
"hideEmpty": false,
|
||||||
|
"hideZero": false,
|
||||||
|
"max": false,
|
||||||
|
"min": false,
|
||||||
|
"rightSide": false,
|
||||||
|
"show": true,
|
||||||
|
"total": false
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 2,
|
||||||
|
"links": [],
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 5,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [
|
||||||
|
{
|
||||||
|
"alias": "transmitted",
|
||||||
|
"yaxis": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"span": 6,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum(rate(node_network_transmit_bytes{device!~\"lo\"}[5m]))",
|
||||||
|
"hide": false,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "",
|
||||||
|
"refId": "B",
|
||||||
|
"step": 10,
|
||||||
|
"target": ""
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Network Transmitted",
|
||||||
|
"tooltip": {
|
||||||
|
"msResolution": false,
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "cumulative"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"mode": "time",
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "bytes",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "bytes",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"showTitle": false,
|
||||||
|
"title": "New Row",
|
||||||
|
"titleSize": "h6"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collapse": false,
|
||||||
|
"editable": false,
|
||||||
|
"height": "276px",
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"error": false,
|
||||||
|
"fill": 1,
|
||||||
|
"grid": {
|
||||||
|
"threshold1Color": "rgba(216, 200, 27, 0.27)",
|
||||||
|
"threshold2Color": "rgba(234, 112, 112, 0.22)"
|
||||||
|
},
|
||||||
|
"id": 11,
|
||||||
|
"isNew": true,
|
||||||
|
"legend": {
|
||||||
|
"alignAsTable": false,
|
||||||
|
"avg": false,
|
||||||
|
"current": false,
|
||||||
|
"hideEmpty": false,
|
||||||
|
"hideZero": false,
|
||||||
|
"max": false,
|
||||||
|
"min": false,
|
||||||
|
"rightSide": false,
|
||||||
|
"show": true,
|
||||||
|
"total": false
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 2,
|
||||||
|
"links": [],
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 5,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [],
|
||||||
|
"spaceLength": 11,
|
||||||
|
"span": 9,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum(kube_pod_info)",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "Current number of Pods",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum(kube_node_status_capacity_pods)",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "Maximum capacity of pods",
|
||||||
|
"refId": "B",
|
||||||
|
"step": 10
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Cluster Pod Utilization",
|
||||||
|
"tooltip": {
|
||||||
|
"msResolution": false,
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "individual"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"mode": "time",
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": false,
|
||||||
|
"colors": [
|
||||||
|
"rgba(50, 172, 45, 0.97)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(245, 54, 54, 0.9)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "percent",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": true,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"hideTimeOverride": false,
|
||||||
|
"id": 7,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfix": "",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 3,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": false
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "100 - (sum(kube_node_status_capacity_pods) - sum(kube_pod_info)) / sum(kube_node_status_capacity_pods) * 100",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 60,
|
||||||
|
"target": ""
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": "80, 90",
|
||||||
|
"title": "Pod Utilization",
|
||||||
|
"transparent": false,
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "80%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "N/A",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "current"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"showTitle": false,
|
||||||
|
"title": "New Row",
|
||||||
|
"titleSize": "h6"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"schemaVersion": 14,
|
||||||
|
"sharedCrosshair": false,
|
||||||
|
"style": "dark",
|
||||||
|
"tags": [],
|
||||||
|
"templating": {
|
||||||
|
"list": []
|
||||||
|
},
|
||||||
|
"time": {
|
||||||
|
"from": "now-1h",
|
||||||
|
"to": "now"
|
||||||
|
},
|
||||||
|
"timepicker": {
|
||||||
|
"refresh_intervals": [
|
||||||
|
"5s",
|
||||||
|
"10s",
|
||||||
|
"30s",
|
||||||
|
"1m",
|
||||||
|
"5m",
|
||||||
|
"15m",
|
||||||
|
"30m",
|
||||||
|
"1h",
|
||||||
|
"2h",
|
||||||
|
"1d"
|
||||||
|
],
|
||||||
|
"time_options": [
|
||||||
|
"5m",
|
||||||
|
"15m",
|
||||||
|
"1h",
|
||||||
|
"6h",
|
||||||
|
"12h",
|
||||||
|
"24h",
|
||||||
|
"2d",
|
||||||
|
"7d",
|
||||||
|
"30d"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"timezone": "browser",
|
||||||
|
"title": "Kubernetes Capacity Planning",
|
||||||
|
"version": 4
|
||||||
|
}
|
||||||
|
kubernetes-cluster-health-dashboard.json: |+
|
||||||
|
{
|
||||||
|
"__inputs": [
|
||||||
|
{
|
||||||
|
"description": "",
|
||||||
|
"label": "prometheus",
|
||||||
|
"name": "prometheus",
|
||||||
|
"pluginId": "prometheus",
|
||||||
|
"pluginName": "Prometheus",
|
||||||
|
"type": "datasource"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"annotations": {
|
||||||
|
"list": []
|
||||||
|
},
|
||||||
|
"editable": false,
|
||||||
|
"graphTooltip": 0,
|
||||||
|
"hideControls": false,
|
||||||
|
"links": [],
|
||||||
|
"refresh": "10s",
|
||||||
|
"rows": [
|
||||||
|
{
|
||||||
|
"collapse": false,
|
||||||
|
"editable": false,
|
||||||
|
"height": "254px",
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": true,
|
||||||
|
"colors": [
|
||||||
|
"rgba(50, 172, 45, 0.97)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(245, 54, 54, 0.9)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "none",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": false,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"hideTimeOverride": false,
|
||||||
|
"id": 1,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfix": "",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 3,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": false
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum(up{job=~\"apiserver|kube-scheduler|kube-controller-manager\"} == 0)",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 600
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": "1, 3",
|
||||||
|
"title": "Control Plane Components Down",
|
||||||
|
"transparent": false,
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "80%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "Everything UP and healthy",
|
||||||
|
"value": "null"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "",
|
||||||
|
"value": ""
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "avg"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": true,
|
||||||
|
"colors": [
|
||||||
|
"rgba(50, 172, 45, 0.97)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(245, 54, 54, 0.9)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "none",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": false,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"hideTimeOverride": false,
|
||||||
|
"id": 2,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfix": "",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 3,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": false
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum(ALERTS{alertstate=\"firing\",alertname!=\"DeadMansSwitch\"})",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 600
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": "1, 3",
|
||||||
|
"title": "Alerts Firing",
|
||||||
|
"transparent": false,
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "80%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "0",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "current"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": true,
|
||||||
|
"colors": [
|
||||||
|
"rgba(50, 172, 45, 0.97)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(245, 54, 54, 0.9)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "none",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": false,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"hideTimeOverride": false,
|
||||||
|
"id": 3,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfix": "",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 3,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": false
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum(ALERTS{alertstate=\"pending\",alertname!=\"DeadMansSwitch\"})",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 600
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": "3, 5",
|
||||||
|
"title": "Alerts Pending",
|
||||||
|
"transparent": false,
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "80%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "0",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "current"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": true,
|
||||||
|
"colors": [
|
||||||
|
"rgba(50, 172, 45, 0.97)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(245, 54, 54, 0.9)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "none",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": false,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"hideTimeOverride": false,
|
||||||
|
"id": 4,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfix": "",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 3,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": false
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "count(increase(kube_pod_container_status_restarts[1h]) > 5)",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 600
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": "1, 3",
|
||||||
|
"title": "Crashlooping Pods",
|
||||||
|
"transparent": false,
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "80%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "0",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "current"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"showTitle": false,
|
||||||
|
"title": "Row",
|
||||||
|
"titleSize": "h6"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collapse": false,
|
||||||
|
"editable": false,
|
||||||
|
"height": "250px",
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": true,
|
||||||
|
"colors": [
|
||||||
|
"rgba(50, 172, 45, 0.97)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(245, 54, 54, 0.9)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "none",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": false,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"hideTimeOverride": false,
|
||||||
|
"id": 5,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfix": "",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 3,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": false
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum(kube_node_status_condition{condition=\"Ready\",status!=\"true\"})",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 600
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": "1, 3",
|
||||||
|
"title": "Node Not Ready",
|
||||||
|
"transparent": false,
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "80%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "N/A",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "current"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": true,
|
||||||
|
"colors": [
|
||||||
|
"rgba(50, 172, 45, 0.97)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(245, 54, 54, 0.9)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "none",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": false,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"hideTimeOverride": false,
|
||||||
|
"id": 6,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfix": "",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 3,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": false
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum(kube_node_status_condition{condition=\"DiskPressure\",status=\"true\"})",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 600
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": "1, 3",
|
||||||
|
"title": "Node Disk Pressure",
|
||||||
|
"transparent": false,
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "80%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "N/A",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "current"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": true,
|
||||||
|
"colors": [
|
||||||
|
"rgba(50, 172, 45, 0.97)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(245, 54, 54, 0.9)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "none",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": false,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"hideTimeOverride": false,
|
||||||
|
"id": 7,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfix": "",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 3,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": false
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum(kube_node_status_condition{condition=\"MemoryPressure\",status=\"true\"})",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 600
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": "1, 3",
|
||||||
|
"title": "Node Memory Pressure",
|
||||||
|
"transparent": false,
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "80%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "N/A",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "current"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": true,
|
||||||
|
"colors": [
|
||||||
|
"rgba(50, 172, 45, 0.97)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(245, 54, 54, 0.9)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "none",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": false,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"hideTimeOverride": false,
|
||||||
|
"id": 8,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfix": "",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 3,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": false
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum(kube_node_spec_unschedulable)",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 600
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": "1, 3",
|
||||||
|
"title": "Nodes Unschedulable",
|
||||||
|
"transparent": false,
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "80%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "N/A",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "current"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"showTitle": false,
|
||||||
|
"title": "Row",
|
||||||
|
"titleSize": "h6"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"schemaVersion": 14,
|
||||||
|
"sharedCrosshair": false,
|
||||||
|
"style": "dark",
|
||||||
|
"tags": [],
|
||||||
|
"templating": {
|
||||||
|
"list": []
|
||||||
|
},
|
||||||
|
"time": {
|
||||||
|
"from": "now-6h",
|
||||||
|
"to": "now"
|
||||||
|
},
|
||||||
|
"timepicker": {
|
||||||
|
"refresh_intervals": [
|
||||||
|
"5s",
|
||||||
|
"10s",
|
||||||
|
"30s",
|
||||||
|
"1m",
|
||||||
|
"5m",
|
||||||
|
"15m",
|
||||||
|
"30m",
|
||||||
|
"1h",
|
||||||
|
"2h",
|
||||||
|
"1d"
|
||||||
|
],
|
||||||
|
"time_options": [
|
||||||
|
"5m",
|
||||||
|
"15m",
|
||||||
|
"1h",
|
||||||
|
"6h",
|
||||||
|
"12h",
|
||||||
|
"24h",
|
||||||
|
"2d",
|
||||||
|
"7d",
|
||||||
|
"30d"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"timezone": "browser",
|
||||||
|
"title": "Kubernetes Cluster Health",
|
||||||
|
"version": 9
|
||||||
|
}
|
||||||
|
kubernetes-cluster-status-dashboard.json: |+
|
||||||
|
{
|
||||||
|
"__inputs": [
|
||||||
|
{
|
||||||
|
"description": "",
|
||||||
|
"label": "prometheus",
|
||||||
|
"name": "prometheus",
|
||||||
|
"pluginId": "prometheus",
|
||||||
|
"pluginName": "Prometheus",
|
||||||
|
"type": "datasource"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"annotations": {
|
||||||
|
"list": []
|
||||||
|
},
|
||||||
|
"editable": false,
|
||||||
|
"graphTooltip": 0,
|
||||||
|
"hideControls": false,
|
||||||
|
"links": [],
|
||||||
|
"rows": [
|
||||||
|
{
|
||||||
|
"collapse": false,
|
||||||
|
"editable": false,
|
||||||
|
"height": "129px",
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": true,
|
||||||
|
"colors": [
|
||||||
|
"rgba(50, 172, 45, 0.97)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(245, 54, 54, 0.9)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "none",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": false,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"id": 5,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 6,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": false
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum(up{job=~\"apiserver|kube-scheduler|kube-controller-manager\"} == 0)",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"refId": "A",
|
||||||
|
"step": 600
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": "1, 3",
|
||||||
|
"title": "Control Plane UP",
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "80%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "UP",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "total"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": true,
|
||||||
|
"colors": [
|
||||||
|
"rgba(50, 172, 45, 0.97)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(245, 54, 54, 0.9)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "none",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": false,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"id": 6,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 6,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": false
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum(ALERTS{alertstate=\"firing\",alertname!=\"DeadMansSwitch\"})",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"refId": "A",
|
||||||
|
"step": 600
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": "3, 5",
|
||||||
|
"title": "Alerts Firing",
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "80%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "0",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "current"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"showTitle": true,
|
||||||
|
"title": "Cluster Health",
|
||||||
|
"titleSize": "h6"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collapse": false,
|
||||||
|
"editable": false,
|
||||||
|
"height": "168px",
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": false,
|
||||||
|
"colors": [
|
||||||
|
"rgba(245, 54, 54, 0.9)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(50, 172, 45, 0.97)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "percent",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": true,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"id": 1,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 3,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": false
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "(sum(up{job=\"apiserver\"} == 1) / count(up{job=\"apiserver\"})) * 100",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"refId": "A",
|
||||||
|
"step": 600
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": "50, 80",
|
||||||
|
"title": "API Servers UP",
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "80%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "N/A",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "current"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": false,
|
||||||
|
"colors": [
|
||||||
|
"rgba(245, 54, 54, 0.9)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(50, 172, 45, 0.97)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "percent",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": true,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"id": 2,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 3,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": false
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "(sum(up{job=\"kube-controller-manager\"} == 1) / count(up{job=\"kube-controller-manager\"})) * 100",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"refId": "A",
|
||||||
|
"step": 600
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": "50, 80",
|
||||||
|
"title": "Controller Managers UP",
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "80%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "N/A",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "current"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": false,
|
||||||
|
"colors": [
|
||||||
|
"rgba(245, 54, 54, 0.9)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(50, 172, 45, 0.97)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "percent",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": true,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"id": 3,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 3,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": false
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "(sum(up{job=\"kube-scheduler\"} == 1) / count(up{job=\"kube-scheduler\"})) * 100",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"refId": "A",
|
||||||
|
"step": 600
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": "50, 80",
|
||||||
|
"title": "Schedulers UP",
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "80%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "N/A",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "current"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": true,
|
||||||
|
"colors": [
|
||||||
|
"rgba(50, 172, 45, 0.97)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(245, 54, 54, 0.9)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "none",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": false,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"id": 4,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 3,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": false
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "count(increase(kube_pod_container_status_restarts{namespace=~\"kube-system|tectonic-system\"}[1h]) > 5)",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"refId": "A",
|
||||||
|
"step": 600
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": "1, 3",
|
||||||
|
"title": "Crashlooping Control Plane Pods",
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "80%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "0",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "current"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"showTitle": true,
|
||||||
|
"title": "Control Plane Status",
|
||||||
|
"titleSize": "h6"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collapse": false,
|
||||||
|
"editable": false,
|
||||||
|
"height": "158px",
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": false,
|
||||||
|
"colors": [
|
||||||
|
"rgba(50, 172, 45, 0.97)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(245, 54, 54, 0.9)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "percent",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": true,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"id": 8,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 3,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": false
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum(100 - (avg by (instance) (rate(node_cpu{job=\"node-exporter\",mode=\"idle\"}[5m])) * 100)) / count(node_cpu{job=\"node-exporter\",mode=\"idle\"})",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"refId": "A",
|
||||||
|
"step": 600
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": "80, 90",
|
||||||
|
"title": "CPU Utilization",
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "80%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "N/A",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "avg"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": false,
|
||||||
|
"colors": [
|
||||||
|
"rgba(50, 172, 45, 0.97)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(245, 54, 54, 0.9)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "percent",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": true,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"id": 7,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 3,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": false
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "((sum(node_memory_MemTotal) - sum(node_memory_MemFree) - sum(node_memory_Buffers) - sum(node_memory_Cached)) / sum(node_memory_MemTotal)) * 100",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"refId": "A",
|
||||||
|
"step": 600
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": "80, 90",
|
||||||
|
"title": "Memory Utilization",
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "80%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "N/A",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "avg"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": false,
|
||||||
|
"colors": [
|
||||||
|
"rgba(50, 172, 45, 0.97)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(245, 54, 54, 0.9)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "percent",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": true,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"id": 9,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 3,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": false
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "(sum(node_filesystem_size{device!=\"rootfs\"}) - sum(node_filesystem_free{device!=\"rootfs\"})) / sum(node_filesystem_size{device!=\"rootfs\"})",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"refId": "A",
|
||||||
|
"step": 600
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": "80, 90",
|
||||||
|
"title": "Filesystem Utilization",
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "80%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "N/A",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "avg"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": false,
|
||||||
|
"colors": [
|
||||||
|
"rgba(50, 172, 45, 0.97)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(245, 54, 54, 0.9)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "percent",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": true,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"id": 10,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 3,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": false
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "100 - (sum(kube_node_status_capacity_pods) - sum(kube_pod_info)) / sum(kube_node_status_capacity_pods) * 100",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"refId": "A",
|
||||||
|
"step": 600
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": "80, 90",
|
||||||
|
"title": "Pod Utilization",
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "80%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "N/A",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "avg"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"showTitle": true,
|
||||||
|
"title": "Capacity Planning",
|
||||||
|
"titleSize": "h6"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"schemaVersion": 14,
|
||||||
|
"sharedCrosshair": false,
|
||||||
|
"style": "dark",
|
||||||
|
"tags": [],
|
||||||
|
"templating": {
|
||||||
|
"list": []
|
||||||
|
},
|
||||||
|
"time": {
|
||||||
|
"from": "now-6h",
|
||||||
|
"to": "now"
|
||||||
|
},
|
||||||
|
"timepicker": {
|
||||||
|
"refresh_intervals": [
|
||||||
|
"5s",
|
||||||
|
"10s",
|
||||||
|
"30s",
|
||||||
|
"1m",
|
||||||
|
"5m",
|
||||||
|
"15m",
|
||||||
|
"30m",
|
||||||
|
"1h",
|
||||||
|
"2h",
|
||||||
|
"1d"
|
||||||
|
],
|
||||||
|
"time_options": [
|
||||||
|
"5m",
|
||||||
|
"15m",
|
||||||
|
"1h",
|
||||||
|
"6h",
|
||||||
|
"12h",
|
||||||
|
"24h",
|
||||||
|
"2d",
|
||||||
|
"7d",
|
||||||
|
"30d"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"timezone": "browser",
|
||||||
|
"title": "Kubernetes Cluster Status",
|
||||||
|
"version": 3
|
||||||
|
}
|
||||||
|
kubernetes-control-plane-status-dashboard.json: |+
|
||||||
|
{
|
||||||
|
"__inputs": [
|
||||||
|
{
|
||||||
|
"description": "",
|
||||||
|
"label": "prometheus",
|
||||||
|
"name": "prometheus",
|
||||||
|
"pluginId": "prometheus",
|
||||||
|
"pluginName": "Prometheus",
|
||||||
|
"type": "datasource"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"annotations": {
|
||||||
|
"list": []
|
||||||
|
},
|
||||||
|
"editable": false,
|
||||||
|
"graphTooltip": 0,
|
||||||
|
"hideControls": false,
|
||||||
|
"links": [],
|
||||||
|
"rows": [
|
||||||
|
{
|
||||||
|
"collapse": false,
|
||||||
|
"editable": false,
|
||||||
|
"height": "250px",
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": false,
|
||||||
|
"colors": [
|
||||||
|
"rgba(245, 54, 54, 0.9)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(50, 172, 45, 0.97)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "percent",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": true,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"hideTimeOverride": false,
|
||||||
|
"id": 1,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfix": "",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 3,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": false
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "(sum(up{job=\"apiserver\"} == 1) / sum(up{job=\"apiserver\"})) * 100",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"refId": "A",
|
||||||
|
"step": 600
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": "50, 80",
|
||||||
|
"title": "API Servers UP",
|
||||||
|
"transparent": false,
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "80%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "N/A",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "avg"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": false,
|
||||||
|
"colors": [
|
||||||
|
"rgba(245, 54, 54, 0.9)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(50, 172, 45, 0.97)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "percent",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": true,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"hideTimeOverride": false,
|
||||||
|
"id": 2,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfix": "",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 3,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": false
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "(sum(up{job=\"kube-controller-manager\"} == 1) / sum(up{job=\"kube-controller-manager\"})) * 100",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"refId": "A",
|
||||||
|
"step": 600
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": "50, 80",
|
||||||
|
"title": "Controller Managers UP",
|
||||||
|
"transparent": false,
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "80%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "N/A",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "avg"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": false,
|
||||||
|
"colors": [
|
||||||
|
"rgba(245, 54, 54, 0.9)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(50, 172, 45, 0.97)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "percent",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": true,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"hideTimeOverride": false,
|
||||||
|
"id": 3,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfix": "",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 3,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": false
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "(sum(up{job=\"kube-scheduler\"} == 1) / sum(up{job=\"kube-scheduler\"})) * 100",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"refId": "A",
|
||||||
|
"step": 600
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": "50, 80",
|
||||||
|
"title": "Schedulers UP",
|
||||||
|
"transparent": false,
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "80%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "N/A",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "avg"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": false,
|
||||||
|
"colors": [
|
||||||
|
"rgba(50, 172, 45, 0.97)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(245, 54, 54, 0.9)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "percent",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": true,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"hideTimeOverride": false,
|
||||||
|
"id": 4,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfix": "",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 3,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": false
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "max(sum by(instance) (rate(apiserver_request_count{code=~\"5..\"}[5m])) / sum by(instance) (rate(apiserver_request_count[5m]))) * 100",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 600
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": "5, 10",
|
||||||
|
"title": "API Server Request Error Rate",
|
||||||
|
"transparent": false,
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "80%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "0",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "avg"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"showTitle": false,
|
||||||
|
"title": "Dashboard Row",
|
||||||
|
"titleSize": "h6"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collapse": false,
|
||||||
|
"editable": false,
|
||||||
|
"height": "250px",
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"error": false,
|
||||||
|
"fill": 1,
|
||||||
|
"grid": {
|
||||||
|
"threshold1Color": "rgba(216, 200, 27, 0.27)",
|
||||||
|
"threshold2Color": "rgba(234, 112, 112, 0.22)"
|
||||||
|
},
|
||||||
|
"id": 7,
|
||||||
|
"isNew": false,
|
||||||
|
"legend": {
|
||||||
|
"alignAsTable": false,
|
||||||
|
"avg": false,
|
||||||
|
"current": false,
|
||||||
|
"hideEmpty": false,
|
||||||
|
"hideZero": false,
|
||||||
|
"max": false,
|
||||||
|
"min": false,
|
||||||
|
"rightSide": false,
|
||||||
|
"show": true,
|
||||||
|
"total": false
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 1,
|
||||||
|
"links": [],
|
||||||
|
"nullPointMode": "null",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 5,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"span": 12,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum by(verb) (rate(apiserver_latency_seconds:quantile[5m]) >= 0)",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 30
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "API Server Request Latency",
|
||||||
|
"tooltip": {
|
||||||
|
"msResolution": false,
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "individual"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"mode": "time",
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"showTitle": false,
|
||||||
|
"title": "Dashboard Row",
|
||||||
|
"titleSize": "h6"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collapse": false,
|
||||||
|
"editable": false,
|
||||||
|
"height": "250px",
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"error": false,
|
||||||
|
"fill": 1,
|
||||||
|
"grid": {
|
||||||
|
"threshold1Color": "rgba(216, 200, 27, 0.27)",
|
||||||
|
"threshold2Color": "rgba(234, 112, 112, 0.22)"
|
||||||
|
},
|
||||||
|
"id": 5,
|
||||||
|
"isNew": false,
|
||||||
|
"legend": {
|
||||||
|
"alignAsTable": false,
|
||||||
|
"avg": false,
|
||||||
|
"current": false,
|
||||||
|
"hideEmpty": false,
|
||||||
|
"hideZero": false,
|
||||||
|
"max": false,
|
||||||
|
"min": false,
|
||||||
|
"rightSide": false,
|
||||||
|
"show": true,
|
||||||
|
"total": false
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 1,
|
||||||
|
"links": [],
|
||||||
|
"nullPointMode": "null",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 5,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"span": 6,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "cluster:scheduler_e2e_scheduling_latency_seconds:quantile",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"refId": "A",
|
||||||
|
"step": 60
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "End to End Scheduling Latency",
|
||||||
|
"tooltip": {
|
||||||
|
"msResolution": false,
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "individual"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"mode": "time",
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "dtdurations",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"error": false,
|
||||||
|
"fill": 1,
|
||||||
|
"grid": {
|
||||||
|
"threshold1Color": "rgba(216, 200, 27, 0.27)",
|
||||||
|
"threshold2Color": "rgba(234, 112, 112, 0.22)"
|
||||||
|
},
|
||||||
|
"id": 6,
|
||||||
|
"isNew": false,
|
||||||
|
"legend": {
|
||||||
|
"alignAsTable": false,
|
||||||
|
"avg": false,
|
||||||
|
"current": false,
|
||||||
|
"hideEmpty": false,
|
||||||
|
"hideZero": false,
|
||||||
|
"max": false,
|
||||||
|
"min": false,
|
||||||
|
"rightSide": false,
|
||||||
|
"show": true,
|
||||||
|
"total": false
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 1,
|
||||||
|
"links": [],
|
||||||
|
"nullPointMode": "null",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 5,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"span": 6,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum by(instance) (rate(apiserver_request_count{code!~\"2..\"}[5m]))",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "Error Rate",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 60
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum by(instance) (rate(apiserver_request_count[5m]))",
|
||||||
|
"format": "time_series",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "Request Rate",
|
||||||
|
"refId": "B",
|
||||||
|
"step": 60
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "API Server Request Rates",
|
||||||
|
"tooltip": {
|
||||||
|
"msResolution": false,
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "individual"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"mode": "time",
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"showTitle": false,
|
||||||
|
"title": "Dashboard Row",
|
||||||
|
"titleSize": "h6"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"schemaVersion": 14,
|
||||||
|
"sharedCrosshair": false,
|
||||||
|
"style": "dark",
|
||||||
|
"tags": [],
|
||||||
|
"templating": {
|
||||||
|
"list": []
|
||||||
|
},
|
||||||
|
"time": {
|
||||||
|
"from": "now-6h",
|
||||||
|
"to": "now"
|
||||||
|
},
|
||||||
|
"timepicker": {
|
||||||
|
"refresh_intervals": [
|
||||||
|
"5s",
|
||||||
|
"10s",
|
||||||
|
"30s",
|
||||||
|
"1m",
|
||||||
|
"5m",
|
||||||
|
"15m",
|
||||||
|
"30m",
|
||||||
|
"1h",
|
||||||
|
"2h",
|
||||||
|
"1d"
|
||||||
|
],
|
||||||
|
"time_options": [
|
||||||
|
"5m",
|
||||||
|
"15m",
|
||||||
|
"1h",
|
||||||
|
"6h",
|
||||||
|
"12h",
|
||||||
|
"24h",
|
||||||
|
"2d",
|
||||||
|
"7d",
|
||||||
|
"30d"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"timezone": "browser",
|
||||||
|
"title": "Kubernetes Control Plane Status",
|
||||||
|
"version": 3
|
||||||
|
}
|
||||||
|
kubernetes-resource-requests-dashboard.json: |+
|
||||||
|
{
|
||||||
|
"__inputs": [
|
||||||
|
{
|
||||||
|
"description": "",
|
||||||
|
"label": "prometheus",
|
||||||
|
"name": "prometheus",
|
||||||
|
"pluginId": "prometheus",
|
||||||
|
"pluginName": "Prometheus",
|
||||||
|
"type": "datasource"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"annotations": {
|
||||||
|
"list": []
|
||||||
|
},
|
||||||
|
"editable": false,
|
||||||
|
"graphTooltip": 0,
|
||||||
|
"hideControls": false,
|
||||||
|
"links": [],
|
||||||
|
"refresh": false,
|
||||||
|
"rows": [
|
||||||
|
{
|
||||||
|
"collapse": false,
|
||||||
|
"editable": false,
|
||||||
|
"height": "300px",
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"description": "This represents the total [CPU resource requests](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#meaning-of-cpu) in the cluster.\nFor comparison the total [allocatable CPU cores](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) is also shown.",
|
||||||
|
"editable": false,
|
||||||
|
"error": false,
|
||||||
|
"fill": 1,
|
||||||
|
"grid": {
|
||||||
|
"threshold1Color": "rgba(216, 200, 27, 0.27)",
|
||||||
|
"threshold2Color": "rgba(234, 112, 112, 0.22)"
|
||||||
|
},
|
||||||
|
"id": 1,
|
||||||
|
"isNew": false,
|
||||||
|
"legend": {
|
||||||
|
"alignAsTable": false,
|
||||||
|
"avg": false,
|
||||||
|
"current": false,
|
||||||
|
"hideEmpty": false,
|
||||||
|
"hideZero": false,
|
||||||
|
"max": false,
|
||||||
|
"min": false,
|
||||||
|
"rightSide": false,
|
||||||
|
"show": true,
|
||||||
|
"total": false
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 1,
|
||||||
|
"links": [],
|
||||||
|
"nullPointMode": "null",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 5,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"span": 9,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "min(sum(kube_node_status_allocatable_cpu_cores) by (instance))",
|
||||||
|
"hide": false,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "Allocatable CPU Cores",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 20
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "max(sum(kube_pod_container_resource_requests_cpu_cores) by (instance))",
|
||||||
|
"hide": false,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "Requested CPU Cores",
|
||||||
|
"refId": "B",
|
||||||
|
"step": 20
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "CPU Cores",
|
||||||
|
"tooltip": {
|
||||||
|
"msResolution": false,
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "individual"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"mode": "time",
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"label": "CPU Cores",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": false,
|
||||||
|
"colors": [
|
||||||
|
"rgba(50, 172, 45, 0.97)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(245, 54, 54, 0.9)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "percent",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": true,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"hideTimeOverride": false,
|
||||||
|
"id": 2,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfix": "",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 3,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "max(sum(kube_pod_container_resource_requests_cpu_cores) by (instance)) / min(sum(kube_node_status_allocatable_cpu_cores) by (instance)) * 100",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 240
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": "80, 90",
|
||||||
|
"title": "CPU Cores",
|
||||||
|
"transparent": false,
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "110%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "N/A",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "avg"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"showTitle": false,
|
||||||
|
"title": "CPU Cores",
|
||||||
|
"titleSize": "h6"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collapse": false,
|
||||||
|
"editable": false,
|
||||||
|
"height": "300px",
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"description": "This represents the total [memory resource requests](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#meaning-of-memory) in the cluster.\nFor comparison the total [allocatable memory](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node-allocatable.md) is also shown.",
|
||||||
|
"editable": false,
|
||||||
|
"error": false,
|
||||||
|
"fill": 1,
|
||||||
|
"grid": {
|
||||||
|
"threshold1Color": "rgba(216, 200, 27, 0.27)",
|
||||||
|
"threshold2Color": "rgba(234, 112, 112, 0.22)"
|
||||||
|
},
|
||||||
|
"id": 3,
|
||||||
|
"isNew": false,
|
||||||
|
"legend": {
|
||||||
|
"alignAsTable": false,
|
||||||
|
"avg": false,
|
||||||
|
"current": false,
|
||||||
|
"hideEmpty": false,
|
||||||
|
"hideZero": false,
|
||||||
|
"max": false,
|
||||||
|
"min": false,
|
||||||
|
"rightSide": false,
|
||||||
|
"show": true,
|
||||||
|
"total": false
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 1,
|
||||||
|
"links": [],
|
||||||
|
"nullPointMode": "null",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 5,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"span": 9,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "min(sum(kube_node_status_allocatable_memory_bytes) by (instance))",
|
||||||
|
"hide": false,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "Allocatable Memory",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 20
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "max(sum(kube_pod_container_resource_requests_memory_bytes) by (instance))",
|
||||||
|
"hide": false,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "Requested Memory",
|
||||||
|
"refId": "B",
|
||||||
|
"step": 20
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Memory",
|
||||||
|
"tooltip": {
|
||||||
|
"msResolution": false,
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "individual"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"mode": "time",
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "bytes",
|
||||||
|
"label": "Memory",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": false,
|
||||||
|
"colors": [
|
||||||
|
"rgba(50, 172, 45, 0.97)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(245, 54, 54, 0.9)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "percent",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": true,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"hideTimeOverride": false,
|
||||||
|
"id": 4,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfix": "",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 3,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "max(sum(kube_pod_container_resource_requests_memory_bytes) by (instance)) / min(sum(kube_node_status_allocatable_memory_bytes) by (instance)) * 100",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 240
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": "80, 90",
|
||||||
|
"title": "Memory",
|
||||||
|
"transparent": false,
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "110%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "N/A",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "avg"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"showTitle": false,
|
||||||
|
"title": "Memory",
|
||||||
|
"titleSize": "h6"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"schemaVersion": 14,
|
||||||
|
"sharedCrosshair": false,
|
||||||
|
"style": "dark",
|
||||||
|
"tags": [],
|
||||||
|
"templating": {
|
||||||
|
"list": []
|
||||||
|
},
|
||||||
|
"time": {
|
||||||
|
"from": "now-3h",
|
||||||
|
"to": "now"
|
||||||
|
},
|
||||||
|
"timepicker": {
|
||||||
|
"refresh_intervals": [
|
||||||
|
"5s",
|
||||||
|
"10s",
|
||||||
|
"30s",
|
||||||
|
"1m",
|
||||||
|
"5m",
|
||||||
|
"15m",
|
||||||
|
"30m",
|
||||||
|
"1h",
|
||||||
|
"2h",
|
||||||
|
"1d"
|
||||||
|
],
|
||||||
|
"time_options": [
|
||||||
|
"5m",
|
||||||
|
"15m",
|
||||||
|
"1h",
|
||||||
|
"6h",
|
||||||
|
"12h",
|
||||||
|
"24h",
|
||||||
|
"2d",
|
||||||
|
"7d",
|
||||||
|
"30d"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"timezone": "browser",
|
||||||
|
"title": "Kubernetes Resource Requests",
|
||||||
|
"version": 2
|
||||||
|
}
|
||||||
|
nodes-dashboard.json: |+
|
||||||
|
{
|
||||||
|
"__inputs": [
|
||||||
|
{
|
||||||
|
"description": "",
|
||||||
|
"label": "prometheus",
|
||||||
|
"name": "prometheus",
|
||||||
|
"pluginId": "prometheus",
|
||||||
|
"pluginName": "Prometheus",
|
||||||
|
"type": "datasource"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"annotations": {
|
||||||
|
"list": []
|
||||||
|
},
|
||||||
|
"description": "Dashboard to get an overview of one server",
|
||||||
|
"editable": false,
|
||||||
|
"gnetId": 22,
|
||||||
|
"graphTooltip": 0,
|
||||||
|
"hideControls": false,
|
||||||
|
"links": [],
|
||||||
|
"refresh": false,
|
||||||
|
"rows": [
|
||||||
|
{
|
||||||
|
"collapse": false,
|
||||||
|
"editable": false,
|
||||||
|
"height": "250px",
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"error": false,
|
||||||
|
"fill": 1,
|
||||||
|
"grid": {
|
||||||
|
"threshold1Color": "rgba(216, 200, 27, 0.27)",
|
||||||
|
"threshold2Color": "rgba(234, 112, 112, 0.22)"
|
||||||
|
},
|
||||||
|
"id": 3,
|
||||||
|
"isNew": false,
|
||||||
|
"legend": {
|
||||||
|
"alignAsTable": false,
|
||||||
|
"avg": false,
|
||||||
|
"current": false,
|
||||||
|
"hideEmpty": false,
|
||||||
|
"hideZero": false,
|
||||||
|
"max": false,
|
||||||
|
"min": false,
|
||||||
|
"rightSide": false,
|
||||||
|
"show": true,
|
||||||
|
"total": false
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 2,
|
||||||
|
"links": [],
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 5,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"span": 6,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "100 - (avg by (cpu) (irate(node_cpu{mode=\"idle\", instance=\"$server\"}[5m])) * 100)",
|
||||||
|
"hide": false,
|
||||||
|
"intervalFactor": 10,
|
||||||
|
"legendFormat": "{{cpu}}",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 50
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Idle CPU",
|
||||||
|
"tooltip": {
|
||||||
|
"msResolution": false,
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "cumulative"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"mode": "time",
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "percent",
|
||||||
|
"label": "cpu usage",
|
||||||
|
"logBase": 1,
|
||||||
|
"max": 100,
|
||||||
|
"min": 0,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"error": false,
|
||||||
|
"fill": 1,
|
||||||
|
"grid": {
|
||||||
|
"threshold1Color": "rgba(216, 200, 27, 0.27)",
|
||||||
|
"threshold2Color": "rgba(234, 112, 112, 0.22)"
|
||||||
|
},
|
||||||
|
"id": 9,
|
||||||
|
"isNew": false,
|
||||||
|
"legend": {
|
||||||
|
"alignAsTable": false,
|
||||||
|
"avg": false,
|
||||||
|
"current": false,
|
||||||
|
"hideEmpty": false,
|
||||||
|
"hideZero": false,
|
||||||
|
"max": false,
|
||||||
|
"min": false,
|
||||||
|
"rightSide": false,
|
||||||
|
"show": true,
|
||||||
|
"total": false
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 2,
|
||||||
|
"links": [],
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 5,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"span": 6,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "node_load1{instance=\"$server\"}",
|
||||||
|
"intervalFactor": 4,
|
||||||
|
"legendFormat": "load 1m",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 20,
|
||||||
|
"target": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "node_load5{instance=\"$server\"}",
|
||||||
|
"intervalFactor": 4,
|
||||||
|
"legendFormat": "load 5m",
|
||||||
|
"refId": "B",
|
||||||
|
"step": 20,
|
||||||
|
"target": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "node_load15{instance=\"$server\"}",
|
||||||
|
"intervalFactor": 4,
|
||||||
|
"legendFormat": "load 15m",
|
||||||
|
"refId": "C",
|
||||||
|
"step": 20,
|
||||||
|
"target": ""
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "System Load",
|
||||||
|
"tooltip": {
|
||||||
|
"msResolution": false,
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "cumulative"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"mode": "time",
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "percentunit",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"showTitle": false,
|
||||||
|
"title": "New Row",
|
||||||
|
"titleSize": "h6"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collapse": false,
|
||||||
|
"editable": false,
|
||||||
|
"height": "250px",
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"error": false,
|
||||||
|
"fill": 1,
|
||||||
|
"grid": {
|
||||||
|
"threshold1Color": "rgba(216, 200, 27, 0.27)",
|
||||||
|
"threshold2Color": "rgba(234, 112, 112, 0.22)"
|
||||||
|
},
|
||||||
|
"id": 4,
|
||||||
|
"isNew": false,
|
||||||
|
"legend": {
|
||||||
|
"alignAsTable": false,
|
||||||
|
"avg": false,
|
||||||
|
"current": false,
|
||||||
|
"hideEmpty": false,
|
||||||
|
"hideZero": false,
|
||||||
|
"max": false,
|
||||||
|
"min": false,
|
||||||
|
"rightSide": false,
|
||||||
|
"show": true,
|
||||||
|
"total": false
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 2,
|
||||||
|
"links": [],
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 5,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [
|
||||||
|
{
|
||||||
|
"alias": "node_memory_SwapFree{instance=\"172.17.0.1:9100\",job=\"prometheus\"}",
|
||||||
|
"yaxis": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"span": 9,
|
||||||
|
"stack": true,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "node_memory_MemTotal{instance=\"$server\"} - node_memory_MemFree{instance=\"$server\"} - node_memory_Buffers{instance=\"$server\"} - node_memory_Cached{instance=\"$server\"}",
|
||||||
|
"hide": false,
|
||||||
|
"interval": "",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "memory used",
|
||||||
|
"metric": "",
|
||||||
|
"refId": "C",
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "node_memory_Buffers{instance=\"$server\"}",
|
||||||
|
"interval": "",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "memory buffers",
|
||||||
|
"metric": "",
|
||||||
|
"refId": "E",
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "node_memory_Cached{instance=\"$server\"}",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "memory cached",
|
||||||
|
"metric": "",
|
||||||
|
"refId": "F",
|
||||||
|
"step": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "node_memory_MemFree{instance=\"$server\"}",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "memory free",
|
||||||
|
"metric": "",
|
||||||
|
"refId": "D",
|
||||||
|
"step": 10
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Memory Usage",
|
||||||
|
"tooltip": {
|
||||||
|
"msResolution": false,
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "individual"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"mode": "time",
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "bytes",
|
||||||
|
"logBase": 1,
|
||||||
|
"min": "0",
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": false,
|
||||||
|
"colors": [
|
||||||
|
"rgba(50, 172, 45, 0.97)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(245, 54, 54, 0.9)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "percent",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": true,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"hideTimeOverride": false,
|
||||||
|
"id": 5,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfix": "",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 3,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": false
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "((node_memory_MemTotal{instance=\"$server\"} - node_memory_MemFree{instance=\"$server\"} - node_memory_Buffers{instance=\"$server\"} - node_memory_Cached{instance=\"$server\"}) / node_memory_MemTotal{instance=\"$server\"}) * 100",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"refId": "A",
|
||||||
|
"step": 60,
|
||||||
|
"target": ""
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": "80, 90",
|
||||||
|
"title": "Memory Usage",
|
||||||
|
"transparent": false,
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "80%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "N/A",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "avg"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"showTitle": false,
|
||||||
|
"title": "New Row",
|
||||||
|
"titleSize": "h6"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collapse": false,
|
||||||
|
"editable": false,
|
||||||
|
"height": "250px",
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"error": false,
|
||||||
|
"fill": 1,
|
||||||
|
"grid": {
|
||||||
|
"threshold1Color": "rgba(216, 200, 27, 0.27)",
|
||||||
|
"threshold2Color": "rgba(234, 112, 112, 0.22)"
|
||||||
|
},
|
||||||
|
"id": 6,
|
||||||
|
"isNew": true,
|
||||||
|
"legend": {
|
||||||
|
"alignAsTable": false,
|
||||||
|
"avg": false,
|
||||||
|
"current": false,
|
||||||
|
"hideEmpty": false,
|
||||||
|
"hideZero": false,
|
||||||
|
"max": false,
|
||||||
|
"min": false,
|
||||||
|
"rightSide": false,
|
||||||
|
"show": true,
|
||||||
|
"total": false
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 2,
|
||||||
|
"links": [],
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 5,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [
|
||||||
|
{
|
||||||
|
"alias": "read",
|
||||||
|
"yaxis": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"alias": "{instance=\"172.17.0.1:9100\"}",
|
||||||
|
"yaxis": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"alias": "io time",
|
||||||
|
"yaxis": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"span": 9,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum by (instance) (rate(node_disk_bytes_read{instance=\"$server\"}[2m]))",
|
||||||
|
"hide": false,
|
||||||
|
"intervalFactor": 4,
|
||||||
|
"legendFormat": "read",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 20,
|
||||||
|
"target": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum by (instance) (rate(node_disk_bytes_written{instance=\"$server\"}[2m]))",
|
||||||
|
"intervalFactor": 4,
|
||||||
|
"legendFormat": "written",
|
||||||
|
"refId": "B",
|
||||||
|
"step": 20
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "sum by (instance) (rate(node_disk_io_time_ms{instance=\"$server\"}[2m]))",
|
||||||
|
"intervalFactor": 4,
|
||||||
|
"legendFormat": "io time",
|
||||||
|
"refId": "C",
|
||||||
|
"step": 20
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Disk I/O",
|
||||||
|
"tooltip": {
|
||||||
|
"msResolution": false,
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "cumulative"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"mode": "time",
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "bytes",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "ms",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": false,
|
||||||
|
"colors": [
|
||||||
|
"rgba(50, 172, 45, 0.97)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(245, 54, 54, 0.9)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "percentunit",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 1,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": true,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"hideTimeOverride": false,
|
||||||
|
"id": 7,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfix": "",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 3,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": false
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "(sum(node_filesystem_size{device!=\"rootfs\",instance=\"$server\"}) - sum(node_filesystem_free{device!=\"rootfs\",instance=\"$server\"})) / sum(node_filesystem_size{device!=\"rootfs\",instance=\"$server\"})",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"refId": "A",
|
||||||
|
"step": 60,
|
||||||
|
"target": ""
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": "0.75, 0.9",
|
||||||
|
"title": "Disk Space Usage",
|
||||||
|
"transparent": false,
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "80%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "N/A",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "current"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"showTitle": false,
|
||||||
|
"title": "New Row",
|
||||||
|
"titleSize": "h6"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collapse": false,
|
||||||
|
"editable": false,
|
||||||
|
"height": "250px",
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"error": false,
|
||||||
|
"fill": 1,
|
||||||
|
"grid": {
|
||||||
|
"threshold1Color": "rgba(216, 200, 27, 0.27)",
|
||||||
|
"threshold2Color": "rgba(234, 112, 112, 0.22)"
|
||||||
|
},
|
||||||
|
"id": 8,
|
||||||
|
"isNew": false,
|
||||||
|
"legend": {
|
||||||
|
"alignAsTable": false,
|
||||||
|
"avg": false,
|
||||||
|
"current": false,
|
||||||
|
"hideEmpty": false,
|
||||||
|
"hideZero": false,
|
||||||
|
"max": false,
|
||||||
|
"min": false,
|
||||||
|
"rightSide": false,
|
||||||
|
"show": true,
|
||||||
|
"total": false
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 2,
|
||||||
|
"links": [],
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 5,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [
|
||||||
|
{
|
||||||
|
"alias": "transmitted",
|
||||||
|
"yaxis": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"span": 6,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "rate(node_network_receive_bytes{instance=\"$server\",device!~\"lo\"}[5m])",
|
||||||
|
"hide": false,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "{{device}}",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 10,
|
||||||
|
"target": ""
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Network Received",
|
||||||
|
"tooltip": {
|
||||||
|
"msResolution": false,
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "cumulative"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"mode": "time",
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "bytes",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "bytes",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"error": false,
|
||||||
|
"fill": 1,
|
||||||
|
"grid": {
|
||||||
|
"threshold1Color": "rgba(216, 200, 27, 0.27)",
|
||||||
|
"threshold2Color": "rgba(234, 112, 112, 0.22)"
|
||||||
|
},
|
||||||
|
"id": 10,
|
||||||
|
"isNew": false,
|
||||||
|
"legend": {
|
||||||
|
"alignAsTable": false,
|
||||||
|
"avg": false,
|
||||||
|
"current": false,
|
||||||
|
"hideEmpty": false,
|
||||||
|
"hideZero": false,
|
||||||
|
"max": false,
|
||||||
|
"min": false,
|
||||||
|
"rightSide": false,
|
||||||
|
"show": true,
|
||||||
|
"total": false
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 2,
|
||||||
|
"links": [],
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 5,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [
|
||||||
|
{
|
||||||
|
"alias": "transmitted",
|
||||||
|
"yaxis": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"span": 6,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "rate(node_network_transmit_bytes{instance=\"$server\",device!~\"lo\"}[5m])",
|
||||||
|
"hide": false,
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "{{device}}",
|
||||||
|
"refId": "B",
|
||||||
|
"step": 10,
|
||||||
|
"target": ""
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Network Transmitted",
|
||||||
|
"tooltip": {
|
||||||
|
"msResolution": false,
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "cumulative"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"mode": "time",
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "bytes",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "bytes",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"showTitle": false,
|
||||||
|
"title": "New Row",
|
||||||
|
"titleSize": "h6"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"schemaVersion": 14,
|
||||||
|
"sharedCrosshair": false,
|
||||||
|
"style": "dark",
|
||||||
|
"tags": [],
|
||||||
|
"templating": {
|
||||||
|
"list": [
|
||||||
|
{
|
||||||
|
"allValue": null,
|
||||||
|
"current": {},
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"hide": 0,
|
||||||
|
"includeAll": false,
|
||||||
|
"label": null,
|
||||||
|
"multi": false,
|
||||||
|
"name": "server",
|
||||||
|
"options": [],
|
||||||
|
"query": "label_values(node_boot_time, instance)",
|
||||||
|
"refresh": 1,
|
||||||
|
"regex": "",
|
||||||
|
"sort": 0,
|
||||||
|
"tagValuesQuery": "",
|
||||||
|
"tags": [],
|
||||||
|
"tagsQuery": "",
|
||||||
|
"type": "query",
|
||||||
|
"useTags": false
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"time": {
|
||||||
|
"from": "now-1h",
|
||||||
|
"to": "now"
|
||||||
|
},
|
||||||
|
"timepicker": {
|
||||||
|
"refresh_intervals": [
|
||||||
|
"5s",
|
||||||
|
"10s",
|
||||||
|
"30s",
|
||||||
|
"1m",
|
||||||
|
"5m",
|
||||||
|
"15m",
|
||||||
|
"30m",
|
||||||
|
"1h",
|
||||||
|
"2h",
|
||||||
|
"1d"
|
||||||
|
],
|
||||||
|
"time_options": [
|
||||||
|
"5m",
|
||||||
|
"15m",
|
||||||
|
"1h",
|
||||||
|
"6h",
|
||||||
|
"12h",
|
||||||
|
"24h",
|
||||||
|
"2d",
|
||||||
|
"7d",
|
||||||
|
"30d"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"timezone": "browser",
|
||||||
|
"title": "Nodes",
|
||||||
|
"version": 2
|
||||||
|
}
|
||||||
|
pods-dashboard.json: |+
|
||||||
|
{
|
||||||
|
"__inputs": [
|
||||||
|
{
|
||||||
|
"description": "",
|
||||||
|
"label": "prometheus",
|
||||||
|
"name": "prometheus",
|
||||||
|
"pluginId": "prometheus",
|
||||||
|
"pluginName": "Prometheus",
|
||||||
|
"type": "datasource"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"annotations": {
|
||||||
|
"list": []
|
||||||
|
},
|
||||||
|
"editable": false,
|
||||||
|
"graphTooltip": 1,
|
||||||
|
"hideControls": false,
|
||||||
|
"links": [],
|
||||||
|
"refresh": false,
|
||||||
|
"rows": [
|
||||||
|
{
|
||||||
|
"collapse": false,
|
||||||
|
"editable": false,
|
||||||
|
"height": "250px",
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"error": false,
|
||||||
|
"fill": 1,
|
||||||
|
"grid": {
|
||||||
|
"threshold1Color": "rgba(216, 200, 27, 0.27)",
|
||||||
|
"threshold2Color": "rgba(234, 112, 112, 0.22)"
|
||||||
|
},
|
||||||
|
"id": 1,
|
||||||
|
"isNew": false,
|
||||||
|
"legend": {
|
||||||
|
"alignAsTable": true,
|
||||||
|
"avg": true,
|
||||||
|
"current": true,
|
||||||
|
"hideEmpty": false,
|
||||||
|
"hideZero": false,
|
||||||
|
"max": false,
|
||||||
|
"min": false,
|
||||||
|
"rightSide": true,
|
||||||
|
"show": true,
|
||||||
|
"total": false,
|
||||||
|
"values": true
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 2,
|
||||||
|
"links": [],
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 5,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"span": 12,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum by(container_name) (container_memory_usage_bytes{pod_name=\"$pod\", container_name=~\"$container\", container_name!=\"POD\"})",
|
||||||
|
"interval": "10s",
|
||||||
|
"intervalFactor": 1,
|
||||||
|
"legendFormat": "Current: {{ container_name }}",
|
||||||
|
"metric": "container_memory_usage_bytes",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 15
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "kube_pod_container_resource_requests_memory_bytes{pod=\"$pod\", container=~\"$container\"}",
|
||||||
|
"interval": "10s",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "Requested: {{ container }}",
|
||||||
|
"metric": "kube_pod_container_resource_requests_memory_bytes",
|
||||||
|
"refId": "B",
|
||||||
|
"step": 20
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "kube_pod_container_resource_limits_memory_bytes{pod=\"$pod\", container=~\"$container\"}",
|
||||||
|
"interval": "10s",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "Limit: {{ container }}",
|
||||||
|
"metric": "kube_pod_container_resource_limits_memory_bytes",
|
||||||
|
"refId": "C",
|
||||||
|
"step": 20
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Memory Usage",
|
||||||
|
"tooltip": {
|
||||||
|
"msResolution": true,
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "cumulative"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"mode": "time",
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "bytes",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"showTitle": false,
|
||||||
|
"title": "Row",
|
||||||
|
"titleSize": "h6"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collapse": false,
|
||||||
|
"editable": false,
|
||||||
|
"height": "250px",
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"error": false,
|
||||||
|
"fill": 1,
|
||||||
|
"grid": {
|
||||||
|
"threshold1Color": "rgba(216, 200, 27, 0.27)",
|
||||||
|
"threshold2Color": "rgba(234, 112, 112, 0.22)"
|
||||||
|
},
|
||||||
|
"id": 2,
|
||||||
|
"isNew": false,
|
||||||
|
"legend": {
|
||||||
|
"alignAsTable": true,
|
||||||
|
"avg": true,
|
||||||
|
"current": true,
|
||||||
|
"hideEmpty": false,
|
||||||
|
"hideZero": false,
|
||||||
|
"max": false,
|
||||||
|
"min": false,
|
||||||
|
"rightSide": true,
|
||||||
|
"show": true,
|
||||||
|
"total": false,
|
||||||
|
"values": true
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 2,
|
||||||
|
"links": [],
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 5,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"span": 12,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum by (container_name)(rate(container_cpu_usage_seconds_total{image!=\"\",container_name!=\"POD\",pod_name=\"$pod\"}[1m]))",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "{{ container_name }}",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 30
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "kube_pod_container_resource_requests_cpu_cores{pod=\"$pod\", container=~\"$container\"}",
|
||||||
|
"interval": "10s",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "Requested: {{ container }}",
|
||||||
|
"metric": "kube_pod_container_resource_requests_cpu_cores",
|
||||||
|
"refId": "B",
|
||||||
|
"step": 20
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "kube_pod_container_resource_limits_cpu_cores{pod=\"$pod\", container=~\"$container\"}",
|
||||||
|
"interval": "10s",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "Limit: {{ container }}",
|
||||||
|
"metric": "kube_pod_container_resource_limits_memory_bytes",
|
||||||
|
"refId": "C",
|
||||||
|
"step": 20
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "CPU Usage",
|
||||||
|
"tooltip": {
|
||||||
|
"msResolution": true,
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "cumulative"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"mode": "time",
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"showTitle": false,
|
||||||
|
"title": "Row",
|
||||||
|
"titleSize": "h6"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collapse": false,
|
||||||
|
"editable": false,
|
||||||
|
"height": "250px",
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"error": false,
|
||||||
|
"fill": 1,
|
||||||
|
"grid": {
|
||||||
|
"threshold1Color": "rgba(216, 200, 27, 0.27)",
|
||||||
|
"threshold2Color": "rgba(234, 112, 112, 0.22)"
|
||||||
|
},
|
||||||
|
"id": 3,
|
||||||
|
"isNew": false,
|
||||||
|
"legend": {
|
||||||
|
"alignAsTable": true,
|
||||||
|
"avg": true,
|
||||||
|
"current": true,
|
||||||
|
"hideEmpty": false,
|
||||||
|
"hideZero": false,
|
||||||
|
"max": false,
|
||||||
|
"min": false,
|
||||||
|
"rightSide": true,
|
||||||
|
"show": true,
|
||||||
|
"total": false,
|
||||||
|
"values": true
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 2,
|
||||||
|
"links": [],
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 5,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"span": 12,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sort_desc(sum by (pod_name) (rate(container_network_receive_bytes_total{pod_name=\"$pod\"}[1m])))",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "{{ pod_name }}",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 30
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Network I/O",
|
||||||
|
"tooltip": {
|
||||||
|
"msResolution": true,
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "cumulative"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"mode": "time",
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "bytes",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"showTitle": false,
|
||||||
|
"title": "New Row",
|
||||||
|
"titleSize": "h6"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"schemaVersion": 14,
|
||||||
|
"sharedCrosshair": false,
|
||||||
|
"style": "dark",
|
||||||
|
"tags": [],
|
||||||
|
"templating": {
|
||||||
|
"list": [
|
||||||
|
{
|
||||||
|
"allValue": ".*",
|
||||||
|
"current": {},
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"hide": 0,
|
||||||
|
"includeAll": true,
|
||||||
|
"label": "Namespace",
|
||||||
|
"multi": false,
|
||||||
|
"name": "namespace",
|
||||||
|
"options": [],
|
||||||
|
"query": "label_values(kube_pod_info, namespace)",
|
||||||
|
"refresh": 1,
|
||||||
|
"regex": "",
|
||||||
|
"sort": 0,
|
||||||
|
"tagValuesQuery": "",
|
||||||
|
"tags": [],
|
||||||
|
"tagsQuery": "",
|
||||||
|
"type": "query",
|
||||||
|
"useTags": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"allValue": null,
|
||||||
|
"current": {},
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"hide": 0,
|
||||||
|
"includeAll": false,
|
||||||
|
"label": "Pod",
|
||||||
|
"multi": false,
|
||||||
|
"name": "pod",
|
||||||
|
"options": [],
|
||||||
|
"query": "label_values(kube_pod_info{namespace=~\"$namespace\"}, pod)",
|
||||||
|
"refresh": 1,
|
||||||
|
"regex": "",
|
||||||
|
"sort": 0,
|
||||||
|
"tagValuesQuery": "",
|
||||||
|
"tags": [],
|
||||||
|
"tagsQuery": "",
|
||||||
|
"type": "query",
|
||||||
|
"useTags": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"allValue": ".*",
|
||||||
|
"current": {},
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"hide": 0,
|
||||||
|
"includeAll": true,
|
||||||
|
"label": "Container",
|
||||||
|
"multi": false,
|
||||||
|
"name": "container",
|
||||||
|
"options": [],
|
||||||
|
"query": "label_values(kube_pod_container_info{namespace=\"$namespace\", pod=\"$pod\"}, container)",
|
||||||
|
"refresh": 1,
|
||||||
|
"regex": "",
|
||||||
|
"sort": 0,
|
||||||
|
"tagValuesQuery": "",
|
||||||
|
"tags": [],
|
||||||
|
"tagsQuery": "",
|
||||||
|
"type": "query",
|
||||||
|
"useTags": false
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"time": {
|
||||||
|
"from": "now-6h",
|
||||||
|
"to": "now"
|
||||||
|
},
|
||||||
|
"timepicker": {
|
||||||
|
"refresh_intervals": [
|
||||||
|
"5s",
|
||||||
|
"10s",
|
||||||
|
"30s",
|
||||||
|
"1m",
|
||||||
|
"5m",
|
||||||
|
"15m",
|
||||||
|
"30m",
|
||||||
|
"1h",
|
||||||
|
"2h",
|
||||||
|
"1d"
|
||||||
|
],
|
||||||
|
"time_options": [
|
||||||
|
"5m",
|
||||||
|
"15m",
|
||||||
|
"1h",
|
||||||
|
"6h",
|
||||||
|
"12h",
|
||||||
|
"24h",
|
||||||
|
"2d",
|
||||||
|
"7d",
|
||||||
|
"30d"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"timezone": "browser",
|
||||||
|
"title": "Pods",
|
||||||
|
"version": 1
|
||||||
|
}
|
||||||
|
statefulset-dashboard.json: |+
|
||||||
|
{
|
||||||
|
"__inputs": [
|
||||||
|
{
|
||||||
|
"description": "",
|
||||||
|
"label": "prometheus",
|
||||||
|
"name": "prometheus",
|
||||||
|
"pluginId": "prometheus",
|
||||||
|
"pluginName": "Prometheus",
|
||||||
|
"type": "datasource"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"annotations": {
|
||||||
|
"list": []
|
||||||
|
},
|
||||||
|
"editable": false,
|
||||||
|
"graphTooltip": 1,
|
||||||
|
"hideControls": false,
|
||||||
|
"links": [],
|
||||||
|
"rows": [
|
||||||
|
{
|
||||||
|
"collapse": false,
|
||||||
|
"editable": false,
|
||||||
|
"height": "200px",
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": false,
|
||||||
|
"colors": [
|
||||||
|
"rgba(245, 54, 54, 0.9)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(50, 172, 45, 0.97)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "none",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": false,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"id": 8,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfix": "cores",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 4,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$statefulset_namespace\",pod_name=~\"$statefulset_name.*\"}[3m]))",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"refId": "A",
|
||||||
|
"step": 600
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "CPU",
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "110%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "N/A",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "avg"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": false,
|
||||||
|
"colors": [
|
||||||
|
"rgba(245, 54, 54, 0.9)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(50, 172, 45, 0.97)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "none",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": false,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"id": 9,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfix": "GB",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "80%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 4,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum(container_memory_usage_bytes{namespace=\"$statefulset_namespace\",pod_name=~\"$statefulset_name.*\"}) / 1024^3",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"refId": "A",
|
||||||
|
"step": 600
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Memory",
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "110%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "N/A",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "avg"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": false,
|
||||||
|
"colors": [
|
||||||
|
"rgba(245, 54, 54, 0.9)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(50, 172, 45, 0.97)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "Bps",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": false,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": false
|
||||||
|
},
|
||||||
|
"id": 7,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfix": "",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 4,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum(rate(container_network_transmit_bytes_total{namespace=\"$statefulset_namespace\",pod_name=~\"$statefulset_name.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{namespace=\"$statefulset_namespace\",pod_name=~\"$statefulset_name.*\"}[3m]))",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"refId": "A",
|
||||||
|
"step": 600
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Network",
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "80%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "N/A",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "avg"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"showTitle": false,
|
||||||
|
"title": "Dashboard Row",
|
||||||
|
"titleSize": "h6"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collapse": false,
|
||||||
|
"editable": false,
|
||||||
|
"height": "100px",
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": false,
|
||||||
|
"colors": [
|
||||||
|
"rgba(245, 54, 54, 0.9)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(50, 172, 45, 0.97)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "none",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": false,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": false
|
||||||
|
},
|
||||||
|
"id": 5,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 3,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": false
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "max(kube_statefulset_replicas{statefulset=\"$statefulset_name\",namespace=\"$statefulset_namespace\"}) without (instance, pod)",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"metric": "kube_statefulset_replicas",
|
||||||
|
"refId": "A",
|
||||||
|
"step": 600
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Desired Replicas",
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "80%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "N/A",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "avg"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": false,
|
||||||
|
"colors": [
|
||||||
|
"rgba(245, 54, 54, 0.9)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(50, 172, 45, 0.97)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "none",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": false,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"id": 6,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 3,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": false
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "min(kube_statefulset_status_replicas{statefulset=\"$statefulset_name\",namespace=\"$statefulset_namespace\"}) without (instance, pod)",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"refId": "A",
|
||||||
|
"step": 600
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Available Replicas",
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "80%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "N/A",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "avg"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": false,
|
||||||
|
"colors": [
|
||||||
|
"rgba(245, 54, 54, 0.9)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(50, 172, 45, 0.97)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "none",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": false,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"id": 3,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 3,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": false
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "max(kube_statefulset_status_observed_generation{statefulset=\"$statefulset_name\",namespace=\"$statefulset_namespace\"}) without (instance, pod)",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"refId": "A",
|
||||||
|
"step": 600
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Observed Generation",
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "80%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "N/A",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "avg"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"colorBackground": false,
|
||||||
|
"colorValue": false,
|
||||||
|
"colors": [
|
||||||
|
"rgba(245, 54, 54, 0.9)",
|
||||||
|
"rgba(237, 129, 40, 0.89)",
|
||||||
|
"rgba(50, 172, 45, 0.97)"
|
||||||
|
],
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"format": "none",
|
||||||
|
"gauge": {
|
||||||
|
"maxValue": 100,
|
||||||
|
"minValue": 0,
|
||||||
|
"show": false,
|
||||||
|
"thresholdLabels": false,
|
||||||
|
"thresholdMarkers": true
|
||||||
|
},
|
||||||
|
"id": 2,
|
||||||
|
"links": [],
|
||||||
|
"mappingType": 1,
|
||||||
|
"mappingTypes": [
|
||||||
|
{
|
||||||
|
"name": "value to text",
|
||||||
|
"value": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "range to text",
|
||||||
|
"value": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"maxDataPoints": 100,
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"postfixFontSize": "50%",
|
||||||
|
"prefix": "",
|
||||||
|
"prefixFontSize": "50%",
|
||||||
|
"rangeMaps": [
|
||||||
|
{
|
||||||
|
"from": "null",
|
||||||
|
"text": "N/A",
|
||||||
|
"to": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"span": 3,
|
||||||
|
"sparkline": {
|
||||||
|
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||||
|
"full": false,
|
||||||
|
"lineColor": "rgb(31, 120, 193)",
|
||||||
|
"show": false
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "max(kube_statefulset_metadata_generation{statefulset=\"$statefulset_name\",namespace=\"$statefulset_namespace\"}) without (instance, pod)",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"refId": "A",
|
||||||
|
"step": 600
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Metadata Generation",
|
||||||
|
"type": "singlestat",
|
||||||
|
"valueFontSize": "80%",
|
||||||
|
"valueMaps": [
|
||||||
|
{
|
||||||
|
"op": "=",
|
||||||
|
"text": "N/A",
|
||||||
|
"value": "null"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"valueName": "avg"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"showTitle": false,
|
||||||
|
"title": "Dashboard Row",
|
||||||
|
"titleSize": "h6"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collapse": false,
|
||||||
|
"editable": false,
|
||||||
|
"height": "350px",
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"aliasColors": {},
|
||||||
|
"bars": false,
|
||||||
|
"dashLength": 10,
|
||||||
|
"dashes": false,
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"editable": false,
|
||||||
|
"error": false,
|
||||||
|
"fill": 1,
|
||||||
|
"grid": {
|
||||||
|
"threshold1Color": "rgba(216, 200, 27, 0.27)",
|
||||||
|
"threshold2Color": "rgba(234, 112, 112, 0.22)"
|
||||||
|
},
|
||||||
|
"id": 1,
|
||||||
|
"isNew": true,
|
||||||
|
"legend": {
|
||||||
|
"alignAsTable": false,
|
||||||
|
"avg": false,
|
||||||
|
"current": false,
|
||||||
|
"hideEmpty": false,
|
||||||
|
"hideZero": false,
|
||||||
|
"max": false,
|
||||||
|
"min": false,
|
||||||
|
"rightSide": false,
|
||||||
|
"show": true,
|
||||||
|
"total": false
|
||||||
|
},
|
||||||
|
"lines": true,
|
||||||
|
"linewidth": 2,
|
||||||
|
"links": [],
|
||||||
|
"nullPointMode": "connected",
|
||||||
|
"percentage": false,
|
||||||
|
"pointradius": 5,
|
||||||
|
"points": false,
|
||||||
|
"renderer": "flot",
|
||||||
|
"seriesOverrides": [],
|
||||||
|
"spaceLength": 10,
|
||||||
|
"span": 12,
|
||||||
|
"stack": false,
|
||||||
|
"steppedLine": false,
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "min(kube_statefulset_status_replicas{statefulset=\"$statefulset_name\",namespace=\"$statefulset_namespace\"}) without (instance, pod)",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "available",
|
||||||
|
"refId": "B",
|
||||||
|
"step": 30
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": "max(kube_statefulset_replicas{statefulset=\"$statefulset_name\",namespace=\"$statefulset_namespace\"}) without (instance, pod)",
|
||||||
|
"intervalFactor": 2,
|
||||||
|
"legendFormat": "desired",
|
||||||
|
"refId": "E",
|
||||||
|
"step": 30
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Replicas",
|
||||||
|
"tooltip": {
|
||||||
|
"msResolution": true,
|
||||||
|
"shared": true,
|
||||||
|
"sort": 0,
|
||||||
|
"value_type": "cumulative"
|
||||||
|
},
|
||||||
|
"type": "graph",
|
||||||
|
"xaxis": {
|
||||||
|
"mode": "time",
|
||||||
|
"show": true,
|
||||||
|
"values": []
|
||||||
|
},
|
||||||
|
"yaxes": [
|
||||||
|
{
|
||||||
|
"format": "none",
|
||||||
|
"label": "",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"format": "short",
|
||||||
|
"label": "",
|
||||||
|
"logBase": 1,
|
||||||
|
"show": false
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"showTitle": false,
|
||||||
|
"title": "Dashboard Row",
|
||||||
|
"titleSize": "h6"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"schemaVersion": 14,
|
||||||
|
"sharedCrosshair": false,
|
||||||
|
"style": "dark",
|
||||||
|
"tags": [],
|
||||||
|
"templating": {
|
||||||
|
"list": [
|
||||||
|
{
|
||||||
|
"allValue": ".*",
|
||||||
|
"current": {},
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"hide": 0,
|
||||||
|
"includeAll": false,
|
||||||
|
"label": "Namespace",
|
||||||
|
"multi": false,
|
||||||
|
"name": "statefulset_namespace",
|
||||||
|
"options": [],
|
||||||
|
"query": "label_values(kube_statefulset_metadata_generation, namespace)",
|
||||||
|
"refresh": 1,
|
||||||
|
"regex": "",
|
||||||
|
"sort": 0,
|
||||||
|
"tagValuesQuery": null,
|
||||||
|
"tags": [],
|
||||||
|
"tagsQuery": "",
|
||||||
|
"type": "query",
|
||||||
|
"useTags": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"allValue": null,
|
||||||
|
"current": {},
|
||||||
|
"datasource": "prometheus",
|
||||||
|
"hide": 0,
|
||||||
|
"includeAll": false,
|
||||||
|
"label": "StatefulSet",
|
||||||
|
"multi": false,
|
||||||
|
"name": "statefulset_name",
|
||||||
|
"options": [],
|
||||||
|
"query": "label_values(kube_statefulset_metadata_generation{namespace=\"$statefulset_namespace\"}, statefulset)",
|
||||||
|
"refresh": 1,
|
||||||
|
"regex": "",
|
||||||
|
"sort": 0,
|
||||||
|
"tagValuesQuery": "",
|
||||||
|
"tags": [],
|
||||||
|
"tagsQuery": "statefulset",
|
||||||
|
"type": "query",
|
||||||
|
"useTags": false
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"time": {
|
||||||
|
"from": "now-6h",
|
||||||
|
"to": "now"
|
||||||
|
},
|
||||||
|
"timepicker": {
|
||||||
|
"refresh_intervals": [
|
||||||
|
"5s",
|
||||||
|
"10s",
|
||||||
|
"30s",
|
||||||
|
"1m",
|
||||||
|
"5m",
|
||||||
|
"15m",
|
||||||
|
"30m",
|
||||||
|
"1h",
|
||||||
|
"2h",
|
||||||
|
"1d"
|
||||||
|
],
|
||||||
|
"time_options": [
|
||||||
|
"5m",
|
||||||
|
"15m",
|
||||||
|
"1h",
|
||||||
|
"6h",
|
||||||
|
"12h",
|
||||||
|
"24h",
|
||||||
|
"2d",
|
||||||
|
"7d",
|
||||||
|
"30d"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"timezone": "browser",
|
||||||
|
"title": "StatefulSet",
|
||||||
|
"version": 1
|
||||||
|
}
|
||||||
|
---
|
||||||
16
addons/grafana/datasources.yaml
Normal file
16
addons/grafana/datasources.yaml
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: grafana-datasources
|
||||||
|
namespace: monitoring
|
||||||
|
data:
|
||||||
|
prometheus.yaml: |+
|
||||||
|
apiVersion: 1
|
||||||
|
datasources:
|
||||||
|
- name: prometheus
|
||||||
|
type: prometheus
|
||||||
|
access: proxy
|
||||||
|
orgId: 1
|
||||||
|
url: http://prometheus.monitoring.svc.cluster.local
|
||||||
|
version: 1
|
||||||
|
editable: false
|
||||||
@ -1,4 +1,4 @@
|
|||||||
apiVersion: apps/v1beta2
|
apiVersion: apps/v1
|
||||||
kind: Deployment
|
kind: Deployment
|
||||||
metadata:
|
metadata:
|
||||||
name: grafana
|
name: grafana
|
||||||
@ -18,10 +18,12 @@ spec:
|
|||||||
labels:
|
labels:
|
||||||
name: grafana
|
name: grafana
|
||||||
phase: prod
|
phase: prod
|
||||||
|
annotations:
|
||||||
|
seccomp.security.alpha.kubernetes.io/pod: 'docker/default'
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: grafana
|
- name: grafana
|
||||||
image: grafana/grafana:4.6.3
|
image: grafana/grafana:5.3.2
|
||||||
env:
|
env:
|
||||||
- name: GF_SERVER_HTTP_PORT
|
- name: GF_SERVER_HTTP_PORT
|
||||||
value: "8080"
|
value: "8080"
|
||||||
@ -30,7 +32,9 @@ spec:
|
|||||||
- name: GF_AUTH_ANONYMOUS_ENABLED
|
- name: GF_AUTH_ANONYMOUS_ENABLED
|
||||||
value: "true"
|
value: "true"
|
||||||
- name: GF_AUTH_ANONYMOUS_ORG_ROLE
|
- name: GF_AUTH_ANONYMOUS_ORG_ROLE
|
||||||
value: Admin
|
value: Viewer
|
||||||
|
- name: GF_ANALYTICS_REPORTING_ENABLED
|
||||||
|
value: "false"
|
||||||
ports:
|
ports:
|
||||||
- name: http
|
- name: http
|
||||||
containerPort: 8080
|
containerPort: 8080
|
||||||
@ -41,6 +45,20 @@ spec:
|
|||||||
limits:
|
limits:
|
||||||
memory: 200Mi
|
memory: 200Mi
|
||||||
cpu: 200m
|
cpu: 200m
|
||||||
|
volumeMounts:
|
||||||
|
- name: datasources
|
||||||
|
mountPath: /etc/grafana/provisioning/datasources
|
||||||
|
- name: dashboard-providers
|
||||||
|
mountPath: /etc/grafana/provisioning/dashboards
|
||||||
|
- name: dashboards
|
||||||
|
mountPath: /var/lib/grafana/dashboards
|
||||||
volumes:
|
volumes:
|
||||||
- name: grafana-storage
|
- name: datasources
|
||||||
emptyDir: {}
|
configMap:
|
||||||
|
name: grafana-datasources
|
||||||
|
- name: dashboard-providers
|
||||||
|
configMap:
|
||||||
|
name: grafana-dashboard-providers
|
||||||
|
- name: dashboards
|
||||||
|
configMap:
|
||||||
|
name: grafana-dashboards
|
||||||
|
|||||||
@ -3,6 +3,9 @@ kind: Service
|
|||||||
metadata:
|
metadata:
|
||||||
name: grafana
|
name: grafana
|
||||||
namespace: monitoring
|
namespace: monitoring
|
||||||
|
annotations:
|
||||||
|
prometheus.io/scrape: 'true'
|
||||||
|
prometheus.io/port: '8080'
|
||||||
spec:
|
spec:
|
||||||
type: ClusterIP
|
type: ClusterIP
|
||||||
selector:
|
selector:
|
||||||
|
|||||||
12
addons/heapster/cluster-role-binding.yaml
Normal file
12
addons/heapster/cluster-role-binding.yaml
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: ClusterRoleBinding
|
||||||
|
metadata:
|
||||||
|
name: heapster
|
||||||
|
roleRef:
|
||||||
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
kind: ClusterRole
|
||||||
|
name: heapster
|
||||||
|
subjects:
|
||||||
|
- kind: ServiceAccount
|
||||||
|
name: heapster
|
||||||
|
namespace: kube-system
|
||||||
30
addons/heapster/cluster-role.yaml
Normal file
30
addons/heapster/cluster-role.yaml
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: ClusterRole
|
||||||
|
metadata:
|
||||||
|
name: heapster
|
||||||
|
rules:
|
||||||
|
- apiGroups:
|
||||||
|
- ""
|
||||||
|
resources:
|
||||||
|
- events
|
||||||
|
- namespaces
|
||||||
|
- nodes
|
||||||
|
- pods
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- list
|
||||||
|
- watch
|
||||||
|
- apiGroups:
|
||||||
|
- extensions
|
||||||
|
resources:
|
||||||
|
- deployments
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- list
|
||||||
|
- watch
|
||||||
|
- apiGroups:
|
||||||
|
- ""
|
||||||
|
resources:
|
||||||
|
- nodes/stats
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
@ -1,4 +1,4 @@
|
|||||||
apiVersion: apps/v1beta2
|
apiVersion: apps/v1
|
||||||
kind: Deployment
|
kind: Deployment
|
||||||
metadata:
|
metadata:
|
||||||
name: heapster
|
name: heapster
|
||||||
@ -15,14 +15,15 @@ spec:
|
|||||||
name: heapster
|
name: heapster
|
||||||
phase: prod
|
phase: prod
|
||||||
annotations:
|
annotations:
|
||||||
scheduler.alpha.kubernetes.io/critical-pod: ''
|
seccomp.security.alpha.kubernetes.io/pod: 'docker/default'
|
||||||
spec:
|
spec:
|
||||||
|
serviceAccountName: heapster
|
||||||
containers:
|
containers:
|
||||||
- name: heapster
|
- name: heapster
|
||||||
image: gcr.io/google_containers/heapster-amd64:v1.5.0
|
image: k8s.gcr.io/heapster-amd64:v1.5.4
|
||||||
command:
|
command:
|
||||||
- /heapster
|
- /heapster
|
||||||
- --source=kubernetes.summary_api:''
|
- --source=kubernetes.summary_api:''?useServiceAccount=true&kubeletHttps=true&kubeletPort=10250&insecure=true
|
||||||
livenessProbe:
|
livenessProbe:
|
||||||
httpGet:
|
httpGet:
|
||||||
path: /healthz
|
path: /healthz
|
||||||
@ -31,7 +32,7 @@ spec:
|
|||||||
initialDelaySeconds: 180
|
initialDelaySeconds: 180
|
||||||
timeoutSeconds: 5
|
timeoutSeconds: 5
|
||||||
- name: heapster-nanny
|
- name: heapster-nanny
|
||||||
image: gcr.io/google_containers/addon-resizer:1.7
|
image: k8s.gcr.io/addon-resizer:1.7
|
||||||
command:
|
command:
|
||||||
- /pod_nanny
|
- /pod_nanny
|
||||||
- --cpu=80m
|
- --cpu=80m
|
||||||
|
|||||||
13
addons/heapster/role-binding.yaml
Normal file
13
addons/heapster/role-binding.yaml
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: RoleBinding
|
||||||
|
metadata:
|
||||||
|
name: heapster
|
||||||
|
namespace: kube-system
|
||||||
|
roleRef:
|
||||||
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
kind: Role
|
||||||
|
name: system:pod-nanny
|
||||||
|
subjects:
|
||||||
|
- kind: ServiceAccount
|
||||||
|
name: heapster
|
||||||
|
namespace: kube-system
|
||||||
19
addons/heapster/role.yaml
Normal file
19
addons/heapster/role.yaml
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: Role
|
||||||
|
metadata:
|
||||||
|
name: system:pod-nanny
|
||||||
|
namespace: kube-system
|
||||||
|
rules:
|
||||||
|
- apiGroups:
|
||||||
|
- ""
|
||||||
|
resources:
|
||||||
|
- pods
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- apiGroups:
|
||||||
|
- "extensions"
|
||||||
|
resources:
|
||||||
|
- deployments
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- update
|
||||||
5
addons/heapster/service-account.yaml
Normal file
5
addons/heapster/service-account.yaml
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: ServiceAccount
|
||||||
|
metadata:
|
||||||
|
name: heapster
|
||||||
|
namespace: kube-system
|
||||||
@ -2,3 +2,5 @@ apiVersion: v1
|
|||||||
kind: Namespace
|
kind: Namespace
|
||||||
metadata:
|
metadata:
|
||||||
name: ingress
|
name: ingress
|
||||||
|
labels:
|
||||||
|
name: ingress
|
||||||
@ -1,22 +1,28 @@
|
|||||||
apiVersion: extensions/v1beta1
|
apiVersion: apps/v1
|
||||||
kind: Deployment
|
kind: Deployment
|
||||||
metadata:
|
metadata:
|
||||||
name: default-backend
|
name: default-backend
|
||||||
namespace: ingress
|
namespace: ingress
|
||||||
spec:
|
spec:
|
||||||
replicas: 1
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
name: default-backend
|
||||||
|
phase: prod
|
||||||
template:
|
template:
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
name: default-backend
|
name: default-backend
|
||||||
phase: prod
|
phase: prod
|
||||||
|
annotations:
|
||||||
|
seccomp.security.alpha.kubernetes.io/pod: 'docker/default'
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: default-backend
|
- name: default-backend
|
||||||
# Any image is permissable as long as:
|
# Any image is permissable as long as:
|
||||||
# 1. It serves a 404 page at /
|
# 1. It serves a 404 page at /
|
||||||
# 2. It serves 200 on a /healthz endpoint
|
# 2. It serves 200 on a /healthz endpoint
|
||||||
image: gcr.io/google_containers/defaultbackend:1.4
|
image: k8s.gcr.io/defaultbackend:1.4
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 8080
|
- containerPort: 8080
|
||||||
resources:
|
resources:
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
apiVersion: extensions/v1beta1
|
apiVersion: apps/v1
|
||||||
kind: Deployment
|
kind: Deployment
|
||||||
metadata:
|
metadata:
|
||||||
name: nginx-ingress-controller
|
name: nginx-ingress-controller
|
||||||
@ -8,18 +8,23 @@ spec:
|
|||||||
strategy:
|
strategy:
|
||||||
rollingUpdate:
|
rollingUpdate:
|
||||||
maxUnavailable: 1
|
maxUnavailable: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
name: nginx-ingress-controller
|
||||||
|
phase: prod
|
||||||
template:
|
template:
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
name: nginx-ingress-controller
|
name: nginx-ingress-controller
|
||||||
phase: prod
|
phase: prod
|
||||||
|
annotations:
|
||||||
|
seccomp.security.alpha.kubernetes.io/pod: 'docker/default'
|
||||||
spec:
|
spec:
|
||||||
nodeSelector:
|
nodeSelector:
|
||||||
node-role.kubernetes.io/node: ""
|
node-role.kubernetes.io/node: ""
|
||||||
hostNetwork: true
|
|
||||||
containers:
|
containers:
|
||||||
- name: nginx-ingress-controller
|
- name: nginx-ingress-controller
|
||||||
image: quay.io/kubernetes-ingress-controller/nginx-ingress-controller:0.9.0
|
image: quay.io/kubernetes-ingress-controller/nginx-ingress-controller:0.20.0
|
||||||
args:
|
args:
|
||||||
- /nginx-ingress-controller
|
- /nginx-ingress-controller
|
||||||
- --default-backend-service=$(POD_NAMESPACE)/default-backend
|
- --default-backend-service=$(POD_NAMESPACE)/default-backend
|
||||||
@ -63,5 +68,12 @@ spec:
|
|||||||
periodSeconds: 10
|
periodSeconds: 10
|
||||||
successThreshold: 1
|
successThreshold: 1
|
||||||
timeoutSeconds: 1
|
timeoutSeconds: 1
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add:
|
||||||
|
- NET_BIND_SERVICE
|
||||||
|
drop:
|
||||||
|
- ALL
|
||||||
|
runAsUser: 33 # www-data
|
||||||
restartPolicy: Always
|
restartPolicy: Always
|
||||||
terminationGracePeriodSeconds: 60
|
terminationGracePeriodSeconds: 60
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: ClusterRoleBinding
|
kind: ClusterRoleBinding
|
||||||
apiVersion: rbac.authorization.k8s.io/v1beta1
|
|
||||||
metadata:
|
metadata:
|
||||||
name: ingress
|
name: ingress
|
||||||
roleRef:
|
roleRef:
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
apiVersion: rbac.authorization.k8s.io/v1beta1
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: ClusterRole
|
kind: ClusterRole
|
||||||
metadata:
|
metadata:
|
||||||
name: ingress
|
name: ingress
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: RoleBinding
|
kind: RoleBinding
|
||||||
apiVersion: rbac.authorization.k8s.io/v1beta1
|
|
||||||
metadata:
|
metadata:
|
||||||
name: ingress
|
name: ingress
|
||||||
namespace: ingress
|
namespace: ingress
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: Role
|
kind: Role
|
||||||
apiVersion: rbac.authorization.k8s.io/v1beta1
|
|
||||||
metadata:
|
metadata:
|
||||||
name: ingress
|
name: ingress
|
||||||
namespace: ingress
|
namespace: ingress
|
||||||
|
|||||||
@ -3,6 +3,9 @@ kind: Service
|
|||||||
metadata:
|
metadata:
|
||||||
name: nginx-ingress-controller
|
name: nginx-ingress-controller
|
||||||
namespace: ingress
|
namespace: ingress
|
||||||
|
annotations:
|
||||||
|
prometheus.io/scrape: 'true'
|
||||||
|
prometheus.io/port: '10254'
|
||||||
spec:
|
spec:
|
||||||
type: ClusterIP
|
type: ClusterIP
|
||||||
selector:
|
selector:
|
||||||
|
|||||||
@ -2,3 +2,5 @@ apiVersion: v1
|
|||||||
kind: Namespace
|
kind: Namespace
|
||||||
metadata:
|
metadata:
|
||||||
name: ingress
|
name: ingress
|
||||||
|
labels:
|
||||||
|
name: ingress
|
||||||
42
addons/nginx-ingress/azure/default-backend/deployment.yaml
Normal file
42
addons/nginx-ingress/azure/default-backend/deployment.yaml
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: default-backend
|
||||||
|
namespace: ingress
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
name: default-backend
|
||||||
|
phase: prod
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
name: default-backend
|
||||||
|
phase: prod
|
||||||
|
annotations:
|
||||||
|
seccomp.security.alpha.kubernetes.io/pod: 'docker/default'
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: default-backend
|
||||||
|
# Any image is permissable as long as:
|
||||||
|
# 1. It serves a 404 page at /
|
||||||
|
# 2. It serves 200 on a /healthz endpoint
|
||||||
|
image: k8s.gcr.io/defaultbackend:1.4
|
||||||
|
ports:
|
||||||
|
- containerPort: 8080
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 10m
|
||||||
|
memory: 20Mi
|
||||||
|
requests:
|
||||||
|
cpu: 10m
|
||||||
|
memory: 20Mi
|
||||||
|
livenessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /healthz
|
||||||
|
port: 8080
|
||||||
|
scheme: HTTP
|
||||||
|
initialDelaySeconds: 30
|
||||||
|
timeoutSeconds: 5
|
||||||
|
terminationGracePeriodSeconds: 60
|
||||||
@ -1,15 +1,15 @@
|
|||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: Service
|
kind: Service
|
||||||
metadata:
|
metadata:
|
||||||
name: kubernetes-dashboard
|
name: default-backend
|
||||||
namespace: kube-system
|
namespace: ingress
|
||||||
spec:
|
spec:
|
||||||
type: ClusterIP
|
type: ClusterIP
|
||||||
selector:
|
selector:
|
||||||
name: kubernetes-dashboard
|
name: default-backend
|
||||||
phase: prod
|
phase: prod
|
||||||
ports:
|
ports:
|
||||||
- name: http
|
- name: http
|
||||||
protocol: TCP
|
protocol: TCP
|
||||||
port: 80
|
port: 80
|
||||||
targetPort: 9090
|
targetPort: 8080
|
||||||
79
addons/nginx-ingress/azure/deployment.yaml
Normal file
79
addons/nginx-ingress/azure/deployment.yaml
Normal file
@ -0,0 +1,79 @@
|
|||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: nginx-ingress-controller
|
||||||
|
namespace: ingress
|
||||||
|
spec:
|
||||||
|
replicas: 2
|
||||||
|
strategy:
|
||||||
|
rollingUpdate:
|
||||||
|
maxUnavailable: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
name: nginx-ingress-controller
|
||||||
|
phase: prod
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
name: nginx-ingress-controller
|
||||||
|
phase: prod
|
||||||
|
annotations:
|
||||||
|
seccomp.security.alpha.kubernetes.io/pod: 'docker/default'
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
node-role.kubernetes.io/node: ""
|
||||||
|
containers:
|
||||||
|
- name: nginx-ingress-controller
|
||||||
|
image: quay.io/kubernetes-ingress-controller/nginx-ingress-controller:0.20.0
|
||||||
|
args:
|
||||||
|
- /nginx-ingress-controller
|
||||||
|
- --default-backend-service=$(POD_NAMESPACE)/default-backend
|
||||||
|
- --ingress-class=public
|
||||||
|
# use downward API
|
||||||
|
env:
|
||||||
|
- name: POD_NAME
|
||||||
|
valueFrom:
|
||||||
|
fieldRef:
|
||||||
|
fieldPath: metadata.name
|
||||||
|
- name: POD_NAMESPACE
|
||||||
|
valueFrom:
|
||||||
|
fieldRef:
|
||||||
|
fieldPath: metadata.namespace
|
||||||
|
ports:
|
||||||
|
- name: http
|
||||||
|
containerPort: 80
|
||||||
|
hostPort: 80
|
||||||
|
- name: https
|
||||||
|
containerPort: 443
|
||||||
|
hostPort: 443
|
||||||
|
- name: health
|
||||||
|
containerPort: 10254
|
||||||
|
hostPort: 10254
|
||||||
|
livenessProbe:
|
||||||
|
failureThreshold: 3
|
||||||
|
httpGet:
|
||||||
|
path: /healthz
|
||||||
|
port: 10254
|
||||||
|
scheme: HTTP
|
||||||
|
initialDelaySeconds: 10
|
||||||
|
periodSeconds: 10
|
||||||
|
successThreshold: 1
|
||||||
|
timeoutSeconds: 1
|
||||||
|
readinessProbe:
|
||||||
|
failureThreshold: 3
|
||||||
|
httpGet:
|
||||||
|
path: /healthz
|
||||||
|
port: 10254
|
||||||
|
scheme: HTTP
|
||||||
|
periodSeconds: 10
|
||||||
|
successThreshold: 1
|
||||||
|
timeoutSeconds: 1
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add:
|
||||||
|
- NET_BIND_SERVICE
|
||||||
|
drop:
|
||||||
|
- ALL
|
||||||
|
runAsUser: 33 # www-data
|
||||||
|
restartPolicy: Always
|
||||||
|
terminationGracePeriodSeconds: 60
|
||||||
12
addons/nginx-ingress/azure/rbac/cluster-role-binding.yaml
Normal file
12
addons/nginx-ingress/azure/rbac/cluster-role-binding.yaml
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: ClusterRoleBinding
|
||||||
|
metadata:
|
||||||
|
name: ingress
|
||||||
|
roleRef:
|
||||||
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
kind: ClusterRole
|
||||||
|
name: ingress
|
||||||
|
subjects:
|
||||||
|
- kind: ServiceAccount
|
||||||
|
namespace: ingress
|
||||||
|
name: default
|
||||||
51
addons/nginx-ingress/azure/rbac/cluster-role.yaml
Normal file
51
addons/nginx-ingress/azure/rbac/cluster-role.yaml
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: ClusterRole
|
||||||
|
metadata:
|
||||||
|
name: ingress
|
||||||
|
rules:
|
||||||
|
- apiGroups:
|
||||||
|
- ""
|
||||||
|
resources:
|
||||||
|
- configmaps
|
||||||
|
- endpoints
|
||||||
|
- nodes
|
||||||
|
- pods
|
||||||
|
- secrets
|
||||||
|
verbs:
|
||||||
|
- list
|
||||||
|
- watch
|
||||||
|
- apiGroups:
|
||||||
|
- ""
|
||||||
|
resources:
|
||||||
|
- nodes
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- apiGroups:
|
||||||
|
- ""
|
||||||
|
resources:
|
||||||
|
- services
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- list
|
||||||
|
- watch
|
||||||
|
- apiGroups:
|
||||||
|
- "extensions"
|
||||||
|
resources:
|
||||||
|
- ingresses
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- list
|
||||||
|
- watch
|
||||||
|
- apiGroups:
|
||||||
|
- ""
|
||||||
|
resources:
|
||||||
|
- events
|
||||||
|
verbs:
|
||||||
|
- create
|
||||||
|
- patch
|
||||||
|
- apiGroups:
|
||||||
|
- "extensions"
|
||||||
|
resources:
|
||||||
|
- ingresses/status
|
||||||
|
verbs:
|
||||||
|
- update
|
||||||
13
addons/nginx-ingress/azure/rbac/role-binding.yaml
Normal file
13
addons/nginx-ingress/azure/rbac/role-binding.yaml
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: RoleBinding
|
||||||
|
metadata:
|
||||||
|
name: ingress
|
||||||
|
namespace: ingress
|
||||||
|
roleRef:
|
||||||
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
kind: Role
|
||||||
|
name: ingress
|
||||||
|
subjects:
|
||||||
|
- kind: ServiceAccount
|
||||||
|
namespace: ingress
|
||||||
|
name: default
|
||||||
41
addons/nginx-ingress/azure/rbac/role.yaml
Normal file
41
addons/nginx-ingress/azure/rbac/role.yaml
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: Role
|
||||||
|
metadata:
|
||||||
|
name: ingress
|
||||||
|
namespace: ingress
|
||||||
|
rules:
|
||||||
|
- apiGroups:
|
||||||
|
- ""
|
||||||
|
resources:
|
||||||
|
- configmaps
|
||||||
|
- pods
|
||||||
|
- secrets
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- apiGroups:
|
||||||
|
- ""
|
||||||
|
resources:
|
||||||
|
- configmaps
|
||||||
|
resourceNames:
|
||||||
|
# Defaults to "<election-id>-<ingress-class>"
|
||||||
|
# Here: "<ingress-controller-leader>-<nginx>"
|
||||||
|
# This has to be adapted if you change either parameter
|
||||||
|
# when launching the nginx-ingress-controller.
|
||||||
|
- "ingress-controller-leader-public"
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- update
|
||||||
|
- apiGroups:
|
||||||
|
- ""
|
||||||
|
resources:
|
||||||
|
- configmaps
|
||||||
|
verbs:
|
||||||
|
- create
|
||||||
|
- apiGroups:
|
||||||
|
- ""
|
||||||
|
resources:
|
||||||
|
- endpoints
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- create
|
||||||
|
- update
|
||||||
22
addons/nginx-ingress/azure/service.yaml
Normal file
22
addons/nginx-ingress/azure/service.yaml
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: nginx-ingress-controller
|
||||||
|
namespace: ingress
|
||||||
|
annotations:
|
||||||
|
prometheus.io/scrape: 'true'
|
||||||
|
prometheus.io/port: '10254'
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
name: nginx-ingress-controller
|
||||||
|
phase: prod
|
||||||
|
ports:
|
||||||
|
- name: http
|
||||||
|
protocol: TCP
|
||||||
|
port: 80
|
||||||
|
targetPort: 80
|
||||||
|
- name: https
|
||||||
|
protocol: TCP
|
||||||
|
port: 443
|
||||||
|
targetPort: 443
|
||||||
@ -2,3 +2,5 @@ apiVersion: v1
|
|||||||
kind: Namespace
|
kind: Namespace
|
||||||
metadata:
|
metadata:
|
||||||
name: ingress
|
name: ingress
|
||||||
|
labels:
|
||||||
|
name: ingress
|
||||||
@ -0,0 +1,42 @@
|
|||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: default-backend
|
||||||
|
namespace: ingress
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
name: default-backend
|
||||||
|
phase: prod
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
name: default-backend
|
||||||
|
phase: prod
|
||||||
|
annotations:
|
||||||
|
seccomp.security.alpha.kubernetes.io/pod: 'docker/default'
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: default-backend
|
||||||
|
# Any image is permissable as long as:
|
||||||
|
# 1. It serves a 404 page at /
|
||||||
|
# 2. It serves 200 on a /healthz endpoint
|
||||||
|
image: k8s.gcr.io/defaultbackend:1.4
|
||||||
|
ports:
|
||||||
|
- containerPort: 8080
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 10m
|
||||||
|
memory: 20Mi
|
||||||
|
requests:
|
||||||
|
cpu: 10m
|
||||||
|
memory: 20Mi
|
||||||
|
livenessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /healthz
|
||||||
|
port: 8080
|
||||||
|
scheme: HTTP
|
||||||
|
initialDelaySeconds: 30
|
||||||
|
timeoutSeconds: 5
|
||||||
|
terminationGracePeriodSeconds: 60
|
||||||
15
addons/nginx-ingress/bare-metal/default-backend/service.yaml
Normal file
15
addons/nginx-ingress/bare-metal/default-backend/service.yaml
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: default-backend
|
||||||
|
namespace: ingress
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
name: default-backend
|
||||||
|
phase: prod
|
||||||
|
ports:
|
||||||
|
- name: http
|
||||||
|
protocol: TCP
|
||||||
|
port: 80
|
||||||
|
targetPort: 8080
|
||||||
75
addons/nginx-ingress/bare-metal/deployment.yaml
Normal file
75
addons/nginx-ingress/bare-metal/deployment.yaml
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: ingress-controller-public
|
||||||
|
namespace: ingress
|
||||||
|
spec:
|
||||||
|
replicas: 2
|
||||||
|
strategy:
|
||||||
|
rollingUpdate:
|
||||||
|
maxUnavailable: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
name: ingress-controller-public
|
||||||
|
phase: prod
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
name: ingress-controller-public
|
||||||
|
phase: prod
|
||||||
|
annotations:
|
||||||
|
seccomp.security.alpha.kubernetes.io/pod: 'docker/default'
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: nginx-ingress-controller
|
||||||
|
image: quay.io/kubernetes-ingress-controller/nginx-ingress-controller:0.20.0
|
||||||
|
args:
|
||||||
|
- /nginx-ingress-controller
|
||||||
|
- --default-backend-service=$(POD_NAMESPACE)/default-backend
|
||||||
|
- --ingress-class=public
|
||||||
|
# use downward API
|
||||||
|
env:
|
||||||
|
- name: POD_NAME
|
||||||
|
valueFrom:
|
||||||
|
fieldRef:
|
||||||
|
fieldPath: metadata.name
|
||||||
|
- name: POD_NAMESPACE
|
||||||
|
valueFrom:
|
||||||
|
fieldRef:
|
||||||
|
fieldPath: metadata.namespace
|
||||||
|
ports:
|
||||||
|
- name: http
|
||||||
|
containerPort: 80
|
||||||
|
- name: https
|
||||||
|
containerPort: 443
|
||||||
|
- name: health
|
||||||
|
containerPort: 10254
|
||||||
|
livenessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /healthz
|
||||||
|
port: 10254
|
||||||
|
scheme: HTTP
|
||||||
|
initialDelaySeconds: 10
|
||||||
|
periodSeconds: 10
|
||||||
|
successThreshold: 1
|
||||||
|
failureThreshold: 3
|
||||||
|
timeoutSeconds: 1
|
||||||
|
readinessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /healthz
|
||||||
|
port: 10254
|
||||||
|
scheme: HTTP
|
||||||
|
periodSeconds: 10
|
||||||
|
successThreshold: 1
|
||||||
|
failureThreshold: 3
|
||||||
|
timeoutSeconds: 1
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add:
|
||||||
|
- NET_BIND_SERVICE
|
||||||
|
drop:
|
||||||
|
- ALL
|
||||||
|
runAsUser: 33 # www-data
|
||||||
|
restartPolicy: Always
|
||||||
|
terminationGracePeriodSeconds: 60
|
||||||
|
|
||||||
@ -0,0 +1,12 @@
|
|||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: ClusterRoleBinding
|
||||||
|
metadata:
|
||||||
|
name: ingress
|
||||||
|
roleRef:
|
||||||
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
kind: ClusterRole
|
||||||
|
name: ingress
|
||||||
|
subjects:
|
||||||
|
- kind: ServiceAccount
|
||||||
|
namespace: ingress
|
||||||
|
name: default
|
||||||
51
addons/nginx-ingress/bare-metal/rbac/cluster-role.yaml
Normal file
51
addons/nginx-ingress/bare-metal/rbac/cluster-role.yaml
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: ClusterRole
|
||||||
|
metadata:
|
||||||
|
name: ingress
|
||||||
|
rules:
|
||||||
|
- apiGroups:
|
||||||
|
- ""
|
||||||
|
resources:
|
||||||
|
- configmaps
|
||||||
|
- endpoints
|
||||||
|
- nodes
|
||||||
|
- pods
|
||||||
|
- secrets
|
||||||
|
verbs:
|
||||||
|
- list
|
||||||
|
- watch
|
||||||
|
- apiGroups:
|
||||||
|
- ""
|
||||||
|
resources:
|
||||||
|
- nodes
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- apiGroups:
|
||||||
|
- ""
|
||||||
|
resources:
|
||||||
|
- services
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- list
|
||||||
|
- watch
|
||||||
|
- apiGroups:
|
||||||
|
- "extensions"
|
||||||
|
resources:
|
||||||
|
- ingresses
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- list
|
||||||
|
- watch
|
||||||
|
- apiGroups:
|
||||||
|
- ""
|
||||||
|
resources:
|
||||||
|
- events
|
||||||
|
verbs:
|
||||||
|
- create
|
||||||
|
- patch
|
||||||
|
- apiGroups:
|
||||||
|
- "extensions"
|
||||||
|
resources:
|
||||||
|
- ingresses/status
|
||||||
|
verbs:
|
||||||
|
- update
|
||||||
13
addons/nginx-ingress/bare-metal/rbac/role-binding.yaml
Normal file
13
addons/nginx-ingress/bare-metal/rbac/role-binding.yaml
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: RoleBinding
|
||||||
|
metadata:
|
||||||
|
name: ingress
|
||||||
|
namespace: ingress
|
||||||
|
roleRef:
|
||||||
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
kind: Role
|
||||||
|
name: ingress
|
||||||
|
subjects:
|
||||||
|
- kind: ServiceAccount
|
||||||
|
namespace: ingress
|
||||||
|
name: default
|
||||||
41
addons/nginx-ingress/bare-metal/rbac/role.yaml
Normal file
41
addons/nginx-ingress/bare-metal/rbac/role.yaml
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: Role
|
||||||
|
metadata:
|
||||||
|
name: ingress
|
||||||
|
namespace: ingress
|
||||||
|
rules:
|
||||||
|
- apiGroups:
|
||||||
|
- ""
|
||||||
|
resources:
|
||||||
|
- configmaps
|
||||||
|
- pods
|
||||||
|
- secrets
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- apiGroups:
|
||||||
|
- ""
|
||||||
|
resources:
|
||||||
|
- configmaps
|
||||||
|
resourceNames:
|
||||||
|
# Defaults to "<election-id>-<ingress-class>"
|
||||||
|
# Here: "<ingress-controller-leader>-<nginx>"
|
||||||
|
# This has to be adapted if you change either parameter
|
||||||
|
# when launching the nginx-ingress-controller.
|
||||||
|
- "ingress-controller-leader-public"
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- update
|
||||||
|
- apiGroups:
|
||||||
|
- ""
|
||||||
|
resources:
|
||||||
|
- configmaps
|
||||||
|
verbs:
|
||||||
|
- create
|
||||||
|
- apiGroups:
|
||||||
|
- ""
|
||||||
|
resources:
|
||||||
|
- endpoints
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- create
|
||||||
|
- update
|
||||||
23
addons/nginx-ingress/bare-metal/service.yaml
Normal file
23
addons/nginx-ingress/bare-metal/service.yaml
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: ingress-controller-public
|
||||||
|
namespace: ingress
|
||||||
|
annotations:
|
||||||
|
prometheus.io/scrape: 'true'
|
||||||
|
prometheus.io/port: '10254'
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
clusterIP: 10.3.0.12
|
||||||
|
selector:
|
||||||
|
name: ingress-controller-public
|
||||||
|
phase: prod
|
||||||
|
ports:
|
||||||
|
- name: http
|
||||||
|
protocol: TCP
|
||||||
|
port: 80
|
||||||
|
targetPort: 80
|
||||||
|
- name: https
|
||||||
|
protocol: TCP
|
||||||
|
port: 443
|
||||||
|
targetPort: 443
|
||||||
6
addons/nginx-ingress/digital-ocean/0-namespace.yaml
Normal file
6
addons/nginx-ingress/digital-ocean/0-namespace.yaml
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: Namespace
|
||||||
|
metadata:
|
||||||
|
name: ingress
|
||||||
|
labels:
|
||||||
|
name: ingress
|
||||||
@ -1,4 +1,4 @@
|
|||||||
apiVersion: extensions/v1beta1
|
apiVersion: apps/v1
|
||||||
kind: DaemonSet
|
kind: DaemonSet
|
||||||
metadata:
|
metadata:
|
||||||
name: nginx-ingress-controller
|
name: nginx-ingress-controller
|
||||||
@ -8,18 +8,23 @@ spec:
|
|||||||
type: RollingUpdate
|
type: RollingUpdate
|
||||||
rollingUpdate:
|
rollingUpdate:
|
||||||
maxUnavailable: 1
|
maxUnavailable: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
name: nginx-ingress-controller
|
||||||
|
phase: prod
|
||||||
template:
|
template:
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
name: nginx-ingress-controller
|
name: nginx-ingress-controller
|
||||||
phase: prod
|
phase: prod
|
||||||
|
annotations:
|
||||||
|
seccomp.security.alpha.kubernetes.io/pod: 'docker/default'
|
||||||
spec:
|
spec:
|
||||||
nodeSelector:
|
nodeSelector:
|
||||||
node-role.kubernetes.io/node: ""
|
node-role.kubernetes.io/node: ""
|
||||||
hostNetwork: true
|
|
||||||
containers:
|
containers:
|
||||||
- name: nginx-ingress-controller
|
- name: nginx-ingress-controller
|
||||||
image: quay.io/kubernetes-ingress-controller/nginx-ingress-controller:0.9.0
|
image: quay.io/kubernetes-ingress-controller/nginx-ingress-controller:0.20.0
|
||||||
args:
|
args:
|
||||||
- /nginx-ingress-controller
|
- /nginx-ingress-controller
|
||||||
- --default-backend-service=$(POD_NAMESPACE)/default-backend
|
- --default-backend-service=$(POD_NAMESPACE)/default-backend
|
||||||
@ -63,5 +68,12 @@ spec:
|
|||||||
periodSeconds: 10
|
periodSeconds: 10
|
||||||
successThreshold: 1
|
successThreshold: 1
|
||||||
timeoutSeconds: 1
|
timeoutSeconds: 1
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add:
|
||||||
|
- NET_BIND_SERVICE
|
||||||
|
drop:
|
||||||
|
- ALL
|
||||||
|
runAsUser: 33 # www-data
|
||||||
restartPolicy: Always
|
restartPolicy: Always
|
||||||
terminationGracePeriodSeconds: 60
|
terminationGracePeriodSeconds: 60
|
||||||
|
|||||||
@ -1,22 +1,28 @@
|
|||||||
apiVersion: extensions/v1beta1
|
apiVersion: apps/v1
|
||||||
kind: Deployment
|
kind: Deployment
|
||||||
metadata:
|
metadata:
|
||||||
name: default-backend
|
name: default-backend
|
||||||
namespace: ingress
|
namespace: ingress
|
||||||
spec:
|
spec:
|
||||||
replicas: 1
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
name: default-backend
|
||||||
|
phase: prod
|
||||||
template:
|
template:
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
name: default-backend
|
name: default-backend
|
||||||
phase: prod
|
phase: prod
|
||||||
|
annotations:
|
||||||
|
seccomp.security.alpha.kubernetes.io/pod: 'docker/default'
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: default-backend
|
- name: default-backend
|
||||||
# Any image is permissable as long as:
|
# Any image is permissable as long as:
|
||||||
# 1. It serves a 404 page at /
|
# 1. It serves a 404 page at /
|
||||||
# 2. It serves 200 on a /healthz endpoint
|
# 2. It serves 200 on a /healthz endpoint
|
||||||
image: gcr.io/google_containers/defaultbackend:1.4
|
image: k8s.gcr.io/defaultbackend:1.4
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 8080
|
- containerPort: 8080
|
||||||
resources:
|
resources:
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: ClusterRoleBinding
|
kind: ClusterRoleBinding
|
||||||
apiVersion: rbac.authorization.k8s.io/v1beta1
|
|
||||||
metadata:
|
metadata:
|
||||||
name: ingress
|
name: ingress
|
||||||
roleRef:
|
roleRef:
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
apiVersion: rbac.authorization.k8s.io/v1beta1
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: ClusterRole
|
kind: ClusterRole
|
||||||
metadata:
|
metadata:
|
||||||
name: ingress
|
name: ingress
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: RoleBinding
|
kind: RoleBinding
|
||||||
apiVersion: rbac.authorization.k8s.io/v1beta1
|
|
||||||
metadata:
|
metadata:
|
||||||
name: ingress
|
name: ingress
|
||||||
namespace: ingress
|
namespace: ingress
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: Role
|
kind: Role
|
||||||
apiVersion: rbac.authorization.k8s.io/v1beta1
|
|
||||||
metadata:
|
metadata:
|
||||||
name: ingress
|
name: ingress
|
||||||
namespace: ingress
|
namespace: ingress
|
||||||
|
|||||||
@ -3,6 +3,9 @@ kind: Service
|
|||||||
metadata:
|
metadata:
|
||||||
name: nginx-ingress-controller
|
name: nginx-ingress-controller
|
||||||
namespace: ingress
|
namespace: ingress
|
||||||
|
annotations:
|
||||||
|
prometheus.io/scrape: 'true'
|
||||||
|
prometheus.io/port: '10254'
|
||||||
spec:
|
spec:
|
||||||
type: ClusterIP
|
type: ClusterIP
|
||||||
selector:
|
selector:
|
||||||
|
|||||||
6
addons/nginx-ingress/google-cloud/0-namespace.yaml
Normal file
6
addons/nginx-ingress/google-cloud/0-namespace.yaml
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: Namespace
|
||||||
|
metadata:
|
||||||
|
name: ingress
|
||||||
|
labels:
|
||||||
|
name: ingress
|
||||||
@ -1,22 +1,28 @@
|
|||||||
apiVersion: extensions/v1beta1
|
apiVersion: apps/v1
|
||||||
kind: Deployment
|
kind: Deployment
|
||||||
metadata:
|
metadata:
|
||||||
name: default-backend
|
name: default-backend
|
||||||
namespace: ingress
|
namespace: ingress
|
||||||
spec:
|
spec:
|
||||||
replicas: 1
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
name: default-backend
|
||||||
|
phase: prod
|
||||||
template:
|
template:
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
name: default-backend
|
name: default-backend
|
||||||
phase: prod
|
phase: prod
|
||||||
|
annotations:
|
||||||
|
seccomp.security.alpha.kubernetes.io/pod: 'docker/default'
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: default-backend
|
- name: default-backend
|
||||||
# Any image is permissable as long as:
|
# Any image is permissable as long as:
|
||||||
# 1. It serves a 404 page at /
|
# 1. It serves a 404 page at /
|
||||||
# 2. It serves 200 on a /healthz endpoint
|
# 2. It serves 200 on a /healthz endpoint
|
||||||
image: gcr.io/google_containers/defaultbackend:1.4
|
image: k8s.gcr.io/defaultbackend:1.4
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 8080
|
- containerPort: 8080
|
||||||
resources:
|
resources:
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
apiVersion: extensions/v1beta1
|
apiVersion: apps/v1
|
||||||
kind: Deployment
|
kind: Deployment
|
||||||
metadata:
|
metadata:
|
||||||
name: nginx-ingress-controller
|
name: nginx-ingress-controller
|
||||||
@ -8,18 +8,23 @@ spec:
|
|||||||
strategy:
|
strategy:
|
||||||
rollingUpdate:
|
rollingUpdate:
|
||||||
maxUnavailable: 1
|
maxUnavailable: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
name: nginx-ingress-controller
|
||||||
|
phase: prod
|
||||||
template:
|
template:
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
name: nginx-ingress-controller
|
name: nginx-ingress-controller
|
||||||
phase: prod
|
phase: prod
|
||||||
|
annotations:
|
||||||
|
seccomp.security.alpha.kubernetes.io/pod: 'docker/default'
|
||||||
spec:
|
spec:
|
||||||
nodeSelector:
|
nodeSelector:
|
||||||
node-role.kubernetes.io/node: ""
|
node-role.kubernetes.io/node: ""
|
||||||
hostNetwork: true
|
|
||||||
containers:
|
containers:
|
||||||
- name: nginx-ingress-controller
|
- name: nginx-ingress-controller
|
||||||
image: quay.io/kubernetes-ingress-controller/nginx-ingress-controller:0.9.0
|
image: quay.io/kubernetes-ingress-controller/nginx-ingress-controller:0.20.0
|
||||||
args:
|
args:
|
||||||
- /nginx-ingress-controller
|
- /nginx-ingress-controller
|
||||||
- --default-backend-service=$(POD_NAMESPACE)/default-backend
|
- --default-backend-service=$(POD_NAMESPACE)/default-backend
|
||||||
@ -63,5 +68,12 @@ spec:
|
|||||||
periodSeconds: 10
|
periodSeconds: 10
|
||||||
successThreshold: 1
|
successThreshold: 1
|
||||||
timeoutSeconds: 1
|
timeoutSeconds: 1
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add:
|
||||||
|
- NET_BIND_SERVICE
|
||||||
|
drop:
|
||||||
|
- ALL
|
||||||
|
runAsUser: 33 # www-data
|
||||||
restartPolicy: Always
|
restartPolicy: Always
|
||||||
terminationGracePeriodSeconds: 60
|
terminationGracePeriodSeconds: 60
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: ClusterRoleBinding
|
kind: ClusterRoleBinding
|
||||||
apiVersion: rbac.authorization.k8s.io/v1beta1
|
|
||||||
metadata:
|
metadata:
|
||||||
name: ingress
|
name: ingress
|
||||||
roleRef:
|
roleRef:
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
apiVersion: rbac.authorization.k8s.io/v1beta1
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: ClusterRole
|
kind: ClusterRole
|
||||||
metadata:
|
metadata:
|
||||||
name: ingress
|
name: ingress
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: RoleBinding
|
kind: RoleBinding
|
||||||
apiVersion: rbac.authorization.k8s.io/v1beta1
|
|
||||||
metadata:
|
metadata:
|
||||||
name: ingress
|
name: ingress
|
||||||
namespace: ingress
|
namespace: ingress
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: Role
|
kind: Role
|
||||||
apiVersion: rbac.authorization.k8s.io/v1beta1
|
|
||||||
metadata:
|
metadata:
|
||||||
name: ingress
|
name: ingress
|
||||||
namespace: ingress
|
namespace: ingress
|
||||||
|
|||||||
@ -3,6 +3,9 @@ kind: Service
|
|||||||
metadata:
|
metadata:
|
||||||
name: nginx-ingress-controller
|
name: nginx-ingress-controller
|
||||||
namespace: ingress
|
namespace: ingress
|
||||||
|
annotations:
|
||||||
|
prometheus.io/scrape: 'true'
|
||||||
|
prometheus.io/port: '10254'
|
||||||
spec:
|
spec:
|
||||||
type: ClusterIP
|
type: ClusterIP
|
||||||
selector:
|
selector:
|
||||||
|
|||||||
@ -2,3 +2,5 @@ apiVersion: v1
|
|||||||
kind: Namespace
|
kind: Namespace
|
||||||
metadata:
|
metadata:
|
||||||
name: monitoring
|
name: monitoring
|
||||||
|
labels:
|
||||||
|
name: monitoring
|
||||||
@ -39,7 +39,7 @@ data:
|
|||||||
tls_config:
|
tls_config:
|
||||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||||
# Using endpoints to discover kube-apiserver targets finds the pod IP
|
# Using endpoints to discover kube-apiserver targets finds the pod IP
|
||||||
# (host IP since apiserver is uses host network) which is not used in
|
# (host IP since apiserver uses host network) which is not used in
|
||||||
# the server certificate.
|
# the server certificate.
|
||||||
insecure_skip_verify: true
|
insecure_skip_verify: true
|
||||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||||
@ -51,63 +51,61 @@ data:
|
|||||||
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
|
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
|
||||||
action: keep
|
action: keep
|
||||||
regex: default;kubernetes;https
|
regex: default;kubernetes;https
|
||||||
|
- replacement: apiserver
|
||||||
|
action: replace
|
||||||
|
target_label: job
|
||||||
|
|
||||||
# Scrape config for node (i.e. kubelet) /metrics (e.g. 'kubelet_'). Explore
|
# Scrape config for node (i.e. kubelet) /metrics (e.g. 'kubelet_'). Explore
|
||||||
# metrics from a node by scraping kubelet (127.0.0.1:10255/metrics).
|
# metrics from a node by scraping kubelet (127.0.0.1:10250/metrics).
|
||||||
#
|
- job_name: 'kubelet'
|
||||||
# Rather than connecting directly to the node, the scrape is proxied though the
|
|
||||||
# Kubernetes apiserver. This means it will work if Prometheus is running out of
|
|
||||||
# cluster, or can't connect to nodes for some other reason (e.g. because of
|
|
||||||
# firewalling).
|
|
||||||
- job_name: 'kubernetes-nodes'
|
|
||||||
kubernetes_sd_configs:
|
kubernetes_sd_configs:
|
||||||
- role: node
|
- role: node
|
||||||
|
|
||||||
scheme: https
|
scheme: https
|
||||||
tls_config:
|
tls_config:
|
||||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||||
|
# Kubelet certs don't have any fixed IP SANs
|
||||||
|
insecure_skip_verify: true
|
||||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||||
|
|
||||||
relabel_configs:
|
relabel_configs:
|
||||||
- action: labelmap
|
- action: labelmap
|
||||||
regex: __meta_kubernetes_node_label_(.+)
|
regex: __meta_kubernetes_node_label_(.+)
|
||||||
- target_label: __address__
|
|
||||||
replacement: kubernetes.default.svc:443
|
|
||||||
- source_labels: [__meta_kubernetes_node_name]
|
|
||||||
regex: (.+)
|
|
||||||
target_label: __metrics_path__
|
|
||||||
replacement: /api/v1/nodes/${1}/proxy/metrics
|
|
||||||
|
|
||||||
# Scrape config for Kubelet cAdvisor. Explore metrics from a node by
|
# Scrape config for Kubelet cAdvisor. Explore metrics from a node by
|
||||||
# scraping kubelet (127.0.0.1:10255/metrics/cadvisor).
|
# scraping kubelet (127.0.0.1:10250/metrics/cadvisor).
|
||||||
#
|
|
||||||
# This is required for Kubernetes 1.7.3 and later, where cAdvisor metrics
|
|
||||||
# (those whose names begin with 'container_') have been removed from the
|
|
||||||
# Kubelet metrics endpoint. This job scrapes the cAdvisor endpoint to
|
|
||||||
# retrieve those metrics.
|
|
||||||
#
|
|
||||||
# Rather than connecting directly to the node, the scrape is proxied though the
|
|
||||||
# Kubernetes apiserver. This means it will work if Prometheus is running out of
|
|
||||||
# cluster, or can't connect to nodes for some other reason (e.g. because of
|
|
||||||
# firewalling).
|
|
||||||
- job_name: 'kubernetes-cadvisor'
|
- job_name: 'kubernetes-cadvisor'
|
||||||
kubernetes_sd_configs:
|
kubernetes_sd_configs:
|
||||||
- role: node
|
- role: node
|
||||||
|
|
||||||
scheme: https
|
scheme: https
|
||||||
|
metrics_path: /metrics/cadvisor
|
||||||
tls_config:
|
tls_config:
|
||||||
|
# Kubelet certs don't have any fixed IP SANs
|
||||||
|
insecure_skip_verify: true
|
||||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||||
|
|
||||||
relabel_configs:
|
relabel_configs:
|
||||||
- action: labelmap
|
- action: labelmap
|
||||||
regex: __meta_kubernetes_node_label_(.+)
|
regex: __meta_kubernetes_node_label_(.+)
|
||||||
- target_label: __address__
|
|
||||||
replacement: kubernetes.default.svc:443
|
|
||||||
|
# Scrap etcd metrics from controllers via listen-metrics-urls
|
||||||
|
- job_name: 'etcd'
|
||||||
|
kubernetes_sd_configs:
|
||||||
|
- role: node
|
||||||
|
scheme: http
|
||||||
|
relabel_configs:
|
||||||
|
- source_labels: [__meta_kubernetes_node_label_node_role_kubernetes_io_controller]
|
||||||
|
action: keep
|
||||||
|
regex: 'true'
|
||||||
|
- action: labelmap
|
||||||
|
regex: __meta_kubernetes_node_label_(.+)
|
||||||
- source_labels: [__meta_kubernetes_node_name]
|
- source_labels: [__meta_kubernetes_node_name]
|
||||||
regex: (.+)
|
action: replace
|
||||||
target_label: __metrics_path__
|
target_label: __address__
|
||||||
replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
|
replacement: '${1}:2381'
|
||||||
|
|
||||||
# Scrape config for service endpoints.
|
# Scrape config for service endpoints.
|
||||||
#
|
#
|
||||||
@ -149,7 +147,7 @@ data:
|
|||||||
target_label: kubernetes_namespace
|
target_label: kubernetes_namespace
|
||||||
- source_labels: [__meta_kubernetes_service_name]
|
- source_labels: [__meta_kubernetes_service_name]
|
||||||
action: replace
|
action: replace
|
||||||
target_label: kubernetes_name
|
target_label: job
|
||||||
|
|
||||||
# Example scrape config for probing services via the Blackbox Exporter.
|
# Example scrape config for probing services via the Blackbox Exporter.
|
||||||
#
|
#
|
||||||
@ -181,7 +179,7 @@ data:
|
|||||||
- source_labels: [__meta_kubernetes_namespace]
|
- source_labels: [__meta_kubernetes_namespace]
|
||||||
target_label: kubernetes_namespace
|
target_label: kubernetes_namespace
|
||||||
- source_labels: [__meta_kubernetes_service_name]
|
- source_labels: [__meta_kubernetes_service_name]
|
||||||
target_label: kubernetes_name
|
target_label: job
|
||||||
|
|
||||||
# Example scrape config for pods
|
# Example scrape config for pods
|
||||||
#
|
#
|
||||||
|
|||||||
@ -1,24 +1,30 @@
|
|||||||
apiVersion: extensions/v1beta1
|
apiVersion: apps/v1
|
||||||
kind: Deployment
|
kind: Deployment
|
||||||
metadata:
|
metadata:
|
||||||
name: prometheus
|
name: prometheus
|
||||||
namespace: monitoring
|
namespace: monitoring
|
||||||
spec:
|
spec:
|
||||||
replicas: 1
|
replicas: 1
|
||||||
strategy:
|
selector:
|
||||||
rollingUpdate:
|
matchLabels:
|
||||||
maxUnavailable: 1
|
name: prometheus
|
||||||
|
phase: prod
|
||||||
template:
|
template:
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
name: prometheus
|
name: prometheus
|
||||||
phase: prod
|
phase: prod
|
||||||
|
annotations:
|
||||||
|
seccomp.security.alpha.kubernetes.io/pod: 'docker/default'
|
||||||
spec:
|
spec:
|
||||||
|
serviceAccountName: prometheus
|
||||||
containers:
|
containers:
|
||||||
- name: prometheus
|
- name: prometheus
|
||||||
image: quay.io/prometheus/prometheus:v2.0.0
|
image: quay.io/prometheus/prometheus:v2.4.3
|
||||||
args:
|
args:
|
||||||
- '--config.file=/etc/prometheus/prometheus.yaml'
|
- --web.listen-address=0.0.0.0:9090
|
||||||
|
- --config.file=/etc/prometheus/prometheus.yaml
|
||||||
|
- --storage.tsdb.path=/var/lib/prometheus
|
||||||
ports:
|
ports:
|
||||||
- name: web
|
- name: web
|
||||||
containerPort: 9090
|
containerPort: 9090
|
||||||
@ -29,8 +35,18 @@ spec:
|
|||||||
mountPath: /etc/prometheus/rules
|
mountPath: /etc/prometheus/rules
|
||||||
- name: data
|
- name: data
|
||||||
mountPath: /var/lib/prometheus
|
mountPath: /var/lib/prometheus
|
||||||
dnsPolicy: ClusterFirst
|
readinessProbe:
|
||||||
restartPolicy: Always
|
httpGet:
|
||||||
|
path: /-/ready
|
||||||
|
port: 9090
|
||||||
|
initialDelaySeconds: 10
|
||||||
|
timeoutSeconds: 10
|
||||||
|
livenessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /-/healthy
|
||||||
|
port: 9090
|
||||||
|
initialDelaySeconds: 10
|
||||||
|
timeoutSeconds: 10
|
||||||
terminationGracePeriodSeconds: 30
|
terminationGracePeriodSeconds: 30
|
||||||
volumes:
|
volumes:
|
||||||
- name: config
|
- name: config
|
||||||
|
|||||||
@ -5,6 +5,8 @@ metadata:
|
|||||||
rules:
|
rules:
|
||||||
- apiGroups: [""]
|
- apiGroups: [""]
|
||||||
resources:
|
resources:
|
||||||
|
- configmaps
|
||||||
|
- secrets
|
||||||
- nodes
|
- nodes
|
||||||
- pods
|
- pods
|
||||||
- services
|
- services
|
||||||
@ -12,7 +14,9 @@ rules:
|
|||||||
- replicationcontrollers
|
- replicationcontrollers
|
||||||
- limitranges
|
- limitranges
|
||||||
- persistentvolumeclaims
|
- persistentvolumeclaims
|
||||||
|
- persistentvolumes
|
||||||
- namespaces
|
- namespaces
|
||||||
|
- endpoints
|
||||||
verbs: ["list", "watch"]
|
verbs: ["list", "watch"]
|
||||||
- apiGroups: ["extensions"]
|
- apiGroups: ["extensions"]
|
||||||
resources:
|
resources:
|
||||||
@ -29,3 +33,7 @@ rules:
|
|||||||
- cronjobs
|
- cronjobs
|
||||||
- jobs
|
- jobs
|
||||||
verbs: ["list", "watch"]
|
verbs: ["list", "watch"]
|
||||||
|
- apiGroups: ["autoscaling"]
|
||||||
|
resources:
|
||||||
|
- horizontalpodautoscalers
|
||||||
|
verbs: ["list", "watch"]
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
apiVersion: apps/v1beta2
|
apiVersion: apps/v1
|
||||||
kind: Deployment
|
kind: Deployment
|
||||||
metadata:
|
metadata:
|
||||||
name: kube-state-metrics
|
name: kube-state-metrics
|
||||||
@ -18,11 +18,13 @@ spec:
|
|||||||
labels:
|
labels:
|
||||||
name: kube-state-metrics
|
name: kube-state-metrics
|
||||||
phase: prod
|
phase: prod
|
||||||
|
annotations:
|
||||||
|
seccomp.security.alpha.kubernetes.io/pod: 'docker/default'
|
||||||
spec:
|
spec:
|
||||||
serviceAccountName: kube-state-metrics
|
serviceAccountName: kube-state-metrics
|
||||||
containers:
|
containers:
|
||||||
- name: kube-state-metrics
|
- name: kube-state-metrics
|
||||||
image: quay.io/coreos/kube-state-metrics:v1.1.0
|
image: quay.io/coreos/kube-state-metrics:v1.4.0
|
||||||
ports:
|
ports:
|
||||||
- name: metrics
|
- name: metrics
|
||||||
containerPort: 8080
|
containerPort: 8080
|
||||||
@ -33,7 +35,7 @@ spec:
|
|||||||
initialDelaySeconds: 5
|
initialDelaySeconds: 5
|
||||||
timeoutSeconds: 5
|
timeoutSeconds: 5
|
||||||
- name: addon-resizer
|
- name: addon-resizer
|
||||||
image: gcr.io/google_containers/addon-resizer:1.0
|
image: k8s.gcr.io/addon-resizer:1.7
|
||||||
resources:
|
resources:
|
||||||
limits:
|
limits:
|
||||||
cpu: 100m
|
cpu: 100m
|
||||||
|
|||||||
@ -15,5 +15,5 @@ spec:
|
|||||||
ports:
|
ports:
|
||||||
- name: metrics
|
- name: metrics
|
||||||
protocol: TCP
|
protocol: TCP
|
||||||
port: 80
|
port: 8080
|
||||||
targetPort: 8080
|
targetPort: 8080
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
apiVersion: apps/v1beta2
|
apiVersion: apps/v1
|
||||||
kind: DaemonSet
|
kind: DaemonSet
|
||||||
metadata:
|
metadata:
|
||||||
name: node-exporter
|
name: node-exporter
|
||||||
@ -17,12 +17,18 @@ spec:
|
|||||||
labels:
|
labels:
|
||||||
name: node-exporter
|
name: node-exporter
|
||||||
phase: prod
|
phase: prod
|
||||||
|
annotations:
|
||||||
|
seccomp.security.alpha.kubernetes.io/pod: 'docker/default'
|
||||||
spec:
|
spec:
|
||||||
|
serviceAccountName: node-exporter
|
||||||
|
securityContext:
|
||||||
|
runAsNonRoot: true
|
||||||
|
runAsUser: 65534
|
||||||
hostNetwork: true
|
hostNetwork: true
|
||||||
hostPID: true
|
hostPID: true
|
||||||
containers:
|
containers:
|
||||||
- name: node-exporter
|
- name: node-exporter
|
||||||
image: quay.io/prometheus/node-exporter:v0.15.0
|
image: quay.io/prometheus/node-exporter:v0.15.2
|
||||||
args:
|
args:
|
||||||
- "--path.procfs=/host/proc"
|
- "--path.procfs=/host/proc"
|
||||||
- "--path.sysfs=/host/sys"
|
- "--path.sysfs=/host/sys"
|
||||||
@ -45,9 +51,8 @@ spec:
|
|||||||
mountPath: /host/sys
|
mountPath: /host/sys
|
||||||
readOnly: true
|
readOnly: true
|
||||||
tolerations:
|
tolerations:
|
||||||
- key: node-role.kubernetes.io/master
|
- effect: NoSchedule
|
||||||
operator: Exists
|
operator: Exists
|
||||||
effect: NoSchedule
|
|
||||||
volumes:
|
volumes:
|
||||||
- name: proc
|
- name: proc
|
||||||
hostPath:
|
hostPath:
|
||||||
|
|||||||
@ -0,0 +1,5 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: ServiceAccount
|
||||||
|
metadata:
|
||||||
|
name: node-exporter
|
||||||
|
namespace: monitoring
|
||||||
@ -1,4 +1,4 @@
|
|||||||
apiVersion: rbac.authorization.k8s.io/v1beta1
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: ClusterRoleBinding
|
kind: ClusterRoleBinding
|
||||||
metadata:
|
metadata:
|
||||||
name: prometheus
|
name: prometheus
|
||||||
@ -8,5 +8,5 @@ roleRef:
|
|||||||
name: prometheus
|
name: prometheus
|
||||||
subjects:
|
subjects:
|
||||||
- kind: ServiceAccount
|
- kind: ServiceAccount
|
||||||
name: default
|
name: prometheus
|
||||||
namespace: monitoring
|
namespace: monitoring
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
apiVersion: rbac.authorization.k8s.io/v1beta1
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: ClusterRole
|
kind: ClusterRole
|
||||||
metadata:
|
metadata:
|
||||||
name: prometheus
|
name: prometheus
|
||||||
@ -6,7 +6,7 @@ rules:
|
|||||||
- apiGroups: [""]
|
- apiGroups: [""]
|
||||||
resources:
|
resources:
|
||||||
- nodes
|
- nodes
|
||||||
- nodes/proxy
|
- nodes/metrics
|
||||||
- services
|
- services
|
||||||
- endpoints
|
- endpoints
|
||||||
- pods
|
- pods
|
||||||
|
|||||||
@ -4,8 +4,7 @@ metadata:
|
|||||||
name: prometheus-rules
|
name: prometheus-rules
|
||||||
namespace: monitoring
|
namespace: monitoring
|
||||||
data:
|
data:
|
||||||
# Rules adapted from those provided by coreos/prometheus-operator and SoundCloud
|
alertmanager.rules.yaml: |
|
||||||
alertmanager.rules.yaml: |+
|
|
||||||
groups:
|
groups:
|
||||||
- name: alertmanager.rules
|
- name: alertmanager.rules
|
||||||
rules:
|
rules:
|
||||||
@ -36,7 +35,7 @@ data:
|
|||||||
annotations:
|
annotations:
|
||||||
description: Reloading Alertmanager's configuration has failed for {{ $labels.namespace
|
description: Reloading Alertmanager's configuration has failed for {{ $labels.namespace
|
||||||
}}/{{ $labels.pod}}.
|
}}/{{ $labels.pod}}.
|
||||||
etcd3.rules.yaml: |+
|
etcd3.rules.yaml: |
|
||||||
groups:
|
groups:
|
||||||
- name: ./etcd3.rules
|
- name: ./etcd3.rules
|
||||||
rules:
|
rules:
|
||||||
@ -64,28 +63,8 @@ data:
|
|||||||
description: etcd instance {{ $labels.instance }} has seen {{ $value }} leader
|
description: etcd instance {{ $labels.instance }} has seen {{ $value }} leader
|
||||||
changes within the last hour
|
changes within the last hour
|
||||||
summary: a high number of leader changes within the etcd cluster are happening
|
summary: a high number of leader changes within the etcd cluster are happening
|
||||||
- alert: HighNumberOfFailedGRPCRequests
|
|
||||||
expr: sum(rate(etcd_grpc_requests_failed_total{job="etcd"}[5m])) BY (grpc_method)
|
|
||||||
/ sum(rate(etcd_grpc_total{job="etcd"}[5m])) BY (grpc_method) > 0.01
|
|
||||||
for: 10m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
annotations:
|
|
||||||
description: '{{ $value }}% of requests for {{ $labels.grpc_method }} failed
|
|
||||||
on etcd instance {{ $labels.instance }}'
|
|
||||||
summary: a high number of gRPC requests are failing
|
|
||||||
- alert: HighNumberOfFailedGRPCRequests
|
|
||||||
expr: sum(rate(etcd_grpc_requests_failed_total{job="etcd"}[5m])) BY (grpc_method)
|
|
||||||
/ sum(rate(etcd_grpc_total{job="etcd"}[5m])) BY (grpc_method) > 0.05
|
|
||||||
for: 5m
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
annotations:
|
|
||||||
description: '{{ $value }}% of requests for {{ $labels.grpc_method }} failed
|
|
||||||
on etcd instance {{ $labels.instance }}'
|
|
||||||
summary: a high number of gRPC requests are failing
|
|
||||||
- alert: GRPCRequestsSlow
|
- alert: GRPCRequestsSlow
|
||||||
expr: histogram_quantile(0.99, rate(etcd_grpc_unary_requests_duration_seconds_bucket[5m]))
|
expr: histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job="etcd",grpc_type="unary"}[5m])) by (grpc_service, grpc_method, le))
|
||||||
> 0.15
|
> 0.15
|
||||||
for: 10m
|
for: 10m
|
||||||
labels:
|
labels:
|
||||||
@ -125,7 +104,7 @@ data:
|
|||||||
}} are slow
|
}} are slow
|
||||||
summary: slow HTTP requests
|
summary: slow HTTP requests
|
||||||
- alert: EtcdMemberCommunicationSlow
|
- alert: EtcdMemberCommunicationSlow
|
||||||
expr: histogram_quantile(0.99, rate(etcd_network_member_round_trip_time_seconds_bucket[5m]))
|
expr: histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket[5m]))
|
||||||
> 0.15
|
> 0.15
|
||||||
for: 10m
|
for: 10m
|
||||||
labels:
|
labels:
|
||||||
@ -160,7 +139,7 @@ data:
|
|||||||
annotations:
|
annotations:
|
||||||
description: etcd instance {{ $labels.instance }} commit durations are high
|
description: etcd instance {{ $labels.instance }} commit durations are high
|
||||||
summary: high commit durations
|
summary: high commit durations
|
||||||
general.rules.yaml: |+
|
general.rules.yaml: |
|
||||||
groups:
|
groups:
|
||||||
- name: general.rules
|
- name: general.rules
|
||||||
rules:
|
rules:
|
||||||
@ -192,12 +171,12 @@ data:
|
|||||||
description: '{{ $labels.job }}: {{ $labels.namespace }}/{{ $labels.pod }} instance
|
description: '{{ $labels.job }}: {{ $labels.namespace }}/{{ $labels.pod }} instance
|
||||||
will exhaust in file/socket descriptors within the next hour'
|
will exhaust in file/socket descriptors within the next hour'
|
||||||
summary: file descriptors soon exhausted
|
summary: file descriptors soon exhausted
|
||||||
kube-controller-manager.rules.yaml: |+
|
kube-controller-manager.rules.yaml: |
|
||||||
groups:
|
groups:
|
||||||
- name: kube-controller-manager.rules
|
- name: kube-controller-manager.rules
|
||||||
rules:
|
rules:
|
||||||
- alert: K8SControllerManagerDown
|
- alert: K8SControllerManagerDown
|
||||||
expr: absent(up{kubernetes_name="kube-controller-manager"} == 1)
|
expr: absent(up{job="kube-controller-manager"} == 1)
|
||||||
for: 5m
|
for: 5m
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: critical
|
||||||
@ -205,7 +184,7 @@ data:
|
|||||||
description: There is no running K8S controller manager. Deployments and replication
|
description: There is no running K8S controller manager. Deployments and replication
|
||||||
controllers are not making progress.
|
controllers are not making progress.
|
||||||
summary: Controller manager is down
|
summary: Controller manager is down
|
||||||
kube-scheduler.rules.yaml: |+
|
kube-scheduler.rules.yaml: |
|
||||||
groups:
|
groups:
|
||||||
- name: kube-scheduler.rules
|
- name: kube-scheduler.rules
|
||||||
rules:
|
rules:
|
||||||
@ -255,7 +234,7 @@ data:
|
|||||||
labels:
|
labels:
|
||||||
quantile: "0.5"
|
quantile: "0.5"
|
||||||
- alert: K8SSchedulerDown
|
- alert: K8SSchedulerDown
|
||||||
expr: absent(up{kubernetes_name="kube-scheduler"} == 1)
|
expr: absent(up{job="kube-scheduler"} == 1)
|
||||||
for: 5m
|
for: 5m
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: critical
|
||||||
@ -263,7 +242,7 @@ data:
|
|||||||
description: There is no running K8S scheduler. New pods are not being assigned
|
description: There is no running K8S scheduler. New pods are not being assigned
|
||||||
to nodes.
|
to nodes.
|
||||||
summary: Scheduler is down
|
summary: Scheduler is down
|
||||||
kube-state-metrics.rules.yaml: |+
|
kube-state-metrics.rules.yaml: |
|
||||||
groups:
|
groups:
|
||||||
- name: kube-state-metrics.rules
|
- name: kube-state-metrics.rules
|
||||||
rules:
|
rules:
|
||||||
@ -274,7 +253,8 @@ data:
|
|||||||
severity: warning
|
severity: warning
|
||||||
annotations:
|
annotations:
|
||||||
description: Observed deployment generation does not match expected one for
|
description: Observed deployment generation does not match expected one for
|
||||||
deployment {{$labels.namespaces}}{{$labels.deployment}}
|
deployment {{$labels.namespaces}}/{{$labels.deployment}}
|
||||||
|
summary: Deployment is outdated
|
||||||
- alert: DeploymentReplicasNotUpdated
|
- alert: DeploymentReplicasNotUpdated
|
||||||
expr: ((kube_deployment_status_replicas_updated != kube_deployment_spec_replicas)
|
expr: ((kube_deployment_status_replicas_updated != kube_deployment_spec_replicas)
|
||||||
or (kube_deployment_status_replicas_available != kube_deployment_spec_replicas))
|
or (kube_deployment_status_replicas_available != kube_deployment_spec_replicas))
|
||||||
@ -284,8 +264,9 @@ data:
|
|||||||
severity: warning
|
severity: warning
|
||||||
annotations:
|
annotations:
|
||||||
description: Replicas are not updated and available for deployment {{$labels.namespaces}}/{{$labels.deployment}}
|
description: Replicas are not updated and available for deployment {{$labels.namespaces}}/{{$labels.deployment}}
|
||||||
|
summary: Deployment replicas are outdated
|
||||||
- alert: DaemonSetRolloutStuck
|
- alert: DaemonSetRolloutStuck
|
||||||
expr: kube_daemonset_status_current_number_ready / kube_daemonset_status_desired_number_scheduled
|
expr: kube_daemonset_status_number_ready / kube_daemonset_status_desired_number_scheduled
|
||||||
* 100 < 100
|
* 100 < 100
|
||||||
for: 15m
|
for: 15m
|
||||||
labels:
|
labels:
|
||||||
@ -293,6 +274,7 @@ data:
|
|||||||
annotations:
|
annotations:
|
||||||
description: Only {{$value}}% of desired pods scheduled and ready for daemon
|
description: Only {{$value}}% of desired pods scheduled and ready for daemon
|
||||||
set {{$labels.namespaces}}/{{$labels.daemonset}}
|
set {{$labels.namespaces}}/{{$labels.daemonset}}
|
||||||
|
summary: DaemonSet is missing pods
|
||||||
- alert: K8SDaemonSetsNotScheduled
|
- alert: K8SDaemonSetsNotScheduled
|
||||||
expr: kube_daemonset_status_desired_number_scheduled - kube_daemonset_status_current_number_scheduled
|
expr: kube_daemonset_status_desired_number_scheduled - kube_daemonset_status_current_number_scheduled
|
||||||
> 0
|
> 0
|
||||||
@ -312,14 +294,15 @@ data:
|
|||||||
to run.
|
to run.
|
||||||
summary: Daemonsets are not scheduled correctly
|
summary: Daemonsets are not scheduled correctly
|
||||||
- alert: PodFrequentlyRestarting
|
- alert: PodFrequentlyRestarting
|
||||||
expr: increase(kube_pod_container_status_restarts[1h]) > 5
|
expr: increase(kube_pod_container_status_restarts_total[1h]) > 5
|
||||||
for: 10m
|
for: 10m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
annotations:
|
annotations:
|
||||||
description: Pod {{$labels.namespaces}}/{{$labels.pod}} is was restarted {{$value}}
|
description: Pod {{$labels.namespaces}}/{{$labels.pod}} restarted {{$value}}
|
||||||
times within the last hour
|
times within the last hour
|
||||||
kubelet.rules.yaml: |+
|
summary: Pod is restarting frequently
|
||||||
|
kubelet.rules.yaml: |
|
||||||
groups:
|
groups:
|
||||||
- name: kubelet.rules
|
- name: kubelet.rules
|
||||||
rules:
|
rules:
|
||||||
@ -342,15 +325,15 @@ data:
|
|||||||
annotations:
|
annotations:
|
||||||
description: '{{ $value }}% of Kubernetes nodes are not ready'
|
description: '{{ $value }}% of Kubernetes nodes are not ready'
|
||||||
- alert: K8SKubeletDown
|
- alert: K8SKubeletDown
|
||||||
expr: count(up{job="kubernetes-nodes"} == 0) / count(up{job="kubernetes-nodes"}) * 100 > 3
|
expr: count(up{job="kubelet"} == 0) / count(up{job="kubelet"}) * 100 > 3
|
||||||
for: 1h
|
for: 1h
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
annotations:
|
annotations:
|
||||||
description: Prometheus failed to scrape {{ $value }}% of kubelets.
|
description: Prometheus failed to scrape {{ $value }}% of kubelets.
|
||||||
- alert: K8SKubeletDown
|
- alert: K8SKubeletDown
|
||||||
expr: (absent(up{job="kubernetes-nodes"} == 1) or count(up{job="kubernetes-nodes"} == 0) / count(up{job="kubernetes-nodes"}))
|
expr: (absent(up{job="kubelet"} == 1) or count(up{job="kubelet"} == 0) / count(up{job="kubelet"}))
|
||||||
* 100 > 1
|
* 100 > 10
|
||||||
for: 1h
|
for: 1h
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: critical
|
||||||
@ -367,7 +350,7 @@ data:
|
|||||||
description: Kubelet {{$labels.instance}} is running {{$value}} pods, close
|
description: Kubelet {{$labels.instance}} is running {{$value}} pods, close
|
||||||
to the limit of 110
|
to the limit of 110
|
||||||
summary: Kubelet is close to pod limit
|
summary: Kubelet is close to pod limit
|
||||||
kubernetes.rules.yaml: |+
|
kubernetes.rules.yaml: |
|
||||||
groups:
|
groups:
|
||||||
- name: kubernetes.rules
|
- name: kubernetes.rules
|
||||||
rules:
|
rules:
|
||||||
@ -447,14 +430,28 @@ data:
|
|||||||
annotations:
|
annotations:
|
||||||
description: API server returns errors for {{ $value }}% of requests
|
description: API server returns errors for {{ $value }}% of requests
|
||||||
- alert: K8SApiserverDown
|
- alert: K8SApiserverDown
|
||||||
expr: absent(up{job="kubernetes-apiservers"} == 1)
|
expr: absent(up{job="apiserver"} == 1)
|
||||||
for: 20m
|
for: 20m
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: critical
|
||||||
annotations:
|
annotations:
|
||||||
description: No API servers are reachable or all have disappeared from service
|
description: No API servers are reachable or all have disappeared from service
|
||||||
discovery
|
discovery
|
||||||
node.rules.yaml: |+
|
|
||||||
|
- alert: K8sCertificateExpirationNotice
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
description: Kubernetes API Certificate is expiring soon (less than 7 days)
|
||||||
|
expr: sum(apiserver_client_certificate_expiration_seconds_bucket{le="604800"}) > 0
|
||||||
|
|
||||||
|
- alert: K8sCertificateExpirationNotice
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
annotations:
|
||||||
|
description: Kubernetes API Certificate is expiring in less than 1 day
|
||||||
|
expr: sum(apiserver_client_certificate_expiration_seconds_bucket{le="86400"}) > 0
|
||||||
|
node.rules.yaml: |
|
||||||
groups:
|
groups:
|
||||||
- name: node.rules
|
- name: node.rules
|
||||||
rules:
|
rules:
|
||||||
@ -476,7 +473,7 @@ data:
|
|||||||
- record: cluster:node_cpu:ratio
|
- record: cluster:node_cpu:ratio
|
||||||
expr: cluster:node_cpu:rate5m / count(sum(node_cpu) BY (instance, cpu))
|
expr: cluster:node_cpu:rate5m / count(sum(node_cpu) BY (instance, cpu))
|
||||||
- alert: NodeExporterDown
|
- alert: NodeExporterDown
|
||||||
expr: absent(up{kubernetes_name="node-exporter"} == 1)
|
expr: absent(up{job="node-exporter"} == 1)
|
||||||
for: 10m
|
for: 10m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
@ -499,7 +496,14 @@ data:
|
|||||||
annotations:
|
annotations:
|
||||||
description: device {{$labels.device}} on node {{$labels.instance}} is running
|
description: device {{$labels.device}} on node {{$labels.instance}} is running
|
||||||
full within the next 2 hours (mounted at {{$labels.mountpoint}})
|
full within the next 2 hours (mounted at {{$labels.mountpoint}})
|
||||||
prometheus.rules.yaml: |+
|
- alert: InactiveRAIDDisk
|
||||||
|
expr: node_md_disks - node_md_disks_active > 0
|
||||||
|
for: 10m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
description: '{{$value}} RAID disk(s) on node {{$labels.instance}} are inactive'
|
||||||
|
prometheus.rules.yaml: |
|
||||||
groups:
|
groups:
|
||||||
- name: prometheus.rules
|
- name: prometheus.rules
|
||||||
rules:
|
rules:
|
||||||
@ -544,3 +548,38 @@ data:
|
|||||||
annotations:
|
annotations:
|
||||||
description: Prometheus {{ $labels.namespace }}/{{ $labels.pod}} is not connected
|
description: Prometheus {{ $labels.namespace }}/{{ $labels.pod}} is not connected
|
||||||
to any Alertmanagers
|
to any Alertmanagers
|
||||||
|
- alert: PrometheusTSDBReloadsFailing
|
||||||
|
expr: increase(prometheus_tsdb_reloads_failures_total[2h]) > 0
|
||||||
|
for: 12h
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
description: '{{$labels.job}} at {{$labels.instance}} had {{$value | humanize}}
|
||||||
|
reload failures over the last four hours.'
|
||||||
|
summary: Prometheus has issues reloading data blocks from disk
|
||||||
|
- alert: PrometheusTSDBCompactionsFailing
|
||||||
|
expr: increase(prometheus_tsdb_compactions_failed_total[2h]) > 0
|
||||||
|
for: 12h
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
description: '{{$labels.job}} at {{$labels.instance}} had {{$value | humanize}}
|
||||||
|
compaction failures over the last four hours.'
|
||||||
|
summary: Prometheus has issues compacting sample blocks
|
||||||
|
- alert: PrometheusTSDBWALCorruptions
|
||||||
|
expr: tsdb_wal_corruptions_total > 0
|
||||||
|
for: 4h
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
description: '{{$labels.job}} at {{$labels.instance}} has a corrupted write-ahead
|
||||||
|
log (WAL).'
|
||||||
|
summary: Prometheus write-ahead log is corrupted
|
||||||
|
- alert: PrometheusNotIngestingSamples
|
||||||
|
expr: rate(prometheus_tsdb_head_samples_appended_total[5m]) <= 0
|
||||||
|
for: 10m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
description: "Prometheus {{ $labels.namespace }}/{{ $labels.pod}} isn't ingesting samples."
|
||||||
|
summary: "Prometheus isn't ingesting samples"
|
||||||
|
|||||||
5
addons/prometheus/service-account.yaml
Normal file
5
addons/prometheus/service-account.yaml
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: ServiceAccount
|
||||||
|
metadata:
|
||||||
|
name: prometheus
|
||||||
|
namespace: monitoring
|
||||||
@ -3,6 +3,8 @@ kind: Service
|
|||||||
metadata:
|
metadata:
|
||||||
name: prometheus
|
name: prometheus
|
||||||
namespace: monitoring
|
namespace: monitoring
|
||||||
|
annotations:
|
||||||
|
prometheus.io/scrape: 'true'
|
||||||
spec:
|
spec:
|
||||||
type: ClusterIP
|
type: ClusterIP
|
||||||
selector:
|
selector:
|
||||||
|
|||||||
@ -11,12 +11,13 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster
|
|||||||
|
|
||||||
## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>
|
## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>
|
||||||
|
|
||||||
* Kubernetes v1.9.1 (upstream, via [kubernetes-incubator/bootkube](https://github.com/kubernetes-incubator/bootkube))
|
* Kubernetes v1.12.2 (upstream, via [kubernetes-incubator/bootkube](https://github.com/kubernetes-incubator/bootkube))
|
||||||
* Single or multi-master, workloads isolated on workers, [Calico](https://www.projectcalico.org/) or [flannel](https://github.com/coreos/flannel) networking
|
* Single or multi-master, workloads isolated on workers, [Calico](https://www.projectcalico.org/) or [flannel](https://github.com/coreos/flannel) networking
|
||||||
* On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/)
|
* On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/)
|
||||||
* Ready for Ingress, Dashboards, Metrics, and other optional [addons](https://typhoon.psdn.io/addons/overview/)
|
* Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/)
|
||||||
|
* Ready for Ingress, Prometheus, Grafana, and other optional [addons](https://typhoon.psdn.io/addons/overview/)
|
||||||
|
|
||||||
## Docs
|
## Docs
|
||||||
|
|
||||||
Please see the [official docs](https://typhoon.psdn.io) and the AWS [tutorial](https://typhoon.psdn.io/aws/).
|
Please see the [official docs](https://typhoon.psdn.io) and the AWS [tutorial](https://typhoon.psdn.io/cl/aws/).
|
||||||
|
|
||||||
|
|||||||
@ -1,3 +1,13 @@
|
|||||||
|
locals {
|
||||||
|
# Pick a CoreOS Container Linux derivative
|
||||||
|
# coreos-stable -> Container Linux AMI
|
||||||
|
# flatcar-stable -> Flatcar Linux AMI
|
||||||
|
ami_id = "${local.flavor == "flatcar" ? data.aws_ami.flatcar.image_id : data.aws_ami.coreos.image_id}"
|
||||||
|
|
||||||
|
flavor = "${element(split("-", var.os_image), 0)}"
|
||||||
|
channel = "${element(split("-", var.os_image), 1)}"
|
||||||
|
}
|
||||||
|
|
||||||
data "aws_ami" "coreos" {
|
data "aws_ami" "coreos" {
|
||||||
most_recent = true
|
most_recent = true
|
||||||
owners = ["595879546273"]
|
owners = ["595879546273"]
|
||||||
@ -14,6 +24,26 @@ data "aws_ami" "coreos" {
|
|||||||
|
|
||||||
filter {
|
filter {
|
||||||
name = "name"
|
name = "name"
|
||||||
values = ["CoreOS-${var.os_channel}-*"]
|
values = ["CoreOS-${local.channel}-*"]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
data "aws_ami" "flatcar" {
|
||||||
|
most_recent = true
|
||||||
|
owners = ["075585003325"]
|
||||||
|
|
||||||
|
filter {
|
||||||
|
name = "architecture"
|
||||||
|
values = ["x86_64"]
|
||||||
|
}
|
||||||
|
|
||||||
|
filter {
|
||||||
|
name = "virtualization-type"
|
||||||
|
values = ["hvm"]
|
||||||
|
}
|
||||||
|
|
||||||
|
filter {
|
||||||
|
name = "name"
|
||||||
|
values = ["Flatcar-${local.channel}-*"]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
# Self-hosted Kubernetes assets (kubeconfig, manifests)
|
# Self-hosted Kubernetes assets (kubeconfig, manifests)
|
||||||
module "bootkube" {
|
module "bootkube" {
|
||||||
source = "git::https://github.com/poseidon/terraform-render-bootkube.git?ref=b83e321b350ac549c45ed6a05ffd8683336fb9f4"
|
source = "git::https://github.com/poseidon/terraform-render-bootkube.git?ref=f39f8294c465397e622c606174e6f412ee3ca0f8"
|
||||||
|
|
||||||
cluster_name = "${var.cluster_name}"
|
cluster_name = "${var.cluster_name}"
|
||||||
api_servers = ["${format("%s.%s", var.cluster_name, var.dns_zone)}"]
|
api_servers = ["${format("%s.%s", var.cluster_name, var.dns_zone)}"]
|
||||||
|
|||||||
@ -7,12 +7,13 @@ systemd:
|
|||||||
- name: 40-etcd-cluster.conf
|
- name: 40-etcd-cluster.conf
|
||||||
contents: |
|
contents: |
|
||||||
[Service]
|
[Service]
|
||||||
Environment="ETCD_IMAGE_TAG=v3.2.13"
|
Environment="ETCD_IMAGE_TAG=v3.3.10"
|
||||||
Environment="ETCD_NAME=${etcd_name}"
|
Environment="ETCD_NAME=${etcd_name}"
|
||||||
Environment="ETCD_ADVERTISE_CLIENT_URLS=https://${etcd_domain}:2379"
|
Environment="ETCD_ADVERTISE_CLIENT_URLS=https://${etcd_domain}:2379"
|
||||||
Environment="ETCD_INITIAL_ADVERTISE_PEER_URLS=https://${etcd_domain}:2380"
|
Environment="ETCD_INITIAL_ADVERTISE_PEER_URLS=https://${etcd_domain}:2380"
|
||||||
Environment="ETCD_LISTEN_CLIENT_URLS=https://0.0.0.0:2379"
|
Environment="ETCD_LISTEN_CLIENT_URLS=https://0.0.0.0:2379"
|
||||||
Environment="ETCD_LISTEN_PEER_URLS=https://0.0.0.0:2380"
|
Environment="ETCD_LISTEN_PEER_URLS=https://0.0.0.0:2380"
|
||||||
|
Environment="ETCD_LISTEN_METRICS_URLS=http://0.0.0.0:2381"
|
||||||
Environment="ETCD_INITIAL_CLUSTER=${etcd_initial_cluster}"
|
Environment="ETCD_INITIAL_CLUSTER=${etcd_initial_cluster}"
|
||||||
Environment="ETCD_STRICT_RECONFIG_CHECK=true"
|
Environment="ETCD_STRICT_RECONFIG_CHECK=true"
|
||||||
Environment="ETCD_SSL_DIR=/etc/ssl/etcd"
|
Environment="ETCD_SSL_DIR=/etc/ssl/etcd"
|
||||||
@ -55,6 +56,8 @@ systemd:
|
|||||||
--mount volume=resolv,target=/etc/resolv.conf \
|
--mount volume=resolv,target=/etc/resolv.conf \
|
||||||
--volume var-lib-cni,kind=host,source=/var/lib/cni \
|
--volume var-lib-cni,kind=host,source=/var/lib/cni \
|
||||||
--mount volume=var-lib-cni,target=/var/lib/cni \
|
--mount volume=var-lib-cni,target=/var/lib/cni \
|
||||||
|
--volume var-lib-calico,kind=host,source=/var/lib/calico \
|
||||||
|
--mount volume=var-lib-calico,target=/var/lib/calico \
|
||||||
--volume opt-cni-bin,kind=host,source=/opt/cni/bin \
|
--volume opt-cni-bin,kind=host,source=/opt/cni/bin \
|
||||||
--mount volume=opt-cni-bin,target=/opt/cni/bin \
|
--mount volume=opt-cni-bin,target=/opt/cni/bin \
|
||||||
--volume var-log,kind=host,source=/var/log \
|
--volume var-log,kind=host,source=/var/log \
|
||||||
@ -66,11 +69,14 @@ systemd:
|
|||||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/checkpoint-secrets
|
ExecStartPre=/bin/mkdir -p /etc/kubernetes/checkpoint-secrets
|
||||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/inactive-manifests
|
ExecStartPre=/bin/mkdir -p /etc/kubernetes/inactive-manifests
|
||||||
ExecStartPre=/bin/mkdir -p /var/lib/cni
|
ExecStartPre=/bin/mkdir -p /var/lib/cni
|
||||||
|
ExecStartPre=/bin/mkdir -p /var/lib/calico
|
||||||
|
ExecStartPre=/bin/mkdir -p /var/lib/kubelet/volumeplugins
|
||||||
ExecStartPre=/usr/bin/bash -c "grep 'certificate-authority-data' /etc/kubernetes/kubeconfig | awk '{print $2}' | base64 -d > /etc/kubernetes/ca.crt"
|
ExecStartPre=/usr/bin/bash -c "grep 'certificate-authority-data' /etc/kubernetes/kubeconfig | awk '{print $2}' | base64 -d > /etc/kubernetes/ca.crt"
|
||||||
ExecStartPre=-/usr/bin/rkt rm --uuid-file=/var/cache/kubelet-pod.uuid
|
ExecStartPre=-/usr/bin/rkt rm --uuid-file=/var/cache/kubelet-pod.uuid
|
||||||
ExecStart=/usr/lib/coreos/kubelet-wrapper \
|
ExecStart=/usr/lib/coreos/kubelet-wrapper \
|
||||||
--allow-privileged \
|
|
||||||
--anonymous-auth=false \
|
--anonymous-auth=false \
|
||||||
|
--authentication-token-webhook \
|
||||||
|
--authorization-mode=Webhook \
|
||||||
--client-ca-file=/etc/kubernetes/ca.crt \
|
--client-ca-file=/etc/kubernetes/ca.crt \
|
||||||
--cluster_dns=${k8s_dns_service_ip} \
|
--cluster_dns=${k8s_dns_service_ip} \
|
||||||
--cluster_domain=${cluster_domain_suffix} \
|
--cluster_domain=${cluster_domain_suffix} \
|
||||||
@ -80,8 +86,11 @@ systemd:
|
|||||||
--lock-file=/var/run/lock/kubelet.lock \
|
--lock-file=/var/run/lock/kubelet.lock \
|
||||||
--network-plugin=cni \
|
--network-plugin=cni \
|
||||||
--node-labels=node-role.kubernetes.io/master \
|
--node-labels=node-role.kubernetes.io/master \
|
||||||
|
--node-labels=node-role.kubernetes.io/controller="true" \
|
||||||
--pod-manifest-path=/etc/kubernetes/manifests \
|
--pod-manifest-path=/etc/kubernetes/manifests \
|
||||||
--register-with-taints=node-role.kubernetes.io/master=:NoSchedule
|
--read-only-port=0 \
|
||||||
|
--register-with-taints=node-role.kubernetes.io/master=:NoSchedule \
|
||||||
|
--volume-plugin-dir=/var/lib/kubelet/volumeplugins
|
||||||
ExecStop=-/usr/bin/rkt stop --uuid-file=/var/cache/kubelet-pod.uuid
|
ExecStop=-/usr/bin/rkt stop --uuid-file=/var/cache/kubelet-pod.uuid
|
||||||
Restart=always
|
Restart=always
|
||||||
RestartSec=10
|
RestartSec=10
|
||||||
@ -107,29 +116,14 @@ storage:
|
|||||||
mode: 0644
|
mode: 0644
|
||||||
contents:
|
contents:
|
||||||
inline: |
|
inline: |
|
||||||
apiVersion: v1
|
${kubeconfig}
|
||||||
kind: Config
|
|
||||||
clusters:
|
|
||||||
- name: local
|
|
||||||
cluster:
|
|
||||||
server: ${kubeconfig_server}
|
|
||||||
certificate-authority-data: ${kubeconfig_ca_cert}
|
|
||||||
users:
|
|
||||||
- name: kubelet
|
|
||||||
user:
|
|
||||||
client-certificate-data: ${kubeconfig_kubelet_cert}
|
|
||||||
client-key-data: ${kubeconfig_kubelet_key}
|
|
||||||
contexts:
|
|
||||||
- context:
|
|
||||||
cluster: local
|
|
||||||
user: kubelet
|
|
||||||
- path: /etc/kubernetes/kubelet.env
|
- path: /etc/kubernetes/kubelet.env
|
||||||
filesystem: root
|
filesystem: root
|
||||||
mode: 0644
|
mode: 0644
|
||||||
contents:
|
contents:
|
||||||
inline: |
|
inline: |
|
||||||
KUBELET_IMAGE_URL=docker://gcr.io/google_containers/hyperkube
|
KUBELET_IMAGE_URL=docker://k8s.gcr.io/hyperkube
|
||||||
KUBELET_IMAGE_TAG=v1.9.1
|
KUBELET_IMAGE_TAG=v1.12.2
|
||||||
- path: /etc/sysctl.d/max-user-watches.conf
|
- path: /etc/sysctl.d/max-user-watches.conf
|
||||||
filesystem: root
|
filesystem: root
|
||||||
contents:
|
contents:
|
||||||
@ -150,7 +144,7 @@ storage:
|
|||||||
# Move experimental manifests
|
# Move experimental manifests
|
||||||
[ -n "$(ls /opt/bootkube/assets/manifests-*/* 2>/dev/null)" ] && mv /opt/bootkube/assets/manifests-*/* /opt/bootkube/assets/manifests && rm -rf /opt/bootkube/assets/manifests-*
|
[ -n "$(ls /opt/bootkube/assets/manifests-*/* 2>/dev/null)" ] && mv /opt/bootkube/assets/manifests-*/* /opt/bootkube/assets/manifests && rm -rf /opt/bootkube/assets/manifests-*
|
||||||
BOOTKUBE_ACI="$${BOOTKUBE_ACI:-quay.io/coreos/bootkube}"
|
BOOTKUBE_ACI="$${BOOTKUBE_ACI:-quay.io/coreos/bootkube}"
|
||||||
BOOTKUBE_VERSION="$${BOOTKUBE_VERSION:-v0.9.1}"
|
BOOTKUBE_VERSION="$${BOOTKUBE_VERSION:-v0.13.0}"
|
||||||
BOOTKUBE_ASSETS="$${BOOTKUBE_ASSETS:-/opt/bootkube/assets}"
|
BOOTKUBE_ASSETS="$${BOOTKUBE_ASSETS:-/opt/bootkube/assets}"
|
||||||
exec /usr/bin/rkt run \
|
exec /usr/bin/rkt run \
|
||||||
--trust-keys-from-https \
|
--trust-keys-from-https \
|
||||||
|
|||||||
@ -23,23 +23,39 @@ resource "aws_instance" "controllers" {
|
|||||||
|
|
||||||
instance_type = "${var.controller_type}"
|
instance_type = "${var.controller_type}"
|
||||||
|
|
||||||
ami = "${data.aws_ami.coreos.image_id}"
|
ami = "${local.ami_id}"
|
||||||
user_data = "${element(data.ct_config.controller_ign.*.rendered, count.index)}"
|
user_data = "${element(data.ct_config.controller-ignitions.*.rendered, count.index)}"
|
||||||
|
|
||||||
# storage
|
# storage
|
||||||
root_block_device {
|
root_block_device {
|
||||||
volume_type = "standard"
|
volume_type = "${var.disk_type}"
|
||||||
volume_size = "${var.disk_size}"
|
volume_size = "${var.disk_size}"
|
||||||
|
iops = "${var.disk_iops}"
|
||||||
}
|
}
|
||||||
|
|
||||||
# network
|
# network
|
||||||
associate_public_ip_address = true
|
associate_public_ip_address = true
|
||||||
subnet_id = "${element(aws_subnet.public.*.id, count.index)}"
|
subnet_id = "${element(aws_subnet.public.*.id, count.index)}"
|
||||||
vpc_security_group_ids = ["${aws_security_group.controller.id}"]
|
vpc_security_group_ids = ["${aws_security_group.controller.id}"]
|
||||||
|
|
||||||
|
lifecycle {
|
||||||
|
ignore_changes = [
|
||||||
|
"ami",
|
||||||
|
"user_data",
|
||||||
|
]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
# Controller Container Linux Config
|
# Controller Ignition configs
|
||||||
data "template_file" "controller_config" {
|
data "ct_config" "controller-ignitions" {
|
||||||
|
count = "${var.controller_count}"
|
||||||
|
content = "${element(data.template_file.controller-configs.*.rendered, count.index)}"
|
||||||
|
pretty_print = false
|
||||||
|
snippets = ["${var.controller_clc_snippets}"]
|
||||||
|
}
|
||||||
|
|
||||||
|
# Controller Container Linux configs
|
||||||
|
data "template_file" "controller-configs" {
|
||||||
count = "${var.controller_count}"
|
count = "${var.controller_count}"
|
||||||
|
|
||||||
template = "${file("${path.module}/cl/controller.yaml.tmpl")}"
|
template = "${file("${path.module}/cl/controller.yaml.tmpl")}"
|
||||||
@ -50,213 +66,22 @@ data "template_file" "controller_config" {
|
|||||||
etcd_domain = "${var.cluster_name}-etcd${count.index}.${var.dns_zone}"
|
etcd_domain = "${var.cluster_name}-etcd${count.index}.${var.dns_zone}"
|
||||||
|
|
||||||
# etcd0=https://cluster-etcd0.example.com,etcd1=https://cluster-etcd1.example.com,...
|
# etcd0=https://cluster-etcd0.example.com,etcd1=https://cluster-etcd1.example.com,...
|
||||||
etcd_initial_cluster = "${join(",", formatlist("%s=https://%s:2380", null_resource.repeat.*.triggers.name, null_resource.repeat.*.triggers.domain))}"
|
etcd_initial_cluster = "${join(",", data.template_file.etcds.*.rendered)}"
|
||||||
|
|
||||||
k8s_dns_service_ip = "${cidrhost(var.service_cidr, 10)}"
|
kubeconfig = "${indent(10, module.bootkube.kubeconfig)}"
|
||||||
ssh_authorized_key = "${var.ssh_authorized_key}"
|
ssh_authorized_key = "${var.ssh_authorized_key}"
|
||||||
|
k8s_dns_service_ip = "${cidrhost(var.service_cidr, 10)}"
|
||||||
cluster_domain_suffix = "${var.cluster_domain_suffix}"
|
cluster_domain_suffix = "${var.cluster_domain_suffix}"
|
||||||
kubeconfig_ca_cert = "${module.bootkube.ca_cert}"
|
|
||||||
kubeconfig_kubelet_cert = "${module.bootkube.kubelet_cert}"
|
|
||||||
kubeconfig_kubelet_key = "${module.bootkube.kubelet_key}"
|
|
||||||
kubeconfig_server = "${module.bootkube.server}"
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
# Horrible hack to generate a Terraform list of a desired length without dependencies.
|
data "template_file" "etcds" {
|
||||||
# Ideal ${repeat("etcd", 3) -> ["etcd", "etcd", "etcd"]}
|
|
||||||
resource null_resource "repeat" {
|
|
||||||
count = "${var.controller_count}"
|
count = "${var.controller_count}"
|
||||||
|
template = "etcd$${index}=https://$${cluster_name}-etcd$${index}.$${dns_zone}:2380"
|
||||||
|
|
||||||
triggers {
|
vars {
|
||||||
name = "etcd${count.index}"
|
index = "${count.index}"
|
||||||
domain = "${var.cluster_name}-etcd${count.index}.${var.dns_zone}"
|
cluster_name = "${var.cluster_name}"
|
||||||
|
dns_zone = "${var.dns_zone}"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
data "ct_config" "controller_ign" {
|
|
||||||
count = "${var.controller_count}"
|
|
||||||
content = "${element(data.template_file.controller_config.*.rendered, count.index)}"
|
|
||||||
pretty_print = false
|
|
||||||
}
|
|
||||||
|
|
||||||
# Security Group (instance firewall)
|
|
||||||
|
|
||||||
resource "aws_security_group" "controller" {
|
|
||||||
name = "${var.cluster_name}-controller"
|
|
||||||
description = "${var.cluster_name} controller security group"
|
|
||||||
|
|
||||||
vpc_id = "${aws_vpc.network.id}"
|
|
||||||
|
|
||||||
tags = "${map("Name", "${var.cluster_name}-controller")}"
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "aws_security_group_rule" "controller-icmp" {
|
|
||||||
security_group_id = "${aws_security_group.controller.id}"
|
|
||||||
|
|
||||||
type = "ingress"
|
|
||||||
protocol = "icmp"
|
|
||||||
from_port = 0
|
|
||||||
to_port = 0
|
|
||||||
cidr_blocks = ["0.0.0.0/0"]
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "aws_security_group_rule" "controller-ssh" {
|
|
||||||
security_group_id = "${aws_security_group.controller.id}"
|
|
||||||
|
|
||||||
type = "ingress"
|
|
||||||
protocol = "tcp"
|
|
||||||
from_port = 22
|
|
||||||
to_port = 22
|
|
||||||
cidr_blocks = ["0.0.0.0/0"]
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "aws_security_group_rule" "controller-apiserver" {
|
|
||||||
security_group_id = "${aws_security_group.controller.id}"
|
|
||||||
|
|
||||||
type = "ingress"
|
|
||||||
protocol = "tcp"
|
|
||||||
from_port = 443
|
|
||||||
to_port = 443
|
|
||||||
cidr_blocks = ["0.0.0.0/0"]
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "aws_security_group_rule" "controller-etcd" {
|
|
||||||
security_group_id = "${aws_security_group.controller.id}"
|
|
||||||
|
|
||||||
type = "ingress"
|
|
||||||
protocol = "tcp"
|
|
||||||
from_port = 2379
|
|
||||||
to_port = 2380
|
|
||||||
self = true
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "aws_security_group_rule" "controller-flannel" {
|
|
||||||
security_group_id = "${aws_security_group.controller.id}"
|
|
||||||
|
|
||||||
type = "ingress"
|
|
||||||
protocol = "udp"
|
|
||||||
from_port = 8472
|
|
||||||
to_port = 8472
|
|
||||||
source_security_group_id = "${aws_security_group.worker.id}"
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "aws_security_group_rule" "controller-flannel-self" {
|
|
||||||
security_group_id = "${aws_security_group.controller.id}"
|
|
||||||
|
|
||||||
type = "ingress"
|
|
||||||
protocol = "udp"
|
|
||||||
from_port = 8472
|
|
||||||
to_port = 8472
|
|
||||||
self = true
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "aws_security_group_rule" "controller-node-exporter" {
|
|
||||||
security_group_id = "${aws_security_group.controller.id}"
|
|
||||||
|
|
||||||
type = "ingress"
|
|
||||||
protocol = "tcp"
|
|
||||||
from_port = 9100
|
|
||||||
to_port = 9100
|
|
||||||
source_security_group_id = "${aws_security_group.worker.id}"
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "aws_security_group_rule" "controller-kubelet-self" {
|
|
||||||
security_group_id = "${aws_security_group.controller.id}"
|
|
||||||
|
|
||||||
type = "ingress"
|
|
||||||
protocol = "tcp"
|
|
||||||
from_port = 10250
|
|
||||||
to_port = 10250
|
|
||||||
self = true
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "aws_security_group_rule" "controller-kubelet-read" {
|
|
||||||
security_group_id = "${aws_security_group.controller.id}"
|
|
||||||
|
|
||||||
type = "ingress"
|
|
||||||
protocol = "tcp"
|
|
||||||
from_port = 10255
|
|
||||||
to_port = 10255
|
|
||||||
source_security_group_id = "${aws_security_group.worker.id}"
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "aws_security_group_rule" "controller-kubelet-read-self" {
|
|
||||||
security_group_id = "${aws_security_group.controller.id}"
|
|
||||||
|
|
||||||
type = "ingress"
|
|
||||||
protocol = "tcp"
|
|
||||||
from_port = 10255
|
|
||||||
to_port = 10255
|
|
||||||
self = true
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "aws_security_group_rule" "controller-bgp" {
|
|
||||||
security_group_id = "${aws_security_group.controller.id}"
|
|
||||||
|
|
||||||
type = "ingress"
|
|
||||||
protocol = "tcp"
|
|
||||||
from_port = 179
|
|
||||||
to_port = 179
|
|
||||||
source_security_group_id = "${aws_security_group.worker.id}"
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "aws_security_group_rule" "controller-bgp-self" {
|
|
||||||
security_group_id = "${aws_security_group.controller.id}"
|
|
||||||
|
|
||||||
type = "ingress"
|
|
||||||
protocol = "tcp"
|
|
||||||
from_port = 179
|
|
||||||
to_port = 179
|
|
||||||
self = true
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "aws_security_group_rule" "controller-ipip" {
|
|
||||||
security_group_id = "${aws_security_group.controller.id}"
|
|
||||||
|
|
||||||
type = "ingress"
|
|
||||||
protocol = 4
|
|
||||||
from_port = 0
|
|
||||||
to_port = 0
|
|
||||||
source_security_group_id = "${aws_security_group.worker.id}"
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "aws_security_group_rule" "controller-ipip-self" {
|
|
||||||
security_group_id = "${aws_security_group.controller.id}"
|
|
||||||
|
|
||||||
type = "ingress"
|
|
||||||
protocol = 4
|
|
||||||
from_port = 0
|
|
||||||
to_port = 0
|
|
||||||
self = true
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "aws_security_group_rule" "controller-ipip-legacy" {
|
|
||||||
security_group_id = "${aws_security_group.controller.id}"
|
|
||||||
|
|
||||||
type = "ingress"
|
|
||||||
protocol = 94
|
|
||||||
from_port = 0
|
|
||||||
to_port = 0
|
|
||||||
source_security_group_id = "${aws_security_group.worker.id}"
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "aws_security_group_rule" "controller-ipip-legacy-self" {
|
|
||||||
security_group_id = "${aws_security_group.controller.id}"
|
|
||||||
|
|
||||||
type = "ingress"
|
|
||||||
protocol = 94
|
|
||||||
from_port = 0
|
|
||||||
to_port = 0
|
|
||||||
self = true
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "aws_security_group_rule" "controller-egress" {
|
|
||||||
security_group_id = "${aws_security_group.controller.id}"
|
|
||||||
|
|
||||||
type = "egress"
|
|
||||||
protocol = "-1"
|
|
||||||
from_port = 0
|
|
||||||
to_port = 0
|
|
||||||
cidr_blocks = ["0.0.0.0/0"]
|
|
||||||
ipv6_cidr_blocks = ["::/0"]
|
|
||||||
}
|
|
||||||
|
|||||||
@ -1,43 +0,0 @@
|
|||||||
# kube-apiserver Network Load Balancer DNS Record
|
|
||||||
resource "aws_route53_record" "apiserver" {
|
|
||||||
zone_id = "${var.dns_zone_id}"
|
|
||||||
|
|
||||||
name = "${format("%s.%s.", var.cluster_name, var.dns_zone)}"
|
|
||||||
type = "A"
|
|
||||||
|
|
||||||
# AWS recommends their special "alias" records for ELBs
|
|
||||||
alias {
|
|
||||||
name = "${aws_elb.apiserver.dns_name}"
|
|
||||||
zone_id = "${aws_elb.apiserver.zone_id}"
|
|
||||||
evaluate_target_health = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
# Controller Network Load Balancer
|
|
||||||
resource "aws_elb" "apiserver" {
|
|
||||||
name = "${var.cluster_name}-apiserver"
|
|
||||||
subnets = ["${aws_subnet.public.*.id}"]
|
|
||||||
security_groups = ["${aws_security_group.controller.id}"]
|
|
||||||
|
|
||||||
listener {
|
|
||||||
lb_port = 443
|
|
||||||
lb_protocol = "tcp"
|
|
||||||
instance_port = 443
|
|
||||||
instance_protocol = "tcp"
|
|
||||||
}
|
|
||||||
|
|
||||||
instances = ["${aws_instance.controllers.*.id}"]
|
|
||||||
|
|
||||||
# Kubelet HTTP health check
|
|
||||||
health_check {
|
|
||||||
target = "SSL:443"
|
|
||||||
healthy_threshold = 2
|
|
||||||
unhealthy_threshold = 4
|
|
||||||
timeout = 5
|
|
||||||
interval = 6
|
|
||||||
}
|
|
||||||
|
|
||||||
idle_timeout = 3600
|
|
||||||
connection_draining = true
|
|
||||||
connection_draining_timeout = 300
|
|
||||||
}
|
|
||||||
@ -1,32 +0,0 @@
|
|||||||
# Ingress Network Load Balancer
|
|
||||||
resource "aws_elb" "ingress" {
|
|
||||||
name = "${var.cluster_name}-ingress"
|
|
||||||
subnets = ["${aws_subnet.public.*.id}"]
|
|
||||||
security_groups = ["${aws_security_group.worker.id}"]
|
|
||||||
|
|
||||||
listener {
|
|
||||||
lb_port = 80
|
|
||||||
lb_protocol = "tcp"
|
|
||||||
instance_port = 80
|
|
||||||
instance_protocol = "tcp"
|
|
||||||
}
|
|
||||||
|
|
||||||
listener {
|
|
||||||
lb_port = 443
|
|
||||||
lb_protocol = "tcp"
|
|
||||||
instance_port = 443
|
|
||||||
instance_protocol = "tcp"
|
|
||||||
}
|
|
||||||
|
|
||||||
# Ingress Controller HTTP health check
|
|
||||||
health_check {
|
|
||||||
target = "HTTP:10254/healthz"
|
|
||||||
healthy_threshold = 2
|
|
||||||
unhealthy_threshold = 4
|
|
||||||
timeout = 5
|
|
||||||
interval = 6
|
|
||||||
}
|
|
||||||
|
|
||||||
connection_draining = true
|
|
||||||
connection_draining_timeout = 300
|
|
||||||
}
|
|
||||||
93
aws/container-linux/kubernetes/nlb.tf
Normal file
93
aws/container-linux/kubernetes/nlb.tf
Normal file
@ -0,0 +1,93 @@
|
|||||||
|
# Network Load Balancer DNS Record
|
||||||
|
resource "aws_route53_record" "apiserver" {
|
||||||
|
zone_id = "${var.dns_zone_id}"
|
||||||
|
|
||||||
|
name = "${format("%s.%s.", var.cluster_name, var.dns_zone)}"
|
||||||
|
type = "A"
|
||||||
|
|
||||||
|
# AWS recommends their special "alias" records for NLBs
|
||||||
|
alias {
|
||||||
|
name = "${aws_lb.nlb.dns_name}"
|
||||||
|
zone_id = "${aws_lb.nlb.zone_id}"
|
||||||
|
evaluate_target_health = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Network Load Balancer for apiservers and ingress
|
||||||
|
resource "aws_lb" "nlb" {
|
||||||
|
name = "${var.cluster_name}-nlb"
|
||||||
|
load_balancer_type = "network"
|
||||||
|
internal = false
|
||||||
|
|
||||||
|
subnets = ["${aws_subnet.public.*.id}"]
|
||||||
|
|
||||||
|
enable_cross_zone_load_balancing = true
|
||||||
|
}
|
||||||
|
|
||||||
|
# Forward TCP apiserver traffic to controllers
|
||||||
|
resource "aws_lb_listener" "apiserver-https" {
|
||||||
|
load_balancer_arn = "${aws_lb.nlb.arn}"
|
||||||
|
protocol = "TCP"
|
||||||
|
port = "6443"
|
||||||
|
|
||||||
|
default_action {
|
||||||
|
type = "forward"
|
||||||
|
target_group_arn = "${aws_lb_target_group.controllers.arn}"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Forward HTTP ingress traffic to workers
|
||||||
|
resource "aws_lb_listener" "ingress-http" {
|
||||||
|
load_balancer_arn = "${aws_lb.nlb.arn}"
|
||||||
|
protocol = "TCP"
|
||||||
|
port = 80
|
||||||
|
|
||||||
|
default_action {
|
||||||
|
type = "forward"
|
||||||
|
target_group_arn = "${module.workers.target_group_http}"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Forward HTTPS ingress traffic to workers
|
||||||
|
resource "aws_lb_listener" "ingress-https" {
|
||||||
|
load_balancer_arn = "${aws_lb.nlb.arn}"
|
||||||
|
protocol = "TCP"
|
||||||
|
port = 443
|
||||||
|
|
||||||
|
default_action {
|
||||||
|
type = "forward"
|
||||||
|
target_group_arn = "${module.workers.target_group_https}"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Target group of controllers
|
||||||
|
resource "aws_lb_target_group" "controllers" {
|
||||||
|
name = "${var.cluster_name}-controllers"
|
||||||
|
vpc_id = "${aws_vpc.network.id}"
|
||||||
|
target_type = "instance"
|
||||||
|
|
||||||
|
protocol = "TCP"
|
||||||
|
port = 6443
|
||||||
|
|
||||||
|
# TCP health check for apiserver
|
||||||
|
health_check {
|
||||||
|
protocol = "TCP"
|
||||||
|
port = 6443
|
||||||
|
|
||||||
|
# NLBs required to use same healthy and unhealthy thresholds
|
||||||
|
healthy_threshold = 3
|
||||||
|
unhealthy_threshold = 3
|
||||||
|
|
||||||
|
# Interval between health checks required to be 10 or 30
|
||||||
|
interval = 10
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Attach controller instances to apiserver NLB
|
||||||
|
resource "aws_lb_target_group_attachment" "controllers" {
|
||||||
|
count = "${var.controller_count}"
|
||||||
|
|
||||||
|
target_group_arn = "${aws_lb_target_group.controllers.arn}"
|
||||||
|
target_id = "${element(aws_instance.controllers.*.id, count.index)}"
|
||||||
|
port = 6443
|
||||||
|
}
|
||||||
@ -1,4 +1,39 @@
|
|||||||
|
# Outputs for Kubernetes Ingress
|
||||||
|
|
||||||
output "ingress_dns_name" {
|
output "ingress_dns_name" {
|
||||||
value = "${aws_elb.ingress.dns_name}"
|
value = "${aws_lb.nlb.dns_name}"
|
||||||
description = "DNS name of the ELB for distributing traffic to Ingress controllers"
|
description = "DNS name of the network load balancer for distributing traffic to Ingress controllers"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Outputs for worker pools
|
||||||
|
|
||||||
|
output "vpc_id" {
|
||||||
|
value = "${aws_vpc.network.id}"
|
||||||
|
description = "ID of the VPC for creating worker instances"
|
||||||
|
}
|
||||||
|
|
||||||
|
output "subnet_ids" {
|
||||||
|
value = ["${aws_subnet.public.*.id}"]
|
||||||
|
description = "List of subnet IDs for creating worker instances"
|
||||||
|
}
|
||||||
|
|
||||||
|
output "worker_security_groups" {
|
||||||
|
value = ["${aws_security_group.worker.id}"]
|
||||||
|
description = "List of worker security group IDs"
|
||||||
|
}
|
||||||
|
|
||||||
|
output "kubeconfig" {
|
||||||
|
value = "${module.bootkube.kubeconfig}"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Outputs for custom load balancing
|
||||||
|
|
||||||
|
output "worker_target_group_http" {
|
||||||
|
description = "ARN of a target group of workers for HTTP traffic"
|
||||||
|
value = "${module.workers.target_group_http}"
|
||||||
|
}
|
||||||
|
|
||||||
|
output "worker_target_group_https" {
|
||||||
|
description = "ARN of a target group of workers for HTTPS traffic"
|
||||||
|
value = "${module.workers.target_group_https}"
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,11 +1,11 @@
|
|||||||
# Terraform version and plugin versions
|
# Terraform version and plugin versions
|
||||||
|
|
||||||
terraform {
|
terraform {
|
||||||
required_version = ">= 0.10.4"
|
required_version = ">= 0.11.0"
|
||||||
}
|
}
|
||||||
|
|
||||||
provider "aws" {
|
provider "aws" {
|
||||||
version = "~> 1.0"
|
version = "~> 1.13"
|
||||||
}
|
}
|
||||||
|
|
||||||
provider "local" {
|
provider "local" {
|
||||||
|
|||||||
351
aws/container-linux/kubernetes/security.tf
Normal file
351
aws/container-linux/kubernetes/security.tf
Normal file
@ -0,0 +1,351 @@
|
|||||||
|
# Security Groups (instance firewalls)
|
||||||
|
|
||||||
|
# Controller security group
|
||||||
|
|
||||||
|
resource "aws_security_group" "controller" {
|
||||||
|
name = "${var.cluster_name}-controller"
|
||||||
|
description = "${var.cluster_name} controller security group"
|
||||||
|
|
||||||
|
vpc_id = "${aws_vpc.network.id}"
|
||||||
|
|
||||||
|
tags = "${map("Name", "${var.cluster_name}-controller")}"
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "aws_security_group_rule" "controller-ssh" {
|
||||||
|
security_group_id = "${aws_security_group.controller.id}"
|
||||||
|
|
||||||
|
type = "ingress"
|
||||||
|
protocol = "tcp"
|
||||||
|
from_port = 22
|
||||||
|
to_port = 22
|
||||||
|
cidr_blocks = ["0.0.0.0/0"]
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "aws_security_group_rule" "controller-etcd" {
|
||||||
|
security_group_id = "${aws_security_group.controller.id}"
|
||||||
|
|
||||||
|
type = "ingress"
|
||||||
|
protocol = "tcp"
|
||||||
|
from_port = 2379
|
||||||
|
to_port = 2380
|
||||||
|
self = true
|
||||||
|
}
|
||||||
|
|
||||||
|
# Allow Prometheus to scrape etcd metrics
|
||||||
|
resource "aws_security_group_rule" "controller-etcd-metrics" {
|
||||||
|
security_group_id = "${aws_security_group.controller.id}"
|
||||||
|
|
||||||
|
type = "ingress"
|
||||||
|
protocol = "tcp"
|
||||||
|
from_port = 2381
|
||||||
|
to_port = 2381
|
||||||
|
source_security_group_id = "${aws_security_group.worker.id}"
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "aws_security_group_rule" "controller-apiserver" {
|
||||||
|
security_group_id = "${aws_security_group.controller.id}"
|
||||||
|
|
||||||
|
type = "ingress"
|
||||||
|
protocol = "tcp"
|
||||||
|
from_port = 6443
|
||||||
|
to_port = 6443
|
||||||
|
cidr_blocks = ["0.0.0.0/0"]
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "aws_security_group_rule" "controller-flannel" {
|
||||||
|
security_group_id = "${aws_security_group.controller.id}"
|
||||||
|
|
||||||
|
type = "ingress"
|
||||||
|
protocol = "udp"
|
||||||
|
from_port = 8472
|
||||||
|
to_port = 8472
|
||||||
|
source_security_group_id = "${aws_security_group.worker.id}"
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "aws_security_group_rule" "controller-flannel-self" {
|
||||||
|
security_group_id = "${aws_security_group.controller.id}"
|
||||||
|
|
||||||
|
type = "ingress"
|
||||||
|
protocol = "udp"
|
||||||
|
from_port = 8472
|
||||||
|
to_port = 8472
|
||||||
|
self = true
|
||||||
|
}
|
||||||
|
|
||||||
|
# Allow Prometheus to scrape node-exporter daemonset
|
||||||
|
resource "aws_security_group_rule" "controller-node-exporter" {
|
||||||
|
security_group_id = "${aws_security_group.controller.id}"
|
||||||
|
|
||||||
|
type = "ingress"
|
||||||
|
protocol = "tcp"
|
||||||
|
from_port = 9100
|
||||||
|
to_port = 9100
|
||||||
|
source_security_group_id = "${aws_security_group.worker.id}"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Allow apiserver to access kubelets for exec, log, port-forward
|
||||||
|
resource "aws_security_group_rule" "controller-kubelet" {
|
||||||
|
security_group_id = "${aws_security_group.controller.id}"
|
||||||
|
|
||||||
|
type = "ingress"
|
||||||
|
protocol = "tcp"
|
||||||
|
from_port = 10250
|
||||||
|
to_port = 10250
|
||||||
|
source_security_group_id = "${aws_security_group.worker.id}"
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "aws_security_group_rule" "controller-kubelet-self" {
|
||||||
|
security_group_id = "${aws_security_group.controller.id}"
|
||||||
|
|
||||||
|
type = "ingress"
|
||||||
|
protocol = "tcp"
|
||||||
|
from_port = 10250
|
||||||
|
to_port = 10250
|
||||||
|
self = true
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "aws_security_group_rule" "controller-bgp" {
|
||||||
|
security_group_id = "${aws_security_group.controller.id}"
|
||||||
|
|
||||||
|
type = "ingress"
|
||||||
|
protocol = "tcp"
|
||||||
|
from_port = 179
|
||||||
|
to_port = 179
|
||||||
|
source_security_group_id = "${aws_security_group.worker.id}"
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "aws_security_group_rule" "controller-bgp-self" {
|
||||||
|
security_group_id = "${aws_security_group.controller.id}"
|
||||||
|
|
||||||
|
type = "ingress"
|
||||||
|
protocol = "tcp"
|
||||||
|
from_port = 179
|
||||||
|
to_port = 179
|
||||||
|
self = true
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "aws_security_group_rule" "controller-ipip" {
|
||||||
|
security_group_id = "${aws_security_group.controller.id}"
|
||||||
|
|
||||||
|
type = "ingress"
|
||||||
|
protocol = 4
|
||||||
|
from_port = 0
|
||||||
|
to_port = 0
|
||||||
|
source_security_group_id = "${aws_security_group.worker.id}"
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "aws_security_group_rule" "controller-ipip-self" {
|
||||||
|
security_group_id = "${aws_security_group.controller.id}"
|
||||||
|
|
||||||
|
type = "ingress"
|
||||||
|
protocol = 4
|
||||||
|
from_port = 0
|
||||||
|
to_port = 0
|
||||||
|
self = true
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "aws_security_group_rule" "controller-ipip-legacy" {
|
||||||
|
security_group_id = "${aws_security_group.controller.id}"
|
||||||
|
|
||||||
|
type = "ingress"
|
||||||
|
protocol = 94
|
||||||
|
from_port = 0
|
||||||
|
to_port = 0
|
||||||
|
source_security_group_id = "${aws_security_group.worker.id}"
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "aws_security_group_rule" "controller-ipip-legacy-self" {
|
||||||
|
security_group_id = "${aws_security_group.controller.id}"
|
||||||
|
|
||||||
|
type = "ingress"
|
||||||
|
protocol = 94
|
||||||
|
from_port = 0
|
||||||
|
to_port = 0
|
||||||
|
self = true
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "aws_security_group_rule" "controller-egress" {
|
||||||
|
security_group_id = "${aws_security_group.controller.id}"
|
||||||
|
|
||||||
|
type = "egress"
|
||||||
|
protocol = "-1"
|
||||||
|
from_port = 0
|
||||||
|
to_port = 0
|
||||||
|
cidr_blocks = ["0.0.0.0/0"]
|
||||||
|
ipv6_cidr_blocks = ["::/0"]
|
||||||
|
}
|
||||||
|
|
||||||
|
# Worker security group
|
||||||
|
|
||||||
|
resource "aws_security_group" "worker" {
|
||||||
|
name = "${var.cluster_name}-worker"
|
||||||
|
description = "${var.cluster_name} worker security group"
|
||||||
|
|
||||||
|
vpc_id = "${aws_vpc.network.id}"
|
||||||
|
|
||||||
|
tags = "${map("Name", "${var.cluster_name}-worker")}"
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "aws_security_group_rule" "worker-ssh" {
|
||||||
|
security_group_id = "${aws_security_group.worker.id}"
|
||||||
|
|
||||||
|
type = "ingress"
|
||||||
|
protocol = "tcp"
|
||||||
|
from_port = 22
|
||||||
|
to_port = 22
|
||||||
|
cidr_blocks = ["0.0.0.0/0"]
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "aws_security_group_rule" "worker-http" {
|
||||||
|
security_group_id = "${aws_security_group.worker.id}"
|
||||||
|
|
||||||
|
type = "ingress"
|
||||||
|
protocol = "tcp"
|
||||||
|
from_port = 80
|
||||||
|
to_port = 80
|
||||||
|
cidr_blocks = ["0.0.0.0/0"]
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "aws_security_group_rule" "worker-https" {
|
||||||
|
security_group_id = "${aws_security_group.worker.id}"
|
||||||
|
|
||||||
|
type = "ingress"
|
||||||
|
protocol = "tcp"
|
||||||
|
from_port = 443
|
||||||
|
to_port = 443
|
||||||
|
cidr_blocks = ["0.0.0.0/0"]
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "aws_security_group_rule" "worker-flannel" {
|
||||||
|
security_group_id = "${aws_security_group.worker.id}"
|
||||||
|
|
||||||
|
type = "ingress"
|
||||||
|
protocol = "udp"
|
||||||
|
from_port = 8472
|
||||||
|
to_port = 8472
|
||||||
|
source_security_group_id = "${aws_security_group.controller.id}"
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "aws_security_group_rule" "worker-flannel-self" {
|
||||||
|
security_group_id = "${aws_security_group.worker.id}"
|
||||||
|
|
||||||
|
type = "ingress"
|
||||||
|
protocol = "udp"
|
||||||
|
from_port = 8472
|
||||||
|
to_port = 8472
|
||||||
|
self = true
|
||||||
|
}
|
||||||
|
|
||||||
|
# Allow Prometheus to scrape node-exporter daemonset
|
||||||
|
resource "aws_security_group_rule" "worker-node-exporter" {
|
||||||
|
security_group_id = "${aws_security_group.worker.id}"
|
||||||
|
|
||||||
|
type = "ingress"
|
||||||
|
protocol = "tcp"
|
||||||
|
from_port = 9100
|
||||||
|
to_port = 9100
|
||||||
|
self = true
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "aws_security_group_rule" "ingress-health" {
|
||||||
|
security_group_id = "${aws_security_group.worker.id}"
|
||||||
|
|
||||||
|
type = "ingress"
|
||||||
|
protocol = "tcp"
|
||||||
|
from_port = 10254
|
||||||
|
to_port = 10254
|
||||||
|
cidr_blocks = ["0.0.0.0/0"]
|
||||||
|
}
|
||||||
|
|
||||||
|
# Allow apiserver to access kubelets for exec, log, port-forward
|
||||||
|
resource "aws_security_group_rule" "worker-kubelet" {
|
||||||
|
security_group_id = "${aws_security_group.worker.id}"
|
||||||
|
|
||||||
|
type = "ingress"
|
||||||
|
protocol = "tcp"
|
||||||
|
from_port = 10250
|
||||||
|
to_port = 10250
|
||||||
|
source_security_group_id = "${aws_security_group.controller.id}"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Allow Prometheus to scrape kubelet metrics
|
||||||
|
resource "aws_security_group_rule" "worker-kubelet-self" {
|
||||||
|
security_group_id = "${aws_security_group.worker.id}"
|
||||||
|
|
||||||
|
type = "ingress"
|
||||||
|
protocol = "tcp"
|
||||||
|
from_port = 10250
|
||||||
|
to_port = 10250
|
||||||
|
self = true
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "aws_security_group_rule" "worker-bgp" {
|
||||||
|
security_group_id = "${aws_security_group.worker.id}"
|
||||||
|
|
||||||
|
type = "ingress"
|
||||||
|
protocol = "tcp"
|
||||||
|
from_port = 179
|
||||||
|
to_port = 179
|
||||||
|
source_security_group_id = "${aws_security_group.controller.id}"
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "aws_security_group_rule" "worker-bgp-self" {
|
||||||
|
security_group_id = "${aws_security_group.worker.id}"
|
||||||
|
|
||||||
|
type = "ingress"
|
||||||
|
protocol = "tcp"
|
||||||
|
from_port = 179
|
||||||
|
to_port = 179
|
||||||
|
self = true
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "aws_security_group_rule" "worker-ipip" {
|
||||||
|
security_group_id = "${aws_security_group.worker.id}"
|
||||||
|
|
||||||
|
type = "ingress"
|
||||||
|
protocol = 4
|
||||||
|
from_port = 0
|
||||||
|
to_port = 0
|
||||||
|
source_security_group_id = "${aws_security_group.controller.id}"
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "aws_security_group_rule" "worker-ipip-self" {
|
||||||
|
security_group_id = "${aws_security_group.worker.id}"
|
||||||
|
|
||||||
|
type = "ingress"
|
||||||
|
protocol = 4
|
||||||
|
from_port = 0
|
||||||
|
to_port = 0
|
||||||
|
self = true
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "aws_security_group_rule" "worker-ipip-legacy" {
|
||||||
|
security_group_id = "${aws_security_group.worker.id}"
|
||||||
|
|
||||||
|
type = "ingress"
|
||||||
|
protocol = 94
|
||||||
|
from_port = 0
|
||||||
|
to_port = 0
|
||||||
|
source_security_group_id = "${aws_security_group.controller.id}"
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "aws_security_group_rule" "worker-ipip-legacy-self" {
|
||||||
|
security_group_id = "${aws_security_group.worker.id}"
|
||||||
|
|
||||||
|
type = "ingress"
|
||||||
|
protocol = 94
|
||||||
|
from_port = 0
|
||||||
|
to_port = 0
|
||||||
|
self = true
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "aws_security_group_rule" "worker-egress" {
|
||||||
|
security_group_id = "${aws_security_group.worker.id}"
|
||||||
|
|
||||||
|
type = "egress"
|
||||||
|
protocol = "-1"
|
||||||
|
from_port = 0
|
||||||
|
to_port = 0
|
||||||
|
cidr_blocks = ["0.0.0.0/0"]
|
||||||
|
ipv6_cidr_blocks = ["::/0"]
|
||||||
|
}
|
||||||
@ -1,5 +1,5 @@
|
|||||||
# Secure copy etcd TLS assets and kubeconfig to controllers. Activates kubelet.service
|
# Secure copy etcd TLS assets to controllers.
|
||||||
resource "null_resource" "copy-secrets" {
|
resource "null_resource" "copy-controller-secrets" {
|
||||||
count = "${var.controller_count}"
|
count = "${var.controller_count}"
|
||||||
|
|
||||||
connection {
|
connection {
|
||||||
@ -9,11 +9,6 @@ resource "null_resource" "copy-secrets" {
|
|||||||
timeout = "15m"
|
timeout = "15m"
|
||||||
}
|
}
|
||||||
|
|
||||||
provisioner "file" {
|
|
||||||
content = "${module.bootkube.kubeconfig}"
|
|
||||||
destination = "$HOME/kubeconfig"
|
|
||||||
}
|
|
||||||
|
|
||||||
provisioner "file" {
|
provisioner "file" {
|
||||||
content = "${module.bootkube.etcd_ca_cert}"
|
content = "${module.bootkube.etcd_ca_cert}"
|
||||||
destination = "$HOME/etcd-client-ca.crt"
|
destination = "$HOME/etcd-client-ca.crt"
|
||||||
@ -61,7 +56,6 @@ resource "null_resource" "copy-secrets" {
|
|||||||
"sudo mv etcd-peer.key /etc/ssl/etcd/etcd/peer.key",
|
"sudo mv etcd-peer.key /etc/ssl/etcd/etcd/peer.key",
|
||||||
"sudo chown -R etcd:etcd /etc/ssl/etcd",
|
"sudo chown -R etcd:etcd /etc/ssl/etcd",
|
||||||
"sudo chmod -R 500 /etc/ssl/etcd",
|
"sudo chmod -R 500 /etc/ssl/etcd",
|
||||||
"sudo mv /home/core/kubeconfig /etc/kubernetes/kubeconfig",
|
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -69,7 +63,12 @@ resource "null_resource" "copy-secrets" {
|
|||||||
# Secure copy bootkube assets to ONE controller and start bootkube to perform
|
# Secure copy bootkube assets to ONE controller and start bootkube to perform
|
||||||
# one-time self-hosted cluster bootstrapping.
|
# one-time self-hosted cluster bootstrapping.
|
||||||
resource "null_resource" "bootkube-start" {
|
resource "null_resource" "bootkube-start" {
|
||||||
depends_on = ["module.bootkube", "null_resource.copy-secrets", "aws_route53_record.apiserver"]
|
depends_on = [
|
||||||
|
"module.bootkube",
|
||||||
|
"module.workers",
|
||||||
|
"aws_route53_record.apiserver",
|
||||||
|
"null_resource.copy-controller-secrets",
|
||||||
|
]
|
||||||
|
|
||||||
connection {
|
connection {
|
||||||
type = "ssh"
|
type = "ssh"
|
||||||
@ -85,7 +84,7 @@ resource "null_resource" "bootkube-start" {
|
|||||||
|
|
||||||
provisioner "remote-exec" {
|
provisioner "remote-exec" {
|
||||||
inline = [
|
inline = [
|
||||||
"sudo mv /home/core/assets /opt/bootkube",
|
"sudo mv $HOME/assets /opt/bootkube",
|
||||||
"sudo systemctl start bootkube",
|
"sudo systemctl start bootkube",
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,51 +1,26 @@
|
|||||||
variable "cluster_name" {
|
variable "cluster_name" {
|
||||||
type = "string"
|
type = "string"
|
||||||
description = "Cluster name"
|
description = "Unique cluster name (prepended to dns_zone)"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# AWS
|
||||||
|
|
||||||
variable "dns_zone" {
|
variable "dns_zone" {
|
||||||
type = "string"
|
type = "string"
|
||||||
description = "AWS DNS Zone (e.g. aws.dghubble.io)"
|
description = "AWS Route53 DNS Zone (e.g. aws.example.com)"
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "dns_zone_id" {
|
variable "dns_zone_id" {
|
||||||
type = "string"
|
type = "string"
|
||||||
description = "AWS DNS Zone ID (e.g. Z3PAABBCFAKEC0)"
|
description = "AWS Route53 DNS Zone ID (e.g. Z3PAABBCFAKEC0)"
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "ssh_authorized_key" {
|
# instances
|
||||||
type = "string"
|
|
||||||
description = "SSH public key for user 'core'"
|
|
||||||
}
|
|
||||||
|
|
||||||
variable "os_channel" {
|
|
||||||
type = "string"
|
|
||||||
default = "stable"
|
|
||||||
description = "Container Linux AMI channel (stable, beta, alpha)"
|
|
||||||
}
|
|
||||||
|
|
||||||
variable "disk_size" {
|
|
||||||
type = "string"
|
|
||||||
default = "40"
|
|
||||||
description = "The size of the disk in Gigabytes"
|
|
||||||
}
|
|
||||||
|
|
||||||
variable "host_cidr" {
|
|
||||||
description = "CIDR IPv4 range to assign to EC2 nodes"
|
|
||||||
type = "string"
|
|
||||||
default = "10.0.0.0/16"
|
|
||||||
}
|
|
||||||
|
|
||||||
variable "controller_count" {
|
variable "controller_count" {
|
||||||
type = "string"
|
type = "string"
|
||||||
default = "1"
|
default = "1"
|
||||||
description = "Number of controllers"
|
description = "Number of controllers (i.e. masters)"
|
||||||
}
|
|
||||||
|
|
||||||
variable "controller_type" {
|
|
||||||
type = "string"
|
|
||||||
default = "t2.small"
|
|
||||||
description = "Controller EC2 instance type"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "worker_count" {
|
variable "worker_count" {
|
||||||
@ -54,13 +29,66 @@ variable "worker_count" {
|
|||||||
description = "Number of workers"
|
description = "Number of workers"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
variable "controller_type" {
|
||||||
|
type = "string"
|
||||||
|
default = "t2.small"
|
||||||
|
description = "EC2 instance type for controllers"
|
||||||
|
}
|
||||||
|
|
||||||
variable "worker_type" {
|
variable "worker_type" {
|
||||||
type = "string"
|
type = "string"
|
||||||
default = "t2.small"
|
default = "t2.small"
|
||||||
description = "Worker EC2 instance type"
|
description = "EC2 instance type for workers"
|
||||||
}
|
}
|
||||||
|
|
||||||
# bootkube assets
|
variable "os_image" {
|
||||||
|
type = "string"
|
||||||
|
default = "coreos-stable"
|
||||||
|
description = "AMI channel for a Container Linux derivative (coreos-stable, coreos-beta, coreos-alpha, flatcar-stable, flatcar-beta, flatcar-alpha)"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "disk_size" {
|
||||||
|
type = "string"
|
||||||
|
default = "40"
|
||||||
|
description = "Size of the EBS volume in GB"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "disk_type" {
|
||||||
|
type = "string"
|
||||||
|
default = "gp2"
|
||||||
|
description = "Type of the EBS volume (e.g. standard, gp2, io1)"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "disk_iops" {
|
||||||
|
type = "string"
|
||||||
|
default = "0"
|
||||||
|
description = "IOPS of the EBS volume (e.g. 100)"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "worker_price" {
|
||||||
|
type = "string"
|
||||||
|
default = ""
|
||||||
|
description = "Spot price in USD for autoscaling group spot instances. Leave as default empty string for autoscaling group to use on-demand instances. Note, switching in-place from spot to on-demand is not possible: https://github.com/terraform-providers/terraform-provider-aws/issues/4320"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "controller_clc_snippets" {
|
||||||
|
type = "list"
|
||||||
|
description = "Controller Container Linux Config snippets"
|
||||||
|
default = []
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "worker_clc_snippets" {
|
||||||
|
type = "list"
|
||||||
|
description = "Worker Container Linux Config snippets"
|
||||||
|
default = []
|
||||||
|
}
|
||||||
|
|
||||||
|
# configuration
|
||||||
|
|
||||||
|
variable "ssh_authorized_key" {
|
||||||
|
type = "string"
|
||||||
|
description = "SSH public key for user 'core'"
|
||||||
|
}
|
||||||
|
|
||||||
variable "asset_dir" {
|
variable "asset_dir" {
|
||||||
description = "Path to a directory where generated assets should be placed (contains secrets)"
|
description = "Path to a directory where generated assets should be placed (contains secrets)"
|
||||||
@ -79,6 +107,12 @@ variable "network_mtu" {
|
|||||||
default = "1480"
|
default = "1480"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
variable "host_cidr" {
|
||||||
|
description = "CIDR IPv4 range to assign to EC2 nodes"
|
||||||
|
type = "string"
|
||||||
|
default = "10.0.0.0/16"
|
||||||
|
}
|
||||||
|
|
||||||
variable "pod_cidr" {
|
variable "pod_cidr" {
|
||||||
description = "CIDR IPv4 range to assign Kubernetes pods"
|
description = "CIDR IPv4 range to assign Kubernetes pods"
|
||||||
type = "string"
|
type = "string"
|
||||||
@ -88,7 +122,7 @@ variable "pod_cidr" {
|
|||||||
variable "service_cidr" {
|
variable "service_cidr" {
|
||||||
description = <<EOD
|
description = <<EOD
|
||||||
CIDR IPv4 range to assign Kubernetes services.
|
CIDR IPv4 range to assign Kubernetes services.
|
||||||
The 1st IP will be reserved for kube_apiserver, the 10th IP will be reserved for kube-dns.
|
The 1st IP will be reserved for kube_apiserver, the 10th IP will be reserved for coredns.
|
||||||
EOD
|
EOD
|
||||||
|
|
||||||
type = "string"
|
type = "string"
|
||||||
@ -96,7 +130,7 @@ EOD
|
|||||||
}
|
}
|
||||||
|
|
||||||
variable "cluster_domain_suffix" {
|
variable "cluster_domain_suffix" {
|
||||||
description = "Queries for domains with the suffix will be answered by kube-dns. Default is cluster.local (e.g. foo.default.svc.cluster.local) "
|
description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. foo.default.svc.cluster.local) "
|
||||||
type = "string"
|
type = "string"
|
||||||
default = "cluster.local"
|
default = "cluster.local"
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,275 +1,21 @@
|
|||||||
# Workers AutoScaling Group
|
module "workers" {
|
||||||
resource "aws_autoscaling_group" "workers" {
|
source = "workers"
|
||||||
name = "${var.cluster_name}-worker ${aws_launch_configuration.worker.name}"
|
name = "${var.cluster_name}"
|
||||||
load_balancers = ["${aws_elb.ingress.id}"]
|
|
||||||
|
|
||||||
# count
|
|
||||||
desired_capacity = "${var.worker_count}"
|
|
||||||
min_size = "${var.worker_count}"
|
|
||||||
max_size = "${var.worker_count + 2}"
|
|
||||||
default_cooldown = 30
|
|
||||||
health_check_grace_period = 30
|
|
||||||
|
|
||||||
# network
|
|
||||||
vpc_zone_identifier = ["${aws_subnet.public.*.id}"]
|
|
||||||
|
|
||||||
# template
|
|
||||||
launch_configuration = "${aws_launch_configuration.worker.name}"
|
|
||||||
|
|
||||||
lifecycle {
|
|
||||||
# override the default destroy and replace update behavior
|
|
||||||
create_before_destroy = true
|
|
||||||
ignore_changes = ["image_id"]
|
|
||||||
}
|
|
||||||
|
|
||||||
tags = [{
|
|
||||||
key = "Name"
|
|
||||||
value = "${var.cluster_name}-worker"
|
|
||||||
propagate_at_launch = true
|
|
||||||
}]
|
|
||||||
}
|
|
||||||
|
|
||||||
# Worker template
|
|
||||||
resource "aws_launch_configuration" "worker" {
|
|
||||||
image_id = "${data.aws_ami.coreos.image_id}"
|
|
||||||
instance_type = "${var.worker_type}"
|
|
||||||
|
|
||||||
user_data = "${data.ct_config.worker_ign.rendered}"
|
|
||||||
|
|
||||||
# storage
|
|
||||||
root_block_device {
|
|
||||||
volume_type = "standard"
|
|
||||||
volume_size = "${var.disk_size}"
|
|
||||||
}
|
|
||||||
|
|
||||||
# network
|
|
||||||
security_groups = ["${aws_security_group.worker.id}"]
|
|
||||||
|
|
||||||
lifecycle {
|
|
||||||
// Override the default destroy and replace update behavior
|
|
||||||
create_before_destroy = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
# Worker Container Linux Config
|
|
||||||
data "template_file" "worker_config" {
|
|
||||||
template = "${file("${path.module}/cl/worker.yaml.tmpl")}"
|
|
||||||
|
|
||||||
vars = {
|
|
||||||
k8s_dns_service_ip = "${cidrhost(var.service_cidr, 10)}"
|
|
||||||
k8s_etcd_service_ip = "${cidrhost(var.service_cidr, 15)}"
|
|
||||||
ssh_authorized_key = "${var.ssh_authorized_key}"
|
|
||||||
cluster_domain_suffix = "${var.cluster_domain_suffix}"
|
|
||||||
kubeconfig_ca_cert = "${module.bootkube.ca_cert}"
|
|
||||||
kubeconfig_kubelet_cert = "${module.bootkube.kubelet_cert}"
|
|
||||||
kubeconfig_kubelet_key = "${module.bootkube.kubelet_key}"
|
|
||||||
kubeconfig_server = "${module.bootkube.server}"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
data "ct_config" "worker_ign" {
|
|
||||||
content = "${data.template_file.worker_config.rendered}"
|
|
||||||
pretty_print = false
|
|
||||||
}
|
|
||||||
|
|
||||||
# Security Group (instance firewall)
|
|
||||||
|
|
||||||
resource "aws_security_group" "worker" {
|
|
||||||
name = "${var.cluster_name}-worker"
|
|
||||||
description = "${var.cluster_name} worker security group"
|
|
||||||
|
|
||||||
|
# AWS
|
||||||
vpc_id = "${aws_vpc.network.id}"
|
vpc_id = "${aws_vpc.network.id}"
|
||||||
|
subnet_ids = ["${aws_subnet.public.*.id}"]
|
||||||
|
security_groups = ["${aws_security_group.worker.id}"]
|
||||||
|
count = "${var.worker_count}"
|
||||||
|
instance_type = "${var.worker_type}"
|
||||||
|
os_image = "${var.os_image}"
|
||||||
|
disk_size = "${var.disk_size}"
|
||||||
|
spot_price = "${var.worker_price}"
|
||||||
|
|
||||||
tags = "${map("Name", "${var.cluster_name}-worker")}"
|
# configuration
|
||||||
}
|
kubeconfig = "${module.bootkube.kubeconfig}"
|
||||||
|
ssh_authorized_key = "${var.ssh_authorized_key}"
|
||||||
resource "aws_security_group_rule" "worker-icmp" {
|
service_cidr = "${var.service_cidr}"
|
||||||
security_group_id = "${aws_security_group.worker.id}"
|
cluster_domain_suffix = "${var.cluster_domain_suffix}"
|
||||||
|
clc_snippets = "${var.worker_clc_snippets}"
|
||||||
type = "ingress"
|
|
||||||
protocol = "icmp"
|
|
||||||
from_port = 0
|
|
||||||
to_port = 0
|
|
||||||
cidr_blocks = ["0.0.0.0/0"]
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "aws_security_group_rule" "worker-ssh" {
|
|
||||||
security_group_id = "${aws_security_group.worker.id}"
|
|
||||||
|
|
||||||
type = "ingress"
|
|
||||||
protocol = "tcp"
|
|
||||||
from_port = 22
|
|
||||||
to_port = 22
|
|
||||||
cidr_blocks = ["0.0.0.0/0"]
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "aws_security_group_rule" "worker-http" {
|
|
||||||
security_group_id = "${aws_security_group.worker.id}"
|
|
||||||
|
|
||||||
type = "ingress"
|
|
||||||
protocol = "tcp"
|
|
||||||
from_port = 80
|
|
||||||
to_port = 80
|
|
||||||
cidr_blocks = ["0.0.0.0/0"]
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "aws_security_group_rule" "worker-https" {
|
|
||||||
security_group_id = "${aws_security_group.worker.id}"
|
|
||||||
|
|
||||||
type = "ingress"
|
|
||||||
protocol = "tcp"
|
|
||||||
from_port = 443
|
|
||||||
to_port = 443
|
|
||||||
cidr_blocks = ["0.0.0.0/0"]
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "aws_security_group_rule" "worker-flannel" {
|
|
||||||
security_group_id = "${aws_security_group.worker.id}"
|
|
||||||
|
|
||||||
type = "ingress"
|
|
||||||
protocol = "udp"
|
|
||||||
from_port = 8472
|
|
||||||
to_port = 8472
|
|
||||||
source_security_group_id = "${aws_security_group.controller.id}"
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "aws_security_group_rule" "worker-flannel-self" {
|
|
||||||
security_group_id = "${aws_security_group.worker.id}"
|
|
||||||
|
|
||||||
type = "ingress"
|
|
||||||
protocol = "udp"
|
|
||||||
from_port = 8472
|
|
||||||
to_port = 8472
|
|
||||||
self = true
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "aws_security_group_rule" "worker-node-exporter" {
|
|
||||||
security_group_id = "${aws_security_group.worker.id}"
|
|
||||||
|
|
||||||
type = "ingress"
|
|
||||||
protocol = "tcp"
|
|
||||||
from_port = 9100
|
|
||||||
to_port = 9100
|
|
||||||
self = true
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "aws_security_group_rule" "worker-kubelet" {
|
|
||||||
security_group_id = "${aws_security_group.worker.id}"
|
|
||||||
|
|
||||||
type = "ingress"
|
|
||||||
protocol = "tcp"
|
|
||||||
from_port = 10250
|
|
||||||
to_port = 10250
|
|
||||||
source_security_group_id = "${aws_security_group.controller.id}"
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "aws_security_group_rule" "worker-kubelet-self" {
|
|
||||||
security_group_id = "${aws_security_group.worker.id}"
|
|
||||||
|
|
||||||
type = "ingress"
|
|
||||||
protocol = "tcp"
|
|
||||||
from_port = 10250
|
|
||||||
to_port = 10250
|
|
||||||
self = true
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "aws_security_group_rule" "worker-kubelet-read" {
|
|
||||||
security_group_id = "${aws_security_group.worker.id}"
|
|
||||||
|
|
||||||
type = "ingress"
|
|
||||||
protocol = "tcp"
|
|
||||||
from_port = 10255
|
|
||||||
to_port = 10255
|
|
||||||
source_security_group_id = "${aws_security_group.controller.id}"
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "aws_security_group_rule" "worker-kubelet-read-self" {
|
|
||||||
security_group_id = "${aws_security_group.worker.id}"
|
|
||||||
|
|
||||||
type = "ingress"
|
|
||||||
protocol = "tcp"
|
|
||||||
from_port = 10255
|
|
||||||
to_port = 10255
|
|
||||||
self = true
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "aws_security_group_rule" "ingress-health-self" {
|
|
||||||
security_group_id = "${aws_security_group.worker.id}"
|
|
||||||
|
|
||||||
type = "ingress"
|
|
||||||
protocol = "tcp"
|
|
||||||
from_port = 10254
|
|
||||||
to_port = 10254
|
|
||||||
self = true
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "aws_security_group_rule" "worker-bgp" {
|
|
||||||
security_group_id = "${aws_security_group.worker.id}"
|
|
||||||
|
|
||||||
type = "ingress"
|
|
||||||
protocol = "tcp"
|
|
||||||
from_port = 179
|
|
||||||
to_port = 179
|
|
||||||
source_security_group_id = "${aws_security_group.controller.id}"
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "aws_security_group_rule" "worker-bgp-self" {
|
|
||||||
security_group_id = "${aws_security_group.worker.id}"
|
|
||||||
|
|
||||||
type = "ingress"
|
|
||||||
protocol = "tcp"
|
|
||||||
from_port = 179
|
|
||||||
to_port = 179
|
|
||||||
self = true
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "aws_security_group_rule" "worker-ipip" {
|
|
||||||
security_group_id = "${aws_security_group.worker.id}"
|
|
||||||
|
|
||||||
type = "ingress"
|
|
||||||
protocol = 4
|
|
||||||
from_port = 0
|
|
||||||
to_port = 0
|
|
||||||
source_security_group_id = "${aws_security_group.controller.id}"
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "aws_security_group_rule" "worker-ipip-self" {
|
|
||||||
security_group_id = "${aws_security_group.worker.id}"
|
|
||||||
|
|
||||||
type = "ingress"
|
|
||||||
protocol = 4
|
|
||||||
from_port = 0
|
|
||||||
to_port = 0
|
|
||||||
self = true
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "aws_security_group_rule" "worker-ipip-legacy" {
|
|
||||||
security_group_id = "${aws_security_group.worker.id}"
|
|
||||||
|
|
||||||
type = "ingress"
|
|
||||||
protocol = 94
|
|
||||||
from_port = 0
|
|
||||||
to_port = 0
|
|
||||||
source_security_group_id = "${aws_security_group.controller.id}"
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "aws_security_group_rule" "worker-ipip-legacy-self" {
|
|
||||||
security_group_id = "${aws_security_group.worker.id}"
|
|
||||||
|
|
||||||
type = "ingress"
|
|
||||||
protocol = 94
|
|
||||||
from_port = 0
|
|
||||||
to_port = 0
|
|
||||||
self = true
|
|
||||||
}
|
|
||||||
|
|
||||||
resource "aws_security_group_rule" "worker-egress" {
|
|
||||||
security_group_id = "${aws_security_group.worker.id}"
|
|
||||||
|
|
||||||
type = "egress"
|
|
||||||
protocol = "-1"
|
|
||||||
from_port = 0
|
|
||||||
to_port = 0
|
|
||||||
cidr_blocks = ["0.0.0.0/0"]
|
|
||||||
ipv6_cidr_blocks = ["::/0"]
|
|
||||||
}
|
}
|
||||||
|
|||||||
49
aws/container-linux/kubernetes/workers/ami.tf
Normal file
49
aws/container-linux/kubernetes/workers/ami.tf
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
locals {
|
||||||
|
# Pick a CoreOS Container Linux derivative
|
||||||
|
# coreos-stable -> Container Linux AMI
|
||||||
|
# flatcar-stable -> Flatcar Linux AMI
|
||||||
|
ami_id = "${local.flavor == "flatcar" ? data.aws_ami.flatcar.image_id : data.aws_ami.coreos.image_id}"
|
||||||
|
|
||||||
|
flavor = "${element(split("-", var.os_image), 0)}"
|
||||||
|
channel = "${element(split("-", var.os_image), 1)}"
|
||||||
|
}
|
||||||
|
|
||||||
|
data "aws_ami" "coreos" {
|
||||||
|
most_recent = true
|
||||||
|
owners = ["595879546273"]
|
||||||
|
|
||||||
|
filter {
|
||||||
|
name = "architecture"
|
||||||
|
values = ["x86_64"]
|
||||||
|
}
|
||||||
|
|
||||||
|
filter {
|
||||||
|
name = "virtualization-type"
|
||||||
|
values = ["hvm"]
|
||||||
|
}
|
||||||
|
|
||||||
|
filter {
|
||||||
|
name = "name"
|
||||||
|
values = ["CoreOS-${local.channel}-*"]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
data "aws_ami" "flatcar" {
|
||||||
|
most_recent = true
|
||||||
|
owners = ["075585003325"]
|
||||||
|
|
||||||
|
filter {
|
||||||
|
name = "architecture"
|
||||||
|
values = ["x86_64"]
|
||||||
|
}
|
||||||
|
|
||||||
|
filter {
|
||||||
|
name = "virtualization-type"
|
||||||
|
values = ["hvm"]
|
||||||
|
}
|
||||||
|
|
||||||
|
filter {
|
||||||
|
name = "name"
|
||||||
|
values = ["Flatcar-${local.channel}-*"]
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -31,6 +31,8 @@ systemd:
|
|||||||
--mount volume=resolv,target=/etc/resolv.conf \
|
--mount volume=resolv,target=/etc/resolv.conf \
|
||||||
--volume var-lib-cni,kind=host,source=/var/lib/cni \
|
--volume var-lib-cni,kind=host,source=/var/lib/cni \
|
||||||
--mount volume=var-lib-cni,target=/var/lib/cni \
|
--mount volume=var-lib-cni,target=/var/lib/cni \
|
||||||
|
--volume var-lib-calico,kind=host,source=/var/lib/calico \
|
||||||
|
--mount volume=var-lib-calico,target=/var/lib/calico \
|
||||||
--volume opt-cni-bin,kind=host,source=/opt/cni/bin \
|
--volume opt-cni-bin,kind=host,source=/opt/cni/bin \
|
||||||
--mount volume=opt-cni-bin,target=/opt/cni/bin \
|
--mount volume=opt-cni-bin,target=/opt/cni/bin \
|
||||||
--volume var-log,kind=host,source=/var/log \
|
--volume var-log,kind=host,source=/var/log \
|
||||||
@ -39,14 +41,15 @@ systemd:
|
|||||||
ExecStartPre=/bin/mkdir -p /opt/cni/bin
|
ExecStartPre=/bin/mkdir -p /opt/cni/bin
|
||||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
|
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
|
||||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/cni/net.d
|
ExecStartPre=/bin/mkdir -p /etc/kubernetes/cni/net.d
|
||||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/checkpoint-secrets
|
|
||||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/inactive-manifests
|
|
||||||
ExecStartPre=/bin/mkdir -p /var/lib/cni
|
ExecStartPre=/bin/mkdir -p /var/lib/cni
|
||||||
|
ExecStartPre=/bin/mkdir -p /var/lib/calico
|
||||||
|
ExecStartPre=/bin/mkdir -p /var/lib/kubelet/volumeplugins
|
||||||
ExecStartPre=/usr/bin/bash -c "grep 'certificate-authority-data' /etc/kubernetes/kubeconfig | awk '{print $2}' | base64 -d > /etc/kubernetes/ca.crt"
|
ExecStartPre=/usr/bin/bash -c "grep 'certificate-authority-data' /etc/kubernetes/kubeconfig | awk '{print $2}' | base64 -d > /etc/kubernetes/ca.crt"
|
||||||
ExecStartPre=-/usr/bin/rkt rm --uuid-file=/var/cache/kubelet-pod.uuid
|
ExecStartPre=-/usr/bin/rkt rm --uuid-file=/var/cache/kubelet-pod.uuid
|
||||||
ExecStart=/usr/lib/coreos/kubelet-wrapper \
|
ExecStart=/usr/lib/coreos/kubelet-wrapper \
|
||||||
--allow-privileged \
|
|
||||||
--anonymous-auth=false \
|
--anonymous-auth=false \
|
||||||
|
--authentication-token-webhook \
|
||||||
|
--authorization-mode=Webhook \
|
||||||
--client-ca-file=/etc/kubernetes/ca.crt \
|
--client-ca-file=/etc/kubernetes/ca.crt \
|
||||||
--cluster_dns=${k8s_dns_service_ip} \
|
--cluster_dns=${k8s_dns_service_ip} \
|
||||||
--cluster_domain=${cluster_domain_suffix} \
|
--cluster_domain=${cluster_domain_suffix} \
|
||||||
@ -56,7 +59,9 @@ systemd:
|
|||||||
--lock-file=/var/run/lock/kubelet.lock \
|
--lock-file=/var/run/lock/kubelet.lock \
|
||||||
--network-plugin=cni \
|
--network-plugin=cni \
|
||||||
--node-labels=node-role.kubernetes.io/node \
|
--node-labels=node-role.kubernetes.io/node \
|
||||||
--pod-manifest-path=/etc/kubernetes/manifests
|
--pod-manifest-path=/etc/kubernetes/manifests \
|
||||||
|
--read-only-port=0 \
|
||||||
|
--volume-plugin-dir=/var/lib/kubelet/volumeplugins
|
||||||
ExecStop=-/usr/bin/rkt stop --uuid-file=/var/cache/kubelet-pod.uuid
|
ExecStop=-/usr/bin/rkt stop --uuid-file=/var/cache/kubelet-pod.uuid
|
||||||
Restart=always
|
Restart=always
|
||||||
RestartSec=5
|
RestartSec=5
|
||||||
@ -81,29 +86,14 @@ storage:
|
|||||||
mode: 0644
|
mode: 0644
|
||||||
contents:
|
contents:
|
||||||
inline: |
|
inline: |
|
||||||
apiVersion: v1
|
${kubeconfig}
|
||||||
kind: Config
|
|
||||||
clusters:
|
|
||||||
- name: local
|
|
||||||
cluster:
|
|
||||||
server: ${kubeconfig_server}
|
|
||||||
certificate-authority-data: ${kubeconfig_ca_cert}
|
|
||||||
users:
|
|
||||||
- name: kubelet
|
|
||||||
user:
|
|
||||||
client-certificate-data: ${kubeconfig_kubelet_cert}
|
|
||||||
client-key-data: ${kubeconfig_kubelet_key}
|
|
||||||
contexts:
|
|
||||||
- context:
|
|
||||||
cluster: local
|
|
||||||
user: kubelet
|
|
||||||
- path: /etc/kubernetes/kubelet.env
|
- path: /etc/kubernetes/kubelet.env
|
||||||
filesystem: root
|
filesystem: root
|
||||||
mode: 0644
|
mode: 0644
|
||||||
contents:
|
contents:
|
||||||
inline: |
|
inline: |
|
||||||
KUBELET_IMAGE_URL=docker://gcr.io/google_containers/hyperkube
|
KUBELET_IMAGE_URL=docker://k8s.gcr.io/hyperkube
|
||||||
KUBELET_IMAGE_TAG=v1.9.1
|
KUBELET_IMAGE_TAG=v1.12.2
|
||||||
- path: /etc/sysctl.d/max-user-watches.conf
|
- path: /etc/sysctl.d/max-user-watches.conf
|
||||||
filesystem: root
|
filesystem: root
|
||||||
contents:
|
contents:
|
||||||
@ -121,7 +111,7 @@ storage:
|
|||||||
--volume config,kind=host,source=/etc/kubernetes \
|
--volume config,kind=host,source=/etc/kubernetes \
|
||||||
--mount volume=config,target=/etc/kubernetes \
|
--mount volume=config,target=/etc/kubernetes \
|
||||||
--insecure-options=image \
|
--insecure-options=image \
|
||||||
docker://gcr.io/google_containers/hyperkube:v1.9.1 \
|
docker://k8s.gcr.io/hyperkube:v1.12.2 \
|
||||||
--net=host \
|
--net=host \
|
||||||
--dns=host \
|
--dns=host \
|
||||||
--exec=/kubectl -- --kubeconfig=/etc/kubernetes/kubeconfig delete node $(hostname)
|
--exec=/kubectl -- --kubeconfig=/etc/kubernetes/kubeconfig delete node $(hostname)
|
||||||
47
aws/container-linux/kubernetes/workers/ingress.tf
Normal file
47
aws/container-linux/kubernetes/workers/ingress.tf
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
# Target groups of instances for use with load balancers
|
||||||
|
|
||||||
|
resource "aws_lb_target_group" "workers-http" {
|
||||||
|
name = "${var.name}-workers-http"
|
||||||
|
vpc_id = "${var.vpc_id}"
|
||||||
|
target_type = "instance"
|
||||||
|
|
||||||
|
protocol = "TCP"
|
||||||
|
port = 80
|
||||||
|
|
||||||
|
# HTTP health check for ingress
|
||||||
|
health_check {
|
||||||
|
protocol = "HTTP"
|
||||||
|
port = 10254
|
||||||
|
path = "/healthz"
|
||||||
|
|
||||||
|
# NLBs required to use same healthy and unhealthy thresholds
|
||||||
|
healthy_threshold = 3
|
||||||
|
unhealthy_threshold = 3
|
||||||
|
|
||||||
|
# Interval between health checks required to be 10 or 30
|
||||||
|
interval = 10
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "aws_lb_target_group" "workers-https" {
|
||||||
|
name = "${var.name}-workers-https"
|
||||||
|
vpc_id = "${var.vpc_id}"
|
||||||
|
target_type = "instance"
|
||||||
|
|
||||||
|
protocol = "TCP"
|
||||||
|
port = 443
|
||||||
|
|
||||||
|
# HTTP health check for ingress
|
||||||
|
health_check {
|
||||||
|
protocol = "HTTP"
|
||||||
|
port = 10254
|
||||||
|
path = "/healthz"
|
||||||
|
|
||||||
|
# NLBs required to use same healthy and unhealthy thresholds
|
||||||
|
healthy_threshold = 3
|
||||||
|
unhealthy_threshold = 3
|
||||||
|
|
||||||
|
# Interval between health checks required to be 10 or 30
|
||||||
|
interval = 10
|
||||||
|
}
|
||||||
|
}
|
||||||
9
aws/container-linux/kubernetes/workers/outputs.tf
Normal file
9
aws/container-linux/kubernetes/workers/outputs.tf
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
output "target_group_http" {
|
||||||
|
description = "ARN of a target group of workers for HTTP traffic"
|
||||||
|
value = "${aws_lb_target_group.workers-http.arn}"
|
||||||
|
}
|
||||||
|
|
||||||
|
output "target_group_https" {
|
||||||
|
description = "ARN of a target group of workers for HTTPS traffic"
|
||||||
|
value = "${aws_lb_target_group.workers-https.arn}"
|
||||||
|
}
|
||||||
99
aws/container-linux/kubernetes/workers/variables.tf
Normal file
99
aws/container-linux/kubernetes/workers/variables.tf
Normal file
@ -0,0 +1,99 @@
|
|||||||
|
variable "name" {
|
||||||
|
type = "string"
|
||||||
|
description = "Unique name for the worker pool"
|
||||||
|
}
|
||||||
|
|
||||||
|
# AWS
|
||||||
|
|
||||||
|
variable "vpc_id" {
|
||||||
|
type = "string"
|
||||||
|
description = "Must be set to `vpc_id` output by cluster"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "subnet_ids" {
|
||||||
|
type = "list"
|
||||||
|
description = "Must be set to `subnet_ids` output by cluster"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "security_groups" {
|
||||||
|
type = "list"
|
||||||
|
description = "Must be set to `worker_security_groups` output by cluster"
|
||||||
|
}
|
||||||
|
|
||||||
|
# instances
|
||||||
|
|
||||||
|
variable "count" {
|
||||||
|
type = "string"
|
||||||
|
default = "1"
|
||||||
|
description = "Number of instances"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "instance_type" {
|
||||||
|
type = "string"
|
||||||
|
default = "t2.small"
|
||||||
|
description = "EC2 instance type"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "os_image" {
|
||||||
|
type = "string"
|
||||||
|
default = "coreos-stable"
|
||||||
|
description = "AMI channel for a Container Linux derivative (coreos-stable, coreos-beta, coreos-alpha, flatcar-stable, flatcar-beta, flatcar-alpha)"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "disk_size" {
|
||||||
|
type = "string"
|
||||||
|
default = "40"
|
||||||
|
description = "Size of the EBS volume in GB"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "disk_type" {
|
||||||
|
type = "string"
|
||||||
|
default = "gp2"
|
||||||
|
description = "Type of the EBS volume (e.g. standard, gp2, io1)"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "disk_iops" {
|
||||||
|
type = "string"
|
||||||
|
default = "0"
|
||||||
|
description = "IOPS of the EBS volume (required for io1)"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "spot_price" {
|
||||||
|
type = "string"
|
||||||
|
default = ""
|
||||||
|
description = "Spot price in USD for autoscaling group spot instances. Leave as default empty string for autoscaling group to use on-demand instances. Note, switching in-place from spot to on-demand is not possible: https://github.com/terraform-providers/terraform-provider-aws/issues/4320"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "clc_snippets" {
|
||||||
|
type = "list"
|
||||||
|
description = "Container Linux Config snippets"
|
||||||
|
default = []
|
||||||
|
}
|
||||||
|
|
||||||
|
# configuration
|
||||||
|
|
||||||
|
variable "kubeconfig" {
|
||||||
|
type = "string"
|
||||||
|
description = "Must be set to `kubeconfig` output by cluster"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "ssh_authorized_key" {
|
||||||
|
type = "string"
|
||||||
|
description = "SSH public key for user 'core'"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "service_cidr" {
|
||||||
|
description = <<EOD
|
||||||
|
CIDR IPv4 range to assign Kubernetes services.
|
||||||
|
The 1st IP will be reserved for kube_apiserver, the 10th IP will be reserved for coredns.
|
||||||
|
EOD
|
||||||
|
|
||||||
|
type = "string"
|
||||||
|
default = "10.3.0.0/16"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "cluster_domain_suffix" {
|
||||||
|
description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. foo.default.svc.cluster.local) "
|
||||||
|
type = "string"
|
||||||
|
default = "cluster.local"
|
||||||
|
}
|
||||||
85
aws/container-linux/kubernetes/workers/workers.tf
Normal file
85
aws/container-linux/kubernetes/workers/workers.tf
Normal file
@ -0,0 +1,85 @@
|
|||||||
|
# Workers AutoScaling Group
|
||||||
|
resource "aws_autoscaling_group" "workers" {
|
||||||
|
name = "${var.name}-worker ${aws_launch_configuration.worker.name}"
|
||||||
|
|
||||||
|
# count
|
||||||
|
desired_capacity = "${var.count}"
|
||||||
|
min_size = "${var.count}"
|
||||||
|
max_size = "${var.count + 2}"
|
||||||
|
default_cooldown = 30
|
||||||
|
health_check_grace_period = 30
|
||||||
|
|
||||||
|
# network
|
||||||
|
vpc_zone_identifier = ["${var.subnet_ids}"]
|
||||||
|
|
||||||
|
# template
|
||||||
|
launch_configuration = "${aws_launch_configuration.worker.name}"
|
||||||
|
|
||||||
|
# target groups to which instances should be added
|
||||||
|
target_group_arns = [
|
||||||
|
"${aws_lb_target_group.workers-http.id}",
|
||||||
|
"${aws_lb_target_group.workers-https.id}",
|
||||||
|
]
|
||||||
|
|
||||||
|
lifecycle {
|
||||||
|
# override the default destroy and replace update behavior
|
||||||
|
create_before_destroy = true
|
||||||
|
}
|
||||||
|
|
||||||
|
# Waiting for instance creation delays adding the ASG to state. If instances
|
||||||
|
# can't be created (e.g. spot price too low), the ASG will be orphaned.
|
||||||
|
# Orphaned ASGs escape cleanup, can't be updated, and keep bidding if spot is
|
||||||
|
# used. Disable wait to avoid issues and align with other clouds.
|
||||||
|
wait_for_capacity_timeout = "0"
|
||||||
|
|
||||||
|
tags = [{
|
||||||
|
key = "Name"
|
||||||
|
value = "${var.name}-worker"
|
||||||
|
propagate_at_launch = true
|
||||||
|
}]
|
||||||
|
}
|
||||||
|
|
||||||
|
# Worker template
|
||||||
|
resource "aws_launch_configuration" "worker" {
|
||||||
|
image_id = "${local.ami_id}"
|
||||||
|
instance_type = "${var.instance_type}"
|
||||||
|
spot_price = "${var.spot_price}"
|
||||||
|
enable_monitoring = false
|
||||||
|
|
||||||
|
user_data = "${data.ct_config.worker-ignition.rendered}"
|
||||||
|
|
||||||
|
# storage
|
||||||
|
root_block_device {
|
||||||
|
volume_type = "${var.disk_type}"
|
||||||
|
volume_size = "${var.disk_size}"
|
||||||
|
iops = "${var.disk_iops}"
|
||||||
|
}
|
||||||
|
|
||||||
|
# network
|
||||||
|
security_groups = ["${var.security_groups}"]
|
||||||
|
|
||||||
|
lifecycle {
|
||||||
|
// Override the default destroy and replace update behavior
|
||||||
|
create_before_destroy = true
|
||||||
|
ignore_changes = ["image_id"]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Worker Ignition config
|
||||||
|
data "ct_config" "worker-ignition" {
|
||||||
|
content = "${data.template_file.worker-config.rendered}"
|
||||||
|
pretty_print = false
|
||||||
|
snippets = ["${var.clc_snippets}"]
|
||||||
|
}
|
||||||
|
|
||||||
|
# Worker Container Linux config
|
||||||
|
data "template_file" "worker-config" {
|
||||||
|
template = "${file("${path.module}/cl/worker.yaml.tmpl")}"
|
||||||
|
|
||||||
|
vars = {
|
||||||
|
kubeconfig = "${indent(10, var.kubeconfig)}"
|
||||||
|
ssh_authorized_key = "${var.ssh_authorized_key}"
|
||||||
|
k8s_dns_service_ip = "${cidrhost(var.service_cidr, 10)}"
|
||||||
|
cluster_domain_suffix = "${var.cluster_domain_suffix}"
|
||||||
|
}
|
||||||
|
}
|
||||||
23
aws/fedora-atomic/kubernetes/LICENSE
Normal file
23
aws/fedora-atomic/kubernetes/LICENSE
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
The MIT License (MIT)
|
||||||
|
|
||||||
|
Copyright (c) 2017 Typhoon Authors
|
||||||
|
Copyright (c) 2017 Dalton Hubble
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
THE SOFTWARE.
|
||||||
|
|
||||||
23
aws/fedora-atomic/kubernetes/README.md
Normal file
23
aws/fedora-atomic/kubernetes/README.md
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
# Typhoon <img align="right" src="https://storage.googleapis.com/poseidon/typhoon-logo.png">
|
||||||
|
|
||||||
|
Typhoon is a minimal and free Kubernetes distribution.
|
||||||
|
|
||||||
|
* Minimal, stable base Kubernetes distribution
|
||||||
|
* Declarative infrastructure and configuration
|
||||||
|
* Free (freedom and cost) and privacy-respecting
|
||||||
|
* Practical for labs, datacenters, and clouds
|
||||||
|
|
||||||
|
Typhoon distributes upstream Kubernetes, architectural conventions, and cluster addons, much like a GNU/Linux distribution provides the Linux kernel and userspace components.
|
||||||
|
|
||||||
|
## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>
|
||||||
|
|
||||||
|
* Kubernetes v1.12.2 (upstream, via [kubernetes-incubator/bootkube](https://github.com/kubernetes-incubator/bootkube))
|
||||||
|
* Single or multi-master, workloads isolated on workers, [Calico](https://www.projectcalico.org/) or [flannel](https://github.com/coreos/flannel) networking
|
||||||
|
* On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/)
|
||||||
|
* Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/)
|
||||||
|
* Ready for Ingress, Prometheus, Grafana, and other optional [addons](https://typhoon.psdn.io/addons/overview/)
|
||||||
|
|
||||||
|
## Docs
|
||||||
|
|
||||||
|
Please see the [official docs](https://typhoon.psdn.io) and the AWS [tutorial](https://typhoon.psdn.io/cl/aws/).
|
||||||
|
|
||||||
19
aws/fedora-atomic/kubernetes/ami.tf
Normal file
19
aws/fedora-atomic/kubernetes/ami.tf
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
data "aws_ami" "fedora" {
|
||||||
|
most_recent = true
|
||||||
|
owners = ["125523088429"]
|
||||||
|
|
||||||
|
filter {
|
||||||
|
name = "architecture"
|
||||||
|
values = ["x86_64"]
|
||||||
|
}
|
||||||
|
|
||||||
|
filter {
|
||||||
|
name = "virtualization-type"
|
||||||
|
values = ["hvm"]
|
||||||
|
}
|
||||||
|
|
||||||
|
filter {
|
||||||
|
name = "name"
|
||||||
|
values = ["Fedora-AtomicHost-28-20180625.1.x86_64-*-gp2-*"]
|
||||||
|
}
|
||||||
|
}
|
||||||
17
aws/fedora-atomic/kubernetes/bootkube.tf
Normal file
17
aws/fedora-atomic/kubernetes/bootkube.tf
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
# Self-hosted Kubernetes assets (kubeconfig, manifests)
|
||||||
|
module "bootkube" {
|
||||||
|
source = "git::https://github.com/poseidon/terraform-render-bootkube.git?ref=f39f8294c465397e622c606174e6f412ee3ca0f8"
|
||||||
|
|
||||||
|
cluster_name = "${var.cluster_name}"
|
||||||
|
api_servers = ["${format("%s.%s", var.cluster_name, var.dns_zone)}"]
|
||||||
|
etcd_servers = ["${aws_route53_record.etcds.*.fqdn}"]
|
||||||
|
asset_dir = "${var.asset_dir}"
|
||||||
|
networking = "${var.networking}"
|
||||||
|
network_mtu = "${var.network_mtu}"
|
||||||
|
pod_cidr = "${var.pod_cidr}"
|
||||||
|
service_cidr = "${var.service_cidr}"
|
||||||
|
cluster_domain_suffix = "${var.cluster_domain_suffix}"
|
||||||
|
|
||||||
|
# Fedora
|
||||||
|
trusted_certs_dir = "/etc/pki/tls/certs"
|
||||||
|
}
|
||||||
109
aws/fedora-atomic/kubernetes/cloudinit/controller.yaml.tmpl
Normal file
109
aws/fedora-atomic/kubernetes/cloudinit/controller.yaml.tmpl
Normal file
@ -0,0 +1,109 @@
|
|||||||
|
#cloud-config
|
||||||
|
write_files:
|
||||||
|
- path: /etc/etcd/etcd.conf
|
||||||
|
content: |
|
||||||
|
ETCD_NAME=${etcd_name}
|
||||||
|
ETCD_DATA_DIR=/var/lib/etcd
|
||||||
|
ETCD_ADVERTISE_CLIENT_URLS=https://${etcd_domain}:2379
|
||||||
|
ETCD_INITIAL_ADVERTISE_PEER_URLS=https://${etcd_domain}:2380
|
||||||
|
ETCD_LISTEN_CLIENT_URLS=https://0.0.0.0:2379
|
||||||
|
ETCD_LISTEN_PEER_URLS=https://0.0.0.0:2380
|
||||||
|
ETCD_LISTEN_METRICS_URLS=http://0.0.0.0:2381
|
||||||
|
ETCD_INITIAL_CLUSTER=${etcd_initial_cluster}
|
||||||
|
ETCD_STRICT_RECONFIG_CHECK=true
|
||||||
|
ETCD_TRUSTED_CA_FILE=/etc/ssl/certs/etcd/server-ca.crt
|
||||||
|
ETCD_CERT_FILE=/etc/ssl/certs/etcd/server.crt
|
||||||
|
ETCD_KEY_FILE=/etc/ssl/certs/etcd/server.key
|
||||||
|
ETCD_CLIENT_CERT_AUTH=true
|
||||||
|
ETCD_PEER_TRUSTED_CA_FILE=/etc/ssl/certs/etcd/peer-ca.crt
|
||||||
|
ETCD_PEER_CERT_FILE=/etc/ssl/certs/etcd/peer.crt
|
||||||
|
ETCD_PEER_KEY_FILE=/etc/ssl/certs/etcd/peer.key
|
||||||
|
ETCD_PEER_CLIENT_CERT_AUTH=true
|
||||||
|
- path: /etc/systemd/system/cloud-metadata.service
|
||||||
|
content: |
|
||||||
|
[Unit]
|
||||||
|
Description=Cloud metadata agent
|
||||||
|
[Service]
|
||||||
|
Type=oneshot
|
||||||
|
Environment=OUTPUT=/run/metadata/cloud
|
||||||
|
ExecStart=/usr/bin/mkdir -p /run/metadata
|
||||||
|
ExecStart=/usr/bin/bash -c 'echo "HOSTNAME_OVERRIDE=$(curl\
|
||||||
|
--url http://169.254.169.254/latest/meta-data/local-ipv4\
|
||||||
|
--retry 10)" > $${OUTPUT}'
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
||||||
|
- path: /etc/systemd/system/kubelet.service.d/10-typhoon.conf
|
||||||
|
content: |
|
||||||
|
[Unit]
|
||||||
|
Requires=cloud-metadata.service
|
||||||
|
After=cloud-metadata.service
|
||||||
|
Wants=rpc-statd.service
|
||||||
|
[Service]
|
||||||
|
ExecStartPre=/bin/mkdir -p /opt/cni/bin
|
||||||
|
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
|
||||||
|
ExecStartPre=/bin/mkdir -p /etc/kubernetes/cni/net.d
|
||||||
|
ExecStartPre=/bin/mkdir -p /etc/kubernetes/checkpoint-secrets
|
||||||
|
ExecStartPre=/bin/mkdir -p /etc/kubernetes/inactive-manifests
|
||||||
|
ExecStartPre=/bin/mkdir -p /var/lib/cni
|
||||||
|
ExecStartPre=/bin/mkdir -p /var/lib/kubelet/volumeplugins
|
||||||
|
ExecStartPre=/usr/bin/bash -c "grep 'certificate-authority-data' /etc/kubernetes/kubeconfig | awk '{print $2}' | base64 -d > /etc/kubernetes/ca.crt"
|
||||||
|
Restart=always
|
||||||
|
RestartSec=10
|
||||||
|
- path: /etc/kubernetes/kubelet.conf
|
||||||
|
content: |
|
||||||
|
ARGS="--anonymous-auth=false \
|
||||||
|
--authentication-token-webhook \
|
||||||
|
--authorization-mode=Webhook \
|
||||||
|
--client-ca-file=/etc/kubernetes/ca.crt \
|
||||||
|
--cluster_dns=${k8s_dns_service_ip} \
|
||||||
|
--cluster_domain=${cluster_domain_suffix} \
|
||||||
|
--cni-conf-dir=/etc/kubernetes/cni/net.d \
|
||||||
|
--exit-on-lock-contention \
|
||||||
|
--kubeconfig=/etc/kubernetes/kubeconfig \
|
||||||
|
--lock-file=/var/run/lock/kubelet.lock \
|
||||||
|
--network-plugin=cni \
|
||||||
|
--node-labels=node-role.kubernetes.io/master \
|
||||||
|
--node-labels=node-role.kubernetes.io/controller="true" \
|
||||||
|
--pod-manifest-path=/etc/kubernetes/manifests \
|
||||||
|
--read-only-port=0 \
|
||||||
|
--register-with-taints=node-role.kubernetes.io/master=:NoSchedule \
|
||||||
|
--volume-plugin-dir=/var/lib/kubelet/volumeplugins"
|
||||||
|
- path: /etc/kubernetes/kubeconfig
|
||||||
|
permissions: '0644'
|
||||||
|
content: |
|
||||||
|
${kubeconfig}
|
||||||
|
- path: /var/lib/bootkube/.keep
|
||||||
|
- path: /etc/NetworkManager/conf.d/typhoon.conf
|
||||||
|
content: |
|
||||||
|
[main]
|
||||||
|
plugins=keyfile
|
||||||
|
[keyfile]
|
||||||
|
unmanaged-devices=interface-name:cali*;interface-name:tunl*
|
||||||
|
- path: /etc/selinux/config
|
||||||
|
owner: root:root
|
||||||
|
permissions: '0644'
|
||||||
|
content: |
|
||||||
|
SELINUX=permissive
|
||||||
|
SELINUXTYPE=targeted
|
||||||
|
bootcmd:
|
||||||
|
- [setenforce, Permissive]
|
||||||
|
- [systemctl, disable, firewalld, --now]
|
||||||
|
# https://github.com/kubernetes/kubernetes/issues/60869
|
||||||
|
- [modprobe, ip_vs]
|
||||||
|
runcmd:
|
||||||
|
- [systemctl, daemon-reload]
|
||||||
|
- [systemctl, restart, NetworkManager]
|
||||||
|
- "atomic install --system --name=etcd quay.io/poseidon/etcd:v3.3.10"
|
||||||
|
- "atomic install --system --name=kubelet quay.io/poseidon/kubelet:v1.12.2"
|
||||||
|
- "atomic install --system --name=bootkube quay.io/poseidon/bootkube:v0.13.0"
|
||||||
|
- [systemctl, start, --no-block, etcd.service]
|
||||||
|
- [systemctl, enable, cloud-metadata.service]
|
||||||
|
- [systemctl, start, --no-block, kubelet.service]
|
||||||
|
users:
|
||||||
|
- default
|
||||||
|
- name: fedora
|
||||||
|
gecos: Fedora Admin
|
||||||
|
sudo: ALL=(ALL) NOPASSWD:ALL
|
||||||
|
groups: wheel,adm,systemd-journal,docker
|
||||||
|
ssh-authorized-keys:
|
||||||
|
- "${ssh_authorized_key}"
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user