Mirror of https://github.com/puppetmaster/typhoon.git, synced 2025-08-02 19:01:34 +02:00
Compare commits
339 Commits
SHA1 | Author | Date | |
---|---|---|---|
e06ee042ee | |||
a527f73f5a | |||
c21a0479c0 | |||
f614c538cf | |||
3da8c1575c | |||
dedd17d085 | |||
e274a451ff | |||
b2e36947ab | |||
5af0a5c5b9 | |||
2265ab5375 | |||
08ea9776f3 | |||
2e8bc99164 | |||
b18b0a9f3d | |||
beb9f1477a | |||
f544a9c71f | |||
415b7fa19a | |||
d0c29099ba | |||
30e4070474 | |||
43f6a19060 | |||
50215e373b | |||
a9f9c59b91 | |||
6ed048eb65 | |||
ce7b2fa21f | |||
9e3807798f | |||
ef9c6aa423 | |||
bb5e5811ec | |||
16aa997604 | |||
fb6650b06b | |||
43c6558aaf | |||
125008fbb3 | |||
136107b448 | |||
e97c1cc9e5 | |||
39da5b53f5 | |||
41f739891b | |||
861021ee98 | |||
9d583ab377 | |||
c1d28e6f61 | |||
a8fd21d250 | |||
9c626c9dbd | |||
85252dec6e | |||
298ea65d3e | |||
c0ab15ba22 | |||
5d7b6f611e | |||
93594292eb | |||
0546608e77 | |||
94b2793e40 | |||
4fd43b39ad | |||
65083aca7d | |||
07db4c1143 | |||
e5d0ce5fd7 | |||
b934a13605 | |||
cd005a0b27 | |||
dd4a5a4e7e | |||
af835f976f | |||
9e4a369f76 | |||
831d897533 | |||
17dce49982 | |||
5744e10329 | |||
20748536df | |||
f2e6256dd9 | |||
443bd5a26b | |||
f8162b9be3 | |||
20ffbba4bf | |||
15117fb95b | |||
10af8b4120 | |||
e51b2903c1 | |||
cb72b261c7 | |||
209efd2f5b | |||
388b1238bc | |||
5a1e455220 | |||
69f37c8b17 | |||
b30de949b8 | |||
4973178750 | |||
bb7f31822e | |||
c6923b9ef3 | |||
dae79d5916 | |||
f4d5ac0ca7 | |||
7e1b2cdba1 | |||
3bb20ce083 | |||
eb29fb639b | |||
fcbdb50d93 | |||
efac611e9c | |||
87ff431b80 | |||
0d8ceae1d9 | |||
c5cf803634 | |||
61ee01f462 | |||
cbef202eec | |||
0c99b909a9 | |||
739db3b35f | |||
c68b035a63 | |||
1a5949824c | |||
9bac641511 | |||
37ff3c28eb | |||
f03045f0dc | |||
b603bbde3d | |||
810236f6df | |||
3c3d3a2473 | |||
1af9fd8094 | |||
c734fa7b84 | |||
fdade5b40c | |||
171fd2c998 | |||
545bd79624 | |||
12b825c78f | |||
66e7354c8a | |||
3a71b2ccb1 | |||
c7e327417b | |||
e313e733ab | |||
d0e73b8174 | |||
65ddd2419c | |||
b0e9b1fa60 | |||
485feb82c4 | |||
0b276b6b7e | |||
e8513e58bb | |||
d77343be3a | |||
f2b01e1d75 | |||
60c2107d7f | |||
30cfeec6c1 | |||
ba8774ee0d | |||
24e63bd134 | |||
996bdd9112 | |||
a34d78f55d | |||
04b2e149ba | |||
9f0126a410 | |||
a1bab9c96e | |||
966fd280b0 | |||
e4e074c894 | |||
d51da49925 | |||
2076a779a3 | |||
048094b256 | |||
75b063c586 | |||
1620d1e456 | |||
939bffbf98 | |||
bc96443710 | |||
82a7422b3d | |||
132ab395a5 | |||
5f87eb3ec9 | |||
b152b9f973 | |||
9c842395a8 | |||
6cb9c0341b | |||
d4fd6d4adb | |||
3664dfafc2 | |||
e535ddd15a | |||
5752a8f041 | |||
68abbf7b0d | |||
67047ead08 | |||
c11e23fc50 | |||
b647ad8806 | |||
2eb1ac1b4d | |||
cb2721ef7d | |||
fc06d28e13 | |||
a9078cb52b | |||
ebd9570ede | |||
34e8db7aae | |||
084e8bea49 | |||
d73621c838 | |||
1a6481df04 | |||
798ec9a92f | |||
96aed4c3c3 | |||
7372d33af8 | |||
451ec771a8 | |||
4d9846b83e | |||
597ca4acce | |||
507c646e8b | |||
d8f7da6873 | |||
048f1f514e | |||
b825cd9afe | |||
796149d122 | |||
a66bccd590 | |||
30b1edfcc6 | |||
a4afe06b64 | |||
4d58be0816 | |||
170b768ad8 | |||
5bc1cd28c3 | |||
13fbac6c79 | |||
a8fa4a9a06 | |||
a5c1a96df1 | |||
6a091e245e | |||
590796ee62 | |||
ec389295fe | |||
3c807f3478 | |||
e76fe80b45 | |||
32853aaa7b | |||
c32a54db40 | |||
9671b1c734 | |||
3b933e1ab3 | |||
58d8f6f505 | |||
56853fe222 | |||
18165d8076 | |||
50acf28ce5 | |||
ab793eb842 | |||
b74c958524 | |||
2024d3c32e | |||
11c434915f | |||
05f7df9e80 | |||
4220b9ce18 | |||
6a6af4aa16 | |||
3dcd10f3b8 | |||
22503993b9 | |||
cf3aa8885b | |||
ba61a137db | |||
646bdd78e4 | |||
c163fbbbcd | |||
dc7be431e0 | |||
86e0f806b3 | |||
96172ad269 | |||
3eb20a1f4b | |||
ee9ce3d0ab | |||
a8b8a9b454 | |||
968febb050 | |||
bee455f83a | |||
3e89ea1b4a | |||
e77dd6ecd4 | |||
4fd4a0f540 | |||
804dfea0f9 | |||
8ba23f364c | |||
f6025666eb | |||
85eb502f19 | |||
fa3184fb9c | |||
22565e57e0 | |||
026e1f3648 | |||
ae548ce213 | |||
e826b49648 | |||
fa8f68f50e | |||
ba8d972c76 | |||
c0347ca0c6 | |||
9f94ab6bcc | |||
5e4f5de271 | |||
be28495d79 | |||
f1356fec24 | |||
cc00afa4e1 | |||
5c3b5a20de | |||
f5a83667e8 | |||
a911367c2e | |||
f884de847e | |||
1b3a0f6ebc | |||
1113a22f61 | |||
152c7d86bd | |||
79deb8a967 | |||
f412f0d9f2 | |||
eca6c4a1a1 | |||
133d325013 | |||
4b05c0180e | |||
f49ab3a6ee | |||
0eef16b274 | |||
ad1f59ce91 | |||
82e5ac3e7c | |||
a8f7880511 | |||
cda5b93b09 | |||
3e9f5f34de | |||
893d139590 | |||
fc62e51b2a | |||
e5ba3329eb | |||
7c3f3ab6d0 | |||
a99a990d49 | |||
df17253e72 | |||
eda78db08e | |||
afac46e39a | |||
b1e680ac0c | |||
9fbfbdb854 | |||
511f5272f4 | |||
46ca5e8813 | |||
394e496cc7 | |||
a38ec1a856 | |||
7881f4bd86 | |||
d5b5b7cb02 | |||
759a48be7c | |||
b39a1d70da | |||
901f7939b2 | |||
d65085ce14 | |||
343db5b578 | |||
444363be2d | |||
bc7ad25c60 | |||
e838d4dc3d | |||
979c092ef6 | |||
db8e94bb4b | |||
eb093af9ed | |||
36096f844d | |||
d236628e53 | |||
577b927a2b | |||
000c11edf6 | |||
29b16c3fc0 | |||
0c7a879bc4 | |||
1e654c9e4e | |||
28ee693e6b | |||
8c7d95aefd | |||
d45dfdbf91 | |||
d7e0536838 | |||
8dd221a57c | |||
f17bb4cf61 | |||
44f1fe620a | |||
a504264e24 | |||
88cf7273dc | |||
58def65a09 | |||
cd7fd29194 | |||
aafa38476a | |||
9a07f1d30b | |||
c87db3ef37 | |||
342380cfa4 | |||
5e70d7e2c8 | |||
aab071309f | |||
f6ce12766b | |||
e1d6ab2f24 | |||
8b3d41d6a0 | |||
ccee5d3d89 | |||
8aefd4f082 | |||
78e6409bd0 | |||
2aef42d4f6 | |||
b7d67757de | |||
26f5d2d753 | |||
cd0a28904e | |||
618f8b30fd | |||
264d23a1b5 | |||
f96e91f225 | |||
efd4a0319d | |||
6df6bf904a | |||
5fba20d358 | |||
a8d3d3bb12 | |||
9ea6d2c245 | |||
507aac9b78 | |||
dfd2a0ec23 | |||
e3bf7d8f9b | |||
49050320ce | |||
74e025c9e4 | |||
257a49ce37 | |||
df3f40bcce | |||
32886cfba1 | |||
0ba2c1a4da | |||
430d139a5b | |||
7c6ab21b94 | |||
21178868db | |||
9dcf35e393 | |||
81b6f54169 | |||
7bce15975c | |||
1f83ae7dbb | |||
a10a1cee9f | |||
a79ad34ba3 | |||
99a11442c7 | |||
d27f367004 | |||
e9c8520359 |
1 .github/FUNDING.yml vendored Normal file
@ -0,0 +1 @@
github: [poseidon]
9 .github/dependabot.yaml vendored Normal file
@ -0,0 +1,9 @@
version: 2
updates:
  - package-ecosystem: pip
    directory: "/"
    schedule:
      interval: weekly
    pull-request-branch-name:
      separator: "-"
    open-pull-requests-limit: 3
621 CHANGES.md
@ -4,6 +4,625 @@ Notable changes between versions.

## Latest

## v1.23.3

* Kubernetes [v1.23.3](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.23.md#v1233)

### Flatcar Linux

#### Google Cloud

* Switch to using official Kinvolk Flatcar Linux images
* Promote Typhoon on Flatcar Linux / Google Cloud to stable
* Change `os_image` to `flatcar-stable`, `flatcar-beta`, or `flatcar-alpha` (**action required**); see the sketch below
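A minimal sketch of where the new `os_image` value goes, assuming the Google Cloud Flatcar Linux module listed elsewhere on this page; the cluster name, region, DNS zone, and SSH key are placeholders rather than values from the release notes:

```tf
module "yavin" {
  source = "git::https://github.com/poseidon/typhoon//google-cloud/flatcar-linux/kubernetes?ref=v1.23.3"

  # Google Cloud (placeholder values)
  cluster_name  = "yavin"
  region        = "us-central1"
  dns_zone      = "example.com"
  dns_zone_name = "example-zone"

  # select an official Kinvolk Flatcar Linux image channel
  os_image = "flatcar-stable"

  # configuration (placeholder key)
  ssh_authorized_key = "ssh-ed25519 AAAAB3Nz..."
}
```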
## v1.23.2
|
||||
|
||||
* Kubernetes [v1.23.2](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.23.md#v1232)
|
||||
* Remove Kubelet flag `--network-plugin`. Unused since `docker-shim` isn't used ([#1106](https://github.com/poseidon/typhoon/pull/1106))
|
||||
|
||||
### Fedora CoreOS
|
||||
|
||||
* Switch Kubernetes Container Runtime from `docker` to `containerd` ([#1101](https://github.com/poseidon/typhoon/pull/1101))
|
||||
* Mask `docker.service` to prevent it from being socket activated ([#1105](https://github.com/poseidon/typhoon/pull/1105))
|
||||
|
||||
### Flatcar Linux
|
||||
|
||||
#### AWS
|
||||
|
||||
* Add experimental Flatcar Linux ARM64 support ([docs](https://typhoon.psdn.io/advanced/arm64/), [#1102](https://github.com/poseidon/typhoon/pull/1102)); see the sketch after this list
  * Add `arch` variable to AWS `kubernetes` and `workers` modules
  * Allow arm64 full-cluster or mixed/hybrid cluster with arm64 workers
  * Requires `flannel` or `cilium` CNI provider
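A sketch of an arm64 cluster using the new `arch` variable on the AWS Flatcar Linux module; the cluster name, DNS zone id, and SSH key are placeholders, and Cilium is chosen here only to satisfy the CNI requirement noted above:

```tf
module "tempest" {
  source = "git::https://github.com/poseidon/typhoon//aws/flatcar-linux/kubernetes?ref=v1.23.2"

  # AWS (placeholder values)
  cluster_name = "tempest"
  dns_zone     = "aws.example.com"
  dns_zone_id  = "Z3PAABBCF0EXAMPLE"

  # experimental arm64 cluster; requires the flannel or cilium CNI provider
  arch       = "arm64"
  networking = "cilium"

  # configuration (placeholder key)
  ssh_authorized_key = "ssh-ed25519 AAAAB3Nz..."
}
```

A mixed/hybrid cluster would instead keep controllers on the default architecture and set `arch` on an arm64 `workers` pool, per the notes above.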
### DigitalOcean
|
||||
|
||||
* Upgrade DigitalOcean Terraform provider to [v2.x](https://registry.terraform.io/providers/digitalocean/digitalocean/latest/docs) ([#1109](https://github.com/poseidon/typhoon/pull/1109))
|
||||
|
||||
### Addons
|
||||
|
||||
* Update nginx-ingress from v1.1.0 to [v1.1.1](https://github.com/kubernetes/ingress-nginx/releases/tag/controller-v1.1.1)
|
||||
* Update Grafana from v8.3.3 to [v8.3.4](https://github.com/grafana/grafana/releases/tag/v8.3.4)
|
||||
|
||||
## v1.23.1
|
||||
|
||||
* Kubernetes [v1.23.1](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.23.md#v1231)
|
||||
* Workaround Terraform v1.1 regression in `file` provisioner ([#1093](https://github.com/poseidon/typhoon/pull/1093))
|
||||
|
||||
### Flatcar Linux
|
||||
|
||||
* Switch Kubernetes Container Runtime from `docker` to `containerd` ([#1087](https://github.com/poseidon/typhoon/pull/1087))
|
||||
|
||||
### Addons
|
||||
|
||||
* Configure Prometheus to allow a custom scrape query parameter ([#1095](https://github.com/poseidon/typhoon/pull/1095))
|
||||
* Configure Prometheus to probe Kubernetes Ingress via `blackbox-exporter` ([#1096](https://github.com/poseidon/typhoon/pull/1096))
|
||||
* Fix Prometheus Service probes to use `blackbox-exporter`, not `blackbox` ([#1096](https://github.com/poseidon/typhoon/pull/1096))
|
||||
|
||||
## v1.23.0
|
||||
|
||||
* Kubernetes [v1.23.0](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.23.md#v1230)
|
||||
* Normalize CA cert mounts in static Pods and kube-proxy ([#1078](https://github.com/poseidon/typhoon/pull/1078))
|
||||
* Set Kubelet resolver config to `/run/systemd/resolve/resolv.conf` ([#1082](https://github.com/poseidon/typhoon/pull/1082))
|
||||
* Update Cilium from v1.10.5 to [v1.11.0](https://github.com/cilium/cilium/releases/tag/v1.11.0) ([#1083](https://github.com/poseidon/typhoon/pull/1083))
|
||||
* With Calico, add missing `caliconodestatuses` CRD ([#289](https://github.com/poseidon/terraform-render-bootstrap/pull/289))
|
||||
* Change `enable_aggregation` default to true ([#279](https://github.com/poseidon/terraform-render-bootstrap/pull/279)); see the sketch after this list
|
||||
* Remove deprecated `--port` from `kube-scheduler` ([#1078](https://github.com/poseidon/typhoon/pull/1078))
|
||||
|
||||
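For clusters that relied on the old default, a hedged sketch of keeping aggregation disabled; the module source is illustrative, the other settings are omitted, and only `enable_aggregation` relates to the change above:

```tf
module "yavin" {
  source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes?ref=v1.23.0"

  # ... platform and configuration settings omitted ...

  # keep the pre-v1.23.0 behavior instead of the new default (true)
  enable_aggregation = false
}
```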
### AWS
|
||||
|
||||
* Change controller node default `disk_iops` to 3000 ([#1073](https://github.com/poseidon/typhoon/pull/1073))
|
||||
|
||||
### Azure
|
||||
|
||||
* Fix warning about deprecated `backend_address_pool_id` ([#1086](https://github.com/poseidon/typhoon/pull/1086))
|
||||
|
||||
### Fedora CoreOS
|
||||
|
||||
* Fix Fedora CoreOS ARM64 workers to use official Fedora CoreOS AMIs ([#1072](https://github.com/poseidon/typhoon/pull/1072))
|
||||
* Should have been changed alongside controller AMIs in ([#1038](https://github.com/poseidon/typhoon/pull/1038))
|
||||
* Old Poseidon built ARM64 AMIs have been deleted
|
||||
|
||||
### Addons
|
||||
|
||||
* Update nginx-ingress from v1.0.5 to [v1.1.0](https://github.com/kubernetes/ingress-nginx/releases/tag/controller-v1.1.0)
|
||||
* Update Prometheus from v2.31.1 to [v2.32.0](https://github.com/prometheus/prometheus/releases/tag/v2.32.0)
|
||||
* Update kube-state-metrics from v2.2.4 to [v2.3.0](https://github.com/kubernetes/kube-state-metrics/releases/tag/v2.3.0)
|
||||
* Update node-exporter from v1.3.0 to [v1.3.1](https://github.com/prometheus/node_exporter/releases/tag/v1.3.1)
|
||||
* Update Grafana from v8.2.4 to [v8.3.3](https://github.com/grafana/grafana/releases/tag/v8.3.3)
|
||||
|
||||
### Known Issues
|
||||
|
||||
* Calico does not yet support Kubernetes v1.23.0, use `flannel` or `cilium` ([calico#5011](https://github.com/projectcalico/calico/issues/5011))
|
||||
|
||||
## v1.22.4
|
||||
|
||||
* Kubernetes [v1.22.4](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.22.md#v1224)
|
||||
* Update CoreDNS from v1.8.4 to [v1.8.6](https://github.com/poseidon/terraform-render-bootstrap/pull/284)
|
||||
* Update Calico from v3.20.2 to [v3.21.0](https://github.com/projectcalico/calico/releases/tag/v3.21.0)
|
||||
* Update flannel from v0.14.0 to [v0.15.1](https://github.com/flannel-io/flannel/releases/tag/v0.15.1)
|
||||
|
||||
### Google
|
||||
|
||||
* Allow use of Terraform provider `google` [v4.0+](https://github.com/hashicorp/terraform-provider-google/releases/tag/v4.0.0)
|
||||
|
||||
### Flatcar Linux
|
||||
|
||||
* Change Kubelet mounts for cgroups v2 ([#1064](https://github.com/poseidon/typhoon/pull/1064))
|
||||
* Update cgroup driver from cgroupfs to systemd (Flatcar Linux changed default) ([#1064](https://github.com/poseidon/typhoon/pull/1064))
|
||||
|
||||
### Addons
|
||||
|
||||
* Update Prometheus from v2.30.3 to [v2.31.1](https://github.com/prometheus/prometheus/releases/tag/v2.31.1)
|
||||
* Update node-exporter from v1.2.2 to [v1.3.0](https://github.com/prometheus/node_exporter/releases/tag/v1.3.0)
|
||||
* Update kube-state-metrics from v2.2.3 to [v2.2.4](https://github.com/kubernetes/kube-state-metrics/releases/tag/v2.2.4)
|
||||
* Update Grafana from v8.2.1 to [v8.2.4](https://github.com/grafana/grafana/releases/tag/v8.2.4)
|
||||
* Update nginx-ingress from v1.0.4 to [v1.0.5](https://github.com/kubernetes/ingress-nginx/releases/tag/controller-v1.0.5)
|
||||
|
||||
## v1.22.3
|
||||
|
||||
* Kubernetes [v1.22.3](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.22.md#v1223)
|
||||
* Update etcd from v3.5.0 to [v3.5.1](https://github.com/etcd-io/etcd/releases/tag/v3.5.1)
|
||||
* Update Cilium from v1.10.4 to [v1.10.5](https://github.com/cilium/cilium/releases/tag/v1.10.5)
|
||||
* Update Calico from v3.20.1 to [v3.20.2](https://github.com/projectcalico/calico/releases/tag/v3.20.2)
|
||||
* Use Calico's iptables legacy vs nft auto-detection
|
||||
* Update flannel from v0.13.0 to v0.14.0
|
||||
|
||||
### Bare-Metal
|
||||
|
||||
* Require Terraform provider `poseidon/matchbox` v0.5+ ([#1048](https://github.com/poseidon/typhoon/pull/1048))
|
||||
|
||||
### Addons
|
||||
|
||||
* Update nginx-ingress from v1.0.0 to [v1.0.4](https://github.com/kubernetes/ingress-nginx/releases/tag/controller-v1.0.4)
|
||||
* Update Prometheus from v2.29.2 to [v2.30.3](https://github.com/prometheus/prometheus/releases/tag/v2.30.3)
|
||||
* Update kube-state-metrics from v2.2.0 to [v2.2.3](https://github.com/kubernetes/kube-state-metrics/releases/tag/v2.2.3)
|
||||
* Update Grafana from v8.1.2 to [v8.2.1](https://github.com/grafana/grafana/releases/tag/v8.2.1)
|
||||
|
||||
## v1.22.2
|
||||
|
||||
* Kubernetes [v1.22.2](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.22.md#v1222)
|
||||
* Update Cilium from v1.10.3 to [v1.10.4](https://github.com/cilium/cilium/releases/tag/v1.10.4)
|
||||
* Update Calico from v3.20.0 to [v3.20.1](https://github.com/projectcalico/calico/releases/tag/v3.20.1)
|
||||
* Fix access to ClusterIP services with Cilium ([#276](https://github.com/poseidon/terraform-render-bootstrap/pull/276))
|
||||
|
||||
### Fedora CoreOS
|
||||
|
||||
* Use Fedora CoreOS ARM64 AMIs ([#1038](https://github.com/poseidon/typhoon/pull/1038))
|
||||
|
||||
### Addons
|
||||
|
||||
* Update Prometheus from v2.29.1 to [v2.29.2](https://github.com/prometheus/prometheus/releases/tag/v2.29.2)
|
||||
* Update kube-state-metrics from v2.1.1 to [v2.2.0](https://github.com/kubernetes/kube-state-metrics/releases/tag/v2.2.0)
|
||||
|
||||
## v1.22.1
|
||||
|
||||
* Kubernetes [v1.22.1](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.22.md#v1221)
|
||||
* Update Calico from v3.19.1 to [v3.20.0](https://github.com/projectcalico/calico/releases/tag/v3.20.0)
|
||||
|
||||
### Addons
|
||||
|
||||
* Update nginx-ingress from v1.0.0-beta.1 to [v1.0.0](https://github.com/kubernetes/ingress-nginx/releases/tag/controller-v1.0.0)
|
||||
* Update Prometheus from v2.28.1 to [v2.29.1](https://github.com/prometheus/prometheus/releases/tag/v2.29.1)
|
||||
* Update Grafana from v8.1.1 to [v8.1.2](https://github.com/grafana/grafana/releases/tag/v8.1.2)
|
||||
|
||||
## v1.22.0
|
||||
|
||||
* Kubernetes [v1.22.0](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.22.md#v1220)
|
||||
* Update etcd from v3.4.16 to [v3.5.0](https://github.com/etcd-io/etcd/releases/tag/v3.5.0)
|
||||
* Switch `kube-controller-manager` and `kube-scheduler` to use secure port only
|
||||
* Update Prometheus config to discover endpoints and use a bearer token to scrape
|
||||
|
||||
### Fedora CoreOS
|
||||
|
||||
* Add Cilium cgroups v2 support on Fedora CoreOS
|
||||
* Update Butane Config version from v1.2.0 to v1.4.0
|
||||
* Rename Fedora CoreOS Config to Butane Config
|
||||
* Require any [snippets](https://typhoon.psdn.io/advanced/customization/#hosts) customizations to update to v1.4.0
|
||||
|
||||
### Addons
|
||||
|
||||
* Update nginx-ingress from v0.47.0 to [v1.0.0-beta.1](https://github.com/kubernetes/ingress-nginx/releases/tag/controller-v1.0.0-beta.1)
|
||||
* Update node-exporter from v1.2.0 to [v1.2.2](https://github.com/prometheus/node_exporter/releases/tag/v1.2.2)
|
||||
* Update kube-state-metrics from v2.1.0 to [v2.1.1](https://github.com/kubernetes/kube-state-metrics/releases/tag/v2.1.1)
|
||||
* Update Grafana from v8.0.6 to [v8.1.1](https://github.com/grafana/grafana/releases/tag/v8.1.1)
|
||||
|
||||
## v1.21.3
|
||||
|
||||
* Kubernetes [v1.21.3](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.21.md#v1213)
|
||||
* Update Cilium from v1.10.1 to [v1.10.3](https://github.com/cilium/cilium/releases/tag/v1.10.3)
|
||||
* Require [poseidon/ct](https://github.com/poseidon/terraform-provider-ct) Terraform provider v0.9+ ([notes](https://typhoon.psdn.io/topics/maintenance/#upgrade-terraform-provider-ct))
|
||||
|
||||
### AWS
|
||||
|
||||
* Change default disk type from `gp2` to `gp3` ([#1012](https://github.com/poseidon/typhoon/pull/1012))
|
||||
|
||||
### Addons
|
||||
|
||||
* Update Prometheus from v2.28.0 to [v2.28.1](https://github.com/prometheus/prometheus/releases/tag/v2.28.1)
|
||||
* Update node-exporter from v1.1.2 to [v1.2.0](https://github.com/prometheus/node_exporter/releases/tag/v1.2.0)
|
||||
* Update Grafana from v8.0.3 to [v8.0.6](https://github.com/grafana/grafana/releases/tag/v8.0.6)
|
||||
|
||||
### Known Issues
|
||||
|
||||
* Cilium with recent Fedora CoreOS will have networking issues ([fedora-coreos#881](https://github.com/coreos/fedora-coreos-tracker/issues/881)) (fixed in v1.21.4)
|
||||
|
||||
## v1.21.2
|
||||
|
||||
* Kubernetes [v1.21.2](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.21.md#v1212)
|
||||
* Add Terraform v1.0.x support ([#974](https://github.com/poseidon/typhoon/pull/974))
|
||||
* Continue to support Terraform v0.13.x, v0.14.4+, and v0.15.x
|
||||
* Update CoreDNS from v1.8.0 to [v1.8.4](https://github.com/poseidon/typhoon/pull/1006)
|
||||
* Update Cilium from v1.9.6 to [v1.10.1](https://github.com/cilium/cilium/releases/tag/v1.10.1)
|
||||
* Update Calico from v3.19.0 to [v3.19.1](https://github.com/projectcalico/calico/releases/tag/v3.19.1)
|
||||
|
||||
### Addons
|
||||
|
||||
* Update kube-state-metrics from v2.0.0 to [v2.1.0](https://github.com/kubernetes/kube-state-metrics/releases/tag/v2.1.0)
|
||||
* Update Prometheus from v2.27.0 to [v2.28.0](https://github.com/prometheus/prometheus/releases/tag/v2.28.0)
|
||||
* Update Grafana from v7.5.6 to [v8.0.3](https://github.com/grafana/grafana/releases/tag/v8.0.3)
|
||||
* Update nginx-ingress from v0.46.0 to [v0.47.0](https://github.com/kubernetes/ingress-nginx/releases/tag/controller-v0.47.0)
|
||||
|
||||
### Fedora CoreOS
|
||||
|
||||
#### AWS
|
||||
|
||||
* Extend experimental Fedora CoreOS arm64 support with Cilium
|
||||
* CNI provider may now be `flannel` or `cilium` (new)
|
||||
|
||||
#### Bare-Metal
|
||||
|
||||
* Workaround systemd path unit issue [fedora-coreos-tracker/#861](https://github.com/coreos/fedora-coreos-tracker/issues/861)
|
||||
|
||||
#### DigitalOcean
|
||||
|
||||
* Workaround systemd path unit issue [fedora-coreos-tracker/#861](https://github.com/coreos/fedora-coreos-tracker/issues/861)
|
||||
|
||||
### Known Issues
|
||||
|
||||
* Cilium with recent Fedora CoreOS will have networking issues ([fedora-coreos#881](https://github.com/coreos/fedora-coreos-tracker/issues/881)) (fixed in v1.21.4)
|
||||
|
||||
## v1.21.1
|
||||
|
||||
* Kubernetes [v1.21.1](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.21.md#v1211)
|
||||
* Add Terraform v0.15.x support ([#974](https://github.com/poseidon/typhoon/pull/974))
|
||||
* Continue to support Terraform v0.13.x and v0.14.4+
|
||||
* Update etcd from v3.4.15 to [v3.4.16](https://github.com/etcd-io/etcd/releases/tag/v3.4.16)
|
||||
* Update Cilium from v1.9.5 to [v1.9.6](https://github.com/cilium/cilium/releases/tag/v1.9.6)
|
||||
* Update Calico from v3.18.1 to [v3.19.0](https://github.com/projectcalico/calico/releases/tag/v3.19.0)
|
||||
|
||||
### AWS
|
||||
|
||||
* Reduce the default `disk_size` from 40GB to 30GB ([#983](https://github.com/poseidon/typhoon/pull/983))
|
||||
|
||||
### Azure
|
||||
|
||||
* Reduce the default `disk_size` from 40GB to 30GB ([#983](https://github.com/poseidon/typhoon/pull/983))
|
||||
|
||||
### Google Cloud
|
||||
|
||||
* Reduce the default `disk_size` from 40GB to 30GB ([#983](https://github.com/poseidon/typhoon/pull/983))
|
||||
|
||||
### Fedora CoreOS
|
||||
|
||||
* Update Kubelet mounts for cgroups v2 ([#978](https://github.com/poseidon/typhoon/pull/978))
|
||||
|
||||
### Addons
|
||||
|
||||
* Update kube-state-metrics from v2.0.0-rc.1 to [v2.0.0](https://github.com/kubernetes/kube-state-metrics/releases/tag/v2.0.0)
|
||||
* Update Prometheus from v2.25.2 to [v2.27.0](https://github.com/prometheus/prometheus/releases/tag/v2.27.0)
|
||||
* Update Grafana from v7.5.3 to [v7.5.6](https://github.com/grafana/grafana/releases/tag/v7.5.6)
|
||||
* Update nginx-ingress from v0.45.0 to [v0.46.0](https://github.com/kubernetes/ingress-nginx/releases/tag/controller-v0.46.0)
|
||||
|
||||
## v1.21.0
|
||||
|
||||
* Kubernetes [v1.21.0](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.21.md#v1210)
|
||||
* Enable `tokencleaner` controller ([#969](https://github.com/poseidon/typhoon/pull/969))
|
||||
* Enable `kube-scheduler` and `kube-controller-manager` separate authn/z kubeconfig
|
||||
* Change CNI config location from /etc/kubernetes/cni/net.d to /etc/cni/net.d ([#965](https://github.com/poseidon/typhoon/pull/965))
|
||||
* Change `kube-controller-manager` to mount `/var/lib/kubelet/volumeplugins` directly
|
||||
* Remove unused `cloud-provider` flags
|
||||
* Update Fedora CoreOS Config version from v1.1.0 to v1.2.0 ([#970](https://github.com/poseidon/typhoon/pull/970))
|
||||
* Require [poseidon/ct](https://github.com/poseidon/terraform-provider-ct) Terraform provider v0.8+ ([notes](https://typhoon.psdn.io/topics/maintenance/#upgrade-terraform-provider-ct))
|
||||
* Require any [snippets](https://typhoon.psdn.io/advanced/customization/#hosts) customizations to update to v1.2.0
|
||||
|
||||
### AWS
|
||||
|
||||
* Allow setting custom initial node taints on worker pools ([#968](https://github.com/poseidon/typhoon/pull/968)); see the sketch after this list
  * Add `node_taints` variable to internal `workers` pool module to set initial node taints
  * Add `daemonset_tolerations` so `kube-system` DaemonSets can tolerate custom taints
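A sketch of how a dedicated worker pool might use the new variable; the pool name and taint are hypothetical, the omitted settings are the pool's usual platform inputs, and the `"key=value:Effect"` format is an assumption based on Kubernetes taint syntax:

```tf
module "yavin-gpu-pool" {
  source = "git::https://github.com/poseidon/typhoon//aws/fedora-coreos/kubernetes/workers?ref=v1.21.0"

  # ... usual worker pool settings (name, VPC/subnets, SSH key, count) omitted ...

  # initial taints applied to nodes in this pool (assumed "key=value:Effect" format)
  node_taints = ["role=gpu:NoSchedule"]
}
```

On the cluster module, `daemonset_tolerations` would then list the taint keys (for example `daemonset_tolerations = ["role"]`) so `kube-system` DaemonSets keep scheduling onto the tainted pool.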
### Azure
|
||||
|
||||
* Allow setting custom initial node taints on worker pools ([#968](https://github.com/poseidon/typhoon/pull/968))
|
||||
* Add `node_taints` variable to internal `workers` pool module to set initial node taints
|
||||
* Add `daemonset_tolerations` so `kube-system` DaemonSets can tolerate custom taints
|
||||
* Remove deprecated `azurerm_lb_backend_address_pool` field `resource_group_name` ([#972](https://github.com/poseidon/typhoon/pull/972))
|
||||
|
||||
### Google Cloud
|
||||
|
||||
* Allow setting custom initial node taints on worker pools ([#968](https://github.com/poseidon/typhoon/pull/968))
|
||||
* Add `node_taints` variable to internal `workers` pool module to set initial node taints
|
||||
* Add `daemonset_tolerations` so `kube-system` DaemonSets can tolerate custom taints
|
||||
|
||||
### Addons
|
||||
|
||||
* Update nginx-ingress from v0.44.0 to [v0.45.0](https://github.com/kubernetes/ingress-nginx/releases/tag/controller-v0.45.0)
|
||||
* Update kube-state-metrics from v2.0.0-rc.0 to [v2.0.0-rc.1](https://github.com/kubernetes/kube-state-metrics/releases/tag/v2.0.0-rc.1)
|
||||
* Update Grafana from v7.4.5 to [v7.5.3](https://github.com/grafana/grafana/releases/tag/v7.5.3)
|
||||
|
||||
## v1.20.5
|
||||
|
||||
* Kubernetes [v1.20.5](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.20.md#v1205)
|
||||
* Update etcd from v3.4.14 to [v3.4.15](https://github.com/etcd-io/etcd/releases/tag/v3.4.15)
|
||||
* Update Cilium from v1.9.4 to [v1.9.5](https://github.com/cilium/cilium/releases/tag/v1.9.5)
|
||||
* Update Calico from v3.17.3 to [v3.18.1](https://github.com/projectcalico/calico/releases/tag/v3.18.1)
|
||||
* Update CoreDNS from v1.7.0 to [v1.8.0](https://coredns.io/2020/10/22/coredns-1.8.0-release/)
|
||||
* Mark bootstrap token as sensitive in Terraform plans ([#949](https://github.com/poseidon/typhoon/pull/949))
|
||||
|
||||
### Fedora CoreOS
|
||||
|
||||
* Set Kubelet `provider-id` ([#951](https://github.com/poseidon/typhoon/pull/951))
|
||||
|
||||
### Flatcar Linux
|
||||
|
||||
#### AWS
|
||||
|
||||
* Set Kubelet `provider-id` ([#951](https://github.com/poseidon/typhoon/pull/951))
|
||||
* Remove `os_image` option `flatcar-edge` ([#943](https://github.com/poseidon/typhoon/pull/943))
|
||||
|
||||
#### Azure
|
||||
|
||||
* Remove `os_image` option `flatcar-edge` ([#943](https://github.com/poseidon/typhoon/pull/943))
|
||||
|
||||
#### Bare-Metal
|
||||
|
||||
* Remove `os_channel` option `flatcar-edge` ([#943](https://github.com/poseidon/typhoon/pull/943))
|
||||
|
||||
### Addons
|
||||
|
||||
* Update Prometheus from v2.25.0 to [v2.25.2](https://github.com/prometheus/prometheus/releases/tag/v2.25.2)
|
||||
* Update kube-state-metrics from v2.0.0-alpha.3 to [v2.0.0-rc.0](https://github.com/kubernetes/kube-state-metrics/releases/tag/v2.0.0-rc.0)
|
||||
* Switch image from `quay.io` to `k8s.gcr.io` ([#946](https://github.com/poseidon/typhoon/pull/946))
|
||||
* Update node-exporter from v1.1.1 to [v1.1.2](https://github.com/prometheus/node_exporter/releases/tag/v1.1.2)
|
||||
* Update Grafana from v7.4.2 to [v7.4.5](https://github.com/grafana/grafana/releases/tag/v7.4.5)
|
||||
|
||||
## v1.20.4
|
||||
|
||||
* Kubernetes [v1.20.4](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.20.md#v1204)
|
||||
* Update Cilium from v1.9.1 to [v1.9.4](https://github.com/cilium/cilium/releases/tag/v1.9.4)
|
||||
* Update Calico from v3.17.1 to [v3.17.3](https://github.com/projectcalico/calico/releases/tag/v3.17.3)
|
||||
* Update flannel-cni from v0.4.1 to [v0.4.2](https://github.com/poseidon/flannel-cni/releases/tag/v0.4.2)
|
||||
|
||||
### Addons
|
||||
|
||||
* Update nginx-ingress from v0.43.0 to [v0.44.0](https://github.com/kubernetes/ingress-nginx/releases/tag/controller-v0.44.0)
|
||||
* Update Prometheus from v2.24.0 to [v2.25.0](https://github.com/prometheus/prometheus/releases/tag/v2.25.0)
|
||||
* Update node-exporter from v1.0.1 to [v1.1.1](https://github.com/prometheus/node_exporter/releases/tag/v1.1.1)
|
||||
* Update Grafana from v7.3.7 to [v7.4.2](https://github.com/grafana/grafana/releases/tag/v7.4.2)
|
||||
|
||||
## v1.20.2
|
||||
|
||||
* Kubernetes [v1.20.2](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.20.md#v1202)
|
||||
* Support Terraform v0.13.x and v0.14.4+ ([#924](https://github.com/poseidon/typhoon/pull/923))
|
||||
|
||||
### Addons
|
||||
|
||||
* Update nginx-ingress from v0.41.2 to [v0.43.0](https://github.com/kubernetes/ingress-nginx/releases/tag/controller-v0.43.0)
|
||||
* Update Prometheus from v2.23.0 to [v2.24.0](https://github.com/prometheus/prometheus/releases/tag/v2.24.0)
|
||||
* Update Grafana from v7.3.6 to [v7.3.7](https://github.com/grafana/grafana/releases/tag/v7.3.7)
|
||||
|
||||
## v1.20.1
|
||||
|
||||
* Kubernetes [v1.20.1](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.20.md#v1201)
|
||||
|
||||
### Fedora CoreOS
|
||||
|
||||
* Fedora CoreOS 33 has stronger crypto defaults ([**notice**](https://docs.fedoraproject.org/en-US/fedora-coreos/faq/#_why_does_ssh_stop_working_after_upgrading_to_fedora_33), [#915](https://github.com/poseidon/typhoon/issues/915))
|
||||
* Use a non-RSA SSH key or add the workaround provided in upstream [Fedora docs](https://docs.fedoraproject.org/en-US/fedora-coreos/faq/#_why_does_ssh_stop_working_after_upgrading_to_fedora_33) as a [snippet](https://typhoon.psdn.io/advanced/customization/#fedora-coreos) (**action required**)
|
||||
|
||||
### Addons
|
||||
|
||||
* Update Grafana from v7.3.5 to [v7.3.6](https://github.com/grafana/grafana/releases/tag/v7.3.6)
|
||||
|
||||
## v1.20.0
|
||||
|
||||
* Kubernetes [v1.20.0](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.20.md#v1200)
|
||||
* Add input variable validations ([#880](https://github.com/poseidon/typhoon/pull/880))
|
||||
* Require Terraform v0.13+ ([migration guide](https://typhoon.psdn.io/topics/maintenance/#terraform-versions))
|
||||
* Set output sensitive to suppress console display for some cases ([#885](https://github.com/poseidon/typhoon/pull/885))
|
||||
* Add service account token [volume projection](https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/#service-account-token-volume-projection) ([#897](https://github.com/poseidon/typhoon/pull/897))
|
||||
* Scope kube-scheduler and kube-controller-manager permissions ([#898](https://github.com/poseidon/typhoon/pull/898))
|
||||
* Update etcd from v3.4.12 to [v3.4.14](https://github.com/etcd-io/etcd/releases/tag/v3.4.14)
|
||||
* Update Calico from v3.16.5 to v3.17.1 ([#890](https://github.com/poseidon/typhoon/pull/890))
|
||||
* Enable Calico MTU auto-detection
|
||||
* Remove [workaround](https://github.com/poseidon/typhoon/pull/724) to Calico cni-plugin [issue](https://github.com/projectcalico/cni-plugin/issues/874)
|
||||
* Update Cilium from v1.9.0 to [v1.9.1](https://github.com/cilium/cilium/releases/tag/v1.9.1)
|
||||
* Relax `terraform-provider-ct` version constraint to v0.6+ ([#893](https://github.com/poseidon/typhoon/pull/893))
|
||||
* Allow upgrading `terraform-provider-ct` to v0.7.x ([warn](https://typhoon.psdn.io/topics/maintenance/#upgrade-terraform-provider-ct))
|
||||
|
||||
### AWS
|
||||
|
||||
* Enable Network Load Balancer (NLB) dualstack ([#883](https://github.com/poseidon/typhoon/pull/883))
|
||||
* NLB subnets assigned both IPv4 and IPv6 addresses
|
||||
* NLB DNS name has both A and AAAA records
|
||||
* NLB to target node traffic is IPv4 (no change)
|
||||
|
||||
### Bare-Metal
|
||||
|
||||
* Remove iSCSI `/etc/iscsi` and `iscsadm` mounts from Kubelet ([#912](https://github.com/poseidon/typhoon/pull/912))
|
||||
|
||||
### Fedora CoreOS
|
||||
|
||||
#### AWS
|
||||
|
||||
* Fix AMI query which could fail in some regions ([#887](https://github.com/poseidon/typhoon/pull/887))
|
||||
|
||||
#### Bare-Metal
|
||||
|
||||
* Promote Fedora CoreOS to stable
|
||||
* Use initramfs and rootfs images as initrd's ([#889](https://github.com/poseidon/typhoon/pull/889))
|
||||
* Requires Fedora CoreOS version with rootfs images (e.g. 32.20200923.3.0+)
|
||||
|
||||
### Addons
|
||||
|
||||
* Update Prometheus from v2.22.2 to [v2.23.0](https://github.com/prometheus/prometheus/releases/tag/v2.23.0)
|
||||
* Update kube-state-metrics from v2.0.0-alpha.2 to [v2.0.0-alpha.3](https://github.com/kubernetes/kube-state-metrics/releases/tag/v2.0.0-alpha.3)
|
||||
* Update Grafana from v7.3.2 to [v7.3.5](https://github.com/grafana/grafana/releases/tag/v7.3.5)
|
||||
|
||||
## v1.19.4
|
||||
|
||||
* Kubernetes [v1.19.4](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.19.md#v1194)
|
||||
* Update Cilium from v1.8.4 to [v1.9.0](https://github.com/cilium/cilium/releases/tag/v1.9.0)
|
||||
* Update Calico from v3.16.3 to [v3.16.5](https://github.com/projectcalico/calico/releases/tag/v3.16.5)
|
||||
* Remove `asset_dir` variable (defaulted off in [v1.17.0](https://github.com/poseidon/typhoon/pull/595), deprecated in [v1.18.0](https://github.com/poseidon/typhoon/pull/678))
|
||||
|
||||
### Fedora CoreOS
|
||||
|
||||
* Improve `etcd-member.service` systemd unit ([#868](https://github.com/poseidon/typhoon/pull/868))
|
||||
* Allow a snippet with a systemd dropin to set an alternate image (e.g. mirror)
|
||||
* Fix local node delete oneshot on node shutdown ([#856](https://github.com/poseidon/typhoon/pull/855))
|
||||
|
||||
#### AWS
|
||||
|
||||
* Add experimental Fedora CoreOS arm64 support ([docs](https://typhoon.psdn.io/advanced/arm64/), [#875](https://github.com/poseidon/typhoon/pull/875))
|
||||
* Allow arm64 full-cluster or mixed/hybrid cluster with worker pools
|
||||
* Add `arch` variable to cluster module
|
||||
* Add `daemonset_tolerations` variable to cluster module
|
||||
* Add `node_taints` variable to workers module
|
||||
* Requires flannel CNI provider and use of experimental AMI (see docs)
|
||||
|
||||
### Flatcar Linux
|
||||
|
||||
* Rename `container-linux` modules to `flatcar-linux` ([#858](https://github.com/poseidon/typhoon/issues/858)) (**action required**)
|
||||
* Change on-host system containers from rkt to docker
|
||||
* Change `etcd-member.service` container runner from rkt to docker ([#867](https://github.com/poseidon/typhoon/pull/867))
|
||||
* Change `kubelet.service` container runner from rkt-fly to docker ([#855](https://github.com/poseidon/typhoon/pull/855))
|
||||
* Change `bootstrap.service` container runner from rkt to docker ([#873](https://github.com/poseidon/typhoon/pull/873))
|
||||
* Change `delete-node.service` to use docker and an inline ExecStart ([#855](https://github.com/poseidon/typhoon/pull/855))
|
||||
* Fix local node delete oneshot on node shutdown ([#855](https://github.com/poseidon/typhoon/pull/855))
|
||||
* Remove CoreOS Container Linux Matchbox profiles ([#859](https://github.com/poseidon/typhoon/pull/858))
|
||||
|
||||
### Addons
|
||||
|
||||
* Update nginx-ingress from v0.40.2 to [v0.41.2](https://github.com/kubernetes/ingress-nginx/releases/tag/controller-v0.41.2)
|
||||
* Update Prometheus from v2.22.0 to [v2.22.1](https://github.com/prometheus/prometheus/releases/tag/v2.22.1)
|
||||
* Update kube-state-metrics from v2.0.0-alpha.1 to [v2.0.0-alpha.2](https://github.com/kubernetes/kube-state-metrics/releases/tag/v2.0.0-alpha.2)
|
||||
* Update Grafana from v7.2.1 to [v7.3.2](https://github.com/grafana/grafana/releases/tag/v7.3.2)
|
||||
|
||||
## v1.19.3
|
||||
|
||||
* Kubernetes [v1.19.3](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.19.md#v1193)
|
||||
* Update Cilium from v1.8.3 to [v1.8.4](https://github.com/cilium/cilium/releases/tag/v1.8.4)
|
||||
* Update Calico from v3.15.3 to [v3.16.3](https://github.com/projectcalico/calico/releases/tag/v3.16.3) ([#851](https://github.com/poseidon/typhoon/pull/851))
|
||||
* Update flannel from v0.13.0-rc2 to v0.13.0 ([#219](https://github.com/poseidon/terraform-render-bootstrap/pull/219))
|
||||
|
||||
### Flatcar Linux
|
||||
|
||||
* Remove references to CoreOS Container Linux ([#839](https://github.com/poseidon/typhoon/pull/839))
|
||||
* Fix error querying for coreos AMI on AWS ([#838](https://github.com/poseidon/typhoon/issues/838))
|
||||
|
||||
### Addons
|
||||
|
||||
* Update nginx-ingress from v0.35.0 to [v0.40.2](https://github.com/kubernetes/ingress-nginx/releases/tag/controller-v0.40.2)
|
||||
* Update Grafana from v7.1.5 to [v7.2.1](https://github.com/grafana/grafana/releases/tag/v7.2.1)
|
||||
* Update Prometheus from v2.21.0 to [v2.22.0](https://github.com/prometheus/prometheus/releases/tag/v2.22.0)
|
||||
* Update kube-state-metrics from v1.9.7 to [v2.0.0-alpha.1](https://github.com/kubernetes/kube-state-metrics/releases/tag/v2.0.0-alpha.1)
|
||||
|
||||
## v1.19.2
|
||||
|
||||
* Kubernetes [v1.19.2](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.19.md#v1192)
|
||||
* Update flannel from v0.12.0 to v0.13.0-rc2 ([#216](https://github.com/poseidon/terraform-render-bootstrap/pull/216))
|
||||
* Update flannel-cni from v0.4.0 to v0.4.1
|
||||
* Update CNI plugins from v0.8.6 to v0.8.7
|
||||
|
||||
### Addons
|
||||
|
||||
* Refresh Prometheus rules/alerts and Grafana dashboards ([#831](https://github.com/poseidon/typhoon/pull/831))
|
||||
* Reduce apiserver metrics cardinality for non-core APIs ([#830](https://github.com/poseidon/typhoon/pull/830))
|
||||
|
||||
## v1.19.1
|
||||
|
||||
* Kubernetes [v1.19.1](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.19.md#v1191)
|
||||
* Change control plane seccomp annotations to GA `seccompProfile` ([#822](https://github.com/poseidon/typhoon/pull/822))
|
||||
* Update Cilium from v1.8.2 to [v1.8.3](https://github.com/cilium/cilium/releases/tag/v1.8.3)
|
||||
* Promote Cilium from experimental to general availability ([#827](https://github.com/poseidon/typhoon/pull/827))
|
||||
* Update Calico from v3.15.2 to [v3.15.3](https://github.com/projectcalico/calico/releases/tag/v3.15.3)
|
||||
|
||||
### Fedora CoreOS
|
||||
|
||||
* Update Fedora CoreOS Config version from v1.0.0 to v1.1.0
|
||||
* Require any [snippets](https://typhoon.psdn.io/advanced/customization/#hosts) customizations to update to v1.1.0
|
||||
|
||||
### Addons
|
||||
|
||||
* Update IngressClass resources to `networking.k8s.io/v1` ([#824](https://github.com/poseidon/typhoon/pull/824))
|
||||
* Update Prometheus from v2.20.0 to [v2.21.0](https://github.com/prometheus/prometheus/releases/tag/v2.21.0)
|
||||
* Remove Kubernetes node name labelmap `relabel_config` from etcd, Kubelet, and CAdvisor scrape config ([#828](https://github.com/poseidon/typhoon/pull/828))
|
||||
|
||||
## v1.19.0
|
||||
|
||||
* Kubernetes [v1.19.0](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.19.md#v1190)
|
||||
* Update etcd from v3.4.10 to [v3.4.12](https://github.com/etcd-io/etcd/releases/tag/v3.4.12)
|
||||
* Update Calico from v3.15.1 to [v3.15.2](https://docs.projectcalico.org/v3.15/release-notes/)
|
||||
|
||||
### Fedora CoreOS
|
||||
|
||||
* Fix race condition during bootstrap of multi-controller clusters ([#808](https://github.com/poseidon/typhoon/pull/808))
|
||||
* Fix SELinux label of bootstrap-secrets on non-bootstrap controllers
|
||||
|
||||
### Addons
|
||||
|
||||
* Introduce [fleetlock](https://github.com/poseidon/fleetlock) for Fedora CoreOS reboot coordination ([#814](https://github.com/poseidon/typhoon/pull/814))
|
||||
* Update nginx-ingress from v0.34.1 to [v0.35.0](https://github.com/kubernetes/ingress-nginx/releases/tag/controller-v0.35.0)
|
||||
* Repository changed to `k8s.gcr.io/ingress-nginx/controller`
|
||||
* Update Grafana from v7.1.3 to [v7.1.5](https://github.com/grafana/grafana/releases/tag/v7.1.5)
|
||||
|
||||
## v1.18.8
|
||||
|
||||
* Kubernetes [v1.18.8](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.18.md#v1188)
|
||||
* Migrate from Terraform v0.12.x to v0.13.x ([#804](https://github.com/poseidon/typhoon/pull/804)) (**action required**)
|
||||
* Recommend Terraform v0.13.x ([migration guide](https://typhoon.psdn.io/topics/maintenance/#terraform-versions))
|
||||
* Support automatic install of poseidon's provider plugins ([poseidon/ct](https://registry.terraform.io/providers/poseidon/ct/latest), [poseidon/matchbox](https://registry.terraform.io/providers/poseidon/matchbox/latest)); see the sketch after this list
|
||||
* Require Terraform v0.12.26+ (migration compatibility)
|
||||
* Require `terraform-provider-ct` v0.6.1
|
||||
* Require `terraform-provider-matchbox` v0.4.1
|
||||
* Update etcd from v3.4.9 to [v3.4.10](https://github.com/etcd-io/etcd/releases/tag/v3.4.10)
|
||||
* Update CoreDNS from v1.6.7 to [v1.7.0](https://coredns.io/2020/06/15/coredns-1.7.0-release/)
|
||||
* Update Cilium from v1.8.1 to [v1.8.2](https://github.com/cilium/cilium/releases/tag/v1.8.2)
|
||||
* Update [coreos/flannel-cni](https://github.com/coreos/flannel-cni) to [poseidon/flannel-cni](https://github.com/poseidon/flannel-cni) ([#798](https://github.com/poseidon/typhoon/pull/798))
|
||||
* Update CNI plugins and fix CVEs with Flannel CNI (non-default)
|
||||
* Transition to a poseidon maintained container image
|
||||
|
||||
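Under Terraform v0.13, the automatic provider install mentioned above maps to declaring provider sources; a sketch using the versions required in these notes:

```tf
terraform {
  # migration compatibility floor noted above
  required_version = ">= 0.12.26"

  required_providers {
    ct = {
      source  = "poseidon/ct"
      version = "0.6.1"
    }
    matchbox = {
      source  = "poseidon/matchbox"
      version = "0.4.1"
    }
  }
}
```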
### AWS
|
||||
|
||||
* Allow `terraform-provider-aws` v3.0+ ([#803](https://github.com/poseidon/typhoon/pull/803))
|
||||
* Recommend updating `terraform-provider-aws` to v3.0+
|
||||
* Continue to allow v2.23+, no v3.x specific features are used
|
||||
|
||||
### DigitalOcean
|
||||
|
||||
* Require `terraform-provider-digitalocean` v1.21+ for Terraform v0.13.x (unenforced)
|
||||
* Require `terraform-provider-digitalocean` v1.20+ for Terraform v0.12.x
|
||||
|
||||
### Fedora CoreOS
|
||||
|
||||
* Fix support for Flannel with Fedora CoreOS ([#795](https://github.com/poseidon/typhoon/pull/795))
|
||||
  * Configure `flannel.1` link to select its own MAC address to solve flannel pod-to-pod traffic drops starting with default link changes in Fedora CoreOS 32.20200629.3.0 ([details](https://github.com/coreos/fedora-coreos-tracker/issues/574#issuecomment-665487296))
|
||||
|
||||
#### Addons
|
||||
|
||||
* Update Prometheus from v2.19.2 to [v2.20.0](https://github.com/prometheus/prometheus/releases/tag/v2.20.0)
|
||||
* Update Grafana from v7.0.6 to [v7.1.3](https://github.com/grafana/grafana/releases/tag/v7.1.3)
|
||||
|
||||
## v1.18.6
|
||||
|
||||
* Kubernetes [v1.18.6](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.18.md#v1186)
|
||||
* Update Calico from v3.15.0 to [v3.15.1](https://docs.projectcalico.org/v3.15/release-notes/)
|
||||
* Update Cilium from v1.8.0 to [v1.8.1](https://github.com/cilium/cilium/releases/tag/v1.8.1)
|
||||
|
||||
#### Addons
|
||||
|
||||
* Update nginx-ingress from v0.33.0 to [v0.34.1](https://github.com/kubernetes/ingress-nginx/releases/tag/nginx-0.34.1)
|
||||
* [ingress-nginx](https://github.com/kubernetes/ingress-nginx/releases/tag/controller-v0.34.0) will publish images only to gcr.io
|
||||
* Update Prometheus from v2.19.1 to [v2.19.2](https://github.com/prometheus/prometheus/releases/tag/v2.19.2)
|
||||
* Update Grafana from v7.0.4 to [v7.0.6](https://github.com/grafana/grafana/releases/tag/v7.0.6)
|
||||
|
||||
## v1.18.5
|
||||
|
||||
* Kubernetes [v1.18.5](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.18.md#v1185)
|
||||
* Add Cilium v1.8.0 as a (experimental) CNI provider option ([#760](https://github.com/poseidon/typhoon/pull/760))
|
||||
* Set `networking` to "cilium" to enable
|
||||
* Update Calico from v3.14.1 to [v3.15.0](https://docs.projectcalico.org/v3.15/release-notes/)
|
||||
|
||||
#### DigitalOcean
|
||||
|
||||
* Isolate each cluster in an independent DigitalOcean VPC ([#776](https://github.com/poseidon/typhoon/pull/776))
  * Create droplets in a VPC per cluster (matches Typhoon AWS, Azure, and GCP)
  * Require `terraform-provider-digitalocean` v1.16.0+ (action required)
  * Output `vpc_id` for use with an attached DigitalOcean [loadbalancer](https://github.com/poseidon/typhoon/blob/v1.18.5/docs/architecture/digitalocean.md#custom-load-balancer); see the sketch after this list
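One way the new `vpc_id` output might be consumed, sketched as an attached DigitalOcean load balancer; the resource arguments shown (`vpc_uuid`, `droplet_tag`, `forwarding_rule`) and the `module.nemo` name are assumptions for illustration, not taken from the release notes:

```tf
resource "digitalocean_loadbalancer" "ingress" {
  name   = "nemo-ingress" # placeholder name
  region = "nyc3"         # placeholder region

  # attach the balancer to the per-cluster VPC via the module output noted above
  vpc_uuid = module.nemo.vpc_id

  # select worker droplets (placeholder tag)
  droplet_tag = "nemo-worker"

  forwarding_rule {
    entry_port      = 80
    entry_protocol  = "http"
    target_port     = 30080 # placeholder NodePort
    target_protocol = "http"
  }
}
```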
### Fedora CoreOS
|
||||
|
||||
#### Google Cloud
|
||||
|
||||
* Promote Fedora CoreOS to stable
|
||||
* Remove `os_image` variable deprecated in v1.18.3 ([#777](https://github.com/poseidon/typhoon/pull/777))
|
||||
* Use `os_stream` to select a Fedora CoreOS image stream
|
||||
|
||||
### Flatcar Linux
|
||||
|
||||
#### Azure
|
||||
|
||||
* Allow using Flatcar Linux Edge by setting `os_image` to "flatcar-edge" ([#778](https://github.com/poseidon/typhoon/pull/778))
|
||||
|
||||
#### Addons
|
||||
|
||||
* Update Prometheus from v2.19.0 to [v2.19.1](https://github.com/prometheus/prometheus/releases/tag/v2.19.1)
|
||||
* Update Grafana from v7.0.3 to [v7.0.4](https://github.com/grafana/grafana/releases/tag/v7.0.4)
|
||||
|
||||
## v1.18.4
|
||||
|
||||
* Kubernetes [v1.18.4](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.18.md#v1184)
|
||||
@ -88,7 +707,7 @@ Notable changes between versions.
|
||||
|
||||
#### Google
|
||||
|
||||
* Support Fedora CoreOS [image streams](https://docs.fedoraproject.org/en-US/fedora-coreos/update-streams/) ([#723](https://github.com/poseidon/typhoon/pull/722))
|
||||
* Support Fedora CoreOS [image streams](https://docs.fedoraproject.org/en-US/fedora-coreos/update-streams/) ([#723](https://github.com/poseidon/typhoon/pull/723))
  * Add `os_stream` variable to set the stream to `stable` (default), `testing`, or `next`; see the sketch below
  * Deprecate `os_image` variable. Manual image uploads are no longer needed
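A sketch of selecting an update stream with the new variable; the module source and ref are illustrative and the other cluster settings are omitted:

```tf
module "yavin" {
  source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes?ref=v1.18.4"

  # ... platform and configuration settings omitted ...

  # pick a Fedora CoreOS update stream instead of uploading an image manually
  os_stream = "stable"
}
```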
53 README.md
@ -11,10 +11,10 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster
|
||||
|
||||
## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>
|
||||
|
||||
* Kubernetes v1.18.4 (upstream)
|
||||
* Single or multi-master, [Calico](https://www.projectcalico.org/) or [flannel](https://github.com/coreos/flannel) networking
|
||||
* Kubernetes v1.23.3 (upstream)
|
||||
* Single or multi-master, [Calico](https://www.projectcalico.org/) or [Cilium](https://github.com/cilium/cilium) or [flannel](https://github.com/coreos/flannel) networking
|
||||
* On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/), SELinux enforcing
|
||||
* Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [preemptible](https://typhoon.psdn.io/cl/google-cloud/#preemption) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#container-linux) customization
|
||||
* Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [preemptible](https://typhoon.psdn.io/flatcar-linux/google-cloud/#preemption) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#hosts) customization
|
||||
* Ready for Ingress, Prometheus, Grafana, CSI, or other [addons](https://typhoon.psdn.io/addons/overview/)
|
||||
|
||||
## Modules
|
||||
@ -27,19 +27,27 @@ Typhoon is available for [Fedora CoreOS](https://getfedora.org/coreos/).
|
||||
|---------------|------------------|------------------|--------|
|
||||
| AWS | Fedora CoreOS | [aws/fedora-coreos/kubernetes](aws/fedora-coreos/kubernetes) | stable |
|
||||
| Azure | Fedora CoreOS | [azure/fedora-coreos/kubernetes](azure/fedora-coreos/kubernetes) | alpha |
|
||||
| Bare-Metal | Fedora CoreOS | [bare-metal/fedora-coreos/kubernetes](bare-metal/fedora-coreos/kubernetes) | beta |
|
||||
| Bare-Metal | Fedora CoreOS | [bare-metal/fedora-coreos/kubernetes](bare-metal/fedora-coreos/kubernetes) | stable |
|
||||
| DigitalOcean | Fedora CoreOS | [digital-ocean/fedora-coreos/kubernetes](digital-ocean/fedora-coreos/kubernetes) | beta |
|
||||
| Google Cloud | Fedora CoreOS | [google-cloud/fedora-coreos/kubernetes](google-cloud/fedora-coreos/kubernetes) | beta |
|
||||
| Google Cloud | Fedora CoreOS | [google-cloud/fedora-coreos/kubernetes](google-cloud/fedora-coreos/kubernetes) | stable |
|
||||
|
||||
| Platform | Operating System | Terraform Module | Status |
|
||||
|---------------|------------------|------------------|--------|
|
||||
| AWS | Fedora CoreOS (ARM64) | [aws/fedora-coreos/kubernetes](aws/fedora-coreos/kubernetes) | alpha |
|
||||
|
||||
Typhoon is available for [Flatcar Linux](https://www.flatcar-linux.org/releases/).
|
||||
|
||||
| Platform | Operating System | Terraform Module | Status |
|
||||
|---------------|------------------|------------------|--------|
|
||||
| AWS | Flatcar Linux | [aws/container-linux/kubernetes](aws/container-linux/kubernetes) | stable |
|
||||
| Azure | Flatcar Linux | [azure/container-linux/kubernetes](azure/container-linux/kubernetes) | alpha |
|
||||
| Bare-Metal | Flatcar Linux | [bare-metal/container-linux/kubernetes](bare-metal/container-linux/kubernetes) | stable |
|
||||
| DigitalOcean | Flatcar Linux | [digital-ocean/container-linux/kubernetes](digital-ocean/container-linux/kubernetes) | beta |
|
||||
| Google Cloud | Flatcar Linux | [google-cloud/container-linux/kubernetes](google-cloud/container-linux/kubernetes) | beta |
|
||||
| AWS | Flatcar Linux | [aws/flatcar-linux/kubernetes](aws/flatcar-linux/kubernetes) | stable |
|
||||
| Azure | Flatcar Linux | [azure/flatcar-linux/kubernetes](azure/flatcar-linux/kubernetes) | alpha |
|
||||
| Bare-Metal | Flatcar Linux | [bare-metal/flatcar-linux/kubernetes](bare-metal/flatcar-linux/kubernetes) | stable |
|
||||
| DigitalOcean | Flatcar Linux | [digital-ocean/flatcar-linux/kubernetes](digital-ocean/flatcar-linux/kubernetes) | beta |
|
||||
| Google Cloud | Flatcar Linux | [google-cloud/flatcar-linux/kubernetes](google-cloud/flatcar-linux/kubernetes) | stable |
|
||||
|
||||
| Platform | Operating System | Terraform Module | Status |
|
||||
|---------------|------------------|------------------|--------|
|
||||
| AWS | Flatcar Linux (ARM64) | [aws/flatcar-linux/kubernetes](aws/flatcar-linux/kubernetes) | alpha |
|
||||
|
||||
## Documentation
|
||||
|
||||
@ -54,7 +62,7 @@ Define a Kubernetes cluster by using the Terraform module for your chosen platfo
|
||||
|
||||
```tf
|
||||
module "yavin" {
|
||||
source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes?ref=v1.18.4"
|
||||
source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes?ref=v1.23.3"
|
||||
|
||||
# Google Cloud
|
||||
cluster_name = "yavin"
|
||||
@ -63,7 +71,7 @@ module "yavin" {
|
||||
dns_zone_name = "example-zone"
|
||||
|
||||
# configuration
|
||||
ssh_authorized_key = "ssh-rsa AAAAB3Nz..."
|
||||
ssh_authorized_key = "ssh-ed25519 AAAAB3Nz..."
|
||||
|
||||
# optional
|
||||
worker_count = 2
|
||||
@ -93,9 +101,9 @@ In 4-8 minutes (varies by platform), the cluster will be ready. This Google Clou
|
||||
$ export KUBECONFIG=/home/user/.kube/configs/yavin-config
|
||||
$ kubectl get nodes
|
||||
NAME ROLES STATUS AGE VERSION
|
||||
yavin-controller-0.c.example-com.internal <none> Ready 6m v1.18.4
|
||||
yavin-worker-jrbf.c.example-com.internal <none> Ready 5m v1.18.4
|
||||
yavin-worker-mzdm.c.example-com.internal <none> Ready 5m v1.18.4
|
||||
yavin-controller-0.c.example-com.internal <none> Ready 6m v1.23.3
|
||||
yavin-worker-jrbf.c.example-com.internal <none> Ready 5m v1.23.3
|
||||
yavin-worker-mzdm.c.example-com.internal <none> Ready 5m v1.23.3
|
||||
```
|
||||
|
||||
List the pods.
|
||||
@ -126,7 +134,7 @@ Typhoon is strict about minimalism, maturity, and scope. These are not in scope:
|
||||
|
||||
## Help
|
||||
|
||||
Ask questions on the IRC #typhoon channel on [freenode.net](http://freenode.net/).
|
||||
Schedule a meeting via [Github Sponsors](https://github.com/sponsors/poseidon?frequency=one-time) to discuss your use case.
|
||||
|
||||
## Motivation
|
||||
|
||||
@ -136,12 +144,17 @@ Typhoon addresses real world needs, which you may share. It is honest about limi
|
||||
|
||||
## Social Contract
|
||||
|
||||
Typhoon is not a product, trial, or free-tier. It is not run by a company, does not offer support or services, and does not accept or make any money. It is not associated with any operating system or platform vendor.
|
||||
Typhoon is not a product, trial, or free-tier. Typhoon does not offer support, services, or charge money. And Typhoon is independent of operating system or platform vendors.
|
||||
|
||||
Typhoon clusters will contain only [free](https://www.debian.org/intro/free) components. Cluster components will not collect data on users without their permission.
|
||||
|
||||
## Donations
|
||||
## Sponsors
|
||||
|
||||
Typhoon does not accept money donations. Instead, we encourage you to donate to one of [these organizations](https://github.com/poseidon/typhoon/wiki/Donations) to show your appreciation.
|
||||
Poseidon's Github [Sponsors](https://github.com/sponsors/poseidon) support the infrastructure and operational costs of providing Typhoon.
|
||||
|
||||
* [DigitalOcean](https://www.digitalocean.com/) kindly provides credits to support Typhoon test clusters.
|
||||
<a href="https://www.digitalocean.com/">
|
||||
<img src="https://opensource.nyc3.cdn.digitaloceanspaces.com/attribution/assets/SVG/DO_Logo_horizontal_blue.svg" width="201px">
|
||||
</a>
|
||||
<br>
|
||||
|
||||
If you'd like your company here, please contact dghubble at psdn.io.
|
||||
|
@ -37,6 +37,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -49,6 +50,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": "true",
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": "true"
|
||||
},
|
||||
@ -72,7 +74,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(coredns_dns_request_count_total{instance=~\"$instance\"}[5m])) by (proto)",
|
||||
"expr": "sum(rate(coredns_dns_requests_total{instance=~\"$instance\"}[5m])) by (proto)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{proto}}",
|
||||
@ -128,6 +130,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -140,6 +143,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": "true",
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": "true"
|
||||
},
|
||||
@ -163,7 +167,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(coredns_dns_request_type_count_total{instance=~\"$instance\"}[5m])) by (type)",
|
||||
"expr": "sum(rate(coredns_dns_requests_total{instance=~\"$instance\"}[5m])) by (type)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{type}}",
|
||||
@ -219,6 +223,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -231,6 +236,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": "true",
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": "true"
|
||||
},
|
||||
@ -254,7 +260,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(coredns_dns_request_count_total{instance=~\"$instance\"}[5m])) by (zone)",
|
||||
"expr": "sum(rate(coredns_dns_requests_total{instance=~\"$instance\"}[5m])) by (zone)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{zone}}",
|
||||
@ -323,6 +329,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -335,6 +342,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": "true",
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@ -428,6 +436,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -440,6 +449,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": "true",
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@ -463,7 +473,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(coredns_dns_response_rcode_count_total{instance=~\"$instance\"}[5m])) by (rcode)",
|
||||
"expr": "sum(rate(coredns_dns_responses_total{instance=~\"$instance\"}[5m])) by (rcode)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{rcode}}",
|
||||
@ -532,6 +542,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -544,6 +555,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": "true",
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@ -637,6 +649,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -649,6 +662,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": "true",
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@ -755,6 +769,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -767,6 +782,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": "true",
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@@ -790,7 +806,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "sum(coredns_cache_size{instance=~\"$instance\"}) by (type)",
+ "expr": "sum(coredns_cache_entries{instance=~\"$instance\"}) by (type)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{type}}",
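The hunks above all track the CoreDNS metric renames introduced around the 1.7 release: coredns_dns_request_count_total and coredns_dns_request_type_count_total fold into coredns_dns_requests_total, coredns_dns_response_rcode_count_total becomes coredns_dns_responses_total, and coredns_cache_size becomes coredns_cache_entries. As an illustrative sketch (not part of this diff), a panel query can tolerate both metric generations by falling back with `or`:

```promql
# Request rate by protocol; prefers the post-rename series and falls back to the
# deprecated name on older CoreDNS versions. Assumes the dashboard's $instance variable.
sum by (proto) (rate(coredns_dns_requests_total{instance=~"$instance"}[5m]))
or
sum by (proto) (rate(coredns_dns_request_count_total{instance=~"$instance"}[5m]))
```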
@ -846,6 +862,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -858,6 +875,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": "true",
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
|
@@ -11,7 +11,6 @@ data:
"editable": true,
"gnetId": null,
"hideControls": false,
"id": 6,
"links": [

],
@@ -343,7 +342,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "etcd_debugging_mvcc_db_total_size_in_bytes{job=\"$cluster\"}",
+ "expr": "etcd_mvcc_db_total_size_in_bytes{job=\"$cluster\"}",
"hide": false,
"interval": "",
"intervalFactor": 2,
@@ -172,7 +172,7 @@ data:
"tableColumn": "",
"targets": [
{
- "expr": "sum(kubelet_running_pod_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"})",
+ "expr": "sum(kubelet_running_pods{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}) OR sum(kubelet_running_pod_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@@ -256,7 +256,7 @@ data:
"tableColumn": "",
"targets": [
{
- "expr": "sum(kubelet_running_container_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"})",
+ "expr": "sum(kubelet_running_containers{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}) OR sum(kubelet_running_container_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
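The kubelet stat panels move from the deprecated kubelet_running_pod_count and kubelet_running_container_count gauges to kubelet_running_pods and kubelet_running_containers, keeping the old names behind an OR so the panels still render against kubelets that predate the rename. The same fallback pattern as a standalone PromQL sketch (job and $instance labels as used in the dashboard):

```promql
# Running pods per kubelet, tolerant of both metric names.
sum(kubelet_running_pods{job="kubelet", instance=~"$instance"})
or
sum(kubelet_running_pod_count{job="kubelet", instance=~"$instance"})
```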
@ -553,6 +553,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -565,6 +566,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
@ -644,6 +646,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -656,6 +659,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
@ -748,6 +752,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -760,6 +765,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
@ -852,6 +858,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -864,6 +871,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
@ -950,6 +958,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -962,6 +971,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
@ -1061,6 +1071,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -1075,6 +1086,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
@ -1154,6 +1166,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -1168,6 +1181,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
@ -1260,6 +1274,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -1274,6 +1289,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
@ -1366,6 +1382,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -1378,6 +1395,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
@ -1457,6 +1475,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -1469,6 +1488,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
@ -1562,6 +1582,7 @@ data:
|
||||
"datasource": "$datasource",
|
||||
"description": "Pod lifecycle event generator",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -1574,6 +1595,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
@ -1653,6 +1675,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -1665,6 +1688,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
@ -1757,6 +1781,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -1769,6 +1794,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
@ -1861,6 +1887,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -1873,6 +1900,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@ -1986,6 +2014,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -1998,6 +2027,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
@@ -2021,7 +2051,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, verb, url, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\"}[5m])) by (instance, verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} {{verb}} {{url}}",
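rest_client_request_latency_seconds was renamed to rest_client_request_duration_seconds in newer Kubernetes client instrumentation, so the API-latency panels (here for the kubelet, and further down for kube-proxy) switch histograms. A minimal sketch of the 99th-percentile query against the new metric, assuming the dashboard's $instance variable and job label:

```promql
# p99 request duration to the API server, broken out by verb and URL.
histogram_quantile(0.99,
  sum by (instance, verb, url, le) (
    rate(rest_client_request_duration_seconds_bucket{job="kubelet", instance=~"$instance"}[5m])
  )
)
```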
@ -2090,6 +2120,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -2102,6 +2133,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@ -2181,6 +2213,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -2193,6 +2226,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@ -2272,6 +2306,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -2284,6 +2319,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@@ -2470,7 +2506,7 @@ data:
"30d"
]
},
- "timezone": "",
+ "timezone": "UTC",
"title": "Kubernetes / Kubelet",
"uid": "3138fa155d5915769fbded898ac09fd9",
"version": 0
@ -2595,6 +2631,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -2607,6 +2644,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@ -2686,6 +2724,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -2698,6 +2737,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
@ -2790,6 +2830,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -2802,6 +2843,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@ -2881,6 +2923,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -2893,6 +2936,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
@ -2985,6 +3029,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -2997,6 +3042,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@ -3097,6 +3143,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -3109,6 +3156,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@@ -3132,7 +3180,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-proxy\",instance=~\"$instance\",verb=\"POST\"}[5m])) by (verb, url, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{job=\"kube-proxy\",instance=~\"$instance\",verb=\"POST\"}[5m])) by (verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{verb}} {{url}}",
@ -3201,6 +3249,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -3213,6 +3262,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
@@ -3236,7 +3286,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-proxy\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))",
+ "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{job=\"kube-proxy\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{verb}} {{url}}",
@ -3305,6 +3355,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -3317,6 +3368,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@ -3396,6 +3448,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -3408,6 +3461,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@ -3487,6 +3541,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -3499,6 +3554,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@@ -3659,7 +3715,7 @@ data:
"30d"
]
},
- "timezone": "",
+ "timezone": "UTC",
"title": "Kubernetes / Proxy",
"uid": "632e265de029684c40b21cb76bca4f94",
"version": 0
@ -31,6 +31,7 @@ data:
|
||||
"fill": 1,
|
||||
"format": "percentunit",
|
||||
"id": 1,
|
||||
"interval": "1m",
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
@@ -59,7 +60,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\", cluster=\"$cluster\"}[$__interval]))",
+ "expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\", cluster=\"$cluster\"}[$__rate_interval]))",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@ -686,6 +687,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 0,
|
||||
"link": true,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down to pods",
|
||||
"linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1",
|
||||
"pattern": "Value #A",
|
||||
@ -704,6 +706,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 0,
|
||||
"link": true,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down to workloads",
|
||||
"linkUrl": "./d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1",
|
||||
"pattern": "Value #B",
|
||||
@ -722,6 +725,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #C",
|
||||
@ -740,6 +744,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #D",
|
||||
@ -758,6 +763,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #E",
|
||||
@ -776,6 +782,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #F",
|
||||
@ -794,6 +801,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #G",
|
||||
@ -812,6 +820,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": true,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down to pods",
|
||||
"linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell",
|
||||
"pattern": "namespace",
|
||||
@@ -839,7 +848,7 @@ data:
],
"targets": [
{
- "expr": "count(mixin_pod_workload{cluster=\"$cluster\"}) by (namespace)",
+ "expr": "sum(kube_pod_owner{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -848,7 +857,7 @@ data:
"step": 10
},
{
- "expr": "count(avg(mixin_pod_workload{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)",
+ "expr": "count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1105,6 +1114,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 0,
|
||||
"link": true,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down to pods",
|
||||
"linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1",
|
||||
"pattern": "Value #A",
|
||||
@ -1123,6 +1133,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 0,
|
||||
"link": true,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down to workloads",
|
||||
"linkUrl": "./d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1",
|
||||
"pattern": "Value #B",
|
||||
@ -1141,6 +1152,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #C",
|
||||
@ -1159,6 +1171,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #D",
|
||||
@ -1177,6 +1190,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #E",
|
||||
@ -1195,6 +1209,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #F",
|
||||
@ -1213,6 +1228,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #G",
|
||||
@ -1231,6 +1247,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": true,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down to pods",
|
||||
"linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell",
|
||||
"pattern": "namespace",
|
||||
@@ -1258,7 +1275,7 @@ data:
],
"targets": [
{
- "expr": "count(mixin_pod_workload{cluster=\"$cluster\"}) by (namespace)",
+ "expr": "sum(kube_pod_owner{cluster=\"$cluster\"}) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -1267,7 +1284,7 @@ data:
"step": 10
},
{
- "expr": "count(avg(mixin_pod_workload{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)",
+ "expr": "count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
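Both namespace tables drop the old mixin_pod_workload series: the pod count now comes straight from kube-state-metrics' kube_pod_owner, and the workload count uses the namespace_workload_pod:kube_pod_owner:relabel recording rule shipped by newer kubernetes-mixin rule files. The two replacement queries, shown standalone as a sketch (names taken from the hunks above):

```promql
# Pods per namespace, from the ownership series.
sum by (namespace) (kube_pod_owner{cluster="$cluster"})

# Distinct workloads per namespace, via the relabelled recording rule.
count by (namespace) (
  avg by (workload, namespace) (namespace_workload_pod:kube_pod_owner:relabel{cluster="$cluster"})
)
```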
@ -1384,6 +1401,7 @@ data:
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 11,
|
||||
"interval": "1m",
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
@ -1426,6 +1444,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #A",
|
||||
@ -1444,6 +1463,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #B",
|
||||
@ -1462,6 +1482,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #C",
|
||||
@ -1480,6 +1501,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #D",
|
||||
@ -1498,6 +1520,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #E",
|
||||
@ -1516,6 +1539,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #F",
|
||||
@ -1534,6 +1558,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": true,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down to pods",
|
||||
"linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell",
|
||||
"pattern": "namespace",
|
||||
@@ -1561,7 +1586,7 @@ data:
],
"targets": [
{
- "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
+ "expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -1570,7 +1595,7 @@ data:
"step": 10
},
{
- "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
+ "expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "table",
"instant": true,
"intervalFactor": 2,
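The networking targets in this table, and the graphs that follow, replace the fixed [$__interval] range with Grafana's $__rate_interval variable, which is sized to cover several scrape intervals so irate()/rate() never sees a window with too few samples at coarse resolutions. A sketch of the updated form of one of these queries:

```promql
# Per-namespace receive bandwidth; Grafana substitutes $__rate_interval at query time.
sum by (namespace) (
  irate(container_network_receive_bytes_total{cluster="$cluster", namespace=~".+"}[$__rate_interval])
)
```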
@ -1579,7 +1604,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -1588,7 +1613,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||
"expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -1597,7 +1622,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -1606,7 +1631,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -1706,7 +1731,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{namespace}}",
|
||||
@ -1804,7 +1829,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{namespace}}",
|
||||
@ -1902,7 +1927,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||
"expr": "avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{namespace}}",
|
||||
@ -2000,7 +2025,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||
"expr": "avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{namespace}}",
|
||||
@ -2098,7 +2123,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{namespace}}",
|
||||
@@ -2196,7 +2221,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
+ "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{namespace}}",
@ -2294,7 +2319,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{namespace}}",
|
||||
@ -2392,7 +2417,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{namespace}}",
|
||||
@@ -2472,33 +2497,6 @@ data:
"regex": "",
"type": "datasource"
},
- {
- "allValue": null,
- "current": {
- "text": "prod",
- "value": "prod"
- },
- "datasource": "$datasource",
- "hide": 2,
- "includeAll": false,
- "label": "cluster",
- "multi": false,
- "name": "cluster",
- "options": [
- ],
- "query": "label_values(node_cpu_seconds_total, cluster)",
- "refresh": 1,
- "regex": "",
- "sort": 2,
- "tagValuesQuery": "",
- "tags": [
- ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
{
"allValue": null,
"current": {
@@ -2557,7 +2555,7 @@ data:
"30d"
]
},
- "timezone": "",
+ "timezone": "UTC",
"title": "Kubernetes / Compute Resources / Cluster",
"uid": "efa86fd1d0c121a26444b636a3f509a8",
"version": 0
@@ -2789,7 +2787,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\"})",
+ "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@@ -2873,7 +2871,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\"})",
+ "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\"})",
"format": "time_series",
"instant": true,
"intervalFactor": 2,
@ -3115,6 +3113,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #A",
|
||||
@ -3133,6 +3132,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #B",
|
||||
@ -3151,6 +3151,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #C",
|
||||
@ -3169,6 +3170,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #D",
|
||||
@ -3187,6 +3189,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #E",
|
||||
@ -3205,6 +3208,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": true,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
|
||||
"pattern": "pod",
|
||||
@@ -3387,7 +3391,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}) by (pod)",
+ "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -3515,6 +3519,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #A",
|
||||
@ -3533,6 +3538,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #B",
|
||||
@ -3551,6 +3557,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #C",
|
||||
@ -3569,6 +3576,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #D",
|
||||
@ -3587,6 +3595,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #E",
|
||||
@ -3605,6 +3614,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #F",
|
||||
@ -3623,6 +3633,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #G",
|
||||
@ -3641,6 +3652,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #H",
|
||||
@ -3659,6 +3671,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": true,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
|
||||
"pattern": "pod",
|
||||
@@ -3686,7 +3699,7 @@ data:
],
"targets": [
{
- "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)",
+ "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -3704,7 +3717,7 @@ data:
"step": 10
},
{
- "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\"}) by (pod)",
+ "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -3722,7 +3735,7 @@ data:
"step": 10
},
{
- "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\"}) by (pod)",
+ "expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -3821,6 +3834,7 @@ data:
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 9,
|
||||
"interval": "1m",
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
@ -3863,6 +3877,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #A",
|
||||
@ -3881,6 +3896,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #B",
|
||||
@ -3899,6 +3915,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #C",
|
||||
@ -3917,6 +3934,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #D",
|
||||
@ -3935,6 +3953,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #E",
|
||||
@ -3953,6 +3972,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #F",
|
||||
@ -3971,6 +3991,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": true,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down to pods",
|
||||
"linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
|
||||
"pattern": "pod",
|
||||
@ -3998,7 +4019,7 @@ data:
|
||||
],
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
|
||||
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -4007,7 +4028,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
|
||||
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -4016,7 +4037,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
|
||||
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -4025,7 +4046,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
|
||||
"expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -4034,7 +4055,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
|
||||
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -4043,7 +4064,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
|
||||
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -4143,7 +4164,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
|
||||
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
@ -4241,7 +4262,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
|
||||
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
@ -4339,7 +4360,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
|
||||
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
@ -4437,7 +4458,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
|
||||
"expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
@ -4535,7 +4556,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
|
||||
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
@ -4633,7 +4654,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
|
||||
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
@@ -4798,7 +4819,7 @@ data:
"30d"
]
},
- "timezone": "",
+ "timezone": "UTC",
"title": "Kubernetes / Compute Resources / Namespace (Pods)",
"uid": "85a562078cdf77779eaa1add43ccec1e",
"version": 0
@@ -4861,7 +4882,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=\"$node\"}) by (pod)",
+ "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -4973,6 +4994,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #A",
|
||||
@ -4991,6 +5013,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #B",
|
||||
@ -5009,6 +5032,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #C",
|
||||
@ -5027,6 +5051,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #D",
|
||||
@ -5045,6 +5070,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #E",
|
||||
@ -5063,6 +5089,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "pod",
|
||||
@ -5090,7 +5117,7 @@ data:
|
||||
],
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=\"$node\"}) by (pod)",
|
||||
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -5099,7 +5126,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", node=\"$node\"}) by (pod)",
|
||||
"expr": "sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -5108,7 +5135,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=\"$node\"}) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", node=\"$node\"}) by (pod)",
|
||||
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -5117,7 +5144,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", node=\"$node\"}) by (pod)",
|
||||
"expr": "sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -5126,7 +5153,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=\"$node\"}) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", node=\"$node\"}) by (pod)",
|
||||
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@@ -5226,7 +5253,7 @@ data:
"steppedLine": false,
"targets": [
{
- "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=\"$node\", container!=\"\"}) by (pod)",
+ "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\", container!=\"\"}) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -5338,6 +5365,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #A",
|
||||
@ -5356,6 +5384,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #B",
|
||||
@ -5374,6 +5403,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #C",
|
||||
@ -5392,6 +5422,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #D",
|
||||
@ -5410,6 +5441,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #E",
|
||||
@ -5428,6 +5460,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #F",
|
||||
@ -5446,6 +5479,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #G",
|
||||
@ -5464,6 +5498,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #H",
|
||||
@ -5482,6 +5517,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
"linkUrl": "",
"pattern": "pod",
@ -5509,7 +5545,7 @@ data:
],
"targets": [
{
"expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)",
"expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -5518,7 +5554,7 @@ data:
"step": 10
},
{
"expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", node=\"$node\"}) by (pod)",
"expr": "sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -5527,7 +5563,7 @@ data:
"step": 10
},
{
"expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{node=\"$node\"}) by (pod)",
"expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests_memory_bytes{node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -5536,7 +5572,7 @@ data:
"step": 10
},
{
"expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", node=\"$node\"}) by (pod)",
"expr": "sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -5545,7 +5581,7 @@ data:
"step": 10
},
{
"expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{node=\"$node\"}) by (pod)",
"expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits_memory_bytes{node=~\"$node\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -5554,7 +5590,7 @@ data:
"step": 10
},
{
"expr": "sum(node_namespace_pod_container:container_memory_rss{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)",
"expr": "sum(node_namespace_pod_container:container_memory_rss{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -5563,7 +5599,7 @@ data:
"step": 10
},
{
"expr": "sum(node_namespace_pod_container:container_memory_cache{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)",
"expr": "sum(node_namespace_pod_container:container_memory_cache{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -5572,7 +5608,7 @@ data:
"step": 10
},
{
"expr": "sum(node_namespace_pod_container:container_memory_swap{cluster=\"$cluster\", node=\"$node\",container!=\"\"}) by (pod)",
"expr": "sum(node_namespace_pod_container:container_memory_swap{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -5691,7 +5727,7 @@ data:
"hide": 0,
"includeAll": false,
"label": null,
"multi": false,
"multi": true,
"name": "node",
"options": [

@ -5739,7 +5775,7 @@ data:
"30d"
]
},
"timezone": "",
"timezone": "UTC",
"title": "Kubernetes / Compute Resources / Node (Pods)",
"uid": "200ac8fdbfbb74b39aff88118e4d1c2c",
"version": 0
@ -189,7 +189,7 @@ data:
"steppedLine": false,
"targets": [
{
"expr": "sum(increase(container_cpu_cfs_throttled_periods_total{namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", cluster=\"$cluster\"}[5m])) by (container) /sum(increase(container_cpu_cfs_periods_total{namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", cluster=\"$cluster\"}[5m])) by (container)",
"expr": "sum(increase(container_cpu_cfs_throttled_periods_total{namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\", cluster=\"$cluster\"}[5m])) by (container) /sum(increase(container_cpu_cfs_periods_total{namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\", cluster=\"$cluster\"}[5m])) by (container)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{container}}",
@ -203,7 +203,7 @@ data:
"fill": true,
"line": true,
"op": "gt",
"value": 1,
"value": 0.80000000000000004,
"yaxis": "left"
}
],
@ -308,6 +308,7 @@ data:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #A",
@ -326,6 +327,7 @@ data:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #B",
@ -344,6 +346,7 @@ data:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #C",
@ -362,6 +365,7 @@ data:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #D",
@ -380,6 +384,7 @@ data:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #E",
@ -398,6 +403,7 @@ data:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "container",
@ -580,7 +586,7 @@ data:
"steppedLine": false,
"targets": [
{
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\"}) by (container)",
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\", image!=\"\"}) by (container)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{container}}",
@ -708,6 +714,7 @@ data:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #A",
@ -726,6 +733,7 @@ data:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #B",
@ -744,6 +752,7 @@ data:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #C",
@ -762,6 +771,7 @@ data:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #D",
@ -780,6 +790,7 @@ data:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #E",
@ -798,6 +809,7 @@ data:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #F",
@ -816,6 +828,7 @@ data:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #G",
@ -834,6 +847,7 @@ data:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "Value #H",
@ -852,6 +866,7 @@ data:
"dateFormat": "YYYY-MM-DD HH:mm:ss",
"decimals": 2,
"link": false,
"linkTargetBlank": false,
"linkTooltip": "Drill down",
"linkUrl": "",
"pattern": "container",
@ -879,7 +894,7 @@ data:
],
"targets": [
{
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\"}) by (container)",
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\", image!=\"\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -897,7 +912,7 @@ data:
"step": 10
},
{
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) by (container) / sum(kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -915,7 +930,7 @@ data:
"step": 10
},
{
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"}) by (container) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container) / sum(kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)",
"format": "table",
"instant": true,
"intervalFactor": 2,
@ -1014,6 +1029,7 @@ data:
"datasource": "$datasource",
"fill": 10,
"id": 6,
"interval": "1m",
"legend": {
"avg": false,
"current": false,
@ -1042,7 +1058,7 @@ data:
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__interval])) by (pod)",
"expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1112,6 +1128,7 @@ data:
"datasource": "$datasource",
"fill": 10,
"id": 7,
"interval": "1m",
"legend": {
"avg": false,
"current": false,
@ -1140,7 +1157,7 @@ data:
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__interval])) by (pod)",
"expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1210,6 +1227,7 @@ data:
"datasource": "$datasource",
"fill": 10,
"id": 8,
"interval": "1m",
"legend": {
"avg": false,
"current": false,
@ -1238,7 +1256,7 @@ data:
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__interval])) by (pod)",
"expr": "sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1308,6 +1326,7 @@ data:
"datasource": "$datasource",
"fill": 10,
"id": 9,
"interval": "1m",
"legend": {
"avg": false,
"current": false,
@ -1336,7 +1355,7 @@ data:
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__interval])) by (pod)",
"expr": "sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1406,6 +1425,7 @@ data:
"datasource": "$datasource",
"fill": 10,
"id": 10,
"interval": "1m",
"legend": {
"avg": false,
"current": false,
@ -1434,7 +1454,7 @@ data:
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__interval])) by (pod)",
"expr": "sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1504,6 +1524,7 @@ data:
"datasource": "$datasource",
"fill": 10,
"id": 11,
"interval": "1m",
"legend": {
"avg": false,
"current": false,
@ -1532,7 +1553,7 @@ data:
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__interval])) by (pod)",
"expr": "sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{pod}}",
@ -1724,7 +1745,7 @@ data:
"30d"
]
},
"timezone": "",
"timezone": "UTC",
"title": "Kubernetes / Compute Resources / Pod",
"uid": "6581e46e4e5c7ba40a07646395ef7b23",
"version": 0
@ -1787,7 +1808,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
@ -1899,6 +1920,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #A",
|
||||
@ -1917,6 +1939,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #B",
|
||||
@ -1935,6 +1958,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #C",
|
||||
@ -1953,6 +1977,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #D",
|
||||
@ -1971,6 +1996,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #E",
|
||||
@ -1989,6 +2015,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": true,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
|
||||
"pattern": "pod",
|
||||
@ -2016,7 +2043,7 @@ data:
|
||||
],
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -2025,7 +2052,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"expr": "sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -2034,7 +2061,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -2043,7 +2070,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"expr": "sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -2052,7 +2079,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -2152,7 +2179,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
@ -2264,6 +2291,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #A",
|
||||
@ -2282,6 +2310,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #B",
|
||||
@ -2300,6 +2329,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #C",
|
||||
@ -2318,6 +2348,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #D",
|
||||
@ -2336,6 +2367,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #E",
|
||||
@ -2354,6 +2386,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": true,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
|
||||
"pattern": "pod",
|
||||
@ -2381,7 +2414,7 @@ data:
|
||||
],
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -2390,7 +2423,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"expr": "sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -2399,7 +2432,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -2408,7 +2441,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"expr": "sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -2417,7 +2450,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=\"$type\"}\n) by (pod)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -2489,6 +2522,7 @@ data:
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 5,
|
||||
"interval": "1m",
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
@ -2531,6 +2565,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #A",
|
||||
@ -2549,6 +2584,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #B",
|
||||
@ -2567,6 +2603,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #C",
|
||||
@ -2585,6 +2622,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #D",
|
||||
@ -2603,6 +2641,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #E",
|
||||
@ -2621,6 +2660,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #F",
|
||||
@ -2639,6 +2679,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": true,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
|
||||
"pattern": "pod",
|
||||
@ -2666,7 +2707,7 @@ data:
|
||||
],
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -2675,7 +2716,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -2684,7 +2725,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -2693,7 +2734,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -2702,7 +2743,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -2711,7 +2752,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -2811,7 +2852,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
@ -2909,7 +2950,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
@ -3007,7 +3048,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
@ -3105,7 +3146,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
@ -3203,7 +3244,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
@ -3301,7 +3342,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
@ -3399,7 +3440,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
@ -3497,7 +3538,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
@ -3646,7 +3687,7 @@ data:
|
||||
"options": [
|
||||
|
||||
],
|
||||
"query": "label_values(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}, workload)",
|
||||
"query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\"}, workload)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
@ -3673,7 +3714,7 @@ data:
|
||||
"options": [
|
||||
|
||||
],
|
||||
"query": "label_values(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\"}, workload_type)",
|
||||
"query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\"}, workload_type)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
@ -3716,7 +3757,7 @@ data:
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "",
|
||||
"timezone": "UTC",
|
||||
"title": "Kubernetes / Compute Resources / Workload",
|
||||
"uid": "a164a7f0339f99e89cea5cb47e9be617",
|
||||
"version": 0
|
||||
@ -3798,7 +3839,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
|
||||
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{workload}} - {{workload_type}}",
|
||||
@ -3926,6 +3967,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 0,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #A",
|
||||
@ -3944,6 +3986,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #B",
|
||||
@ -3962,6 +4005,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #C",
|
||||
@ -3980,6 +4024,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #D",
|
||||
@ -3998,6 +4043,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #E",
|
||||
@ -4016,6 +4062,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #F",
|
||||
@ -4034,6 +4081,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": true,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "./d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2",
|
||||
"pattern": "workload",
|
||||
@ -4052,6 +4100,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "workload_type",
|
||||
@ -4079,7 +4128,7 @@ data:
|
||||
],
|
||||
"targets": [
|
||||
{
|
||||
"expr": "count(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload, workload_type)",
|
||||
"expr": "count(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload, workload_type)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -4088,7 +4137,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
|
||||
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -4097,7 +4146,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
|
||||
"expr": "sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -4106,7 +4155,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
|
||||
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -4115,7 +4164,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
|
||||
"expr": "sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -4124,7 +4173,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
|
||||
"expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -4243,7 +4292,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
|
||||
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{workload}} - {{workload_type}}",
|
||||
@ -4371,6 +4420,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 0,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #A",
|
||||
@ -4389,6 +4439,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #B",
|
||||
@ -4407,6 +4458,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #C",
|
||||
@ -4425,6 +4477,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #D",
|
||||
@ -4443,6 +4496,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #E",
|
||||
@ -4461,6 +4515,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #F",
|
||||
@ -4479,6 +4534,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": true,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "./d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2",
|
||||
"pattern": "workload",
|
||||
@ -4497,6 +4553,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "workload_type",
|
||||
@ -4524,7 +4581,7 @@ data:
|
||||
],
|
||||
"targets": [
|
||||
{
|
||||
"expr": "count(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload, workload_type)",
|
||||
"expr": "count(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}) by (workload, workload_type)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -4533,7 +4590,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
|
||||
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -4542,7 +4599,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
|
||||
"expr": "sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -4551,7 +4608,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
|
||||
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -4560,7 +4617,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
|
||||
"expr": "sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -4569,7 +4626,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
|
||||
"expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=\"$type\"}\n) by (workload, workload_type)\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@@ -4641,6 +4698,7 @@ data:
"datasource": "$datasource",
"fill": 1,
"id": 5,
"interval": "1m",
"legend": {
"avg": false,
"current": false,
@@ -4683,6 +4741,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #A",
|
||||
@ -4701,6 +4760,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #B",
|
||||
@ -4719,6 +4779,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #C",
|
||||
@ -4737,6 +4798,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #D",
|
||||
@ -4755,6 +4817,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #E",
|
||||
@ -4773,6 +4836,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #F",
|
||||
@ -4791,6 +4855,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": true,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down to pods",
|
||||
"linkUrl": "./d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$type",
|
||||
"pattern": "workload",
|
||||
@ -4809,6 +4874,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "workload_type",
|
||||
@@ -4836,7 +4902,7 @@ data:
],
"targets": [
{
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"format": "table",
"instant": true,
"intervalFactor": 2,
@@ -4845,7 +4911,7 @@ data:
"step": 10
},
{
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -4854,7 +4920,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -4863,7 +4929,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -4872,7 +4938,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -4881,7 +4947,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@@ -4981,7 +5047,7 @@ data:
"steppedLine": false,
"targets": [
{
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{workload}}",
|
||||
@ -5079,7 +5145,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{workload}}",
|
||||
@ -5177,7 +5243,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"expr": "(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{workload}}",
|
||||
@ -5275,7 +5341,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"expr": "(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{workload}}",
|
||||
@ -5373,7 +5439,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{workload}}",
|
||||
@ -5471,7 +5537,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{workload}}",
|
||||
@ -5569,7 +5635,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{workload}}",
|
||||
@ -5667,7 +5733,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod) \ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{workload}}",
|
||||
@@ -5757,7 +5823,7 @@ data:
"value": "deployment"
},
"datasource": "$datasource",
"definition": "label_values(mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\"}, workload_type)",
"definition": "label_values(namespace_workload_pod:kube_pod_owner:relabel{namespace=~\"$namespace\", workload=~\".+\"}, workload_type)",
"hide": 0,
"includeAll": false,
"label": null,
@@ -5766,7 +5832,7 @@ data:
"options": [

],
"query": "label_values(mixin_pod_workload{namespace=~\"$namespace\", workload=~\".+\"}, workload_type)",
"query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{namespace=~\"$namespace\", workload=~\".+\"}, workload_type)",
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
@ -5864,7 +5930,7 @@ data:
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "",
|
||||
"timezone": "UTC",
|
||||
"title": "Kubernetes / Compute Resources / Namespace (Workloads)",
|
||||
"uid": "a87fb0d919ec0ea5f6543124e16c42a5",
|
||||
"version": 0
|
||||
|
@ -20,6 +20,24 @@ data:
|
||||
"id": null,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"panels": [
|
||||
{
|
||||
"content": "The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only.",
|
||||
"datasource": null,
|
||||
"description": "The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only.",
|
||||
"gridPos": {
|
||||
"h": 2,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"mode": "markdown",
|
||||
"span": 12,
|
||||
"title": "Notice",
|
||||
"type": "text"
|
||||
}
|
||||
],
|
||||
"refresh": "10s",
|
||||
"rows": [
|
||||
@ -37,7 +55,9 @@ data:
|
||||
"#d44a3a"
|
||||
],
|
||||
"datasource": "$datasource",
|
||||
"format": "none",
|
||||
"decimals": 3,
|
||||
"description": "How many percent of requests (both read and write) in 30 days have been answered successfully and fast enough?",
|
||||
"format": "percentunit",
|
||||
"gauge": {
|
||||
"maxValue": 100,
|
||||
"minValue": 0,
|
||||
@ -48,7 +68,7 @@ data:
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 2,
|
||||
"id": 3,
|
||||
"interval": null,
|
||||
"links": [
|
||||
|
||||
@ -78,7 +98,7 @@ data:
|
||||
"to": "null"
|
||||
}
|
||||
],
|
||||
"span": 2,
|
||||
"span": 4,
|
||||
"sparkline": {
|
||||
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||
"full": false,
|
||||
@ -88,7 +108,7 @@ data:
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(up{job=\"apiserver\", cluster=\"$cluster\"})",
|
||||
"expr": "apiserver_request:availability30d{verb=\"all\", cluster=\"$cluster\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "",
|
||||
@ -96,7 +116,7 @@ data:
|
||||
}
|
||||
],
|
||||
"thresholds": "",
|
||||
"title": "Up",
|
||||
"title": "Availability (30d) > 99.000%",
|
||||
"tooltip": {
|
||||
"shared": false
|
||||
},
|
||||
@ -109,7 +129,7 @@ data:
|
||||
"value": "null"
|
||||
}
|
||||
],
|
||||
"valueName": "min"
|
||||
"valueName": "avg"
|
||||
},
|
||||
{
|
||||
"aliasColors": {
|
||||
@@ -119,11 +139,14 @@ data:
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"decimals": 3,
"description": "How much error budget is left looking at our 0.990% availability guarantees?",
"fill": 10,
"fillGradient": 0,
"gridPos": {

},
"id": 3,
"id": 4,
"legend": {
"alignAsTable": false,
"avg": false,
@@ -132,6 +155,7 @@ data:
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
@@ -150,37 +174,16 @@ data:

],
"spaceLength": 10,
"span": 5,
"span": 8,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(apiserver_request_total{job=\"apiserver\", instance=~\"$instance\",code=~\"2..\", cluster=\"$cluster\"}[5m]))",
"expr": "100 * (apiserver_request:availability30d{verb=\"all\", cluster=\"$cluster\"} - 0.990000)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "2xx",
"legendFormat": "errorbudget",
"refId": "A"
},
{
"expr": "sum(rate(apiserver_request_total{job=\"apiserver\", instance=~\"$instance\",code=~\"3..\", cluster=\"$cluster\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "3xx",
"refId": "B"
},
{
"expr": "sum(rate(apiserver_request_total{job=\"apiserver\", instance=~\"$instance\",code=~\"4..\", cluster=\"$cluster\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "4xx",
"refId": "C"
},
{
"expr": "sum(rate(apiserver_request_total{job=\"apiserver\", instance=~\"$instance\",code=~\"5..\", cluster=\"$cluster\"}[5m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "5xx",
"refId": "D"
}
],
"thresholds": [
@@ -188,7 +191,7 @@ data:
],
"timeFrom": null,
"timeShift": null,
"title": "RPC Rate",
"title": "ErrorBudget (30d) > 99.000%",
"tooltip": {
"shared": false,
"sort": 0,
@@ -206,7 +209,8 @@ data:
},
"yaxes": [
{
"format": "ops",
"decimals": 3,
"format": "percentunit",
"label": null,
"logBase": 1,
"max": null,
@@ -214,7 +218,216 @@ data:
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "ops",
|
||||
"decimals": 3,
|
||||
"format": "percentunit",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": false,
|
||||
"title": "Dashboard Row",
|
||||
"titleSize": "h6",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"collapsed": false,
|
||||
"panels": [
|
||||
{
|
||||
"cacheTimeout": null,
|
||||
"colorBackground": false,
|
||||
"colorValue": false,
|
||||
"colors": [
|
||||
"#299c46",
|
||||
"rgba(237, 129, 40, 0.89)",
|
||||
"#d44a3a"
|
||||
],
|
||||
"datasource": "$datasource",
|
||||
"decimals": 3,
|
||||
"description": "How many percent of read requests (LIST,GET) in 30 days have been answered successfully and fast enough?",
|
||||
"format": "percentunit",
|
||||
"gauge": {
|
||||
"maxValue": 100,
|
||||
"minValue": 0,
|
||||
"show": false,
|
||||
"thresholdLabels": false,
|
||||
"thresholdMarkers": true
|
||||
},
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 5,
|
||||
"interval": null,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"mappingType": 1,
|
||||
"mappingTypes": [
|
||||
{
|
||||
"name": "value to text",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"name": "range to text",
|
||||
"value": 2
|
||||
}
|
||||
],
|
||||
"maxDataPoints": 100,
|
||||
"nullPointMode": "connected",
|
||||
"nullText": null,
|
||||
"postfix": "",
|
||||
"postfixFontSize": "50%",
|
||||
"prefix": "",
|
||||
"prefixFontSize": "50%",
|
||||
"rangeMaps": [
|
||||
{
|
||||
"from": "null",
|
||||
"text": "N/A",
|
||||
"to": "null"
|
||||
}
|
||||
],
|
||||
"span": 3,
|
||||
"sparkline": {
|
||||
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||
"full": false,
|
||||
"lineColor": "rgb(31, 120, 193)",
|
||||
"show": false
|
||||
},
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "apiserver_request:availability30d{verb=\"read\", cluster=\"$cluster\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": "",
|
||||
"title": "Read Availability (30d)",
|
||||
"tooltip": {
|
||||
"shared": false
|
||||
},
|
||||
"type": "singlestat",
|
||||
"valueFontSize": "80%",
|
||||
"valueMaps": [
|
||||
{
|
||||
"op": "=",
|
||||
"text": "N/A",
|
||||
"value": "null"
|
||||
}
|
||||
],
|
||||
"valueName": "avg"
|
||||
},
|
||||
{
|
||||
"aliasColors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"description": "How many read requests (LIST,GET) per second do the apiservers get by code?",
|
||||
"fill": 10,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 6,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
{
|
||||
"alias": "/2../i",
|
||||
"color": "#56A64B"
|
||||
},
|
||||
{
|
||||
"alias": "/3../i",
|
||||
"color": "#F2CC0C"
|
||||
},
|
||||
{
|
||||
"alias": "/4../i",
|
||||
"color": "#3274D9"
|
||||
},
|
||||
{
|
||||
"alias": "/5../i",
|
||||
"color": "#E02F44"
|
||||
}
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 3,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (code) (code_resource:apiserver_request_total:rate5m{verb=\"read\", cluster=\"$cluster\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{ code }}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Read SLI - Requests",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [
|
||||
|
||||
]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "reqps",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "reqps",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
@ -231,21 +444,24 @@ data:
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"description": "How many percent of read requests (LIST,GET) per second are returned with errors (5xx)?",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 4,
|
||||
"id": 7,
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": true,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": true
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
@@ -262,15 +478,15 @@ data:

],
"spaceLength": 10,
"span": 5,
"span": 3,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\", instance=~\"$instance\", verb!=\"WATCH\", cluster=\"$cluster\"}[5m])) by (verb, le))",
"expr": "sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"read\",code=~\"5..\", cluster=\"$cluster\"}) / sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"read\", cluster=\"$cluster\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{verb}}",
"legendFormat": "{{ resource }}",
"refId": "A"
}
],
@@ -279,7 +495,497 @@ data:
],
"timeFrom": null,
"timeShift": null,
"title": "Request duration 99th quantile",
"title": "Read SLI - Errors",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [
|
||||
|
||||
]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "percentunit",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "percentunit",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"description": "How many seconds is the 99th percentile for reading (LIST|GET) a given resource?",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 8,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 3,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{verb=\"read\", cluster=\"$cluster\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{ resource }}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Read SLI - Duration",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [
|
||||
|
||||
]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "s",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "s",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": false,
|
||||
"title": "Dashboard Row",
|
||||
"titleSize": "h6",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"collapsed": false,
|
||||
"panels": [
|
||||
{
|
||||
"cacheTimeout": null,
|
||||
"colorBackground": false,
|
||||
"colorValue": false,
|
||||
"colors": [
|
||||
"#299c46",
|
||||
"rgba(237, 129, 40, 0.89)",
|
||||
"#d44a3a"
|
||||
],
|
||||
"datasource": "$datasource",
|
||||
"decimals": 3,
|
||||
"description": "How many percent of write requests (POST|PUT|PATCH|DELETE) in 30 days have been answered successfully and fast enough?",
|
||||
"format": "percentunit",
|
||||
"gauge": {
|
||||
"maxValue": 100,
|
||||
"minValue": 0,
|
||||
"show": false,
|
||||
"thresholdLabels": false,
|
||||
"thresholdMarkers": true
|
||||
},
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 9,
|
||||
"interval": null,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"mappingType": 1,
|
||||
"mappingTypes": [
|
||||
{
|
||||
"name": "value to text",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"name": "range to text",
|
||||
"value": 2
|
||||
}
|
||||
],
|
||||
"maxDataPoints": 100,
|
||||
"nullPointMode": "connected",
|
||||
"nullText": null,
|
||||
"postfix": "",
|
||||
"postfixFontSize": "50%",
|
||||
"prefix": "",
|
||||
"prefixFontSize": "50%",
|
||||
"rangeMaps": [
|
||||
{
|
||||
"from": "null",
|
||||
"text": "N/A",
|
||||
"to": "null"
|
||||
}
|
||||
],
|
||||
"span": 3,
|
||||
"sparkline": {
|
||||
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||
"full": false,
|
||||
"lineColor": "rgb(31, 120, 193)",
|
||||
"show": false
|
||||
},
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "apiserver_request:availability30d{verb=\"write\", cluster=\"$cluster\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": "",
|
||||
"title": "Write Availability (30d)",
|
||||
"tooltip": {
|
||||
"shared": false
|
||||
},
|
||||
"type": "singlestat",
|
||||
"valueFontSize": "80%",
|
||||
"valueMaps": [
|
||||
{
|
||||
"op": "=",
|
||||
"text": "N/A",
|
||||
"value": "null"
|
||||
}
|
||||
],
|
||||
"valueName": "avg"
|
||||
},
|
||||
{
|
||||
"aliasColors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"description": "How many write requests (POST|PUT|PATCH|DELETE) per second do the apiservers get by code?",
|
||||
"fill": 10,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 10,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
{
|
||||
"alias": "/2../i",
|
||||
"color": "#56A64B"
|
||||
},
|
||||
{
|
||||
"alias": "/3../i",
|
||||
"color": "#F2CC0C"
|
||||
},
|
||||
{
|
||||
"alias": "/4../i",
|
||||
"color": "#3274D9"
|
||||
},
|
||||
{
|
||||
"alias": "/5../i",
|
||||
"color": "#E02F44"
|
||||
}
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 3,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (code) (code_resource:apiserver_request_total:rate5m{verb=\"write\", cluster=\"$cluster\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{ code }}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Write SLI - Requests",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [
|
||||
|
||||
]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "reqps",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "reqps",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"description": "How many percent of write requests (POST|PUT|PATCH|DELETE) per second are returned with errors (5xx)?",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 11,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 3,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"write\",code=~\"5..\", cluster=\"$cluster\"}) / sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"write\", cluster=\"$cluster\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{ resource }}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Write SLI - Errors",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [
|
||||
|
||||
]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "percentunit",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "percentunit",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"description": "How many seconds is the 99th percentile for writing (POST|PUT|PATCH|DELETE) a given resource?",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 12,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 3,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{verb=\"write\", cluster=\"$cluster\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{ resource }}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Write SLI - Duration",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
@ -336,10 +1042,11 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 5,
|
||||
"id": 13,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
@ -348,6 +1055,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": false,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@ -427,10 +1135,11 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 6,
|
||||
"id": 14,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
@ -439,6 +1148,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": false,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@ -518,10 +1228,11 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 7,
|
||||
"id": 15,
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
@ -530,6 +1241,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
@ -622,310 +1334,11 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 8,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 4,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "etcd_helper_cache_entry_total{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "ETCD Cache Entry Total",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [
|
||||
|
||||
]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 9,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 4,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(etcd_helper_cache_hit_total{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} hit",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(etcd_helper_cache_miss_total{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} miss",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "ETCD Cache Hit/Miss Rate",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [
|
||||
|
||||
]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "ops",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "ops",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 10,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 4,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.99,sum(rate(etcd_request_cache_get_duration_seconds_bucket{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, le))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} get",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.99,sum(rate(etcd_request_cache_add_duration_seconds_bucket{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, le))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} miss",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "ETCD Cache Duration 99th Quantile",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [
|
||||
|
||||
]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "s",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "s",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": false,
|
||||
"title": "Dashboard Row",
|
||||
"titleSize": "h6",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"collapsed": false,
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 11,
|
||||
"id": 16,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
@ -934,6 +1347,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@ -1013,10 +1427,11 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 12,
|
||||
"id": 17,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
@ -1025,6 +1440,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@ -1104,10 +1520,11 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 13,
|
||||
"id": 18,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
@ -1116,6 +1533,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@@ -1222,20 +1640,19 @@ data:
{
"allValue": null,
"current": {
"text": "prod",
"value": "prod"

},
"datasource": "$datasource",
"hide": 2,
"includeAll": false,
"label": null,
"label": "cluster",
"multi": false,
"name": "cluster",
"options": [

],
"query": "label_values(apiserver_request_total, cluster)",
"refresh": 1,
"refresh": 2,
"regex": "",
"sort": 1,
"tagValuesQuery": "",
@@ -1303,7 +1720,7 @@ data:
"30d"
]
},
"timezone": "",
"timezone": "UTC",
"title": "Kubernetes / API server",
"uid": "09ec8aa1e996d6ffcd6817bbaff4db1b",
"version": 0
@@ -1428,6 +1845,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -1440,6 +1858,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
@ -1532,6 +1951,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -1544,6 +1964,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
@ -1636,6 +2057,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -1648,6 +2070,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
@ -1740,6 +2163,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -1752,6 +2176,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@ -1852,6 +2277,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -1864,6 +2290,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@@ -1887,7 +2314,7 @@ data:
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"POST\"}[5m])) by (verb, url, le))",
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"POST\"}[5m])) by (verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{verb}} {{url}}",
|
||||
@ -1956,6 +2383,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -1968,6 +2396,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
@ -1991,7 +2420,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))",
|
||||
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{verb}} {{url}}",
|
||||
@ -2060,6 +2489,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -2072,6 +2502,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@ -2151,6 +2582,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -2163,6 +2595,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@ -2242,6 +2675,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -2254,6 +2688,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@@ -2414,7 +2849,7 @@ data:
"30d"
]
},
"timezone": "",
"timezone": "UTC",
"title": "Kubernetes / Controller Manager",
"uid": "72e0e05bef5099e5f049b05fdc429ed4",
"version": 0
|
||||
@ -2455,6 +2890,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -2467,6 +2903,7 @@ data:
|
||||
"min": true,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
@@ -2605,7 +3042,7 @@ data:
"tableColumn": "",
"targets": [
{
"expr": "(\n kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n -\n kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n)\n/\nkubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n",
"expr": "max without(instance,node) (\n(\n kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n -\n kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n)\n/\nkubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
|
||||
@ -2650,6 +3087,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -2662,6 +3100,7 @@ data:
|
||||
"min": true,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
@@ -2800,7 +3239,7 @@ data:
"tableColumn": "",
"targets": [
{
"expr": "kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n/\nkubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n",
"expr": "max without(instance,node) (\nkubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n/\nkubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
@ -2965,7 +3404,7 @@ data:
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "",
|
||||
"timezone": "UTC",
|
||||
"title": "Kubernetes / Persistent Volumes",
|
||||
"uid": "919b92a8e8041bd567af9edab12c840c",
|
||||
"version": 0
|
||||
@ -3090,6 +3529,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -3102,6 +3542,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
@ -3202,6 +3643,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -3214,6 +3656,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
@ -3327,6 +3770,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -3339,6 +3783,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@ -3439,6 +3884,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -3451,6 +3897,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@@ -3474,7 +3921,7 @@ data:
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-scheduler\", instance=~\"$instance\", verb=\"POST\"}[5m])) by (verb, url, le))",
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{job=\"kube-scheduler\", instance=~\"$instance\", verb=\"POST\"}[5m])) by (verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{verb}} {{url}}",
@@ -3543,6 +3990,7 @@ data:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"fillGradient": 0,
"gridPos": {

},
@@ -3555,6 +4003,7 @@ data:
"min": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
"values": true
},
@@ -3578,7 +4027,7 @@ data:
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{job=\"kube-scheduler\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))",
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{job=\"kube-scheduler\", instance=~\"$instance\", verb=\"GET\"}[5m])) by (verb, url, le))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{verb}} {{url}}",
@ -3647,6 +4096,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -3659,6 +4109,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@ -3738,6 +4189,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -3750,6 +4202,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@ -3829,6 +4282,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -3841,6 +4295,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@@ -4001,11 +4456,917 @@ data:
"30d"
]
},
"timezone": "",
"timezone": "UTC",
"title": "Kubernetes / Scheduler",
"uid": "2e6b6a3b4bddf1427b3a55aa1311c656",
"version": 0
}
statefulset.json: |-
|
||||
{
|
||||
"__inputs": [
|
||||
|
||||
],
|
||||
"__requires": [
|
||||
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
|
||||
]
|
||||
},
|
||||
"editable": false,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"hideControls": false,
|
||||
"id": null,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"refresh": "",
|
||||
"rows": [
|
||||
{
|
||||
"collapse": false,
|
||||
"collapsed": false,
|
||||
"panels": [
|
||||
{
|
||||
"cacheTimeout": null,
|
||||
"colorBackground": false,
|
||||
"colorValue": false,
|
||||
"colors": [
|
||||
"#299c46",
|
||||
"rgba(237, 129, 40, 0.89)",
|
||||
"#d44a3a"
|
||||
],
|
||||
"datasource": "$datasource",
|
||||
"format": "none",
|
||||
"gauge": {
|
||||
"maxValue": 100,
|
||||
"minValue": 0,
|
||||
"show": false,
|
||||
"thresholdLabels": false,
|
||||
"thresholdMarkers": true
|
||||
},
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 2,
|
||||
"interval": null,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"mappingType": 1,
|
||||
"mappingTypes": [
|
||||
{
|
||||
"name": "value to text",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"name": "range to text",
|
||||
"value": 2
|
||||
}
|
||||
],
|
||||
"maxDataPoints": 100,
|
||||
"nullPointMode": "connected",
|
||||
"nullText": null,
|
||||
"postfix": "cores",
|
||||
"postfixFontSize": "50%",
|
||||
"prefix": "",
|
||||
"prefixFontSize": "50%",
|
||||
"rangeMaps": [
|
||||
{
|
||||
"from": "null",
|
||||
"text": "N/A",
|
||||
"to": "null"
|
||||
}
|
||||
],
|
||||
"span": 4,
|
||||
"sparkline": {
|
||||
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||
"lineColor": "rgb(31, 120, 193)",
|
||||
"show": true
|
||||
},
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(container_cpu_usage_seconds_total{job=\"kubernetes-cadvisor\", cluster=\"$cluster\", container!=\"\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}[3m]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": "",
|
||||
"title": "CPU",
|
||||
"tooltip": {
|
||||
"shared": false
|
||||
},
|
||||
"type": "singlestat",
|
||||
"valueFontSize": "80%",
|
||||
"valueMaps": [
|
||||
{
|
||||
"op": "=",
|
||||
"text": "0",
|
||||
"value": "null"
|
||||
}
|
||||
],
|
||||
"valueName": "current"
|
||||
},
|
||||
{
|
||||
"cacheTimeout": null,
|
||||
"colorBackground": false,
|
||||
"colorValue": false,
|
||||
"colors": [
|
||||
"#299c46",
|
||||
"rgba(237, 129, 40, 0.89)",
|
||||
"#d44a3a"
|
||||
],
|
||||
"datasource": "$datasource",
|
||||
"format": "none",
|
||||
"gauge": {
|
||||
"maxValue": 100,
|
||||
"minValue": 0,
|
||||
"show": false,
|
||||
"thresholdLabels": false,
|
||||
"thresholdMarkers": true
|
||||
},
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 3,
|
||||
"interval": null,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"mappingType": 1,
|
||||
"mappingTypes": [
|
||||
{
|
||||
"name": "value to text",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"name": "range to text",
|
||||
"value": 2
|
||||
}
|
||||
],
|
||||
"maxDataPoints": 100,
|
||||
"nullPointMode": "connected",
|
||||
"nullText": null,
|
||||
"postfix": "GB",
|
||||
"postfixFontSize": "50%",
|
||||
"prefix": "",
|
||||
"prefixFontSize": "50%",
|
||||
"rangeMaps": [
|
||||
{
|
||||
"from": "null",
|
||||
"text": "N/A",
|
||||
"to": "null"
|
||||
}
|
||||
],
|
||||
"span": 4,
|
||||
"sparkline": {
|
||||
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||
"lineColor": "rgb(31, 120, 193)",
|
||||
"show": true
|
||||
},
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(container_memory_usage_bytes{job=\"kubernetes-cadvisor\", cluster=\"$cluster\", container!=\"\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}) / 1024^3",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": "",
|
||||
"title": "Memory",
|
||||
"tooltip": {
|
||||
"shared": false
|
||||
},
|
||||
"type": "singlestat",
|
||||
"valueFontSize": "80%",
|
||||
"valueMaps": [
|
||||
{
|
||||
"op": "=",
|
||||
"text": "0",
|
||||
"value": "null"
|
||||
}
|
||||
],
|
||||
"valueName": "current"
|
||||
},
|
||||
{
|
||||
"cacheTimeout": null,
|
||||
"colorBackground": false,
|
||||
"colorValue": false,
|
||||
"colors": [
|
||||
"#299c46",
|
||||
"rgba(237, 129, 40, 0.89)",
|
||||
"#d44a3a"
|
||||
],
|
||||
"datasource": "$datasource",
|
||||
"format": "none",
|
||||
"gauge": {
|
||||
"maxValue": 100,
|
||||
"minValue": 0,
|
||||
"show": false,
|
||||
"thresholdLabels": false,
|
||||
"thresholdMarkers": true
|
||||
},
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 4,
|
||||
"interval": null,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"mappingType": 1,
|
||||
"mappingTypes": [
|
||||
{
|
||||
"name": "value to text",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"name": "range to text",
|
||||
"value": 2
|
||||
}
|
||||
],
|
||||
"maxDataPoints": 100,
|
||||
"nullPointMode": "connected",
|
||||
"nullText": null,
|
||||
"postfix": "Bps",
|
||||
"postfixFontSize": "50%",
|
||||
"prefix": "",
|
||||
"prefixFontSize": "50%",
|
||||
"rangeMaps": [
|
||||
{
|
||||
"from": "null",
|
||||
"text": "N/A",
|
||||
"to": "null"
|
||||
}
|
||||
],
|
||||
"span": 4,
|
||||
"sparkline": {
|
||||
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||
"lineColor": "rgb(31, 120, 193)",
|
||||
"show": true
|
||||
},
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(container_network_transmit_bytes_total{job=\"kubernetes-cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{job=\"kubernetes-cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",pod=~\"$statefulset.*\"}[3m]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": "",
|
||||
"title": "Network",
|
||||
"tooltip": {
|
||||
"shared": false
|
||||
},
|
||||
"type": "singlestat",
|
||||
"valueFontSize": "80%",
|
||||
"valueMaps": [
|
||||
{
|
||||
"op": "=",
|
||||
"text": "0",
|
||||
"value": "null"
|
||||
}
|
||||
],
|
||||
"valueName": "current"
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": false,
|
||||
"title": "Dashboard Row",
|
||||
"titleSize": "h6",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"collapsed": false,
|
||||
"height": "100px",
|
||||
"panels": [
|
||||
{
|
||||
"cacheTimeout": null,
|
||||
"colorBackground": false,
|
||||
"colorValue": false,
|
||||
"colors": [
|
||||
"#299c46",
|
||||
"rgba(237, 129, 40, 0.89)",
|
||||
"#d44a3a"
|
||||
],
|
||||
"datasource": "$datasource",
|
||||
"format": "none",
|
||||
"gauge": {
|
||||
"maxValue": 100,
|
||||
"minValue": 0,
|
||||
"show": false,
|
||||
"thresholdLabels": false,
|
||||
"thresholdMarkers": true
|
||||
},
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 5,
|
||||
"interval": null,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"mappingType": 1,
|
||||
"mappingTypes": [
|
||||
{
|
||||
"name": "value to text",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"name": "range to text",
|
||||
"value": 2
|
||||
}
|
||||
],
|
||||
"maxDataPoints": 100,
|
||||
"nullPointMode": "connected",
|
||||
"nullText": null,
|
||||
"postfix": "",
|
||||
"postfixFontSize": "50%",
|
||||
"prefix": "",
|
||||
"prefixFontSize": "50%",
|
||||
"rangeMaps": [
|
||||
{
|
||||
"from": "null",
|
||||
"text": "N/A",
|
||||
"to": "null"
|
||||
}
|
||||
],
|
||||
"span": 3,
|
||||
"sparkline": {
|
||||
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||
"full": false,
|
||||
"lineColor": "rgb(31, 120, 193)",
|
||||
"show": false
|
||||
},
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": "",
|
||||
"title": "Desired Replicas",
|
||||
"tooltip": {
|
||||
"shared": false
|
||||
},
|
||||
"type": "singlestat",
|
||||
"valueFontSize": "80%",
|
||||
"valueMaps": [
|
||||
{
|
||||
"op": "=",
|
||||
"text": "0",
|
||||
"value": "null"
|
||||
}
|
||||
],
|
||||
"valueName": "current"
|
||||
},
|
||||
{
|
||||
"cacheTimeout": null,
|
||||
"colorBackground": false,
|
||||
"colorValue": false,
|
||||
"colors": [
|
||||
"#299c46",
|
||||
"rgba(237, 129, 40, 0.89)",
|
||||
"#d44a3a"
|
||||
],
|
||||
"datasource": "$datasource",
|
||||
"format": "none",
|
||||
"gauge": {
|
||||
"maxValue": 100,
|
||||
"minValue": 0,
|
||||
"show": false,
|
||||
"thresholdLabels": false,
|
||||
"thresholdMarkers": true
|
||||
},
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 6,
|
||||
"interval": null,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"mappingType": 1,
|
||||
"mappingTypes": [
|
||||
{
|
||||
"name": "value to text",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"name": "range to text",
|
||||
"value": 2
|
||||
}
|
||||
],
|
||||
"maxDataPoints": 100,
|
||||
"nullPointMode": "connected",
|
||||
"nullText": null,
|
||||
"postfix": "",
|
||||
"postfixFontSize": "50%",
|
||||
"prefix": "",
|
||||
"prefixFontSize": "50%",
|
||||
"rangeMaps": [
|
||||
{
|
||||
"from": "null",
|
||||
"text": "N/A",
|
||||
"to": "null"
|
||||
}
|
||||
],
|
||||
"span": 3,
|
||||
"sparkline": {
|
||||
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||
"full": false,
|
||||
"lineColor": "rgb(31, 120, 193)",
|
||||
"show": false
|
||||
},
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": "",
|
||||
"title": "Replicas of current version",
|
||||
"tooltip": {
|
||||
"shared": false
|
||||
},
|
||||
"type": "singlestat",
|
||||
"valueFontSize": "80%",
|
||||
"valueMaps": [
|
||||
{
|
||||
"op": "=",
|
||||
"text": "0",
|
||||
"value": "null"
|
||||
}
|
||||
],
|
||||
"valueName": "current"
|
||||
},
|
||||
{
|
||||
"cacheTimeout": null,
|
||||
"colorBackground": false,
|
||||
"colorValue": false,
|
||||
"colors": [
|
||||
"#299c46",
|
||||
"rgba(237, 129, 40, 0.89)",
|
||||
"#d44a3a"
|
||||
],
|
||||
"datasource": "$datasource",
|
||||
"format": "none",
|
||||
"gauge": {
|
||||
"maxValue": 100,
|
||||
"minValue": 0,
|
||||
"show": false,
|
||||
"thresholdLabels": false,
|
||||
"thresholdMarkers": true
|
||||
},
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 7,
|
||||
"interval": null,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"mappingType": 1,
|
||||
"mappingTypes": [
|
||||
{
|
||||
"name": "value to text",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"name": "range to text",
|
||||
"value": 2
|
||||
}
|
||||
],
|
||||
"maxDataPoints": 100,
|
||||
"nullPointMode": "connected",
|
||||
"nullText": null,
|
||||
"postfix": "",
|
||||
"postfixFontSize": "50%",
|
||||
"prefix": "",
|
||||
"prefixFontSize": "50%",
|
||||
"rangeMaps": [
|
||||
{
|
||||
"from": "null",
|
||||
"text": "N/A",
|
||||
"to": "null"
|
||||
}
|
||||
],
|
||||
"span": 3,
|
||||
"sparkline": {
|
||||
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||
"full": false,
|
||||
"lineColor": "rgb(31, 120, 193)",
|
||||
"show": false
|
||||
},
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "max(kube_statefulset_status_observed_generation{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": "",
|
||||
"title": "Observed Generation",
|
||||
"tooltip": {
|
||||
"shared": false
|
||||
},
|
||||
"type": "singlestat",
|
||||
"valueFontSize": "80%",
|
||||
"valueMaps": [
|
||||
{
|
||||
"op": "=",
|
||||
"text": "0",
|
||||
"value": "null"
|
||||
}
|
||||
],
|
||||
"valueName": "current"
|
||||
},
|
||||
{
|
||||
"cacheTimeout": null,
|
||||
"colorBackground": false,
|
||||
"colorValue": false,
|
||||
"colors": [
|
||||
"#299c46",
|
||||
"rgba(237, 129, 40, 0.89)",
|
||||
"#d44a3a"
|
||||
],
|
||||
"datasource": "$datasource",
|
||||
"format": "none",
|
||||
"gauge": {
|
||||
"maxValue": 100,
|
||||
"minValue": 0,
|
||||
"show": false,
|
||||
"thresholdLabels": false,
|
||||
"thresholdMarkers": true
|
||||
},
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 8,
|
||||
"interval": null,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"mappingType": 1,
|
||||
"mappingTypes": [
|
||||
{
|
||||
"name": "value to text",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"name": "range to text",
|
||||
"value": 2
|
||||
}
|
||||
],
|
||||
"maxDataPoints": 100,
|
||||
"nullPointMode": "connected",
|
||||
"nullText": null,
|
||||
"postfix": "",
|
||||
"postfixFontSize": "50%",
|
||||
"prefix": "",
|
||||
"prefixFontSize": "50%",
|
||||
"rangeMaps": [
|
||||
{
|
||||
"from": "null",
|
||||
"text": "N/A",
|
||||
"to": "null"
|
||||
}
|
||||
],
|
||||
"span": 3,
|
||||
"sparkline": {
|
||||
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||
"full": false,
|
||||
"lineColor": "rgb(31, 120, 193)",
|
||||
"show": false
|
||||
},
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "max(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": "",
|
||||
"title": "Metadata Generation",
|
||||
"tooltip": {
|
||||
"shared": false
|
||||
},
|
||||
"type": "singlestat",
|
||||
"valueFontSize": "80%",
|
||||
"valueMaps": [
|
||||
{
|
||||
"op": "=",
|
||||
"text": "0",
|
||||
"value": "null"
|
||||
}
|
||||
],
|
||||
"valueName": "current"
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": false,
|
||||
"title": "Dashboard Row",
|
||||
"titleSize": "h6",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"collapsed": false,
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 9,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "replicas specified",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "max(kube_statefulset_status_replicas{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "replicas created",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"expr": "min(kube_statefulset_status_replicas_ready{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "ready",
|
||||
"refId": "C"
|
||||
},
|
||||
{
|
||||
"expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "replicas of current version",
|
||||
"refId": "D"
|
||||
},
|
||||
{
|
||||
"expr": "min(kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "updated",
|
||||
"refId": "E"
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Replicas",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [
|
||||
|
||||
]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": false,
|
||||
"title": "Dashboard Row",
|
||||
"titleSize": "h6",
|
||||
"type": "row"
|
||||
}
|
||||
],
|
||||
"schemaVersion": 14,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"kubernetes-mixin"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": null,
|
||||
"name": "datasource",
|
||||
"options": [
|
||||
|
||||
],
|
||||
"query": "prometheus",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 2,
|
||||
"includeAll": false,
|
||||
"label": "cluster",
|
||||
"multi": false,
|
||||
"name": "cluster",
|
||||
"options": [
|
||||
|
||||
],
|
||||
"query": "label_values(kube_statefulset_metadata_generation, cluster)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [
|
||||
|
||||
],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Namespace",
|
||||
"multi": false,
|
||||
"name": "namespace",
|
||||
"options": [
|
||||
|
||||
],
|
||||
"query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [
|
||||
|
||||
],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Name",
|
||||
"multi": false,
|
||||
"name": "statefulset",
|
||||
"options": [
|
||||
|
||||
],
|
||||
"query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\"}, statefulset)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [
|
||||
|
||||
],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "UTC",
|
||||
"title": "Kubernetes / StatefulSets",
|
||||
"uid": "a31c1f46e6f727cb37c0d731a7245005",
|
||||
"version": 0
|
||||
}
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: grafana-dashboards-k8s
|
||||
|
@@ -172,7 +172,7 @@ data:
"tableColumn": "",
"targets": [
{
"expr": "sum(avg_over_time(nginx_ingress_controller_nginx_process_connections{cluster=~\"$cluster\", controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\"}[2m]))",
"expr": "sum(avg_over_time(nginx_ingress_controller_nginx_process_connections{cluster=~\"$cluster\", controller_pod=~\"$controller\",controller_class=~\"$controller_class\",controller_namespace=~\"$namespace\",state=\"active\"}[2m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
@ -296,6 +296,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -308,6 +309,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": "true",
|
||||
"show": "true",
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": "true"
|
||||
},
|
||||
@ -387,6 +389,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -399,6 +402,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": "true",
|
||||
"show": "true",
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": "true"
|
||||
},
|
||||
@ -491,6 +495,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -503,6 +508,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": "true",
|
||||
"show": "true",
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": "true"
|
||||
},
|
||||
@ -609,6 +615,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -621,6 +628,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": "true",
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": "true"
|
||||
},
|
||||
@ -707,6 +715,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -719,6 +728,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": "true",
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": "true"
|
||||
},
|
||||
@ -798,6 +808,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -810,6 +821,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": "true",
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": "true"
|
||||
},
|
||||
|
@ -36,6 +36,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -48,6 +49,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@ -128,6 +130,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 0,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -140,6 +143,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@ -253,6 +257,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -265,6 +270,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@@ -417,7 +423,7 @@ data:
"tableColumn": "",
"targets": [
{
"expr": "100 -\n(\n node_memory_MemAvailable_bytes{job=\"node-exporter\", instance=\"$instance\"}\n/\n node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"}\n* 100\n)\n",
"expr": "100 -\n(\n avg(node_memory_MemAvailable_bytes{job=\"node-exporter\", instance=\"$instance\"})\n/\n avg(node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"})\n* 100\n)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
@ -459,6 +465,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 0,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -471,6 +478,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@ -574,6 +582,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -586,6 +595,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@ -692,6 +702,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 0,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -704,6 +715,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@ -784,6 +796,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 0,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -796,6 +809,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
|
@@ -21,7 +21,7 @@ data:
"links": [

],
"refresh": "",
"refresh": "60s",
"rows": [
{
"collapse": false,
@@ -36,6 +36,7 @@ data:
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"fillGradient": 0,
"gridPos": {

},
@@ -48,6 +49,7 @@ data:
"min": false,
"rightSide": false,
"show": true,
"sideWidth": null,
"total": false,
"values": false
},
@@ -71,10 +73,10 @@ data:
"steppedLine": false,
"targets": [
{
"expr": "(\n prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"} \n- \n ignoring(queue) group_right(instance) prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}\n)\n",
"expr": "(\n prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"} \n- \n ignoring(remote_name, url) group_right(instance) (prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"} != 0)\n)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
"refId": "A"
}
],
@ -127,6 +129,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -139,6 +142,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@@ -162,10 +166,10 @@ data:
"steppedLine": false,
"targets": [
{
"expr": "(\n rate(prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) \n- \n ignoring (queue) group_right(instance) rate(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n)\n",
"expr": "clamp_min(\n rate(prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) \n- \n ignoring (remote_name, url) group_right(instance) rate(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n, 0)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
"refId": "A"
}
],
@ -231,6 +235,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -243,6 +248,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@@ -266,10 +272,10 @@ data:
"steppedLine": false,
"targets": [
{
"expr": "rate(\n prometheus_remote_storage_samples_in_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n- \n ignoring(queue) group_right(instance) rate(prometheus_remote_storage_succeeded_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) \n- \n rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n",
"expr": "rate(\n prometheus_remote_storage_samples_in_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n- \n ignoring(remote_name, url) group_right(instance) (rate(prometheus_remote_storage_succeeded_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]))\n- \n (rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_dropped_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
"refId": "A"
}
],
@ -335,6 +341,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -347,6 +354,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@ -374,7 +382,7 @@ data:
|
||||
"expr": "prometheus_remote_storage_shards{cluster=~\"$cluster\", instance=~\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
|
||||
"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@ -427,6 +435,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -439,6 +448,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@ -465,7 +475,7 @@ data:
|
||||
"expr": "prometheus_remote_storage_shards_max{cluster=~\"$cluster\", instance=~\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
|
||||
"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@ -518,6 +528,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -530,6 +541,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@ -556,7 +568,7 @@ data:
|
||||
"expr": "prometheus_remote_storage_shards_min{cluster=~\"$cluster\", instance=~\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
|
||||
"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@ -609,6 +621,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -621,6 +634,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@ -647,7 +661,7 @@ data:
|
||||
"expr": "prometheus_remote_storage_shards_desired{cluster=~\"$cluster\", instance=~\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
|
||||
"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@ -713,6 +727,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -725,6 +740,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@ -751,7 +767,7 @@ data:
|
||||
"expr": "prometheus_remote_storage_shard_capacity{cluster=~\"$cluster\", instance=~\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
|
||||
"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@ -804,6 +820,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -816,6 +833,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@@ -839,10 +857,10 @@ data:
"steppedLine": false,
"targets": [
{
"expr": "prometheus_remote_storage_pending_samples{cluster=~\"$cluster\", instance=~\"$instance\"}",
"expr": "prometheus_remote_storage_pending_samples{cluster=~\"$cluster\", instance=~\"$instance\"} or prometheus_remote_storage_samples_pending{cluster=~\"$cluster\", instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
"refId": "A"
}
],
@ -908,6 +926,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -920,6 +939,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@ -999,6 +1019,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -1011,6 +1032,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@ -1037,7 +1059,7 @@ data:
|
||||
"expr": "prometheus_wal_watcher_current_segment{cluster=~\"$cluster\", instance=~\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
|
||||
"legendFormat": "{{cluster}}:{{instance}} {{consumer}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@ -1103,6 +1125,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -1115,6 +1138,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@@ -1138,10 +1162,10 @@ data:
"steppedLine": false,
"targets": [
{
"expr": "rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
"expr": "rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_dropped_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
"refId": "A"
}
],
@ -1194,6 +1218,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -1206,6 +1231,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@@ -1229,10 +1255,10 @@ data:
"steppedLine": false,
"targets": [
{
"expr": "rate(prometheus_remote_storage_failed_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
"expr": "rate(prometheus_remote_storage_failed_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
"refId": "A"
}
],
@ -1285,6 +1311,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -1297,6 +1324,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@@ -1320,10 +1348,10 @@ data:
"steppedLine": false,
"targets": [
{
"expr": "rate(prometheus_remote_storage_retried_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
"expr": "rate(prometheus_remote_storage_retried_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_retried_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
"refId": "A"
}
],
@ -1376,6 +1404,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
@ -1388,6 +1417,7 @@ data:
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sideWidth": null,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
@ -1414,7 +1444,7 @@ data:
|
||||
"expr": "rate(prometheus_remote_storage_enqueue_retries_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
|
||||
"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@ -1471,7 +1501,7 @@ data:
|
||||
"schemaVersion": 14,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
|
||||
"prometheus-mixin"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
@@ -1567,11 +1597,11 @@ data:
"includeAll": true,
"label": null,
"multi": false,
"name": "queue",
"name": "url",
"options": [

],
"query": "label_values(prometheus_remote_storage_shards{cluster=~\"$cluster\", instance=~\"$instance\"}, queue)",
"query": "label_values(prometheus_remote_storage_shards{cluster=~\"$cluster\", instance=~\"$instance\"}, url)",
"refresh": 2,
"regex": "",
"sort": 0,
@@ -1615,7 +1645,7 @@ data:
]
},
"timezone": "browser",
"title": "Prometheus Remote Write",
"title": "Prometheus / Remote Write",
"version": 0
}
prometheus.json: |-
@@ -1632,7 +1662,7 @@ data:
"links": [

],
"refresh": "10s",
"refresh": "60s",
"rows": [
{
"collapse": false,
@ -1690,6 +1720,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #A",
|
||||
@ -1708,6 +1739,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "Value #B",
|
||||
@ -1726,6 +1758,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "instance",
|
||||
@ -1744,6 +1777,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "job",
|
||||
@ -1762,6 +1796,7 @@ data:
|
||||
"dateFormat": "YYYY-MM-DD HH:mm:ss",
|
||||
"decimals": 2,
|
||||
"link": false,
|
||||
"linkTargetBlank": false,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "",
|
||||
"pattern": "version",
|
||||
@ -2706,7 +2741,7 @@ data:
|
||||
"schemaVersion": 14,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
|
||||
"prometheus-mixin"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
@@ -2814,7 +2849,7 @@ data:
]
},
"timezone": "utc",
"title": "Prometheus",
"title": "Prometheus / Overview",
"uid": "",
"version": 0
}

@@ -18,12 +18,13 @@ spec:
labels:
name: grafana
phase: prod
annotations:
seccomp.security.alpha.kubernetes.io/pod: 'docker/default'
spec:
securityContext:
seccompProfile:
type: RuntimeDefault
containers:
- name: grafana
image: docker.io/grafana/grafana:7.0.3
image: docker.io/grafana/grafana:8.3.4
env:
- name: GF_PATHS_CONFIG
value: "/etc/grafana/custom.ini"

@@ -1,4 +1,4 @@
apiVersion: networking.k8s.io/v1beta1
apiVersion: networking.k8s.io/v1
kind: IngressClass
metadata:
name: public
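The hunk above only moves the IngressClass objects from the deprecated networking.k8s.io/v1beta1 API group to the stable networking.k8s.io/v1 group. For context, a complete v1 IngressClass is a small object; the sketch below is illustrative only, and the spec.controller value is an assumption (the common upstream ingress-nginx controller string), not a value shown in this diff:

# Illustrative sketch of a networking.k8s.io/v1 IngressClass.
# spec.controller is assumed, not taken from this diff.
apiVersion: networking.k8s.io/v1
kind: IngressClass
metadata:
  name: public
spec:
  controller: k8s.io/ingress-nginx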
@ -17,12 +17,13 @@ spec:
|
||||
labels:
|
||||
name: nginx-ingress-controller
|
||||
phase: prod
|
||||
annotations:
|
||||
seccomp.security.alpha.kubernetes.io/pod: 'docker/default'
|
||||
spec:
|
||||
securityContext:
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
containers:
|
||||
- name: nginx-ingress-controller
|
||||
image: quay.io/kubernetes-ingress-controller/nginx-ingress-controller:0.33.0
|
||||
image: k8s.gcr.io/ingress-nginx/controller:v1.1.1
|
||||
args:
|
||||
- /nginx-ingress-controller
|
||||
- --ingress-class=public
|
||||
@ -47,7 +48,6 @@ spec:
|
||||
containerPort: 10254
|
||||
hostPort: 10254
|
||||
livenessProbe:
|
||||
failureThreshold: 3
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: 10254
|
||||
@ -55,15 +55,16 @@ spec:
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 10
|
||||
successThreshold: 1
|
||||
failureThreshold: 3
|
||||
timeoutSeconds: 5
|
||||
readinessProbe:
|
||||
failureThreshold: 3
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: 10254
|
||||
scheme: HTTP
|
||||
periodSeconds: 10
|
||||
successThreshold: 1
|
||||
failureThreshold: 3
|
||||
timeoutSeconds: 5
|
||||
lifecycle:
|
||||
preStop:
|
||||
|
@ -1,4 +1,4 @@
|
||||
apiVersion: networking.k8s.io/v1beta1
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: IngressClass
|
||||
metadata:
|
||||
name: public
|
||||
|
@ -17,12 +17,13 @@ spec:
|
||||
labels:
|
||||
name: nginx-ingress-controller
|
||||
phase: prod
|
||||
annotations:
|
||||
seccomp.security.alpha.kubernetes.io/pod: 'docker/default'
|
||||
spec:
|
||||
securityContext:
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
containers:
|
||||
- name: nginx-ingress-controller
|
||||
image: quay.io/kubernetes-ingress-controller/nginx-ingress-controller:0.33.0
|
||||
image: k8s.gcr.io/ingress-nginx/controller:v1.1.1
|
||||
args:
|
||||
- /nginx-ingress-controller
|
||||
- --ingress-class=public
|
||||
@ -47,7 +48,6 @@ spec:
|
||||
containerPort: 10254
|
||||
hostPort: 10254
|
||||
livenessProbe:
|
||||
failureThreshold: 3
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: 10254
|
||||
@ -55,15 +55,16 @@ spec:
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 10
|
||||
successThreshold: 1
|
||||
failureThreshold: 3
|
||||
timeoutSeconds: 5
|
||||
readinessProbe:
|
||||
failureThreshold: 3
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: 10254
|
||||
scheme: HTTP
|
||||
periodSeconds: 10
|
||||
successThreshold: 1
|
||||
failureThreshold: 3
|
||||
timeoutSeconds: 5
|
||||
lifecycle:
|
||||
preStop:
|
||||
|
@ -1,4 +1,4 @@
|
||||
apiVersion: networking.k8s.io/v1beta1
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: IngressClass
|
||||
metadata:
|
||||
name: public
|
||||
|
@@ -1,7 +1,7 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: ingress-controller-public
name: nginx-ingress-controller
namespace: ingress
spec:
replicas: 2
@@ -10,19 +10,20 @@ spec:
maxUnavailable: 1
selector:
matchLabels:
name: ingress-controller-public
name: nginx-ingress-controller
phase: prod
template:
metadata:
labels:
name: ingress-controller-public
name: nginx-ingress-controller
phase: prod
annotations:
seccomp.security.alpha.kubernetes.io/pod: 'docker/default'
spec:
securityContext:
seccompProfile:
type: RuntimeDefault
containers:
- name: nginx-ingress-controller
image: quay.io/kubernetes-ingress-controller/nginx-ingress-controller:0.33.0
image: k8s.gcr.io/ingress-nginx/controller:v1.1.1
args:
- /nginx-ingress-controller
- --ingress-class=public
@@ -76,4 +77,3 @@ spec:
runAsUser: 101 # www-data
restartPolicy: Always
terminationGracePeriodSeconds: 300
|
@ -1,4 +1,4 @@
|
||||
apiVersion: networking.k8s.io/v1beta1
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: IngressClass
|
||||
metadata:
|
||||
name: public
|
||||
|
@ -17,12 +17,13 @@ spec:
|
||||
labels:
|
||||
name: nginx-ingress-controller
|
||||
phase: prod
|
||||
annotations:
|
||||
seccomp.security.alpha.kubernetes.io/pod: 'docker/default'
|
||||
spec:
|
||||
securityContext:
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
containers:
|
||||
- name: nginx-ingress-controller
|
||||
image: quay.io/kubernetes-ingress-controller/nginx-ingress-controller:0.33.0
|
||||
image: k8s.gcr.io/ingress-nginx/controller:v1.1.1
|
||||
args:
|
||||
- /nginx-ingress-controller
|
||||
- --ingress-class=public
|
||||
@ -47,7 +48,6 @@ spec:
|
||||
containerPort: 10254
|
||||
hostPort: 10254
|
||||
livenessProbe:
|
||||
failureThreshold: 3
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: 10254
|
||||
@ -55,15 +55,16 @@ spec:
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 10
|
||||
successThreshold: 1
|
||||
failureThreshold: 3
|
||||
timeoutSeconds: 5
|
||||
readinessProbe:
|
||||
failureThreshold: 3
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: 10254
|
||||
scheme: HTTP
|
||||
periodSeconds: 10
|
||||
successThreshold: 1
|
||||
failureThreshold: 3
|
||||
timeoutSeconds: 5
|
||||
lifecycle:
|
||||
preStop:
|
||||
|
@ -1,4 +1,4 @@
|
||||
apiVersion: networking.k8s.io/v1beta1
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: IngressClass
|
||||
metadata:
|
||||
name: public
|
||||
|
@ -17,12 +17,13 @@ spec:
|
||||
labels:
|
||||
name: nginx-ingress-controller
|
||||
phase: prod
|
||||
annotations:
|
||||
seccomp.security.alpha.kubernetes.io/pod: 'docker/default'
|
||||
spec:
|
||||
securityContext:
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
containers:
|
||||
- name: nginx-ingress-controller
|
||||
image: quay.io/kubernetes-ingress-controller/nginx-ingress-controller:0.33.0
|
||||
image: k8s.gcr.io/ingress-nginx/controller:v1.1.1
|
||||
args:
|
||||
- /nginx-ingress-controller
|
||||
- --ingress-class=public
|
||||
@ -47,7 +48,6 @@ spec:
|
||||
containerPort: 10254
|
||||
hostPort: 10254
|
||||
livenessProbe:
|
||||
failureThreshold: 3
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: 10254
|
||||
@ -55,15 +55,16 @@ spec:
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 10
|
||||
successThreshold: 1
|
||||
failureThreshold: 3
|
||||
timeoutSeconds: 5
|
||||
readinessProbe:
|
||||
failureThreshold: 3
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: 10254
|
||||
scheme: HTTP
|
||||
periodSeconds: 10
|
||||
successThreshold: 1
|
||||
failureThreshold: 3
|
||||
timeoutSeconds: 5
|
||||
lifecycle:
|
||||
preStop:
|
||||

@@ -34,7 +34,7 @@ data:
- job_name: 'kubernetes-apiservers'
kubernetes_sd_configs:
- role: endpoints

scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
@@ -68,13 +68,58 @@ data:
- source_labels: [__name__, group]
regex: apiserver_request_duration_seconds_bucket;.+
action: drop
- source_labels: [__name__, group]
regex: apiserver_request_duration_seconds_count;.+
action: drop

# Scrape config for kube-controller-manager endpoints.
#
# kube-controller-manager service endpoints can be discovered by using the
# `endpoints` role and relabelling to keep only endpoints associated with
# kube-system/kube-controller-manager and the `https` port.
- job_name: 'kube-controller-manager'
kubernetes_sd_configs:
- role: endpoints
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
action: keep
regex: kube-system;kube-controller-manager;metrics
- replacement: kube-controller-manager
action: replace
target_label: job

# Scrape config for kube-scheduler endpoints.
#
# kube-scheduler service endpoints can be discovered by using the `endpoints`
# role and relabelling to keep only endpoints associated with
# kube-system/kube-scheduler and the `https` port.
- job_name: 'kube-scheduler'
kubernetes_sd_configs:
- role: endpoints
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
action: keep
regex: kube-system;kube-scheduler;metrics
- replacement: kube-scheduler
action: replace
target_label: job

# Scrape config for node (i.e. kubelet) /metrics (e.g. 'kubelet_'). Explore
# metrics from a node by scraping kubelet (127.0.0.1:10250/metrics).
- job_name: 'kubelet'
kubernetes_sd_configs:
- role: node

scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
@@ -82,10 +127,6 @@ data:
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token

relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_name

# Scrape config for Kubelet cAdvisor. Explore metrics from a node by
# scraping kubelet (127.0.0.1:10250/metrics/cadvisor).
- job_name: 'kubernetes-cadvisor'
@@ -100,9 +141,6 @@ data:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token

relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_name
metric_relabel_configs:
- source_labels: [__name__, image]
action: drop
@@ -121,13 +159,11 @@ data:
- source_labels: [__meta_kubernetes_node_label_node_kubernetes_io_controller]
action: keep
regex: 'true'
- action: labelmap
regex: __meta_kubernetes_node_name
- source_labels: [__meta_kubernetes_node_address_InternalIP]
action: replace
target_label: __address__
replacement: '${1}:2381'

# Scrape config for service endpoints.
#
# The relabeling allows the actual service scrape endpoint to be configured
@@ -139,6 +175,7 @@ data:
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
# * `prometheus.io/port`: If the metrics are exposed on a different port to the
# service then set this appropriately.
# * `prometheus.io/param`: Custom metrics query parameter, like "format=prometheus".
- job_name: 'kubernetes-service-endpoints'
kubernetes_sd_configs:
- role: endpoints
@@ -161,6 +198,11 @@ data:
target_label: __address__
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_param]
action: replace
target_label: __param_$1
regex: ([^=]+)=(.*)
replacement: $2
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
@@ -172,44 +214,12 @@ data:
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: job

metric_relabel_configs:
- source_labels: [__name__]
action: drop
regex: etcd_(debugging|disk|request|server).*
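As a hedged illustration of the annotation contract described in the comments above, an application Service might opt in to scraping as follows. The Service name and port values are hypothetical; only the prometheus.io/* annotation keys come from this config, and note that the new prometheus.io/param rule above matches the pod annotation rather than the Service:

apiVersion: v1
kind: Service
metadata:
  name: example-app              # hypothetical Service
  annotations:
    prometheus.io/scrape: 'true'
    prometheus.io/port: '8080'     # only needed if metrics are not on the service port
    prometheus.io/path: '/metrics' # default path, shown for clarity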

# Example scrape config for probing services via the Blackbox Exporter.
#
# The relabeling allows the actual service scrape endpoint to be configured
# via the following annotations:
#
# * `prometheus.io/probe`: Only probe services that have a value of `true`
- job_name: 'kubernetes-services'

metrics_path: /probe
params:
module: [http_2xx]

kubernetes_sd_configs:
- role: service

relabel_configs:
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
action: keep
regex: true
- source_labels: [__address__]
target_label: __param_target
- target_label: __address__
replacement: blackbox
- source_labels: [__param_target]
target_label: instance
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
target_label: namespace
- source_labels: [__meta_kubernetes_service_name]
target_label: job

# Example scrape config for pods
#
# The relabeling allows the actual pod scrape endpoint to be configured via the
@@ -246,6 +256,67 @@ data:
action: replace
target_label: kubernetes_pod_name

# Example scrape config for probing Services via the Blackbox Exporter.
#
# Relabeling allows service scraping to be configured via annotations:
# * `prometheus.io/probe`: Only probe services that have a value of `true`
- job_name: 'kubernetes-services'

metrics_path: /probe
params:
module: [http_2xx]

kubernetes_sd_configs:
- role: service

relabel_configs:
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
action: keep
regex: true
- source_labels: [__address__]
target_label: __param_target
- target_label: __address__
replacement: blackbox-exporter:8080
- source_labels: [__param_target]
target_label: instance
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
target_label: namespace
- source_labels: [__meta_kubernetes_service_name]
target_label: job

# Example scrape config for probing Ingresses via a Blackbox Exporter.
#
# Relabeling allows service scraping to be configured via annotations:
# * `prometheus.io/probe`: Only probe ingresses that have a value of `true`
- job_name: 'kubernetes-ingresses'
metrics_path: /probe
params:
module: [http_2xx]

kubernetes_sd_configs:
- role: ingress

relabel_configs:
- source_labels: [__meta_kubernetes_ingress_annotation_prometheus_io_probe]
action: keep
regex: true
- source_labels: [__meta_kubernetes_ingress_scheme, __address__, __meta_kubernetes_ingress_path]
regex: (.+);(.+);(.+)
replacement: ${1}://${2}${3}
target_label: __param_target
- target_label: __address__
replacement: blackbox-exporter:8080
- source_labels: [__param_target]
target_label: instance
- action: labelmap
regex: __meta_kubernetes_ingress_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
target_label: namespace
- source_labels: [__meta_kubernetes_service_name]
target_label: job

# Rule files
rule_files:
- "/etc/prometheus/rules/*.rules"
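As a hedged sketch of how an object opts in to the blackbox probing configured above (the name is hypothetical; the prometheus.io/probe annotation key and the blackbox-exporter:8080 target come from this config), an Ingress would be annotated like:

apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: example              # hypothetical Ingress
  annotations:
    prometheus.io/probe: 'true'

The relabel rules then build the probe target from the ingress scheme, address, and path and pass it to the blackbox exporter as __param_target.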

@@ -14,13 +14,14 @@ spec:
labels:
name: prometheus
phase: prod
annotations:
seccomp.security.alpha.kubernetes.io/pod: 'docker/default'
spec:
securityContext:
seccompProfile:
type: RuntimeDefault
serviceAccountName: prometheus
containers:
- name: prometheus
image: quay.io/prometheus/prometheus:v2.19.0
image: quay.io/prometheus/prometheus:v2.32.1
args:
- --web.listen-address=0.0.0.0:9090
- --config.file=/etc/prometheus/prometheus.yaml

@@ -1,11 +1,9 @@
# Allow Prometheus to scrape service endpoints
# Allow Prometheus to discover service endpoints
apiVersion: v1
kind: Service
metadata:
name: kube-controller-manager
namespace: kube-system
annotations:
prometheus.io/scrape: 'true'
spec:
type: ClusterIP
clusterIP: None
@@ -14,5 +12,5 @@ spec:
ports:
- name: metrics
protocol: TCP
port: 10252
targetPort: 10252
port: 10257
targetPort: 10257

@@ -1,11 +1,9 @@
# Allow Prometheus to scrape service endpoints
# Allow Prometheus to discover service endpoints
apiVersion: v1
kind: Service
metadata:
name: kube-scheduler
namespace: kube-system
annotations:
prometheus.io/scrape: 'true'
spec:
type: ClusterIP
clusterIP: None
@@ -14,5 +12,5 @@ spec:
ports:
- name: metrics
protocol: TCP
port: 10251
targetPort: 10251
port: 10259
targetPort: 10259
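The port changes track kube-controller-manager and kube-scheduler serving metrics only on their secure ports (10257 and 10259); the insecure 10252 and 10251 ports are gone in recent Kubernetes releases. A quick PromQL sanity check against the job labels set by the scrape configs above might be:

up{job="kube-controller-manager"} == 1
up{job="kube-scheduler"} == 1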

@@ -78,13 +78,6 @@ rules:
verbs:
- list
- watch
- apiGroups:
- autoscaling.k8s.io
resources:
- verticalpodautoscalers
verbs:
- list
- watch
- apiGroups:
- admissionregistration.k8s.io
resources:
@@ -97,6 +90,14 @@ rules:
- networking.k8s.io
resources:
- networkpolicies
- ingresses
verbs:
- list
- watch
- apiGroups:
- coordination.k8s.io
resources:
- leases
verbs:
- list
- watch

@@ -18,16 +18,19 @@ spec:
labels:
name: kube-state-metrics
phase: prod
annotations:
seccomp.security.alpha.kubernetes.io/pod: 'docker/default'
spec:
securityContext:
seccompProfile:
type: RuntimeDefault
serviceAccountName: kube-state-metrics
containers:
- name: kube-state-metrics
image: quay.io/coreos/kube-state-metrics:v1.9.7
image: k8s.gcr.io/kube-state-metrics/kube-state-metrics:v2.3.0
ports:
- name: metrics
containerPort: 8080
- name: telemetry
containerPort: 8081
livenessProbe:
httpGet:
path: /healthz
@@ -40,3 +43,5 @@ spec:
port: 8081
initialDelaySeconds: 5
timeoutSeconds: 5
securityContext:
runAsUser: 65534

@@ -17,24 +17,24 @@ spec:
labels:
name: node-exporter
phase: prod
annotations:
seccomp.security.alpha.kubernetes.io/pod: 'docker/default'
spec:
serviceAccountName: node-exporter
securityContext:
runAsNonRoot: true
runAsUser: 65534
seccompProfile:
type: RuntimeDefault
hostNetwork: true
hostPID: true
containers:
- name: node-exporter
image: quay.io/prometheus/node-exporter:v1.0.1
image: quay.io/prometheus/node-exporter:v1.3.1
args:
- --path.procfs=/host/proc
- --path.sysfs=/host/sys
- --path.rootfs=/host/root
- --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+)($|/)
- --collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|cgroup|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|mqueue|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|sysfs|tracefs)$
- --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|var/lib/docker/.+)($|/)
- --collector.filesystem.fs-types-exclude=^(autofs|binfmt_misc|cgroup|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|mqueue|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|sysfs|tracefs)$
ports:
- name: metrics
containerPort: 9100
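node_exporter renamed the filesystem collector flags around v1.3, so the deprecated --collector.filesystem.ignored-* options above are replaced by the *-exclude equivalents with the same patterns. As a hedged example of querying the metrics those collectors expose:

# bytes available on real filesystems, excluding the mount points filtered by the flags above
node_filesystem_avail_bytes{fstype!=""}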

@@ -10,6 +10,17 @@ rules:
- services
- endpoints
- pods
verbs: ["get", "list", "watch"]
verbs:
- get
- list
- watch
- nonResourceURLs: ["/metrics"]
verbs: ["get"]
- apiGroups:
- networking.k8s.io
resources:
- ingresses
verbs:
- get
- list
- watch
@ -9,10 +9,11 @@ data:
|
||||
{
|
||||
"alert": "etcdMembersDown",
|
||||
"annotations": {
|
||||
"message": "etcd cluster \"{{ $labels.job }}\": members are down ({{ $value }})."
|
||||
"description": "etcd cluster \"{{ $labels.job }}\": members are down ({{ $value }}).",
|
||||
"summary": "etcd cluster members are down."
|
||||
},
|
||||
"expr": "max by (job) (\n sum by (job) (up{job=~\".*etcd.*\"} == bool 0)\nor\n count by (job,endpoint) (\n sum by (job,endpoint,To) (rate(etcd_network_peer_sent_failures_total{job=~\".*etcd.*\"}[3m])) > 0.01\n )\n)\n> 0\n",
|
||||
"for": "3m",
|
||||
"expr": "max without (endpoint) (\n sum without (instance) (up{job=~\".*etcd.*\"} == bool 0)\nor\n count without (To) (\n sum without (instance) (rate(etcd_network_peer_sent_failures_total{job=~\".*etcd.*\"}[120s])) > 0.01\n )\n)\n> 0\n",
|
||||
"for": "10m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
}
|
||||
@ -20,9 +21,10 @@ data:
|
||||
{
|
||||
"alert": "etcdInsufficientMembers",
|
||||
"annotations": {
|
||||
"message": "etcd cluster \"{{ $labels.job }}\": insufficient members ({{ $value }})."
|
||||
"description": "etcd cluster \"{{ $labels.job }}\": insufficient members ({{ $value }}).",
|
||||
"summary": "etcd cluster has insufficient number of members."
|
||||
},
|
||||
"expr": "sum(up{job=~\".*etcd.*\"} == bool 1) by (job) < ((count(up{job=~\".*etcd.*\"}) by (job) + 1) / 2)\n",
|
||||
"expr": "sum(up{job=~\".*etcd.*\"} == bool 1) without (instance) < ((count(up{job=~\".*etcd.*\"}) without (instance) + 1) / 2)\n",
|
||||
"for": "3m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
@ -31,7 +33,8 @@ data:
|
||||
{
|
||||
"alert": "etcdNoLeader",
|
||||
"annotations": {
|
||||
"message": "etcd cluster \"{{ $labels.job }}\": member {{ $labels.instance }} has no leader."
|
||||
"description": "etcd cluster \"{{ $labels.job }}\": member {{ $labels.instance }} has no leader.",
|
||||
"summary": "etcd cluster has no leader."
|
||||
},
|
||||
"expr": "etcd_server_has_leader{job=~\".*etcd.*\"} == 0\n",
|
||||
"for": "1m",
|
||||
@ -42,9 +45,10 @@ data:
|
||||
{
|
||||
"alert": "etcdHighNumberOfLeaderChanges",
|
||||
"annotations": {
|
||||
"message": "etcd cluster \"{{ $labels.job }}\": {{ $value }} leader changes within the last 15 minutes. Frequent elections may be a sign of insufficient resources, high network latency, or disruptions by other components and should be investigated."
|
||||
"description": "etcd cluster \"{{ $labels.job }}\": {{ $value }} leader changes within the last 15 minutes. Frequent elections may be a sign of insufficient resources, high network latency, or disruptions by other components and should be investigated.",
|
||||
"summary": "etcd cluster has high number of leader changes."
|
||||
},
|
||||
"expr": "increase((max by (job) (etcd_server_leader_changes_seen_total{job=~\".*etcd.*\"}) or 0*absent(etcd_server_leader_changes_seen_total{job=~\".*etcd.*\"}))[15m:1m]) >= 3\n",
|
||||
"expr": "increase((max without (instance) (etcd_server_leader_changes_seen_total{job=~\".*etcd.*\"}) or 0*absent(etcd_server_leader_changes_seen_total{job=~\".*etcd.*\"}))[15m:1m]) >= 4\n",
|
||||
"for": "5m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
@ -53,9 +57,10 @@ data:
|
||||
{
|
||||
"alert": "etcdGRPCRequestsSlow",
|
||||
"annotations": {
|
||||
"message": "etcd cluster \"{{ $labels.job }}\": gRPC requests to {{ $labels.grpc_method }} are taking {{ $value }}s on etcd instance {{ $labels.instance }}."
|
||||
"description": "etcd cluster \"{{ $labels.job }}\": gRPC requests to {{ $labels.grpc_method }} are taking {{ $value }}s on etcd instance {{ $labels.instance }}.",
|
||||
"summary": "etcd grpc requests are slow"
|
||||
},
|
||||
"expr": "histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~\".*etcd.*\", grpc_type=\"unary\"}[5m])) by (job, instance, grpc_service, grpc_method, le))\n> 0.15\n",
|
||||
"expr": "histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~\".*etcd.*\", grpc_type=\"unary\"}[5m])) without(grpc_type))\n> 0.15\n",
|
||||
"for": "10m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
@ -64,7 +69,8 @@ data:
|
||||
{
|
||||
"alert": "etcdMemberCommunicationSlow",
|
||||
"annotations": {
|
||||
"message": "etcd cluster \"{{ $labels.job }}\": member communication with {{ $labels.To }} is taking {{ $value }}s on etcd instance {{ $labels.instance }}."
|
||||
"description": "etcd cluster \"{{ $labels.job }}\": member communication with {{ $labels.To }} is taking {{ $value }}s on etcd instance {{ $labels.instance }}.",
|
||||
"summary": "etcd cluster member communication is slow."
|
||||
},
|
||||
"expr": "histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~\".*etcd.*\"}[5m]))\n> 0.15\n",
|
||||
"for": "10m",
|
||||
@ -75,7 +81,8 @@ data:
|
||||
{
|
||||
"alert": "etcdHighNumberOfFailedProposals",
|
||||
"annotations": {
|
||||
"message": "etcd cluster \"{{ $labels.job }}\": {{ $value }} proposal failures within the last 30 minutes on etcd instance {{ $labels.instance }}."
|
||||
"description": "etcd cluster \"{{ $labels.job }}\": {{ $value }} proposal failures within the last 30 minutes on etcd instance {{ $labels.instance }}.",
|
||||
"summary": "etcd cluster has high number of proposal failures."
|
||||
},
|
||||
"expr": "rate(etcd_server_proposals_failed_total{job=~\".*etcd.*\"}[15m]) > 5\n",
|
||||
"for": "15m",
|
||||
@ -86,7 +93,8 @@ data:
|
||||
{
|
||||
"alert": "etcdHighFsyncDurations",
|
||||
"annotations": {
|
||||
"message": "etcd cluster \"{{ $labels.job }}\": 99th percentile fync durations are {{ $value }}s on etcd instance {{ $labels.instance }}."
|
||||
"description": "etcd cluster \"{{ $labels.job }}\": 99th percentile fsync durations are {{ $value }}s on etcd instance {{ $labels.instance }}.",
|
||||
"summary": "etcd cluster 99th percentile fsync durations are too high."
|
||||
},
|
||||
"expr": "histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~\".*etcd.*\"}[5m]))\n> 0.5\n",
|
||||
"for": "10m",
|
||||
@ -94,10 +102,22 @@ data:
|
||||
"severity": "warning"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "etcdHighFsyncDurations",
|
||||
"annotations": {
|
||||
"message": "etcd cluster \"{{ $labels.job }}\": 99th percentile fsync durations are {{ $value }}s on etcd instance {{ $labels.instance }}."
|
||||
},
|
||||
"expr": "histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~\".*etcd.*\"}[5m]))\n> 1\n",
|
||||
"for": "10m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "etcdHighCommitDurations",
|
||||
"annotations": {
|
||||
"message": "etcd cluster \"{{ $labels.job }}\": 99th percentile commit durations {{ $value }}s on etcd instance {{ $labels.instance }}."
|
||||
"description": "etcd cluster \"{{ $labels.job }}\": 99th percentile commit durations {{ $value }}s on etcd instance {{ $labels.instance }}.",
|
||||
"summary": "etcd cluster 99th percentile commit durations are too high."
|
||||
},
|
||||
"expr": "histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~\".*etcd.*\"}[5m]))\n> 0.25\n",
|
||||
"for": "10m",
|
||||
@ -108,9 +128,10 @@ data:
|
||||
{
|
||||
"alert": "etcdHighNumberOfFailedHTTPRequests",
|
||||
"annotations": {
|
||||
"message": "{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}"
|
||||
"description": "{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}",
|
||||
"summary": "etcd has high number of failed HTTP requests."
|
||||
},
|
||||
"expr": "sum(rate(etcd_http_failed_total{job=~\".*etcd.*\", code!=\"404\"}[5m])) BY (method) / sum(rate(etcd_http_received_total{job=~\".*etcd.*\"}[5m]))\nBY (method) > 0.01\n",
|
||||
"expr": "sum(rate(etcd_http_failed_total{job=~\".*etcd.*\", code!=\"404\"}[5m])) without (code) / sum(rate(etcd_http_received_total{job=~\".*etcd.*\"}[5m]))\nwithout (code) > 0.01\n",
|
||||
"for": "10m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
@ -119,9 +140,10 @@ data:
|
||||
{
|
||||
"alert": "etcdHighNumberOfFailedHTTPRequests",
|
||||
"annotations": {
|
||||
"message": "{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}."
|
||||
"description": "{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}.",
|
||||
"summary": "etcd has high number of failed HTTP requests."
|
||||
},
|
||||
"expr": "sum(rate(etcd_http_failed_total{job=~\".*etcd.*\", code!=\"404\"}[5m])) BY (method) / sum(rate(etcd_http_received_total{job=~\".*etcd.*\"}[5m]))\nBY (method) > 0.05\n",
|
||||
"expr": "sum(rate(etcd_http_failed_total{job=~\".*etcd.*\", code!=\"404\"}[5m])) without (code) / sum(rate(etcd_http_received_total{job=~\".*etcd.*\"}[5m]))\nwithout (code) > 0.05\n",
|
||||
"for": "10m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
@ -130,13 +152,36 @@ data:
|
||||
{
|
||||
"alert": "etcdHTTPRequestsSlow",
|
||||
"annotations": {
|
||||
"message": "etcd instance {{ $labels.instance }} HTTP requests to {{ $labels.method }} are slow."
|
||||
"description": "etcd instance {{ $labels.instance }} HTTP requests to {{ $labels.method }} are slow.",
|
||||
"summary": "etcd instance HTTP requests are slow."
|
||||
},
|
||||
"expr": "histogram_quantile(0.99, rate(etcd_http_successful_duration_seconds_bucket[5m]))\n> 0.15\n",
|
||||
"for": "10m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "etcdBackendQuotaLowSpace",
|
||||
"annotations": {
|
||||
"message": "etcd cluster \"{{ $labels.job }}\": database size exceeds the defined quota on etcd instance {{ $labels.instance }}, please defrag or increase the quota as the writes to etcd will be disabled when it is full."
|
||||
},
|
||||
"expr": "(etcd_mvcc_db_total_size_in_bytes/etcd_server_quota_backend_bytes)*100 > 95\n",
|
||||
"for": "10m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "etcdExcessiveDatabaseGrowth",
|
||||
"annotations": {
|
||||
"message": "etcd cluster \"{{ $labels.job }}\": Observed surge in etcd writes leading to 50% increase in database size over the past four hours on etcd instance {{ $labels.instance }}, please check as it might be disruptive."
|
||||
},
|
||||
"expr": "increase(((etcd_mvcc_db_total_size_in_bytes/etcd_server_quota_backend_bytes)*100)[240m:1m]) > 50\n",
|
||||
"for": "10m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -145,115 +190,136 @@ data:
|
||||
kube.yaml: |-
|
||||
{
|
||||
"groups": [
|
||||
{
|
||||
"name": "kube-apiserver-error",
|
||||
"rules": [
|
||||
{
|
||||
"expr": "sum by (status_class) (\n label_replace(\n rate(apiserver_request_total{job=\"apiserver\"}[5m]\n ), \"status_class\", \"${1}xx\", \"code\", \"([0-9])..\")\n)\n",
|
||||
"labels": {
|
||||
"job": "apiserver"
|
||||
},
|
||||
"record": "status_class:apiserver_request_total:rate5m"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (status_class) (\n label_replace(\n rate(apiserver_request_total{job=\"apiserver\"}[30m]\n ), \"status_class\", \"${1}xx\", \"code\", \"([0-9])..\")\n)\n",
|
||||
"labels": {
|
||||
"job": "apiserver"
|
||||
},
|
||||
"record": "status_class:apiserver_request_total:rate30m"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (status_class) (\n label_replace(\n rate(apiserver_request_total{job=\"apiserver\"}[1h]\n ), \"status_class\", \"${1}xx\", \"code\", \"([0-9])..\")\n)\n",
|
||||
"labels": {
|
||||
"job": "apiserver"
|
||||
},
|
||||
"record": "status_class:apiserver_request_total:rate1h"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (status_class) (\n label_replace(\n rate(apiserver_request_total{job=\"apiserver\"}[2h]\n ), \"status_class\", \"${1}xx\", \"code\", \"([0-9])..\")\n)\n",
|
||||
"labels": {
|
||||
"job": "apiserver"
|
||||
},
|
||||
"record": "status_class:apiserver_request_total:rate2h"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (status_class) (\n label_replace(\n rate(apiserver_request_total{job=\"apiserver\"}[6h]\n ), \"status_class\", \"${1}xx\", \"code\", \"([0-9])..\")\n)\n",
|
||||
"labels": {
|
||||
"job": "apiserver"
|
||||
},
|
||||
"record": "status_class:apiserver_request_total:rate6h"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (status_class) (\n label_replace(\n rate(apiserver_request_total{job=\"apiserver\"}[1d]\n ), \"status_class\", \"${1}xx\", \"code\", \"([0-9])..\")\n)\n",
|
||||
"labels": {
|
||||
"job": "apiserver"
|
||||
},
|
||||
"record": "status_class:apiserver_request_total:rate1d"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (status_class) (\n label_replace(\n rate(apiserver_request_total{job=\"apiserver\"}[3d]\n ), \"status_class\", \"${1}xx\", \"code\", \"([0-9])..\")\n)\n",
|
||||
"labels": {
|
||||
"job": "apiserver"
|
||||
},
|
||||
"record": "status_class:apiserver_request_total:rate3d"
|
||||
},
|
||||
{
|
||||
"expr": "sum(status_class:apiserver_request_total:rate5m{job=\"apiserver\",status_class=\"5xx\"})\n/\nsum(status_class:apiserver_request_total:rate5m{job=\"apiserver\"})\n",
|
||||
"labels": {
|
||||
"job": "apiserver"
|
||||
},
|
||||
"record": "status_class_5xx:apiserver_request_total:ratio_rate5m"
|
||||
},
|
||||
{
|
||||
"expr": "sum(status_class:apiserver_request_total:rate30m{job=\"apiserver\",status_class=\"5xx\"})\n/\nsum(status_class:apiserver_request_total:rate30m{job=\"apiserver\"})\n",
|
||||
"labels": {
|
||||
"job": "apiserver"
|
||||
},
|
||||
"record": "status_class_5xx:apiserver_request_total:ratio_rate30m"
|
||||
},
|
||||
{
|
||||
"expr": "sum(status_class:apiserver_request_total:rate1h{job=\"apiserver\",status_class=\"5xx\"})\n/\nsum(status_class:apiserver_request_total:rate1h{job=\"apiserver\"})\n",
|
||||
"labels": {
|
||||
"job": "apiserver"
|
||||
},
|
||||
"record": "status_class_5xx:apiserver_request_total:ratio_rate1h"
|
||||
},
|
||||
{
|
||||
"expr": "sum(status_class:apiserver_request_total:rate2h{job=\"apiserver\",status_class=\"5xx\"})\n/\nsum(status_class:apiserver_request_total:rate2h{job=\"apiserver\"})\n",
|
||||
"labels": {
|
||||
"job": "apiserver"
|
||||
},
|
||||
"record": "status_class_5xx:apiserver_request_total:ratio_rate2h"
|
||||
},
|
||||
{
|
||||
"expr": "sum(status_class:apiserver_request_total:rate6h{job=\"apiserver\",status_class=\"5xx\"})\n/\nsum(status_class:apiserver_request_total:rate6h{job=\"apiserver\"})\n",
|
||||
"labels": {
|
||||
"job": "apiserver"
|
||||
},
|
||||
"record": "status_class_5xx:apiserver_request_total:ratio_rate6h"
|
||||
},
|
||||
{
|
||||
"expr": "sum(status_class:apiserver_request_total:rate1d{job=\"apiserver\",status_class=\"5xx\"})\n/\nsum(status_class:apiserver_request_total:rate1d{job=\"apiserver\"})\n",
|
||||
"labels": {
|
||||
"job": "apiserver"
|
||||
},
|
||||
"record": "status_class_5xx:apiserver_request_total:ratio_rate1d"
|
||||
},
|
||||
{
|
||||
"expr": "sum(status_class:apiserver_request_total:rate3d{job=\"apiserver\",status_class=\"5xx\"})\n/\nsum(status_class:apiserver_request_total:rate3d{job=\"apiserver\"})\n",
|
||||
"labels": {
|
||||
"job": "apiserver"
|
||||
},
|
||||
"record": "status_class_5xx:apiserver_request_total:ratio_rate3d"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "kube-apiserver.rules",
|
||||
"rules": [
|
||||
{
|
||||
"expr": "sum(rate(apiserver_request_duration_seconds_sum{subresource!=\"log\",verb!~\"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT\"}[5m])) without(instance, pod)\n/\nsum(rate(apiserver_request_duration_seconds_count{subresource!=\"log\",verb!~\"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT\"}[5m])) without(instance, pod)\n",
|
||||
"record": "cluster:apiserver_request_duration_seconds:mean5m"
|
||||
"expr": "(\n (\n # too slow\n sum(rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n -\n (\n (\n sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"0.1\"}[1d]))\n or\n vector(0)\n )\n +\n sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"0.5\"}[1d]))\n +\n sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"5\"}[1d]))\n )\n )\n +\n # errors\n sum(rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1d]))\n)\n/\nsum(rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n",
|
||||
"labels": {
|
||||
"verb": "read"
|
||||
},
|
||||
"record": "apiserver_request:burnrate1d"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum(rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n -\n (\n (\n sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"0.1\"}[1h]))\n or\n vector(0)\n )\n +\n sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"0.5\"}[1h]))\n +\n sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"5\"}[1h]))\n )\n )\n +\n # errors\n sum(rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[1h]))\n)\n/\nsum(rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n",
|
||||
"labels": {
|
||||
"verb": "read"
|
||||
},
|
||||
"record": "apiserver_request:burnrate1h"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum(rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n -\n (\n (\n sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"0.1\"}[2h]))\n or\n vector(0)\n )\n +\n sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"0.5\"}[2h]))\n +\n sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"5\"}[2h]))\n )\n )\n +\n # errors\n sum(rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[2h]))\n)\n/\nsum(rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n",
|
||||
"labels": {
|
||||
"verb": "read"
|
||||
},
|
||||
"record": "apiserver_request:burnrate2h"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum(rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n -\n (\n (\n sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"0.1\"}[30m]))\n or\n vector(0)\n )\n +\n sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"0.5\"}[30m]))\n +\n sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"5\"}[30m]))\n )\n )\n +\n # errors\n sum(rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[30m]))\n)\n/\nsum(rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[30m]))\n",
|
||||
"labels": {
|
||||
"verb": "read"
|
||||
},
|
||||
"record": "apiserver_request:burnrate30m"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum(rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n -\n (\n (\n sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"0.1\"}[3d]))\n or\n vector(0)\n )\n +\n sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"0.5\"}[3d]))\n +\n sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"5\"}[3d]))\n )\n )\n +\n # errors\n sum(rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[3d]))\n)\n/\nsum(rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n",
|
||||
"labels": {
|
||||
"verb": "read"
|
||||
},
|
||||
"record": "apiserver_request:burnrate3d"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum(rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n -\n (\n (\n sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"0.1\"}[5m]))\n or\n vector(0)\n )\n +\n sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"0.5\"}[5m]))\n +\n sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"5\"}[5m]))\n )\n )\n +\n # errors\n sum(rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[5m]))\n)\n/\nsum(rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n",
|
||||
"labels": {
|
||||
"verb": "read"
|
||||
},
|
||||
"record": "apiserver_request:burnrate5m"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum(rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n -\n (\n (\n sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"0.1\"}[6h]))\n or\n vector(0)\n )\n +\n sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"0.5\"}[6h]))\n +\n sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"5\"}[6h]))\n )\n )\n +\n # errors\n sum(rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[6h]))\n)\n/\nsum(rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n",
|
||||
"labels": {
|
||||
"verb": "read"
|
||||
},
|
||||
"record": "apiserver_request:burnrate6h"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum(rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1d]))\n -\n sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[1d]))\n )\n +\n sum(rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1d]))\n)\n/\nsum(rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1d]))\n",
|
||||
"labels": {
|
||||
"verb": "write"
|
||||
},
|
||||
"record": "apiserver_request:burnrate1d"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum(rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1h]))\n -\n sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[1h]))\n )\n +\n sum(rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1h]))\n)\n/\nsum(rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[1h]))\n",
|
||||
"labels": {
|
||||
"verb": "write"
|
||||
},
|
||||
"record": "apiserver_request:burnrate1h"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum(rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[2h]))\n -\n sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[2h]))\n )\n +\n sum(rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[2h]))\n)\n/\nsum(rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[2h]))\n",
|
||||
"labels": {
|
||||
"verb": "write"
|
||||
},
|
||||
"record": "apiserver_request:burnrate2h"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum(rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))\n -\n sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[30m]))\n )\n +\n sum(rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[30m]))\n)\n/\nsum(rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))\n",
|
||||
"labels": {
|
||||
"verb": "write"
|
||||
},
|
||||
"record": "apiserver_request:burnrate30m"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum(rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[3d]))\n -\n sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[3d]))\n )\n +\n sum(rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[3d]))\n)\n/\nsum(rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[3d]))\n",
|
||||
"labels": {
|
||||
"verb": "write"
|
||||
},
|
||||
"record": "apiserver_request:burnrate3d"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum(rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))\n -\n sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[5m]))\n )\n +\n sum(rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[5m]))\n)\n/\nsum(rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))\n",
|
||||
"labels": {
|
||||
"verb": "write"
|
||||
},
|
||||
"record": "apiserver_request:burnrate5m"
|
||||
},
|
||||
{
|
||||
"expr": "(\n (\n # too slow\n sum(rate(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[6h]))\n -\n sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[6h]))\n )\n +\n sum(rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[6h]))\n)\n/\nsum(rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[6h]))\n",
|
||||
"labels": {
|
||||
"verb": "write"
|
||||
},
|
||||
"record": "apiserver_request:burnrate6h"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (code,resource) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n",
|
||||
"labels": {
|
||||
"verb": "read"
|
||||
},
|
||||
"record": "code_resource:apiserver_request_total:rate5m"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (code,resource) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))\n",
|
||||
"labels": {
|
||||
"verb": "write"
|
||||
},
|
||||
"record": "code_resource:apiserver_request_total:rate5m"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.99, sum by (le, resource) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))) > 0\n",
|
||||
"labels": {
|
||||
"quantile": "0.99",
|
||||
"verb": "read"
|
||||
},
|
||||
"record": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.99, sum by (le, resource) (rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))) > 0\n",
|
||||
"labels": {
|
||||
"quantile": "0.99",
|
||||
"verb": "write"
|
||||
},
|
||||
"record": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",subresource!=\"log\",verb!~\"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT\"}[5m])) without(instance, pod))\n",
|
||||
@ -278,37 +344,166 @@ data:
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"interval": "3m",
|
||||
"name": "kube-apiserver-availability.rules",
|
||||
"rules": [
|
||||
{
|
||||
"expr": "1 - (\n (\n # write too slow\n sum(increase(apiserver_request_duration_seconds_count{verb=~\"POST|PUT|PATCH|DELETE\"}[30d]))\n -\n sum(increase(apiserver_request_duration_seconds_bucket{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[30d]))\n ) +\n (\n # read too slow\n sum(increase(apiserver_request_duration_seconds_count{verb=~\"LIST|GET\"}[30d]))\n -\n (\n (\n sum(increase(apiserver_request_duration_seconds_bucket{verb=~\"LIST|GET\",scope=~\"resource|\",le=\"0.1\"}[30d]))\n or\n vector(0)\n )\n +\n sum(increase(apiserver_request_duration_seconds_bucket{verb=~\"LIST|GET\",scope=\"namespace\",le=\"0.5\"}[30d]))\n +\n sum(increase(apiserver_request_duration_seconds_bucket{verb=~\"LIST|GET\",scope=\"cluster\",le=\"5\"}[30d]))\n )\n ) +\n # errors\n sum(code:apiserver_request_total:increase30d{code=~\"5..\"} or vector(0))\n)\n/\nsum(code:apiserver_request_total:increase30d)\n",
|
||||
"labels": {
|
||||
"verb": "all"
|
||||
},
|
||||
"record": "apiserver_request:availability30d"
|
||||
},
|
||||
{
|
||||
"expr": "1 - (\n sum(increase(apiserver_request_duration_seconds_count{job=\"apiserver\",verb=~\"LIST|GET\"}[30d]))\n -\n (\n # too slow\n (\n sum(increase(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=~\"resource|\",le=\"0.1\"}[30d]))\n or\n vector(0)\n )\n +\n sum(increase(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"namespace\",le=\"0.5\"}[30d]))\n +\n sum(increase(apiserver_request_duration_seconds_bucket{job=\"apiserver\",verb=~\"LIST|GET\",scope=\"cluster\",le=\"5\"}[30d]))\n )\n +\n # errors\n sum(code:apiserver_request_total:increase30d{verb=\"read\",code=~\"5..\"} or vector(0))\n)\n/\nsum(code:apiserver_request_total:increase30d{verb=\"read\"})\n",
|
||||
"labels": {
|
||||
"verb": "read"
|
||||
},
|
||||
"record": "apiserver_request:availability30d"
|
||||
},
|
||||
{
|
||||
"expr": "1 - (\n (\n # too slow\n sum(increase(apiserver_request_duration_seconds_count{verb=~\"POST|PUT|PATCH|DELETE\"}[30d]))\n -\n sum(increase(apiserver_request_duration_seconds_bucket{verb=~\"POST|PUT|PATCH|DELETE\",le=\"1\"}[30d]))\n )\n +\n # errors\n sum(code:apiserver_request_total:increase30d{verb=\"write\",code=~\"5..\"} or vector(0))\n)\n/\nsum(code:apiserver_request_total:increase30d{verb=\"write\"})\n",
|
||||
"labels": {
|
||||
"verb": "write"
|
||||
},
|
||||
"record": "apiserver_request:availability30d"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (code, verb) (increase(apiserver_request_total{job=\"apiserver\",verb=\"LIST\",code=~\"2..\"}[30d]))\n",
|
||||
"record": "code_verb:apiserver_request_total:increase30d"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (code, verb) (increase(apiserver_request_total{job=\"apiserver\",verb=\"GET\",code=~\"2..\"}[30d]))\n",
|
||||
"record": "code_verb:apiserver_request_total:increase30d"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (code, verb) (increase(apiserver_request_total{job=\"apiserver\",verb=\"POST\",code=~\"2..\"}[30d]))\n",
|
||||
"record": "code_verb:apiserver_request_total:increase30d"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (code, verb) (increase(apiserver_request_total{job=\"apiserver\",verb=\"PUT\",code=~\"2..\"}[30d]))\n",
|
||||
"record": "code_verb:apiserver_request_total:increase30d"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (code, verb) (increase(apiserver_request_total{job=\"apiserver\",verb=\"PATCH\",code=~\"2..\"}[30d]))\n",
|
||||
"record": "code_verb:apiserver_request_total:increase30d"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (code, verb) (increase(apiserver_request_total{job=\"apiserver\",verb=\"DELETE\",code=~\"2..\"}[30d]))\n",
|
||||
"record": "code_verb:apiserver_request_total:increase30d"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (code, verb) (increase(apiserver_request_total{job=\"apiserver\",verb=\"LIST\",code=~\"3..\"}[30d]))\n",
|
||||
"record": "code_verb:apiserver_request_total:increase30d"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (code, verb) (increase(apiserver_request_total{job=\"apiserver\",verb=\"GET\",code=~\"3..\"}[30d]))\n",
|
||||
"record": "code_verb:apiserver_request_total:increase30d"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (code, verb) (increase(apiserver_request_total{job=\"apiserver\",verb=\"POST\",code=~\"3..\"}[30d]))\n",
|
||||
"record": "code_verb:apiserver_request_total:increase30d"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (code, verb) (increase(apiserver_request_total{job=\"apiserver\",verb=\"PUT\",code=~\"3..\"}[30d]))\n",
|
||||
"record": "code_verb:apiserver_request_total:increase30d"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (code, verb) (increase(apiserver_request_total{job=\"apiserver\",verb=\"PATCH\",code=~\"3..\"}[30d]))\n",
|
||||
"record": "code_verb:apiserver_request_total:increase30d"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (code, verb) (increase(apiserver_request_total{job=\"apiserver\",verb=\"DELETE\",code=~\"3..\"}[30d]))\n",
|
||||
"record": "code_verb:apiserver_request_total:increase30d"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (code, verb) (increase(apiserver_request_total{job=\"apiserver\",verb=\"LIST\",code=~\"4..\"}[30d]))\n",
|
||||
"record": "code_verb:apiserver_request_total:increase30d"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (code, verb) (increase(apiserver_request_total{job=\"apiserver\",verb=\"GET\",code=~\"4..\"}[30d]))\n",
|
||||
"record": "code_verb:apiserver_request_total:increase30d"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (code, verb) (increase(apiserver_request_total{job=\"apiserver\",verb=\"POST\",code=~\"4..\"}[30d]))\n",
|
||||
"record": "code_verb:apiserver_request_total:increase30d"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (code, verb) (increase(apiserver_request_total{job=\"apiserver\",verb=\"PUT\",code=~\"4..\"}[30d]))\n",
|
||||
"record": "code_verb:apiserver_request_total:increase30d"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (code, verb) (increase(apiserver_request_total{job=\"apiserver\",verb=\"PATCH\",code=~\"4..\"}[30d]))\n",
|
||||
"record": "code_verb:apiserver_request_total:increase30d"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (code, verb) (increase(apiserver_request_total{job=\"apiserver\",verb=\"DELETE\",code=~\"4..\"}[30d]))\n",
|
||||
"record": "code_verb:apiserver_request_total:increase30d"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (code, verb) (increase(apiserver_request_total{job=\"apiserver\",verb=\"LIST\",code=~\"5..\"}[30d]))\n",
|
||||
"record": "code_verb:apiserver_request_total:increase30d"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (code, verb) (increase(apiserver_request_total{job=\"apiserver\",verb=\"GET\",code=~\"5..\"}[30d]))\n",
|
||||
"record": "code_verb:apiserver_request_total:increase30d"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (code, verb) (increase(apiserver_request_total{job=\"apiserver\",verb=\"POST\",code=~\"5..\"}[30d]))\n",
|
||||
"record": "code_verb:apiserver_request_total:increase30d"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (code, verb) (increase(apiserver_request_total{job=\"apiserver\",verb=\"PUT\",code=~\"5..\"}[30d]))\n",
|
||||
"record": "code_verb:apiserver_request_total:increase30d"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (code, verb) (increase(apiserver_request_total{job=\"apiserver\",verb=\"PATCH\",code=~\"5..\"}[30d]))\n",
|
||||
"record": "code_verb:apiserver_request_total:increase30d"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (code, verb) (increase(apiserver_request_total{job=\"apiserver\",verb=\"DELETE\",code=~\"5..\"}[30d]))\n",
|
||||
"record": "code_verb:apiserver_request_total:increase30d"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~\"LIST|GET\"})\n",
|
||||
"labels": {
|
||||
"verb": "read"
|
||||
},
|
||||
"record": "code:apiserver_request_total:increase30d"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~\"POST|PUT|PATCH|DELETE\"})\n",
|
||||
"labels": {
|
||||
"verb": "write"
|
||||
},
|
||||
"record": "code:apiserver_request_total:increase30d"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "k8s.rules",
|
||||
"rules": [
|
||||
{
|
||||
"expr": "sum(rate(container_cpu_usage_seconds_total{job=\"kubernetes-cadvisor\", image!=\"\", container!=\"POD\"}[5m])) by (namespace)\n",
|
||||
"record": "namespace:container_cpu_usage_seconds_total:sum_rate"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (cluster, namespace, pod, container) (\n rate(container_cpu_usage_seconds_total{job=\"kubernetes-cadvisor\", image!=\"\", container!=\"POD\"}[5m])\n) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) (\n 1, max by(cluster, namespace, pod, node) (kube_pod_info)\n)\n",
|
||||
"expr": "sum by (cluster, namespace, pod, container) (\n rate(container_cpu_usage_seconds_total{job=\"kubernetes-cadvisor\", image!=\"\", container!=\"POD\"}[5m])\n) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) (\n 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\"})\n)\n",
|
||||
"record": "node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate"
|
||||
},
|
||||
{
|
||||
"expr": "container_memory_working_set_bytes{job=\"kubernetes-cadvisor\", image!=\"\"}\n* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,\n max by(namespace, pod, node) (kube_pod_info)\n)\n",
|
||||
"expr": "container_memory_working_set_bytes{job=\"kubernetes-cadvisor\", image!=\"\"}\n* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,\n max by(namespace, pod, node) (kube_pod_info{node!=\"\"})\n)\n",
|
||||
"record": "node_namespace_pod_container:container_memory_working_set_bytes"
|
||||
},
|
||||
{
|
||||
"expr": "container_memory_rss{job=\"kubernetes-cadvisor\", image!=\"\"}\n* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,\n max by(namespace, pod, node) (kube_pod_info)\n)\n",
|
||||
"expr": "container_memory_rss{job=\"kubernetes-cadvisor\", image!=\"\"}\n* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,\n max by(namespace, pod, node) (kube_pod_info{node!=\"\"})\n)\n",
|
||||
"record": "node_namespace_pod_container:container_memory_rss"
|
||||
},
|
||||
{
|
||||
"expr": "container_memory_cache{job=\"kubernetes-cadvisor\", image!=\"\"}\n* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,\n max by(namespace, pod, node) (kube_pod_info)\n)\n",
|
||||
"expr": "container_memory_cache{job=\"kubernetes-cadvisor\", image!=\"\"}\n* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,\n max by(namespace, pod, node) (kube_pod_info{node!=\"\"})\n)\n",
|
||||
"record": "node_namespace_pod_container:container_memory_cache"
|
||||
},
|
||||
{
|
||||
"expr": "container_memory_swap{job=\"kubernetes-cadvisor\", image!=\"\"}\n* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,\n max by(namespace, pod, node) (kube_pod_info)\n)\n",
|
||||
"expr": "container_memory_swap{job=\"kubernetes-cadvisor\", image!=\"\"}\n* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,\n max by(namespace, pod, node) (kube_pod_info{node!=\"\"})\n)\n",
|
||||
"record": "node_namespace_pod_container:container_memory_swap"
|
||||
},
|
||||
{
|
||||
"expr": "sum(container_memory_usage_bytes{job=\"kubernetes-cadvisor\", image!=\"\", container!=\"POD\"}) by (namespace)\n",
|
||||
"record": "namespace:container_memory_usage_bytes:sum"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (namespace) (\n sum by (namespace, pod) (\n max by (namespace, pod, container) (\n kube_pod_container_resource_requests_memory_bytes{job=\"kube-state-metrics\"}\n ) * on(namespace, pod) group_left() max by (namespace, pod) (\n kube_pod_status_phase{phase=~\"Pending|Running\"} == 1\n )\n )\n)\n",
|
||||
"record": "namespace:kube_pod_container_resource_requests_memory_bytes:sum"
|
||||
@ -322,21 +517,21 @@ data:
|
||||
"labels": {
|
||||
"workload_type": "deployment"
|
||||
},
|
||||
"record": "mixin_pod_workload"
|
||||
"record": "namespace_workload_pod:kube_pod_owner:relabel"
|
||||
},
|
||||
{
|
||||
"expr": "max by (cluster, namespace, workload, pod) (\n label_replace(\n kube_pod_owner{job=\"kube-state-metrics\", owner_kind=\"DaemonSet\"},\n \"workload\", \"$1\", \"owner_name\", \"(.*)\"\n )\n)\n",
|
||||
"labels": {
|
||||
"workload_type": "daemonset"
|
||||
},
|
||||
"record": "mixin_pod_workload"
|
||||
"record": "namespace_workload_pod:kube_pod_owner:relabel"
|
||||
},
|
||||
{
|
||||
"expr": "max by (cluster, namespace, workload, pod) (\n label_replace(\n kube_pod_owner{job=\"kube-state-metrics\", owner_kind=\"StatefulSet\"},\n \"workload\", \"$1\", \"owner_name\", \"(.*)\"\n )\n)\n",
|
||||
"labels": {
|
||||
"workload_type": "statefulset"
|
||||
},
|
||||
"record": "mixin_pod_workload"
|
||||
"record": "namespace_workload_pod:kube_pod_owner:relabel"
|
||||
}
|
||||
]
|
||||
},
|
||||
@ -412,11 +607,7 @@ data:
|
||||
"name": "node.rules",
|
||||
"rules": [
|
||||
{
|
||||
"expr": "sum(min(kube_pod_info) by (cluster, node))\n",
|
||||
"record": ":kube_pod_info_node_count:"
|
||||
},
|
||||
{
|
||||
"expr": "topk by(namespace, pod) (1,\n max by (node, namespace, pod) (\n label_replace(kube_pod_info{job=\"kube-state-metrics\"}, \"pod\", \"$1\", \"pod\", \"(.*)\")\n))\n",
|
||||
"expr": "topk by(namespace, pod) (1,\n max by (node, namespace, pod) (\n label_replace(kube_pod_info{job=\"kube-state-metrics\",node!=\"\"}, \"pod\", \"$1\", \"pod\", \"(.*)\")\n))\n",
|
||||
"record": "node_namespace_pod:kube_pod_info:"
|
||||
},
|
||||
{
|
||||
@ -461,104 +652,113 @@ data:
|
||||
{
|
||||
"alert": "KubePodCrashLooping",
|
||||
"annotations": {
|
||||
"message": "Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is restarting {{ printf \"%.2f\" $value }} times / 5 minutes.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodcrashlooping"
|
||||
"description": "Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is restarting {{ printf \"%.2f\" $value }} times / 5 minutes.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodcrashlooping",
|
||||
"summary": "Pod is crash looping."
|
||||
},
|
||||
"expr": "rate(kube_pod_container_status_restarts_total{job=\"kube-state-metrics\"}[15m]) * 60 * 5 > 0\n",
|
||||
"expr": "rate(kube_pod_container_status_restarts_total{job=\"kube-state-metrics\"}[5m]) * 60 * 5 > 0\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
"severity": "warning"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "KubePodNotReady",
|
||||
"annotations": {
|
||||
"message": "Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready state for longer than 15 minutes.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodnotready"
|
||||
"description": "Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready state for longer than 15 minutes.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodnotready",
|
||||
"summary": "Pod has been in a non-ready state for more than 15 minutes."
|
||||
},
|
||||
"expr": "sum by (namespace, pod) (max by(namespace, pod) (kube_pod_status_phase{job=\"kube-state-metrics\", phase=~\"Pending|Unknown\"}) * on(namespace, pod) group_left(owner_kind) max by(namespace, pod, owner_kind) (kube_pod_owner{owner_kind!=\"Job\"})) > 0\n",
|
||||
"expr": "sum by (namespace, pod) (\n max by(namespace, pod) (\n kube_pod_status_phase{job=\"kube-state-metrics\", phase=~\"Pending|Unknown\"}\n ) * on(namespace, pod) group_left(owner_kind) topk by(namespace, pod) (\n 1, max by(namespace, pod, owner_kind) (kube_pod_owner{owner_kind!=\"Job\"})\n )\n) > 0\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
"severity": "warning"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "KubeDeploymentGenerationMismatch",
|
||||
"annotations": {
|
||||
"message": "Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment }} does not match, this indicates that the Deployment has failed but has not been rolled back.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentgenerationmismatch"
|
||||
"description": "Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment }} does not match, this indicates that the Deployment has failed but has not been rolled back.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentgenerationmismatch",
|
||||
"summary": "Deployment generation mismatch due to possible roll-back"
|
||||
},
|
||||
"expr": "kube_deployment_status_observed_generation{job=\"kube-state-metrics\"}\n !=\nkube_deployment_metadata_generation{job=\"kube-state-metrics\"}\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
"severity": "warning"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "KubeDeploymentReplicasMismatch",
|
||||
"annotations": {
|
||||
"message": "Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not matched the expected number of replicas for longer than 15 minutes.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentreplicasmismatch"
|
||||
"description": "Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not matched the expected number of replicas for longer than 15 minutes.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentreplicasmismatch",
|
||||
"summary": "Deployment has not matched the expected number of replicas."
|
||||
},
|
||||
"expr": "(\n kube_deployment_spec_replicas{job=\"kube-state-metrics\"}\n !=\n kube_deployment_status_replicas_available{job=\"kube-state-metrics\"}\n) and (\n changes(kube_deployment_status_replicas_updated{job=\"kube-state-metrics\"}[5m])\n ==\n 0\n)\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
"severity": "warning"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "KubeStatefulSetReplicasMismatch",
|
||||
"annotations": {
|
||||
"message": "StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has not matched the expected number of replicas for longer than 15 minutes.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetreplicasmismatch"
|
||||
"description": "StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has not matched the expected number of replicas for longer than 15 minutes.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetreplicasmismatch",
|
||||
"summary": "Deployment has not matched the expected number of replicas."
|
||||
},
|
||||
"expr": "(\n kube_statefulset_status_replicas_ready{job=\"kube-state-metrics\"}\n !=\n kube_statefulset_status_replicas{job=\"kube-state-metrics\"}\n) and (\n changes(kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\"}[5m])\n ==\n 0\n)\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
"severity": "warning"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "KubeStatefulSetGenerationMismatch",
|
||||
"annotations": {
|
||||
"message": "StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset }} does not match, this indicates that the StatefulSet has failed but has not been rolled back.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetgenerationmismatch"
|
||||
"description": "StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset }} does not match, this indicates that the StatefulSet has failed but has not been rolled back.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetgenerationmismatch",
|
||||
"summary": "StatefulSet generation mismatch due to possible roll-back"
|
||||
},
|
||||
"expr": "kube_statefulset_status_observed_generation{job=\"kube-state-metrics\"}\n !=\nkube_statefulset_metadata_generation{job=\"kube-state-metrics\"}\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
"severity": "warning"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "KubeStatefulSetUpdateNotRolledOut",
|
||||
"annotations": {
|
||||
"message": "StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update has not been rolled out.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetupdatenotrolledout"
|
||||
"description": "StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update has not been rolled out.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetupdatenotrolledout",
|
||||
"summary": "StatefulSet update has not been rolled out."
|
||||
},
|
||||
"expr": "max without (revision) (\n kube_statefulset_status_current_revision{job=\"kube-state-metrics\"}\n unless\n kube_statefulset_status_update_revision{job=\"kube-state-metrics\"}\n)\n *\n(\n kube_statefulset_replicas{job=\"kube-state-metrics\"}\n !=\n kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\"}\n)\n",
|
||||
"expr": "(\n max without (revision) (\n kube_statefulset_status_current_revision{job=\"kube-state-metrics\"}\n unless\n kube_statefulset_status_update_revision{job=\"kube-state-metrics\"}\n )\n *\n (\n kube_statefulset_replicas{job=\"kube-state-metrics\"}\n !=\n kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\"}\n )\n) and (\n changes(kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\"}[5m])\n ==\n 0\n)\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
"severity": "warning"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "KubeDaemonSetRolloutStuck",
|
||||
"annotations": {
|
||||
"message": "Only {{ $value | humanizePercentage }} of the desired Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are scheduled and ready.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetrolloutstuck"
|
||||
"description": "DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not finished or progressed for at least 15 minutes.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetrolloutstuck",
|
||||
"summary": "DaemonSet rollout is stuck."
|
||||
},
|
||||
"expr": "kube_daemonset_status_number_ready{job=\"kube-state-metrics\"}\n /\nkube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"} < 1.00\n",
|
||||
"expr": "(\n (\n kube_daemonset_status_current_number_scheduled{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n ) or (\n kube_daemonset_status_number_misscheduled{job=\"kube-state-metrics\"}\n !=\n 0\n ) or (\n kube_daemonset_updated_number_scheduled{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n ) or (\n kube_daemonset_status_number_available{job=\"kube-state-metrics\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n )\n) and (\n changes(kube_daemonset_updated_number_scheduled{job=\"kube-state-metrics\"}[5m])\n ==\n 0\n)\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
"severity": "warning"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "KubeContainerWaiting",
|
||||
"annotations": {
|
||||
"message": "Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}} has been in waiting state for longer than 1 hour.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontainerwaiting"
|
||||
"description": "Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}} has been in waiting state for longer than 1 hour.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontainerwaiting",
|
||||
"summary": "Pod container waiting longer than 1 hour"
|
||||
},
|
||||
"expr": "sum by (namespace, pod, container) (kube_pod_container_status_waiting_reason{job=\"kube-state-metrics\"}) > 0\n",
|
||||
"for": "1h",
|
||||
@ -569,8 +769,9 @@ data:
|
||||
{
|
||||
"alert": "KubeDaemonSetNotScheduled",
|
||||
"annotations": {
|
||||
"message": "{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are not scheduled.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetnotscheduled"
|
||||
"description": "{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are not scheduled.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetnotscheduled",
|
||||
"summary": "DaemonSet pods are not scheduled."
|
||||
},
|
||||
"expr": "kube_daemonset_status_desired_number_scheduled{job=\"kube-state-metrics\"}\n -\nkube_daemonset_status_current_number_scheduled{job=\"kube-state-metrics\"} > 0\n",
|
||||
"for": "10m",
|
||||
@ -581,23 +782,12 @@ data:
|
||||
{
|
||||
"alert": "KubeDaemonSetMisScheduled",
|
||||
"annotations": {
|
||||
"message": "{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are running where they are not supposed to run.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetmisscheduled"
|
||||
"description": "{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are running where they are not supposed to run.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetmisscheduled",
|
||||
"summary": "DaemonSet pods are misscheduled."
|
||||
},
|
||||
"expr": "kube_daemonset_status_number_misscheduled{job=\"kube-state-metrics\"} > 0\n",
|
||||
"for": "10m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "KubeCronJobRunning",
|
||||
"annotations": {
|
||||
"message": "CronJob {{ $labels.namespace }}/{{ $labels.cronjob }} is taking more than 1h to complete.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecronjobrunning"
|
||||
},
|
||||
"expr": "time() - kube_cronjob_next_schedule_time{job=\"kube-state-metrics\"} > 3600\n",
|
||||
"for": "1h",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
}
|
||||
@ -605,11 +795,12 @@ data:
|
||||
{
|
||||
"alert": "KubeJobCompletion",
|
||||
"annotations": {
|
||||
"message": "Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than one hour to complete.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobcompletion"
|
||||
"description": "Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than 12 hours to complete.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobcompletion",
|
||||
"summary": "Job did not complete in time"
|
||||
},
|
||||
"expr": "kube_job_spec_completions{job=\"kube-state-metrics\"} - kube_job_status_succeeded{job=\"kube-state-metrics\"} > 0\n",
|
||||
"for": "1h",
|
||||
"for": "12h",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
}
|
||||
@ -617,8 +808,9 @@ data:
|
||||
{
|
||||
"alert": "KubeJobFailed",
|
||||
"annotations": {
|
||||
"message": "Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobfailed"
|
||||
"description": "Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete. Removing failed job after investigation should clear this alert.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobfailed",
|
||||
"summary": "Job failed to complete."
|
||||
},
|
||||
"expr": "kube_job_failed{job=\"kube-state-metrics\"} > 0\n",
|
||||
"for": "15m",
|
||||
@ -629,10 +821,11 @@ data:
|
||||
{
|
||||
"alert": "KubeHpaReplicasMismatch",
|
||||
"annotations": {
|
||||
"message": "HPA {{ $labels.namespace }}/{{ $labels.hpa }} has not matched the desired number of replicas for longer than 15 minutes.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpareplicasmismatch"
|
||||
"description": "HPA {{ $labels.namespace }}/{{ $labels.hpa }} has not matched the desired number of replicas for longer than 15 minutes.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpareplicasmismatch",
|
||||
"summary": "HPA has not matched descired number of replicas."
|
||||
},
|
||||
"expr": "(kube_hpa_status_desired_replicas{job=\"kube-state-metrics\"}\n !=\nkube_hpa_status_current_replicas{job=\"kube-state-metrics\"})\n and\nchanges(kube_hpa_status_current_replicas[15m]) == 0\n",
|
||||
"expr": "(kube_hpa_status_desired_replicas{job=\"kube-state-metrics\"}\n !=\nkube_hpa_status_current_replicas{job=\"kube-state-metrics\"})\n and\n(kube_hpa_status_current_replicas{job=\"kube-state-metrics\"}\n >\nkube_hpa_spec_min_replicas{job=\"kube-state-metrics\"})\n and\n(kube_hpa_status_current_replicas{job=\"kube-state-metrics\"}\n <\nkube_hpa_spec_max_replicas{job=\"kube-state-metrics\"})\n and\nchanges(kube_hpa_status_current_replicas[15m]) == 0\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
@ -641,8 +834,9 @@ data:
|
||||
{
|
||||
"alert": "KubeHpaMaxedOut",
|
||||
"annotations": {
|
||||
"message": "HPA {{ $labels.namespace }}/{{ $labels.hpa }} has been running at max replicas for longer than 15 minutes.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpamaxedout"
|
||||
"description": "HPA {{ $labels.namespace }}/{{ $labels.hpa }} has been running at max replicas for longer than 15 minutes.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpamaxedout",
|
||||
"summary": "HPA is running at max replicas"
|
||||
},
|
||||
"expr": "kube_hpa_status_current_replicas{job=\"kube-state-metrics\"}\n ==\nkube_hpa_spec_max_replicas{job=\"kube-state-metrics\"}\n",
|
||||
"for": "15m",
|
||||
@ -658,8 +852,9 @@ data:
|
||||
{
|
||||
"alert": "KubeCPUOvercommit",
|
||||
"annotations": {
|
||||
"message": "Cluster has overcommitted CPU resource requests for Pods and cannot tolerate node failure.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit"
|
||||
"description": "Cluster has overcommitted CPU resource requests for Pods and cannot tolerate node failure.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit",
|
||||
"summary": "Cluster has overcommitted CPU resource requests."
|
||||
},
|
||||
"expr": "sum(namespace:kube_pod_container_resource_requests_cpu_cores:sum{})\n /\nsum(kube_node_status_allocatable_cpu_cores)\n >\n(count(kube_node_status_allocatable_cpu_cores)-1) / count(kube_node_status_allocatable_cpu_cores)\n",
|
||||
"for": "5m",
|
||||
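The KubeCPUOvercommit threshold `(count(...) - 1) / count(...)` encodes "would all CPU requests still fit if one node were lost": with n nodes, requests above the capacity of n - 1 nodes cannot be rescheduled after a single node failure. A small sketch of that check with assumed cluster numbers:

```python
# Sketch of the KubeCPUOvercommit comparison (cluster numbers are assumed).
nodes = 4                       # count(kube_node_status_allocatable_cpu_cores)
cores_per_node = 8              # assumes homogeneous nodes
total_allocatable = nodes * cores_per_node
total_requests = 26             # sum of pod CPU requests, in cores

request_ratio = total_requests / total_allocatable         # left-hand side of the rule
threshold = (nodes - 1) / nodes                             # right-hand side of the rule
print(request_ratio, threshold, request_ratio > threshold)  # 0.8125 0.75 True -> alert fires
```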
@ -668,10 +863,11 @@ data:
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "KubeMemOvercommit",
|
||||
"alert": "KubeMemoryOvercommit",
|
||||
"annotations": {
|
||||
"message": "Cluster has overcommitted memory resource requests for Pods and cannot tolerate node failure.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememovercommit"
|
||||
"description": "Cluster has overcommitted memory resource requests for Pods and cannot tolerate node failure.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryovercommit",
|
||||
"summary": "Cluster has overcommitted memory resource requests."
|
||||
},
|
||||
"expr": "sum(namespace:kube_pod_container_resource_requests_memory_bytes:sum{})\n /\nsum(kube_node_status_allocatable_memory_bytes)\n >\n(count(kube_node_status_allocatable_memory_bytes)-1)\n /\ncount(kube_node_status_allocatable_memory_bytes)\n",
|
||||
"for": "5m",
|
||||
@ -680,10 +876,11 @@ data:
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "KubeCPUOvercommit",
|
||||
"alert": "KubeCPUQuotaOvercommit",
|
||||
"annotations": {
|
||||
"message": "Cluster has overcommitted CPU resource requests for Namespaces.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit"
|
||||
"description": "Cluster has overcommitted CPU resource requests for Namespaces.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuquotaovercommit",
|
||||
"summary": "Cluster has overcommitted CPU resource requests."
|
||||
},
|
||||
"expr": "sum(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=\"cpu\"})\n /\nsum(kube_node_status_allocatable_cpu_cores)\n > 1.5\n",
|
||||
"for": "5m",
|
||||
@ -692,24 +889,52 @@ data:
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "KubeMemOvercommit",
|
||||
"alert": "KubeMemoryQuotaOvercommit",
|
||||
"annotations": {
|
||||
"message": "Cluster has overcommitted memory resource requests for Namespaces.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememovercommit"
|
||||
"description": "Cluster has overcommitted memory resource requests for Namespaces.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryquotaovercommit",
|
||||
"summary": "Cluster has overcommitted memory resource requests."
|
||||
},
|
||||
"expr": "sum(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=\"memory\"})\n /\nsum(kube_node_status_allocatable_memory_bytes{job=\"node-exporter\"})\n > 1.5\n",
|
||||
"expr": "sum(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\", resource=\"memory\"})\n /\nsum(kube_node_status_allocatable_memory_bytes{job=\"kube-state-metrics\"})\n > 1.5\n",
|
||||
"for": "5m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "KubeQuotaAlmostFull",
|
||||
"annotations": {
|
||||
"description": "Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotaalmostfull",
|
||||
"summary": "Namespace quota is going to be full."
|
||||
},
|
||||
"expr": "kube_resourcequota{job=\"kube-state-metrics\", type=\"used\"}\n / ignoring(instance, job, type)\n(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\"} > 0)\n > 0.9 < 1\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "info"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "KubeQuotaFullyUsed",
|
||||
"annotations": {
|
||||
"description": "Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotafullyused",
|
||||
"summary": "Namespace quota is fully used."
|
||||
},
|
||||
"expr": "kube_resourcequota{job=\"kube-state-metrics\", type=\"used\"}\n / ignoring(instance, job, type)\n(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\"} > 0)\n == 1\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "info"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "KubeQuotaExceeded",
|
||||
"annotations": {
|
||||
"message": "Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotaexceeded"
|
||||
"description": "Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotaexceeded",
|
||||
"summary": "Namespace quota has exceeded the limits."
|
||||
},
|
||||
"expr": "kube_resourcequota{job=\"kube-state-metrics\", type=\"used\"}\n / ignoring(instance, job, type)\n(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\"} > 0)\n > 0.90\n",
|
||||
"expr": "kube_resourcequota{job=\"kube-state-metrics\", type=\"used\"}\n / ignoring(instance, job, type)\n(kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\"} > 0)\n > 1\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
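The three quota alerts share the same `used / hard` ratio and only differ in how the chained comparisons slice it: KubeQuotaAlmostFull takes `> 0.9 < 1`, KubeQuotaFullyUsed takes `== 1`, and KubeQuotaExceeded takes `> 1`. A small sketch of that partitioning, with invented quota values:

```python
# Sketch of how the three quota alerts partition the used/hard ratio (values are invented).
def quota_alert(used, hard):
    ratio = used / hard
    if ratio > 1:          # KubeQuotaExceeded (warning)
        return "KubeQuotaExceeded"
    if ratio == 1:         # KubeQuotaFullyUsed (info)
        return "KubeQuotaFullyUsed"
    if ratio > 0.9:        # KubeQuotaAlmostFull (info)
        return "KubeQuotaAlmostFull"
    return None

for used in (8.0, 9.5, 10.0, 11.0):
    print(used, quota_alert(used, hard=10.0))
```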
@ -718,13 +943,14 @@ data:
|
||||
{
|
||||
"alert": "CPUThrottlingHigh",
|
||||
"annotations": {
|
||||
"message": "{{ $value | humanizePercentage }} throttling of CPU in namespace {{ $labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod }}.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh"
|
||||
"description": "{{ $value | humanizePercentage }} throttling of CPU in namespace {{ $labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod }}.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh",
|
||||
"summary": "Processes experience elevated CPU throttling."
|
||||
},
|
||||
"expr": "sum(increase(container_cpu_cfs_throttled_periods_total{container!=\"\", }[5m])) by (container, pod, namespace)\n /\nsum(increase(container_cpu_cfs_periods_total{}[5m])) by (container, pod, namespace)\n > ( 100 / 100 )\n",
|
||||
"expr": "sum(increase(container_cpu_cfs_throttled_periods_total{container!=\"\", }[5m])) by (container, pod, namespace)\n /\nsum(increase(container_cpu_cfs_periods_total{}[5m])) by (container, pod, namespace)\n > ( 80 / 100 )\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
"severity": "info"
|
||||
}
|
||||
}
|
||||
]
|
||||
@ -733,10 +959,11 @@ data:
|
||||
"name": "kubernetes-storage",
|
||||
"rules": [
|
||||
{
|
||||
"alert": "KubePersistentVolumeUsageCritical",
|
||||
"alert": "KubePersistentVolumeFillingUp",
|
||||
"annotations": {
|
||||
"message": "The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage }} free.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeusagecritical"
|
||||
"description": "The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage }} free.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup",
|
||||
"summary": "PersistentVolume is filling up."
|
||||
},
|
||||
"expr": "kubelet_volume_stats_available_bytes{job=\"kubelet\"}\n /\nkubelet_volume_stats_capacity_bytes{job=\"kubelet\"}\n < 0.03\n",
|
||||
"for": "1m",
|
||||
@ -745,22 +972,24 @@ data:
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "KubePersistentVolumeFullInFourDays",
|
||||
"alert": "KubePersistentVolumeFillingUp",
|
||||
"annotations": {
|
||||
"message": "Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is expected to fill up within four days. Currently {{ $value | humanizePercentage }} is available.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefullinfourdays"
|
||||
"description": "Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is expected to fill up within four days. Currently {{ $value | humanizePercentage }} is available.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup",
|
||||
"summary": "PersistentVolume is filling up."
|
||||
},
|
||||
"expr": "(\n kubelet_volume_stats_available_bytes{job=\"kubelet\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\"}\n) < 0.15\nand\npredict_linear(kubelet_volume_stats_available_bytes{job=\"kubelet\"}[6h], 4 * 24 * 3600) < 0\n",
|
||||
"for": "1h",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
"severity": "warning"
|
||||
}
|
||||
},
|
||||
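The four-day KubePersistentVolumeFillingUp rule pairs a static "less than 15% free" check with `predict_linear(...[6h], 4 * 24 * 3600) < 0`, a linear extrapolation of the last six hours of free-space samples four days ahead. A rough sketch of the same extrapolation; the sample series below is invented:

```python
# Rough analogue of predict_linear(kubelet_volume_stats_available_bytes[6h], 4*24*3600):
# fit a least-squares line to recent samples and evaluate it four days ahead.
samples = [(t * 3600, 50e9 - t * 2e9) for t in range(7)]   # (seconds, bytes free) over 6h, invented

n = len(samples)
mean_t = sum(t for t, _ in samples) / n
mean_v = sum(v for _, v in samples) / n
slope = sum((t - mean_t) * (v - mean_v) for t, v in samples) / \
        sum((t - mean_t) ** 2 for t, _ in samples)
intercept = mean_v - slope * mean_t

horizon = samples[-1][0] + 4 * 24 * 3600     # four days past the newest sample
predicted_free = intercept + slope * horizon
print(predicted_free < 0)                    # True -> volume is on track to fill up
```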
{
|
||||
"alert": "KubePersistentVolumeErrors",
|
||||
"annotations": {
|
||||
"message": "The persistent volume {{ $labels.persistentvolume }} has status {{ $labels.phase }}.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeerrors"
|
||||
"description": "The persistent volume {{ $labels.persistentvolume }} has status {{ $labels.phase }}.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeerrors",
|
||||
"summary": "PersistentVolume is having issues with provisioning."
|
||||
},
|
||||
"expr": "kube_persistentvolume_status_phase{phase=~\"Failed|Pending\",job=\"kube-state-metrics\"} > 0\n",
|
||||
"for": "5m",
|
||||
@ -776,10 +1005,11 @@ data:
|
||||
{
|
||||
"alert": "KubeVersionMismatch",
|
||||
"annotations": {
|
||||
"message": "There are {{ $value }} different semantic versions of Kubernetes components running.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeversionmismatch"
|
||||
"description": "There are {{ $value }} different semantic versions of Kubernetes components running.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeversionmismatch",
|
||||
"summary": "Different semantic versions of Kubernetes components running."
|
||||
},
|
||||
"expr": "count(count by (gitVersion) (label_replace(kubernetes_build_info{job!~\"kube-dns|coredns\"},\"gitVersion\",\"$1\",\"gitVersion\",\"(v[0-9]*.[0-9]*.[0-9]*).*\"))) > 1\n",
|
||||
"expr": "count(count by (gitVersion) (label_replace(kubernetes_build_info{job!~\"kube-dns|coredns\"},\"gitVersion\",\"$1\",\"gitVersion\",\"(v[0-9]*.[0-9]*).*\"))) > 1\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
@ -788,8 +1018,9 @@ data:
|
||||
{
|
||||
"alert": "KubeClientErrors",
|
||||
"annotations": {
|
||||
"message": "Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance }}' is experiencing {{ $value | humanizePercentage }} errors.'",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclienterrors"
|
||||
"description": "Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance }}' is experiencing {{ $value | humanizePercentage }} errors.'",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclienterrors",
|
||||
"summary": "Kubernetes API server client is experiencing errors."
|
||||
},
|
||||
"expr": "(sum(rate(rest_client_requests_total{code=~\"5..\"}[5m])) by (instance, job)\n /\nsum(rate(rest_client_requests_total[5m])) by (instance, job))\n> 0.01\n",
|
||||
"for": "15m",
|
||||
@ -800,30 +1031,66 @@ data:
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "kube-apiserver-error-alerts",
|
||||
"name": "kube-apiserver-slos",
|
||||
"rules": [
|
||||
{
|
||||
"alert": "ErrorBudgetBurn",
|
||||
"alert": "KubeAPIErrorBudgetBurn",
|
||||
"annotations": {
|
||||
"message": "High requests error budget burn for job=apiserver (current value: {{ $value }})",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-errorbudgetburn"
|
||||
"description": "The API server is burning too much error budget.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorbudgetburn",
|
||||
"summary": "The API server is burning too much error budget."
|
||||
},
|
||||
"expr": "(\n status_class_5xx:apiserver_request_total:ratio_rate1h{job=\"apiserver\"} > (14.4*0.010000)\n and\n status_class_5xx:apiserver_request_total:ratio_rate5m{job=\"apiserver\"} > (14.4*0.010000)\n)\nor\n(\n status_class_5xx:apiserver_request_total:ratio_rate6h{job=\"apiserver\"} > (6*0.010000)\n and\n status_class_5xx:apiserver_request_total:ratio_rate30m{job=\"apiserver\"} > (6*0.010000)\n)\n",
|
||||
"expr": "sum(apiserver_request:burnrate1h) > (14.40 * 0.01000)\nand\nsum(apiserver_request:burnrate5m) > (14.40 * 0.01000)\n",
|
||||
"for": "2m",
|
||||
"labels": {
|
||||
"job": "apiserver",
|
||||
"severity": "critical"
|
||||
"long": "1h",
|
||||
"severity": "critical",
|
||||
"short": "5m"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "ErrorBudgetBurn",
|
||||
"alert": "KubeAPIErrorBudgetBurn",
|
||||
"annotations": {
|
||||
"message": "High requests error budget burn for job=apiserver (current value: {{ $value }})",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-errorbudgetburn"
|
||||
"description": "The API server is burning too much error budget.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorbudgetburn",
|
||||
"summary": "The API server is burning too much error budget."
|
||||
},
|
||||
"expr": "(\n status_class_5xx:apiserver_request_total:ratio_rate1d{job=\"apiserver\"} > (3*0.010000)\n and\n status_class_5xx:apiserver_request_total:ratio_rate2h{job=\"apiserver\"} > (3*0.010000)\n)\nor\n(\n status_class_5xx:apiserver_request_total:ratio_rate3d{job=\"apiserver\"} > (0.010000)\n and\n status_class_5xx:apiserver_request_total:ratio_rate6h{job=\"apiserver\"} > (0.010000)\n)\n",
|
||||
"expr": "sum(apiserver_request:burnrate6h) > (6.00 * 0.01000)\nand\nsum(apiserver_request:burnrate30m) > (6.00 * 0.01000)\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"job": "apiserver",
|
||||
"severity": "warning"
|
||||
"long": "6h",
|
||||
"severity": "critical",
|
||||
"short": "30m"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "KubeAPIErrorBudgetBurn",
|
||||
"annotations": {
|
||||
"description": "The API server is burning too much error budget.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorbudgetburn",
|
||||
"summary": "The API server is burning too much error budget."
|
||||
},
|
||||
"expr": "sum(apiserver_request:burnrate1d) > (3.00 * 0.01000)\nand\nsum(apiserver_request:burnrate2h) > (3.00 * 0.01000)\n",
|
||||
"for": "1h",
|
||||
"labels": {
|
||||
"long": "1d",
|
||||
"severity": "warning",
|
||||
"short": "2h"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "KubeAPIErrorBudgetBurn",
|
||||
"annotations": {
|
||||
"description": "The API server is burning too much error budget.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorbudgetburn",
|
||||
"summary": "The API server is burning too much error budget."
|
||||
},
|
||||
"expr": "sum(apiserver_request:burnrate3d) > (1.00 * 0.01000)\nand\nsum(apiserver_request:burnrate6h) > (1.00 * 0.01000)\n",
|
||||
"for": "3h",
|
||||
"labels": {
|
||||
"long": "3d",
|
||||
"severity": "warning",
|
||||
"short": "6h"
|
||||
}
|
||||
}
|
||||
]
|
||||
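The rewritten kube-apiserver-slos group is a multi-window, multi-burn-rate setup: each KubeAPIErrorBudgetBurn rule fires only when a long and a short window both exceed the same burn-rate factor (14.4x over 1h/5m, 6x over 6h/30m, 3x over 1d/2h, 1x over 3d/6h) against a 1% error budget. A small sketch of what those factors imply, assuming the conventional 30-day SLO window (the window itself is not stated in the rules):

```python
# Sketch: how long each burn rate would take to consume a 30-day, 1% error budget.
# The 30-day window is an assumption; the factors and the 1% budget come from the rules above.
slo_window_hours = 30 * 24
error_budget = 0.01

for factor, windows in [(14.4, "1h/5m"), (6.0, "6h/30m"), (3.0, "1d/2h"), (1.0, "3d/6h")]:
    threshold = factor * error_budget              # error ratio at which the rule fires
    hours_to_exhaust = slo_window_hours / factor   # budget lifetime at that burn rate
    print(f"{windows}: fires above {threshold:.3f} error ratio, "
          f"budget exhausted in ~{hours_to_exhaust:.0f}h")
```

The paired short window mainly keeps the alert from firing, or staying red, on stale data once a spike has already passed.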
@ -831,59 +1098,12 @@ data:
|
||||
{
|
||||
"name": "kubernetes-system-apiserver",
|
||||
"rules": [
|
||||
{
|
||||
"alert": "KubeAPILatencyHigh",
|
||||
"annotations": {
|
||||
"message": "The API server has an abnormal latency of {{ $value }} seconds for {{ $labels.verb }} {{ $labels.resource }}.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh"
|
||||
},
|
||||
"expr": "(\n cluster:apiserver_request_duration_seconds:mean5m{job=\"apiserver\"}\n >\n on (verb) group_left()\n (\n avg by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job=\"apiserver\"} >= 0)\n +\n 2*stddev by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job=\"apiserver\"} >= 0)\n )\n) > on (verb) group_left()\n1.2 * avg by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job=\"apiserver\"} >= 0)\nand on (verb,resource)\ncluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job=\"apiserver\",quantile=\"0.99\"}\n>\n1\n",
|
||||
"for": "5m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "KubeAPILatencyHigh",
|
||||
"annotations": {
|
||||
"message": "The API server has a 99th percentile latency of {{ $value }} seconds for {{ $labels.verb }} {{ $labels.resource }}.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh"
|
||||
},
|
||||
"expr": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job=\"apiserver\",quantile=\"0.99\"} > 4\n",
|
||||
"for": "10m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "KubeAPIErrorsHigh",
|
||||
"annotations": {
|
||||
"message": "API server is returning errors for {{ $value | humanizePercentage }} of requests for {{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource }}.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh"
|
||||
},
|
||||
"expr": "sum(rate(apiserver_request_total{job=\"apiserver\",code=~\"5..\"}[5m])) by (resource,subresource,verb)\n /\nsum(rate(apiserver_request_total{job=\"apiserver\"}[5m])) by (resource,subresource,verb) > 0.10\n",
|
||||
"for": "10m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "KubeAPIErrorsHigh",
|
||||
"annotations": {
|
||||
"message": "API server is returning errors for {{ $value | humanizePercentage }} of requests for {{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource }}.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh"
|
||||
},
|
||||
"expr": "sum(rate(apiserver_request_total{job=\"apiserver\",code=~\"5..\"}[5m])) by (resource,subresource,verb)\n /\nsum(rate(apiserver_request_total{job=\"apiserver\"}[5m])) by (resource,subresource,verb) > 0.05\n",
|
||||
"for": "10m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "KubeClientCertificateExpiration",
|
||||
"annotations": {
|
||||
"message": "A client certificate used to authenticate to the apiserver is expiring in less than 1.0 hours.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration"
|
||||
"description": "A client certificate used to authenticate to the apiserver is expiring in less than 1.0 hours.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration",
|
||||
"summary": "Client certificate is about to expire."
|
||||
},
|
||||
"expr": "apiserver_client_certificate_expiration_seconds_count{job=\"apiserver\"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job=\"apiserver\"}[5m]))) < 3600\n",
|
||||
"labels": {
|
||||
@ -893,8 +1113,9 @@ data:
|
||||
{
|
||||
"alert": "KubeClientCertificateExpiration",
|
||||
"annotations": {
|
||||
"message": "A client certificate used to authenticate to the apiserver is expiring in less than 0.1 hours.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration"
|
||||
"description": "A client certificate used to authenticate to the apiserver is expiring in less than 0.1 hours.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration",
|
||||
"summary": "Client certificate is about to expire."
|
||||
},
|
||||
"expr": "apiserver_client_certificate_expiration_seconds_count{job=\"apiserver\"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job=\"apiserver\"}[5m]))) < 300\n",
|
||||
"labels": {
|
||||
@ -904,10 +1125,11 @@ data:
|
||||
{
|
||||
"alert": "AggregatedAPIErrors",
|
||||
"annotations": {
|
||||
"message": "An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. The number of errors have increased for it in the past five minutes. High values indicate that the availability of the service changes too often.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapierrors"
|
||||
"description": "An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. It has appeared unavailable {{ $value | humanize }} times averaged over the past 10m.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapierrors",
|
||||
"summary": "An aggregated API has reported errors."
|
||||
},
|
||||
"expr": "sum by(name, namespace)(increase(aggregator_unavailable_apiservice_count[5m])) > 2\n",
|
||||
"expr": "sum by(name, namespace)(increase(aggregator_unavailable_apiservice_count[10m])) > 4\n",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
}
|
||||
@ -915,10 +1137,11 @@ data:
|
||||
{
|
||||
"alert": "AggregatedAPIDown",
|
||||
"annotations": {
|
||||
"message": "An aggregated API {{ $labels.name }}/{{ $labels.namespace }} is down. It has not been available at least for the past five minutes.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapidown"
|
||||
"description": "An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has been only {{ $value | humanize }}% available over the last 10m.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapidown",
|
||||
"summary": "An aggregated API is down."
|
||||
},
|
||||
"expr": "sum by(name, namespace)(sum_over_time(aggregator_unavailable_apiservice[5m])) > 0\n",
|
||||
"expr": "(1 - max by(name, namespace)(avg_over_time(aggregator_unavailable_apiservice[10m]))) * 100 < 85\n",
|
||||
"for": "5m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
@ -927,8 +1150,9 @@ data:
|
||||
{
|
||||
"alert": "KubeAPIDown",
|
||||
"annotations": {
|
||||
"message": "KubeAPI has disappeared from Prometheus target discovery.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapidown"
|
||||
"description": "KubeAPI has disappeared from Prometheus target discovery.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapidown",
|
||||
"summary": "Target disappeared from Prometheus target discovery."
|
||||
},
|
||||
"expr": "absent(up{job=\"apiserver\"} == 1)\n",
|
||||
"for": "15m",
|
||||
@ -944,8 +1168,9 @@ data:
|
||||
{
|
||||
"alert": "KubeNodeNotReady",
|
||||
"annotations": {
|
||||
"message": "{{ $labels.node }} has been unready for more than 15 minutes.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodenotready"
|
||||
"description": "{{ $labels.node }} has been unready for more than 15 minutes.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodenotready",
|
||||
"summary": "Node is not ready."
|
||||
},
|
||||
"expr": "kube_node_status_condition{job=\"kube-state-metrics\",condition=\"Ready\",status=\"true\"} == 0\n",
|
||||
"for": "15m",
|
||||
@ -956,11 +1181,12 @@ data:
|
||||
{
|
||||
"alert": "KubeNodeUnreachable",
|
||||
"annotations": {
|
||||
"message": "{{ $labels.node }} is unreachable and some workloads may be rescheduled.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodeunreachable"
|
||||
"description": "{{ $labels.node }} is unreachable and some workloads may be rescheduled.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodeunreachable",
|
||||
"summary": "Node is unreachable."
|
||||
},
|
||||
"expr": "kube_node_spec_taint{job=\"kube-state-metrics\",key=\"node.kubernetes.io/unreachable\",effect=\"NoSchedule\"} == 1\n",
|
||||
"for": "2m",
|
||||
"expr": "(kube_node_spec_taint{job=\"kube-state-metrics\",key=\"node.kubernetes.io/unreachable\",effect=\"NoSchedule\"} unless ignoring(key,value) kube_node_spec_taint{job=\"kube-state-metrics\",key=~\"ToBeDeletedByClusterAutoscaler|cloud.google.com/impending-node-termination|aws-node-termination-handler/spot-itn\"}) == 1\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
}
|
||||
@ -968,10 +1194,11 @@ data:
|
||||
{
|
||||
"alert": "KubeletTooManyPods",
|
||||
"annotations": {
|
||||
"message": "Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage }} of its Pod capacity.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubelettoomanypods"
|
||||
"description": "Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage }} of its Pod capacity.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubelettoomanypods",
|
||||
"summary": "Kubelet is running at capacity."
|
||||
},
|
||||
"expr": "max(max(kubelet_running_pod_count{job=\"kubelet\"}) by(instance) * on(instance) group_left(node) kubelet_node_name{job=\"kubelet\"}) by(node) / max(kube_node_status_capacity_pods{job=\"kube-state-metrics\"} != 1) by(node) > 0.95\n",
|
||||
"expr": "count by(node) (\n (kube_pod_status_phase{job=\"kube-state-metrics\",phase=\"Running\"} == 1) * on(instance,pod,namespace,cluster) group_left(node) topk by(instance,pod,namespace,cluster) (1, kube_pod_info{job=\"kube-state-metrics\"})\n)\n/\nmax by(node) (\n kube_node_status_capacity_pods{job=\"kube-state-metrics\"} != 1\n) > 0.95\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
@ -980,8 +1207,9 @@ data:
|
||||
{
|
||||
"alert": "KubeNodeReadinessFlapping",
|
||||
"annotations": {
|
||||
"message": "The readiness status of node {{ $labels.node }} has changed {{ $value }} times in the last 15 minutes.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodereadinessflapping"
|
||||
"description": "The readiness status of node {{ $labels.node }} has changed {{ $value }} times in the last 15 minutes.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodereadinessflapping",
|
||||
"summary": "Node readiness status is flapping."
|
||||
},
|
||||
"expr": "sum(changes(kube_node_status_condition{status=\"true\",condition=\"Ready\"}[15m])) by (node) > 2\n",
|
||||
"for": "15m",
|
||||
@ -992,8 +1220,9 @@ data:
|
||||
{
|
||||
"alert": "KubeletPlegDurationHigh",
|
||||
"annotations": {
|
||||
"message": "The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{ $value }} seconds on node {{ $labels.node }}.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletplegdurationhigh"
|
||||
"description": "The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{ $value }} seconds on node {{ $labels.node }}.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletplegdurationhigh",
|
||||
"summary": "Kubelet Pod Lifecycle Event Generator is taking too long to relist."
|
||||
},
|
||||
"expr": "node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile=\"0.99\"} >= 10\n",
|
||||
"for": "5m",
|
||||
@ -1004,10 +1233,85 @@ data:
|
||||
{
|
||||
"alert": "KubeletPodStartUpLatencyHigh",
|
||||
"annotations": {
|
||||
"message": "Kubelet Pod startup 99th percentile latency is {{ $value }} seconds on node {{ $labels.node }}.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletpodstartuplatencyhigh"
|
||||
"description": "Kubelet Pod startup 99th percentile latency is {{ $value }} seconds on node {{ $labels.node }}.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletpodstartuplatencyhigh",
|
||||
"summary": "Kubelet Pod startup latency is too high."
|
||||
},
|
||||
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job=\"kubelet\"}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name > 60\n",
|
||||
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job=\"kubelet\"}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name{job=\"kubelet\"} > 60\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "KubeletClientCertificateExpiration",
|
||||
"annotations": {
|
||||
"description": "Client certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletclientcertificateexpiration",
|
||||
"summary": "Kubelet client certificate is about to expire."
|
||||
},
|
||||
"expr": "kubelet_certificate_manager_client_ttl_seconds < 3600\n",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "KubeletClientCertificateExpiration",
|
||||
"annotations": {
|
||||
"description": "Client certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletclientcertificateexpiration",
|
||||
"summary": "Kubelet client certificate is about to expire."
|
||||
},
|
||||
"expr": "kubelet_certificate_manager_client_ttl_seconds < 300\n",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
}
|
||||
},
|
||||
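The new kubelet certificate-expiry rules follow a two-tier pattern: the same TTL metric produces a warning below 3600 seconds and a critical below 300 seconds, for the client and server certificates alike. A trivial sketch of that tiering, with invented TTL values:

```python
# Sketch of the two-tier certificate-expiry severity used above (TTL values are invented).
def cert_severity(ttl_seconds):
    if ttl_seconds < 300:      # ..._ttl_seconds < 300  -> critical
        return "critical"
    if ttl_seconds < 3600:     # ..._ttl_seconds < 3600 -> warning
        return "warning"
    return None

for ttl in (86400, 1800, 120):
    print(ttl, cert_severity(ttl))
```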
{
|
||||
"alert": "KubeletServerCertificateExpiration",
|
||||
"annotations": {
|
||||
"description": "Server certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletservercertificateexpiration",
|
||||
"summary": "Kubelet server certificate is about to expire."
|
||||
},
|
||||
"expr": "kubelet_certificate_manager_server_ttl_seconds < 3600\n",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "KubeletServerCertificateExpiration",
|
||||
"annotations": {
|
||||
"description": "Server certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletservercertificateexpiration",
|
||||
"summary": "Kubelet server certificate is about to expire."
|
||||
},
|
||||
"expr": "kubelet_certificate_manager_server_ttl_seconds < 300\n",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "KubeletClientCertificateRenewalErrors",
|
||||
"annotations": {
|
||||
"description": "Kubelet on node {{ $labels.node }} has failed to renew its client certificate ({{ $value | humanize }} errors in the last 5 minutes).",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletclientcertificaterenewalerrors",
|
||||
"summary": "Kubelet has failed to renew its client certificate."
|
||||
},
|
||||
"expr": "increase(kubelet_certificate_manager_client_expiration_renew_errors[5m]) > 0\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "KubeletServerCertificateRenewalErrors",
|
||||
"annotations": {
|
||||
"description": "Kubelet on node {{ $labels.node }} has failed to renew its server certificate ({{ $value | humanize }} errors in the last 5 minutes).",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletservercertificaterenewalerrors",
|
||||
"summary": "Kubelet has failed to renew its server certificate."
|
||||
},
|
||||
"expr": "increase(kubelet_server_expiration_renew_errors[5m]) > 0\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
@ -1016,8 +1320,9 @@ data:
|
||||
{
|
||||
"alert": "KubeletDown",
|
||||
"annotations": {
|
||||
"message": "Kubelet has disappeared from Prometheus target discovery.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletdown"
|
||||
"description": "Kubelet has disappeared from Prometheus target discovery.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletdown",
|
||||
"summary": "Target disappeared from Prometheus target discovery."
|
||||
},
|
||||
"expr": "absent(up{job=\"kubelet\"} == 1)\n",
|
||||
"for": "15m",
|
||||
@ -1033,8 +1338,9 @@ data:
|
||||
{
|
||||
"alert": "KubeSchedulerDown",
|
||||
"annotations": {
|
||||
"message": "KubeScheduler has disappeared from Prometheus target discovery.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeschedulerdown"
|
||||
"description": "KubeScheduler has disappeared from Prometheus target discovery.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeschedulerdown",
|
||||
"summary": "Target disappeared from Prometheus target discovery."
|
||||
},
|
||||
"expr": "absent(up{job=\"kube-scheduler\"} == 1)\n",
|
||||
"for": "15m",
|
||||
@ -1050,8 +1356,9 @@ data:
|
||||
{
|
||||
"alert": "KubeControllerManagerDown",
|
||||
"annotations": {
|
||||
"message": "KubeControllerManager has disappeared from Prometheus target discovery.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontrollermanagerdown"
|
||||
"description": "KubeControllerManager has disappeared from Prometheus target discovery.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontrollermanagerdown",
|
||||
"summary": "Target disappeared from Prometheus target discovery."
|
||||
},
|
||||
"expr": "absent(up{job=\"kube-controller-manager\"} == 1)\n",
|
||||
"for": "15m",
|
||||
@ -1063,115 +1370,6 @@ data:
|
||||
}
|
||||
]
|
||||
}
|
||||
loki.yaml: |-
|
||||
{
|
||||
"groups": [
|
||||
{
|
||||
"name": "loki_rules",
|
||||
"rules": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, job))",
|
||||
"record": "job:loki_request_duration_seconds:99quantile"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, job))",
|
||||
"record": "job:loki_request_duration_seconds:50quantile"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(loki_request_duration_seconds_sum[1m])) by (job) / sum(rate(loki_request_duration_seconds_count[1m])) by (job)",
|
||||
"record": "job:loki_request_duration_seconds:avg"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, job)",
|
||||
"record": "job:loki_request_duration_seconds_bucket:sum_rate"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(loki_request_duration_seconds_sum[1m])) by (job)",
|
||||
"record": "job:loki_request_duration_seconds_sum:sum_rate"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(loki_request_duration_seconds_count[1m])) by (job)",
|
||||
"record": "job:loki_request_duration_seconds_count:sum_rate"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, job, route))",
|
||||
"record": "job_route:loki_request_duration_seconds:99quantile"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, job, route))",
|
||||
"record": "job_route:loki_request_duration_seconds:50quantile"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(loki_request_duration_seconds_sum[1m])) by (job, route) / sum(rate(loki_request_duration_seconds_count[1m])) by (job, route)",
|
||||
"record": "job_route:loki_request_duration_seconds:avg"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, job, route)",
|
||||
"record": "job_route:loki_request_duration_seconds_bucket:sum_rate"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(loki_request_duration_seconds_sum[1m])) by (job, route)",
|
||||
"record": "job_route:loki_request_duration_seconds_sum:sum_rate"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(loki_request_duration_seconds_count[1m])) by (job, route)",
|
||||
"record": "job_route:loki_request_duration_seconds_count:sum_rate"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, namespace, job, route))",
|
||||
"record": "namespace_job_route:loki_request_duration_seconds:99quantile"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, namespace, job, route))",
|
||||
"record": "namespace_job_route:loki_request_duration_seconds:50quantile"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(loki_request_duration_seconds_sum[1m])) by (namespace, job, route) / sum(rate(loki_request_duration_seconds_count[1m])) by (namespace, job, route)",
|
||||
"record": "namespace_job_route:loki_request_duration_seconds:avg"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, namespace, job, route)",
|
||||
"record": "namespace_job_route:loki_request_duration_seconds_bucket:sum_rate"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(loki_request_duration_seconds_sum[1m])) by (namespace, job, route)",
|
||||
"record": "namespace_job_route:loki_request_duration_seconds_sum:sum_rate"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(loki_request_duration_seconds_count[1m])) by (namespace, job, route)",
|
||||
"record": "namespace_job_route:loki_request_duration_seconds_count:sum_rate"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "loki_alerts",
|
||||
"rules": [
|
||||
{
|
||||
"alert": "LokiRequestErrors",
|
||||
"annotations": {
|
||||
"message": "{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf \"%.2f\" $value }}% errors.\n"
|
||||
},
|
||||
"expr": "100 * sum(rate(loki_request_duration_seconds_count{status_code=~\"5..\"}[1m])) by (namespace, job, route)\n /\nsum(rate(loki_request_duration_seconds_count[1m])) by (namespace, job, route)\n > 10\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
}
|
||||
},
|
||||
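LokiRequestErrors computes an error percentage per (namespace, job, route): 100 times the rate of 5xx requests divided by the rate of all requests, alerting above 10%. A short sketch of that calculation with assumed request rates:

```python
# Sketch of the LokiRequestErrors percentage (request rates are assumed).
rate_5xx = 0.4      # sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[1m]))
rate_total = 3.0    # sum(rate(loki_request_duration_seconds_count[1m]))

error_percent = 100 * rate_5xx / rate_total
print(f"{error_percent:.2f}% errors -> alert fires: {error_percent > 10}")   # 13.33% -> True
```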
{
|
||||
"alert": "LokiRequestLatency",
|
||||
"annotations": {
|
||||
"message": "{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf \"%.2f\" $value }}s 99th percentile latency.\n"
|
||||
},
|
||||
"expr": "namespace_job_route:loki_request_duration_seconds:99quantile{route!~\"(?i).*tail.*\"} > 1\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
node-exporter.yaml: |-
|
||||
{
|
||||
"groups": [
|
||||
@ -1329,7 +1527,7 @@ data:
"description": "{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf \"%.0f\" $value }} receive errors in the last two minutes.",
"summary": "Network interface is reporting many receive errors."
},
"expr": "increase(node_network_receive_errs_total[2m]) > 10\n",
"expr": "rate(node_network_receive_errs_total[2m]) / rate(node_network_receive_packets_total[2m]) > 0.01\n",
"for": "1h",
"labels": {
"severity": "warning"
@ -1341,7 +1539,7 @@ data:
"description": "{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf \"%.0f\" $value }} transmit errors in the last two minutes.",
"summary": "Network interface is reporting many transmit errors."
},
"expr": "increase(node_network_transmit_errs_total[2m]) > 10\n",
"expr": "rate(node_network_transmit_errs_total[2m]) / rate(node_network_transmit_packets_total[2m]) > 0.01\n",
"for": "1h",
"labels": {
"severity": "warning"
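The two hunks above replace absolute error-count thresholds (`increase(...[2m]) > 10`) with an error ratio, so the alerts fire when more than 1% of packets hit receive or transmit errors, independent of how busy the link is. A sketch of the updated receive-errors rule in standalone rule-file form (the expression, `for`, severity, and summary come from the hunk; the alert name is assumed for illustration):

```yaml
# Sketch only: expression and summary come from the hunk above; the alert
# name is a placeholder, not repository content.
groups:
  - name: node-exporter
    rules:
      - alert: NodeNetworkReceiveErrs
        expr: |
          rate(node_network_receive_errs_total[2m])
            / rate(node_network_receive_packets_total[2m]) > 0.01
        for: 1h
        labels:
          severity: warning
        annotations:
          summary: Network interface is reporting many receive errors.
```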
@ -1350,14 +1548,25 @@ data:
{
"alert": "NodeHighNumberConntrackEntriesUsed",
"annotations": {
"description": "{{ $value | humanizePercentage }} of conntrack entries are used",
"summary": "Number of conntrack are getting close to the limit"
"description": "{{ $value | humanizePercentage }} of conntrack entries are used.",
"summary": "Number of conntrack are getting close to the limit."
},
"expr": "(node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.75\n",
"labels": {
"severity": "warning"
}
},
{
"alert": "NodeTextFileCollectorScrapeError",
"annotations": {
"description": "Node Exporter text file collector failed to scrape.",
"summary": "Node Exporter text file collector failed to scrape."
},
"expr": "node_textfile_scrape_error{job=\"node-exporter\"} == 1\n",
"labels": {
"severity": "warning"
}
},
{
"alert": "NodeClockSkewDetected",
"annotations": {
@ -1376,11 +1585,34 @@ data:
"message": "Clock on {{ $labels.instance }} is not synchronising. Ensure NTP is configured on this host.",
"summary": "Clock not synchronising."
},
"expr": "min_over_time(node_timex_sync_status[5m]) == 0\n",
"expr": "min_over_time(node_timex_sync_status[5m]) == 0\nand\nnode_timex_maxerror_seconds >= 16\n",
"for": "10m",
"labels": {
"severity": "warning"
}
},
{
"alert": "NodeRAIDDegraded",
"annotations": {
"description": "RAID array '{{ $labels.device }}' on {{ $labels.instance }} is in degraded state due to one or more disks failures. Number of spare drives is insufficient to fix issue automatically.",
"summary": "RAID Array is degraded"
},
"expr": "node_md_disks_required - ignoring (state) (node_md_disks{state=\"active\"}) > 0\n",
"for": "15m",
"labels": {
"severity": "critical"
}
},
{
"alert": "NodeRAIDDiskFailure",
"annotations": {
"description": "At least one device in RAID array on {{ $labels.instance }} failed. Array '{{ $labels.device }}' needs attention and possibly a disk swap.",
"summary": "Failed device in RAID array"
},
"expr": "node_md_disks{state=\"fail\"} > 0\n",
"labels": {
"severity": "warning"
}
}
]
}
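In the hunk above, the clock-not-synchronising expression gains a second condition: besides the kernel reporting the clock as unsynchronised, `node_timex_maxerror_seconds` must have reached 16 seconds, roughly where the kernel's estimated maximum error tops out once synchronisation has genuinely been lost, which avoids paging on brief NTP interruptions. A sketch of the tightened rule in standalone rule-file form (expression, `for`, severity, and message come from the hunk; the alert name is assumed for illustration):

```yaml
# Sketch only: the expression matches the new line in the hunk above;
# the alert name is a placeholder, not repository content.
groups:
  - name: node-exporter
    rules:
      - alert: NodeClockNotSynchronising
        expr: |
          min_over_time(node_timex_sync_status[5m]) == 0
          and
          node_timex_maxerror_seconds >= 16
        for: 10m
        labels:
          severity: warning
        annotations:
          message: Clock on {{ $labels.instance }} is not synchronising. Ensure NTP is configured on this host.
```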
@ -1428,18 +1660,6 @@ data:
|
||||
"severity": "warning"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "PrometheusErrorSendingAlertsToAnyAlertmanager",
|
||||
"annotations": {
|
||||
"description": "{{ printf \"%.1f\" $value }}% minimum errors while sending alerts from Prometheus {{$labels.instance}} to any Alertmanager.",
|
||||
"summary": "Prometheus encounters more than 3% errors sending alerts to any Alertmanager."
|
||||
},
|
||||
"expr": "min without(alertmanager) (\n rate(prometheus_notifications_errors_total{job=\"prometheus\"}[5m])\n/\n rate(prometheus_notifications_sent_total{job=\"prometheus\"}[5m])\n)\n* 100\n> 3\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "PrometheusNotConnectedToAlertmanagers",
|
||||
"annotations": {
|
||||
@ -1482,7 +1702,7 @@ data:
|
||||
"description": "Prometheus {{$labels.instance}} is not ingesting samples.",
|
||||
"summary": "Prometheus is not ingesting samples."
|
||||
},
|
||||
"expr": "rate(prometheus_tsdb_head_samples_appended_total{job=\"prometheus\"}[5m]) <= 0\n",
|
||||
"expr": "(\n rate(prometheus_tsdb_head_samples_appended_total{job=\"prometheus\"}[5m]) <= 0\nand\n (\n sum without(scrape_job) (prometheus_target_metadata_cache_entries{job=\"prometheus\"}) > 0\n or\n sum without(rule_group) (prometheus_rule_group_rules{job=\"prometheus\"}) > 0\n )\n)\n",
|
||||
"for": "10m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
@ -1515,7 +1735,7 @@ data:
|
||||
{
|
||||
"alert": "PrometheusRemoteStorageFailures",
|
||||
"annotations": {
|
||||
"description": "Prometheus {{$labels.instance}} failed to send {{ printf \"%.1f\" $value }}% of the samples to {{ if $labels.queue }}{{ $labels.queue }}{{ else }}{{ $labels.url }}{{ end }}.",
|
||||
"description": "Prometheus {{$labels.instance}} failed to send {{ printf \"%.1f\" $value }}% of the samples to {{ $labels.remote_name}}:{{ $labels.url }}",
|
||||
"summary": "Prometheus fails to send samples to remote storage."
|
||||
},
|
||||
"expr": "(\n rate(prometheus_remote_storage_failed_samples_total{job=\"prometheus\"}[5m])\n/\n (\n rate(prometheus_remote_storage_failed_samples_total{job=\"prometheus\"}[5m])\n +\n rate(prometheus_remote_storage_succeeded_samples_total{job=\"prometheus\"}[5m])\n )\n)\n* 100\n> 1\n",
|
||||
@ -1527,10 +1747,10 @@ data:
|
||||
{
|
||||
"alert": "PrometheusRemoteWriteBehind",
|
||||
"annotations": {
|
||||
"description": "Prometheus {{$labels.instance}} remote write is {{ printf \"%.1f\" $value }}s behind for {{ if $labels.queue }}{{ $labels.queue }}{{ else }}{{ $labels.url }}{{ end }}.",
|
||||
"description": "Prometheus {{$labels.instance}} remote write is {{ printf \"%.1f\" $value }}s behind for {{ $labels.remote_name}}:{{ $labels.url }}.",
|
||||
"summary": "Prometheus remote write is behind."
|
||||
},
|
||||
"expr": "# Without max_over_time, failed scrapes could create false negatives, see\n# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.\n(\n max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job=\"prometheus\"}[5m])\n- on(job, instance) group_right\n max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job=\"prometheus\"}[5m])\n)\n> 120\n",
|
||||
"expr": "# Without max_over_time, failed scrapes could create false negatives, see\n# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.\n(\n max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job=\"prometheus\"}[5m])\n- ignoring(remote_name, url) group_right\n max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job=\"prometheus\"}[5m])\n)\n> 120\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
@ -1539,7 +1759,7 @@ data:
|
||||
{
|
||||
"alert": "PrometheusRemoteWriteDesiredShards",
|
||||
"annotations": {
|
||||
"description": "Prometheus {{$labels.instance}} remote write desired shards calculation wants to run {{ $value }} shards, which is more than the max of {{ printf `prometheus_remote_storage_shards_max{instance=\"%s\",job=\"prometheus\"}` $labels.instance | query | first | value }}.",
|
||||
"description": "Prometheus {{$labels.instance}} remote write desired shards calculation wants to run {{ $value }} shards for queue {{ $labels.remote_name}}:{{ $labels.url }}, which is more than the max of {{ printf `prometheus_remote_storage_shards_max{instance=\"%s\",job=\"prometheus\"}` $labels.instance | query | first | value }}.",
|
||||
"summary": "Prometheus remote write desired shards calculation wants to run more than configured max shards."
|
||||
},
|
||||
"expr": "# Without max_over_time, failed scrapes could create false negatives, see\n# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.\n(\n max_over_time(prometheus_remote_storage_shards_desired{job=\"prometheus\"}[5m])\n>\n max_over_time(prometheus_remote_storage_shards_max{job=\"prometheus\"}[5m])\n)\n",
|
||||
@ -1571,6 +1791,30 @@ data:
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "PrometheusTargetLimitHit",
|
||||
"annotations": {
|
||||
"description": "Prometheus {{$labels.instance}} has dropped {{ printf \"%.0f\" $value }} targets because the number of targets exceeded the configured target_limit.",
|
||||
"summary": "Prometheus has dropped targets because some scrape configs have exceeded the targets limit."
|
||||
},
|
||||
"expr": "increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job=\"prometheus\"}[5m]) > 0\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "PrometheusErrorSendingAlertsToAnyAlertmanager",
|
||||
"annotations": {
|
||||
"description": "{{ printf \"%.1f\" $value }}% minimum errors while sending alerts from Prometheus {{$labels.instance}} to any Alertmanager.",
|
||||
"summary": "Prometheus encounters more than 3% errors sending alerts to any Alertmanager."
|
||||
},
|
||||
"expr": "min without (alertmanager) (\n rate(prometheus_notifications_errors_total{job=\"prometheus\",alertmanager!~``}[5m])\n/\n rate(prometheus_notifications_sent_total{job=\"prometheus\",alertmanager!~``}[5m])\n)\n* 100\n> 3\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
|
@ -1,199 +0,0 @@
|
||||
---
|
||||
systemd:
|
||||
units:
|
||||
- name: etcd-member.service
|
||||
enabled: true
|
||||
dropins:
|
||||
- name: 40-etcd-cluster.conf
|
||||
contents: |
|
||||
[Service]
|
||||
Environment="ETCD_IMAGE_TAG=v3.4.9"
|
||||
Environment="ETCD_IMAGE_URL=docker://quay.io/coreos/etcd"
|
||||
Environment="RKT_RUN_ARGS=--insecure-options=image"
|
||||
Environment="ETCD_NAME=${etcd_name}"
|
||||
Environment="ETCD_ADVERTISE_CLIENT_URLS=https://${etcd_domain}:2379"
|
||||
Environment="ETCD_INITIAL_ADVERTISE_PEER_URLS=https://${etcd_domain}:2380"
|
||||
Environment="ETCD_LISTEN_CLIENT_URLS=https://0.0.0.0:2379"
|
||||
Environment="ETCD_LISTEN_PEER_URLS=https://0.0.0.0:2380"
|
||||
Environment="ETCD_LISTEN_METRICS_URLS=http://0.0.0.0:2381"
|
||||
Environment="ETCD_INITIAL_CLUSTER=${etcd_initial_cluster}"
|
||||
Environment="ETCD_STRICT_RECONFIG_CHECK=true"
|
||||
Environment="ETCD_SSL_DIR=/etc/ssl/etcd"
|
||||
Environment="ETCD_TRUSTED_CA_FILE=/etc/ssl/certs/etcd/server-ca.crt"
|
||||
Environment="ETCD_CERT_FILE=/etc/ssl/certs/etcd/server.crt"
|
||||
Environment="ETCD_KEY_FILE=/etc/ssl/certs/etcd/server.key"
|
||||
Environment="ETCD_CLIENT_CERT_AUTH=true"
|
||||
Environment="ETCD_PEER_TRUSTED_CA_FILE=/etc/ssl/certs/etcd/peer-ca.crt"
|
||||
Environment="ETCD_PEER_CERT_FILE=/etc/ssl/certs/etcd/peer.crt"
|
||||
Environment="ETCD_PEER_KEY_FILE=/etc/ssl/certs/etcd/peer.key"
|
||||
Environment="ETCD_PEER_CLIENT_CERT_AUTH=true"
|
||||
- name: docker.service
|
||||
enabled: true
|
||||
- name: locksmithd.service
|
||||
mask: true
|
||||
- name: wait-for-dns.service
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Wait for DNS entries
|
||||
Wants=systemd-resolved.service
|
||||
Before=kubelet.service
|
||||
[Service]
|
||||
Type=oneshot
|
||||
RemainAfterExit=true
|
||||
ExecStart=/bin/sh -c 'while ! /usr/bin/grep '^[^#[:space:]]' /etc/resolv.conf > /dev/null; do sleep 1; done'
|
||||
[Install]
|
||||
RequiredBy=kubelet.service
|
||||
RequiredBy=etcd-member.service
|
||||
- name: kubelet.service
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Kubelet
|
||||
Wants=rpc-statd.service
|
||||
[Service]
|
||||
Environment=KUBELET_IMAGE=docker://quay.io/poseidon/kubelet:v1.18.4
|
||||
Environment=KUBELET_CGROUP_DRIVER=${cgroup_driver}
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/cni/net.d
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
|
||||
ExecStartPre=/bin/mkdir -p /opt/cni/bin
|
||||
ExecStartPre=/bin/mkdir -p /var/lib/calico
|
||||
ExecStartPre=/bin/mkdir -p /var/lib/kubelet/volumeplugins
|
||||
ExecStartPre=/usr/bin/bash -c "grep 'certificate-authority-data' /etc/kubernetes/kubeconfig | awk '{print $2}' | base64 -d > /etc/kubernetes/ca.crt"
|
||||
ExecStartPre=-/usr/bin/rkt rm --uuid-file=/var/cache/kubelet-pod.uuid
|
||||
ExecStart=/usr/bin/rkt run \
|
||||
--uuid-file-save=/var/cache/kubelet-pod.uuid \
|
||||
--stage1-from-dir=stage1-fly.aci \
|
||||
--hosts-entry host \
|
||||
--insecure-options=image \
|
||||
--volume etc-kubernetes,kind=host,source=/etc/kubernetes,readOnly=true \
|
||||
--mount volume=etc-kubernetes,target=/etc/kubernetes \
|
||||
--volume etc-machine-id,kind=host,source=/etc/machine-id,readOnly=true \
|
||||
--mount volume=etc-machine-id,target=/etc/machine-id \
|
||||
--volume etc-os-release,kind=host,source=/usr/lib/os-release,readOnly=true \
|
||||
--mount volume=etc-os-release,target=/etc/os-release \
|
||||
--volume=etc-resolv,kind=host,source=/etc/resolv.conf,readOnly=true \
|
||||
--mount volume=etc-resolv,target=/etc/resolv.conf \
|
||||
--volume etc-ssl-certs,kind=host,source=/etc/ssl/certs,readOnly=true \
|
||||
--mount volume=etc-ssl-certs,target=/etc/ssl/certs \
|
||||
--volume lib-modules,kind=host,source=/lib/modules,readOnly=true \
|
||||
--mount volume=lib-modules,target=/lib/modules \
|
||||
--volume run,kind=host,source=/run \
|
||||
--mount volume=run,target=/run \
|
||||
--volume usr-share-certs,kind=host,source=/usr/share/ca-certificates,readOnly=true \
|
||||
--mount volume=usr-share-certs,target=/usr/share/ca-certificates \
|
||||
--volume var-lib-calico,kind=host,source=/var/lib/calico,readOnly=true \
|
||||
--mount volume=var-lib-calico,target=/var/lib/calico \
|
||||
--volume var-lib-docker,kind=host,source=/var/lib/docker \
|
||||
--mount volume=var-lib-docker,target=/var/lib/docker \
|
||||
--volume var-lib-kubelet,kind=host,source=/var/lib/kubelet,recursive=true \
|
||||
--mount volume=var-lib-kubelet,target=/var/lib/kubelet \
|
||||
--volume var-log,kind=host,source=/var/log \
|
||||
--mount volume=var-log,target=/var/log \
|
||||
--volume opt-cni-bin,kind=host,source=/opt/cni/bin \
|
||||
--mount volume=opt-cni-bin,target=/opt/cni/bin \
|
||||
$${KUBELET_IMAGE} -- \
|
||||
--anonymous-auth=false \
|
||||
--authentication-token-webhook \
|
||||
--authorization-mode=Webhook \
|
||||
--bootstrap-kubeconfig=/etc/kubernetes/kubeconfig \
|
||||
--cgroup-driver=$${KUBELET_CGROUP_DRIVER} \
|
||||
--client-ca-file=/etc/kubernetes/ca.crt \
|
||||
--cluster_dns=${cluster_dns_service_ip} \
|
||||
--cluster_domain=${cluster_domain_suffix} \
|
||||
--cni-conf-dir=/etc/kubernetes/cni/net.d \
|
||||
--healthz-port=0 \
|
||||
--kubeconfig=/var/lib/kubelet/kubeconfig \
|
||||
--network-plugin=cni \
|
||||
--node-labels=node.kubernetes.io/controller="true" \
|
||||
--pod-manifest-path=/etc/kubernetes/manifests \
|
||||
--read-only-port=0 \
|
||||
--register-with-taints=node-role.kubernetes.io/controller=:NoSchedule \
|
||||
--rotate-certificates \
|
||||
--volume-plugin-dir=/var/lib/kubelet/volumeplugins
|
||||
ExecStop=-/usr/bin/rkt stop --uuid-file=/var/cache/kubelet-pod.uuid
|
||||
Restart=always
|
||||
RestartSec=10
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
- name: bootstrap.service
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Kubernetes control plane
|
||||
ConditionPathExists=!/opt/bootstrap/bootstrap.done
|
||||
[Service]
|
||||
Type=oneshot
|
||||
RemainAfterExit=true
|
||||
WorkingDirectory=/opt/bootstrap
|
||||
ExecStart=/usr/bin/rkt run \
|
||||
--trust-keys-from-https \
|
||||
--volume config,kind=host,source=/etc/kubernetes/bootstrap-secrets \
|
||||
--mount volume=config,target=/etc/kubernetes/secrets \
|
||||
--volume assets,kind=host,source=/opt/bootstrap/assets \
|
||||
--mount volume=assets,target=/assets \
|
||||
--volume script,kind=host,source=/opt/bootstrap/apply \
|
||||
--mount volume=script,target=/apply \
|
||||
--insecure-options=image \
|
||||
docker://quay.io/poseidon/kubelet:v1.18.4 \
|
||||
--net=host \
|
||||
--dns=host \
|
||||
--exec=/apply
|
||||
ExecStartPost=/bin/touch /opt/bootstrap/bootstrap.done
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
storage:
|
||||
files:
|
||||
- path: /etc/kubernetes/kubeconfig
|
||||
filesystem: root
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
||||
${kubeconfig}
|
||||
- path: /opt/bootstrap/layout
|
||||
filesystem: root
|
||||
mode: 0544
|
||||
contents:
|
||||
inline: |
|
||||
#!/bin/bash -e
|
||||
mkdir -p -- auth tls/etcd tls/k8s static-manifests manifests/coredns manifests-networking
|
||||
awk '/#####/ {filename=$2; next} {print > filename}' assets
|
||||
mkdir -p /etc/ssl/etcd/etcd
|
||||
mkdir -p /etc/kubernetes/bootstrap-secrets
|
||||
mv tls/etcd/{peer*,server*} /etc/ssl/etcd/etcd/
|
||||
mv tls/etcd/etcd-client* /etc/kubernetes/bootstrap-secrets/
|
||||
chown -R etcd:etcd /etc/ssl/etcd
|
||||
chmod -R 500 /etc/ssl/etcd
|
||||
mv auth/kubeconfig /etc/kubernetes/bootstrap-secrets/
|
||||
mv tls/k8s/* /etc/kubernetes/bootstrap-secrets/
|
||||
mkdir -p /etc/kubernetes/manifests
|
||||
mv static-manifests/* /etc/kubernetes/manifests/
|
||||
mkdir -p /opt/bootstrap/assets
|
||||
mv manifests /opt/bootstrap/assets/manifests
|
||||
mv manifests-networking/* /opt/bootstrap/assets/manifests/
|
||||
rm -rf assets auth static-manifests tls manifests-networking
|
||||
- path: /opt/bootstrap/apply
|
||||
filesystem: root
|
||||
mode: 0544
|
||||
contents:
|
||||
inline: |
|
||||
#!/bin/bash -e
|
||||
export KUBECONFIG=/etc/kubernetes/secrets/kubeconfig
|
||||
until kubectl version; do
|
||||
echo "Waiting for static pod control plane"
|
||||
sleep 5
|
||||
done
|
||||
until kubectl apply -f /assets/manifests -R; do
|
||||
echo "Retry applying manifests"
|
||||
sleep 5
|
||||
done
|
||||
- path: /etc/sysctl.d/max-user-watches.conf
|
||||
filesystem: root
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
||||
fs.inotify.max_user_watches=16184
|
||||
passwd:
|
||||
users:
|
||||
- name: core
|
||||
ssh_authorized_keys:
|
||||
- "${ssh_authorized_key}"
|
@ -1,11 +0,0 @@
|
||||
# Terraform version and plugin versions
|
||||
|
||||
terraform {
|
||||
required_version = "~> 0.12.6"
|
||||
required_providers {
|
||||
aws = "~> 2.23"
|
||||
ct = "~> 0.4"
|
||||
template = "~> 2.1"
|
||||
null = "~> 2.1"
|
||||
}
|
||||
}
|
@ -1,140 +0,0 @@
|
||||
---
|
||||
systemd:
|
||||
units:
|
||||
- name: docker.service
|
||||
enabled: true
|
||||
- name: locksmithd.service
|
||||
mask: true
|
||||
- name: wait-for-dns.service
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Wait for DNS entries
|
||||
Wants=systemd-resolved.service
|
||||
Before=kubelet.service
|
||||
[Service]
|
||||
Type=oneshot
|
||||
RemainAfterExit=true
|
||||
ExecStart=/bin/sh -c 'while ! /usr/bin/grep '^[^#[:space:]]' /etc/resolv.conf > /dev/null; do sleep 1; done'
|
||||
[Install]
|
||||
RequiredBy=kubelet.service
|
||||
- name: kubelet.service
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Kubelet
|
||||
Wants=rpc-statd.service
|
||||
[Service]
|
||||
Environment=KUBELET_IMAGE=docker://quay.io/poseidon/kubelet:v1.18.4
|
||||
Environment=KUBELET_CGROUP_DRIVER=${cgroup_driver}
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/cni/net.d
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
|
||||
ExecStartPre=/bin/mkdir -p /opt/cni/bin
|
||||
ExecStartPre=/bin/mkdir -p /var/lib/calico
|
||||
ExecStartPre=/bin/mkdir -p /var/lib/kubelet/volumeplugins
|
||||
ExecStartPre=/usr/bin/bash -c "grep 'certificate-authority-data' /etc/kubernetes/kubeconfig | awk '{print $2}' | base64 -d > /etc/kubernetes/ca.crt"
|
||||
ExecStartPre=-/usr/bin/rkt rm --uuid-file=/var/cache/kubelet-pod.uuid
|
||||
ExecStart=/usr/bin/rkt run \
|
||||
--uuid-file-save=/var/cache/kubelet-pod.uuid \
|
||||
--stage1-from-dir=stage1-fly.aci \
|
||||
--hosts-entry host \
|
||||
--insecure-options=image \
|
||||
--volume etc-kubernetes,kind=host,source=/etc/kubernetes,readOnly=true \
|
||||
--mount volume=etc-kubernetes,target=/etc/kubernetes \
|
||||
--volume etc-machine-id,kind=host,source=/etc/machine-id,readOnly=true \
|
||||
--mount volume=etc-machine-id,target=/etc/machine-id \
|
||||
--volume etc-os-release,kind=host,source=/usr/lib/os-release,readOnly=true \
|
||||
--mount volume=etc-os-release,target=/etc/os-release \
|
||||
--volume=etc-resolv,kind=host,source=/etc/resolv.conf,readOnly=true \
|
||||
--mount volume=etc-resolv,target=/etc/resolv.conf \
|
||||
--volume etc-ssl-certs,kind=host,source=/etc/ssl/certs,readOnly=true \
|
||||
--mount volume=etc-ssl-certs,target=/etc/ssl/certs \
|
||||
--volume lib-modules,kind=host,source=/lib/modules,readOnly=true \
|
||||
--mount volume=lib-modules,target=/lib/modules \
|
||||
--volume run,kind=host,source=/run \
|
||||
--mount volume=run,target=/run \
|
||||
--volume usr-share-certs,kind=host,source=/usr/share/ca-certificates,readOnly=true \
|
||||
--mount volume=usr-share-certs,target=/usr/share/ca-certificates \
|
||||
--volume var-lib-calico,kind=host,source=/var/lib/calico,readOnly=true \
|
||||
--mount volume=var-lib-calico,target=/var/lib/calico \
|
||||
--volume var-lib-docker,kind=host,source=/var/lib/docker \
|
||||
--mount volume=var-lib-docker,target=/var/lib/docker \
|
||||
--volume var-lib-kubelet,kind=host,source=/var/lib/kubelet,recursive=true \
|
||||
--mount volume=var-lib-kubelet,target=/var/lib/kubelet \
|
||||
--volume var-log,kind=host,source=/var/log \
|
||||
--mount volume=var-log,target=/var/log \
|
||||
--volume opt-cni-bin,kind=host,source=/opt/cni/bin \
|
||||
--mount volume=opt-cni-bin,target=/opt/cni/bin \
|
||||
$${KUBELET_IMAGE} -- \
|
||||
--anonymous-auth=false \
|
||||
--authentication-token-webhook \
|
||||
--authorization-mode=Webhook \
|
||||
--bootstrap-kubeconfig=/etc/kubernetes/kubeconfig \
|
||||
--cgroup-driver=$${KUBELET_CGROUP_DRIVER} \
|
||||
--client-ca-file=/etc/kubernetes/ca.crt \
|
||||
--cluster_dns=${cluster_dns_service_ip} \
|
||||
--cluster_domain=${cluster_domain_suffix} \
|
||||
--cni-conf-dir=/etc/kubernetes/cni/net.d \
|
||||
--healthz-port=0 \
|
||||
--kubeconfig=/var/lib/kubelet/kubeconfig \
|
||||
--network-plugin=cni \
|
||||
--node-labels=node.kubernetes.io/node \
|
||||
%{~ for label in split(",", node_labels) ~}
|
||||
--node-labels=${label} \
|
||||
%{~ endfor ~}
|
||||
--pod-manifest-path=/etc/kubernetes/manifests \
|
||||
--read-only-port=0 \
|
||||
--rotate-certificates \
|
||||
--volume-plugin-dir=/var/lib/kubelet/volumeplugins
|
||||
ExecStop=-/usr/bin/rkt stop --uuid-file=/var/cache/kubelet-pod.uuid
|
||||
Restart=always
|
||||
RestartSec=5
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
- name: delete-node.service
|
||||
enable: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Waiting to delete Kubernetes node on shutdown
|
||||
[Service]
|
||||
Type=oneshot
|
||||
RemainAfterExit=true
|
||||
ExecStart=/bin/true
|
||||
ExecStop=/etc/kubernetes/delete-node
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
storage:
|
||||
files:
|
||||
- path: /etc/kubernetes/kubeconfig
|
||||
filesystem: root
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
||||
${kubeconfig}
|
||||
- path: /etc/sysctl.d/max-user-watches.conf
|
||||
filesystem: root
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
||||
fs.inotify.max_user_watches=16184
|
||||
- path: /etc/kubernetes/delete-node
|
||||
filesystem: root
|
||||
mode: 0744
|
||||
contents:
|
||||
inline: |
|
||||
#!/bin/bash
|
||||
set -e
|
||||
exec /usr/bin/rkt run \
|
||||
--trust-keys-from-https \
|
||||
--volume config,kind=host,source=/etc/kubernetes \
|
||||
--mount volume=config,target=/etc/kubernetes \
|
||||
--insecure-options=image \
|
||||
docker://quay.io/poseidon/kubelet:v1.18.4 \
|
||||
--net=host \
|
||||
--dns=host \
|
||||
--exec=/usr/local/bin/kubectl -- --kubeconfig=/etc/kubernetes/kubeconfig delete node $(hostname)
|
||||
passwd:
|
||||
users:
|
||||
- name: core
|
||||
ssh_authorized_keys:
|
||||
- "${ssh_authorized_key}"
|
@ -1,4 +0,0 @@
|
||||
|
||||
terraform {
|
||||
required_version = ">= 0.12"
|
||||
}
|
@ -11,10 +11,10 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster
|
||||
|
||||
## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>
|
||||
|
||||
* Kubernetes v1.18.4 (upstream)
|
||||
* Single or multi-master, [Calico](https://www.projectcalico.org/) or [flannel](https://github.com/coreos/flannel) networking
|
||||
* Kubernetes v1.23.3 (upstream)
|
||||
* Single or multi-master, [Calico](https://www.projectcalico.org/) or [Cilium](https://github.com/cilium/cilium) or [flannel](https://github.com/coreos/flannel) networking
|
||||
* On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/), SELinux enforcing
|
||||
* Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [spot](https://typhoon.psdn.io/cl/aws/#spot) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#container-linux) customization
|
||||
* Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [spot](https://typhoon.psdn.io/fedora-coreos/aws/#spot) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#hosts) customization
|
||||
* Ready for Ingress, Prometheus, Grafana, CSI, and other optional [addons](https://typhoon.psdn.io/addons/overview/)
|
||||
|
||||
## Docs
|
||||
|
@ -1,4 +1,3 @@
|
||||
|
||||
data "aws_ami" "fedora-coreos" {
|
||||
most_recent = true
|
||||
owners = ["125523088429"]
|
||||
@ -18,3 +17,25 @@ data "aws_ami" "fedora-coreos" {
|
||||
values = ["Fedora CoreOS ${var.os_stream} *"]
|
||||
}
|
||||
}
|
||||
|
||||
data "aws_ami" "fedora-coreos-arm" {
|
||||
count = var.arch == "arm64" ? 1 : 0
|
||||
|
||||
most_recent = true
|
||||
owners = ["125523088429"]
|
||||
|
||||
filter {
|
||||
name = "architecture"
|
||||
values = ["arm64"]
|
||||
}
|
||||
|
||||
filter {
|
||||
name = "virtualization-type"
|
||||
values = ["hvm"]
|
||||
}
|
||||
|
||||
filter {
|
||||
name = "description"
|
||||
values = ["Fedora CoreOS ${var.os_stream} *"]
|
||||
}
|
||||
}
|
||||
|
@ -1,11 +1,10 @@
|
||||
# Kubernetes assets (kubeconfig, manifests)
|
||||
module "bootstrap" {
|
||||
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=e75697ce35d7773705f0b9b28ce1ffbe99f9493c"
|
||||
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=26bea83b957d18b99e16435405983181c4a6e159"
|
||||
|
||||
cluster_name = var.cluster_name
|
||||
api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)]
|
||||
etcd_servers = aws_route53_record.etcds.*.fqdn
|
||||
asset_dir = var.asset_dir
|
||||
networking = var.networking
|
||||
network_mtu = var.network_mtu
|
||||
pod_cidr = var.pod_cidr
|
||||
@ -13,7 +12,6 @@ module "bootstrap" {
|
||||
cluster_domain_suffix = var.cluster_domain_suffix
|
||||
enable_reporting = var.enable_reporting
|
||||
enable_aggregation = var.enable_aggregation
|
||||
|
||||
trusted_certs_dir = "/etc/pki/tls/certs"
|
||||
daemonset_tolerations = var.daemonset_tolerations
|
||||
}
|
||||
|
||||
|
@ -22,9 +22,8 @@ resource "aws_instance" "controllers" {
|
||||
}
|
||||
|
||||
instance_type = var.controller_type
|
||||
|
||||
ami = data.aws_ami.fedora-coreos.image_id
|
||||
user_data = data.ct_config.controller-ignitions.*.rendered[count.index]
|
||||
ami = var.arch == "arm64" ? data.aws_ami.fedora-coreos-arm[0].image_id : data.aws_ami.fedora-coreos.image_id
|
||||
user_data = data.ct_config.controller-ignitions.*.rendered[count.index]
|
||||
|
||||
# storage
|
||||
root_block_device {
|
||||
|
@ -1,6 +1,6 @@
|
||||
---
|
||||
variant: fcos
|
||||
version: 1.0.0
|
||||
version: 1.4.0
|
||||
systemd:
|
||||
units:
|
||||
- name: etcd-member.service
|
||||
@ -8,32 +8,31 @@ systemd:
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=etcd (System Container)
|
||||
Documentation=https://github.com/coreos/etcd
|
||||
Documentation=https://github.com/etcd-io/etcd
|
||||
Wants=network-online.target network.target
|
||||
After=network-online.target
|
||||
[Service]
|
||||
# https://github.com/opencontainers/runc/pull/1807
|
||||
# Type=notify
|
||||
# NotifyAccess=exec
|
||||
Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.1
|
||||
Type=exec
|
||||
Restart=on-failure
|
||||
RestartSec=10s
|
||||
TimeoutStartSec=0
|
||||
LimitNOFILE=40000
|
||||
ExecStartPre=/bin/mkdir -p /var/lib/etcd
|
||||
ExecStartPre=-/usr/bin/podman rm etcd
|
||||
#--volume $${NOTIFY_SOCKET}:/run/systemd/notify \
|
||||
ExecStart=/usr/bin/podman run --name etcd \
|
||||
--env-file /etc/etcd/etcd.env \
|
||||
--network host \
|
||||
--volume /var/lib/etcd:/var/lib/etcd:rw,Z \
|
||||
--volume /etc/ssl/etcd:/etc/ssl/certs:ro,Z \
|
||||
quay.io/coreos/etcd:v3.4.9
|
||||
$${ETCD_IMAGE}
|
||||
ExecStop=/usr/bin/podman stop etcd
|
||||
Restart=on-failure
|
||||
RestartSec=10s
|
||||
TimeoutStartSec=0
|
||||
LimitNOFILE=40000
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
- name: docker.service
|
||||
- name: containerd.service
|
||||
enabled: true
|
||||
- name: docker.service
|
||||
mask: true
|
||||
- name: wait-for-dns.service
|
||||
enabled: true
|
||||
contents: |
|
||||
@ -53,10 +52,13 @@ systemd:
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Kubelet (System Container)
|
||||
Requires=afterburn.service
|
||||
After=afterburn.service
|
||||
Wants=rpc-statd.service
|
||||
[Service]
|
||||
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.18.4
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/cni/net.d
|
||||
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.23.3
|
||||
EnvironmentFile=/run/metadata/afterburn
|
||||
ExecStartPre=/bin/mkdir -p /etc/cni/net.d
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
|
||||
ExecStartPre=/bin/mkdir -p /opt/cni/bin
|
||||
ExecStartPre=/bin/mkdir -p /var/lib/calico
|
||||
@ -67,16 +69,14 @@ systemd:
|
||||
--privileged \
|
||||
--pid host \
|
||||
--network host \
|
||||
--volume /etc/cni/net.d:/etc/cni/net.d:ro,z \
|
||||
--volume /etc/kubernetes:/etc/kubernetes:ro,z \
|
||||
--volume /usr/lib/os-release:/etc/os-release:ro \
|
||||
--volume /etc/ssl/certs:/etc/ssl/certs:ro \
|
||||
--volume /lib/modules:/lib/modules:ro \
|
||||
--volume /run:/run \
|
||||
--volume /sys/fs/cgroup:/sys/fs/cgroup:ro \
|
||||
--volume /sys/fs/cgroup/systemd:/sys/fs/cgroup/systemd \
|
||||
--volume /etc/pki/tls/certs:/usr/share/ca-certificates:ro \
|
||||
--volume /sys/fs/cgroup:/sys/fs/cgroup \
|
||||
--volume /var/lib/calico:/var/lib/calico:ro \
|
||||
--volume /var/lib/docker:/var/lib/docker \
|
||||
--volume /var/lib/containerd:/var/lib/containerd \
|
||||
--volume /var/lib/kubelet:/var/lib/kubelet:rshared,z \
|
||||
--volume /var/log:/var/log \
|
||||
--volume /var/run/lock:/var/run/lock:z \
|
||||
@ -88,17 +88,19 @@ systemd:
|
||||
--bootstrap-kubeconfig=/etc/kubernetes/kubeconfig \
|
||||
--cgroup-driver=systemd \
|
||||
--cgroups-per-qos=true \
|
||||
--container-runtime=remote \
|
||||
--container-runtime-endpoint=unix:///run/containerd/containerd.sock \
|
||||
--enforce-node-allocatable=pods \
|
||||
--client-ca-file=/etc/kubernetes/ca.crt \
|
||||
--cluster_dns=${cluster_dns_service_ip} \
|
||||
--cluster_domain=${cluster_domain_suffix} \
|
||||
--cni-conf-dir=/etc/kubernetes/cni/net.d \
|
||||
--healthz-port=0 \
|
||||
--kubeconfig=/var/lib/kubelet/kubeconfig \
|
||||
--network-plugin=cni \
|
||||
--node-labels=node.kubernetes.io/controller="true" \
|
||||
--pod-manifest-path=/etc/kubernetes/manifests \
|
||||
--provider-id=aws:///$${AFTERBURN_AWS_AVAILABILITY_ZONE}/$${AFTERBURN_AWS_INSTANCE_ID} \
|
||||
--read-only-port=0 \
|
||||
--resolv-conf=/run/systemd/resolve/resolv.conf \
|
||||
--register-with-taints=node-role.kubernetes.io/controller=:NoSchedule \
|
||||
--rotate-certificates \
|
||||
--volume-plugin-dir=/var/lib/kubelet/volumeplugins
|
||||
@ -120,15 +122,17 @@ systemd:
|
||||
ExecStartPre=-/usr/bin/podman rm bootstrap
|
||||
ExecStart=/usr/bin/podman run --name bootstrap \
|
||||
--network host \
|
||||
--volume /etc/kubernetes/bootstrap-secrets:/etc/kubernetes/secrets:ro,z \
|
||||
--volume /etc/kubernetes/pki:/etc/kubernetes/pki:ro,z \
|
||||
--volume /opt/bootstrap/assets:/assets:ro,Z \
|
||||
--volume /opt/bootstrap/apply:/apply:ro,Z \
|
||||
--entrypoint=/apply \
|
||||
quay.io/poseidon/kubelet:v1.18.4
|
||||
quay.io/poseidon/kubelet:v1.23.3
|
||||
ExecStartPost=/bin/touch /opt/bootstrap/bootstrap.done
|
||||
ExecStartPost=-/usr/bin/podman stop bootstrap
|
||||
storage:
|
||||
directories:
|
||||
- path: /var/lib/etcd
|
||||
mode: 0700
|
||||
- path: /etc/kubernetes
|
||||
- path: /opt/bootstrap
|
||||
files:
|
||||
@ -145,25 +149,26 @@ storage:
|
||||
mkdir -p -- auth tls/etcd tls/k8s static-manifests manifests/coredns manifests-networking
|
||||
awk '/#####/ {filename=$2; next} {print > filename}' assets
|
||||
mkdir -p /etc/ssl/etcd/etcd
|
||||
mkdir -p /etc/kubernetes/bootstrap-secrets
|
||||
mkdir -p /etc/kubernetes/pki
|
||||
mv tls/etcd/{peer*,server*} /etc/ssl/etcd/etcd/
|
||||
mv tls/etcd/etcd-client* /etc/kubernetes/bootstrap-secrets/
|
||||
mv tls/etcd/etcd-client* /etc/kubernetes/pki/
|
||||
chown -R etcd:etcd /etc/ssl/etcd
|
||||
chmod -R 500 /etc/ssl/etcd
|
||||
mv auth/kubeconfig /etc/kubernetes/bootstrap-secrets/
|
||||
mv tls/k8s/* /etc/kubernetes/bootstrap-secrets/
|
||||
mv auth/* /etc/kubernetes/pki/
|
||||
mv tls/k8s/* /etc/kubernetes/pki/
|
||||
mkdir -p /etc/kubernetes/manifests
|
||||
mv static-manifests/* /etc/kubernetes/manifests/
|
||||
mkdir -p /opt/bootstrap/assets
|
||||
mv manifests /opt/bootstrap/assets/manifests
|
||||
mv manifests-networking/* /opt/bootstrap/assets/manifests/
|
||||
rm -rf assets auth static-manifests tls manifests-networking
|
||||
chcon -R -u system_u -t container_file_t /etc/kubernetes/pki
|
||||
- path: /opt/bootstrap/apply
|
||||
mode: 0544
|
||||
contents:
|
||||
inline: |
|
||||
#!/bin/bash -e
|
||||
export KUBECONFIG=/etc/kubernetes/secrets/kubeconfig
|
||||
export KUBECONFIG=/etc/kubernetes/pki/admin.conf
|
||||
until kubectl version; do
|
||||
echo "Waiting for static pod control plane"
|
||||
sleep 5
|
||||
@ -176,6 +181,18 @@ storage:
|
||||
contents:
|
||||
inline: |
|
||||
fs.inotify.max_user_watches=16184
|
||||
- path: /etc/sysctl.d/reverse-path-filter.conf
|
||||
contents:
|
||||
inline: |
|
||||
net.ipv4.conf.default.rp_filter=0
|
||||
net.ipv4.conf.*.rp_filter=0
|
||||
- path: /etc/systemd/network/50-flannel.link
|
||||
contents:
|
||||
inline: |
|
||||
[Match]
|
||||
OriginalName=flannel*
|
||||
[Link]
|
||||
MACAddressPolicy=none
|
||||
- path: /etc/systemd/system.conf.d/accounting.conf
|
||||
contents:
|
||||
inline: |
|
||||
@ -187,8 +204,6 @@ storage:
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
||||
# TODO: Use a systemd dropin once podman v1.4.5 is avail.
|
||||
NOTIFY_SOCKET=/run/systemd/notify
|
||||
ETCD_NAME=${etcd_name}
|
||||
ETCD_DATA_DIR=/var/lib/etcd
|
||||
ETCD_ADVERTISE_CLIENT_URLS=https://${etcd_domain}:2379
|
||||
@ -206,6 +221,26 @@ storage:
|
||||
ETCD_PEER_CERT_FILE=/etc/ssl/certs/etcd/peer.crt
|
||||
ETCD_PEER_KEY_FILE=/etc/ssl/certs/etcd/peer.key
|
||||
ETCD_PEER_CLIENT_CERT_AUTH=true
|
||||
- path: /etc/fedora-coreos/iptables-legacy.stamp
|
||||
- path: /etc/containerd/config.toml
|
||||
overwrite: true
|
||||
contents:
|
||||
inline: |
|
||||
version = 2
|
||||
root = "/var/lib/containerd"
|
||||
state = "/run/containerd"
|
||||
subreaper = true
|
||||
oom_score = -999
|
||||
[grpc]
|
||||
address = "/run/containerd/containerd.sock"
|
||||
uid = 0
|
||||
gid = 0
|
||||
[plugins."io.containerd.grpc.v1.cri"]
|
||||
enable_selinux = true
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
|
||||
runtime_type = "io.containerd.runc.v2"
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
|
||||
SystemdCgroup = true
|
||||
passwd:
|
||||
users:
|
||||
- name: core
|
||||
|
@ -17,6 +17,7 @@ resource "aws_route53_record" "apiserver" {
|
||||
resource "aws_lb" "nlb" {
|
||||
name = "${var.cluster_name}-nlb"
|
||||
load_balancer_type = "network"
|
||||
ip_address_type = "dualstack"
|
||||
internal = false
|
||||
|
||||
subnets = aws_subnet.public.*.id
|
||||
|
@ -1,5 +1,6 @@
|
||||
output "kubeconfig-admin" {
|
||||
value = module.bootstrap.kubeconfig-admin
|
||||
value = module.bootstrap.kubeconfig-admin
|
||||
sensitive = true
|
||||
}
|
||||
|
||||
# Outputs for Kubernetes Ingress
|
||||
@ -32,7 +33,8 @@ output "worker_security_groups" {
|
||||
}
|
||||
|
||||
output "kubeconfig" {
|
||||
value = module.bootstrap.kubeconfig-kubelet
|
||||
value = module.bootstrap.kubeconfig-kubelet
|
||||
sensitive = true
|
||||
}
|
||||
|
||||
# Outputs for custom load balancing
|
||||
@ -52,3 +54,10 @@ output "worker_target_group_https" {
|
||||
value = module.workers.target_group_https
|
||||
}
|
||||
|
||||
# Outputs for debug
|
||||
|
||||
output "assets_dist" {
|
||||
value = module.bootstrap.assets_dist
|
||||
sensitive = true
|
||||
}
|
||||
|
||||
|
@ -13,6 +13,30 @@ resource "aws_security_group" "controller" {
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_security_group_rule" "controller-icmp" {
|
||||
count = var.networking == "cilium" ? 1 : 0
|
||||
|
||||
security_group_id = aws_security_group.controller.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "icmp"
|
||||
from_port = 8
|
||||
to_port = 0
|
||||
source_security_group_id = aws_security_group.worker.id
|
||||
}
|
||||
|
||||
resource "aws_security_group_rule" "controller-icmp-self" {
|
||||
count = var.networking == "cilium" ? 1 : 0
|
||||
|
||||
security_group_id = aws_security_group.controller.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "icmp"
|
||||
from_port = 8
|
||||
to_port = 0
|
||||
self = true
|
||||
}
|
||||
|
||||
resource "aws_security_group_rule" "controller-ssh" {
|
||||
security_group_id = aws_security_group.controller.id
|
||||
|
||||
@ -44,39 +68,31 @@ resource "aws_security_group_rule" "controller-etcd-metrics" {
|
||||
source_security_group_id = aws_security_group.worker.id
|
||||
}
|
||||
|
||||
# Allow Prometheus to scrape kube-proxy
|
||||
resource "aws_security_group_rule" "kube-proxy-metrics" {
|
||||
resource "aws_security_group_rule" "controller-cilium-health" {
|
||||
count = var.networking == "cilium" ? 1 : 0
|
||||
|
||||
security_group_id = aws_security_group.controller.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "tcp"
|
||||
from_port = 10249
|
||||
to_port = 10249
|
||||
from_port = 4240
|
||||
to_port = 4240
|
||||
source_security_group_id = aws_security_group.worker.id
|
||||
}
|
||||
|
||||
# Allow Prometheus to scrape kube-scheduler
|
||||
resource "aws_security_group_rule" "controller-scheduler-metrics" {
|
||||
resource "aws_security_group_rule" "controller-cilium-health-self" {
|
||||
count = var.networking == "cilium" ? 1 : 0
|
||||
|
||||
security_group_id = aws_security_group.controller.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "tcp"
|
||||
from_port = 10251
|
||||
to_port = 10251
|
||||
source_security_group_id = aws_security_group.worker.id
|
||||
}
|
||||
|
||||
# Allow Prometheus to scrape kube-controller-manager
|
||||
resource "aws_security_group_rule" "controller-manager-metrics" {
|
||||
security_group_id = aws_security_group.controller.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "tcp"
|
||||
from_port = 10252
|
||||
to_port = 10252
|
||||
source_security_group_id = aws_security_group.worker.id
|
||||
type = "ingress"
|
||||
protocol = "tcp"
|
||||
from_port = 4240
|
||||
to_port = 4240
|
||||
self = true
|
||||
}
|
||||
|
||||
# IANA VXLAN default
|
||||
resource "aws_security_group_rule" "controller-vxlan" {
|
||||
count = var.networking == "flannel" ? 1 : 0
|
||||
|
||||
@ -111,6 +127,31 @@ resource "aws_security_group_rule" "controller-apiserver" {
|
||||
cidr_blocks = ["0.0.0.0/0"]
|
||||
}
|
||||
|
||||
# Linux VXLAN default
|
||||
resource "aws_security_group_rule" "controller-linux-vxlan" {
|
||||
count = var.networking == "cilium" ? 1 : 0
|
||||
|
||||
security_group_id = aws_security_group.controller.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "udp"
|
||||
from_port = 8472
|
||||
to_port = 8472
|
||||
source_security_group_id = aws_security_group.worker.id
|
||||
}
|
||||
|
||||
resource "aws_security_group_rule" "controller-linux-vxlan-self" {
|
||||
count = var.networking == "cilium" ? 1 : 0
|
||||
|
||||
security_group_id = aws_security_group.controller.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "udp"
|
||||
from_port = 8472
|
||||
to_port = 8472
|
||||
self = true
|
||||
}
|
||||
|
||||
# Allow Prometheus to scrape node-exporter daemonset
|
||||
resource "aws_security_group_rule" "controller-node-exporter" {
|
||||
security_group_id = aws_security_group.controller.id
|
||||
@ -122,6 +163,17 @@ resource "aws_security_group_rule" "controller-node-exporter" {
|
||||
source_security_group_id = aws_security_group.worker.id
|
||||
}
|
||||
|
||||
# Allow Prometheus to scrape kube-proxy
|
||||
resource "aws_security_group_rule" "kube-proxy-metrics" {
|
||||
security_group_id = aws_security_group.controller.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "tcp"
|
||||
from_port = 10249
|
||||
to_port = 10249
|
||||
source_security_group_id = aws_security_group.worker.id
|
||||
}
|
||||
|
||||
# Allow apiserver to access kubelets for exec, log, port-forward
|
||||
resource "aws_security_group_rule" "controller-kubelet" {
|
||||
security_group_id = aws_security_group.controller.id
|
||||
@ -143,6 +195,28 @@ resource "aws_security_group_rule" "controller-kubelet-self" {
|
||||
self = true
|
||||
}
|
||||
|
||||
# Allow Prometheus to scrape kube-scheduler
|
||||
resource "aws_security_group_rule" "controller-scheduler-metrics" {
|
||||
security_group_id = aws_security_group.controller.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "tcp"
|
||||
from_port = 10259
|
||||
to_port = 10259
|
||||
source_security_group_id = aws_security_group.worker.id
|
||||
}
|
||||
|
||||
# Allow Prometheus to scrape kube-controller-manager
|
||||
resource "aws_security_group_rule" "controller-manager-metrics" {
|
||||
security_group_id = aws_security_group.controller.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "tcp"
|
||||
from_port = 10257
|
||||
to_port = 10257
|
||||
source_security_group_id = aws_security_group.worker.id
|
||||
}
|
||||
|
||||
resource "aws_security_group_rule" "controller-bgp" {
|
||||
security_group_id = aws_security_group.controller.id
|
||||
|
||||
@ -227,6 +301,30 @@ resource "aws_security_group" "worker" {
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_security_group_rule" "worker-icmp" {
|
||||
count = var.networking == "cilium" ? 1 : 0
|
||||
|
||||
security_group_id = aws_security_group.worker.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "icmp"
|
||||
from_port = 8
|
||||
to_port = 0
|
||||
source_security_group_id = aws_security_group.controller.id
|
||||
}
|
||||
|
||||
resource "aws_security_group_rule" "worker-icmp-self" {
|
||||
count = var.networking == "cilium" ? 1 : 0
|
||||
|
||||
security_group_id = aws_security_group.worker.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "icmp"
|
||||
from_port = 8
|
||||
to_port = 0
|
||||
self = true
|
||||
}
|
||||
|
||||
resource "aws_security_group_rule" "worker-ssh" {
|
||||
security_group_id = aws_security_group.worker.id
|
||||
|
||||
@ -257,6 +355,31 @@ resource "aws_security_group_rule" "worker-https" {
|
||||
cidr_blocks = ["0.0.0.0/0"]
|
||||
}
|
||||
|
||||
resource "aws_security_group_rule" "worker-cilium-health" {
|
||||
count = var.networking == "cilium" ? 1 : 0
|
||||
|
||||
security_group_id = aws_security_group.worker.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "tcp"
|
||||
from_port = 4240
|
||||
to_port = 4240
|
||||
source_security_group_id = aws_security_group.controller.id
|
||||
}
|
||||
|
||||
resource "aws_security_group_rule" "worker-cilium-health-self" {
|
||||
count = var.networking == "cilium" ? 1 : 0
|
||||
|
||||
security_group_id = aws_security_group.worker.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "tcp"
|
||||
from_port = 4240
|
||||
to_port = 4240
|
||||
self = true
|
||||
}
|
||||
|
||||
# IANA VXLAN default
|
||||
resource "aws_security_group_rule" "worker-vxlan" {
|
||||
count = var.networking == "flannel" ? 1 : 0
|
||||
|
||||
@ -281,6 +404,31 @@ resource "aws_security_group_rule" "worker-vxlan-self" {
|
||||
self = true
|
||||
}
|
||||
|
||||
# Linux VXLAN default
|
||||
resource "aws_security_group_rule" "worker-linux-vxlan" {
|
||||
count = var.networking == "cilium" ? 1 : 0
|
||||
|
||||
security_group_id = aws_security_group.worker.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "udp"
|
||||
from_port = 8472
|
||||
to_port = 8472
|
||||
source_security_group_id = aws_security_group.controller.id
|
||||
}
|
||||
|
||||
resource "aws_security_group_rule" "worker-linux-vxlan-self" {
|
||||
count = var.networking == "cilium" ? 1 : 0
|
||||
|
||||
security_group_id = aws_security_group.worker.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "udp"
|
||||
from_port = 8472
|
||||
to_port = 8472
|
||||
self = true
|
||||
}
|
||||
|
||||
# Allow Prometheus to scrape node-exporter daemonset
|
||||
resource "aws_security_group_rule" "worker-node-exporter" {
|
||||
security_group_id = aws_security_group.worker.id
|
||||
|
@ -24,7 +24,7 @@ resource "null_resource" "copy-controller-secrets" {
|
||||
|
||||
provisioner "file" {
|
||||
content = join("\n", local.assets_bundle)
|
||||
destination = "$HOME/assets"
|
||||
destination = "/home/core/assets"
|
||||
}
|
||||
|
||||
provisioner "remote-exec" {
|
||||
|
@ -43,26 +43,31 @@ variable "worker_type" {
|
||||
|
||||
variable "os_stream" {
|
||||
type = string
|
||||
description = "Fedora CoreOs image stream for instances (e.g. stable, testing, next)"
|
||||
description = "Fedora CoreOS image stream for instances (e.g. stable, testing, next)"
|
||||
default = "stable"
|
||||
|
||||
validation {
|
||||
condition = contains(["stable", "testing", "next"], var.os_stream)
|
||||
error_message = "The os_stream must be stable, testing, or next."
|
||||
}
|
||||
}
|
||||
|
||||
variable "disk_size" {
|
||||
type = number
|
||||
description = "Size of the EBS volume in GB"
|
||||
default = 40
|
||||
default = 30
|
||||
}
|
||||
|
||||
variable "disk_type" {
|
||||
type = string
|
||||
description = "Type of the EBS volume (e.g. standard, gp2, io1)"
|
||||
default = "gp2"
|
||||
description = "Type of the EBS volume (e.g. standard, gp2, gp3, io1)"
|
||||
default = "gp3"
|
||||
}
|
||||
|
||||
variable "disk_iops" {
|
||||
type = number
|
||||
description = "IOPS of the EBS volume (e.g. 100)"
|
||||
default = 0
|
||||
description = "IOPS of the EBS volume (e.g. 3000)"
|
||||
default = 3000
|
||||
}
|
||||
|
||||
variable "worker_price" {
|
||||
@ -79,13 +84,13 @@ variable "worker_target_groups" {
|
||||
|
||||
variable "controller_snippets" {
|
||||
type = list(string)
|
||||
description = "Controller Fedora CoreOS Config snippets"
|
||||
description = "Controller Butane snippets"
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "worker_snippets" {
|
||||
type = list(string)
|
||||
description = "Worker Fedora CoreOS Config snippets"
|
||||
description = "Worker Butane snippets"
|
||||
default = []
|
||||
}
|
||||
|
||||
@ -96,12 +101,6 @@ variable "ssh_authorized_key" {
|
||||
description = "SSH public key for user 'core'"
|
||||
}
|
||||
|
||||
variable "asset_dir" {
|
||||
type = string
|
||||
description = "Absolute path to a directory where generated assets should be placed (contains secrets)"
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "networking" {
|
||||
type = string
|
||||
description = "Choice of networking provider (calico or flannel)"
|
||||
@ -143,8 +142,8 @@ variable "enable_reporting" {
|
||||
|
||||
variable "enable_aggregation" {
|
||||
type = bool
|
||||
description = "Enable the Kubernetes Aggregation Layer (defaults to false)"
|
||||
default = false
|
||||
description = "Enable the Kubernetes Aggregation Layer"
|
||||
default = true
|
||||
}
|
||||
|
||||
variable "worker_node_labels" {
|
||||
@ -161,3 +160,19 @@ variable "cluster_domain_suffix" {
|
||||
default = "cluster.local"
|
||||
}
|
||||
|
||||
variable "arch" {
|
||||
type = string
|
||||
description = "Container architecture (amd64 or arm64)"
|
||||
default = "amd64"
|
||||
|
||||
validation {
|
||||
condition = var.arch == "amd64" || var.arch == "arm64"
|
||||
error_message = "The arch must be amd64 or arm64."
|
||||
}
|
||||
}
|
||||
|
||||
variable "daemonset_tolerations" {
|
||||
type = list(string)
|
||||
description = "List of additional taint keys kube-system DaemonSets should tolerate (e.g. ['custom-role', 'gpu-role'])"
|
||||
default = []
|
||||
}
|
||||
|
@ -1,11 +1,15 @@
|
||||
# Terraform version and plugin versions
|
||||
|
||||
terraform {
|
||||
required_version = "~> 0.12.6"
|
||||
required_version = ">= 0.13.0, < 2.0.0"
|
||||
required_providers {
|
||||
aws = "~> 2.23"
|
||||
ct = "~> 0.4"
|
||||
template = "~> 2.1"
|
||||
null = "~> 2.1"
|
||||
aws = ">= 2.23, <= 4.0"
|
||||
template = "~> 2.2"
|
||||
null = ">= 2.1"
|
||||
|
||||
ct = {
|
||||
source = "poseidon/ct"
|
||||
version = "~> 0.9"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -9,6 +9,7 @@ module "workers" {
|
||||
worker_count = var.worker_count
|
||||
instance_type = var.worker_type
|
||||
os_stream = var.os_stream
|
||||
arch = var.arch
|
||||
disk_size = var.disk_size
|
||||
spot_price = var.worker_price
|
||||
target_groups = var.worker_target_groups
|
||||
|
@ -1,4 +1,3 @@
|
||||
|
||||
data "aws_ami" "fedora-coreos" {
|
||||
most_recent = true
|
||||
owners = ["125523088429"]
|
||||
@ -18,3 +17,25 @@ data "aws_ami" "fedora-coreos" {
|
||||
values = ["Fedora CoreOS ${var.os_stream} *"]
|
||||
}
|
||||
}
|
||||
|
||||
data "aws_ami" "fedora-coreos-arm" {
|
||||
count = var.arch == "arm64" ? 1 : 0
|
||||
|
||||
most_recent = true
|
||||
owners = ["125523088429"]
|
||||
|
||||
filter {
|
||||
name = "architecture"
|
||||
values = ["arm64"]
|
||||
}
|
||||
|
||||
filter {
|
||||
name = "virtualization-type"
|
||||
values = ["hvm"]
|
||||
}
|
||||
|
||||
filter {
|
||||
name = "description"
|
||||
values = ["Fedora CoreOS ${var.os_stream} *"]
|
||||
}
|
||||
}
|
||||
|
@ -1,10 +1,12 @@
|
||||
---
|
||||
variant: fcos
|
||||
version: 1.0.0
|
||||
version: 1.4.0
|
||||
systemd:
|
||||
units:
|
||||
- name: docker.service
|
||||
- name: containerd.service
|
||||
enabled: true
|
||||
- name: docker.service
|
||||
mask: true
|
||||
- name: wait-for-dns.service
|
||||
enabled: true
|
||||
contents: |
|
||||
@ -23,10 +25,13 @@ systemd:
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Kubelet (System Container)
|
||||
Requires=afterburn.service
|
||||
After=afterburn.service
|
||||
Wants=rpc-statd.service
|
||||
[Service]
|
||||
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.18.4
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/cni/net.d
|
||||
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.23.3
|
||||
EnvironmentFile=/run/metadata/afterburn
|
||||
ExecStartPre=/bin/mkdir -p /etc/cni/net.d
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
|
||||
ExecStartPre=/bin/mkdir -p /opt/cni/bin
|
||||
ExecStartPre=/bin/mkdir -p /var/lib/calico
|
||||
@ -37,16 +42,14 @@ systemd:
|
||||
--privileged \
|
||||
--pid host \
|
||||
--network host \
|
||||
--volume /etc/cni/net.d:/etc/cni/net.d:ro,z \
|
||||
--volume /etc/kubernetes:/etc/kubernetes:ro,z \
|
||||
--volume /usr/lib/os-release:/etc/os-release:ro \
|
||||
--volume /etc/ssl/certs:/etc/ssl/certs:ro \
|
||||
--volume /lib/modules:/lib/modules:ro \
|
||||
--volume /run:/run \
|
||||
--volume /sys/fs/cgroup:/sys/fs/cgroup:ro \
|
||||
--volume /sys/fs/cgroup/systemd:/sys/fs/cgroup/systemd \
|
||||
--volume /etc/pki/tls/certs:/usr/share/ca-certificates:ro \
|
||||
--volume /sys/fs/cgroup:/sys/fs/cgroup \
|
||||
--volume /var/lib/calico:/var/lib/calico:ro \
|
||||
--volume /var/lib/docker:/var/lib/docker \
|
||||
--volume /var/lib/containerd:/var/lib/containerd \
|
||||
--volume /var/lib/kubelet:/var/lib/kubelet:rshared,z \
|
||||
--volume /var/log:/var/log \
|
||||
--volume /var/run/lock:/var/run/lock:z \
|
||||
@ -58,20 +61,25 @@ systemd:
|
||||
--bootstrap-kubeconfig=/etc/kubernetes/kubeconfig \
|
||||
--cgroup-driver=systemd \
|
||||
--cgroups-per-qos=true \
|
||||
--container-runtime=remote \
|
||||
--container-runtime-endpoint=unix:///run/containerd/containerd.sock \
|
||||
--enforce-node-allocatable=pods \
|
||||
--client-ca-file=/etc/kubernetes/ca.crt \
|
||||
--cluster_dns=${cluster_dns_service_ip} \
|
||||
--cluster_domain=${cluster_domain_suffix} \
|
||||
--cni-conf-dir=/etc/kubernetes/cni/net.d \
|
||||
--healthz-port=0 \
|
||||
--kubeconfig=/var/lib/kubelet/kubeconfig \
|
||||
--network-plugin=cni \
|
||||
--node-labels=node.kubernetes.io/node \
|
||||
%{~ for label in split(",", node_labels) ~}
|
||||
--node-labels=${label} \
|
||||
%{~ endfor ~}
|
||||
%{~ for taint in split(",", node_taints) ~}
|
||||
--register-with-taints=${taint} \
|
||||
%{~ endfor ~}
|
||||
--pod-manifest-path=/etc/kubernetes/manifests \
|
||||
--provider-id=aws:///$${AFTERBURN_AWS_AVAILABILITY_ZONE}/$${AFTERBURN_AWS_INSTANCE_ID} \
|
||||
--read-only-port=0 \
|
||||
--resolv-conf=/run/systemd/resolve/resolv.conf \
|
||||
--rotate-certificates \
|
||||
--volume-plugin-dir=/var/lib/kubelet/volumeplugins
|
||||
ExecStop=-/usr/bin/podman stop kubelet
|
||||
@ -86,10 +94,11 @@ systemd:
|
||||
[Unit]
|
||||
Description=Delete Kubernetes node on shutdown
|
||||
[Service]
|
||||
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.23.3
|
||||
Type=oneshot
|
||||
RemainAfterExit=true
|
||||
ExecStart=/bin/true
|
||||
ExecStop=/bin/bash -c '/usr/bin/podman run --volume /etc/kubernetes:/etc/kubernetes:ro,z --entrypoint /usr/local/bin/kubectl quay.io/poseidon/kubelet:v1.18.4 --kubeconfig=/etc/kubernetes/kubeconfig delete node $HOSTNAME'
|
||||
ExecStop=/bin/bash -c '/usr/bin/podman run --volume /var/lib/kubelet:/var/lib/kubelet:ro,z --entrypoint /usr/local/bin/kubectl $${KUBELET_IMAGE} --kubeconfig=/var/lib/kubelet/kubeconfig delete node $HOSTNAME'
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
storage:
|
||||
@ -105,6 +114,18 @@ storage:
|
||||
contents:
|
||||
inline: |
|
||||
fs.inotify.max_user_watches=16184
|
||||
- path: /etc/sysctl.d/reverse-path-filter.conf
|
||||
contents:
|
||||
inline: |
|
||||
net.ipv4.conf.default.rp_filter=0
|
||||
net.ipv4.conf.*.rp_filter=0
|
||||
- path: /etc/systemd/network/50-flannel.link
|
||||
contents:
|
||||
inline: |
|
||||
[Match]
|
||||
OriginalName=flannel*
|
||||
[Link]
|
||||
MACAddressPolicy=none
|
||||
- path: /etc/systemd/system.conf.d/accounting.conf
|
||||
contents:
|
||||
inline: |
|
||||
@ -112,9 +133,28 @@ storage:
|
||||
DefaultCPUAccounting=yes
|
||||
DefaultMemoryAccounting=yes
|
||||
DefaultBlockIOAccounting=yes
|
||||
- path: /etc/fedora-coreos/iptables-legacy.stamp
|
||||
- path: /etc/containerd/config.toml
|
||||
overwrite: true
|
||||
contents:
|
||||
inline: |
|
||||
version = 2
|
||||
root = "/var/lib/containerd"
|
||||
state = "/run/containerd"
|
||||
subreaper = true
|
||||
oom_score = -999
|
||||
[grpc]
|
||||
address = "/run/containerd/containerd.sock"
|
||||
uid = 0
|
||||
gid = 0
|
||||
[plugins."io.containerd.grpc.v1.cri"]
|
||||
enable_selinux = true
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
|
||||
runtime_type = "io.containerd.runc.v2"
|
||||
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
|
||||
SystemdCgroup = true
|
||||
passwd:
|
||||
users:
|
||||
- name: core
|
||||
ssh_authorized_keys:
|
||||
- ${ssh_authorized_key}
|
||||
|
||||
|
@ -36,20 +36,25 @@ variable "instance_type" {

variable "os_stream" {
type = string
description = "Fedora CoreOs image stream for instances (e.g. stable, testing, next)"
description = "Fedora CoreOS image stream for instances (e.g. stable, testing, next)"
default = "stable"

validation {
condition = contains(["stable", "testing", "next"], var.os_stream)
error_message = "The os_stream must be stable, testing, or next."
}
}

variable "disk_size" {
type = number
description = "Size of the EBS volume in GB"
default = 40
default = 30
}

variable "disk_type" {
type = string
description = "Type of the EBS volume (e.g. standard, gp2, io1)"
default = "gp2"
description = "Type of the EBS volume (e.g. standard, gp2, gp3, io1)"
default = "gp3"
}

variable "disk_iops" {
@ -72,7 +77,7 @@ variable "target_groups" {

variable "snippets" {
type = list(string)
description = "Fedora CoreOS Config snippets"
description = "Butane snippets"
default = []
}

@ -108,3 +113,22 @@ variable "node_labels" {
description = "List of initial node labels"
default = []
}

variable "node_taints" {
type = list(string)
description = "List of initial node taints"
default = []
}

# unofficial, undocumented, unsupported

variable "arch" {
type = string
description = "Container architecture (amd64 or arm64)"
default = "amd64"

validation {
condition = var.arch == "amd64" || var.arch == "arm64"
error_message = "The arch must be amd64 or arm64."
}
}
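As a rough sketch only (not part of this change), a worker pool using the variables above might be wired up as follows; the module source ref, pool name, and every value shown are illustrative assumptions, and required inputs such as cluster networking, kubeconfig, and SSH key are omitted:

module "worker-pool" {
# hypothetical source ref; pin to an actual release tag in practice
source = "git::https://github.com/poseidon/typhoon//aws/fedora-coreos/kubernetes/workers?ref=v1.23.3"

# assumed values for illustration
instance_type = "t3.medium"
os_stream = "stable"
disk_size = 30
disk_type = "gp3"
arch = "amd64"
snippets = []
node_labels = ["pool=heavy-compute"]
node_taints = ["role=gpu:NoSchedule"]
}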
@ -1,4 +1,14 @@
# Terraform version and plugin versions

terraform {
required_version = ">= 0.12"
required_version = ">= 0.13.0, < 2.0.0"
required_providers {
aws = ">= 2.23, <= 4.0"
template = "~> 2.2"

ct = {
source = "poseidon/ct"
version = "~> 0.9"
}
}
}
@ -44,7 +44,7 @@ resource "aws_autoscaling_group" "workers" {
|
||||
|
||||
# Worker template
|
||||
resource "aws_launch_configuration" "worker" {
|
||||
image_id = data.aws_ami.fedora-coreos.image_id
|
||||
image_id = var.arch == "arm64" ? data.aws_ami.fedora-coreos-arm[0].image_id : data.aws_ami.fedora-coreos.image_id
|
||||
instance_type = var.instance_type
|
||||
spot_price = var.spot_price > 0 ? var.spot_price : null
|
||||
enable_monitoring = false
|
||||
@ -86,6 +86,7 @@ data "template_file" "worker-config" {
|
||||
cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
|
||||
cluster_domain_suffix = var.cluster_domain_suffix
|
||||
node_labels = join(",", var.node_labels)
|
||||
node_taints = join(",", var.node_taints)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -11,13 +11,13 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster

## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>

* Kubernetes v1.18.4 (upstream)
* Single or multi-master, [Calico](https://www.projectcalico.org/) or [flannel](https://github.com/coreos/flannel) networking
* Kubernetes v1.23.3 (upstream)
* Single or multi-master, [Calico](https://www.projectcalico.org/) or [Cilium](https://github.com/cilium/cilium) or [flannel](https://github.com/coreos/flannel) networking
* On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/)
* Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [spot](https://typhoon.psdn.io/cl/aws/#spot) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#container-linux) customization
* Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [spot](https://typhoon.psdn.io/flatcar-linux/aws/#spot) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#hosts) customization
* Ready for Ingress, Prometheus, Grafana, CSI, and other optional [addons](https://typhoon.psdn.io/addons/overview/)

## Docs

Please see the [official docs](https://typhoon.psdn.io) and the AWS [tutorial](https://typhoon.psdn.io/cl/aws/).
Please see the [official docs](https://typhoon.psdn.io) and the AWS [tutorial](https://typhoon.psdn.io/flatcar-linux/aws/).
@ -1,33 +1,10 @@
|
||||
locals {
|
||||
# Pick a CoreOS Container Linux derivative
|
||||
# coreos-stable -> Container Linux AMI
|
||||
# Pick a Flatcar Linux AMI
|
||||
# flatcar-stable -> Flatcar Linux AMI
|
||||
ami_id = local.flavor == "flatcar" ? data.aws_ami.flatcar.image_id : data.aws_ami.coreos.image_id
|
||||
|
||||
flavor = split("-", var.os_image)[0]
|
||||
ami_id = var.arch == "arm64" ? data.aws_ami.flatcar-arm64[0].image_id : data.aws_ami.flatcar.image_id
|
||||
channel = split("-", var.os_image)[1]
|
||||
}
|
||||
|
||||
data "aws_ami" "coreos" {
|
||||
most_recent = true
|
||||
owners = ["595879546273"]
|
||||
|
||||
filter {
|
||||
name = "architecture"
|
||||
values = ["x86_64"]
|
||||
}
|
||||
|
||||
filter {
|
||||
name = "virtualization-type"
|
||||
values = ["hvm"]
|
||||
}
|
||||
|
||||
filter {
|
||||
name = "name"
|
||||
values = ["CoreOS-${local.flavor == "coreos" ? local.channel : "stable"}-*"]
|
||||
}
|
||||
}
|
||||
|
||||
data "aws_ami" "flatcar" {
|
||||
most_recent = true
|
||||
owners = ["075585003325"]
|
||||
@ -44,7 +21,29 @@ data "aws_ami" "flatcar" {
|
||||
|
||||
filter {
|
||||
name = "name"
|
||||
values = ["Flatcar-${local.flavor == "flatcar" ? local.channel : "stable"}-*"]
|
||||
values = ["Flatcar-${local.channel}-*"]
|
||||
}
|
||||
}
|
||||
|
||||
data "aws_ami" "flatcar-arm64" {
|
||||
count = var.arch == "arm64" ? 1 : 0
|
||||
|
||||
most_recent = true
|
||||
owners = ["075585003325"]
|
||||
|
||||
filter {
|
||||
name = "architecture"
|
||||
values = ["arm64"]
|
||||
}
|
||||
|
||||
filter {
|
||||
name = "virtualization-type"
|
||||
values = ["hvm"]
|
||||
}
|
||||
|
||||
filter {
|
||||
name = "name"
|
||||
values = ["Flatcar-${local.channel}-*"]
|
||||
}
|
||||
}
|
||||
|
@ -1,11 +1,10 @@
# Kubernetes assets (kubeconfig, manifests)
module "bootstrap" {
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=e75697ce35d7773705f0b9b28ce1ffbe99f9493c"
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=26bea83b957d18b99e16435405983181c4a6e159"

cluster_name = var.cluster_name
api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)]
etcd_servers = aws_route53_record.etcds.*.fqdn
asset_dir = var.asset_dir
networking = var.networking
network_mtu = var.network_mtu
pod_cidr = var.pod_cidr
@ -13,5 +12,6 @@ module "bootstrap" {
cluster_domain_suffix = var.cluster_domain_suffix
enable_reporting = var.enable_reporting
enable_aggregation = var.enable_aggregation
daemonset_tolerations = var.daemonset_tolerations
}
217
aws/flatcar-linux/kubernetes/cl/controller.yaml
Normal file
@ -0,0 +1,217 @@
|
||||
---
|
||||
systemd:
|
||||
units:
|
||||
- name: etcd-member.service
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=etcd (System Container)
|
||||
Documentation=https://github.com/etcd-io/etcd
|
||||
Requires=docker.service
|
||||
After=docker.service
|
||||
[Service]
|
||||
Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.1
|
||||
ExecStartPre=/usr/bin/docker run -d \
|
||||
--name etcd \
|
||||
--network host \
|
||||
--env-file /etc/etcd/etcd.env \
|
||||
--user 232:232 \
|
||||
--volume /etc/ssl/etcd:/etc/ssl/certs:ro \
|
||||
--volume /var/lib/etcd:/var/lib/etcd:rw \
|
||||
$${ETCD_IMAGE}
|
||||
ExecStart=docker logs -f etcd
|
||||
ExecStop=docker stop etcd
|
||||
ExecStopPost=docker rm etcd
|
||||
Restart=always
|
||||
RestartSec=10s
|
||||
TimeoutStartSec=0
|
||||
LimitNOFILE=40000
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
- name: docker.service
|
||||
enabled: true
|
||||
- name: locksmithd.service
|
||||
mask: true
|
||||
- name: wait-for-dns.service
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Wait for DNS entries
|
||||
Wants=systemd-resolved.service
|
||||
Before=kubelet.service
|
||||
[Service]
|
||||
Type=oneshot
|
||||
RemainAfterExit=true
|
||||
ExecStart=/bin/sh -c 'while ! /usr/bin/grep '^[^#[:space:]]' /etc/resolv.conf > /dev/null; do sleep 1; done'
|
||||
[Install]
|
||||
RequiredBy=kubelet.service
|
||||
RequiredBy=etcd-member.service
|
||||
- name: kubelet.service
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Kubelet (System Container)
|
||||
Requires=docker.service
|
||||
After=docker.service
|
||||
Requires=coreos-metadata.service
|
||||
After=coreos-metadata.service
|
||||
Wants=rpc-statd.service
|
||||
[Service]
|
||||
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.23.3
|
||||
EnvironmentFile=/run/metadata/coreos
|
||||
ExecStartPre=/bin/mkdir -p /etc/cni/net.d
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
|
||||
ExecStartPre=/bin/mkdir -p /opt/cni/bin
|
||||
ExecStartPre=/bin/mkdir -p /var/lib/calico
|
||||
ExecStartPre=/bin/mkdir -p /var/lib/kubelet/volumeplugins
|
||||
ExecStartPre=/usr/bin/bash -c "grep 'certificate-authority-data' /etc/kubernetes/kubeconfig | awk '{print $2}' | base64 -d > /etc/kubernetes/ca.crt"
|
||||
ExecStartPre=/usr/bin/docker run -d \
|
||||
--name kubelet \
|
||||
--privileged \
|
||||
--pid host \
|
||||
--network host \
|
||||
-v /etc/cni/net.d:/etc/cni/net.d:ro \
|
||||
-v /etc/kubernetes:/etc/kubernetes:ro \
|
||||
-v /etc/machine-id:/etc/machine-id:ro \
|
||||
-v /usr/lib/os-release:/etc/os-release:ro \
|
||||
-v /lib/modules:/lib/modules:ro \
|
||||
-v /run:/run \
|
||||
-v /sys/fs/cgroup:/sys/fs/cgroup \
|
||||
-v /var/lib/calico:/var/lib/calico:ro \
|
||||
-v /var/lib/containerd:/var/lib/containerd \
|
||||
-v /var/lib/kubelet:/var/lib/kubelet:rshared \
|
||||
-v /var/log:/var/log \
|
||||
-v /opt/cni/bin:/opt/cni/bin \
|
||||
$${KUBELET_IMAGE} \
|
||||
--anonymous-auth=false \
|
||||
--authentication-token-webhook \
|
||||
--authorization-mode=Webhook \
|
||||
--bootstrap-kubeconfig=/etc/kubernetes/kubeconfig \
|
||||
--cgroup-driver=systemd \
|
||||
--container-runtime=remote \
|
||||
--container-runtime-endpoint=unix:///run/containerd/containerd.sock \
|
||||
--client-ca-file=/etc/kubernetes/ca.crt \
|
||||
--cluster_dns=${cluster_dns_service_ip} \
|
||||
--cluster_domain=${cluster_domain_suffix} \
|
||||
--healthz-port=0 \
|
||||
--kubeconfig=/var/lib/kubelet/kubeconfig \
|
||||
--node-labels=node.kubernetes.io/controller="true" \
|
||||
--pod-manifest-path=/etc/kubernetes/manifests \
|
||||
--provider-id=aws:///$${COREOS_EC2_AVAILABILITY_ZONE}/$${COREOS_EC2_INSTANCE_ID} \
|
||||
--read-only-port=0 \
|
||||
--resolv-conf=/run/systemd/resolve/resolv.conf \
|
||||
--register-with-taints=node-role.kubernetes.io/controller=:NoSchedule \
|
||||
--rotate-certificates \
|
||||
--volume-plugin-dir=/var/lib/kubelet/volumeplugins
|
||||
ExecStart=docker logs -f kubelet
|
||||
ExecStop=docker stop kubelet
|
||||
ExecStopPost=docker rm kubelet
|
||||
Restart=always
|
||||
RestartSec=10
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
- name: bootstrap.service
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Kubernetes control plane
|
||||
Wants=docker.service
|
||||
After=docker.service
|
||||
ConditionPathExists=!/opt/bootstrap/bootstrap.done
|
||||
[Service]
|
||||
Type=oneshot
|
||||
RemainAfterExit=true
|
||||
WorkingDirectory=/opt/bootstrap
|
||||
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.23.3
|
||||
ExecStart=/usr/bin/docker run \
|
||||
-v /etc/kubernetes/pki:/etc/kubernetes/pki:ro \
|
||||
-v /opt/bootstrap/assets:/assets:ro \
|
||||
-v /opt/bootstrap/apply:/apply:ro \
|
||||
--entrypoint=/apply \
|
||||
$${KUBELET_IMAGE}
|
||||
ExecStartPost=/bin/touch /opt/bootstrap/bootstrap.done
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
storage:
|
||||
directories:
|
||||
- path: /var/lib/etcd
|
||||
filesystem: root
|
||||
mode: 0700
|
||||
overwrite: true
|
||||
files:
|
||||
- path: /etc/kubernetes/kubeconfig
|
||||
filesystem: root
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
||||
${kubeconfig}
|
||||
- path: /opt/bootstrap/layout
|
||||
filesystem: root
|
||||
mode: 0544
|
||||
contents:
|
||||
inline: |
|
||||
#!/bin/bash -e
|
||||
mkdir -p -- auth tls/etcd tls/k8s static-manifests manifests/coredns manifests-networking
|
||||
awk '/#####/ {filename=$2; next} {print > filename}' assets
|
||||
mkdir -p /etc/ssl/etcd/etcd
|
||||
mkdir -p /etc/kubernetes/pki
|
||||
mv tls/etcd/{peer*,server*} /etc/ssl/etcd/etcd/
|
||||
mv tls/etcd/etcd-client* /etc/kubernetes/pki/
|
||||
chown -R etcd:etcd /etc/ssl/etcd
|
||||
chmod -R 500 /etc/ssl/etcd
|
||||
chmod -R 700 /var/lib/etcd
|
||||
mv auth/* /etc/kubernetes/pki/
|
||||
mv tls/k8s/* /etc/kubernetes/pki/
|
||||
mkdir -p /etc/kubernetes/manifests
|
||||
mv static-manifests/* /etc/kubernetes/manifests/
|
||||
mkdir -p /opt/bootstrap/assets
|
||||
mv manifests /opt/bootstrap/assets/manifests
|
||||
mv manifests-networking/* /opt/bootstrap/assets/manifests/
|
||||
rm -rf assets auth static-manifests tls manifests-networking
|
||||
- path: /opt/bootstrap/apply
|
||||
filesystem: root
|
||||
mode: 0544
|
||||
contents:
|
||||
inline: |
|
||||
#!/bin/bash -e
|
||||
export KUBECONFIG=/etc/kubernetes/pki/admin.conf
|
||||
until kubectl version; do
|
||||
echo "Waiting for static pod control plane"
|
||||
sleep 5
|
||||
done
|
||||
until kubectl apply -f /assets/manifests -R; do
|
||||
echo "Retry applying manifests"
|
||||
sleep 5
|
||||
done
|
||||
- path: /etc/sysctl.d/max-user-watches.conf
|
||||
filesystem: root
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
||||
fs.inotify.max_user_watches=16184
|
||||
- path: /etc/etcd/etcd.env
|
||||
filesystem: root
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
||||
ETCD_NAME=${etcd_name}
|
||||
ETCD_DATA_DIR=/var/lib/etcd
|
||||
ETCD_ADVERTISE_CLIENT_URLS=https://${etcd_domain}:2379
|
||||
ETCD_INITIAL_ADVERTISE_PEER_URLS=https://${etcd_domain}:2380
|
||||
ETCD_LISTEN_CLIENT_URLS=https://0.0.0.0:2379
|
||||
ETCD_LISTEN_PEER_URLS=https://0.0.0.0:2380
|
||||
ETCD_LISTEN_METRICS_URLS=http://0.0.0.0:2381
|
||||
ETCD_INITIAL_CLUSTER=${etcd_initial_cluster}
|
||||
ETCD_STRICT_RECONFIG_CHECK=true
|
||||
ETCD_TRUSTED_CA_FILE=/etc/ssl/certs/etcd/server-ca.crt
|
||||
ETCD_CERT_FILE=/etc/ssl/certs/etcd/server.crt
|
||||
ETCD_KEY_FILE=/etc/ssl/certs/etcd/server.key
|
||||
ETCD_CLIENT_CERT_AUTH=true
|
||||
ETCD_PEER_TRUSTED_CA_FILE=/etc/ssl/certs/etcd/peer-ca.crt
|
||||
ETCD_PEER_CERT_FILE=/etc/ssl/certs/etcd/peer.crt
|
||||
ETCD_PEER_KEY_FILE=/etc/ssl/certs/etcd/peer.key
|
||||
ETCD_PEER_CLIENT_CERT_AUTH=true
|
||||
passwd:
|
||||
users:
|
||||
- name: core
|
||||
ssh_authorized_keys:
|
||||
- "${ssh_authorized_key}"
|
@ -67,7 +67,6 @@ data "template_file" "controller-configs" {
etcd_domain = "${var.cluster_name}-etcd${count.index}.${var.dns_zone}"
# etcd0=https://cluster-etcd0.example.com,etcd1=https://cluster-etcd1.example.com,...
etcd_initial_cluster = join(",", data.template_file.etcds.*.rendered)
cgroup_driver = local.flavor == "flatcar" && local.channel == "edge" ? "systemd" : "cgroupfs"
kubeconfig = indent(10, module.bootstrap.kubeconfig-kubelet)
ssh_authorized_key = var.ssh_authorized_key
cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
@ -17,6 +17,7 @@ resource "aws_route53_record" "apiserver" {
resource "aws_lb" "nlb" {
name = "${var.cluster_name}-nlb"
load_balancer_type = "network"
ip_address_type = "dualstack"
internal = false

subnets = aws_subnet.public.*.id
@ -1,5 +1,6 @@
output "kubeconfig-admin" {
value = module.bootstrap.kubeconfig-admin
value = module.bootstrap.kubeconfig-admin
sensitive = true
}

# Outputs for Kubernetes Ingress
@ -32,7 +33,8 @@ output "worker_security_groups" {
}

output "kubeconfig" {
value = module.bootstrap.kubeconfig-kubelet
value = module.bootstrap.kubeconfig-kubelet
sensitive = true
}

# Outputs for custom load balancing
@ -52,3 +54,10 @@ output "worker_target_group_https" {
value = module.workers.target_group_https
}

# Outputs for debug

output "assets_dist" {
value = module.bootstrap.assets_dist
sensitive = true
}
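For reference only (not part of this change): because these kubeconfig outputs are now marked sensitive, a root module consuming this module would typically re-export them and read the value from the CLI roughly as below; the module name and output path are assumptions, and `terraform output -raw` requires Terraform 0.14 or newer.

output "kubeconfig-admin" {
# assumed module name "tempest" for illustration
value = module.tempest.kubeconfig-admin
sensitive = true
}

# then, from the root module directory:
#   terraform output -raw kubeconfig-admin > ~/.kube/configs/tempest-config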
@ -13,6 +13,30 @@ resource "aws_security_group" "controller" {
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_security_group_rule" "controller-icmp" {
|
||||
count = var.networking == "cilium" ? 1 : 0
|
||||
|
||||
security_group_id = aws_security_group.controller.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "icmp"
|
||||
from_port = 8
|
||||
to_port = 0
|
||||
source_security_group_id = aws_security_group.worker.id
|
||||
}
|
||||
|
||||
resource "aws_security_group_rule" "controller-icmp-self" {
|
||||
count = var.networking == "cilium" ? 1 : 0
|
||||
|
||||
security_group_id = aws_security_group.controller.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "icmp"
|
||||
from_port = 8
|
||||
to_port = 0
|
||||
self = true
|
||||
}
|
||||
|
||||
resource "aws_security_group_rule" "controller-ssh" {
|
||||
security_group_id = aws_security_group.controller.id
|
||||
|
||||
@ -44,39 +68,31 @@ resource "aws_security_group_rule" "controller-etcd-metrics" {
|
||||
source_security_group_id = aws_security_group.worker.id
|
||||
}
|
||||
|
||||
# Allow Prometheus to scrape kube-proxy
|
||||
resource "aws_security_group_rule" "kube-proxy-metrics" {
|
||||
resource "aws_security_group_rule" "controller-cilium-health" {
|
||||
count = var.networking == "cilium" ? 1 : 0
|
||||
|
||||
security_group_id = aws_security_group.controller.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "tcp"
|
||||
from_port = 10249
|
||||
to_port = 10249
|
||||
from_port = 4240
|
||||
to_port = 4240
|
||||
source_security_group_id = aws_security_group.worker.id
|
||||
}
|
||||
|
||||
# Allow Prometheus to scrape kube-scheduler
|
||||
resource "aws_security_group_rule" "controller-scheduler-metrics" {
|
||||
resource "aws_security_group_rule" "controller-cilium-health-self" {
|
||||
count = var.networking == "cilium" ? 1 : 0
|
||||
|
||||
security_group_id = aws_security_group.controller.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "tcp"
|
||||
from_port = 10251
|
||||
to_port = 10251
|
||||
source_security_group_id = aws_security_group.worker.id
|
||||
}
|
||||
|
||||
# Allow Prometheus to scrape kube-controller-manager
|
||||
resource "aws_security_group_rule" "controller-manager-metrics" {
|
||||
security_group_id = aws_security_group.controller.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "tcp"
|
||||
from_port = 10252
|
||||
to_port = 10252
|
||||
source_security_group_id = aws_security_group.worker.id
|
||||
type = "ingress"
|
||||
protocol = "tcp"
|
||||
from_port = 4240
|
||||
to_port = 4240
|
||||
self = true
|
||||
}
|
||||
|
||||
# IANA VXLAN default
|
||||
resource "aws_security_group_rule" "controller-vxlan" {
|
||||
count = var.networking == "flannel" ? 1 : 0
|
||||
|
||||
@ -111,6 +127,31 @@ resource "aws_security_group_rule" "controller-apiserver" {
|
||||
cidr_blocks = ["0.0.0.0/0"]
|
||||
}
|
||||
|
||||
# Linux VXLAN default
|
||||
resource "aws_security_group_rule" "controller-linux-vxlan" {
|
||||
count = var.networking == "cilium" ? 1 : 0
|
||||
|
||||
security_group_id = aws_security_group.controller.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "udp"
|
||||
from_port = 8472
|
||||
to_port = 8472
|
||||
source_security_group_id = aws_security_group.worker.id
|
||||
}
|
||||
|
||||
resource "aws_security_group_rule" "controller-linux-vxlan-self" {
|
||||
count = var.networking == "cilium" ? 1 : 0
|
||||
|
||||
security_group_id = aws_security_group.controller.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "udp"
|
||||
from_port = 8472
|
||||
to_port = 8472
|
||||
self = true
|
||||
}
|
||||
|
||||
# Allow Prometheus to scrape node-exporter daemonset
|
||||
resource "aws_security_group_rule" "controller-node-exporter" {
|
||||
security_group_id = aws_security_group.controller.id
|
||||
@ -122,6 +163,17 @@ resource "aws_security_group_rule" "controller-node-exporter" {
|
||||
source_security_group_id = aws_security_group.worker.id
|
||||
}
|
||||
|
||||
# Allow Prometheus to scrape kube-proxy
|
||||
resource "aws_security_group_rule" "kube-proxy-metrics" {
|
||||
security_group_id = aws_security_group.controller.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "tcp"
|
||||
from_port = 10249
|
||||
to_port = 10249
|
||||
source_security_group_id = aws_security_group.worker.id
|
||||
}
|
||||
|
||||
# Allow apiserver to access kubelets for exec, log, port-forward
|
||||
resource "aws_security_group_rule" "controller-kubelet" {
|
||||
security_group_id = aws_security_group.controller.id
|
||||
@ -143,6 +195,28 @@ resource "aws_security_group_rule" "controller-kubelet-self" {
|
||||
self = true
|
||||
}
|
||||
|
||||
# Allow Prometheus to scrape kube-scheduler
|
||||
resource "aws_security_group_rule" "controller-scheduler-metrics" {
|
||||
security_group_id = aws_security_group.controller.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "tcp"
|
||||
from_port = 10259
|
||||
to_port = 10259
|
||||
source_security_group_id = aws_security_group.worker.id
|
||||
}
|
||||
|
||||
# Allow Prometheus to scrape kube-controller-manager
|
||||
resource "aws_security_group_rule" "controller-manager-metrics" {
|
||||
security_group_id = aws_security_group.controller.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "tcp"
|
||||
from_port = 10257
|
||||
to_port = 10257
|
||||
source_security_group_id = aws_security_group.worker.id
|
||||
}
|
||||
|
||||
resource "aws_security_group_rule" "controller-bgp" {
|
||||
security_group_id = aws_security_group.controller.id
|
||||
|
||||
@ -227,6 +301,30 @@ resource "aws_security_group" "worker" {
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_security_group_rule" "worker-icmp" {
|
||||
count = var.networking == "cilium" ? 1 : 0
|
||||
|
||||
security_group_id = aws_security_group.worker.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "icmp"
|
||||
from_port = 8
|
||||
to_port = 0
|
||||
source_security_group_id = aws_security_group.controller.id
|
||||
}
|
||||
|
||||
resource "aws_security_group_rule" "worker-icmp-self" {
|
||||
count = var.networking == "cilium" ? 1 : 0
|
||||
|
||||
security_group_id = aws_security_group.worker.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "icmp"
|
||||
from_port = 8
|
||||
to_port = 0
|
||||
self = true
|
||||
}
|
||||
|
||||
resource "aws_security_group_rule" "worker-ssh" {
|
||||
security_group_id = aws_security_group.worker.id
|
||||
|
||||
@ -257,6 +355,31 @@ resource "aws_security_group_rule" "worker-https" {
|
||||
cidr_blocks = ["0.0.0.0/0"]
|
||||
}
|
||||
|
||||
resource "aws_security_group_rule" "worker-cilium-health" {
|
||||
count = var.networking == "cilium" ? 1 : 0
|
||||
|
||||
security_group_id = aws_security_group.worker.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "tcp"
|
||||
from_port = 4240
|
||||
to_port = 4240
|
||||
source_security_group_id = aws_security_group.controller.id
|
||||
}
|
||||
|
||||
resource "aws_security_group_rule" "worker-cilium-health-self" {
|
||||
count = var.networking == "cilium" ? 1 : 0
|
||||
|
||||
security_group_id = aws_security_group.worker.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "tcp"
|
||||
from_port = 4240
|
||||
to_port = 4240
|
||||
self = true
|
||||
}
|
||||
|
||||
# IANA VXLAN default
|
||||
resource "aws_security_group_rule" "worker-vxlan" {
|
||||
count = var.networking == "flannel" ? 1 : 0
|
||||
|
||||
@ -281,6 +404,31 @@ resource "aws_security_group_rule" "worker-vxlan-self" {
|
||||
self = true
|
||||
}
|
||||
|
||||
# Linux VXLAN default
|
||||
resource "aws_security_group_rule" "worker-linux-vxlan" {
|
||||
count = var.networking == "cilium" ? 1 : 0
|
||||
|
||||
security_group_id = aws_security_group.worker.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "udp"
|
||||
from_port = 8472
|
||||
to_port = 8472
|
||||
source_security_group_id = aws_security_group.controller.id
|
||||
}
|
||||
|
||||
resource "aws_security_group_rule" "worker-linux-vxlan-self" {
|
||||
count = var.networking == "cilium" ? 1 : 0
|
||||
|
||||
security_group_id = aws_security_group.worker.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "udp"
|
||||
from_port = 8472
|
||||
to_port = 8472
|
||||
self = true
|
||||
}
|
||||
|
||||
# Allow Prometheus to scrape node-exporter daemonset
|
||||
resource "aws_security_group_rule" "worker-node-exporter" {
|
||||
security_group_id = aws_security_group.worker.id
|
@ -24,7 +24,7 @@ resource "null_resource" "copy-controller-secrets" {
|
||||
|
||||
provisioner "file" {
|
||||
content = join("\n", local.assets_bundle)
|
||||
destination = "$HOME/assets"
|
||||
destination = "/home/core/assets"
|
||||
}
|
||||
|
||||
provisioner "remote-exec" {
|
@ -43,26 +43,31 @@ variable "worker_type" {
|
||||
|
||||
variable "os_image" {
|
||||
type = string
|
||||
description = "AMI channel for a Container Linux derivative (coreos-stable, coreos-beta, coreos-alpha, flatcar-stable, flatcar-beta, flatcar-alpha, flatcar-edge)"
|
||||
description = "AMI channel for a Container Linux derivative (flatcar-stable, flatcar-beta, flatcar-alpha)"
|
||||
default = "flatcar-stable"
|
||||
|
||||
validation {
|
||||
condition = contains(["flatcar-stable", "flatcar-beta", "flatcar-alpha"], var.os_image)
|
||||
error_message = "The os_image must be flatcar-stable, flatcar-beta, or flatcar-alpha."
|
||||
}
|
||||
}
|
||||
|
||||
variable "disk_size" {
|
||||
type = number
|
||||
description = "Size of the EBS volume in GB"
|
||||
default = 40
|
||||
default = 30
|
||||
}
|
||||
|
||||
variable "disk_type" {
|
||||
type = string
|
||||
description = "Type of the EBS volume (e.g. standard, gp2, io1)"
|
||||
default = "gp2"
|
||||
description = "Type of the EBS volume (e.g. standard, gp2, gp3, io1)"
|
||||
default = "gp3"
|
||||
}
|
||||
|
||||
variable "disk_iops" {
|
||||
type = number
|
||||
description = "IOPS of the EBS volume (e.g. 100)"
|
||||
default = 0
|
||||
description = "IOPS of the EBS volume (e.g. 3000)"
|
||||
default = 3000
|
||||
}
|
||||
|
||||
variable "worker_price" {
|
||||
@ -137,8 +142,8 @@ variable "enable_reporting" {
|
||||
|
||||
variable "enable_aggregation" {
|
||||
type = bool
|
||||
description = "Enable the Kubernetes Aggregation Layer (defaults to false)"
|
||||
default = false
|
||||
description = "Enable the Kubernetes Aggregation Layer"
|
||||
default = true
|
||||
}
|
||||
|
||||
variable "worker_node_labels" {
|
||||
@ -149,15 +154,25 @@ variable "worker_node_labels" {
|
||||
|
||||
# unofficial, undocumented, unsupported
|
||||
|
||||
variable "asset_dir" {
|
||||
type = string
|
||||
description = "Absolute path to a directory where generated assets should be placed (contains secrets)"
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "cluster_domain_suffix" {
|
||||
type = string
|
||||
description = "Queries for domains with the suffix will be answered by CoreDNS. Default is cluster.local (e.g. foo.default.svc.cluster.local)"
|
||||
default = "cluster.local"
|
||||
}
|
||||
|
||||
variable "arch" {
|
||||
type = string
|
||||
description = "Container architecture (amd64 or arm64)"
|
||||
default = "amd64"
|
||||
|
||||
validation {
|
||||
condition = var.arch == "amd64" || var.arch == "arm64"
|
||||
error_message = "The arch must be amd64 or arm64."
|
||||
}
|
||||
}
|
||||
|
||||
variable "daemonset_tolerations" {
|
||||
type = list(string)
|
||||
description = "List of additional taint keys kube-system DaemonSets should tolerate (e.g. ['custom-role', 'gpu-role'])"
|
||||
default = []
|
||||
}
|
15
aws/flatcar-linux/kubernetes/versions.tf
Normal file
@ -0,0 +1,15 @@
|
||||
# Terraform version and plugin versions
|
||||
|
||||
terraform {
|
||||
required_version = ">= 0.13.0, < 2.0.0"
|
||||
required_providers {
|
||||
aws = ">= 2.23, <= 4.0"
|
||||
template = "~> 2.2"
|
||||
null = ">= 2.1"
|
||||
|
||||
ct = {
|
||||
source = "poseidon/ct"
|
||||
version = "~> 0.9"
|
||||
}
|
||||
}
|
||||
}
|
@ -9,6 +9,7 @@ module "workers" {
|
||||
worker_count = var.worker_count
|
||||
instance_type = var.worker_type
|
||||
os_image = var.os_image
|
||||
arch = var.arch
|
||||
disk_size = var.disk_size
|
||||
spot_price = var.worker_price
|
||||
target_groups = var.worker_target_groups
|
@ -1,33 +1,10 @@
|
||||
locals {
|
||||
# Pick a CoreOS Container Linux derivative
|
||||
# coreos-stable -> Container Linux AMI
|
||||
# Pick a Flatcar Linux AMI
|
||||
# flatcar-stable -> Flatcar Linux AMI
|
||||
ami_id = local.flavor == "flatcar" ? data.aws_ami.flatcar.image_id : data.aws_ami.coreos.image_id
|
||||
|
||||
flavor = split("-", var.os_image)[0]
|
||||
ami_id = var.arch == "arm64" ? data.aws_ami.flatcar-arm64[0].image_id : data.aws_ami.flatcar.image_id
|
||||
channel = split("-", var.os_image)[1]
|
||||
}
|
||||
|
||||
data "aws_ami" "coreos" {
|
||||
most_recent = true
|
||||
owners = ["595879546273"]
|
||||
|
||||
filter {
|
||||
name = "architecture"
|
||||
values = ["x86_64"]
|
||||
}
|
||||
|
||||
filter {
|
||||
name = "virtualization-type"
|
||||
values = ["hvm"]
|
||||
}
|
||||
|
||||
filter {
|
||||
name = "name"
|
||||
values = ["CoreOS-${local.flavor == "coreos" ? local.channel : "stable"}-*"]
|
||||
}
|
||||
}
|
||||
|
||||
data "aws_ami" "flatcar" {
|
||||
most_recent = true
|
||||
owners = ["075585003325"]
|
||||
@ -44,7 +21,28 @@ data "aws_ami" "flatcar" {
|
||||
|
||||
filter {
|
||||
name = "name"
|
||||
values = ["Flatcar-${local.flavor == "flatcar" ? local.channel : "stable"}-*"]
|
||||
values = ["Flatcar-${local.channel}-*"]
|
||||
}
|
||||
}
|
||||
|
||||
data "aws_ami" "flatcar-arm64" {
|
||||
count = var.arch == "arm64" ? 1 : 0
|
||||
|
||||
most_recent = true
|
||||
owners = ["075585003325"]
|
||||
|
||||
filter {
|
||||
name = "architecture"
|
||||
values = ["arm64"]
|
||||
}
|
||||
|
||||
filter {
|
||||
name = "virtualization-type"
|
||||
values = ["hvm"]
|
||||
}
|
||||
|
||||
filter {
|
||||
name = "name"
|
||||
values = ["Flatcar-${local.channel}-*"]
|
||||
}
|
||||
}
|
124
aws/flatcar-linux/kubernetes/workers/cl/worker.yaml
Normal file
@ -0,0 +1,124 @@
|
||||
---
|
||||
systemd:
|
||||
units:
|
||||
- name: docker.service
|
||||
enabled: true
|
||||
- name: locksmithd.service
|
||||
mask: true
|
||||
- name: wait-for-dns.service
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Wait for DNS entries
|
||||
Wants=systemd-resolved.service
|
||||
Before=kubelet.service
|
||||
[Service]
|
||||
Type=oneshot
|
||||
RemainAfterExit=true
|
||||
ExecStart=/bin/sh -c 'while ! /usr/bin/grep '^[^#[:space:]]' /etc/resolv.conf > /dev/null; do sleep 1; done'
|
||||
[Install]
|
||||
RequiredBy=kubelet.service
|
||||
- name: kubelet.service
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Kubelet
|
||||
Requires=docker.service
|
||||
After=docker.service
|
||||
Requires=coreos-metadata.service
|
||||
After=coreos-metadata.service
|
||||
Wants=rpc-statd.service
|
||||
[Service]
|
||||
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.23.3
|
||||
EnvironmentFile=/run/metadata/coreos
|
||||
ExecStartPre=/bin/mkdir -p /etc/cni/net.d
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
|
||||
ExecStartPre=/bin/mkdir -p /opt/cni/bin
|
||||
ExecStartPre=/bin/mkdir -p /var/lib/calico
|
||||
ExecStartPre=/bin/mkdir -p /var/lib/kubelet/volumeplugins
|
||||
ExecStartPre=/usr/bin/bash -c "grep 'certificate-authority-data' /etc/kubernetes/kubeconfig | awk '{print $2}' | base64 -d > /etc/kubernetes/ca.crt"
|
||||
# Podman, rkt, or runc run container processes, whereas docker run
|
||||
# is a client to a daemon and requires workarounds to use within a
|
||||
# systemd unit. https://github.com/moby/moby/issues/6791
|
||||
ExecStartPre=/usr/bin/docker run -d \
|
||||
--name kubelet \
|
||||
--privileged \
|
||||
--pid host \
|
||||
--network host \
|
||||
-v /etc/cni/net.d:/etc/cni/net.d:ro \
|
||||
-v /etc/kubernetes:/etc/kubernetes:ro \
|
||||
-v /etc/machine-id:/etc/machine-id:ro \
|
||||
-v /usr/lib/os-release:/etc/os-release:ro \
|
||||
-v /lib/modules:/lib/modules:ro \
|
||||
-v /run:/run \
|
||||
-v /sys/fs/cgroup:/sys/fs/cgroup \
|
||||
-v /var/lib/calico:/var/lib/calico:ro \
|
||||
-v /var/lib/containerd:/var/lib/containerd \
|
||||
-v /var/lib/kubelet:/var/lib/kubelet:rshared \
|
||||
-v /var/log:/var/log \
|
||||
-v /opt/cni/bin:/opt/cni/bin \
|
||||
$${KUBELET_IMAGE} \
|
||||
--anonymous-auth=false \
|
||||
--authentication-token-webhook \
|
||||
--authorization-mode=Webhook \
|
||||
--bootstrap-kubeconfig=/etc/kubernetes/kubeconfig \
|
||||
--cgroup-driver=systemd \
|
||||
--container-runtime=remote \
|
||||
--container-runtime-endpoint=unix:///run/containerd/containerd.sock \
|
||||
--client-ca-file=/etc/kubernetes/ca.crt \
|
||||
--cluster_dns=${cluster_dns_service_ip} \
|
||||
--cluster_domain=${cluster_domain_suffix} \
|
||||
--healthz-port=0 \
|
||||
--kubeconfig=/var/lib/kubelet/kubeconfig \
|
||||
--node-labels=node.kubernetes.io/node \
|
||||
%{~ for label in split(",", node_labels) ~}
|
||||
--node-labels=${label} \
|
||||
%{~ endfor ~}
|
||||
%{~ for taint in split(",", node_taints) ~}
|
||||
--register-with-taints=${taint} \
|
||||
%{~ endfor ~}
|
||||
--pod-manifest-path=/etc/kubernetes/manifests \
|
||||
--provider-id=aws:///$${COREOS_EC2_AVAILABILITY_ZONE}/$${COREOS_EC2_INSTANCE_ID} \
|
||||
--read-only-port=0 \
|
||||
--resolv-conf=/run/systemd/resolve/resolv.conf \
|
||||
--rotate-certificates \
|
||||
--volume-plugin-dir=/var/lib/kubelet/volumeplugins
|
||||
ExecStart=docker logs -f kubelet
|
||||
ExecStop=docker stop kubelet
|
||||
ExecStopPost=docker rm kubelet
|
||||
Restart=always
|
||||
RestartSec=5
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
- name: delete-node.service
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Delete Kubernetes node on shutdown
|
||||
[Service]
|
||||
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.23.3
|
||||
Type=oneshot
|
||||
RemainAfterExit=true
|
||||
ExecStart=/bin/true
|
||||
ExecStop=/bin/bash -c '/usr/bin/docker run -v /var/lib/kubelet:/var/lib/kubelet:ro --entrypoint /usr/local/bin/kubectl $${KUBELET_IMAGE} --kubeconfig=/var/lib/kubelet/kubeconfig delete node $HOSTNAME'
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
storage:
|
||||
files:
|
||||
- path: /etc/kubernetes/kubeconfig
|
||||
filesystem: root
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
||||
${kubeconfig}
|
||||
- path: /etc/sysctl.d/max-user-watches.conf
|
||||
filesystem: root
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
||||
fs.inotify.max_user_watches=16184
|
||||
passwd:
|
||||
users:
|
||||
- name: core
|
||||
ssh_authorized_keys:
|
||||
- "${ssh_authorized_key}"
|
@ -36,20 +36,25 @@ variable "instance_type" {
|
||||
|
||||
variable "os_image" {
|
||||
type = string
|
||||
description = "AMI channel for a Container Linux derivative (coreos-stable, coreos-beta, coreos-alpha, flatcar-stable, flatcar-beta, flatcar-alpha, flatcar-edge)"
|
||||
description = "AMI channel for a Container Linux derivative (flatcar-stable, flatcar-beta, flatcar-alpha)"
|
||||
default = "flatcar-stable"
|
||||
|
||||
validation {
|
||||
condition = contains(["flatcar-stable", "flatcar-beta", "flatcar-alpha"], var.os_image)
|
||||
error_message = "The os_image must be flatcar-stable, flatcar-beta, or flatcar-alpha."
|
||||
}
|
||||
}
|
||||
|
||||
variable "disk_size" {
|
||||
type = number
|
||||
description = "Size of the EBS volume in GB"
|
||||
default = 40
|
||||
default = 30
|
||||
}
|
||||
|
||||
variable "disk_type" {
|
||||
type = string
|
||||
description = "Type of the EBS volume (e.g. standard, gp2, io1)"
|
||||
default = "gp2"
|
||||
description = "Type of the EBS volume (e.g. standard, gp2, gp3, io1)"
|
||||
default = "gp3"
|
||||
}
|
||||
|
||||
variable "disk_iops" {
|
||||
@ -108,3 +113,22 @@ variable "node_labels" {
|
||||
description = "List of initial node labels"
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "node_taints" {
|
||||
type = list(string)
|
||||
description = "List of initial node taints"
|
||||
default = []
|
||||
}
|
||||
|
||||
# unofficial, undocumented, unsupported
|
||||
|
||||
variable "arch" {
|
||||
type = string
|
||||
description = "Container architecture (amd64 or arm64)"
|
||||
default = "amd64"
|
||||
|
||||
validation {
|
||||
condition = var.arch == "amd64" || var.arch == "arm64"
|
||||
error_message = "The arch must be amd64 or arm64."
|
||||
}
|
||||
}
|
14
aws/flatcar-linux/kubernetes/workers/versions.tf
Normal file
@ -0,0 +1,14 @@
|
||||
# Terraform version and plugin versions
|
||||
|
||||
terraform {
|
||||
required_version = ">= 0.13.0, < 2.0.0"
|
||||
required_providers {
|
||||
aws = ">= 2.23, <= 4.0"
|
||||
template = "~> 2.2"
|
||||
|
||||
ct = {
|
||||
source = "poseidon/ct"
|
||||
version = "~> 0.9"
|
||||
}
|
||||
}
|
||||
}
|
@ -85,8 +85,8 @@ data "template_file" "worker-config" {
|
||||
ssh_authorized_key = var.ssh_authorized_key
|
||||
cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
|
||||
cluster_domain_suffix = var.cluster_domain_suffix
|
||||
cgroup_driver = local.flavor == "flatcar" && local.channel == "edge" ? "systemd" : "cgroupfs"
|
||||
node_labels = join(",", var.node_labels)
|
||||
node_taints = join(",", var.node_taints)
|
||||
}
|
||||
}
|
||||
|
@ -1,197 +0,0 @@
|
||||
---
|
||||
systemd:
|
||||
units:
|
||||
- name: etcd-member.service
|
||||
enabled: true
|
||||
dropins:
|
||||
- name: 40-etcd-cluster.conf
|
||||
contents: |
|
||||
[Service]
|
||||
Environment="ETCD_IMAGE_TAG=v3.4.9"
|
||||
Environment="ETCD_IMAGE_URL=docker://quay.io/coreos/etcd"
|
||||
Environment="RKT_RUN_ARGS=--insecure-options=image"
|
||||
Environment="ETCD_NAME=${etcd_name}"
|
||||
Environment="ETCD_ADVERTISE_CLIENT_URLS=https://${etcd_domain}:2379"
|
||||
Environment="ETCD_INITIAL_ADVERTISE_PEER_URLS=https://${etcd_domain}:2380"
|
||||
Environment="ETCD_LISTEN_CLIENT_URLS=https://0.0.0.0:2379"
|
||||
Environment="ETCD_LISTEN_PEER_URLS=https://0.0.0.0:2380"
|
||||
Environment="ETCD_LISTEN_METRICS_URLS=http://0.0.0.0:2381"
|
||||
Environment="ETCD_INITIAL_CLUSTER=${etcd_initial_cluster}"
|
||||
Environment="ETCD_STRICT_RECONFIG_CHECK=true"
|
||||
Environment="ETCD_SSL_DIR=/etc/ssl/etcd"
|
||||
Environment="ETCD_TRUSTED_CA_FILE=/etc/ssl/certs/etcd/server-ca.crt"
|
||||
Environment="ETCD_CERT_FILE=/etc/ssl/certs/etcd/server.crt"
|
||||
Environment="ETCD_KEY_FILE=/etc/ssl/certs/etcd/server.key"
|
||||
Environment="ETCD_CLIENT_CERT_AUTH=true"
|
||||
Environment="ETCD_PEER_TRUSTED_CA_FILE=/etc/ssl/certs/etcd/peer-ca.crt"
|
||||
Environment="ETCD_PEER_CERT_FILE=/etc/ssl/certs/etcd/peer.crt"
|
||||
Environment="ETCD_PEER_KEY_FILE=/etc/ssl/certs/etcd/peer.key"
|
||||
Environment="ETCD_PEER_CLIENT_CERT_AUTH=true"
|
||||
- name: docker.service
|
||||
enabled: true
|
||||
- name: locksmithd.service
|
||||
mask: true
|
||||
- name: wait-for-dns.service
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Wait for DNS entries
|
||||
Wants=systemd-resolved.service
|
||||
Before=kubelet.service
|
||||
[Service]
|
||||
Type=oneshot
|
||||
RemainAfterExit=true
|
||||
ExecStart=/bin/sh -c 'while ! /usr/bin/grep '^[^#[:space:]]' /etc/resolv.conf > /dev/null; do sleep 1; done'
|
||||
[Install]
|
||||
RequiredBy=kubelet.service
|
||||
RequiredBy=etcd-member.service
|
||||
- name: kubelet.service
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Kubelet
|
||||
Wants=rpc-statd.service
|
||||
[Service]
|
||||
Environment=KUBELET_IMAGE=docker://quay.io/poseidon/kubelet:v1.18.4
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/cni/net.d
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
|
||||
ExecStartPre=/bin/mkdir -p /opt/cni/bin
|
||||
ExecStartPre=/bin/mkdir -p /var/lib/calico
|
||||
ExecStartPre=/bin/mkdir -p /var/lib/kubelet/volumeplugins
|
||||
ExecStartPre=/usr/bin/bash -c "grep 'certificate-authority-data' /etc/kubernetes/kubeconfig | awk '{print $2}' | base64 -d > /etc/kubernetes/ca.crt"
|
||||
ExecStartPre=-/usr/bin/rkt rm --uuid-file=/var/cache/kubelet-pod.uuid
|
||||
ExecStart=/usr/bin/rkt run \
|
||||
--uuid-file-save=/var/cache/kubelet-pod.uuid \
|
||||
--stage1-from-dir=stage1-fly.aci \
|
||||
--hosts-entry host \
|
||||
--insecure-options=image \
|
||||
--volume etc-kubernetes,kind=host,source=/etc/kubernetes,readOnly=true \
|
||||
--mount volume=etc-kubernetes,target=/etc/kubernetes \
|
||||
--volume etc-machine-id,kind=host,source=/etc/machine-id,readOnly=true \
|
||||
--mount volume=etc-machine-id,target=/etc/machine-id \
|
||||
--volume etc-os-release,kind=host,source=/usr/lib/os-release,readOnly=true \
|
||||
--mount volume=etc-os-release,target=/etc/os-release \
|
||||
--volume=etc-resolv,kind=host,source=/etc/resolv.conf,readOnly=true \
|
||||
--mount volume=etc-resolv,target=/etc/resolv.conf \
|
||||
--volume etc-ssl-certs,kind=host,source=/etc/ssl/certs,readOnly=true \
|
||||
--mount volume=etc-ssl-certs,target=/etc/ssl/certs \
|
||||
--volume lib-modules,kind=host,source=/lib/modules,readOnly=true \
|
||||
--mount volume=lib-modules,target=/lib/modules \
|
||||
--volume run,kind=host,source=/run \
|
||||
--mount volume=run,target=/run \
|
||||
--volume usr-share-certs,kind=host,source=/usr/share/ca-certificates,readOnly=true \
|
||||
--mount volume=usr-share-certs,target=/usr/share/ca-certificates \
|
||||
--volume var-lib-calico,kind=host,source=/var/lib/calico,readOnly=true \
|
||||
--mount volume=var-lib-calico,target=/var/lib/calico \
|
||||
--volume var-lib-docker,kind=host,source=/var/lib/docker \
|
||||
--mount volume=var-lib-docker,target=/var/lib/docker \
|
||||
--volume var-lib-kubelet,kind=host,source=/var/lib/kubelet,recursive=true \
|
||||
--mount volume=var-lib-kubelet,target=/var/lib/kubelet \
|
||||
--volume var-log,kind=host,source=/var/log \
|
||||
--mount volume=var-log,target=/var/log \
|
||||
--volume opt-cni-bin,kind=host,source=/opt/cni/bin \
|
||||
--mount volume=opt-cni-bin,target=/opt/cni/bin \
|
||||
$${KUBELET_IMAGE} -- \
|
||||
--anonymous-auth=false \
|
||||
--authentication-token-webhook \
|
||||
--authorization-mode=Webhook \
|
||||
--bootstrap-kubeconfig=/etc/kubernetes/kubeconfig \
|
||||
--client-ca-file=/etc/kubernetes/ca.crt \
|
||||
--cluster_dns=${cluster_dns_service_ip} \
|
||||
--cluster_domain=${cluster_domain_suffix} \
|
||||
--cni-conf-dir=/etc/kubernetes/cni/net.d \
|
||||
--healthz-port=0 \
|
||||
--kubeconfig=/var/lib/kubelet/kubeconfig \
|
||||
--network-plugin=cni \
|
||||
--node-labels=node.kubernetes.io/controller="true" \
|
||||
--pod-manifest-path=/etc/kubernetes/manifests \
|
||||
--read-only-port=0 \
|
||||
--register-with-taints=node-role.kubernetes.io/controller=:NoSchedule \
|
||||
--rotate-certificates \
|
||||
--volume-plugin-dir=/var/lib/kubelet/volumeplugins
|
||||
ExecStop=-/usr/bin/rkt stop --uuid-file=/var/cache/kubelet-pod.uuid
|
||||
Restart=always
|
||||
RestartSec=10
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
- name: bootstrap.service
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Kubernetes control plane
|
||||
ConditionPathExists=!/opt/bootstrap/bootstrap.done
|
||||
[Service]
|
||||
Type=oneshot
|
||||
RemainAfterExit=true
|
||||
WorkingDirectory=/opt/bootstrap
|
||||
ExecStart=/usr/bin/rkt run \
|
||||
--trust-keys-from-https \
|
||||
--volume config,kind=host,source=/etc/kubernetes/bootstrap-secrets \
|
||||
--mount volume=config,target=/etc/kubernetes/secrets \
|
||||
--volume assets,kind=host,source=/opt/bootstrap/assets \
|
||||
--mount volume=assets,target=/assets \
|
||||
--volume script,kind=host,source=/opt/bootstrap/apply \
|
||||
--mount volume=script,target=/apply \
|
||||
--insecure-options=image \
|
||||
docker://quay.io/poseidon/kubelet:v1.18.4 \
|
||||
--net=host \
|
||||
--dns=host \
|
||||
--exec=/apply
|
||||
ExecStartPost=/bin/touch /opt/bootstrap/bootstrap.done
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
storage:
|
||||
files:
|
||||
- path: /etc/kubernetes/kubeconfig
|
||||
filesystem: root
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
||||
${kubeconfig}
|
||||
- path: /opt/bootstrap/layout
|
||||
filesystem: root
|
||||
mode: 0544
|
||||
contents:
|
||||
inline: |
|
||||
#!/bin/bash -e
|
||||
mkdir -p -- auth tls/etcd tls/k8s static-manifests manifests/coredns manifests-networking
|
||||
awk '/#####/ {filename=$2; next} {print > filename}' assets
|
||||
mkdir -p /etc/ssl/etcd/etcd
|
||||
mkdir -p /etc/kubernetes/bootstrap-secrets
|
||||
mv tls/etcd/{peer*,server*} /etc/ssl/etcd/etcd/
|
||||
mv tls/etcd/etcd-client* /etc/kubernetes/bootstrap-secrets/
|
||||
chown -R etcd:etcd /etc/ssl/etcd
|
||||
chmod -R 500 /etc/ssl/etcd
|
||||
mv auth/kubeconfig /etc/kubernetes/bootstrap-secrets/
|
||||
mv tls/k8s/* /etc/kubernetes/bootstrap-secrets/
|
||||
mkdir -p /etc/kubernetes/manifests
|
||||
mv static-manifests/* /etc/kubernetes/manifests/
|
||||
mkdir -p /opt/bootstrap/assets
|
||||
mv manifests /opt/bootstrap/assets/manifests
|
||||
mv manifests-networking/* /opt/bootstrap/assets/manifests/
|
||||
rm -rf assets auth static-manifests tls manifests-networking
|
||||
- path: /opt/bootstrap/apply
|
||||
filesystem: root
|
||||
mode: 0544
|
||||
contents:
|
||||
inline: |
|
||||
#!/bin/bash -e
|
||||
export KUBECONFIG=/etc/kubernetes/secrets/kubeconfig
|
||||
until kubectl version; do
|
||||
echo "Waiting for static pod control plane"
|
||||
sleep 5
|
||||
done
|
||||
until kubectl apply -f /assets/manifests -R; do
|
||||
echo "Retry applying manifests"
|
||||
sleep 5
|
||||
done
|
||||
- path: /etc/sysctl.d/max-user-watches.conf
|
||||
filesystem: root
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
||||
fs.inotify.max_user_watches=16184
|
||||
passwd:
|
||||
users:
|
||||
- name: core
|
||||
ssh_authorized_keys:
|
||||
- "${ssh_authorized_key}"
|
@ -1,12 +0,0 @@
|
||||
# Terraform version and plugin versions
|
||||
|
||||
terraform {
|
||||
required_version = "~> 0.12.6"
|
||||
required_providers {
|
||||
azurerm = "~> 2.8"
|
||||
ct = "~> 0.4"
|
||||
template = "~> 2.1"
|
||||
null = "~> 2.1"
|
||||
}
|
||||
}
|
||||
|
@ -1,138 +0,0 @@
|
||||
---
|
||||
systemd:
|
||||
units:
|
||||
- name: docker.service
|
||||
enabled: true
|
||||
- name: locksmithd.service
|
||||
mask: true
|
||||
- name: wait-for-dns.service
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Wait for DNS entries
|
||||
Wants=systemd-resolved.service
|
||||
Before=kubelet.service
|
||||
[Service]
|
||||
Type=oneshot
|
||||
RemainAfterExit=true
|
||||
ExecStart=/bin/sh -c 'while ! /usr/bin/grep '^[^#[:space:]]' /etc/resolv.conf > /dev/null; do sleep 1; done'
|
||||
[Install]
|
||||
RequiredBy=kubelet.service
|
||||
- name: kubelet.service
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Kubelet
|
||||
Wants=rpc-statd.service
|
||||
[Service]
|
||||
Environment=KUBELET_IMAGE=docker://quay.io/poseidon/kubelet:v1.18.4
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/cni/net.d
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
|
||||
ExecStartPre=/bin/mkdir -p /opt/cni/bin
|
||||
ExecStartPre=/bin/mkdir -p /var/lib/calico
|
||||
ExecStartPre=/bin/mkdir -p /var/lib/kubelet/volumeplugins
|
||||
ExecStartPre=/usr/bin/bash -c "grep 'certificate-authority-data' /etc/kubernetes/kubeconfig | awk '{print $2}' | base64 -d > /etc/kubernetes/ca.crt"
|
||||
ExecStartPre=-/usr/bin/rkt rm --uuid-file=/var/cache/kubelet-pod.uuid
|
||||
ExecStart=/usr/bin/rkt run \
|
||||
--uuid-file-save=/var/cache/kubelet-pod.uuid \
|
||||
--stage1-from-dir=stage1-fly.aci \
|
||||
--hosts-entry host \
|
||||
--insecure-options=image \
|
||||
--volume etc-kubernetes,kind=host,source=/etc/kubernetes,readOnly=true \
|
||||
--mount volume=etc-kubernetes,target=/etc/kubernetes \
|
||||
--volume etc-machine-id,kind=host,source=/etc/machine-id,readOnly=true \
|
||||
--mount volume=etc-machine-id,target=/etc/machine-id \
|
||||
--volume etc-os-release,kind=host,source=/usr/lib/os-release,readOnly=true \
|
||||
--mount volume=etc-os-release,target=/etc/os-release \
|
||||
--volume=etc-resolv,kind=host,source=/etc/resolv.conf,readOnly=true \
|
||||
--mount volume=etc-resolv,target=/etc/resolv.conf \
|
||||
--volume etc-ssl-certs,kind=host,source=/etc/ssl/certs,readOnly=true \
|
||||
--mount volume=etc-ssl-certs,target=/etc/ssl/certs \
|
||||
--volume lib-modules,kind=host,source=/lib/modules,readOnly=true \
|
||||
--mount volume=lib-modules,target=/lib/modules \
|
||||
--volume run,kind=host,source=/run \
|
||||
--mount volume=run,target=/run \
|
||||
--volume usr-share-certs,kind=host,source=/usr/share/ca-certificates,readOnly=true \
|
||||
--mount volume=usr-share-certs,target=/usr/share/ca-certificates \
|
||||
--volume var-lib-calico,kind=host,source=/var/lib/calico,readOnly=true \
|
||||
--mount volume=var-lib-calico,target=/var/lib/calico \
|
||||
--volume var-lib-docker,kind=host,source=/var/lib/docker \
|
||||
--mount volume=var-lib-docker,target=/var/lib/docker \
|
||||
--volume var-lib-kubelet,kind=host,source=/var/lib/kubelet,recursive=true \
|
||||
--mount volume=var-lib-kubelet,target=/var/lib/kubelet \
|
||||
--volume var-log,kind=host,source=/var/log \
|
||||
--mount volume=var-log,target=/var/log \
|
||||
--volume opt-cni-bin,kind=host,source=/opt/cni/bin \
|
||||
--mount volume=opt-cni-bin,target=/opt/cni/bin \
|
||||
$${KUBELET_IMAGE} -- \
|
||||
--anonymous-auth=false \
|
||||
--authentication-token-webhook \
|
||||
--authorization-mode=Webhook \
|
||||
--bootstrap-kubeconfig=/etc/kubernetes/kubeconfig \
|
||||
--client-ca-file=/etc/kubernetes/ca.crt \
|
||||
--cluster_dns=${cluster_dns_service_ip} \
|
||||
--cluster_domain=${cluster_domain_suffix} \
|
||||
--cni-conf-dir=/etc/kubernetes/cni/net.d \
|
||||
--healthz-port=0 \
|
||||
--kubeconfig=/var/lib/kubelet/kubeconfig \
|
||||
--network-plugin=cni \
|
||||
--node-labels=node.kubernetes.io/node \
|
||||
%{~ for label in split(",", node_labels) ~}
|
||||
--node-labels=${label} \
|
||||
%{~ endfor ~}
|
||||
--pod-manifest-path=/etc/kubernetes/manifests \
|
||||
--read-only-port=0 \
|
||||
--rotate-certificates \
|
||||
--volume-plugin-dir=/var/lib/kubelet/volumeplugins
|
||||
ExecStop=-/usr/bin/rkt stop --uuid-file=/var/cache/kubelet-pod.uuid
|
||||
Restart=always
|
||||
RestartSec=5
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
- name: delete-node.service
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Waiting to delete Kubernetes node on shutdown
|
||||
[Service]
|
||||
Type=oneshot
|
||||
RemainAfterExit=true
|
||||
ExecStart=/bin/true
|
||||
ExecStop=/etc/kubernetes/delete-node
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
storage:
|
||||
files:
|
||||
- path: /etc/kubernetes/kubeconfig
|
||||
filesystem: root
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
||||
${kubeconfig}
|
||||
- path: /etc/sysctl.d/max-user-watches.conf
|
||||
filesystem: root
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
||||
fs.inotify.max_user_watches=16184
|
||||
- path: /etc/kubernetes/delete-node
|
||||
filesystem: root
|
||||
mode: 0744
|
||||
contents:
|
||||
inline: |
|
||||
#!/bin/bash
|
||||
set -e
|
||||
exec /usr/bin/rkt run \
|
||||
--trust-keys-from-https \
|
||||
--volume config,kind=host,source=/etc/kubernetes \
|
||||
--mount volume=config,target=/etc/kubernetes \
|
||||
--insecure-options=image \
|
||||
docker://quay.io/poseidon/kubelet:v1.18.4 \
|
||||
--net=host \
|
||||
--dns=host \
|
||||
--exec=/usr/local/bin/kubectl -- --kubeconfig=/etc/kubernetes/kubeconfig delete node $(hostname | tr '[:upper:]' '[:lower:]')
|
||||
passwd:
|
||||
users:
|
||||
- name: core
|
||||
ssh_authorized_keys:
|
||||
- "${ssh_authorized_key}"
|
@ -1,4 +0,0 @@
|
||||
|
||||
terraform {
|
||||
required_version = ">= 0.12"
|
||||
}
|
@ -11,10 +11,10 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster
|
||||
|
||||
## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>
|
||||
|
||||
* Kubernetes v1.18.4 (upstream)
|
||||
* Single or multi-master, [Calico](https://www.projectcalico.org/) or [flannel](https://github.com/coreos/flannel) networking
|
||||
* Kubernetes v1.23.3 (upstream)
|
||||
* Single or multi-master, [Calico](https://www.projectcalico.org/) or [Cilium](https://github.com/cilium/cilium) or [flannel](https://github.com/coreos/flannel) networking
|
||||
* On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/), SELinux enforcing
|
||||
* Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [spot priority](https://typhoon.psdn.io/fedora-coreos/azure/#low-priority) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/) customization
|
||||
* Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [spot priority](https://typhoon.psdn.io/fedora-coreos/azure/#low-priority) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#hosts) customization
|
||||
* Ready for Ingress, Prometheus, Grafana, and other optional [addons](https://typhoon.psdn.io/addons/overview/)
|
||||
|
||||
## Docs
|
||||
|
@ -1,11 +1,10 @@
|
||||
# Kubernetes assets (kubeconfig, manifests)
|
||||
module "bootstrap" {
|
||||
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=e75697ce35d7773705f0b9b28ce1ffbe99f9493c"
|
||||
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=26bea83b957d18b99e16435405983181c4a6e159"
|
||||
|
||||
cluster_name = var.cluster_name
|
||||
api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)]
|
||||
etcd_servers = formatlist("%s.%s", azurerm_dns_a_record.etcds.*.name, var.dns_zone)
|
||||
asset_dir = var.asset_dir
|
||||
|
||||
networking = var.networking
|
||||
|
||||
@ -19,8 +18,6 @@ module "bootstrap" {
|
||||
cluster_domain_suffix = var.cluster_domain_suffix
|
||||
enable_reporting = var.enable_reporting
|
||||
enable_aggregation = var.enable_aggregation
|
||||
|
||||
# Fedora CoreOS
|
||||
trusted_certs_dir = "/etc/pki/tls/certs"
|
||||
daemonset_tolerations = var.daemonset_tolerations
|
||||
}
|
||||
|
||||
|
@@ -1,6 +1,6 @@
---
variant: fcos
version: 1.0.0
version: 1.4.0
systemd:
units:
- name: etcd-member.service
@@ -8,32 +8,31 @@ systemd:
contents: |
[Unit]
Description=etcd (System Container)
Documentation=https://github.com/coreos/etcd
Documentation=https://github.com/etcd-io/etcd
Wants=network-online.target network.target
After=network-online.target
[Service]
# https://github.com/opencontainers/runc/pull/1807
# Type=notify
# NotifyAccess=exec
Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.1
Type=exec
Restart=on-failure
RestartSec=10s
TimeoutStartSec=0
LimitNOFILE=40000
ExecStartPre=/bin/mkdir -p /var/lib/etcd
ExecStartPre=-/usr/bin/podman rm etcd
#--volume $${NOTIFY_SOCKET}:/run/systemd/notify \
ExecStart=/usr/bin/podman run --name etcd \
--env-file /etc/etcd/etcd.env \
--network host \
--volume /var/lib/etcd:/var/lib/etcd:rw,Z \
--volume /etc/ssl/etcd:/etc/ssl/certs:ro,Z \
quay.io/coreos/etcd:v3.4.9
$${ETCD_IMAGE}
ExecStop=/usr/bin/podman stop etcd
Restart=on-failure
RestartSec=10s
TimeoutStartSec=0
LimitNOFILE=40000
[Install]
WantedBy=multi-user.target
- name: docker.service
- name: containerd.service
enabled: true
- name: docker.service
mask: true
- name: wait-for-dns.service
enabled: true
contents: |
@@ -54,8 +53,8 @@ systemd:
Description=Kubelet (System Container)
Wants=rpc-statd.service
[Service]
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.18.4
ExecStartPre=/bin/mkdir -p /etc/kubernetes/cni/net.d
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.23.3
ExecStartPre=/bin/mkdir -p /etc/cni/net.d
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
ExecStartPre=/bin/mkdir -p /opt/cni/bin
ExecStartPre=/bin/mkdir -p /var/lib/calico
@@ -66,16 +65,14 @@ systemd:
--privileged \
--pid host \
--network host \
--volume /etc/cni/net.d:/etc/cni/net.d:ro,z \
--volume /etc/kubernetes:/etc/kubernetes:ro,z \
--volume /usr/lib/os-release:/etc/os-release:ro \
--volume /etc/ssl/certs:/etc/ssl/certs:ro \
--volume /lib/modules:/lib/modules:ro \
--volume /run:/run \
--volume /sys/fs/cgroup:/sys/fs/cgroup:ro \
--volume /sys/fs/cgroup/systemd:/sys/fs/cgroup/systemd \
--volume /etc/pki/tls/certs:/usr/share/ca-certificates:ro \
--volume /sys/fs/cgroup:/sys/fs/cgroup \
--volume /var/lib/calico:/var/lib/calico:ro \
--volume /var/lib/docker:/var/lib/docker \
--volume /var/lib/containerd:/var/lib/containerd \
--volume /var/lib/kubelet:/var/lib/kubelet:rshared,z \
--volume /var/log:/var/log \
--volume /var/run/lock:/var/run/lock:z \
@@ -87,17 +84,18 @@ systemd:
--bootstrap-kubeconfig=/etc/kubernetes/kubeconfig \
--cgroup-driver=systemd \
--cgroups-per-qos=true \
--container-runtime=remote \
--container-runtime-endpoint=unix:///run/containerd/containerd.sock \
--enforce-node-allocatable=pods \
--client-ca-file=/etc/kubernetes/ca.crt \
--cluster_dns=${cluster_dns_service_ip} \
--cluster_domain=${cluster_domain_suffix} \
--cni-conf-dir=/etc/kubernetes/cni/net.d \
--healthz-port=0 \
--kubeconfig=/var/lib/kubelet/kubeconfig \
--network-plugin=cni \
--node-labels=node.kubernetes.io/controller="true" \
--pod-manifest-path=/etc/kubernetes/manifests \
--read-only-port=0 \
--resolv-conf=/run/systemd/resolve/resolv.conf \
--register-with-taints=node-role.kubernetes.io/controller=:NoSchedule \
--rotate-certificates \
--volume-plugin-dir=/var/lib/kubelet/volumeplugins
@@ -119,15 +117,17 @@ systemd:
ExecStartPre=-/usr/bin/podman rm bootstrap
ExecStart=/usr/bin/podman run --name bootstrap \
--network host \
--volume /etc/kubernetes/bootstrap-secrets:/etc/kubernetes/secrets:ro,z \
--volume /etc/kubernetes/pki:/etc/kubernetes/pki:ro,z \
--volume /opt/bootstrap/assets:/assets:ro,Z \
--volume /opt/bootstrap/apply:/apply:ro,Z \
--entrypoint=/apply \
quay.io/poseidon/kubelet:v1.18.4
quay.io/poseidon/kubelet:v1.23.3
ExecStartPost=/bin/touch /opt/bootstrap/bootstrap.done
ExecStartPost=-/usr/bin/podman stop bootstrap
storage:
directories:
- path: /var/lib/etcd
mode: 0700
- path: /etc/kubernetes
- path: /opt/bootstrap
files:
@@ -144,25 +144,26 @@ storage:
mkdir -p -- auth tls/etcd tls/k8s static-manifests manifests/coredns manifests-networking
awk '/#####/ {filename=$2; next} {print > filename}' assets
mkdir -p /etc/ssl/etcd/etcd
mkdir -p /etc/kubernetes/bootstrap-secrets
mkdir -p /etc/kubernetes/pki
mv tls/etcd/{peer*,server*} /etc/ssl/etcd/etcd/
mv tls/etcd/etcd-client* /etc/kubernetes/bootstrap-secrets/
mv tls/etcd/etcd-client* /etc/kubernetes/pki/
chown -R etcd:etcd /etc/ssl/etcd
chmod -R 500 /etc/ssl/etcd
mv auth/kubeconfig /etc/kubernetes/bootstrap-secrets/
mv tls/k8s/* /etc/kubernetes/bootstrap-secrets/
mv auth/* /etc/kubernetes/pki/
mv tls/k8s/* /etc/kubernetes/pki/
mkdir -p /etc/kubernetes/manifests
mv static-manifests/* /etc/kubernetes/manifests/
mkdir -p /opt/bootstrap/assets
mv manifests /opt/bootstrap/assets/manifests
mv manifests-networking/* /opt/bootstrap/assets/manifests/
rm -rf assets auth static-manifests tls manifests-networking
chcon -R -u system_u -t container_file_t /etc/kubernetes/pki
- path: /opt/bootstrap/apply
mode: 0544
contents:
inline: |
#!/bin/bash -e
export KUBECONFIG=/etc/kubernetes/secrets/kubeconfig
export KUBECONFIG=/etc/kubernetes/pki/admin.conf
until kubectl version; do
echo "Waiting for static pod control plane"
sleep 5
@@ -175,6 +176,18 @@ storage:
contents:
inline: |
fs.inotify.max_user_watches=16184
- path: /etc/sysctl.d/reverse-path-filter.conf
contents:
inline: |
net.ipv4.conf.default.rp_filter=0
net.ipv4.conf.*.rp_filter=0
- path: /etc/systemd/network/50-flannel.link
contents:
inline: |
[Match]
OriginalName=flannel*
[Link]
MACAddressPolicy=none
- path: /etc/systemd/system.conf.d/accounting.conf
contents:
inline: |
@@ -186,8 +199,6 @@ storage:
mode: 0644
contents:
inline: |
# TODO: Use a systemd dropin once podman v1.4.5 is avail.
NOTIFY_SOCKET=/run/systemd/notify
ETCD_NAME=${etcd_name}
ETCD_DATA_DIR=/var/lib/etcd
ETCD_ADVERTISE_CLIENT_URLS=https://${etcd_domain}:2379
@@ -205,6 +216,26 @@ storage:
ETCD_PEER_CERT_FILE=/etc/ssl/certs/etcd/peer.crt
ETCD_PEER_KEY_FILE=/etc/ssl/certs/etcd/peer.key
ETCD_PEER_CLIENT_CERT_AUTH=true
- path: /etc/fedora-coreos/iptables-legacy.stamp
- path: /etc/containerd/config.toml
overwrite: true
contents:
inline: |
version = 2
root = "/var/lib/containerd"
state = "/run/containerd"
subreaper = true
oom_score = -999
[grpc]
address = "/run/containerd/containerd.sock"
uid = 0
gid = 0
[plugins."io.containerd.grpc.v1.cri"]
enable_selinux = true
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
SystemdCgroup = true
passwd:
users:
- name: core
@@ -59,11 +59,11 @@ resource "azurerm_lb_rule" "apiserver" {
loadbalancer_id = azurerm_lb.cluster.id
frontend_ip_configuration_name = "apiserver"

protocol = "Tcp"
frontend_port = 6443
backend_port = 6443
backend_address_pool_id = azurerm_lb_backend_address_pool.controller.id
probe_id = azurerm_lb_probe.apiserver.id
protocol = "Tcp"
frontend_port = 6443
backend_port = 6443
backend_address_pool_ids = [azurerm_lb_backend_address_pool.controller.id]
probe_id = azurerm_lb_probe.apiserver.id
}

resource "azurerm_lb_rule" "ingress-http" {
@@ -74,11 +74,11 @@ resource "azurerm_lb_rule" "ingress-http" {
frontend_ip_configuration_name = "ingress"
disable_outbound_snat = true

protocol = "Tcp"
frontend_port = 80
backend_port = 80
backend_address_pool_id = azurerm_lb_backend_address_pool.worker.id
probe_id = azurerm_lb_probe.ingress.id
protocol = "Tcp"
frontend_port = 80
backend_port = 80
backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker.id]
probe_id = azurerm_lb_probe.ingress.id
}

resource "azurerm_lb_rule" "ingress-https" {
@@ -89,11 +89,11 @@ resource "azurerm_lb_rule" "ingress-https" {
frontend_ip_configuration_name = "ingress"
disable_outbound_snat = true

protocol = "Tcp"
frontend_port = 443
backend_port = 443
backend_address_pool_id = azurerm_lb_backend_address_pool.worker.id
probe_id = azurerm_lb_probe.ingress.id
protocol = "Tcp"
frontend_port = 443
backend_port = 443
backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker.id]
probe_id = azurerm_lb_probe.ingress.id
}

# Worker outbound TCP/UDP SNAT
@@ -112,16 +112,12 @@ resource "azurerm_lb_outbound_rule" "worker-outbound" {

# Address pool of controllers
resource "azurerm_lb_backend_address_pool" "controller" {
resource_group_name = azurerm_resource_group.cluster.name

name = "controller"
loadbalancer_id = azurerm_lb.cluster.id
}

# Address pool of workers
resource "azurerm_lb_backend_address_pool" "worker" {
resource_group_name = azurerm_resource_group.cluster.name

name = "worker"
loadbalancer_id = azurerm_lb.cluster.id
}
@@ -1,5 +1,6 @@
output "kubeconfig-admin" {
value = module.bootstrap.kubeconfig-admin
value = module.bootstrap.kubeconfig-admin
sensitive = true
}

# Outputs for Kubernetes Ingress
@@ -32,7 +33,8 @@ output "security_group_id" {
}

output "kubeconfig" {
value = module.bootstrap.kubeconfig-kubelet
value = module.bootstrap.kubeconfig-kubelet
sensitive = true
}

# Outputs for custom firewalling
@@ -57,3 +59,11 @@ output "backend_address_pool_id" {
description = "ID of the worker backend address pool"
value = azurerm_lb_backend_address_pool.worker.id
}

# Outputs for debug

output "assets_dist" {
value = module.bootstrap.assets_dist
sensitive = true
}
@@ -7,6 +7,21 @@ resource "azurerm_network_security_group" "controller" {
location = azurerm_resource_group.cluster.location
}

resource "azurerm_network_security_rule" "controller-icmp" {
resource_group_name = azurerm_resource_group.cluster.name

name = "allow-icmp"
network_security_group_name = azurerm_network_security_group.controller.name
priority = "1995"
access = "Allow"
direction = "Inbound"
protocol = "Icmp"
source_port_range = "*"
destination_port_range = "*"
source_address_prefixes = [azurerm_subnet.controller.address_prefix, azurerm_subnet.worker.address_prefix]
destination_address_prefix = azurerm_subnet.controller.address_prefix
}

resource "azurerm_network_security_rule" "controller-ssh" {
resource_group_name = azurerm_resource_group.cluster.name

@@ -80,7 +95,7 @@ resource "azurerm_network_security_rule" "controller-kube-metrics" {
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "10251-10252"
destination_port_range = "10257-10259"
source_address_prefix = azurerm_subnet.worker.address_prefix
destination_address_prefix = azurerm_subnet.controller.address_prefix
}
@@ -100,6 +115,22 @@ resource "azurerm_network_security_rule" "controller-apiserver" {
destination_address_prefix = azurerm_subnet.controller.address_prefix
}

resource "azurerm_network_security_rule" "controller-cilium-health" {
resource_group_name = azurerm_resource_group.cluster.name
count = var.networking == "cilium" ? 1 : 0

name = "allow-cilium-health"
network_security_group_name = azurerm_network_security_group.controller.name
priority = "2019"
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "4240"
source_address_prefixes = [azurerm_subnet.controller.address_prefix, azurerm_subnet.worker.address_prefix]
destination_address_prefix = azurerm_subnet.controller.address_prefix
}

resource "azurerm_network_security_rule" "controller-vxlan" {
resource_group_name = azurerm_resource_group.cluster.name

@@ -115,6 +146,21 @@ resource "azurerm_network_security_rule" "controller-vxlan" {
destination_address_prefix = azurerm_subnet.controller.address_prefix
}

resource "azurerm_network_security_rule" "controller-linux-vxlan" {
resource_group_name = azurerm_resource_group.cluster.name

name = "allow-linux-vxlan"
network_security_group_name = azurerm_network_security_group.controller.name
priority = "2021"
access = "Allow"
direction = "Inbound"
protocol = "Udp"
source_port_range = "*"
destination_port_range = "8472"
source_address_prefixes = [azurerm_subnet.controller.address_prefix, azurerm_subnet.worker.address_prefix]
destination_address_prefix = azurerm_subnet.controller.address_prefix
}

# Allow Prometheus to scrape node-exporter daemonset
resource "azurerm_network_security_rule" "controller-node-exporter" {
resource_group_name = azurerm_resource_group.cluster.name
@@ -191,6 +237,21 @@ resource "azurerm_network_security_group" "worker" {
location = azurerm_resource_group.cluster.location
}

resource "azurerm_network_security_rule" "worker-icmp" {
resource_group_name = azurerm_resource_group.cluster.name

name = "allow-icmp"
network_security_group_name = azurerm_network_security_group.worker.name
priority = "1995"
access = "Allow"
direction = "Inbound"
protocol = "Icmp"
source_port_range = "*"
destination_port_range = "*"
source_address_prefixes = [azurerm_subnet.controller.address_prefix, azurerm_subnet.worker.address_prefix]
destination_address_prefix = azurerm_subnet.worker.address_prefix
}

resource "azurerm_network_security_rule" "worker-ssh" {
resource_group_name = azurerm_resource_group.cluster.name

@@ -236,6 +297,22 @@ resource "azurerm_network_security_rule" "worker-https" {
destination_address_prefix = azurerm_subnet.worker.address_prefix
}

resource "azurerm_network_security_rule" "worker-cilium-health" {
resource_group_name = azurerm_resource_group.cluster.name
count = var.networking == "cilium" ? 1 : 0

name = "allow-cilium-health"
network_security_group_name = azurerm_network_security_group.worker.name
priority = "2014"
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "4240"
source_address_prefixes = [azurerm_subnet.controller.address_prefix, azurerm_subnet.worker.address_prefix]
destination_address_prefix = azurerm_subnet.worker.address_prefix
}

resource "azurerm_network_security_rule" "worker-vxlan" {
resource_group_name = azurerm_resource_group.cluster.name

@@ -251,6 +328,21 @@ resource "azurerm_network_security_rule" "worker-vxlan" {
destination_address_prefix = azurerm_subnet.worker.address_prefix
}

resource "azurerm_network_security_rule" "worker-linux-vxlan" {
resource_group_name = azurerm_resource_group.cluster.name

name = "allow-linux-vxlan"
network_security_group_name = azurerm_network_security_group.worker.name
priority = "2016"
access = "Allow"
direction = "Inbound"
protocol = "Udp"
source_port_range = "*"
destination_port_range = "8472"
source_address_prefixes = [azurerm_subnet.controller.address_prefix, azurerm_subnet.worker.address_prefix]
destination_address_prefix = azurerm_subnet.worker.address_prefix
}

# Allow Prometheus to scrape node-exporter daemonset
resource "azurerm_network_security_rule" "worker-node-exporter" {
resource_group_name = azurerm_resource_group.cluster.name
@@ -25,7 +25,7 @@ resource "null_resource" "copy-controller-secrets" {

provisioner "file" {
content = join("\n", local.assets_bundle)
destination = "$HOME/assets"
destination = "/home/core/assets"
}

provisioner "remote-exec" {
@@ -54,7 +54,7 @@ variable "os_image" {
variable "disk_size" {
type = number
description = "Size of the disk in GB"
default = 40
default = 30
}

variable "worker_priority" {
@@ -65,13 +65,13 @@ variable "worker_priority" {

variable "controller_snippets" {
type = list(string)
description = "Controller Fedora CoreOS Config snippets"
description = "Controller Butane snippets"
default = []
}

variable "worker_snippets" {
type = list(string)
description = "Worker Fedora CoreOS Config snippets"
description = "Worker Butane snippets"
default = []
}

@@ -117,8 +117,8 @@ variable "enable_reporting" {

variable "enable_aggregation" {
type = bool
description = "Enable the Kubernetes Aggregation Layer (defaults to false)"
default = false
description = "Enable the Kubernetes Aggregation Layer"
default = true
}

variable "worker_node_labels" {
@@ -129,15 +129,14 @@ variable "worker_node_labels" {

# unofficial, undocumented, unsupported

variable "asset_dir" {
type = string
description = "Absolute path to a directory where generated assets should be placed (contains secrets)"
default = ""
}

variable "cluster_domain_suffix" {
type = string
description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. foo.default.svc.cluster.local) "
default = "cluster.local"
}

variable "daemonset_tolerations" {
type = list(string)
description = "List of additional taint keys kube-system DaemonSets should tolerate (e.g. ['custom-role', 'gpu-role'])"
default = []
}
@@ -1,12 +1,16 @@
# Terraform version and plugin versions

terraform {
required_version = "~> 0.12.6"
required_version = ">= 0.13.0, < 2.0.0"
required_providers {
azurerm = "~> 2.8"
ct = "~> 0.4"
template = "~> 2.1"
null = "~> 2.1"
template = "~> 2.2"
null = ">= 2.1"

ct = {
source = "poseidon/ct"
version = "~> 0.9"
}
}
}
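Because the module now requires Terraform 0.13+ and sources the `ct` provider from the registry, the root configuration that consumes it needs matching provider declarations. A minimal sketch, assuming standard registry sources; the empty `features {}` block is required by azurerm 2.x:

```hcl
# Root module provider setup (sketch; versions mirror the constraints above).
terraform {
  required_version = ">= 0.13.0, < 2.0.0"
  required_providers {
    azurerm = {
      source  = "hashicorp/azurerm"
      version = "~> 2.8"
    }
    ct = {
      source  = "poseidon/ct"
      version = "~> 0.9"
    }
  }
}

provider "azurerm" {
  features {} # azurerm 2.x requires this block, even when empty
}

provider "ct" {}
```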
@@ -1,10 +1,12 @@
---
variant: fcos
version: 1.0.0
version: 1.4.0
systemd:
units:
- name: docker.service
- name: containerd.service
enabled: true
- name: docker.service
mask: true
- name: wait-for-dns.service
enabled: true
contents: |
@@ -24,8 +26,8 @@ systemd:
Description=Kubelet (System Container)
Wants=rpc-statd.service
[Service]
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.18.4
ExecStartPre=/bin/mkdir -p /etc/kubernetes/cni/net.d
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.23.3
ExecStartPre=/bin/mkdir -p /etc/cni/net.d
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
ExecStartPre=/bin/mkdir -p /opt/cni/bin
ExecStartPre=/bin/mkdir -p /var/lib/calico
@@ -36,16 +38,14 @@ systemd:
--privileged \
--pid host \
--network host \
--volume /etc/cni/net.d:/etc/cni/net.d:ro,z \
--volume /etc/kubernetes:/etc/kubernetes:ro,z \
--volume /usr/lib/os-release:/etc/os-release:ro \
--volume /etc/ssl/certs:/etc/ssl/certs:ro \
--volume /lib/modules:/lib/modules:ro \
--volume /run:/run \
--volume /sys/fs/cgroup:/sys/fs/cgroup:ro \
--volume /sys/fs/cgroup/systemd:/sys/fs/cgroup/systemd \
--volume /etc/pki/tls/certs:/usr/share/ca-certificates:ro \
--volume /sys/fs/cgroup:/sys/fs/cgroup \
--volume /var/lib/calico:/var/lib/calico:ro \
--volume /var/lib/docker:/var/lib/docker \
--volume /var/lib/containerd:/var/lib/containerd \
--volume /var/lib/kubelet:/var/lib/kubelet:rshared,z \
--volume /var/log:/var/log \
--volume /var/run/lock:/var/run/lock:z \
@@ -57,20 +57,24 @@ systemd:
--bootstrap-kubeconfig=/etc/kubernetes/kubeconfig \
--cgroup-driver=systemd \
--cgroups-per-qos=true \
--container-runtime=remote \
--container-runtime-endpoint=unix:///run/containerd/containerd.sock \
--enforce-node-allocatable=pods \
--client-ca-file=/etc/kubernetes/ca.crt \
--cluster_dns=${cluster_dns_service_ip} \
--cluster_domain=${cluster_domain_suffix} \
--cni-conf-dir=/etc/kubernetes/cni/net.d \
--healthz-port=0 \
--kubeconfig=/var/lib/kubelet/kubeconfig \
--network-plugin=cni \
--node-labels=node.kubernetes.io/node \
%{~ for label in split(",", node_labels) ~}
--node-labels=${label} \
%{~ endfor ~}
%{~ for taint in split(",", node_taints) ~}
--register-with-taints=${taint} \
%{~ endfor ~}
--pod-manifest-path=/etc/kubernetes/manifests \
--read-only-port=0 \
--resolv-conf=/run/systemd/resolve/resolv.conf \
--rotate-certificates \
--volume-plugin-dir=/var/lib/kubelet/volumeplugins
ExecStop=-/usr/bin/podman stop kubelet
@@ -85,10 +89,11 @@ systemd:
[Unit]
Description=Delete Kubernetes node on shutdown
[Service]
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.23.3
Type=oneshot
RemainAfterExit=true
ExecStart=/bin/true
ExecStop=/bin/bash -c '/usr/bin/podman run --volume /etc/kubernetes:/etc/kubernetes:ro,z --entrypoint /usr/local/bin/kubectl quay.io/poseidon/kubelet:v1.18.4 --kubeconfig=/etc/kubernetes/kubeconfig delete node $HOSTNAME'
ExecStop=/bin/bash -c '/usr/bin/podman run --volume /var/lib/kubelet:/var/lib/kubelet:ro,z --entrypoint /usr/local/bin/kubectl $${KUBELET_IMAGE} --kubeconfig=/var/lib/kubelet/kubeconfig delete node $HOSTNAME'
[Install]
WantedBy=multi-user.target
storage:
@@ -104,6 +109,18 @@ storage:
contents:
inline: |
fs.inotify.max_user_watches=16184
- path: /etc/sysctl.d/reverse-path-filter.conf
contents:
inline: |
net.ipv4.conf.default.rp_filter=0
net.ipv4.conf.*.rp_filter=0
- path: /etc/systemd/network/50-flannel.link
contents:
inline: |
[Match]
OriginalName=flannel*
[Link]
MACAddressPolicy=none
- path: /etc/systemd/system.conf.d/accounting.conf
contents:
inline: |
@@ -111,10 +128,29 @@ storage:
DefaultCPUAccounting=yes
DefaultMemoryAccounting=yes
DefaultBlockIOAccounting=yes
- path: /etc/fedora-coreos/iptables-legacy.stamp
- path: /etc/containerd/config.toml
overwrite: true
contents:
inline: |
version = 2
root = "/var/lib/containerd"
state = "/run/containerd"
subreaper = true
oom_score = -999
[grpc]
address = "/run/containerd/containerd.sock"
uid = 0
gid = 0
[plugins."io.containerd.grpc.v1.cri"]
enable_selinux = true
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
SystemdCgroup = true
passwd:
users:
- name: core
ssh_authorized_keys:
- ${ssh_authorized_key}
@@ -57,7 +57,7 @@ variable "priority" {

variable "snippets" {
type = list(string)
description = "Fedora CoreOS Config snippets"
description = "Butane snippets"
default = []
}

@@ -88,6 +88,12 @@ variable "node_labels" {
default = []
}

variable "node_taints" {
type = list(string)
description = "List of initial node taints"
default = []
}

# unofficial, undocumented, unsupported

variable "cluster_domain_suffix" {
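The new `node_taints` variable (and the renamed `snippets` description) belong to the internal workers module, which is also what worker pools instantiate. A rough sketch of a pool that sets taints and labels, with assumed module path, pool name, and `"Spot"` value, and with the other required pool arguments deliberately elided:

```hcl
# Sketch only: module ref and values are placeholders, not from this diff.
module "ramius-spot-pool" {
  source = "git::https://github.com/poseidon/typhoon//azure/fedora-coreos/kubernetes/workers?ref=v1.23.3"

  # ...required pool arguments (resource group, subnet, kubeconfig, etc.) elided

  priority    = "Spot"                      # assumed spot-priority value
  node_labels = ["pool=spot"]               # becomes --node-labels on the kubelet
  node_taints = ["pool=spot:NoSchedule"]    # becomes --register-with-taints
  snippets    = []                          # optional Butane snippets
}
```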
@@ -1,4 +1,14 @@
# Terraform version and plugin versions

terraform {
required_version = ">= 0.12"
required_version = ">= 0.13.0, < 2.0.0"
required_providers {
azurerm = "~> 2.8"
template = "~> 2.2"

ct = {
source = "poseidon/ct"
version = "~> 0.9"
}
}
}
@@ -87,6 +87,7 @@ data "template_file" "worker-config" {
cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
cluster_domain_suffix = var.cluster_domain_suffix
node_labels = join(",", var.node_labels)
node_taints = join(",", var.node_taints)
}
}
@@ -11,13 +11,13 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster

## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>

* Kubernetes v1.18.4 (upstream)
* Single or multi-master, [Calico](https://www.projectcalico.org/) or [flannel](https://github.com/coreos/flannel) networking
* Kubernetes v1.23.3 (upstream)
* Single or multi-master, [Calico](https://www.projectcalico.org/) or [Cilium](https://github.com/cilium/cilium) or [flannel](https://github.com/coreos/flannel) networking
* On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/)
* Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [low-priority](https://typhoon.psdn.io/cl/azure/#low-priority) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#container-linux) customization
* Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [low-priority](https://typhoon.psdn.io/flatcar-linux/azure/#low-priority) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#hosts) customization
* Ready for Ingress, Prometheus, Grafana, and other optional [addons](https://typhoon.psdn.io/addons/overview/)

## Docs

Please see the [official docs](https://typhoon.psdn.io) and the Azure [tutorial](https://typhoon.psdn.io/cl/azure/).
Please see the [official docs](https://typhoon.psdn.io) and the Azure [tutorial](https://typhoon.psdn.io/flatcar-linux/azure/).
@@ -1,11 +1,10 @@
# Kubernetes assets (kubeconfig, manifests)
module "bootstrap" {
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=e75697ce35d7773705f0b9b28ce1ffbe99f9493c"
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=26bea83b957d18b99e16435405983181c4a6e159"

cluster_name = var.cluster_name
api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)]
etcd_servers = formatlist("%s.%s", azurerm_dns_a_record.etcds.*.name, var.dns_zone)
asset_dir = var.asset_dir

networking = var.networking

@@ -19,5 +18,6 @@ module "bootstrap" {
cluster_domain_suffix = var.cluster_domain_suffix
enable_reporting = var.enable_reporting
enable_aggregation = var.enable_aggregation
daemonset_tolerations = var.daemonset_tolerations
}
azure/flatcar-linux/kubernetes/cl/controller.yaml (new file, 213 lines)
@@ -0,0 +1,213 @@
---
systemd:
units:
- name: etcd-member.service
enabled: true
contents: |
[Unit]
Description=etcd (System Container)
Documentation=https://github.com/etcd-io/etcd
Requires=docker.service
After=docker.service
[Service]
Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.1
ExecStartPre=/usr/bin/docker run -d \
--name etcd \
--network host \
--env-file /etc/etcd/etcd.env \
--user 232:232 \
--volume /etc/ssl/etcd:/etc/ssl/certs:ro \
--volume /var/lib/etcd:/var/lib/etcd:rw \
$${ETCD_IMAGE}
ExecStart=docker logs -f etcd
ExecStop=docker stop etcd
ExecStopPost=docker rm etcd
Restart=always
RestartSec=10s
TimeoutStartSec=0
LimitNOFILE=40000
[Install]
WantedBy=multi-user.target
- name: docker.service
enabled: true
- name: locksmithd.service
mask: true
- name: wait-for-dns.service
enabled: true
contents: |
[Unit]
Description=Wait for DNS entries
Wants=systemd-resolved.service
Before=kubelet.service
[Service]
Type=oneshot
RemainAfterExit=true
ExecStart=/bin/sh -c 'while ! /usr/bin/grep '^[^#[:space:]]' /etc/resolv.conf > /dev/null; do sleep 1; done'
[Install]
RequiredBy=kubelet.service
RequiredBy=etcd-member.service
- name: kubelet.service
enabled: true
contents: |
[Unit]
Description=Kubelet (System Container)
Requires=docker.service
After=docker.service
Wants=rpc-statd.service
[Service]
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.23.3
ExecStartPre=/bin/mkdir -p /etc/cni/net.d
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
ExecStartPre=/bin/mkdir -p /opt/cni/bin
ExecStartPre=/bin/mkdir -p /var/lib/calico
ExecStartPre=/bin/mkdir -p /var/lib/kubelet/volumeplugins
ExecStartPre=/usr/bin/bash -c "grep 'certificate-authority-data' /etc/kubernetes/kubeconfig | awk '{print $2}' | base64 -d > /etc/kubernetes/ca.crt"
ExecStartPre=/usr/bin/docker run -d \
--name kubelet \
--privileged \
--pid host \
--network host \
-v /etc/cni/net.d:/etc/cni/net.d:ro \
-v /etc/kubernetes:/etc/kubernetes:ro \
-v /etc/machine-id:/etc/machine-id:ro \
-v /usr/lib/os-release:/etc/os-release:ro \
-v /lib/modules:/lib/modules:ro \
-v /run:/run \
-v /sys/fs/cgroup:/sys/fs/cgroup \
-v /var/lib/calico:/var/lib/calico:ro \
-v /var/lib/containerd:/var/lib/containerd \
-v /var/lib/kubelet:/var/lib/kubelet:rshared \
-v /var/log:/var/log \
-v /opt/cni/bin:/opt/cni/bin \
$${KUBELET_IMAGE} \
--anonymous-auth=false \
--authentication-token-webhook \
--authorization-mode=Webhook \
--bootstrap-kubeconfig=/etc/kubernetes/kubeconfig \
--cgroup-driver=systemd \
--container-runtime=remote \
--container-runtime-endpoint=unix:///run/containerd/containerd.sock \
--client-ca-file=/etc/kubernetes/ca.crt \
--cluster_dns=${cluster_dns_service_ip} \
--cluster_domain=${cluster_domain_suffix} \
--healthz-port=0 \
--kubeconfig=/var/lib/kubelet/kubeconfig \
--node-labels=node.kubernetes.io/controller="true" \
--pod-manifest-path=/etc/kubernetes/manifests \
--read-only-port=0 \
--resolv-conf=/run/systemd/resolve/resolv.conf \
--register-with-taints=node-role.kubernetes.io/controller=:NoSchedule \
--rotate-certificates \
--volume-plugin-dir=/var/lib/kubelet/volumeplugins
ExecStart=docker logs -f kubelet
ExecStop=docker stop kubelet
ExecStopPost=docker rm kubelet
Restart=always
RestartSec=10
[Install]
WantedBy=multi-user.target
- name: bootstrap.service
contents: |
[Unit]
Description=Kubernetes control plane
Wants=docker.service
After=docker.service
ConditionPathExists=!/opt/bootstrap/bootstrap.done
[Service]
Type=oneshot
RemainAfterExit=true
WorkingDirectory=/opt/bootstrap
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.23.3
ExecStart=/usr/bin/docker run \
-v /etc/kubernetes/pki:/etc/kubernetes/pki:ro \
-v /opt/bootstrap/assets:/assets:ro \
-v /opt/bootstrap/apply:/apply:ro \
--entrypoint=/apply \
$${KUBELET_IMAGE}
ExecStartPost=/bin/touch /opt/bootstrap/bootstrap.done
[Install]
WantedBy=multi-user.target
storage:
directories:
- path: /var/lib/etcd
filesystem: root
mode: 0700
overwrite: true
files:
- path: /etc/kubernetes/kubeconfig
filesystem: root
mode: 0644
contents:
inline: |
${kubeconfig}
- path: /opt/bootstrap/layout
filesystem: root
mode: 0544
contents:
inline: |
#!/bin/bash -e
mkdir -p -- auth tls/etcd tls/k8s static-manifests manifests/coredns manifests-networking
awk '/#####/ {filename=$2; next} {print > filename}' assets
mkdir -p /etc/ssl/etcd/etcd
mkdir -p /etc/kubernetes/pki
mv tls/etcd/{peer*,server*} /etc/ssl/etcd/etcd/
mv tls/etcd/etcd-client* /etc/kubernetes/pki/
chown -R etcd:etcd /etc/ssl/etcd
chmod -R 500 /etc/ssl/etcd
chmod -R 700 /var/lib/etcd
mv auth/* /etc/kubernetes/pki/
mv tls/k8s/* /etc/kubernetes/pki/
mkdir -p /etc/kubernetes/manifests
mv static-manifests/* /etc/kubernetes/manifests/
mkdir -p /opt/bootstrap/assets
mv manifests /opt/bootstrap/assets/manifests
mv manifests-networking/* /opt/bootstrap/assets/manifests/
rm -rf assets auth static-manifests tls manifests-networking
- path: /opt/bootstrap/apply
filesystem: root
mode: 0544
contents:
inline: |
#!/bin/bash -e
export KUBECONFIG=/etc/kubernetes/pki/admin.conf
until kubectl version; do
echo "Waiting for static pod control plane"
sleep 5
done
until kubectl apply -f /assets/manifests -R; do
echo "Retry applying manifests"
sleep 5
done
- path: /etc/sysctl.d/max-user-watches.conf
filesystem: root
mode: 0644
contents:
inline: |
fs.inotify.max_user_watches=16184
- path: /etc/etcd/etcd.env
filesystem: root
mode: 0644
contents:
inline: |
ETCD_NAME=${etcd_name}
ETCD_DATA_DIR=/var/lib/etcd
ETCD_ADVERTISE_CLIENT_URLS=https://${etcd_domain}:2379
ETCD_INITIAL_ADVERTISE_PEER_URLS=https://${etcd_domain}:2380
ETCD_LISTEN_CLIENT_URLS=https://0.0.0.0:2379
ETCD_LISTEN_PEER_URLS=https://0.0.0.0:2380
ETCD_LISTEN_METRICS_URLS=http://0.0.0.0:2381
ETCD_INITIAL_CLUSTER=${etcd_initial_cluster}
ETCD_STRICT_RECONFIG_CHECK=true
ETCD_TRUSTED_CA_FILE=/etc/ssl/certs/etcd/server-ca.crt
ETCD_CERT_FILE=/etc/ssl/certs/etcd/server.crt
ETCD_KEY_FILE=/etc/ssl/certs/etcd/server.key
ETCD_CLIENT_CERT_AUTH=true
ETCD_PEER_TRUSTED_CA_FILE=/etc/ssl/certs/etcd/peer-ca.crt
ETCD_PEER_CERT_FILE=/etc/ssl/certs/etcd/peer.crt
ETCD_PEER_KEY_FILE=/etc/ssl/certs/etcd/peer.key
ETCD_PEER_CLIENT_CERT_AUTH=true
passwd:
users:
- name: core
ssh_authorized_keys:
- "${ssh_authorized_key}"
@@ -16,9 +16,7 @@ resource "azurerm_dns_a_record" "etcds" {

locals {
# Container Linux derivative
# coreos-stable -> Container Linux Stable
# flatcar-stable -> Flatcar Linux Stable
flavor = split("-", var.os_image)[0]
channel = split("-", var.os_image)[1]
}

@@ -53,23 +51,18 @@ resource "azurerm_linux_virtual_machine" "controllers" {
storage_account_type = "Premium_LRS"
}

# CoreOS Container Linux or Flatcar Container Linux
# Flatcar Container Linux
source_image_reference {
publisher = local.flavor == "flatcar" ? "Kinvolk" : "CoreOS"
offer = local.flavor == "flatcar" ? "flatcar-container-linux-free" : "CoreOS"
publisher = "Kinvolk"
offer = "flatcar-container-linux-free"
sku = local.channel
version = "latest"
}

# Gross hack for Flatcar Linux
dynamic "plan" {
for_each = local.flavor == "flatcar" ? [1] : []

content {
name = local.channel
publisher = "kinvolk"
product = "flatcar-container-linux-free"
}
plan {
name = local.channel
publisher = "kinvolk"
product = "flatcar-container-linux-free"
}

# network
@@ -59,11 +59,11 @@ resource "azurerm_lb_rule" "apiserver" {
loadbalancer_id = azurerm_lb.cluster.id
frontend_ip_configuration_name = "apiserver"

protocol = "Tcp"
frontend_port = 6443
backend_port = 6443
backend_address_pool_id = azurerm_lb_backend_address_pool.controller.id
probe_id = azurerm_lb_probe.apiserver.id
protocol = "Tcp"
frontend_port = 6443
backend_port = 6443
backend_address_pool_ids = [azurerm_lb_backend_address_pool.controller.id]
probe_id = azurerm_lb_probe.apiserver.id
}

resource "azurerm_lb_rule" "ingress-http" {
@@ -74,11 +74,11 @@ resource "azurerm_lb_rule" "ingress-http" {
frontend_ip_configuration_name = "ingress"
disable_outbound_snat = true

protocol = "Tcp"
frontend_port = 80
backend_port = 80
backend_address_pool_id = azurerm_lb_backend_address_pool.worker.id
probe_id = azurerm_lb_probe.ingress.id
protocol = "Tcp"
frontend_port = 80
backend_port = 80
backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker.id]
probe_id = azurerm_lb_probe.ingress.id
}

resource "azurerm_lb_rule" "ingress-https" {
@@ -89,11 +89,11 @@ resource "azurerm_lb_rule" "ingress-https" {
frontend_ip_configuration_name = "ingress"
disable_outbound_snat = true

protocol = "Tcp"
frontend_port = 443
backend_port = 443
backend_address_pool_id = azurerm_lb_backend_address_pool.worker.id
probe_id = azurerm_lb_probe.ingress.id
protocol = "Tcp"
frontend_port = 443
backend_port = 443
backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker.id]
probe_id = azurerm_lb_probe.ingress.id
}

# Worker outbound TCP/UDP SNAT
@@ -112,16 +112,12 @@ resource "azurerm_lb_outbound_rule" "worker-outbound" {

# Address pool of controllers
resource "azurerm_lb_backend_address_pool" "controller" {
resource_group_name = azurerm_resource_group.cluster.name

name = "controller"
loadbalancer_id = azurerm_lb.cluster.id
}

# Address pool of workers
resource "azurerm_lb_backend_address_pool" "worker" {
resource_group_name = azurerm_resource_group.cluster.name

name = "worker"
loadbalancer_id = azurerm_lb.cluster.id
}
@@ -1,5 +1,6 @@
output "kubeconfig-admin" {
value = module.bootstrap.kubeconfig-admin
value = module.bootstrap.kubeconfig-admin
sensitive = true
}

# Outputs for Kubernetes Ingress
@@ -32,7 +33,8 @@ output "security_group_id" {
}

output "kubeconfig" {
value = module.bootstrap.kubeconfig-kubelet
value = module.bootstrap.kubeconfig-kubelet
sensitive = true
}

# Outputs for custom firewalling
@@ -57,3 +59,11 @@ output "backend_address_pool_id" {
description = "ID of the worker backend address pool"
value = azurerm_lb_backend_address_pool.worker.id
}

# Outputs for debug

output "assets_dist" {
value = module.bootstrap.assets_dist
sensitive = true
}
Some files were not shown because too many files have changed in this diff.