mirror of
https://github.com/puppetmaster/typhoon.git
synced 2025-08-02 23:41:34 +02:00
Compare commits
168 Commits
Author | SHA1 | Date | |
---|---|---|---|
37f00a3882 | |||
4cfafeaa07 | |||
90e23f5822 | |||
6234147948 | |||
c25c59058c | |||
bc9b808d44 | |||
4b0203fdb2 | |||
331566e1f7 | |||
04520e447c | |||
413585681b | |||
96711d7f17 | |||
c9059d3fe9 | |||
a287920169 | |||
8dc170b9d9 | |||
aed1a5f33d | |||
31d02b0221 | |||
8f875f80f5 | |||
16c0b9152b | |||
99dbce67a3 | |||
20bfd69780 | |||
ba44408b76 | |||
455175d9e6 | |||
d45804b1f6 | |||
907a96916f | |||
187bb17d39 | |||
abc31c3711 | |||
283e14f3e0 | |||
e72f916c8d | |||
c52f9f8d08 | |||
ecae6679ff | |||
4760543356 | |||
09eb208b4e | |||
8d024d22ad | |||
3bdddc452c | |||
ff4187a1fb | |||
2578be1f96 | |||
90edcd3d77 | |||
a927c7c790 | |||
d952576d2f | |||
70e389f37f | |||
a18bd0a707 | |||
01905b00bc | |||
f4194cd57a | |||
a2db4fa8c4 | |||
358854e712 | |||
b5dabcea31 | |||
3f0a5d2715 | |||
33173c0206 | |||
70f30d9c07 | |||
6afc1643d9 | |||
e71e27e769 | |||
64035005d4 | |||
317416b316 | |||
2c1af917ec | |||
4ac2d94999 | |||
fd044ee117 | |||
38a6bddd06 | |||
d8966afdda | |||
84ed0a31c3 | |||
fcbee12334 | |||
feac94605a | |||
2b1b918b43 | |||
bf22222f7d | |||
671eacb86e | |||
e2d4af43be | |||
5c4a3f73d5 | |||
76ab4c4c2a | |||
1627ecaf27 | |||
1420700bc0 | |||
80538e2953 | |||
73af2f3b7c | |||
17ea547723 | |||
2b5dfece93 | |||
d47d40b517 | |||
3c1be7b0e0 | |||
bbbaf949f9 | |||
135c6182b8 | |||
c53dc66d4a | |||
9960972726 | |||
bac5acb3bd | |||
70bdc9ec94 | |||
144bb9403c | |||
5fca08064b | |||
fc686c8fc7 | |||
a1a5da6bc2 | |||
076b8e3c42 | |||
ef5f953e04 | |||
d25f23e675 | |||
f100a90d28 | |||
c3bf8bcf96 | |||
5d1e4ad333 | |||
9f702c72d2 | |||
e556bc2167 | |||
1bf4f3b801 | |||
590d941f50 | |||
ddc1ff5348 | |||
61557e89a6 | |||
c3ef21dbf5 | |||
2a5dddeb9d | |||
75fb4e5d11 | |||
1a139ef6f1 | |||
bc7902f40a | |||
70bf39bb9a | |||
4e1b8f22df | |||
ab7913a061 | |||
7b0ea23cdc | |||
c4683c5bad | |||
51cee6d5a4 | |||
87f9a2fc35 | |||
6de5cf5a55 | |||
3250994c95 | |||
f4d260645c | |||
d9219a6722 | |||
60c7eb85ee | |||
4c964b56a0 | |||
1fbd6835f2 | |||
e4d977bfcd | |||
947c2c1815 | |||
4a38fb5927 | |||
c4e64a9d1b | |||
7ca03e5219 | |||
362b3fac5c | |||
32db59b9eb | |||
0c53ad52e4 | |||
008817b0aa | |||
49d3b9e6b3 | |||
1243f395d1 | |||
846f11097f | |||
ba84f86dc7 | |||
b49a1d715d | |||
34c3d7cc39 | |||
ca96a1335c | |||
e339fbd2b6 | |||
8cc303c9ac | |||
b19ba16afa | |||
d127a7345c | |||
02a470d2f2 | |||
5643ad525f | |||
d5b7ce8f27 | |||
1cda5bcd2a | |||
bda73264f7 | |||
dd930a2ff9 | |||
03ff3a9cf3 | |||
48703f9906 | |||
7ddd3d096d | |||
7daabd28b5 | |||
b642e3b41b | |||
ac786a2efc | |||
073fcb7067 | |||
ce0569e03b | |||
0e2fc89f78 | |||
b1f521fc4a | |||
73588cfad3 | |||
0223b31e1a | |||
bb586b60da | |||
43e05b9131 | |||
b2eb3e05d0 | |||
f1f4cd6fc0 | |||
50db3d0231 | |||
11565ffa8a | |||
a4e843693f | |||
f48e43c0b1 | |||
daa8d9d9ec | |||
52d11096dc | |||
00c431a9d2 | |||
0ecb995890 | |||
1b9fa2e688 | |||
2d8e367664 |
33
.github/ISSUE_TEMPLATE.md
vendored
33
.github/ISSUE_TEMPLATE.md
vendored
@ -1,33 +0,0 @@
|
||||
<!-- Fill in either the 'Bug' or 'Feature Request' section -->
|
||||
|
||||
## Bug
|
||||
|
||||
### Environment
|
||||
|
||||
* Platform: aws, azure, bare-metal, google-cloud, digital-ocean
|
||||
* OS: container-linux, flatcar-linux
|
||||
* Release: Typhoon version or Git SHA (reporting latest is **not** helpful)
|
||||
* Terraform: `terraform version` (reporting latest is **not** helpful)
|
||||
* Plugins: Provider plugin versions (reporting latest is **not** helpful)
|
||||
|
||||
### Problem
|
||||
|
||||
Describe the problem.
|
||||
|
||||
### Desired Behavior
|
||||
|
||||
Describe the goal.
|
||||
|
||||
### Steps to Reproduce
|
||||
|
||||
Provide clear steps to reproduce the issue unless already covered.
|
||||
|
||||
## Feature Request
|
||||
|
||||
### Feature
|
||||
|
||||
Describe the feature and what problem it solves.
|
||||
|
||||
### Tradeoffs
|
||||
|
||||
What are the pros and cons of this feature? How will it be exercised and maintained?
|
39
.github/ISSUE_TEMPLATE/bug_report.md
vendored
Normal file
39
.github/ISSUE_TEMPLATE/bug_report.md
vendored
Normal file
@ -0,0 +1,39 @@
|
||||
---
|
||||
name: Bug report
|
||||
about: Report a bug to improve the project
|
||||
title: ''
|
||||
labels: ''
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
<!-- READ: Issues are used to receive focused bug reports from users and to track planned future enhancements by the authors. Topics like cluster operation, support, debugging help, advice, and Kubernetes concepts are out of scope and should not use issues-->
|
||||
|
||||
**Description**
|
||||
|
||||
A clear and concise description of what the bug is.
|
||||
|
||||
**Steps to Reproduce**
|
||||
|
||||
Provide clear steps to reproduce the bug.
|
||||
|
||||
- [ ] Relevant error messages if appropriate (concise, not a dump of everything).
|
||||
- [ ] Explored using a vanilla cluster from the [tutorials](https://typhoon.psdn.io/#documentation). Ruled out [customizations](https://typhoon.psdn.io/advanced/customization/).
|
||||
|
||||
**Expected behavior**
|
||||
|
||||
A clear and concise description of what you expected to happen.
|
||||
|
||||
**Environment**
|
||||
|
||||
* Platform: aws, azure, bare-metal, google-cloud, digital-ocean
|
||||
* OS: fedora-coreos, flatcar-linux (include release version)
|
||||
* Release: Typhoon version or Git SHA (reporting latest is **not** helpful)
|
||||
* Terraform: `terraform version` (reporting latest is **not** helpful)
|
||||
* Plugins: Provider plugin versions (reporting latest is **not** helpful)
|
||||
|
||||
**Possible Solution**
|
||||
|
||||
<!-- Most bug reports should have some inkling about solutions. Otherwise, your report may be less of a bug and more of a support request (see top).-->
|
||||
|
||||
Link to a PR or description.
|
5
.github/ISSUE_TEMPLATE/config.yml
vendored
Normal file
5
.github/ISSUE_TEMPLATE/config.yml
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
blank_issues_enabled: true
|
||||
contact_links:
|
||||
- name: Security
|
||||
url: https://typhoon.psdn.io/topics/security/
|
||||
about: Report security vulnerabilities
|
15
.github/issue_template.md
vendored
Normal file
15
.github/issue_template.md
vendored
Normal file
@ -0,0 +1,15 @@
|
||||
<!-- READ: Issues are used to receive focused bug reports from users and to track planned future enhancements by the authors. Topics like cluster operation, support, debugging help, advice, and Kubernetes concepts are out of scope and should not use issues-->
|
||||
|
||||
## Enhancement
|
||||
|
||||
### Overview
|
||||
|
||||
One paragraph explanation of the enhancement.
|
||||
|
||||
### Motivation
|
||||
|
||||
Describe the motivation and what problem this solves.
|
||||
|
||||
### Tradeoffs
|
||||
|
||||
What are the pros and cons of this feature? How will it be exercised and maintained?
|
376
CHANGES.md
376
CHANGES.md
@ -4,6 +4,364 @@ Notable changes between versions.
|
||||
|
||||
## Latest
|
||||
|
||||
## v1.18.4
|
||||
|
||||
* Kubernetes [v1.18.4](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.18.md#v1184)
|
||||
* Update Kubelet image publishing ([#749](https://github.com/poseidon/typhoon/pull/749))
|
||||
* Build Kubelet images internally and publish to Quay and Dockerhub
|
||||
* [quay.io/poseidon/kubelet](https://quay.io/repository/poseidon/kubelet) (official)
|
||||
* [docker.io/psdn/kubelet](https://hub.docker.com/r/psdn/kubelet) (fallback)
|
||||
* Continue offering automated image builds with an alternate tag strategy (see [docs](https://typhoon.psdn.io/topics/security/#container-images))
|
||||
* [Document](https://typhoon.psdn.io/advanced/customization/#kubelet) use of alternate Kubelet images during registry incidents
|
||||
* Update Calico from v3.14.0 to [v3.14.1](https://docs.projectcalico.org/v3.14/release-notes/)
|
||||
* Fix [CVE-2020-13597](https://github.com/kubernetes/kubernetes/issues/91507)
|
||||
* Rename controller NoSchedule taint from `node-role.kubernetes.io/master` to `node-role.kubernetes.io/controller` ([#764](https://github.com/poseidon/typhoon/pull/764))
|
||||
* Tolerate the new taint name for workloads that may run on controller nodes
|
||||
* Remove node label `node.kubernetes.io/master` from controller nodes ([#764](https://github.com/poseidon/typhoon/pull/764))
|
||||
* Use `node.kubernetes.io/controller` (present since v1.9.5, [#160](https://github.com/poseidon/typhoon/pull/160)) to node select controllers
|
||||
* Remove unused Kubelet `-lock-file` and `-exit-on-lock-contention` ([#758](https://github.com/poseidon/typhoon/pull/758))
|
||||
|
||||
### Fedora CoreOS
|
||||
|
||||
#### Azure
|
||||
|
||||
* Use `strict` Fedora CoreOS Config (FCC) snippet parsing ([#755](https://github.com/poseidon/typhoon/pull/755))
|
||||
* Reduce Calico vxlan interface MTU to maintain performance ([#767](https://github.com/poseidon/typhoon/pull/766))
|
||||
|
||||
#### AWS
|
||||
|
||||
* Fix Kubelet service race with hostname update ([#766](https://github.com/poseidon/typhoon/pull/766))
|
||||
* Wait for a hostname to avoid Kubelet trying to register as `localhost`
|
||||
|
||||
### Flatcar Linux
|
||||
|
||||
* Use `strict` Container Linux Config (CLC) snippet parsing ([#755](https://github.com/poseidon/typhoon/pull/755))
|
||||
* Require `terraform-provider-ct` v0.4+, recommend v0.5+ (**action required**)
|
||||
|
||||
### Addons
|
||||
|
||||
* Update nginx-ingress from v0.32.0 to [v0.33.0](https://github.com/kubernetes/ingress-nginx/releases/tag/nginx-0.33.0)
|
||||
* Update Prometheus from v2.18.1 to [v2.19.0](https://github.com/prometheus/prometheus/releases/tag/v2.19.0)
|
||||
* Update node-exporter from v1.0.0-rc.1 to [v1.0.1](https://github.com/prometheus/node_exporter/releases/tag/v1.0.1)
|
||||
* Update kube-state-metrics from v1.9.6 to v1.9.7
|
||||
* Update Grafana from v7.0.0 to v7.0.3
|
||||
|
||||
## v1.18.3
|
||||
|
||||
* Kubernetes [v1.18.3](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.18.md#v1183)
|
||||
* Use Kubelet [TLS bootstrap](https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet-tls-bootstrapping/) with bootstrap token authentication ([#713](https://github.com/poseidon/typhoon/pull/713))
|
||||
* Enable Node [Authorization](https://kubernetes.io/docs/reference/access-authn-authz/node/) and [NodeRestriction](https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/#noderestriction) to reduce authorization scope
|
||||
* Renew Kubelet certificates every 72 hours
|
||||
* Update etcd from v3.4.7 to [v3.4.9](https://github.com/etcd-io/etcd/releases/tag/v3.4.9)
|
||||
* Update Calico from v3.13.1 to [v3.14.0](https://docs.projectcalico.org/v3.14/release-notes/)
|
||||
* Add CoreDNS node affinity preference for controller nodes ([#188](https://github.com/poseidon/terraform-render-bootstrap/pull/188))
|
||||
* Deprecate CoreOS Container Linux support (no OS [updates](https://coreos.com/os/eol/) after May 2020)
|
||||
* Use a `fedora-coreos` module for Fedora CoreOS
|
||||
* Use a `container-linux` module for Flatcar Linux
|
||||
|
||||
### AWS
|
||||
|
||||
* Fix Terraform plan error when `controller_count` exceeds AWS zones (e.g. 5 controllers) ([#714](https://github.com/poseidon/typhoon/pull/714))
|
||||
* Regressed in v1.17.1 ([#605](https://github.com/poseidon/typhoon/pull/605))
|
||||
|
||||
### Azure
|
||||
|
||||
* Update Azure subnets to set `address_prefixes` list ([#730](https://github.com/poseidon/typhoon/pull/730))
|
||||
* Fix warning that `address_prefix` is deprecated
|
||||
* Require `terraform-provider-azurerm` v2.8.0+ (action required)
|
||||
|
||||
### DigitalOcean
|
||||
|
||||
* Promote DigitalOcean to beta on both Fedora CoreOS and Flatcar Linux
|
||||
|
||||
### Fedora CoreOS
|
||||
|
||||
* Fix Calico `install-cni` crashloop on Pod restarts ([#724](https://github.com/poseidon/typhoon/pull/724))
|
||||
* SELinux enforcement requires consistent file context MCS level
|
||||
* Restarting a node resolved the issue as a previous workaround
|
||||
|
||||
#### AWS
|
||||
|
||||
* Support Fedora CoreOS [image streams](https://docs.fedoraproject.org/en-US/fedora-coreos/update-streams/) ([#727](https://github.com/poseidon/typhoon/pull/727))
|
||||
* Add `os_stream` variable to set the stream to `stable` (default), `testing`, or `next`
|
||||
* Remove unused `os_image` variable
|
||||
|
||||
#### Google
|
||||
|
||||
* Support Fedora CoreOS [image streams](https://docs.fedoraproject.org/en-US/fedora-coreos/update-streams/) ([#723](https://github.com/poseidon/typhoon/pull/722))
|
||||
* Add `os_stream` variable to set the stream to `stable` (default), `testing`, or `next`
|
||||
* Deprecate `os_image` variable. Manual image uploads are no longer needed
|
||||
|
||||
### Flatcar Linux
|
||||
|
||||
#### Azure
|
||||
|
||||
* Use the Flatcar Linux Azure Marketplace image
|
||||
* Restore [#664](https://github.com/poseidon/typhoon/pull/664) (reverted in [#707](https://github.com/poseidon/typhoon/pull/707)) but use Flatcar Linux new free offer (not byol)
|
||||
* Change `os_image` to use a `flatcar-stable` default
|
||||
|
||||
#### Google
|
||||
|
||||
* Promote Flatcar Linux to beta
|
||||
|
||||
### Addons
|
||||
|
||||
* Update nginx-ingress from v0.30.0 to [v0.32.0](https://github.com/kubernetes/ingress-nginx/releases/tag/nginx-0.32.0)
|
||||
* Add support for [IngressClass](https://kubernetes.io/docs/concepts/services-networking/ingress/#ingress-class)
|
||||
* Update Prometheus from v2.17.1 to v2.18.1
|
||||
* Update kube-state-metrics from v1.9.5 to [v1.9.6](https://github.com/kubernetes/kube-state-metrics/releases/tag/v1.9.6)
|
||||
* Update node-exporter from v1.0.0-rc.0 to [v1.0.0-rc.1](https://github.com/prometheus/node_exporter/releases/tag/v1.0.0-rc.1)
|
||||
* Update Grafana from v6.7.2 to [v7.0.0](https://grafana.com/docs/grafana/latest/guides/whats-new-in-v7-0/)
|
||||
|
||||
## v1.18.2
|
||||
|
||||
* Kubernetes [v1.18.2](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.18.md#v1182)
|
||||
* Choose Fedora CoreOS or Flatcar Linux (**action required**)
|
||||
* Use a `fedora-coreos` module for Fedora CoreOS
|
||||
* Use a `container-linux` module for Flatcar Linux
|
||||
* Change Container Linux modules' defaults from CoreOS Container Linux to [Flatcar Container Linux](https://typhoon.psdn.io/architecture/operating-systems/) ([#702](https://github.com/poseidon/typhoon/pull/702))
|
||||
* CoreOS Container Linux [won't receive updates](https://coreos.com/os/eol/) after May 2020
|
||||
|
||||
### Fedora CoreOS
|
||||
|
||||
* Fix bootstrap race condition from SELinux unshared content label ([#708](https://github.com/poseidon/typhoon/pull/708))
|
||||
|
||||
#### Azure
|
||||
|
||||
* Add support for Fedora CoreOS ([#704](https://github.com/poseidon/typhoon/pull/704))
|
||||
|
||||
#### DigitalOcean
|
||||
|
||||
* Fix race condition creating firewall allow rules ([#709](https://github.com/poseidon/typhoon/pull/709))
|
||||
|
||||
### Flatcar Linux
|
||||
|
||||
#### AWS
|
||||
|
||||
* Change `os_image` default from `coreos-stable` to `flatcar-stable` ([#702](https://github.com/poseidon/typhoon/pull/702))
|
||||
|
||||
#### Azure
|
||||
|
||||
* Change `os_image` to be required. Recommend uploading a Flatcar Linux image (**action required**) ([#702](https://github.com/poseidon/typhoon/pull/702))
|
||||
* Disable Flatcar Linux Azure Marketplace image [support](https://github.com/poseidon/typhoon/pull/664) (**breaking**, [#707](https://github.com/poseidon/typhoon/pull/707))
|
||||
* Revert to manual uploading until marketplace issue is closed ([#703](https://github.com/poseidon/typhoon/issues/703))
|
||||
|
||||
#### Bare-Metal
|
||||
|
||||
* Recommend changing [os_channel](https://typhoon.psdn.io/cl/bare-metal/#required) from `coreos-stable` to `flatcar-stable`
|
||||
|
||||
#### Google
|
||||
|
||||
* Change `os_image` to be required. Recommend uploading a Flatcar Linux image (**action required**) ([#702](https://github.com/poseidon/typhoon/pull/702))
|
||||
|
||||
#### DigitalOcean
|
||||
|
||||
* Change `os_image` to be required. Recommend uploading a Flatcar Linux image (**action required**) ([#702](https://github.com/poseidon/typhoon/pull/702))
|
||||
* Fix race condition creating firewall allow rules ([#709](https://github.com/poseidon/typhoon/pull/709))
|
||||
|
||||
## v1.18.1
|
||||
|
||||
* Kubernetes [v1.18.1](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.18.md#v1181)
|
||||
* Choose Fedora CoreOS or Flatcar Linux (**action recommended**)
|
||||
* Use a `fedora-coreos` module for Fedora CoreOS
|
||||
* Use a `container-linux` module with OS set to Flatcar Linux
|
||||
* Update etcd from v3.4.5 to [v3.4.7](https://github.com/etcd-io/etcd/releases/tag/v3.4.7)
|
||||
* Change `kube-proxy` and `calico` or `flannel` to tolerate specific taints ([#682](https://github.com/poseidon/typhoon/pull/682))
|
||||
* Tolerate master and not-ready taints, rather than tolerating all taints
|
||||
* Update flannel from v0.11.0 to v0.12.0 ([#690](https://github.com/poseidon/typhoon/pull/690))
|
||||
* Fix bootstrap when `networking` mode `flannel` (non-default) is chosen ([#689](https://github.com/poseidon/typhoon/pull/689))
|
||||
* Regressed in v1.18.0 changes for Calico ([#675](https://github.com/poseidon/typhoon/pull/675))
|
||||
* Rename Container Linux `controller_clc_snippets` to `controller_snippets` for consistency ([#688](https://github.com/poseidon/typhoon/pull/688))
|
||||
* Rename Container Linux `worker_clc_snippets` to `worker_snippets` for consistency
|
||||
* Rename Container Linux `clc_snippets` (bare-metal) to `snippets` for consistency
|
||||
* Drop support for [gitRepo](https://kubernetes.io/docs/concepts/storage/volumes/#gitrepo) volumes ([kubelet#3](https://github.com/poseidon/kubelet/pull/3))
|
||||
|
||||
#### Azure
|
||||
|
||||
* Fix Azure worker UDP outbound connections ([#691](https://github.com/poseidon/typhoon/pull/691))
|
||||
* Fix Azure worker clock sync timeouts
|
||||
|
||||
#### DigitalOcean
|
||||
|
||||
* Add support for Fedora CoreOS ([#699](https://github.com/poseidon/typhoon/pull/699))
|
||||
|
||||
#### Addons
|
||||
|
||||
* Refresh Prometheus rules/alerts and Grafana dashboards ([#692](https://github.com/poseidon/typhoon/pull/692))
|
||||
* Update Grafana from v6.7.1 to v6.7.2
|
||||
|
||||
## v1.18.0
|
||||
|
||||
* Kubernetes [v1.18.0](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.18.md#v1180)
|
||||
* Update etcd from v3.4.4 to [v3.4.5](https://github.com/etcd-io/etcd/releases/tag/v3.4.5)
|
||||
* Switch from upstream hyperkube image to individual images ([#669](https://github.com/poseidon/typhoon/pull/669))
|
||||
* Use upstream k8s.gcr.io `kube-apiserver`, `kube-controller-manager`, `kube-scheduler`, and `kube-proxy` container images
|
||||
* Use [poseidon/kubelet](https://github.com/poseidon/kubelet) to package the upstream Kubelet binary and dependencies as a container image (checksummed, automated build)
|
||||
* Add [quay.io/poseidon/kubelet](https://quay.io/repository/poseidon/kubelet) as a Typhoon distributed artifact in the security policy
|
||||
* Update base images from debian 9 to debian 10
|
||||
* Background: Kubernetes will [stop releasing](https://github.com/kubernetes/kubernetes/pull/88676) the hyperkube container image and provide the Kubelet as a binary for packaging
|
||||
* Choose Fedora CoreOS or Flatcar Linux (**action recommended**)
|
||||
* Use a `fedora-coreos` module for Fedora CoreOS
|
||||
* Use a `container-linux` module with OS set for Flatcar Linux (varies, see docs)
|
||||
* CoreOS Container Linux [won't receive updates](https://coreos.com/os/eol/) after May 2020
|
||||
* Add support for Fedora CoreOS snippets (`terraform-provider-ct` v0.5+) ([#686](https://github.com/poseidon/typhoon/pull/686))
|
||||
* Recommend updating `terraform-provider-ct` plugin from v0.4.0 to [v0.5.0](https://github.com/poseidon/terraform-provider-ct/releases/tag/v0.5.0)
|
||||
* Set Fedora CoreOS log driver back to the default `journald` ([#681](https://github.com/poseidon/typhoon/pull/681))
|
||||
* Deprecate `asset_dir` variable and remove docs ([#678](https://github.com/poseidon/typhoon/pull/678))
|
||||
* Deprecate support for [gitRepo](https://kubernetes.io/docs/concepts/storage/volumes/#gitrepo) volumes. A future release will drop support.
|
||||
|
||||
#### AWS
|
||||
|
||||
* Fix Fedora CoreOS AMI to filter for stable images ([#685](https://github.com/poseidon/typhoon/pull/685))
|
||||
* Latest Fedora CoreOS `testing` or `bodhi-update` images could be chosen depending on the region
|
||||
|
||||
#### Bare-Metal
|
||||
|
||||
* Update Fedora CoreOS default `os_stream` from testing to stable
|
||||
|
||||
#### Google Cloud
|
||||
|
||||
* Known: Use of stale Fedora CoreOS image may require terraform re-apply during bootstrap ([#687](https://github.com/poseidon/typhoon/pull/687))
|
||||
|
||||
#### DigitalOcean
|
||||
|
||||
* Rename `image` variable to `os_image` for consistency ([#677](https://github.com/poseidon/typhoon/pull/677)) (action required)
|
||||
|
||||
#### Addons
|
||||
|
||||
* Update Prometheus from v2.16.0 to [v2.17.1](https://github.com/prometheus/prometheus/releases/tag/v2.17.1)
|
||||
* Update Grafana from v6.6.2 to [v6.7.1](https://github.com/grafana/grafana/releases/tag/v6.7.1)
|
||||
|
||||
## v1.17.4
|
||||
|
||||
* Kubernetes [v1.17.4](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.17.md#v1174)
|
||||
* Update etcd from v3.4.3 to [v3.4.4](https://github.com/etcd-io/etcd/releases/tag/v3.4.4)
|
||||
* On Container Linux, fetch using the docker transport format ([#659](https://github.com/poseidon/typhoon/pull/659))
|
||||
* Update CoreDNS from v1.6.6 to v1.6.7 ([#648](https://github.com/poseidon/typhoon/pull/648))
|
||||
* Update Calico from v3.12.0 to [v3.13.1](https://docs.projectcalico.org/v3.13/release-notes/)
|
||||
|
||||
#### AWS
|
||||
|
||||
* Promote Fedora CoreOS to stable ([#668](https://github.com/poseidon/typhoon/pull/668))
|
||||
* Allow VPC route table extension via reference ([#654](https://github.com/poseidon/typhoon/pull/654))
|
||||
* Fix `worker_node_labels` on Fedora CoreOS ([#651](https://github.com/poseidon/typhoon/pull/651))
|
||||
* Fix automatic worker node delete on shutdown on Fedora CoreOS ([#657](https://github.com/poseidon/typhoon/pull/657))
|
||||
|
||||
#### Azure
|
||||
|
||||
* Upgrade to `terraform-provider-azurerm` [v2.0+](https://www.terraform.io/docs/providers/azurerm/guides/2.0-upgrade-guide.html) (action required)
|
||||
* Change `worker_priority` from `Low` to `Spot` if used (action required)
|
||||
* Switch to Azure's new Linux VM and Linux VM Scale Set resources
|
||||
* Set controller's Azure disk caching to None
|
||||
* Associate subnets (in addition to NICs) with security groups (aesthetic)
|
||||
* Add support for Flatcar Container Linux ([#664](https://github.com/poseidon/typhoon/pull/664))
|
||||
* Requires accepting Flatcar Linux Azure Marketplace terms
|
||||
|
||||
#### Bare-Metal
|
||||
|
||||
* Add `worker_node_labels` map variable for per-worker node labels ([#663](https://github.com/poseidon/typhoon/pull/663))
|
||||
* Add `worker_node_taints` map variable for per-worker node taints ([#663](https://github.com/poseidon/typhoon/pull/663))
|
||||
|
||||
#### DigitalOcean
|
||||
|
||||
* Add support for Flatcar Container Linux ([#644](https://github.com/poseidon/typhoon/pull/644))
|
||||
|
||||
#### Google Cloud
|
||||
|
||||
* Promote Fedora CoreOS to beta ([#668](https://github.com/poseidon/typhoon/pull/668))
|
||||
* Fix `worker_node_labels` on Fedora CoreOS ([#651](https://github.com/poseidon/typhoon/pull/651))
|
||||
* Fix automatic worker node delete on shutdown on Fedora CoreOS ([#657](https://github.com/poseidon/typhoon/pull/657))
|
||||
|
||||
#### Addons
|
||||
|
||||
* Update nginx-ingress from v0.28.0 to [v0.30.0](https://github.com/kubernetes/ingress-nginx/releases/tag/nginx-0.30.0)
|
||||
* Update Prometheus from v2.15.2 to [v2.16.0](https://github.com/prometheus/prometheus/releases/tag/v2.16.0)
|
||||
* Refresh Prometheus rules and alerts
|
||||
* Add a BlackboxProbeFailure alert
|
||||
* Update kube-state-metrics from v1.9.4 to v1.9.5
|
||||
* Update node-exporter from v0.18.1 to [v1.0.0-rc.0](https://github.com/prometheus/node_exporter/releases/tag/v1.0.0-rc.0)
|
||||
* Update Grafana from v6.6.1 to v6.6.2
|
||||
* Refresh Grafana dashboards
|
||||
* Remove Container Linux Update Operator (CLUO) addon example ([#667](https://github.com/poseidon/typhoon/pull/667))
|
||||
* CLUO hasn't been in active use in our clusters and won't be relevant
|
||||
beyond Container Linux. Requires patches for use on Kubernetes v1.16+
|
||||
|
||||
## v1.17.3
|
||||
|
||||
* Kubernetes [v1.17.3](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.17.md#v1173)
|
||||
* Update Calico from v3.11.2 to v3.12.0
|
||||
* Allow Fedora CoreOS clusters to pass CNCF conformance suite
|
||||
* Set Docker log driver to `json-file` as a workaround
|
||||
* Try Fedora CoreOS or Flatcar Linux alongside CoreOS [Container Linux](https://coreos.com/os/eol/) clusters (recommended)
|
||||
|
||||
#### AWS
|
||||
|
||||
* Promote Fedora CoreOS to beta ([#645](https://github.com/poseidon/typhoon/pull/645))
|
||||
|
||||
#### Bare-Metal
|
||||
|
||||
* Promote Fedora CoreOS to beta ([#645](https://github.com/poseidon/typhoon/pull/645))
|
||||
* Add Fedora CoreOS kernel arguments initrd and console ([#640](https://github.com/poseidon/typhoon/pull/640))
|
||||
|
||||
#### Google Cloud
|
||||
|
||||
* Add Terraform module for Fedora CoreOS ([#632](https://github.com/poseidon/typhoon/pull/632))
|
||||
* Add support for Flatcar Container Linux ([#639](https://github.com/poseidon/typhoon/pull/639))
|
||||
|
||||
#### Addons
|
||||
|
||||
* Update nginx-ingress from v0.27.1 to v0.28.0
|
||||
* Update kube-state-metrics from v1.9.3 to v1.9.4
|
||||
* Update Grafana from v6.5.3 to v6.6.1
|
||||
|
||||
## v1.17.2
|
||||
|
||||
* Kubernetes [v1.17.2](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG-1.17.md#v1172)
|
||||
|
||||
#### AWS
|
||||
|
||||
* Promote Fedora CoreOS from preview to alpha
|
||||
|
||||
#### Bare-Metal
|
||||
|
||||
* Promote Fedora CoreOS from preview to alpha
|
||||
* Update Fedora CoreOS images location
|
||||
* Use Fedora CoreOS production [download](https://getfedora.org/coreos/download/) streams
|
||||
* Use live PXE kernel and initramfs images
|
||||
|
||||
#### Addons
|
||||
|
||||
* Update nginx-ingress from v0.26.1 to [v0.27.1](https://github.com/kubernetes/ingress-nginx/releases/tag/nginx-0.27.1) ([#625](https://github.com/poseidon/typhoon/pull/625))
|
||||
* Change runAsUser from 33 to 101 for alpine-based image
|
||||
* Update kube-state-metrics from v1.9.2 to v1.9.3
|
||||
|
||||
## v1.17.1
|
||||
|
||||
* Kubernetes [v1.17.1](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG-1.17.md#v1171)
|
||||
* Update CoreDNS from v1.6.5 to [v1.6.6](https://coredns.io/2019/12/11/coredns-1.6.6-release/) ([#602](https://github.com/poseidon/typhoon/pull/602))
|
||||
* Update Calico from v3.10.2 to v3.11.2 ([#604](https://github.com/poseidon/typhoon/pull/604))
|
||||
* Inline Kubelet service on Container Linux nodes ([#606](https://github.com/poseidon/typhoon/pull/606))
|
||||
* Disable unused Kubelet `127.0.0.1:10248` healthz listener ([#607](https://github.com/poseidon/typhoon/pull/607))
|
||||
* Enable kube-proxy metrics and allow Prometheus scrapes
|
||||
* Allow TCP/10249 traffic with worker node sources
|
||||
|
||||
#### AWS
|
||||
|
||||
* Update Fedora CoreOS AMI filter for fedora-coreos-31 ([#620](https://github.com/poseidon/typhoon/pull/620))
|
||||
|
||||
#### Google
|
||||
|
||||
* Allow `terraform-provider-google` v3.0+ ([#617](https://github.com/poseidon/typhoon/pull/617))
|
||||
* Only enforce `v2.19+` to ease migration, as no v3.x features are used
|
||||
|
||||
#### Addons
|
||||
|
||||
* Update Prometheus from v2.14.0 to [v2.15.2](https://github.com/prometheus/prometheus/releases/tag/v2.15.2)
|
||||
* Add discovery for kube-proxy service endpoints
|
||||
* Update kube-state-metrics from v1.8.0 to v1.9.2
|
||||
* Reduce node-exporter DaemonSet tolerations ([#614](https://github.com/poseidon/typhoon/pull/614))
|
||||
* Update Grafana from v6.5.1 to v6.5.3
|
||||
|
||||
## v1.17.0
|
||||
|
||||
* Kubernetes [v1.17.0](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG-1.17.md#v1170)
|
||||
@ -226,7 +584,7 @@ Notable changes between versions.
|
||||
* Require `terraform-provider-azurerm` v1.27+ to support Terraform v0.12 (action required)
|
||||
* Avoid unneeded rotations of Regular priority virtual machine scale sets
|
||||
* Azure only allows `eviction_policy` to be set for Low priority VMs. Supporting Low priority VMs meant when Regular VMs were used, each `terraform apply` rolled workers, to set eviction_policy to null.
|
||||
* Terraform v0.12 nullable variables fix the issue so plan does not produce a diff.
|
||||
* Terraform v0.12 nullable variables fix the issue so plan does not produce a diff.
|
||||
|
||||
#### Bare-Metal
|
||||
|
||||
@ -281,7 +639,7 @@ Notable changes between versions.
|
||||
* Update Grafana from v6.1.6 to v6.2.1
|
||||
|
||||
## v1.14.2
|
||||
|
||||
|
||||
* Kubernetes [v1.14.2](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG-1.14.md#v1142)
|
||||
* Update etcd from v3.3.12 to [v3.3.13](https://github.com/etcd-io/etcd/releases/tag/v3.3.13)
|
||||
* Upgrade Calico from v3.6.1 to [v3.7.2](https://docs.projectcalico.org/v3.7/release-notes/)
|
||||
@ -352,7 +710,7 @@ Notable changes between versions.
|
||||
|
||||
* Add ability to load balance TCP/UDP applications ([#442](https://github.com/poseidon/typhoon/pull/442))
|
||||
* Add worker instances to a target pool, output as `worker_target_pool`
|
||||
* Health check for workers with Ingress controllers. Forward rules don't support differing internal/external ports, but some Ingress controllers support TCP/UDP proxy as a workaround
|
||||
* Health check for workers with Ingress controllers. Forward rules don't support differing internal/external ports, but some Ingress controllers support TCP/UDP proxy as a workaround
|
||||
* Remove Haswell minimum CPU platform requirement ([#439](https://github.com/poseidon/typhoon/pull/439))
|
||||
* Google Cloud API implements `min_cpu_platform` to mean "use exactly this CPU". Revert [#405](https://github.com/poseidon/typhoon/pull/405) added in v1.13.4.
|
||||
* Fix error creating clusters in new regions without Haswell (e.g. europe-west2) ([#438](https://github.com/poseidon/typhoon/issues/438))
|
||||
@ -537,7 +895,7 @@ Notable changes between versions.
|
||||
* Update Calico from v3.3.0 to [v3.3.1](https://docs.projectcalico.org/v3.3/releases/)
|
||||
* Disable Felix usage reporting by default ([#345](https://github.com/poseidon/typhoon/pull/345))
|
||||
* Improve flannel manifests
|
||||
* [Rename](https://github.com/poseidon/terraform-render-bootkube/commit/d045a8e6b8eccfbb9d69bb51953b5a93d23f67f7) `kube-flannel` DaemonSet to `flannel` and `kube-flannel-cfg` ConfigMap to `flannel-config`
|
||||
* [Rename](https://github.com/poseidon/terraform-render-bootkube/commit/d045a8e6b8eccfbb9d69bb51953b5a93d23f67f7) `kube-flannel` DaemonSet to `flannel` and `kube-flannel-cfg` ConfigMap to `flannel-config`
|
||||
* [Drop](https://github.com/poseidon/terraform-render-bootkube/commit/39f9afb3360ec642e5b98457c8bd07eda35b6c96) unused mounts and add a CPU resource request
|
||||
* Update CoreDNS from v1.2.4 to [v1.2.6](https://coredns.io/2018/11/05/coredns-1.2.6-release/)
|
||||
* Enable CoreDNS `loop` and `loadbalance` plugins ([#340](https://github.com/poseidon/typhoon/pull/340))
|
||||
@ -699,7 +1057,7 @@ Notable changes between versions.
|
||||
* Force apiserver to stop listening on `127.0.0.1:8080`
|
||||
* Replace `kube-dns` with [CoreDNS](https://coredns.io/) ([#261](https://github.com/poseidon/typhoon/pull/261))
|
||||
* Edit the `coredns` ConfigMap to [customize](https://coredns.io/plugins/)
|
||||
* CoreDNS doesn't use a resizer. For large clusters, scaling may be required.
|
||||
* CoreDNS doesn't use a resizer. For large clusters, scaling may be required.
|
||||
|
||||
#### AWS
|
||||
|
||||
@ -744,7 +1102,7 @@ Notable changes between versions.
|
||||
|
||||
* Switch `kube-apiserver` port from 443 to 6443 ([#248](https://github.com/poseidon/typhoon/pull/248))
|
||||
* Users who exposed kube-apiserver on a WAN via their router/load-balancer will need to adjust its configuration (e.g. DNAT 6443). Most apiservers are on a LAN (internal, VPN-only, etc) so if you didn't specially configure network gear for 443, no change is needed. (possible action required)
|
||||
* Fix possible deadlock when provisioning clusters larger than 10 nodes ([#244](https://github.com/poseidon/typhoon/pull/244))
|
||||
* Fix possible deadlock when provisioning clusters larger than 10 nodes ([#244](https://github.com/poseidon/typhoon/pull/244))
|
||||
|
||||
#### DigitalOcean
|
||||
|
||||
@ -812,7 +1170,7 @@ Notable changes between versions.
|
||||
* Please change values stable, beta, or alpha to coreos-stable, coreos-beta, coreos-alpha (**action required!**)
|
||||
* Replace `container_linux_version` variable with `os_version`
|
||||
* Add `network_ip_autodetection_method` variable for Calico host IPv4 address detection
|
||||
* Use Calico's default "first-found" to support single NIC and bonded NIC nodes
|
||||
* Use Calico's default "first-found" to support single NIC and bonded NIC nodes
|
||||
* Allow [alternative](https://docs.projectcalico.org/v3.1/reference/node/configuration#ip-autodetection-methods) methods for multi NIC nodes, like can-reach=IP or interface=REGEX
|
||||
* Deprecate `container_linux_oem` variable
|
||||
|
||||
@ -845,7 +1203,7 @@ Notable changes between versions.
|
||||
#### Google Cloud
|
||||
|
||||
* Add support for multi-controller clusters (i.e. multi-master) ([#54](https://github.com/poseidon/typhoon/issues/54), [#190](https://github.com/poseidon/typhoon/pull/190))
|
||||
* Switch from Google Cloud network load balancer to a TCP proxy load balancer. Avoid a [bug](https://issuetracker.google.com/issues/67366622) in Google network load balancers that limited clusters to only bootstrapping one controller node.
|
||||
* Switch from Google Cloud network load balancer to a TCP proxy load balancer. Avoid a [bug](https://issuetracker.google.com/issues/67366622) in Google network load balancers that limited clusters to only bootstrapping one controller node.
|
||||
* Add TCP health check for apiserver pods on controllers. Replace kubelet check approximation.
|
||||
|
||||
#### Addons
|
||||
@ -1076,7 +1434,7 @@ Notable changes between versions.
|
||||
* Container Linux stable, beta, and alpha now provide Docker 17.09 (instead
|
||||
of 1.12)
|
||||
* Older clusters (with CLUO addon) auto-update Container Linux version to begin using Docker 17.09
|
||||
* Fix race where `etcd-member.service` could fail to resolve peers ([#69](https://github.com/poseidon/typhoon/pull/69))
|
||||
* Fix race where `etcd-member.service` could fail to resolve peers ([#69](https://github.com/poseidon/typhoon/pull/69))
|
||||
* Add optional `cluster_domain_suffix` variable (#74)
|
||||
* Use kubernetes-incubator/bootkube v0.9.1
|
||||
|
||||
|
44
README.md
44
README.md
@ -11,9 +11,9 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster
|
||||
|
||||
## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>
|
||||
|
||||
* Kubernetes v1.17.0 (upstream)
|
||||
* Kubernetes v1.18.4 (upstream)
|
||||
* Single or multi-master, [Calico](https://www.projectcalico.org/) or [flannel](https://github.com/coreos/flannel) networking
|
||||
* On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/)
|
||||
* On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/), SELinux enforcing
|
||||
* Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [preemptible](https://typhoon.psdn.io/cl/google-cloud/#preemption) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#container-linux) customization
|
||||
* Ready for Ingress, Prometheus, Grafana, CSI, or other [addons](https://typhoon.psdn.io/addons/overview/)
|
||||
|
||||
@ -21,26 +21,32 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster
|
||||
|
||||
Typhoon provides a Terraform Module for each supported operating system and platform.
|
||||
|
||||
| Platform | Operating System | Terraform Module | Status |
|
||||
|---------------|------------------|------------------|--------|
|
||||
| AWS | Container Linux / Flatcar Linux | [aws/container-linux/kubernetes](aws/container-linux/kubernetes) | stable |
|
||||
| Azure | Container Linux | [azure/container-linux/kubernetes](azure/container-linux/kubernetes) | alpha |
|
||||
| Bare-Metal | Container Linux / Flatcar Linux | [bare-metal/container-linux/kubernetes](bare-metal/container-linux/kubernetes) | stable |
|
||||
| Digital Ocean | Container Linux | [digital-ocean/container-linux/kubernetes](digital-ocean/container-linux/kubernetes) | beta |
|
||||
| Google Cloud | Container Linux | [google-cloud/container-linux/kubernetes](google-cloud/container-linux/kubernetes) | stable |
|
||||
|
||||
A preview of Typhoon for [Fedora CoreOS](https://getfedora.org/coreos/) is available for testing.
|
||||
Typhoon is available for [Fedora CoreOS](https://getfedora.org/coreos/).
|
||||
|
||||
| Platform | Operating System | Terraform Module | Status |
|
||||
|---------------|------------------|------------------|--------|
|
||||
| AWS | Fedora CoreOS | [aws/fedora-coreos/kubernetes](aws/fedora-coreos/kubernetes) | preview |
|
||||
| Bare-Metal | Fedora CoreOS | [bare-metal/fedora-coreos/kubernetes](bare-metal/fedora-coreos/kubernetes) | preview |
|
||||
| AWS | Fedora CoreOS | [aws/fedora-coreos/kubernetes](aws/fedora-coreos/kubernetes) | stable |
|
||||
| Azure | Fedora CoreOS | [azure/fedora-coreos/kubernetes](azure/fedora-coreos/kubernetes) | alpha |
|
||||
| Bare-Metal | Fedora CoreOS | [bare-metal/fedora-coreos/kubernetes](bare-metal/fedora-coreos/kubernetes) | beta |
|
||||
| DigitalOcean | Fedora CoreOS | [digital-ocean/fedora-coreos/kubernetes](digital-ocean/fedora-coreos/kubernetes) | beta |
|
||||
| Google Cloud | Fedora CoreOS | [google-cloud/fedora-coreos/kubernetes](google-cloud/fedora-coreos/kubernetes) | beta |
|
||||
|
||||
Typhoon is available for [Flatcar Linux](https://www.flatcar-linux.org/releases/).
|
||||
|
||||
| Platform | Operating System | Terraform Module | Status |
|
||||
|---------------|------------------|------------------|--------|
|
||||
| AWS | Flatcar Linux | [aws/container-linux/kubernetes](aws/container-linux/kubernetes) | stable |
|
||||
| Azure | Flatcar Linux | [azure/container-linux/kubernetes](azure/container-linux/kubernetes) | alpha |
|
||||
| Bare-Metal | Flatcar Linux | [bare-metal/container-linux/kubernetes](bare-metal/container-linux/kubernetes) | stable |
|
||||
| DigitalOcean | Flatcar Linux | [digital-ocean/container-linux/kubernetes](digital-ocean/container-linux/kubernetes) | beta |
|
||||
| Google Cloud | Flatcar Linux | [google-cloud/container-linux/kubernetes](google-cloud/container-linux/kubernetes) | beta |
|
||||
|
||||
## Documentation
|
||||
|
||||
* [Docs](https://typhoon.psdn.io)
|
||||
* Architecture [concepts](https://typhoon.psdn.io/architecture/concepts/) and [operating systems](https://typhoon.psdn.io/architecture/operating-systems/)
|
||||
* Tutorials for [AWS](docs/cl/aws.md), [Azure](docs/cl/azure.md), [Bare-Metal](docs/cl/bare-metal.md), [Digital Ocean](docs/cl/digital-ocean.md), and [Google-Cloud](docs/cl/google-cloud.md)
|
||||
* Fedora CoreOS tutorials for [AWS](docs/fedora-coreos/aws.md), [Azure](docs/fedora-coreos/azure.md), [Bare-Metal](docs/fedora-coreos/bare-metal.md), [DigitalOcean](docs/fedora-coreos/digitalocean.md), and [Google Cloud](docs/fedora-coreos/google-cloud.md)
|
||||
* Flatcar Linux tutorials for [AWS](docs/flatcar-linux/aws.md), [Azure](docs/flatcar-linux/azure.md), [Bare-Metal](docs/flatcar-linux/bare-metal.md), [DigitalOcean](docs/flatcar-linux/digitalocean.md), and [Google Cloud](docs/flatcar-linux/google-cloud.md)
|
||||
|
||||
## Usage
|
||||
|
||||
@ -48,7 +54,7 @@ Define a Kubernetes cluster by using the Terraform module for your chosen platfo
|
||||
|
||||
```tf
|
||||
module "yavin" {
|
||||
source = "git::https://github.com/poseidon/typhoon//google-cloud/container-linux/kubernetes?ref=v1.17.0"
|
||||
source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes?ref=v1.18.4"
|
||||
|
||||
# Google Cloud
|
||||
cluster_name = "yavin"
|
||||
@ -58,7 +64,7 @@ module "yavin" {
|
||||
|
||||
# configuration
|
||||
ssh_authorized_key = "ssh-rsa AAAAB3Nz..."
|
||||
|
||||
|
||||
# optional
|
||||
worker_count = 2
|
||||
worker_preemptible = true
|
||||
@ -87,9 +93,9 @@ In 4-8 minutes (varies by platform), the cluster will be ready. This Google Clou
|
||||
$ export KUBECONFIG=/home/user/.kube/configs/yavin-config
|
||||
$ kubectl get nodes
|
||||
NAME ROLES STATUS AGE VERSION
|
||||
yavin-controller-0.c.example-com.internal <none> Ready 6m v1.17.0
|
||||
yavin-worker-jrbf.c.example-com.internal <none> Ready 5m v1.17.0
|
||||
yavin-worker-mzdm.c.example-com.internal <none> Ready 5m v1.17.0
|
||||
yavin-controller-0.c.example-com.internal <none> Ready 6m v1.18.4
|
||||
yavin-worker-jrbf.c.example-com.internal <none> Ready 5m v1.18.4
|
||||
yavin-worker-mzdm.c.example-com.internal <none> Ready 5m v1.18.4
|
||||
```
|
||||
|
||||
List the pods.
|
||||
|
@ -1,4 +0,0 @@
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: reboot-coordinator
|
@ -1,12 +0,0 @@
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: reboot-coordinator
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: reboot-coordinator
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
namespace: reboot-coordinator
|
||||
name: default
|
@ -1,45 +0,0 @@
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: reboot-coordinator
|
||||
rules:
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- nodes
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- update
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- configmaps
|
||||
verbs:
|
||||
- create
|
||||
- get
|
||||
- update
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- events
|
||||
verbs:
|
||||
- create
|
||||
- watch
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- pods
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- delete
|
||||
- apiGroups:
|
||||
- "extensions"
|
||||
resources:
|
||||
- daemonsets
|
||||
verbs:
|
||||
- get
|
@ -1,68 +0,0 @@
|
||||
apiVersion: apps/v1
|
||||
kind: DaemonSet
|
||||
metadata:
|
||||
name: container-linux-update-agent
|
||||
namespace: reboot-coordinator
|
||||
spec:
|
||||
updateStrategy:
|
||||
type: RollingUpdate
|
||||
rollingUpdate:
|
||||
maxUnavailable: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
name: container-linux-update-agent
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
name: container-linux-update-agent
|
||||
annotations:
|
||||
seccomp.security.alpha.kubernetes.io/pod: 'docker/default'
|
||||
spec:
|
||||
tolerations:
|
||||
- key: node-role.kubernetes.io/master
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
containers:
|
||||
- name: update-agent
|
||||
image: quay.io/coreos/container-linux-update-operator:v0.7.0
|
||||
command:
|
||||
- "/bin/update-agent"
|
||||
env:
|
||||
# read by update-agent as the node name to manage reboots for
|
||||
- name: UPDATE_AGENT_NODE
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: spec.nodeName
|
||||
- name: POD_NAMESPACE
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.namespace
|
||||
resources:
|
||||
requests:
|
||||
cpu: 10m
|
||||
memory: 20Mi
|
||||
limits:
|
||||
cpu: 20m
|
||||
memory: 40Mi
|
||||
volumeMounts:
|
||||
- mountPath: /var/run/dbus
|
||||
name: var-run-dbus
|
||||
- mountPath: /etc/coreos
|
||||
name: etc-coreos
|
||||
- mountPath: /usr/share/coreos
|
||||
name: usr-share-coreos
|
||||
- mountPath: /etc/os-release
|
||||
name: etc-os-release
|
||||
volumes:
|
||||
- name: var-run-dbus
|
||||
hostPath:
|
||||
path: /var/run/dbus
|
||||
- name: etc-coreos
|
||||
hostPath:
|
||||
path: /etc/coreos
|
||||
- name: usr-share-coreos
|
||||
hostPath:
|
||||
path: /usr/share/coreos
|
||||
- name: etc-os-release
|
||||
hostPath:
|
||||
path: /etc/os-release
|
@ -1,39 +0,0 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: container-linux-update-operator
|
||||
namespace: reboot-coordinator
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
name: container-linux-update-operator
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
name: container-linux-update-operator
|
||||
annotations:
|
||||
seccomp.security.alpha.kubernetes.io/pod: 'docker/default'
|
||||
spec:
|
||||
tolerations:
|
||||
- key: node-role.kubernetes.io/master
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
containers:
|
||||
- name: update-operator
|
||||
image: quay.io/coreos/container-linux-update-operator:v0.7.0
|
||||
command:
|
||||
- "/bin/update-operator"
|
||||
env:
|
||||
- name: POD_NAMESPACE
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.namespace
|
||||
resources:
|
||||
requests:
|
||||
cpu: 10m
|
||||
memory: 20Mi
|
||||
limits:
|
||||
cpu: 20m
|
||||
memory: 40Mi
|
||||
|
@ -21,7 +21,7 @@ data:
|
||||
"links": [
|
||||
|
||||
],
|
||||
"refresh": "",
|
||||
"refresh": "10s",
|
||||
"rows": [
|
||||
{
|
||||
"collapse": false,
|
||||
@ -558,15 +558,15 @@ data:
|
||||
},
|
||||
"id": 8,
|
||||
"legend": {
|
||||
"alignAsTable": "true",
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": "true",
|
||||
"current": true,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": "true",
|
||||
"show": "true",
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": "true"
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
@ -649,15 +649,15 @@ data:
|
||||
},
|
||||
"id": 9,
|
||||
"legend": {
|
||||
"alignAsTable": "true",
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": "true",
|
||||
"current": true,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": "true",
|
||||
"show": "true",
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": "true"
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
@ -753,15 +753,15 @@ data:
|
||||
},
|
||||
"id": 10,
|
||||
"legend": {
|
||||
"alignAsTable": "true",
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": "true",
|
||||
"current": true,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": "true",
|
||||
"show": "true",
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": "true"
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
@ -857,15 +857,15 @@ data:
|
||||
},
|
||||
"id": 11,
|
||||
"legend": {
|
||||
"alignAsTable": "true",
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": "true",
|
||||
"current": true,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": "true",
|
||||
"show": "true",
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": "true"
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
@ -955,15 +955,15 @@ data:
|
||||
},
|
||||
"id": 12,
|
||||
"legend": {
|
||||
"alignAsTable": "true",
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": "true",
|
||||
"current": true,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": "true",
|
||||
"show": "true",
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": "true"
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
@ -1066,17 +1066,17 @@ data:
|
||||
},
|
||||
"id": 13,
|
||||
"legend": {
|
||||
"alignAsTable": "true",
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": "true",
|
||||
"hideEmpty": "true",
|
||||
"hideZero": "true",
|
||||
"current": true,
|
||||
"hideEmpty": true,
|
||||
"hideZero": true,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": "true",
|
||||
"show": "true",
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": "true"
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
@ -1159,17 +1159,17 @@ data:
|
||||
},
|
||||
"id": 14,
|
||||
"legend": {
|
||||
"alignAsTable": "true",
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": "true",
|
||||
"hideEmpty": "true",
|
||||
"hideZero": "true",
|
||||
"current": true,
|
||||
"hideEmpty": true,
|
||||
"hideZero": true,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": "true",
|
||||
"show": "true",
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": "true"
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
@ -1265,17 +1265,17 @@ data:
|
||||
},
|
||||
"id": 15,
|
||||
"legend": {
|
||||
"alignAsTable": "true",
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": "true",
|
||||
"hideEmpty": "true",
|
||||
"hideZero": "true",
|
||||
"current": true,
|
||||
"hideEmpty": true,
|
||||
"hideZero": true,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": "true",
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": "true"
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
@ -1371,15 +1371,15 @@ data:
|
||||
},
|
||||
"id": 16,
|
||||
"legend": {
|
||||
"alignAsTable": "true",
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": "true",
|
||||
"current": true,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": "true",
|
||||
"show": "true",
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": "true"
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
@ -1462,15 +1462,15 @@ data:
|
||||
},
|
||||
"id": 17,
|
||||
"legend": {
|
||||
"alignAsTable": "true",
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": "true",
|
||||
"current": true,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": "true",
|
||||
"show": "true",
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": "true"
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
@ -1567,15 +1567,15 @@ data:
|
||||
},
|
||||
"id": 18,
|
||||
"legend": {
|
||||
"alignAsTable": "true",
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": "true",
|
||||
"current": true,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": "true",
|
||||
"show": "true",
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": "true"
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
@ -1658,15 +1658,15 @@ data:
|
||||
},
|
||||
"id": 19,
|
||||
"legend": {
|
||||
"alignAsTable": "true",
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": "true",
|
||||
"current": true,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": "true",
|
||||
"show": "true",
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": "true"
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
@ -1762,15 +1762,15 @@ data:
|
||||
},
|
||||
"id": 20,
|
||||
"legend": {
|
||||
"alignAsTable": "true",
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": "true",
|
||||
"current": true,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": "true",
|
||||
"show": "true",
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": "true"
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
@ -1991,15 +1991,15 @@ data:
|
||||
},
|
||||
"id": 22,
|
||||
"legend": {
|
||||
"alignAsTable": "true",
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": "true",
|
||||
"current": true,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": "true",
|
||||
"show": "true",
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": "true"
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
@ -2373,8 +2373,8 @@ data:
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"text": "Prometheus",
|
||||
"value": "Prometheus"
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": null,
|
||||
@ -2427,7 +2427,7 @@ data:
|
||||
"options": [
|
||||
|
||||
],
|
||||
"query": "label_values(kubelet_runtime_operations{cluster=\"$cluster\", job=\"kubelet\"}, instance)",
|
||||
"query": "label_values(kubelet_runtime_operations_total{cluster=\"$cluster\", job=\"kubelet\"}, instance)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
@ -2496,7 +2496,7 @@ data:
|
||||
"links": [
|
||||
|
||||
],
|
||||
"refresh": "",
|
||||
"refresh": "10s",
|
||||
"rows": [
|
||||
{
|
||||
"collapse": false,
|
||||
@ -2691,15 +2691,15 @@ data:
|
||||
},
|
||||
"id": 4,
|
||||
"legend": {
|
||||
"alignAsTable": "true",
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": "true",
|
||||
"current": true,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": "true",
|
||||
"show": "true",
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": "true"
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
@ -2886,15 +2886,15 @@ data:
|
||||
},
|
||||
"id": 6,
|
||||
"legend": {
|
||||
"alignAsTable": "true",
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": "true",
|
||||
"current": true,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": "true",
|
||||
"show": "true",
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": "true"
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
@ -3206,15 +3206,15 @@ data:
|
||||
},
|
||||
"id": 9,
|
||||
"legend": {
|
||||
"alignAsTable": "true",
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": "true",
|
||||
"current": true,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": "true",
|
||||
"show": "true",
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": "true"
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
@ -3588,8 +3588,8 @@ data:
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"text": "Prometheus",
|
||||
"value": "Prometheus"
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": null,
|
||||
|
@ -59,7 +59,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\", cluster=\"$cluster\"}[1m]))",
|
||||
"expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\", cluster=\"$cluster\"}[$__interval]))",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -1561,7 +1561,7 @@ data:
|
||||
],
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)",
|
||||
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -1570,7 +1570,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)",
|
||||
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -1579,7 +1579,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)",
|
||||
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -1588,7 +1588,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)",
|
||||
"expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -1597,7 +1597,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)",
|
||||
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -1606,7 +1606,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)",
|
||||
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -1706,7 +1706,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)",
|
||||
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{namespace}}",
|
||||
@ -1804,7 +1804,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)",
|
||||
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{namespace}}",
|
||||
@ -1902,7 +1902,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)",
|
||||
"expr": "avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{namespace}}",
|
||||
@ -2000,7 +2000,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)",
|
||||
"expr": "avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{namespace}}",
|
||||
@ -2098,7 +2098,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)",
|
||||
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{namespace}}",
|
||||
@ -2196,7 +2196,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)",
|
||||
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{namespace}}",
|
||||
@ -2294,7 +2294,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)",
|
||||
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{namespace}}",
|
||||
@ -2392,7 +2392,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$interval])) by (namespace)",
|
||||
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\".+\"}[$__interval])) by (namespace)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{namespace}}",
|
||||
@ -2458,8 +2458,8 @@ data:
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"text": "Prometheus",
|
||||
"value": "Prometheus"
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": null,
|
||||
@ -2501,37 +2501,29 @@ data:
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"auto": false,
|
||||
"auto_count": 30,
|
||||
"auto_min": "10s",
|
||||
"current": {
|
||||
"text": "5m",
|
||||
"value": "5m"
|
||||
"text": "",
|
||||
"value": ""
|
||||
},
|
||||
"datasource": "prometheus",
|
||||
"datasource": "$datasource",
|
||||
"hide": 2,
|
||||
"includeAll": false,
|
||||
"label": null,
|
||||
"multi": false,
|
||||
"name": "interval",
|
||||
"name": "cluster",
|
||||
"options": [
|
||||
{
|
||||
"selected": true,
|
||||
"text": "4h",
|
||||
"value": "4h"
|
||||
}
|
||||
|
||||
],
|
||||
"query": "4h",
|
||||
"query": "label_values(node_cpu_seconds_total, cluster)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [
|
||||
|
||||
],
|
||||
"tagsQuery": "",
|
||||
"type": "interval",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
@ -2586,6 +2578,354 @@ data:
|
||||
],
|
||||
"refresh": "10s",
|
||||
"rows": [
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "100px",
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"format": "percentunit",
|
||||
"id": 1,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 3,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"})",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": "70,80",
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "CPU Utilisation (from requests)",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "singlestat",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [
|
||||
|
||||
]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"format": "percentunit",
|
||||
"id": 2,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 3,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\"})",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": "70,80",
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "CPU Utilisation (from limits)",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "singlestat",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [
|
||||
|
||||
]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"format": "percentunit",
|
||||
"id": 3,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 3,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) / sum(kube_pod_container_resource_requests_memory_bytes{namespace=\"$namespace\"})",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": "70,80",
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Memory Utilization (from requests)",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "singlestat",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [
|
||||
|
||||
]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"format": "percentunit",
|
||||
"id": 4,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 3,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) / sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\"})",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": "70,80",
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Memory Utilisation (from limits)",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "singlestat",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [
|
||||
|
||||
]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": false,
|
||||
"title": "Headlines",
|
||||
"titleSize": "h6"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
@ -2599,7 +2939,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"id": 1,
|
||||
"id": 5,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
@ -2620,7 +2960,26 @@ data:
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
|
||||
{
|
||||
"alias": "quota - requests",
|
||||
"color": "#F2495C",
|
||||
"dashes": true,
|
||||
"fill": 0,
|
||||
"hideTooltip": true,
|
||||
"legend": false,
|
||||
"linewidth": 2,
|
||||
"stack": false
|
||||
},
|
||||
{
|
||||
"alias": "quota - limits",
|
||||
"color": "#FF9830",
|
||||
"dashes": true,
|
||||
"fill": 0,
|
||||
"hideTooltip": true,
|
||||
"legend": false,
|
||||
"linewidth": 2,
|
||||
"stack": false
|
||||
}
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 12,
|
||||
@ -2634,6 +2993,22 @@ data:
|
||||
"legendFormat": "{{pod}}",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "quota - requests",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.cpu\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "quota - limits",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
@ -2697,7 +3072,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 2,
|
||||
"id": 6,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
@ -2964,7 +3339,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"id": 3,
|
||||
"id": 7,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
@ -2985,7 +3360,26 @@ data:
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
|
||||
{
|
||||
"alias": "quota - requests",
|
||||
"color": "#F2495C",
|
||||
"dashes": true,
|
||||
"fill": 0,
|
||||
"hideTooltip": true,
|
||||
"legend": false,
|
||||
"linewidth": 2,
|
||||
"stack": false
|
||||
},
|
||||
{
|
||||
"alias": "quota - limits",
|
||||
"color": "#FF9830",
|
||||
"dashes": true,
|
||||
"fill": 0,
|
||||
"hideTooltip": true,
|
||||
"legend": false,
|
||||
"linewidth": 2,
|
||||
"stack": false
|
||||
}
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 12,
|
||||
@ -2999,6 +3393,22 @@ data:
|
||||
"legendFormat": "{{pod}}",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "quota - requests",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "quota - limits",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
@ -3062,7 +3472,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 4,
|
||||
"id": 8,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
@ -3410,7 +3820,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 5,
|
||||
"id": 9,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
@ -3588,7 +3998,7 @@ data:
|
||||
],
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])) by (pod)",
|
||||
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -3597,7 +4007,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])) by (pod)",
|
||||
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -3606,7 +4016,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])) by (pod)",
|
||||
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -3615,7 +4025,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])) by (pod)",
|
||||
"expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -3624,7 +4034,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])) by (pod)",
|
||||
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -3633,7 +4043,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])) by (pod)",
|
||||
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -3698,398 +4108,6 @@ data:
|
||||
{
|
||||
"aliasColors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"id": 6,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 0,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 12,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])) by (pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Receive Bandwidth",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [
|
||||
|
||||
]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "Bps",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Network",
|
||||
"titleSize": "h6"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"id": 7,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 0,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 12,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])) by (pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Transmit Bandwidth",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [
|
||||
|
||||
]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "Bps",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Network",
|
||||
"titleSize": "h6"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"id": 8,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 0,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 12,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])) by (pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Rate of Received Packets",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [
|
||||
|
||||
]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "Bps",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Network",
|
||||
"titleSize": "h6"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"id": 9,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 0,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 12,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])) by (pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Rate of Transmitted Packets",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [
|
||||
|
||||
]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "Bps",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Network",
|
||||
"titleSize": "h6"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
@ -4125,7 +4143,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])) by (pod)",
|
||||
"expr": "sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
@ -4138,7 +4156,7 @@ data:
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Rate of Received Packets Dropped",
|
||||
"title": "Receive Bandwidth",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
@ -4223,7 +4241,399 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])) by (pod)",
|
||||
"expr": "sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Transmit Bandwidth",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [
|
||||
|
||||
]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "Bps",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Network",
|
||||
"titleSize": "h6"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"id": 12,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 0,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 12,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Rate of Received Packets",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [
|
||||
|
||||
]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "Bps",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Network",
|
||||
"titleSize": "h6"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"id": 13,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 0,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 12,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Rate of Transmitted Packets",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [
|
||||
|
||||
]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "Bps",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Network",
|
||||
"titleSize": "h6"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"id": 14,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 0,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 12,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Rate of Received Packets Dropped",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [
|
||||
|
||||
]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "Bps",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Network",
|
||||
"titleSize": "h6"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"id": 15,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 0,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 12,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])) by (pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
@ -4289,8 +4699,8 @@ data:
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"text": "Prometheus",
|
||||
"value": "Prometheus"
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": null,
|
||||
@ -4306,13 +4716,13 @@ data:
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
"text": "prod",
|
||||
"value": "prod"
|
||||
"text": "",
|
||||
"value": ""
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 2,
|
||||
"includeAll": false,
|
||||
"label": "cluster",
|
||||
"label": null,
|
||||
"multi": false,
|
||||
"name": "cluster",
|
||||
"options": [
|
||||
@ -4321,7 +4731,7 @@ data:
|
||||
"query": "label_values(kube_pod_info, cluster)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 2,
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [
|
||||
|
||||
@ -4333,13 +4743,13 @@ data:
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
"text": "prod",
|
||||
"value": "prod"
|
||||
"text": "",
|
||||
"value": ""
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "namespace",
|
||||
"label": null,
|
||||
"multi": false,
|
||||
"name": "namespace",
|
||||
"options": [
|
||||
@ -4348,48 +4758,13 @@ data:
|
||||
"query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 2,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [
|
||||
|
||||
],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"auto": false,
|
||||
"auto_count": 30,
|
||||
"auto_min": "10s",
|
||||
"current": {
|
||||
"text": "5m",
|
||||
"value": "5m"
|
||||
},
|
||||
"datasource": "prometheus",
|
||||
"hide": 2,
|
||||
"includeAll": false,
|
||||
"label": null,
|
||||
"multi": false,
|
||||
"name": "interval",
|
||||
"options": [
|
||||
{
|
||||
"selected": true,
|
||||
"text": "4h",
|
||||
"value": "4h"
|
||||
}
|
||||
],
|
||||
"query": "4h",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [
|
||||
|
||||
],
|
||||
"tagsQuery": "",
|
||||
"type": "interval",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
@ -5265,8 +5640,8 @@ data:
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"text": "Prometheus",
|
||||
"value": "Prometheus"
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": null,
|
||||
@ -5282,13 +5657,13 @@ data:
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
"text": "prod",
|
||||
"value": "prod"
|
||||
"text": "",
|
||||
"value": ""
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 2,
|
||||
"includeAll": false,
|
||||
"label": "cluster",
|
||||
"label": null,
|
||||
"multi": false,
|
||||
"name": "cluster",
|
||||
"options": [
|
||||
@ -5297,7 +5672,7 @@ data:
|
||||
"query": "label_values(kube_pod_info, cluster)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 2,
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [
|
||||
|
||||
@ -5309,13 +5684,13 @@ data:
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
"text": "prod",
|
||||
"value": "prod"
|
||||
"text": "",
|
||||
"value": ""
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "node",
|
||||
"label": null,
|
||||
"multi": false,
|
||||
"name": "node",
|
||||
"options": [
|
||||
@ -5324,7 +5699,7 @@ data:
|
||||
"query": "label_values(kube_pod_info{cluster=\"$cluster\"}, node)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 2,
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [
|
||||
|
||||
|
@ -50,7 +50,24 @@ data:
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
|
||||
{
|
||||
"alias": "requests",
|
||||
"color": "#F2495C",
|
||||
"fill": 0,
|
||||
"hideTooltip": true,
|
||||
"legend": true,
|
||||
"linewidth": 2,
|
||||
"stack": false
|
||||
},
|
||||
{
|
||||
"alias": "limits",
|
||||
"color": "#FF9830",
|
||||
"fill": 0,
|
||||
"hideTooltip": true,
|
||||
"legend": true,
|
||||
"linewidth": 2,
|
||||
"stack": false
|
||||
}
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 12,
|
||||
@ -64,6 +81,22 @@ data:
|
||||
"legendFormat": "{{container}}",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(\n kube_pod_container_resource_requests_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"})\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "requests",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(\n kube_pod_container_resource_limits_cpu_cores{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"})\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "limits",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
@ -126,8 +159,113 @@ data:
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"fill": 10,
|
||||
"id": 2,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": true,
|
||||
"max": true,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 0,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 12,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(increase(container_cpu_cfs_throttled_periods_total{namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", cluster=\"$cluster\"}[5m])) by (container) /sum(increase(container_cpu_cfs_periods_total{namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", cluster=\"$cluster\"}[5m])) by (container)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{container}}",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
{
|
||||
"colorMode": "critical",
|
||||
"fill": true,
|
||||
"line": true,
|
||||
"op": "gt",
|
||||
"value": 1,
|
||||
"yaxis": "left"
|
||||
}
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "CPU Throttling",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [
|
||||
|
||||
]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "percentunit",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": 1,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "CPU Throttling",
|
||||
"titleSize": "h6"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 3,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
@ -394,7 +532,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"id": 3,
|
||||
"id": 4,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
@ -415,7 +553,26 @@ data:
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
|
||||
{
|
||||
"alias": "requests",
|
||||
"color": "#F2495C",
|
||||
"dashes": true,
|
||||
"fill": 0,
|
||||
"hideTooltip": true,
|
||||
"legend": false,
|
||||
"linewidth": 2,
|
||||
"stack": false
|
||||
},
|
||||
{
|
||||
"alias": "limits",
|
||||
"color": "#FF9830",
|
||||
"dashes": true,
|
||||
"fill": 0,
|
||||
"hideTooltip": true,
|
||||
"legend": false,
|
||||
"linewidth": 2,
|
||||
"stack": false
|
||||
}
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 12,
|
||||
@ -423,26 +580,26 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(container_memory_rss{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\"}) by (container)",
|
||||
"expr": "sum(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\"}) by (container)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{container}} (RSS)",
|
||||
"legendFormat": "{{container}}",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(container_memory_cache{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\"}) by (container)",
|
||||
"expr": "sum(\n kube_pod_container_resource_requests_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"})\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{container}} (Cache)",
|
||||
"legendFormat": "requests",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(container_memory_swap{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"POD\", container!=\"\"}) by (container)",
|
||||
"expr": "sum(\n kube_pod_container_resource_limits_memory_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"})\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{container}} (Swap)",
|
||||
"legendFormat": "limits",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
}
|
||||
@ -508,7 +665,7 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 4,
|
||||
"id": 5,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
@ -850,104 +1007,6 @@ data:
|
||||
{
|
||||
"aliasColors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"id": 5,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 0,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 12,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval])) by (pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Receive Bandwidth",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [
|
||||
|
||||
]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "Bps",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Network",
|
||||
"titleSize": "h6"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
@ -983,7 +1042,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval])) by (pod)",
|
||||
"expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__interval])) by (pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
@ -996,7 +1055,7 @@ data:
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Transmit Bandwidth",
|
||||
"title": "Receive Bandwidth",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
@ -1081,7 +1140,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval])) by (pod)",
|
||||
"expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__interval])) by (pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
@ -1094,7 +1153,7 @@ data:
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Rate of Received Packets",
|
||||
"title": "Transmit Bandwidth",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
@ -1179,7 +1238,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval])) by (pod)",
|
||||
"expr": "sum(irate(container_network_receive_packets_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__interval])) by (pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
@ -1192,7 +1251,7 @@ data:
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Rate of Transmitted Packets",
|
||||
"title": "Rate of Received Packets",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
@ -1277,7 +1336,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval])) by (pod)",
|
||||
"expr": "sum(irate(container_network_transmit_packets_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__interval])) by (pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
@ -1290,7 +1349,7 @@ data:
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Rate of Received Packets Dropped",
|
||||
"title": "Rate of Transmitted Packets",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
@ -1375,7 +1434,105 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$interval])) by (pod)",
|
||||
"expr": "sum(irate(container_network_receive_packets_dropped_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__interval])) by (pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Rate of Received Packets Dropped",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [
|
||||
|
||||
]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "Bps",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Network",
|
||||
"titleSize": "h6"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 10,
|
||||
"id": 11,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 0,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null as zero",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 12,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(irate(container_network_transmit_packets_dropped_total{namespace=~\"$namespace\", pod=~\"$pod\"}[$__interval])) by (pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
@ -1441,8 +1598,8 @@ data:
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"text": "Prometheus",
|
||||
"value": "Prometheus"
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": null,
|
||||
@ -1458,13 +1615,13 @@ data:
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
"text": "prod",
|
||||
"value": "prod"
|
||||
"text": "",
|
||||
"value": ""
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 2,
|
||||
"includeAll": false,
|
||||
"label": "cluster",
|
||||
"label": null,
|
||||
"multi": false,
|
||||
"name": "cluster",
|
||||
"options": [
|
||||
@ -1473,7 +1630,7 @@ data:
|
||||
"query": "label_values(kube_pod_info, cluster)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 2,
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [
|
||||
|
||||
@ -1485,13 +1642,13 @@ data:
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
"text": "prod",
|
||||
"value": "prod"
|
||||
"text": "",
|
||||
"value": ""
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "namespace",
|
||||
"label": null,
|
||||
"multi": false,
|
||||
"name": "namespace",
|
||||
"options": [
|
||||
@ -1500,75 +1657,40 @@ data:
|
||||
"query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 2,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [
|
||||
|
||||
],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
"text": "prod",
|
||||
"value": "prod"
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "pod",
|
||||
"multi": false,
|
||||
"name": "pod",
|
||||
"options": [
|
||||
|
||||
],
|
||||
"query": "label_values(kube_pod_info{cluster=\"$cluster\", namespace=\"$namespace\"}, pod)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 2,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [
|
||||
|
||||
],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"auto": false,
|
||||
"auto_count": 30,
|
||||
"auto_min": "10s",
|
||||
"current": {
|
||||
"text": "5m",
|
||||
"value": "5m"
|
||||
},
|
||||
"datasource": "prometheus",
|
||||
"hide": 2,
|
||||
"includeAll": false,
|
||||
"label": null,
|
||||
"multi": false,
|
||||
"name": "interval",
|
||||
"options": [
|
||||
{
|
||||
"selected": true,
|
||||
"text": "4h",
|
||||
"value": "4h"
|
||||
}
|
||||
],
|
||||
"query": "4h",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [
|
||||
|
||||
],
|
||||
"tagsQuery": "",
|
||||
"type": "interval",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
"text": "",
|
||||
"value": ""
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": null,
|
||||
"multi": false,
|
||||
"name": "pod",
|
||||
"options": [
|
||||
|
||||
],
|
||||
"query": "label_values(kube_pod_info{cluster=\"$cluster\", namespace=\"$namespace\"}, pod)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [
|
||||
|
||||
],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
@ -2544,7 +2666,7 @@ data:
|
||||
],
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -2553,7 +2675,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -2562,7 +2684,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -2571,7 +2693,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -2580,7 +2702,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -2589,7 +2711,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -2689,7 +2811,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
@ -2787,7 +2909,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
@ -2885,7 +3007,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
@ -2983,7 +3105,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
@ -3081,7 +3203,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
@ -3179,7 +3301,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
@ -3277,7 +3399,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
@ -3375,7 +3497,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod) \ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\", workload_type=\"$type\"}) by (pod))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{pod}}",
|
||||
@ -3441,8 +3563,8 @@ data:
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"text": "Prometheus",
|
||||
"value": "Prometheus"
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": null,
|
||||
@ -3458,13 +3580,13 @@ data:
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
"text": "prod",
|
||||
"value": "prod"
|
||||
"text": "",
|
||||
"value": ""
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 2,
|
||||
"includeAll": false,
|
||||
"label": "cluster",
|
||||
"label": null,
|
||||
"multi": false,
|
||||
"name": "cluster",
|
||||
"options": [
|
||||
@ -3473,7 +3595,7 @@ data:
|
||||
"query": "label_values(kube_pod_info, cluster)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 2,
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [
|
||||
|
||||
@ -3485,13 +3607,13 @@ data:
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
"text": "prod",
|
||||
"value": "prod"
|
||||
"text": "",
|
||||
"value": ""
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "namespace",
|
||||
"label": null,
|
||||
"multi": false,
|
||||
"name": "namespace",
|
||||
"options": [
|
||||
@ -3500,7 +3622,7 @@ data:
|
||||
"query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 2,
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [
|
||||
|
||||
@ -3512,13 +3634,13 @@ data:
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
"text": "prod",
|
||||
"value": "prod"
|
||||
"text": "",
|
||||
"value": ""
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "workload",
|
||||
"label": null,
|
||||
"multi": false,
|
||||
"name": "workload",
|
||||
"options": [
|
||||
@ -3527,7 +3649,7 @@ data:
|
||||
"query": "label_values(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\"}, workload)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 2,
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [
|
||||
|
||||
@ -3539,13 +3661,13 @@ data:
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
"text": "prod",
|
||||
"value": "prod"
|
||||
"text": "",
|
||||
"value": ""
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "type",
|
||||
"label": null,
|
||||
"multi": false,
|
||||
"name": "type",
|
||||
"options": [
|
||||
@ -3554,48 +3676,13 @@ data:
|
||||
"query": "label_values(mixin_pod_workload{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\"}, workload_type)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 2,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [
|
||||
|
||||
],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"auto": false,
|
||||
"auto_count": 30,
|
||||
"auto_min": "10s",
|
||||
"current": {
|
||||
"text": "5m",
|
||||
"value": "5m"
|
||||
},
|
||||
"datasource": "prometheus",
|
||||
"hide": 2,
|
||||
"includeAll": false,
|
||||
"label": null,
|
||||
"multi": false,
|
||||
"name": "interval",
|
||||
"options": [
|
||||
{
|
||||
"selected": true,
|
||||
"text": "4h",
|
||||
"value": "4h"
|
||||
}
|
||||
],
|
||||
"query": "4h",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [
|
||||
|
||||
],
|
||||
"tagsQuery": "",
|
||||
"type": "interval",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
@ -3684,7 +3771,26 @@ data:
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
|
||||
{
|
||||
"alias": "quota - requests",
|
||||
"color": "#F2495C",
|
||||
"dashes": true,
|
||||
"fill": 0,
|
||||
"hideTooltip": true,
|
||||
"legend": false,
|
||||
"linewidth": 2,
|
||||
"stack": false
|
||||
},
|
||||
{
|
||||
"alias": "quota - limits",
|
||||
"color": "#FF9830",
|
||||
"dashes": true,
|
||||
"fill": 0,
|
||||
"hideTooltip": true,
|
||||
"legend": false,
|
||||
"linewidth": 2,
|
||||
"stack": false
|
||||
}
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 12,
|
||||
@ -3698,6 +3804,22 @@ data:
|
||||
"legendFormat": "{{workload}} - {{workload_type}}",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "quota - requests",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.cpu\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "quota - limits",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
@ -4094,7 +4216,26 @@ data:
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
|
||||
{
|
||||
"alias": "quota - requests",
|
||||
"color": "#F2495C",
|
||||
"dashes": true,
|
||||
"fill": 0,
|
||||
"hideTooltip": true,
|
||||
"legend": false,
|
||||
"linewidth": 2,
|
||||
"stack": false
|
||||
},
|
||||
{
|
||||
"alias": "quota - limits",
|
||||
"color": "#FF9830",
|
||||
"dashes": true,
|
||||
"fill": 0,
|
||||
"hideTooltip": true,
|
||||
"legend": false,
|
||||
"linewidth": 2,
|
||||
"stack": false
|
||||
}
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 12,
|
||||
@ -4108,6 +4249,22 @@ data:
|
||||
"legendFormat": "{{workload}} - {{workload_type}}",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "quota - requests",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "scalar(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "quota - limits",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
@ -4679,7 +4836,7 @@ data:
|
||||
],
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -4688,7 +4845,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -4697,7 +4854,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -4706,7 +4863,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -4715,7 +4872,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -4724,7 +4881,7 @@ data:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -4824,7 +4981,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"expr": "(sum(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{workload}}",
|
||||
@ -4922,7 +5079,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"expr": "(sum(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{workload}}",
|
||||
@ -5020,7 +5177,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"expr": "(avg(irate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{workload}}",
|
||||
@ -5118,7 +5275,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"expr": "(avg(irate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{workload}}",
|
||||
@ -5216,7 +5373,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"expr": "(sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{workload}}",
|
||||
@ -5314,7 +5471,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"expr": "(sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{workload}}",
|
||||
@ -5412,7 +5569,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"expr": "(sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{workload}}",
|
||||
@ -5510,7 +5667,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$interval])\n* on (namespace,pod) \ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"expr": "(sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=~\"$namespace\"}[$__interval])\n* on (namespace,pod) \ngroup_left(workload,workload_type) mixin_pod_workload{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\", workload_type=\"$type\"}) by (workload))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{workload}}",
|
||||
@ -5576,8 +5733,8 @@ data:
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"text": "Prometheus",
|
||||
"value": "Prometheus"
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": null,
|
||||
@ -5590,95 +5747,6 @@ data:
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
"text": "prod",
|
||||
"value": "prod"
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 2,
|
||||
"includeAll": false,
|
||||
"label": "cluster",
|
||||
"multi": false,
|
||||
"name": "cluster",
|
||||
"options": [
|
||||
|
||||
],
|
||||
"query": "label_values(kube_pod_info, cluster)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 2,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [
|
||||
|
||||
],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
"text": "prod",
|
||||
"value": "prod"
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "namespace",
|
||||
"multi": false,
|
||||
"name": "namespace",
|
||||
"options": [
|
||||
|
||||
],
|
||||
"query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 2,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [
|
||||
|
||||
],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"auto": false,
|
||||
"auto_count": 30,
|
||||
"auto_min": "10s",
|
||||
"current": {
|
||||
"text": "5m",
|
||||
"value": "5m"
|
||||
},
|
||||
"datasource": "prometheus",
|
||||
"hide": 2,
|
||||
"includeAll": false,
|
||||
"label": null,
|
||||
"multi": false,
|
||||
"name": "interval",
|
||||
"options": [
|
||||
{
|
||||
"selected": true,
|
||||
"text": "4h",
|
||||
"value": "4h"
|
||||
}
|
||||
],
|
||||
"query": "4h",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [
|
||||
|
||||
],
|
||||
"tagsQuery": "",
|
||||
"type": "interval",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"auto": false,
|
||||
@ -5706,6 +5774,60 @@ data:
|
||||
"tagValuesQuery": "",
|
||||
"tags": [
|
||||
|
||||
],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
"text": "",
|
||||
"value": ""
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 2,
|
||||
"includeAll": false,
|
||||
"label": null,
|
||||
"multi": false,
|
||||
"name": "cluster",
|
||||
"options": [
|
||||
|
||||
],
|
||||
"query": "label_values(kube_pod_info, cluster)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [
|
||||
|
||||
],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
"text": "",
|
||||
"value": ""
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": null,
|
||||
"multi": false,
|
||||
"name": "namespace",
|
||||
"options": [
|
||||
|
||||
],
|
||||
"query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [
|
||||
|
||||
],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
|
@ -21,7 +21,7 @@ data:
|
||||
"links": [
|
||||
|
||||
],
|
||||
"refresh": "",
|
||||
"refresh": "10s",
|
||||
"rows": [
|
||||
{
|
||||
"collapse": false,
|
||||
@ -88,7 +88,7 @@ data:
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(up{job=\"apiserver\"})",
|
||||
"expr": "sum(up{job=\"apiserver\", cluster=\"$cluster\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "",
|
||||
@ -155,28 +155,28 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(apiserver_request_total{job=\"apiserver\", instance=~\"$instance\",code=~\"2..\"}[5m]))",
|
||||
"expr": "sum(rate(apiserver_request_total{job=\"apiserver\", instance=~\"$instance\",code=~\"2..\", cluster=\"$cluster\"}[5m]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "2xx",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(apiserver_request_total{job=\"apiserver\", instance=~\"$instance\",code=~\"3..\"}[5m]))",
|
||||
"expr": "sum(rate(apiserver_request_total{job=\"apiserver\", instance=~\"$instance\",code=~\"3..\", cluster=\"$cluster\"}[5m]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "3xx",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(apiserver_request_total{job=\"apiserver\", instance=~\"$instance\",code=~\"4..\"}[5m]))",
|
||||
"expr": "sum(rate(apiserver_request_total{job=\"apiserver\", instance=~\"$instance\",code=~\"4..\", cluster=\"$cluster\"}[5m]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "4xx",
|
||||
"refId": "C"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(apiserver_request_total{job=\"apiserver\", instance=~\"$instance\",code=~\"5..\"}[5m]))",
|
||||
"expr": "sum(rate(apiserver_request_total{job=\"apiserver\", instance=~\"$instance\",code=~\"5..\", cluster=\"$cluster\"}[5m]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "5xx",
|
||||
@ -237,15 +237,15 @@ data:
|
||||
},
|
||||
"id": 4,
|
||||
"legend": {
|
||||
"alignAsTable": "true",
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": "true",
|
||||
"current": true,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": "true",
|
||||
"show": "true",
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": "true"
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
@ -267,7 +267,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\", instance=~\"$instance\"}[5m])) by (verb, le))",
|
||||
"expr": "histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\", instance=~\"$instance\", verb!=\"WATCH\", cluster=\"$cluster\"}[5m])) by (verb, le))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{verb}}",
|
||||
@ -371,7 +371,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(workqueue_adds_total{job=\"apiserver\", instance=~\"$instance\"}[5m])) by (instance, name)",
|
||||
"expr": "sum(rate(workqueue_adds_total{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, name)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} {{name}}",
|
||||
@ -462,7 +462,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(workqueue_depth{job=\"apiserver\", instance=~\"$instance\"}[5m])) by (instance, name)",
|
||||
"expr": "sum(rate(workqueue_depth{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, name)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} {{name}}",
|
||||
@ -523,15 +523,15 @@ data:
|
||||
},
|
||||
"id": 7,
|
||||
"legend": {
|
||||
"alignAsTable": "true",
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": "true",
|
||||
"current": true,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": "true",
|
||||
"show": "true",
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": "true"
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
@ -553,7 +553,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"apiserver\", instance=~\"$instance\"}[5m])) by (instance, name, le))",
|
||||
"expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, name, le))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} {{name}}",
|
||||
@ -657,7 +657,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "etcd_helper_cache_entry_total{job=\"apiserver\", instance=~\"$instance\"}",
|
||||
"expr": "etcd_helper_cache_entry_total{job=\"apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}}",
|
||||
@ -748,14 +748,14 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(etcd_helper_cache_hit_total{job=\"apiserver\",instance=~\"$instance\"}[5m])) by (intance)",
|
||||
"expr": "sum(rate(etcd_helper_cache_hit_total{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} hit",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(etcd_helper_cache_miss_total{job=\"apiserver\",instance=~\"$instance\"}[5m])) by (instance)",
|
||||
"expr": "sum(rate(etcd_helper_cache_miss_total{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} miss",
|
||||
@ -846,14 +846,14 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.99,sum(rate(etcd_request_cache_get_duration_seconds_bucket{job=\"apiserver\",instance=~\"$instance\"}[5m])) by (instance, le))",
|
||||
"expr": "histogram_quantile(0.99,sum(rate(etcd_request_cache_get_duration_seconds_bucket{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, le))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} get",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.99,sum(rate(etcd_request_cache_add_duration_seconds_bucket{job=\"apiserver\",instance=~\"$instance\"}[5m])) by (instance, le))",
|
||||
"expr": "histogram_quantile(0.99,sum(rate(etcd_request_cache_add_duration_seconds_bucket{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[5m])) by (instance, le))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}} miss",
|
||||
@ -957,7 +957,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "process_resident_memory_bytes{job=\"apiserver\",instance=~\"$instance\"}",
|
||||
"expr": "process_resident_memory_bytes{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}}",
|
||||
@ -1048,7 +1048,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(process_cpu_seconds_total{job=\"apiserver\",instance=~\"$instance\"}[5m])",
|
||||
"expr": "rate(process_cpu_seconds_total{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[5m])",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}}",
|
||||
@ -1139,7 +1139,7 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "go_goroutines{job=\"apiserver\",instance=~\"$instance\"}",
|
||||
"expr": "go_goroutines{job=\"apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{instance}}",
|
||||
@ -1205,8 +1205,8 @@ data:
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"text": "Prometheus",
|
||||
"value": "Prometheus"
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": null,
|
||||
@ -1219,6 +1219,33 @@ data:
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
"text": "prod",
|
||||
"value": "prod"
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 2,
|
||||
"includeAll": false,
|
||||
"label": null,
|
||||
"multi": false,
|
||||
"name": "cluster",
|
||||
"options": [
|
||||
|
||||
],
|
||||
"query": "label_values(apiserver_request_total, cluster)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [
|
||||
|
||||
],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
@ -1233,7 +1260,7 @@ data:
|
||||
"options": [
|
||||
|
||||
],
|
||||
"query": "label_values(apiserver_request_total{job=\"apiserver\"}, instance)",
|
||||
"query": "label_values(apiserver_request_total{job=\"apiserver\", cluster=\"$cluster\"}, instance)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
@ -1302,7 +1329,7 @@ data:
|
||||
"links": [
|
||||
|
||||
],
|
||||
"refresh": "",
|
||||
"refresh": "10s",
|
||||
"rows": [
|
||||
{
|
||||
"collapse": false,
|
||||
@ -1406,15 +1433,15 @@ data:
|
||||
},
|
||||
"id": 3,
|
||||
"legend": {
|
||||
"alignAsTable": "true",
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": "true",
|
||||
"current": true,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": "true",
|
||||
"show": "true",
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": "true"
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
@ -1510,15 +1537,15 @@ data:
|
||||
},
|
||||
"id": 4,
|
||||
"legend": {
|
||||
"alignAsTable": "true",
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": "true",
|
||||
"current": true,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": "true",
|
||||
"show": "true",
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": "true"
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
@ -1614,15 +1641,15 @@ data:
|
||||
},
|
||||
"id": 5,
|
||||
"legend": {
|
||||
"alignAsTable": "true",
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": "true",
|
||||
"current": true,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": "true",
|
||||
"show": "true",
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": "true"
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
@ -1934,15 +1961,15 @@ data:
|
||||
},
|
||||
"id": 8,
|
||||
"legend": {
|
||||
"alignAsTable": "true",
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": "true",
|
||||
"current": true,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": "true",
|
||||
"show": "true",
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": "true"
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
@ -2316,8 +2343,8 @@ data:
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"text": "Prometheus",
|
||||
"value": "Prometheus"
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": null,
|
||||
@ -2413,7 +2440,7 @@ data:
|
||||
"links": [
|
||||
|
||||
],
|
||||
"refresh": "",
|
||||
"refresh": "10s",
|
||||
"rows": [
|
||||
{
|
||||
"collapse": false,
|
||||
@ -2815,8 +2842,8 @@ data:
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"text": "Prometheus",
|
||||
"value": "Prometheus"
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": null,
|
||||
@ -2943,664 +2970,6 @@ data:
|
||||
"uid": "919b92a8e8041bd567af9edab12c840c",
|
||||
"version": 0
|
||||
}
|
||||
pods.json: |-
|
||||
{
|
||||
"__inputs": [
|
||||
|
||||
],
|
||||
"__requires": [
|
||||
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": "$datasource",
|
||||
"enable": true,
|
||||
"expr": "time() == BOOL timestamp(rate(kube_pod_container_status_restarts_total{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[2m]) > 0)",
|
||||
"hide": false,
|
||||
"iconColor": "rgba(215, 44, 44, 1)",
|
||||
"name": "Restarts",
|
||||
"showIn": 0,
|
||||
"tags": [
|
||||
"restart"
|
||||
],
|
||||
"type": "rows"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": false,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"hideControls": false,
|
||||
"id": null,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"refresh": "",
|
||||
"rows": [
|
||||
{
|
||||
"collapse": false,
|
||||
"collapsed": false,
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 2,
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": true,
|
||||
"current": true,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 12,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by(container) (container_memory_usage_bytes{job=\"kubernetes-cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\", container!=\"POD\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "Current: {{ container }}",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "sum by(container) (kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\", pod=\"$pod\", container=~\"$container\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "Requested: {{ container }}",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"expr": "sum by(container) (kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\", pod=\"$pod\", container=~\"$container\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "Limit: {{ container }}",
|
||||
"refId": "C"
|
||||
},
|
||||
{
|
||||
"expr": "sum by(container) (container_memory_cache{job=\"kubernetes-cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\", container=~\"$container\", container!=\"POD\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "Cache: {{ container }}",
|
||||
"refId": "D"
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Memory Usage",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [
|
||||
|
||||
]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "bytes",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "bytes",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": false,
|
||||
"title": "Dashboard Row",
|
||||
"titleSize": "h6",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"collapsed": false,
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 3,
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": true,
|
||||
"current": true,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 12,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (container) (irate(container_cpu_usage_seconds_total{job=\"kubernetes-cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", image!=\"\", pod=\"$pod\", container=~\"$container\", container!=\"POD\"}[4m]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "Current: {{ container }}",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "sum by(container) (kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\", pod=\"$pod\", container=~\"$container\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "Requested: {{ container }}",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"expr": "sum by(container) (kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\", pod=\"$pod\", container=~\"$container\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "Limit: {{ container }}",
|
||||
"refId": "C"
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "CPU Usage",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [
|
||||
|
||||
]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": false,
|
||||
"title": "Dashboard Row",
|
||||
"titleSize": "h6",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"collapsed": false,
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 4,
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": true,
|
||||
"current": true,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 12,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sort_desc(sum by (pod) (irate(container_network_receive_bytes_total{job=\"kubernetes-cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[4m])))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "RX: {{ pod }}",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "sort_desc(sum by (pod) (irate(container_network_transmit_bytes_total{job=\"kubernetes-cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[4m])))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "TX: {{ pod }}",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Network I/O",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [
|
||||
|
||||
]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "bytes",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "bytes",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": false,
|
||||
"title": "Dashboard Row",
|
||||
"titleSize": "h6",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"collapsed": false,
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 5,
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": true,
|
||||
"current": true,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 12,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "max by (container) (kube_pod_container_status_restarts_total{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "Restarts: {{ container }}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Total Restarts Per Container",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [
|
||||
|
||||
]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": false,
|
||||
"title": "Dashboard Row",
|
||||
"titleSize": "h6",
|
||||
"type": "row"
|
||||
}
|
||||
],
|
||||
"schemaVersion": 14,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"kubernetes-mixin"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"text": "Prometheus",
|
||||
"value": "Prometheus"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": null,
|
||||
"name": "datasource",
|
||||
"options": [
|
||||
|
||||
],
|
||||
"query": "prometheus",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 2,
|
||||
"includeAll": false,
|
||||
"label": "cluster",
|
||||
"multi": false,
|
||||
"name": "cluster",
|
||||
"options": [
|
||||
|
||||
],
|
||||
"query": "label_values(kube_pod_info, cluster)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [
|
||||
|
||||
],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Namespace",
|
||||
"multi": false,
|
||||
"name": "namespace",
|
||||
"options": [
|
||||
|
||||
],
|
||||
"query": "label_values(kube_pod_info{cluster=\"$cluster\"}, namespace)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [
|
||||
|
||||
],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Pod",
|
||||
"multi": false,
|
||||
"name": "pod",
|
||||
"options": [
|
||||
|
||||
],
|
||||
"query": "label_values(kube_pod_info{cluster=\"$cluster\", namespace=~\"$namespace\"}, pod)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [
|
||||
|
||||
],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "Container",
|
||||
"multi": false,
|
||||
"name": "container",
|
||||
"options": [
|
||||
|
||||
],
|
||||
"query": "label_values(kube_pod_container_info{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}, container)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [
|
||||
|
||||
],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "",
|
||||
"title": "Kubernetes / Pods",
|
||||
"uid": "ab4f13a9892a76a4d21ce8c2445bf4ea",
|
||||
"version": 0
|
||||
}
|
||||
scheduler.json: |-
|
||||
{
|
||||
"__inputs": [
|
||||
@ -3622,7 +2991,7 @@ data:
|
||||
"links": [
|
||||
|
||||
],
|
||||
"refresh": "",
|
||||
"refresh": "10s",
|
||||
"rows": [
|
||||
{
|
||||
"collapse": false,
|
||||
@ -3726,15 +3095,15 @@ data:
|
||||
},
|
||||
"id": 3,
|
||||
"legend": {
|
||||
"alignAsTable": "true",
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": "true",
|
||||
"current": true,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": "true",
|
||||
"show": "true",
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": "true"
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
@ -3838,15 +3207,15 @@ data:
|
||||
},
|
||||
"id": 4,
|
||||
"legend": {
|
||||
"alignAsTable": "true",
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": "true",
|
||||
"current": true,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": "true",
|
||||
"show": "true",
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": "true"
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
@ -4179,15 +3548,15 @@ data:
|
||||
},
|
||||
"id": 7,
|
||||
"legend": {
|
||||
"alignAsTable": "true",
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": "true",
|
||||
"current": true,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": "true",
|
||||
"show": "true",
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": "true"
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
@ -4561,8 +3930,8 @@ data:
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"text": "Prometheus",
|
||||
"value": "Prometheus"
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": null,
|
||||
@ -4637,910 +4006,6 @@ data:
|
||||
"uid": "2e6b6a3b4bddf1427b3a55aa1311c656",
|
||||
"version": 0
|
||||
}
|
||||
statefulset.json: |-
|
||||
{
|
||||
"__inputs": [
|
||||
|
||||
],
|
||||
"__requires": [
|
||||
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
|
||||
]
|
||||
},
|
||||
"editable": false,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"hideControls": false,
|
||||
"id": null,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"refresh": "",
|
||||
"rows": [
|
||||
{
|
||||
"collapse": false,
|
||||
"collapsed": false,
|
||||
"panels": [
|
||||
{
|
||||
"cacheTimeout": null,
|
||||
"colorBackground": false,
|
||||
"colorValue": false,
|
||||
"colors": [
|
||||
"#299c46",
|
||||
"rgba(237, 129, 40, 0.89)",
|
||||
"#d44a3a"
|
||||
],
|
||||
"datasource": "$datasource",
|
||||
"format": "none",
|
||||
"gauge": {
|
||||
"maxValue": 100,
|
||||
"minValue": 0,
|
||||
"show": false,
|
||||
"thresholdLabels": false,
|
||||
"thresholdMarkers": true
|
||||
},
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 2,
|
||||
"interval": null,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"mappingType": 1,
|
||||
"mappingTypes": [
|
||||
{
|
||||
"name": "value to text",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"name": "range to text",
|
||||
"value": 2
|
||||
}
|
||||
],
|
||||
"maxDataPoints": 100,
|
||||
"nullPointMode": "connected",
|
||||
"nullText": null,
|
||||
"postfix": "cores",
|
||||
"postfixFontSize": "50%",
|
||||
"prefix": "",
|
||||
"prefixFontSize": "50%",
|
||||
"rangeMaps": [
|
||||
{
|
||||
"from": "null",
|
||||
"text": "N/A",
|
||||
"to": "null"
|
||||
}
|
||||
],
|
||||
"span": 4,
|
||||
"sparkline": {
|
||||
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||
"lineColor": "rgb(31, 120, 193)",
|
||||
"show": true
|
||||
},
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(container_cpu_usage_seconds_total{job=\"kubernetes-cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}[3m]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": "",
|
||||
"title": "CPU",
|
||||
"tooltip": {
|
||||
"shared": false
|
||||
},
|
||||
"type": "singlestat",
|
||||
"valueFontSize": "80%",
|
||||
"valueMaps": [
|
||||
{
|
||||
"op": "=",
|
||||
"text": "0",
|
||||
"value": "null"
|
||||
}
|
||||
],
|
||||
"valueName": "current"
|
||||
},
|
||||
{
|
||||
"cacheTimeout": null,
|
||||
"colorBackground": false,
|
||||
"colorValue": false,
|
||||
"colors": [
|
||||
"#299c46",
|
||||
"rgba(237, 129, 40, 0.89)",
|
||||
"#d44a3a"
|
||||
],
|
||||
"datasource": "$datasource",
|
||||
"format": "none",
|
||||
"gauge": {
|
||||
"maxValue": 100,
|
||||
"minValue": 0,
|
||||
"show": false,
|
||||
"thresholdLabels": false,
|
||||
"thresholdMarkers": true
|
||||
},
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 3,
|
||||
"interval": null,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"mappingType": 1,
|
||||
"mappingTypes": [
|
||||
{
|
||||
"name": "value to text",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"name": "range to text",
|
||||
"value": 2
|
||||
}
|
||||
],
|
||||
"maxDataPoints": 100,
|
||||
"nullPointMode": "connected",
|
||||
"nullText": null,
|
||||
"postfix": "GB",
|
||||
"postfixFontSize": "50%",
|
||||
"prefix": "",
|
||||
"prefixFontSize": "50%",
|
||||
"rangeMaps": [
|
||||
{
|
||||
"from": "null",
|
||||
"text": "N/A",
|
||||
"to": "null"
|
||||
}
|
||||
],
|
||||
"span": 4,
|
||||
"sparkline": {
|
||||
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||
"lineColor": "rgb(31, 120, 193)",
|
||||
"show": true
|
||||
},
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(container_memory_usage_bytes{job=\"kubernetes-cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}) / 1024^3",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": "",
|
||||
"title": "Memory",
|
||||
"tooltip": {
|
||||
"shared": false
|
||||
},
|
||||
"type": "singlestat",
|
||||
"valueFontSize": "80%",
|
||||
"valueMaps": [
|
||||
{
|
||||
"op": "=",
|
||||
"text": "0",
|
||||
"value": "null"
|
||||
}
|
||||
],
|
||||
"valueName": "current"
|
||||
},
|
||||
{
|
||||
"cacheTimeout": null,
|
||||
"colorBackground": false,
|
||||
"colorValue": false,
|
||||
"colors": [
|
||||
"#299c46",
|
||||
"rgba(237, 129, 40, 0.89)",
|
||||
"#d44a3a"
|
||||
],
|
||||
"datasource": "$datasource",
|
||||
"format": "none",
|
||||
"gauge": {
|
||||
"maxValue": 100,
|
||||
"minValue": 0,
|
||||
"show": false,
|
||||
"thresholdLabels": false,
|
||||
"thresholdMarkers": true
|
||||
},
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 4,
|
||||
"interval": null,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"mappingType": 1,
|
||||
"mappingTypes": [
|
||||
{
|
||||
"name": "value to text",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"name": "range to text",
|
||||
"value": 2
|
||||
}
|
||||
],
|
||||
"maxDataPoints": 100,
|
||||
"nullPointMode": "connected",
|
||||
"nullText": null,
|
||||
"postfix": "Bps",
|
||||
"postfixFontSize": "50%",
|
||||
"prefix": "",
|
||||
"prefixFontSize": "50%",
|
||||
"rangeMaps": [
|
||||
{
|
||||
"from": "null",
|
||||
"text": "N/A",
|
||||
"to": "null"
|
||||
}
|
||||
],
|
||||
"span": 4,
|
||||
"sparkline": {
|
||||
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||
"lineColor": "rgb(31, 120, 193)",
|
||||
"show": true
|
||||
},
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(container_network_transmit_bytes_total{job=\"kubernetes-cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$statefulset.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\",pod=~\"$statefulset.*\"}[3m]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": "",
|
||||
"title": "Network",
|
||||
"tooltip": {
|
||||
"shared": false
|
||||
},
|
||||
"type": "singlestat",
|
||||
"valueFontSize": "80%",
|
||||
"valueMaps": [
|
||||
{
|
||||
"op": "=",
|
||||
"text": "0",
|
||||
"value": "null"
|
||||
}
|
||||
],
|
||||
"valueName": "current"
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": false,
|
||||
"title": "Dashboard Row",
|
||||
"titleSize": "h6",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"collapsed": false,
|
||||
"height": "100px",
|
||||
"panels": [
|
||||
{
|
||||
"cacheTimeout": null,
|
||||
"colorBackground": false,
|
||||
"colorValue": false,
|
||||
"colors": [
|
||||
"#299c46",
|
||||
"rgba(237, 129, 40, 0.89)",
|
||||
"#d44a3a"
|
||||
],
|
||||
"datasource": "$datasource",
|
||||
"format": "none",
|
||||
"gauge": {
|
||||
"maxValue": 100,
|
||||
"minValue": 0,
|
||||
"show": false,
|
||||
"thresholdLabels": false,
|
||||
"thresholdMarkers": true
|
||||
},
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 5,
|
||||
"interval": null,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"mappingType": 1,
|
||||
"mappingTypes": [
|
||||
{
|
||||
"name": "value to text",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"name": "range to text",
|
||||
"value": 2
|
||||
}
|
||||
],
|
||||
"maxDataPoints": 100,
|
||||
"nullPointMode": "connected",
|
||||
"nullText": null,
|
||||
"postfix": "",
|
||||
"postfixFontSize": "50%",
|
||||
"prefix": "",
|
||||
"prefixFontSize": "50%",
|
||||
"rangeMaps": [
|
||||
{
|
||||
"from": "null",
|
||||
"text": "N/A",
|
||||
"to": "null"
|
||||
}
|
||||
],
|
||||
"span": 3,
|
||||
"sparkline": {
|
||||
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||
"full": false,
|
||||
"lineColor": "rgb(31, 120, 193)",
|
||||
"show": false
|
||||
},
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": "",
|
||||
"title": "Desired Replicas",
|
||||
"tooltip": {
|
||||
"shared": false
|
||||
},
|
||||
"type": "singlestat",
|
||||
"valueFontSize": "80%",
|
||||
"valueMaps": [
|
||||
{
|
||||
"op": "=",
|
||||
"text": "0",
|
||||
"value": "null"
|
||||
}
|
||||
],
|
||||
"valueName": "current"
|
||||
},
|
||||
{
|
||||
"cacheTimeout": null,
|
||||
"colorBackground": false,
|
||||
"colorValue": false,
|
||||
"colors": [
|
||||
"#299c46",
|
||||
"rgba(237, 129, 40, 0.89)",
|
||||
"#d44a3a"
|
||||
],
|
||||
"datasource": "$datasource",
|
||||
"format": "none",
|
||||
"gauge": {
|
||||
"maxValue": 100,
|
||||
"minValue": 0,
|
||||
"show": false,
|
||||
"thresholdLabels": false,
|
||||
"thresholdMarkers": true
|
||||
},
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 6,
|
||||
"interval": null,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"mappingType": 1,
|
||||
"mappingTypes": [
|
||||
{
|
||||
"name": "value to text",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"name": "range to text",
|
||||
"value": 2
|
||||
}
|
||||
],
|
||||
"maxDataPoints": 100,
|
||||
"nullPointMode": "connected",
|
||||
"nullText": null,
|
||||
"postfix": "",
|
||||
"postfixFontSize": "50%",
|
||||
"prefix": "",
|
||||
"prefixFontSize": "50%",
|
||||
"rangeMaps": [
|
||||
{
|
||||
"from": "null",
|
||||
"text": "N/A",
|
||||
"to": "null"
|
||||
}
|
||||
],
|
||||
"span": 3,
|
||||
"sparkline": {
|
||||
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||
"full": false,
|
||||
"lineColor": "rgb(31, 120, 193)",
|
||||
"show": false
|
||||
},
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": "",
|
||||
"title": "Replicas of current version",
|
||||
"tooltip": {
|
||||
"shared": false
|
||||
},
|
||||
"type": "singlestat",
|
||||
"valueFontSize": "80%",
|
||||
"valueMaps": [
|
||||
{
|
||||
"op": "=",
|
||||
"text": "0",
|
||||
"value": "null"
|
||||
}
|
||||
],
|
||||
"valueName": "current"
|
||||
},
|
||||
{
|
||||
"cacheTimeout": null,
|
||||
"colorBackground": false,
|
||||
"colorValue": false,
|
||||
"colors": [
|
||||
"#299c46",
|
||||
"rgba(237, 129, 40, 0.89)",
|
||||
"#d44a3a"
|
||||
],
|
||||
"datasource": "$datasource",
|
||||
"format": "none",
|
||||
"gauge": {
|
||||
"maxValue": 100,
|
||||
"minValue": 0,
|
||||
"show": false,
|
||||
"thresholdLabels": false,
|
||||
"thresholdMarkers": true
|
||||
},
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 7,
|
||||
"interval": null,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"mappingType": 1,
|
||||
"mappingTypes": [
|
||||
{
|
||||
"name": "value to text",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"name": "range to text",
|
||||
"value": 2
|
||||
}
|
||||
],
|
||||
"maxDataPoints": 100,
|
||||
"nullPointMode": "connected",
|
||||
"nullText": null,
|
||||
"postfix": "",
|
||||
"postfixFontSize": "50%",
|
||||
"prefix": "",
|
||||
"prefixFontSize": "50%",
|
||||
"rangeMaps": [
|
||||
{
|
||||
"from": "null",
|
||||
"text": "N/A",
|
||||
"to": "null"
|
||||
}
|
||||
],
|
||||
"span": 3,
|
||||
"sparkline": {
|
||||
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||
"full": false,
|
||||
"lineColor": "rgb(31, 120, 193)",
|
||||
"show": false
|
||||
},
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "max(kube_statefulset_status_observed_generation{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": "",
|
||||
"title": "Observed Generation",
|
||||
"tooltip": {
|
||||
"shared": false
|
||||
},
|
||||
"type": "singlestat",
|
||||
"valueFontSize": "80%",
|
||||
"valueMaps": [
|
||||
{
|
||||
"op": "=",
|
||||
"text": "0",
|
||||
"value": "null"
|
||||
}
|
||||
],
|
||||
"valueName": "current"
|
||||
},
|
||||
{
|
||||
"cacheTimeout": null,
|
||||
"colorBackground": false,
|
||||
"colorValue": false,
|
||||
"colors": [
|
||||
"#299c46",
|
||||
"rgba(237, 129, 40, 0.89)",
|
||||
"#d44a3a"
|
||||
],
|
||||
"datasource": "$datasource",
|
||||
"format": "none",
|
||||
"gauge": {
|
||||
"maxValue": 100,
|
||||
"minValue": 0,
|
||||
"show": false,
|
||||
"thresholdLabels": false,
|
||||
"thresholdMarkers": true
|
||||
},
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 8,
|
||||
"interval": null,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"mappingType": 1,
|
||||
"mappingTypes": [
|
||||
{
|
||||
"name": "value to text",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"name": "range to text",
|
||||
"value": 2
|
||||
}
|
||||
],
|
||||
"maxDataPoints": 100,
|
||||
"nullPointMode": "connected",
|
||||
"nullText": null,
|
||||
"postfix": "",
|
||||
"postfixFontSize": "50%",
|
||||
"prefix": "",
|
||||
"prefixFontSize": "50%",
|
||||
"rangeMaps": [
|
||||
{
|
||||
"from": "null",
|
||||
"text": "N/A",
|
||||
"to": "null"
|
||||
}
|
||||
],
|
||||
"span": 3,
|
||||
"sparkline": {
|
||||
"fillColor": "rgba(31, 118, 189, 0.18)",
|
||||
"full": false,
|
||||
"lineColor": "rgb(31, 120, 193)",
|
||||
"show": false
|
||||
},
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "max(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": "",
|
||||
"title": "Metadata Generation",
|
||||
"tooltip": {
|
||||
"shared": false
|
||||
},
|
||||
"type": "singlestat",
|
||||
"valueFontSize": "80%",
|
||||
"valueMaps": [
|
||||
{
|
||||
"op": "=",
|
||||
"text": "0",
|
||||
"value": "null"
|
||||
}
|
||||
],
|
||||
"valueName": "current"
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": false,
|
||||
"title": "Dashboard Row",
|
||||
"titleSize": "h6",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"collapsed": false,
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 9,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "replicas specified",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "max(kube_statefulset_status_replicas{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "replicas created",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"expr": "min(kube_statefulset_status_replicas_ready{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "ready",
|
||||
"refId": "C"
|
||||
},
|
||||
{
|
||||
"expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "replicas of current version",
|
||||
"refId": "D"
|
||||
},
|
||||
{
|
||||
"expr": "min(kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\", statefulset=\"$statefulset\", cluster=\"$cluster\", namespace=\"$namespace\"}) without (instance, pod)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "updated",
|
||||
"refId": "E"
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Replicas",
|
||||
"tooltip": {
|
||||
"shared": false,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [
|
||||
|
||||
]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": false,
|
||||
"title": "Dashboard Row",
|
||||
"titleSize": "h6",
|
||||
"type": "row"
|
||||
}
|
||||
],
|
||||
"schemaVersion": 14,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"kubernetes-mixin"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"text": "Prometheus",
|
||||
"value": "Prometheus"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": null,
|
||||
"name": "datasource",
|
||||
"options": [
|
||||
|
||||
],
|
||||
"query": "prometheus",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 2,
|
||||
"includeAll": false,
|
||||
"label": "cluster",
|
||||
"multi": false,
|
||||
"name": "cluster",
|
||||
"options": [
|
||||
|
||||
],
|
||||
"query": "label_values(kube_statefulset_metadata_generation, cluster)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [
|
||||
|
||||
],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Namespace",
|
||||
"multi": false,
|
||||
"name": "namespace",
|
||||
"options": [
|
||||
|
||||
],
|
||||
"query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [
|
||||
|
||||
],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Name",
|
||||
"multi": false,
|
||||
"name": "statefulset",
|
||||
"options": [
|
||||
|
||||
],
|
||||
"query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\"}, statefulset)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [
|
||||
|
||||
],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "",
|
||||
"title": "Kubernetes / StatefulSets",
|
||||
"uid": "a31c1f46e6f727cb37c0d731a7245005",
|
||||
"version": 0
|
||||
}
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: grafana-dashboards-k8s
|
||||
|
@ -2,6 +2,12 @@ apiVersion: v1
|
||||
data:
|
||||
prometheus-remote-write.json: |-
|
||||
{
|
||||
"__inputs": [
|
||||
|
||||
],
|
||||
"__requires": [
|
||||
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
|
||||
@ -11,14 +17,15 @@ data:
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"hideControls": false,
|
||||
"id": null,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"refresh": "10s",
|
||||
"refresh": "",
|
||||
"rows": [
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"collapsed": false,
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": {
|
||||
@ -29,12 +36,17 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 1,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 2,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
@ -44,11 +56,12 @@ data:
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null as zero",
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
@ -58,12 +71,11 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"} - ignoring(queue) group_right(instance) prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}",
|
||||
"expr": "(\n prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"} \n- \n ignoring(queue) group_right(instance) prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
@ -89,11 +101,11 @@ data:
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "s",
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
@ -102,7 +114,7 @@ data:
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
@ -115,12 +127,17 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 2,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 3,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
@ -130,11 +147,12 @@ data:
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null as zero",
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
@ -144,12 +162,11 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) - ignoring (queue) group_right(instance) rate(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
|
||||
"expr": "(\n rate(prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) \n- \n ignoring (queue) group_right(instance) rate(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
@ -179,7 +196,7 @@ data:
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
@ -188,7 +205,7 @@ data:
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -198,11 +215,12 @@ data:
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Timestamps",
|
||||
"titleSize": "h6"
|
||||
"titleSize": "h6",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"collapsed": false,
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": {
|
||||
@ -213,12 +231,17 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 3,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 4,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
@ -228,11 +251,12 @@ data:
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null as zero",
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
@ -242,12 +266,11 @@ data:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(prometheus_remote_storage_samples_in_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])- ignoring(queue) group_right(instance) rate(prometheus_remote_storage_succeeded_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) - rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
|
||||
"expr": "rate(\n prometheus_remote_storage_samples_in_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n- \n ignoring(queue) group_right(instance) rate(prometheus_remote_storage_succeeded_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) \n- \n rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
@ -277,7 +300,7 @@ data:
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
@ -286,7 +309,7 @@ data:
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -296,11 +319,12 @@ data:
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Samples",
|
||||
"titleSize": "h6"
|
||||
"titleSize": "h6",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"collapsed": false,
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": {
|
||||
@ -311,12 +335,17 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 4,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 5,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
@ -326,16 +355,18 @@ data:
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null as zero",
|
||||
"minSpan": 6,
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 6,
|
||||
"span": 12,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
@ -344,8 +375,7 @@ data:
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
@ -353,7 +383,7 @@ data:
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Num. Shards",
|
||||
"title": "Current Shards",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
@ -375,7 +405,7 @@ data:
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
@ -384,7 +414,7 @@ data:
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
@ -397,12 +427,17 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 5,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 6,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
@ -412,11 +447,298 @@ data:
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null as zero",
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 4,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "prometheus_remote_storage_shards_max{cluster=~\"$cluster\", instance=~\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Max Shards",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [
|
||||
|
||||
]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 7,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 4,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "prometheus_remote_storage_shards_min{cluster=~\"$cluster\", instance=~\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Min Shards",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [
|
||||
|
||||
]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 8,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 4,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "prometheus_remote_storage_shards_desired{cluster=~\"$cluster\", instance=~\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Desired Shards",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [
|
||||
|
||||
]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Shards",
|
||||
"titleSize": "h6",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"collapsed": false,
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 9,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
@ -430,8 +752,7 @@ data:
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
@ -439,7 +760,7 @@ data:
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Capacity",
|
||||
"title": "Shard Capacity",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
@ -461,7 +782,7 @@ data:
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
@ -470,7 +791,98 @@ data:
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 10,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 6,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "prometheus_remote_storage_pending_samples{cluster=~\"$cluster\", instance=~\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Pending Samples",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [
|
||||
|
||||
]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -479,12 +891,13 @@ data:
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Shards",
|
||||
"titleSize": "h6"
|
||||
"title": "Shard Details",
|
||||
"titleSize": "h6",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": "250px",
|
||||
"collapsed": false,
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": {
|
||||
@ -495,12 +908,17 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 6,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 11,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
@ -510,11 +928,207 @@ data:
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null as zero",
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 6,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "prometheus_tsdb_wal_segment_current{cluster=~\"$cluster\", instance=~\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{cluster}}:{{instance}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "TSDB Current Segment",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [
|
||||
|
||||
]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "none",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 12,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
"spaceLength": 10,
|
||||
"span": 6,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "prometheus_wal_watcher_current_segment{cluster=~\"$cluster\", instance=~\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Remote Write Current Segment",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [
|
||||
|
||||
]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "none",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Segments",
|
||||
"titleSize": "h6",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"collapsed": false,
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 13,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
@ -528,8 +1142,7 @@ data:
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
@ -559,7 +1172,7 @@ data:
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
@ -568,7 +1181,7 @@ data:
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
@ -581,12 +1194,17 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 7,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 14,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
@ -596,11 +1214,12 @@ data:
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null as zero",
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
@ -614,8 +1233,7 @@ data:
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
@ -645,7 +1263,7 @@ data:
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
@ -654,7 +1272,7 @@ data:
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
@ -667,12 +1285,17 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 8,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 15,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
@ -682,11 +1305,12 @@ data:
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null as zero",
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
@ -700,8 +1324,7 @@ data:
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
@ -731,7 +1354,7 @@ data:
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
@ -740,7 +1363,7 @@ data:
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
@ -753,12 +1376,17 @@ data:
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"id": 9,
|
||||
"gridPos": {
|
||||
|
||||
},
|
||||
"id": 16,
|
||||
"legend": {
|
||||
"alignAsTable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
@ -768,11 +1396,12 @@ data:
|
||||
"links": [
|
||||
|
||||
],
|
||||
"nullPointMode": "null as zero",
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [
|
||||
|
||||
],
|
||||
@ -786,8 +1415,7 @@ data:
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{cluster}}:{{instance}}-{{queue}}",
|
||||
"legendLink": null,
|
||||
"step": 10
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
@ -817,7 +1445,7 @@ data:
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
@ -826,7 +1454,7 @@ data:
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -835,8 +1463,9 @@ data:
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": true,
|
||||
"title": "Misc Rates.",
|
||||
"titleSize": "h6"
|
||||
"title": "Misc. Rates",
|
||||
"titleSize": "h6",
|
||||
"type": "row"
|
||||
}
|
||||
],
|
||||
"schemaVersion": 14,
|
||||
@ -847,10 +1476,6 @@ data:
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"text": "Prometheus",
|
||||
"value": "Prometheus"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": null,
|
||||
"name": "datasource",
|
||||
@ -865,23 +1490,30 @@ data:
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
"text": {
|
||||
"selected": true,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"value": {
|
||||
"selected": true,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
}
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "instance",
|
||||
"multi": true,
|
||||
"label": null,
|
||||
"multi": false,
|
||||
"name": "instance",
|
||||
"options": [
|
||||
|
||||
],
|
||||
"query": "label_values(prometheus_build_info, instance)",
|
||||
"refresh": 1,
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 2,
|
||||
"sort": 0,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [
|
||||
|
||||
@ -893,23 +1525,56 @@ data:
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
"text": {
|
||||
"selected": true,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"value": {
|
||||
"selected": true,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
}
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "cluster",
|
||||
"multi": true,
|
||||
"label": null,
|
||||
"multi": false,
|
||||
"name": "cluster",
|
||||
"options": [
|
||||
|
||||
],
|
||||
"query": "label_values(kube_pod_container_info{image=~\".*prometheus.*\"}, cluster)",
|
||||
"refresh": 1,
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 2,
|
||||
"sort": 0,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [
|
||||
|
||||
],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
|
||||
},
|
||||
"datasource": "$datasource",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": null,
|
||||
"multi": false,
|
||||
"name": "queue",
|
||||
"options": [
|
||||
|
||||
],
|
||||
"query": "label_values(prometheus_remote_storage_shards{cluster=~\"$cluster\", instance=~\"$instance\"}, queue)",
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"sort": 0,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [
|
||||
|
||||
@ -921,7 +1586,7 @@ data:
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"from": "now-6h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
@ -949,9 +1614,8 @@ data:
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "utc",
|
||||
"timezone": "browser",
|
||||
"title": "Prometheus Remote Write",
|
||||
"uid": "",
|
||||
"version": 0
|
||||
}
|
||||
prometheus.json: |-
|
||||
@ -2048,8 +2712,8 @@ data:
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"text": "Prometheus",
|
||||
"value": "Prometheus"
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": null,
|
||||
|
@ -23,7 +23,7 @@ spec:
|
||||
spec:
|
||||
containers:
|
||||
- name: grafana
|
||||
image: docker.io/grafana/grafana:6.5.1
|
||||
image: docker.io/grafana/grafana:7.0.3
|
||||
env:
|
||||
- name: GF_PATHS_CONFIG
|
||||
value: "/etc/grafana/custom.ini"
|
||||
|
6
addons/nginx-ingress/aws/class.yaml
Normal file
6
addons/nginx-ingress/aws/class.yaml
Normal file
@ -0,0 +1,6 @@
|
||||
apiVersion: networking.k8s.io/v1beta1
|
||||
kind: IngressClass
|
||||
metadata:
|
||||
name: public
|
||||
spec:
|
||||
controller: k8s.io/ingress-nginx
|
@ -22,7 +22,7 @@ spec:
|
||||
spec:
|
||||
containers:
|
||||
- name: nginx-ingress-controller
|
||||
image: quay.io/kubernetes-ingress-controller/nginx-ingress-controller:0.26.1
|
||||
image: quay.io/kubernetes-ingress-controller/nginx-ingress-controller:0.33.0
|
||||
args:
|
||||
- /nginx-ingress-controller
|
||||
- --ingress-class=public
|
||||
@ -76,6 +76,6 @@ spec:
|
||||
- NET_BIND_SERVICE
|
||||
drop:
|
||||
- ALL
|
||||
runAsUser: 33 # www-data
|
||||
runAsUser: 101 # www-data
|
||||
restartPolicy: Always
|
||||
terminationGracePeriodSeconds: 300
|
||||
|
@ -51,3 +51,12 @@ rules:
|
||||
- ingresses/status
|
||||
verbs:
|
||||
- update
|
||||
- apiGroups:
|
||||
- "networking.k8s.io"
|
||||
resources:
|
||||
- ingressclasses
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
|
||||
|
6
addons/nginx-ingress/azure/class.yaml
Normal file
6
addons/nginx-ingress/azure/class.yaml
Normal file
@ -0,0 +1,6 @@
|
||||
apiVersion: networking.k8s.io/v1beta1
|
||||
kind: IngressClass
|
||||
metadata:
|
||||
name: public
|
||||
spec:
|
||||
controller: k8s.io/ingress-nginx
|
@ -22,7 +22,7 @@ spec:
|
||||
spec:
|
||||
containers:
|
||||
- name: nginx-ingress-controller
|
||||
image: quay.io/kubernetes-ingress-controller/nginx-ingress-controller:0.26.1
|
||||
image: quay.io/kubernetes-ingress-controller/nginx-ingress-controller:0.33.0
|
||||
args:
|
||||
- /nginx-ingress-controller
|
||||
- --ingress-class=public
|
||||
@ -76,6 +76,6 @@ spec:
|
||||
- NET_BIND_SERVICE
|
||||
drop:
|
||||
- ALL
|
||||
runAsUser: 33 # www-data
|
||||
runAsUser: 101 # www-data
|
||||
restartPolicy: Always
|
||||
terminationGracePeriodSeconds: 300
|
||||
|
@ -51,3 +51,12 @@ rules:
|
||||
- ingresses/status
|
||||
verbs:
|
||||
- update
|
||||
- apiGroups:
|
||||
- "networking.k8s.io"
|
||||
resources:
|
||||
- ingressclasses
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
|
||||
|
6
addons/nginx-ingress/bare-metal/class.yaml
Normal file
6
addons/nginx-ingress/bare-metal/class.yaml
Normal file
@ -0,0 +1,6 @@
|
||||
apiVersion: networking.k8s.io/v1beta1
|
||||
kind: IngressClass
|
||||
metadata:
|
||||
name: public
|
||||
spec:
|
||||
controller: k8s.io/ingress-nginx
|
@ -22,7 +22,7 @@ spec:
|
||||
spec:
|
||||
containers:
|
||||
- name: nginx-ingress-controller
|
||||
image: quay.io/kubernetes-ingress-controller/nginx-ingress-controller:0.26.1
|
||||
image: quay.io/kubernetes-ingress-controller/nginx-ingress-controller:0.33.0
|
||||
args:
|
||||
- /nginx-ingress-controller
|
||||
- --ingress-class=public
|
||||
@ -73,7 +73,7 @@ spec:
|
||||
- NET_BIND_SERVICE
|
||||
drop:
|
||||
- ALL
|
||||
runAsUser: 33 # www-data
|
||||
runAsUser: 101 # www-data
|
||||
restartPolicy: Always
|
||||
terminationGracePeriodSeconds: 300
|
||||
|
||||
|
@ -51,3 +51,12 @@ rules:
|
||||
- ingresses/status
|
||||
verbs:
|
||||
- update
|
||||
- apiGroups:
|
||||
- "networking.k8s.io"
|
||||
resources:
|
||||
- ingressclasses
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
|
||||
|
6
addons/nginx-ingress/digital-ocean/class.yaml
Normal file
6
addons/nginx-ingress/digital-ocean/class.yaml
Normal file
@ -0,0 +1,6 @@
|
||||
apiVersion: networking.k8s.io/v1beta1
|
||||
kind: IngressClass
|
||||
metadata:
|
||||
name: public
|
||||
spec:
|
||||
controller: k8s.io/ingress-nginx
|
@ -22,7 +22,7 @@ spec:
|
||||
spec:
|
||||
containers:
|
||||
- name: nginx-ingress-controller
|
||||
image: quay.io/kubernetes-ingress-controller/nginx-ingress-controller:0.26.1
|
||||
image: quay.io/kubernetes-ingress-controller/nginx-ingress-controller:0.33.0
|
||||
args:
|
||||
- /nginx-ingress-controller
|
||||
- --ingress-class=public
|
||||
@ -76,6 +76,6 @@ spec:
|
||||
- NET_BIND_SERVICE
|
||||
drop:
|
||||
- ALL
|
||||
runAsUser: 33 # www-data
|
||||
runAsUser: 101 # www-data
|
||||
restartPolicy: Always
|
||||
terminationGracePeriodSeconds: 300
|
||||
|
@ -51,3 +51,12 @@ rules:
|
||||
- ingresses/status
|
||||
verbs:
|
||||
- update
|
||||
- apiGroups:
|
||||
- "networking.k8s.io"
|
||||
resources:
|
||||
- ingressclasses
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
|
||||
|
6
addons/nginx-ingress/google-cloud/class.yaml
Normal file
6
addons/nginx-ingress/google-cloud/class.yaml
Normal file
@ -0,0 +1,6 @@
|
||||
apiVersion: networking.k8s.io/v1beta1
|
||||
kind: IngressClass
|
||||
metadata:
|
||||
name: public
|
||||
spec:
|
||||
controller: k8s.io/ingress-nginx
|
@ -22,7 +22,7 @@ spec:
|
||||
spec:
|
||||
containers:
|
||||
- name: nginx-ingress-controller
|
||||
image: quay.io/kubernetes-ingress-controller/nginx-ingress-controller:0.26.1
|
||||
image: quay.io/kubernetes-ingress-controller/nginx-ingress-controller:0.33.0
|
||||
args:
|
||||
- /nginx-ingress-controller
|
||||
- --ingress-class=public
|
||||
@ -76,6 +76,6 @@ spec:
|
||||
- NET_BIND_SERVICE
|
||||
drop:
|
||||
- ALL
|
||||
runAsUser: 33 # www-data
|
||||
runAsUser: 101 # www-data
|
||||
restartPolicy: Always
|
||||
terminationGracePeriodSeconds: 300
|
||||
|
@ -51,3 +51,12 @@ rules:
|
||||
- ingresses/status
|
||||
verbs:
|
||||
- update
|
||||
- apiGroups:
|
||||
- "networking.k8s.io"
|
||||
resources:
|
||||
- ingressclasses
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
|
||||
|
@ -20,7 +20,7 @@ spec:
|
||||
serviceAccountName: prometheus
|
||||
containers:
|
||||
- name: prometheus
|
||||
image: quay.io/prometheus/prometheus:v2.14.0
|
||||
image: quay.io/prometheus/prometheus:v2.19.0
|
||||
args:
|
||||
- --web.listen-address=0.0.0.0:9090
|
||||
- --config.file=/etc/prometheus/prometheus.yaml
|
||||
|
@ -1,3 +1,4 @@
|
||||
# Allow Prometheus to scrape service endpoints
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
@ -7,7 +8,6 @@ metadata:
|
||||
prometheus.io/scrape: 'true'
|
||||
spec:
|
||||
type: ClusterIP
|
||||
# service is created to allow prometheus to scrape endpoints
|
||||
clusterIP: None
|
||||
selector:
|
||||
k8s-app: kube-controller-manager
|
||||
|
19
addons/prometheus/discovery/kube-proxy.yaml
Normal file
19
addons/prometheus/discovery/kube-proxy.yaml
Normal file
@ -0,0 +1,19 @@
|
||||
# Allow Prometheus to scrape service endpoints
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: kube-proxy
|
||||
namespace: kube-system
|
||||
annotations:
|
||||
prometheus.io/scrape: 'true'
|
||||
prometheus.io/port: '10249'
|
||||
spec:
|
||||
type: ClusterIP
|
||||
clusterIP: None
|
||||
selector:
|
||||
k8s-app: kube-proxy
|
||||
ports:
|
||||
- name: metrics
|
||||
protocol: TCP
|
||||
port: 10249
|
||||
targetPort: 10249
|
@ -1,3 +1,4 @@
|
||||
# Allow Prometheus to scrape service endpoints
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
@ -7,7 +8,6 @@ metadata:
|
||||
prometheus.io/scrape: 'true'
|
||||
spec:
|
||||
type: ClusterIP
|
||||
# service is created to allow prometheus to scrape endpoints
|
||||
clusterIP: None
|
||||
selector:
|
||||
k8s-app: kube-scheduler
|
||||
|
@ -74,6 +74,7 @@ rules:
|
||||
- storage.k8s.io
|
||||
resources:
|
||||
- storageclasses
|
||||
- volumeattachments
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
@ -84,4 +85,19 @@ rules:
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- admissionregistration.k8s.io
|
||||
resources:
|
||||
- mutatingwebhookconfigurations
|
||||
- validatingwebhookconfigurations
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- networking.k8s.io
|
||||
resources:
|
||||
- networkpolicies
|
||||
verbs:
|
||||
- list
|
||||
- watch
|
||||
|
||||
|
@ -24,7 +24,7 @@ spec:
|
||||
serviceAccountName: kube-state-metrics
|
||||
containers:
|
||||
- name: kube-state-metrics
|
||||
image: quay.io/coreos/kube-state-metrics:v1.8.0
|
||||
image: quay.io/coreos/kube-state-metrics:v1.9.7
|
||||
ports:
|
||||
- name: metrics
|
||||
containerPort: 8080
|
||||
|
@ -28,7 +28,7 @@ spec:
|
||||
hostPID: true
|
||||
containers:
|
||||
- name: node-exporter
|
||||
image: quay.io/prometheus/node-exporter:v0.18.1
|
||||
image: quay.io/prometheus/node-exporter:v1.0.1
|
||||
args:
|
||||
- --path.procfs=/host/proc
|
||||
- --path.sysfs=/host/sys
|
||||
@ -57,7 +57,9 @@ spec:
|
||||
mountPath: /host/root
|
||||
readOnly: true
|
||||
tolerations:
|
||||
- effect: NoSchedule
|
||||
- key: node-role.kubernetes.io/master
|
||||
operator: Exists
|
||||
- key: node.kubernetes.io/not-ready
|
||||
operator: Exists
|
||||
volumes:
|
||||
- name: proc
|
||||
|
@ -42,10 +42,10 @@ data:
|
||||
{
|
||||
"alert": "etcdHighNumberOfLeaderChanges",
|
||||
"annotations": {
|
||||
"message": "etcd cluster \"{{ $labels.job }}\": instance {{ $labels.instance }} has seen {{ $value }} leader changes within the last 30 minutes."
|
||||
"message": "etcd cluster \"{{ $labels.job }}\": {{ $value }} leader changes within the last 15 minutes. Frequent elections may be a sign of insufficient resources, high network latency, or disruptions by other components and should be investigated."
|
||||
},
|
||||
"expr": "rate(etcd_server_leader_changes_seen_total{job=~\".*etcd.*\"}[15m]) > 3\n",
|
||||
"for": "15m",
|
||||
"expr": "increase((max by (job) (etcd_server_leader_changes_seen_total{job=~\".*etcd.*\"}) or 0*absent(etcd_server_leader_changes_seen_total{job=~\".*etcd.*\"}))[15m:1m]) >= 3\n",
|
||||
"for": "5m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
}
|
||||
@ -145,25 +145,132 @@ data:
|
||||
kube.yaml: |-
|
||||
{
|
||||
"groups": [
|
||||
{
|
||||
"name": "kube-apiserver-error",
|
||||
"rules": [
|
||||
{
|
||||
"expr": "sum by (status_class) (\n label_replace(\n rate(apiserver_request_total{job=\"apiserver\"}[5m]\n ), \"status_class\", \"${1}xx\", \"code\", \"([0-9])..\")\n)\n",
|
||||
"labels": {
|
||||
"job": "apiserver"
|
||||
},
|
||||
"record": "status_class:apiserver_request_total:rate5m"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (status_class) (\n label_replace(\n rate(apiserver_request_total{job=\"apiserver\"}[30m]\n ), \"status_class\", \"${1}xx\", \"code\", \"([0-9])..\")\n)\n",
|
||||
"labels": {
|
||||
"job": "apiserver"
|
||||
},
|
||||
"record": "status_class:apiserver_request_total:rate30m"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (status_class) (\n label_replace(\n rate(apiserver_request_total{job=\"apiserver\"}[1h]\n ), \"status_class\", \"${1}xx\", \"code\", \"([0-9])..\")\n)\n",
|
||||
"labels": {
|
||||
"job": "apiserver"
|
||||
},
|
||||
"record": "status_class:apiserver_request_total:rate1h"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (status_class) (\n label_replace(\n rate(apiserver_request_total{job=\"apiserver\"}[2h]\n ), \"status_class\", \"${1}xx\", \"code\", \"([0-9])..\")\n)\n",
|
||||
"labels": {
|
||||
"job": "apiserver"
|
||||
},
|
||||
"record": "status_class:apiserver_request_total:rate2h"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (status_class) (\n label_replace(\n rate(apiserver_request_total{job=\"apiserver\"}[6h]\n ), \"status_class\", \"${1}xx\", \"code\", \"([0-9])..\")\n)\n",
|
||||
"labels": {
|
||||
"job": "apiserver"
|
||||
},
|
||||
"record": "status_class:apiserver_request_total:rate6h"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (status_class) (\n label_replace(\n rate(apiserver_request_total{job=\"apiserver\"}[1d]\n ), \"status_class\", \"${1}xx\", \"code\", \"([0-9])..\")\n)\n",
|
||||
"labels": {
|
||||
"job": "apiserver"
|
||||
},
|
||||
"record": "status_class:apiserver_request_total:rate1d"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (status_class) (\n label_replace(\n rate(apiserver_request_total{job=\"apiserver\"}[3d]\n ), \"status_class\", \"${1}xx\", \"code\", \"([0-9])..\")\n)\n",
|
||||
"labels": {
|
||||
"job": "apiserver"
|
||||
},
|
||||
"record": "status_class:apiserver_request_total:rate3d"
|
||||
},
|
||||
{
|
||||
"expr": "sum(status_class:apiserver_request_total:rate5m{job=\"apiserver\",status_class=\"5xx\"})\n/\nsum(status_class:apiserver_request_total:rate5m{job=\"apiserver\"})\n",
|
||||
"labels": {
|
||||
"job": "apiserver"
|
||||
},
|
||||
"record": "status_class_5xx:apiserver_request_total:ratio_rate5m"
|
||||
},
|
||||
{
|
||||
"expr": "sum(status_class:apiserver_request_total:rate30m{job=\"apiserver\",status_class=\"5xx\"})\n/\nsum(status_class:apiserver_request_total:rate30m{job=\"apiserver\"})\n",
|
||||
"labels": {
|
||||
"job": "apiserver"
|
||||
},
|
||||
"record": "status_class_5xx:apiserver_request_total:ratio_rate30m"
|
||||
},
|
||||
{
|
||||
"expr": "sum(status_class:apiserver_request_total:rate1h{job=\"apiserver\",status_class=\"5xx\"})\n/\nsum(status_class:apiserver_request_total:rate1h{job=\"apiserver\"})\n",
|
||||
"labels": {
|
||||
"job": "apiserver"
|
||||
},
|
||||
"record": "status_class_5xx:apiserver_request_total:ratio_rate1h"
|
||||
},
|
||||
{
|
||||
"expr": "sum(status_class:apiserver_request_total:rate2h{job=\"apiserver\",status_class=\"5xx\"})\n/\nsum(status_class:apiserver_request_total:rate2h{job=\"apiserver\"})\n",
|
||||
"labels": {
|
||||
"job": "apiserver"
|
||||
},
|
||||
"record": "status_class_5xx:apiserver_request_total:ratio_rate2h"
|
||||
},
|
||||
{
|
||||
"expr": "sum(status_class:apiserver_request_total:rate6h{job=\"apiserver\",status_class=\"5xx\"})\n/\nsum(status_class:apiserver_request_total:rate6h{job=\"apiserver\"})\n",
|
||||
"labels": {
|
||||
"job": "apiserver"
|
||||
},
|
||||
"record": "status_class_5xx:apiserver_request_total:ratio_rate6h"
|
||||
},
|
||||
{
|
||||
"expr": "sum(status_class:apiserver_request_total:rate1d{job=\"apiserver\",status_class=\"5xx\"})\n/\nsum(status_class:apiserver_request_total:rate1d{job=\"apiserver\"})\n",
|
||||
"labels": {
|
||||
"job": "apiserver"
|
||||
},
|
||||
"record": "status_class_5xx:apiserver_request_total:ratio_rate1d"
|
||||
},
|
||||
{
|
||||
"expr": "sum(status_class:apiserver_request_total:rate3d{job=\"apiserver\",status_class=\"5xx\"})\n/\nsum(status_class:apiserver_request_total:rate3d{job=\"apiserver\"})\n",
|
||||
"labels": {
|
||||
"job": "apiserver"
|
||||
},
|
||||
"record": "status_class_5xx:apiserver_request_total:ratio_rate3d"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "kube-apiserver.rules",
|
||||
"rules": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\"}[5m])) without(instance, pod))\n",
|
||||
"expr": "sum(rate(apiserver_request_duration_seconds_sum{subresource!=\"log\",verb!~\"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT\"}[5m])) without(instance, pod)\n/\nsum(rate(apiserver_request_duration_seconds_count{subresource!=\"log\",verb!~\"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT\"}[5m])) without(instance, pod)\n",
|
||||
"record": "cluster:apiserver_request_duration_seconds:mean5m"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",subresource!=\"log\",verb!~\"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT\"}[5m])) without(instance, pod))\n",
|
||||
"labels": {
|
||||
"quantile": "0.99"
|
||||
},
|
||||
"record": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.9, sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\"}[5m])) without(instance, pod))\n",
|
||||
"expr": "histogram_quantile(0.9, sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",subresource!=\"log\",verb!~\"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT\"}[5m])) without(instance, pod))\n",
|
||||
"labels": {
|
||||
"quantile": "0.9"
|
||||
},
|
||||
"record": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.5, sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\"}[5m])) without(instance, pod))\n",
|
||||
"expr": "histogram_quantile(0.5, sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",subresource!=\"log\",verb!~\"LIST|WATCH|WATCHLIST|DELETECOLLECTION|PROXY|CONNECT\"}[5m])) without(instance, pod))\n",
|
||||
"labels": {
|
||||
"quantile": "0.5"
|
||||
},
|
||||
@ -179,23 +286,23 @@ data:
|
||||
"record": "namespace:container_cpu_usage_seconds_total:sum_rate"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (namespace, pod, container) (\n rate(container_cpu_usage_seconds_total{job=\"kubernetes-cadvisor\", image!=\"\", container!=\"POD\"}[5m])\n) * on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)\n",
|
||||
"expr": "sum by (cluster, namespace, pod, container) (\n rate(container_cpu_usage_seconds_total{job=\"kubernetes-cadvisor\", image!=\"\", container!=\"POD\"}[5m])\n) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) (\n 1, max by(cluster, namespace, pod, node) (kube_pod_info)\n)\n",
|
||||
"record": "node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate"
|
||||
},
|
||||
{
|
||||
"expr": "container_memory_working_set_bytes{job=\"kubernetes-cadvisor\", image!=\"\"}\n* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)\n",
|
||||
"expr": "container_memory_working_set_bytes{job=\"kubernetes-cadvisor\", image!=\"\"}\n* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,\n max by(namespace, pod, node) (kube_pod_info)\n)\n",
|
||||
"record": "node_namespace_pod_container:container_memory_working_set_bytes"
|
||||
},
|
||||
{
|
||||
"expr": "container_memory_rss{job=\"kubernetes-cadvisor\", image!=\"\"}\n* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)\n",
|
||||
"expr": "container_memory_rss{job=\"kubernetes-cadvisor\", image!=\"\"}\n* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,\n max by(namespace, pod, node) (kube_pod_info)\n)\n",
|
||||
"record": "node_namespace_pod_container:container_memory_rss"
|
||||
},
|
||||
{
|
||||
"expr": "container_memory_cache{job=\"kubernetes-cadvisor\", image!=\"\"}\n* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)\n",
|
||||
"expr": "container_memory_cache{job=\"kubernetes-cadvisor\", image!=\"\"}\n* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,\n max by(namespace, pod, node) (kube_pod_info)\n)\n",
|
||||
"record": "node_namespace_pod_container:container_memory_cache"
|
||||
},
|
||||
{
|
||||
"expr": "container_memory_swap{job=\"kubernetes-cadvisor\", image!=\"\"}\n* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)\n",
|
||||
"expr": "container_memory_swap{job=\"kubernetes-cadvisor\", image!=\"\"}\n* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,\n max by(namespace, pod, node) (kube_pod_info)\n)\n",
|
||||
"record": "node_namespace_pod_container:container_memory_swap"
|
||||
},
|
||||
{
|
||||
@ -203,29 +310,29 @@ data:
|
||||
"record": "namespace:container_memory_usage_bytes:sum"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (namespace, label_name) (\n sum(kube_pod_container_resource_requests_memory_bytes{job=\"kube-state-metrics\"} * on (endpoint, instance, job, namespace, pod, service) group_left(phase) (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)) by (namespace, pod)\n * on (namespace, pod)\n group_left(label_name) kube_pod_labels{job=\"kube-state-metrics\"}\n)\n",
|
||||
"expr": "sum by (namespace) (\n sum by (namespace, pod) (\n max by (namespace, pod, container) (\n kube_pod_container_resource_requests_memory_bytes{job=\"kube-state-metrics\"}\n ) * on(namespace, pod) group_left() max by (namespace, pod) (\n kube_pod_status_phase{phase=~\"Pending|Running\"} == 1\n )\n )\n)\n",
|
||||
"record": "namespace:kube_pod_container_resource_requests_memory_bytes:sum"
|
||||
},
|
||||
{
|
||||
"expr": "sum by (namespace, label_name) (\n sum(kube_pod_container_resource_requests_cpu_cores{job=\"kube-state-metrics\"} * on (endpoint, instance, job, namespace, pod, service) group_left(phase) (kube_pod_status_phase{phase=~\"Pending|Running\"} == 1)) by (namespace, pod)\n * on (namespace, pod)\n group_left(label_name) kube_pod_labels{job=\"kube-state-metrics\"}\n)\n",
|
||||
"expr": "sum by (namespace) (\n sum by (namespace, pod) (\n max by (namespace, pod, container) (\n kube_pod_container_resource_requests_cpu_cores{job=\"kube-state-metrics\"}\n ) * on(namespace, pod) group_left() max by (namespace, pod) (\n kube_pod_status_phase{phase=~\"Pending|Running\"} == 1\n )\n )\n)\n",
|
||||
"record": "namespace:kube_pod_container_resource_requests_cpu_cores:sum"
|
||||
},
|
||||
{
|
||||
"expr": "sum(\n label_replace(\n label_replace(\n kube_pod_owner{job=\"kube-state-metrics\", owner_kind=\"ReplicaSet\"},\n \"replicaset\", \"$1\", \"owner_name\", \"(.*)\"\n ) * on(replicaset, namespace) group_left(owner_name) kube_replicaset_owner{job=\"kube-state-metrics\"},\n \"workload\", \"$1\", \"owner_name\", \"(.*)\"\n )\n) by (namespace, workload, pod)\n",
|
||||
"expr": "max by (cluster, namespace, workload, pod) (\n label_replace(\n label_replace(\n kube_pod_owner{job=\"kube-state-metrics\", owner_kind=\"ReplicaSet\"},\n \"replicaset\", \"$1\", \"owner_name\", \"(.*)\"\n ) * on(replicaset, namespace) group_left(owner_name) topk by(replicaset, namespace) (\n 1, max by (replicaset, namespace, owner_name) (\n kube_replicaset_owner{job=\"kube-state-metrics\"}\n )\n ),\n \"workload\", \"$1\", \"owner_name\", \"(.*)\"\n )\n)\n",
|
||||
"labels": {
|
||||
"workload_type": "deployment"
|
||||
},
|
||||
"record": "mixin_pod_workload"
|
||||
},
|
||||
{
|
||||
"expr": "sum(\n label_replace(\n kube_pod_owner{job=\"kube-state-metrics\", owner_kind=\"DaemonSet\"},\n \"workload\", \"$1\", \"owner_name\", \"(.*)\"\n )\n) by (namespace, workload, pod)\n",
|
||||
"expr": "max by (cluster, namespace, workload, pod) (\n label_replace(\n kube_pod_owner{job=\"kube-state-metrics\", owner_kind=\"DaemonSet\"},\n \"workload\", \"$1\", \"owner_name\", \"(.*)\"\n )\n)\n",
|
||||
"labels": {
|
||||
"workload_type": "daemonset"
|
||||
},
|
||||
"record": "mixin_pod_workload"
|
||||
},
|
||||
{
|
||||
"expr": "sum(\n label_replace(\n kube_pod_owner{job=\"kube-state-metrics\", owner_kind=\"StatefulSet\"},\n \"workload\", \"$1\", \"owner_name\", \"(.*)\"\n )\n) by (namespace, workload, pod)\n",
|
||||
"expr": "max by (cluster, namespace, workload, pod) (\n label_replace(\n kube_pod_owner{job=\"kube-state-metrics\", owner_kind=\"StatefulSet\"},\n \"workload\", \"$1\", \"owner_name\", \"(.*)\"\n )\n)\n",
|
||||
"labels": {
|
||||
"workload_type": "statefulset"
|
||||
},
|
||||
@ -305,23 +412,49 @@ data:
|
||||
"name": "node.rules",
|
||||
"rules": [
|
||||
{
|
||||
"expr": "sum(min(kube_pod_info) by (node))",
|
||||
"expr": "sum(min(kube_pod_info) by (cluster, node))\n",
|
||||
"record": ":kube_pod_info_node_count:"
|
||||
},
|
||||
{
|
||||
"expr": "max(label_replace(kube_pod_info{job=\"kube-state-metrics\"}, \"pod\", \"$1\", \"pod\", \"(.*)\")) by (node, namespace, pod)\n",
|
||||
"expr": "topk by(namespace, pod) (1,\n max by (node, namespace, pod) (\n label_replace(kube_pod_info{job=\"kube-state-metrics\"}, \"pod\", \"$1\", \"pod\", \"(.*)\")\n))\n",
|
||||
"record": "node_namespace_pod:kube_pod_info:"
|
||||
},
|
||||
{
|
||||
"expr": "count by (node) (sum by (node, cpu) (\n node_cpu_seconds_total{job=\"node-exporter\"}\n* on (namespace, pod) group_left(node)\n node_namespace_pod:kube_pod_info:\n))\n",
|
||||
"expr": "count by (cluster, node) (sum by (node, cpu) (\n node_cpu_seconds_total{job=\"node-exporter\"}\n* on (namespace, pod) group_left(node)\n node_namespace_pod:kube_pod_info:\n))\n",
|
||||
"record": "node:node_num_cpu:sum"
|
||||
},
|
||||
{
|
||||
"expr": "sum(\n node_memory_MemAvailable_bytes{job=\"node-exporter\"} or\n (\n node_memory_Buffers_bytes{job=\"node-exporter\"} +\n node_memory_Cached_bytes{job=\"node-exporter\"} +\n node_memory_MemFree_bytes{job=\"node-exporter\"} +\n node_memory_Slab_bytes{job=\"node-exporter\"}\n )\n)\n",
|
||||
"expr": "sum(\n node_memory_MemAvailable_bytes{job=\"node-exporter\"} or\n (\n node_memory_Buffers_bytes{job=\"node-exporter\"} +\n node_memory_Cached_bytes{job=\"node-exporter\"} +\n node_memory_MemFree_bytes{job=\"node-exporter\"} +\n node_memory_Slab_bytes{job=\"node-exporter\"}\n )\n) by (cluster)\n",
|
||||
"record": ":node_memory_MemAvailable_bytes:sum"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "kubelet.rules",
|
||||
"rules": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (instance, le) * on(instance) group_left(node) kubelet_node_name{job=\"kubelet\"})\n",
|
||||
"labels": {
|
||||
"quantile": "0.99"
|
||||
},
|
||||
"record": "node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.9, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (instance, le) * on(instance) group_left(node) kubelet_node_name{job=\"kubelet\"})\n",
|
||||
"labels": {
|
||||
"quantile": "0.9"
|
||||
},
|
||||
"record": "node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.5, sum(rate(kubelet_pleg_relist_duration_seconds_bucket[5m])) by (instance, le) * on(instance) group_left(node) kubelet_node_name{job=\"kubelet\"})\n",
|
||||
"labels": {
|
||||
"quantile": "0.5"
|
||||
},
|
||||
"record": "node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "kubernetes-apps",
|
||||
"rules": [
|
||||
@ -343,7 +476,7 @@ data:
|
||||
"message": "Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready state for longer than 15 minutes.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodnotready"
|
||||
},
|
||||
"expr": "sum by (namespace, pod) (kube_pod_status_phase{job=\"kube-state-metrics\", phase=~\"Failed|Pending|Unknown\"} * on(namespace, pod) group_left(owner_kind) kube_pod_owner{owner_kind!=\"Job\"}) > 0\n",
|
||||
"expr": "sum by (namespace, pod) (max by(namespace, pod) (kube_pod_status_phase{job=\"kube-state-metrics\", phase=~\"Pending|Unknown\"}) * on(namespace, pod) group_left(owner_kind) max by(namespace, pod, owner_kind) (kube_pod_owner{owner_kind!=\"Job\"})) > 0\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
@ -367,7 +500,7 @@ data:
|
||||
"message": "Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not matched the expected number of replicas for longer than 15 minutes.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentreplicasmismatch"
|
||||
},
|
||||
"expr": "kube_deployment_spec_replicas{job=\"kube-state-metrics\"}\n !=\nkube_deployment_status_replicas_available{job=\"kube-state-metrics\"}\n",
|
||||
"expr": "(\n kube_deployment_spec_replicas{job=\"kube-state-metrics\"}\n !=\n kube_deployment_status_replicas_available{job=\"kube-state-metrics\"}\n) and (\n changes(kube_deployment_status_replicas_updated{job=\"kube-state-metrics\"}[5m])\n ==\n 0\n)\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
@ -379,7 +512,7 @@ data:
|
||||
"message": "StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has not matched the expected number of replicas for longer than 15 minutes.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetreplicasmismatch"
|
||||
},
|
||||
"expr": "kube_statefulset_status_replicas_ready{job=\"kube-state-metrics\"}\n !=\nkube_statefulset_status_replicas{job=\"kube-state-metrics\"}\n",
|
||||
"expr": "(\n kube_statefulset_status_replicas_ready{job=\"kube-state-metrics\"}\n !=\n kube_statefulset_status_replicas{job=\"kube-state-metrics\"}\n) and (\n changes(kube_statefulset_status_replicas_updated{job=\"kube-state-metrics\"}[5m])\n ==\n 0\n)\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
@ -528,7 +661,7 @@ data:
|
||||
"message": "Cluster has overcommitted CPU resource requests for Pods and cannot tolerate node failure.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit"
|
||||
},
|
||||
"expr": "sum(namespace:kube_pod_container_resource_requests_cpu_cores:sum)\n /\nsum(kube_node_status_allocatable_cpu_cores)\n >\n(count(kube_node_status_allocatable_cpu_cores)-1) / count(kube_node_status_allocatable_cpu_cores)\n",
|
||||
"expr": "sum(namespace:kube_pod_container_resource_requests_cpu_cores:sum{})\n /\nsum(kube_node_status_allocatable_cpu_cores)\n >\n(count(kube_node_status_allocatable_cpu_cores)-1) / count(kube_node_status_allocatable_cpu_cores)\n",
|
||||
"for": "5m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
@ -540,7 +673,7 @@ data:
|
||||
"message": "Cluster has overcommitted memory resource requests for Pods and cannot tolerate node failure.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememovercommit"
|
||||
},
|
||||
"expr": "sum(namespace:kube_pod_container_resource_requests_memory_bytes:sum)\n /\nsum(kube_node_status_allocatable_memory_bytes)\n >\n(count(kube_node_status_allocatable_memory_bytes)-1)\n /\ncount(kube_node_status_allocatable_memory_bytes)\n",
|
||||
"expr": "sum(namespace:kube_pod_container_resource_requests_memory_bytes:sum{})\n /\nsum(kube_node_status_allocatable_memory_bytes)\n >\n(count(kube_node_status_allocatable_memory_bytes)-1)\n /\ncount(kube_node_status_allocatable_memory_bytes)\n",
|
||||
"for": "5m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
@ -618,7 +751,7 @@ data:
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefullinfourdays"
|
||||
},
|
||||
"expr": "(\n kubelet_volume_stats_available_bytes{job=\"kubelet\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\"}\n) < 0.15\nand\npredict_linear(kubelet_volume_stats_available_bytes{job=\"kubelet\"}[6h], 4 * 24 * 3600) < 0\n",
|
||||
"for": "5m",
|
||||
"for": "1h",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
}
|
||||
@ -666,17 +799,46 @@ data:
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "kube-apiserver-error-alerts",
|
||||
"rules": [
|
||||
{
|
||||
"alert": "ErrorBudgetBurn",
|
||||
"annotations": {
|
||||
"message": "High requests error budget burn for job=apiserver (current value: {{ $value }})",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-errorbudgetburn"
|
||||
},
|
||||
"expr": "(\n status_class_5xx:apiserver_request_total:ratio_rate1h{job=\"apiserver\"} > (14.4*0.010000)\n and\n status_class_5xx:apiserver_request_total:ratio_rate5m{job=\"apiserver\"} > (14.4*0.010000)\n)\nor\n(\n status_class_5xx:apiserver_request_total:ratio_rate6h{job=\"apiserver\"} > (6*0.010000)\n and\n status_class_5xx:apiserver_request_total:ratio_rate30m{job=\"apiserver\"} > (6*0.010000)\n)\n",
|
||||
"labels": {
|
||||
"job": "apiserver",
|
||||
"severity": "critical"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "ErrorBudgetBurn",
|
||||
"annotations": {
|
||||
"message": "High requests error budget burn for job=apiserver (current value: {{ $value }})",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-errorbudgetburn"
|
||||
},
|
||||
"expr": "(\n status_class_5xx:apiserver_request_total:ratio_rate1d{job=\"apiserver\"} > (3*0.010000)\n and\n status_class_5xx:apiserver_request_total:ratio_rate2h{job=\"apiserver\"} > (3*0.010000)\n)\nor\n(\n status_class_5xx:apiserver_request_total:ratio_rate3d{job=\"apiserver\"} > (0.010000)\n and\n status_class_5xx:apiserver_request_total:ratio_rate6h{job=\"apiserver\"} > (0.010000)\n)\n",
|
||||
"labels": {
|
||||
"job": "apiserver",
|
||||
"severity": "warning"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "kubernetes-system-apiserver",
|
||||
"rules": [
|
||||
{
|
||||
"alert": "KubeAPILatencyHigh",
|
||||
"annotations": {
|
||||
"message": "The API server has a 99th percentile latency of {{ $value }} seconds for {{ $labels.verb }} {{ $labels.resource }}.",
|
||||
"message": "The API server has an abnormal latency of {{ $value }} seconds for {{ $labels.verb }} {{ $labels.resource }}.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh"
|
||||
},
|
||||
"expr": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job=\"apiserver\",quantile=\"0.99\",subresource!=\"log\",verb!~\"LIST|WATCH|WATCHLIST|PROXY|CONNECT\"} > 1\n",
|
||||
"for": "10m",
|
||||
"expr": "(\n cluster:apiserver_request_duration_seconds:mean5m{job=\"apiserver\"}\n >\n on (verb) group_left()\n (\n avg by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job=\"apiserver\"} >= 0)\n +\n 2*stddev by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job=\"apiserver\"} >= 0)\n )\n) > on (verb) group_left()\n1.2 * avg by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job=\"apiserver\"} >= 0)\nand on (verb,resource)\ncluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job=\"apiserver\",quantile=\"0.99\"}\n>\n1\n",
|
||||
"for": "5m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
}
|
||||
@ -687,36 +849,12 @@ data:
|
||||
"message": "The API server has a 99th percentile latency of {{ $value }} seconds for {{ $labels.verb }} {{ $labels.resource }}.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh"
|
||||
},
|
||||
"expr": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job=\"apiserver\",quantile=\"0.99\",subresource!=\"log\",verb!~\"LIST|WATCH|WATCHLIST|PROXY|CONNECT\"} > 4\n",
|
||||
"expr": "cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job=\"apiserver\",quantile=\"0.99\"} > 4\n",
|
||||
"for": "10m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "KubeAPIErrorsHigh",
|
||||
"annotations": {
|
||||
"message": "API server is returning errors for {{ $value | humanizePercentage }} of requests.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh"
|
||||
},
|
||||
"expr": "sum(rate(apiserver_request_total{job=\"apiserver\",code=~\"5..\"}[5m]))\n /\nsum(rate(apiserver_request_total{job=\"apiserver\"}[5m])) > 0.03\n",
|
||||
"for": "10m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "KubeAPIErrorsHigh",
|
||||
"annotations": {
|
||||
"message": "API server is returning errors for {{ $value | humanizePercentage }} of requests.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh"
|
||||
},
|
||||
"expr": "sum(rate(apiserver_request_total{job=\"apiserver\",code=~\"5..\"}[5m]))\n /\nsum(rate(apiserver_request_total{job=\"apiserver\"}[5m])) > 0.01\n",
|
||||
"for": "10m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "KubeAPIErrorsHigh",
|
||||
"annotations": {
|
||||
@ -744,10 +882,10 @@ data:
|
||||
{
|
||||
"alert": "KubeClientCertificateExpiration",
|
||||
"annotations": {
|
||||
"message": "A client certificate used to authenticate to the apiserver is expiring in less than 7.0 days.",
|
||||
"message": "A client certificate used to authenticate to the apiserver is expiring in less than 1.0 hours.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration"
|
||||
},
|
||||
"expr": "apiserver_client_certificate_expiration_seconds_count{job=\"apiserver\"} > 0 and histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job=\"apiserver\"}[5m]))) < 604800\n",
|
||||
"expr": "apiserver_client_certificate_expiration_seconds_count{job=\"apiserver\"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job=\"apiserver\"}[5m]))) < 3600\n",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
}
|
||||
@ -755,14 +893,37 @@ data:
|
||||
{
|
||||
"alert": "KubeClientCertificateExpiration",
|
||||
"annotations": {
|
||||
"message": "A client certificate used to authenticate to the apiserver is expiring in less than 24.0 hours.",
|
||||
"message": "A client certificate used to authenticate to the apiserver is expiring in less than 0.1 hours.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration"
|
||||
},
|
||||
"expr": "apiserver_client_certificate_expiration_seconds_count{job=\"apiserver\"} > 0 and histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job=\"apiserver\"}[5m]))) < 86400\n",
|
||||
"expr": "apiserver_client_certificate_expiration_seconds_count{job=\"apiserver\"} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job=\"apiserver\"}[5m]))) < 300\n",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "AggregatedAPIErrors",
|
||||
"annotations": {
|
||||
"message": "An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. The number of errors have increased for it in the past five minutes. High values indicate that the availability of the service changes too often.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapierrors"
|
||||
},
|
||||
"expr": "sum by(name, namespace)(increase(aggregator_unavailable_apiservice_count[5m])) > 2\n",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "AggregatedAPIDown",
|
||||
"annotations": {
|
||||
"message": "An aggregated API {{ $labels.name }}/{{ $labels.namespace }} is down. It has not been available at least for the past five minutes.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapidown"
|
||||
},
|
||||
"expr": "sum by(name, namespace)(sum_over_time(aggregator_unavailable_apiservice[5m])) > 0\n",
|
||||
"for": "5m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "KubeAPIDown",
|
||||
"annotations": {
|
||||
@ -799,6 +960,7 @@ data:
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodeunreachable"
|
||||
},
|
||||
"expr": "kube_node_spec_taint{job=\"kube-state-metrics\",key=\"node.kubernetes.io/unreachable\",effect=\"NoSchedule\"} == 1\n",
|
||||
"for": "2m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
}
|
||||
@ -809,7 +971,43 @@ data:
|
||||
"message": "Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage }} of its Pod capacity.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubelettoomanypods"
|
||||
},
|
||||
"expr": "max(max(kubelet_running_pod_count{job=\"kubelet\"}) by(instance) * on(instance) group_left(node) kubelet_node_name{job=\"kubelet\"}) by(node) / max(kube_node_status_capacity_pods{job=\"kube-state-metrics\"}) by(node) > 0.95\n",
|
||||
"expr": "max(max(kubelet_running_pod_count{job=\"kubelet\"}) by(instance) * on(instance) group_left(node) kubelet_node_name{job=\"kubelet\"}) by(node) / max(kube_node_status_capacity_pods{job=\"kube-state-metrics\"} != 1) by(node) > 0.95\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "KubeNodeReadinessFlapping",
|
||||
"annotations": {
|
||||
"message": "The readiness status of node {{ $labels.node }} has changed {{ $value }} times in the last 15 minutes.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodereadinessflapping"
|
||||
},
|
||||
"expr": "sum(changes(kube_node_status_condition{status=\"true\",condition=\"Ready\"}[15m])) by (node) > 2\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "KubeletPlegDurationHigh",
|
||||
"annotations": {
|
||||
"message": "The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{ $value }} seconds on node {{ $labels.node }}.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletplegdurationhigh"
|
||||
},
|
||||
"expr": "node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile=\"0.99\"} >= 10\n",
|
||||
"for": "5m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "KubeletPodStartUpLatencyHigh",
|
||||
"annotations": {
|
||||
"message": "Kubelet Pod startup 99th percentile latency is {{ $value }} seconds on node {{ $labels.node }}.",
|
||||
"runbook_url": "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletpodstartuplatencyhigh"
|
||||
},
|
||||
"expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job=\"kubelet\"}[5m])) by (instance, le)) * on(instance) group_left(node) kubelet_node_name > 60\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
@ -865,9 +1063,167 @@ data:
|
||||
}
|
||||
]
|
||||
}
|
||||
loki.yaml: |-
|
||||
{
|
||||
"groups": [
|
||||
{
|
||||
"name": "loki_rules",
|
||||
"rules": [
|
||||
{
|
||||
"expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, job))",
|
||||
"record": "job:loki_request_duration_seconds:99quantile"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, job))",
|
||||
"record": "job:loki_request_duration_seconds:50quantile"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(loki_request_duration_seconds_sum[1m])) by (job) / sum(rate(loki_request_duration_seconds_count[1m])) by (job)",
|
||||
"record": "job:loki_request_duration_seconds:avg"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, job)",
|
||||
"record": "job:loki_request_duration_seconds_bucket:sum_rate"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(loki_request_duration_seconds_sum[1m])) by (job)",
|
||||
"record": "job:loki_request_duration_seconds_sum:sum_rate"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(loki_request_duration_seconds_count[1m])) by (job)",
|
||||
"record": "job:loki_request_duration_seconds_count:sum_rate"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, job, route))",
|
||||
"record": "job_route:loki_request_duration_seconds:99quantile"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, job, route))",
|
||||
"record": "job_route:loki_request_duration_seconds:50quantile"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(loki_request_duration_seconds_sum[1m])) by (job, route) / sum(rate(loki_request_duration_seconds_count[1m])) by (job, route)",
|
||||
"record": "job_route:loki_request_duration_seconds:avg"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, job, route)",
|
||||
"record": "job_route:loki_request_duration_seconds_bucket:sum_rate"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(loki_request_duration_seconds_sum[1m])) by (job, route)",
|
||||
"record": "job_route:loki_request_duration_seconds_sum:sum_rate"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(loki_request_duration_seconds_count[1m])) by (job, route)",
|
||||
"record": "job_route:loki_request_duration_seconds_count:sum_rate"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, namespace, job, route))",
|
||||
"record": "namespace_job_route:loki_request_duration_seconds:99quantile"
|
||||
},
|
||||
{
|
||||
"expr": "histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, namespace, job, route))",
|
||||
"record": "namespace_job_route:loki_request_duration_seconds:50quantile"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(loki_request_duration_seconds_sum[1m])) by (namespace, job, route) / sum(rate(loki_request_duration_seconds_count[1m])) by (namespace, job, route)",
|
||||
"record": "namespace_job_route:loki_request_duration_seconds:avg"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, namespace, job, route)",
|
||||
"record": "namespace_job_route:loki_request_duration_seconds_bucket:sum_rate"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(loki_request_duration_seconds_sum[1m])) by (namespace, job, route)",
|
||||
"record": "namespace_job_route:loki_request_duration_seconds_sum:sum_rate"
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(loki_request_duration_seconds_count[1m])) by (namespace, job, route)",
|
||||
"record": "namespace_job_route:loki_request_duration_seconds_count:sum_rate"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "loki_alerts",
|
||||
"rules": [
|
||||
{
|
||||
"alert": "LokiRequestErrors",
|
||||
"annotations": {
|
||||
"message": "{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf \"%.2f\" $value }}% errors.\n"
|
||||
},
|
||||
"expr": "100 * sum(rate(loki_request_duration_seconds_count{status_code=~\"5..\"}[1m])) by (namespace, job, route)\n /\nsum(rate(loki_request_duration_seconds_count[1m])) by (namespace, job, route)\n > 10\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "LokiRequestLatency",
|
||||
"annotations": {
|
||||
"message": "{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf \"%.2f\" $value }}s 99th percentile latency.\n"
|
||||
},
|
||||
"expr": "namespace_job_route:loki_request_duration_seconds:99quantile{route!~\"(?i).*tail.*\"} > 1\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
node-exporter.yaml: |-
|
||||
{
|
||||
"groups": [
|
||||
{
|
||||
"name": "node-exporter.rules",
|
||||
"rules": [
|
||||
{
|
||||
"expr": "count without (cpu) (\n count without (mode) (\n node_cpu_seconds_total{job=\"node-exporter\"}\n )\n)\n",
|
||||
"record": "instance:node_num_cpu:sum"
|
||||
},
|
||||
{
|
||||
"expr": "1 - avg without (cpu, mode) (\n rate(node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\"}[1m])\n)\n",
|
||||
"record": "instance:node_cpu_utilisation:rate1m"
|
||||
},
|
||||
{
|
||||
"expr": "(\n node_load1{job=\"node-exporter\"}\n/\n instance:node_num_cpu:sum{job=\"node-exporter\"}\n)\n",
|
||||
"record": "instance:node_load1_per_cpu:ratio"
|
||||
},
|
||||
{
|
||||
"expr": "1 - (\n node_memory_MemAvailable_bytes{job=\"node-exporter\"}\n/\n node_memory_MemTotal_bytes{job=\"node-exporter\"}\n)\n",
|
||||
"record": "instance:node_memory_utilisation:ratio"
|
||||
},
|
||||
{
|
||||
"expr": "rate(node_vmstat_pgmajfault{job=\"node-exporter\"}[1m])\n",
|
||||
"record": "instance:node_vmstat_pgmajfault:rate1m"
|
||||
},
|
||||
{
|
||||
"expr": "rate(node_disk_io_time_seconds_total{job=\"node-exporter\", device!~\"dm.*\"}[1m])\n",
|
||||
"record": "instance_device:node_disk_io_time_seconds:rate1m"
|
||||
},
|
||||
{
|
||||
"expr": "rate(node_disk_io_time_weighted_seconds_total{job=\"node-exporter\", device!~\"dm.*\"}[1m])\n",
|
||||
"record": "instance_device:node_disk_io_time_weighted_seconds:rate1m"
|
||||
},
|
||||
{
|
||||
"expr": "sum without (device) (\n rate(node_network_receive_bytes_total{job=\"node-exporter\", device!=\"lo\"}[1m])\n)\n",
|
||||
"record": "instance:node_network_receive_bytes_excluding_lo:rate1m"
|
||||
},
|
||||
{
|
||||
"expr": "sum without (device) (\n rate(node_network_transmit_bytes_total{job=\"node-exporter\", device!=\"lo\"}[1m])\n)\n",
|
||||
"record": "instance:node_network_transmit_bytes_excluding_lo:rate1m"
|
||||
},
|
||||
{
|
||||
"expr": "sum without (device) (\n rate(node_network_receive_drop_total{job=\"node-exporter\", device!=\"lo\"}[1m])\n)\n",
|
||||
"record": "instance:node_network_receive_drop_excluding_lo:rate1m"
|
||||
},
|
||||
{
|
||||
"expr": "sum without (device) (\n rate(node_network_transmit_drop_total{job=\"node-exporter\", device!=\"lo\"}[1m])\n)\n",
|
||||
"record": "instance:node_network_transmit_drop_excluding_lo:rate1m"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "node-exporter",
|
||||
"rules": [
|
||||
@ -990,6 +1346,41 @@ data:
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "NodeHighNumberConntrackEntriesUsed",
|
||||
"annotations": {
|
||||
"description": "{{ $value | humanizePercentage }} of conntrack entries are used",
|
||||
"summary": "Number of conntrack are getting close to the limit"
|
||||
},
|
||||
"expr": "(node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.75\n",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "NodeClockSkewDetected",
|
||||
"annotations": {
|
||||
"message": "Clock on {{ $labels.instance }} is out of sync by more than 300s. Ensure NTP is configured correctly on this host.",
|
||||
"summary": "Clock skew detected."
|
||||
},
|
||||
"expr": "(\n node_timex_offset_seconds > 0.05\nand\n deriv(node_timex_offset_seconds[5m]) >= 0\n)\nor\n(\n node_timex_offset_seconds < -0.05\nand\n deriv(node_timex_offset_seconds[5m]) <= 0\n)\n",
|
||||
"for": "10m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "NodeClockNotSynchronising",
|
||||
"annotations": {
|
||||
"message": "Clock on {{ $labels.instance }} is not synchronising. Ensure NTP is configured on this host.",
|
||||
"summary": "Clock not synchronising."
|
||||
},
|
||||
"expr": "min_over_time(node_timex_sync_status[5m]) == 0\n",
|
||||
"for": "10m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -1124,7 +1515,7 @@ data:
|
||||
{
|
||||
"alert": "PrometheusRemoteStorageFailures",
|
||||
"annotations": {
|
||||
"description": "Prometheus {{$labels.instance}} failed to send {{ printf \"%.1f\" $value }}% of the samples to queue {{$labels.queue}}.",
|
||||
"description": "Prometheus {{$labels.instance}} failed to send {{ printf \"%.1f\" $value }}% of the samples to {{ if $labels.queue }}{{ $labels.queue }}{{ else }}{{ $labels.url }}{{ end }}.",
|
||||
"summary": "Prometheus fails to send samples to remote storage."
|
||||
},
|
||||
"expr": "(\n rate(prometheus_remote_storage_failed_samples_total{job=\"prometheus\"}[5m])\n/\n (\n rate(prometheus_remote_storage_failed_samples_total{job=\"prometheus\"}[5m])\n +\n rate(prometheus_remote_storage_succeeded_samples_total{job=\"prometheus\"}[5m])\n )\n)\n* 100\n> 1\n",
|
||||
@ -1136,7 +1527,7 @@ data:
|
||||
{
|
||||
"alert": "PrometheusRemoteWriteBehind",
|
||||
"annotations": {
|
||||
"description": "Prometheus {{$labels.instance}} remote write is {{ printf \"%.1f\" $value }}s behind for queue {{$labels.queue}}.",
|
||||
"description": "Prometheus {{$labels.instance}} remote write is {{ printf \"%.1f\" $value }}s behind for {{ if $labels.queue }}{{ $labels.queue }}{{ else }}{{ $labels.url }}{{ end }}.",
|
||||
"summary": "Prometheus remote write is behind."
|
||||
},
|
||||
"expr": "# Without max_over_time, failed scrapes could create false negatives, see\n# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.\n(\n max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job=\"prometheus\"}[5m])\n- on(job, instance) group_right\n max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job=\"prometheus\"}[5m])\n)\n> 120\n",
|
||||
@ -1148,10 +1539,10 @@ data:
|
||||
{
|
||||
"alert": "PrometheusRemoteWriteDesiredShards",
|
||||
"annotations": {
|
||||
"description": "Prometheus {{$labels.instance}} remote write desired shards calculation wants to run {{ printf $value }} shards, which is more than the max of {{ printf `prometheus_remote_storage_shards_max{instance=\"%s\",job=\"prometheus\"}` $labels.instance | query | first | value }}.",
|
||||
"description": "Prometheus {{$labels.instance}} remote write desired shards calculation wants to run {{ $value }} shards, which is more than the max of {{ printf `prometheus_remote_storage_shards_max{instance=\"%s\",job=\"prometheus\"}` $labels.instance | query | first | value }}.",
|
||||
"summary": "Prometheus remote write desired shards calculation wants to run more than configured max shards."
|
||||
},
|
||||
"expr": "# Without max_over_time, failed scrapes could create false negatives, see\n# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.\n(\n max_over_time(prometheus_remote_storage_shards_desired{job=\"prometheus\"}[5m])\n> on(job, instance) group_right\n max_over_time(prometheus_remote_storage_shards_max{job=\"prometheus\"}[5m])\n)\n",
|
||||
"expr": "# Without max_over_time, failed scrapes could create false negatives, see\n# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.\n(\n max_over_time(prometheus_remote_storage_shards_desired{job=\"prometheus\"}[5m])\n>\n max_over_time(prometheus_remote_storage_shards_max{job=\"prometheus\"}[5m])\n)\n",
|
||||
"for": "15m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
@ -1201,6 +1592,17 @@ data:
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
}
|
||||
},
|
||||
{
|
||||
"alert": "BlackboxProbeFailure",
|
||||
"annotations": {
|
||||
"message": "Blackbox probe {{$labels.instance}} failed"
|
||||
},
|
||||
"expr": "probe_success == 0",
|
||||
"for": "2m",
|
||||
"labels": {
|
||||
"severity": "critical"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
@ -1212,7 +1614,7 @@ data:
|
||||
"annotations": {
|
||||
"message": "{{ $value }} RAID disk(s) on node {{ $labels.instance }} are inactive."
|
||||
},
|
||||
"expr": "node_md_disks - node_md_disks_active > 0",
|
||||
"expr": "node_md_disks{state=\"failed\"} > 0",
|
||||
"for": "10m",
|
||||
"labels": {
|
||||
"severity": "warning"
|
||||
|
@ -11,11 +11,11 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster
|
||||
|
||||
## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>
|
||||
|
||||
* Kubernetes v1.17.0 (upstream)
|
||||
* Kubernetes v1.18.4 (upstream)
|
||||
* Single or multi-master, [Calico](https://www.projectcalico.org/) or [flannel](https://github.com/coreos/flannel) networking
|
||||
* On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/)
|
||||
* Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [spot](https://typhoon.psdn.io/cl/aws/#spot) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#container-linux) customization
|
||||
* Ready for Ingress, Prometheus, Grafana, and other optional [addons](https://typhoon.psdn.io/addons/overview/)
|
||||
* Ready for Ingress, Prometheus, Grafana, CSI, and other optional [addons](https://typhoon.psdn.io/addons/overview/)
|
||||
|
||||
## Docs
|
||||
|
||||
|
@ -4,8 +4,8 @@ locals {
|
||||
# flatcar-stable -> Flatcar Linux AMI
|
||||
ami_id = local.flavor == "flatcar" ? data.aws_ami.flatcar.image_id : data.aws_ami.coreos.image_id
|
||||
|
||||
flavor = element(split("-", var.os_image), 0)
|
||||
channel = element(split("-", var.os_image), 1)
|
||||
flavor = split("-", var.os_image)[0]
|
||||
channel = split("-", var.os_image)[1]
|
||||
}
|
||||
|
||||
data "aws_ami" "coreos" {
|
||||
|
@ -1,6 +1,6 @@
|
||||
# Kubernetes assets (kubeconfig, manifests)
|
||||
module "bootstrap" {
|
||||
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=24e5513ee6da4888005aac02101f73fc9e5e829f"
|
||||
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=e75697ce35d7773705f0b9b28ce1ffbe99f9493c"
|
||||
|
||||
cluster_name = var.cluster_name
|
||||
api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)]
|
||||
|
@ -2,12 +2,14 @@
|
||||
systemd:
|
||||
units:
|
||||
- name: etcd-member.service
|
||||
enable: true
|
||||
enabled: true
|
||||
dropins:
|
||||
- name: 40-etcd-cluster.conf
|
||||
contents: |
|
||||
[Service]
|
||||
Environment="ETCD_IMAGE_TAG=v3.4.3"
|
||||
Environment="ETCD_IMAGE_TAG=v3.4.9"
|
||||
Environment="ETCD_IMAGE_URL=docker://quay.io/coreos/etcd"
|
||||
Environment="RKT_RUN_ARGS=--insecure-options=image"
|
||||
Environment="ETCD_NAME=${etcd_name}"
|
||||
Environment="ETCD_ADVERTISE_CLIENT_URLS=https://${etcd_domain}:2379"
|
||||
Environment="ETCD_INITIAL_ADVERTISE_PEER_URLS=https://${etcd_domain}:2380"
|
||||
@ -26,11 +28,11 @@ systemd:
|
||||
Environment="ETCD_PEER_KEY_FILE=/etc/ssl/certs/etcd/peer.key"
|
||||
Environment="ETCD_PEER_CLIENT_CERT_AUTH=true"
|
||||
- name: docker.service
|
||||
enable: true
|
||||
enabled: true
|
||||
- name: locksmithd.service
|
||||
mask: true
|
||||
- name: wait-for-dns.service
|
||||
enable: true
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Wait for DNS entries
|
||||
@ -44,52 +46,70 @@ systemd:
|
||||
RequiredBy=kubelet.service
|
||||
RequiredBy=etcd-member.service
|
||||
- name: kubelet.service
|
||||
enable: true
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Kubelet via Hyperkube
|
||||
Description=Kubelet
|
||||
Wants=rpc-statd.service
|
||||
[Service]
|
||||
EnvironmentFile=/etc/kubernetes/kubelet.env
|
||||
Environment="RKT_RUN_ARGS=--uuid-file-save=/var/cache/kubelet-pod.uuid \
|
||||
--volume=resolv,kind=host,source=/etc/resolv.conf \
|
||||
--mount volume=resolv,target=/etc/resolv.conf \
|
||||
--volume var-lib-cni,kind=host,source=/var/lib/cni \
|
||||
--mount volume=var-lib-cni,target=/var/lib/cni \
|
||||
--volume var-lib-calico,kind=host,source=/var/lib/calico \
|
||||
--mount volume=var-lib-calico,target=/var/lib/calico \
|
||||
--volume opt-cni-bin,kind=host,source=/opt/cni/bin \
|
||||
--mount volume=opt-cni-bin,target=/opt/cni/bin \
|
||||
--volume var-log,kind=host,source=/var/log \
|
||||
--mount volume=var-log,target=/var/log \
|
||||
--insecure-options=image"
|
||||
Environment=KUBELET_IMAGE=docker://quay.io/poseidon/kubelet:v1.18.4
|
||||
Environment=KUBELET_CGROUP_DRIVER=${cgroup_driver}
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/cni/net.d
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
|
||||
ExecStartPre=/bin/mkdir -p /opt/cni/bin
|
||||
ExecStartPre=/bin/mkdir -p /var/lib/cni
|
||||
ExecStartPre=/bin/mkdir -p /var/lib/calico
|
||||
ExecStartPre=/bin/mkdir -p /var/lib/kubelet/volumeplugins
|
||||
ExecStartPre=/usr/bin/bash -c "grep 'certificate-authority-data' /etc/kubernetes/kubeconfig | awk '{print $2}' | base64 -d > /etc/kubernetes/ca.crt"
|
||||
ExecStartPre=-/usr/bin/rkt rm --uuid-file=/var/cache/kubelet-pod.uuid
|
||||
ExecStart=/usr/lib/coreos/kubelet-wrapper \
|
||||
ExecStart=/usr/bin/rkt run \
|
||||
--uuid-file-save=/var/cache/kubelet-pod.uuid \
|
||||
--stage1-from-dir=stage1-fly.aci \
|
||||
--hosts-entry host \
|
||||
--insecure-options=image \
|
||||
--volume etc-kubernetes,kind=host,source=/etc/kubernetes,readOnly=true \
|
||||
--mount volume=etc-kubernetes,target=/etc/kubernetes \
|
||||
--volume etc-machine-id,kind=host,source=/etc/machine-id,readOnly=true \
|
||||
--mount volume=etc-machine-id,target=/etc/machine-id \
|
||||
--volume etc-os-release,kind=host,source=/usr/lib/os-release,readOnly=true \
|
||||
--mount volume=etc-os-release,target=/etc/os-release \
|
||||
--volume=etc-resolv,kind=host,source=/etc/resolv.conf,readOnly=true \
|
||||
--mount volume=etc-resolv,target=/etc/resolv.conf \
|
||||
--volume etc-ssl-certs,kind=host,source=/etc/ssl/certs,readOnly=true \
|
||||
--mount volume=etc-ssl-certs,target=/etc/ssl/certs \
|
||||
--volume lib-modules,kind=host,source=/lib/modules,readOnly=true \
|
||||
--mount volume=lib-modules,target=/lib/modules \
|
||||
--volume run,kind=host,source=/run \
|
||||
--mount volume=run,target=/run \
|
||||
--volume usr-share-certs,kind=host,source=/usr/share/ca-certificates,readOnly=true \
|
||||
--mount volume=usr-share-certs,target=/usr/share/ca-certificates \
|
||||
--volume var-lib-calico,kind=host,source=/var/lib/calico,readOnly=true \
|
||||
--mount volume=var-lib-calico,target=/var/lib/calico \
|
||||
--volume var-lib-docker,kind=host,source=/var/lib/docker \
|
||||
--mount volume=var-lib-docker,target=/var/lib/docker \
|
||||
--volume var-lib-kubelet,kind=host,source=/var/lib/kubelet,recursive=true \
|
||||
--mount volume=var-lib-kubelet,target=/var/lib/kubelet \
|
||||
--volume var-log,kind=host,source=/var/log \
|
||||
--mount volume=var-log,target=/var/log \
|
||||
--volume opt-cni-bin,kind=host,source=/opt/cni/bin \
|
||||
--mount volume=opt-cni-bin,target=/opt/cni/bin \
|
||||
$${KUBELET_IMAGE} -- \
|
||||
--anonymous-auth=false \
|
||||
--authentication-token-webhook \
|
||||
--authorization-mode=Webhook \
|
||||
--bootstrap-kubeconfig=/etc/kubernetes/kubeconfig \
|
||||
--cgroup-driver=$${KUBELET_CGROUP_DRIVER} \
|
||||
--client-ca-file=/etc/kubernetes/ca.crt \
|
||||
--cluster_dns=${cluster_dns_service_ip} \
|
||||
--cluster_domain=${cluster_domain_suffix} \
|
||||
--cni-conf-dir=/etc/kubernetes/cni/net.d \
|
||||
--exit-on-lock-contention \
|
||||
--kubeconfig=/etc/kubernetes/kubeconfig \
|
||||
--lock-file=/var/run/lock/kubelet.lock \
|
||||
--healthz-port=0 \
|
||||
--kubeconfig=/var/lib/kubelet/kubeconfig \
|
||||
--network-plugin=cni \
|
||||
--node-labels=node.kubernetes.io/master \
|
||||
--node-labels=node.kubernetes.io/controller="true" \
|
||||
--pod-manifest-path=/etc/kubernetes/manifests \
|
||||
--read-only-port=0 \
|
||||
--register-with-taints=node-role.kubernetes.io/master=:NoSchedule \
|
||||
--register-with-taints=node-role.kubernetes.io/controller=:NoSchedule \
|
||||
--rotate-certificates \
|
||||
--volume-plugin-dir=/var/lib/kubelet/volumeplugins
|
||||
ExecStop=-/usr/bin/rkt stop --uuid-file=/var/cache/kubelet-pod.uuid
|
||||
Restart=always
|
||||
@ -105,7 +125,6 @@ systemd:
|
||||
Type=oneshot
|
||||
RemainAfterExit=true
|
||||
WorkingDirectory=/opt/bootstrap
|
||||
ExecStartPre=-/usr/bin/bash -c 'set -x && [ -n "$(ls /opt/bootstrap/assets/manifests-*/* 2>/dev/null)" ] && mv /opt/bootstrap/assets/manifests-*/* /opt/bootstrap/assets/manifests && rm -rf /opt/bootstrap/assets/manifests-*'
|
||||
ExecStart=/usr/bin/rkt run \
|
||||
--trust-keys-from-https \
|
||||
--volume config,kind=host,source=/etc/kubernetes/bootstrap-secrets \
|
||||
@ -115,7 +134,7 @@ systemd:
|
||||
--volume script,kind=host,source=/opt/bootstrap/apply \
|
||||
--mount volume=script,target=/apply \
|
||||
--insecure-options=image \
|
||||
docker://k8s.gcr.io/hyperkube:v1.17.0 \
|
||||
docker://quay.io/poseidon/kubelet:v1.18.4 \
|
||||
--net=host \
|
||||
--dns=host \
|
||||
--exec=/apply
|
||||
@ -130,14 +149,6 @@ storage:
|
||||
contents:
|
||||
inline: |
|
||||
${kubeconfig}
|
||||
- path: /etc/kubernetes/kubelet.env
|
||||
filesystem: root
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
||||
KUBELET_IMAGE_URL=docker://k8s.gcr.io/hyperkube
|
||||
KUBELET_IMAGE_TAG=v1.17.0
|
||||
KUBELET_IMAGE_ARGS="--exec=/usr/local/bin/kubelet"
|
||||
- path: /opt/bootstrap/layout
|
||||
filesystem: root
|
||||
mode: 0544
|
||||
@ -154,12 +165,12 @@ storage:
|
||||
chmod -R 500 /etc/ssl/etcd
|
||||
mv auth/kubeconfig /etc/kubernetes/bootstrap-secrets/
|
||||
mv tls/k8s/* /etc/kubernetes/bootstrap-secrets/
|
||||
sudo mkdir -p /etc/kubernetes/manifests
|
||||
sudo mv static-manifests/* /etc/kubernetes/manifests/
|
||||
sudo mkdir -p /opt/bootstrap/assets
|
||||
sudo mv manifests /opt/bootstrap/assets/manifests
|
||||
sudo mv manifests-networking /opt/bootstrap/assets/manifests-networking
|
||||
rm -rf assets auth static-manifests tls
|
||||
mkdir -p /etc/kubernetes/manifests
|
||||
mv static-manifests/* /etc/kubernetes/manifests/
|
||||
mkdir -p /opt/bootstrap/assets
|
||||
mv manifests /opt/bootstrap/assets/manifests
|
||||
mv manifests-networking/* /opt/bootstrap/assets/manifests/
|
||||
rm -rf assets auth static-manifests tls manifests-networking
|
||||
- path: /opt/bootstrap/apply
|
||||
filesystem: root
|
||||
mode: 0544
|
||||
@ -177,6 +188,7 @@ storage:
|
||||
done
|
||||
- path: /etc/sysctl.d/max-user-watches.conf
|
||||
filesystem: root
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
||||
fs.inotify.max_user_watches=16184
|
@ -10,7 +10,7 @@ resource "aws_route53_record" "etcds" {
|
||||
ttl = 300
|
||||
|
||||
# private IPv4 address for etcd
|
||||
records = [element(aws_instance.controllers.*.private_ip, count.index)]
|
||||
records = [aws_instance.controllers.*.private_ip[count.index]]
|
||||
}
|
||||
|
||||
# Controller instances
|
||||
@ -24,7 +24,7 @@ resource "aws_instance" "controllers" {
|
||||
instance_type = var.controller_type
|
||||
|
||||
ami = local.ami_id
|
||||
user_data = element(data.ct_config.controller-ignitions.*.rendered, count.index)
|
||||
user_data = data.ct_config.controller-ignitions.*.rendered[count.index]
|
||||
|
||||
# storage
|
||||
root_block_device {
|
||||
@ -49,20 +49,17 @@ resource "aws_instance" "controllers" {
|
||||
|
||||
# Controller Ignition configs
|
||||
data "ct_config" "controller-ignitions" {
|
||||
count = var.controller_count
|
||||
content = element(
|
||||
data.template_file.controller-configs.*.rendered,
|
||||
count.index,
|
||||
)
|
||||
pretty_print = false
|
||||
snippets = var.controller_clc_snippets
|
||||
count = var.controller_count
|
||||
content = data.template_file.controller-configs.*.rendered[count.index]
|
||||
strict = true
|
||||
snippets = var.controller_snippets
|
||||
}
|
||||
|
||||
# Controller Container Linux configs
|
||||
data "template_file" "controller-configs" {
|
||||
count = var.controller_count
|
||||
|
||||
template = file("${path.module}/cl/controller.yaml.tmpl")
|
||||
template = file("${path.module}/cl/controller.yaml")
|
||||
|
||||
vars = {
|
||||
# Cannot use cyclic dependencies on controllers or their DNS records
|
||||
|
@ -25,21 +25,23 @@ resource "aws_internet_gateway" "gateway" {
|
||||
resource "aws_route_table" "default" {
|
||||
vpc_id = aws_vpc.network.id
|
||||
|
||||
route {
|
||||
cidr_block = "0.0.0.0/0"
|
||||
gateway_id = aws_internet_gateway.gateway.id
|
||||
}
|
||||
|
||||
route {
|
||||
ipv6_cidr_block = "::/0"
|
||||
gateway_id = aws_internet_gateway.gateway.id
|
||||
}
|
||||
|
||||
tags = {
|
||||
"Name" = var.cluster_name
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_route" "egress-ipv4" {
|
||||
route_table_id = aws_route_table.default.id
|
||||
destination_cidr_block = "0.0.0.0/0"
|
||||
gateway_id = aws_internet_gateway.gateway.id
|
||||
}
|
||||
|
||||
resource "aws_route" "egress-ipv6" {
|
||||
route_table_id = aws_route_table.default.id
|
||||
destination_ipv6_cidr_block = "::/0"
|
||||
gateway_id = aws_internet_gateway.gateway.id
|
||||
}
|
||||
|
||||
# Subnets (one per availability zone)
|
||||
|
||||
resource "aws_subnet" "public" {
|
||||
@ -62,6 +64,6 @@ resource "aws_route_table_association" "public" {
|
||||
count = length(data.aws_availability_zones.all.names)
|
||||
|
||||
route_table_id = aws_route_table.default.id
|
||||
subnet_id = element(aws_subnet.public.*.id, count.index)
|
||||
subnet_id = aws_subnet.public.*.id[count.index]
|
||||
}
|
||||
|
||||
|
@ -88,7 +88,7 @@ resource "aws_lb_target_group_attachment" "controllers" {
|
||||
count = var.controller_count
|
||||
|
||||
target_group_arn = aws_lb_target_group.controllers.arn
|
||||
target_id = element(aws_instance.controllers.*.id, count.index)
|
||||
target_id = aws_instance.controllers.*.id[count.index]
|
||||
port = 6443
|
||||
}
|
||||
|
||||
|
@ -33,6 +33,28 @@ resource "aws_security_group_rule" "controller-etcd" {
|
||||
self = true
|
||||
}
|
||||
|
||||
# Allow Prometheus to scrape etcd metrics
|
||||
resource "aws_security_group_rule" "controller-etcd-metrics" {
|
||||
security_group_id = aws_security_group.controller.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "tcp"
|
||||
from_port = 2381
|
||||
to_port = 2381
|
||||
source_security_group_id = aws_security_group.worker.id
|
||||
}
|
||||
|
||||
# Allow Prometheus to scrape kube-proxy
|
||||
resource "aws_security_group_rule" "kube-proxy-metrics" {
|
||||
security_group_id = aws_security_group.controller.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "tcp"
|
||||
from_port = 10249
|
||||
to_port = 10249
|
||||
source_security_group_id = aws_security_group.worker.id
|
||||
}
|
||||
|
||||
# Allow Prometheus to scrape kube-scheduler
|
||||
resource "aws_security_group_rule" "controller-scheduler-metrics" {
|
||||
security_group_id = aws_security_group.controller.id
|
||||
@ -55,17 +77,6 @@ resource "aws_security_group_rule" "controller-manager-metrics" {
|
||||
source_security_group_id = aws_security_group.worker.id
|
||||
}
|
||||
|
||||
# Allow Prometheus to scrape etcd metrics
|
||||
resource "aws_security_group_rule" "controller-etcd-metrics" {
|
||||
security_group_id = aws_security_group.controller.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "tcp"
|
||||
from_port = 2381
|
||||
to_port = 2381
|
||||
source_security_group_id = aws_security_group.worker.id
|
||||
}
|
||||
|
||||
resource "aws_security_group_rule" "controller-vxlan" {
|
||||
count = var.networking == "flannel" ? 1 : 0
|
||||
|
||||
@ -281,14 +292,15 @@ resource "aws_security_group_rule" "worker-node-exporter" {
|
||||
self = true
|
||||
}
|
||||
|
||||
resource "aws_security_group_rule" "ingress-health" {
|
||||
# Allow Prometheus to scrape kube-proxy
|
||||
resource "aws_security_group_rule" "worker-kube-proxy" {
|
||||
security_group_id = aws_security_group.worker.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "tcp"
|
||||
from_port = 10254
|
||||
to_port = 10254
|
||||
cidr_blocks = ["0.0.0.0/0"]
|
||||
type = "ingress"
|
||||
protocol = "tcp"
|
||||
from_port = 10249
|
||||
to_port = 10249
|
||||
self = true
|
||||
}
|
||||
|
||||
# Allow apiserver to access kubelets for exec, log, port-forward
|
||||
@ -313,6 +325,16 @@ resource "aws_security_group_rule" "worker-kubelet-self" {
|
||||
self = true
|
||||
}
|
||||
|
||||
resource "aws_security_group_rule" "ingress-health" {
|
||||
security_group_id = aws_security_group.worker.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "tcp"
|
||||
from_port = 10254
|
||||
to_port = 10254
|
||||
cidr_blocks = ["0.0.0.0/0"]
|
||||
}
|
||||
|
||||
resource "aws_security_group_rule" "worker-bgp" {
|
||||
security_group_id = aws_security_group.worker.id
|
||||
|
||||
|
@ -2,7 +2,7 @@ locals {
|
||||
# format assets for distribution
|
||||
assets_bundle = [
|
||||
# header with the unpack location
|
||||
for key, value in module.bootstrap.assets_dist:
|
||||
for key, value in module.bootstrap.assets_dist :
|
||||
format("##### %s\n%s", key, value)
|
||||
]
|
||||
}
|
||||
|
@ -44,7 +44,7 @@ variable "worker_type" {
|
||||
variable "os_image" {
|
||||
type = string
|
||||
description = "AMI channel for a Container Linux derivative (coreos-stable, coreos-beta, coreos-alpha, flatcar-stable, flatcar-beta, flatcar-alpha, flatcar-edge)"
|
||||
default = "coreos-stable"
|
||||
default = "flatcar-stable"
|
||||
}
|
||||
|
||||
variable "disk_size" {
|
||||
@ -77,13 +77,13 @@ variable "worker_target_groups" {
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "controller_clc_snippets" {
|
||||
variable "controller_snippets" {
|
||||
type = list(string)
|
||||
description = "Controller Container Linux Config snippets"
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "worker_clc_snippets" {
|
||||
variable "worker_snippets" {
|
||||
type = list(string)
|
||||
description = "Worker Container Linux Config snippets"
|
||||
default = []
|
||||
@ -96,12 +96,6 @@ variable "ssh_authorized_key" {
|
||||
description = "SSH public key for user 'core'"
|
||||
}
|
||||
|
||||
variable "asset_dir" {
|
||||
type = string
|
||||
description = "Absolute path to a directory where generated assets should be placed (contains secrets)"
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "networking" {
|
||||
type = string
|
||||
description = "Choice of networking provider (calico or flannel)"
|
||||
@ -155,6 +149,12 @@ variable "worker_node_labels" {
|
||||
|
||||
# unofficial, undocumented, unsupported
|
||||
|
||||
variable "asset_dir" {
|
||||
type = string
|
||||
description = "Absolute path to a directory where generated assets should be placed (contains secrets)"
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "cluster_domain_suffix" {
|
||||
type = string
|
||||
description = "Queries for domains with the suffix will be answered by CoreDNS. Default is cluster.local (e.g. foo.default.svc.cluster.local)"
|
||||
|
@ -4,7 +4,7 @@ terraform {
|
||||
required_version = "~> 0.12.6"
|
||||
required_providers {
|
||||
aws = "~> 2.23"
|
||||
ct = "~> 0.3"
|
||||
ct = "~> 0.4"
|
||||
template = "~> 2.1"
|
||||
null = "~> 2.1"
|
||||
}
|
||||
|
@ -18,7 +18,7 @@ module "workers" {
|
||||
ssh_authorized_key = var.ssh_authorized_key
|
||||
service_cidr = var.service_cidr
|
||||
cluster_domain_suffix = var.cluster_domain_suffix
|
||||
clc_snippets = var.worker_clc_snippets
|
||||
snippets = var.worker_snippets
|
||||
node_labels = var.worker_node_labels
|
||||
}
|
||||
|
||||
|
@ -4,8 +4,8 @@ locals {
|
||||
# flatcar-stable -> Flatcar Linux AMI
|
||||
ami_id = local.flavor == "flatcar" ? data.aws_ami.flatcar.image_id : data.aws_ami.coreos.image_id
|
||||
|
||||
flavor = element(split("-", var.os_image), 0)
|
||||
channel = element(split("-", var.os_image), 1)
|
||||
flavor = split("-", var.os_image)[0]
|
||||
channel = split("-", var.os_image)[1]
|
||||
}
|
||||
|
||||
data "aws_ami" "coreos" {
|
||||
|
@ -2,11 +2,11 @@
|
||||
systemd:
|
||||
units:
|
||||
- name: docker.service
|
||||
enable: true
|
||||
enabled: true
|
||||
- name: locksmithd.service
|
||||
mask: true
|
||||
- name: wait-for-dns.service
|
||||
enable: true
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Wait for DNS entries
|
||||
@ -19,53 +19,72 @@ systemd:
|
||||
[Install]
|
||||
RequiredBy=kubelet.service
|
||||
- name: kubelet.service
|
||||
enable: true
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Kubelet via Hyperkube
|
||||
Description=Kubelet
|
||||
Wants=rpc-statd.service
|
||||
[Service]
|
||||
EnvironmentFile=/etc/kubernetes/kubelet.env
|
||||
Environment="RKT_RUN_ARGS=--uuid-file-save=/var/cache/kubelet-pod.uuid \
|
||||
--volume=resolv,kind=host,source=/etc/resolv.conf \
|
||||
--mount volume=resolv,target=/etc/resolv.conf \
|
||||
--volume var-lib-cni,kind=host,source=/var/lib/cni \
|
||||
--mount volume=var-lib-cni,target=/var/lib/cni \
|
||||
--volume var-lib-calico,kind=host,source=/var/lib/calico \
|
||||
--mount volume=var-lib-calico,target=/var/lib/calico \
|
||||
--volume opt-cni-bin,kind=host,source=/opt/cni/bin \
|
||||
--mount volume=opt-cni-bin,target=/opt/cni/bin \
|
||||
--volume var-log,kind=host,source=/var/log \
|
||||
--mount volume=var-log,target=/var/log \
|
||||
--insecure-options=image"
|
||||
Environment=KUBELET_IMAGE=docker://quay.io/poseidon/kubelet:v1.18.4
|
||||
Environment=KUBELET_CGROUP_DRIVER=${cgroup_driver}
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/cni/net.d
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
|
||||
ExecStartPre=/bin/mkdir -p /opt/cni/bin
|
||||
ExecStartPre=/bin/mkdir -p /var/lib/cni
|
||||
ExecStartPre=/bin/mkdir -p /var/lib/calico
|
||||
ExecStartPre=/bin/mkdir -p /var/lib/kubelet/volumeplugins
|
||||
ExecStartPre=/usr/bin/bash -c "grep 'certificate-authority-data' /etc/kubernetes/kubeconfig | awk '{print $2}' | base64 -d > /etc/kubernetes/ca.crt"
|
||||
ExecStartPre=-/usr/bin/rkt rm --uuid-file=/var/cache/kubelet-pod.uuid
|
||||
ExecStart=/usr/lib/coreos/kubelet-wrapper \
|
||||
ExecStart=/usr/bin/rkt run \
|
||||
--uuid-file-save=/var/cache/kubelet-pod.uuid \
|
||||
--stage1-from-dir=stage1-fly.aci \
|
||||
--hosts-entry host \
|
||||
--insecure-options=image \
|
||||
--volume etc-kubernetes,kind=host,source=/etc/kubernetes,readOnly=true \
|
||||
--mount volume=etc-kubernetes,target=/etc/kubernetes \
|
||||
--volume etc-machine-id,kind=host,source=/etc/machine-id,readOnly=true \
|
||||
--mount volume=etc-machine-id,target=/etc/machine-id \
|
||||
--volume etc-os-release,kind=host,source=/usr/lib/os-release,readOnly=true \
|
||||
--mount volume=etc-os-release,target=/etc/os-release \
|
||||
--volume=etc-resolv,kind=host,source=/etc/resolv.conf,readOnly=true \
|
||||
--mount volume=etc-resolv,target=/etc/resolv.conf \
|
||||
--volume etc-ssl-certs,kind=host,source=/etc/ssl/certs,readOnly=true \
|
||||
--mount volume=etc-ssl-certs,target=/etc/ssl/certs \
|
||||
--volume lib-modules,kind=host,source=/lib/modules,readOnly=true \
|
||||
--mount volume=lib-modules,target=/lib/modules \
|
||||
--volume run,kind=host,source=/run \
|
||||
--mount volume=run,target=/run \
|
||||
--volume usr-share-certs,kind=host,source=/usr/share/ca-certificates,readOnly=true \
|
||||
--mount volume=usr-share-certs,target=/usr/share/ca-certificates \
|
||||
--volume var-lib-calico,kind=host,source=/var/lib/calico,readOnly=true \
|
||||
--mount volume=var-lib-calico,target=/var/lib/calico \
|
||||
--volume var-lib-docker,kind=host,source=/var/lib/docker \
|
||||
--mount volume=var-lib-docker,target=/var/lib/docker \
|
||||
--volume var-lib-kubelet,kind=host,source=/var/lib/kubelet,recursive=true \
|
||||
--mount volume=var-lib-kubelet,target=/var/lib/kubelet \
|
||||
--volume var-log,kind=host,source=/var/log \
|
||||
--mount volume=var-log,target=/var/log \
|
||||
--volume opt-cni-bin,kind=host,source=/opt/cni/bin \
|
||||
--mount volume=opt-cni-bin,target=/opt/cni/bin \
|
||||
$${KUBELET_IMAGE} -- \
|
||||
--anonymous-auth=false \
|
||||
--authentication-token-webhook \
|
||||
--authorization-mode=Webhook \
|
||||
--bootstrap-kubeconfig=/etc/kubernetes/kubeconfig \
|
||||
--cgroup-driver=$${KUBELET_CGROUP_DRIVER} \
|
||||
--client-ca-file=/etc/kubernetes/ca.crt \
|
||||
--cluster_dns=${cluster_dns_service_ip} \
|
||||
--cluster_domain=${cluster_domain_suffix} \
|
||||
--cni-conf-dir=/etc/kubernetes/cni/net.d \
|
||||
--exit-on-lock-contention \
|
||||
--kubeconfig=/etc/kubernetes/kubeconfig \
|
||||
--lock-file=/var/run/lock/kubelet.lock \
|
||||
--healthz-port=0 \
|
||||
--kubeconfig=/var/lib/kubelet/kubeconfig \
|
||||
--network-plugin=cni \
|
||||
--node-labels=node.kubernetes.io/node \
|
||||
%{ for label in split(",", node_labels) }
|
||||
%{~ for label in split(",", node_labels) ~}
|
||||
--node-labels=${label} \
|
||||
%{ endfor ~}
|
||||
%{~ endfor ~}
|
||||
--pod-manifest-path=/etc/kubernetes/manifests \
|
||||
--read-only-port=0 \
|
||||
--rotate-certificates \
|
||||
--volume-plugin-dir=/var/lib/kubelet/volumeplugins
|
||||
ExecStop=-/usr/bin/rkt stop --uuid-file=/var/cache/kubelet-pod.uuid
|
||||
Restart=always
|
||||
@ -92,16 +111,9 @@ storage:
|
||||
contents:
|
||||
inline: |
|
||||
${kubeconfig}
|
||||
- path: /etc/kubernetes/kubelet.env
|
||||
filesystem: root
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
||||
KUBELET_IMAGE_URL=docker://k8s.gcr.io/hyperkube
|
||||
KUBELET_IMAGE_TAG=v1.17.0
|
||||
KUBELET_IMAGE_ARGS="--exec=/usr/local/bin/kubelet"
|
||||
- path: /etc/sysctl.d/max-user-watches.conf
|
||||
filesystem: root
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
||||
fs.inotify.max_user_watches=16184
|
||||
@ -117,11 +129,10 @@ storage:
|
||||
--volume config,kind=host,source=/etc/kubernetes \
|
||||
--mount volume=config,target=/etc/kubernetes \
|
||||
--insecure-options=image \
|
||||
docker://k8s.gcr.io/hyperkube:v1.17.0 \
|
||||
docker://quay.io/poseidon/kubelet:v1.18.4 \
|
||||
--net=host \
|
||||
--dns=host \
|
||||
-- \
|
||||
kubectl --kubeconfig=/etc/kubernetes/kubeconfig delete node $(hostname)
|
||||
--exec=/usr/local/bin/kubectl -- --kubeconfig=/etc/kubernetes/kubeconfig delete node $(hostname)
|
||||
passwd:
|
||||
users:
|
||||
- name: core
|
@ -37,7 +37,7 @@ variable "instance_type" {
|
||||
variable "os_image" {
|
||||
type = string
|
||||
description = "AMI channel for a Container Linux derivative (coreos-stable, coreos-beta, coreos-alpha, flatcar-stable, flatcar-beta, flatcar-alpha, flatcar-edge)"
|
||||
default = "coreos-stable"
|
||||
default = "flatcar-stable"
|
||||
}
|
||||
|
||||
variable "disk_size" {
|
||||
@ -70,7 +70,7 @@ variable "target_groups" {
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "clc_snippets" {
|
||||
variable "snippets" {
|
||||
type = list(string)
|
||||
description = "Container Linux Config snippets"
|
||||
default = []
|
||||
|
@ -71,14 +71,14 @@ resource "aws_launch_configuration" "worker" {
|
||||
|
||||
# Worker Ignition config
|
||||
data "ct_config" "worker-ignition" {
|
||||
content = data.template_file.worker-config.rendered
|
||||
pretty_print = false
|
||||
snippets = var.clc_snippets
|
||||
content = data.template_file.worker-config.rendered
|
||||
strict = true
|
||||
snippets = var.snippets
|
||||
}
|
||||
|
||||
# Worker Container Linux config
|
||||
data "template_file" "worker-config" {
|
||||
template = file("${path.module}/cl/worker.yaml.tmpl")
|
||||
template = file("${path.module}/cl/worker.yaml")
|
||||
|
||||
vars = {
|
||||
kubeconfig = indent(10, var.kubeconfig)
|
||||
|
@ -11,11 +11,11 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster
|
||||
|
||||
## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>
|
||||
|
||||
* Kubernetes v1.17.0 (upstream)
|
||||
* Kubernetes v1.18.4 (upstream)
|
||||
* Single or multi-master, [Calico](https://www.projectcalico.org/) or [flannel](https://github.com/coreos/flannel) networking
|
||||
* On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/)
|
||||
* On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/), SELinux enforcing
|
||||
* Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [spot](https://typhoon.psdn.io/cl/aws/#spot) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#container-linux) customization
|
||||
* Ready for Ingress, Prometheus, Grafana, and other optional [addons](https://typhoon.psdn.io/addons/overview/)
|
||||
* Ready for Ingress, Prometheus, Grafana, CSI, and other optional [addons](https://typhoon.psdn.io/addons/overview/)
|
||||
|
||||
## Docs
|
||||
|
||||
|
@ -14,10 +14,7 @@ data "aws_ami" "fedora-coreos" {
|
||||
}
|
||||
|
||||
filter {
|
||||
name = "name"
|
||||
values = ["fedora-coreos-30.*.*-hvm"]
|
||||
name = "description"
|
||||
values = ["Fedora CoreOS ${var.os_stream} *"]
|
||||
}
|
||||
|
||||
# try to filter out dev images (AWS filters can't)
|
||||
name_regex = "^fedora-coreos-30.[0-9]*.[0-9]*-hvm*"
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
# Kubernetes assets (kubeconfig, manifests)
|
||||
module "bootstrap" {
|
||||
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=24e5513ee6da4888005aac02101f73fc9e5e829f"
|
||||
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=e75697ce35d7773705f0b9b28ce1ffbe99f9493c"
|
||||
|
||||
cluster_name = var.cluster_name
|
||||
api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)]
|
||||
|
@ -36,7 +36,7 @@ resource "aws_instance" "controllers" {
|
||||
|
||||
# network
|
||||
associate_public_ip_address = true
|
||||
subnet_id = aws_subnet.public.*.id[count.index]
|
||||
subnet_id = element(aws_subnet.public.*.id, count.index)
|
||||
vpc_security_group_ids = [aws_security_group.controller.id]
|
||||
|
||||
lifecycle {
|
||||
|
@ -28,7 +28,7 @@ systemd:
|
||||
--network host \
|
||||
--volume /var/lib/etcd:/var/lib/etcd:rw,Z \
|
||||
--volume /etc/ssl/etcd:/etc/ssl/certs:ro,Z \
|
||||
quay.io/coreos/etcd:v3.4.3
|
||||
quay.io/coreos/etcd:v3.4.9
|
||||
ExecStop=/usr/bin/podman stop etcd
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
@ -38,11 +38,12 @@ systemd:
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Wait for DNS entries
|
||||
Description=Wait for DNS and hostname
|
||||
Before=kubelet.service
|
||||
[Service]
|
||||
Type=oneshot
|
||||
RemainAfterExit=true
|
||||
ExecStartPre=/bin/sh -c 'while [ `hostname -s` == "localhost" ]; do sleep 1; done;'
|
||||
ExecStart=/bin/sh -c 'while ! /usr/bin/grep '^[^#[:space:]]' /etc/resolv.conf > /dev/null; do sleep 1; done'
|
||||
[Install]
|
||||
RequiredBy=kubelet.service
|
||||
@ -51,9 +52,10 @@ systemd:
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Kubelet via Hyperkube (System Container)
|
||||
Description=Kubelet (System Container)
|
||||
Wants=rpc-statd.service
|
||||
[Service]
|
||||
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.18.4
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/cni/net.d
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
|
||||
ExecStartPre=/bin/mkdir -p /opt/cni/bin
|
||||
@ -73,17 +75,17 @@ systemd:
|
||||
--volume /sys/fs/cgroup:/sys/fs/cgroup:ro \
|
||||
--volume /sys/fs/cgroup/systemd:/sys/fs/cgroup/systemd \
|
||||
--volume /etc/pki/tls/certs:/usr/share/ca-certificates:ro \
|
||||
--volume /var/lib/calico:/var/lib/calico \
|
||||
--volume /var/lib/calico:/var/lib/calico:ro \
|
||||
--volume /var/lib/docker:/var/lib/docker \
|
||||
--volume /var/lib/kubelet:/var/lib/kubelet:rshared,z \
|
||||
--volume /var/log:/var/log \
|
||||
--volume /var/run:/var/run \
|
||||
--volume /var/run/lock:/var/run/lock:z \
|
||||
--volume /opt/cni/bin:/opt/cni/bin:z \
|
||||
k8s.gcr.io/hyperkube:v1.17.0 kubelet \
|
||||
$${KUBELET_IMAGE} \
|
||||
--anonymous-auth=false \
|
||||
--authentication-token-webhook \
|
||||
--authorization-mode=Webhook \
|
||||
--bootstrap-kubeconfig=/etc/kubernetes/kubeconfig \
|
||||
--cgroup-driver=systemd \
|
||||
--cgroups-per-qos=true \
|
||||
--enforce-node-allocatable=pods \
|
||||
@ -91,15 +93,14 @@ systemd:
|
||||
--cluster_dns=${cluster_dns_service_ip} \
|
||||
--cluster_domain=${cluster_domain_suffix} \
|
||||
--cni-conf-dir=/etc/kubernetes/cni/net.d \
|
||||
--exit-on-lock-contention \
|
||||
--kubeconfig=/etc/kubernetes/kubeconfig \
|
||||
--lock-file=/var/run/lock/kubelet.lock \
|
||||
--healthz-port=0 \
|
||||
--kubeconfig=/var/lib/kubelet/kubeconfig \
|
||||
--network-plugin=cni \
|
||||
--node-labels=node.kubernetes.io/master \
|
||||
--node-labels=node.kubernetes.io/controller="true" \
|
||||
--pod-manifest-path=/etc/kubernetes/manifests \
|
||||
--read-only-port=0 \
|
||||
--register-with-taints=node-role.kubernetes.io/master=:NoSchedule \
|
||||
--register-with-taints=node-role.kubernetes.io/controller=:NoSchedule \
|
||||
--rotate-certificates \
|
||||
--volume-plugin-dir=/var/lib/kubelet/volumeplugins
|
||||
ExecStop=-/usr/bin/podman stop kubelet
|
||||
Delegate=yes
|
||||
@ -116,14 +117,14 @@ systemd:
|
||||
Type=oneshot
|
||||
RemainAfterExit=true
|
||||
WorkingDirectory=/opt/bootstrap
|
||||
ExecStartPre=-/usr/bin/bash -c 'set -x && [ -n "$(ls /opt/bootstrap/assets/manifests-*/* 2>/dev/null)" ] && mv /opt/bootstrap/assets/manifests-*/* /opt/bootstrap/assets/manifests && rm -rf /opt/bootstrap/assets/manifests-*'
|
||||
ExecStartPre=-/usr/bin/podman rm bootstrap
|
||||
ExecStart=/usr/bin/podman run --name bootstrap \
|
||||
--network host \
|
||||
--volume /etc/kubernetes/bootstrap-secrets:/etc/kubernetes/secrets:ro,Z \
|
||||
--volume /etc/kubernetes/bootstrap-secrets:/etc/kubernetes/secrets:ro,z \
|
||||
--volume /opt/bootstrap/assets:/assets:ro,Z \
|
||||
--volume /opt/bootstrap/apply:/apply:ro,Z \
|
||||
--entrypoint=/apply \
|
||||
k8s.gcr.io/hyperkube:v1.17.0
|
||||
quay.io/poseidon/kubelet:v1.18.4
|
||||
ExecStartPost=/bin/touch /opt/bootstrap/bootstrap.done
|
||||
ExecStartPost=-/usr/bin/podman stop bootstrap
|
||||
storage:
|
||||
@ -151,12 +152,12 @@ storage:
|
||||
chmod -R 500 /etc/ssl/etcd
|
||||
mv auth/kubeconfig /etc/kubernetes/bootstrap-secrets/
|
||||
mv tls/k8s/* /etc/kubernetes/bootstrap-secrets/
|
||||
sudo mkdir -p /etc/kubernetes/manifests
|
||||
sudo mv static-manifests/* /etc/kubernetes/manifests/
|
||||
sudo mkdir -p /opt/bootstrap/assets
|
||||
sudo mv manifests /opt/bootstrap/assets/manifests
|
||||
sudo mv manifests-networking /opt/bootstrap/assets/manifests-networking
|
||||
rm -rf assets auth static-manifests tls
|
||||
mkdir -p /etc/kubernetes/manifests
|
||||
mv static-manifests/* /etc/kubernetes/manifests/
|
||||
mkdir -p /opt/bootstrap/assets
|
||||
mv manifests /opt/bootstrap/assets/manifests
|
||||
mv manifests-networking/* /opt/bootstrap/assets/manifests/
|
||||
rm -rf assets auth static-manifests tls manifests-networking
|
||||
- path: /opt/bootstrap/apply
|
||||
mode: 0544
|
||||
contents:
|
||||
@ -171,10 +172,6 @@ storage:
|
||||
echo "Retry applying manifests"
|
||||
sleep 5
|
||||
done
|
||||
- path: /etc/sysctl.d/reverse-path-filter.conf
|
||||
contents:
|
||||
inline: |
|
||||
net.ipv4.conf.all.rp_filter=1
|
||||
- path: /etc/sysctl.d/max-user-watches.conf
|
||||
contents:
|
||||
inline: |
|
||||
|
@ -25,21 +25,23 @@ resource "aws_internet_gateway" "gateway" {
|
||||
resource "aws_route_table" "default" {
|
||||
vpc_id = aws_vpc.network.id
|
||||
|
||||
route {
|
||||
cidr_block = "0.0.0.0/0"
|
||||
gateway_id = aws_internet_gateway.gateway.id
|
||||
}
|
||||
|
||||
route {
|
||||
ipv6_cidr_block = "::/0"
|
||||
gateway_id = aws_internet_gateway.gateway.id
|
||||
}
|
||||
|
||||
tags = {
|
||||
"Name" = var.cluster_name
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_route" "egress-ipv4" {
|
||||
route_table_id = aws_route_table.default.id
|
||||
destination_cidr_block = "0.0.0.0/0"
|
||||
gateway_id = aws_internet_gateway.gateway.id
|
||||
}
|
||||
|
||||
resource "aws_route" "egress-ipv6" {
|
||||
route_table_id = aws_route_table.default.id
|
||||
destination_ipv6_cidr_block = "::/0"
|
||||
gateway_id = aws_internet_gateway.gateway.id
|
||||
}
|
||||
|
||||
# Subnets (one per availability zone)
|
||||
|
||||
resource "aws_subnet" "public" {
|
||||
|
@ -44,6 +44,17 @@ resource "aws_security_group_rule" "controller-etcd-metrics" {
|
||||
source_security_group_id = aws_security_group.worker.id
|
||||
}
|
||||
|
||||
# Allow Prometheus to scrape kube-proxy
|
||||
resource "aws_security_group_rule" "kube-proxy-metrics" {
|
||||
security_group_id = aws_security_group.controller.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "tcp"
|
||||
from_port = 10249
|
||||
to_port = 10249
|
||||
source_security_group_id = aws_security_group.worker.id
|
||||
}
|
||||
|
||||
# Allow Prometheus to scrape kube-scheduler
|
||||
resource "aws_security_group_rule" "controller-scheduler-metrics" {
|
||||
security_group_id = aws_security_group.controller.id
|
||||
@ -281,14 +292,15 @@ resource "aws_security_group_rule" "worker-node-exporter" {
|
||||
self = true
|
||||
}
|
||||
|
||||
resource "aws_security_group_rule" "ingress-health" {
|
||||
# Allow Prometheus to scrape kube-proxy
|
||||
resource "aws_security_group_rule" "worker-kube-proxy" {
|
||||
security_group_id = aws_security_group.worker.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "tcp"
|
||||
from_port = 10254
|
||||
to_port = 10254
|
||||
cidr_blocks = ["0.0.0.0/0"]
|
||||
type = "ingress"
|
||||
protocol = "tcp"
|
||||
from_port = 10249
|
||||
to_port = 10249
|
||||
self = true
|
||||
}
|
||||
|
||||
# Allow apiserver to access kubelets for exec, log, port-forward
|
||||
@ -313,6 +325,16 @@ resource "aws_security_group_rule" "worker-kubelet-self" {
|
||||
self = true
|
||||
}
|
||||
|
||||
resource "aws_security_group_rule" "ingress-health" {
|
||||
security_group_id = aws_security_group.worker.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "tcp"
|
||||
from_port = 10254
|
||||
to_port = 10254
|
||||
cidr_blocks = ["0.0.0.0/0"]
|
||||
}
|
||||
|
||||
resource "aws_security_group_rule" "worker-bgp" {
|
||||
security_group_id = aws_security_group.worker.id
|
||||
|
||||
|
@ -2,7 +2,7 @@ locals {
|
||||
# format assets for distribution
|
||||
assets_bundle = [
|
||||
# header with the unpack location
|
||||
for key, value in module.bootstrap.assets_dist:
|
||||
for key, value in module.bootstrap.assets_dist :
|
||||
format("##### %s\n%s", key, value)
|
||||
]
|
||||
}
|
||||
@ -21,7 +21,7 @@ resource "null_resource" "copy-controller-secrets" {
|
||||
user = "core"
|
||||
timeout = "15m"
|
||||
}
|
||||
|
||||
|
||||
provisioner "file" {
|
||||
content = join("\n", local.assets_bundle)
|
||||
destination = "$HOME/assets"
|
||||
|
@ -41,10 +41,10 @@ variable "worker_type" {
|
||||
default = "t3.small"
|
||||
}
|
||||
|
||||
variable "os_image" {
|
||||
variable "os_stream" {
|
||||
type = string
|
||||
description = "AMI channel for Fedora CoreOS (not yet used)"
|
||||
default = "coreos-stable"
|
||||
description = "Fedora CoreOs image stream for instances (e.g. stable, testing, next)"
|
||||
default = "stable"
|
||||
}
|
||||
|
||||
variable "disk_size" {
|
||||
|
@ -8,7 +8,7 @@ module "workers" {
|
||||
security_groups = [aws_security_group.worker.id]
|
||||
worker_count = var.worker_count
|
||||
instance_type = var.worker_type
|
||||
os_image = var.os_image
|
||||
os_stream = var.os_stream
|
||||
disk_size = var.disk_size
|
||||
spot_price = var.worker_price
|
||||
target_groups = var.worker_target_groups
|
||||
|
@ -14,10 +14,7 @@ data "aws_ami" "fedora-coreos" {
|
||||
}
|
||||
|
||||
filter {
|
||||
name = "name"
|
||||
values = ["fedora-coreos-30.*.*-hvm"]
|
||||
name = "description"
|
||||
values = ["Fedora CoreOS ${var.os_stream} *"]
|
||||
}
|
||||
|
||||
# try to filter out dev images (AWS filters can't)
|
||||
name_regex = "^fedora-coreos-30.[0-9]*.[0-9]*-hvm*"
|
||||
}
|
||||
|
@ -9,11 +9,12 @@ systemd:
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Wait for DNS entries
|
||||
Description=Wait for DNS and hostname
|
||||
Before=kubelet.service
|
||||
[Service]
|
||||
Type=oneshot
|
||||
RemainAfterExit=true
|
||||
ExecStartPre=/bin/sh -c 'while [ `hostname -s` == "localhost" ]; do sleep 1; done;'
|
||||
ExecStart=/bin/sh -c 'while ! /usr/bin/grep '^[^#[:space:]]' /etc/resolv.conf > /dev/null; do sleep 1; done'
|
||||
[Install]
|
||||
RequiredBy=kubelet.service
|
||||
@ -21,9 +22,10 @@ systemd:
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Kubelet via Hyperkube (System Container)
|
||||
Description=Kubelet (System Container)
|
||||
Wants=rpc-statd.service
|
||||
[Service]
|
||||
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.18.4
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/cni/net.d
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
|
||||
ExecStartPre=/bin/mkdir -p /opt/cni/bin
|
||||
@ -43,17 +45,17 @@ systemd:
|
||||
--volume /sys/fs/cgroup:/sys/fs/cgroup:ro \
|
||||
--volume /sys/fs/cgroup/systemd:/sys/fs/cgroup/systemd \
|
||||
--volume /etc/pki/tls/certs:/usr/share/ca-certificates:ro \
|
||||
--volume /var/lib/calico:/var/lib/calico \
|
||||
--volume /var/lib/calico:/var/lib/calico:ro \
|
||||
--volume /var/lib/docker:/var/lib/docker \
|
||||
--volume /var/lib/kubelet:/var/lib/kubelet:rshared,z \
|
||||
--volume /var/log:/var/log \
|
||||
--volume /var/run:/var/run \
|
||||
--volume /var/run/lock:/var/run/lock:z \
|
||||
--volume /opt/cni/bin:/opt/cni/bin:z \
|
||||
k8s.gcr.io/hyperkube:v1.17.0 kubelet \
|
||||
$${KUBELET_IMAGE} \
|
||||
--anonymous-auth=false \
|
||||
--authentication-token-webhook \
|
||||
--authorization-mode=Webhook \
|
||||
--bootstrap-kubeconfig=/etc/kubernetes/kubeconfig \
|
||||
--cgroup-driver=systemd \
|
||||
--cgroups-per-qos=true \
|
||||
--enforce-node-allocatable=pods \
|
||||
@ -61,16 +63,16 @@ systemd:
|
||||
--cluster_dns=${cluster_dns_service_ip} \
|
||||
--cluster_domain=${cluster_domain_suffix} \
|
||||
--cni-conf-dir=/etc/kubernetes/cni/net.d \
|
||||
--exit-on-lock-contention \
|
||||
--kubeconfig=/etc/kubernetes/kubeconfig \
|
||||
--lock-file=/var/run/lock/kubelet.lock \
|
||||
--healthz-port=0 \
|
||||
--kubeconfig=/var/lib/kubelet/kubeconfig \
|
||||
--network-plugin=cni \
|
||||
--node-labels=node.kubernetes.io/node \
|
||||
%{ for label in split(",", node_labels) }
|
||||
%{~ for label in split(",", node_labels) ~}
|
||||
--node-labels=${label} \
|
||||
%{ endfor ~}
|
||||
%{~ endfor ~}
|
||||
--pod-manifest-path=/etc/kubernetes/manifests \
|
||||
--read-only-port=0 \
|
||||
--rotate-certificates \
|
||||
--volume-plugin-dir=/var/lib/kubelet/volumeplugins
|
||||
ExecStop=-/usr/bin/podman stop kubelet
|
||||
Delegate=yes
|
||||
@ -78,6 +80,18 @@ systemd:
|
||||
RestartSec=10
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
- name: delete-node.service
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Delete Kubernetes node on shutdown
|
||||
[Service]
|
||||
Type=oneshot
|
||||
RemainAfterExit=true
|
||||
ExecStart=/bin/true
|
||||
ExecStop=/bin/bash -c '/usr/bin/podman run --volume /etc/kubernetes:/etc/kubernetes:ro,z --entrypoint /usr/local/bin/kubectl quay.io/poseidon/kubelet:v1.18.4 --kubeconfig=/etc/kubernetes/kubeconfig delete node $HOSTNAME'
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
storage:
|
||||
directories:
|
||||
- path: /etc/kubernetes
|
||||
@ -87,10 +101,6 @@ storage:
|
||||
contents:
|
||||
inline: |
|
||||
${kubeconfig}
|
||||
- path: /etc/sysctl.d/reverse-path-filter.conf
|
||||
contents:
|
||||
inline: |
|
||||
net.ipv4.conf.all.rp_filter=1
|
||||
- path: /etc/sysctl.d/max-user-watches.conf
|
||||
contents:
|
||||
inline: |
|
||||
|
@ -34,10 +34,10 @@ variable "instance_type" {
|
||||
default = "t3.small"
|
||||
}
|
||||
|
||||
variable "os_image" {
|
||||
variable "os_stream" {
|
||||
type = string
|
||||
description = "AMI channel for Fedora CoreOS (not yet used)"
|
||||
default = "coreos-stable"
|
||||
description = "Fedora CoreOs image stream for instances (e.g. stable, testing, next)"
|
||||
default = "stable"
|
||||
}
|
||||
|
||||
variable "disk_size" {
|
||||
|
@ -11,7 +11,7 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster
|
||||
|
||||
## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>
|
||||
|
||||
* Kubernetes v1.17.0 (upstream)
|
||||
* Kubernetes v1.18.4 (upstream)
|
||||
* Single or multi-master, [Calico](https://www.projectcalico.org/) or [flannel](https://github.com/coreos/flannel) networking
|
||||
* On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/)
|
||||
* Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [low-priority](https://typhoon.psdn.io/cl/azure/#low-priority) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#container-linux) customization
|
||||
|
@ -1,6 +1,6 @@
|
||||
# Kubernetes assets (kubeconfig, manifests)
|
||||
module "bootstrap" {
|
||||
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=24e5513ee6da4888005aac02101f73fc9e5e829f"
|
||||
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=e75697ce35d7773705f0b9b28ce1ffbe99f9493c"
|
||||
|
||||
cluster_name = var.cluster_name
|
||||
api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)]
|
||||
|
@ -2,12 +2,14 @@
|
||||
systemd:
|
||||
units:
|
||||
- name: etcd-member.service
|
||||
enable: true
|
||||
enabled: true
|
||||
dropins:
|
||||
- name: 40-etcd-cluster.conf
|
||||
contents: |
|
||||
[Service]
|
||||
Environment="ETCD_IMAGE_TAG=v3.4.3"
|
||||
Environment="ETCD_IMAGE_TAG=v3.4.9"
|
||||
Environment="ETCD_IMAGE_URL=docker://quay.io/coreos/etcd"
|
||||
Environment="RKT_RUN_ARGS=--insecure-options=image"
|
||||
Environment="ETCD_NAME=${etcd_name}"
|
||||
Environment="ETCD_ADVERTISE_CLIENT_URLS=https://${etcd_domain}:2379"
|
||||
Environment="ETCD_INITIAL_ADVERTISE_PEER_URLS=https://${etcd_domain}:2380"
|
||||
@ -26,11 +28,11 @@ systemd:
|
||||
Environment="ETCD_PEER_KEY_FILE=/etc/ssl/certs/etcd/peer.key"
|
||||
Environment="ETCD_PEER_CLIENT_CERT_AUTH=true"
|
||||
- name: docker.service
|
||||
enable: true
|
||||
enabled: true
|
||||
- name: locksmithd.service
|
||||
mask: true
|
||||
- name: wait-for-dns.service
|
||||
enable: true
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Wait for DNS entries
|
||||
@ -44,51 +46,68 @@ systemd:
|
||||
RequiredBy=kubelet.service
|
||||
RequiredBy=etcd-member.service
|
||||
- name: kubelet.service
|
||||
enable: true
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Kubelet via Hyperkube
|
||||
Description=Kubelet
|
||||
Wants=rpc-statd.service
|
||||
[Service]
|
||||
EnvironmentFile=/etc/kubernetes/kubelet.env
|
||||
Environment="RKT_RUN_ARGS=--uuid-file-save=/var/cache/kubelet-pod.uuid \
|
||||
--volume=resolv,kind=host,source=/etc/resolv.conf \
|
||||
--mount volume=resolv,target=/etc/resolv.conf \
|
||||
--volume var-lib-cni,kind=host,source=/var/lib/cni \
|
||||
--mount volume=var-lib-cni,target=/var/lib/cni \
|
||||
--volume var-lib-calico,kind=host,source=/var/lib/calico \
|
||||
--mount volume=var-lib-calico,target=/var/lib/calico \
|
||||
--volume opt-cni-bin,kind=host,source=/opt/cni/bin \
|
||||
--mount volume=opt-cni-bin,target=/opt/cni/bin \
|
||||
--volume var-log,kind=host,source=/var/log \
|
||||
--mount volume=var-log,target=/var/log \
|
||||
--hosts-entry=host \
|
||||
--insecure-options=image"
|
||||
Environment=KUBELET_IMAGE=docker://quay.io/poseidon/kubelet:v1.18.4
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/cni/net.d
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
|
||||
ExecStartPre=/bin/mkdir -p /opt/cni/bin
|
||||
ExecStartPre=/bin/mkdir -p /var/lib/cni
|
||||
ExecStartPre=/bin/mkdir -p /var/lib/calico
|
||||
ExecStartPre=/bin/mkdir -p /var/lib/kubelet/volumeplugins
|
||||
ExecStartPre=/usr/bin/bash -c "grep 'certificate-authority-data' /etc/kubernetes/kubeconfig | awk '{print $2}' | base64 -d > /etc/kubernetes/ca.crt"
|
||||
ExecStartPre=-/usr/bin/rkt rm --uuid-file=/var/cache/kubelet-pod.uuid
|
||||
ExecStart=/usr/lib/coreos/kubelet-wrapper \
|
||||
ExecStart=/usr/bin/rkt run \
|
||||
--uuid-file-save=/var/cache/kubelet-pod.uuid \
|
||||
--stage1-from-dir=stage1-fly.aci \
|
||||
--hosts-entry host \
|
||||
--insecure-options=image \
|
||||
--volume etc-kubernetes,kind=host,source=/etc/kubernetes,readOnly=true \
|
||||
--mount volume=etc-kubernetes,target=/etc/kubernetes \
|
||||
--volume etc-machine-id,kind=host,source=/etc/machine-id,readOnly=true \
|
||||
--mount volume=etc-machine-id,target=/etc/machine-id \
|
||||
--volume etc-os-release,kind=host,source=/usr/lib/os-release,readOnly=true \
|
||||
--mount volume=etc-os-release,target=/etc/os-release \
|
||||
--volume=etc-resolv,kind=host,source=/etc/resolv.conf,readOnly=true \
|
||||
--mount volume=etc-resolv,target=/etc/resolv.conf \
|
||||
--volume etc-ssl-certs,kind=host,source=/etc/ssl/certs,readOnly=true \
|
||||
--mount volume=etc-ssl-certs,target=/etc/ssl/certs \
|
||||
--volume lib-modules,kind=host,source=/lib/modules,readOnly=true \
|
||||
--mount volume=lib-modules,target=/lib/modules \
|
||||
--volume run,kind=host,source=/run \
|
||||
--mount volume=run,target=/run \
|
||||
--volume usr-share-certs,kind=host,source=/usr/share/ca-certificates,readOnly=true \
|
||||
--mount volume=usr-share-certs,target=/usr/share/ca-certificates \
|
||||
--volume var-lib-calico,kind=host,source=/var/lib/calico,readOnly=true \
|
||||
--mount volume=var-lib-calico,target=/var/lib/calico \
|
||||
--volume var-lib-docker,kind=host,source=/var/lib/docker \
|
||||
--mount volume=var-lib-docker,target=/var/lib/docker \
|
||||
--volume var-lib-kubelet,kind=host,source=/var/lib/kubelet,recursive=true \
|
||||
--mount volume=var-lib-kubelet,target=/var/lib/kubelet \
|
||||
--volume var-log,kind=host,source=/var/log \
|
||||
--mount volume=var-log,target=/var/log \
|
||||
--volume opt-cni-bin,kind=host,source=/opt/cni/bin \
|
||||
--mount volume=opt-cni-bin,target=/opt/cni/bin \
|
||||
$${KUBELET_IMAGE} -- \
|
||||
--anonymous-auth=false \
|
||||
--authentication-token-webhook \
|
||||
--authorization-mode=Webhook \
|
||||
--bootstrap-kubeconfig=/etc/kubernetes/kubeconfig \
|
||||
--client-ca-file=/etc/kubernetes/ca.crt \
|
||||
--cluster_dns=${cluster_dns_service_ip} \
|
||||
--cluster_domain=${cluster_domain_suffix} \
|
||||
--cni-conf-dir=/etc/kubernetes/cni/net.d \
|
||||
--exit-on-lock-contention \
|
||||
--kubeconfig=/etc/kubernetes/kubeconfig \
|
||||
--lock-file=/var/run/lock/kubelet.lock \
|
||||
--healthz-port=0 \
|
||||
--kubeconfig=/var/lib/kubelet/kubeconfig \
|
||||
--network-plugin=cni \
|
||||
--node-labels=node.kubernetes.io/master \
|
||||
--node-labels=node.kubernetes.io/controller="true" \
|
||||
--pod-manifest-path=/etc/kubernetes/manifests \
|
||||
--register-with-taints=node-role.kubernetes.io/master=:NoSchedule \
|
||||
--read-only-port=0 \
|
||||
--register-with-taints=node-role.kubernetes.io/controller=:NoSchedule \
|
||||
--rotate-certificates \
|
||||
--volume-plugin-dir=/var/lib/kubelet/volumeplugins
|
||||
ExecStop=-/usr/bin/rkt stop --uuid-file=/var/cache/kubelet-pod.uuid
|
||||
Restart=always
|
||||
@ -104,7 +123,6 @@ systemd:
|
||||
Type=oneshot
|
||||
RemainAfterExit=true
|
||||
WorkingDirectory=/opt/bootstrap
|
||||
ExecStartPre=-/usr/bin/bash -c 'set -x && [ -n "$(ls /opt/bootstrap/assets/manifests-*/* 2>/dev/null)" ] && mv /opt/bootstrap/assets/manifests-*/* /opt/bootstrap/assets/manifests && rm -rf /opt/bootstrap/assets/manifests-*'
|
||||
ExecStart=/usr/bin/rkt run \
|
||||
--trust-keys-from-https \
|
||||
--volume config,kind=host,source=/etc/kubernetes/bootstrap-secrets \
|
||||
@ -114,7 +132,7 @@ systemd:
|
||||
--volume script,kind=host,source=/opt/bootstrap/apply \
|
||||
--mount volume=script,target=/apply \
|
||||
--insecure-options=image \
|
||||
docker://k8s.gcr.io/hyperkube:v1.17.0 \
|
||||
docker://quay.io/poseidon/kubelet:v1.18.4 \
|
||||
--net=host \
|
||||
--dns=host \
|
||||
--exec=/apply
|
||||
@ -129,14 +147,6 @@ storage:
|
||||
contents:
|
||||
inline: |
|
||||
${kubeconfig}
|
||||
- path: /etc/kubernetes/kubelet.env
|
||||
filesystem: root
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
||||
KUBELET_IMAGE_URL=docker://k8s.gcr.io/hyperkube
|
||||
KUBELET_IMAGE_TAG=v1.17.0
|
||||
KUBELET_IMAGE_ARGS="--exec=/usr/local/bin/kubelet"
|
||||
- path: /opt/bootstrap/layout
|
||||
filesystem: root
|
||||
mode: 0544
|
||||
@ -153,12 +163,12 @@ storage:
|
||||
chmod -R 500 /etc/ssl/etcd
|
||||
mv auth/kubeconfig /etc/kubernetes/bootstrap-secrets/
|
||||
mv tls/k8s/* /etc/kubernetes/bootstrap-secrets/
|
||||
sudo mkdir -p /etc/kubernetes/manifests
|
||||
sudo mv static-manifests/* /etc/kubernetes/manifests/
|
||||
sudo mkdir -p /opt/bootstrap/assets
|
||||
sudo mv manifests /opt/bootstrap/assets/manifests
|
||||
sudo mv manifests-networking /opt/bootstrap/assets/manifests-networking
|
||||
rm -rf assets auth static-manifests tls
|
||||
mkdir -p /etc/kubernetes/manifests
|
||||
mv static-manifests/* /etc/kubernetes/manifests/
|
||||
mkdir -p /opt/bootstrap/assets
|
||||
mv manifests /opt/bootstrap/assets/manifests
|
||||
mv manifests-networking/* /opt/bootstrap/assets/manifests/
|
||||
rm -rf assets auth static-manifests tls manifests-networking
|
||||
- path: /opt/bootstrap/apply
|
||||
filesystem: root
|
||||
mode: 0544
|
||||
@ -176,6 +186,7 @@ storage:
|
||||
done
|
||||
- path: /etc/sysctl.d/max-user-watches.conf
|
||||
filesystem: root
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
||||
fs.inotify.max_user_watches=16184
|
@ -11,16 +11,15 @@ resource "azurerm_dns_a_record" "etcds" {
|
||||
ttl = 300
|
||||
|
||||
# private IPv4 address for etcd
|
||||
records = [element(
|
||||
azurerm_network_interface.controllers.*.private_ip_address,
|
||||
count.index,
|
||||
)]
|
||||
records = [azurerm_network_interface.controllers.*.private_ip_address[count.index]]
|
||||
}
|
||||
|
||||
locals {
|
||||
# Channel for a Container Linux derivative
|
||||
# Container Linux derivative
|
||||
# coreos-stable -> Container Linux Stable
|
||||
channel = element(split("-", var.os_image), 1)
|
||||
# flatcar-stable -> Flatcar Linux Stable
|
||||
flavor = split("-", var.os_image)[0]
|
||||
channel = split("-", var.os_image)[1]
|
||||
}
|
||||
|
||||
# Controller availability set to spread controllers
|
||||
@ -35,92 +34,64 @@ resource "azurerm_availability_set" "controllers" {
|
||||
}
|
||||
|
||||
# Controller instances
|
||||
resource "azurerm_virtual_machine" "controllers" {
|
||||
resource "azurerm_linux_virtual_machine" "controllers" {
|
||||
count = var.controller_count
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "${var.cluster_name}-controller-${count.index}"
|
||||
location = var.region
|
||||
availability_set_id = azurerm_availability_set.controllers.id
|
||||
vm_size = var.controller_type
|
||||
|
||||
# boot
|
||||
storage_image_reference {
|
||||
publisher = "CoreOS"
|
||||
offer = "CoreOS"
|
||||
size = var.controller_type
|
||||
custom_data = base64encode(data.ct_config.controller-ignitions.*.rendered[count.index])
|
||||
|
||||
# storage
|
||||
os_disk {
|
||||
name = "${var.cluster_name}-controller-${count.index}"
|
||||
caching = "None"
|
||||
disk_size_gb = var.disk_size
|
||||
storage_account_type = "Premium_LRS"
|
||||
}
|
||||
|
||||
# CoreOS Container Linux or Flatcar Container Linux
|
||||
source_image_reference {
|
||||
publisher = local.flavor == "flatcar" ? "Kinvolk" : "CoreOS"
|
||||
offer = local.flavor == "flatcar" ? "flatcar-container-linux-free" : "CoreOS"
|
||||
sku = local.channel
|
||||
version = "latest"
|
||||
}
|
||||
|
||||
# storage
|
||||
storage_os_disk {
|
||||
name = "${var.cluster_name}-controller-${count.index}"
|
||||
create_option = "FromImage"
|
||||
caching = "ReadWrite"
|
||||
disk_size_gb = var.disk_size
|
||||
os_type = "Linux"
|
||||
managed_disk_type = "Premium_LRS"
|
||||
}
|
||||
# Gross hack for Flatcar Linux
|
||||
dynamic "plan" {
|
||||
for_each = local.flavor == "flatcar" ? [1] : []
|
||||
|
||||
# network
|
||||
network_interface_ids = [element(azurerm_network_interface.controllers.*.id, count.index)]
|
||||
|
||||
os_profile {
|
||||
computer_name = "${var.cluster_name}-controller-${count.index}"
|
||||
admin_username = "core"
|
||||
custom_data = element(data.ct_config.controller-ignitions.*.rendered, count.index)
|
||||
}
|
||||
|
||||
# Azure mandates setting an ssh_key, even though Ignition custom_data handles it too
|
||||
os_profile_linux_config {
|
||||
disable_password_authentication = true
|
||||
|
||||
ssh_keys {
|
||||
path = "/home/core/.ssh/authorized_keys"
|
||||
key_data = var.ssh_authorized_key
|
||||
content {
|
||||
name = local.channel
|
||||
publisher = "kinvolk"
|
||||
product = "flatcar-container-linux-free"
|
||||
}
|
||||
}
|
||||
|
||||
# lifecycle
|
||||
delete_os_disk_on_termination = true
|
||||
delete_data_disks_on_termination = true
|
||||
# network
|
||||
network_interface_ids = [
|
||||
azurerm_network_interface.controllers.*.id[count.index]
|
||||
]
|
||||
|
||||
# Azure requires setting admin_ssh_key, though Ignition custom_data handles it too
|
||||
admin_username = "core"
|
||||
admin_ssh_key {
|
||||
username = "core"
|
||||
public_key = var.ssh_authorized_key
|
||||
}
|
||||
|
||||
lifecycle {
|
||||
ignore_changes = [
|
||||
storage_os_disk,
|
||||
os_profile,
|
||||
os_disk,
|
||||
custom_data,
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
# Controller NICs with public and private IPv4
|
||||
resource "azurerm_network_interface" "controllers" {
|
||||
count = var.controller_count
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "${var.cluster_name}-controller-${count.index}"
|
||||
location = azurerm_resource_group.cluster.location
|
||||
network_security_group_id = azurerm_network_security_group.controller.id
|
||||
|
||||
ip_configuration {
|
||||
name = "ip0"
|
||||
subnet_id = azurerm_subnet.controller.id
|
||||
private_ip_address_allocation = "dynamic"
|
||||
|
||||
# public IPv4
|
||||
public_ip_address_id = element(azurerm_public_ip.controllers.*.id, count.index)
|
||||
}
|
||||
}
|
||||
|
||||
# Add controller NICs to the controller backend address pool
|
||||
resource "azurerm_network_interface_backend_address_pool_association" "controllers" {
|
||||
count = var.controller_count
|
||||
|
||||
network_interface_id = azurerm_network_interface.controllers[count.index].id
|
||||
ip_configuration_name = "ip0"
|
||||
backend_address_pool_id = azurerm_lb_backend_address_pool.controller.id
|
||||
}
|
||||
|
||||
# Controller public IPv4 addresses
|
||||
resource "azurerm_public_ip" "controllers" {
|
||||
count = var.controller_count
|
||||
@ -132,22 +103,53 @@ resource "azurerm_public_ip" "controllers" {
|
||||
allocation_method = "Static"
|
||||
}
|
||||
|
||||
# Controller NICs with public and private IPv4
|
||||
resource "azurerm_network_interface" "controllers" {
|
||||
count = var.controller_count
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "${var.cluster_name}-controller-${count.index}"
|
||||
location = azurerm_resource_group.cluster.location
|
||||
|
||||
ip_configuration {
|
||||
name = "ip0"
|
||||
subnet_id = azurerm_subnet.controller.id
|
||||
private_ip_address_allocation = "Dynamic"
|
||||
# instance public IPv4
|
||||
public_ip_address_id = azurerm_public_ip.controllers.*.id[count.index]
|
||||
}
|
||||
}
|
||||
|
||||
# Associate controller network interface with controller security group
|
||||
resource "azurerm_network_interface_security_group_association" "controllers" {
|
||||
count = var.controller_count
|
||||
|
||||
network_interface_id = azurerm_network_interface.controllers[count.index].id
|
||||
network_security_group_id = azurerm_network_security_group.controller.id
|
||||
}
|
||||
|
||||
# Associate controller network interface with controller backend address pool
|
||||
resource "azurerm_network_interface_backend_address_pool_association" "controllers" {
|
||||
count = var.controller_count
|
||||
|
||||
network_interface_id = azurerm_network_interface.controllers[count.index].id
|
||||
ip_configuration_name = "ip0"
|
||||
backend_address_pool_id = azurerm_lb_backend_address_pool.controller.id
|
||||
}
|
||||
|
||||
# Controller Ignition configs
|
||||
data "ct_config" "controller-ignitions" {
|
||||
count = var.controller_count
|
||||
content = element(
|
||||
data.template_file.controller-configs.*.rendered,
|
||||
count.index,
|
||||
)
|
||||
pretty_print = false
|
||||
snippets = var.controller_clc_snippets
|
||||
count = var.controller_count
|
||||
content = data.template_file.controller-configs.*.rendered[count.index]
|
||||
strict = true
|
||||
snippets = var.controller_snippets
|
||||
}
|
||||
|
||||
# Controller Container Linux configs
|
||||
data "template_file" "controller-configs" {
|
||||
count = var.controller_count
|
||||
|
||||
template = file("${path.module}/cl/controller.yaml.tmpl")
|
||||
template = file("${path.module}/cl/controller.yaml")
|
||||
|
||||
vars = {
|
||||
# Cannot use cyclic dependencies on controllers or their DNS records
|
||||
|
@ -72,6 +72,7 @@ resource "azurerm_lb_rule" "ingress-http" {
|
||||
name = "ingress-http"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
frontend_ip_configuration_name = "ingress"
|
||||
disable_outbound_snat = true
|
||||
|
||||
protocol = "Tcp"
|
||||
frontend_port = 80
|
||||
@ -86,6 +87,7 @@ resource "azurerm_lb_rule" "ingress-https" {
|
||||
name = "ingress-https"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
frontend_ip_configuration_name = "ingress"
|
||||
disable_outbound_snat = true
|
||||
|
||||
protocol = "Tcp"
|
||||
frontend_port = 443
|
||||
@ -94,6 +96,20 @@ resource "azurerm_lb_rule" "ingress-https" {
|
||||
probe_id = azurerm_lb_probe.ingress.id
|
||||
}
|
||||
|
||||
# Worker outbound TCP/UDP SNAT
|
||||
resource "azurerm_lb_outbound_rule" "worker-outbound" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "worker"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
frontend_ip_configuration {
|
||||
name = "ingress"
|
||||
}
|
||||
|
||||
protocol = "All"
|
||||
backend_address_pool_id = azurerm_lb_backend_address_pool.worker.id
|
||||
}
|
||||
|
||||
# Address pool of controllers
|
||||
resource "azurerm_lb_backend_address_pool" "controller" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
@ -21,7 +21,12 @@ resource "azurerm_subnet" "controller" {
|
||||
|
||||
name = "controller"
|
||||
virtual_network_name = azurerm_virtual_network.network.name
|
||||
address_prefix = cidrsubnet(var.host_cidr, 1, 0)
|
||||
address_prefixes = [cidrsubnet(var.host_cidr, 1, 0)]
|
||||
}
|
||||
|
||||
resource "azurerm_subnet_network_security_group_association" "controller" {
|
||||
subnet_id = azurerm_subnet.controller.id
|
||||
network_security_group_id = azurerm_network_security_group.controller.id
|
||||
}
|
||||
|
||||
resource "azurerm_subnet" "worker" {
|
||||
@ -29,6 +34,11 @@ resource "azurerm_subnet" "worker" {
|
||||
|
||||
name = "worker"
|
||||
virtual_network_name = azurerm_virtual_network.network.name
|
||||
address_prefix = cidrsubnet(var.host_cidr, 1, 1)
|
||||
address_prefixes = [cidrsubnet(var.host_cidr, 1, 1)]
|
||||
}
|
||||
|
||||
resource "azurerm_subnet_network_security_group_association" "worker" {
|
||||
subnet_id = azurerm_subnet.worker.id
|
||||
network_security_group_id = azurerm_network_security_group.worker.id
|
||||
}
|
||||
|
||||
|
@ -53,13 +53,29 @@ resource "azurerm_network_security_rule" "controller-etcd-metrics" {
|
||||
destination_address_prefix = azurerm_subnet.controller.address_prefix
|
||||
}
|
||||
|
||||
# Allow Prometheus to scrape kube-proxy metrics
|
||||
resource "azurerm_network_security_rule" "controller-kube-proxy" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "allow-kube-proxy-metrics"
|
||||
network_security_group_name = azurerm_network_security_group.controller.name
|
||||
priority = "2011"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "10249"
|
||||
source_address_prefix = azurerm_subnet.worker.address_prefix
|
||||
destination_address_prefix = azurerm_subnet.controller.address_prefix
|
||||
}
|
||||
|
||||
# Allow Prometheus to scrape kube-scheduler and kube-controller-manager metrics
|
||||
resource "azurerm_network_security_rule" "controller-kube-metrics" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "allow-kube-metrics"
|
||||
network_security_group_name = azurerm_network_security_group.controller.name
|
||||
priority = "2011"
|
||||
priority = "2012"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
@ -251,6 +267,22 @@ resource "azurerm_network_security_rule" "worker-node-exporter" {
|
||||
destination_address_prefix = azurerm_subnet.worker.address_prefix
|
||||
}
|
||||
|
||||
# Allow Prometheus to scrape kube-proxy
|
||||
resource "azurerm_network_security_rule" "worker-kube-proxy" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "allow-kube-proxy"
|
||||
network_security_group_name = azurerm_network_security_group.worker.name
|
||||
priority = "2024"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "10249"
|
||||
source_address_prefix = azurerm_subnet.worker.address_prefix
|
||||
destination_address_prefix = azurerm_subnet.worker.address_prefix
|
||||
}
|
||||
|
||||
# Allow apiserver to access kubelet's for exec, log, port-forward
|
||||
resource "azurerm_network_security_rule" "worker-kubelet" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
@ -2,7 +2,7 @@ locals {
|
||||
# format assets for distribution
|
||||
assets_bundle = [
|
||||
# header with the unpack location
|
||||
for key, value in module.bootstrap.assets_dist:
|
||||
for key, value in module.bootstrap.assets_dist :
|
||||
format("##### %s\n%s", key, value)
|
||||
]
|
||||
}
|
||||
@ -13,7 +13,7 @@ resource "null_resource" "copy-controller-secrets" {
|
||||
|
||||
depends_on = [
|
||||
module.bootstrap,
|
||||
azurerm_virtual_machine.controllers
|
||||
azurerm_linux_virtual_machine.controllers
|
||||
]
|
||||
|
||||
connection {
|
||||
@ -22,7 +22,7 @@ resource "null_resource" "copy-controller-secrets" {
|
||||
user = "core"
|
||||
timeout = "15m"
|
||||
}
|
||||
|
||||
|
||||
provisioner "file" {
|
||||
content = join("\n", local.assets_bundle)
|
||||
destination = "$HOME/assets"
|
||||
@ -45,7 +45,7 @@ resource "null_resource" "bootstrap" {
|
||||
|
||||
connection {
|
||||
type = "ssh"
|
||||
host = element(azurerm_public_ip.controllers.*.ip_address, 0)
|
||||
host = azurerm_public_ip.controllers.*.ip_address[0]
|
||||
user = "core"
|
||||
timeout = "15m"
|
||||
}
|
||||
|
@ -48,8 +48,8 @@ variable "worker_type" {
|
||||
|
||||
variable "os_image" {
|
||||
type = string
|
||||
default = "coreos-stable"
|
||||
description = "Channel for a Container Linux derivative (coreos-stable, coreos-beta, coreos-alpha)"
|
||||
description = "Channel for a Container Linux derivative (flatcar-stable, flatcar-beta, flatcar-alpha, flatcar-edge, coreos-stable, coreos-beta, coreos-alpha)"
|
||||
default = "flatcar-stable"
|
||||
}
|
||||
|
||||
variable "disk_size" {
|
||||
@ -60,17 +60,17 @@ variable "disk_size" {
|
||||
|
||||
variable "worker_priority" {
|
||||
type = string
|
||||
description = "Set worker priority to Low to use reduced cost surplus capacity, with the tradeoff that instances can be deallocated at any time."
|
||||
description = "Set worker priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be deallocated at any time."
|
||||
default = "Regular"
|
||||
}
|
||||
|
||||
variable "controller_clc_snippets" {
|
||||
variable "controller_snippets" {
|
||||
type = list(string)
|
||||
description = "Controller Container Linux Config snippets"
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "worker_clc_snippets" {
|
||||
variable "worker_snippets" {
|
||||
type = list(string)
|
||||
description = "Worker Container Linux Config snippets"
|
||||
default = []
|
||||
@ -83,12 +83,6 @@ variable "ssh_authorized_key" {
|
||||
description = "SSH public key for user 'core'"
|
||||
}
|
||||
|
||||
variable "asset_dir" {
|
||||
type = string
|
||||
description = "Absolute path to a directory where generated assets should be placed (contains secrets)"
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "networking" {
|
||||
type = string
|
||||
description = "Choice of networking provider (flannel or calico)"
|
||||
@ -136,6 +130,12 @@ variable "worker_node_labels" {
|
||||
|
||||
# unofficial, undocumented, unsupported
|
||||
|
||||
variable "asset_dir" {
|
||||
type = string
|
||||
description = "Absolute path to a directory where generated assets should be placed (contains secrets)"
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "cluster_domain_suffix" {
|
||||
type = string
|
||||
description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. foo.default.svc.cluster.local) "
|
||||
|
@ -3,8 +3,8 @@
|
||||
terraform {
|
||||
required_version = "~> 0.12.6"
|
||||
required_providers {
|
||||
azurerm = "~> 1.27"
|
||||
ct = "~> 0.3"
|
||||
azurerm = "~> 2.8"
|
||||
ct = "~> 0.4"
|
||||
template = "~> 2.1"
|
||||
null = "~> 2.1"
|
||||
}
|
||||
|
@ -19,7 +19,6 @@ module "workers" {
|
||||
ssh_authorized_key = var.ssh_authorized_key
|
||||
service_cidr = var.service_cidr
|
||||
cluster_domain_suffix = var.cluster_domain_suffix
|
||||
clc_snippets = var.worker_clc_snippets
|
||||
snippets = var.worker_snippets
|
||||
node_labels = var.worker_node_labels
|
||||
}
|
||||
|
||||
|
@ -2,11 +2,11 @@
|
||||
systemd:
|
||||
units:
|
||||
- name: docker.service
|
||||
enable: true
|
||||
enabled: true
|
||||
- name: locksmithd.service
|
||||
mask: true
|
||||
- name: wait-for-dns.service
|
||||
enable: true
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Wait for DNS entries
|
||||
@ -19,51 +19,70 @@ systemd:
|
||||
[Install]
|
||||
RequiredBy=kubelet.service
|
||||
- name: kubelet.service
|
||||
enable: true
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Kubelet via Hyperkube
|
||||
Description=Kubelet
|
||||
Wants=rpc-statd.service
|
||||
[Service]
|
||||
EnvironmentFile=/etc/kubernetes/kubelet.env
|
||||
Environment="RKT_RUN_ARGS=--uuid-file-save=/var/cache/kubelet-pod.uuid \
|
||||
--volume=resolv,kind=host,source=/etc/resolv.conf \
|
||||
--mount volume=resolv,target=/etc/resolv.conf \
|
||||
--volume var-lib-cni,kind=host,source=/var/lib/cni \
|
||||
--mount volume=var-lib-cni,target=/var/lib/cni \
|
||||
--volume var-lib-calico,kind=host,source=/var/lib/calico \
|
||||
--mount volume=var-lib-calico,target=/var/lib/calico \
|
||||
--volume opt-cni-bin,kind=host,source=/opt/cni/bin \
|
||||
--mount volume=opt-cni-bin,target=/opt/cni/bin \
|
||||
--volume var-log,kind=host,source=/var/log \
|
||||
--mount volume=var-log,target=/var/log \
|
||||
--insecure-options=image"
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
|
||||
Environment=KUBELET_IMAGE=docker://quay.io/poseidon/kubelet:v1.18.4
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/cni/net.d
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
|
||||
ExecStartPre=/bin/mkdir -p /opt/cni/bin
|
||||
ExecStartPre=/bin/mkdir -p /var/lib/cni
|
||||
ExecStartPre=/bin/mkdir -p /var/lib/calico
|
||||
ExecStartPre=/bin/mkdir -p /var/lib/kubelet/volumeplugins
|
||||
ExecStartPre=/usr/bin/bash -c "grep 'certificate-authority-data' /etc/kubernetes/kubeconfig | awk '{print $2}' | base64 -d > /etc/kubernetes/ca.crt"
|
||||
ExecStartPre=-/usr/bin/rkt rm --uuid-file=/var/cache/kubelet-pod.uuid
|
||||
ExecStart=/usr/lib/coreos/kubelet-wrapper \
|
||||
ExecStart=/usr/bin/rkt run \
|
||||
--uuid-file-save=/var/cache/kubelet-pod.uuid \
|
||||
--stage1-from-dir=stage1-fly.aci \
|
||||
--hosts-entry host \
|
||||
--insecure-options=image \
|
||||
--volume etc-kubernetes,kind=host,source=/etc/kubernetes,readOnly=true \
|
||||
--mount volume=etc-kubernetes,target=/etc/kubernetes \
|
||||
--volume etc-machine-id,kind=host,source=/etc/machine-id,readOnly=true \
|
||||
--mount volume=etc-machine-id,target=/etc/machine-id \
|
||||
--volume etc-os-release,kind=host,source=/usr/lib/os-release,readOnly=true \
|
||||
--mount volume=etc-os-release,target=/etc/os-release \
|
||||
--volume=etc-resolv,kind=host,source=/etc/resolv.conf,readOnly=true \
|
||||
--mount volume=etc-resolv,target=/etc/resolv.conf \
|
||||
--volume etc-ssl-certs,kind=host,source=/etc/ssl/certs,readOnly=true \
|
||||
--mount volume=etc-ssl-certs,target=/etc/ssl/certs \
|
||||
--volume lib-modules,kind=host,source=/lib/modules,readOnly=true \
|
||||
--mount volume=lib-modules,target=/lib/modules \
|
||||
--volume run,kind=host,source=/run \
|
||||
--mount volume=run,target=/run \
|
||||
--volume usr-share-certs,kind=host,source=/usr/share/ca-certificates,readOnly=true \
|
||||
--mount volume=usr-share-certs,target=/usr/share/ca-certificates \
|
||||
--volume var-lib-calico,kind=host,source=/var/lib/calico,readOnly=true \
|
||||
--mount volume=var-lib-calico,target=/var/lib/calico \
|
||||
--volume var-lib-docker,kind=host,source=/var/lib/docker \
|
||||
--mount volume=var-lib-docker,target=/var/lib/docker \
|
||||
--volume var-lib-kubelet,kind=host,source=/var/lib/kubelet,recursive=true \
|
||||
--mount volume=var-lib-kubelet,target=/var/lib/kubelet \
|
||||
--volume var-log,kind=host,source=/var/log \
|
||||
--mount volume=var-log,target=/var/log \
|
||||
--volume opt-cni-bin,kind=host,source=/opt/cni/bin \
|
||||
--mount volume=opt-cni-bin,target=/opt/cni/bin \
|
||||
$${KUBELET_IMAGE} -- \
|
||||
--anonymous-auth=false \
|
||||
--authentication-token-webhook \
|
||||
--authorization-mode=Webhook \
|
||||
--bootstrap-kubeconfig=/etc/kubernetes/kubeconfig \
|
||||
--client-ca-file=/etc/kubernetes/ca.crt \
|
||||
--cluster_dns=${cluster_dns_service_ip} \
|
||||
--cluster_domain=${cluster_domain_suffix} \
|
||||
--cni-conf-dir=/etc/kubernetes/cni/net.d \
|
||||
--exit-on-lock-contention \
|
||||
--kubeconfig=/etc/kubernetes/kubeconfig \
|
||||
--lock-file=/var/run/lock/kubelet.lock \
|
||||
--healthz-port=0 \
|
||||
--kubeconfig=/var/lib/kubelet/kubeconfig \
|
||||
--network-plugin=cni \
|
||||
--node-labels=node.kubernetes.io/node \
|
||||
%{ for label in split(",", node_labels) }
|
||||
%{~ for label in split(",", node_labels) ~}
|
||||
--node-labels=${label} \
|
||||
%{ endfor ~}
|
||||
%{~ endfor ~}
|
||||
--pod-manifest-path=/etc/kubernetes/manifests \
|
||||
--read-only-port=0 \
|
||||
--rotate-certificates \
|
||||
--volume-plugin-dir=/var/lib/kubelet/volumeplugins
|
||||
ExecStop=-/usr/bin/rkt stop --uuid-file=/var/cache/kubelet-pod.uuid
|
||||
Restart=always
|
||||
@ -71,7 +90,7 @@ systemd:
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
- name: delete-node.service
|
||||
enable: true
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Waiting to delete Kubernetes node on shutdown
|
||||
@ -90,16 +109,9 @@ storage:
|
||||
contents:
|
||||
inline: |
|
||||
${kubeconfig}
|
||||
- path: /etc/kubernetes/kubelet.env
|
||||
filesystem: root
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
||||
KUBELET_IMAGE_URL=docker://k8s.gcr.io/hyperkube
|
||||
KUBELET_IMAGE_TAG=v1.17.0
|
||||
KUBELET_IMAGE_ARGS="--exec=/usr/local/bin/kubelet"
|
||||
- path: /etc/sysctl.d/max-user-watches.conf
|
||||
filesystem: root
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
||||
fs.inotify.max_user_watches=16184
|
||||
@ -115,11 +127,10 @@ storage:
|
||||
--volume config,kind=host,source=/etc/kubernetes \
|
||||
--mount volume=config,target=/etc/kubernetes \
|
||||
--insecure-options=image \
|
||||
docker://k8s.gcr.io/hyperkube:v1.17.0 \
|
||||
docker://quay.io/poseidon/kubelet:v1.18.4 \
|
||||
--net=host \
|
||||
--dns=host \
|
||||
-- \
|
||||
kubectl -- --kubeconfig=/etc/kubernetes/kubeconfig delete node $(hostname | tr '[:upper:]' '[:lower:]')
|
||||
--exec=/usr/local/bin/kubectl -- --kubeconfig=/etc/kubernetes/kubeconfig delete node $(hostname | tr '[:upper:]' '[:lower:]')
|
||||
passwd:
|
||||
users:
|
||||
- name: core
|
@ -46,17 +46,17 @@ variable "vm_type" {
|
||||
|
||||
variable "os_image" {
|
||||
type = string
|
||||
description = "Channel for a Container Linux derivative (coreos-stable, coreos-beta, coreos-alpha)"
|
||||
default = "coreos-stable"
|
||||
description = "Channel for a Container Linux derivative (flatcar-stable, flatcar-beta, flatcar-alpha, flatcar-edge, coreos-stable, coreos-beta, coreos-alpha)"
|
||||
default = "flatcar-stable"
|
||||
}
|
||||
|
||||
variable "priority" {
|
||||
type = string
|
||||
description = "Set priority to Low to use reduced cost surplus capacity, with the tradeoff that instances can be evicted at any time."
|
||||
description = "Set priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be evicted at any time."
|
||||
default = "Regular"
|
||||
}
|
||||
|
||||
variable "clc_snippets" {
|
||||
variable "snippets" {
|
||||
type = list(string)
|
||||
description = "Container Linux Config snippets"
|
||||
default = []
|
||||
|
@ -1,57 +1,57 @@
|
||||
locals {
|
||||
# Channel for a Container Linux derivative
|
||||
# coreos-stable -> Container Linux Stable
|
||||
channel = element(split("-", var.os_image), 1)
|
||||
# flatcar-stable -> Flatcar Linux Stable
|
||||
flavor = split("-", var.os_image)[0]
|
||||
channel = split("-", var.os_image)[1]
|
||||
}
|
||||
|
||||
# Workers scale set
|
||||
resource "azurerm_virtual_machine_scale_set" "workers" {
|
||||
resource "azurerm_linux_virtual_machine_scale_set" "workers" {
|
||||
resource_group_name = var.resource_group_name
|
||||
|
||||
name = "${var.name}-workers"
|
||||
location = var.region
|
||||
name = "${var.name}-worker"
|
||||
location = var.region
|
||||
sku = var.vm_type
|
||||
instances = var.worker_count
|
||||
# instance name prefix for instances in the set
|
||||
computer_name_prefix = "${var.name}-worker"
|
||||
single_placement_group = false
|
||||
custom_data = base64encode(data.ct_config.worker-ignition.rendered)
|
||||
|
||||
sku {
|
||||
name = var.vm_type
|
||||
tier = "standard"
|
||||
capacity = var.worker_count
|
||||
# storage
|
||||
os_disk {
|
||||
storage_account_type = "Standard_LRS"
|
||||
caching = "ReadWrite"
|
||||
}
|
||||
|
||||
# boot
|
||||
storage_profile_image_reference {
|
||||
publisher = "CoreOS"
|
||||
offer = "CoreOS"
|
||||
# CoreOS Container Linux or Flatcar Container Linux
|
||||
source_image_reference {
|
||||
publisher = local.flavor == "flatcar" ? "Kinvolk" : "CoreOS"
|
||||
offer = local.flavor == "flatcar" ? "flatcar-container-linux-free" : "CoreOS"
|
||||
sku = local.channel
|
||||
version = "latest"
|
||||
}
|
||||
|
||||
# storage
|
||||
storage_profile_os_disk {
|
||||
create_option = "FromImage"
|
||||
caching = "ReadWrite"
|
||||
os_type = "linux"
|
||||
managed_disk_type = "Standard_LRS"
|
||||
}
|
||||
# Gross hack for Flatcar Linux
|
||||
dynamic "plan" {
|
||||
for_each = local.flavor == "flatcar" ? [1] : []
|
||||
|
||||
os_profile {
|
||||
computer_name_prefix = "${var.name}-worker-"
|
||||
admin_username = "core"
|
||||
custom_data = data.ct_config.worker-ignition.rendered
|
||||
}
|
||||
|
||||
# Azure mandates setting an ssh_key, even though Ignition custom_data handles it too
|
||||
os_profile_linux_config {
|
||||
disable_password_authentication = true
|
||||
|
||||
ssh_keys {
|
||||
path = "/home/core/.ssh/authorized_keys"
|
||||
key_data = var.ssh_authorized_key
|
||||
content {
|
||||
name = local.channel
|
||||
publisher = "kinvolk"
|
||||
product = "flatcar-container-linux-free"
|
||||
}
|
||||
}
|
||||
|
||||
# Azure requires setting admin_ssh_key, though Ignition custom_data handles it too
|
||||
admin_username = "core"
|
||||
admin_ssh_key {
|
||||
username = "core"
|
||||
public_key = var.ssh_authorized_key
|
||||
}
|
||||
|
||||
# network
|
||||
network_profile {
|
||||
network_interface {
|
||||
name = "nic0"
|
||||
primary = true
|
||||
network_security_group_id = var.security_group_id
|
||||
@ -67,10 +67,10 @@ resource "azurerm_virtual_machine_scale_set" "workers" {
|
||||
}
|
||||
|
||||
# lifecycle
|
||||
upgrade_policy_mode = "Manual"
|
||||
# eviction policy may only be set when priority is Low
|
||||
upgrade_mode = "Manual"
|
||||
# eviction policy may only be set when priority is Spot
|
||||
priority = var.priority
|
||||
eviction_policy = var.priority == "Low" ? "Delete" : null
|
||||
eviction_policy = var.priority == "Spot" ? "Delete" : null
|
||||
}
|
||||
|
||||
# Scale up or down to maintain desired number, tolerating deallocations.
|
||||
@ -82,7 +82,7 @@ resource "azurerm_monitor_autoscale_setting" "workers" {
|
||||
|
||||
# autoscale
|
||||
enabled = true
|
||||
target_resource_id = azurerm_virtual_machine_scale_set.workers.id
|
||||
target_resource_id = azurerm_linux_virtual_machine_scale_set.workers.id
|
||||
|
||||
profile {
|
||||
name = "default"
|
||||
@ -97,14 +97,14 @@ resource "azurerm_monitor_autoscale_setting" "workers" {
|
||||
|
||||
# Worker Ignition configs
|
||||
data "ct_config" "worker-ignition" {
|
||||
content = data.template_file.worker-config.rendered
|
||||
pretty_print = false
|
||||
snippets = var.clc_snippets
|
||||
content = data.template_file.worker-config.rendered
|
||||
strict = true
|
||||
snippets = var.snippets
|
||||
}
|
||||
|
||||
# Worker Container Linux configs
|
||||
data "template_file" "worker-config" {
|
||||
template = file("${path.module}/cl/worker.yaml.tmpl")
|
||||
template = file("${path.module}/cl/worker.yaml")
|
||||
|
||||
vars = {
|
||||
kubeconfig = indent(10, var.kubeconfig)
|
||||
|
23
azure/fedora-coreos/kubernetes/LICENSE
Normal file
23
azure/fedora-coreos/kubernetes/LICENSE
Normal file
@ -0,0 +1,23 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2020 Typhoon Authors
|
||||
Copyright (c) 2020 Dalton Hubble
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
|
23
azure/fedora-coreos/kubernetes/README.md
Normal file
23
azure/fedora-coreos/kubernetes/README.md
Normal file
@ -0,0 +1,23 @@
|
||||
# Typhoon <img align="right" src="https://storage.googleapis.com/poseidon/typhoon-logo.png">
|
||||
|
||||
Typhoon is a minimal and free Kubernetes distribution.
|
||||
|
||||
* Minimal, stable base Kubernetes distribution
|
||||
* Declarative infrastructure and configuration
|
||||
* Free (freedom and cost) and privacy-respecting
|
||||
* Practical for labs, datacenters, and clouds
|
||||
|
||||
Typhoon distributes upstream Kubernetes, architectural conventions, and cluster addons, much like a GNU/Linux distribution provides the Linux kernel and userspace components.
|
||||
|
||||
## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>
|
||||
|
||||
* Kubernetes v1.18.4 (upstream)
|
||||
* Single or multi-master, [Calico](https://www.projectcalico.org/) or [flannel](https://github.com/coreos/flannel) networking
|
||||
* On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/), SELinux enforcing
|
||||
* Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [spot priority](https://typhoon.psdn.io/fedora-coreos/azure/#low-priority) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/) customization
|
||||
* Ready for Ingress, Prometheus, Grafana, and other optional [addons](https://typhoon.psdn.io/addons/overview/)
|
||||
|
||||
## Docs
|
||||
|
||||
Please see the [official docs](https://typhoon.psdn.io) and the Azure [tutorial](https://typhoon.psdn.io/fedora-coreos/azure/).
|
||||
|
26
azure/fedora-coreos/kubernetes/bootstrap.tf
Normal file
26
azure/fedora-coreos/kubernetes/bootstrap.tf
Normal file
@ -0,0 +1,26 @@
|
||||
# Kubernetes assets (kubeconfig, manifests)
|
||||
module "bootstrap" {
|
||||
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=e75697ce35d7773705f0b9b28ce1ffbe99f9493c"
|
||||
|
||||
cluster_name = var.cluster_name
|
||||
api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)]
|
||||
etcd_servers = formatlist("%s.%s", azurerm_dns_a_record.etcds.*.name, var.dns_zone)
|
||||
asset_dir = var.asset_dir
|
||||
|
||||
networking = var.networking
|
||||
|
||||
# only effective with Calico networking
|
||||
# we should be able to use 1450 MTU, but in practice, 1410 was needed
|
||||
network_encapsulation = "vxlan"
|
||||
network_mtu = "1410"
|
||||
|
||||
pod_cidr = var.pod_cidr
|
||||
service_cidr = var.service_cidr
|
||||
cluster_domain_suffix = var.cluster_domain_suffix
|
||||
enable_reporting = var.enable_reporting
|
||||
enable_aggregation = var.enable_aggregation
|
||||
|
||||
# Fedora CoreOS
|
||||
trusted_certs_dir = "/etc/pki/tls/certs"
|
||||
}
|
||||
|
151
azure/fedora-coreos/kubernetes/controllers.tf
Normal file
151
azure/fedora-coreos/kubernetes/controllers.tf
Normal file
@ -0,0 +1,151 @@
|
||||
# Discrete DNS records for each controller's private IPv4 for etcd usage
|
||||
resource "azurerm_dns_a_record" "etcds" {
|
||||
count = var.controller_count
|
||||
resource_group_name = var.dns_zone_group
|
||||
|
||||
# DNS Zone name where record should be created
|
||||
zone_name = var.dns_zone
|
||||
|
||||
# DNS record
|
||||
name = format("%s-etcd%d", var.cluster_name, count.index)
|
||||
ttl = 300
|
||||
|
||||
# private IPv4 address for etcd
|
||||
records = [azurerm_network_interface.controllers.*.private_ip_address[count.index]]
|
||||
}
|
||||
|
||||
# Controller availability set to spread controllers
|
||||
resource "azurerm_availability_set" "controllers" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "${var.cluster_name}-controllers"
|
||||
location = var.region
|
||||
platform_fault_domain_count = 2
|
||||
platform_update_domain_count = 4
|
||||
managed = true
|
||||
}
|
||||
|
||||
# Controller instances
|
||||
resource "azurerm_linux_virtual_machine" "controllers" {
|
||||
count = var.controller_count
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "${var.cluster_name}-controller-${count.index}"
|
||||
location = var.region
|
||||
availability_set_id = azurerm_availability_set.controllers.id
|
||||
|
||||
size = var.controller_type
|
||||
custom_data = base64encode(data.ct_config.controller-ignitions.*.rendered[count.index])
|
||||
|
||||
# storage
|
||||
source_image_id = var.os_image
|
||||
os_disk {
|
||||
name = "${var.cluster_name}-controller-${count.index}"
|
||||
caching = "None"
|
||||
disk_size_gb = var.disk_size
|
||||
storage_account_type = "Premium_LRS"
|
||||
}
|
||||
|
||||
# network
|
||||
network_interface_ids = [
|
||||
azurerm_network_interface.controllers.*.id[count.index]
|
||||
]
|
||||
|
||||
# Azure requires setting admin_ssh_key, though Ignition custom_data handles it too
|
||||
admin_username = "core"
|
||||
admin_ssh_key {
|
||||
username = "core"
|
||||
public_key = var.ssh_authorized_key
|
||||
}
|
||||
|
||||
lifecycle {
|
||||
ignore_changes = [
|
||||
os_disk,
|
||||
custom_data,
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
# Controller public IPv4 addresses
|
||||
resource "azurerm_public_ip" "controllers" {
|
||||
count = var.controller_count
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "${var.cluster_name}-controller-${count.index}"
|
||||
location = azurerm_resource_group.cluster.location
|
||||
sku = "Standard"
|
||||
allocation_method = "Static"
|
||||
}
|
||||
|
||||
# Controller NICs with public and private IPv4
|
||||
resource "azurerm_network_interface" "controllers" {
|
||||
count = var.controller_count
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "${var.cluster_name}-controller-${count.index}"
|
||||
location = azurerm_resource_group.cluster.location
|
||||
|
||||
ip_configuration {
|
||||
name = "ip0"
|
||||
subnet_id = azurerm_subnet.controller.id
|
||||
private_ip_address_allocation = "Dynamic"
|
||||
# instance public IPv4
|
||||
public_ip_address_id = azurerm_public_ip.controllers.*.id[count.index]
|
||||
}
|
||||
}
|
||||
|
||||
# Associate controller network interface with controller security group
|
||||
resource "azurerm_network_interface_security_group_association" "controllers" {
|
||||
count = var.controller_count
|
||||
|
||||
network_interface_id = azurerm_network_interface.controllers[count.index].id
|
||||
network_security_group_id = azurerm_network_security_group.controller.id
|
||||
}
|
||||
|
||||
# Associate controller network interface with controller backend address pool
|
||||
resource "azurerm_network_interface_backend_address_pool_association" "controllers" {
|
||||
count = var.controller_count
|
||||
|
||||
network_interface_id = azurerm_network_interface.controllers[count.index].id
|
||||
ip_configuration_name = "ip0"
|
||||
backend_address_pool_id = azurerm_lb_backend_address_pool.controller.id
|
||||
}
|
||||
|
||||
# Controller Ignition configs
|
||||
data "ct_config" "controller-ignitions" {
|
||||
count = var.controller_count
|
||||
content = data.template_file.controller-configs.*.rendered[count.index]
|
||||
strict = true
|
||||
snippets = var.controller_snippets
|
||||
}
|
||||
|
||||
# Controller Fedora CoreOS configs
|
||||
data "template_file" "controller-configs" {
|
||||
count = var.controller_count
|
||||
|
||||
template = file("${path.module}/fcc/controller.yaml")
|
||||
|
||||
vars = {
|
||||
# Cannot use cyclic dependencies on controllers or their DNS records
|
||||
etcd_name = "etcd${count.index}"
|
||||
etcd_domain = "${var.cluster_name}-etcd${count.index}.${var.dns_zone}"
|
||||
# etcd0=https://cluster-etcd0.example.com,etcd1=https://cluster-etcd1.example.com,...
|
||||
etcd_initial_cluster = join(",", data.template_file.etcds.*.rendered)
|
||||
kubeconfig = indent(10, module.bootstrap.kubeconfig-kubelet)
|
||||
ssh_authorized_key = var.ssh_authorized_key
|
||||
cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
|
||||
cluster_domain_suffix = var.cluster_domain_suffix
|
||||
}
|
||||
}
|
||||
|
||||
data "template_file" "etcds" {
|
||||
count = var.controller_count
|
||||
template = "etcd$${index}=https://$${cluster_name}-etcd$${index}.$${dns_zone}:2380"
|
||||
|
||||
vars = {
|
||||
index = count.index
|
||||
cluster_name = var.cluster_name
|
||||
dns_zone = var.dns_zone
|
||||
}
|
||||
}
|
||||
|
213
azure/fedora-coreos/kubernetes/fcc/controller.yaml
Normal file
213
azure/fedora-coreos/kubernetes/fcc/controller.yaml
Normal file
@ -0,0 +1,213 @@
|
||||
---
|
||||
variant: fcos
|
||||
version: 1.0.0
|
||||
systemd:
|
||||
units:
|
||||
- name: etcd-member.service
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=etcd (System Container)
|
||||
Documentation=https://github.com/coreos/etcd
|
||||
Wants=network-online.target network.target
|
||||
After=network-online.target
|
||||
[Service]
|
||||
# https://github.com/opencontainers/runc/pull/1807
|
||||
# Type=notify
|
||||
# NotifyAccess=exec
|
||||
Type=exec
|
||||
Restart=on-failure
|
||||
RestartSec=10s
|
||||
TimeoutStartSec=0
|
||||
LimitNOFILE=40000
|
||||
ExecStartPre=/bin/mkdir -p /var/lib/etcd
|
||||
ExecStartPre=-/usr/bin/podman rm etcd
|
||||
#--volume $${NOTIFY_SOCKET}:/run/systemd/notify \
|
||||
ExecStart=/usr/bin/podman run --name etcd \
|
||||
--env-file /etc/etcd/etcd.env \
|
||||
--network host \
|
||||
--volume /var/lib/etcd:/var/lib/etcd:rw,Z \
|
||||
--volume /etc/ssl/etcd:/etc/ssl/certs:ro,Z \
|
||||
quay.io/coreos/etcd:v3.4.9
|
||||
ExecStop=/usr/bin/podman stop etcd
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
- name: docker.service
|
||||
enabled: true
|
||||
- name: wait-for-dns.service
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Wait for DNS entries
|
||||
Before=kubelet.service
|
||||
[Service]
|
||||
Type=oneshot
|
||||
RemainAfterExit=true
|
||||
ExecStart=/bin/sh -c 'while ! /usr/bin/grep '^[^#[:space:]]' /etc/resolv.conf > /dev/null; do sleep 1; done'
|
||||
[Install]
|
||||
RequiredBy=kubelet.service
|
||||
RequiredBy=etcd-member.service
|
||||
- name: kubelet.service
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Kubelet (System Container)
|
||||
Wants=rpc-statd.service
|
||||
[Service]
|
||||
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.18.4
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/cni/net.d
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
|
||||
ExecStartPre=/bin/mkdir -p /opt/cni/bin
|
||||
ExecStartPre=/bin/mkdir -p /var/lib/calico
|
||||
ExecStartPre=/bin/mkdir -p /var/lib/kubelet/volumeplugins
|
||||
ExecStartPre=/usr/bin/bash -c "grep 'certificate-authority-data' /etc/kubernetes/kubeconfig | awk '{print $2}' | base64 -d > /etc/kubernetes/ca.crt"
|
||||
ExecStartPre=-/usr/bin/podman rm kubelet
|
||||
ExecStart=/usr/bin/podman run --name kubelet \
|
||||
--privileged \
|
||||
--pid host \
|
||||
--network host \
|
||||
--volume /etc/kubernetes:/etc/kubernetes:ro,z \
|
||||
--volume /usr/lib/os-release:/etc/os-release:ro \
|
||||
--volume /etc/ssl/certs:/etc/ssl/certs:ro \
|
||||
--volume /lib/modules:/lib/modules:ro \
|
||||
--volume /run:/run \
|
||||
--volume /sys/fs/cgroup:/sys/fs/cgroup:ro \
|
||||
--volume /sys/fs/cgroup/systemd:/sys/fs/cgroup/systemd \
|
||||
--volume /etc/pki/tls/certs:/usr/share/ca-certificates:ro \
|
||||
--volume /var/lib/calico:/var/lib/calico:ro \
|
||||
--volume /var/lib/docker:/var/lib/docker \
|
||||
--volume /var/lib/kubelet:/var/lib/kubelet:rshared,z \
|
||||
--volume /var/log:/var/log \
|
||||
--volume /var/run/lock:/var/run/lock:z \
|
||||
--volume /opt/cni/bin:/opt/cni/bin:z \
|
||||
$${KUBELET_IMAGE} \
|
||||
--anonymous-auth=false \
|
||||
--authentication-token-webhook \
|
||||
--authorization-mode=Webhook \
|
||||
--bootstrap-kubeconfig=/etc/kubernetes/kubeconfig \
|
||||
--cgroup-driver=systemd \
|
||||
--cgroups-per-qos=true \
|
||||
--enforce-node-allocatable=pods \
|
||||
--client-ca-file=/etc/kubernetes/ca.crt \
|
||||
--cluster_dns=${cluster_dns_service_ip} \
|
||||
--cluster_domain=${cluster_domain_suffix} \
|
||||
--cni-conf-dir=/etc/kubernetes/cni/net.d \
|
||||
--healthz-port=0 \
|
||||
--kubeconfig=/var/lib/kubelet/kubeconfig \
|
||||
--network-plugin=cni \
|
||||
--node-labels=node.kubernetes.io/controller="true" \
|
||||
--pod-manifest-path=/etc/kubernetes/manifests \
|
||||
--read-only-port=0 \
|
||||
--register-with-taints=node-role.kubernetes.io/controller=:NoSchedule \
|
||||
--rotate-certificates \
|
||||
--volume-plugin-dir=/var/lib/kubelet/volumeplugins
|
||||
ExecStop=-/usr/bin/podman stop kubelet
|
||||
Delegate=yes
|
||||
Restart=always
|
||||
RestartSec=10
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
- name: bootstrap.service
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Kubernetes control plane
|
||||
ConditionPathExists=!/opt/bootstrap/bootstrap.done
|
||||
[Service]
|
||||
Type=oneshot
|
||||
RemainAfterExit=true
|
||||
WorkingDirectory=/opt/bootstrap
|
||||
ExecStartPre=-/usr/bin/podman rm bootstrap
|
||||
ExecStart=/usr/bin/podman run --name bootstrap \
|
||||
--network host \
|
||||
--volume /etc/kubernetes/bootstrap-secrets:/etc/kubernetes/secrets:ro,z \
|
||||
--volume /opt/bootstrap/assets:/assets:ro,Z \
|
||||
--volume /opt/bootstrap/apply:/apply:ro,Z \
|
||||
--entrypoint=/apply \
|
||||
quay.io/poseidon/kubelet:v1.18.4
|
||||
ExecStartPost=/bin/touch /opt/bootstrap/bootstrap.done
|
||||
ExecStartPost=-/usr/bin/podman stop bootstrap
|
||||
storage:
|
||||
directories:
|
||||
- path: /etc/kubernetes
|
||||
- path: /opt/bootstrap
|
||||
files:
|
||||
- path: /etc/kubernetes/kubeconfig
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
||||
${kubeconfig}
|
||||
- path: /opt/bootstrap/layout
|
||||
mode: 0544
|
||||
contents:
|
||||
inline: |
|
||||
#!/bin/bash -e
|
||||
mkdir -p -- auth tls/etcd tls/k8s static-manifests manifests/coredns manifests-networking
|
||||
awk '/#####/ {filename=$2; next} {print > filename}' assets
|
||||
mkdir -p /etc/ssl/etcd/etcd
|
||||
mkdir -p /etc/kubernetes/bootstrap-secrets
|
||||
mv tls/etcd/{peer*,server*} /etc/ssl/etcd/etcd/
|
||||
mv tls/etcd/etcd-client* /etc/kubernetes/bootstrap-secrets/
|
||||
chown -R etcd:etcd /etc/ssl/etcd
|
||||
chmod -R 500 /etc/ssl/etcd
|
||||
mv auth/kubeconfig /etc/kubernetes/bootstrap-secrets/
|
||||
mv tls/k8s/* /etc/kubernetes/bootstrap-secrets/
|
||||
mkdir -p /etc/kubernetes/manifests
|
||||
mv static-manifests/* /etc/kubernetes/manifests/
|
||||
mkdir -p /opt/bootstrap/assets
|
||||
mv manifests /opt/bootstrap/assets/manifests
|
||||
mv manifests-networking/* /opt/bootstrap/assets/manifests/
|
||||
rm -rf assets auth static-manifests tls manifests-networking
|
||||
- path: /opt/bootstrap/apply
|
||||
mode: 0544
|
||||
contents:
|
||||
inline: |
|
||||
#!/bin/bash -e
|
||||
export KUBECONFIG=/etc/kubernetes/secrets/kubeconfig
|
||||
until kubectl version; do
|
||||
echo "Waiting for static pod control plane"
|
||||
sleep 5
|
||||
done
|
||||
until kubectl apply -f /assets/manifests -R; do
|
||||
echo "Retry applying manifests"
|
||||
sleep 5
|
||||
done
|
||||
- path: /etc/sysctl.d/max-user-watches.conf
|
||||
contents:
|
||||
inline: |
|
||||
fs.inotify.max_user_watches=16184
|
||||
- path: /etc/systemd/system.conf.d/accounting.conf
|
||||
contents:
|
||||
inline: |
|
||||
[Manager]
|
||||
DefaultCPUAccounting=yes
|
||||
DefaultMemoryAccounting=yes
|
||||
DefaultBlockIOAccounting=yes
|
||||
- path: /etc/etcd/etcd.env
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
||||
# TODO: Use a systemd dropin once podman v1.4.5 is avail.
|
||||
NOTIFY_SOCKET=/run/systemd/notify
|
||||
ETCD_NAME=${etcd_name}
|
||||
ETCD_DATA_DIR=/var/lib/etcd
|
||||
ETCD_ADVERTISE_CLIENT_URLS=https://${etcd_domain}:2379
|
||||
ETCD_INITIAL_ADVERTISE_PEER_URLS=https://${etcd_domain}:2380
|
||||
ETCD_LISTEN_CLIENT_URLS=https://0.0.0.0:2379
|
||||
ETCD_LISTEN_PEER_URLS=https://0.0.0.0:2380
|
||||
ETCD_LISTEN_METRICS_URLS=http://0.0.0.0:2381
|
||||
ETCD_INITIAL_CLUSTER=${etcd_initial_cluster}
|
||||
ETCD_STRICT_RECONFIG_CHECK=true
|
||||
ETCD_TRUSTED_CA_FILE=/etc/ssl/certs/etcd/server-ca.crt
|
||||
ETCD_CERT_FILE=/etc/ssl/certs/etcd/server.crt
|
||||
ETCD_KEY_FILE=/etc/ssl/certs/etcd/server.key
|
||||
ETCD_CLIENT_CERT_AUTH=true
|
||||
ETCD_PEER_TRUSTED_CA_FILE=/etc/ssl/certs/etcd/peer-ca.crt
|
||||
ETCD_PEER_CERT_FILE=/etc/ssl/certs/etcd/peer.crt
|
||||
ETCD_PEER_KEY_FILE=/etc/ssl/certs/etcd/peer.key
|
||||
ETCD_PEER_CLIENT_CERT_AUTH=true
|
||||
passwd:
|
||||
users:
|
||||
- name: core
|
||||
ssh_authorized_keys:
|
||||
- ${ssh_authorized_key}
|
||||
|
161
azure/fedora-coreos/kubernetes/lb.tf
Normal file
161
azure/fedora-coreos/kubernetes/lb.tf
Normal file
@ -0,0 +1,161 @@
|
||||
# DNS record for the apiserver load balancer
|
||||
resource "azurerm_dns_a_record" "apiserver" {
|
||||
resource_group_name = var.dns_zone_group
|
||||
|
||||
# DNS Zone name where record should be created
|
||||
zone_name = var.dns_zone
|
||||
|
||||
# DNS record
|
||||
name = var.cluster_name
|
||||
ttl = 300
|
||||
|
||||
# IPv4 address of apiserver load balancer
|
||||
records = [azurerm_public_ip.apiserver-ipv4.ip_address]
|
||||
}
|
||||
|
||||
# Static IPv4 address for the apiserver frontend
|
||||
resource "azurerm_public_ip" "apiserver-ipv4" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "${var.cluster_name}-apiserver-ipv4"
|
||||
location = var.region
|
||||
sku = "Standard"
|
||||
allocation_method = "Static"
|
||||
}
|
||||
|
||||
# Static IPv4 address for the ingress frontend
|
||||
resource "azurerm_public_ip" "ingress-ipv4" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "${var.cluster_name}-ingress-ipv4"
|
||||
location = var.region
|
||||
sku = "Standard"
|
||||
allocation_method = "Static"
|
||||
}
|
||||
|
||||
# Network Load Balancer for apiservers and ingress
|
||||
resource "azurerm_lb" "cluster" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = var.cluster_name
|
||||
location = var.region
|
||||
sku = "Standard"
|
||||
|
||||
frontend_ip_configuration {
|
||||
name = "apiserver"
|
||||
public_ip_address_id = azurerm_public_ip.apiserver-ipv4.id
|
||||
}
|
||||
|
||||
frontend_ip_configuration {
|
||||
name = "ingress"
|
||||
public_ip_address_id = azurerm_public_ip.ingress-ipv4.id
|
||||
}
|
||||
}
|
||||
|
||||
resource "azurerm_lb_rule" "apiserver" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "apiserver"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
frontend_ip_configuration_name = "apiserver"
|
||||
|
||||
protocol = "Tcp"
|
||||
frontend_port = 6443
|
||||
backend_port = 6443
|
||||
backend_address_pool_id = azurerm_lb_backend_address_pool.controller.id
|
||||
probe_id = azurerm_lb_probe.apiserver.id
|
||||
}
|
||||
|
||||
resource "azurerm_lb_rule" "ingress-http" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "ingress-http"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
frontend_ip_configuration_name = "ingress"
|
||||
disable_outbound_snat = true
|
||||
|
||||
protocol = "Tcp"
|
||||
frontend_port = 80
|
||||
backend_port = 80
|
||||
backend_address_pool_id = azurerm_lb_backend_address_pool.worker.id
|
||||
probe_id = azurerm_lb_probe.ingress.id
|
||||
}
|
||||
|
||||
resource "azurerm_lb_rule" "ingress-https" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "ingress-https"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
frontend_ip_configuration_name = "ingress"
|
||||
disable_outbound_snat = true
|
||||
|
||||
protocol = "Tcp"
|
||||
frontend_port = 443
|
||||
backend_port = 443
|
||||
backend_address_pool_id = azurerm_lb_backend_address_pool.worker.id
|
||||
probe_id = azurerm_lb_probe.ingress.id
|
||||
}
|
||||
|
||||
# Worker outbound TCP/UDP SNAT
|
||||
resource "azurerm_lb_outbound_rule" "worker-outbound" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "worker"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
frontend_ip_configuration {
|
||||
name = "ingress"
|
||||
}
|
||||
|
||||
protocol = "All"
|
||||
backend_address_pool_id = azurerm_lb_backend_address_pool.worker.id
|
||||
}
|
||||
|
||||
# Address pool of controllers
|
||||
resource "azurerm_lb_backend_address_pool" "controller" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "controller"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
}
|
||||
|
||||
# Address pool of workers
|
||||
resource "azurerm_lb_backend_address_pool" "worker" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "worker"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
}
|
||||
|
||||
# Health checks / probes
|
||||
|
||||
# TCP health check for apiserver
|
||||
resource "azurerm_lb_probe" "apiserver" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "apiserver"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
protocol = "Tcp"
|
||||
port = 6443
|
||||
|
||||
# unhealthy threshold
|
||||
number_of_probes = 3
|
||||
|
||||
interval_in_seconds = 5
|
||||
}
|
||||
|
||||
# HTTP health check for ingress
|
||||
resource "azurerm_lb_probe" "ingress" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "ingress"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
protocol = "Http"
|
||||
port = 10254
|
||||
request_path = "/healthz"
|
||||
|
||||
# unhealthy threshold
|
||||
number_of_probes = 3
|
||||
|
||||
interval_in_seconds = 5
|
||||
}
|
||||
|
44
azure/fedora-coreos/kubernetes/network.tf
Normal file
44
azure/fedora-coreos/kubernetes/network.tf
Normal file
@ -0,0 +1,44 @@
|
||||
# Organize cluster into a resource group
|
||||
resource "azurerm_resource_group" "cluster" {
|
||||
name = var.cluster_name
|
||||
location = var.region
|
||||
}
|
||||
|
||||
resource "azurerm_virtual_network" "network" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = var.cluster_name
|
||||
location = azurerm_resource_group.cluster.location
|
||||
address_space = [var.host_cidr]
|
||||
}
|
||||
|
||||
# Subnets - separate subnets for controller and workers because Azure
|
||||
# network security groups are based on IPv4 CIDR rather than instance
|
||||
# tags like GCP or security group membership like AWS
|
||||
|
||||
resource "azurerm_subnet" "controller" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "controller"
|
||||
virtual_network_name = azurerm_virtual_network.network.name
|
||||
address_prefixes = [cidrsubnet(var.host_cidr, 1, 0)]
|
||||
}
|
||||
|
||||
resource "azurerm_subnet_network_security_group_association" "controller" {
|
||||
subnet_id = azurerm_subnet.controller.id
|
||||
network_security_group_id = azurerm_network_security_group.controller.id
|
||||
}
|
||||
|
||||
resource "azurerm_subnet" "worker" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "worker"
|
||||
virtual_network_name = azurerm_virtual_network.network.name
|
||||
address_prefixes = [cidrsubnet(var.host_cidr, 1, 1)]
|
||||
}
|
||||
|
||||
resource "azurerm_subnet_network_security_group_association" "worker" {
|
||||
subnet_id = azurerm_subnet.worker.id
|
||||
network_security_group_id = azurerm_network_security_group.worker.id
|
||||
}
|
||||
|
59
azure/fedora-coreos/kubernetes/outputs.tf
Normal file
59
azure/fedora-coreos/kubernetes/outputs.tf
Normal file
@ -0,0 +1,59 @@
|
||||
output "kubeconfig-admin" {
|
||||
value = module.bootstrap.kubeconfig-admin
|
||||
}
|
||||
|
||||
# Outputs for Kubernetes Ingress
|
||||
|
||||
output "ingress_static_ipv4" {
|
||||
value = azurerm_public_ip.ingress-ipv4.ip_address
|
||||
description = "IPv4 address of the load balancer for distributing traffic to Ingress controllers"
|
||||
}
|
||||
|
||||
# Outputs for worker pools
|
||||
|
||||
output "region" {
|
||||
value = azurerm_resource_group.cluster.location
|
||||
}
|
||||
|
||||
output "resource_group_name" {
|
||||
value = azurerm_resource_group.cluster.name
|
||||
}
|
||||
|
||||
output "resource_group_id" {
|
||||
value = azurerm_resource_group.cluster.id
|
||||
}
|
||||
|
||||
output "subnet_id" {
|
||||
value = azurerm_subnet.worker.id
|
||||
}
|
||||
|
||||
output "security_group_id" {
|
||||
value = azurerm_network_security_group.worker.id
|
||||
}
|
||||
|
||||
output "kubeconfig" {
|
||||
value = module.bootstrap.kubeconfig-kubelet
|
||||
}
|
||||
|
||||
# Outputs for custom firewalling
|
||||
|
||||
output "worker_security_group_name" {
|
||||
value = azurerm_network_security_group.worker.name
|
||||
}
|
||||
|
||||
output "worker_address_prefix" {
|
||||
description = "Worker network subnet CIDR address (for source/destination)"
|
||||
value = azurerm_subnet.worker.address_prefix
|
||||
}
|
||||
|
||||
# Outputs for custom load balancing
|
||||
|
||||
output "loadbalancer_id" {
|
||||
description = "ID of the cluster load balancer"
|
||||
value = azurerm_lb.cluster.id
|
||||
}
|
||||
|
||||
output "backend_address_pool_id" {
|
||||
description = "ID of the worker backend address pool"
|
||||
value = azurerm_lb_backend_address_pool.worker.id
|
||||
}
|
336
azure/fedora-coreos/kubernetes/security.tf
Normal file
336
azure/fedora-coreos/kubernetes/security.tf
Normal file
@ -0,0 +1,336 @@
|
||||
# Controller security group
|
||||
|
||||
resource "azurerm_network_security_group" "controller" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "${var.cluster_name}-controller"
|
||||
location = azurerm_resource_group.cluster.location
|
||||
}
|
||||
|
||||
resource "azurerm_network_security_rule" "controller-ssh" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "allow-ssh"
|
||||
network_security_group_name = azurerm_network_security_group.controller.name
|
||||
priority = "2000"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "22"
|
||||
source_address_prefix = "*"
|
||||
destination_address_prefix = azurerm_subnet.controller.address_prefix
|
||||
}
|
||||
|
||||
resource "azurerm_network_security_rule" "controller-etcd" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "allow-etcd"
|
||||
network_security_group_name = azurerm_network_security_group.controller.name
|
||||
priority = "2005"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "2379-2380"
|
||||
source_address_prefix = azurerm_subnet.controller.address_prefix
|
||||
destination_address_prefix = azurerm_subnet.controller.address_prefix
|
||||
}
|
||||
|
||||
# Allow Prometheus to scrape etcd metrics
|
||||
resource "azurerm_network_security_rule" "controller-etcd-metrics" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "allow-etcd-metrics"
|
||||
network_security_group_name = azurerm_network_security_group.controller.name
|
||||
priority = "2010"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "2381"
|
||||
source_address_prefix = azurerm_subnet.worker.address_prefix
|
||||
destination_address_prefix = azurerm_subnet.controller.address_prefix
|
||||
}
|
||||
|
||||
# Allow Prometheus to scrape kube-proxy metrics
|
||||
resource "azurerm_network_security_rule" "controller-kube-proxy" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "allow-kube-proxy-metrics"
|
||||
network_security_group_name = azurerm_network_security_group.controller.name
|
||||
priority = "2011"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "10249"
|
||||
source_address_prefix = azurerm_subnet.worker.address_prefix
|
||||
destination_address_prefix = azurerm_subnet.controller.address_prefix
|
||||
}
|
||||
|
||||
# Allow Prometheus to scrape kube-scheduler and kube-controller-manager metrics
|
||||
resource "azurerm_network_security_rule" "controller-kube-metrics" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "allow-kube-metrics"
|
||||
network_security_group_name = azurerm_network_security_group.controller.name
|
||||
priority = "2012"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "10251-10252"
|
||||
source_address_prefix = azurerm_subnet.worker.address_prefix
|
||||
destination_address_prefix = azurerm_subnet.controller.address_prefix
|
||||
}
|
||||
|
||||
resource "azurerm_network_security_rule" "controller-apiserver" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "allow-apiserver"
|
||||
network_security_group_name = azurerm_network_security_group.controller.name
|
||||
priority = "2015"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "6443"
|
||||
source_address_prefix = "*"
|
||||
destination_address_prefix = azurerm_subnet.controller.address_prefix
|
||||
}
|
||||
|
||||
resource "azurerm_network_security_rule" "controller-vxlan" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "allow-vxlan"
|
||||
network_security_group_name = azurerm_network_security_group.controller.name
|
||||
priority = "2020"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Udp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "4789"
|
||||
source_address_prefixes = [azurerm_subnet.controller.address_prefix, azurerm_subnet.worker.address_prefix]
|
||||
destination_address_prefix = azurerm_subnet.controller.address_prefix
|
||||
}
|
||||
|
||||
# Allow Prometheus to scrape node-exporter daemonset
|
||||
resource "azurerm_network_security_rule" "controller-node-exporter" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "allow-node-exporter"
|
||||
network_security_group_name = azurerm_network_security_group.controller.name
|
||||
priority = "2025"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "9100"
|
||||
source_address_prefix = azurerm_subnet.worker.address_prefix
|
||||
destination_address_prefix = azurerm_subnet.controller.address_prefix
|
||||
}
|
||||
|
||||
# Allow apiserver to access kubelet's for exec, log, port-forward
|
||||
resource "azurerm_network_security_rule" "controller-kubelet" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "allow-kubelet"
|
||||
network_security_group_name = azurerm_network_security_group.controller.name
|
||||
priority = "2030"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "10250"
|
||||
|
||||
# allow Prometheus to scrape kubelet metrics too
|
||||
source_address_prefixes = [azurerm_subnet.controller.address_prefix, azurerm_subnet.worker.address_prefix]
|
||||
destination_address_prefix = azurerm_subnet.controller.address_prefix
|
||||
}
|
||||
|
||||
# Override Azure AllowVNetInBound and AllowAzureLoadBalancerInBound
|
||||
# https://docs.microsoft.com/en-us/azure/virtual-network/security-overview#default-security-rules
|
||||
|
||||
resource "azurerm_network_security_rule" "controller-allow-loadblancer" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "allow-loadbalancer"
|
||||
network_security_group_name = azurerm_network_security_group.controller.name
|
||||
priority = "3000"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "*"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "*"
|
||||
source_address_prefix = "AzureLoadBalancer"
|
||||
destination_address_prefix = "*"
|
||||
}
|
||||
|
||||
resource "azurerm_network_security_rule" "controller-deny-all" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "deny-all"
|
||||
network_security_group_name = azurerm_network_security_group.controller.name
|
||||
priority = "3005"
|
||||
access = "Deny"
|
||||
direction = "Inbound"
|
||||
protocol = "*"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "*"
|
||||
source_address_prefix = "*"
|
||||
destination_address_prefix = "*"
|
||||
}
|
||||
|
||||
# Worker security group
|
||||
|
||||
resource "azurerm_network_security_group" "worker" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "${var.cluster_name}-worker"
|
||||
location = azurerm_resource_group.cluster.location
|
||||
}
|
||||
|
||||
resource "azurerm_network_security_rule" "worker-ssh" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "allow-ssh"
|
||||
network_security_group_name = azurerm_network_security_group.worker.name
|
||||
priority = "2000"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "22"
|
||||
source_address_prefix = azurerm_subnet.controller.address_prefix
|
||||
destination_address_prefix = azurerm_subnet.worker.address_prefix
|
||||
}
|
||||
|
||||
resource "azurerm_network_security_rule" "worker-http" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "allow-http"
|
||||
network_security_group_name = azurerm_network_security_group.worker.name
|
||||
priority = "2005"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "80"
|
||||
source_address_prefix = "*"
|
||||
destination_address_prefix = azurerm_subnet.worker.address_prefix
|
||||
}
|
||||
|
||||
resource "azurerm_network_security_rule" "worker-https" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "allow-https"
|
||||
network_security_group_name = azurerm_network_security_group.worker.name
|
||||
priority = "2010"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "443"
|
||||
source_address_prefix = "*"
|
||||
destination_address_prefix = azurerm_subnet.worker.address_prefix
|
||||
}
|
||||
|
||||
resource "azurerm_network_security_rule" "worker-vxlan" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "allow-vxlan"
|
||||
network_security_group_name = azurerm_network_security_group.worker.name
|
||||
priority = "2015"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Udp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "4789"
|
||||
source_address_prefixes = [azurerm_subnet.controller.address_prefix, azurerm_subnet.worker.address_prefix]
|
||||
destination_address_prefix = azurerm_subnet.worker.address_prefix
|
||||
}
|
||||
|
||||
# Allow Prometheus to scrape node-exporter daemonset
|
||||
resource "azurerm_network_security_rule" "worker-node-exporter" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "allow-node-exporter"
|
||||
network_security_group_name = azurerm_network_security_group.worker.name
|
||||
priority = "2020"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "9100"
|
||||
source_address_prefix = azurerm_subnet.worker.address_prefix
|
||||
destination_address_prefix = azurerm_subnet.worker.address_prefix
|
||||
}
|
||||
|
||||
# Allow Prometheus to scrape kube-proxy
|
||||
resource "azurerm_network_security_rule" "worker-kube-proxy" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "allow-kube-proxy"
|
||||
network_security_group_name = azurerm_network_security_group.worker.name
|
||||
priority = "2024"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "10249"
|
||||
source_address_prefix = azurerm_subnet.worker.address_prefix
|
||||
destination_address_prefix = azurerm_subnet.worker.address_prefix
|
||||
}
|
||||
|
||||
# Allow apiserver to access kubelet's for exec, log, port-forward
|
||||
resource "azurerm_network_security_rule" "worker-kubelet" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "allow-kubelet"
|
||||
network_security_group_name = azurerm_network_security_group.worker.name
|
||||
priority = "2025"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "10250"
|
||||
|
||||
# allow Prometheus to scrape kubelet metrics too
|
||||
source_address_prefixes = [azurerm_subnet.controller.address_prefix, azurerm_subnet.worker.address_prefix]
|
||||
destination_address_prefix = azurerm_subnet.worker.address_prefix
|
||||
}
|
||||
|
||||
# Override Azure AllowVNetInBound and AllowAzureLoadBalancerInBound
|
||||
# https://docs.microsoft.com/en-us/azure/virtual-network/security-overview#default-security-rules
|
||||
|
||||
resource "azurerm_network_security_rule" "worker-allow-loadblancer" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "allow-loadbalancer"
|
||||
network_security_group_name = azurerm_network_security_group.worker.name
|
||||
priority = "3000"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "*"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "*"
|
||||
source_address_prefix = "AzureLoadBalancer"
|
||||
destination_address_prefix = "*"
|
||||
}
|
||||
|
||||
resource "azurerm_network_security_rule" "worker-deny-all" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "deny-all"
|
||||
network_security_group_name = azurerm_network_security_group.worker.name
|
||||
priority = "3005"
|
||||
access = "Deny"
|
||||
direction = "Inbound"
|
||||
protocol = "*"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "*"
|
||||
source_address_prefix = "*"
|
||||
destination_address_prefix = "*"
|
||||
}
|
||||
|
59
azure/fedora-coreos/kubernetes/ssh.tf
Normal file
59
azure/fedora-coreos/kubernetes/ssh.tf
Normal file
@ -0,0 +1,59 @@
|
||||
locals {
|
||||
# format assets for distribution
|
||||
assets_bundle = [
|
||||
# header with the unpack location
|
||||
for key, value in module.bootstrap.assets_dist :
|
||||
format("##### %s\n%s", key, value)
|
||||
]
|
||||
}
|
||||
|
||||
# Secure copy assets to controllers.
|
||||
resource "null_resource" "copy-controller-secrets" {
|
||||
count = var.controller_count
|
||||
|
||||
depends_on = [
|
||||
module.bootstrap,
|
||||
azurerm_linux_virtual_machine.controllers
|
||||
]
|
||||
|
||||
connection {
|
||||
type = "ssh"
|
||||
host = azurerm_public_ip.controllers.*.ip_address[count.index]
|
||||
user = "core"
|
||||
timeout = "15m"
|
||||
}
|
||||
|
||||
provisioner "file" {
|
||||
content = join("\n", local.assets_bundle)
|
||||
destination = "$HOME/assets"
|
||||
}
|
||||
|
||||
provisioner "remote-exec" {
|
||||
inline = [
|
||||
"sudo /opt/bootstrap/layout",
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
# Connect to a controller to perform one-time cluster bootstrap.
|
||||
resource "null_resource" "bootstrap" {
|
||||
depends_on = [
|
||||
null_resource.copy-controller-secrets,
|
||||
module.workers,
|
||||
azurerm_dns_a_record.apiserver,
|
||||
]
|
||||
|
||||
connection {
|
||||
type = "ssh"
|
||||
host = azurerm_public_ip.controllers.*.ip_address[0]
|
||||
user = "core"
|
||||
timeout = "15m"
|
||||
}
|
||||
|
||||
provisioner "remote-exec" {
|
||||
inline = [
|
||||
"sudo systemctl start bootstrap",
|
||||
]
|
||||
}
|
||||
}
|
||||
|
143
azure/fedora-coreos/kubernetes/variables.tf
Normal file
143
azure/fedora-coreos/kubernetes/variables.tf
Normal file
@ -0,0 +1,143 @@
|
||||
variable "cluster_name" {
|
||||
type = string
|
||||
description = "Unique cluster name (prepended to dns_zone)"
|
||||
}
|
||||
|
||||
# Azure
|
||||
|
||||
variable "region" {
|
||||
type = string
|
||||
description = "Azure Region (e.g. centralus , see `az account list-locations --output table`)"
|
||||
}
|
||||
|
||||
variable "dns_zone" {
|
||||
type = string
|
||||
description = "Azure DNS Zone (e.g. azure.example.com)"
|
||||
}
|
||||
|
||||
variable "dns_zone_group" {
|
||||
type = string
|
||||
description = "Resource group where the Azure DNS Zone resides (e.g. global)"
|
||||
}
|
||||
|
||||
# instances
|
||||
|
||||
variable "controller_count" {
|
||||
type = number
|
||||
description = "Number of controllers (i.e. masters)"
|
||||
default = 1
|
||||
}
|
||||
|
||||
variable "worker_count" {
|
||||
type = number
|
||||
description = "Number of workers"
|
||||
default = 1
|
||||
}
|
||||
|
||||
variable "controller_type" {
|
||||
type = string
|
||||
description = "Machine type for controllers (see `az vm list-skus --location centralus`)"
|
||||
default = "Standard_B2s"
|
||||
}
|
||||
|
||||
variable "worker_type" {
|
||||
type = string
|
||||
description = "Machine type for workers (see `az vm list-skus --location centralus`)"
|
||||
default = "Standard_DS1_v2"
|
||||
}
|
||||
|
||||
variable "os_image" {
|
||||
type = string
|
||||
description = "Fedora CoreOS image for instances"
|
||||
}
|
||||
|
||||
variable "disk_size" {
|
||||
type = number
|
||||
description = "Size of the disk in GB"
|
||||
default = 40
|
||||
}
|
||||
|
||||
variable "worker_priority" {
|
||||
type = string
|
||||
description = "Set worker priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be deallocated at any time."
|
||||
default = "Regular"
|
||||
}
|
||||
|
||||
variable "controller_snippets" {
|
||||
type = list(string)
|
||||
description = "Controller Fedora CoreOS Config snippets"
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "worker_snippets" {
|
||||
type = list(string)
|
||||
description = "Worker Fedora CoreOS Config snippets"
|
||||
default = []
|
||||
}
|
||||
|
||||
# configuration
|
||||
|
||||
variable "ssh_authorized_key" {
|
||||
type = string
|
||||
description = "SSH public key for user 'core'"
|
||||
}
|
||||
|
||||
variable "networking" {
|
||||
type = string
|
||||
description = "Choice of networking provider (flannel or calico)"
|
||||
default = "calico"
|
||||
}
|
||||
|
||||
variable "host_cidr" {
|
||||
type = string
|
||||
description = "CIDR IPv4 range to assign to instances"
|
||||
default = "10.0.0.0/16"
|
||||
}
|
||||
|
||||
variable "pod_cidr" {
|
||||
type = string
|
||||
description = "CIDR IPv4 range to assign Kubernetes pods"
|
||||
default = "10.2.0.0/16"
|
||||
}
|
||||
|
||||
variable "service_cidr" {
|
||||
type = string
|
||||
description = <<EOD
|
||||
CIDR IPv4 range to assign Kubernetes services.
|
||||
The 1st IP will be reserved for kube_apiserver, the 10th IP will be reserved for coredns.
|
||||
EOD
|
||||
default = "10.3.0.0/16"
|
||||
}
|
||||
|
||||
variable "enable_reporting" {
|
||||
type = bool
|
||||
description = "Enable usage or analytics reporting to upstreams (Calico)"
|
||||
default = false
|
||||
}
|
||||
|
||||
variable "enable_aggregation" {
|
||||
type = bool
|
||||
description = "Enable the Kubernetes Aggregation Layer (defaults to false)"
|
||||
default = false
|
||||
}
|
||||
|
||||
variable "worker_node_labels" {
|
||||
type = list(string)
|
||||
description = "List of initial worker node labels"
|
||||
default = []
|
||||
}
|
||||
|
||||
# unofficial, undocumented, unsupported
|
||||
|
||||
variable "asset_dir" {
|
||||
type = string
|
||||
description = "Absolute path to a directory where generated assets should be placed (contains secrets)"
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "cluster_domain_suffix" {
|
||||
type = string
|
||||
description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. foo.default.svc.cluster.local) "
|
||||
default = "cluster.local"
|
||||
}
|
||||
|
12
azure/fedora-coreos/kubernetes/versions.tf
Normal file
12
azure/fedora-coreos/kubernetes/versions.tf
Normal file
@ -0,0 +1,12 @@
|
||||
# Terraform version and plugin versions
|
||||
|
||||
terraform {
|
||||
required_version = "~> 0.12.6"
|
||||
required_providers {
|
||||
azurerm = "~> 2.8"
|
||||
ct = "~> 0.4"
|
||||
template = "~> 2.1"
|
||||
null = "~> 2.1"
|
||||
}
|
||||
}
|
||||
|
24
azure/fedora-coreos/kubernetes/workers.tf
Normal file
24
azure/fedora-coreos/kubernetes/workers.tf
Normal file
@ -0,0 +1,24 @@
|
||||
module "workers" {
|
||||
source = "./workers"
|
||||
name = var.cluster_name
|
||||
|
||||
# Azure
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
region = azurerm_resource_group.cluster.location
|
||||
subnet_id = azurerm_subnet.worker.id
|
||||
security_group_id = azurerm_network_security_group.worker.id
|
||||
backend_address_pool_id = azurerm_lb_backend_address_pool.worker.id
|
||||
|
||||
worker_count = var.worker_count
|
||||
vm_type = var.worker_type
|
||||
os_image = var.os_image
|
||||
priority = var.worker_priority
|
||||
|
||||
# configuration
|
||||
kubeconfig = module.bootstrap.kubeconfig-kubelet
|
||||
ssh_authorized_key = var.ssh_authorized_key
|
||||
service_cidr = var.service_cidr
|
||||
cluster_domain_suffix = var.cluster_domain_suffix
|
||||
snippets = var.worker_snippets
|
||||
node_labels = var.worker_node_labels
|
||||
}
|
120
azure/fedora-coreos/kubernetes/workers/fcc/worker.yaml
Normal file
120
azure/fedora-coreos/kubernetes/workers/fcc/worker.yaml
Normal file
@ -0,0 +1,120 @@
|
||||
---
|
||||
variant: fcos
|
||||
version: 1.0.0
|
||||
systemd:
|
||||
units:
|
||||
- name: docker.service
|
||||
enabled: true
|
||||
- name: wait-for-dns.service
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Wait for DNS entries
|
||||
Before=kubelet.service
|
||||
[Service]
|
||||
Type=oneshot
|
||||
RemainAfterExit=true
|
||||
ExecStart=/bin/sh -c 'while ! /usr/bin/grep '^[^#[:space:]]' /etc/resolv.conf > /dev/null; do sleep 1; done'
|
||||
[Install]
|
||||
RequiredBy=kubelet.service
|
||||
- name: kubelet.service
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Kubelet (System Container)
|
||||
Wants=rpc-statd.service
|
||||
[Service]
|
||||
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.18.4
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/cni/net.d
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
|
||||
ExecStartPre=/bin/mkdir -p /opt/cni/bin
|
||||
ExecStartPre=/bin/mkdir -p /var/lib/calico
|
||||
ExecStartPre=/bin/mkdir -p /var/lib/kubelet/volumeplugins
|
||||
ExecStartPre=/usr/bin/bash -c "grep 'certificate-authority-data' /etc/kubernetes/kubeconfig | awk '{print $2}' | base64 -d > /etc/kubernetes/ca.crt"
|
||||
ExecStartPre=-/usr/bin/podman rm kubelet
|
||||
ExecStart=/usr/bin/podman run --name kubelet \
|
||||
--privileged \
|
||||
--pid host \
|
||||
--network host \
|
||||
--volume /etc/kubernetes:/etc/kubernetes:ro,z \
|
||||
--volume /usr/lib/os-release:/etc/os-release:ro \
|
||||
--volume /etc/ssl/certs:/etc/ssl/certs:ro \
|
||||
--volume /lib/modules:/lib/modules:ro \
|
||||
--volume /run:/run \
|
||||
--volume /sys/fs/cgroup:/sys/fs/cgroup:ro \
|
||||
--volume /sys/fs/cgroup/systemd:/sys/fs/cgroup/systemd \
|
||||
--volume /etc/pki/tls/certs:/usr/share/ca-certificates:ro \
|
||||
--volume /var/lib/calico:/var/lib/calico:ro \
|
||||
--volume /var/lib/docker:/var/lib/docker \
|
||||
--volume /var/lib/kubelet:/var/lib/kubelet:rshared,z \
|
||||
--volume /var/log:/var/log \
|
||||
--volume /var/run/lock:/var/run/lock:z \
|
||||
--volume /opt/cni/bin:/opt/cni/bin:z \
|
||||
$${KUBELET_IMAGE} \
|
||||
--anonymous-auth=false \
|
||||
--authentication-token-webhook \
|
||||
--authorization-mode=Webhook \
|
||||
--bootstrap-kubeconfig=/etc/kubernetes/kubeconfig \
|
||||
--cgroup-driver=systemd \
|
||||
--cgroups-per-qos=true \
|
||||
--enforce-node-allocatable=pods \
|
||||
--client-ca-file=/etc/kubernetes/ca.crt \
|
||||
--cluster_dns=${cluster_dns_service_ip} \
|
||||
--cluster_domain=${cluster_domain_suffix} \
|
||||
--cni-conf-dir=/etc/kubernetes/cni/net.d \
|
||||
--healthz-port=0 \
|
||||
--kubeconfig=/var/lib/kubelet/kubeconfig \
|
||||
--network-plugin=cni \
|
||||
--node-labels=node.kubernetes.io/node \
|
||||
%{~ for label in split(",", node_labels) ~}
|
||||
--node-labels=${label} \
|
||||
%{~ endfor ~}
|
||||
--pod-manifest-path=/etc/kubernetes/manifests \
|
||||
--read-only-port=0 \
|
||||
--rotate-certificates \
|
||||
--volume-plugin-dir=/var/lib/kubelet/volumeplugins
|
||||
ExecStop=-/usr/bin/podman stop kubelet
|
||||
Delegate=yes
|
||||
Restart=always
|
||||
RestartSec=10
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
- name: delete-node.service
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Delete Kubernetes node on shutdown
|
||||
[Service]
|
||||
Type=oneshot
|
||||
RemainAfterExit=true
|
||||
ExecStart=/bin/true
|
||||
ExecStop=/bin/bash -c '/usr/bin/podman run --volume /etc/kubernetes:/etc/kubernetes:ro,z --entrypoint /usr/local/bin/kubectl quay.io/poseidon/kubelet:v1.18.4 --kubeconfig=/etc/kubernetes/kubeconfig delete node $HOSTNAME'
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
storage:
|
||||
directories:
|
||||
- path: /etc/kubernetes
|
||||
files:
|
||||
- path: /etc/kubernetes/kubeconfig
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
||||
${kubeconfig}
|
||||
- path: /etc/sysctl.d/max-user-watches.conf
|
||||
contents:
|
||||
inline: |
|
||||
fs.inotify.max_user_watches=16184
|
||||
- path: /etc/systemd/system.conf.d/accounting.conf
|
||||
contents:
|
||||
inline: |
|
||||
[Manager]
|
||||
DefaultCPUAccounting=yes
|
||||
DefaultMemoryAccounting=yes
|
||||
DefaultBlockIOAccounting=yes
|
||||
passwd:
|
||||
users:
|
||||
- name: core
|
||||
ssh_authorized_keys:
|
||||
- ${ssh_authorized_key}
|
||||
|
||||
|
98
azure/fedora-coreos/kubernetes/workers/variables.tf
Normal file
98
azure/fedora-coreos/kubernetes/workers/variables.tf
Normal file
@ -0,0 +1,98 @@
|
||||
variable "name" {
|
||||
type = string
|
||||
description = "Unique name for the worker pool"
|
||||
}
|
||||
|
||||
# Azure
|
||||
|
||||
variable "region" {
|
||||
type = string
|
||||
description = "Must be set to the Azure Region of cluster"
|
||||
}
|
||||
|
||||
variable "resource_group_name" {
|
||||
type = string
|
||||
description = "Must be set to the resource group name of cluster"
|
||||
}
|
||||
|
||||
variable "subnet_id" {
|
||||
type = string
|
||||
description = "Must be set to the `worker_subnet_id` output by cluster"
|
||||
}
|
||||
|
||||
variable "security_group_id" {
|
||||
type = string
|
||||
description = "Must be set to the `worker_security_group_id` output by cluster"
|
||||
}
|
||||
|
||||
variable "backend_address_pool_id" {
|
||||
type = string
|
||||
description = "Must be set to the `worker_backend_address_pool_id` output by cluster"
|
||||
}
|
||||
|
||||
# instances
|
||||
|
||||
variable "worker_count" {
|
||||
type = number
|
||||
description = "Number of instances"
|
||||
default = 1
|
||||
}
|
||||
|
||||
variable "vm_type" {
|
||||
type = string
|
||||
description = "Machine type for instances (see `az vm list-skus --location centralus`)"
|
||||
default = "Standard_DS1_v2"
|
||||
}
|
||||
|
||||
variable "os_image" {
|
||||
type = string
|
||||
description = "Fedora CoreOS image for instances"
|
||||
}
|
||||
|
||||
variable "priority" {
|
||||
type = string
|
||||
description = "Set priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be evicted at any time."
|
||||
default = "Regular"
|
||||
}
|
||||
|
||||
variable "snippets" {
|
||||
type = list(string)
|
||||
description = "Fedora CoreOS Config snippets"
|
||||
default = []
|
||||
}
|
||||
|
||||
# configuration
|
||||
|
||||
variable "kubeconfig" {
|
||||
type = string
|
||||
description = "Must be set to `kubeconfig` output by cluster"
|
||||
}
|
||||
|
||||
variable "ssh_authorized_key" {
|
||||
type = string
|
||||
description = "SSH public key for user 'core'"
|
||||
}
|
||||
|
||||
variable "service_cidr" {
|
||||
type = string
|
||||
description = <<EOD
|
||||
CIDR IPv4 range to assign Kubernetes services.
|
||||
The 1st IP will be reserved for kube_apiserver, the 10th IP will be reserved for coredns.
|
||||
EOD
|
||||
default = "10.3.0.0/16"
|
||||
}
|
||||
|
||||
variable "node_labels" {
|
||||
type = list(string)
|
||||
description = "List of initial node labels"
|
||||
default = []
|
||||
}
|
||||
|
||||
# unofficial, undocumented, unsupported
|
||||
|
||||
variable "cluster_domain_suffix" {
|
||||
description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. foo.default.svc.cluster.local) "
|
||||
type = string
|
||||
default = "cluster.local"
|
||||
}
|
||||
|
4
azure/fedora-coreos/kubernetes/workers/versions.tf
Normal file
4
azure/fedora-coreos/kubernetes/workers/versions.tf
Normal file
@ -0,0 +1,4 @@
|
||||
|
||||
terraform {
|
||||
required_version = ">= 0.12"
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user