diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md deleted file mode 100644 index 6943ea7b..00000000 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ /dev/null @@ -1,10 +0,0 @@ -High level description of the change. - -* Specific change -* Specific change - -## Testing - -Describe your work to validate the change works. - -rel: issue number (if applicable) diff --git a/.github/release.yaml b/.github/release.yaml new file mode 100644 index 00000000..3073b415 --- /dev/null +++ b/.github/release.yaml @@ -0,0 +1,12 @@ +changelog: + categories: + - title: Contributions + labels: + - '*' + exclude: + labels: + - dependencies + - no-release-note + - title: Dependencies + labels: + - dependencies diff --git a/CHANGES.md b/CHANGES.md index c5ccb0dc..409cd149 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,6 +4,214 @@ Notable changes between versions. ## Latest +## v1.31.3 + +* Kubernetes [v1.31.2](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.31.md#v1312) +* Update CoreDNS from v1.11.3 to v1.11.4 +* Update Cilium from v1.16.3 to [v1.16.4](https://github.com/cilium/cilium/releases/tag/v1.16.4) + +### Deprecations + +* Plan to drop support for using Calico CNI, recommend everyone use the Cilium default + +## v1.31.2 + +* Kubernetes [v1.31.2](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.31.md#v1312) +* Update Cilium from v1.16.1 to [v1.16.3](https://github.com/cilium/cilium/releases/tag/v1.16.3) +* Update flannel from v0.25.6 to [v0.26.0](https://github.com/flannel-io/flannel/releases/tag/v0.26.0) + +## v1.31.1 + +* Kubernetes [v1.31.1](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.31.md#v1311) +* Update flannel from v0.25.5 to [v0.25.6](https://github.com/flannel-io/flannel/releases/tag/v0.25.6) + +### Google + +* Add `controller_disk_type` and `worker_disk_type` variables ([#1513](https://github.com/poseidon/typhoon/pull/1513)) +* Add explicit `region` field to regional worker instance templates ([#1524](https://github.com/poseidon/typhoon/pull/1524)) + +## v1.31.0 + +* Kubernetes [v1.31.0](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.31.md#v1310) +* Use Cilium kube-proxy replacement mode when `cilium` networking is chosen ([#1501](https://github.com/poseidon/typhoon/pull/1501)) +* Fix invalid flannel-cni container image for those using `flannel` networking ([#1497](https://github.com/poseidon/typhoon/pull/1497)) + +### AWS + +* Use EC2 resource-based hostnames instead of IP-based hostnames ([#1499](https://github.com/poseidon/typhoon/pull/1499)) + * The Amazon DNS server can resolve A and AAAA queries to IPv4 and IPv6 node addresses +* Tag controller node EBS volumes with a name based on the controller node name + +### Google + +* Use `google_compute_region_instance_template` instead of `google_compute_instance_template` + * Google's regional instance template metadata is kept in the associated region for greater resiliency. 
The "global" instance templates were kept in a single region + +## v1.30.4 + +* Kubernetes [v1.30.4](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.30.md#v1304) +* Update Cilium from v1.15.7 to [v1.16.1](https://github.com/cilium/cilium/releases/tag/v1.16.1) +* Update CoreDNS from v1.11.1 to v1.11.3 +* Remove `enable_aggregation` variable for Kubernetes Aggregation Layer, always set to true +* Remove `cluster_domain_suffix` variable, always use "cluster.local" +* Remove `enable_reporting` variable for analytics, always set to false + +## v1.30.3 + +* Kubernetes [v1.30.3](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.30.md#v1303) +* Update Cilium from v1.15.6 to [v1.15.7](https://github.com/cilium/cilium/releases/tag/v1.15.7) +* Update flannel from v0.25.4 to [v0.25.5](https://github.com/flannel-io/flannel/releases/tag/v0.25.5) + +### AWS + +* Configure controller and worker disks ([#1482](https://github.com/poseidon/typhoon/pull/1482)) + * Add `controller_disk_type`, `controller_disk_size`, and `controller_disk_iops` variables + * Add `worker_disk_type`, `worker_disk_size`, and `worker_disk_iops` variables + * Remove `disk_type`, `disk_size`, and `disk_iops` variables + * Fix propagating settings to worker disks, previously ignored +* Configure CPU pricing model for burstable instance types ([#1482](https://github.com/poseidon/typhoon/pull/1482)) + * Add `controller_cpu_credits` and `worker_cpu_credits` variables (`standard` or `unlimited`) +* Configure controller or worker instance architecture ([#1485](https://github.com/poseidon/typhoon/pull/1485)) + * Add `controller_arch` and `worker_arch` variables (`amd64` or `arm64`) + * Remove `arch` variable + +```diff +module "cluster" { + ... +- arch = "amd64" +- disk_type = "gp3" +- disk_size = 30 +- disk_iops = 3000 + ++ controller_arch = "amd64" ++ controller_disk_size = 15 ++ controller_cpu_credits = "standard" ++ worker_arch = "amd64" ++ worker_disk_size = 22 ++ worker_cpu_credits = "unlimited" +} +``` + +### Azure + +* Configure the virtual network and subnets with IPv6 private address space + * Change `host_cidr` variable (string) to a `network_cidr` object with `ipv4` and `ipv6` fields that list CIDR strings. Leave the variable unset to use the defaults. (**breaking**) +* Add support for dual-stack Kubernetes Ingress Load Balancing + * Add a public IPv6 frontend, 80/443 rules, and a worker-ipv6 backend pool + * Change the `controller_address_prefixes` output from a list of strings to an object with `ipv4` and `ipv6` fields. Most Azure resources can't accept a mix, so these are split out (**breaking**) + * Change the `worker_address_prefixes` output from a list of strings to an object with `ipv4` and `ipv6` fields. 
Most Azure resources can't accept a mix, so these are split out (**breaking**)
+  * Change the `backend_address_pool_id` output (and worker module input) from a string to an object with `ipv4` and `ipv6` fields that list IDs (**breaking**)
+* Configure nodes to have outbound IPv6 internet connectivity (analogous to IPv4 SNAT)
+  * Configure controller nodes to have a public IPv6 address
+  * Configure worker nodes to use outbound rules and the load balancer for SNAT
+* Extend network security rules to allow IPv6 traffic, analogous to IPv4
+* Rename `region` variable to `location` to align with Azure platform conventions ([#1469](https://github.com/poseidon/typhoon/pull/1469))
+* Change worker pools from uniform to flexible orchestration mode ([#1473](https://github.com/poseidon/typhoon/pull/1473))
+* Add options to allow worker nodes to use ephemeral local disks ([#1473](https://github.com/poseidon/typhoon/pull/1473))
+  * Add `controller_disk_type` and `controller_disk_size` variables
+  * Add `worker_disk_type`, `worker_disk_size`, and `worker_ephemeral_disk` variables
+* Reduce the number of public IPv4 addresses needed for the Azure load balancer ([#1470](https://github.com/poseidon/typhoon/pull/1470))
+* Configure controller or worker instance architecture for Flatcar Linux ([#1485](https://github.com/poseidon/typhoon/pull/1485))
+  * Add `controller_arch` and `worker_arch` variables (`amd64` or `arm64`)
+  * Remove `arch` variable
+
+```diff
+module "cluster" {
+  ...
+-  region   = "centralus"
++  location = "centralus"
+  # optional
+-  host_cidr = "10.0.0.0/16"
++  network_cidr = {
++    ipv4 = ["10.0.0.0/16"]
++  }
+
+  # instances
++  controller_disk_type  = "StandardSSD_LRS"
++  worker_ephemeral_disk = true
+}
+```
+
+### Google Cloud
+
+* Allow configuring controller and worker disks ([#1486](https://github.com/poseidon/typhoon/pull/1486))
+  * Add `controller_disk_size` and `worker_disk_size` variables
+  * Remove `disk_size` variable
+
+## v1.30.2
+
+* Kubernetes [v1.30.2](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.30.md#v1302)
+* Update CoreDNS from v1.9.4 to v1.11.1
+* Update Cilium from v1.15.5 to [v1.15.6](https://github.com/cilium/cilium/releases/tag/v1.15.6)
+* Update flannel from v0.25.1 to [v0.25.4](https://github.com/flannel-io/flannel/releases/tag/v0.25.4)
+
+## v1.30.1
+
+* Kubernetes [v1.30.1](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.30.md#v1301)
+* Add firewall rules and security group rules for Cilium and Hubble metrics ([#1449](https://github.com/poseidon/typhoon/pull/1449))
+* Update Cilium from v1.15.3 to [v1.15.5](https://github.com/cilium/cilium/releases/tag/v1.15.5)
+* Update flannel from v0.24.4 to [v0.25.1](https://github.com/flannel-io/flannel/releases/tag/v0.25.1)
+* Introduce `components` variable to enable/disable/configure pre-installed components ([#1453](https://github.com/poseidon/typhoon/pull/1453))
+* Add Terraform modules for `coredns`, `cilium`, and `flannel` components
+
+### Azure
+
+* Add `controller_security_group_name` output for adding custom security rules ([#1450](https://github.com/poseidon/typhoon/pull/1450))
+* Add `controller_address_prefixes` output for adding custom security rules ([#1450](https://github.com/poseidon/typhoon/pull/1450))
+
+## v1.30.0
+
+* Kubernetes [v1.30.0](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.30.md#v1300)
+* Update etcd from v3.5.12 to [v3.5.13](https://github.com/etcd-io/etcd/releases/tag/v3.5.13)
+* Update Cilium from v1.15.2 to 
[v1.15.3](https://github.com/cilium/cilium/releases/tag/v1.15.3)
+* Update Calico from v3.27.2 to [v3.27.3](https://github.com/projectcalico/calico/releases/tag/v3.27.3)
+
+## v1.29.3
+
+* Kubernetes [v1.29.3](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.29.md#v1293)
+* Update Cilium from v1.15.1 to [v1.15.2](https://github.com/cilium/cilium/releases/tag/v1.15.2)
+* Update flannel from v0.24.2 to [v0.24.4](https://github.com/flannel-io/flannel/releases/tag/v0.24.4)
+
+## v1.29.2
+
+* Kubernetes [v1.29.2](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.29.md#v1292)
+* Update etcd from v3.5.10 to [v3.5.12](https://github.com/etcd-io/etcd/releases/tag/v3.5.12)
+* Update Cilium from v1.14.3 to [v1.15.1](https://github.com/cilium/cilium/releases/tag/v1.15.1)
+* Update Calico from v3.26.3 to [v3.27.2](https://github.com/projectcalico/calico/releases/tag/v3.27.2)
+  * Fix upstream incompatibility with Fedora CoreOS ([calico#8372](https://github.com/projectcalico/calico/issues/8372))
+* Update flannel from v0.22.2 to [v0.24.2](https://github.com/flannel-io/flannel/releases/tag/v0.24.2)
+* Add an `install_container_networking` variable (default `true`) ([#1421](https://github.com/poseidon/typhoon/pull/1421))
+  * When `true`, the chosen container `networking` provider is installed during cluster bootstrap
+  * Set `false` to self-manage the container networking provider (as sketched below). This allows flannel, Calico, or Cilium
+    to be managed via Terraform (like any other Kubernetes resource). Nodes will be NotReady until you
+    apply the self-managed container networking provider. This may become the default in the future.
+  * Continue to set `networking` to one of the three supported container networking providers. Most
+    require custom firewall / security policies to be present across nodes, so they have some infra tie-ins.
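+
+As a minimal sketch (illustrative values only; any of the platform modules works the same way), a cluster that self-manages its CNI provider would set:
+
+```tf
+module "cluster" {
+  ...
+  networking = "cilium"
+  # Skip installing the CNI provider at bootstrap; apply it via Terraform instead
+  install_container_networking = false
+}
+```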
+ +## v1.29.1 + +* Kubernetes [v1.29.1](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.29.md#v1291) + +### AWS + +* Continue to support AWS IMDSv1 ([#1412](https://github.com/poseidon/typhoon/pull/1412)) + +### Known Issues + +* Calico and Fedora CoreOS cannot be used together currently ([calico#8372](https://github.com/projectcalico/calico/issues/8372)) + +## v1.29.0 + +* Kubernetes [v1.29.0](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.29.md#v1290) + +### Known Issues + +* Calico and Fedora CoreOS cannot be used together currently ([calico#8372](https://github.com/projectcalico/calico/issues/8372)) + +## v1.28.4 + +* Kubernetes [v1.28.4](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.28.md#v1284) + ## v1.28.3 * Kubernetes [v1.28.3](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.28.md#v1283) diff --git a/README.md b/README.md index e6c73518..4e554b96 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,9 @@ -# Typhoon [![Release](https://img.shields.io/github/v/release/poseidon/typhoon)](https://github.com/poseidon/typhoon/releases) [![Stars](https://img.shields.io/github/stars/poseidon/typhoon)](https://github.com/poseidon/typhoon/stargazers) [![Sponsors](https://img.shields.io/github/sponsors/poseidon?logo=github)](https://github.com/sponsors/poseidon) [![Mastodon](https://img.shields.io/badge/follow-news-6364ff?logo=mastodon)](https://fosstodon.org/@typhoon) +# Typhoon + +[![Release](https://img.shields.io/github/v/release/poseidon/typhoon?style=flat-square)](https://github.com/poseidon/typhoon/releases) +[![Stars](https://img.shields.io/github/stars/poseidon/typhoon?style=flat-square)](https://github.com/poseidon/typhoon/stargazers) +[![Sponsors](https://img.shields.io/github/sponsors/poseidon?logo=github&style=flat-square)](https://github.com/sponsors/poseidon) +[![Mastodon](https://img.shields.io/badge/follow-news-6364ff?logo=mastodon&style=flat-square)](https://fosstodon.org/@typhoon) @@ -13,7 +18,7 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster ## Features -* Kubernetes v1.28.3 (upstream) +* Kubernetes v1.31.3 (upstream) * Single or multi-master, [Calico](https://www.projectcalico.org/) or [Cilium](https://github.com/cilium/cilium) or [flannel](https://github.com/coreos/flannel) networking * On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/), SELinux enforcing * Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [preemptible](https://typhoon.psdn.io/flatcar-linux/google-cloud/#preemption) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#hosts) customization @@ -21,7 +26,7 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster ## Modules -Typhoon provides a Terraform Module for each supported operating system and platform. +Typhoon provides a Terraform Module for defining a Kubernetes cluster on each supported operating system and platform. Typhoon is available for [Fedora CoreOS](https://getfedora.org/coreos/). 
@@ -52,6 +57,14 @@ Typhoon is available for [Flatcar Linux](https://www.flatcar-linux.org/releases/ | AWS | Flatcar Linux (ARM64) | [aws/flatcar-linux/kubernetes](aws/flatcar-linux/kubernetes) | alpha | | Azure | Flatcar Linux (ARM64) | [azure/flatcar-linux/kubernetes](azure/flatcar-linux/kubernetes) | alpha | +Typhoon also provides Terraform Modules for optionally managing individual components applied onto clusters. + +| Name | Terraform Module | Status | +|---------|------------------|--------| +| CoreDNS | [addons/coredns](addons/coredns) | beta | +| Cilium | [addons/cilium](addons/cilium) | beta | +| flannel | [addons/flannel](addons/flannel) | beta | + ## Documentation * [Docs](https://typhoon.psdn.io) @@ -65,7 +78,7 @@ Define a Kubernetes cluster by using the Terraform module for your chosen platfo ```tf module "yavin" { - source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes?ref=v1.28.3" + source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes?ref=v1.31.3" # Google Cloud cluster_name = "yavin" @@ -83,8 +96,9 @@ module "yavin" { # Obtain cluster kubeconfig resource "local_file" "kubeconfig-yavin" { - content = module.yavin.kubeconfig-admin - filename = "/home/user/.kube/configs/yavin-config" + content = module.yavin.kubeconfig-admin + filename = "/home/user/.kube/configs/yavin-config" + file_permission = "0600" } ``` @@ -104,9 +118,9 @@ In 4-8 minutes (varies by platform), the cluster will be ready. This Google Clou $ export KUBECONFIG=/home/user/.kube/configs/yavin-config $ kubectl get nodes NAME ROLES STATUS AGE VERSION -yavin-controller-0.c.example-com.internal Ready 6m v1.28.3 -yavin-worker-jrbf.c.example-com.internal Ready 5m v1.28.3 -yavin-worker-mzdm.c.example-com.internal Ready 5m v1.28.3 +yavin-controller-0.c.example-com.internal Ready 6m v1.31.3 +yavin-worker-jrbf.c.example-com.internal Ready 5m v1.31.3 +yavin-worker-mzdm.c.example-com.internal Ready 5m v1.31.3 ``` List the pods. @@ -114,9 +128,10 @@ List the pods. 
``` $ kubectl get pods --all-namespaces NAMESPACE NAME READY STATUS RESTARTS AGE -kube-system calico-node-1cs8z 2/2 Running 0 6m -kube-system calico-node-d1l5b 2/2 Running 0 6m -kube-system calico-node-sp9ps 2/2 Running 0 6m +kube-system cilium-1cs8z 1/1 Running 0 6m +kube-system cilium-d1l5b 1/1 Running 0 6m +kube-system cilium-sp9ps 1/1 Running 0 6m +kube-system cilium-operator-68d778b448-g744f 1/1 Running 0 6m kube-system coredns-1187388186-zj5dl 1/1 Running 0 6m kube-system coredns-1187388186-dkh3o 1/1 Running 0 6m kube-system kube-apiserver-controller-0 1/1 Running 0 6m diff --git a/addons/cilium/cluster-role-binding.tf b/addons/cilium/cluster-role-binding.tf new file mode 100644 index 00000000..4e52951e --- /dev/null +++ b/addons/cilium/cluster-role-binding.tf @@ -0,0 +1,36 @@ +resource "kubernetes_cluster_role_binding" "operator" { + metadata { + name = "cilium-operator" + } + + role_ref { + api_group = "rbac.authorization.k8s.io" + kind = "ClusterRole" + name = "cilium-operator" + } + + subject { + kind = "ServiceAccount" + name = "cilium-operator" + namespace = "kube-system" + } +} + +resource "kubernetes_cluster_role_binding" "agent" { + metadata { + name = "cilium-agent" + } + + role_ref { + api_group = "rbac.authorization.k8s.io" + kind = "ClusterRole" + name = "cilium-agent" + } + + subject { + kind = "ServiceAccount" + name = "cilium-agent" + namespace = "kube-system" + } +} + diff --git a/addons/cilium/cluster-role.tf b/addons/cilium/cluster-role.tf new file mode 100644 index 00000000..a791ab3b --- /dev/null +++ b/addons/cilium/cluster-role.tf @@ -0,0 +1,112 @@ +resource "kubernetes_cluster_role" "operator" { + metadata { + name = "cilium-operator" + } + + # detect and restart [core|kube]dns pods on startup + rule { + verbs = ["get", "list", "watch", "delete"] + api_groups = [""] + resources = ["pods"] + } + + rule { + verbs = ["list", "watch"] + api_groups = [""] + resources = ["nodes"] + } + + rule { + verbs = ["patch"] + api_groups = [""] + resources = ["nodes", "nodes/status"] + } + + rule { + verbs = ["get", "list", "watch"] + api_groups = ["discovery.k8s.io"] + resources = ["endpointslices"] + } + + rule { + verbs = ["get", "list", "watch"] + api_groups = [""] + resources = ["services"] + } + + # Perform LB IP allocation for BGP + rule { + verbs = ["update"] + api_groups = [""] + resources = ["services/status"] + } + + # Perform the translation of a CNP that contains `ToGroup` to its endpoints + rule { + verbs = ["get", "list", "watch"] + api_groups = [""] + resources = ["services", "endpoints", "namespaces"] + } + + rule { + verbs = ["*"] + api_groups = ["cilium.io"] + resources = ["ciliumnetworkpolicies", "ciliumnetworkpolicies/status", "ciliumnetworkpolicies/finalizers", "ciliumclusterwidenetworkpolicies", "ciliumclusterwidenetworkpolicies/status", "ciliumclusterwidenetworkpolicies/finalizers", "ciliumendpoints", "ciliumendpoints/status", "ciliumendpoints/finalizers", "ciliumnodes", "ciliumnodes/status", "ciliumnodes/finalizers", "ciliumidentities", "ciliumidentities/status", "ciliumidentities/finalizers", "ciliumlocalredirectpolicies", "ciliumlocalredirectpolicies/status", "ciliumlocalredirectpolicies/finalizers", "ciliumendpointslices", "ciliumloadbalancerippools", "ciliumloadbalancerippools/status", "ciliumcidrgroups", "ciliuml2announcementpolicies", "ciliuml2announcementpolicies/status", "ciliumpodippools"] + } + + rule { + verbs = ["create", "get", "list", "update", "watch"] + api_groups = ["apiextensions.k8s.io"] + resources = ["customresourcedefinitions"] + } 
+ + # Cilium leader elects if among multiple operator replicas + rule { + verbs = ["create", "get", "update"] + api_groups = ["coordination.k8s.io"] + resources = ["leases"] + } +} + +resource "kubernetes_cluster_role" "agent" { + metadata { + name = "cilium-agent" + } + + rule { + verbs = ["get", "list", "watch"] + api_groups = ["networking.k8s.io"] + resources = ["networkpolicies"] + } + + rule { + verbs = ["get", "list", "watch"] + api_groups = ["discovery.k8s.io"] + resources = ["endpointslices"] + } + + rule { + verbs = ["get", "list", "watch"] + api_groups = [""] + resources = ["namespaces", "services", "pods", "endpoints", "nodes"] + } + + rule { + verbs = ["patch"] + api_groups = [""] + resources = ["nodes/status"] + } + + rule { + verbs = ["create", "get", "list", "watch", "update"] + api_groups = ["apiextensions.k8s.io"] + resources = ["customresourcedefinitions"] + } + + rule { + verbs = ["*"] + api_groups = ["cilium.io"] + resources = ["ciliumnetworkpolicies", "ciliumnetworkpolicies/status", "ciliumclusterwidenetworkpolicies", "ciliumclusterwidenetworkpolicies/status", "ciliumendpoints", "ciliumendpoints/status", "ciliumnodes", "ciliumnodes/status", "ciliumidentities", "ciliumidentities/status", "ciliumlocalredirectpolicies", "ciliumlocalredirectpolicies/status", "ciliumegressnatpolicies", "ciliumendpointslices", "ciliumcidrgroups", "ciliuml2announcementpolicies", "ciliuml2announcementpolicies/status", "ciliumpodippools"] + } +} + diff --git a/addons/cilium/config.tf b/addons/cilium/config.tf new file mode 100644 index 00000000..60cc03f1 --- /dev/null +++ b/addons/cilium/config.tf @@ -0,0 +1,196 @@ +resource "kubernetes_config_map" "cilium" { + metadata { + name = "cilium" + namespace = "kube-system" + } + data = { + # Identity allocation mode selects how identities are shared between cilium + # nodes by setting how they are stored. The options are "crd" or "kvstore". + # - "crd" stores identities in kubernetes as CRDs (custom resource definition). + # These can be queried with: + # kubectl get ciliumid + # - "kvstore" stores identities in a kvstore, etcd or consul, that is + # configured below. Cilium versions before 1.6 supported only the kvstore + # backend. Upgrades from these older cilium versions should continue using + # the kvstore by commenting out the identity-allocation-mode below, or + # setting it to "kvstore". + identity-allocation-mode = "crd" + cilium-endpoint-gc-interval = "5m0s" + nodes-gc-interval = "5m0s" + + # If you want to run cilium in debug mode change this value to true + debug = "false" + # The agent can be put into the following three policy enforcement modes + # default, always and never. + # https://docs.cilium.io/en/latest/policy/intro/#policy-enforcement-modes + enable-policy = "default" + + # Prometheus + enable-metrics = "true" + prometheus-serve-addr = ":9962" + operator-prometheus-serve-addr = ":9963" + proxy-prometheus-port = "9964" # envoy + + # Enable IPv4 addressing. If enabled, all endpoints are allocated an IPv4 + # address. + enable-ipv4 = "true" + + # Enable IPv6 addressing. If enabled, all endpoints are allocated an IPv6 + # address. + enable-ipv6 = "false" + + # Enable probing for a more efficient clock source for the BPF datapath + enable-bpf-clock-probe = "true" + + # Enable use of transparent proxying mechanisms (Linux 5.7+) + enable-bpf-tproxy = "false" + + # If you want cilium monitor to aggregate tracing for packets, set this level + # to "low", "medium", or "maximum". 
The higher the level, the fewer packets
+    # will be seen in monitor output.
+    monitor-aggregation = "medium"
+
+    # The monitor aggregation interval governs the typical time between monitor
+    # notification events for each allowed connection.
+    #
+    # Only effective when monitor aggregation is set to "medium" or higher.
+    monitor-aggregation-interval = "5s"
+
+    # The monitor aggregation flags determine which TCP flags, upon the
+    # first observation, cause monitor notifications to be generated.
+    #
+    # Only effective when monitor aggregation is set to "medium" or higher.
+    monitor-aggregation-flags = "all"
+
+    # Specifies the ratio (0.0-1.0) of total system memory to use for dynamic
+    # sizing of the TCP CT, non-TCP CT, NAT and policy BPF maps.
+    bpf-map-dynamic-size-ratio = "0.0025"
+    # bpf-policy-map-max specifies the maximum number of entries in endpoint
+    # policy map (per endpoint)
+    bpf-policy-map-max = "16384"
+    # bpf-lb-map-max specifies the maximum number of entries in bpf lb service,
+    # backend and affinity maps.
+    bpf-lb-map-max = "65536"
+
+    # Pre-allocation of map entries allows per-packet latency to be reduced, at
+    # the expense of up-front memory allocation for the entries in the maps. The
+    # default value below will minimize memory usage in the default installation;
+    # users who are sensitive to latency may consider setting this to "true".
+    #
+    # This option was introduced in Cilium 1.4. Cilium 1.3 and earlier ignore
+    # this option and behave as though it is set to "true".
+    #
+    # If this value is modified, then during the next Cilium startup the restore
+    # of existing endpoints and tracking of ongoing connections may be disrupted.
+    # As a result, reply packets may be dropped and the load-balancing decisions
+    # for established connections may change.
+    #
+    # If this option is set to "false" during an upgrade from 1.3 or earlier to
+    # 1.4 or later, then it may cause one-time disruptions during the upgrade.
+    preallocate-bpf-maps = "false"
+
+    # Name of the cluster. Only relevant when building a mesh of clusters.
+    cluster-name = "default"
+    # Unique ID of the cluster. Must be unique across all connected clusters and
+    # in the range of 1 to 255. Only relevant when building a mesh of clusters.
+    cluster-id = "0"
+
+    # Encapsulation mode for communication between nodes
+    # Possible values:
+    #   - disabled
+    #   - vxlan (default)
+    #   - geneve
+    routing-mode = "tunnel"
+    tunnel       = "vxlan"
+    # Enables L7 proxy for L7 policy enforcement and visibility
+    enable-l7-proxy = "true"
+
+    auto-direct-node-routes = "false"
+
+    # enableXTSocketFallback enables the fallback compatibility solution
+    # when the xt_socket kernel module is missing and it is needed for
+    # the datapath L7 redirection to work properly. See documentation
+    # for details on when this can be disabled:
+    # http://docs.cilium.io/en/latest/install/system_requirements/#admin-kernel-version.
+    enable-xt-socket-fallback = "true"
+
+    # installIptablesRules enables installation of iptables rules to allow for
+    # TPROXY (L7 proxy injection), iptables-based masquerading and compatibility
+    # with kube-proxy. See documentation for details on when this can be
+    # disabled. 
+ install-iptables-rules = "true" + + # masquerade traffic leaving the node destined for outside + enable-ipv4-masquerade = "true" + enable-ipv6-masquerade = "false" + + # bpfMasquerade enables masquerading with BPF instead of iptables + enable-bpf-masquerade = "true" + + # kube-proxy + kube-proxy-replacement = "true" + kube-proxy-replacement-healthz-bind-address = ":10256" + enable-session-affinity = "true" + + # ClusterIPs from host namespace + bpf-lb-sock = "true" + # ClusterIPs from external nodes + bpf-lb-external-clusterip = "true" + + # NodePort + enable-node-port = "true" + enable-health-check-nodeport = "false" + + # ExternalIPs + enable-external-ips = "true" + + # HostPort + enable-host-port = "true" + + # IPAM + ipam = "cluster-pool" + disable-cnp-status-updates = "true" + cluster-pool-ipv4-cidr = "${var.pod_cidr}" + cluster-pool-ipv4-mask-size = "24" + + # Health + agent-health-port = "9876" + enable-health-checking = "true" + enable-endpoint-health-checking = "true" + + # Identity + enable-well-known-identities = "false" + enable-remote-node-identity = "true" + + # Hubble server + enable-hubble = var.enable_hubble + hubble-disable-tls = "false" + hubble-listen-address = ":4244" + hubble-socket-path = "/var/run/cilium/hubble.sock" + hubble-tls-client-ca-files = "/var/lib/cilium/tls/hubble/client-ca.crt" + hubble-tls-cert-file = "/var/lib/cilium/tls/hubble/server.crt" + hubble-tls-key-file = "/var/lib/cilium/tls/hubble/server.key" + hubble-export-file-max-backups = "5" + hubble-export-file-max-size-mb = "10" + + # Hubble metrics + hubble-metrics-server = ":9965" + hubble-metrics = "dns drop tcp flow port-distribution icmp httpV2" + enable-hubble-open-metrics = "false" + + + # Misc + enable-bandwidth-manager = "false" + enable-local-redirect-policy = "false" + policy-audit-mode = "false" + operator-api-serve-addr = "127.0.0.1:9234" + enable-l2-neigh-discovery = "true" + enable-k8s-terminating-endpoint = "true" + enable-k8s-networkpolicy = "true" + external-envoy-proxy = "false" + write-cni-conf-when-ready = "/host/etc/cni/net.d/05-cilium.conflist" + cni-exclusive = "true" + cni-log-file = "/var/run/cilium/cilium-cni.log" + } +} + diff --git a/addons/cilium/daemonset.tf b/addons/cilium/daemonset.tf new file mode 100644 index 00000000..d160f0e9 --- /dev/null +++ b/addons/cilium/daemonset.tf @@ -0,0 +1,379 @@ +resource "kubernetes_daemonset" "cilium" { + wait_for_rollout = false + + metadata { + name = "cilium" + namespace = "kube-system" + labels = { + k8s-app = "cilium" + } + } + spec { + strategy { + type = "RollingUpdate" + rolling_update { + max_unavailable = "1" + } + } + selector { + match_labels = { + k8s-app = "cilium-agent" + } + } + template { + metadata { + labels = { + k8s-app = "cilium-agent" + } + annotations = { + "prometheus.io/port" = "9962" + "prometheus.io/scrape" = "true" + } + } + spec { + host_network = true + priority_class_name = "system-node-critical" + service_account_name = "cilium-agent" + security_context { + seccomp_profile { + type = "RuntimeDefault" + } + } + toleration { + key = "node-role.kubernetes.io/controller" + operator = "Exists" + } + toleration { + key = "node.kubernetes.io/not-ready" + operator = "Exists" + } + dynamic "toleration" { + for_each = var.daemonset_tolerations + content { + key = toleration.value + operator = "Exists" + } + } + automount_service_account_token = true + enable_service_links = false + + # Cilium v1.13.1 starts installing CNI plugins in yet another init container + # https://github.com/cilium/cilium/pull/24075 + 
init_container { + name = "install-cni" + image = "quay.io/cilium/cilium:v1.16.4" + command = ["/install-plugin.sh"] + security_context { + allow_privilege_escalation = true + privileged = true + capabilities { + drop = ["ALL"] + } + } + volume_mount { + name = "cni-bin-dir" + mount_path = "/host/opt/cni/bin" + } + } + + # Required to mount cgroup2 filesystem on the underlying Kubernetes node. + # We use nsenter command with host's cgroup and mount namespaces enabled. + init_container { + name = "mount-cgroup" + image = "quay.io/cilium/cilium:v1.16.4" + command = [ + "sh", + "-ec", + # The statically linked Go program binary is invoked to avoid any + # dependency on utilities like sh and mount that can be missing on certain + # distros installed on the underlying host. Copy the binary to the + # same directory where we install cilium cni plugin so that exec permissions + # are available. + "cp /usr/bin/cilium-mount /hostbin/cilium-mount && nsenter --cgroup=/hostproc/1/ns/cgroup --mount=/hostproc/1/ns/mnt \"$${BIN_PATH}/cilium-mount\" $CGROUP_ROOT; rm /hostbin/cilium-mount" + ] + env { + name = "CGROUP_ROOT" + value = "/run/cilium/cgroupv2" + } + env { + name = "BIN_PATH" + value = "/opt/cni/bin" + } + security_context { + allow_privilege_escalation = true + privileged = true + } + volume_mount { + name = "hostproc" + mount_path = "/hostproc" + } + volume_mount { + name = "cni-bin-dir" + mount_path = "/hostbin" + } + } + + init_container { + name = "clean-cilium-state" + image = "quay.io/cilium/cilium:v1.16.4" + command = ["/init-container.sh"] + security_context { + allow_privilege_escalation = true + privileged = true + } + volume_mount { + name = "sys-fs-bpf" + mount_path = "/sys/fs/bpf" + } + volume_mount { + name = "var-run-cilium" + mount_path = "/var/run/cilium" + } + # Required to mount cgroup filesystem from the host to cilium agent pod + volume_mount { + name = "cilium-cgroup" + mount_path = "/run/cilium/cgroupv2" + mount_propagation = "HostToContainer" + } + } + + container { + name = "cilium-agent" + image = "quay.io/cilium/cilium:v1.16.4" + command = ["cilium-agent"] + args = [ + "--config-dir=/tmp/cilium/config-map" + ] + env { + name = "K8S_NODE_NAME" + value_from { + field_ref { + api_version = "v1" + field_path = "spec.nodeName" + } + } + } + env { + name = "CILIUM_K8S_NAMESPACE" + value_from { + field_ref { + api_version = "v1" + field_path = "metadata.namespace" + } + } + } + env { + name = "KUBERNETES_SERVICE_HOST" + value_from { + config_map_key_ref { + name = "in-cluster" + key = "apiserver-host" + } + } + } + env { + name = "KUBERNETES_SERVICE_PORT" + value_from { + config_map_key_ref { + name = "in-cluster" + key = "apiserver-port" + } + } + } + port { + name = "peer-service" + protocol = "TCP" + container_port = 4244 + } + # Metrics + port { + name = "metrics" + protocol = "TCP" + container_port = 9962 + } + port { + name = "envoy-metrics" + protocol = "TCP" + container_port = 9964 + } + port { + name = "hubble-metrics" + protocol = "TCP" + container_port = 9965 + } + # Not yet used, prefer exec's + port { + name = "health" + protocol = "TCP" + container_port = 9876 + } + lifecycle { + pre_stop { + exec { + command = ["/cni-uninstall.sh"] + } + } + } + security_context { + allow_privilege_escalation = true + privileged = true + } + liveness_probe { + exec { + command = ["cilium", "status", "--brief"] + } + initial_delay_seconds = 120 + timeout_seconds = 5 + period_seconds = 30 + success_threshold = 1 + failure_threshold = 10 + } + readiness_probe { + exec { + 
command = ["cilium", "status", "--brief"] + } + initial_delay_seconds = 5 + timeout_seconds = 5 + period_seconds = 20 + success_threshold = 1 + failure_threshold = 3 + } + # Load kernel modules + volume_mount { + name = "lib-modules" + read_only = true + mount_path = "/lib/modules" + } + # Access iptables concurrently + volume_mount { + name = "xtables-lock" + mount_path = "/run/xtables.lock" + } + # Keep state between restarts + volume_mount { + name = "var-run-cilium" + mount_path = "/var/run/cilium" + } + volume_mount { + name = "sys-fs-bpf" + mount_path = "/sys/fs/bpf" + mount_propagation = "Bidirectional" + } + # Configuration + volume_mount { + name = "config" + read_only = true + mount_path = "/tmp/cilium/config-map" + } + # Install config on host + volume_mount { + name = "cni-conf-dir" + mount_path = "/host/etc/cni/net.d" + } + # Hubble + volume_mount { + name = "hubble-tls" + mount_path = "/var/lib/cilium/tls/hubble" + read_only = true + } + } + termination_grace_period_seconds = 1 + + # Load kernel modules + volume { + name = "lib-modules" + host_path { + path = "/lib/modules" + } + } + # Access iptables concurrently with other processes (e.g. kube-proxy) + volume { + name = "xtables-lock" + host_path { + path = "/run/xtables.lock" + type = "FileOrCreate" + } + } + # Keep state between restarts + volume { + name = "var-run-cilium" + host_path { + path = "/var/run/cilium" + type = "DirectoryOrCreate" + } + } + # Keep state for bpf maps between restarts + volume { + name = "sys-fs-bpf" + host_path { + path = "/sys/fs/bpf" + type = "DirectoryOrCreate" + } + } + # Mount host cgroup2 filesystem + volume { + name = "hostproc" + host_path { + path = "/proc" + type = "Directory" + } + } + volume { + name = "cilium-cgroup" + host_path { + path = "/run/cilium/cgroupv2" + type = "DirectoryOrCreate" + } + } + # Read configuration + volume { + name = "config" + config_map { + name = "cilium" + } + } + # Install CNI plugin and config on host + volume { + name = "cni-bin-dir" + host_path { + path = "/opt/cni/bin" + type = "DirectoryOrCreate" + } + } + volume { + name = "cni-conf-dir" + host_path { + path = "/etc/cni/net.d" + type = "DirectoryOrCreate" + } + } + # Hubble TLS (optional) + volume { + name = "hubble-tls" + projected { + default_mode = "0400" + sources { + secret { + name = "hubble-server-certs" + optional = true + items { + key = "ca.crt" + path = "client-ca.crt" + } + items { + key = "tls.crt" + path = "server.crt" + } + items { + key = "tls.key" + path = "server.key" + } + } + } + } + } + } + } + } +} + diff --git a/addons/cilium/deployment.tf b/addons/cilium/deployment.tf new file mode 100644 index 00000000..fbbb03dc --- /dev/null +++ b/addons/cilium/deployment.tf @@ -0,0 +1,163 @@ +resource "kubernetes_deployment" "operator" { + wait_for_rollout = false + metadata { + name = "cilium-operator" + namespace = "kube-system" + } + spec { + replicas = 1 + strategy { + type = "RollingUpdate" + rolling_update { + max_unavailable = "1" + } + } + selector { + match_labels = { + name = "cilium-operator" + } + } + template { + metadata { + labels = { + name = "cilium-operator" + } + annotations = { + "prometheus.io/scrape" = "true" + "prometheus.io/port" = "9963" + } + } + spec { + host_network = true + priority_class_name = "system-cluster-critical" + service_account_name = "cilium-operator" + security_context { + seccomp_profile { + type = "RuntimeDefault" + } + } + toleration { + key = "node-role.kubernetes.io/controller" + operator = "Exists" + } + toleration { + key = 
"node.kubernetes.io/not-ready" + operator = "Exists" + } + topology_spread_constraint { + max_skew = 1 + topology_key = "kubernetes.io/hostname" + when_unsatisfiable = "DoNotSchedule" + label_selector { + match_labels = { + name = "cilium-operator" + } + } + } + automount_service_account_token = true + enable_service_links = false + container { + name = "cilium-operator" + image = "quay.io/cilium/operator-generic:v1.16.4" + command = ["cilium-operator-generic"] + args = [ + "--config-dir=/tmp/cilium/config-map", + "--debug=$(CILIUM_DEBUG)" + ] + env { + name = "K8S_NODE_NAME" + value_from { + field_ref { + api_version = "v1" + field_path = "spec.nodeName" + } + } + } + env { + name = "CILIUM_K8S_NAMESPACE" + value_from { + field_ref { + api_version = "v1" + field_path = "metadata.namespace" + } + } + } + env { + name = "KUBERNETES_SERVICE_HOST" + value_from { + config_map_key_ref { + name = "in-cluster" + key = "apiserver-host" + } + } + } + env { + name = "KUBERNETES_SERVICE_PORT" + value_from { + config_map_key_ref { + name = "in-cluster" + key = "apiserver-port" + } + } + } + env { + name = "CILIUM_DEBUG" + value_from { + config_map_key_ref { + name = "cilium" + key = "debug" + optional = true + } + } + } + port { + name = "metrics" + protocol = "TCP" + host_port = 9963 + container_port = 9963 + } + port { + name = "health" + container_port = 9234 + protocol = "TCP" + } + liveness_probe { + http_get { + scheme = "HTTP" + host = "127.0.0.1" + port = "9234" + path = "/healthz" + } + initial_delay_seconds = 60 + timeout_seconds = 3 + period_seconds = 10 + } + readiness_probe { + http_get { + scheme = "HTTP" + host = "127.0.0.1" + port = "9234" + path = "/healthz" + } + timeout_seconds = 3 + period_seconds = 15 + failure_threshold = 5 + } + volume_mount { + name = "config" + read_only = true + mount_path = "/tmp/cilium/config-map" + } + } + + volume { + name = "config" + config_map { + name = "cilium" + } + } + } + } + } +} + diff --git a/addons/cilium/service-account.tf b/addons/cilium/service-account.tf new file mode 100644 index 00000000..5729dcd8 --- /dev/null +++ b/addons/cilium/service-account.tf @@ -0,0 +1,15 @@ +resource "kubernetes_service_account" "operator" { + metadata { + name = "cilium-operator" + namespace = "kube-system" + } + automount_service_account_token = false +} + +resource "kubernetes_service_account" "agent" { + metadata { + name = "cilium-agent" + namespace = "kube-system" + } + automount_service_account_token = false +} diff --git a/addons/cilium/variables.tf b/addons/cilium/variables.tf new file mode 100644 index 00000000..6d269fd2 --- /dev/null +++ b/addons/cilium/variables.tf @@ -0,0 +1,17 @@ +variable "pod_cidr" { + type = string + description = "CIDR IP range to assign Kubernetes pods" + default = "10.2.0.0/16" +} + +variable "daemonset_tolerations" { + type = list(string) + description = "List of additional taint keys kube-system DaemonSets should tolerate (e.g. 
['custom-role', 'gpu-role'])" + default = [] +} + +variable "enable_hubble" { + type = bool + description = "Run the embedded Hubble Server and mount hubble-server-certs Secret" + default = true +} diff --git a/addons/cilium/versions.tf b/addons/cilium/versions.tf new file mode 100644 index 00000000..ad7f7415 --- /dev/null +++ b/addons/cilium/versions.tf @@ -0,0 +1,8 @@ +terraform { + required_providers { + kubernetes = { + source = "hashicorp/kubernetes" + version = "~> 2.8" + } + } +} diff --git a/addons/coredns/cluster-role.tf b/addons/coredns/cluster-role.tf new file mode 100644 index 00000000..3cc095e8 --- /dev/null +++ b/addons/coredns/cluster-role.tf @@ -0,0 +1,37 @@ +resource "kubernetes_cluster_role" "coredns" { + metadata { + name = "system:coredns" + } + rule { + api_groups = [""] + resources = [ + "endpoints", + "services", + "pods", + "namespaces", + ] + verbs = [ + "list", + "watch", + ] + } + rule { + api_groups = [""] + resources = [ + "nodes", + ] + verbs = [ + "get", + ] + } + rule { + api_groups = ["discovery.k8s.io"] + resources = [ + "endpointslices", + ] + verbs = [ + "list", + "watch", + ] + } +} diff --git a/addons/coredns/config.tf b/addons/coredns/config.tf new file mode 100644 index 00000000..e1a614cb --- /dev/null +++ b/addons/coredns/config.tf @@ -0,0 +1,30 @@ +resource "kubernetes_config_map" "coredns" { + metadata { + name = "coredns" + namespace = "kube-system" + } + data = { + "Corefile" = <<-EOF + .:53 { + errors + health { + lameduck 5s + } + ready + log . { + class error + } + kubernetes ${var.cluster_domain_suffix} in-addr.arpa ip6.arpa { + pods insecure + fallthrough in-addr.arpa ip6.arpa + } + prometheus :9153 + forward . /etc/resolv.conf + cache 30 + loop + reload + loadbalance + } + EOF + } +} diff --git a/addons/coredns/deployment.tf b/addons/coredns/deployment.tf new file mode 100644 index 00000000..34602a9d --- /dev/null +++ b/addons/coredns/deployment.tf @@ -0,0 +1,151 @@ +resource "kubernetes_deployment" "coredns" { + wait_for_rollout = false + metadata { + name = "coredns" + namespace = "kube-system" + labels = { + k8s-app = "coredns" + "kubernetes.io/name" = "CoreDNS" + } + } + spec { + replicas = var.replicas + strategy { + type = "RollingUpdate" + rolling_update { + max_unavailable = "1" + } + } + selector { + match_labels = { + k8s-app = "coredns" + tier = "control-plane" + } + } + template { + metadata { + labels = { + k8s-app = "coredns" + tier = "control-plane" + } + } + spec { + affinity { + node_affinity { + preferred_during_scheduling_ignored_during_execution { + weight = 100 + preference { + match_expressions { + key = "node.kubernetes.io/controller" + operator = "Exists" + } + } + } + } + pod_anti_affinity { + preferred_during_scheduling_ignored_during_execution { + weight = 100 + pod_affinity_term { + label_selector { + match_expressions { + key = "tier" + operator = "In" + values = ["control-plane"] + } + match_expressions { + key = "k8s-app" + operator = "In" + values = ["coredns"] + } + } + topology_key = "kubernetes.io/hostname" + } + } + } + } + dns_policy = "Default" + priority_class_name = "system-cluster-critical" + security_context { + seccomp_profile { + type = "RuntimeDefault" + } + } + service_account_name = "coredns" + toleration { + key = "node-role.kubernetes.io/controller" + effect = "NoSchedule" + } + container { + name = "coredns" + image = "registry.k8s.io/coredns/coredns:v1.12.0" + args = ["-conf", "/etc/coredns/Corefile"] + port { + name = "dns" + container_port = 53 + protocol = "UDP" + } + port { + name = 
"dns-tcp" + container_port = 53 + protocol = "TCP" + } + port { + name = "metrics" + container_port = 9153 + protocol = "TCP" + } + resources { + requests = { + cpu = "100m" + memory = "70Mi" + } + limits = { + memory = "170Mi" + } + } + security_context { + capabilities { + add = ["NET_BIND_SERVICE"] + drop = ["all"] + } + read_only_root_filesystem = true + } + liveness_probe { + http_get { + path = "/health" + port = "8080" + scheme = "HTTP" + } + initial_delay_seconds = 60 + timeout_seconds = 5 + success_threshold = 1 + failure_threshold = 5 + } + readiness_probe { + http_get { + path = "/ready" + port = "8181" + scheme = "HTTP" + } + } + volume_mount { + name = "config" + mount_path = "/etc/coredns" + read_only = true + } + } + volume { + name = "config" + config_map { + name = "coredns" + items { + key = "Corefile" + path = "Corefile" + } + } + } + } + } + } +} + diff --git a/addons/coredns/service-account.tf b/addons/coredns/service-account.tf new file mode 100644 index 00000000..61a124e1 --- /dev/null +++ b/addons/coredns/service-account.tf @@ -0,0 +1,24 @@ +resource "kubernetes_service_account" "coredns" { + metadata { + name = "coredns" + namespace = "kube-system" + } + automount_service_account_token = false +} + + +resource "kubernetes_cluster_role_binding" "coredns" { + metadata { + name = "system:coredns" + } + role_ref { + api_group = "rbac.authorization.k8s.io" + kind = "ClusterRole" + name = "system:coredns" + } + subject { + kind = "ServiceAccount" + name = "coredns" + namespace = "kube-system" + } +} diff --git a/addons/coredns/service.tf b/addons/coredns/service.tf new file mode 100644 index 00000000..9bcf59d5 --- /dev/null +++ b/addons/coredns/service.tf @@ -0,0 +1,31 @@ +resource "kubernetes_service" "coredns" { + metadata { + name = "coredns" + namespace = "kube-system" + labels = { + "k8s-app" = "coredns" + "kubernetes.io/name" = "CoreDNS" + } + annotations = { + "prometheus.io/scrape" = "true" + "prometheus.io/port" = "9153" + } + } + spec { + type = "ClusterIP" + cluster_ip = var.cluster_dns_service_ip + selector = { + k8s-app = "coredns" + } + port { + name = "dns" + protocol = "UDP" + port = 53 + } + port { + name = "dns-tcp" + protocol = "TCP" + port = 53 + } + } +} diff --git a/addons/coredns/variables.tf b/addons/coredns/variables.tf new file mode 100644 index 00000000..1bd8c4da --- /dev/null +++ b/addons/coredns/variables.tf @@ -0,0 +1,15 @@ +variable "replicas" { + type = number + description = "CoreDNS replica count" + default = 2 +} + +variable "cluster_dns_service_ip" { + description = "Must be set to `cluster_dns_service_ip` output by cluster" + default = "10.3.0.10" +} + +variable "cluster_domain_suffix" { + description = "Must be set to `cluster_domain_suffix` output by cluster" + default = "cluster.local" +} diff --git a/addons/coredns/versions.tf b/addons/coredns/versions.tf new file mode 100644 index 00000000..5d8ee2d7 --- /dev/null +++ b/addons/coredns/versions.tf @@ -0,0 +1,9 @@ +terraform { + required_providers { + kubernetes = { + source = "hashicorp/kubernetes" + version = "~> 2.8" + } + } +} + diff --git a/addons/flannel/cluster-role-binding.tf b/addons/flannel/cluster-role-binding.tf new file mode 100644 index 00000000..3a7aa9e7 --- /dev/null +++ b/addons/flannel/cluster-role-binding.tf @@ -0,0 +1,18 @@ +resource "kubernetes_cluster_role_binding" "flannel" { + metadata { + name = "flannel" + } + + role_ref { + api_group = "rbac.authorization.k8s.io" + kind = "ClusterRole" + name = "flannel" + } + + subject { + kind = "ServiceAccount" + name = 
"flannel" + namespace = "kube-system" + } +} + diff --git a/addons/flannel/cluster-role.tf b/addons/flannel/cluster-role.tf new file mode 100644 index 00000000..9c2d461d --- /dev/null +++ b/addons/flannel/cluster-role.tf @@ -0,0 +1,24 @@ +resource "kubernetes_cluster_role" "flannel" { + metadata { + name = "flannel" + } + + rule { + api_groups = [""] + resources = ["pods"] + verbs = ["get"] + } + + rule { + api_groups = [""] + resources = ["nodes"] + verbs = ["list", "watch"] + } + + rule { + api_groups = [""] + resources = ["nodes/status"] + verbs = ["patch"] + } +} + diff --git a/addons/flannel/config.tf b/addons/flannel/config.tf new file mode 100644 index 00000000..4a0f8cc9 --- /dev/null +++ b/addons/flannel/config.tf @@ -0,0 +1,44 @@ +resource "kubernetes_config_map" "config" { + metadata { + name = "flannel-config" + namespace = "kube-system" + labels = { + k8s-app = "flannel" + tier = "node" + } + } + + data = { + "cni-conf.json" = <<-EOF + { + "name": "cbr0", + "cniVersion": "0.3.1", + "plugins": [ + { + "type": "flannel", + "delegate": { + "hairpinMode": true, + "isDefaultGateway": true + } + }, + { + "type": "portmap", + "capabilities": { + "portMappings": true + } + } + ] + } + EOF + "net-conf.json" = <<-EOF + { + "Network": "${var.pod_cidr}", + "Backend": { + "Type": "vxlan", + "Port": 4789 + } + } + EOF + } +} + diff --git a/addons/flannel/daemonset.tf b/addons/flannel/daemonset.tf new file mode 100644 index 00000000..e9644626 --- /dev/null +++ b/addons/flannel/daemonset.tf @@ -0,0 +1,167 @@ +resource "kubernetes_daemonset" "flannel" { + metadata { + name = "flannel" + namespace = "kube-system" + labels = { + k8s-app = "flannel" + } + } + spec { + strategy { + type = "RollingUpdate" + rolling_update { + max_unavailable = "1" + } + } + selector { + match_labels = { + k8s-app = "flannel" + } + } + template { + metadata { + labels = { + k8s-app = "flannel" + } + } + spec { + host_network = true + priority_class_name = "system-node-critical" + service_account_name = "flannel" + security_context { + seccomp_profile { + type = "RuntimeDefault" + } + } + toleration { + key = "node-role.kubernetes.io/controller" + operator = "Exists" + } + toleration { + key = "node.kubernetes.io/not-ready" + operator = "Exists" + } + dynamic "toleration" { + for_each = var.daemonset_tolerations + content { + key = toleration.value + operator = "Exists" + } + } + init_container { + name = "install-cni" + image = "quay.io/poseidon/flannel-cni:v0.4.2" + command = ["/install-cni.sh"] + env { + name = "CNI_NETWORK_CONFIG" + value_from { + config_map_key_ref { + name = "flannel-config" + key = "cni-conf.json" + } + } + } + volume_mount { + name = "cni-bin-dir" + mount_path = "/host/opt/cni/bin/" + } + volume_mount { + name = "cni-conf-dir" + mount_path = "/host/etc/cni/net.d" + } + } + + container { + name = "flannel" + image = "docker.io/flannel/flannel:v0.26.1" + command = [ + "/opt/bin/flanneld", + "--ip-masq", + "--kube-subnet-mgr", + "--iface=$(POD_IP)" + ] + env { + name = "POD_NAME" + value_from { + field_ref { + field_path = "metadata.name" + } + } + } + env { + name = "POD_NAMESPACE" + value_from { + field_ref { + field_path = "metadata.namespace" + } + } + } + env { + name = "POD_IP" + value_from { + field_ref { + field_path = "status.podIP" + } + } + } + security_context { + privileged = true + } + resources { + requests = { + cpu = "100m" + } + } + volume_mount { + name = "flannel-config" + mount_path = "/etc/kube-flannel/" + } + volume_mount { + name = "run-flannel" + mount_path = 
"/run/flannel" + } + volume_mount { + name = "xtables-lock" + mount_path = "/run/xtables.lock" + } + } + + volume { + name = "flannel-config" + config_map { + name = "flannel-config" + } + } + volume { + name = "run-flannel" + host_path { + path = "/run/flannel" + } + } + # Used by install-cni + volume { + name = "cni-bin-dir" + host_path { + path = "/opt/cni/bin" + } + } + volume { + name = "cni-conf-dir" + host_path { + path = "/etc/cni/net.d" + type = "DirectoryOrCreate" + } + } + # Acces iptables concurrently + volume { + name = "xtables-lock" + host_path { + path = "/run/xtables.lock" + type = "FileOrCreate" + } + } + } + } + } +} + diff --git a/addons/flannel/service-account.tf b/addons/flannel/service-account.tf new file mode 100644 index 00000000..b3b81c13 --- /dev/null +++ b/addons/flannel/service-account.tf @@ -0,0 +1,7 @@ +resource "kubernetes_service_account" "flannel" { + metadata { + name = "flannel" + namespace = "kube-system" + } +} + diff --git a/addons/flannel/variables.tf b/addons/flannel/variables.tf new file mode 100644 index 00000000..7a6ad2fc --- /dev/null +++ b/addons/flannel/variables.tf @@ -0,0 +1,11 @@ +variable "pod_cidr" { + type = string + description = "CIDR IP range to assign Kubernetes pods" + default = "10.2.0.0/16" +} + +variable "daemonset_tolerations" { + type = list(string) + description = "List of additional taint keys kube-system DaemonSets should tolerate (e.g. ['custom-role', 'gpu-role'])" + default = [] +} diff --git a/addons/flannel/versions.tf b/addons/flannel/versions.tf new file mode 100644 index 00000000..ad7f7415 --- /dev/null +++ b/addons/flannel/versions.tf @@ -0,0 +1,8 @@ +terraform { + required_providers { + kubernetes = { + source = "hashicorp/kubernetes" + version = "~> 2.8" + } + } +} diff --git a/addons/nginx-ingress/aws/rbac/cluster-role.yaml b/addons/nginx-ingress/aws/rbac/cluster-role.yaml index 90edbeb1..916a3675 100644 --- a/addons/nginx-ingress/aws/rbac/cluster-role.yaml +++ b/addons/nginx-ingress/aws/rbac/cluster-role.yaml @@ -29,7 +29,7 @@ rules: - list - watch - apiGroups: - - "" + - "" resources: - events verbs: @@ -59,4 +59,11 @@ rules: - get - list - watch - + - apiGroups: + - discovery.k8s.io + resources: + - "endpointslices" + verbs: + - get + - list + - watch diff --git a/addons/nginx-ingress/azure/rbac/cluster-role.yaml b/addons/nginx-ingress/azure/rbac/cluster-role.yaml index 90edbeb1..916a3675 100644 --- a/addons/nginx-ingress/azure/rbac/cluster-role.yaml +++ b/addons/nginx-ingress/azure/rbac/cluster-role.yaml @@ -29,7 +29,7 @@ rules: - list - watch - apiGroups: - - "" + - "" resources: - events verbs: @@ -59,4 +59,11 @@ rules: - get - list - watch - + - apiGroups: + - discovery.k8s.io + resources: + - "endpointslices" + verbs: + - get + - list + - watch diff --git a/addons/nginx-ingress/bare-metal/rbac/cluster-role.yaml b/addons/nginx-ingress/bare-metal/rbac/cluster-role.yaml index 90edbeb1..916a3675 100644 --- a/addons/nginx-ingress/bare-metal/rbac/cluster-role.yaml +++ b/addons/nginx-ingress/bare-metal/rbac/cluster-role.yaml @@ -29,7 +29,7 @@ rules: - list - watch - apiGroups: - - "" + - "" resources: - events verbs: @@ -59,4 +59,11 @@ rules: - get - list - watch - + - apiGroups: + - discovery.k8s.io + resources: + - "endpointslices" + verbs: + - get + - list + - watch diff --git a/addons/nginx-ingress/bare-metal/service.yaml b/addons/nginx-ingress/bare-metal/service.yaml index cca30291..1339b3c3 100644 --- a/addons/nginx-ingress/bare-metal/service.yaml +++ b/addons/nginx-ingress/bare-metal/service.yaml @@ -1,7 
+1,7 @@ apiVersion: v1 kind: Service metadata: - name: ingress-controller-public + name: nginx-ingress-controller namespace: ingress annotations: prometheus.io/scrape: 'true' @@ -10,7 +10,7 @@ spec: type: ClusterIP clusterIP: 10.3.0.12 selector: - name: ingress-controller-public + name: nginx-ingress-controller phase: prod ports: - name: http diff --git a/addons/nginx-ingress/digital-ocean/rbac/cluster-role.yaml b/addons/nginx-ingress/digital-ocean/rbac/cluster-role.yaml index 90edbeb1..916a3675 100644 --- a/addons/nginx-ingress/digital-ocean/rbac/cluster-role.yaml +++ b/addons/nginx-ingress/digital-ocean/rbac/cluster-role.yaml @@ -29,7 +29,7 @@ rules: - list - watch - apiGroups: - - "" + - "" resources: - events verbs: @@ -59,4 +59,11 @@ rules: - get - list - watch - + - apiGroups: + - discovery.k8s.io + resources: + - "endpointslices" + verbs: + - get + - list + - watch diff --git a/addons/nginx-ingress/google-cloud/rbac/cluster-role.yaml b/addons/nginx-ingress/google-cloud/rbac/cluster-role.yaml index 90edbeb1..916a3675 100644 --- a/addons/nginx-ingress/google-cloud/rbac/cluster-role.yaml +++ b/addons/nginx-ingress/google-cloud/rbac/cluster-role.yaml @@ -29,7 +29,7 @@ rules: - list - watch - apiGroups: - - "" + - "" resources: - events verbs: @@ -59,4 +59,11 @@ rules: - get - list - watch - + - apiGroups: + - discovery.k8s.io + resources: + - "endpointslices" + verbs: + - get + - list + - watch diff --git a/aws/fedora-coreos/kubernetes/README.md b/aws/fedora-coreos/kubernetes/README.md index 74aba0af..204202f3 100644 --- a/aws/fedora-coreos/kubernetes/README.md +++ b/aws/fedora-coreos/kubernetes/README.md @@ -11,7 +11,7 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster ## Features -* Kubernetes v1.28.3 (upstream) +* Kubernetes v1.31.3 (upstream) * Single or multi-master, [Calico](https://www.projectcalico.org/) or [Cilium](https://github.com/cilium/cilium) or [flannel](https://github.com/coreos/flannel) networking * On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/), SELinux enforcing * Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [spot](https://typhoon.psdn.io/fedora-coreos/aws/#spot) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#hosts) customization diff --git a/aws/fedora-coreos/kubernetes/ami.tf b/aws/fedora-coreos/kubernetes/ami.tf index 93d2556e..caac6121 100644 --- a/aws/fedora-coreos/kubernetes/ami.tf +++ b/aws/fedora-coreos/kubernetes/ami.tf @@ -19,7 +19,7 @@ data "aws_ami" "fedora-coreos" { } data "aws_ami" "fedora-coreos-arm" { - count = var.arch == "arm64" ? 1 : 0 + count = var.controller_arch == "arm64" ? 
1 : 0 most_recent = true owners = ["125523088429"] diff --git a/aws/fedora-coreos/kubernetes/bootstrap.tf b/aws/fedora-coreos/kubernetes/bootstrap.tf index f08b85f4..56a5e257 100644 --- a/aws/fedora-coreos/kubernetes/bootstrap.tf +++ b/aws/fedora-coreos/kubernetes/bootstrap.tf @@ -1,6 +1,6 @@ # Kubernetes assets (kubeconfig, manifests) module "bootstrap" { - source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=d151ab77b7ebdfb878ea110c86cc77238189f1ed" + source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=e6a1c7bccfc45ab299b5f8149bc3840f99b30b2b" cluster_name = var.cluster_name api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)] @@ -9,9 +9,7 @@ module "bootstrap" { network_mtu = var.network_mtu pod_cidr = var.pod_cidr service_cidr = var.service_cidr - cluster_domain_suffix = var.cluster_domain_suffix - enable_reporting = var.enable_reporting - enable_aggregation = var.enable_aggregation daemonset_tolerations = var.daemonset_tolerations + components = var.components } diff --git a/aws/fedora-coreos/kubernetes/butane/controller.yaml b/aws/fedora-coreos/kubernetes/butane/controller.yaml index 044f5fba..90cbf91f 100644 --- a/aws/fedora-coreos/kubernetes/butane/controller.yaml +++ b/aws/fedora-coreos/kubernetes/butane/controller.yaml @@ -12,7 +12,7 @@ systemd: Wants=network-online.target After=network-online.target [Service] - Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.10 + Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.13 Type=exec ExecStartPre=/bin/mkdir -p /var/lib/etcd ExecStartPre=-/usr/bin/podman rm etcd @@ -57,7 +57,7 @@ systemd: After=afterburn.service Wants=rpc-statd.service [Service] - Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3 + Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3 EnvironmentFile=/run/metadata/afterburn ExecStartPre=/bin/mkdir -p /etc/cni/net.d ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests @@ -116,7 +116,7 @@ systemd: --volume /opt/bootstrap/assets:/assets:ro,Z \ --volume /opt/bootstrap/apply:/apply:ro,Z \ --entrypoint=/apply \ - quay.io/poseidon/kubelet:v1.28.3 + quay.io/poseidon/kubelet:v1.31.3 ExecStartPost=/bin/touch /opt/bootstrap/bootstrap.done ExecStartPost=-/usr/bin/podman stop bootstrap storage: @@ -149,7 +149,7 @@ storage: cgroupDriver: systemd clusterDNS: - ${cluster_dns_service_ip} - clusterDomain: ${cluster_domain_suffix} + clusterDomain: cluster.local healthzPort: 0 rotateCertificates: true shutdownGracePeriod: 45s @@ -163,7 +163,7 @@ storage: contents: inline: | #!/bin/bash -e - mkdir -p -- auth tls/etcd tls/k8s static-manifests manifests/coredns manifests-networking + mkdir -p -- auth tls/{etcd,k8s} static-manifests manifests/{coredns,kube-proxy,network} awk '/#####/ {filename=$2; next} {print > filename}' assets mkdir -p /etc/ssl/etcd/etcd mkdir -p /etc/kubernetes/pki @@ -177,8 +177,7 @@ storage: mv static-manifests/* /etc/kubernetes/manifests/ mkdir -p /opt/bootstrap/assets mv manifests /opt/bootstrap/assets/manifests - mv manifests-networking/* /opt/bootstrap/assets/manifests/ - rm -rf assets auth static-manifests tls manifests-networking + rm -rf assets auth static-manifests tls manifests chcon -R -u system_u -t container_file_t /etc/kubernetes/pki - path: /opt/bootstrap/apply mode: 0544 diff --git a/aws/fedora-coreos/kubernetes/controllers.tf b/aws/fedora-coreos/kubernetes/controllers.tf index ad4d8c84..4e8db88b 100644 --- a/aws/fedora-coreos/kubernetes/controllers.tf +++ b/aws/fedora-coreos/kubernetes/controllers.tf @@ -20,18 +20,18 @@ resource 
"aws_instance" "controllers" { tags = { Name = "${var.cluster_name}-controller-${count.index}" } - instance_type = var.controller_type - ami = var.arch == "arm64" ? data.aws_ami.fedora-coreos-arm[0].image_id : data.aws_ami.fedora-coreos.image_id - user_data = data.ct_config.controllers.*.rendered[count.index] + ami = var.controller_arch == "arm64" ? data.aws_ami.fedora-coreos-arm[0].image_id : data.aws_ami.fedora-coreos.image_id # storage root_block_device { - volume_type = var.disk_type - volume_size = var.disk_size - iops = var.disk_iops + volume_type = var.controller_disk_type + volume_size = var.controller_disk_size + iops = var.controller_disk_iops encrypted = true - tags = {} + tags = { + Name = "${var.cluster_name}-controller-${count.index}" + } } # network @@ -39,6 +39,14 @@ resource "aws_instance" "controllers" { subnet_id = element(aws_subnet.public.*.id, count.index) vpc_security_group_ids = [aws_security_group.controller.id] + # boot + user_data = data.ct_config.controllers.*.rendered[count.index] + + # cost + credit_specification { + cpu_credits = var.controller_cpu_credits + } + lifecycle { ignore_changes = [ ami, @@ -61,7 +69,6 @@ data "ct_config" "controllers" { kubeconfig = indent(10, module.bootstrap.kubeconfig-kubelet) ssh_authorized_key = var.ssh_authorized_key cluster_dns_service_ip = cidrhost(var.service_cidr, 10) - cluster_domain_suffix = var.cluster_domain_suffix }) strict = true snippets = var.controller_snippets diff --git a/aws/fedora-coreos/kubernetes/network.tf b/aws/fedora-coreos/kubernetes/network.tf index bdb4bff1..98ac9bba 100644 --- a/aws/fedora-coreos/kubernetes/network.tf +++ b/aws/fedora-coreos/kubernetes/network.tf @@ -47,17 +47,25 @@ resource "aws_route" "egress-ipv6" { resource "aws_subnet" "public" { count = length(data.aws_availability_zones.all.names) - vpc_id = aws_vpc.network.id - availability_zone = data.aws_availability_zones.all.names[count.index] - - cidr_block = cidrsubnet(var.host_cidr, 4, count.index) - ipv6_cidr_block = cidrsubnet(aws_vpc.network.ipv6_cidr_block, 8, count.index) - map_public_ip_on_launch = true - assign_ipv6_address_on_creation = true - tags = { "Name" = "${var.cluster_name}-public-${count.index}" } + vpc_id = aws_vpc.network.id + availability_zone = data.aws_availability_zones.all.names[count.index] + + # IPv4 and IPv6 CIDR blocks + cidr_block = cidrsubnet(var.host_cidr, 4, count.index) + ipv6_cidr_block = cidrsubnet(aws_vpc.network.ipv6_cidr_block, 8, count.index) + + # Assign IPv4 and IPv6 addresses to instances + map_public_ip_on_launch = true + assign_ipv6_address_on_creation = true + + # Hostnames assigned to instances + # resource-name: .region.compute.internal + private_dns_hostname_type_on_launch = "resource-name" + enable_resource_name_dns_a_record_on_launch = true + enable_resource_name_dns_aaaa_record_on_launch = true } resource "aws_route_table_association" "public" { diff --git a/aws/fedora-coreos/kubernetes/security.tf b/aws/fedora-coreos/kubernetes/security.tf index 3d94cd66..7bd62e42 100644 --- a/aws/fedora-coreos/kubernetes/security.tf +++ b/aws/fedora-coreos/kubernetes/security.tf @@ -92,6 +92,30 @@ resource "aws_security_group_rule" "controller-cilium-health-self" { self = true } +resource "aws_security_group_rule" "controller-cilium-metrics" { + count = var.networking == "cilium" ? 
1 : 0 + + security_group_id = aws_security_group.controller.id + + type = "ingress" + protocol = "tcp" + from_port = 9962 + to_port = 9965 + source_security_group_id = aws_security_group.worker.id +} + +resource "aws_security_group_rule" "controller-cilium-metrics-self" { + count = var.networking == "cilium" ? 1 : 0 + + security_group_id = aws_security_group.controller.id + + type = "ingress" + protocol = "tcp" + from_port = 9962 + to_port = 9965 + self = true +} + # IANA VXLAN default resource "aws_security_group_rule" "controller-vxlan" { count = var.networking == "flannel" ? 1 : 0 @@ -379,6 +403,30 @@ resource "aws_security_group_rule" "worker-cilium-health-self" { self = true } +resource "aws_security_group_rule" "worker-cilium-metrics" { + count = var.networking == "cilium" ? 1 : 0 + + security_group_id = aws_security_group.worker.id + + type = "ingress" + protocol = "tcp" + from_port = 9962 + to_port = 9965 + source_security_group_id = aws_security_group.controller.id +} + +resource "aws_security_group_rule" "worker-cilium-metrics-self" { + count = var.networking == "cilium" ? 1 : 0 + + security_group_id = aws_security_group.worker.id + + type = "ingress" + protocol = "tcp" + from_port = 9962 + to_port = 9965 + self = true +} + # IANA VXLAN default resource "aws_security_group_rule" "worker-vxlan" { count = var.networking == "flannel" ? 1 : 0 diff --git a/aws/fedora-coreos/kubernetes/variables.tf b/aws/fedora-coreos/kubernetes/variables.tf index b8679bf9..f54506b5 100644 --- a/aws/fedora-coreos/kubernetes/variables.tf +++ b/aws/fedora-coreos/kubernetes/variables.tf @@ -17,30 +17,6 @@ variable "dns_zone_id" { # instances -variable "controller_count" { - type = number - description = "Number of controllers (i.e. masters)" - default = 1 -} - -variable "worker_count" { - type = number - description = "Number of workers" - default = 1 -} - -variable "controller_type" { - type = string - description = "EC2 instance type for controllers" - default = "t3.small" -} - -variable "worker_type" { - type = string - description = "EC2 instance type for workers" - default = "t3.small" -} - variable "os_stream" { type = string description = "Fedora CoreOS image stream for instances (e.g. stable, testing, next)" @@ -52,24 +28,78 @@ variable "os_stream" { } } -variable "disk_size" { +variable "controller_count" { + type = number + description = "Number of controllers (i.e. masters)" + default = 1 +} + +variable "controller_type" { + type = string + description = "EC2 instance type for controllers" + default = "t3.small" +} + +variable "controller_disk_size" { type = number description = "Size of the EBS volume in GB" default = 30 } -variable "disk_type" { +variable "controller_disk_type" { type = string description = "Type of the EBS volume (e.g. standard, gp2, gp3, io1)" default = "gp3" } -variable "disk_iops" { +variable "controller_disk_iops" { type = number description = "IOPS of the EBS volume (e.g. 3000)" default = 3000 } +variable "controller_cpu_credits" { + type = string + description = "CPU credits mode (if using a burstable instance type)" + default = null +} + +variable "worker_count" { + type = number + description = "Number of workers" + default = 1 +} + +variable "worker_type" { + type = string + description = "EC2 instance type for workers" + default = "t3.small" +} + +variable "worker_disk_size" { + type = number + description = "Size of the EBS volume in GB" + default = 30 +} + +variable "worker_disk_type" { + type = string + description = "Type of the EBS volume (e.g. 
standard, gp2, gp3, io1)" + default = "gp3" +} + +variable "worker_disk_iops" { + type = number + description = "IOPS of the EBS volume (e.g. 3000)" + default = 3000 +} + +variable "worker_cpu_credits" { + type = string + description = "CPU credits mode (if using a burstable instance type)" + default = null +} + variable "worker_price" { type = number description = "Spot price in USD for worker instances or 0 to use on-demand instances" @@ -134,40 +164,31 @@ EOD default = "10.3.0.0/16" } -variable "enable_reporting" { - type = bool - description = "Enable usage or analytics reporting to upstreams (Calico)" - default = false -} - -variable "enable_aggregation" { - type = bool - description = "Enable the Kubernetes Aggregation Layer" - default = true -} - variable "worker_node_labels" { type = list(string) description = "List of initial worker node labels" default = [] } -# unofficial, undocumented, unsupported +# advanced -variable "cluster_domain_suffix" { +variable "controller_arch" { type = string - description = "Queries for domains with the suffix will be answered by CoreDNS. Default is cluster.local (e.g. foo.default.svc.cluster.local)" - default = "cluster.local" + description = "Controller node(s) architecture (amd64 or arm64)" + default = "amd64" + validation { + condition = contains(["amd64", "arm64"], var.controller_arch) + error_message = "The controller_arch must be amd64 or arm64." + } } -variable "arch" { +variable "worker_arch" { type = string - description = "Container architecture (amd64 or arm64)" + description = "Worker node(s) architecture (amd64 or arm64)" default = "amd64" - validation { - condition = var.arch == "amd64" || var.arch == "arm64" - error_message = "The arch must be amd64 or arm64." + condition = contains(["amd64", "arm64"], var.worker_arch) + error_message = "The worker_arch must be amd64 or arm64." } } @@ -176,3 +197,19 @@ variable "daemonset_tolerations" { description = "List of additional taint keys kube-system DaemonSets should tolerate (e.g. 
['custom-role', 'gpu-role'])" default = [] } + +variable "components" { + description = "Configure pre-installed cluster components" + # Component configs are passed through to terraform-render-bootstrap, + # which handles type enforcement and defines defaults + # https://github.com/poseidon/terraform-render-bootstrap/blob/main/variables.tf#L95 + type = object({ + enable = optional(bool) + coredns = optional(map(any)) + kube_proxy = optional(map(any)) + flannel = optional(map(any)) + calico = optional(map(any)) + cilium = optional(map(any)) + }) + default = null +} diff --git a/aws/fedora-coreos/kubernetes/workers.tf b/aws/fedora-coreos/kubernetes/workers.tf index 0ec9cdb6..debe57a6 100644 --- a/aws/fedora-coreos/kubernetes/workers.tf +++ b/aws/fedora-coreos/kubernetes/workers.tf @@ -6,20 +6,24 @@ module "workers" { vpc_id = aws_vpc.network.id subnet_ids = aws_subnet.public.*.id security_groups = [aws_security_group.worker.id] - worker_count = var.worker_count - instance_type = var.worker_type - os_stream = var.os_stream - arch = var.arch - disk_size = var.disk_size - spot_price = var.worker_price - target_groups = var.worker_target_groups + + # instances + os_stream = var.os_stream + worker_count = var.worker_count + instance_type = var.worker_type + arch = var.worker_arch + disk_type = var.worker_disk_type + disk_size = var.worker_disk_size + disk_iops = var.worker_disk_iops + cpu_credits = var.worker_cpu_credits + spot_price = var.worker_price + target_groups = var.worker_target_groups # configuration - kubeconfig = module.bootstrap.kubeconfig-kubelet - ssh_authorized_key = var.ssh_authorized_key - service_cidr = var.service_cidr - cluster_domain_suffix = var.cluster_domain_suffix - snippets = var.worker_snippets - node_labels = var.worker_node_labels + kubeconfig = module.bootstrap.kubeconfig-kubelet + ssh_authorized_key = var.ssh_authorized_key + service_cidr = var.service_cidr + snippets = var.worker_snippets + node_labels = var.worker_node_labels } diff --git a/aws/fedora-coreos/kubernetes/workers/butane/worker.yaml b/aws/fedora-coreos/kubernetes/workers/butane/worker.yaml index f5c71872..1ddfa590 100644 --- a/aws/fedora-coreos/kubernetes/workers/butane/worker.yaml +++ b/aws/fedora-coreos/kubernetes/workers/butane/worker.yaml @@ -29,7 +29,7 @@ systemd: After=afterburn.service Wants=rpc-statd.service [Service] - Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3 + Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3 EnvironmentFile=/run/metadata/afterburn ExecStartPre=/bin/mkdir -p /etc/cni/net.d ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests @@ -104,7 +104,7 @@ storage: cgroupDriver: systemd clusterDNS: - ${cluster_dns_service_ip} - clusterDomain: ${cluster_domain_suffix} + clusterDomain: cluster.local healthzPort: 0 rotateCertificates: true shutdownGracePeriod: 45s diff --git a/aws/fedora-coreos/kubernetes/workers/variables.tf b/aws/fedora-coreos/kubernetes/workers/variables.tf index 4bb8b714..4b743368 100644 --- a/aws/fedora-coreos/kubernetes/workers/variables.tf +++ b/aws/fedora-coreos/kubernetes/workers/variables.tf @@ -69,6 +69,12 @@ variable "spot_price" { default = 0 } +variable "cpu_credits" { + type = string + description = "CPU burst credits mode (if applicable)" + default = null +} + variable "target_groups" { type = list(string) description = "Additional target group ARNs to which instances should be added" @@ -102,12 +108,6 @@ EOD default = "10.3.0.0/16" } -variable "cluster_domain_suffix" { - type = string - description = "Queries for domains with 
the suffix will be answered by coredns. Default is cluster.local (e.g. foo.default.svc.cluster.local) " - default = "cluster.local" -} - variable "node_labels" { type = list(string) description = "List of initial node labels" @@ -120,15 +120,14 @@ variable "node_taints" { default = [] } -# unofficial, undocumented, unsupported +# advanced variable "arch" { type = string description = "Container architecture (amd64 or arm64)" default = "amd64" - validation { - condition = var.arch == "amd64" || var.arch == "arm64" + condition = contains(["amd64", "arm64"], var.arch) error_message = "The arch must be amd64 or arm64." } } diff --git a/aws/fedora-coreos/kubernetes/workers/workers.tf b/aws/fedora-coreos/kubernetes/workers/workers.tf index 1b0fc1e3..2d07d8b4 100644 --- a/aws/fedora-coreos/kubernetes/workers/workers.tf +++ b/aws/fedora-coreos/kubernetes/workers/workers.tf @@ -3,16 +3,14 @@ resource "aws_autoscaling_group" "workers" { name = "${var.name}-worker" # count - desired_capacity = var.worker_count - min_size = var.worker_count - max_size = var.worker_count + 2 - default_cooldown = 30 - health_check_grace_period = 30 + desired_capacity = var.worker_count + min_size = var.worker_count + max_size = var.worker_count + 2 # network vpc_zone_identifier = var.subnet_ids - # template + # instance template launch_template { id = aws_launch_template.worker.id version = aws_launch_template.worker.latest_version @@ -32,6 +30,11 @@ resource "aws_autoscaling_group" "workers" { min_healthy_percentage = 90 } } + # Grace period before checking new instance's health + health_check_grace_period = 30 + # Cooldown period between scaling activities + default_cooldown = 30 + lifecycle { # override the default destroy and replace update behavior @@ -56,11 +59,6 @@ resource "aws_launch_template" "worker" { name_prefix = "${var.name}-worker" image_id = local.ami_id instance_type = var.instance_type - monitoring { - enabled = false - } - - user_data = sensitive(base64encode(data.ct_config.worker.rendered)) # storage ebs_optimized = true @@ -76,9 +74,26 @@ resource "aws_launch_template" "worker" { } # network - vpc_security_group_ids = var.security_groups + network_interfaces { + associate_public_ip_address = true + security_groups = var.security_groups + } - # spot + # boot + user_data = sensitive(base64encode(data.ct_config.worker.rendered)) + + # metadata + metadata_options { + http_tokens = "optional" + } + monitoring { + enabled = false + } + + # cost + credit_specification { + cpu_credits = var.cpu_credits + } dynamic "instance_market_options" { for_each = var.spot_price > 0 ? 
[1] : [] content { @@ -102,7 +117,6 @@ data "ct_config" "worker" { kubeconfig = indent(10, var.kubeconfig) ssh_authorized_key = var.ssh_authorized_key cluster_dns_service_ip = cidrhost(var.service_cidr, 10) - cluster_domain_suffix = var.cluster_domain_suffix node_labels = join(",", var.node_labels) node_taints = join(",", var.node_taints) }) diff --git a/aws/flatcar-linux/kubernetes/README.md b/aws/flatcar-linux/kubernetes/README.md index aa7d7b16..f511e4e0 100644 --- a/aws/flatcar-linux/kubernetes/README.md +++ b/aws/flatcar-linux/kubernetes/README.md @@ -11,7 +11,7 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster ## Features -* Kubernetes v1.28.3 (upstream) +* Kubernetes v1.31.3 (upstream) * Single or multi-master, [Calico](https://www.projectcalico.org/) or [Cilium](https://github.com/cilium/cilium) or [flannel](https://github.com/coreos/flannel) networking * On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/) * Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [spot](https://typhoon.psdn.io/flatcar-linux/aws/#spot) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#hosts) customization diff --git a/aws/flatcar-linux/kubernetes/ami.tf b/aws/flatcar-linux/kubernetes/ami.tf index 43eb9f59..5b35ec06 100644 --- a/aws/flatcar-linux/kubernetes/ami.tf +++ b/aws/flatcar-linux/kubernetes/ami.tf @@ -1,7 +1,7 @@ locals { # Pick a Flatcar Linux AMI # flatcar-stable -> Flatcar Linux AMI - ami_id = var.arch == "arm64" ? data.aws_ami.flatcar-arm64[0].image_id : data.aws_ami.flatcar.image_id + ami_id = var.controller_arch == "arm64" ? data.aws_ami.flatcar-arm64[0].image_id : data.aws_ami.flatcar.image_id channel = split("-", var.os_image)[1] } @@ -26,7 +26,7 @@ data "aws_ami" "flatcar" { } data "aws_ami" "flatcar-arm64" { - count = var.arch == "arm64" ? 1 : 0 + count = var.controller_arch == "arm64" ? 
1 : 0 most_recent = true owners = ["075585003325"] diff --git a/aws/flatcar-linux/kubernetes/bootstrap.tf b/aws/flatcar-linux/kubernetes/bootstrap.tf index f08b85f4..56a5e257 100644 --- a/aws/flatcar-linux/kubernetes/bootstrap.tf +++ b/aws/flatcar-linux/kubernetes/bootstrap.tf @@ -1,6 +1,6 @@ # Kubernetes assets (kubeconfig, manifests) module "bootstrap" { - source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=d151ab77b7ebdfb878ea110c86cc77238189f1ed" + source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=e6a1c7bccfc45ab299b5f8149bc3840f99b30b2b" cluster_name = var.cluster_name api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)] @@ -9,9 +9,7 @@ module "bootstrap" { network_mtu = var.network_mtu pod_cidr = var.pod_cidr service_cidr = var.service_cidr - cluster_domain_suffix = var.cluster_domain_suffix - enable_reporting = var.enable_reporting - enable_aggregation = var.enable_aggregation daemonset_tolerations = var.daemonset_tolerations + components = var.components } diff --git a/aws/flatcar-linux/kubernetes/butane/controller.yaml b/aws/flatcar-linux/kubernetes/butane/controller.yaml index 58268cc9..febcb2c0 100644 --- a/aws/flatcar-linux/kubernetes/butane/controller.yaml +++ b/aws/flatcar-linux/kubernetes/butane/controller.yaml @@ -11,7 +11,7 @@ systemd: Requires=docker.service After=docker.service [Service] - Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.10 + Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.13 ExecStartPre=/usr/bin/docker run -d \ --name etcd \ --network host \ @@ -58,7 +58,7 @@ systemd: After=coreos-metadata.service Wants=rpc-statd.service [Service] - Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3 + Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3 EnvironmentFile=/run/metadata/coreos ExecStartPre=/bin/mkdir -p /etc/cni/net.d ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests @@ -109,7 +109,7 @@ systemd: Type=oneshot RemainAfterExit=true WorkingDirectory=/opt/bootstrap - Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3 + Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3 ExecStart=/usr/bin/docker run \ -v /etc/kubernetes/pki:/etc/kubernetes/pki:ro \ -v /opt/bootstrap/assets:/assets:ro \ @@ -148,7 +148,7 @@ storage: cgroupDriver: systemd clusterDNS: - ${cluster_dns_service_ip} - clusterDomain: ${cluster_domain_suffix} + clusterDomain: cluster.local healthzPort: 0 rotateCertificates: true shutdownGracePeriod: 45s @@ -162,7 +162,7 @@ storage: contents: inline: | #!/bin/bash -e - mkdir -p -- auth tls/etcd tls/k8s static-manifests manifests/coredns manifests-networking + mkdir -p -- auth tls/{etcd,k8s} static-manifests manifests/{coredns,kube-proxy,network} awk '/#####/ {filename=$2; next} {print > filename}' assets mkdir -p /etc/ssl/etcd/etcd mkdir -p /etc/kubernetes/pki @@ -177,8 +177,7 @@ storage: mv static-manifests/* /etc/kubernetes/manifests/ mkdir -p /opt/bootstrap/assets mv manifests /opt/bootstrap/assets/manifests - mv manifests-networking/* /opt/bootstrap/assets/manifests/ - rm -rf assets auth static-manifests tls manifests-networking + rm -rf assets auth static-manifests tls manifests - path: /opt/bootstrap/apply mode: 0544 contents: diff --git a/aws/flatcar-linux/kubernetes/controllers.tf b/aws/flatcar-linux/kubernetes/controllers.tf index 82d92ce8..90442cde 100644 --- a/aws/flatcar-linux/kubernetes/controllers.tf +++ b/aws/flatcar-linux/kubernetes/controllers.tf @@ -20,19 +20,18 @@ resource "aws_instance" "controllers" { tags = { Name = 
"${var.cluster_name}-controller-${count.index}" } - instance_type = var.controller_type - - ami = local.ami_id - user_data = data.ct_config.controllers.*.rendered[count.index] + ami = local.ami_id # storage root_block_device { - volume_type = var.disk_type - volume_size = var.disk_size - iops = var.disk_iops + volume_type = var.controller_disk_type + volume_size = var.controller_disk_size + iops = var.controller_disk_iops encrypted = true - tags = {} + tags = { + Name = "${var.cluster_name}-controller-${count.index}" + } } # network @@ -40,6 +39,14 @@ resource "aws_instance" "controllers" { subnet_id = element(aws_subnet.public.*.id, count.index) vpc_security_group_ids = [aws_security_group.controller.id] + # boot + user_data = data.ct_config.controllers.*.rendered[count.index] + + # cost + credit_specification { + cpu_credits = var.controller_cpu_credits + } + lifecycle { ignore_changes = [ ami, @@ -62,7 +69,6 @@ data "ct_config" "controllers" { kubeconfig = indent(10, module.bootstrap.kubeconfig-kubelet) ssh_authorized_key = var.ssh_authorized_key cluster_dns_service_ip = cidrhost(var.service_cidr, 10) - cluster_domain_suffix = var.cluster_domain_suffix }) strict = true snippets = var.controller_snippets diff --git a/aws/flatcar-linux/kubernetes/network.tf b/aws/flatcar-linux/kubernetes/network.tf index bdb4bff1..98ac9bba 100644 --- a/aws/flatcar-linux/kubernetes/network.tf +++ b/aws/flatcar-linux/kubernetes/network.tf @@ -47,17 +47,25 @@ resource "aws_route" "egress-ipv6" { resource "aws_subnet" "public" { count = length(data.aws_availability_zones.all.names) - vpc_id = aws_vpc.network.id - availability_zone = data.aws_availability_zones.all.names[count.index] - - cidr_block = cidrsubnet(var.host_cidr, 4, count.index) - ipv6_cidr_block = cidrsubnet(aws_vpc.network.ipv6_cidr_block, 8, count.index) - map_public_ip_on_launch = true - assign_ipv6_address_on_creation = true - tags = { "Name" = "${var.cluster_name}-public-${count.index}" } + vpc_id = aws_vpc.network.id + availability_zone = data.aws_availability_zones.all.names[count.index] + + # IPv4 and IPv6 CIDR blocks + cidr_block = cidrsubnet(var.host_cidr, 4, count.index) + ipv6_cidr_block = cidrsubnet(aws_vpc.network.ipv6_cidr_block, 8, count.index) + + # Assign IPv4 and IPv6 addresses to instances + map_public_ip_on_launch = true + assign_ipv6_address_on_creation = true + + # Hostnames assigned to instances + # resource-name: .region.compute.internal + private_dns_hostname_type_on_launch = "resource-name" + enable_resource_name_dns_a_record_on_launch = true + enable_resource_name_dns_aaaa_record_on_launch = true } resource "aws_route_table_association" "public" { diff --git a/aws/flatcar-linux/kubernetes/security.tf b/aws/flatcar-linux/kubernetes/security.tf index 3d94cd66..7bd62e42 100644 --- a/aws/flatcar-linux/kubernetes/security.tf +++ b/aws/flatcar-linux/kubernetes/security.tf @@ -92,6 +92,30 @@ resource "aws_security_group_rule" "controller-cilium-health-self" { self = true } +resource "aws_security_group_rule" "controller-cilium-metrics" { + count = var.networking == "cilium" ? 1 : 0 + + security_group_id = aws_security_group.controller.id + + type = "ingress" + protocol = "tcp" + from_port = 9962 + to_port = 9965 + source_security_group_id = aws_security_group.worker.id +} + +resource "aws_security_group_rule" "controller-cilium-metrics-self" { + count = var.networking == "cilium" ? 
1 : 0 + + security_group_id = aws_security_group.controller.id + + type = "ingress" + protocol = "tcp" + from_port = 9962 + to_port = 9965 + self = true +} + # IANA VXLAN default resource "aws_security_group_rule" "controller-vxlan" { count = var.networking == "flannel" ? 1 : 0 @@ -379,6 +403,30 @@ resource "aws_security_group_rule" "worker-cilium-health-self" { self = true } +resource "aws_security_group_rule" "worker-cilium-metrics" { + count = var.networking == "cilium" ? 1 : 0 + + security_group_id = aws_security_group.worker.id + + type = "ingress" + protocol = "tcp" + from_port = 9962 + to_port = 9965 + source_security_group_id = aws_security_group.controller.id +} + +resource "aws_security_group_rule" "worker-cilium-metrics-self" { + count = var.networking == "cilium" ? 1 : 0 + + security_group_id = aws_security_group.worker.id + + type = "ingress" + protocol = "tcp" + from_port = 9962 + to_port = 9965 + self = true +} + # IANA VXLAN default resource "aws_security_group_rule" "worker-vxlan" { count = var.networking == "flannel" ? 1 : 0 diff --git a/aws/flatcar-linux/kubernetes/variables.tf b/aws/flatcar-linux/kubernetes/variables.tf index 81e62ed2..9c267385 100644 --- a/aws/flatcar-linux/kubernetes/variables.tf +++ b/aws/flatcar-linux/kubernetes/variables.tf @@ -17,30 +17,6 @@ variable "dns_zone_id" { # instances -variable "controller_count" { - type = number - description = "Number of controllers (i.e. masters)" - default = 1 -} - -variable "worker_count" { - type = number - description = "Number of workers" - default = 1 -} - -variable "controller_type" { - type = string - description = "EC2 instance type for controllers" - default = "t3.small" -} - -variable "worker_type" { - type = string - description = "EC2 instance type for workers" - default = "t3.small" -} - variable "os_image" { type = string description = "AMI channel for a Container Linux derivative (flatcar-stable, flatcar-beta, flatcar-alpha)" @@ -52,24 +28,78 @@ variable "os_image" { } } -variable "disk_size" { +variable "controller_count" { + type = number + description = "Number of controllers (i.e. masters)" + default = 1 +} + +variable "controller_type" { + type = string + description = "EC2 instance type for controllers" + default = "t3.small" +} + +variable "controller_disk_size" { type = number description = "Size of the EBS volume in GB" default = 30 } -variable "disk_type" { +variable "controller_disk_type" { type = string description = "Type of the EBS volume (e.g. standard, gp2, gp3, io1)" default = "gp3" } -variable "disk_iops" { +variable "controller_disk_iops" { type = number description = "IOPS of the EBS volume (e.g. 3000)" default = 3000 } +variable "controller_cpu_credits" { + type = string + description = "CPU credits mode (if using a burstable instance type)" + default = null +} + +variable "worker_count" { + type = number + description = "Number of workers" + default = 1 +} + +variable "worker_type" { + type = string + description = "EC2 instance type for workers" + default = "t3.small" +} + +variable "worker_disk_size" { + type = number + description = "Size of the EBS volume in GB" + default = 30 +} + +variable "worker_disk_type" { + type = string + description = "Type of the EBS volume (e.g. standard, gp2, gp3, io1)" + default = "gp3" +} + +variable "worker_disk_iops" { + type = number + description = "IOPS of the EBS volume (e.g. 
3000)" + default = 3000 +} + +variable "worker_cpu_credits" { + type = string + description = "CPU credits mode (if using a burstable instance type)" + default = null +} + variable "worker_price" { type = number description = "Spot price in USD for worker instances or 0 to use on-demand instances" @@ -134,40 +164,31 @@ EOD default = "10.3.0.0/16" } -variable "enable_reporting" { - type = bool - description = "Enable usage or analytics reporting to upstreams (Calico)" - default = false -} - -variable "enable_aggregation" { - type = bool - description = "Enable the Kubernetes Aggregation Layer" - default = true -} - variable "worker_node_labels" { type = list(string) description = "List of initial worker node labels" default = [] } -# unofficial, undocumented, unsupported +# advanced -variable "cluster_domain_suffix" { +variable "controller_arch" { type = string - description = "Queries for domains with the suffix will be answered by CoreDNS. Default is cluster.local (e.g. foo.default.svc.cluster.local)" - default = "cluster.local" + description = "Controller node(s) architecture (amd64 or arm64)" + default = "amd64" + validation { + condition = contains(["amd64", "arm64"], var.controller_arch) + error_message = "The controller_arch must be amd64 or arm64." + } } -variable "arch" { +variable "worker_arch" { type = string - description = "Container architecture (amd64 or arm64)" + description = "Worker node(s) architecture (amd64 or arm64)" default = "amd64" - validation { - condition = var.arch == "amd64" || var.arch == "arm64" - error_message = "The arch must be amd64 or arm64." + condition = contains(["amd64", "arm64"], var.worker_arch) + error_message = "The worker_arch must be amd64 or arm64." } } @@ -176,3 +197,19 @@ variable "daemonset_tolerations" { description = "List of additional taint keys kube-system DaemonSets should tolerate (e.g. 
['custom-role', 'gpu-role'])" default = [] } + +variable "components" { + description = "Configure pre-installed cluster components" + # Component configs are passed through to terraform-render-bootstrap, + # which handles type enforcement and defines defaults + # https://github.com/poseidon/terraform-render-bootstrap/blob/main/variables.tf#L95 + type = object({ + enable = optional(bool) + coredns = optional(map(any)) + kube_proxy = optional(map(any)) + flannel = optional(map(any)) + calico = optional(map(any)) + cilium = optional(map(any)) + }) + default = null +} diff --git a/aws/flatcar-linux/kubernetes/versions.tf b/aws/flatcar-linux/kubernetes/versions.tf index 5eaa340c..f9f11f4b 100644 --- a/aws/flatcar-linux/kubernetes/versions.tf +++ b/aws/flatcar-linux/kubernetes/versions.tf @@ -7,7 +7,7 @@ terraform { null = ">= 2.1" ct = { source = "poseidon/ct" - version = "~> 0.11" + version = "~> 0.13" } } } diff --git a/aws/flatcar-linux/kubernetes/workers.tf b/aws/flatcar-linux/kubernetes/workers.tf index 64a46cab..9e5de509 100644 --- a/aws/flatcar-linux/kubernetes/workers.tf +++ b/aws/flatcar-linux/kubernetes/workers.tf @@ -6,20 +6,23 @@ module "workers" { vpc_id = aws_vpc.network.id subnet_ids = aws_subnet.public.*.id security_groups = [aws_security_group.worker.id] - worker_count = var.worker_count - instance_type = var.worker_type - os_image = var.os_image - arch = var.arch - disk_size = var.disk_size - spot_price = var.worker_price - target_groups = var.worker_target_groups + + # instances + os_image = var.os_image + worker_count = var.worker_count + instance_type = var.worker_type + arch = var.worker_arch + disk_type = var.worker_disk_type + disk_size = var.worker_disk_size + disk_iops = var.worker_disk_iops + spot_price = var.worker_price + target_groups = var.worker_target_groups # configuration - kubeconfig = module.bootstrap.kubeconfig-kubelet - ssh_authorized_key = var.ssh_authorized_key - service_cidr = var.service_cidr - cluster_domain_suffix = var.cluster_domain_suffix - snippets = var.worker_snippets - node_labels = var.worker_node_labels + kubeconfig = module.bootstrap.kubeconfig-kubelet + ssh_authorized_key = var.ssh_authorized_key + service_cidr = var.service_cidr + snippets = var.worker_snippets + node_labels = var.worker_node_labels } diff --git a/aws/flatcar-linux/kubernetes/workers/butane/worker.yaml b/aws/flatcar-linux/kubernetes/workers/butane/worker.yaml index 9e78ea78..ada79a8b 100644 --- a/aws/flatcar-linux/kubernetes/workers/butane/worker.yaml +++ b/aws/flatcar-linux/kubernetes/workers/butane/worker.yaml @@ -30,7 +30,7 @@ systemd: After=coreos-metadata.service Wants=rpc-statd.service [Service] - Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3 + Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3 EnvironmentFile=/run/metadata/coreos ExecStartPre=/bin/mkdir -p /etc/cni/net.d ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests @@ -103,7 +103,7 @@ storage: cgroupDriver: systemd clusterDNS: - ${cluster_dns_service_ip} - clusterDomain: ${cluster_domain_suffix} + clusterDomain: cluster.local healthzPort: 0 rotateCertificates: true shutdownGracePeriod: 45s diff --git a/aws/flatcar-linux/kubernetes/workers/variables.tf b/aws/flatcar-linux/kubernetes/workers/variables.tf index 26bd2c1d..13a203ed 100644 --- a/aws/flatcar-linux/kubernetes/workers/variables.tf +++ b/aws/flatcar-linux/kubernetes/workers/variables.tf @@ -69,6 +69,12 @@ variable "spot_price" { default = 0 } +variable "cpu_credits" { + type = string + description = "CPU burst credits mode (if 
applicable)" + default = null +} + variable "target_groups" { type = list(string) description = "Additional target group ARNs to which instances should be added" @@ -102,12 +108,6 @@ EOD default = "10.3.0.0/16" } -variable "cluster_domain_suffix" { - type = string - description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. foo.default.svc.cluster.local) " - default = "cluster.local" -} - variable "node_labels" { type = list(string) description = "List of initial node labels" @@ -128,7 +128,7 @@ variable "arch" { default = "amd64" validation { - condition = var.arch == "amd64" || var.arch == "arm64" + condition = contains(["amd64", "arm64"], var.arch) error_message = "The arch must be amd64 or arm64." } } diff --git a/aws/flatcar-linux/kubernetes/workers/versions.tf b/aws/flatcar-linux/kubernetes/workers/versions.tf index 4d442fc3..256e9c63 100644 --- a/aws/flatcar-linux/kubernetes/workers/versions.tf +++ b/aws/flatcar-linux/kubernetes/workers/versions.tf @@ -6,7 +6,7 @@ terraform { aws = ">= 2.23, <= 6.0" ct = { source = "poseidon/ct" - version = "~> 0.11" + version = "~> 0.13" } } } diff --git a/aws/flatcar-linux/kubernetes/workers/workers.tf b/aws/flatcar-linux/kubernetes/workers/workers.tf index 67c015ca..268650c7 100644 --- a/aws/flatcar-linux/kubernetes/workers/workers.tf +++ b/aws/flatcar-linux/kubernetes/workers/workers.tf @@ -3,16 +3,14 @@ resource "aws_autoscaling_group" "workers" { name = "${var.name}-worker" # count - desired_capacity = var.worker_count - min_size = var.worker_count - max_size = var.worker_count + 2 - default_cooldown = 30 - health_check_grace_period = 30 + desired_capacity = var.worker_count + min_size = var.worker_count + max_size = var.worker_count + 2 # network vpc_zone_identifier = var.subnet_ids - # template + # instance template launch_template { id = aws_launch_template.worker.id version = aws_launch_template.worker.latest_version @@ -32,6 +30,10 @@ resource "aws_autoscaling_group" "workers" { min_healthy_percentage = 90 } } + # Grace period before checking new instance's health + health_check_grace_period = 30 + # Cooldown period between scaling activities + default_cooldown = 30 lifecycle { # override the default destroy and replace update behavior @@ -56,11 +58,6 @@ resource "aws_launch_template" "worker" { name_prefix = "${var.name}-worker" image_id = local.ami_id instance_type = var.instance_type - monitoring { - enabled = false - } - - user_data = sensitive(base64encode(data.ct_config.worker.rendered)) # storage ebs_optimized = true @@ -76,9 +73,26 @@ resource "aws_launch_template" "worker" { } # network - vpc_security_group_ids = var.security_groups + network_interfaces { + associate_public_ip_address = true + security_groups = var.security_groups + } - # spot + # boot + user_data = sensitive(base64encode(data.ct_config.worker.rendered)) + + # metadata + metadata_options { + http_tokens = "optional" + } + monitoring { + enabled = false + } + + # cost + credit_specification { + cpu_credits = var.cpu_credits + } dynamic "instance_market_options" { for_each = var.spot_price > 0 ? 
[1] : [] content { @@ -102,7 +116,6 @@ data "ct_config" "worker" { kubeconfig = indent(10, var.kubeconfig) ssh_authorized_key = var.ssh_authorized_key cluster_dns_service_ip = cidrhost(var.service_cidr, 10) - cluster_domain_suffix = var.cluster_domain_suffix node_labels = join(",", var.node_labels) node_taints = join(",", var.node_taints) }) diff --git a/azure/fedora-coreos/kubernetes/README.md b/azure/fedora-coreos/kubernetes/README.md index 09c7f897..70f1936e 100644 --- a/azure/fedora-coreos/kubernetes/README.md +++ b/azure/fedora-coreos/kubernetes/README.md @@ -11,7 +11,7 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster ## Features -* Kubernetes v1.28.3 (upstream) +* Kubernetes v1.31.3 (upstream) * Single or multi-master, [Calico](https://www.projectcalico.org/) or [Cilium](https://github.com/cilium/cilium) or [flannel](https://github.com/coreos/flannel) networking * On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/), SELinux enforcing * Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [spot priority](https://typhoon.psdn.io/fedora-coreos/azure/#low-priority) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#hosts) customization diff --git a/azure/fedora-coreos/kubernetes/bootstrap.tf b/azure/fedora-coreos/kubernetes/bootstrap.tf index 6cd97007..1e735c10 100644 --- a/azure/fedora-coreos/kubernetes/bootstrap.tf +++ b/azure/fedora-coreos/kubernetes/bootstrap.tf @@ -1,13 +1,12 @@ # Kubernetes assets (kubeconfig, manifests) module "bootstrap" { - source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=d151ab77b7ebdfb878ea110c86cc77238189f1ed" + source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=e6a1c7bccfc45ab299b5f8149bc3840f99b30b2b" cluster_name = var.cluster_name api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)] etcd_servers = formatlist("%s.%s", azurerm_dns_a_record.etcds.*.name, var.dns_zone) networking = var.networking - # only effective with Calico networking # we should be able to use 1450 MTU, but in practice, 1410 was needed network_encapsulation = "vxlan" @@ -15,9 +14,7 @@ module "bootstrap" { pod_cidr = var.pod_cidr service_cidr = var.service_cidr - cluster_domain_suffix = var.cluster_domain_suffix - enable_reporting = var.enable_reporting - enable_aggregation = var.enable_aggregation daemonset_tolerations = var.daemonset_tolerations + components = var.components } diff --git a/azure/fedora-coreos/kubernetes/butane/controller.yaml b/azure/fedora-coreos/kubernetes/butane/controller.yaml index f5397340..846d8bbf 100644 --- a/azure/fedora-coreos/kubernetes/butane/controller.yaml +++ b/azure/fedora-coreos/kubernetes/butane/controller.yaml @@ -12,7 +12,7 @@ systemd: Wants=network-online.target After=network-online.target [Service] - Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.10 + Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.13 Type=exec ExecStartPre=/bin/mkdir -p /var/lib/etcd ExecStartPre=-/usr/bin/podman rm etcd @@ -54,7 +54,7 @@ systemd: Description=Kubelet (System Container) Wants=rpc-statd.service [Service] - Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3 + Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3 ExecStartPre=/bin/mkdir -p /etc/cni/net.d ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests ExecStartPre=/bin/mkdir -p /opt/cni/bin @@ -111,7 +111,7 @@ 
systemd: --volume /opt/bootstrap/assets:/assets:ro,Z \ --volume /opt/bootstrap/apply:/apply:ro,Z \ --entrypoint=/apply \ - quay.io/poseidon/kubelet:v1.28.3 + quay.io/poseidon/kubelet:v1.31.3 ExecStartPost=/bin/touch /opt/bootstrap/bootstrap.done ExecStartPost=-/usr/bin/podman stop bootstrap storage: @@ -144,7 +144,7 @@ storage: cgroupDriver: systemd clusterDNS: - ${cluster_dns_service_ip} - clusterDomain: ${cluster_domain_suffix} + clusterDomain: cluster.local healthzPort: 0 rotateCertificates: true shutdownGracePeriod: 45s @@ -158,7 +158,7 @@ storage: contents: inline: | #!/bin/bash -e - mkdir -p -- auth tls/etcd tls/k8s static-manifests manifests/coredns manifests-networking + mkdir -p -- auth tls/{etcd,k8s} static-manifests manifests/{coredns,kube-proxy,network} awk '/#####/ {filename=$2; next} {print > filename}' assets mkdir -p /etc/ssl/etcd/etcd mkdir -p /etc/kubernetes/pki @@ -172,8 +172,7 @@ storage: mv static-manifests/* /etc/kubernetes/manifests/ mkdir -p /opt/bootstrap/assets mv manifests /opt/bootstrap/assets/manifests - mv manifests-networking/* /opt/bootstrap/assets/manifests/ - rm -rf assets auth static-manifests tls manifests-networking + rm -rf assets auth static-manifests tls manifests chcon -R -u system_u -t container_file_t /etc/kubernetes/pki - path: /opt/bootstrap/apply mode: 0544 diff --git a/azure/fedora-coreos/kubernetes/controllers.tf b/azure/fedora-coreos/kubernetes/controllers.tf index 6381f1d0..504c6904 100644 --- a/azure/fedora-coreos/kubernetes/controllers.tf +++ b/azure/fedora-coreos/kubernetes/controllers.tf @@ -8,26 +8,23 @@ locals { # Discrete DNS records for each controller's private IPv4 for etcd usage resource "azurerm_dns_a_record" "etcds" { - count = var.controller_count - resource_group_name = var.dns_zone_group + count = var.controller_count # DNS Zone name where record should be created - zone_name = var.dns_zone - + zone_name = var.dns_zone + resource_group_name = var.dns_zone_group # DNS record name = format("%s-etcd%d", var.cluster_name, count.index) ttl = 300 - # private IPv4 address for etcd - records = [azurerm_network_interface.controllers.*.private_ip_address[count.index]] + records = [azurerm_network_interface.controllers[count.index].private_ip_address] } # Controller availability set to spread controllers resource "azurerm_availability_set" "controllers" { - resource_group_name = azurerm_resource_group.cluster.name - name = "${var.cluster_name}-controllers" - location = var.region + resource_group_name = azurerm_resource_group.cluster.name + location = var.location platform_fault_domain_count = 2 platform_update_domain_count = 4 managed = true @@ -35,31 +32,35 @@ resource "azurerm_availability_set" "controllers" { # Controller instances resource "azurerm_linux_virtual_machine" "controllers" { - count = var.controller_count - resource_group_name = azurerm_resource_group.cluster.name + count = var.controller_count name = "${var.cluster_name}-controller-${count.index}" - location = var.region + resource_group_name = azurerm_resource_group.cluster.name + location = var.location availability_set_id = azurerm_availability_set.controllers.id - - size = var.controller_type - custom_data = base64encode(data.ct_config.controllers.*.rendered[count.index]) + size = var.controller_type # storage source_image_id = var.os_image os_disk { name = "${var.cluster_name}-controller-${count.index}" + storage_account_type = var.controller_disk_type + disk_size_gb = var.controller_disk_size caching = "None" - disk_size_gb = var.disk_size - storage_account_type 
= "Premium_LRS" } # network network_interface_ids = [ - azurerm_network_interface.controllers.*.id[count.index] + azurerm_network_interface.controllers[count.index].id ] - # Azure requires setting admin_ssh_key, though Ignition custom_data handles it too + # boot + custom_data = base64encode(data.ct_config.controllers[count.index].rendered) + boot_diagnostics { + # defaults to a managed storage account + } + + # Azure requires an RSA admin_ssh_key admin_username = "core" admin_ssh_key { username = "core" @@ -74,31 +75,52 @@ resource "azurerm_linux_virtual_machine" "controllers" { } } -# Controller public IPv4 addresses -resource "azurerm_public_ip" "controllers" { - count = var.controller_count - resource_group_name = azurerm_resource_group.cluster.name +# Controller node public IPv4 addresses +resource "azurerm_public_ip" "controllers-ipv4" { + count = var.controller_count - name = "${var.cluster_name}-controller-${count.index}" - location = azurerm_resource_group.cluster.location - sku = "Standard" - allocation_method = "Static" + name = "${var.cluster_name}-controller-${count.index}-ipv4" + resource_group_name = azurerm_resource_group.cluster.name + location = azurerm_resource_group.cluster.location + ip_version = "IPv4" + sku = "Standard" + allocation_method = "Static" } -# Controller NICs with public and private IPv4 -resource "azurerm_network_interface" "controllers" { - count = var.controller_count - resource_group_name = azurerm_resource_group.cluster.name +# Controller node public IPv6 addresses +resource "azurerm_public_ip" "controllers-ipv6" { + count = var.controller_count - name = "${var.cluster_name}-controller-${count.index}" - location = azurerm_resource_group.cluster.location + name = "${var.cluster_name}-controller-${count.index}-ipv6" + resource_group_name = azurerm_resource_group.cluster.name + location = azurerm_resource_group.cluster.location + ip_version = "IPv6" + sku = "Standard" + allocation_method = "Static" +} + +# Controllers' network interfaces +resource "azurerm_network_interface" "controllers" { + count = var.controller_count + + name = "${var.cluster_name}-controller-${count.index}" + resource_group_name = azurerm_resource_group.cluster.name + location = azurerm_resource_group.cluster.location ip_configuration { - name = "ip0" + name = "ipv4" + primary = true subnet_id = azurerm_subnet.controller.id private_ip_address_allocation = "Dynamic" - # instance public IPv4 - public_ip_address_id = azurerm_public_ip.controllers.*.id[count.index] + private_ip_address_version = "IPv4" + public_ip_address_id = azurerm_public_ip.controllers-ipv4[count.index].id + } + ip_configuration { + name = "ipv6" + subnet_id = azurerm_subnet.controller.id + private_ip_address_allocation = "Dynamic" + private_ip_address_version = "IPv6" + public_ip_address_id = azurerm_public_ip.controllers-ipv6[count.index].id } } @@ -111,12 +133,20 @@ resource "azurerm_network_interface_security_group_association" "controllers" { } # Associate controller network interface with controller backend address pool -resource "azurerm_network_interface_backend_address_pool_association" "controllers" { +resource "azurerm_network_interface_backend_address_pool_association" "controllers-ipv4" { count = var.controller_count network_interface_id = azurerm_network_interface.controllers[count.index].id - ip_configuration_name = "ip0" - backend_address_pool_id = azurerm_lb_backend_address_pool.controller.id + ip_configuration_name = "ipv4" + backend_address_pool_id = 
azurerm_lb_backend_address_pool.controller-ipv4.id +} + +resource "azurerm_network_interface_backend_address_pool_association" "controllers-ipv6" { + count = var.controller_count + + network_interface_id = azurerm_network_interface.controllers[count.index].id + ip_configuration_name = "ipv6" + backend_address_pool_id = azurerm_lb_backend_address_pool.controller-ipv6.id } # Fedora CoreOS controllers @@ -133,7 +163,6 @@ data "ct_config" "controllers" { kubeconfig = indent(10, module.bootstrap.kubeconfig-kubelet) ssh_authorized_key = var.ssh_authorized_key cluster_dns_service_ip = cidrhost(var.service_cidr, 10) - cluster_domain_suffix = var.cluster_domain_suffix }) strict = true snippets = var.controller_snippets diff --git a/azure/fedora-coreos/kubernetes/lb.tf b/azure/fedora-coreos/kubernetes/lb.tf index 4e139471..cc706752 100644 --- a/azure/fedora-coreos/kubernetes/lb.tf +++ b/azure/fedora-coreos/kubernetes/lb.tf @@ -1,116 +1,164 @@ -# DNS record for the apiserver load balancer +# DNS A record for the apiserver load balancer resource "azurerm_dns_a_record" "apiserver" { - resource_group_name = var.dns_zone_group - # DNS Zone name where record should be created - zone_name = var.dns_zone - + zone_name = var.dns_zone + resource_group_name = var.dns_zone_group # DNS record name = var.cluster_name ttl = 300 - # IPv4 address of apiserver load balancer - records = [azurerm_public_ip.apiserver-ipv4.ip_address] + records = [azurerm_public_ip.frontend-ipv4.ip_address] } -# Static IPv4 address for the apiserver frontend -resource "azurerm_public_ip" "apiserver-ipv4" { - resource_group_name = azurerm_resource_group.cluster.name - - name = "${var.cluster_name}-apiserver-ipv4" - location = var.region - sku = "Standard" - allocation_method = "Static" +# DNS AAAA record for the apiserver load balancer +resource "azurerm_dns_aaaa_record" "apiserver" { + # DNS Zone name where record should be created + zone_name = var.dns_zone + resource_group_name = var.dns_zone_group + # DNS record + name = var.cluster_name + ttl = 300 + # IPv6 address of apiserver load balancer + records = [azurerm_public_ip.frontend-ipv6.ip_address] } -# Static IPv4 address for the ingress frontend -resource "azurerm_public_ip" "ingress-ipv4" { +# Static IPv4 address for the load balancer +resource "azurerm_public_ip" "frontend-ipv4" { + name = "${var.cluster_name}-frontend-ipv4" resource_group_name = azurerm_resource_group.cluster.name + location = var.location + ip_version = "IPv4" + sku = "Standard" + allocation_method = "Static" +} - name = "${var.cluster_name}-ingress-ipv4" - location = var.region - sku = "Standard" - allocation_method = "Static" +# Static IPv6 address for the load balancer +resource "azurerm_public_ip" "frontend-ipv6" { + name = "${var.cluster_name}-frontend-ipv6" + resource_group_name = azurerm_resource_group.cluster.name + location = var.location + ip_version = "IPv6" + sku = "Standard" + allocation_method = "Static" } # Network Load Balancer for apiservers and ingress resource "azurerm_lb" "cluster" { + name = var.cluster_name resource_group_name = azurerm_resource_group.cluster.name - - name = var.cluster_name - location = var.region - sku = "Standard" + location = var.location + sku = "Standard" frontend_ip_configuration { - name = "apiserver" - public_ip_address_id = azurerm_public_ip.apiserver-ipv4.id + name = "frontend-ipv4" + public_ip_address_id = azurerm_public_ip.frontend-ipv4.id } frontend_ip_configuration { - name = "ingress" - public_ip_address_id = azurerm_public_ip.ingress-ipv4.id + name = 
"frontend-ipv6" + public_ip_address_id = azurerm_public_ip.frontend-ipv6.id } } -resource "azurerm_lb_rule" "apiserver" { - name = "apiserver" +resource "azurerm_lb_rule" "apiserver-ipv4" { + name = "apiserver-ipv4" loadbalancer_id = azurerm_lb.cluster.id - frontend_ip_configuration_name = "apiserver" + frontend_ip_configuration_name = "frontend-ipv4" + disable_outbound_snat = true protocol = "Tcp" frontend_port = 6443 backend_port = 6443 - backend_address_pool_ids = [azurerm_lb_backend_address_pool.controller.id] + backend_address_pool_ids = [azurerm_lb_backend_address_pool.controller-ipv4.id] probe_id = azurerm_lb_probe.apiserver.id } -resource "azurerm_lb_rule" "ingress-http" { - name = "ingress-http" +resource "azurerm_lb_rule" "apiserver-ipv6" { + name = "apiserver-ipv6" loadbalancer_id = azurerm_lb.cluster.id - frontend_ip_configuration_name = "ingress" + frontend_ip_configuration_name = "frontend-ipv6" + disable_outbound_snat = true + + protocol = "Tcp" + frontend_port = 6443 + backend_port = 6443 + backend_address_pool_ids = [azurerm_lb_backend_address_pool.controller-ipv6.id] + probe_id = azurerm_lb_probe.apiserver.id +} + +resource "azurerm_lb_rule" "ingress-http-ipv4" { + name = "ingress-http-ipv4" + loadbalancer_id = azurerm_lb.cluster.id + frontend_ip_configuration_name = "frontend-ipv4" disable_outbound_snat = true protocol = "Tcp" frontend_port = 80 backend_port = 80 - backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker.id] + backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker-ipv4.id] probe_id = azurerm_lb_probe.ingress.id } -resource "azurerm_lb_rule" "ingress-https" { - name = "ingress-https" +resource "azurerm_lb_rule" "ingress-https-ipv4" { + name = "ingress-https-ipv4" loadbalancer_id = azurerm_lb.cluster.id - frontend_ip_configuration_name = "ingress" + frontend_ip_configuration_name = "frontend-ipv4" disable_outbound_snat = true protocol = "Tcp" frontend_port = 443 backend_port = 443 - backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker.id] + backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker-ipv4.id] probe_id = azurerm_lb_probe.ingress.id } -# Worker outbound TCP/UDP SNAT -resource "azurerm_lb_outbound_rule" "worker-outbound" { - name = "worker" - loadbalancer_id = azurerm_lb.cluster.id - frontend_ip_configuration { - name = "ingress" - } +resource "azurerm_lb_rule" "ingress-http-ipv6" { + name = "ingress-http-ipv6" + loadbalancer_id = azurerm_lb.cluster.id + frontend_ip_configuration_name = "frontend-ipv6" + disable_outbound_snat = true - protocol = "All" - backend_address_pool_id = azurerm_lb_backend_address_pool.worker.id + protocol = "Tcp" + frontend_port = 80 + backend_port = 80 + backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker-ipv6.id] + probe_id = azurerm_lb_probe.ingress.id } +resource "azurerm_lb_rule" "ingress-https-ipv6" { + name = "ingress-https-ipv6" + loadbalancer_id = azurerm_lb.cluster.id + frontend_ip_configuration_name = "frontend-ipv6" + disable_outbound_snat = true + + protocol = "Tcp" + frontend_port = 443 + backend_port = 443 + backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker-ipv6.id] + probe_id = azurerm_lb_probe.ingress.id +} + +# Backend Address Pools + # Address pool of controllers -resource "azurerm_lb_backend_address_pool" "controller" { - name = "controller" +resource "azurerm_lb_backend_address_pool" "controller-ipv4" { + name = "controller-ipv4" + loadbalancer_id = azurerm_lb.cluster.id +} + +resource "azurerm_lb_backend_address_pool" 
"controller-ipv6" { + name = "controller-ipv6" loadbalancer_id = azurerm_lb.cluster.id } # Address pool of workers -resource "azurerm_lb_backend_address_pool" "worker" { - name = "worker" +resource "azurerm_lb_backend_address_pool" "worker-ipv4" { + name = "worker-ipv4" + loadbalancer_id = azurerm_lb.cluster.id +} + +resource "azurerm_lb_backend_address_pool" "worker-ipv6" { + name = "worker-ipv6" loadbalancer_id = azurerm_lb.cluster.id } @@ -122,10 +170,8 @@ resource "azurerm_lb_probe" "apiserver" { loadbalancer_id = azurerm_lb.cluster.id protocol = "Tcp" port = 6443 - # unhealthy threshold - number_of_probes = 3 - + number_of_probes = 3 interval_in_seconds = 5 } @@ -136,10 +182,29 @@ resource "azurerm_lb_probe" "ingress" { protocol = "Http" port = 10254 request_path = "/healthz" - # unhealthy threshold - number_of_probes = 3 - + number_of_probes = 3 interval_in_seconds = 5 } +# Outbound SNAT + +resource "azurerm_lb_outbound_rule" "outbound-ipv4" { + name = "outbound-ipv4" + protocol = "All" + loadbalancer_id = azurerm_lb.cluster.id + backend_address_pool_id = azurerm_lb_backend_address_pool.worker-ipv4.id + frontend_ip_configuration { + name = "frontend-ipv4" + } +} + +resource "azurerm_lb_outbound_rule" "outbound-ipv6" { + name = "outbound-ipv6" + protocol = "All" + loadbalancer_id = azurerm_lb.cluster.id + backend_address_pool_id = azurerm_lb_backend_address_pool.worker-ipv6.id + frontend_ip_configuration { + name = "frontend-ipv6" + } +} diff --git a/azure/fedora-coreos/kubernetes/locals.tf b/azure/fedora-coreos/kubernetes/locals.tf new file mode 100644 index 00000000..0c840906 --- /dev/null +++ b/azure/fedora-coreos/kubernetes/locals.tf @@ -0,0 +1,6 @@ +locals { + backend_address_pool_ids = { + ipv4 = [azurerm_lb_backend_address_pool.worker-ipv4.id] + ipv6 = [azurerm_lb_backend_address_pool.worker-ipv6.id] + } +} diff --git a/azure/fedora-coreos/kubernetes/network.tf b/azure/fedora-coreos/kubernetes/network.tf index 0fcaa8b8..c0bde6c6 100644 --- a/azure/fedora-coreos/kubernetes/network.tf +++ b/azure/fedora-coreos/kubernetes/network.tf @@ -1,27 +1,64 @@ +# Choose an IPv6 ULA subnet at random +# https://datatracker.ietf.org/doc/html/rfc4193 +resource "random_id" "ula-netnum" { + byte_length = 5 # 40 bits +} + +locals { + # fd00::/8 -> shift 40 -> 2^40 possible /48 subnets + ula-range = cidrsubnet("fd00::/8", 40, random_id.ula-netnum.dec) + network_cidr = { + ipv4 = var.network_cidr.ipv4 + ipv6 = length(var.network_cidr.ipv6) > 0 ? 
var.network_cidr.ipv6 : [local.ula-range] + } + + # Subdivide the virtual network into subnets + # - controllers use netnum 0 + # - workers use netnum 1 + controller_subnets = { + ipv4 = [for i, cidr in local.network_cidr.ipv4 : cidrsubnet(cidr, 1, 0)] + ipv6 = [for i, cidr in local.network_cidr.ipv6 : cidrsubnet(cidr, 16, 0)] + } + worker_subnets = { + ipv4 = [for i, cidr in local.network_cidr.ipv4 : cidrsubnet(cidr, 1, 1)] + ipv6 = [for i, cidr in local.network_cidr.ipv6 : cidrsubnet(cidr, 16, 1)] + } + cluster_subnets = { + ipv4 = concat(local.controller_subnets.ipv4, local.worker_subnets.ipv4) + ipv6 = concat(local.controller_subnets.ipv6, local.worker_subnets.ipv6) + } +} + # Organize cluster into a resource group resource "azurerm_resource_group" "cluster" { name = var.cluster_name - location = var.region + location = var.location } resource "azurerm_virtual_network" "network" { + name = var.cluster_name resource_group_name = azurerm_resource_group.cluster.name - - name = var.cluster_name - location = azurerm_resource_group.cluster.location - address_space = [var.host_cidr] + location = azurerm_resource_group.cluster.location + address_space = concat( + local.network_cidr.ipv4, + local.network_cidr.ipv6 + ) } -# Subnets - separate subnets for controller and workers because Azure -# network security groups are based on IPv4 CIDR rather than instance -# tags like GCP or security group membership like AWS +# Subnets - separate subnets for controllers and workers because Azure +# network security groups are oriented around address prefixes rather +# than instance tags (GCP) or security group membership (AWS) resource "azurerm_subnet" "controller" { - resource_group_name = azurerm_resource_group.cluster.name - name = "controller" + resource_group_name = azurerm_resource_group.cluster.name virtual_network_name = azurerm_virtual_network.network.name - address_prefixes = [cidrsubnet(var.host_cidr, 1, 0)] + address_prefixes = concat( + local.controller_subnets.ipv4, + local.controller_subnets.ipv6, + ) + default_outbound_access_enabled = false + } resource "azurerm_subnet_network_security_group_association" "controller" { @@ -30,11 +67,14 @@ resource "azurerm_subnet_network_security_group_association" "controller" { } resource "azurerm_subnet" "worker" { - resource_group_name = azurerm_resource_group.cluster.name - name = "worker" + resource_group_name = azurerm_resource_group.cluster.name virtual_network_name = azurerm_virtual_network.network.name - address_prefixes = [cidrsubnet(var.host_cidr, 1, 1)] + address_prefixes = concat( + local.worker_subnets.ipv4, + local.worker_subnets.ipv6, + ) + default_outbound_access_enabled = false } resource "azurerm_subnet_network_security_group_association" "worker" { diff --git a/azure/fedora-coreos/kubernetes/outputs.tf b/azure/fedora-coreos/kubernetes/outputs.tf index 1fb9cd4d..7559b25e 100644 --- a/azure/fedora-coreos/kubernetes/outputs.tf +++ b/azure/fedora-coreos/kubernetes/outputs.tf @@ -6,13 +6,18 @@ output "kubeconfig-admin" { # Outputs for Kubernetes Ingress output "ingress_static_ipv4" { - value = azurerm_public_ip.ingress-ipv4.ip_address + value = azurerm_public_ip.frontend-ipv4.ip_address description = "IPv4 address of the load balancer for distributing traffic to Ingress controllers" } +output "ingress_static_ipv6" { + value = azurerm_public_ip.frontend-ipv6.ip_address + description = "IPv6 address of the load balancer for distributing traffic to Ingress controllers" +} + # Outputs for worker pools -output "region" { +output "location" { 
value = azurerm_resource_group.cluster.location } @@ -39,13 +44,24 @@ output "kubeconfig" { # Outputs for custom firewalling +output "controller_security_group_name" { + description = "Network Security Group for controller nodes" + value = azurerm_network_security_group.controller.name +} + output "worker_security_group_name" { - value = azurerm_network_security_group.worker.name + description = "Network Security Group for worker nodes" + value = azurerm_network_security_group.worker.name +} + +output "controller_address_prefixes" { + description = "Controller network subnet CIDR addresses (for source/destination)" + value = local.controller_subnets } output "worker_address_prefixes" { description = "Worker network subnet CIDR addresses (for source/destination)" - value = azurerm_subnet.worker.address_prefixes + value = local.worker_subnets } # Outputs for custom load balancing @@ -55,9 +71,12 @@ output "loadbalancer_id" { value = azurerm_lb.cluster.id } -output "backend_address_pool_id" { - description = "ID of the worker backend address pool" - value = azurerm_lb_backend_address_pool.worker.id +output "backend_address_pool_ids" { + description = "IDs of the worker backend address pools" + value = { + ipv4 = [azurerm_lb_backend_address_pool.worker-ipv4.id] + ipv6 = [azurerm_lb_backend_address_pool.worker-ipv6.id] + } } # Outputs for debug diff --git a/azure/fedora-coreos/kubernetes/security.tf b/azure/fedora-coreos/kubernetes/security.tf index 0367cee1..f9a70273 100644 --- a/azure/fedora-coreos/kubernetes/security.tf +++ b/azure/fedora-coreos/kubernetes/security.tf @@ -1,198 +1,223 @@ # Controller security group resource "azurerm_network_security_group" "controller" { + name = "${var.cluster_name}-controller" resource_group_name = azurerm_resource_group.cluster.name - - name = "${var.cluster_name}-controller" - location = azurerm_resource_group.cluster.location + location = azurerm_resource_group.cluster.location } resource "azurerm_network_security_rule" "controller-icmp" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.controller_subnets - name = "allow-icmp" + name = "allow-icmp-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.controller.name - priority = "1995" + priority = 1995 + (each.key == "ipv4" ? 0 : 1) access = "Allow" direction = "Inbound" protocol = "Icmp" source_port_range = "*" destination_port_range = "*" - source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes) - destination_address_prefixes = azurerm_subnet.controller.address_prefixes + source_address_prefixes = local.cluster_subnets[each.key] + destination_address_prefixes = local.controller_subnets[each.key] } resource "azurerm_network_security_rule" "controller-ssh" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.controller_subnets - name = "allow-ssh" + name = "allow-ssh-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.controller.name - priority = "2000" + priority = 2000 + (each.key == "ipv4" ? 
0 : 1) access = "Allow" direction = "Inbound" protocol = "Tcp" source_port_range = "*" destination_port_range = "22" source_address_prefix = "*" - destination_address_prefixes = azurerm_subnet.controller.address_prefixes + destination_address_prefixes = local.controller_subnets[each.key] } resource "azurerm_network_security_rule" "controller-etcd" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.controller_subnets - name = "allow-etcd" + name = "allow-etcd-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.controller.name - priority = "2005" + priority = 2005 + (each.key == "ipv4" ? 0 : 1) access = "Allow" direction = "Inbound" protocol = "Tcp" source_port_range = "*" destination_port_range = "2379-2380" - source_address_prefixes = azurerm_subnet.controller.address_prefixes - destination_address_prefixes = azurerm_subnet.controller.address_prefixes + source_address_prefixes = local.controller_subnets[each.key] + destination_address_prefixes = local.controller_subnets[each.key] } # Allow Prometheus to scrape etcd metrics resource "azurerm_network_security_rule" "controller-etcd-metrics" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.controller_subnets - name = "allow-etcd-metrics" + name = "allow-etcd-metrics-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.controller.name - priority = "2010" + priority = 2010 + (each.key == "ipv4" ? 0 : 1) access = "Allow" direction = "Inbound" protocol = "Tcp" source_port_range = "*" destination_port_range = "2381" - source_address_prefixes = azurerm_subnet.worker.address_prefixes - destination_address_prefixes = azurerm_subnet.controller.address_prefixes + source_address_prefixes = local.worker_subnets[each.key] + destination_address_prefixes = local.controller_subnets[each.key] } # Allow Prometheus to scrape kube-proxy metrics resource "azurerm_network_security_rule" "controller-kube-proxy" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.controller_subnets - name = "allow-kube-proxy-metrics" + name = "allow-kube-proxy-metrics-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.controller.name - priority = "2011" + priority = 2012 + (each.key == "ipv4" ? 0 : 1) access = "Allow" direction = "Inbound" protocol = "Tcp" source_port_range = "*" destination_port_range = "10249" - source_address_prefixes = azurerm_subnet.worker.address_prefixes - destination_address_prefixes = azurerm_subnet.controller.address_prefixes + source_address_prefixes = local.worker_subnets[each.key] + destination_address_prefixes = local.controller_subnets[each.key] } # Allow Prometheus to scrape kube-scheduler and kube-controller-manager metrics resource "azurerm_network_security_rule" "controller-kube-metrics" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.controller_subnets - name = "allow-kube-metrics" + name = "allow-kube-metrics-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.controller.name - priority = "2012" + priority = 2014 + (each.key == "ipv4" ? 
0 : 1) access = "Allow" direction = "Inbound" protocol = "Tcp" source_port_range = "*" destination_port_range = "10257-10259" - source_address_prefixes = azurerm_subnet.worker.address_prefixes - destination_address_prefixes = azurerm_subnet.controller.address_prefixes + source_address_prefixes = local.worker_subnets[each.key] + destination_address_prefixes = local.controller_subnets[each.key] } resource "azurerm_network_security_rule" "controller-apiserver" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.controller_subnets - name = "allow-apiserver" + name = "allow-apiserver-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.controller.name - priority = "2015" + priority = 2016 + (each.key == "ipv4" ? 0 : 1) access = "Allow" direction = "Inbound" protocol = "Tcp" source_port_range = "*" destination_port_range = "6443" source_address_prefix = "*" - destination_address_prefixes = azurerm_subnet.controller.address_prefixes + destination_address_prefixes = local.controller_subnets[each.key] } resource "azurerm_network_security_rule" "controller-cilium-health" { - resource_group_name = azurerm_resource_group.cluster.name - count = var.networking == "cilium" ? 1 : 0 + for_each = var.networking == "cilium" ? local.controller_subnets : {} - name = "allow-cilium-health" + name = "allow-cilium-health-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.controller.name - priority = "2019" + priority = 2018 + (each.key == "ipv4" ? 0 : 1) access = "Allow" direction = "Inbound" protocol = "Tcp" source_port_range = "*" destination_port_range = "4240" - source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes) - destination_address_prefixes = azurerm_subnet.controller.address_prefixes + source_address_prefixes = local.cluster_subnets[each.key] + destination_address_prefixes = local.controller_subnets[each.key] +} + +resource "azurerm_network_security_rule" "controller-cilium-metrics" { + for_each = var.networking == "cilium" ? local.controller_subnets : {} + + name = "allow-cilium-metrics-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name + network_security_group_name = azurerm_network_security_group.controller.name + priority = 2035 + (each.key == "ipv4" ? 0 : 1) + access = "Allow" + direction = "Inbound" + protocol = "Tcp" + source_port_range = "*" + destination_port_range = "9962-9965" + source_address_prefixes = local.cluster_subnets[each.key] + destination_address_prefixes = local.controller_subnets[each.key] } resource "azurerm_network_security_rule" "controller-vxlan" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.controller_subnets - name = "allow-vxlan" + name = "allow-vxlan-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.controller.name - priority = "2020" + priority = 2020 + (each.key == "ipv4" ? 
0 : 1) access = "Allow" direction = "Inbound" protocol = "Udp" source_port_range = "*" destination_port_range = "4789" - source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes) - destination_address_prefixes = azurerm_subnet.controller.address_prefixes + source_address_prefixes = local.cluster_subnets[each.key] + destination_address_prefixes = local.controller_subnets[each.key] } resource "azurerm_network_security_rule" "controller-linux-vxlan" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.controller_subnets - name = "allow-linux-vxlan" + name = "allow-linux-vxlan-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.controller.name - priority = "2021" + priority = 2022 + (each.key == "ipv4" ? 0 : 1) access = "Allow" direction = "Inbound" protocol = "Udp" source_port_range = "*" destination_port_range = "8472" - source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes) - destination_address_prefixes = azurerm_subnet.controller.address_prefixes + source_address_prefixes = local.cluster_subnets[each.key] + destination_address_prefixes = local.controller_subnets[each.key] } # Allow Prometheus to scrape node-exporter daemonset resource "azurerm_network_security_rule" "controller-node-exporter" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.controller_subnets - name = "allow-node-exporter" + name = "allow-node-exporter-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.controller.name - priority = "2025" + priority = 2025 + (each.key == "ipv4" ? 0 : 1) access = "Allow" direction = "Inbound" protocol = "Tcp" source_port_range = "*" destination_port_range = "9100" - source_address_prefixes = azurerm_subnet.worker.address_prefixes - destination_address_prefixes = azurerm_subnet.controller.address_prefixes + source_address_prefixes = local.worker_subnets[each.key] + destination_address_prefixes = local.controller_subnets[each.key] } # Allow apiserver to access kubelet's for exec, log, port-forward resource "azurerm_network_security_rule" "controller-kubelet" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.controller_subnets - name = "allow-kubelet" + name = "allow-kubelet-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.controller.name - priority = "2030" + priority = 2030 + (each.key == "ipv4" ? 
0 : 1) access = "Allow" direction = "Inbound" protocol = "Tcp" source_port_range = "*" destination_port_range = "10250" - # allow Prometheus to scrape kubelet metrics too - source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes) - destination_address_prefixes = azurerm_subnet.controller.address_prefixes + source_address_prefixes = local.cluster_subnets[each.key] + destination_address_prefixes = local.controller_subnets[each.key] } # Override Azure AllowVNetInBound and AllowAzureLoadBalancerInBound @@ -231,166 +256,189 @@ resource "azurerm_network_security_rule" "controller-deny-all" { # Worker security group resource "azurerm_network_security_group" "worker" { + name = "${var.cluster_name}-worker" resource_group_name = azurerm_resource_group.cluster.name - - name = "${var.cluster_name}-worker" - location = azurerm_resource_group.cluster.location + location = azurerm_resource_group.cluster.location } resource "azurerm_network_security_rule" "worker-icmp" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.worker_subnets - name = "allow-icmp" + name = "allow-icmp-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.worker.name - priority = "1995" + priority = 1995 + (each.key == "ipv4" ? 0 : 1) access = "Allow" direction = "Inbound" protocol = "Icmp" source_port_range = "*" destination_port_range = "*" - source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes) - destination_address_prefixes = azurerm_subnet.worker.address_prefixes + source_address_prefixes = local.cluster_subnets[each.key] + destination_address_prefixes = local.worker_subnets[each.key] } resource "azurerm_network_security_rule" "worker-ssh" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.worker_subnets - name = "allow-ssh" + name = "allow-ssh-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.worker.name - priority = "2000" + priority = 2000 + (each.key == "ipv4" ? 0 : 1) access = "Allow" direction = "Inbound" protocol = "Tcp" source_port_range = "*" destination_port_range = "22" - source_address_prefixes = azurerm_subnet.controller.address_prefixes - destination_address_prefixes = azurerm_subnet.worker.address_prefixes + source_address_prefixes = local.controller_subnets[each.key] + destination_address_prefixes = local.worker_subnets[each.key] } resource "azurerm_network_security_rule" "worker-http" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.worker_subnets - name = "allow-http" + name = "allow-http-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.worker.name - priority = "2005" + priority = 2005 + (each.key == "ipv4" ? 
0 : 1) access = "Allow" direction = "Inbound" protocol = "Tcp" source_port_range = "*" destination_port_range = "80" source_address_prefix = "*" - destination_address_prefixes = azurerm_subnet.worker.address_prefixes + destination_address_prefixes = local.worker_subnets[each.key] } resource "azurerm_network_security_rule" "worker-https" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.worker_subnets - name = "allow-https" + name = "allow-https-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.worker.name - priority = "2010" + priority = 2010 + (each.key == "ipv4" ? 0 : 1) access = "Allow" direction = "Inbound" protocol = "Tcp" source_port_range = "*" destination_port_range = "443" source_address_prefix = "*" - destination_address_prefixes = azurerm_subnet.worker.address_prefixes + destination_address_prefixes = local.worker_subnets[each.key] } resource "azurerm_network_security_rule" "worker-cilium-health" { - resource_group_name = azurerm_resource_group.cluster.name - count = var.networking == "cilium" ? 1 : 0 + for_each = var.networking == "cilium" ? local.worker_subnets : {} - name = "allow-cilium-health" + name = "allow-cilium-health-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.worker.name - priority = "2014" + priority = 2012 + (each.key == "ipv4" ? 0 : 1) access = "Allow" direction = "Inbound" protocol = "Tcp" source_port_range = "*" destination_port_range = "4240" - source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes) - destination_address_prefixes = azurerm_subnet.worker.address_prefixes + source_address_prefixes = local.cluster_subnets[each.key] + destination_address_prefixes = local.worker_subnets[each.key] +} + +resource "azurerm_network_security_rule" "worker-cilium-metrics" { + for_each = var.networking == "cilium" ? local.worker_subnets : {} + + name = "allow-cilium-metrics-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name + network_security_group_name = azurerm_network_security_group.worker.name + priority = 2014 + (each.key == "ipv4" ? 0 : 1) + access = "Allow" + direction = "Inbound" + protocol = "Tcp" + source_port_range = "*" + destination_port_range = "9962-9965" + source_address_prefixes = local.cluster_subnets[each.key] + destination_address_prefixes = local.worker_subnets[each.key] } resource "azurerm_network_security_rule" "worker-vxlan" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.worker_subnets - name = "allow-vxlan" + name = "allow-vxlan-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.worker.name - priority = "2015" + priority = 2016 + (each.key == "ipv4" ? 
0 : 1) access = "Allow" direction = "Inbound" protocol = "Udp" source_port_range = "*" destination_port_range = "4789" - source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes) - destination_address_prefixes = azurerm_subnet.worker.address_prefixes + source_address_prefixes = local.cluster_subnets[each.key] + destination_address_prefixes = local.worker_subnets[each.key] } resource "azurerm_network_security_rule" "worker-linux-vxlan" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.worker_subnets - name = "allow-linux-vxlan" + name = "allow-linux-vxlan-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.worker.name - priority = "2016" + priority = 2018 + (each.key == "ipv4" ? 0 : 1) access = "Allow" direction = "Inbound" protocol = "Udp" source_port_range = "*" destination_port_range = "8472" - source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes) - destination_address_prefixes = azurerm_subnet.worker.address_prefixes + source_address_prefixes = local.cluster_subnets[each.key] + destination_address_prefixes = local.worker_subnets[each.key] } # Allow Prometheus to scrape node-exporter daemonset resource "azurerm_network_security_rule" "worker-node-exporter" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.worker_subnets - name = "allow-node-exporter" + name = "allow-node-exporter-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.worker.name - priority = "2020" + priority = 2020 + (each.key == "ipv4" ? 0 : 1) access = "Allow" direction = "Inbound" protocol = "Tcp" source_port_range = "*" destination_port_range = "9100" - source_address_prefixes = azurerm_subnet.worker.address_prefixes - destination_address_prefixes = azurerm_subnet.worker.address_prefixes + source_address_prefixes = local.worker_subnets[each.key] + destination_address_prefixes = local.worker_subnets[each.key] } # Allow Prometheus to scrape kube-proxy resource "azurerm_network_security_rule" "worker-kube-proxy" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.worker_subnets - name = "allow-kube-proxy" + name = "allow-kube-proxy-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.worker.name - priority = "2024" + priority = 2024 + (each.key == "ipv4" ? 0 : 1) access = "Allow" direction = "Inbound" protocol = "Tcp" source_port_range = "*" destination_port_range = "10249" - source_address_prefixes = azurerm_subnet.worker.address_prefixes - destination_address_prefixes = azurerm_subnet.worker.address_prefixes + source_address_prefixes = local.worker_subnets[each.key] + destination_address_prefixes = local.worker_subnets[each.key] } # Allow apiserver to access kubelet's for exec, log, port-forward resource "azurerm_network_security_rule" "worker-kubelet" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.worker_subnets - name = "allow-kubelet" + name = "allow-kubelet-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.worker.name - priority = "2025" + priority = 2026 + (each.key == "ipv4" ? 
0 : 1) access = "Allow" direction = "Inbound" protocol = "Tcp" source_port_range = "*" destination_port_range = "10250" - # allow Prometheus to scrape kubelet metrics too - source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes) - destination_address_prefixes = azurerm_subnet.worker.address_prefixes + source_address_prefixes = local.cluster_subnets[each.key] + destination_address_prefixes = local.worker_subnets[each.key] } # Override Azure AllowVNetInBound and AllowAzureLoadBalancerInBound diff --git a/azure/fedora-coreos/kubernetes/ssh.tf b/azure/fedora-coreos/kubernetes/ssh.tf index 9b1f3a8a..ad0baa67 100644 --- a/azure/fedora-coreos/kubernetes/ssh.tf +++ b/azure/fedora-coreos/kubernetes/ssh.tf @@ -18,7 +18,7 @@ resource "null_resource" "copy-controller-secrets" { connection { type = "ssh" - host = azurerm_public_ip.controllers.*.ip_address[count.index] + host = azurerm_public_ip.controllers-ipv4[count.index].ip_address user = "core" timeout = "15m" } @@ -45,7 +45,7 @@ resource "null_resource" "bootstrap" { connection { type = "ssh" - host = azurerm_public_ip.controllers.*.ip_address[0] + host = azurerm_public_ip.controllers-ipv4[0].ip_address user = "core" timeout = "15m" } diff --git a/azure/fedora-coreos/kubernetes/variables.tf b/azure/fedora-coreos/kubernetes/variables.tf index 05ae4496..decf59b8 100644 --- a/azure/fedora-coreos/kubernetes/variables.tf +++ b/azure/fedora-coreos/kubernetes/variables.tf @@ -5,9 +5,9 @@ variable "cluster_name" { # Azure -variable "region" { +variable "location" { type = string - description = "Azure Region (e.g. centralus , see `az account list-locations --output table`)" + description = "Azure location (e.g. centralus , see `az account list-locations --output table`)" } variable "dns_zone" { @@ -22,41 +22,65 @@ variable "dns_zone_group" { # instances +variable "os_image" { + type = string + description = "Fedora CoreOS image for instances" +} + variable "controller_count" { type = number description = "Number of controllers (i.e. 
masters)" default = 1 } -variable "worker_count" { - type = number - description = "Number of workers" - default = 1 -} - variable "controller_type" { type = string description = "Machine type for controllers (see `az vm list-skus --location centralus`)" default = "Standard_B2s" } +variable "controller_disk_type" { + type = string + description = "Type of managed disk for controller node(s)" + default = "Premium_LRS" +} + +variable "controller_disk_size" { + type = number + description = "Size of the managed disk in GB for controller node(s)" + default = 30 +} + +variable "worker_count" { + type = number + description = "Number of workers" + default = 1 +} + variable "worker_type" { type = string description = "Machine type for workers (see `az vm list-skus --location centralus`)" default = "Standard_D2as_v5" } -variable "os_image" { +variable "worker_disk_type" { type = string - description = "Fedora CoreOS image for instances" + description = "Type of managed disk for worker nodes" + default = "Standard_LRS" } -variable "disk_size" { +variable "worker_disk_size" { type = number - description = "Size of the disk in GB" + description = "Size of the managed disk in GB for worker nodes" default = 30 } +variable "worker_ephemeral_disk" { + type = bool + description = "Use ephemeral local disk instead of managed disk (requires vm_type with local storage)" + default = false +} + variable "worker_priority" { type = string description = "Set worker priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be deallocated at any time." @@ -94,10 +118,15 @@ variable "networking" { default = "cilium" } -variable "host_cidr" { - type = string - description = "CIDR IPv4 range to assign to instances" - default = "10.0.0.0/16" +variable "network_cidr" { + type = object({ + ipv4 = list(string) + ipv6 = optional(list(string), []) + }) + description = "Virtual network CIDR ranges" + default = { + ipv4 = ["10.0.0.0/16"] + } } variable "pod_cidr" { @@ -115,34 +144,32 @@ EOD default = "10.3.0.0/16" } -variable "enable_reporting" { - type = bool - description = "Enable usage or analytics reporting to upstreams (Calico)" - default = false -} - -variable "enable_aggregation" { - type = bool - description = "Enable the Kubernetes Aggregation Layer" - default = true -} - variable "worker_node_labels" { type = list(string) description = "List of initial worker node labels" default = [] } -# unofficial, undocumented, unsupported - -variable "cluster_domain_suffix" { - type = string - description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. foo.default.svc.cluster.local) " - default = "cluster.local" -} +# advanced variable "daemonset_tolerations" { type = list(string) description = "List of additional taint keys kube-system DaemonSets should tolerate (e.g. 
['custom-role', 'gpu-role'])" default = [] } + +variable "components" { + description = "Configure pre-installed cluster components" + # Component configs are passed through to terraform-render-bootstrap, + # which handles type enforcement and defines defaults + # https://github.com/poseidon/terraform-render-bootstrap/blob/main/variables.tf#L95 + type = object({ + enable = optional(bool) + coredns = optional(map(any)) + kube_proxy = optional(map(any)) + flannel = optional(map(any)) + calico = optional(map(any)) + cilium = optional(map(any)) + }) + default = null +} diff --git a/azure/fedora-coreos/kubernetes/versions.tf b/azure/fedora-coreos/kubernetes/versions.tf index 2cc5ec75..c928a28b 100644 --- a/azure/fedora-coreos/kubernetes/versions.tf +++ b/azure/fedora-coreos/kubernetes/versions.tf @@ -3,7 +3,7 @@ terraform { required_version = ">= 0.13.0, < 2.0.0" required_providers { - azurerm = ">= 2.8, < 4.0" + azurerm = ">= 2.8" null = ">= 2.1" ct = { source = "poseidon/ct" diff --git a/azure/fedora-coreos/kubernetes/workers.tf b/azure/fedora-coreos/kubernetes/workers.tf index 7e9e5e37..9ffc86f3 100644 --- a/azure/fedora-coreos/kubernetes/workers.tf +++ b/azure/fedora-coreos/kubernetes/workers.tf @@ -3,23 +3,26 @@ module "workers" { name = var.cluster_name # Azure - resource_group_name = azurerm_resource_group.cluster.name - region = azurerm_resource_group.cluster.location - subnet_id = azurerm_subnet.worker.id - security_group_id = azurerm_network_security_group.worker.id - backend_address_pool_id = azurerm_lb_backend_address_pool.worker.id + resource_group_name = azurerm_resource_group.cluster.name + location = azurerm_resource_group.cluster.location + subnet_id = azurerm_subnet.worker.id + security_group_id = azurerm_network_security_group.worker.id + backend_address_pool_ids = local.backend_address_pool_ids - worker_count = var.worker_count - vm_type = var.worker_type - os_image = var.os_image - priority = var.worker_priority + # instances + os_image = var.os_image + worker_count = var.worker_count + vm_type = var.worker_type + disk_type = var.worker_disk_type + disk_size = var.worker_disk_size + ephemeral_disk = var.worker_ephemeral_disk + priority = var.worker_priority # configuration - kubeconfig = module.bootstrap.kubeconfig-kubelet - ssh_authorized_key = var.ssh_authorized_key - azure_authorized_key = var.azure_authorized_key - service_cidr = var.service_cidr - cluster_domain_suffix = var.cluster_domain_suffix - snippets = var.worker_snippets - node_labels = var.worker_node_labels + kubeconfig = module.bootstrap.kubeconfig-kubelet + ssh_authorized_key = var.ssh_authorized_key + azure_authorized_key = var.azure_authorized_key + service_cidr = var.service_cidr + snippets = var.worker_snippets + node_labels = var.worker_node_labels } diff --git a/azure/fedora-coreos/kubernetes/workers/butane/worker.yaml b/azure/fedora-coreos/kubernetes/workers/butane/worker.yaml index ec50e714..bb2055ff 100644 --- a/azure/fedora-coreos/kubernetes/workers/butane/worker.yaml +++ b/azure/fedora-coreos/kubernetes/workers/butane/worker.yaml @@ -26,7 +26,7 @@ systemd: Description=Kubelet (System Container) Wants=rpc-statd.service [Service] - Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3 + Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3 ExecStartPre=/bin/mkdir -p /etc/cni/net.d ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests ExecStartPre=/bin/mkdir -p /opt/cni/bin @@ -99,7 +99,7 @@ storage: cgroupDriver: systemd clusterDNS: - ${cluster_dns_service_ip} - clusterDomain: 
${cluster_domain_suffix} + clusterDomain: cluster.local healthzPort: 0 rotateCertificates: true shutdownGracePeriod: 45s diff --git a/azure/fedora-coreos/kubernetes/workers/variables.tf b/azure/fedora-coreos/kubernetes/workers/variables.tf index a27b69f8..3a28d72a 100644 --- a/azure/fedora-coreos/kubernetes/workers/variables.tf +++ b/azure/fedora-coreos/kubernetes/workers/variables.tf @@ -5,9 +5,9 @@ variable "name" { # Azure -variable "region" { +variable "location" { type = string - description = "Must be set to the Azure Region of cluster" + description = "Must be set to the Azure location of cluster" } variable "resource_group_name" { @@ -25,9 +25,12 @@ variable "security_group_id" { description = "Must be set to the `worker_security_group_id` output by cluster" } -variable "backend_address_pool_id" { - type = string - description = "Must be set to the `worker_backend_address_pool_id` output by cluster" +variable "backend_address_pool_ids" { + type = object({ + ipv4 = list(string) + ipv6 = list(string) + }) + description = "Must be set to the `backend_address_pool_ids` output by cluster" } # instances @@ -49,6 +52,24 @@ variable "os_image" { description = "Fedora CoreOS image for instances" } +variable "disk_type" { + type = string + description = "Type of managed disk" + default = "Standard_LRS" +} + +variable "disk_size" { + type = number + description = "Size of the managed disk in GB" + default = 30 +} + +variable "ephemeral_disk" { + type = bool + description = "Use ephemeral local disk instead of managed disk (requires vm_type with local storage)" + default = false +} + variable "priority" { type = string description = "Set priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be evicted at any time." @@ -99,12 +120,3 @@ variable "node_taints" { description = "List of initial node taints" default = [] } - -# unofficial, undocumented, unsupported - -variable "cluster_domain_suffix" { - description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. 
foo.default.svc.cluster.local) " - type = string - default = "cluster.local" -} - diff --git a/azure/fedora-coreos/kubernetes/workers/versions.tf b/azure/fedora-coreos/kubernetes/workers/versions.tf index 4ca84265..7e40fee1 100644 --- a/azure/fedora-coreos/kubernetes/workers/versions.tf +++ b/azure/fedora-coreos/kubernetes/workers/versions.tf @@ -3,7 +3,7 @@ terraform { required_version = ">= 0.13.0, < 2.0.0" required_providers { - azurerm = ">= 2.8, < 4.0" + azurerm = ">= 2.8" ct = { source = "poseidon/ct" version = "~> 0.13" diff --git a/azure/fedora-coreos/kubernetes/workers/workers.tf b/azure/fedora-coreos/kubernetes/workers/workers.tf index 6f98376a..cd0a52a2 100644 --- a/azure/fedora-coreos/kubernetes/workers/workers.tf +++ b/azure/fedora-coreos/kubernetes/workers/workers.tf @@ -3,30 +3,29 @@ locals { } # Workers scale set -resource "azurerm_linux_virtual_machine_scale_set" "workers" { - resource_group_name = var.resource_group_name - - name = "${var.name}-worker" - location = var.region - sku = var.vm_type - instances = var.worker_count - # instance name prefix for instances in the set - computer_name_prefix = "${var.name}-worker" - single_placement_group = false - custom_data = base64encode(data.ct_config.worker.rendered) +resource "azurerm_orchestrated_virtual_machine_scale_set" "workers" { + name = "${var.name}-worker" + resource_group_name = var.resource_group_name + location = var.location + platform_fault_domain_count = 1 + sku_name = var.vm_type + instances = var.worker_count # storage - source_image_id = var.os_image + encryption_at_host_enabled = true + source_image_id = var.os_image os_disk { - storage_account_type = "Standard_LRS" - caching = "ReadWrite" - } - - # Azure requires setting admin_ssh_key, though Ignition custom_data handles it too - admin_username = "core" - admin_ssh_key { - username = "core" - public_key = var.azure_authorized_key + storage_account_type = var.disk_type + disk_size_gb = var.disk_size + caching = "ReadOnly" + # Optionally, use the ephemeral disk of the instance type (support varies) + dynamic "diff_disk_settings" { + for_each = var.ephemeral_disk ? [1] : [] + content { + option = "Local" + placement = "ResourceDisk" + } + } } # network @@ -36,41 +35,46 @@ resource "azurerm_linux_virtual_machine_scale_set" "workers" { network_security_group_id = var.security_group_id ip_configuration { - name = "ip0" + name = "ipv4" + version = "IPv4" primary = true subnet_id = var.subnet_id - # backend address pool to which the NIC should be added - load_balancer_backend_address_pool_ids = [var.backend_address_pool_id] + load_balancer_backend_address_pool_ids = var.backend_address_pool_ids.ipv4 + } + ip_configuration { + name = "ipv6" + version = "IPv6" + subnet_id = var.subnet_id + # backend address pool to which the NIC should be added + load_balancer_backend_address_pool_ids = var.backend_address_pool_ids.ipv6 + } + } + + # boot + user_data_base64 = base64encode(data.ct_config.worker.rendered) + boot_diagnostics { + # defaults to a managed storage account + } + + # Azure requires an RSA admin_ssh_key + os_profile { + linux_configuration { + admin_username = "core" + admin_ssh_key { + username = "core" + public_key = local.azure_authorized_key + } + computer_name_prefix = "${var.name}-worker" } } # lifecycle - upgrade_mode = "Manual" # eviction policy may only be set when priority is Spot priority = var.priority eviction_policy = var.priority == "Spot" ? "Delete" : null -} - -# Scale up or down to maintain desired number, tolerating deallocations. 
-resource "azurerm_monitor_autoscale_setting" "workers" { - resource_group_name = var.resource_group_name - - name = "${var.name}-maintain-desired" - location = var.region - - # autoscale - enabled = true - target_resource_id = azurerm_linux_virtual_machine_scale_set.workers.id - - profile { - name = "default" - - capacity { - minimum = var.worker_count - default = var.worker_count - maximum = var.worker_count - } + termination_notification { + enabled = true } } @@ -80,7 +84,6 @@ data "ct_config" "worker" { kubeconfig = indent(10, var.kubeconfig) ssh_authorized_key = var.ssh_authorized_key cluster_dns_service_ip = cidrhost(var.service_cidr, 10) - cluster_domain_suffix = var.cluster_domain_suffix node_labels = join(",", var.node_labels) node_taints = join(",", var.node_taints) }) diff --git a/azure/flatcar-linux/kubernetes/README.md b/azure/flatcar-linux/kubernetes/README.md index 792f7b9b..01b3ff88 100644 --- a/azure/flatcar-linux/kubernetes/README.md +++ b/azure/flatcar-linux/kubernetes/README.md @@ -11,7 +11,7 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster ## Features -* Kubernetes v1.28.3 (upstream) +* Kubernetes v1.31.3 (upstream) * Single or multi-master, [Calico](https://www.projectcalico.org/) or [Cilium](https://github.com/cilium/cilium) or [flannel](https://github.com/coreos/flannel) networking * On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/) * Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [low-priority](https://typhoon.psdn.io/flatcar-linux/azure/#low-priority) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#hosts) customization diff --git a/azure/flatcar-linux/kubernetes/bootstrap.tf b/azure/flatcar-linux/kubernetes/bootstrap.tf index 6cd97007..1e735c10 100644 --- a/azure/flatcar-linux/kubernetes/bootstrap.tf +++ b/azure/flatcar-linux/kubernetes/bootstrap.tf @@ -1,13 +1,12 @@ # Kubernetes assets (kubeconfig, manifests) module "bootstrap" { - source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=d151ab77b7ebdfb878ea110c86cc77238189f1ed" + source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=e6a1c7bccfc45ab299b5f8149bc3840f99b30b2b" cluster_name = var.cluster_name api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)] etcd_servers = formatlist("%s.%s", azurerm_dns_a_record.etcds.*.name, var.dns_zone) networking = var.networking - # only effective with Calico networking # we should be able to use 1450 MTU, but in practice, 1410 was needed network_encapsulation = "vxlan" @@ -15,9 +14,7 @@ module "bootstrap" { pod_cidr = var.pod_cidr service_cidr = var.service_cidr - cluster_domain_suffix = var.cluster_domain_suffix - enable_reporting = var.enable_reporting - enable_aggregation = var.enable_aggregation daemonset_tolerations = var.daemonset_tolerations + components = var.components } diff --git a/azure/flatcar-linux/kubernetes/butane/controller.yaml b/azure/flatcar-linux/kubernetes/butane/controller.yaml index 20b4139a..6b5a5a13 100644 --- a/azure/flatcar-linux/kubernetes/butane/controller.yaml +++ b/azure/flatcar-linux/kubernetes/butane/controller.yaml @@ -11,7 +11,7 @@ systemd: Requires=docker.service After=docker.service [Service] - Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.10 + Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.13 ExecStartPre=/usr/bin/docker run -d \ --name etcd \ 
--network host \ @@ -56,7 +56,7 @@ systemd: After=docker.service Wants=rpc-statd.service [Service] - Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3 + Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3 ExecStartPre=/bin/mkdir -p /etc/cni/net.d ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests ExecStartPre=/bin/mkdir -p /opt/cni/bin @@ -105,7 +105,7 @@ systemd: Type=oneshot RemainAfterExit=true WorkingDirectory=/opt/bootstrap - Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3 + Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3 ExecStart=/usr/bin/docker run \ -v /etc/kubernetes/pki:/etc/kubernetes/pki:ro \ -v /opt/bootstrap/assets:/assets:ro \ @@ -144,7 +144,7 @@ storage: cgroupDriver: systemd clusterDNS: - ${cluster_dns_service_ip} - clusterDomain: ${cluster_domain_suffix} + clusterDomain: cluster.local healthzPort: 0 rotateCertificates: true shutdownGracePeriod: 45s @@ -158,7 +158,7 @@ storage: contents: inline: | #!/bin/bash -e - mkdir -p -- auth tls/etcd tls/k8s static-manifests manifests/coredns manifests-networking + mkdir -p -- auth tls/{etcd,k8s} static-manifests manifests/{coredns,kube-proxy,network} awk '/#####/ {filename=$2; next} {print > filename}' assets mkdir -p /etc/ssl/etcd/etcd mkdir -p /etc/kubernetes/pki @@ -173,8 +173,7 @@ storage: mv static-manifests/* /etc/kubernetes/manifests/ mkdir -p /opt/bootstrap/assets mv manifests /opt/bootstrap/assets/manifests - mv manifests-networking/* /opt/bootstrap/assets/manifests/ - rm -rf assets auth static-manifests tls manifests-networking + rm -rf assets auth static-manifests tls manifests - path: /opt/bootstrap/apply mode: 0544 contents: diff --git a/azure/flatcar-linux/kubernetes/controllers.tf b/azure/flatcar-linux/kubernetes/controllers.tf index b19a242e..35d79405 100644 --- a/azure/flatcar-linux/kubernetes/controllers.tf +++ b/azure/flatcar-linux/kubernetes/controllers.tf @@ -1,25 +1,9 @@ -# Discrete DNS records for each controller's private IPv4 for etcd usage -resource "azurerm_dns_a_record" "etcds" { - count = var.controller_count - resource_group_name = var.dns_zone_group - - # DNS Zone name where record should be created - zone_name = var.dns_zone - - # DNS record - name = format("%s-etcd%d", var.cluster_name, count.index) - ttl = 300 - - # private IPv4 address for etcd - records = [azurerm_network_interface.controllers.*.private_ip_address[count.index]] -} - locals { # Container Linux derivative # flatcar-stable -> Flatcar Linux Stable channel = split("-", var.os_image)[1] - offer_suffix = var.arch == "arm64" ? "corevm" : "free" - urn = var.arch == "arm64" ? local.channel : "${local.channel}-gen2" + offer_suffix = var.controller_arch == "arm64" ? "corevm" : "free" + urn = var.controller_arch == "arm64" ? local.channel : "${local.channel}-gen2" # Typhoon ssh_authorized_key supports RSA or a newer formats (e.g. ed25519). # However, Azure requires an older RSA key to pass validations. To use a @@ -28,12 +12,25 @@ locals { azure_authorized_key = var.azure_authorized_key == "" ? 
var.ssh_authorized_key : var.azure_authorized_key } +# Discrete DNS records for each controller's private IPv4 for etcd usage +resource "azurerm_dns_a_record" "etcds" { + count = var.controller_count + + # DNS Zone name where record should be created + zone_name = var.dns_zone + resource_group_name = var.dns_zone_group + # DNS record + name = format("%s-etcd%d", var.cluster_name, count.index) + ttl = 300 + # private IPv4 address for etcd + records = [azurerm_network_interface.controllers[count.index].private_ip_address] +} + # Controller availability set to spread controllers resource "azurerm_availability_set" "controllers" { - resource_group_name = azurerm_resource_group.cluster.name - name = "${var.cluster_name}-controllers" - location = var.region + resource_group_name = azurerm_resource_group.cluster.name + location = var.location platform_fault_domain_count = 2 platform_update_domain_count = 4 managed = true @@ -41,25 +38,20 @@ resource "azurerm_availability_set" "controllers" { # Controller instances resource "azurerm_linux_virtual_machine" "controllers" { - count = var.controller_count - resource_group_name = azurerm_resource_group.cluster.name + count = var.controller_count name = "${var.cluster_name}-controller-${count.index}" - location = var.region + resource_group_name = azurerm_resource_group.cluster.name + location = var.location availability_set_id = azurerm_availability_set.controllers.id - - size = var.controller_type - custom_data = base64encode(data.ct_config.controllers.*.rendered[count.index]) - boot_diagnostics { - # defaults to a managed storage account - } + size = var.controller_type # storage os_disk { name = "${var.cluster_name}-controller-${count.index}" + storage_account_type = var.controller_disk_type + disk_size_gb = var.controller_disk_size caching = "None" - disk_size_gb = var.disk_size - storage_account_type = "Premium_LRS" } # Flatcar Container Linux @@ -71,7 +63,7 @@ resource "azurerm_linux_virtual_machine" "controllers" { } dynamic "plan" { - for_each = var.arch == "arm64" ? [] : [1] + for_each = var.controller_arch == "arm64" ? 
[] : [1] content { publisher = "kinvolk" product = "flatcar-container-linux-${local.offer_suffix}" @@ -84,7 +76,13 @@ resource "azurerm_linux_virtual_machine" "controllers" { azurerm_network_interface.controllers[count.index].id ] - # Azure requires setting admin_ssh_key, though Ignition custom_data handles it too + # boot + custom_data = base64encode(data.ct_config.controllers[count.index].rendered) + boot_diagnostics { + # defaults to a managed storage account + } + + # Azure requires an RSA admin_ssh_key admin_username = "core" admin_ssh_key { username = "core" @@ -99,31 +97,52 @@ resource "azurerm_linux_virtual_machine" "controllers" { } } -# Controller public IPv4 addresses -resource "azurerm_public_ip" "controllers" { - count = var.controller_count - resource_group_name = azurerm_resource_group.cluster.name +# Controller node public IPv4 addresses +resource "azurerm_public_ip" "controllers-ipv4" { + count = var.controller_count - name = "${var.cluster_name}-controller-${count.index}" - location = azurerm_resource_group.cluster.location - sku = "Standard" - allocation_method = "Static" + name = "${var.cluster_name}-controller-${count.index}-ipv4" + resource_group_name = azurerm_resource_group.cluster.name + location = azurerm_resource_group.cluster.location + ip_version = "IPv4" + sku = "Standard" + allocation_method = "Static" } -# Controller NICs with public and private IPv4 -resource "azurerm_network_interface" "controllers" { - count = var.controller_count - resource_group_name = azurerm_resource_group.cluster.name +# Controller node public IPv6 addresses +resource "azurerm_public_ip" "controllers-ipv6" { + count = var.controller_count - name = "${var.cluster_name}-controller-${count.index}" - location = azurerm_resource_group.cluster.location + name = "${var.cluster_name}-controller-${count.index}-ipv6" + resource_group_name = azurerm_resource_group.cluster.name + location = azurerm_resource_group.cluster.location + ip_version = "IPv6" + sku = "Standard" + allocation_method = "Static" +} + +# Controllers' network interfaces +resource "azurerm_network_interface" "controllers" { + count = var.controller_count + + name = "${var.cluster_name}-controller-${count.index}" + resource_group_name = azurerm_resource_group.cluster.name + location = azurerm_resource_group.cluster.location ip_configuration { - name = "ip0" + name = "ipv4" + primary = true subnet_id = azurerm_subnet.controller.id private_ip_address_allocation = "Dynamic" - # instance public IPv4 - public_ip_address_id = azurerm_public_ip.controllers.*.id[count.index] + private_ip_address_version = "IPv4" + public_ip_address_id = azurerm_public_ip.controllers-ipv4[count.index].id + } + ip_configuration { + name = "ipv6" + subnet_id = azurerm_subnet.controller.id + private_ip_address_allocation = "Dynamic" + private_ip_address_version = "IPv6" + public_ip_address_id = azurerm_public_ip.controllers-ipv6[count.index].id } } @@ -135,13 +154,21 @@ resource "azurerm_network_interface_security_group_association" "controllers" { network_security_group_id = azurerm_network_security_group.controller.id } -# Associate controller network interface with controller backend address pool -resource "azurerm_network_interface_backend_address_pool_association" "controllers" { +# Associate controller network interface with controller backend address pools +resource "azurerm_network_interface_backend_address_pool_association" "controllers-ipv4" { count = var.controller_count network_interface_id = 
azurerm_network_interface.controllers[count.index].id - ip_configuration_name = "ip0" - backend_address_pool_id = azurerm_lb_backend_address_pool.controller.id + ip_configuration_name = "ipv4" + backend_address_pool_id = azurerm_lb_backend_address_pool.controller-ipv4.id +} + +resource "azurerm_network_interface_backend_address_pool_association" "controllers-ipv6" { + count = var.controller_count + + network_interface_id = azurerm_network_interface.controllers[count.index].id + ip_configuration_name = "ipv6" + backend_address_pool_id = azurerm_lb_backend_address_pool.controller-ipv6.id } # Flatcar Linux controllers @@ -158,7 +185,6 @@ data "ct_config" "controllers" { kubeconfig = indent(10, module.bootstrap.kubeconfig-kubelet) ssh_authorized_key = var.ssh_authorized_key cluster_dns_service_ip = cidrhost(var.service_cidr, 10) - cluster_domain_suffix = var.cluster_domain_suffix }) strict = true snippets = var.controller_snippets diff --git a/azure/flatcar-linux/kubernetes/lb.tf b/azure/flatcar-linux/kubernetes/lb.tf index 4e139471..32f6d000 100644 --- a/azure/flatcar-linux/kubernetes/lb.tf +++ b/azure/flatcar-linux/kubernetes/lb.tf @@ -1,116 +1,164 @@ -# DNS record for the apiserver load balancer +# DNS A record for the apiserver load balancer resource "azurerm_dns_a_record" "apiserver" { - resource_group_name = var.dns_zone_group - # DNS Zone name where record should be created - zone_name = var.dns_zone - + zone_name = var.dns_zone + resource_group_name = var.dns_zone_group # DNS record name = var.cluster_name ttl = 300 - # IPv4 address of apiserver load balancer - records = [azurerm_public_ip.apiserver-ipv4.ip_address] + records = [azurerm_public_ip.frontend-ipv4.ip_address] } -# Static IPv4 address for the apiserver frontend -resource "azurerm_public_ip" "apiserver-ipv4" { - resource_group_name = azurerm_resource_group.cluster.name - - name = "${var.cluster_name}-apiserver-ipv4" - location = var.region - sku = "Standard" - allocation_method = "Static" +# DNS AAAA record for the apiserver load balancer +resource "azurerm_dns_aaaa_record" "apiserver" { + # DNS Zone name where record should be created + zone_name = var.dns_zone + resource_group_name = var.dns_zone_group + # DNS record + name = var.cluster_name + ttl = 300 + # IPv6 address of apiserver load balancer + records = [azurerm_public_ip.frontend-ipv6.ip_address] } -# Static IPv4 address for the ingress frontend -resource "azurerm_public_ip" "ingress-ipv4" { +# Static IPv4 address for the load balancer +resource "azurerm_public_ip" "frontend-ipv4" { + name = "${var.cluster_name}-frontend-ipv4" resource_group_name = azurerm_resource_group.cluster.name + location = var.location + ip_version = "IPv4" + sku = "Standard" + allocation_method = "Static" +} - name = "${var.cluster_name}-ingress-ipv4" - location = var.region - sku = "Standard" - allocation_method = "Static" +# Static IPv6 address for the load balancer +resource "azurerm_public_ip" "frontend-ipv6" { + name = "${var.cluster_name}-frontend-ipv6" + resource_group_name = azurerm_resource_group.cluster.name + location = var.location + ip_version = "IPv6" + sku = "Standard" + allocation_method = "Static" } # Network Load Balancer for apiservers and ingress resource "azurerm_lb" "cluster" { + name = var.cluster_name resource_group_name = azurerm_resource_group.cluster.name - - name = var.cluster_name - location = var.region - sku = "Standard" + location = var.location + sku = "Standard" frontend_ip_configuration { - name = "apiserver" - public_ip_address_id = 
azurerm_public_ip.apiserver-ipv4.id + name = "frontend-ipv4" + public_ip_address_id = azurerm_public_ip.frontend-ipv4.id } frontend_ip_configuration { - name = "ingress" - public_ip_address_id = azurerm_public_ip.ingress-ipv4.id + name = "frontend-ipv6" + public_ip_address_id = azurerm_public_ip.frontend-ipv6.id } } -resource "azurerm_lb_rule" "apiserver" { - name = "apiserver" +resource "azurerm_lb_rule" "apiserver-ipv4" { + name = "apiserver-ipv4" loadbalancer_id = azurerm_lb.cluster.id - frontend_ip_configuration_name = "apiserver" + frontend_ip_configuration_name = "frontend-ipv4" + disable_outbound_snat = true protocol = "Tcp" frontend_port = 6443 backend_port = 6443 - backend_address_pool_ids = [azurerm_lb_backend_address_pool.controller.id] + backend_address_pool_ids = [azurerm_lb_backend_address_pool.controller-ipv4.id] probe_id = azurerm_lb_probe.apiserver.id } -resource "azurerm_lb_rule" "ingress-http" { - name = "ingress-http" +resource "azurerm_lb_rule" "apiserver-ipv6" { + name = "apiserver-ipv6" loadbalancer_id = azurerm_lb.cluster.id - frontend_ip_configuration_name = "ingress" + frontend_ip_configuration_name = "frontend-ipv6" + disable_outbound_snat = true + + protocol = "Tcp" + frontend_port = 6443 + backend_port = 6443 + backend_address_pool_ids = [azurerm_lb_backend_address_pool.controller-ipv6.id] + probe_id = azurerm_lb_probe.apiserver.id +} + +resource "azurerm_lb_rule" "ingress-http-ipv4" { + name = "ingress-http-ipv4" + loadbalancer_id = azurerm_lb.cluster.id + frontend_ip_configuration_name = "frontend-ipv4" disable_outbound_snat = true protocol = "Tcp" frontend_port = 80 backend_port = 80 - backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker.id] + backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker-ipv4.id] probe_id = azurerm_lb_probe.ingress.id } -resource "azurerm_lb_rule" "ingress-https" { - name = "ingress-https" +resource "azurerm_lb_rule" "ingress-https-ipv4" { + name = "ingress-https-ipv4" loadbalancer_id = azurerm_lb.cluster.id - frontend_ip_configuration_name = "ingress" + frontend_ip_configuration_name = "frontend-ipv4" disable_outbound_snat = true protocol = "Tcp" frontend_port = 443 backend_port = 443 - backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker.id] + backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker-ipv4.id] probe_id = azurerm_lb_probe.ingress.id } -# Worker outbound TCP/UDP SNAT -resource "azurerm_lb_outbound_rule" "worker-outbound" { - name = "worker" - loadbalancer_id = azurerm_lb.cluster.id - frontend_ip_configuration { - name = "ingress" - } +resource "azurerm_lb_rule" "ingress-http-ipv6" { + name = "ingress-http-ipv6" + loadbalancer_id = azurerm_lb.cluster.id + frontend_ip_configuration_name = "frontend-ipv6" + disable_outbound_snat = true - protocol = "All" - backend_address_pool_id = azurerm_lb_backend_address_pool.worker.id + protocol = "Tcp" + frontend_port = 80 + backend_port = 80 + backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker-ipv6.id] + probe_id = azurerm_lb_probe.ingress.id } +resource "azurerm_lb_rule" "ingress-https-ipv6" { + name = "ingress-https-ipv6" + loadbalancer_id = azurerm_lb.cluster.id + frontend_ip_configuration_name = "frontend-ipv6" + disable_outbound_snat = true + + protocol = "Tcp" + frontend_port = 443 + backend_port = 443 + backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker-ipv6.id] + probe_id = azurerm_lb_probe.ingress.id +} + +# Backend Address Pools + # Address pool of controllers -resource 
"azurerm_lb_backend_address_pool" "controller" { - name = "controller" +resource "azurerm_lb_backend_address_pool" "controller-ipv4" { + name = "controller-ipv4" loadbalancer_id = azurerm_lb.cluster.id } -# Address pool of workers -resource "azurerm_lb_backend_address_pool" "worker" { - name = "worker" +resource "azurerm_lb_backend_address_pool" "controller-ipv6" { + name = "controller-ipv6" + loadbalancer_id = azurerm_lb.cluster.id +} + +# Address pools for workers +resource "azurerm_lb_backend_address_pool" "worker-ipv4" { + name = "worker-ipv4" + loadbalancer_id = azurerm_lb.cluster.id +} + +resource "azurerm_lb_backend_address_pool" "worker-ipv6" { + name = "worker-ipv6" loadbalancer_id = azurerm_lb.cluster.id } @@ -122,10 +170,8 @@ resource "azurerm_lb_probe" "apiserver" { loadbalancer_id = azurerm_lb.cluster.id protocol = "Tcp" port = 6443 - # unhealthy threshold - number_of_probes = 3 - + number_of_probes = 3 interval_in_seconds = 5 } @@ -136,10 +182,29 @@ resource "azurerm_lb_probe" "ingress" { protocol = "Http" port = 10254 request_path = "/healthz" - # unhealthy threshold - number_of_probes = 3 - + number_of_probes = 3 interval_in_seconds = 5 } +# Outbound SNAT + +resource "azurerm_lb_outbound_rule" "outbound-ipv4" { + name = "outbound-ipv4" + protocol = "All" + loadbalancer_id = azurerm_lb.cluster.id + backend_address_pool_id = azurerm_lb_backend_address_pool.worker-ipv4.id + frontend_ip_configuration { + name = "frontend-ipv4" + } +} + +resource "azurerm_lb_outbound_rule" "outbound-ipv6" { + name = "outbound-ipv6" + protocol = "All" + loadbalancer_id = azurerm_lb.cluster.id + backend_address_pool_id = azurerm_lb_backend_address_pool.worker-ipv6.id + frontend_ip_configuration { + name = "frontend-ipv6" + } +} diff --git a/azure/flatcar-linux/kubernetes/locals.tf b/azure/flatcar-linux/kubernetes/locals.tf new file mode 100644 index 00000000..0c840906 --- /dev/null +++ b/azure/flatcar-linux/kubernetes/locals.tf @@ -0,0 +1,6 @@ +locals { + backend_address_pool_ids = { + ipv4 = [azurerm_lb_backend_address_pool.worker-ipv4.id] + ipv6 = [azurerm_lb_backend_address_pool.worker-ipv6.id] + } +} diff --git a/azure/flatcar-linux/kubernetes/network.tf b/azure/flatcar-linux/kubernetes/network.tf index 0fcaa8b8..e28e51e6 100644 --- a/azure/flatcar-linux/kubernetes/network.tf +++ b/azure/flatcar-linux/kubernetes/network.tf @@ -1,27 +1,63 @@ +# Choose an IPv6 ULA subnet at random +# https://datatracker.ietf.org/doc/html/rfc4193 +resource "random_id" "ula-netnum" { + byte_length = 5 # 40 bits +} + +locals { + # fd00::/8 -> shift 40 -> 2^40 possible /48 subnets + ula-range = cidrsubnet("fd00::/8", 40, random_id.ula-netnum.dec) + network_cidr = { + ipv4 = var.network_cidr.ipv4 + ipv6 = length(var.network_cidr.ipv6) > 0 ? 
var.network_cidr.ipv6 : [local.ula-range] + } + + # Subdivide the virtual network into subnets + # - controllers use netnum 0 + # - workers use netnum 1 + controller_subnets = { + ipv4 = [for i, cidr in local.network_cidr.ipv4 : cidrsubnet(cidr, 1, 0)] + ipv6 = [for i, cidr in local.network_cidr.ipv6 : cidrsubnet(cidr, 16, 0)] + } + worker_subnets = { + ipv4 = [for i, cidr in local.network_cidr.ipv4 : cidrsubnet(cidr, 1, 1)] + ipv6 = [for i, cidr in local.network_cidr.ipv6 : cidrsubnet(cidr, 16, 1)] + } + cluster_subnets = { + ipv4 = concat(local.controller_subnets.ipv4, local.worker_subnets.ipv4) + ipv6 = concat(local.controller_subnets.ipv6, local.worker_subnets.ipv6) + } +} + # Organize cluster into a resource group resource "azurerm_resource_group" "cluster" { name = var.cluster_name - location = var.region + location = var.location } resource "azurerm_virtual_network" "network" { + name = var.cluster_name resource_group_name = azurerm_resource_group.cluster.name - - name = var.cluster_name - location = azurerm_resource_group.cluster.location - address_space = [var.host_cidr] + location = azurerm_resource_group.cluster.location + address_space = concat( + local.network_cidr.ipv4, + local.network_cidr.ipv6 + ) } -# Subnets - separate subnets for controller and workers because Azure -# network security groups are based on IPv4 CIDR rather than instance -# tags like GCP or security group membership like AWS +# Subnets - separate subnets for controllers and workers because Azure +# network security groups are oriented around address prefixes rather +# than instance tags (GCP) or security group membership (AWS) resource "azurerm_subnet" "controller" { - resource_group_name = azurerm_resource_group.cluster.name - name = "controller" + resource_group_name = azurerm_resource_group.cluster.name virtual_network_name = azurerm_virtual_network.network.name - address_prefixes = [cidrsubnet(var.host_cidr, 1, 0)] + address_prefixes = concat( + local.controller_subnets.ipv4, + local.controller_subnets.ipv6, + ) + default_outbound_access_enabled = false } resource "azurerm_subnet_network_security_group_association" "controller" { @@ -30,11 +66,14 @@ resource "azurerm_subnet_network_security_group_association" "controller" { } resource "azurerm_subnet" "worker" { - resource_group_name = azurerm_resource_group.cluster.name - name = "worker" + resource_group_name = azurerm_resource_group.cluster.name virtual_network_name = azurerm_virtual_network.network.name - address_prefixes = [cidrsubnet(var.host_cidr, 1, 1)] + address_prefixes = concat( + local.worker_subnets.ipv4, + local.worker_subnets.ipv6, + ) + default_outbound_access_enabled = false } resource "azurerm_subnet_network_security_group_association" "worker" { diff --git a/azure/flatcar-linux/kubernetes/outputs.tf b/azure/flatcar-linux/kubernetes/outputs.tf index 1fb9cd4d..7559b25e 100644 --- a/azure/flatcar-linux/kubernetes/outputs.tf +++ b/azure/flatcar-linux/kubernetes/outputs.tf @@ -6,13 +6,18 @@ output "kubeconfig-admin" { # Outputs for Kubernetes Ingress output "ingress_static_ipv4" { - value = azurerm_public_ip.ingress-ipv4.ip_address + value = azurerm_public_ip.frontend-ipv4.ip_address description = "IPv4 address of the load balancer for distributing traffic to Ingress controllers" } +output "ingress_static_ipv6" { + value = azurerm_public_ip.frontend-ipv6.ip_address + description = "IPv6 address of the load balancer for distributing traffic to Ingress controllers" +} + # Outputs for worker pools -output "region" { +output "location" { value 
= azurerm_resource_group.cluster.location } @@ -39,13 +44,24 @@ output "kubeconfig" { # Outputs for custom firewalling +output "controller_security_group_name" { + description = "Network Security Group for controller nodes" + value = azurerm_network_security_group.controller.name +} + output "worker_security_group_name" { - value = azurerm_network_security_group.worker.name + description = "Network Security Group for worker nodes" + value = azurerm_network_security_group.worker.name +} + +output "controller_address_prefixes" { + description = "Controller network subnet CIDR addresses (for source/destination)" + value = local.controller_subnets } output "worker_address_prefixes" { description = "Worker network subnet CIDR addresses (for source/destination)" - value = azurerm_subnet.worker.address_prefixes + value = local.worker_subnets } # Outputs for custom load balancing @@ -55,9 +71,12 @@ output "loadbalancer_id" { value = azurerm_lb.cluster.id } -output "backend_address_pool_id" { - description = "ID of the worker backend address pool" - value = azurerm_lb_backend_address_pool.worker.id +output "backend_address_pool_ids" { + description = "IDs of the worker backend address pools" + value = { + ipv4 = [azurerm_lb_backend_address_pool.worker-ipv4.id] + ipv6 = [azurerm_lb_backend_address_pool.worker-ipv6.id] + } } # Outputs for debug diff --git a/azure/flatcar-linux/kubernetes/security.tf b/azure/flatcar-linux/kubernetes/security.tf index 0367cee1..f9a70273 100644 --- a/azure/flatcar-linux/kubernetes/security.tf +++ b/azure/flatcar-linux/kubernetes/security.tf @@ -1,198 +1,223 @@ # Controller security group resource "azurerm_network_security_group" "controller" { + name = "${var.cluster_name}-controller" resource_group_name = azurerm_resource_group.cluster.name - - name = "${var.cluster_name}-controller" - location = azurerm_resource_group.cluster.location + location = azurerm_resource_group.cluster.location } resource "azurerm_network_security_rule" "controller-icmp" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.controller_subnets - name = "allow-icmp" + name = "allow-icmp-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.controller.name - priority = "1995" + priority = 1995 + (each.key == "ipv4" ? 0 : 1) access = "Allow" direction = "Inbound" protocol = "Icmp" source_port_range = "*" destination_port_range = "*" - source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes) - destination_address_prefixes = azurerm_subnet.controller.address_prefixes + source_address_prefixes = local.cluster_subnets[each.key] + destination_address_prefixes = local.controller_subnets[each.key] } resource "azurerm_network_security_rule" "controller-ssh" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.controller_subnets - name = "allow-ssh" + name = "allow-ssh-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.controller.name - priority = "2000" + priority = 2000 + (each.key == "ipv4" ? 
0 : 1) access = "Allow" direction = "Inbound" protocol = "Tcp" source_port_range = "*" destination_port_range = "22" source_address_prefix = "*" - destination_address_prefixes = azurerm_subnet.controller.address_prefixes + destination_address_prefixes = local.controller_subnets[each.key] } resource "azurerm_network_security_rule" "controller-etcd" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.controller_subnets - name = "allow-etcd" + name = "allow-etcd-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.controller.name - priority = "2005" + priority = 2005 + (each.key == "ipv4" ? 0 : 1) access = "Allow" direction = "Inbound" protocol = "Tcp" source_port_range = "*" destination_port_range = "2379-2380" - source_address_prefixes = azurerm_subnet.controller.address_prefixes - destination_address_prefixes = azurerm_subnet.controller.address_prefixes + source_address_prefixes = local.controller_subnets[each.key] + destination_address_prefixes = local.controller_subnets[each.key] } # Allow Prometheus to scrape etcd metrics resource "azurerm_network_security_rule" "controller-etcd-metrics" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.controller_subnets - name = "allow-etcd-metrics" + name = "allow-etcd-metrics-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.controller.name - priority = "2010" + priority = 2010 + (each.key == "ipv4" ? 0 : 1) access = "Allow" direction = "Inbound" protocol = "Tcp" source_port_range = "*" destination_port_range = "2381" - source_address_prefixes = azurerm_subnet.worker.address_prefixes - destination_address_prefixes = azurerm_subnet.controller.address_prefixes + source_address_prefixes = local.worker_subnets[each.key] + destination_address_prefixes = local.controller_subnets[each.key] } # Allow Prometheus to scrape kube-proxy metrics resource "azurerm_network_security_rule" "controller-kube-proxy" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.controller_subnets - name = "allow-kube-proxy-metrics" + name = "allow-kube-proxy-metrics-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.controller.name - priority = "2011" + priority = 2012 + (each.key == "ipv4" ? 0 : 1) access = "Allow" direction = "Inbound" protocol = "Tcp" source_port_range = "*" destination_port_range = "10249" - source_address_prefixes = azurerm_subnet.worker.address_prefixes - destination_address_prefixes = azurerm_subnet.controller.address_prefixes + source_address_prefixes = local.worker_subnets[each.key] + destination_address_prefixes = local.controller_subnets[each.key] } # Allow Prometheus to scrape kube-scheduler and kube-controller-manager metrics resource "azurerm_network_security_rule" "controller-kube-metrics" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.controller_subnets - name = "allow-kube-metrics" + name = "allow-kube-metrics-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.controller.name - priority = "2012" + priority = 2014 + (each.key == "ipv4" ? 
0 : 1) access = "Allow" direction = "Inbound" protocol = "Tcp" source_port_range = "*" destination_port_range = "10257-10259" - source_address_prefixes = azurerm_subnet.worker.address_prefixes - destination_address_prefixes = azurerm_subnet.controller.address_prefixes + source_address_prefixes = local.worker_subnets[each.key] + destination_address_prefixes = local.controller_subnets[each.key] } resource "azurerm_network_security_rule" "controller-apiserver" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.controller_subnets - name = "allow-apiserver" + name = "allow-apiserver-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.controller.name - priority = "2015" + priority = 2016 + (each.key == "ipv4" ? 0 : 1) access = "Allow" direction = "Inbound" protocol = "Tcp" source_port_range = "*" destination_port_range = "6443" source_address_prefix = "*" - destination_address_prefixes = azurerm_subnet.controller.address_prefixes + destination_address_prefixes = local.controller_subnets[each.key] } resource "azurerm_network_security_rule" "controller-cilium-health" { - resource_group_name = azurerm_resource_group.cluster.name - count = var.networking == "cilium" ? 1 : 0 + for_each = var.networking == "cilium" ? local.controller_subnets : {} - name = "allow-cilium-health" + name = "allow-cilium-health-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.controller.name - priority = "2019" + priority = 2018 + (each.key == "ipv4" ? 0 : 1) access = "Allow" direction = "Inbound" protocol = "Tcp" source_port_range = "*" destination_port_range = "4240" - source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes) - destination_address_prefixes = azurerm_subnet.controller.address_prefixes + source_address_prefixes = local.cluster_subnets[each.key] + destination_address_prefixes = local.controller_subnets[each.key] +} + +resource "azurerm_network_security_rule" "controller-cilium-metrics" { + for_each = var.networking == "cilium" ? local.controller_subnets : {} + + name = "allow-cilium-metrics-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name + network_security_group_name = azurerm_network_security_group.controller.name + priority = 2035 + (each.key == "ipv4" ? 0 : 1) + access = "Allow" + direction = "Inbound" + protocol = "Tcp" + source_port_range = "*" + destination_port_range = "9962-9965" + source_address_prefixes = local.cluster_subnets[each.key] + destination_address_prefixes = local.controller_subnets[each.key] } resource "azurerm_network_security_rule" "controller-vxlan" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.controller_subnets - name = "allow-vxlan" + name = "allow-vxlan-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.controller.name - priority = "2020" + priority = 2020 + (each.key == "ipv4" ? 
0 : 1) access = "Allow" direction = "Inbound" protocol = "Udp" source_port_range = "*" destination_port_range = "4789" - source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes) - destination_address_prefixes = azurerm_subnet.controller.address_prefixes + source_address_prefixes = local.cluster_subnets[each.key] + destination_address_prefixes = local.controller_subnets[each.key] } resource "azurerm_network_security_rule" "controller-linux-vxlan" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.controller_subnets - name = "allow-linux-vxlan" + name = "allow-linux-vxlan-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.controller.name - priority = "2021" + priority = 2022 + (each.key == "ipv4" ? 0 : 1) access = "Allow" direction = "Inbound" protocol = "Udp" source_port_range = "*" destination_port_range = "8472" - source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes) - destination_address_prefixes = azurerm_subnet.controller.address_prefixes + source_address_prefixes = local.cluster_subnets[each.key] + destination_address_prefixes = local.controller_subnets[each.key] } # Allow Prometheus to scrape node-exporter daemonset resource "azurerm_network_security_rule" "controller-node-exporter" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.controller_subnets - name = "allow-node-exporter" + name = "allow-node-exporter-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.controller.name - priority = "2025" + priority = 2025 + (each.key == "ipv4" ? 0 : 1) access = "Allow" direction = "Inbound" protocol = "Tcp" source_port_range = "*" destination_port_range = "9100" - source_address_prefixes = azurerm_subnet.worker.address_prefixes - destination_address_prefixes = azurerm_subnet.controller.address_prefixes + source_address_prefixes = local.worker_subnets[each.key] + destination_address_prefixes = local.controller_subnets[each.key] } # Allow apiserver to access kubelet's for exec, log, port-forward resource "azurerm_network_security_rule" "controller-kubelet" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.controller_subnets - name = "allow-kubelet" + name = "allow-kubelet-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.controller.name - priority = "2030" + priority = 2030 + (each.key == "ipv4" ? 
0 : 1) access = "Allow" direction = "Inbound" protocol = "Tcp" source_port_range = "*" destination_port_range = "10250" - # allow Prometheus to scrape kubelet metrics too - source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes) - destination_address_prefixes = azurerm_subnet.controller.address_prefixes + source_address_prefixes = local.cluster_subnets[each.key] + destination_address_prefixes = local.controller_subnets[each.key] } # Override Azure AllowVNetInBound and AllowAzureLoadBalancerInBound @@ -231,166 +256,189 @@ resource "azurerm_network_security_rule" "controller-deny-all" { # Worker security group resource "azurerm_network_security_group" "worker" { + name = "${var.cluster_name}-worker" resource_group_name = azurerm_resource_group.cluster.name - - name = "${var.cluster_name}-worker" - location = azurerm_resource_group.cluster.location + location = azurerm_resource_group.cluster.location } resource "azurerm_network_security_rule" "worker-icmp" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.worker_subnets - name = "allow-icmp" + name = "allow-icmp-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.worker.name - priority = "1995" + priority = 1995 + (each.key == "ipv4" ? 0 : 1) access = "Allow" direction = "Inbound" protocol = "Icmp" source_port_range = "*" destination_port_range = "*" - source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes) - destination_address_prefixes = azurerm_subnet.worker.address_prefixes + source_address_prefixes = local.cluster_subnets[each.key] + destination_address_prefixes = local.worker_subnets[each.key] } resource "azurerm_network_security_rule" "worker-ssh" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.worker_subnets - name = "allow-ssh" + name = "allow-ssh-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.worker.name - priority = "2000" + priority = 2000 + (each.key == "ipv4" ? 0 : 1) access = "Allow" direction = "Inbound" protocol = "Tcp" source_port_range = "*" destination_port_range = "22" - source_address_prefixes = azurerm_subnet.controller.address_prefixes - destination_address_prefixes = azurerm_subnet.worker.address_prefixes + source_address_prefixes = local.controller_subnets[each.key] + destination_address_prefixes = local.worker_subnets[each.key] } resource "azurerm_network_security_rule" "worker-http" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.worker_subnets - name = "allow-http" + name = "allow-http-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.worker.name - priority = "2005" + priority = 2005 + (each.key == "ipv4" ? 
0 : 1) access = "Allow" direction = "Inbound" protocol = "Tcp" source_port_range = "*" destination_port_range = "80" source_address_prefix = "*" - destination_address_prefixes = azurerm_subnet.worker.address_prefixes + destination_address_prefixes = local.worker_subnets[each.key] } resource "azurerm_network_security_rule" "worker-https" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.worker_subnets - name = "allow-https" + name = "allow-https-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.worker.name - priority = "2010" + priority = 2010 + (each.key == "ipv4" ? 0 : 1) access = "Allow" direction = "Inbound" protocol = "Tcp" source_port_range = "*" destination_port_range = "443" source_address_prefix = "*" - destination_address_prefixes = azurerm_subnet.worker.address_prefixes + destination_address_prefixes = local.worker_subnets[each.key] } resource "azurerm_network_security_rule" "worker-cilium-health" { - resource_group_name = azurerm_resource_group.cluster.name - count = var.networking == "cilium" ? 1 : 0 + for_each = var.networking == "cilium" ? local.worker_subnets : {} - name = "allow-cilium-health" + name = "allow-cilium-health-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.worker.name - priority = "2014" + priority = 2012 + (each.key == "ipv4" ? 0 : 1) access = "Allow" direction = "Inbound" protocol = "Tcp" source_port_range = "*" destination_port_range = "4240" - source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes) - destination_address_prefixes = azurerm_subnet.worker.address_prefixes + source_address_prefixes = local.cluster_subnets[each.key] + destination_address_prefixes = local.worker_subnets[each.key] +} + +resource "azurerm_network_security_rule" "worker-cilium-metrics" { + for_each = var.networking == "cilium" ? local.worker_subnets : {} + + name = "allow-cilium-metrics-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name + network_security_group_name = azurerm_network_security_group.worker.name + priority = 2014 + (each.key == "ipv4" ? 0 : 1) + access = "Allow" + direction = "Inbound" + protocol = "Tcp" + source_port_range = "*" + destination_port_range = "9962-9965" + source_address_prefixes = local.cluster_subnets[each.key] + destination_address_prefixes = local.worker_subnets[each.key] } resource "azurerm_network_security_rule" "worker-vxlan" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.worker_subnets - name = "allow-vxlan" + name = "allow-vxlan-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.worker.name - priority = "2015" + priority = 2016 + (each.key == "ipv4" ? 
0 : 1) access = "Allow" direction = "Inbound" protocol = "Udp" source_port_range = "*" destination_port_range = "4789" - source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes) - destination_address_prefixes = azurerm_subnet.worker.address_prefixes + source_address_prefixes = local.cluster_subnets[each.key] + destination_address_prefixes = local.worker_subnets[each.key] } resource "azurerm_network_security_rule" "worker-linux-vxlan" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.worker_subnets - name = "allow-linux-vxlan" + name = "allow-linux-vxlan-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.worker.name - priority = "2016" + priority = 2018 + (each.key == "ipv4" ? 0 : 1) access = "Allow" direction = "Inbound" protocol = "Udp" source_port_range = "*" destination_port_range = "8472" - source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes) - destination_address_prefixes = azurerm_subnet.worker.address_prefixes + source_address_prefixes = local.cluster_subnets[each.key] + destination_address_prefixes = local.worker_subnets[each.key] } # Allow Prometheus to scrape node-exporter daemonset resource "azurerm_network_security_rule" "worker-node-exporter" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.worker_subnets - name = "allow-node-exporter" + name = "allow-node-exporter-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.worker.name - priority = "2020" + priority = 2020 + (each.key == "ipv4" ? 0 : 1) access = "Allow" direction = "Inbound" protocol = "Tcp" source_port_range = "*" destination_port_range = "9100" - source_address_prefixes = azurerm_subnet.worker.address_prefixes - destination_address_prefixes = azurerm_subnet.worker.address_prefixes + source_address_prefixes = local.worker_subnets[each.key] + destination_address_prefixes = local.worker_subnets[each.key] } # Allow Prometheus to scrape kube-proxy resource "azurerm_network_security_rule" "worker-kube-proxy" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.worker_subnets - name = "allow-kube-proxy" + name = "allow-kube-proxy-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.worker.name - priority = "2024" + priority = 2024 + (each.key == "ipv4" ? 0 : 1) access = "Allow" direction = "Inbound" protocol = "Tcp" source_port_range = "*" destination_port_range = "10249" - source_address_prefixes = azurerm_subnet.worker.address_prefixes - destination_address_prefixes = azurerm_subnet.worker.address_prefixes + source_address_prefixes = local.worker_subnets[each.key] + destination_address_prefixes = local.worker_subnets[each.key] } # Allow apiserver to access kubelet's for exec, log, port-forward resource "azurerm_network_security_rule" "worker-kubelet" { - resource_group_name = azurerm_resource_group.cluster.name + for_each = local.worker_subnets - name = "allow-kubelet" + name = "allow-kubelet-${each.key}" + resource_group_name = azurerm_resource_group.cluster.name network_security_group_name = azurerm_network_security_group.worker.name - priority = "2025" + priority = 2026 + (each.key == "ipv4" ? 
0 : 1) access = "Allow" direction = "Inbound" protocol = "Tcp" source_port_range = "*" destination_port_range = "10250" - # allow Prometheus to scrape kubelet metrics too - source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes) - destination_address_prefixes = azurerm_subnet.worker.address_prefixes + source_address_prefixes = local.cluster_subnets[each.key] + destination_address_prefixes = local.worker_subnets[each.key] } # Override Azure AllowVNetInBound and AllowAzureLoadBalancerInBound diff --git a/azure/flatcar-linux/kubernetes/ssh.tf b/azure/flatcar-linux/kubernetes/ssh.tf index 9b1f3a8a..ad0baa67 100644 --- a/azure/flatcar-linux/kubernetes/ssh.tf +++ b/azure/flatcar-linux/kubernetes/ssh.tf @@ -18,7 +18,7 @@ resource "null_resource" "copy-controller-secrets" { connection { type = "ssh" - host = azurerm_public_ip.controllers.*.ip_address[count.index] + host = azurerm_public_ip.controllers-ipv4[count.index].ip_address user = "core" timeout = "15m" } @@ -45,7 +45,7 @@ resource "null_resource" "bootstrap" { connection { type = "ssh" - host = azurerm_public_ip.controllers.*.ip_address[0] + host = azurerm_public_ip.controllers-ipv4[0].ip_address user = "core" timeout = "15m" } diff --git a/azure/flatcar-linux/kubernetes/variables.tf b/azure/flatcar-linux/kubernetes/variables.tf index 7b2dd15a..dd8d6b30 100644 --- a/azure/flatcar-linux/kubernetes/variables.tf +++ b/azure/flatcar-linux/kubernetes/variables.tf @@ -5,9 +5,9 @@ variable "cluster_name" { # Azure -variable "region" { +variable "location" { type = string - description = "Azure Region (e.g. centralus , see `az account list-locations --output table`)" + description = "Azure location (e.g. centralus , see `az account list-locations --output table`)" } variable "dns_zone" { @@ -22,30 +22,6 @@ variable "dns_zone_group" { # instances -variable "controller_count" { - type = number - description = "Number of controllers (i.e. masters)" - default = 1 -} - -variable "worker_count" { - type = number - description = "Number of workers" - default = 1 -} - -variable "controller_type" { - type = string - description = "Machine type for controllers (see `az vm list-skus --location centralus`)" - default = "Standard_B2s" -} - -variable "worker_type" { - type = string - description = "Machine type for workers (see `az vm list-skus --location centralus`)" - default = "Standard_D2as_v5" -} - variable "os_image" { type = string description = "Channel for a Container Linux derivative (flatcar-stable, flatcar-beta, flatcar-alpha)" @@ -57,12 +33,60 @@ variable "os_image" { } } -variable "disk_size" { +variable "controller_count" { type = number - description = "Size of the disk in GB" + description = "Number of controllers (i.e. 
masters)" + default = 1 +} + +variable "controller_type" { + type = string + description = "Machine type for controllers (see `az vm list-skus --location centralus`)" + default = "Standard_B2s" +} + +variable "controller_disk_type" { + type = string + description = "Type of managed disk for controller node(s)" + default = "Premium_LRS" +} + +variable "controller_disk_size" { + type = number + description = "Size of the managed disk in GB for controller node(s)" default = 30 } +variable "worker_count" { + type = number + description = "Number of workers" + default = 1 +} + +variable "worker_type" { + type = string + description = "Machine type for workers (see `az vm list-skus --location centralus`)" + default = "Standard_D2as_v5" +} + +variable "worker_disk_type" { + type = string + description = "Type of managed disk for worker nodes" + default = "Standard_LRS" +} + +variable "worker_disk_size" { + type = number + description = "Size of the managed disk in GB for worker nodes" + default = 30 +} + +variable "worker_ephemeral_disk" { + type = bool + description = "Use ephemeral local disk instead of managed disk (requires vm_type with local storage)" + default = false +} + variable "worker_priority" { type = string description = "Set worker priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be deallocated at any time." @@ -100,10 +124,15 @@ variable "networking" { default = "cilium" } -variable "host_cidr" { - type = string - description = "CIDR IPv4 range to assign to instances" - default = "10.0.0.0/16" +variable "network_cidr" { + type = object({ + ipv4 = list(string) + ipv6 = optional(list(string), []) + }) + description = "Virtual network CIDR ranges" + default = { + ipv4 = ["10.0.0.0/16"] + } } variable "pod_cidr" { @@ -121,32 +150,31 @@ EOD default = "10.3.0.0/16" } -variable "enable_reporting" { - type = bool - description = "Enable usage or analytics reporting to upstreams (Calico)" - default = false -} - -variable "enable_aggregation" { - type = bool - description = "Enable the Kubernetes Aggregation Layer" - default = true -} - variable "worker_node_labels" { type = list(string) description = "List of initial worker node labels" default = [] } -variable "arch" { - type = string - description = "Container architecture (amd64 or arm64)" - default = "amd64" +# advanced +variable "controller_arch" { + type = string + description = "Controller node(s) architecture (amd64 or arm64)" + default = "amd64" validation { - condition = var.arch == "amd64" || var.arch == "arm64" - error_message = "The arch must be amd64 or arm64." + condition = contains(["amd64", "arm64"], var.controller_arch) + error_message = "The controller_arch must be amd64 or arm64." + } +} + +variable "worker_arch" { + type = string + description = "Worker node(s) architecture (amd64 or arm64)" + default = "amd64" + validation { + condition = contains(["amd64", "arm64"], var.worker_arch) + error_message = "The worker_arch must be amd64 or arm64." } } @@ -156,10 +184,18 @@ variable "daemonset_tolerations" { default = [] } -# unofficial, undocumented, unsupported - -variable "cluster_domain_suffix" { - type = string - description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. 
foo.default.svc.cluster.local) " - default = "cluster.local" +variable "components" { + description = "Configure pre-installed cluster components" + # Component configs are passed through to terraform-render-bootstrap, + # which handles type enforcement and defines defaults + # https://github.com/poseidon/terraform-render-bootstrap/blob/main/variables.tf#L95 + type = object({ + enable = optional(bool) + coredns = optional(map(any)) + kube_proxy = optional(map(any)) + flannel = optional(map(any)) + calico = optional(map(any)) + cilium = optional(map(any)) + }) + default = null } diff --git a/azure/flatcar-linux/kubernetes/versions.tf b/azure/flatcar-linux/kubernetes/versions.tf index 04b3ca6c..c928a28b 100644 --- a/azure/flatcar-linux/kubernetes/versions.tf +++ b/azure/flatcar-linux/kubernetes/versions.tf @@ -3,11 +3,11 @@ terraform { required_version = ">= 0.13.0, < 2.0.0" required_providers { - azurerm = ">= 2.8, < 4.0" + azurerm = ">= 2.8" null = ">= 2.1" ct = { source = "poseidon/ct" - version = "~> 0.11" + version = "~> 0.13" } } } diff --git a/azure/flatcar-linux/kubernetes/workers.tf b/azure/flatcar-linux/kubernetes/workers.tf index 082f2917..cdfb1d33 100644 --- a/azure/flatcar-linux/kubernetes/workers.tf +++ b/azure/flatcar-linux/kubernetes/workers.tf @@ -3,24 +3,26 @@ module "workers" { name = var.cluster_name # Azure - resource_group_name = azurerm_resource_group.cluster.name - region = azurerm_resource_group.cluster.location - subnet_id = azurerm_subnet.worker.id - security_group_id = azurerm_network_security_group.worker.id - backend_address_pool_id = azurerm_lb_backend_address_pool.worker.id + resource_group_name = azurerm_resource_group.cluster.name + location = azurerm_resource_group.cluster.location + subnet_id = azurerm_subnet.worker.id + security_group_id = azurerm_network_security_group.worker.id + backend_address_pool_ids = local.backend_address_pool_ids - worker_count = var.worker_count - vm_type = var.worker_type - os_image = var.os_image - priority = var.worker_priority + worker_count = var.worker_count + vm_type = var.worker_type + os_image = var.os_image + disk_type = var.worker_disk_type + disk_size = var.worker_disk_size + ephemeral_disk = var.worker_ephemeral_disk + priority = var.worker_priority # configuration - kubeconfig = module.bootstrap.kubeconfig-kubelet - ssh_authorized_key = var.ssh_authorized_key - azure_authorized_key = var.azure_authorized_key - service_cidr = var.service_cidr - cluster_domain_suffix = var.cluster_domain_suffix - snippets = var.worker_snippets - node_labels = var.worker_node_labels - arch = var.arch + kubeconfig = module.bootstrap.kubeconfig-kubelet + ssh_authorized_key = var.ssh_authorized_key + azure_authorized_key = var.azure_authorized_key + service_cidr = var.service_cidr + snippets = var.worker_snippets + node_labels = var.worker_node_labels + arch = var.worker_arch } diff --git a/azure/flatcar-linux/kubernetes/workers/butane/worker.yaml b/azure/flatcar-linux/kubernetes/workers/butane/worker.yaml index 4a454a4a..d2f92084 100644 --- a/azure/flatcar-linux/kubernetes/workers/butane/worker.yaml +++ b/azure/flatcar-linux/kubernetes/workers/butane/worker.yaml @@ -28,7 +28,7 @@ systemd: After=docker.service Wants=rpc-statd.service [Service] - Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3 + Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3 ExecStartPre=/bin/mkdir -p /etc/cni/net.d ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests ExecStartPre=/bin/mkdir -p /opt/cni/bin @@ -99,7 +99,7 @@ storage: cgroupDriver: 
systemd clusterDNS: - ${cluster_dns_service_ip} - clusterDomain: ${cluster_domain_suffix} + clusterDomain: cluster.local healthzPort: 0 rotateCertificates: true shutdownGracePeriod: 45s diff --git a/azure/flatcar-linux/kubernetes/workers/variables.tf b/azure/flatcar-linux/kubernetes/workers/variables.tf index a2612d97..684619aa 100644 --- a/azure/flatcar-linux/kubernetes/workers/variables.tf +++ b/azure/flatcar-linux/kubernetes/workers/variables.tf @@ -5,9 +5,9 @@ variable "name" { # Azure -variable "region" { +variable "location" { type = string - description = "Must be set to the Azure Region of cluster" + description = "Must be set to the Azure location of cluster" } variable "resource_group_name" { @@ -25,9 +25,12 @@ variable "security_group_id" { description = "Must be set to the `worker_security_group_id` output by cluster" } -variable "backend_address_pool_id" { - type = string - description = "Must be set to the `worker_backend_address_pool_id` output by cluster" +variable "backend_address_pool_ids" { + type = object({ + ipv4 = list(string) + ipv6 = list(string) + }) + description = "Must be set to the `backend_address_pool_ids` output by cluster" } # instances @@ -55,6 +58,24 @@ variable "os_image" { } } +variable "disk_type" { + type = string + description = "Type of managed disk" + default = "Standard_LRS" +} + +variable "disk_size" { + type = number + description = "Size of the managed disk in GB" + default = 30 +} + +variable "ephemeral_disk" { + type = bool + description = "Use ephemeral local disk instead of managed disk (requires vm_type with local storage)" + default = false +} + variable "priority" { type = string description = "Set priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be evicted at any time." @@ -116,12 +137,3 @@ variable "arch" { error_message = "The arch must be amd64 or arm64." } } - -# unofficial, undocumented, unsupported - -variable "cluster_domain_suffix" { - description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. 
foo.default.svc.cluster.local) " - type = string - default = "cluster.local" -} - diff --git a/azure/flatcar-linux/kubernetes/workers/versions.tf b/azure/flatcar-linux/kubernetes/workers/versions.tf index 3f82d350..7e40fee1 100644 --- a/azure/flatcar-linux/kubernetes/workers/versions.tf +++ b/azure/flatcar-linux/kubernetes/workers/versions.tf @@ -3,10 +3,10 @@ terraform { required_version = ">= 0.13.0, < 2.0.0" required_providers { - azurerm = ">= 2.8, < 4.0" + azurerm = ">= 2.8" ct = { source = "poseidon/ct" - version = "~> 0.11" + version = "~> 0.13" } } } diff --git a/azure/flatcar-linux/kubernetes/workers/workers.tf b/azure/flatcar-linux/kubernetes/workers/workers.tf index a0eaf066..f132bf7f 100644 --- a/azure/flatcar-linux/kubernetes/workers/workers.tf +++ b/azure/flatcar-linux/kubernetes/workers/workers.tf @@ -8,25 +8,28 @@ locals { } # Workers scale set -resource "azurerm_linux_virtual_machine_scale_set" "workers" { - resource_group_name = var.resource_group_name - - name = "${var.name}-worker" - location = var.region - sku = var.vm_type - instances = var.worker_count - # instance name prefix for instances in the set - computer_name_prefix = "${var.name}-worker" - single_placement_group = false - custom_data = base64encode(data.ct_config.worker.rendered) - boot_diagnostics { - # defaults to a managed storage account - } +resource "azurerm_orchestrated_virtual_machine_scale_set" "workers" { + name = "${var.name}-worker" + resource_group_name = var.resource_group_name + location = var.location + platform_fault_domain_count = 1 + sku_name = var.vm_type + instances = var.worker_count # storage + encryption_at_host_enabled = true os_disk { - storage_account_type = "Standard_LRS" - caching = "ReadWrite" + storage_account_type = var.disk_type + disk_size_gb = var.disk_size + caching = "ReadOnly" + # Optionally, use the ephemeral disk of the instance type (support varies) + dynamic "diff_disk_settings" { + for_each = var.ephemeral_disk ? 
[1] : [] + content { + option = "Local" + placement = "ResourceDisk" + } + } } # Flatcar Container Linux @@ -46,13 +49,6 @@ resource "azurerm_linux_virtual_machine_scale_set" "workers" { } } - # Azure requires setting admin_ssh_key, though Ignition custom_data handles it too - admin_username = "core" - admin_ssh_key { - username = "core" - public_key = local.azure_authorized_key - } - # network network_interface { name = "nic0" @@ -60,17 +56,41 @@ resource "azurerm_linux_virtual_machine_scale_set" "workers" { network_security_group_id = var.security_group_id ip_configuration { - name = "ip0" + name = "ipv4" + version = "IPv4" primary = true subnet_id = var.subnet_id - # backend address pool to which the NIC should be added - load_balancer_backend_address_pool_ids = [var.backend_address_pool_id] + load_balancer_backend_address_pool_ids = var.backend_address_pool_ids.ipv4 + } + ip_configuration { + name = "ipv6" + version = "IPv6" + subnet_id = var.subnet_id + # backend address pool to which the NIC should be added + load_balancer_backend_address_pool_ids = var.backend_address_pool_ids.ipv6 + } + } + + # boot + user_data_base64 = base64encode(data.ct_config.worker.rendered) + boot_diagnostics { + # defaults to a managed storage account + } + + # Azure requires an RSA admin_ssh_key + os_profile { + linux_configuration { + admin_username = "core" + admin_ssh_key { + username = "core" + public_key = local.azure_authorized_key + } + computer_name_prefix = "${var.name}-worker" } } # lifecycle - upgrade_mode = "Manual" # eviction policy may only be set when priority is Spot priority = var.priority eviction_policy = var.priority == "Spot" ? "Delete" : null @@ -79,35 +99,12 @@ resource "azurerm_linux_virtual_machine_scale_set" "workers" { } } -# Scale up or down to maintain desired number, tolerating deallocations. 
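With the switch to `azurerm_orchestrated_virtual_machine_scale_set`, worker capacity is pinned directly via `instances = var.worker_count`, so the separate autoscale profile that previously held the count steady against Spot deallocations is dropped (its removal follows below). A minimal sketch of a cluster definition using the new worker disk variables from this change — the module ref, names, and values are illustrative placeholders, not taken from this diff:

```hcl
# Illustrative sketch only; module source ref and all values are placeholders.
module "ramius" {
  source = "git::https://github.com/poseidon/typhoon//azure/flatcar-linux/kubernetes?ref=v1.31.3"

  # Azure
  cluster_name   = "ramius"
  location       = "centralus"
  dns_zone       = "azure.example.com"
  dns_zone_group = "example-group"

  # instances (fixed count; no monitor autoscale setting is created anymore)
  worker_count          = 2
  worker_type           = "Standard_D2as_v5"
  worker_disk_type      = "Standard_LRS"
  worker_disk_size      = 30
  worker_ephemeral_disk = false # true requires a vm_type with local (ephemeral) storage

  # configuration
  ssh_authorized_key = "ssh-rsa AAAAB3Nz..." # placeholder key
}
```

The maintain-desired autoscale workaround this replaces is removed here: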
-resource "azurerm_monitor_autoscale_setting" "workers" { - resource_group_name = var.resource_group_name - - name = "${var.name}-maintain-desired" - location = var.region - - # autoscale - enabled = true - target_resource_id = azurerm_linux_virtual_machine_scale_set.workers.id - - profile { - name = "default" - - capacity { - minimum = var.worker_count - default = var.worker_count - maximum = var.worker_count - } - } -} - # Flatcar Linux worker data "ct_config" "worker" { content = templatefile("${path.module}/butane/worker.yaml", { kubeconfig = indent(10, var.kubeconfig) ssh_authorized_key = var.ssh_authorized_key cluster_dns_service_ip = cidrhost(var.service_cidr, 10) - cluster_domain_suffix = var.cluster_domain_suffix node_labels = join(",", var.node_labels) node_taints = join(",", var.node_taints) }) diff --git a/bare-metal/fedora-coreos/kubernetes/README.md b/bare-metal/fedora-coreos/kubernetes/README.md index 4fde2d9b..df758938 100644 --- a/bare-metal/fedora-coreos/kubernetes/README.md +++ b/bare-metal/fedora-coreos/kubernetes/README.md @@ -11,7 +11,7 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster ## Features -* Kubernetes v1.28.3 (upstream) +* Kubernetes v1.31.3 (upstream) * Single or multi-master, [Calico](https://www.projectcalico.org/) or [Cilium](https://github.com/cilium/cilium) or [flannel](https://github.com/coreos/flannel) networking * On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/), SELinux enforcing * Advanced features like [snippets](https://typhoon.psdn.io/advanced/customization/#hosts) customization diff --git a/bare-metal/fedora-coreos/kubernetes/bootstrap.tf b/bare-metal/fedora-coreos/kubernetes/bootstrap.tf index 5c4a5b7c..e17f14cc 100644 --- a/bare-metal/fedora-coreos/kubernetes/bootstrap.tf +++ b/bare-metal/fedora-coreos/kubernetes/bootstrap.tf @@ -1,6 +1,6 @@ # Kubernetes assets (kubeconfig, manifests) module "bootstrap" { - source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=d151ab77b7ebdfb878ea110c86cc77238189f1ed" + source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=e6a1c7bccfc45ab299b5f8149bc3840f99b30b2b" cluster_name = var.cluster_name api_servers = [var.k8s_domain_name] @@ -10,9 +10,7 @@ module "bootstrap" { network_ip_autodetection_method = var.network_ip_autodetection_method pod_cidr = var.pod_cidr service_cidr = var.service_cidr - cluster_domain_suffix = var.cluster_domain_suffix - enable_reporting = var.enable_reporting - enable_aggregation = var.enable_aggregation + components = var.components } diff --git a/bare-metal/fedora-coreos/kubernetes/butane/controller.yaml b/bare-metal/fedora-coreos/kubernetes/butane/controller.yaml index e59c401c..bdb7a0ce 100644 --- a/bare-metal/fedora-coreos/kubernetes/butane/controller.yaml +++ b/bare-metal/fedora-coreos/kubernetes/butane/controller.yaml @@ -12,7 +12,7 @@ systemd: Wants=network-online.target After=network-online.target [Service] - Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.10 + Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.13 Type=exec ExecStartPre=/bin/mkdir -p /var/lib/etcd ExecStartPre=-/usr/bin/podman rm etcd @@ -53,7 +53,7 @@ systemd: Description=Kubelet (System Container) Wants=rpc-statd.service [Service] - Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3 + Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3 ExecStartPre=/bin/mkdir -p /etc/cni/net.d 
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests ExecStartPre=/bin/mkdir -p /opt/cni/bin @@ -113,7 +113,7 @@ systemd: Type=oneshot RemainAfterExit=true WorkingDirectory=/opt/bootstrap - Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3 + Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3 ExecStartPre=-/usr/bin/podman rm bootstrap ExecStart=/usr/bin/podman run --name bootstrap \ --network host \ @@ -154,7 +154,7 @@ storage: cgroupDriver: systemd clusterDNS: - ${cluster_dns_service_ip} - clusterDomain: ${cluster_domain_suffix} + clusterDomain: cluster.local healthzPort: 0 rotateCertificates: true shutdownGracePeriod: 45s @@ -168,7 +168,7 @@ storage: contents: inline: | #!/bin/bash -e - mkdir -p -- auth tls/etcd tls/k8s static-manifests manifests/coredns manifests-networking + mkdir -p -- auth tls/{etcd,k8s} static-manifests manifests/{coredns,kube-proxy,network} awk '/#####/ {filename=$2; next} {print > filename}' assets mkdir -p /etc/ssl/etcd/etcd mkdir -p /etc/kubernetes/pki @@ -182,8 +182,7 @@ storage: mv static-manifests/* /etc/kubernetes/manifests/ mkdir -p /opt/bootstrap/assets mv manifests /opt/bootstrap/assets/manifests - mv manifests-networking/* /opt/bootstrap/assets/manifests/ - rm -rf assets auth static-manifests tls manifests-networking + rm -rf assets auth static-manifests tls manifests chcon -R -u system_u -t container_file_t /etc/kubernetes/pki - path: /opt/bootstrap/apply mode: 0544 diff --git a/bare-metal/fedora-coreos/kubernetes/profiles.tf b/bare-metal/fedora-coreos/kubernetes/profiles.tf index c5b8c650..d1ec077f 100644 --- a/bare-metal/fedora-coreos/kubernetes/profiles.tf +++ b/bare-metal/fedora-coreos/kubernetes/profiles.tf @@ -59,7 +59,6 @@ data "ct_config" "controllers" { etcd_name = var.controllers.*.name[count.index] etcd_initial_cluster = join(",", formatlist("%s=https://%s:2380", var.controllers.*.name, var.controllers.*.domain)) cluster_dns_service_ip = module.bootstrap.cluster_dns_service_ip - cluster_domain_suffix = var.cluster_domain_suffix ssh_authorized_key = var.ssh_authorized_key }) strict = true diff --git a/bare-metal/fedora-coreos/kubernetes/variables.tf b/bare-metal/fedora-coreos/kubernetes/variables.tf index 943c2f0c..335e8ca4 100644 --- a/bare-metal/fedora-coreos/kubernetes/variables.tf +++ b/bare-metal/fedora-coreos/kubernetes/variables.tf @@ -139,23 +139,20 @@ variable "kernel_args" { default = [] } -variable "enable_reporting" { - type = bool - description = "Enable usage or analytics reporting to upstreams (Calico)" - default = false +# advanced + +variable "components" { + description = "Configure pre-installed cluster components" + # Component configs are passed through to terraform-render-bootstrap, + # which handles type enforcement and defines defaults + # https://github.com/poseidon/terraform-render-bootstrap/blob/main/variables.tf#L95 + type = object({ + enable = optional(bool) + coredns = optional(map(any)) + kube_proxy = optional(map(any)) + flannel = optional(map(any)) + calico = optional(map(any)) + cilium = optional(map(any)) + }) + default = null } - -variable "enable_aggregation" { - type = bool - description = "Enable the Kubernetes Aggregation Layer" - default = true -} - -# unofficial, undocumented, unsupported - -variable "cluster_domain_suffix" { - description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. 
foo.default.svc.cluster.local) " - type = string - default = "cluster.local" -} - diff --git a/bare-metal/fedora-coreos/kubernetes/worker/butane/worker.yaml b/bare-metal/fedora-coreos/kubernetes/worker/butane/worker.yaml index dec42385..1601c3a5 100644 --- a/bare-metal/fedora-coreos/kubernetes/worker/butane/worker.yaml +++ b/bare-metal/fedora-coreos/kubernetes/worker/butane/worker.yaml @@ -25,7 +25,7 @@ systemd: Description=Kubelet (System Container) Wants=rpc-statd.service [Service] - Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3 + Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3 ExecStartPre=/bin/mkdir -p /etc/cni/net.d ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests ExecStartPre=/bin/mkdir -p /opt/cni/bin @@ -108,7 +108,7 @@ storage: cgroupDriver: systemd clusterDNS: - ${cluster_dns_service_ip} - clusterDomain: ${cluster_domain_suffix} + clusterDomain: cluster.local healthzPort: 0 rotateCertificates: true shutdownGracePeriod: 45s diff --git a/bare-metal/fedora-coreos/kubernetes/worker/matchbox.tf b/bare-metal/fedora-coreos/kubernetes/worker/matchbox.tf index 7fd0bdf9..3f7a7658 100644 --- a/bare-metal/fedora-coreos/kubernetes/worker/matchbox.tf +++ b/bare-metal/fedora-coreos/kubernetes/worker/matchbox.tf @@ -53,7 +53,6 @@ data "ct_config" "worker" { domain_name = var.domain ssh_authorized_key = var.ssh_authorized_key cluster_dns_service_ip = cidrhost(var.service_cidr, 10) - cluster_domain_suffix = var.cluster_domain_suffix node_labels = join(",", var.node_labels) node_taints = join(",", var.node_taints) }) diff --git a/bare-metal/fedora-coreos/kubernetes/worker/variables.tf b/bare-metal/fedora-coreos/kubernetes/worker/variables.tf index fe89f21b..e2fd056c 100644 --- a/bare-metal/fedora-coreos/kubernetes/worker/variables.tf +++ b/bare-metal/fedora-coreos/kubernetes/worker/variables.tf @@ -103,9 +103,3 @@ The 1st IP will be reserved for kube_apiserver, the 10th IP will be reserved for EOD default = "10.3.0.0/16" } - -variable "cluster_domain_suffix" { - description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. 
foo.default.svc.cluster.local) " - type = string - default = "cluster.local" -} diff --git a/bare-metal/fedora-coreos/kubernetes/workers.tf b/bare-metal/fedora-coreos/kubernetes/workers.tf index 38a599ac..c7f4186b 100644 --- a/bare-metal/fedora-coreos/kubernetes/workers.tf +++ b/bare-metal/fedora-coreos/kubernetes/workers.tf @@ -15,13 +15,12 @@ module "workers" { domain = var.workers[count.index].domain # configuration - kubeconfig = module.bootstrap.kubeconfig-kubelet - ssh_authorized_key = var.ssh_authorized_key - service_cidr = var.service_cidr - cluster_domain_suffix = var.cluster_domain_suffix - node_labels = lookup(var.worker_node_labels, var.workers[count.index].name, []) - node_taints = lookup(var.worker_node_taints, var.workers[count.index].name, []) - snippets = lookup(var.snippets, var.workers[count.index].name, []) + kubeconfig = module.bootstrap.kubeconfig-kubelet + ssh_authorized_key = var.ssh_authorized_key + service_cidr = var.service_cidr + node_labels = lookup(var.worker_node_labels, var.workers[count.index].name, []) + node_taints = lookup(var.worker_node_taints, var.workers[count.index].name, []) + snippets = lookup(var.snippets, var.workers[count.index].name, []) # optional cached_install = var.cached_install diff --git a/bare-metal/flatcar-linux/kubernetes/README.md b/bare-metal/flatcar-linux/kubernetes/README.md index 29a5c165..dd9c3cee 100644 --- a/bare-metal/flatcar-linux/kubernetes/README.md +++ b/bare-metal/flatcar-linux/kubernetes/README.md @@ -11,7 +11,7 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster ## Features -* Kubernetes v1.28.3 (upstream) +* Kubernetes v1.31.3 (upstream) * Single or multi-master, [Calico](https://www.projectcalico.org/) or [Cilium](https://github.com/cilium/cilium) or [flannel](https://github.com/coreos/flannel) networking * On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/) * Advanced features like [snippets](https://typhoon.psdn.io/advanced/customization/#hosts) customization diff --git a/bare-metal/flatcar-linux/kubernetes/bootstrap.tf b/bare-metal/flatcar-linux/kubernetes/bootstrap.tf index 7818a683..d24b3f31 100644 --- a/bare-metal/flatcar-linux/kubernetes/bootstrap.tf +++ b/bare-metal/flatcar-linux/kubernetes/bootstrap.tf @@ -1,6 +1,6 @@ # Kubernetes assets (kubeconfig, manifests) module "bootstrap" { - source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=d151ab77b7ebdfb878ea110c86cc77238189f1ed" + source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=e6a1c7bccfc45ab299b5f8149bc3840f99b30b2b" cluster_name = var.cluster_name api_servers = [var.k8s_domain_name] @@ -10,8 +10,6 @@ module "bootstrap" { network_ip_autodetection_method = var.network_ip_autodetection_method pod_cidr = var.pod_cidr service_cidr = var.service_cidr - cluster_domain_suffix = var.cluster_domain_suffix - enable_reporting = var.enable_reporting - enable_aggregation = var.enable_aggregation + components = var.components } diff --git a/bare-metal/flatcar-linux/kubernetes/butane/controller.yaml b/bare-metal/flatcar-linux/kubernetes/butane/controller.yaml index 59dc077e..8f32a127 100644 --- a/bare-metal/flatcar-linux/kubernetes/butane/controller.yaml +++ b/bare-metal/flatcar-linux/kubernetes/butane/controller.yaml @@ -11,7 +11,7 @@ systemd: Requires=docker.service After=docker.service [Service] - Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.10 + 
Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.13 ExecStartPre=/usr/bin/docker run -d \ --name etcd \ --network host \ @@ -64,7 +64,7 @@ systemd: After=docker.service Wants=rpc-statd.service [Service] - Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3 + Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3 ExecStartPre=/bin/mkdir -p /etc/cni/net.d ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests ExecStartPre=/bin/mkdir -p /opt/cni/bin @@ -114,7 +114,7 @@ systemd: Type=oneshot RemainAfterExit=true WorkingDirectory=/opt/bootstrap - Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3 + Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3 ExecStart=/usr/bin/docker run \ -v /etc/kubernetes/pki:/etc/kubernetes/pki:ro \ -v /opt/bootstrap/assets:/assets:ro \ @@ -155,7 +155,7 @@ storage: cgroupDriver: systemd clusterDNS: - ${cluster_dns_service_ip} - clusterDomain: ${cluster_domain_suffix} + clusterDomain: cluster.local healthzPort: 0 rotateCertificates: true shutdownGracePeriod: 45s @@ -169,7 +169,7 @@ storage: contents: inline: | #!/bin/bash -e - mkdir -p -- auth tls/etcd tls/k8s static-manifests manifests/coredns manifests-networking + mkdir -p -- auth tls/{etcd,k8s} static-manifests manifests/{coredns,kube-proxy,network} awk '/#####/ {filename=$2; next} {print > filename}' assets mkdir -p /etc/ssl/etcd/etcd mkdir -p /etc/kubernetes/pki @@ -184,8 +184,7 @@ storage: mv static-manifests/* /etc/kubernetes/manifests/ mkdir -p /opt/bootstrap/assets mv manifests /opt/bootstrap/assets/manifests - mv manifests-networking/* /opt/bootstrap/assets/manifests/ - rm -rf assets auth static-manifests tls manifests-networking + rm -rf assets auth static-manifests tls manifests - path: /opt/bootstrap/apply mode: 0544 contents: diff --git a/bare-metal/flatcar-linux/kubernetes/profiles.tf b/bare-metal/flatcar-linux/kubernetes/profiles.tf index 5eebeb96..6da7334e 100644 --- a/bare-metal/flatcar-linux/kubernetes/profiles.tf +++ b/bare-metal/flatcar-linux/kubernetes/profiles.tf @@ -89,7 +89,6 @@ data "ct_config" "controllers" { etcd_name = var.controllers.*.name[count.index] etcd_initial_cluster = join(",", formatlist("%s=https://%s:2380", var.controllers.*.name, var.controllers.*.domain)) cluster_dns_service_ip = module.bootstrap.cluster_dns_service_ip - cluster_domain_suffix = var.cluster_domain_suffix ssh_authorized_key = var.ssh_authorized_key }) strict = true diff --git a/bare-metal/flatcar-linux/kubernetes/variables.tf b/bare-metal/flatcar-linux/kubernetes/variables.tf index 6d3f13f2..3885aafc 100644 --- a/bare-metal/flatcar-linux/kubernetes/variables.tf +++ b/bare-metal/flatcar-linux/kubernetes/variables.tf @@ -150,18 +150,6 @@ variable "kernel_args" { default = [] } -variable "enable_reporting" { - type = bool - description = "Enable usage or analytics reporting to upstreams (Calico)" - default = false -} - -variable "enable_aggregation" { - type = bool - description = "Enable the Kubernetes Aggregation Layer" - default = true -} - variable "oem_type" { type = string description = < -* Kubernetes v1.28.3 (upstream) +* Kubernetes v1.31.3 (upstream) * Single or multi-master, [Calico](https://www.projectcalico.org/) or [flannel](https://github.com/coreos/flannel) networking * On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/), SELinux enforcing * Advanced features like [snippets](https://typhoon.psdn.io/advanced/customization/#hosts) 
customization diff --git a/digital-ocean/fedora-coreos/kubernetes/bootstrap.tf b/digital-ocean/fedora-coreos/kubernetes/bootstrap.tf index 43b6ec2e..8fd1d6a8 100644 --- a/digital-ocean/fedora-coreos/kubernetes/bootstrap.tf +++ b/digital-ocean/fedora-coreos/kubernetes/bootstrap.tf @@ -1,21 +1,18 @@ # Kubernetes assets (kubeconfig, manifests) module "bootstrap" { - source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=d151ab77b7ebdfb878ea110c86cc77238189f1ed" + source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=e6a1c7bccfc45ab299b5f8149bc3840f99b30b2b" cluster_name = var.cluster_name api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)] etcd_servers = digitalocean_record.etcds.*.fqdn networking = var.networking - # only effective with Calico networking network_encapsulation = "vxlan" network_mtu = "1450" - pod_cidr = var.pod_cidr - service_cidr = var.service_cidr - cluster_domain_suffix = var.cluster_domain_suffix - enable_reporting = var.enable_reporting - enable_aggregation = var.enable_aggregation + pod_cidr = var.pod_cidr + service_cidr = var.service_cidr + components = var.components } diff --git a/digital-ocean/fedora-coreos/kubernetes/butane/controller.yaml b/digital-ocean/fedora-coreos/kubernetes/butane/controller.yaml index 35f55566..c42ea759 100644 --- a/digital-ocean/fedora-coreos/kubernetes/butane/controller.yaml +++ b/digital-ocean/fedora-coreos/kubernetes/butane/controller.yaml @@ -12,7 +12,7 @@ systemd: Wants=network-online.target After=network-online.target [Service] - Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.10 + Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.13 Type=exec ExecStartPre=/bin/mkdir -p /var/lib/etcd ExecStartPre=-/usr/bin/podman rm etcd @@ -55,7 +55,7 @@ systemd: After=afterburn.service Wants=rpc-statd.service [Service] - Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3 + Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3 EnvironmentFile=/run/metadata/afterburn ExecStartPre=/bin/mkdir -p /etc/cni/net.d ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests @@ -123,7 +123,7 @@ systemd: --volume /opt/bootstrap/assets:/assets:ro,Z \ --volume /opt/bootstrap/apply:/apply:ro,Z \ --entrypoint=/apply \ - quay.io/poseidon/kubelet:v1.28.3 + quay.io/poseidon/kubelet:v1.31.3 ExecStartPost=/bin/touch /opt/bootstrap/bootstrap.done ExecStartPost=-/usr/bin/podman stop bootstrap storage: @@ -151,7 +151,7 @@ storage: cgroupDriver: systemd clusterDNS: - ${cluster_dns_service_ip} - clusterDomain: ${cluster_domain_suffix} + clusterDomain: cluster.local healthzPort: 0 rotateCertificates: true shutdownGracePeriod: 45s @@ -165,7 +165,7 @@ storage: contents: inline: | #!/bin/bash -e - mkdir -p -- auth tls/etcd tls/k8s static-manifests manifests/coredns manifests-networking + mkdir -p -- auth tls/{etcd,k8s} static-manifests manifests/{coredns,kube-proxy,network} awk '/#####/ {filename=$2; next} {print > filename}' assets mkdir -p /etc/ssl/etcd/etcd mkdir -p /etc/kubernetes/pki @@ -179,8 +179,7 @@ storage: mv static-manifests/* /etc/kubernetes/manifests/ mkdir -p /opt/bootstrap/assets mv manifests /opt/bootstrap/assets/manifests - mv manifests-networking/* /opt/bootstrap/assets/manifests/ - rm -rf assets auth static-manifests tls manifests-networking + rm -rf assets auth static-manifests tls manifests chcon -R -u system_u -t container_file_t /etc/kubernetes/pki - path: /opt/bootstrap/apply mode: 0544 diff --git a/digital-ocean/fedora-coreos/kubernetes/butane/worker.yaml 
b/digital-ocean/fedora-coreos/kubernetes/butane/worker.yaml index c330ebca..0bde53f0 100644 --- a/digital-ocean/fedora-coreos/kubernetes/butane/worker.yaml +++ b/digital-ocean/fedora-coreos/kubernetes/butane/worker.yaml @@ -28,7 +28,7 @@ systemd: After=afterburn.service Wants=rpc-statd.service [Service] - Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3 + Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3 EnvironmentFile=/run/metadata/afterburn ExecStartPre=/bin/mkdir -p /etc/cni/net.d ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests @@ -104,7 +104,7 @@ storage: cgroupDriver: systemd clusterDNS: - ${cluster_dns_service_ip} - clusterDomain: ${cluster_domain_suffix} + clusterDomain: cluster.local healthzPort: 0 rotateCertificates: true shutdownGracePeriod: 45s diff --git a/digital-ocean/fedora-coreos/kubernetes/controllers.tf b/digital-ocean/fedora-coreos/kubernetes/controllers.tf index 32568036..871c7252 100644 --- a/digital-ocean/fedora-coreos/kubernetes/controllers.tf +++ b/digital-ocean/fedora-coreos/kubernetes/controllers.tf @@ -74,7 +74,6 @@ data "ct_config" "controllers" { for i in range(var.controller_count) : "etcd${i}=https://${var.cluster_name}-etcd${i}.${var.dns_zone}:2380" ]) cluster_dns_service_ip = cidrhost(var.service_cidr, 10) - cluster_domain_suffix = var.cluster_domain_suffix }) strict = true snippets = var.controller_snippets diff --git a/digital-ocean/fedora-coreos/kubernetes/network.tf b/digital-ocean/fedora-coreos/kubernetes/network.tf index 0d506e58..b506c64e 100644 --- a/digital-ocean/fedora-coreos/kubernetes/network.tf +++ b/digital-ocean/fedora-coreos/kubernetes/network.tf @@ -32,6 +32,13 @@ resource "digitalocean_firewall" "rules" { source_tags = [digitalocean_tag.controllers.name, digitalocean_tag.workers.name] } + # Cilium metrics + inbound_rule { + protocol = "tcp" + port_range = "9962-9965" + source_tags = [digitalocean_tag.controllers.name, digitalocean_tag.workers.name] + } + # IANA vxlan (flannel, calico) inbound_rule { protocol = "udp" diff --git a/digital-ocean/fedora-coreos/kubernetes/variables.tf b/digital-ocean/fedora-coreos/kubernetes/variables.tf index 4a6dd8ad..32ee2ece 100644 --- a/digital-ocean/fedora-coreos/kubernetes/variables.tf +++ b/digital-ocean/fedora-coreos/kubernetes/variables.tf @@ -86,23 +86,20 @@ EOD default = "10.3.0.0/16" } -variable "enable_reporting" { - type = bool - description = "Enable usage or analytics reporting to upstreams (Calico)" - default = false +# advanced + +variable "components" { + description = "Configure pre-installed cluster components" + # Component configs are passed through to terraform-render-bootstrap, + # which handles type enforcement and defines defaults + # https://github.com/poseidon/terraform-render-bootstrap/blob/main/variables.tf#L95 + type = object({ + enable = optional(bool) + coredns = optional(map(any)) + kube_proxy = optional(map(any)) + flannel = optional(map(any)) + calico = optional(map(any)) + cilium = optional(map(any)) + }) + default = null } - -variable "enable_aggregation" { - type = bool - description = "Enable the Kubernetes Aggregation Layer" - default = true -} - -# unofficial, undocumented, unsupported - -variable "cluster_domain_suffix" { - type = string - description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. 
foo.default.svc.cluster.local) " - default = "cluster.local" -} - diff --git a/digital-ocean/fedora-coreos/kubernetes/versions.tf b/digital-ocean/fedora-coreos/kubernetes/versions.tf index 0e6823f5..4860c07c 100644 --- a/digital-ocean/fedora-coreos/kubernetes/versions.tf +++ b/digital-ocean/fedora-coreos/kubernetes/versions.tf @@ -6,7 +6,7 @@ terraform { null = ">= 2.1" ct = { source = "poseidon/ct" - version = "~> 0.9" + version = "~> 0.13" } digitalocean = { source = "digitalocean/digitalocean" diff --git a/digital-ocean/fedora-coreos/kubernetes/workers.tf b/digital-ocean/fedora-coreos/kubernetes/workers.tf index 2dd48572..1c4637b2 100644 --- a/digital-ocean/fedora-coreos/kubernetes/workers.tf +++ b/digital-ocean/fedora-coreos/kubernetes/workers.tf @@ -62,7 +62,6 @@ resource "digitalocean_tag" "workers" { data "ct_config" "worker" { content = templatefile("${path.module}/butane/worker.yaml", { cluster_dns_service_ip = cidrhost(var.service_cidr, 10) - cluster_domain_suffix = var.cluster_domain_suffix }) strict = true snippets = var.worker_snippets diff --git a/digital-ocean/flatcar-linux/kubernetes/README.md b/digital-ocean/flatcar-linux/kubernetes/README.md index c5ae31d6..4faa7d5c 100644 --- a/digital-ocean/flatcar-linux/kubernetes/README.md +++ b/digital-ocean/flatcar-linux/kubernetes/README.md @@ -11,7 +11,7 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster ## Features -* Kubernetes v1.28.3 (upstream) +* Kubernetes v1.31.3 (upstream) * Single or multi-master, [Calico](https://www.projectcalico.org/) or [Cilium](https://github.com/cilium/cilium) or [flannel](https://github.com/coreos/flannel) networking * On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/) * Advanced features like [snippets](https://typhoon.psdn.io/advanced/customization/#hosts) customization diff --git a/digital-ocean/flatcar-linux/kubernetes/bootstrap.tf b/digital-ocean/flatcar-linux/kubernetes/bootstrap.tf index 43b6ec2e..8fd1d6a8 100644 --- a/digital-ocean/flatcar-linux/kubernetes/bootstrap.tf +++ b/digital-ocean/flatcar-linux/kubernetes/bootstrap.tf @@ -1,21 +1,18 @@ # Kubernetes assets (kubeconfig, manifests) module "bootstrap" { - source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=d151ab77b7ebdfb878ea110c86cc77238189f1ed" + source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=e6a1c7bccfc45ab299b5f8149bc3840f99b30b2b" cluster_name = var.cluster_name api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)] etcd_servers = digitalocean_record.etcds.*.fqdn networking = var.networking - # only effective with Calico networking network_encapsulation = "vxlan" network_mtu = "1450" - pod_cidr = var.pod_cidr - service_cidr = var.service_cidr - cluster_domain_suffix = var.cluster_domain_suffix - enable_reporting = var.enable_reporting - enable_aggregation = var.enable_aggregation + pod_cidr = var.pod_cidr + service_cidr = var.service_cidr + components = var.components } diff --git a/digital-ocean/flatcar-linux/kubernetes/butane/controller.yaml b/digital-ocean/flatcar-linux/kubernetes/butane/controller.yaml index fc7a18ba..4a2030e1 100644 --- a/digital-ocean/flatcar-linux/kubernetes/butane/controller.yaml +++ b/digital-ocean/flatcar-linux/kubernetes/butane/controller.yaml @@ -11,7 +11,7 @@ systemd: Requires=docker.service After=docker.service [Service] - 
Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.10 + Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.13 ExecStartPre=/usr/bin/docker run -d \ --name etcd \ --network host \ @@ -66,7 +66,7 @@ systemd: After=coreos-metadata.service Wants=rpc-statd.service [Service] - Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3 + Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3 EnvironmentFile=/run/metadata/coreos ExecStartPre=/bin/mkdir -p /etc/cni/net.d ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests @@ -117,7 +117,7 @@ systemd: Type=oneshot RemainAfterExit=true WorkingDirectory=/opt/bootstrap - Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3 + Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3 ExecStart=/usr/bin/docker run \ -v /etc/kubernetes/pki:/etc/kubernetes/pki:ro \ -v /opt/bootstrap/assets:/assets:ro \ @@ -153,7 +153,7 @@ storage: cgroupDriver: systemd clusterDNS: - ${cluster_dns_service_ip} - clusterDomain: ${cluster_domain_suffix} + clusterDomain: cluster.local healthzPort: 0 rotateCertificates: true shutdownGracePeriod: 45s @@ -167,7 +167,7 @@ storage: contents: inline: | #!/bin/bash -e - mkdir -p -- auth tls/etcd tls/k8s static-manifests manifests/coredns manifests-networking + mkdir -p -- auth tls/{etcd,k8s} static-manifests manifests/{coredns,kube-proxy,network} awk '/#####/ {filename=$2; next} {print > filename}' assets mkdir -p /etc/ssl/etcd/etcd mkdir -p /etc/kubernetes/pki @@ -182,8 +182,7 @@ storage: mv static-manifests/* /etc/kubernetes/manifests/ mkdir -p /opt/bootstrap/assets mv manifests /opt/bootstrap/assets/manifests - mv manifests-networking/* /opt/bootstrap/assets/manifests/ - rm -rf assets auth static-manifests tls manifests-networking + rm -rf assets auth static-manifests tls manifests - path: /opt/bootstrap/apply mode: 0544 contents: diff --git a/digital-ocean/flatcar-linux/kubernetes/butane/worker.yaml b/digital-ocean/flatcar-linux/kubernetes/butane/worker.yaml index 2b8d1b30..af549314 100644 --- a/digital-ocean/flatcar-linux/kubernetes/butane/worker.yaml +++ b/digital-ocean/flatcar-linux/kubernetes/butane/worker.yaml @@ -38,7 +38,7 @@ systemd: After=coreos-metadata.service Wants=rpc-statd.service [Service] - Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3 + Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3 EnvironmentFile=/run/metadata/coreos ExecStartPre=/bin/mkdir -p /etc/cni/net.d ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests @@ -103,7 +103,7 @@ storage: cgroupDriver: systemd clusterDNS: - ${cluster_dns_service_ip} - clusterDomain: ${cluster_domain_suffix} + clusterDomain: cluster.local healthzPort: 0 rotateCertificates: true shutdownGracePeriod: 45s diff --git a/digital-ocean/flatcar-linux/kubernetes/controllers.tf b/digital-ocean/flatcar-linux/kubernetes/controllers.tf index 8432ff74..928c544a 100644 --- a/digital-ocean/flatcar-linux/kubernetes/controllers.tf +++ b/digital-ocean/flatcar-linux/kubernetes/controllers.tf @@ -79,7 +79,6 @@ data "ct_config" "controllers" { for i in range(var.controller_count) : "etcd${i}=https://${var.cluster_name}-etcd${i}.${var.dns_zone}:2380" ]) cluster_dns_service_ip = cidrhost(var.service_cidr, 10) - cluster_domain_suffix = var.cluster_domain_suffix }) strict = true snippets = var.controller_snippets diff --git a/digital-ocean/flatcar-linux/kubernetes/network.tf b/digital-ocean/flatcar-linux/kubernetes/network.tf index 0d506e58..b506c64e 100644 --- a/digital-ocean/flatcar-linux/kubernetes/network.tf +++ b/digital-ocean/flatcar-linux/kubernetes/network.tf @@ -32,6 
+32,13 @@ resource "digitalocean_firewall" "rules" { source_tags = [digitalocean_tag.controllers.name, digitalocean_tag.workers.name] } + # Cilium metrics + inbound_rule { + protocol = "tcp" + port_range = "9962-9965" + source_tags = [digitalocean_tag.controllers.name, digitalocean_tag.workers.name] + } + # IANA vxlan (flannel, calico) inbound_rule { protocol = "udp" diff --git a/digital-ocean/flatcar-linux/kubernetes/variables.tf b/digital-ocean/flatcar-linux/kubernetes/variables.tf index 7c755af6..81051519 100644 --- a/digital-ocean/flatcar-linux/kubernetes/variables.tf +++ b/digital-ocean/flatcar-linux/kubernetes/variables.tf @@ -86,23 +86,20 @@ EOD default = "10.3.0.0/16" } -variable "enable_reporting" { - type = bool - description = "Enable usage or analytics reporting to upstreams (Calico)" - default = false +# advanced + +variable "components" { + description = "Configure pre-installed cluster components" + # Component configs are passed through to terraform-render-bootstrap, + # which handles type enforcement and defines defaults + # https://github.com/poseidon/terraform-render-bootstrap/blob/main/variables.tf#L95 + type = object({ + enable = optional(bool) + coredns = optional(map(any)) + kube_proxy = optional(map(any)) + flannel = optional(map(any)) + calico = optional(map(any)) + cilium = optional(map(any)) + }) + default = null } - -variable "enable_aggregation" { - type = bool - description = "Enable the Kubernetes Aggregation Layer" - default = true -} - -# unofficial, undocumented, unsupported - -variable "cluster_domain_suffix" { - type = string - description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. foo.default.svc.cluster.local) " - default = "cluster.local" -} - diff --git a/digital-ocean/flatcar-linux/kubernetes/versions.tf b/digital-ocean/flatcar-linux/kubernetes/versions.tf index 3c8aa2fd..4860c07c 100644 --- a/digital-ocean/flatcar-linux/kubernetes/versions.tf +++ b/digital-ocean/flatcar-linux/kubernetes/versions.tf @@ -6,7 +6,7 @@ terraform { null = ">= 2.1" ct = { source = "poseidon/ct" - version = "~> 0.11" + version = "~> 0.13" } digitalocean = { source = "digitalocean/digitalocean" diff --git a/digital-ocean/flatcar-linux/kubernetes/workers.tf b/digital-ocean/flatcar-linux/kubernetes/workers.tf index 863248a0..8484f505 100644 --- a/digital-ocean/flatcar-linux/kubernetes/workers.tf +++ b/digital-ocean/flatcar-linux/kubernetes/workers.tf @@ -60,7 +60,6 @@ resource "digitalocean_tag" "workers" { data "ct_config" "worker" { content = templatefile("${path.module}/butane/worker.yaml", { cluster_dns_service_ip = cidrhost(var.service_cidr, 10) - cluster_domain_suffix = var.cluster_domain_suffix }) strict = true snippets = var.worker_snippets diff --git a/docs/addons/ingress.md b/docs/addons/ingress.md index 14689c37..f72e5128 100644 --- a/docs/addons/ingress.md +++ b/docs/addons/ingress.md @@ -37,7 +37,7 @@ resource "google_dns_record_set" "some-application" { ## Azure -On Azure, a load balancer distributes traffic across a backend address pool of worker nodes running an Ingress controller deployment. Security group rules allow traffic to ports 80 and 443. Health probes ensure only workers with a healthy Ingress controller receive traffic. +On Azure, an Azure Load Balancer distributes IPv4/IPv6 traffic across backend address pools of worker nodes running an Ingress controller deployment. Security group rules allow traffic to ports 80 and 443. 
Health probes ensure only workers with a healthy Ingress controller receive traffic. Create the Ingress controller deployment, service, RBAC roles, RBAC bindings, and namespace. @@ -53,10 +53,10 @@ app2.example.com -> 11.22.33.44 app3.example.com -> 11.22.33.44 ``` -Find the load balancer's IPv4 address with the Azure console or use the Typhoon module's output `ingress_static_ipv4`. For example, you might use Terraform to manage a Google Cloud DNS record: +Find the load balancer's addresses with the Azure console or use the Typhoon module's outputs `ingress_static_ipv4` or `ingress_static_ipv6`. For example, you might use Terraform to manage a Google Cloud DNS record: ```tf -resource "google_dns_record_set" "some-application" { +resource "google_dns_record_set" "app-record-a" { # DNS zone name managed_zone = "example-zone" @@ -66,6 +66,17 @@ resource "google_dns_record_set" "some-application" { ttl = 300 rrdatas = [module.ramius.ingress_static_ipv4] } + +resource "google_dns_record_set" "app-record-aaaa" { + # DNS zone name + managed_zone = "example-zone" + + # DNS record + name = "app.example.com." + type = "AAAA" + ttl = 300 + rrdatas = [module.ramius.ingress_static_ipv6] +} ``` ## Bare-Metal diff --git a/docs/addons/overview.md b/docs/addons/overview.md index 13708c2b..ac6680d6 100644 --- a/docs/addons/overview.md +++ b/docs/addons/overview.md @@ -1,9 +1,131 @@ -# Addons +# Components -Typhoon clusters are verified to work well with several post-install addons. +Typhoon's component model allows for managing cluster components independent from the cluster's lifecycle, upgrading in a rolling or automated fashion, or customizing components in advanced ways. + +Typhoon clusters install core components like `CoreDNS`, `kube-proxy`, and a chosen CNI provider (`flannel`, `calico`, or `cilium`) by default. Since v1.30.1, pre-installed components are optional. Other "addon" components like Nginx Ingress, Prometheus, or Grafana may be optionally applied though the component model (after cluster creation). + +## Components + +Pre-installed by default: + +* CoreDNS +* kube-proxy +* CNI provider (set via `var.networking`) + * flannel + * Calico + * Cilium + +Addons: * Nginx [Ingress Controller](ingress.md) * [Prometheus](prometheus.md) * [Grafana](grafana.md) * [fleetlock](fleetlock.md) +## Pre-installed Components + +By default, Typhoon clusters install `CoreDNS`, `kube-proxy`, and a chosen CNI provider (`flannel`, `calico`, or `cilium`). Disable any or all of these components using the `components` system. + +```tf +module "yavin" { + source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes?ref=v1.30.1" + + # Google Cloud + cluster_name = "yavin" + region = "us-central1" + dns_zone = "example.com" + dns_zone_name = "example-zone" + + # configuration + ssh_authorized_key = "ssh-ed25519 AAAAB3Nz..." + + # pre-installed components (defaults shown) + components = { + enable = true + coredns = { + enable = true + } + kube_proxy = { + enable = true + } + # Only the CNI set in var.networking will be installed + flannel = { + enable = true + } + calico = { + enable = true + } + cilium = { + enable = true + } + } +} +``` + +!!! warn + Disabling pre-installed components is for advanced users who intend to manage these components separately. Without a CNI provider, cluster nodes will be NotReady and wait for the CNI provider to be applied. 
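For example, here is a minimal sketch (reusing the `yavin` example arguments above) that keeps CoreDNS and the chosen CNI pre-installed but skips `kube-proxy` so it can be managed separately. Fields left unset fall back to the defaults defined in terraform-render-bootstrap.

```tf
module "yavin" {
  source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes?ref=v1.30.1"

  # Google Cloud
  cluster_name  = "yavin"
  region        = "us-central1"
  dns_zone      = "example.com"
  dns_zone_name = "example-zone"

  # configuration
  ssh_authorized_key = "ssh-ed25519 AAAAB3Nz..."

  # sketch: skip pre-installed kube-proxy to manage it yourself;
  # unset fields (coredns, flannel, calico, cilium) keep their defaults
  components = {
    enable = true
    kube_proxy = {
      enable = false
    }
  }
}
```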
+ +## Managing Components + +If you choose to manage components yourself, a recommended pattern is to use a separate Terraform workspace per component, like you would any application. + +``` +mkdir -p infra/components/{coredns,cilium} + +tree components/coredns +components/coredns/ +├── backend.tf +├── manifests.tf +└── providers.tf +``` + +Let's consider managing CoreDNS resources. Configure the `kubernetes` provider to use the kubeconfig credentials of your Typhoon cluster(s) in a `providers.tf` file. Here we show provider blocks for interacting with Typhoon clusters on AWS, Azure, or Google Cloud, assuming each cluster's `kubeconfig-admin` output was written to a local file. + +```tf +provider "kubernetes" { + alias = "aws" + config_path = "~/.kube/configs/aws-config" +} + +provider "kubernetes" { + alias = "google" + config_path = "~/.kube/configs/google-config" +} + +... +``` + +Typhoon maintains Terraform modules for most addon components. You can reference `main`, a tagged release, a SHA revision, or a custom module of your own. Define the CoreDNS manifests using the `addons/coredns` module in a `manifests.tf` file. + +```tf +# CoreDNS manifests for the aws cluster +module "aws" { + source = "git::https://github.com/poseidon/typhoon//addons/coredns?ref=v1.30.1" + providers = { + kubernetes = kubernetes.aws + } +} + +# CoreDNS manifests for the google cloud cluster +module "google-cloud" { + source = "git::https://github.com/poseidon/typhoon//addons/coredns?ref=v1.30.1" + providers = { + kubernetes = kubernetes.google + } +} +... +``` + +Plan and apply the CoreDNS Kubernetes resources to cluster(s). + +``` +terraform plan +terraform apply +... +module.aws.kubernetes_service_account.coredns: Refreshing state... [id=kube-system/coredns] +module.aws.kubernetes_config_map.coredns: Refreshing state... [id=kube-system/coredns] +module.aws.kubernetes_cluster_role.coredns: Refreshing state... [id=system:coredns] +module.aws.kubernetes_cluster_role_binding.coredns: Refreshing state... [id=system:coredns] +module.aws.kubernetes_service.coredns: Refreshing state... [id=kube-system/coredns] +... +``` diff --git a/docs/advanced/arm64.md b/docs/advanced/arm64.md index 956e996e..01930fd1 100644 --- a/docs/advanced/arm64.md +++ b/docs/advanced/arm64.md @@ -1,13 +1,11 @@ # ARM64 -Typhoon supports ARM64 Kubernetes clusters with ARM64 controller and worker nodes (full-cluster) or adding worker pools of ARM64 nodes to clusters with an x86/amd64 control plane for a hybdrid (mixed-arch) cluster. - -Typhoon ARM64 clusters (full-cluster or mixed-arch) are available on: +Typhoon supports Kubernetes clusters with ARM64 controller or worker nodes on several platforms: * AWS with Fedora CoreOS or Flatcar Linux * Azure with Flatcar Linux -## Cluster +## AWS Create a cluster on AWS with ARM64 controller and worker nodes. Container workloads must be `arm64` compatible and use `arm64` (or multi-arch) container images. @@ -15,24 +13,23 @@ Create a cluster on AWS with ARM64 controller and worker nodes. 
Container worklo ```tf module "gravitas" { - source = "git::https://github.com/poseidon/typhoon//aws/fedora-coreos/kubernetes?ref=v1.28.3" + source = "git::https://github.com/poseidon/typhoon//aws/fedora-coreos/kubernetes?ref=v1.31.3" # AWS cluster_name = "gravitas" dns_zone = "aws.example.com" dns_zone_id = "Z3PAABBCFAKEC0" + # instances + controller_type = "t4g.small" + controller_arch = "arm64" + worker_count = 2 + worker_type = "t4g.small" + worker_arch = "arm64" + worker_price = "0.0168" + # configuration ssh_authorized_key = "ssh-ed25519 AAAAB3Nz..." - - # optional - arch = "arm64" - networking = "cilium" - worker_count = 2 - worker_price = "0.0168" - - controller_type = "t4g.small" - worker_type = "t4g.small" } ``` @@ -40,24 +37,23 @@ Create a cluster on AWS with ARM64 controller and worker nodes. Container worklo ```tf module "gravitas" { - source = "git::https://github.com/poseidon/typhoon//aws/flatcar-linux/kubernetes?ref=v1.28.3" + source = "git::https://github.com/poseidon/typhoon//aws/flatcar-linux/kubernetes?ref=v1.31.3" # AWS cluster_name = "gravitas" dns_zone = "aws.example.com" dns_zone_id = "Z3PAABBCFAKEC0" + # instances + controller_type = "t4g.small" + controller_arch = "arm64" + worker_count = 2 + worker_type = "t4g.small" + worker_arch = "arm64" + worker_price = "0.0168" + # configuration ssh_authorized_key = "ssh-ed25519 AAAAB3Nz..." - - # optional - arch = "arm64" - networking = "cilium" - worker_count = 2 - worker_price = "0.0168" - - controller_type = "t4g.small" - worker_type = "t4g.small" } ``` @@ -66,118 +62,9 @@ Verify the cluster has only arm64 (`aarch64`) nodes. For Flatcar Linux, describe ``` $ kubectl get nodes -o wide NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME -ip-10-0-21-119 Ready 77s v1.28.3 10.0.21.119 Fedora CoreOS 35.20211215.3.0 5.15.7-200.fc35.aarch64 containerd://1.5.8 -ip-10-0-32-166 Ready 80s v1.28.3 10.0.32.166 Fedora CoreOS 35.20211215.3.0 5.15.7-200.fc35.aarch64 containerd://1.5.8 -ip-10-0-5-79 Ready 77s v1.28.3 10.0.5.79 Fedora CoreOS 35.20211215.3.0 5.15.7-200.fc35.aarch64 containerd://1.5.8 -``` - -## Hybrid - -Create a hybrid/mixed arch cluster by defining an AWS cluster. Then define a [worker pool](worker-pools.md#aws) with ARM64 workers. Optional taints are added to aid in scheduling. - -=== "FCOS Cluster" - - ```tf - module "gravitas" { - source = "git::https://github.com/poseidon/typhoon//aws/fedora-coreos/kubernetes?ref=v1.28.3" - - # AWS - cluster_name = "gravitas" - dns_zone = "aws.example.com" - dns_zone_id = "Z3PAABBCFAKEC0" - - # configuration - ssh_authorized_key = "ssh-ed25519 AAAAB3Nz..." - - # optional - networking = "cilium" - worker_count = 2 - worker_price = "0.021" - - daemonset_tolerations = ["arch"] # important - } - ``` - -=== "Flatcar Cluster" - - ```tf - module "gravitas" { - source = "git::https://github.com/poseidon/typhoon//aws/flatcar-linux/kubernetes?ref=v1.28.3" - - # AWS - cluster_name = "gravitas" - dns_zone = "aws.example.com" - dns_zone_id = "Z3PAABBCFAKEC0" - - # configuration - ssh_authorized_key = "ssh-ed25519 AAAAB3Nz..." 
- - # optional - networking = "cilium" - worker_count = 2 - worker_price = "0.021" - - daemonset_tolerations = ["arch"] # important - } - ``` - -=== "FCOS ARM64 Workers" - - ```tf - module "gravitas-arm64" { - source = "git::https://github.com/poseidon/typhoon//aws/fedora-coreos/kubernetes/workers?ref=v1.28.3" - - # AWS - vpc_id = module.gravitas.vpc_id - subnet_ids = module.gravitas.subnet_ids - security_groups = module.gravitas.worker_security_groups - - # configuration - name = "gravitas-arm64" - kubeconfig = module.gravitas.kubeconfig - ssh_authorized_key = var.ssh_authorized_key - - # optional - arch = "arm64" - instance_type = "t4g.small" - spot_price = "0.0168" - node_taints = ["arch=arm64:NoSchedule"] - } - ``` - -=== "Flatcar ARM64 Workers" - - ```tf - module "gravitas-arm64" { - source = "git::https://github.com/poseidon/typhoon//aws/flatcar-linux/kubernetes/workers?ref=v1.28.3" - - # AWS - vpc_id = module.gravitas.vpc_id - subnet_ids = module.gravitas.subnet_ids - security_groups = module.gravitas.worker_security_groups - - # configuration - name = "gravitas-arm64" - kubeconfig = module.gravitas.kubeconfig - ssh_authorized_key = var.ssh_authorized_key - - # optional - arch = "arm64" - instance_type = "t4g.small" - spot_price = "0.0168" - node_taints = ["arch=arm64:NoSchedule"] - } - ``` - -Verify amd64 (x86_64) and arm64 (aarch64) nodes are present. - -``` -$ kubectl get nodes -o wide -NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME -ip-10-0-1-73 Ready 111m v1.28.3 10.0.1.73 Fedora CoreOS 35.20211215.3.0 5.15.7-200.fc35.x86_64 containerd://1.5.8 -ip-10-0-22-79... Ready 111m v1.28.3 10.0.22.79 Flatcar Container Linux by Kinvolk 3033.2.0 (Oklo) 5.10.84-flatcar containerd://1.5.8 -ip-10-0-24-130 Ready 111m v1.28.3 10.0.24.130 Fedora CoreOS 35.20211215.3.0 5.15.7-200.fc35.x86_64 containerd://1.5.8 -ip-10-0-39-19 Ready 111m v1.28.3 10.0.39.19 Fedora CoreOS 35.20211215.3.0 5.15.7-200.fc35.x86_64 containerd://1.5.8 +ip-10-0-21-119 Ready 77s v1.31.3 10.0.21.119 Fedora CoreOS 35.20211215.3.0 5.15.7-200.fc35.aarch64 containerd://1.5.8 +ip-10-0-32-166 Ready 80s v1.31.3 10.0.32.166 Fedora CoreOS 35.20211215.3.0 5.15.7-200.fc35.aarch64 containerd://1.5.8 +ip-10-0-5-79 Ready 77s v1.31.3 10.0.5.79 Fedora CoreOS 35.20211215.3.0 5.15.7-200.fc35.aarch64 containerd://1.5.8 ``` ## Azure @@ -186,22 +73,136 @@ Create a cluster on Azure with ARM64 controller and worker nodes. Container work ```tf module "ramius" { - source = "git::https://github.com/poseidon/typhoon//azure/flatcar-linux/kubernetes?ref=v1.28.3" + source = "git::https://github.com/poseidon/typhoon//azure/flatcar-linux/kubernetes?ref=v1.31.3" # Azure cluster_name = "ramius" - region = "centralus" + location = "centralus" dns_zone = "azure.example.com" dns_zone_group = "example-group" + # instances + controller_arch = "arm64" + controller_type = "Standard_B2pls_v5" + worker_count = 2 + worker_arch = "arm64" + worker_type = "Standard_D2pls_v5" + # configuration ssh_authorized_key = "ssh-rsa AAAAB3Nz..." - - # optional - arch = "arm64" - controller_type = "Standard_D2pls_v5" - worker_type = "Standard_D2pls_v5" - worker_count = 2 - host_cidr = "10.0.0.0/20" } ``` + +## Hybrid + +Create a hybrid/mixed arch cluster by defining a cluster where [worker pool(s)](worker-pools.md#aws) have a different instance type architecture than controllers or other workers. Taints are added to aid in scheduling. 
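The cluster and worker pool definitions follow below; once the tainted arm64 workers join, workloads opt in to them explicitly. As a rough sketch (a hypothetical `app` Deployment managed with the Terraform `kubernetes` provider; adapt to however you normally deploy workloads), a pod can select arm64 nodes and tolerate the `arch=arm64:NoSchedule` taint:

```tf
# Sketch: pin a workload to tainted arm64 workers. Assumes a `kubernetes`
# provider is configured with this cluster's kubeconfig and that the arm64
# worker pool sets node_taints = ["arch=arm64:NoSchedule"].
resource "kubernetes_deployment" "app" {
  metadata {
    name      = "app"
    namespace = "default"
  }
  spec {
    replicas = 2
    selector {
      match_labels = {
        app = "app"
      }
    }
    template {
      metadata {
        labels = {
          app = "app"
        }
      }
      spec {
        # schedule onto arm64 nodes only and tolerate the arch taint
        node_selector = {
          "kubernetes.io/arch" = "arm64"
        }
        toleration {
          key      = "arch"
          operator = "Equal"
          value    = "arm64"
          effect   = "NoSchedule"
        }
        container {
          name  = "app"
          image = "docker.io/library/nginx:stable" # use an arm64 or multi-arch image
        }
      }
    }
  }
}
```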
+ +Here's an AWS example, + +=== "FCOS Cluster" + + ```tf + module "gravitas" { + source = "git::https://github.com/poseidon/typhoon//aws/fedora-coreos/kubernetes?ref=v1.31.3" + + # AWS + cluster_name = "gravitas" + dns_zone = "aws.example.com" + dns_zone_id = "Z3PAABBCFAKEC0" + + # instances + worker_count = 2 + worker_arch = "arm64" + worker_type = "t4g.medium" + worker_price = "0.021" + + # configuration + daemonset_tolerations = ["arch"] # important + networking = "cilium" + ssh_authorized_key = "ssh-ed25519 AAAAB3Nz..." + } + ``` + +=== "Flatcar Cluster" + + ```tf + module "gravitas" { + source = "git::https://github.com/poseidon/typhoon//aws/flatcar-linux/kubernetes?ref=v1.31.3" + + # AWS + cluster_name = "gravitas" + dns_zone = "aws.example.com" + dns_zone_id = "Z3PAABBCFAKEC0" + + # instances + worker_count = 2 + worker_arch = "arm64" + worker_type = "t4g.medium" + worker_price = "0.021" + + # configuration + daemonset_tolerations = ["arch"] # important + networking = "cilium" + ssh_authorized_key = "ssh-ed25519 AAAAB3Nz..." + } + ``` + +=== "FCOS ARM64 Workers" + + ```tf + module "gravitas-arm64" { + source = "git::https://github.com/poseidon/typhoon//aws/fedora-coreos/kubernetes/workers?ref=v1.31.3" + + # AWS + vpc_id = module.gravitas.vpc_id + subnet_ids = module.gravitas.subnet_ids + security_groups = module.gravitas.worker_security_groups + + # instances + arch = "arm64" + instance_type = "t4g.small" + spot_price = "0.0168" + + # configuration + name = "gravitas-arm64" + kubeconfig = module.gravitas.kubeconfig + node_taints = ["arch=arm64:NoSchedule"] + ssh_authorized_key = var.ssh_authorized_key + } + ``` + +=== "Flatcar ARM64 Workers" + + ```tf + module "gravitas-arm64" { + source = "git::https://github.com/poseidon/typhoon//aws/flatcar-linux/kubernetes/workers?ref=v1.31.3" + + # AWS + vpc_id = module.gravitas.vpc_id + subnet_ids = module.gravitas.subnet_ids + security_groups = module.gravitas.worker_security_groups + + # instances + arch = "arm64" + instance_type = "t4g.small" + spot_price = "0.0168" + + # configuration + name = "gravitas-arm64" + kubeconfig = module.gravitas.kubeconfig + node_taints = ["arch=arm64:NoSchedule"] + ssh_authorized_key = var.ssh_authorized_key + } + ``` + +Verify amd64 (x86_64) and arm64 (aarch64) nodes are present. + +``` +$ kubectl get nodes -o wide +NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME +ip-10-0-1-73 Ready 111m v1.31.3 10.0.1.73 Fedora CoreOS 35.20211215.3.0 5.15.7-200.fc35.x86_64 containerd://1.5.8 +ip-10-0-22-79... 
Ready 111m v1.31.3 10.0.22.79 Flatcar Container Linux by Kinvolk 3033.2.0 (Oklo) 5.10.84-flatcar containerd://1.5.8 +ip-10-0-24-130 Ready 111m v1.31.3 10.0.24.130 Fedora CoreOS 35.20211215.3.0 5.15.7-200.fc35.x86_64 containerd://1.5.8 +ip-10-0-39-19 Ready 111m v1.31.3 10.0.39.19 Fedora CoreOS 35.20211215.3.0 5.15.7-200.fc35.x86_64 containerd://1.5.8 +``` + diff --git a/docs/advanced/nodes.md b/docs/advanced/nodes.md index 49816085..933deaa4 100644 --- a/docs/advanced/nodes.md +++ b/docs/advanced/nodes.md @@ -36,7 +36,7 @@ Add custom initial worker node labels to default workers or worker pool nodes to ```tf module "yavin" { - source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes?ref=v1.28.3" + source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes?ref=v1.31.3" # Google Cloud cluster_name = "yavin" @@ -57,7 +57,7 @@ Add custom initial worker node labels to default workers or worker pool nodes to ```tf module "yavin-pool" { - source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes/workers?ref=v1.28.3" + source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes/workers?ref=v1.31.3" # Google Cloud cluster_name = "yavin" @@ -89,7 +89,7 @@ Add custom initial taints on worker pool nodes to indicate a node is unique and ```tf module "yavin" { - source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes?ref=v1.28.3" + source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes?ref=v1.31.3" # Google Cloud cluster_name = "yavin" @@ -110,7 +110,7 @@ Add custom initial taints on worker pool nodes to indicate a node is unique and ```tf module "yavin-pool" { - source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes/workers?ref=v1.28.3" + source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes/workers?ref=v1.31.3" # Google Cloud cluster_name = "yavin" diff --git a/docs/advanced/worker-pools.md b/docs/advanced/worker-pools.md index 861524f1..cd963778 100644 --- a/docs/advanced/worker-pools.md +++ b/docs/advanced/worker-pools.md @@ -19,7 +19,7 @@ Create a cluster following the AWS [tutorial](../flatcar-linux/aws.md#cluster). ```tf module "tempest-worker-pool" { - source = "git::https://github.com/poseidon/typhoon//aws/fedora-coreos/kubernetes/workers?ref=v1.28.3" + source = "git::https://github.com/poseidon/typhoon//aws/fedora-coreos/kubernetes/workers?ref=v1.31.3" # AWS vpc_id = module.tempest.vpc_id @@ -42,7 +42,7 @@ Create a cluster following the AWS [tutorial](../flatcar-linux/aws.md#cluster). 
```tf module "tempest-worker-pool" { - source = "git::https://github.com/poseidon/typhoon//aws/flatcar-linux/kubernetes/workers?ref=v1.28.3" + source = "git::https://github.com/poseidon/typhoon//aws/flatcar-linux/kubernetes/workers?ref=v1.31.3" # AWS vpc_id = module.tempest.vpc_id @@ -111,14 +111,14 @@ Create a cluster following the Azure [tutorial](../flatcar-linux/azure.md#cluste ```tf module "ramius-worker-pool" { - source = "git::https://github.com/poseidon/typhoon//azure/fedora-coreos/kubernetes/workers?ref=v1.28.3" + source = "git::https://github.com/poseidon/typhoon//azure/fedora-coreos/kubernetes/workers?ref=v1.31.3" # Azure - region = module.ramius.region - resource_group_name = module.ramius.resource_group_name - subnet_id = module.ramius.subnet_id - security_group_id = module.ramius.security_group_id - backend_address_pool_id = module.ramius.backend_address_pool_id + location = module.ramius.location + resource_group_name = module.ramius.resource_group_name + subnet_id = module.ramius.subnet_id + security_group_id = module.ramius.security_group_id + backend_address_pool_ids = module.ramius.backend_address_pool_ids # configuration name = "ramius-spot" @@ -127,7 +127,7 @@ Create a cluster following the Azure [tutorial](../flatcar-linux/azure.md#cluste # optional worker_count = 2 - vm_type = "Standard_F4" + vm_type = "Standard_D2as_v5" priority = "Spot" os_image = "/subscriptions/some/path/Microsoft.Compute/images/fedora-coreos-31.20200323.3.2" } @@ -137,14 +137,14 @@ Create a cluster following the Azure [tutorial](../flatcar-linux/azure.md#cluste ```tf module "ramius-worker-pool" { - source = "git::https://github.com/poseidon/typhoon//azure/flatcar-linux/kubernetes/workers?ref=v1.28.3" + source = "git::https://github.com/poseidon/typhoon//azure/flatcar-linux/kubernetes/workers?ref=v1.31.3" # Azure - region = module.ramius.region - resource_group_name = module.ramius.resource_group_name - subnet_id = module.ramius.subnet_id - security_group_id = module.ramius.security_group_id - backend_address_pool_id = module.ramius.backend_address_pool_id + location = module.ramius.location + resource_group_name = module.ramius.resource_group_name + subnet_id = module.ramius.subnet_id + security_group_id = module.ramius.security_group_id + backend_address_pool_ids = module.ramius.backend_address_pool_ids # configuration name = "ramius-spot" @@ -153,7 +153,7 @@ Create a cluster following the Azure [tutorial](../flatcar-linux/azure.md#cluste # optional worker_count = 2 - vm_type = "Standard_F4" + vm_type = "Standard_D2as_v5" priority = "Spot" os_image = "flatcar-beta" } @@ -180,7 +180,7 @@ The Azure internal `workers` module supports a number of [variables](https://git | resource_group_name | Must be set to `resource_group_name` output by cluster | module.cluster.resource_group_name | | subnet_id | Must be set to `subnet_id` output by cluster | module.cluster.subnet_id | | security_group_id | Must be set to `security_group_id` output by cluster | module.cluster.security_group_id | -| backend_address_pool_id | Must be set to `backend_address_pool_id` output by cluster | module.cluster.backend_address_pool_id | +| backend_address_pool_ids | Must be set to `backend_address_pool_ids` output by cluster | module.cluster.backend_address_pool_ids | | kubeconfig | Must be set to `kubeconfig` output by cluster | module.cluster.kubeconfig | | ssh_authorized_key | SSH public key for user 'core' | "ssh-ed25519 AAAAB3NZ..." 
| @@ -207,7 +207,7 @@ Create a cluster following the Google Cloud [tutorial](../flatcar-linux/google-c ```tf module "yavin-worker-pool" { - source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes/workers?ref=v1.28.3" + source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes/workers?ref=v1.31.3" # Google Cloud region = "europe-west2" @@ -231,7 +231,7 @@ Create a cluster following the Google Cloud [tutorial](../flatcar-linux/google-c ```tf module "yavin-worker-pool" { - source = "git::https://github.com/poseidon/typhoon//google-cloud/flatcar-linux/kubernetes/workers?ref=v1.28.3" + source = "git::https://github.com/poseidon/typhoon//google-cloud/flatcar-linux/kubernetes/workers?ref=v1.31.3" # Google Cloud region = "europe-west2" @@ -262,11 +262,11 @@ Verify a managed instance group of workers joins the cluster within a few minute ``` $ kubectl get nodes NAME STATUS AGE VERSION -yavin-controller-0.c.example-com.internal Ready 6m v1.28.3 -yavin-worker-jrbf.c.example-com.internal Ready 5m v1.28.3 -yavin-worker-mzdm.c.example-com.internal Ready 5m v1.28.3 -yavin-16x-worker-jrbf.c.example-com.internal Ready 3m v1.28.3 -yavin-16x-worker-mzdm.c.example-com.internal Ready 3m v1.28.3 +yavin-controller-0.c.example-com.internal Ready 6m v1.31.3 +yavin-worker-jrbf.c.example-com.internal Ready 5m v1.31.3 +yavin-worker-mzdm.c.example-com.internal Ready 5m v1.31.3 +yavin-16x-worker-jrbf.c.example-com.internal Ready 3m v1.31.3 +yavin-16x-worker-mzdm.c.example-com.internal Ready 3m v1.31.3 ``` ### Variables diff --git a/docs/architecture/azure.md b/docs/architecture/azure.md index 2f9c0ab1..5e76a14b 100644 --- a/docs/architecture/azure.md +++ b/docs/architecture/azure.md @@ -10,9 +10,9 @@ A load balancer distributes IPv4 TCP/6443 traffic across a backend address pool ### HTTP/HTTPS Ingress -A load balancer distributes IPv4 TCP/80 and TCP/443 traffic across a backend address pool of workers with a healthy Ingress controller. +An Azure Load Balancer distributes IPv4/IPv6 TCP/80 and TCP/443 traffic across backend address pools of workers with a healthy Ingress controller. -The Azure LB IPv4 address is output as `ingress_static_ipv4` for use in DNS A records. See [Ingress on Azure](/addons/ingress/#azure). +The load balancer addresses are output as `ingress_static_ipv4` and `ingress_static_ipv6` for use in DNS A and AAAA records. See [Ingress on Azure](/addons/ingress/#azure). ### TCP/UDP Services @@ -21,27 +21,25 @@ Load balance TCP/UDP applications by adding rules to the Azure LB (output). 
A ru ```tf # Forward traffic to the worker backend address pool resource "azurerm_lb_rule" "some-app-tcp" { - resource_group_name = module.ramius.resource_group_name - name = "some-app-tcp" + resource_group_name = module.ramius.resource_group_name loadbalancer_id = module.ramius.loadbalancer_id - frontend_ip_configuration_name = "ingress" + frontend_ip_configuration_name = "ingress-ipv4" - protocol = "Tcp" - frontend_port = 3333 - backend_port = 30333 - backend_address_pool_id = module.ramius.backend_address_pool_id - probe_id = azurerm_lb_probe.some-app.id + protocol = "Tcp" + frontend_port = 3333 + backend_port = 30333 + backend_address_pool_ids = module.ramius.backend_address_pool_ids.ipv4 + probe_id = azurerm_lb_probe.some-app.id } # Health check some-app resource "azurerm_lb_probe" "some-app" { + name = "some-app" resource_group_name = module.ramius.resource_group_name - - name = "some-app" - loadbalancer_id = module.ramius.loadbalancer_id - protocol = "Tcp" - port = 30333 + loadbalancer_id = module.ramius.loadbalancer_id + protocol = "Tcp" + port = 30333 } ``` @@ -51,9 +49,8 @@ Add firewall rules to the worker security group. ```tf resource "azurerm_network_security_rule" "some-app" { - resource_group_name = "${module.ramius.resource_group_name}" - name = "some-app" + resource_group_name = module.ramius.resource_group_name network_security_group_name = module.ramius.worker_security_group_name priority = "3001" access = "Allow" @@ -62,7 +59,7 @@ resource "azurerm_network_security_rule" "some-app" { source_port_range = "*" destination_port_range = "30333" source_address_prefix = "*" - destination_address_prefixes = module.ramius.worker_address_prefixes + destination_address_prefixes = module.ramius.worker_address_prefixes.ipv4 } ``` @@ -72,6 +69,6 @@ Azure does not provide public IPv6 addresses at the standard SKU. | IPv6 Feature | Supported | |-------------------------|-----------| -| Node IPv6 address | No | -| Node Outbound IPv6 | No | -| Kubernetes Ingress IPv6 | No | +| Node IPv6 address | Yes | +| Node Outbound IPv6 | Yes | +| Kubernetes Ingress IPv6 | Yes | diff --git a/docs/architecture/operating-systems.md b/docs/architecture/operating-systems.md index 6445fccf..4de58b94 100644 --- a/docs/architecture/operating-systems.md +++ b/docs/architecture/operating-systems.md @@ -16,8 +16,8 @@ Together, they diversify Typhoon to support a range of container technologies. | Property | Flatcar Linux | Fedora CoreOS | |-------------------|---------------|---------------| -| Kernel | ~5.10.x | ~5.16.x | -| systemd | 249 | 249 | +| Kernel | ~5.15.x | ~6.5.x | +| systemd | 252 | 254 | | Username | core | core | | Ignition system | Ignition v3.x spec | Ignition v3.x spec | | storage driver | overlay2 (extfs) | overlay2 (xfs) | diff --git a/docs/fedora-coreos/aws.md b/docs/fedora-coreos/aws.md index db3fa3d7..47419670 100644 --- a/docs/fedora-coreos/aws.md +++ b/docs/fedora-coreos/aws.md @@ -1,10 +1,10 @@ # AWS -In this tutorial, we'll create a Kubernetes v1.28.3 cluster on AWS with Fedora CoreOS. +In this tutorial, we'll create a Kubernetes v1.31.3 cluster on AWS with Fedora CoreOS. We'll declare a Kubernetes cluster using the Typhoon Terraform module. Then apply the changes to create a VPC, gateway, subnets, security groups, controller instances, worker auto-scaling group, network load balancer, and TLS assets. -Controller hosts are provisioned to run an `etcd-member` peer and a `kubelet` service. Worker hosts run a `kubelet` service. 
Controller nodes run `kube-apiserver`, `kube-scheduler`, `kube-controller-manager`, and `coredns`, while `kube-proxy` and `calico` (or `flannel`) run on every node. A generated `kubeconfig` provides `kubectl` access to the cluster. +Controller hosts are provisioned to run an `etcd-member` peer and a `kubelet` service. Worker hosts run a `kubelet` service. Controller nodes run `kube-apiserver`, `kube-scheduler`, `kube-controller-manager`, and `coredns`, while `kube-proxy` and (`flannel`, `calico`, or `cilium`) run on every node. A generated `kubeconfig` provides `kubectl` access to the cluster. ## Requirements @@ -72,19 +72,19 @@ Define a Kubernetes cluster using the module `aws/fedora-coreos/kubernetes`. ```tf module "tempest" { - source = "git::https://github.com/poseidon/typhoon//aws/fedora-coreos/kubernetes?ref=v1.28.3" + source = "git::https://github.com/poseidon/typhoon//aws/fedora-coreos/kubernetes?ref=v1.31.3" # AWS cluster_name = "tempest" dns_zone = "aws.example.com" dns_zone_id = "Z3PAABBCFAKEC0" - # configuration - ssh_authorized_key = "ssh-ed25519 AAAAB3Nz..." - - # optional + # instances worker_count = 2 worker_type = "t3.small" + + # configuration + ssh_authorized_key = "ssh-ed25519 AAAAB3Nz..." } ``` @@ -134,8 +134,9 @@ In 4-8 minutes, the Kubernetes cluster will be ready. ``` resource "local_file" "kubeconfig-tempest" { - content = module.tempest.kubeconfig-admin - filename = "/home/user/.kube/configs/tempest-config" + content = module.tempest.kubeconfig-admin + filename = "/home/user/.kube/configs/tempest-config" + file_permission = "0600" } ``` @@ -145,9 +146,9 @@ List nodes in the cluster. $ export KUBECONFIG=/home/user/.kube/configs/tempest-config $ kubectl get nodes NAME STATUS ROLES AGE VERSION -ip-10-0-3-155 Ready 10m v1.28.3 -ip-10-0-26-65 Ready 10m v1.28.3 -ip-10-0-41-21 Ready 10m v1.28.3 +ip-10-0-3-155 Ready 10m v1.31.3 +ip-10-0-26-65 Ready 10m v1.31.3 +ip-10-0-41-21 Ready 10m v1.31.3 ``` List the pods. @@ -155,9 +156,9 @@ List the pods. ``` $ kubectl get pods --all-namespaces NAMESPACE NAME READY STATUS RESTARTS AGE -kube-system calico-node-1m5bf 2/2 Running 0 34m -kube-system calico-node-7jmr1 2/2 Running 0 34m -kube-system calico-node-bknc8 2/2 Running 0 34m +kube-system cilium-1m5bf 1/1 Running 0 34m +kube-system cilium-7jmr1 1/1 Running 0 34m +kube-system cilium-bknc8 1/1 Running 0 34m kube-system coredns-1187388186-wx1lg 1/1 Running 0 34m kube-system coredns-1187388186-qjnvp 1/1 Running 0 34m kube-system kube-apiserver-ip-10-0-3-155 1/1 Running 0 34m @@ -206,16 +207,21 @@ Reference the DNS zone id with `aws_route53_zone.zone-for-clusters.zone_id`. | Name | Description | Default | Example | |:-----|:------------|:--------|:--------| +| os_stream | Fedora CoreOS stream for instances | "stable" | "testing", "next" | | controller_count | Number of controllers (i.e. masters) | 1 | 1 | -| worker_count | Number of workers | 1 | 3 | | controller_type | EC2 instance type for controllers | "t3.small" | See below | +| controller_disk_size | Size of EBS volume in GB | 30 | 100 | +| controller_disk_type | Type of EBS volume | gp3 | io1 | +| controller_disk_iops | IOPS of EBS volume | 3000 | 4000 | +| controller_cpu_credits | Burstable CPU pricing model | null (i.e. 
auto) | standard, unlimited | +| worker_count | Number of workers | 1 | 3 | | worker_type | EC2 instance type for workers | "t3.small" | See below | -| os_stream | Fedora CoreOS stream for compute instances | "stable" | "testing", "next" | -| disk_size | Size of the EBS volume in GB | 30 | 100 | -| disk_type | Type of the EBS volume | "gp3" | standard, gp2, gp3, io1 | -| disk_iops | IOPS of the EBS volume | 0 (i.e. auto) | 400 | -| worker_target_groups | Target group ARNs to which worker instances should be added | [] | [aws_lb_target_group.app.id] | +| worker_disk_size | Size of EBS volume in GB | 30 | 100 | +| worker_disk_type | Type of EBS volume | gp3 | io1 | +| worker_disk_iops | IOPS of EBS volume | 3000 | 4000 | +| worker_cpu_credits | Burstable CPU pricing model | null (i.e. auto) | standard, unlimited | | worker_price | Spot price in USD for worker instances or 0 to use on-demand instances | 0 | 0.10 | +| worker_target_groups | Target group ARNs to which worker instances should be added | [] | [aws_lb_target_group.app.id] | | controller_snippets | Controller Butane snippets | [] | [examples](/advanced/customization/) | | worker_snippets | Worker Butane snippets | [] | [examples](/advanced/customization/) | | networking | Choice of networking provider | "cilium" | "calico" or "cilium" or "flannel" | @@ -228,7 +234,7 @@ Reference the DNS zone id with `aws_route53_zone.zone-for-clusters.zone_id`. Check the list of valid [instance types](https://aws.amazon.com/ec2/instance-types/). !!! warning - Do not choose a `controller_type` smaller than `t2.small`. Smaller instances are not sufficient for running a controller. + Do not choose a `controller_type` smaller than `t3.small`. Smaller instances are not sufficient for running a controller. !!! tip "MTU" If your EC2 instance type supports [Jumbo frames](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/network_mtu.html#jumbo_frame_instances) (most do), we recommend you change the `network_mtu` to 8981! You will get better pod-to-pod bandwidth. @@ -236,4 +242,3 @@ Check the list of valid [instance types](https://aws.amazon.com/ec2/instance-typ #### Spot Add `worker_price = "0.10"` to use spot instance workers (instead of "on-demand") and set a maximum spot price in USD. Clusters can tolerate spot market interuptions fairly well (reschedules pods, but cannot drain) to save money, with the tradeoff that requests for workers may go unfulfilled. - diff --git a/docs/fedora-coreos/azure.md b/docs/fedora-coreos/azure.md index 14fd27fb..17910198 100644 --- a/docs/fedora-coreos/azure.md +++ b/docs/fedora-coreos/azure.md @@ -1,10 +1,10 @@ # Azure -In this tutorial, we'll create a Kubernetes v1.28.3 cluster on Azure with Fedora CoreOS. +In this tutorial, we'll create a Kubernetes v1.31.3 cluster on Azure with Fedora CoreOS. We'll declare a Kubernetes cluster using the Typhoon Terraform module. Then apply the changes to create a resource group, virtual network, subnets, security groups, controller availability set, worker scale set, load balancer, and TLS assets. -Controller hosts are provisioned to run an `etcd-member` peer and a `kubelet` service. Worker hosts run a `kubelet` service. Controller nodes run `kube-apiserver`, `kube-scheduler`, `kube-controller-manager`, and `coredns`, while `kube-proxy` and `calico` (or `flannel`) run on every node. A generated `kubeconfig` provides `kubectl` access to the cluster. +Controller hosts are provisioned to run an `etcd-member` peer and a `kubelet` service. Worker hosts run a `kubelet` service. 
Controller nodes run `kube-apiserver`, `kube-scheduler`, `kube-controller-manager`, and `coredns`, while `kube-proxy` and (`flannel`, `calico`, or `cilium`) run on every node. A generated `kubeconfig` provides `kubectl` access to the cluster. ## Requirements @@ -67,15 +67,15 @@ Fedora CoreOS publishes images for Azure, but does not yet upload them. Azure al [Download](https://getfedora.org/en/coreos/download?tab=cloud_operators&stream=stable) a Fedora CoreOS Azure VHD image, decompress it, and upload it to an Azure storage account container (i.e. bucket) via the UI (quite slow). ``` -xz -d fedora-coreos-36.20220716.3.1-azure.x86_64.vhd.xz +xz -d fedora-coreos-40.20240616.3.0-azure.x86_64.vhd.xz ``` Create an Azure disk (note disk ID) and create an Azure image from it (note image ID). ``` -az disk create --name fedora-coreos-36.20220716.3.1 -g GROUP --source https://BUCKET.blob.core.windows.net/fedora-coreos/fedora-coreos-36.20220716.3.1-azure.x86_64.vhd +az disk create --name fedora-coreos-40.20240616.3.0 -g GROUP --source https://BUCKET.blob.core.windows.net/images/fedora-coreos-40.20240616.3.0-azure.x86_64.vhd -az image create --name fedora-coreos-36.20220716.3.1 -g GROUP --os-type=linux --source /subscriptions/some/path/providers/Microsoft.Compute/disks/fedora-coreos-36.20220716.3.1 +az image create --name fedora-coreos-40.20240616.3.0 -g GROUP --os-type linux --source /subscriptions/some/path/Microsoft.Compute/disks/fedora-coreos-40.20240616.3.0 ``` Set the [os_image](#variables) in the next step. @@ -86,21 +86,23 @@ Define a Kubernetes cluster using the module `azure/fedora-coreos/kubernetes`. ```tf module "ramius" { - source = "git::https://github.com/poseidon/typhoon//azure/fedora-coreos/kubernetes?ref=v1.28.3" + source = "git::https://github.com/poseidon/typhoon//azure/fedora-coreos/kubernetes?ref=v1.31.3" # Azure cluster_name = "ramius" - region = "centralus" + location = "centralus" dns_zone = "azure.example.com" dns_zone_group = "example-group" + network_cidr = { + ipv4 = ["10.0.0.0/20"] + } + + # instances + os_image = "/subscriptions/some/path/Microsoft.Compute/images/fedora-coreos-36.20220716.3.1" + worker_count = 2 # configuration - os_image = "/subscriptions/some/path/Microsoft.Compute/images/fedora-coreos-36.20220716.3.1" ssh_authorized_key = "ssh-ed25519 AAAAB3Nz..." - - # optional - worker_count = 2 - host_cidr = "10.0.0.0/20" } ``` @@ -150,8 +152,9 @@ In 4-8 minutes, the Kubernetes cluster will be ready. ``` resource "local_file" "kubeconfig-ramius" { - content = module.ramius.kubeconfig-admin - filename = "/home/user/.kube/configs/ramius-config" + content = module.ramius.kubeconfig-admin + filename = "/home/user/.kube/configs/ramius-config" + file_permission = "0600" } ``` @@ -161,9 +164,9 @@ List nodes in the cluster. $ export KUBECONFIG=/home/user/.kube/configs/ramius-config $ kubectl get nodes NAME STATUS ROLES AGE VERSION -ramius-controller-0 Ready 24m v1.28.3 -ramius-worker-000001 Ready 25m v1.28.3 -ramius-worker-000002 Ready 24m v1.28.3 +ramius-controller-0 Ready 24m v1.31.3 +ramius-worker-000001 Ready 25m v1.31.3 +ramius-worker-000002 Ready 24m v1.31.3 ``` List the pods. 
@@ -173,9 +176,9 @@ $ kubectl get pods --all-namespaces NAMESPACE NAME READY STATUS RESTARTS AGE kube-system coredns-7c6fbb4f4b-b6qzx 1/1 Running 0 26m kube-system coredns-7c6fbb4f4b-j2k3d 1/1 Running 0 26m -kube-system calico-node-1m5bf 2/2 Running 0 26m -kube-system calico-node-7jmr1 2/2 Running 0 26m -kube-system calico-node-bknc8 2/2 Running 0 26m +kube-system cilium-1m5bf 1/1 Running 0 26m +kube-system cilium-7jmr1 1/1 Running 0 26m +kube-system cilium-bknc8 1/1 Running 0 26m kube-system kube-apiserver-ramius-controller-0 1/1 Running 0 26m kube-system kube-controller-manager-ramius-controller-0 1/1 Running 0 26m kube-system kube-proxy-j4vpq 1/1 Running 0 26m @@ -197,14 +200,14 @@ Check the [variables.tf](https://github.com/poseidon/typhoon/blob/master/azure/f | Name | Description | Example | |:-----|:------------|:--------| | cluster_name | Unique cluster name (prepended to dns_zone) | "ramius" | -| region | Azure region | "centralus" | +| location | Azure location | "centralus" | | dns_zone | Azure DNS zone | "azure.example.com" | | dns_zone_group | Resource group where the Azure DNS zone resides | "global" | | os_image | Fedora CoreOS image for instances | "/subscriptions/..../custom-image" | | ssh_authorized_key | SSH public key for user 'core' | "ssh-ed25519 AAAAB3NZ..." | !!! tip - Regions are shown in [docs](https://azure.microsoft.com/en-us/global-infrastructure/regions/) or with `az account list-locations --output table`. + Locations are shown in [docs](https://azure.microsoft.com/en-us/global-infrastructure/regions/) or with `az account list-locations --output table`. #### DNS Zone @@ -238,24 +241,25 @@ Reference the DNS zone with `azurerm_dns_zone.clusters.name` and its resource gr | Name | Description | Default | Example | |:-----|:------------|:--------|:--------| | controller_count | Number of controllers (i.e. masters) | 1 | 1 | -| worker_count | Number of workers | 1 | 3 | | controller_type | Machine type for controllers | "Standard_B2s" | See below | +| controller_disk_type | Managed disk for controllers | Premium_LRS | Standard_LRS | +| controller_disk_size | Managed disk size in GB | 30 | 50 | +| worker_count | Number of workers | 1 | 3 | | worker_type | Machine type for workers | "Standard_D2as_v5" | See below | -| disk_size | Size of the disk in GB | 30 | 100 | +| worker_disk_type | Managed disk for workers | Standard_LRS | Premium_LRS | +| worker_disk_size | Size of the disk in GB | 30 | 100 | +| worker_ephemeral_disk | Use ephemeral local disk instead of managed disk | false | true | | worker_priority | Set priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be deallocated at any time | Regular | Spot | | controller_snippets | Controller Butane snippets | [] | [example](/advanced/customization/#usage) | | worker_snippets | Worker Butane snippets | [] | [example](/advanced/customization/#usage) | | networking | Choice of networking provider | "cilium" | "calico" or "cilium" or "flannel" | -| host_cidr | CIDR IPv4 range to assign to instances | "10.0.0.0/16" | "10.0.0.0/20" | +| network_cidr | Virtual network CIDR ranges | { ipv4 = ["10.0.0.0/16"], ipv6 = [ULA, ...] 
} | { ipv4 = ["10.0.0.0/20"] } | | pod_cidr | CIDR IPv4 range to assign to Kubernetes pods | "10.2.0.0/16" | "10.22.0.0/16" | | service_cidr | CIDR IPv4 range to assign to Kubernetes services | "10.3.0.0/16" | "10.3.0.0/24" | | worker_node_labels | List of initial worker node labels | [] | ["worker-pool=default"] | Check the list of valid [machine types](https://azure.microsoft.com/en-us/pricing/details/virtual-machines/linux/) and their [specs](https://docs.microsoft.com/en-us/azure/virtual-machines/linux/sizes-general). Use `az vm list-skus` to get the identifier. -!!! warning - Unlike AWS and GCP, Azure requires its *virtual* networks to have non-overlapping IPv4 CIDRs (yeah, go figure). Instead of each cluster just using `10.0.0.0/16` for instances, each Azure cluster's `host_cidr` must be non-overlapping (e.g. 10.0.0.0/20 for the 1st cluster, 10.0.16.0/20 for the 2nd cluster, etc). - !!! warning Do not choose a `controller_type` smaller than `Standard_B2s`. Smaller instances are not sufficient for running a controller. diff --git a/docs/fedora-coreos/bare-metal.md b/docs/fedora-coreos/bare-metal.md index 84fbc6f2..ca05b9d2 100644 --- a/docs/fedora-coreos/bare-metal.md +++ b/docs/fedora-coreos/bare-metal.md @@ -1,10 +1,10 @@ # Bare-Metal -In this tutorial, we'll network boot and provision a Kubernetes v1.28.3 cluster on bare-metal with Fedora CoreOS. +In this tutorial, we'll network boot and provision a Kubernetes v1.31.3 cluster on bare-metal with Fedora CoreOS. First, we'll deploy a [Matchbox](https://github.com/poseidon/matchbox) service and setup a network boot environment. Then, we'll declare a Kubernetes cluster using the Typhoon Terraform module and power on machines. On PXE boot, machines will install Fedora CoreOS to disk, reboot into the disk install, and provision themselves as Kubernetes controllers or workers via Ignition. -Controller hosts are provisioned to run an `etcd-member` peer and a `kubelet` service. Worker hosts run a `kubelet` service. Controller nodes run `kube-apiserver`, `kube-scheduler`, `kube-controller-manager`, and `coredns`, while `kube-proxy` and `calico` (or `flannel`) run on every node. A generated `kubeconfig` provides `kubectl` access to the cluster. +Controller hosts are provisioned to run an `etcd-member` peer and a `kubelet` service. Worker hosts run a `kubelet` service. Controller nodes run `kube-apiserver`, `kube-scheduler`, `kube-controller-manager`, and `coredns`, while `kube-proxy` and (`flannel`, `calico`, or `cilium`) run on every node. A generated `kubeconfig` provides `kubectl` access to the cluster. ## Requirements @@ -154,7 +154,7 @@ Define a Kubernetes cluster using the module `bare-metal/fedora-coreos/kubernete ```tf module "mercury" { - source = "git::https://github.com/poseidon/typhoon//bare-metal/fedora-coreos/kubernetes?ref=v1.28.3" + source = "git::https://github.com/poseidon/typhoon//bare-metal/fedora-coreos/kubernetes?ref=v1.31.3" # bare-metal cluster_name = "mercury" @@ -191,7 +191,7 @@ Workers with similar features can be defined inline using the `workers` field as ```tf module "mercury-node1" { - source = "git::https://github.com/poseidon/typhoon//bare-metal/fedora-coreos/kubernetes/worker?ref=v1.28.3" + source = "git::https://github.com/poseidon/typhoon//bare-metal/fedora-coreos/kubernetes/worker?ref=v1.31.3" # bare-metal cluster_name = "mercury" @@ -302,8 +302,9 @@ systemd[1]: Started Kubernetes control plane. 
``` resource "local_file" "kubeconfig-mercury" { - content = module.mercury.kubeconfig-admin - filename = "/home/user/.kube/configs/mercury-config" + content = module.mercury.kubeconfig-admin + filename = "/home/user/.kube/configs/mercury-config" + file_permission = "0600" } ``` @@ -313,9 +314,9 @@ List nodes in the cluster. $ export KUBECONFIG=/home/user/.kube/configs/mercury-config $ kubectl get nodes NAME STATUS ROLES AGE VERSION -node1.example.com Ready 10m v1.28.3 -node2.example.com Ready 10m v1.28.3 -node3.example.com Ready 10m v1.28.3 +node1.example.com Ready 10m v1.31.3 +node2.example.com Ready 10m v1.31.3 +node3.example.com Ready 10m v1.31.3 ``` List the pods. @@ -323,9 +324,10 @@ List the pods. ``` $ kubectl get pods --all-namespaces NAMESPACE NAME READY STATUS RESTARTS AGE -kube-system calico-node-6qp7f 2/2 Running 1 11m -kube-system calico-node-gnjrm 2/2 Running 0 11m -kube-system calico-node-llbgt 2/2 Running 0 11m +kube-system cilium-6qp7f 1/1 Running 1 11m +kube-system cilium-gnjrm 1/1 Running 0 11m +kube-system cilium-llbgt 1/1 Running 0 11m +kube-system cilium-operator-68d778b448-g744f 1/1 Running 0 11m kube-system coredns-1187388186-dj3pd 1/1 Running 0 11m kube-system coredns-1187388186-mx9rt 1/1 Running 0 11m kube-system kube-apiserver-node1.example.com 1/1 Running 0 11m @@ -372,4 +374,3 @@ Check the [variables.tf](https://github.com/poseidon/typhoon/blob/master/bare-me | kernel_args | Additional kernel args to provide at PXE boot | [] | ["kvm-intel.nested=1"] | | worker_node_labels | Map from worker name to list of initial node labels | {} | {"node2" = ["role=special"]} | | worker_node_taints | Map from worker name to list of initial node taints | {} | {"node2" = ["role=special:NoSchedule"]} | - diff --git a/docs/fedora-coreos/digitalocean.md b/docs/fedora-coreos/digitalocean.md index bf3a5954..cf95787e 100644 --- a/docs/fedora-coreos/digitalocean.md +++ b/docs/fedora-coreos/digitalocean.md @@ -1,10 +1,10 @@ # DigitalOcean -In this tutorial, we'll create a Kubernetes v1.28.3 cluster on DigitalOcean with Fedora CoreOS. +In this tutorial, we'll create a Kubernetes v1.31.3 cluster on DigitalOcean with Fedora CoreOS. We'll declare a Kubernetes cluster using the Typhoon Terraform module. Then apply the changes to create controller droplets, worker droplets, DNS records, tags, and TLS assets. -Controller hosts are provisioned to run an `etcd-member` peer and a `kubelet` service. Worker hosts run a `kubelet` service. Controller nodes run `kube-apiserver`, `kube-scheduler`, `kube-controller-manager`, and `coredns`, while `kube-proxy` and `calico` (or `flannel`) run on every node. A generated `kubeconfig` provides `kubectl` access to the cluster. +Controller hosts are provisioned to run an `etcd-member` peer and a `kubelet` service. Worker hosts run a `kubelet` service. Controller nodes run `kube-apiserver`, `kube-scheduler`, `kube-controller-manager`, and `coredns`, while `kube-proxy` and (`flannel`, `calico`, or `cilium`) run on every node. A generated `kubeconfig` provides `kubectl` access to the cluster. 
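The `os_image` in the cluster definition later on this page references a custom image looked up with a `digitalocean_image` data source. A minimal sketch, assuming you have already uploaded a Fedora CoreOS image to DigitalOcean; the image name below is illustrative, substitute the name of your own upload:

```tf
# Sketch: look up the custom Fedora CoreOS image previously uploaded to
# DigitalOcean. The name is an example; use the name of your uploaded image.
data "digitalocean_image" "fedora-coreos-31-20200323-3-2" {
  name = "fedora-coreos-31.20200323.3.2-digitalocean.x86_64.qcow2"
}
```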
## Requirements @@ -81,19 +81,19 @@ Define a Kubernetes cluster using the module `digital-ocean/fedora-coreos/kubern ```tf module "nemo" { - source = "git::https://github.com/poseidon/typhoon//digital-ocean/fedora-coreos/kubernetes?ref=v1.28.3" + source = "git::https://github.com/poseidon/typhoon//digital-ocean/fedora-coreos/kubernetes?ref=v1.31.3" # Digital Ocean cluster_name = "nemo" region = "nyc3" dns_zone = "digital-ocean.example.com" - # configuration - os_image = data.digitalocean_image.fedora-coreos-31-20200323-3-2.id - ssh_fingerprints = ["d7:9d:79:ae:56:32:73:79:95:88:e3:a2:ab:5d:45:e7"] - - # optional + # instances + os_image = data.digitalocean_image.fedora-coreos-31-20200323-3-2.id worker_count = 2 + + # configuration + ssh_fingerprints = ["d7:9d:79:ae:56:32:73:79:95:88:e3:a2:ab:5d:45:e7"] } ``` @@ -144,8 +144,9 @@ In 3-6 minutes, the Kubernetes cluster will be ready. ``` resource "local_file" "kubeconfig-nemo" { - content = module.nemo.kubeconfig-admin - filename = "/home/user/.kube/configs/nemo-config" + content = module.nemo.kubeconfig-admin + filename = "/home/user/.kube/configs/nemo-config" + file_permission = "0600" } ``` @@ -155,9 +156,9 @@ List nodes in the cluster. $ export KUBECONFIG=/home/user/.kube/configs/nemo-config $ kubectl get nodes NAME STATUS ROLES AGE VERSION -10.132.110.130 Ready 10m v1.28.3 -10.132.115.81 Ready 10m v1.28.3 -10.132.124.107 Ready 10m v1.28.3 +10.132.110.130 Ready 10m v1.31.3 +10.132.115.81 Ready 10m v1.31.3 +10.132.124.107 Ready 10m v1.31.3 ``` List the pods. @@ -166,9 +167,9 @@ List the pods. NAMESPACE NAME READY STATUS RESTARTS AGE kube-system coredns-1187388186-ld1j7 1/1 Running 0 11m kube-system coredns-1187388186-rdhf7 1/1 Running 0 11m -kube-system calico-node-1m5bf 2/2 Running 0 11m -kube-system calico-node-7jmr1 2/2 Running 0 11m -kube-system calico-node-bknc8 2/2 Running 0 11m +kube-system cilium-1m5bf 1/1 Running 0 11m +kube-system cilium-7jmr1 1/1 Running 0 11m +kube-system cilium-bknc8 1/1 Running 0 11m kube-system kube-apiserver-ip-10.132.115.81 1/1 Running 0 11m kube-system kube-controller-manager-ip-10.132.115.81 1/1 Running 0 11m kube-system kube-proxy-6kxjf 1/1 Running 0 11m @@ -248,4 +249,3 @@ Check the list of valid [droplet types](https://developers.digitalocean.com/docu !!! warning Do not choose a `controller_type` smaller than 2GB. Smaller droplets are not sufficient for running a controller and bootstrapping will fail. - diff --git a/docs/fedora-coreos/google-cloud.md b/docs/fedora-coreos/google-cloud.md index 5423d332..faeadcf4 100644 --- a/docs/fedora-coreos/google-cloud.md +++ b/docs/fedora-coreos/google-cloud.md @@ -1,10 +1,10 @@ # Google Cloud -In this tutorial, we'll create a Kubernetes v1.28.3 cluster on Google Compute Engine with Fedora CoreOS. +In this tutorial, we'll create a Kubernetes v1.31.3 cluster on Google Compute Engine with Fedora CoreOS. We'll declare a Kubernetes cluster using the Typhoon Terraform module. Then apply the changes to create a network, firewall rules, health checks, controller instances, worker managed instance group, load balancers, and TLS assets. -Controller hosts are provisioned to run an `etcd-member` peer and a `kubelet` service. Worker hosts run a `kubelet` service. Controller nodes run `kube-apiserver`, `kube-scheduler`, `kube-controller-manager`, and `coredns`, while `kube-proxy` and `calico` (or `flannel`) run on every node. A generated `kubeconfig` provides `kubectl` access to the cluster. 
+Controller hosts are provisioned to run an `etcd-member` peer and a `kubelet` service. Worker hosts run a `kubelet` service. Controller nodes run `kube-apiserver`, `kube-scheduler`, `kube-controller-manager`, and `coredns`, while `kube-proxy` and (`flannel`, `calico`, or `cilium`) run on every node. A generated `kubeconfig` provides `kubectl` access to the cluster. ## Requirements @@ -73,7 +73,7 @@ Define a Kubernetes cluster using the module `google-cloud/fedora-coreos/kuberne ```tf module "yavin" { - source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes?ref=v1.28.3" + source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes?ref=v1.31.3" # Google Cloud cluster_name = "yavin" @@ -81,11 +81,11 @@ module "yavin" { dns_zone = "example.com" dns_zone_name = "example-zone" + # instances + worker_count = 2 + # configuration ssh_authorized_key = "ssh-ed25519 AAAAB3Nz..." - - # optional - worker_count = 2 } ``` @@ -136,8 +136,9 @@ In 4-8 minutes, the Kubernetes cluster will be ready. ``` resource "local_file" "kubeconfig-yavin" { - content = module.yavin.kubeconfig-admin - filename = "/home/user/.kube/configs/yavin-config" + content = module.yavin.kubeconfig-admin + filename = "/home/user/.kube/configs/yavin-config" + file_permission = "0600" } ``` @@ -147,9 +148,9 @@ List nodes in the cluster. $ export KUBECONFIG=/home/user/.kube/configs/yavin-config $ kubectl get nodes NAME ROLES STATUS AGE VERSION -yavin-controller-0.c.example-com.internal Ready 6m v1.28.3 -yavin-worker-jrbf.c.example-com.internal Ready 5m v1.28.3 -yavin-worker-mzdm.c.example-com.internal Ready 5m v1.28.3 +yavin-controller-0.c.example-com.internal Ready 6m v1.31.3 +yavin-worker-jrbf.c.example-com.internal Ready 5m v1.31.3 +yavin-worker-mzdm.c.example-com.internal Ready 5m v1.31.3 ``` List the pods. @@ -157,9 +158,9 @@ List the pods. ``` $ kubectl get pods --all-namespaces NAMESPACE NAME READY STATUS RESTARTS AGE -kube-system calico-node-1cs8z 2/2 Running 0 6m -kube-system calico-node-d1l5b 2/2 Running 0 6m -kube-system calico-node-sp9ps 2/2 Running 0 6m +kube-system cilium-1cs8z 1/1 Running 0 6m +kube-system cilium-d1l5b 1/1 Running 0 6m +kube-system cilium-sp9ps 1/1 Running 0 6m kube-system coredns-1187388186-dkh3o 1/1 Running 0 6m kube-system coredns-1187388186-zj5dl 1/1 Running 0 6m kube-system kube-apiserver-controller-0 1/1 Running 0 6m @@ -209,25 +210,27 @@ resource "google_dns_managed_zone" "zone-for-clusters" { ### Optional -| Name | Description | Default | Example | -|:-----|:------------|:--------|:--------| -| controller_count | Number of controllers (i.e. 
masters) | 1 | 3 | -| worker_count | Number of workers | 1 | 3 | -| controller_type | Machine type for controllers | "n1-standard-1" | See below | -| worker_type | Machine type for workers | "n1-standard-1" | See below | -| os_stream | Fedora CoreOS stream for compute instances | "stable" | "stable", "testing", "next" | -| disk_size | Size of the disk in GB | 30 | 100 | -| worker_preemptible | If enabled, Compute Engine will terminate workers randomly within 24 hours | false | true | -| controller_snippets | Controller Butane snippets | [] | [examples](/advanced/customization/) | -| worker_snippets | Worker Butane snippets | [] | [examples](/advanced/customization/) | -| networking | Choice of networking provider | "cilium" | "calico" or "cilium" or "flannel" | -| pod_cidr | CIDR IPv4 range to assign to Kubernetes pods | "10.2.0.0/16" | "10.22.0.0/16" | -| service_cidr | CIDR IPv4 range to assign to Kubernetes services | "10.3.0.0/16" | "10.3.0.0/24" | -| worker_node_labels | List of initial worker node labels | [] | ["worker-pool=default"] | +| Name | Description | Default | Example | +|:---------------------|:---------------------------------------------------------------------------|:----------------|:-------------------------------------| +| os_stream | Fedora CoreOS stream for compute instances | "stable" | "stable", "testing", "next" | +| controller_count | Number of controllers (i.e. masters) | 1 | 3 | +| controller_type | Machine type for controllers | "n1-standard-1" | See below | +| controller_disk_size | Controller disk size in GB | 30 | 20 | +| controller_disk_type | Controller disk type | "pd-standard" | "pd-ssd" | +| worker_count | Number of workers | 1 | 3 | +| worker_type | Machine type for workers | "n1-standard-1" | See below | +| worker_disk_size | Worker disk size in GB | 30 | 100 | +| worker_disk_type | Worker disk type | "pd-standard" | "pd-ssd" | +| worker_preemptible | If enabled, Compute Engine will terminate workers randomly within 24 hours | false | true | +| controller_snippets | Controller Butane snippets | [] | [examples](/advanced/customization/) | +| worker_snippets | Worker Butane snippets | [] | [examples](/advanced/customization/) | +| networking | Choice of networking provider | "cilium" | "calico" or "cilium" or "flannel" | +| pod_cidr | CIDR IPv4 range to assign to Kubernetes pods | "10.2.0.0/16" | "10.22.0.0/16" | +| service_cidr | CIDR IPv4 range to assign to Kubernetes services | "10.3.0.0/16" | "10.3.0.0/24" | +| worker_node_labels | List of initial worker node labels | [] | ["worker-pool=default"] | Check the list of valid [machine types](https://cloud.google.com/compute/docs/machine-types). #### Preemption Add `worker_preemptible = "true"` to allow worker nodes to be [preempted](https://cloud.google.com/compute/docs/instances/preemptible) at random, but pay [significantly](https://cloud.google.com/compute/pricing) less. Clusters tolerate stopping instances fairly well (reschedules pods, but cannot drain) and preemption provides a nice reward for running fault-tolerant cluster systems.` - diff --git a/docs/flatcar-linux/aws.md b/docs/flatcar-linux/aws.md index e8114a77..2e49b890 100644 --- a/docs/flatcar-linux/aws.md +++ b/docs/flatcar-linux/aws.md @@ -1,10 +1,10 @@ # AWS -In this tutorial, we'll create a Kubernetes v1.28.3 cluster on AWS with Flatcar Linux. +In this tutorial, we'll create a Kubernetes v1.31.3 cluster on AWS with Flatcar Linux. We'll declare a Kubernetes cluster using the Typhoon Terraform module. 
Then apply the changes to create a VPC, gateway, subnets, security groups, controller instances, worker auto-scaling group, network load balancer, and TLS assets. -Controller hosts are provisioned to run an `etcd-member` peer and a `kubelet` service. Worker hosts run a `kubelet` service. Controller nodes run `kube-apiserver`, `kube-scheduler`, `kube-controller-manager`, and `coredns`, while `kube-proxy` and `calico` (or `flannel`) run on every node. A generated `kubeconfig` provides `kubectl` access to the cluster. +Controller hosts are provisioned to run an `etcd-member` peer and a `kubelet` service. Worker hosts run a `kubelet` service. Controller nodes run `kube-apiserver`, `kube-scheduler`, `kube-controller-manager`, and `coredns`, while `kube-proxy` and (`flannel`, `calico`, or `cilium`) run on every node. A generated `kubeconfig` provides `kubectl` access to the cluster. ## Requirements @@ -72,19 +72,19 @@ Define a Kubernetes cluster using the module `aws/flatcar-linux/kubernetes`. ```tf module "tempest" { - source = "git::https://github.com/poseidon/typhoon//aws/flatcar-linux/kubernetes?ref=v1.28.3" + source = "git::https://github.com/poseidon/typhoon//aws/flatcar-linux/kubernetes?ref=v1.31.3" # AWS cluster_name = "tempest" dns_zone = "aws.example.com" dns_zone_id = "Z3PAABBCFAKEC0" - # configuration - ssh_authorized_key = "ssh-rsa AAAAB3Nz..." - - # optional + # instances worker_count = 2 worker_type = "t3.small" + + # configuration + ssh_authorized_key = "ssh-rsa AAAAB3Nz..." } ``` @@ -134,8 +134,9 @@ In 4-8 minutes, the Kubernetes cluster will be ready. ``` resource "local_file" "kubeconfig-tempest" { - content = module.tempest.kubeconfig-admin - filename = "/home/user/.kube/configs/tempest-config" + content = module.tempest.kubeconfig-admin + filename = "/home/user/.kube/configs/tempest-config" + file_permission = "0600" } ``` @@ -145,9 +146,9 @@ List nodes in the cluster. $ export KUBECONFIG=/home/user/.kube/configs/tempest-config $ kubectl get nodes NAME STATUS ROLES AGE VERSION -ip-10-0-3-155 Ready 10m v1.28.3 -ip-10-0-26-65 Ready 10m v1.28.3 -ip-10-0-41-21 Ready 10m v1.28.3 +ip-10-0-3-155 Ready 10m v1.31.3 +ip-10-0-26-65 Ready 10m v1.31.3 +ip-10-0-41-21 Ready 10m v1.31.3 ``` List the pods. @@ -155,9 +156,9 @@ List the pods. ``` $ kubectl get pods --all-namespaces NAMESPACE NAME READY STATUS RESTARTS AGE -kube-system calico-node-1m5bf 2/2 Running 0 34m -kube-system calico-node-7jmr1 2/2 Running 0 34m -kube-system calico-node-bknc8 2/2 Running 0 34m +kube-system cilium-1m5bf 1/1 Running 0 34m +kube-system cilium-7jmr1 1/1 Running 0 34m +kube-system cilium-bknc8 1/1 Running 0 34m kube-system coredns-1187388186-wx1lg 1/1 Running 0 34m kube-system coredns-1187388186-qjnvp 1/1 Running 0 34m kube-system kube-apiserver-ip-10-0-3-155 1/1 Running 0 34m @@ -206,16 +207,19 @@ Reference the DNS zone id with `aws_route53_zone.zone-for-clusters.zone_id`. | Name | Description | Default | Example | |:-----|:------------|:--------|:--------| -| controller_count | Number of controllers (i.e. 
masters) | 1 | 1 | -| worker_count | Number of workers | 1 | 3 | -| controller_type | EC2 instance type for controllers | "t3.small" | See below | -| worker_type | EC2 instance type for workers | "t3.small" | See below | | os_image | AMI channel for a Container Linux derivative | "flatcar-stable" | flatcar-stable, flatcar-beta, flatcar-alpha | -| disk_size | Size of the EBS volume in GB | 30 | 100 | -| disk_type | Type of the EBS volume | "gp3" | standard, gp2, gp3, io1 | -| disk_iops | IOPS of the EBS volume | 0 (i.e. auto) | 400 | -| worker_target_groups | Target group ARNs to which worker instances should be added | [] | [aws_lb_target_group.app.id] | +| controller_count | Number of controllers (i.e. masters) | 1 | 1 | +| controller_type | EC2 instance type for controllers | "t3.small" | See below | +| controller_disk_size | Size of EBS volume in GB | 30 | 100 | +| controller_disk_type | Type of EBS volume | gp3 | io1 | +| controller_disk_iops | IOPS of EBS volume | 3000 | 4000 | +| controller_cpu_credits | Burstable CPU pricing model | null (i.e. auto) | standard, unlimited | +| worker_disk_size | Size of EBS volume in GB | 30 | 100 | +| worker_disk_type | Type of EBS volume | gp3 | io1 | +| worker_disk_iops | IOPS of EBS volume | 3000 | 4000 | +| worker_cpu_credits | Burstable CPU pricing model | null (i.e. auto) | standard, unlimited | | worker_price | Spot price in USD for worker instances or 0 to use on-demand instances | 0/null | 0.10 | +| worker_target_groups | Target group ARNs to which worker instances should be added | [] | [aws_lb_target_group.app.id] | | controller_snippets | Controller Container Linux Config snippets | [] | [example](/advanced/customization/) | | worker_snippets | Worker Container Linux Config snippets | [] | [example](/advanced/customization/) | | networking | Choice of networking provider | "cilium" | "calico" or "cilium" or "flannel" | @@ -228,7 +232,7 @@ Reference the DNS zone id with `aws_route53_zone.zone-for-clusters.zone_id`. Check the list of valid [instance types](https://aws.amazon.com/ec2/instance-types/). !!! warning - Do not choose a `controller_type` smaller than `t2.small`. Smaller instances are not sufficient for running a controller. + Do not choose a `controller_type` smaller than `t3.small`. Smaller instances are not sufficient for running a controller. !!! tip "MTU" If your EC2 instance type supports [Jumbo frames](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/network_mtu.html#jumbo_frame_instances) (most do), we recommend you change the `network_mtu` to 8981! You will get better pod-to-pod bandwidth. @@ -236,4 +240,3 @@ Check the list of valid [instance types](https://aws.amazon.com/ec2/instance-typ #### Spot Add `worker_price = "0.10"` to use spot instance workers (instead of "on-demand") and set a maximum spot price in USD. Clusters can tolerate spot market interuptions fairly well (reschedules pods, but cannot drain) to save money, with the tradeoff that requests for workers may go unfulfilled. - diff --git a/docs/flatcar-linux/azure.md b/docs/flatcar-linux/azure.md index 0d5601b5..891b69a9 100644 --- a/docs/flatcar-linux/azure.md +++ b/docs/flatcar-linux/azure.md @@ -1,10 +1,10 @@ # Azure -In this tutorial, we'll create a Kubernetes v1.28.3 cluster on Azure with Flatcar Linux. +In this tutorial, we'll create a Kubernetes v1.31.3 cluster on Azure with Flatcar Linux. We'll declare a Kubernetes cluster using the Typhoon Terraform module. 
Then apply the changes to create a resource group, virtual network, subnets, security groups, controller availability set, worker scale set, load balancer, and TLS assets. -Controller hosts are provisioned to run an `etcd-member` peer and a `kubelet` service. Worker hosts run a `kubelet` service. Controller nodes run `kube-apiserver`, `kube-scheduler`, `kube-controller-manager`, and `coredns`, while `kube-proxy` and `calico` (or `flannel`) run on every node. A generated `kubeconfig` provides `kubectl` access to the cluster. +Controller hosts are provisioned to run an `etcd-member` peer and a `kubelet` service. Worker hosts run a `kubelet` service. Controller nodes run `kube-apiserver`, `kube-scheduler`, `kube-controller-manager`, and `coredns`, while `kube-proxy` and (`flannel`, `calico`, or `cilium`) run on every node. A generated `kubeconfig` provides `kubectl` access to the cluster. ## Requirements @@ -75,20 +75,22 @@ Define a Kubernetes cluster using the module `azure/flatcar-linux/kubernetes`. ```tf module "ramius" { - source = "git::https://github.com/poseidon/typhoon//azure/flatcar-linux/kubernetes?ref=v1.28.3" + source = "git::https://github.com/poseidon/typhoon//azure/flatcar-linux/kubernetes?ref=v1.31.3" # Azure cluster_name = "ramius" - region = "centralus" + location = "centralus" dns_zone = "azure.example.com" dns_zone_group = "example-group" + network_cidr = { + ipv4 = ["10.0.0.0/20"] + } + + # instances + worker_count = 2 # configuration ssh_authorized_key = "ssh-rsa AAAAB3Nz..." - - # optional - worker_count = 2 - host_cidr = "10.0.0.0/20" } ``` @@ -138,8 +140,9 @@ In 4-8 minutes, the Kubernetes cluster will be ready. ``` resource "local_file" "kubeconfig-ramius" { - content = module.ramius.kubeconfig-admin - filename = "/home/user/.kube/configs/ramius-config" + content = module.ramius.kubeconfig-admin + filename = "/home/user/.kube/configs/ramius-config" + file_permission = "0600" } ``` @@ -149,9 +152,9 @@ List nodes in the cluster. $ export KUBECONFIG=/home/user/.kube/configs/ramius-config $ kubectl get nodes NAME STATUS ROLES AGE VERSION -ramius-controller-0 Ready 24m v1.28.3 -ramius-worker-000001 Ready 25m v1.28.3 -ramius-worker-000002 Ready 24m v1.28.3 +ramius-controller-0 Ready 24m v1.31.3 +ramius-worker-000001 Ready 25m v1.31.3 +ramius-worker-000002 Ready 24m v1.31.3 ``` List the pods. 
@@ -161,9 +164,9 @@ $ kubectl get pods --all-namespaces NAMESPACE NAME READY STATUS RESTARTS AGE kube-system coredns-7c6fbb4f4b-b6qzx 1/1 Running 0 26m kube-system coredns-7c6fbb4f4b-j2k3d 1/1 Running 0 26m -kube-system calico-node-1m5bf 2/2 Running 0 26m -kube-system calico-node-7jmr1 2/2 Running 0 26m -kube-system calico-node-bknc8 2/2 Running 0 26m +kube-system cilium-1m5bf 1/1 Running 0 26m +kube-system cilium-7jmr1 1/1 Running 0 26m +kube-system cilium-bknc8 1/1 Running 0 26m kube-system kube-apiserver-ramius-controller-0 1/1 Running 0 26m kube-system kube-controller-manager-ramius-controller-0 1/1 Running 0 26m kube-system kube-proxy-j4vpq 1/1 Running 0 26m @@ -185,13 +188,13 @@ Check the [variables.tf](https://github.com/poseidon/typhoon/blob/master/azure/f | Name | Description | Example | |:-----|:------------|:--------| | cluster_name | Unique cluster name (prepended to dns_zone) | "ramius" | -| region | Azure region | "centralus" | +| location | Azure location | "centralus" | | dns_zone | Azure DNS zone | "azure.example.com" | | dns_zone_group | Resource group where the Azure DNS zone resides | "global" | | ssh_authorized_key | SSH public key for user 'core' | "ssh-rsa AAAAB3NZ..." | !!! tip - Regions are shown in [docs](https://azure.microsoft.com/en-us/global-infrastructure/regions/) or with `az account list-locations --output table`. + Locations are shown in [docs](https://azure.microsoft.com/en-us/global-infrastructure/regions/) or with `az account list-locations --output table`. #### DNS Zone @@ -224,26 +227,27 @@ Reference the DNS zone with `azurerm_dns_zone.clusters.name` and its resource gr | Name | Description | Default | Example | |:-----|:------------|:--------|:--------| -| controller_count | Number of controllers (i.e. masters) | 1 | 1 | -| worker_count | Number of workers | 1 | 3 | -| controller_type | Machine type for controllers | "Standard_B2s" | See below | -| worker_type | Machine type for workers | "Standard_D2as_v5" | See below | | os_image | Channel for a Container Linux derivative | "flatcar-stable" | flatcar-stable, flatcar-beta, flatcar-alpha | -| disk_size | Size of the disk in GB | 30 | 100 | +| controller_count | Number of controllers (i.e. masters) | 1 | 1 | +| controller_type | Machine type for controllers | "Standard_B2s" | See below | +| controller_disk_type | Managed disk for controllers | Premium_LRS | Standard_LRS | +| controller_disk_size | Managed disk size in GB | 30 | 50 | +| worker_count | Number of workers | 1 | 3 | +| worker_type | Machine type for workers | "Standard_D2as_v5" | See below | +| worker_disk_type | Managed disk for workers | Standard_LRS | Premium_LRS | +| worker_disk_size | Size of the disk in GB | 30 | 100 | +| worker_ephemeral_disk | Use ephemeral local disk instead of managed disk | false | true | | worker_priority | Set priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be deallocated at any time | Regular | Spot | | controller_snippets | Controller Container Linux Config snippets | [] | [example](/advanced/customization/#usage) | | worker_snippets | Worker Container Linux Config snippets | [] | [example](/advanced/customization/#usage) | | networking | Choice of networking provider | "cilium" | "calico" or "cilium" or "flannel" | -| host_cidr | CIDR IPv4 range to assign to instances | "10.0.0.0/16" | "10.0.0.0/20" | +| network_cidr | Virtual network CIDR ranges | { ipv4 = ["10.0.0.0/16"], ipv6 = [ULA, ...] 
} | { ipv4 = ["10.0.0.0/20"] } | | pod_cidr | CIDR IPv4 range to assign to Kubernetes pods | "10.2.0.0/16" | "10.22.0.0/16" | | service_cidr | CIDR IPv4 range to assign to Kubernetes services | "10.3.0.0/16" | "10.3.0.0/24" | | worker_node_labels | List of initial worker node labels | [] | ["worker-pool=default"] | Check the list of valid [machine types](https://azure.microsoft.com/en-us/pricing/details/virtual-machines/linux/) and their [specs](https://docs.microsoft.com/en-us/azure/virtual-machines/linux/sizes-general). Use `az vm list-skus` to get the identifier. -!!! warning - Unlike AWS and GCP, Azure requires its *virtual* networks to have non-overlapping IPv4 CIDRs (yeah, go figure). Instead of each cluster just using `10.0.0.0/16` for instances, each Azure cluster's `host_cidr` must be non-overlapping (e.g. 10.0.0.0/20 for the 1st cluster, 10.0.16.0/20 for the 2nd cluster, etc). - !!! warning Do not choose a `controller_type` smaller than `Standard_B2s`. Smaller instances are not sufficient for running a controller. diff --git a/docs/flatcar-linux/bare-metal.md b/docs/flatcar-linux/bare-metal.md index d152f127..edad8dba 100644 --- a/docs/flatcar-linux/bare-metal.md +++ b/docs/flatcar-linux/bare-metal.md @@ -1,10 +1,10 @@ # Bare-Metal -In this tutorial, we'll network boot and provision a Kubernetes v1.28.3 cluster on bare-metal with Flatcar Linux. +In this tutorial, we'll network boot and provision a Kubernetes v1.31.3 cluster on bare-metal with Flatcar Linux. First, we'll deploy a [Matchbox](https://github.com/poseidon/matchbox) service and setup a network boot environment. Then, we'll declare a Kubernetes cluster using the Typhoon Terraform module and power on machines. On PXE boot, machines will install Container Linux to disk, reboot into the disk install, and provision themselves as Kubernetes controllers or workers via Ignition. -Controller hosts are provisioned to run an `etcd-member` peer and a `kubelet` service. Worker hosts run a `kubelet` service. Controller nodes run `kube-apiserver`, `kube-scheduler`, `kube-controller-manager`, and `coredns` while `kube-proxy` and `calico` (or `flannel`) run on every node. A generated `kubeconfig` provides `kubectl` access to the cluster. +Controller hosts are provisioned to run an `etcd-member` peer and a `kubelet` service. Worker hosts run a `kubelet` service. Controller nodes run `kube-apiserver`, `kube-scheduler`, `kube-controller-manager`, and `coredns` while `kube-proxy` and (`flannel`, `calico`, or `cilium`) run on every node. A generated `kubeconfig` provides `kubectl` access to the cluster. ## Requirements @@ -154,7 +154,7 @@ Define a Kubernetes cluster using the module `bare-metal/flatcar-linux/kubernete ```tf module "mercury" { - source = "git::https://github.com/poseidon/typhoon//bare-metal/flatcar-linux/kubernetes?ref=v1.28.3" + source = "git::https://github.com/poseidon/typhoon//bare-metal/flatcar-linux/kubernetes?ref=v1.31.3" # bare-metal cluster_name = "mercury" @@ -194,7 +194,7 @@ Workers with similar features can be defined inline using the `workers` field as ```tf module "mercury-node1" { - source = "git::https://github.com/poseidon/typhoon//bare-metal/fedora-coreos/kubernetes/worker?ref=v1.28.3" + source = "git::https://github.com/poseidon/typhoon//bare-metal/fedora-coreos/kubernetes/worker?ref=v1.31.3" # bare-metal cluster_name = "mercury" @@ -312,8 +312,9 @@ systemd[1]: Started Kubernetes control plane. 
``` resource "local_file" "kubeconfig-mercury" { - content = module.mercury.kubeconfig-admin - filename = "/home/user/.kube/configs/mercury-config" + content = module.mercury.kubeconfig-admin + filename = "/home/user/.kube/configs/mercury-config" + file_permission = "0600" } ``` @@ -323,9 +324,9 @@ List nodes in the cluster. $ export KUBECONFIG=/home/user/.kube/configs/mercury-config $ kubectl get nodes NAME STATUS ROLES AGE VERSION -node1.example.com Ready 10m v1.28.3 -node2.example.com Ready 10m v1.28.3 -node3.example.com Ready 10m v1.28.3 +node1.example.com Ready 10m v1.31.3 +node2.example.com Ready 10m v1.31.3 +node3.example.com Ready 10m v1.31.3 ``` List the pods. @@ -333,9 +334,10 @@ List the pods. ``` $ kubectl get pods --all-namespaces NAMESPACE NAME READY STATUS RESTARTS AGE -kube-system calico-node-6qp7f 2/2 Running 1 11m -kube-system calico-node-gnjrm 2/2 Running 0 11m -kube-system calico-node-llbgt 2/2 Running 0 11m +kube-system cilium-6qp7f 1/1 Running 1 11m +kube-system cilium-gnjrm 1/1 Running 0 11m +kube-system cilium-llbgt 1/1 Running 0 11m +kube-system cilium-operator-68d778b448-g744f 1/1 Running 0 11m kube-system coredns-1187388186-dj3pd 1/1 Running 0 11m kube-system coredns-1187388186-mx9rt 1/1 Running 0 11m kube-system kube-apiserver-node1.example.com 1/1 Running 0 11m diff --git a/docs/flatcar-linux/digitalocean.md b/docs/flatcar-linux/digitalocean.md index 4ab1f702..9d627247 100644 --- a/docs/flatcar-linux/digitalocean.md +++ b/docs/flatcar-linux/digitalocean.md @@ -1,10 +1,10 @@ # DigitalOcean -In this tutorial, we'll create a Kubernetes v1.28.3 cluster on DigitalOcean with Flatcar Linux. +In this tutorial, we'll create a Kubernetes v1.31.3 cluster on DigitalOcean with Flatcar Linux. We'll declare a Kubernetes cluster using the Typhoon Terraform module. Then apply the changes to create controller droplets, worker droplets, DNS records, tags, and TLS assets. -Controller hosts are provisioned to run an `etcd-member` peer and a `kubelet` service. Worker hosts run a `kubelet` service. Controller nodes run `kube-apiserver`, `kube-scheduler`, `kube-controller-manager`, and `coredns`, while `kube-proxy` and `calico` (or `flannel`) run on every node. A generated `kubeconfig` provides `kubectl` access to the cluster. +Controller hosts are provisioned to run an `etcd-member` peer and a `kubelet` service. Worker hosts run a `kubelet` service. Controller nodes run `kube-apiserver`, `kube-scheduler`, `kube-controller-manager`, and `coredns`, while `kube-proxy` and (`flannel`, `calico`, or `cilium`) run on every node. A generated `kubeconfig` provides `kubectl` access to the cluster. ## Requirements @@ -81,19 +81,19 @@ Define a Kubernetes cluster using the module `digital-ocean/flatcar-linux/kubern ```tf module "nemo" { - source = "git::https://github.com/poseidon/typhoon//digital-ocean/flatcar-linux/kubernetes?ref=v1.28.3" + source = "git::https://github.com/poseidon/typhoon//digital-ocean/flatcar-linux/kubernetes?ref=v1.31.3" # Digital Ocean cluster_name = "nemo" region = "nyc3" dns_zone = "digital-ocean.example.com" - # configuration - os_image = data.digitalocean_image.flatcar-stable-2303-4-0.id - ssh_fingerprints = ["d7:9d:79:ae:56:32:73:79:95:88:e3:a2:ab:5d:45:e7"] - - # optional + # instances + os_image = data.digitalocean_image.flatcar-stable-2303-4-0.id worker_count = 2 + + # configuration + ssh_fingerprints = ["d7:9d:79:ae:56:32:73:79:95:88:e3:a2:ab:5d:45:e7"] } ``` @@ -144,8 +144,9 @@ In 3-6 minutes, the Kubernetes cluster will be ready. 
``` resource "local_file" "kubeconfig-nemo" { - content = module.nemo.kubeconfig-admin - filename = "/home/user/.kube/configs/nemo-config" + content = module.nemo.kubeconfig-admin + filename = "/home/user/.kube/configs/nemo-config" + file_permission = "0600" } ``` @@ -155,9 +156,9 @@ List nodes in the cluster. $ export KUBECONFIG=/home/user/.kube/configs/nemo-config $ kubectl get nodes NAME STATUS ROLES AGE VERSION -10.132.110.130 Ready 10m v1.28.3 -10.132.115.81 Ready 10m v1.28.3 -10.132.124.107 Ready 10m v1.28.3 +10.132.110.130 Ready 10m v1.31.3 +10.132.115.81 Ready 10m v1.31.3 +10.132.124.107 Ready 10m v1.31.3 ``` List the pods. @@ -166,9 +167,9 @@ List the pods. NAMESPACE NAME READY STATUS RESTARTS AGE kube-system coredns-1187388186-ld1j7 1/1 Running 0 11m kube-system coredns-1187388186-rdhf7 1/1 Running 0 11m -kube-system calico-node-1m5bf 2/2 Running 0 11m -kube-system calico-node-7jmr1 2/2 Running 0 11m -kube-system calico-node-bknc8 2/2 Running 0 11m +kube-system cilium-1m5bf 1/1 Running 0 11m +kube-system cilium-7jmr1 1/1 Running 0 11m +kube-system cilium-bknc8 1/1 Running 0 11m kube-system kube-apiserver-ip-10.132.115.81 1/1 Running 0 11m kube-system kube-controller-manager-ip-10.132.115.81 1/1 Running 0 11m kube-system kube-proxy-6kxjf 1/1 Running 0 11m diff --git a/docs/flatcar-linux/google-cloud.md b/docs/flatcar-linux/google-cloud.md index bc899f7c..ecaf77dd 100644 --- a/docs/flatcar-linux/google-cloud.md +++ b/docs/flatcar-linux/google-cloud.md @@ -1,10 +1,10 @@ # Google Cloud -In this tutorial, we'll create a Kubernetes v1.28.3 cluster on Google Compute Engine with Flatcar Linux. +In this tutorial, we'll create a Kubernetes v1.31.3 cluster on Google Compute Engine with Flatcar Linux. We'll declare a Kubernetes cluster using the Typhoon Terraform module. Then apply the changes to create a network, firewall rules, health checks, controller instances, worker managed instance group, load balancers, and TLS assets. -Controller hosts are provisioned to run an `etcd-member` peer and a `kubelet` service. Worker hosts run a `kubelet` service. Controller nodes run `kube-apiserver`, `kube-scheduler`, `kube-controller-manager`, and `coredns`, while `kube-proxy` and `calico` (or `flannel`) run on every node. A generated `kubeconfig` provides `kubectl` access to the cluster. +Controller hosts are provisioned to run an `etcd-member` peer and a `kubelet` service. Worker hosts run a `kubelet` service. Controller nodes run `kube-apiserver`, `kube-scheduler`, `kube-controller-manager`, and `coredns`, while `kube-proxy` and (`flannel`, `calico`, or `cilium`) run on every node. A generated `kubeconfig` provides `kubectl` access to the cluster. ## Requirements @@ -73,7 +73,7 @@ Define a Kubernetes cluster using the module `google-cloud/flatcar-linux/kuberne ```tf module "yavin" { - source = "git::https://github.com/poseidon/typhoon//google-cloud/flatcar-linux/kubernetes?ref=v1.28.3" + source = "git::https://github.com/poseidon/typhoon//google-cloud/flatcar-linux/kubernetes?ref=v1.31.3" # Google Cloud cluster_name = "yavin" @@ -81,11 +81,11 @@ module "yavin" { dns_zone = "example.com" dns_zone_name = "example-zone" + # instances + worker_count = 2 + # configuration ssh_authorized_key = "ssh-rsa AAAAB3Nz..." - - # optional - worker_count = 2 } ``` @@ -136,8 +136,9 @@ In 4-8 minutes, the Kubernetes cluster will be ready. 
``` resource "local_file" "kubeconfig-yavin" { - content = module.yavin.kubeconfig-admin - filename = "/home/user/.kube/configs/yavin-config" + content = module.yavin.kubeconfig-admin + filename = "/home/user/.kube/configs/yavin-config" + file_permission = "0600" } ``` @@ -147,9 +148,9 @@ List nodes in the cluster. $ export KUBECONFIG=/home/user/.kube/configs/yavin-config $ kubectl get nodes NAME ROLES STATUS AGE VERSION -yavin-controller-0.c.example-com.internal Ready 6m v1.28.3 -yavin-worker-jrbf.c.example-com.internal Ready 5m v1.28.3 -yavin-worker-mzdm.c.example-com.internal Ready 5m v1.28.3 +yavin-controller-0.c.example-com.internal Ready 6m v1.31.3 +yavin-worker-jrbf.c.example-com.internal Ready 5m v1.31.3 +yavin-worker-mzdm.c.example-com.internal Ready 5m v1.31.3 ``` List the pods. @@ -157,9 +158,9 @@ List the pods. ``` $ kubectl get pods --all-namespaces NAMESPACE NAME READY STATUS RESTARTS AGE -kube-system calico-node-1cs8z 2/2 Running 0 6m -kube-system calico-node-d1l5b 2/2 Running 0 6m -kube-system calico-node-sp9ps 2/2 Running 0 6m +kube-system cilium-1cs8z 1/1 Running 0 6m +kube-system cilium-d1l5b 1/1 Running 0 6m +kube-system cilium-sp9ps 1/1 Running 0 6m kube-system coredns-1187388186-dkh3o 1/1 Running 0 6m kube-system coredns-1187388186-zj5dl 1/1 Running 0 6m kube-system kube-apiserver-controller-0 1/1 Running 0 6m @@ -209,25 +210,25 @@ resource "google_dns_managed_zone" "zone-for-clusters" { ### Optional -| Name | Description | Default | Example | -|:-----|:------------|:--------|:--------| -| controller_count | Number of controllers (i.e. masters) | 1 | 3 | -| worker_count | Number of workers | 1 | 3 | -| controller_type | Machine type for controllers | "n1-standard-1" | See below | -| worker_type | Machine type for workers | "n1-standard-1" | See below | -| os_image | Flatcar Linux image for compute instances | "flatcar-stable" | flatcar-stable, flatcar-beta, flatcar-alpha | -| disk_size | Size of the disk in GB | 30 | 100 | -| worker_preemptible | If enabled, Compute Engine will terminate workers randomly within 24 hours | false | true | -| controller_snippets | Controller Container Linux Config snippets | [] | [example](/advanced/customization/) | -| worker_snippets | Worker Container Linux Config snippets | [] | [example](/advanced/customization/) | -| networking | Choice of networking provider | "cilium" | "calico" or "cilium" or "flannel" | -| pod_cidr | CIDR IPv4 range to assign to Kubernetes pods | "10.2.0.0/16" | "10.22.0.0/16" | -| service_cidr | CIDR IPv4 range to assign to Kubernetes services | "10.3.0.0/16" | "10.3.0.0/24" | -| worker_node_labels | List of initial worker node labels | [] | ["worker-pool=default"] | +| Name | Description | Default | Example | +|:---------------------|:---------------------------------------------------------------------------|:-----------------|:--------------------------------------------| +| os_image | Flatcar Linux image for compute instances | "flatcar-stable" | flatcar-stable, flatcar-beta, flatcar-alpha | +| controller_count | Number of controllers (i.e. 
masters) | 1 | 3 | +| controller_type | Machine type for controllers | "n1-standard-1" | See below | +| controller_disk_size | Controller disk size in GB | 30 | 20 | +| worker_count | Number of workers | 1 | 3 | +| worker_type | Machine type for workers | "n1-standard-1" | See below | +| worker_disk_size | Worker disk size in GB | 30 | 100 | +| worker_preemptible | If enabled, Compute Engine will terminate workers randomly within 24 hours | false | true | +| controller_snippets | Controller Container Linux Config snippets | [] | [example](/advanced/customization/) | +| worker_snippets | Worker Container Linux Config snippets | [] | [example](/advanced/customization/) | +| networking | Choice of networking provider | "cilium" | "calico" or "cilium" or "flannel" | +| pod_cidr | CIDR IPv4 range to assign to Kubernetes pods | "10.2.0.0/16" | "10.22.0.0/16" | +| service_cidr | CIDR IPv4 range to assign to Kubernetes services | "10.3.0.0/16" | "10.3.0.0/24" | +| worker_node_labels | List of initial worker node labels | [] | ["worker-pool=default"] | Check the list of valid [machine types](https://cloud.google.com/compute/docs/machine-types). #### Preemption Add `worker_preemptible = "true"` to allow worker nodes to be [preempted](https://cloud.google.com/compute/docs/instances/preemptible) at random, but pay [significantly](https://cloud.google.com/compute/pricing) less. Clusters tolerate stopping instances fairly well (reschedules pods, but cannot drain) and preemption provides a nice reward for running fault-tolerant cluster systems.` - diff --git a/docs/img/typhoon-azure-load-balancing.png b/docs/img/typhoon-azure-load-balancing.png index 0d227f7a..beb532e1 100644 Binary files a/docs/img/typhoon-azure-load-balancing.png and b/docs/img/typhoon-azure-load-balancing.png differ diff --git a/docs/index.md b/docs/index.md index 9b26982e..9d46a994 100644 --- a/docs/index.md +++ b/docs/index.md @@ -11,7 +11,7 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster ## Features -* Kubernetes v1.28.3 (upstream) +* Kubernetes v1.31.3 (upstream) * Single or multi-master, [Calico](https://www.projectcalico.org/) or [Cilium](https://github.com/cilium/cilium) or [flannel](https://github.com/coreos/flannel) networking * On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/), SELinux enforcing * Advanced features like [worker pools](advanced/worker-pools/), [preemptible](fedora-coreos/google-cloud/#preemption) workers, and [snippets](advanced/customization/#hosts) customization @@ -19,7 +19,7 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster ## Modules -Typhoon provides a Terraform Module for each supported operating system and platform. +Typhoon provides a Terraform Module for defining a Kubernetes cluster on each supported operating system and platform. Typhoon is available for [Fedora CoreOS](https://getfedora.org/coreos/). @@ -50,6 +50,14 @@ Typhoon is available for [Flatcar Linux](https://www.flatcar-linux.org/releases/ | AWS | Flatcar Linux (ARM64) | [aws/flatcar-linux/kubernetes](advanced/arm64.md) | alpha | | Azure | Flatcar Linux (ARM64) | [azure/flatcar-linux/kubernetes](advanced/arm64.md) | alpha | +Typhoon also provides Terraform Modules for optionally managing individual components applied onto clusters. 
+ +| Name | Terraform Module | Status | +|---------|------------------|--------| +| CoreDNS | [addons/coredns](addons/coredns) | beta | +| Cilium | [addons/cilium](addons/cilium) | beta | +| flannel | [addons/flannel](addons/flannel) | beta | + ## Documentation * Architecture [concepts](architecture/concepts.md) and [operating-systems](architecture/operating-systems.md) @@ -62,7 +70,7 @@ Define a Kubernetes cluster by using the Terraform module for your chosen platfo ```tf module "yavin" { - source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes?ref=v1.28.3" + source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes?ref=v1.31.3" # Google Cloud cluster_name = "yavin" @@ -79,8 +87,9 @@ module "yavin" { # Obtain cluster kubeconfig resource "local_file" "kubeconfig-yavin" { - content = module.yavin.kubeconfig-admin - filename = "/home/user/.kube/configs/yavin-config" + content = module.yavin.kubeconfig-admin + filename = "/home/user/.kube/configs/yavin-config" + file_permission = "0600" } ``` @@ -100,9 +109,9 @@ In 4-8 minutes (varies by platform), the cluster will be ready. This Google Clou $ export KUBECONFIG=/home/user/.kube/configs/yavin-config $ kubectl get nodes NAME ROLES STATUS AGE VERSION -yavin-controller-0.c.example-com.internal Ready 6m v1.28.3 -yavin-worker-jrbf.c.example-com.internal Ready 5m v1.28.3 -yavin-worker-mzdm.c.example-com.internal Ready 5m v1.28.3 +yavin-controller-0.c.example-com.internal Ready 6m v1.31.3 +yavin-worker-jrbf.c.example-com.internal Ready 5m v1.31.3 +yavin-worker-mzdm.c.example-com.internal Ready 5m v1.31.3 ``` List the pods. @@ -149,4 +158,3 @@ Poseidon's Github [Sponsors](https://github.com/sponsors/poseidon) support the i
If you'd like your company here, please contact dghubble at psdn.io. - diff --git a/docs/topics/maintenance.md b/docs/topics/maintenance.md index 2df69dee..68387d55 100644 --- a/docs/topics/maintenance.md +++ b/docs/topics/maintenance.md @@ -13,12 +13,12 @@ Typhoon provides tagged releases to allow clusters to be versioned using ordinar ``` module "yavin" { - source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes?ref=v1.28.3" + source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes?ref=v1.31.3" ... } module "mercury" { - source = "git::https://github.com/poseidon/typhoon//bare-metal/flatcar-linux/kubernetes?ref=v1.28.3" + source = "git::https://github.com/poseidon/typhoon//bare-metal/flatcar-linux/kubernetes?ref=v1.31.3" ... } ``` @@ -192,7 +192,7 @@ Applying edits to most worker fields will start an instance refresh: However, changing `os_stream`/`os_channel` or new AMIs becoming available will NOT change the launch configuration or trigger an Instance Refresh. This allows Fedora CoreOS or Flatcar Linux to auto-update themselves via reboots and avoids unexpected terraform diffs for new AMIs. !!! note - Before Typhoon v1.28.3, worker nodes only used new launch configurations when replaced manually (or due to failure). If you must change node configuration manually, it's still possible. Create a new [worker pool](../advanced/worker-pools.md), then scale down the old worker pool as desired. + Before Typhoon v1.31.3, worker nodes only used new launch configurations when replaced manually (or due to failure). If you must change node configuration manually, it's still possible. Create a new [worker pool](../advanced/worker-pools.md), then scale down the old worker pool as desired. ### Google Cloud @@ -233,7 +233,7 @@ Applying edits to most worker fields will start an instance refresh: However, changing `os_stream`/`os_channel` or new compute images becoming available will NOT change the launch template or update instances. This allows Fedora CoreOS or Flatcar Linux to auto-update themselves via reboots and avoids unexpected terraform diffs for new AMIs. !!! note - Before Typhoon v1.28.3, worker nodes only used new launch templates when replaced manually (or due to failure). If you must change node configuration manually, it's still possible. Create a new [worker pool](../advanced/worker-pools.md), then scale down the old worker pool as desired. + Before Typhoon v1.31.3, worker nodes only used new launch templates when replaced manually (or due to failure). If you must change node configuration manually, it's still possible. Create a new [worker pool](../advanced/worker-pools.md), then scale down the old worker pool as desired. 
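As a sketch of that worker pool workflow (assuming a Google Cloud / Fedora CoreOS cluster named `yavin` as in the example above; the pool name, region, and machine type are illustrative, and the exact input and output names should be checked against the worker pool module's `variables.tf` and the cluster module's outputs):

```tf
# Sketch: bring up a replacement worker pool alongside the existing workers.
module "yavin-pool-new" {
  source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes/workers?ref=v1.31.3"

  # Google Cloud
  region       = "us-central1"
  network      = module.yavin.network_name
  cluster_name = "yavin"

  # configuration
  name               = "yavin-pool-new"
  kubeconfig         = module.yavin.kubeconfig
  ssh_authorized_key = var.ssh_authorized_key

  # instances
  worker_count = 2
  machine_type = "n1-standard-2"
}
```

Once the new pool's nodes are Ready and workloads have rescheduled, lower the old pool's `worker_count` (eventually to zero) and apply again.
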
## Upgrade poseidon/ct diff --git a/google-cloud/fedora-coreos/kubernetes/README.md b/google-cloud/fedora-coreos/kubernetes/README.md index 08f6c51d..1c6cefc1 100644 --- a/google-cloud/fedora-coreos/kubernetes/README.md +++ b/google-cloud/fedora-coreos/kubernetes/README.md @@ -11,7 +11,7 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster ## Features -* Kubernetes v1.28.3 (upstream) +* Kubernetes v1.31.3 (upstream) * Single or multi-master, [Calico](https://www.projectcalico.org/) or [Cilium](https://github.com/cilium/cilium) or [flannel](https://github.com/coreos/flannel) networking * On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/), SELinux enforcing * Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [preemptible](https://typhoon.psdn.io/fedora-coreos/google-cloud/#preemption) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#hosts) customization diff --git a/google-cloud/fedora-coreos/kubernetes/bootstrap.tf b/google-cloud/fedora-coreos/kubernetes/bootstrap.tf index db71a5a6..5decc2ae 100644 --- a/google-cloud/fedora-coreos/kubernetes/bootstrap.tf +++ b/google-cloud/fedora-coreos/kubernetes/bootstrap.tf @@ -1,6 +1,6 @@ # Kubernetes assets (kubeconfig, manifests) module "bootstrap" { - source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=d151ab77b7ebdfb878ea110c86cc77238189f1ed" + source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=e6a1c7bccfc45ab299b5f8149bc3840f99b30b2b" cluster_name = var.cluster_name api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)] @@ -9,10 +9,8 @@ module "bootstrap" { network_mtu = 1440 pod_cidr = var.pod_cidr service_cidr = var.service_cidr - cluster_domain_suffix = var.cluster_domain_suffix - enable_reporting = var.enable_reporting - enable_aggregation = var.enable_aggregation daemonset_tolerations = var.daemonset_tolerations + components = var.components // temporary external_apiserver_port = 443 diff --git a/google-cloud/fedora-coreos/kubernetes/butane/controller.yaml b/google-cloud/fedora-coreos/kubernetes/butane/controller.yaml index 39e86d88..ab6d7fbd 100644 --- a/google-cloud/fedora-coreos/kubernetes/butane/controller.yaml +++ b/google-cloud/fedora-coreos/kubernetes/butane/controller.yaml @@ -12,7 +12,7 @@ systemd: Wants=network-online.target After=network-online.target [Service] - Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.10 + Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.13 Type=exec ExecStartPre=/bin/mkdir -p /var/lib/etcd ExecStartPre=-/usr/bin/podman rm etcd @@ -54,7 +54,7 @@ systemd: Description=Kubelet (System Container) Wants=rpc-statd.service [Service] - Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3 + Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3 ExecStartPre=/bin/mkdir -p /etc/cni/net.d ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests ExecStartPre=/bin/mkdir -p /opt/cni/bin @@ -111,7 +111,7 @@ systemd: --volume /opt/bootstrap/assets:/assets:ro,Z \ --volume /opt/bootstrap/apply:/apply:ro,Z \ --entrypoint=/apply \ - quay.io/poseidon/kubelet:v1.28.3 + quay.io/poseidon/kubelet:v1.31.3 ExecStartPost=/bin/touch /opt/bootstrap/bootstrap.done ExecStartPost=-/usr/bin/podman stop bootstrap storage: @@ -143,7 +143,7 @@ storage: cgroupDriver: systemd clusterDNS: - ${cluster_dns_service_ip} - clusterDomain: ${cluster_domain_suffix} + 
clusterDomain: cluster.local healthzPort: 0 rotateCertificates: true shutdownGracePeriod: 45s @@ -157,7 +157,7 @@ storage: contents: inline: | #!/bin/bash -e - mkdir -p -- auth tls/etcd tls/k8s static-manifests manifests/coredns manifests-networking + mkdir -p -- auth tls/{etcd,k8s} static-manifests manifests/{coredns,kube-proxy,network} awk '/#####/ {filename=$2; next} {print > filename}' assets mkdir -p /etc/ssl/etcd/etcd mkdir -p /etc/kubernetes/pki @@ -171,8 +171,7 @@ storage: mv static-manifests/* /etc/kubernetes/manifests/ mkdir -p /opt/bootstrap/assets mv manifests /opt/bootstrap/assets/manifests - mv manifests-networking/* /opt/bootstrap/assets/manifests/ - rm -rf assets auth static-manifests tls manifests-networking + rm -rf assets auth static-manifests tls manifests chcon -R -u system_u -t container_file_t /etc/kubernetes/pki - path: /opt/bootstrap/apply mode: 0544 diff --git a/google-cloud/fedora-coreos/kubernetes/controllers.tf b/google-cloud/fedora-coreos/kubernetes/controllers.tf index 3ae436b3..14e60963 100644 --- a/google-cloud/fedora-coreos/kubernetes/controllers.tf +++ b/google-cloud/fedora-coreos/kubernetes/controllers.tf @@ -43,7 +43,8 @@ resource "google_compute_instance" "controllers" { initialize_params { image = data.google_compute_image.fedora-coreos.self_link - size = var.disk_size + size = var.controller_disk_size + type = var.controller_disk_type } } @@ -55,8 +56,9 @@ resource "google_compute_instance" "controllers" { } } - can_ip_forward = true - tags = ["${var.cluster_name}-controller"] + can_ip_forward = true + allow_stopping_for_update = true + tags = ["${var.cluster_name}-controller"] lifecycle { ignore_changes = [ @@ -80,7 +82,6 @@ data "ct_config" "controllers" { kubeconfig = indent(10, module.bootstrap.kubeconfig-kubelet) ssh_authorized_key = var.ssh_authorized_key cluster_dns_service_ip = cidrhost(var.service_cidr, 10) - cluster_domain_suffix = var.cluster_domain_suffix }) strict = true snippets = var.controller_snippets diff --git a/google-cloud/fedora-coreos/kubernetes/network.tf b/google-cloud/fedora-coreos/kubernetes/network.tf index 40c490b6..fcbcd9af 100644 --- a/google-cloud/fedora-coreos/kubernetes/network.tf +++ b/google-cloud/fedora-coreos/kubernetes/network.tf @@ -112,13 +112,14 @@ resource "google_compute_firewall" "internal-vxlan" { target_tags = ["${var.cluster_name}-controller", "${var.cluster_name}-worker"] } -# Cilium VXLAN -resource "google_compute_firewall" "internal-linux-vxlan" { +# Cilium +resource "google_compute_firewall" "internal-cilium" { count = var.networking == "cilium" ? 
1 : 0 - name = "${var.cluster_name}-linux-vxlan" + name = "${var.cluster_name}-cilium" network = google_compute_network.network.name + # vxlan allow { protocol = "udp" ports = [8472] @@ -128,12 +129,17 @@ resource "google_compute_firewall" "internal-linux-vxlan" { allow { protocol = "icmp" } - allow { protocol = "tcp" ports = [4240] } + # metrics + allow { + protocol = "tcp" + ports = [9962, 9963, 9964, 9965] + } + source_tags = ["${var.cluster_name}-controller", "${var.cluster_name}-worker"] target_tags = ["${var.cluster_name}-controller", "${var.cluster_name}-worker"] } diff --git a/google-cloud/fedora-coreos/kubernetes/variables.tf b/google-cloud/fedora-coreos/kubernetes/variables.tf index 4ea49983..84d63179 100644 --- a/google-cloud/fedora-coreos/kubernetes/variables.tf +++ b/google-cloud/fedora-coreos/kubernetes/variables.tf @@ -22,30 +22,6 @@ variable "dns_zone_name" { # instances -variable "controller_count" { - type = number - description = "Number of controllers (i.e. masters)" - default = 1 -} - -variable "worker_count" { - type = number - description = "Number of workers" - default = 1 -} - -variable "controller_type" { - type = string - description = "Machine type for controllers (see `gcloud compute machine-types list`)" - default = "n1-standard-1" -} - -variable "worker_type" { - type = string - description = "Machine type for controllers (see `gcloud compute machine-types list`)" - default = "n1-standard-1" -} - variable "os_stream" { type = string description = "Fedora CoreOS stream for compute instances (e.g. stable, testing, next)" @@ -57,12 +33,62 @@ variable "os_stream" { } } -variable "disk_size" { +variable "controller_count" { + type = number + description = "Number of controllers (i.e. masters)" + default = 1 +} + +variable "controller_type" { + type = string + description = "Machine type for controllers (see `gcloud compute machine-types list`)" + default = "n1-standard-1" +} + +variable "controller_disk_size" { type = number description = "Size of the disk in GB" default = 30 } +variable "controller_disk_type" { + type = string + description = "Type of managed disk for controller node(s)" + default = "pd-standard" + validation { + condition = contains(["pd-standard", "pd-ssd", "pd-balanced"], var.controller_disk_type) + error_message = "The controller_disk_type must be pd-standard, pd-ssd or pd-balanced." + } +} + +variable "worker_count" { + type = number + description = "Number of workers" + default = 1 +} + +variable "worker_type" { + type = string + description = "Machine type for workers (see `gcloud compute machine-types list`)" + default = "n1-standard-1" +} + +variable "worker_disk_size" { + type = number + description = "Size of the disk in GB" + default = 30 +} + +variable "worker_disk_type" { + type = string + description = "Type of managed disk for worker nodes" + default = "pd-standard" + validation { + condition = contains(["pd-standard", "pd-ssd", "pd-balanced"], var.worker_disk_type) + error_message = "The worker_disk_type must be pd-standard, pd-ssd or pd-balanced." 
+ } +} + variable "worker_preemptible" { type = bool description = "If enabled, Compute Engine will terminate workers randomly within 24 hours" @@ -109,35 +135,32 @@ EOD default = "10.3.0.0/16" } - -variable "enable_reporting" { - type = bool - description = "Enable usage or analytics reporting to upstreams (Calico)" - default = false -} - -variable "enable_aggregation" { - type = bool - description = "Enable the Kubernetes Aggregation Layer" - default = true -} - variable "worker_node_labels" { type = list(string) description = "List of initial worker node labels" default = [] } -# unofficial, undocumented, unsupported - -variable "cluster_domain_suffix" { - type = string - description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. foo.default.svc.cluster.local) " - default = "cluster.local" -} +# advanced variable "daemonset_tolerations" { type = list(string) description = "List of additional taint keys kube-system DaemonSets should tolerate (e.g. ['custom-role', 'gpu-role'])" default = [] } + +variable "components" { + description = "Configure pre-installed cluster components" + # Component configs are passed through to terraform-render-bootstrap, + # which handles type enforcement and defines defaults + # https://github.com/poseidon/terraform-render-bootstrap/blob/main/variables.tf#L95 + type = object({ + enable = optional(bool) + coredns = optional(map(any)) + kube_proxy = optional(map(any)) + flannel = optional(map(any)) + calico = optional(map(any)) + cilium = optional(map(any)) + }) + default = null +} diff --git a/google-cloud/fedora-coreos/kubernetes/workers.tf b/google-cloud/fedora-coreos/kubernetes/workers.tf index d35db25f..18d01fbc 100644 --- a/google-cloud/fedora-coreos/kubernetes/workers.tf +++ b/google-cloud/fedora-coreos/kubernetes/workers.tf @@ -9,15 +9,14 @@ module "workers" { worker_count = var.worker_count machine_type = var.worker_type os_stream = var.os_stream - disk_size = var.disk_size + disk_size = var.worker_disk_size + disk_type = var.worker_disk_type preemptible = var.worker_preemptible # configuration - kubeconfig = module.bootstrap.kubeconfig-kubelet - ssh_authorized_key = var.ssh_authorized_key - service_cidr = var.service_cidr - cluster_domain_suffix = var.cluster_domain_suffix - snippets = var.worker_snippets - node_labels = var.worker_node_labels + kubeconfig = module.bootstrap.kubeconfig-kubelet + ssh_authorized_key = var.ssh_authorized_key + service_cidr = var.service_cidr + snippets = var.worker_snippets + node_labels = var.worker_node_labels } - diff --git a/google-cloud/fedora-coreos/kubernetes/workers/butane/worker.yaml b/google-cloud/fedora-coreos/kubernetes/workers/butane/worker.yaml index 9e4b5133..112251f1 100644 --- a/google-cloud/fedora-coreos/kubernetes/workers/butane/worker.yaml +++ b/google-cloud/fedora-coreos/kubernetes/workers/butane/worker.yaml @@ -26,7 +26,7 @@ systemd: Description=Kubelet (System Container) Wants=rpc-statd.service [Service] - Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3 + Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3 ExecStartPre=/bin/mkdir -p /etc/cni/net.d ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests ExecStartPre=/bin/mkdir -p /opt/cni/bin @@ -98,7 +98,7 @@ storage: cgroupDriver: systemd clusterDNS: - ${cluster_dns_service_ip} - clusterDomain: ${cluster_domain_suffix} + clusterDomain: cluster.local healthzPort: 0 rotateCertificates: true shutdownGracePeriod: 45s diff --git 
a/google-cloud/fedora-coreos/kubernetes/workers/variables.tf b/google-cloud/fedora-coreos/kubernetes/workers/variables.tf index 8fed0043..7fd7a507 100644 --- a/google-cloud/fedora-coreos/kubernetes/workers/variables.tf +++ b/google-cloud/fedora-coreos/kubernetes/workers/variables.tf @@ -51,6 +51,16 @@ variable "disk_size" { default = 30 } +variable "disk_type" { + type = string + description = "Type of managed disk" + default = "pd-standard" + validation { + condition = contains(["pd-standard", "pd-ssd", "pd-balanced"], var.disk_type) + error_message = "The disk_type must be pd-standard, pd-ssd or pd-balanced." + } +} + variable "preemptible" { type = bool description = "If enabled, Compute Engine will terminate instances randomly within 24 hours" @@ -96,13 +106,7 @@ variable "node_taints" { default = [] } -# unofficial, undocumented, unsupported, temporary - -variable "cluster_domain_suffix" { - type = string - description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. foo.default.svc.cluster.local) " - default = "cluster.local" -} +# advanced variable "accelerator_type" { type = string @@ -115,4 +119,3 @@ variable "accelerator_count" { default = "0" description = "Number of compute engine accelerators" } - diff --git a/google-cloud/fedora-coreos/kubernetes/workers/workers.tf b/google-cloud/fedora-coreos/kubernetes/workers/workers.tf index b9daf2ec..043d445f 100644 --- a/google-cloud/fedora-coreos/kubernetes/workers/workers.tf +++ b/google-cloud/fedora-coreos/kubernetes/workers/workers.tf @@ -8,7 +8,7 @@ resource "google_compute_region_instance_group_manager" "workers" { region = var.region version { name = "default" - instance_template = google_compute_instance_template.worker.self_link + instance_template = google_compute_region_instance_template.worker.self_link } # Roll out MIG instance template changes by replacing instances. 
@@ -58,10 +58,11 @@ resource "google_compute_health_check" "worker" { } # Worker instance template -resource "google_compute_instance_template" "worker" { +resource "google_compute_region_instance_template" "worker" { name_prefix = "${var.name}-worker-" description = "${var.name} worker instance template" machine_type = var.machine_type + region = var.region metadata = { user-data = data.ct_config.worker.rendered @@ -80,6 +81,7 @@ resource "google_compute_instance_template" "worker" { boot = true source_image = data.google_compute_image.fedora-coreos.self_link disk_size_gb = var.disk_size + disk_type = var.disk_type } network_interface { @@ -111,7 +113,6 @@ data "ct_config" "worker" { kubeconfig = indent(10, var.kubeconfig) ssh_authorized_key = var.ssh_authorized_key cluster_dns_service_ip = cidrhost(var.service_cidr, 10) - cluster_domain_suffix = var.cluster_domain_suffix node_labels = join(",", var.node_labels) node_taints = join(",", var.node_taints) }) diff --git a/google-cloud/flatcar-linux/kubernetes/README.md b/google-cloud/flatcar-linux/kubernetes/README.md index be080790..21ecaf52 100644 --- a/google-cloud/flatcar-linux/kubernetes/README.md +++ b/google-cloud/flatcar-linux/kubernetes/README.md @@ -11,7 +11,7 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster ## Features -* Kubernetes v1.28.3 (upstream) +* Kubernetes v1.31.3 (upstream) * Single or multi-master, [Calico](https://www.projectcalico.org/) or [Cilium](https://github.com/cilium/cilium) or [flannel](https://github.com/coreos/flannel) networking * On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/) * Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [preemptible](https://typhoon.psdn.io/flatcar-linux/google-cloud/#preemption) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#hosts) customization diff --git a/google-cloud/flatcar-linux/kubernetes/bootstrap.tf b/google-cloud/flatcar-linux/kubernetes/bootstrap.tf index db71a5a6..5decc2ae 100644 --- a/google-cloud/flatcar-linux/kubernetes/bootstrap.tf +++ b/google-cloud/flatcar-linux/kubernetes/bootstrap.tf @@ -1,6 +1,6 @@ # Kubernetes assets (kubeconfig, manifests) module "bootstrap" { - source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=d151ab77b7ebdfb878ea110c86cc77238189f1ed" + source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=e6a1c7bccfc45ab299b5f8149bc3840f99b30b2b" cluster_name = var.cluster_name api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)] @@ -9,10 +9,8 @@ module "bootstrap" { network_mtu = 1440 pod_cidr = var.pod_cidr service_cidr = var.service_cidr - cluster_domain_suffix = var.cluster_domain_suffix - enable_reporting = var.enable_reporting - enable_aggregation = var.enable_aggregation daemonset_tolerations = var.daemonset_tolerations + components = var.components // temporary external_apiserver_port = 443 diff --git a/google-cloud/flatcar-linux/kubernetes/butane/controller.yaml b/google-cloud/flatcar-linux/kubernetes/butane/controller.yaml index 9e512494..5a21f7da 100644 --- a/google-cloud/flatcar-linux/kubernetes/butane/controller.yaml +++ b/google-cloud/flatcar-linux/kubernetes/butane/controller.yaml @@ -11,7 +11,7 @@ systemd: Requires=docker.service After=docker.service [Service] - Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.10 + 
Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.13 ExecStartPre=/usr/bin/docker run -d \ --name etcd \ --network host \ @@ -56,7 +56,7 @@ systemd: After=docker.service Wants=rpc-statd.service [Service] - Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3 + Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3 ExecStartPre=/bin/mkdir -p /etc/cni/net.d ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests ExecStartPre=/bin/mkdir -p /opt/cni/bin @@ -105,7 +105,7 @@ systemd: Type=oneshot RemainAfterExit=true WorkingDirectory=/opt/bootstrap - Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3 + Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3 ExecStart=/usr/bin/docker run \ -v /etc/kubernetes/pki:/etc/kubernetes/pki:ro \ -v /opt/bootstrap/assets:/assets:ro \ @@ -143,7 +143,7 @@ storage: cgroupDriver: systemd clusterDNS: - ${cluster_dns_service_ip} - clusterDomain: ${cluster_domain_suffix} + clusterDomain: cluster.local healthzPort: 0 rotateCertificates: true shutdownGracePeriod: 45s @@ -157,7 +157,7 @@ storage: contents: inline: | #!/bin/bash -e - mkdir -p -- auth tls/etcd tls/k8s static-manifests manifests/coredns manifests-networking + mkdir -p -- auth tls/{etcd,k8s} static-manifests manifests/{coredns,kube-proxy,network} awk '/#####/ {filename=$2; next} {print > filename}' assets mkdir -p /etc/ssl/etcd/etcd mkdir -p /etc/kubernetes/pki @@ -172,8 +172,7 @@ storage: mv static-manifests/* /etc/kubernetes/manifests/ mkdir -p /opt/bootstrap/assets mv manifests /opt/bootstrap/assets/manifests - mv manifests-networking/* /opt/bootstrap/assets/manifests/ - rm -rf assets auth static-manifests tls manifests-networking + rm -rf assets auth static-manifests tls manifests - path: /opt/bootstrap/apply mode: 0544 contents: diff --git a/google-cloud/flatcar-linux/kubernetes/controllers.tf b/google-cloud/flatcar-linux/kubernetes/controllers.tf index b9233051..8dd0ee70 100644 --- a/google-cloud/flatcar-linux/kubernetes/controllers.tf +++ b/google-cloud/flatcar-linux/kubernetes/controllers.tf @@ -31,8 +31,9 @@ resource "google_compute_instance" "controllers" { name = "${var.cluster_name}-controller-${count.index}" # use a zone in the region and wrap around (e.g. 
controllers > zones) - zone = element(local.zones, count.index) - machine_type = var.controller_type + zone = element(local.zones, count.index) + machine_type = var.controller_type + allow_stopping_for_update = true metadata = { user-data = data.ct_config.controllers.*.rendered[count.index] @@ -43,7 +44,8 @@ resource "google_compute_instance" "controllers" { initialize_params { image = data.google_compute_image.flatcar-linux.self_link - size = var.disk_size + size = var.controller_disk_size + type = var.controller_disk_type } } @@ -80,7 +82,6 @@ data "ct_config" "controllers" { kubeconfig = indent(10, module.bootstrap.kubeconfig-kubelet) ssh_authorized_key = var.ssh_authorized_key cluster_dns_service_ip = cidrhost(var.service_cidr, 10) - cluster_domain_suffix = var.cluster_domain_suffix }) strict = true snippets = var.controller_snippets diff --git a/google-cloud/flatcar-linux/kubernetes/network.tf b/google-cloud/flatcar-linux/kubernetes/network.tf index 40c490b6..fcbcd9af 100644 --- a/google-cloud/flatcar-linux/kubernetes/network.tf +++ b/google-cloud/flatcar-linux/kubernetes/network.tf @@ -112,13 +112,14 @@ resource "google_compute_firewall" "internal-vxlan" { target_tags = ["${var.cluster_name}-controller", "${var.cluster_name}-worker"] } -# Cilium VXLAN -resource "google_compute_firewall" "internal-linux-vxlan" { +# Cilium +resource "google_compute_firewall" "internal-cilium" { count = var.networking == "cilium" ? 1 : 0 - name = "${var.cluster_name}-linux-vxlan" + name = "${var.cluster_name}-cilium" network = google_compute_network.network.name + # vxlan allow { protocol = "udp" ports = [8472] @@ -128,12 +129,17 @@ resource "google_compute_firewall" "internal-linux-vxlan" { allow { protocol = "icmp" } - allow { protocol = "tcp" ports = [4240] } + # metrics + allow { + protocol = "tcp" + ports = [9962, 9963, 9964, 9965] + } + source_tags = ["${var.cluster_name}-controller", "${var.cluster_name}-worker"] target_tags = ["${var.cluster_name}-controller", "${var.cluster_name}-worker"] } diff --git a/google-cloud/flatcar-linux/kubernetes/variables.tf b/google-cloud/flatcar-linux/kubernetes/variables.tf index e13da824..fd2f77b8 100644 --- a/google-cloud/flatcar-linux/kubernetes/variables.tf +++ b/google-cloud/flatcar-linux/kubernetes/variables.tf @@ -22,30 +22,6 @@ variable "dns_zone_name" { # instances -variable "controller_count" { - type = number - description = "Number of controllers (i.e. masters)" - default = 1 -} - -variable "worker_count" { - type = number - description = "Number of workers" - default = 1 -} - -variable "controller_type" { - type = string - description = "Machine type for controllers (see `gcloud compute machine-types list`)" - default = "n1-standard-1" -} - -variable "worker_type" { - type = string - description = "Machine type for controllers (see `gcloud compute machine-types list`)" - default = "n1-standard-1" -} - variable "os_image" { type = string description = "Flatcar Linux image for compute instances (flatcar-stable, flatcar-beta, flatcar-alpha)" @@ -57,12 +33,62 @@ variable "os_image" { } } -variable "disk_size" { +variable "controller_count" { + type = number + description = "Number of controllers (i.e. 
masters)" + default = 1 +} + +variable "controller_type" { + type = string + description = "Machine type for controllers (see `gcloud compute machine-types list`)" + default = "n1-standard-1" +} + +variable "controller_disk_size" { type = number description = "Size of the disk in GB" default = 30 } +variable "controller_disk_type" { + type = string + description = "Type of managed disk for controller node(s)" + default = "pd-standard" + validation { + condition = contains(["pd-standard", "pd-ssd", "pd-balanced"], var.controller_disk_type) + error_message = "The controller_disk_type must be pd-standard, pd-ssd or pd-balanced." + } +} + +variable "worker_count" { + type = number + description = "Number of workers" + default = 1 +} + +variable "worker_type" { + type = string + description = "Machine type for controllers (see `gcloud compute machine-types list`)" + default = "n1-standard-1" +} + +variable "worker_disk_size" { + type = number + description = "Size of the disk in GB" + default = 30 +} + +variable "worker_disk_type" { + type = string + description = "Type of managed disk for worker nodes" + default = "pd-standard" + validation { + condition = contains(["pd-standard", "pd-ssd", "pd-balanced"], var.worker_disk_type) + error_message = "The worker_disk_type must be pd-standard, pd-ssd or pd-balanced." + } +} + variable "worker_preemptible" { type = bool description = "If enabled, Compute Engine will terminate workers randomly within 24 hours" @@ -109,35 +135,32 @@ EOD default = "10.3.0.0/16" } - -variable "enable_reporting" { - type = bool - description = "Enable usage or analytics reporting to upstreams (Calico)" - default = false -} - -variable "enable_aggregation" { - type = bool - description = "Enable the Kubernetes Aggregation Layer" - default = true -} - variable "worker_node_labels" { type = list(string) description = "List of initial worker node labels" default = [] } -# unofficial, undocumented, unsupported - -variable "cluster_domain_suffix" { - type = string - description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. foo.default.svc.cluster.local) " - default = "cluster.local" -} +# advanced variable "daemonset_tolerations" { type = list(string) description = "List of additional taint keys kube-system DaemonSets should tolerate (e.g. 
['custom-role', 'gpu-role'])" default = [] } + +variable "components" { + description = "Configure pre-installed cluster components" + # Component configs are passed through to terraform-render-bootstrap, + # which handles type enforcement and defines defaults + # https://github.com/poseidon/terraform-render-bootstrap/blob/main/variables.tf#L95 + type = object({ + enable = optional(bool) + coredns = optional(map(any)) + kube_proxy = optional(map(any)) + flannel = optional(map(any)) + calico = optional(map(any)) + cilium = optional(map(any)) + }) + default = null +} diff --git a/google-cloud/flatcar-linux/kubernetes/versions.tf b/google-cloud/flatcar-linux/kubernetes/versions.tf index 23cec3aa..4c0366e0 100644 --- a/google-cloud/flatcar-linux/kubernetes/versions.tf +++ b/google-cloud/flatcar-linux/kubernetes/versions.tf @@ -7,7 +7,7 @@ terraform { null = ">= 2.1" ct = { source = "poseidon/ct" - version = "~> 0.11" + version = "~> 0.13" } } } diff --git a/google-cloud/flatcar-linux/kubernetes/workers.tf b/google-cloud/flatcar-linux/kubernetes/workers.tf index 91a32bd0..d539f692 100644 --- a/google-cloud/flatcar-linux/kubernetes/workers.tf +++ b/google-cloud/flatcar-linux/kubernetes/workers.tf @@ -9,15 +9,14 @@ module "workers" { worker_count = var.worker_count machine_type = var.worker_type os_image = var.os_image - disk_size = var.disk_size + disk_size = var.worker_disk_size + disk_type = var.worker_disk_type preemptible = var.worker_preemptible # configuration - kubeconfig = module.bootstrap.kubeconfig-kubelet - ssh_authorized_key = var.ssh_authorized_key - service_cidr = var.service_cidr - cluster_domain_suffix = var.cluster_domain_suffix - snippets = var.worker_snippets - node_labels = var.worker_node_labels + kubeconfig = module.bootstrap.kubeconfig-kubelet + ssh_authorized_key = var.ssh_authorized_key + service_cidr = var.service_cidr + snippets = var.worker_snippets + node_labels = var.worker_node_labels } - diff --git a/google-cloud/flatcar-linux/kubernetes/workers/butane/worker.yaml b/google-cloud/flatcar-linux/kubernetes/workers/butane/worker.yaml index af226674..1cf68fcf 100644 --- a/google-cloud/flatcar-linux/kubernetes/workers/butane/worker.yaml +++ b/google-cloud/flatcar-linux/kubernetes/workers/butane/worker.yaml @@ -28,7 +28,7 @@ systemd: After=docker.service Wants=rpc-statd.service [Service] - Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3 + Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3 ExecStartPre=/bin/mkdir -p /etc/cni/net.d ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests ExecStartPre=/bin/mkdir -p /opt/cni/bin @@ -98,7 +98,7 @@ storage: cgroupDriver: systemd clusterDNS: - ${cluster_dns_service_ip} - clusterDomain: ${cluster_domain_suffix} + clusterDomain: cluster.local healthzPort: 0 rotateCertificates: true shutdownGracePeriod: 45s diff --git a/google-cloud/flatcar-linux/kubernetes/workers/variables.tf b/google-cloud/flatcar-linux/kubernetes/workers/variables.tf index 1d4f9487..b7e803fb 100644 --- a/google-cloud/flatcar-linux/kubernetes/workers/variables.tf +++ b/google-cloud/flatcar-linux/kubernetes/workers/variables.tf @@ -51,6 +51,16 @@ variable "disk_size" { default = 30 } +variable "disk_type" { + type = string + description = "Type of managed disk" + default = "pd-standard" + validation { + condition = contains(["pd-standard", "pd-ssd", "pd-balanced"], var.disk_type) + error_message = "The disk_type must be pd-standard, pd-ssd or pd-balanced." 
+ } +} + variable "preemptible" { type = bool description = "If enabled, Compute Engine will terminate instances randomly within 24 hours" @@ -96,13 +106,7 @@ variable "node_taints" { default = [] } -# unofficial, undocumented, unsupported, temporary - -variable "cluster_domain_suffix" { - type = string - description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. foo.default.svc.cluster.local) " - default = "cluster.local" -} +# advanced variable "accelerator_type" { type = string @@ -115,4 +119,3 @@ variable "accelerator_count" { default = "0" description = "Number of compute engine accelerators" } - diff --git a/google-cloud/flatcar-linux/kubernetes/workers/versions.tf b/google-cloud/flatcar-linux/kubernetes/workers/versions.tf index 894b24b6..7524cee7 100644 --- a/google-cloud/flatcar-linux/kubernetes/workers/versions.tf +++ b/google-cloud/flatcar-linux/kubernetes/workers/versions.tf @@ -6,7 +6,7 @@ terraform { google = ">= 2.19" ct = { source = "poseidon/ct" - version = "~> 0.11" + version = "~> 0.13" } } } diff --git a/google-cloud/flatcar-linux/kubernetes/workers/workers.tf b/google-cloud/flatcar-linux/kubernetes/workers/workers.tf index b3a87b0c..a83bdd0d 100644 --- a/google-cloud/flatcar-linux/kubernetes/workers/workers.tf +++ b/google-cloud/flatcar-linux/kubernetes/workers/workers.tf @@ -8,7 +8,7 @@ resource "google_compute_region_instance_group_manager" "workers" { region = var.region version { name = "default" - instance_template = google_compute_instance_template.worker.self_link + instance_template = google_compute_region_instance_template.worker.self_link } # Roll out MIG instance template changes by replacing instances. @@ -58,10 +58,11 @@ resource "google_compute_health_check" "worker" { } # Worker instance template -resource "google_compute_instance_template" "worker" { +resource "google_compute_region_instance_template" "worker" { name_prefix = "${var.name}-worker-" description = "Worker Instance template" machine_type = var.machine_type + region = var.region metadata = { user-data = data.ct_config.worker.rendered @@ -80,6 +81,7 @@ resource "google_compute_instance_template" "worker" { boot = true source_image = data.google_compute_image.flatcar-linux.self_link disk_size_gb = var.disk_size + disk_type = var.disk_type } network_interface { @@ -111,7 +113,6 @@ data "ct_config" "worker" { kubeconfig = indent(10, var.kubeconfig) ssh_authorized_key = var.ssh_authorized_key cluster_dns_service_ip = cidrhost(var.service_cidr, 10) - cluster_domain_suffix = var.cluster_domain_suffix node_labels = join(",", var.node_labels) node_taints = join(",", var.node_taints) }) diff --git a/requirements.txt b/requirements.txt index be153bc6..7cd147d9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -mkdocs==1.5.3 -mkdocs-material==9.4.7 -pygments==2.16.1 -pymdown-extensions==10.3.1 +mkdocs==1.6.1 +mkdocs-material==9.5.46 +pygments==2.18.0 +pymdown-extensions==10.12