Mirror of https://github.com/puppetmaster/typhoon.git (synced 2025-07-31 21:41:34 +02:00)
Compare commits
374 Commits
10
.github/PULL_REQUEST_TEMPLATE.md
vendored
@@ -1,10 +0,0 @@
|
||||
High level description of the change.
|
||||
|
||||
* Specific change
|
||||
* Specific change
|
||||
|
||||
## Testing
|
||||
|
||||
Describe your work to validate the change works.
|
||||
|
||||
rel: issue number (if applicable)
|
3
.github/dependabot.yaml
vendored
@@ -4,6 +4,3 @@ updates:
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: weekly
|
||||
pull-request-branch-name:
|
||||
separator: "-"
|
||||
open-pull-requests-limit: 3
|
||||
|
12
.github/release.yaml
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
changelog:
|
||||
categories:
|
||||
- title: Contributions
|
||||
labels:
|
||||
- '*'
|
||||
exclude:
|
||||
labels:
|
||||
- dependencies
|
||||
- no-release-note
|
||||
- title: Dependencies
|
||||
labels:
|
||||
- dependencies
|
12
.github/workflows/publish.yaml
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
name: publish
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- release-docs
|
||||
jobs:
|
||||
mkdocs:
|
||||
name: mkdocs
|
||||
uses: poseidon/matchbox/.github/workflows/mkdocs-pages.yaml@main
|
||||
# Add content write for GitHub Pages
|
||||
permissions:
|
||||
contents: write
|
2
.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
site/
|
||||
venv/
|
572
CHANGES.md
@@ -4,6 +4,578 @@ Notable changes between versions.
|
||||
|
||||
## Latest
|
||||
|
||||
## v1.31.3
|
||||
|
||||
* Kubernetes [v1.31.2](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.31.md#v1312)
|
||||
* Update CoreDNS from v1.11.3 to v1.11.4
|
||||
* Update Cilium from v1.16.3 to [v1.16.4](https://github.com/cilium/cilium/releases/tag/v1.16.4)
|
||||
|
||||
### Deprecations
|
||||
|
||||
* Plan to drop support for using Calico CNI, recommend everyone use the Cilium default
|
||||
|
||||
## v1.31.2
|
||||
|
||||
* Kubernetes [v1.31.2](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.31.md#v1312)
|
||||
* Update Cilium from v1.16.1 to [v1.16.3](https://github.com/cilium/cilium/releases/tag/v1.16.3)
|
||||
* Update flannel from v0.25.6 to [v0.26.0](https://github.com/flannel-io/flannel/releases/tag/v0.26.0)
|
||||
|
||||
## v1.31.1
|
||||
|
||||
* Kubernetes [v1.31.1](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.31.md#v1311)
|
||||
* Update flannel from v0.25.5 to [v0.25.6](https://github.com/flannel-io/flannel/releases/tag/v0.25.6)
|
||||
|
||||
### Google
|
||||
|
||||
* Add `controller_disk_type` and `worker_disk_type` variables ([#1513](https://github.com/poseidon/typhoon/pull/1513))
|
||||
* Add explicit `region` field to regional worker instance templates ([#1524](https://github.com/poseidon/typhoon/pull/1524))
|
||||
|
||||
## v1.31.0
|
||||
|
||||
* Kubernetes [v1.31.0](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.31.md#v1310)
|
||||
* Use Cilium kube-proxy replacement mode when `cilium` networking is chosen ([#1501](https://github.com/poseidon/typhoon/pull/1501))
|
||||
* Fix invalid flannel-cni container image for those using `flannel` networking ([#1497](https://github.com/poseidon/typhoon/pull/1497))
|
||||
|
||||
### AWS
|
||||
|
||||
* Use EC2 resource-based hostnames instead of IP-based hostnames ([#1499](https://github.com/poseidon/typhoon/pull/1499))
|
||||
* The Amazon DNS server can resolve A and AAAA queries to IPv4 and IPv6 node addresses
|
||||
* Tag controller node EBS volumes with a name based on the controller node name
|
||||
|
||||
### Google
|
||||
|
||||
* Use `google_compute_region_instance_template` instead of `google_compute_instance_template`
|
||||
* Google's regional instance template metadata is kept in the associated region for greater resiliency. The "global" instance templates were kept in a single region
|
||||
|
||||
## v1.30.4
|
||||
|
||||
* Kubernetes [v1.30.4](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.30.md#v1304)
|
||||
* Update Cilium from v1.15.7 to [v1.16.1](https://github.com/cilium/cilium/releases/tag/v1.16.1)
|
||||
* Update CoreDNS from v1.11.1 to v1.11.3
|
||||
* Remove `enable_aggregation` variable for Kubernetes Aggregation Layer, always set to true
|
||||
* Remove `cluster_domain_suffix` variable, always use "cluster.local"
|
||||
* Remove `enable_reporting` variable for analytics, always set to false
|
||||
|
||||
## v1.30.3
|
||||
|
||||
* Kubernetes [v1.30.3](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.30.md#v1303)
|
||||
* Update Cilium from v1.15.6 to [v1.15.7](https://github.com/cilium/cilium/releases/tag/v1.15.7)
|
||||
* Update flannel from v0.25.4 to [v0.25.5](https://github.com/flannel-io/flannel/releases/tag/v0.25.5)
|
||||
|
||||
### AWS
|
||||
|
||||
* Configure controller and worker disks ([#1482](https://github.com/poseidon/typhoon/pull/1482))
|
||||
* Add `controller_disk_type`, `controller_disk_size`, and `controller_disk_iops` variables
|
||||
* Add `worker_disk_type`, `worker_disk_size`, and `worker_disk_iops` variables
|
||||
* Remove `disk_type`, `disk_size`, and `disk_iops` variables
|
||||
* Fix propagating settings to worker disks, previously ignored
|
||||
* Configure CPU pricing model for burstable instance types ([#1482](https://github.com/poseidon/typhoon/pull/1482))
|
||||
* Add `controller_cpu_credits` and `worker_cpu_credits` variables (`standard` or `unlimited`)
|
||||
* Configure controller or worker instance architecture ([#1485](https://github.com/poseidon/typhoon/pull/1485))
|
||||
* Add `controller_arch` and `worker_arch` variables (`amd64` or `arm64`)
|
||||
* Remove `arch` variable
|
||||
|
||||
```diff
|
||||
module "cluster" {
|
||||
...
|
||||
- arch = "amd64"
|
||||
- disk_type = "gp3"
|
||||
- disk_size = 30
|
||||
- disk_iops = 3000
|
||||
|
||||
+ controller_arch = "amd64"
|
||||
+ controller_disk_size = 15
|
||||
+ controller_cpu_credits = "standard"
|
||||
+ worker_arch = "amd64"
|
||||
+ worker_disk_size = 22
|
||||
+ worker_cpu_credits = "unlimited"
|
||||
}
|
||||
```
|
||||
|
||||
### Azure
|
||||
|
||||
* Configure the virtual network and subnets with IPv6 private address space
|
||||
* Change `host_cidr` variable (string) to a `network_cidr` object with `ipv4` and `ipv6` fields that list CIDR strings. Leave the variable unset to use the defaults. (**breaking**)
|
||||
* Add support for dual-stack Kubernetes Ingress Load Balancing
|
||||
* Add a public IPv6 frontend, 80/443 rules, and a worker-ipv6 backend pool
|
||||
* Change the `controller_address_prefixes` output from a list of strings to an object with `ipv4` and `ipv6` fields. Most Azure resources can't accept a mix, so these are split out (**breaking**)
|
||||
* Change the `worker_address_prefixes` output from a list of strings to an object with `ipv4` and `ipv6` fields. Most Azure resources can't accept a mix, so these are split out (**breaking**)
|
||||
* Change the `backend_address_pool_id` output (and worker module input) from a string to an object with `ipv4` and `ipv6` fields that list ids (**breaking**)
|
||||
* Configure nodes to have outbound IPv6 internet connectivity (analogous to IPv4 SNAT)
|
||||
* Configure controller nodes to have a public IPv6 address
|
||||
* Configure worker nodes to use outbound rules and the load balancer for SNAT
|
||||
* Extend network security rules to allow IPv6 traffic, analogous to IPv4
|
||||
* Rename `region` variable to `location` to align with Azure platform conventions ([#1469](https://github.com/poseidon/typhoon/pull/1469))
|
||||
* Change worker pools from uniform to flexible orchestration mode ([#1473](https://github.com/poseidon/typhoon/pull/1473))
|
||||
* Add options to allow worker nodes to use ephemeral local disks ([#1473](https://github.com/poseidon/typhoon/pull/1473))
|
||||
* Add `controller_disk_type` and `controller_disk_size` variables
|
||||
* Add `worker_disk_type`, `worker_disk_size`, and `worker_ephemeral_disk` variables
|
||||
* Reduce the number of public IPv4 addresses needed for the Azure load balancer ([#1470](https://github.com/poseidon/typhoon/pull/1470))
|
||||
* Configure controller or worker instance architecture for Flatcar Linux ([#1485](https://github.com/poseidon/typhoon/pull/1485))
|
||||
* Add `controller_arch` and `worker_arch` variables (`amd64` or `arm64`)
|
||||
* Remove `arch` variable
|
||||
|
||||
```diff
|
||||
module "cluster" {
|
||||
...
|
||||
- region = "centralus"
|
||||
+ location = "centralus"
|
||||
# optional
|
||||
- host_cidr = "10.0.0.0/16"
|
||||
+ network_cidr = {
|
||||
+ ipv4 = ["10.0.0.0/16"]
|
||||
+ }
|
||||
|
||||
# instances
|
||||
+ controller_disk_type = "StandardSSD_LRS"
|
||||
+ worker_ephemeral_disk = true
|
||||
}
|
||||
```
|
||||
|
||||
### Google Cloud
|
||||
|
||||
* Allow configuring controller and worker disks ([#1486](https://github.com/poseidon/typhoon/pull/1486)) (see the sketch below)
|
||||
* Add `controller_disk_size` and `worker_disk_size` variables
|
||||
* Remove `disk_size` variable
|
||||
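A hedged sketch of the same migration on Google Cloud, mirroring the AWS and Azure examples above; the sizes shown are placeholders, not recommendations:

```tf
module "cluster" {
  # ...
  # previously: disk_size = 30

  # size controller and worker disks independently (GB values are placeholders)
  controller_disk_size = 30
  worker_disk_size     = 50
}
```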
|
||||
## v1.30.2
|
||||
|
||||
* Kubernetes [v1.30.2](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.30.md#v1302)
|
||||
* Update CoreDNS from v1.9.4 to v1.11.1
|
||||
* Update Cilium from v1.15.5 to [v1.15.6](https://github.com/cilium/cilium/releases/tag/v1.15.6)
|
||||
* Update flannel from v0.25.1 to [v0.25.4](https://github.com/flannel-io/flannel/releases/tag/v0.25.4)
|
||||
|
||||
## v1.30.1
|
||||
|
||||
* Kubernetes [v1.30.1](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.30.md#v1301)
|
||||
* Add firewall rules and security group rules for Cilium and Hubble metrics ([#1449](https://github.com/poseidon/typhoon/pull/1449))
|
||||
* Update Cilium from v1.15.3 to [v1.15.5](https://github.com/cilium/cilium/releases/tag/v1.15.5)
|
||||
* Update flannel from v0.24.4 to [v0.25.1](https://github.com/flannel-io/flannel/releases/tag/v0.25.1)
|
||||
* Introduce a `components` variable to enable/disable/configure pre-installed components ([#1453](https://github.com/poseidon/typhoon/pull/1453)) (see the sketch below)
|
||||
* Add Terraform modules for `coredns`, `cilium`, and `flannel` components
|
||||
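A hedged sketch of the new `components` variable; the object fields (`enable` and per-component blocks) are assumptions inferred from the description above, not verified against the module's variable schema:

```tf
module "cluster" {
  # ...
  networking = "flannel"

  # assumed shape: keep pre-installed components on, but skip flannel so it
  # can be managed separately via the addons/flannel Terraform module
  components = {
    enable = true
    flannel = {
      enable = false
    }
  }
}
```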
|
||||
### Azure
|
||||
|
||||
* Add `controller_security_group_name` output for adding custom security rules ([#1450](https://github.com/poseidon/typhoon/pull/1450))
|
||||
* Add `controller_address_prefixes` output for adding custom security rules ([#1450](https://github.com/poseidon/typhoon/pull/1450))
|
||||
|
||||
## v1.30.0
|
||||
|
||||
* Kubernetes [v1.30.0](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.30.md#v1300)
|
||||
* Update etcd from v3.5.12 to [v3.5.13](https://github.com/etcd-io/etcd/releases/tag/v3.5.13)
|
||||
* Update Cilium from v1.15.2 to [v1.15.3](https://github.com/cilium/cilium/releases/tag/v1.15.3)
|
||||
* Update Calico from v3.27.2 to [v3.27.3](https://github.com/projectcalico/calico/releases/tag/v3.27.3)
|
||||
|
||||
## v1.29.3
|
||||
|
||||
* Kubernetes [v1.29.3](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.29.md#v1293)
|
||||
* Update Cilium from v1.15.1 to [v1.15.2](https://github.com/cilium/cilium/releases/tag/v1.15.2)
|
||||
* Update flannel from v0.24.2 to [v0.24.4](https://github.com/flannel-io/flannel/releases/tag/v0.24.4)
|
||||
|
||||
## v1.29.2
|
||||
|
||||
* Kubernetes [v1.29.2](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.29.md#v1292)
|
||||
* Update etcd from v3.5.10 to [v3.5.12](https://github.com/etcd-io/etcd/releases/tag/v3.5.12)
|
||||
* Update Cilium from v1.14.3 to [v1.15.1](https://github.com/cilium/cilium/releases/tag/v1.15.1)
|
||||
* Update Calico from v3.26.3 to [v3.27.2](https://github.com/projectcalico/calico/releases/tag/v3.27.2)
|
||||
* Fix upstream incompatibility with Fedora CoreOS ([calico#8372](https://github.com/projectcalico/calico/issues/8372))
|
||||
* Update flannel from v0.22.2 to [v0.24.2](https://github.com/flannel-io/flannel/releases/tag/v0.24.2)
|
||||
* Add an `install_container_networking` variable (default `true`) ([#1421](https://github.com/poseidon/typhoon/pull/1421)) (see the sketch after this list)
|
||||
* When `true`, the chosen container `networking` provider is installed during cluster bootstrap
|
||||
* Set `false` to self-manage the container networking provider. This allows flannel, Calico, or Cilium
|
||||
to be managed via Terraform (like any other Kubernetes resource). Nodes will be NotReady until you
|
||||
apply the self-managed container networking provider. This may become the default in the future.
|
||||
* Continue to set `networking` to one of the three supported container networking providers. Most
|
||||
require custom firewall / security policies be present across nodes so they have some infra tie-ins.
|
||||
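A minimal sketch of self-managing the CNI provider as described above; only the two variable names are taken from this changelog, and the rest of the module configuration is elided:

```tf
module "cluster" {
  # ...
  # still declare which provider the cluster's firewall rules should expect
  networking = "cilium"
  # skip installing it at bootstrap; apply the provider yourself via Terraform
  install_container_networking = false
}
```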
|
||||
## v1.29.1
|
||||
|
||||
* Kubernetes [v1.29.1](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.29.md#v1291)
|
||||
|
||||
### AWS
|
||||
|
||||
* Continue to support AWS IMDSv1 ([#1412](https://github.com/poseidon/typhoon/pull/1412))
|
||||
|
||||
### Known Issues
|
||||
|
||||
* Calico and Fedora CoreOS cannot be used together currently ([calico#8372](https://github.com/projectcalico/calico/issues/8372))
|
||||
|
||||
## v1.29.0
|
||||
|
||||
* Kubernetes [v1.29.0](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.29.md#v1290)
|
||||
|
||||
### Known Issues
|
||||
|
||||
* Calico and Fedora CoreOS cannot be used together currently ([calico#8372](https://github.com/projectcalico/calico/issues/8372))
|
||||
|
||||
## v1.28.4
|
||||
|
||||
* Kubernetes [v1.28.4](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.28.md#v1284)
|
||||
|
||||
## v1.28.3
|
||||
|
||||
* Kubernetes [v1.28.3](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.28.md#v1283)
|
||||
* Update etcd from v3.5.9 to [v3.5.10](https://github.com/etcd-io/etcd/releases/tag/v3.5.10)
|
||||
* Update Cilium from v1.14.2 to [v1.14.3](https://github.com/cilium/cilium/releases/tag/v1.14.3)
|
||||
* Workaround problems in Cilium v1.14's partial `kube-proxy` implementation ([#365](https://github.com/poseidon/terraform-render-bootstrap/pull/365))
|
||||
* Update Calico from v3.26.1 to [v3.26.3](https://github.com/projectcalico/calico/releases/tag/v3.26.3)
|
||||
|
||||
### Google Cloud
|
||||
|
||||
* Allow upgrading Google Cloud Terraform provider to v5.x
|
||||
|
||||
## v1.28.2
|
||||
|
||||
* Kubernetes [v1.28.2](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.28.md#v1282)
|
||||
* Update Cilium from v1.14.1 to [v1.14.2](https://github.com/cilium/cilium/releases/tag/v1.14.2)
|
||||
|
||||
### Azure
|
||||
|
||||
* Add optional `azure_authorized_key` variable
|
||||
* Azure obtusely inspects public keys, requires RSA keys, and forbids more secure key formats (e.g. ed25519)
|
||||
* Allow passing a dummy RSA key via `azure_authorized_key` (delete the private key) to satisfy Azure validations, then the usual `ssh_authorized_key` variable can use newer formats (e.g. ed25519), as sketched below
|
||||
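A minimal sketch of the workaround described above; the key strings are placeholders, not real keys:

```tf
module "cluster" {
  # ...
  # throwaway RSA public key only to satisfy Azure's validation (placeholder)
  azure_authorized_key = "ssh-rsa AAAAB3Nza...placeholder"
  # the key actually authorized for SSH access
  ssh_authorized_key = "ssh-ed25519 AAAAC3Nza...placeholder"
}
```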
|
||||
## v1.28.1
|
||||
|
||||
* Kubernetes [v1.28.1](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.28.md#v1281)
|
||||
|
||||
## v1.28.0
|
||||
|
||||
* Kubernetes [v1.28.0](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.28.md#v1280)
|
||||
* Update Cilium from v1.13.4 to [v1.14.1](https://github.com/cilium/cilium/releases/tag/v1.14.1)
|
||||
* Update flannel from v0.22.0 to [v0.22.2](https://github.com/flannel-io/flannel/releases/tag/v0.22.2)
|
||||
|
||||
## v1.27.4
|
||||
|
||||
* Kubernetes [v1.27.4](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.27.md#v1274)
|
||||
|
||||
## v1.27.3
|
||||
|
||||
* Kubernetes [v1.27.3](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.27.md#v1273)
|
||||
* Update etcd from v3.5.7 to [v3.5.9](https://github.com/etcd-io/etcd/releases/tag/v3.5.9)
|
||||
* Update Cilium from v1.13.2 to [v1.13.4](https://github.com/cilium/cilium/releases/tag/v1.13.4)
|
||||
* Update Calico from v3.25.1 to [v3.26.1](https://github.com/projectcalico/calico/releases/tag/v3.26.1)
|
||||
* Update flannel from v0.21.2 to [v0.22.0](https://github.com/flannel-io/flannel/releases/tag/v0.22.0)
|
||||
|
||||
### AWS
|
||||
|
||||
* Allow upgrading AWS Terraform provider to v5.x ([#1353](https://github.com/poseidon/typhoon/pull/1353))
|
||||
|
||||
### Azure
|
||||
|
||||
* Enable boot diagnostics for controller and worker VMs ([#1351](https://github.com/poseidon/typhoon/pull/1351))
|
||||
|
||||
## v1.27.2
|
||||
|
||||
* Kubernetes [v1.27.2](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.27.md#v1272)
|
||||
|
||||
### Fedora CoreOS
|
||||
|
||||
* Update Butane Config version from v1.4.0 to v1.5.0
|
||||
* Require any custom Butane [snippets](https://typhoon.psdn.io/advanced/customization/#hosts) update to v1.5.0
|
||||
* Require Fedora CoreOS `37.20230303.3.0` or newer (with Ignition v2.15)
|
||||
* Require poseidon/ct v0.13+ (**action required**)
|
||||
|
||||
## v1.27.1
|
||||
|
||||
* Kubernetes [v1.27.1](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.27.md#v1271)
|
||||
* Update etcd from v3.5.7 to [v3.5.8](https://github.com/etcd-io/etcd/releases/tag/v3.5.8)
|
||||
* Update Cilium from v1.13.1 to [v1.13.2](https://github.com/cilium/cilium/releases/tag/v1.13.2)
|
||||
* Update Calico from v3.25.0 to [v3.25.1](https://github.com/projectcalico/calico/releases/tag/v3.25.1)
|
||||
|
||||
## v1.26.3
|
||||
|
||||
* Kubernetes [v1.26.3](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.26.md#v1263)
|
||||
* Update Cilium from v1.12.6 to [v1.13.1](https://github.com/cilium/cilium/releases/tag/v1.13.1)
|
||||
|
||||
### Bare-Metal
|
||||
|
||||
* Add `oem_type` variable for Flatcar Linux ([#1302](https://github.com/poseidon/typhoon/pull/1302))
|
||||
|
||||
## v1.26.2
|
||||
|
||||
* Kubernetes [v1.26.2](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.26.md#v1262)
|
||||
* Update Cilium from v1.12.5 to [v1.12.6](https://github.com/cilium/cilium/releases/tag/v1.12.6)
|
||||
* Update flannel from v0.20.2 to [v0.21.2](https://github.com/flannel-io/flannel/releases/tag/v0.21.2)
|
||||
|
||||
### Bare-Metal
|
||||
|
||||
* Add a `worker` module to allow customizing individual worker nodes ([#1295](https://github.com/poseidon/typhoon/pull/1295))
|
||||
|
||||
### Known Issues
|
||||
|
||||
* Fedora CoreOS [issue](https://github.com/coreos/fedora-coreos-tracker/issues/1423) fix is progressing through channels
|
||||
|
||||
## v1.26.1
|
||||
|
||||
* Kubernetes [v1.26.1](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.26.md#v1261)
|
||||
* Update etcd from v3.5.6 to [v3.5.7](https://github.com/etcd-io/etcd/releases/tag/v3.5.7)
|
||||
* Update Cilium from v1.12.4 to [v1.12.5](https://github.com/cilium/cilium/releases/tag/v1.12.5)
|
||||
* Update Calico from v3.24.5 to [v3.25.0](https://github.com/projectcalico/calico/releases/tag/v3.25.0)
|
||||
* Update CoreDNS from v1.9.3 to [v1.9.4](https://github.com/poseidon/terraform-render-bootstrap/pull/341)
|
||||
|
||||
## v1.26.0
|
||||
|
||||
* Kubernetes [v1.26.0](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.26.md#v1260)
|
||||
* Update etcd from v3.5.5 to [v3.5.6](https://github.com/etcd-io/etcd/releases/tag/v3.5.6)
|
||||
* Update Cilium from v1.12.3 to [v1.12.4](https://github.com/cilium/cilium/releases/tag/v1.12.4)
|
||||
* Update flannel from v0.15.1 to [v0.20.2](https://github.com/flannel-io/flannel/releases/tag/v0.20.2)
|
||||
* Reminder: Modules are no longer published to the [Terraform Module Registry](https://registry.terraform.io/search/modules?q=poseidon) ([#1282](https://github.com/poseidon/typhoon/pull/1282))
|
||||
* See [#1282](https://github.com/poseidon/typhoon/pull/1282) and [v1.25.4](https://github.com/poseidon/typhoon/releases/tag/v1.25.4) for details
|
||||
|
||||
### AWS
|
||||
|
||||
* Migrate AWS launch configurations to launch templates ([#1275](https://github.com/poseidon/typhoon/pull/1275))
|
||||
* Starting Dec 31, 2022 AWS won't add new instance types/families to launch configurations
|
||||
|
||||
### Addons
|
||||
|
||||
* Update ingress-nginx from v1.3.1 to [v1.5.1](https://github.com/kubernetes/ingress-nginx/releases/tag/controller-v1.5.1)
|
||||
* Update Prometheus from v2.40.1 to [v2.40.5](https://github.com/prometheus/prometheus/releases/tag/v2.40.5)
|
||||
* Update node-exporter from v1.3.1 to [v1.5.0](https://github.com/prometheus/node_exporter/releases/tag/v1.5.0)
|
||||
* Update kube-state-metrics from v2.6.0 to [v2.7.0](https://github.com/kubernetes/kube-state-metrics/releases/tag/v2.7.0)
|
||||
* Update Grafana from v9.2.4 to [v9.3.1](https://github.com/grafana/grafana/releases/tag/v9.3.1)
|
||||
|
||||
## v1.25.4
|
||||
|
||||
* Kubernetes [v1.25.4](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.25.md#v1254)
|
||||
* Update Calico from v3.24.1 to [v3.24.5](https://github.com/projectcalico/calico/releases/tag/v3.24.5)
|
||||
* Allow Kubelet kubeconfig to drain nodes, if desired ([#330](https://github.com/poseidon/terraform-render-bootstrap/pull/330))
|
||||
* Re-enable Kubelet Graceful Node Shutdown ([#1261](https://github.com/poseidon/typhoon/pull/1261))
|
||||
* Introduce companion project [poseidon/scuttle](https://github.com/poseidon/scuttle)
|
||||
* Link to new Mastodon account for release announcements
|
||||
* [@typhoon@fosstodon.org](https://fosstodon.org/@typhoon)
|
||||
* [@poseidon@fosstodon.org](https://fosstodon.org/@poseidon)
|
||||
* Deprecate publishing to the [Terraform Module Registry](https://registry.terraform.io/search/modules?q=poseidon)
|
||||
* Typhoon docs have always shown using Git-based module sources, not the Terraform Module Registry
|
||||
* Module usage should be `source = "git::https://github.com/poseidon/typhoon/...` not `source = poseidon/kubernetes/...` (see the sketch after this list)
|
||||
* Terraform's Module Registry requires subtree mirroring typhoon to special terraform-platform-kubernetes repos, only supports release versions (no commit SHAs or forks), only ever contained Flatcar Linux modules (not Fedora CoreOS) for historical reasons
|
||||
* Note, this does not affect Terraform Providers like `poseidon/matchbox` or `poseidon/ct`, the registry works well for providers
|
||||
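A sketch of the Git-based source form with a pinned release tag; the platform path shown is just one example:

```tf
module "cluster" {
  # Git-based module source, not the deprecated registry form
  source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes?ref=v1.25.4"
  # ...
}
```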
|
||||
### Fedora CoreOS
|
||||
|
||||
* Remove unused `Wants=network.target` from `etcd-member.service` ([#1254](https://github.com/poseidon/typhoon/pull/1254))
|
||||
|
||||
### Cloud
|
||||
|
||||
* Remove defunct `delete-node.service` from worker node configurations ([#1256](https://github.com/poseidon/typhoon/pull/1256))
|
||||
|
||||
### Addons
|
||||
|
||||
* Update Prometheus from v2.39.1 to [v2.40.1](https://github.com/prometheus/prometheus/releases/tag/v2.40.1)
|
||||
* Update Grafana from v9.1.7 to [v9.2.4](https://github.com/grafana/grafana/releases/tag/v9.2.4)
|
||||
|
||||
## v1.25.3
|
||||
|
||||
* Kubernetes [v1.25.3](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.25.md#v1253)
|
||||
* Switch Kubernetes registry from `k8s.gcr.io` to `registry.k8s.io` for addons ([#1246](https://github.com/poseidon/typhoon/pull/1246))
|
||||
* Update Cilium from v1.12.2 to [v1.12.3](https://github.com/cilium/cilium/releases/tag/v1.12.3) ([#1253](https://github.com/poseidon/typhoon/pull/1253))
|
||||
|
||||
### Azure
|
||||
|
||||
* Change default Azure `worker_type` from [`Standard_DS1_v2`](https://learn.microsoft.com/en-us/azure/virtual-machines/dv2-dsv2-series#dsv2-series) to [`Standard_D2as_v5`](https://learn.microsoft.com/en-us/azure/virtual-machines/dasv5-dadsv5-series#dasv5-series) ([#1248](https://github.com/poseidon/typhoon/pull/1248))
|
||||
* Get 2 VCPU, 7 GiB, 12500Mbps (vs 1 VCPU, 3.5GiB, 750 Mbps)
|
||||
* Small increase in pay-as-you-go price ($53.29 -> $62.78)
|
||||
* Small increase in spot price ($5.64/mo -> $7.37/mo)
|
||||
* Change from Intel to AMD EPYC (`D2as_v5` cheaper than `D2s_v5`)
|
||||
|
||||
### Flatcar Linux
|
||||
|
||||
* Add Flatcar Linux ARM64 support on Azure ([docs](https://typhoon.psdn.io/advanced/arm64/), [#1251](https://github.com/poseidon/typhoon/pull/1251))
|
||||
* Switch from Azure Hypervisor gen1 to gen2 (**action required**) ([#1248](https://github.com/poseidon/typhoon/pull/1248))
|
||||
* Run `az vm image terms accept --publish kinvolk --offer flatcar-container-linux-free --plan stable-gen2`
|
||||
|
||||
### Docs
|
||||
|
||||
* Remove old docs note about not supporting ARM64 with Calico
|
||||
* Typhoon supports ARM64 with `cilium`, `calico`, and `flannel`
|
||||
|
||||
### Addons
|
||||
|
||||
* Update Prometheus from v2.38.0 to [v2.39.1](https://github.com/prometheus/prometheus/releases/tag/v2.39.1)
|
||||
* Update Grafana from v9.1.6 to [v9.1.7](https://github.com/grafana/grafana/releases/tag/v9.1.7)
|
||||
|
||||
## v1.25.2
|
||||
|
||||
Kubernetes v1.25.2 was skipped since there were minimal changes upstream.
|
||||
|
||||
## v1.25.1
|
||||
|
||||
* Kubernetes [v1.25.1](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.25.md#v1251)
|
||||
* Update etcd from v3.5.4 to [v3.5.5](https://github.com/etcd-io/etcd/releases/tag/v3.5.5)
|
||||
* Update Cilium from v1.12.1 to [v1.12.2](https://github.com/cilium/cilium/releases/tag/v1.12.2)
|
||||
* Update Calico from v3.23.3 to [v3.24.1](https://github.com/projectcalico/calico/releases/tag/v3.24.1)
|
||||
* Revert Kubelet Graceful Node Shutdown on worker nodes ([#1227](https://github.com/poseidon/typhoon/pull/1227))
|
||||
* Fix issue where non-critical pods are left in Error/Completed state on node shutdown
|
||||
* Remove feature flag disable workaround for [kubernetes/kubernetes#112081](https://github.com/kubernetes/kubernetes/issues/112081)
|
||||
* Kubernetes [reverted](https://github.com/kubernetes/kubernetes/pull/112078) `LocalStorageCapacityIsolationFSQuotaMonitoring` back to alpha
|
||||
* Remove workaround for preventing `search .` propagation in [kubernetes/kubernetes#112135](https://github.com/kubernetes/kubernetes/issues/112135)
|
||||
* Upstream Kubernetes [fix](https://github.com/kubernetes/kubernetes/pull/112157)
|
||||
|
||||
### Addons
|
||||
|
||||
* Update kube-state-metrics from v2.5.0 to [v2.6.0](https://github.com/kubernetes/kube-state-metrics/releases/tag/v2.6.0)
|
||||
* Update ingress-nginx from v1.3.0 to [v1.3.1](https://github.com/kubernetes/ingress-nginx/releases/tag/controller-v1.3.1)
|
||||
* Update Grafana from v9.1.0 to [v9.1.6](https://github.com/grafana/grafana/releases/tag/v9.1.6)
|
||||
|
||||
## v1.25.0
|
||||
|
||||
* Kubernetes [v1.25.0](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.25.md#v1250)
|
||||
* Disable LocalStorageCapacityIsolationFSQuotaMonitoring feature gate ([#1220](https://github.com/poseidon/typhoon/pull/1220), fixes [kubernetes#112081](https://github.com/kubernetes/kubernetes/issues/112081))
|
||||
* Add workaround to revert adding "search ." to containers' `/etc/resolv.conf` ([#1224](https://github.com/poseidon/typhoon/pull/1224), fixes [kubernetes#112135](https://github.com/kubernetes/kubernetes/issues/112135))
|
||||
* Migrate most Kubelet flags to KubeletConfiguration file ([#1219](https://github.com/poseidon/typhoon/pull/1219))
|
||||
* Configure Kubelet Graceful Node Shutdown ([#1222](https://github.com/poseidon/typhoon/pull/1222))
|
||||
* Allow up to 30s for critical pods to gracefully shutdown on node shutdown
|
||||
* Allow up to 15s for regular pods to gracefully shutdown on node shutdown
|
||||
* Mark node NotReady promptly on node shutdown
|
||||
* Lengthen systemd inhibitor lock max delay from 5s to 45s
|
||||
|
||||
### Fedora CoreOS
|
||||
|
||||
* Change Podman `log-driver` from `journald` to `k8s-file` ([#1221](https://github.com/poseidon/typhoon/pull/1221))
|
||||
* Fix `etcd-member` and Kubelet systemd service log lines appearing twice in journal logs
|
||||
|
||||
## v1.24.4
|
||||
|
||||
* Kubernetes [v1.24.4](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.24.md#v1244)
|
||||
* Update CoreDNS from v1.8.6 to [v1.9.3](https://github.com/poseidon/terraform-render-bootstrap/pull/318)
|
||||
* Update Cilium from v1.11.7 to [v1.12.1](https://github.com/cilium/cilium/releases/tag/v1.12.1)
|
||||
* Update Calico from v3.23.1 to [v3.23.3](https://github.com/projectcalico/calico/releases/tag/v3.23.3)
|
||||
* Switch Kubernetes registry from `k8s.gcr.io` to `registry.k8s.io` ([#1206](https://github.com/poseidon/typhoon/pull/1206))
|
||||
* Remove use of deprecated Terraform [template](https://registry.terraform.io/providers/hashicorp/template) provider ([#1194](https://github.com/poseidon/typhoon/pull/1194))
|
||||
|
||||
### Fedora CoreOS
|
||||
|
||||
* Remove ineffective `/etc/fedora-coreos/iptables-legacy.stamp` ([#1201](https://github.com/poseidon/typhoon/pull/1201))
|
||||
* Typhoon has used iptables v1.8.7 (nf_tables) since FCOS 36
|
||||
* Staying on legacy iptables required a file in `/etc/coreos` instead
|
||||
|
||||
### Flatcar Linux
|
||||
|
||||
* Migrate Flatcar Linux from Ignition spec v2.3.0 to v3.3.0 ([#1196](https://github.com/poseidon/typhoon/pull/1196)) (**action required**)
|
||||
* Flatcar Linux 3185.0.0+ [supports](https://flatcar-linux.org/docs/latest/provisioning/ignition/specification/#ignition-v3) Ignition v3.x specs (which are rendered from Butane Configs, like Fedora CoreOS)
|
||||
* `poseidon/ct` v0.11.0 [supports](https://github.com/poseidon/terraform-provider-ct/pull/131) the `flatcar` Butane Config variant
|
||||
* Require poseidon/ct v0.11+ and Flatcar Linux 3185.0.0+
|
||||
* Please modify any Flatcar Linux snippets to use the [Butane Config](https://coreos.github.io/butane/config-flatcar-v1_0/) format (**action required**)
|
||||
|
||||
```yaml
|
||||
variant: flatcar
|
||||
version: 1.0.0
|
||||
...
|
||||
```
|
||||
|
||||
### AWS
|
||||
|
||||
* [Refresh](https://docs.aws.amazon.com/autoscaling/ec2/userguide/asg-instance-refresh.html) instances in autoscaling group when launch configuration changes ([#1208](https://github.com/poseidon/typhoon/pull/1208)) ([docs](https://typhoon.psdn.io/topics/maintenance/#node-configuration-updates), **important**)
|
||||
* Worker launch configuration changes start an autoscaling group instance refresh to replace instances
|
||||
* Instance refresh creates surge instances, waits for a warm-up period, then deletes old instances
|
||||
* Changing `worker_type`, `disk_*`, `worker_price`, `worker_target_groups`, or Butane `worker_snippets` on existing worker nodes will replace instances
|
||||
* New AMIs or changing `os_stream` will be ignored, to allow Fedora CoreOS or Flatcar Linux to keep themselves updated
|
||||
* Previously, new launch configurations were made in the same way, but not applied to instances unless manually replaced
|
||||
* Rename worker autoscaling group `${cluster_name}-worker` ([#1202](https://github.com/poseidon/typhoon/pull/1202))
|
||||
* Rename launch configuration `${cluster_name}-worker` instead of a random id
|
||||
|
||||
### Google
|
||||
|
||||
* [Roll](https://cloud.google.com/compute/docs/instance-groups/rolling-out-updates-to-managed-instance-groups) instance template changes to worker managed instance groups ([#1207](https://github.com/poseidon/typhoon/pull/1207)) ([docs](https://typhoon.psdn.io/topics/maintenance/#node-configuration-updates), **important**)
|
||||
* Worker instance template changes roll out by gradually replacing instances
|
||||
* Automatic rollouts create surge instances, wait for health checks, then delete old instances (0 unavailable instances)
|
||||
* Changing `worker_type`, `disk_size`, `worker_preemptible`, or Butane `worker_snippets` on existing worker nodes will replace instances
|
||||
* New compute images or changing `os_stream` will be ignored, to allow Fedora CoreOS or Flatcar Linux to keep themselves updated
|
||||
* Previously, new instance templates were made in the same way, but not applied to instances unless manually replaced
|
||||
* Add health checks to worker managed instance groups (i.e. "autohealing") ([#1207](https://github.com/poseidon/typhoon/pull/1207))
|
||||
* Use health checks to probe kube-proxy every 30s
|
||||
* Replace worker nodes that fail the health check 6 times (3min)
|
||||
* Name `kube-apiserver` and `worker` health checks consistently ([#1207](https://github.com/poseidon/typhoon/pull/1207))
|
||||
* Use name `${cluster_name}-apiserver-health` and `${cluster_name}-worker-health`
|
||||
* Rename managed instance group from `${cluster_name}-worker-group` to `${cluster_name}-worker` ([#1207](https://github.com/poseidon/typhoon/pull/1207))
|
||||
* Fix bug provisioning clusters with multiple controller nodes ([#1195](https://github.com/poseidon/typhoon/pull/1195))
|
||||
|
||||
### Addons
|
||||
|
||||
* Update Prometheus from v2.37.0 to [v2.38.0](https://github.com/prometheus/prometheus/releases/tag/v2.38.0)
|
||||
* Update Grafana from v9.0.3 to [v9.1.0](https://github.com/grafana/grafana/releases/tag/v9.1.0)
|
||||
|
||||
## v1.24.3
|
||||
|
||||
* Kubernetes [v1.24.3](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.24.md#v1243)
|
||||
* Update Cilium from v1.11.6 to [v1.11.7](https://github.com/cilium/cilium/releases/tag/v1.11.7)
|
||||
|
||||
### Addons
|
||||
|
||||
* Update ingress-nginx from v1.2.1 to [v1.3.0](https://github.com/kubernetes/ingress-nginx/releases/tag/controller-v1.3.0)
|
||||
* Update Prometheus from v2.36.1 to [v2.37.0](https://github.com/prometheus/prometheus/releases/tag/v2.37.0)
|
||||
* Update Grafana from v8.5.6 to [v9.0.3](https://github.com/grafana/grafana/releases/tag/v9.0.3)
|
||||
|
||||
### Notes
|
||||
|
||||
* Poseidon repos will soon change their default branch from `master` to `main`
|
||||
|
||||
## v1.24.2
|
||||
|
||||
* Kubernetes [v1.24.2](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.24.md#v1242)
|
||||
* Update Cilium from v1.11.5 to [v1.11.6](https://github.com/cilium/cilium/releases/tag/v1.11.6)
|
||||
* Update Calico from v3.22.2 to [v3.23.1](https://github.com/projectcalico/calico/releases/tag/v3.23.1)
|
||||
|
||||
### Addons
|
||||
|
||||
* Update Prometheus from v2.36.0 to [v2.36.1](https://github.com/prometheus/prometheus/releases/tag/v2.36.1)
|
||||
* Update Grafana from v8.5.3 to [v8.5.6](https://github.com/grafana/grafana/releases/tag/v8.5.6)
|
||||
* Update kube-state-metrics from v2.4.2 to [v2.5.0](https://github.com/kubernetes/kube-state-metrics/releases/tag/v2.5.0)
|
||||
|
||||
## Known Issues
|
||||
|
||||
* Skip AWS Terraform provider v4.17.0 to v4.19.0, which had a regression affecting workers joining ([#1173](https://github.com/poseidon/typhoon/issues/1173))
|
||||
|
||||
## v1.24.1
|
||||
|
||||
* Kubernetes [v1.24.1](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.24.md#v1241)
|
||||
* Update Cilium from v1.11.4 to [v1.11.5](https://github.com/cilium/cilium/releases/tag/v1.11.5)
|
||||
|
||||
### Addons
|
||||
|
||||
* Update Prometheus from v2.35.0 to [v2.36.0](https://github.com/prometheus/prometheus/releases/tag/v2.36.0)
|
||||
* Update Grafana from v8.5.1 to [v8.5.3](https://github.com/grafana/grafana/releases/tag/v8.5.3)
|
||||
* Update nginx-ingress from v1.2.0 to [v1.2.1](https://github.com/kubernetes/ingress-nginx/releases/tag/controller-v1.2.1)
|
||||
|
||||
## v1.24.0
|
||||
|
||||
* Kubernetes [v1.24.0](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.24.md#v1240)
|
||||
* Update etcd from v3.5.2 to [v3.5.4](https://github.com/etcd-io/etcd/releases/tag/v3.5.4)
|
||||
* Add Kubelet mounts to enable relabeling workload volumes ([#1152](https://github.com/poseidon/typhoon/pull/1152))
|
||||
* StorageClasses no longer require explicit SELinux mount contexts
|
||||
|
||||
### Addons
|
||||
|
||||
* Update nginx-ingress from v1.1.3 to [v1.2.0](https://github.com/kubernetes/ingress-nginx/releases/tag/controller-v1.2.0)
|
||||
* Update Prometheus from v2.34.0 to [v2.35.0](https://github.com/prometheus/prometheus/releases/tag/v2.35.0)
|
||||
* Update Grafana from v8.4.5 to [v8.5.1](https://github.com/grafana/grafana/releases/tag/v8.5.1)
|
||||
|
||||
## v1.23.6
|
||||
|
||||
* Kubernetes [v1.23.6](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.23.md#v1236)
|
||||
* Update Cilium from v1.11.2 to [v1.11.4](https://github.com/cilium/cilium/releases/tag/v1.11.4)
|
||||
* Rename Cilium DaemonSet from `cilium-agent` to `cilium` to match Cilium CLI tools ([#303](https://github.com/poseidon/terraform-render-bootstrap/pull/303))
|
||||
* Update Calico from v3.22.1 to [v3.22.2](https://github.com/projectcalico/calico/releases/tag/v3.22.2)
|
||||
* Mount /etc/machine-id from host into Kubelet ([#1143](https://github.com/poseidon/typhoon/pull/1143))
|
||||
* Remove deprecated use of `key_algorithm` in `hashicorp/tls` resources
|
||||
|
||||
### Azure
|
||||
|
||||
* Allow upgrading Azure Terraform provider to v3.x ([#1144](https://github.com/poseidon/typhoon/pull/1144))
|
||||
* Rename `worker_address_prefix` output to `worker_address_prefixes`
|
||||
|
||||
### Google Cloud
|
||||
|
||||
* Fix issue on Flatcar Linux with controller nodes not ignoring os image changes ([#1149](https://github.com/poseidon/typhoon/pull/1149))
|
||||
* Nodes will auto-update, Terraform should not attempt to delete/recreate them
|
||||
|
||||
### Addons
|
||||
|
||||
* Update nginx-ingress from v1.1.2 to [v1.1.3](https://github.com/kubernetes/ingress-nginx/releases/tag/controller-v1.1.3)
|
||||
* Update Prometheus from v2.33.5 to [v2.34.0](https://github.com/prometheus/prometheus/releases/tag/v2.34.0)
|
||||
* Update Grafana from v8.4.4 to [v8.4.5](https://github.com/grafana/grafana/releases/tag/v8.4.5)
|
||||
|
||||
## v1.23.5
|
||||
|
||||
* Kubernetes [v1.23.5](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.23.md#v1235)
|
||||
|
49
README.md
@@ -1,4 +1,11 @@
|
||||
# Typhoon <img align="right" src="https://storage.googleapis.com/poseidon/typhoon-logo.png">
|
||||
# Typhoon
|
||||
|
||||
[](https://github.com/poseidon/typhoon/releases)
|
||||
[](https://github.com/poseidon/typhoon/stargazers)
|
||||
[](https://github.com/sponsors/poseidon)
|
||||
[](https://fosstodon.org/@typhoon)
|
||||
|
||||
<img align="right" src="https://storage.googleapis.com/poseidon/typhoon-logo.png">
|
||||
|
||||
Typhoon is a minimal and free Kubernetes distribution.
|
||||
|
||||
@@ -11,7 +18,7 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster
|
||||
|
||||
## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>
|
||||
|
||||
* Kubernetes v1.23.5 (upstream)
|
||||
* Kubernetes v1.31.3 (upstream)
|
||||
* Single or multi-master, [Calico](https://www.projectcalico.org/) or [Cilium](https://github.com/cilium/cilium) or [flannel](https://github.com/coreos/flannel) networking
|
||||
* On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/), SELinux enforcing
|
||||
* Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [preemptible](https://typhoon.psdn.io/flatcar-linux/google-cloud/#preemption) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#hosts) customization
|
||||
@@ -19,7 +26,7 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster
|
||||
|
||||
## Modules
|
||||
|
||||
Typhoon provides a Terraform Module for each supported operating system and platform.
|
||||
Typhoon provides a Terraform Module for defining a Kubernetes cluster on each supported operating system and platform.
|
||||
|
||||
Typhoon is available for [Fedora CoreOS](https://getfedora.org/coreos/).
|
||||
|
||||
@@ -48,6 +55,15 @@ Typhoon is available for [Flatcar Linux](https://www.flatcar-linux.org/releases/
|
||||
| Platform | Operating System | Terraform Module | Status |
|
||||
|---------------|------------------|------------------|--------|
|
||||
| AWS | Flatcar Linux (ARM64) | [aws/flatcar-linux/kubernetes](aws/flatcar-linux/kubernetes) | alpha |
|
||||
| Azure | Flatcar Linux (ARM64) | [azure/flatcar-linux/kubernetes](azure/flatcar-linux/kubernetes) | alpha |
|
||||
|
||||
Typhoon also provides Terraform Modules for optionally managing individual components applied onto clusters. A usage sketch follows the table.
|
||||
|
||||
| Name | Terraform Module | Status |
|
||||
|---------|------------------|--------|
|
||||
| CoreDNS | [addons/coredns](addons/coredns) | beta |
|
||||
| Cilium | [addons/cilium](addons/cilium) | beta |
|
||||
| flannel | [addons/flannel](addons/flannel) | beta |
|
||||
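A hedged usage sketch for one of these component modules; each module has its own inputs and provider requirements that are omitted here, so treat this as the general shape only:

```tf
module "cilium" {
  source = "git::https://github.com/poseidon/typhoon//addons/cilium?ref=v1.31.3"
  # component-specific inputs omitted; see the module's variables
}
```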
|
||||
## Documentation
|
||||
|
||||
@@ -62,7 +78,7 @@ Define a Kubernetes cluster by using the Terraform module for your chosen platfo
|
||||
|
||||
```tf
|
||||
module "yavin" {
|
||||
source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes?ref=v1.23.5"
|
||||
source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes?ref=v1.31.3"
|
||||
|
||||
# Google Cloud
|
||||
cluster_name = "yavin"
|
||||
@@ -80,8 +96,9 @@ module "yavin" {
|
||||
|
||||
# Obtain cluster kubeconfig
|
||||
resource "local_file" "kubeconfig-yavin" {
|
||||
content = module.yavin.kubeconfig-admin
|
||||
filename = "/home/user/.kube/configs/yavin-config"
|
||||
content = module.yavin.kubeconfig-admin
|
||||
filename = "/home/user/.kube/configs/yavin-config"
|
||||
file_permission = "0600"
|
||||
}
|
||||
```
|
||||
|
||||
@@ -101,9 +118,9 @@ In 4-8 minutes (varies by platform), the cluster will be ready. This Google Clou
|
||||
$ export KUBECONFIG=/home/user/.kube/configs/yavin-config
|
||||
$ kubectl get nodes
|
||||
NAME ROLES STATUS AGE VERSION
|
||||
yavin-controller-0.c.example-com.internal <none> Ready 6m v1.23.5
|
||||
yavin-worker-jrbf.c.example-com.internal <none> Ready 5m v1.23.5
|
||||
yavin-worker-mzdm.c.example-com.internal <none> Ready 5m v1.23.5
|
||||
yavin-controller-0.c.example-com.internal <none> Ready 6m v1.31.3
|
||||
yavin-worker-jrbf.c.example-com.internal <none> Ready 5m v1.31.3
|
||||
yavin-worker-mzdm.c.example-com.internal <none> Ready 5m v1.31.3
|
||||
```
|
||||
|
||||
List the pods.
|
||||
@@ -111,9 +128,10 @@ List the pods.
|
||||
```
|
||||
$ kubectl get pods --all-namespaces
|
||||
NAMESPACE NAME READY STATUS RESTARTS AGE
|
||||
kube-system calico-node-1cs8z 2/2 Running 0 6m
|
||||
kube-system calico-node-d1l5b 2/2 Running 0 6m
|
||||
kube-system calico-node-sp9ps 2/2 Running 0 6m
|
||||
kube-system cilium-1cs8z 1/1 Running 0 6m
|
||||
kube-system cilium-d1l5b 1/1 Running 0 6m
|
||||
kube-system cilium-sp9ps 1/1 Running 0 6m
|
||||
kube-system cilium-operator-68d778b448-g744f 1/1 Running 0 6m
|
||||
kube-system coredns-1187388186-zj5dl 1/1 Running 0 6m
|
||||
kube-system coredns-1187388186-dkh3o 1/1 Running 0 6m
|
||||
kube-system kube-apiserver-controller-0 1/1 Running 0 6m
|
||||
@@ -156,5 +174,12 @@ Poseidon's Github [Sponsors](https://github.com/sponsors/poseidon) support the i
|
||||
<img src="https://opensource.nyc3.cdn.digitaloceanspaces.com/attribution/assets/SVG/DO_Logo_horizontal_blue.svg" width="201px">
|
||||
</a>
|
||||
<br>
|
||||
<br>
|
||||
|
||||
<a href="https://deploy.equinix.com/">
|
||||
<img src="https://storage.googleapis.com/poseidon/equinix.png" width="201px">
|
||||
</a>
|
||||
<br>
|
||||
<br>
|
||||
|
||||
If you'd like your company here, please contact dghubble at psdn.io.
|
||||
|
36
addons/cilium/cluster-role-binding.tf
Normal file
@@ -0,0 +1,36 @@
|
||||
resource "kubernetes_cluster_role_binding" "operator" {
|
||||
metadata {
|
||||
name = "cilium-operator"
|
||||
}
|
||||
|
||||
role_ref {
|
||||
api_group = "rbac.authorization.k8s.io"
|
||||
kind = "ClusterRole"
|
||||
name = "cilium-operator"
|
||||
}
|
||||
|
||||
subject {
|
||||
kind = "ServiceAccount"
|
||||
name = "cilium-operator"
|
||||
namespace = "kube-system"
|
||||
}
|
||||
}
|
||||
|
||||
resource "kubernetes_cluster_role_binding" "agent" {
|
||||
metadata {
|
||||
name = "cilium-agent"
|
||||
}
|
||||
|
||||
role_ref {
|
||||
api_group = "rbac.authorization.k8s.io"
|
||||
kind = "ClusterRole"
|
||||
name = "cilium-agent"
|
||||
}
|
||||
|
||||
subject {
|
||||
kind = "ServiceAccount"
|
||||
name = "cilium-agent"
|
||||
namespace = "kube-system"
|
||||
}
|
||||
}
|
||||
|
112
addons/cilium/cluster-role.tf
Normal file
@@ -0,0 +1,112 @@
|
||||
resource "kubernetes_cluster_role" "operator" {
|
||||
metadata {
|
||||
name = "cilium-operator"
|
||||
}
|
||||
|
||||
# detect and restart [core|kube]dns pods on startup
|
||||
rule {
|
||||
verbs = ["get", "list", "watch", "delete"]
|
||||
api_groups = [""]
|
||||
resources = ["pods"]
|
||||
}
|
||||
|
||||
rule {
|
||||
verbs = ["list", "watch"]
|
||||
api_groups = [""]
|
||||
resources = ["nodes"]
|
||||
}
|
||||
|
||||
rule {
|
||||
verbs = ["patch"]
|
||||
api_groups = [""]
|
||||
resources = ["nodes", "nodes/status"]
|
||||
}
|
||||
|
||||
rule {
|
||||
verbs = ["get", "list", "watch"]
|
||||
api_groups = ["discovery.k8s.io"]
|
||||
resources = ["endpointslices"]
|
||||
}
|
||||
|
||||
rule {
|
||||
verbs = ["get", "list", "watch"]
|
||||
api_groups = [""]
|
||||
resources = ["services"]
|
||||
}
|
||||
|
||||
# Perform LB IP allocation for BGP
|
||||
rule {
|
||||
verbs = ["update"]
|
||||
api_groups = [""]
|
||||
resources = ["services/status"]
|
||||
}
|
||||
|
||||
# Perform the translation of a CNP that contains `ToGroup` to its endpoints
|
||||
rule {
|
||||
verbs = ["get", "list", "watch"]
|
||||
api_groups = [""]
|
||||
resources = ["services", "endpoints", "namespaces"]
|
||||
}
|
||||
|
||||
rule {
|
||||
verbs = ["*"]
|
||||
api_groups = ["cilium.io"]
|
||||
resources = ["ciliumnetworkpolicies", "ciliumnetworkpolicies/status", "ciliumnetworkpolicies/finalizers", "ciliumclusterwidenetworkpolicies", "ciliumclusterwidenetworkpolicies/status", "ciliumclusterwidenetworkpolicies/finalizers", "ciliumendpoints", "ciliumendpoints/status", "ciliumendpoints/finalizers", "ciliumnodes", "ciliumnodes/status", "ciliumnodes/finalizers", "ciliumidentities", "ciliumidentities/status", "ciliumidentities/finalizers", "ciliumlocalredirectpolicies", "ciliumlocalredirectpolicies/status", "ciliumlocalredirectpolicies/finalizers", "ciliumendpointslices", "ciliumloadbalancerippools", "ciliumloadbalancerippools/status", "ciliumcidrgroups", "ciliuml2announcementpolicies", "ciliuml2announcementpolicies/status", "ciliumpodippools"]
|
||||
}
|
||||
|
||||
rule {
|
||||
verbs = ["create", "get", "list", "update", "watch"]
|
||||
api_groups = ["apiextensions.k8s.io"]
|
||||
resources = ["customresourcedefinitions"]
|
||||
}
|
||||
|
||||
# Cilium leader elects if among multiple operator replicas
|
||||
rule {
|
||||
verbs = ["create", "get", "update"]
|
||||
api_groups = ["coordination.k8s.io"]
|
||||
resources = ["leases"]
|
||||
}
|
||||
}
|
||||
|
||||
resource "kubernetes_cluster_role" "agent" {
|
||||
metadata {
|
||||
name = "cilium-agent"
|
||||
}
|
||||
|
||||
rule {
|
||||
verbs = ["get", "list", "watch"]
|
||||
api_groups = ["networking.k8s.io"]
|
||||
resources = ["networkpolicies"]
|
||||
}
|
||||
|
||||
rule {
|
||||
verbs = ["get", "list", "watch"]
|
||||
api_groups = ["discovery.k8s.io"]
|
||||
resources = ["endpointslices"]
|
||||
}
|
||||
|
||||
rule {
|
||||
verbs = ["get", "list", "watch"]
|
||||
api_groups = [""]
|
||||
resources = ["namespaces", "services", "pods", "endpoints", "nodes"]
|
||||
}
|
||||
|
||||
rule {
|
||||
verbs = ["patch"]
|
||||
api_groups = [""]
|
||||
resources = ["nodes/status"]
|
||||
}
|
||||
|
||||
rule {
|
||||
verbs = ["create", "get", "list", "watch", "update"]
|
||||
api_groups = ["apiextensions.k8s.io"]
|
||||
resources = ["customresourcedefinitions"]
|
||||
}
|
||||
|
||||
rule {
|
||||
verbs = ["*"]
|
||||
api_groups = ["cilium.io"]
|
||||
resources = ["ciliumnetworkpolicies", "ciliumnetworkpolicies/status", "ciliumclusterwidenetworkpolicies", "ciliumclusterwidenetworkpolicies/status", "ciliumendpoints", "ciliumendpoints/status", "ciliumnodes", "ciliumnodes/status", "ciliumidentities", "ciliumidentities/status", "ciliumlocalredirectpolicies", "ciliumlocalredirectpolicies/status", "ciliumegressnatpolicies", "ciliumendpointslices", "ciliumcidrgroups", "ciliuml2announcementpolicies", "ciliuml2announcementpolicies/status", "ciliumpodippools"]
|
||||
}
|
||||
}
|
||||
|
196
addons/cilium/config.tf
Normal file
@@ -0,0 +1,196 @@
|
||||
resource "kubernetes_config_map" "cilium" {
|
||||
metadata {
|
||||
name = "cilium"
|
||||
namespace = "kube-system"
|
||||
}
|
||||
data = {
|
||||
# Identity allocation mode selects how identities are shared between cilium
|
||||
# nodes by setting how they are stored. The options are "crd" or "kvstore".
|
||||
# - "crd" stores identities in kubernetes as CRDs (custom resource definition).
|
||||
# These can be queried with:
|
||||
# kubectl get ciliumid
|
||||
# - "kvstore" stores identities in a kvstore, etcd or consul, that is
|
||||
# configured below. Cilium versions before 1.6 supported only the kvstore
|
||||
# backend. Upgrades from these older cilium versions should continue using
|
||||
# the kvstore by commenting out the identity-allocation-mode below, or
|
||||
# setting it to "kvstore".
|
||||
identity-allocation-mode = "crd"
|
||||
cilium-endpoint-gc-interval = "5m0s"
|
||||
nodes-gc-interval = "5m0s"
|
||||
|
||||
# If you want to run cilium in debug mode change this value to true
|
||||
debug = "false"
|
||||
# The agent can be put into the following three policy enforcement modes
|
||||
# default, always and never.
|
||||
# https://docs.cilium.io/en/latest/policy/intro/#policy-enforcement-modes
|
||||
enable-policy = "default"
|
||||
|
||||
# Prometheus
|
||||
enable-metrics = "true"
|
||||
prometheus-serve-addr = ":9962"
|
||||
operator-prometheus-serve-addr = ":9963"
|
||||
proxy-prometheus-port = "9964" # envoy
|
||||
|
||||
# Enable IPv4 addressing. If enabled, all endpoints are allocated an IPv4
|
||||
# address.
|
||||
enable-ipv4 = "true"
|
||||
|
||||
# Enable IPv6 addressing. If enabled, all endpoints are allocated an IPv6
|
||||
# address.
|
||||
enable-ipv6 = "false"
|
||||
|
||||
# Enable probing for a more efficient clock source for the BPF datapath
|
||||
enable-bpf-clock-probe = "true"
|
||||
|
||||
# Enable use of transparent proxying mechanisms (Linux 5.7+)
|
||||
enable-bpf-tproxy = "false"
|
||||
|
||||
# If you want cilium monitor to aggregate tracing for packets, set this level
|
||||
# to "low", "medium", or "maximum". The higher the level, the less packets
|
||||
# that will be seen in monitor output.
|
||||
monitor-aggregation = "medium"
|
||||
|
||||
# The monitor aggregation interval governs the typical time between monitor
|
||||
# notification events for each allowed connection.
|
||||
#
|
||||
# Only effective when monitor aggregation is set to "medium" or higher.
|
||||
monitor-aggregation-interval = "5s"
|
||||
|
||||
# The monitor aggregation flags determine which TCP flags, upon their
|
||||
# first observation, cause monitor notifications to be generated.
|
||||
#
|
||||
# Only effective when monitor aggregation is set to "medium" or higher.
|
||||
monitor-aggregation-flags = "all"
|
||||
|
||||
# Specifies the ratio (0.0-1.0) of total system memory to use for dynamic
|
||||
# sizing of the TCP CT, non-TCP CT, NAT and policy BPF maps.
|
||||
bpf-map-dynamic-size-ratio = "0.0025"
|
||||
# bpf-policy-map-max specifies the maximum number of entries in endpoint
|
||||
# policy map (per endpoint)
|
||||
bpf-policy-map-max = "16384"
|
||||
# bpf-lb-map-max specifies the maximum number of entries in bpf lb service,
|
||||
# backend and affinity maps.
|
||||
bpf-lb-map-max = "65536"
|
||||
|
||||
# Pre-allocation of map entries allows per-packet latency to be reduced, at
|
||||
# the expense of up-front memory allocation for the entries in the maps. The
|
||||
# default value below will minimize memory usage in the default installation;
|
||||
# users who are sensitive to latency may consider setting this to "true".
|
||||
#
|
||||
# This option was introduced in Cilium 1.4. Cilium 1.3 and earlier ignore
|
||||
# this option and behave as though it is set to "true".
|
||||
#
|
||||
# If this value is modified, then during the next Cilium startup the restore
|
||||
# of existing endpoints and tracking of ongoing connections may be disrupted.
|
||||
# As a result, reply packets may be dropped and the load-balancing decisions
|
||||
# for established connections may change.
|
||||
#
|
||||
# If this option is set to "false" during an upgrade from 1.3 or earlier to
|
||||
# 1.4 or later, then it may cause one-time disruptions during the upgrade.
|
||||
preallocate-bpf-maps = "false"
|
||||
|
||||
# Name of the cluster. Only relevant when building a mesh of clusters.
|
||||
cluster-name = "default"
|
||||
# Unique ID of the cluster. Must be unique across all connected clusters and
|
||||
# in the range of 1 to 255. Only relevant when building a mesh of clusters.
|
||||
cluster-id = "0"
|
||||
|
||||
# Encapsulation mode for communication between nodes
|
||||
# Possible values:
|
||||
# - disabled
|
||||
# - vxlan (default)
|
||||
# - geneve
|
||||
routing-mode = "tunnel"
|
||||
tunnel = "vxlan"
|
||||
# Enables L7 proxy for L7 policy enforcement and visibility
|
||||
enable-l7-proxy = "true"
|
||||
|
||||
auto-direct-node-routes = "false"
|
||||
|
||||
# enableXTSocketFallback enables the fallback compatibility solution
|
||||
# when the xt_socket kernel module is missing and it is needed for
|
||||
# the datapath L7 redirection to work properly. See documentation
|
||||
# for details on when this can be disabled:
|
||||
# http://docs.cilium.io/en/latest/install/system_requirements/#admin-kernel-version.
|
||||
enable-xt-socket-fallback = "true"
|
||||
|
||||
# installIptablesRules enables installation of iptables rules to allow for
|
||||
# TPROXY (L7 proxy injection), iptables-based masquerading, and compatibility
|
||||
# with kube-proxy. See documentation for details on when this can be
|
||||
# disabled.
|
||||
install-iptables-rules = "true"
|
||||
|
||||
# Masquerade traffic leaving the node that is destined for outside the cluster
|
||||
enable-ipv4-masquerade = "true"
|
||||
enable-ipv6-masquerade = "false"
|
||||
|
||||
# bpfMasquerade enables masquerading with BPF instead of iptables
|
||||
enable-bpf-masquerade = "true"
|
||||
|
||||
# kube-proxy
|
||||
kube-proxy-replacement = "true"
|
||||
kube-proxy-replacement-healthz-bind-address = ":10256"
|
||||
enable-session-affinity = "true"
|
||||
|
||||
# ClusterIPs from host namespace
|
||||
bpf-lb-sock = "true"
|
||||
# ClusterIPs from external nodes
|
||||
bpf-lb-external-clusterip = "true"
|
||||
|
||||
# NodePort
|
||||
enable-node-port = "true"
|
||||
enable-health-check-nodeport = "false"
|
||||
|
||||
# ExternalIPs
|
||||
enable-external-ips = "true"
|
||||
|
||||
# HostPort
|
||||
enable-host-port = "true"
|
||||
|
||||
# IPAM
|
||||
ipam = "cluster-pool"
|
||||
disable-cnp-status-updates = "true"
|
||||
cluster-pool-ipv4-cidr = "${var.pod_cidr}"
|
||||
cluster-pool-ipv4-mask-size = "24"
|
||||
|
||||
# Health
|
||||
agent-health-port = "9876"
|
||||
enable-health-checking = "true"
|
||||
enable-endpoint-health-checking = "true"
|
||||
|
||||
# Identity
|
||||
enable-well-known-identities = "false"
|
||||
enable-remote-node-identity = "true"
|
||||
|
||||
# Hubble server
|
||||
enable-hubble = var.enable_hubble
|
||||
hubble-disable-tls = "false"
|
||||
hubble-listen-address = ":4244"
|
||||
hubble-socket-path = "/var/run/cilium/hubble.sock"
|
||||
hubble-tls-client-ca-files = "/var/lib/cilium/tls/hubble/client-ca.crt"
|
||||
hubble-tls-cert-file = "/var/lib/cilium/tls/hubble/server.crt"
|
||||
hubble-tls-key-file = "/var/lib/cilium/tls/hubble/server.key"
|
||||
hubble-export-file-max-backups = "5"
|
||||
hubble-export-file-max-size-mb = "10"
|
||||
|
||||
# Hubble metrics
|
||||
hubble-metrics-server = ":9965"
|
||||
hubble-metrics = "dns drop tcp flow port-distribution icmp httpV2"
|
||||
enable-hubble-open-metrics = "false"
|
||||
|
||||
|
||||
# Misc
|
||||
enable-bandwidth-manager = "false"
|
||||
enable-local-redirect-policy = "false"
|
||||
policy-audit-mode = "false"
|
||||
operator-api-serve-addr = "127.0.0.1:9234"
|
||||
enable-l2-neigh-discovery = "true"
|
||||
enable-k8s-terminating-endpoint = "true"
|
||||
enable-k8s-networkpolicy = "true"
|
||||
external-envoy-proxy = "false"
|
||||
write-cni-conf-when-ready = "/host/etc/cni/net.d/05-cilium.conflist"
|
||||
cni-exclusive = "true"
|
||||
cni-log-file = "/var/run/cilium/cilium-cni.log"
|
||||
}
|
||||
}
|
||||
|
addons/cilium/daemonset.tf (new file, 379 lines)
@ -0,0 +1,379 @@
|
||||
resource "kubernetes_daemonset" "cilium" {
|
||||
wait_for_rollout = false
|
||||
|
||||
metadata {
|
||||
name = "cilium"
|
||||
namespace = "kube-system"
|
||||
labels = {
|
||||
k8s-app = "cilium"
|
||||
}
|
||||
}
|
||||
spec {
|
||||
strategy {
|
||||
type = "RollingUpdate"
|
||||
rolling_update {
|
||||
max_unavailable = "1"
|
||||
}
|
||||
}
|
||||
selector {
|
||||
match_labels = {
|
||||
k8s-app = "cilium-agent"
|
||||
}
|
||||
}
|
||||
template {
|
||||
metadata {
|
||||
labels = {
|
||||
k8s-app = "cilium-agent"
|
||||
}
|
||||
annotations = {
|
||||
"prometheus.io/port" = "9962"
|
||||
"prometheus.io/scrape" = "true"
|
||||
}
|
||||
}
|
||||
spec {
|
||||
host_network = true
|
||||
priority_class_name = "system-node-critical"
|
||||
service_account_name = "cilium-agent"
|
||||
security_context {
|
||||
seccomp_profile {
|
||||
type = "RuntimeDefault"
|
||||
}
|
||||
}
|
||||
toleration {
|
||||
key = "node-role.kubernetes.io/controller"
|
||||
operator = "Exists"
|
||||
}
|
||||
toleration {
|
||||
key = "node.kubernetes.io/not-ready"
|
||||
operator = "Exists"
|
||||
}
|
||||
dynamic "toleration" {
|
||||
for_each = var.daemonset_tolerations
|
||||
content {
|
||||
key = toleration.value
|
||||
operator = "Exists"
|
||||
}
|
||||
}
|
||||
automount_service_account_token = true
|
||||
enable_service_links = false
|
||||
|
||||
# Cilium v1.13.1 starts installing CNI plugins in yet another init container
|
||||
# https://github.com/cilium/cilium/pull/24075
|
||||
init_container {
|
||||
name = "install-cni"
|
||||
image = "quay.io/cilium/cilium:v1.16.4"
|
||||
command = ["/install-plugin.sh"]
|
||||
security_context {
|
||||
allow_privilege_escalation = true
|
||||
privileged = true
|
||||
capabilities {
|
||||
drop = ["ALL"]
|
||||
}
|
||||
}
|
||||
volume_mount {
|
||||
name = "cni-bin-dir"
|
||||
mount_path = "/host/opt/cni/bin"
|
||||
}
|
||||
}
|
||||
|
||||
# Required to mount cgroup2 filesystem on the underlying Kubernetes node.
|
||||
# We use the nsenter command with the host's cgroup and mount namespaces enabled.
|
||||
init_container {
|
||||
name = "mount-cgroup"
|
||||
image = "quay.io/cilium/cilium:v1.16.4"
|
||||
command = [
|
||||
"sh",
|
||||
"-ec",
|
||||
# The statically linked Go program binary is invoked to avoid any
|
||||
# dependency on utilities like sh and mount that can be missing on certain
|
||||
# distros installed on the underlying host. Copy the binary to the
|
||||
# same directory where we install cilium cni plugin so that exec permissions
|
||||
# are available.
|
||||
"cp /usr/bin/cilium-mount /hostbin/cilium-mount && nsenter --cgroup=/hostproc/1/ns/cgroup --mount=/hostproc/1/ns/mnt \"$${BIN_PATH}/cilium-mount\" $CGROUP_ROOT; rm /hostbin/cilium-mount"
|
||||
]
|
||||
env {
|
||||
name = "CGROUP_ROOT"
|
||||
value = "/run/cilium/cgroupv2"
|
||||
}
|
||||
env {
|
||||
name = "BIN_PATH"
|
||||
value = "/opt/cni/bin"
|
||||
}
|
||||
security_context {
|
||||
allow_privilege_escalation = true
|
||||
privileged = true
|
||||
}
|
||||
volume_mount {
|
||||
name = "hostproc"
|
||||
mount_path = "/hostproc"
|
||||
}
|
||||
volume_mount {
|
||||
name = "cni-bin-dir"
|
||||
mount_path = "/hostbin"
|
||||
}
|
||||
}
|
||||
|
||||
init_container {
|
||||
name = "clean-cilium-state"
|
||||
image = "quay.io/cilium/cilium:v1.16.4"
|
||||
command = ["/init-container.sh"]
|
||||
security_context {
|
||||
allow_privilege_escalation = true
|
||||
privileged = true
|
||||
}
|
||||
volume_mount {
|
||||
name = "sys-fs-bpf"
|
||||
mount_path = "/sys/fs/bpf"
|
||||
}
|
||||
volume_mount {
|
||||
name = "var-run-cilium"
|
||||
mount_path = "/var/run/cilium"
|
||||
}
|
||||
# Required to mount cgroup filesystem from the host to cilium agent pod
|
||||
volume_mount {
|
||||
name = "cilium-cgroup"
|
||||
mount_path = "/run/cilium/cgroupv2"
|
||||
mount_propagation = "HostToContainer"
|
||||
}
|
||||
}
|
||||
|
||||
container {
|
||||
name = "cilium-agent"
|
||||
image = "quay.io/cilium/cilium:v1.16.4"
|
||||
command = ["cilium-agent"]
|
||||
args = [
|
||||
"--config-dir=/tmp/cilium/config-map"
|
||||
]
|
||||
env {
|
||||
name = "K8S_NODE_NAME"
|
||||
value_from {
|
||||
field_ref {
|
||||
api_version = "v1"
|
||||
field_path = "spec.nodeName"
|
||||
}
|
||||
}
|
||||
}
|
||||
env {
|
||||
name = "CILIUM_K8S_NAMESPACE"
|
||||
value_from {
|
||||
field_ref {
|
||||
api_version = "v1"
|
||||
field_path = "metadata.namespace"
|
||||
}
|
||||
}
|
||||
}
|
||||
env {
|
||||
name = "KUBERNETES_SERVICE_HOST"
|
||||
value_from {
|
||||
config_map_key_ref {
|
||||
name = "in-cluster"
|
||||
key = "apiserver-host"
|
||||
}
|
||||
}
|
||||
}
|
||||
env {
|
||||
name = "KUBERNETES_SERVICE_PORT"
|
||||
value_from {
|
||||
config_map_key_ref {
|
||||
name = "in-cluster"
|
||||
key = "apiserver-port"
|
||||
}
|
||||
}
|
||||
}
|
||||
port {
|
||||
name = "peer-service"
|
||||
protocol = "TCP"
|
||||
container_port = 4244
|
||||
}
|
||||
# Metrics
|
||||
port {
|
||||
name = "metrics"
|
||||
protocol = "TCP"
|
||||
container_port = 9962
|
||||
}
|
||||
port {
|
||||
name = "envoy-metrics"
|
||||
protocol = "TCP"
|
||||
container_port = 9964
|
||||
}
|
||||
port {
|
||||
name = "hubble-metrics"
|
||||
protocol = "TCP"
|
||||
container_port = 9965
|
||||
}
|
||||
# Not yet used; exec probes are preferred
|
||||
port {
|
||||
name = "health"
|
||||
protocol = "TCP"
|
||||
container_port = 9876
|
||||
}
|
||||
lifecycle {
|
||||
pre_stop {
|
||||
exec {
|
||||
command = ["/cni-uninstall.sh"]
|
||||
}
|
||||
}
|
||||
}
|
||||
security_context {
|
||||
allow_privilege_escalation = true
|
||||
privileged = true
|
||||
}
|
||||
liveness_probe {
|
||||
exec {
|
||||
command = ["cilium", "status", "--brief"]
|
||||
}
|
||||
initial_delay_seconds = 120
|
||||
timeout_seconds = 5
|
||||
period_seconds = 30
|
||||
success_threshold = 1
|
||||
failure_threshold = 10
|
||||
}
|
||||
readiness_probe {
|
||||
exec {
|
||||
command = ["cilium", "status", "--brief"]
|
||||
}
|
||||
initial_delay_seconds = 5
|
||||
timeout_seconds = 5
|
||||
period_seconds = 20
|
||||
success_threshold = 1
|
||||
failure_threshold = 3
|
||||
}
|
||||
# Load kernel modules
|
||||
volume_mount {
|
||||
name = "lib-modules"
|
||||
read_only = true
|
||||
mount_path = "/lib/modules"
|
||||
}
|
||||
# Access iptables concurrently
|
||||
volume_mount {
|
||||
name = "xtables-lock"
|
||||
mount_path = "/run/xtables.lock"
|
||||
}
|
||||
# Keep state between restarts
|
||||
volume_mount {
|
||||
name = "var-run-cilium"
|
||||
mount_path = "/var/run/cilium"
|
||||
}
|
||||
volume_mount {
|
||||
name = "sys-fs-bpf"
|
||||
mount_path = "/sys/fs/bpf"
|
||||
mount_propagation = "Bidirectional"
|
||||
}
|
||||
# Configuration
|
||||
volume_mount {
|
||||
name = "config"
|
||||
read_only = true
|
||||
mount_path = "/tmp/cilium/config-map"
|
||||
}
|
||||
# Install config on host
|
||||
volume_mount {
|
||||
name = "cni-conf-dir"
|
||||
mount_path = "/host/etc/cni/net.d"
|
||||
}
|
||||
# Hubble
|
||||
volume_mount {
|
||||
name = "hubble-tls"
|
||||
mount_path = "/var/lib/cilium/tls/hubble"
|
||||
read_only = true
|
||||
}
|
||||
}
|
||||
termination_grace_period_seconds = 1
|
||||
|
||||
# Load kernel modules
|
||||
volume {
|
||||
name = "lib-modules"
|
||||
host_path {
|
||||
path = "/lib/modules"
|
||||
}
|
||||
}
|
||||
# Access iptables concurrently with other processes (e.g. kube-proxy)
|
||||
volume {
|
||||
name = "xtables-lock"
|
||||
host_path {
|
||||
path = "/run/xtables.lock"
|
||||
type = "FileOrCreate"
|
||||
}
|
||||
}
|
||||
# Keep state between restarts
|
||||
volume {
|
||||
name = "var-run-cilium"
|
||||
host_path {
|
||||
path = "/var/run/cilium"
|
||||
type = "DirectoryOrCreate"
|
||||
}
|
||||
}
|
||||
# Keep state for bpf maps between restarts
|
||||
volume {
|
||||
name = "sys-fs-bpf"
|
||||
host_path {
|
||||
path = "/sys/fs/bpf"
|
||||
type = "DirectoryOrCreate"
|
||||
}
|
||||
}
|
||||
# Mount host cgroup2 filesystem
|
||||
volume {
|
||||
name = "hostproc"
|
||||
host_path {
|
||||
path = "/proc"
|
||||
type = "Directory"
|
||||
}
|
||||
}
|
||||
volume {
|
||||
name = "cilium-cgroup"
|
||||
host_path {
|
||||
path = "/run/cilium/cgroupv2"
|
||||
type = "DirectoryOrCreate"
|
||||
}
|
||||
}
|
||||
# Read configuration
|
||||
volume {
|
||||
name = "config"
|
||||
config_map {
|
||||
name = "cilium"
|
||||
}
|
||||
}
|
||||
# Install CNI plugin and config on host
|
||||
volume {
|
||||
name = "cni-bin-dir"
|
||||
host_path {
|
||||
path = "/opt/cni/bin"
|
||||
type = "DirectoryOrCreate"
|
||||
}
|
||||
}
|
||||
volume {
|
||||
name = "cni-conf-dir"
|
||||
host_path {
|
||||
path = "/etc/cni/net.d"
|
||||
type = "DirectoryOrCreate"
|
||||
}
|
||||
}
|
||||
# Hubble TLS (optional)
|
||||
volume {
|
||||
name = "hubble-tls"
|
||||
projected {
|
||||
default_mode = "0400"
|
||||
sources {
|
||||
secret {
|
||||
name = "hubble-server-certs"
|
||||
optional = true
|
||||
items {
|
||||
key = "ca.crt"
|
||||
path = "client-ca.crt"
|
||||
}
|
||||
items {
|
||||
key = "tls.crt"
|
||||
path = "server.crt"
|
||||
}
|
||||
items {
|
||||
key = "tls.key"
|
||||
path = "server.key"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
addons/cilium/deployment.tf (new file, 163 lines)
@ -0,0 +1,163 @@
|
||||
resource "kubernetes_deployment" "operator" {
|
||||
wait_for_rollout = false
|
||||
metadata {
|
||||
name = "cilium-operator"
|
||||
namespace = "kube-system"
|
||||
}
|
||||
spec {
|
||||
replicas = 1
|
||||
strategy {
|
||||
type = "RollingUpdate"
|
||||
rolling_update {
|
||||
max_unavailable = "1"
|
||||
}
|
||||
}
|
||||
selector {
|
||||
match_labels = {
|
||||
name = "cilium-operator"
|
||||
}
|
||||
}
|
||||
template {
|
||||
metadata {
|
||||
labels = {
|
||||
name = "cilium-operator"
|
||||
}
|
||||
annotations = {
|
||||
"prometheus.io/scrape" = "true"
|
||||
"prometheus.io/port" = "9963"
|
||||
}
|
||||
}
|
||||
spec {
|
||||
host_network = true
|
||||
priority_class_name = "system-cluster-critical"
|
||||
service_account_name = "cilium-operator"
|
||||
security_context {
|
||||
seccomp_profile {
|
||||
type = "RuntimeDefault"
|
||||
}
|
||||
}
|
||||
toleration {
|
||||
key = "node-role.kubernetes.io/controller"
|
||||
operator = "Exists"
|
||||
}
|
||||
toleration {
|
||||
key = "node.kubernetes.io/not-ready"
|
||||
operator = "Exists"
|
||||
}
|
||||
topology_spread_constraint {
|
||||
max_skew = 1
|
||||
topology_key = "kubernetes.io/hostname"
|
||||
when_unsatisfiable = "DoNotSchedule"
|
||||
label_selector {
|
||||
match_labels = {
|
||||
name = "cilium-operator"
|
||||
}
|
||||
}
|
||||
}
|
||||
automount_service_account_token = true
|
||||
enable_service_links = false
|
||||
container {
|
||||
name = "cilium-operator"
|
||||
image = "quay.io/cilium/operator-generic:v1.16.4"
|
||||
command = ["cilium-operator-generic"]
|
||||
args = [
|
||||
"--config-dir=/tmp/cilium/config-map",
|
||||
"--debug=$(CILIUM_DEBUG)"
|
||||
]
|
||||
env {
|
||||
name = "K8S_NODE_NAME"
|
||||
value_from {
|
||||
field_ref {
|
||||
api_version = "v1"
|
||||
field_path = "spec.nodeName"
|
||||
}
|
||||
}
|
||||
}
|
||||
env {
|
||||
name = "CILIUM_K8S_NAMESPACE"
|
||||
value_from {
|
||||
field_ref {
|
||||
api_version = "v1"
|
||||
field_path = "metadata.namespace"
|
||||
}
|
||||
}
|
||||
}
|
||||
env {
|
||||
name = "KUBERNETES_SERVICE_HOST"
|
||||
value_from {
|
||||
config_map_key_ref {
|
||||
name = "in-cluster"
|
||||
key = "apiserver-host"
|
||||
}
|
||||
}
|
||||
}
|
||||
env {
|
||||
name = "KUBERNETES_SERVICE_PORT"
|
||||
value_from {
|
||||
config_map_key_ref {
|
||||
name = "in-cluster"
|
||||
key = "apiserver-port"
|
||||
}
|
||||
}
|
||||
}
|
||||
env {
|
||||
name = "CILIUM_DEBUG"
|
||||
value_from {
|
||||
config_map_key_ref {
|
||||
name = "cilium"
|
||||
key = "debug"
|
||||
optional = true
|
||||
}
|
||||
}
|
||||
}
|
||||
port {
|
||||
name = "metrics"
|
||||
protocol = "TCP"
|
||||
host_port = 9963
|
||||
container_port = 9963
|
||||
}
|
||||
port {
|
||||
name = "health"
|
||||
container_port = 9234
|
||||
protocol = "TCP"
|
||||
}
|
||||
liveness_probe {
|
||||
http_get {
|
||||
scheme = "HTTP"
|
||||
host = "127.0.0.1"
|
||||
port = "9234"
|
||||
path = "/healthz"
|
||||
}
|
||||
initial_delay_seconds = 60
|
||||
timeout_seconds = 3
|
||||
period_seconds = 10
|
||||
}
|
||||
readiness_probe {
|
||||
http_get {
|
||||
scheme = "HTTP"
|
||||
host = "127.0.0.1"
|
||||
port = "9234"
|
||||
path = "/healthz"
|
||||
}
|
||||
timeout_seconds = 3
|
||||
period_seconds = 15
|
||||
failure_threshold = 5
|
||||
}
|
||||
volume_mount {
|
||||
name = "config"
|
||||
read_only = true
|
||||
mount_path = "/tmp/cilium/config-map"
|
||||
}
|
||||
}
|
||||
|
||||
volume {
|
||||
name = "config"
|
||||
config_map {
|
||||
name = "cilium"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
addons/cilium/service-account.tf (new file, 15 lines)
@ -0,0 +1,15 @@
|
||||
resource "kubernetes_service_account" "operator" {
|
||||
metadata {
|
||||
name = "cilium-operator"
|
||||
namespace = "kube-system"
|
||||
}
|
||||
automount_service_account_token = false
|
||||
}
|
||||
|
||||
resource "kubernetes_service_account" "agent" {
|
||||
metadata {
|
||||
name = "cilium-agent"
|
||||
namespace = "kube-system"
|
||||
}
|
||||
automount_service_account_token = false
|
||||
}
|
addons/cilium/variables.tf (new file, 17 lines)
@ -0,0 +1,17 @@
|
||||
variable "pod_cidr" {
|
||||
type = string
|
||||
description = "CIDR IP range to assign Kubernetes pods"
|
||||
default = "10.2.0.0/16"
|
||||
}
|
||||
|
||||
variable "daemonset_tolerations" {
|
||||
type = list(string)
|
||||
description = "List of additional taint keys kube-system DaemonSets should tolerate (e.g. ['custom-role', 'gpu-role'])"
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "enable_hubble" {
|
||||
type = bool
|
||||
description = "Run the embedded Hubble Server and mount hubble-server-certs Secret"
|
||||
default = true
|
||||
}
|
addons/cilium/versions.tf (new file, 8 lines)
@ -0,0 +1,8 @@
|
||||
terraform {
|
||||
required_providers {
|
||||
kubernetes = {
|
||||
source = "hashicorp/kubernetes"
|
||||
version = "~> 2.8"
|
||||
}
|
||||
}
|
||||
}
|
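The files above make up a self-contained Terraform module for the Cilium addon. As a minimal sketch (not part of this change; the module path and values are illustrative assumptions), it could be consumed from a cluster configuration like so:

module "cilium" {
  source = "./addons/cilium"

  # All variables have defaults (see variables.tf above); shown here for clarity
  pod_cidr              = "10.2.0.0/16"
  daemonset_tolerations = []
  enable_hubble         = true
}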
addons/coredns/cluster-role.tf (new file, 37 lines)
@ -0,0 +1,37 @@
|
||||
resource "kubernetes_cluster_role" "coredns" {
|
||||
metadata {
|
||||
name = "system:coredns"
|
||||
}
|
||||
rule {
|
||||
api_groups = [""]
|
||||
resources = [
|
||||
"endpoints",
|
||||
"services",
|
||||
"pods",
|
||||
"namespaces",
|
||||
]
|
||||
verbs = [
|
||||
"list",
|
||||
"watch",
|
||||
]
|
||||
}
|
||||
rule {
|
||||
api_groups = [""]
|
||||
resources = [
|
||||
"nodes",
|
||||
]
|
||||
verbs = [
|
||||
"get",
|
||||
]
|
||||
}
|
||||
rule {
|
||||
api_groups = ["discovery.k8s.io"]
|
||||
resources = [
|
||||
"endpointslices",
|
||||
]
|
||||
verbs = [
|
||||
"list",
|
||||
"watch",
|
||||
]
|
||||
}
|
||||
}
|
addons/coredns/config.tf (new file, 30 lines)
@ -0,0 +1,30 @@
|
||||
resource "kubernetes_config_map" "coredns" {
|
||||
metadata {
|
||||
name = "coredns"
|
||||
namespace = "kube-system"
|
||||
}
|
||||
data = {
|
||||
"Corefile" = <<-EOF
|
||||
.:53 {
|
||||
errors
|
||||
health {
|
||||
lameduck 5s
|
||||
}
|
||||
ready
|
||||
log . {
|
||||
class error
|
||||
}
|
||||
kubernetes ${var.cluster_domain_suffix} in-addr.arpa ip6.arpa {
|
||||
pods insecure
|
||||
fallthrough in-addr.arpa ip6.arpa
|
||||
}
|
||||
prometheus :9153
|
||||
forward . /etc/resolv.conf
|
||||
cache 30
|
||||
loop
|
||||
reload
|
||||
loadbalance
|
||||
}
|
||||
EOF
|
||||
}
|
||||
}
|
addons/coredns/deployment.tf (new file, 151 lines)
@ -0,0 +1,151 @@
|
||||
resource "kubernetes_deployment" "coredns" {
|
||||
wait_for_rollout = false
|
||||
metadata {
|
||||
name = "coredns"
|
||||
namespace = "kube-system"
|
||||
labels = {
|
||||
k8s-app = "coredns"
|
||||
"kubernetes.io/name" = "CoreDNS"
|
||||
}
|
||||
}
|
||||
spec {
|
||||
replicas = var.replicas
|
||||
strategy {
|
||||
type = "RollingUpdate"
|
||||
rolling_update {
|
||||
max_unavailable = "1"
|
||||
}
|
||||
}
|
||||
selector {
|
||||
match_labels = {
|
||||
k8s-app = "coredns"
|
||||
tier = "control-plane"
|
||||
}
|
||||
}
|
||||
template {
|
||||
metadata {
|
||||
labels = {
|
||||
k8s-app = "coredns"
|
||||
tier = "control-plane"
|
||||
}
|
||||
}
|
||||
spec {
|
||||
affinity {
|
||||
node_affinity {
|
||||
preferred_during_scheduling_ignored_during_execution {
|
||||
weight = 100
|
||||
preference {
|
||||
match_expressions {
|
||||
key = "node.kubernetes.io/controller"
|
||||
operator = "Exists"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
pod_anti_affinity {
|
||||
preferred_during_scheduling_ignored_during_execution {
|
||||
weight = 100
|
||||
pod_affinity_term {
|
||||
label_selector {
|
||||
match_expressions {
|
||||
key = "tier"
|
||||
operator = "In"
|
||||
values = ["control-plane"]
|
||||
}
|
||||
match_expressions {
|
||||
key = "k8s-app"
|
||||
operator = "In"
|
||||
values = ["coredns"]
|
||||
}
|
||||
}
|
||||
topology_key = "kubernetes.io/hostname"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
dns_policy = "Default"
|
||||
priority_class_name = "system-cluster-critical"
|
||||
security_context {
|
||||
seccomp_profile {
|
||||
type = "RuntimeDefault"
|
||||
}
|
||||
}
|
||||
service_account_name = "coredns"
|
||||
toleration {
|
||||
key = "node-role.kubernetes.io/controller"
|
||||
effect = "NoSchedule"
|
||||
}
|
||||
container {
|
||||
name = "coredns"
|
||||
image = "registry.k8s.io/coredns/coredns:v1.12.0"
|
||||
args = ["-conf", "/etc/coredns/Corefile"]
|
||||
port {
|
||||
name = "dns"
|
||||
container_port = 53
|
||||
protocol = "UDP"
|
||||
}
|
||||
port {
|
||||
name = "dns-tcp"
|
||||
container_port = 53
|
||||
protocol = "TCP"
|
||||
}
|
||||
port {
|
||||
name = "metrics"
|
||||
container_port = 9153
|
||||
protocol = "TCP"
|
||||
}
|
||||
resources {
|
||||
requests = {
|
||||
cpu = "100m"
|
||||
memory = "70Mi"
|
||||
}
|
||||
limits = {
|
||||
memory = "170Mi"
|
||||
}
|
||||
}
|
||||
security_context {
|
||||
capabilities {
|
||||
add = ["NET_BIND_SERVICE"]
|
||||
drop = ["all"]
|
||||
}
|
||||
read_only_root_filesystem = true
|
||||
}
|
||||
liveness_probe {
|
||||
http_get {
|
||||
path = "/health"
|
||||
port = "8080"
|
||||
scheme = "HTTP"
|
||||
}
|
||||
initial_delay_seconds = 60
|
||||
timeout_seconds = 5
|
||||
success_threshold = 1
|
||||
failure_threshold = 5
|
||||
}
|
||||
readiness_probe {
|
||||
http_get {
|
||||
path = "/ready"
|
||||
port = "8181"
|
||||
scheme = "HTTP"
|
||||
}
|
||||
}
|
||||
volume_mount {
|
||||
name = "config"
|
||||
mount_path = "/etc/coredns"
|
||||
read_only = true
|
||||
}
|
||||
}
|
||||
volume {
|
||||
name = "config"
|
||||
config_map {
|
||||
name = "coredns"
|
||||
items {
|
||||
key = "Corefile"
|
||||
path = "Corefile"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
addons/coredns/service-account.tf (new file, 24 lines)
@ -0,0 +1,24 @@
|
||||
resource "kubernetes_service_account" "coredns" {
|
||||
metadata {
|
||||
name = "coredns"
|
||||
namespace = "kube-system"
|
||||
}
|
||||
automount_service_account_token = false
|
||||
}
|
||||
|
||||
|
||||
resource "kubernetes_cluster_role_binding" "coredns" {
|
||||
metadata {
|
||||
name = "system:coredns"
|
||||
}
|
||||
role_ref {
|
||||
api_group = "rbac.authorization.k8s.io"
|
||||
kind = "ClusterRole"
|
||||
name = "system:coredns"
|
||||
}
|
||||
subject {
|
||||
kind = "ServiceAccount"
|
||||
name = "coredns"
|
||||
namespace = "kube-system"
|
||||
}
|
||||
}
|
addons/coredns/service.tf (new file, 31 lines)
@ -0,0 +1,31 @@
|
||||
resource "kubernetes_service" "coredns" {
|
||||
metadata {
|
||||
name = "coredns"
|
||||
namespace = "kube-system"
|
||||
labels = {
|
||||
"k8s-app" = "coredns"
|
||||
"kubernetes.io/name" = "CoreDNS"
|
||||
}
|
||||
annotations = {
|
||||
"prometheus.io/scrape" = "true"
|
||||
"prometheus.io/port" = "9153"
|
||||
}
|
||||
}
|
||||
spec {
|
||||
type = "ClusterIP"
|
||||
cluster_ip = var.cluster_dns_service_ip
|
||||
selector = {
|
||||
k8s-app = "coredns"
|
||||
}
|
||||
port {
|
||||
name = "dns"
|
||||
protocol = "UDP"
|
||||
port = 53
|
||||
}
|
||||
port {
|
||||
name = "dns-tcp"
|
||||
protocol = "TCP"
|
||||
port = 53
|
||||
}
|
||||
}
|
||||
}
|
addons/coredns/variables.tf (new file, 15 lines)
@ -0,0 +1,15 @@
|
||||
variable "replicas" {
|
||||
type = number
|
||||
description = "CoreDNS replica count"
|
||||
default = 2
|
||||
}
|
||||
|
||||
variable "cluster_dns_service_ip" {
|
||||
description = "Must be set to `cluster_dns_service_ip` output by cluster"
|
||||
default = "10.3.0.10"
|
||||
}
|
||||
|
||||
variable "cluster_domain_suffix" {
|
||||
description = "Must be set to `cluster_domain_suffix` output by cluster"
|
||||
default = "cluster.local"
|
||||
}
|
addons/coredns/versions.tf (new file, 9 lines)
@ -0,0 +1,9 @@
|
||||
terraform {
|
||||
required_providers {
|
||||
kubernetes = {
|
||||
source = "hashicorp/kubernetes"
|
||||
version = "~> 2.8"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
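As with the Cilium addon, the CoreDNS files above form a standalone module. A hedged usage sketch, with illustrative values matching the defaults in variables.tf:

module "coredns" {
  source = "./addons/coredns"

  replicas               = 2
  cluster_dns_service_ip = "10.3.0.10"
  cluster_domain_suffix  = "cluster.local"
}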
addons/flannel/cluster-role-binding.tf (new file, 18 lines)
@ -0,0 +1,18 @@
|
||||
resource "kubernetes_cluster_role_binding" "flannel" {
|
||||
metadata {
|
||||
name = "flannel"
|
||||
}
|
||||
|
||||
role_ref {
|
||||
api_group = "rbac.authorization.k8s.io"
|
||||
kind = "ClusterRole"
|
||||
name = "flannel"
|
||||
}
|
||||
|
||||
subject {
|
||||
kind = "ServiceAccount"
|
||||
name = "flannel"
|
||||
namespace = "kube-system"
|
||||
}
|
||||
}
|
||||
|
addons/flannel/cluster-role.tf (new file, 24 lines)
@ -0,0 +1,24 @@
|
||||
resource "kubernetes_cluster_role" "flannel" {
|
||||
metadata {
|
||||
name = "flannel"
|
||||
}
|
||||
|
||||
rule {
|
||||
api_groups = [""]
|
||||
resources = ["pods"]
|
||||
verbs = ["get"]
|
||||
}
|
||||
|
||||
rule {
|
||||
api_groups = [""]
|
||||
resources = ["nodes"]
|
||||
verbs = ["list", "watch"]
|
||||
}
|
||||
|
||||
rule {
|
||||
api_groups = [""]
|
||||
resources = ["nodes/status"]
|
||||
verbs = ["patch"]
|
||||
}
|
||||
}
|
||||
|
addons/flannel/config.tf (new file, 44 lines)
@ -0,0 +1,44 @@
|
||||
resource "kubernetes_config_map" "config" {
|
||||
metadata {
|
||||
name = "flannel-config"
|
||||
namespace = "kube-system"
|
||||
labels = {
|
||||
k8s-app = "flannel"
|
||||
tier = "node"
|
||||
}
|
||||
}
|
||||
|
||||
data = {
|
||||
"cni-conf.json" = <<-EOF
|
||||
{
|
||||
"name": "cbr0",
|
||||
"cniVersion": "0.3.1",
|
||||
"plugins": [
|
||||
{
|
||||
"type": "flannel",
|
||||
"delegate": {
|
||||
"hairpinMode": true,
|
||||
"isDefaultGateway": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "portmap",
|
||||
"capabilities": {
|
||||
"portMappings": true
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
EOF
|
||||
"net-conf.json" = <<-EOF
|
||||
{
|
||||
"Network": "${var.pod_cidr}",
|
||||
"Backend": {
|
||||
"Type": "vxlan",
|
||||
"Port": 4789
|
||||
}
|
||||
}
|
||||
EOF
|
||||
}
|
||||
}
|
||||
|
addons/flannel/daemonset.tf (new file, 167 lines)
@ -0,0 +1,167 @@
|
||||
resource "kubernetes_daemonset" "flannel" {
|
||||
metadata {
|
||||
name = "flannel"
|
||||
namespace = "kube-system"
|
||||
labels = {
|
||||
k8s-app = "flannel"
|
||||
}
|
||||
}
|
||||
spec {
|
||||
strategy {
|
||||
type = "RollingUpdate"
|
||||
rolling_update {
|
||||
max_unavailable = "1"
|
||||
}
|
||||
}
|
||||
selector {
|
||||
match_labels = {
|
||||
k8s-app = "flannel"
|
||||
}
|
||||
}
|
||||
template {
|
||||
metadata {
|
||||
labels = {
|
||||
k8s-app = "flannel"
|
||||
}
|
||||
}
|
||||
spec {
|
||||
host_network = true
|
||||
priority_class_name = "system-node-critical"
|
||||
service_account_name = "flannel"
|
||||
security_context {
|
||||
seccomp_profile {
|
||||
type = "RuntimeDefault"
|
||||
}
|
||||
}
|
||||
toleration {
|
||||
key = "node-role.kubernetes.io/controller"
|
||||
operator = "Exists"
|
||||
}
|
||||
toleration {
|
||||
key = "node.kubernetes.io/not-ready"
|
||||
operator = "Exists"
|
||||
}
|
||||
dynamic "toleration" {
|
||||
for_each = var.daemonset_tolerations
|
||||
content {
|
||||
key = toleration.value
|
||||
operator = "Exists"
|
||||
}
|
||||
}
|
||||
init_container {
|
||||
name = "install-cni"
|
||||
image = "quay.io/poseidon/flannel-cni:v0.4.2"
|
||||
command = ["/install-cni.sh"]
|
||||
env {
|
||||
name = "CNI_NETWORK_CONFIG"
|
||||
value_from {
|
||||
config_map_key_ref {
|
||||
name = "flannel-config"
|
||||
key = "cni-conf.json"
|
||||
}
|
||||
}
|
||||
}
|
||||
volume_mount {
|
||||
name = "cni-bin-dir"
|
||||
mount_path = "/host/opt/cni/bin/"
|
||||
}
|
||||
volume_mount {
|
||||
name = "cni-conf-dir"
|
||||
mount_path = "/host/etc/cni/net.d"
|
||||
}
|
||||
}
|
||||
|
||||
container {
|
||||
name = "flannel"
|
||||
image = "docker.io/flannel/flannel:v0.26.1"
|
||||
command = [
|
||||
"/opt/bin/flanneld",
|
||||
"--ip-masq",
|
||||
"--kube-subnet-mgr",
|
||||
"--iface=$(POD_IP)"
|
||||
]
|
||||
env {
|
||||
name = "POD_NAME"
|
||||
value_from {
|
||||
field_ref {
|
||||
field_path = "metadata.name"
|
||||
}
|
||||
}
|
||||
}
|
||||
env {
|
||||
name = "POD_NAMESPACE"
|
||||
value_from {
|
||||
field_ref {
|
||||
field_path = "metadata.namespace"
|
||||
}
|
||||
}
|
||||
}
|
||||
env {
|
||||
name = "POD_IP"
|
||||
value_from {
|
||||
field_ref {
|
||||
field_path = "status.podIP"
|
||||
}
|
||||
}
|
||||
}
|
||||
security_context {
|
||||
privileged = true
|
||||
}
|
||||
resources {
|
||||
requests = {
|
||||
cpu = "100m"
|
||||
}
|
||||
}
|
||||
volume_mount {
|
||||
name = "flannel-config"
|
||||
mount_path = "/etc/kube-flannel/"
|
||||
}
|
||||
volume_mount {
|
||||
name = "run-flannel"
|
||||
mount_path = "/run/flannel"
|
||||
}
|
||||
volume_mount {
|
||||
name = "xtables-lock"
|
||||
mount_path = "/run/xtables.lock"
|
||||
}
|
||||
}
|
||||
|
||||
volume {
|
||||
name = "flannel-config"
|
||||
config_map {
|
||||
name = "flannel-config"
|
||||
}
|
||||
}
|
||||
volume {
|
||||
name = "run-flannel"
|
||||
host_path {
|
||||
path = "/run/flannel"
|
||||
}
|
||||
}
|
||||
# Used by install-cni
|
||||
volume {
|
||||
name = "cni-bin-dir"
|
||||
host_path {
|
||||
path = "/opt/cni/bin"
|
||||
}
|
||||
}
|
||||
volume {
|
||||
name = "cni-conf-dir"
|
||||
host_path {
|
||||
path = "/etc/cni/net.d"
|
||||
type = "DirectoryOrCreate"
|
||||
}
|
||||
}
|
||||
# Access iptables concurrently
|
||||
volume {
|
||||
name = "xtables-lock"
|
||||
host_path {
|
||||
path = "/run/xtables.lock"
|
||||
type = "FileOrCreate"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
addons/flannel/service-account.tf (new file, 7 lines)
@ -0,0 +1,7 @@
|
||||
resource "kubernetes_service_account" "flannel" {
|
||||
metadata {
|
||||
name = "flannel"
|
||||
namespace = "kube-system"
|
||||
}
|
||||
}
|
||||
|
addons/flannel/variables.tf (new file, 11 lines)
@ -0,0 +1,11 @@
|
||||
variable "pod_cidr" {
|
||||
type = string
|
||||
description = "CIDR IP range to assign Kubernetes pods"
|
||||
default = "10.2.0.0/16"
|
||||
}
|
||||
|
||||
variable "daemonset_tolerations" {
|
||||
type = list(string)
|
||||
description = "List of additional taint keys kube-system DaemonSets should tolerate (e.g. ['custom-role', 'gpu-role'])"
|
||||
default = []
|
||||
}
|
addons/flannel/versions.tf (new file, 8 lines)
@ -0,0 +1,8 @@
|
||||
terraform {
|
||||
required_providers {
|
||||
kubernetes = {
|
||||
source = "hashicorp/kubernetes"
|
||||
version = "~> 2.8"
|
||||
}
|
||||
}
|
||||
}
|
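The flannel addon follows the same module pattern; an illustrative (non-authoritative) invocation:

module "flannel" {
  source = "./addons/flannel"

  pod_cidr              = "10.2.0.0/16"
  daemonset_tolerations = []
}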
@ -24,7 +24,7 @@ spec:
|
||||
type: RuntimeDefault
|
||||
containers:
|
||||
- name: grafana
|
||||
image: docker.io/grafana/grafana:8.4.3
|
||||
image: docker.io/grafana/grafana:9.3.1
|
||||
env:
|
||||
- name: GF_PATHS_CONFIG
|
||||
value: "/etc/grafana/custom.ini"
|
||||
@ -32,15 +32,22 @@ spec:
|
||||
- name: http
|
||||
containerPort: 8080
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /metrics
|
||||
tcpSocket:
|
||||
port: 8080
|
||||
initialDelaySeconds: 10
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 1
|
||||
failureThreshold: 5
|
||||
successThreshold: 1
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /api/health
|
||||
scheme: HTTP
|
||||
path: /robots.txt
|
||||
port: 8080
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 30
|
||||
successThreshold: 1
|
||||
timeoutSeconds: 5
|
||||
resources:
|
||||
requests:
|
||||
cpu: 100m
|
||||
|
@ -23,7 +23,7 @@ spec:
|
||||
type: RuntimeDefault
|
||||
containers:
|
||||
- name: nginx-ingress-controller
|
||||
image: k8s.gcr.io/ingress-nginx/controller:v1.1.2
|
||||
image: registry.k8s.io/ingress-nginx/controller:v1.5.1
|
||||
args:
|
||||
- /nginx-ingress-controller
|
||||
- --controller-class=k8s.io/public
|
||||
|
@ -29,7 +29,7 @@ rules:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- ""
|
||||
- ""
|
||||
resources:
|
||||
- events
|
||||
verbs:
|
||||
@ -59,4 +59,11 @@ rules:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
|
||||
- apiGroups:
|
||||
- discovery.k8s.io
|
||||
resources:
|
||||
- "endpointslices"
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
|
@ -10,6 +10,7 @@ rules:
|
||||
- configmaps
|
||||
- pods
|
||||
- secrets
|
||||
- endpoints
|
||||
verbs:
|
||||
- get
|
||||
- apiGroups:
|
||||
@ -37,3 +38,11 @@ rules:
|
||||
- endpoints
|
||||
verbs:
|
||||
- get
|
||||
- apiGroups:
|
||||
- "coordination.k8s.io"
|
||||
resources:
|
||||
- leases
|
||||
verbs:
|
||||
- create
|
||||
- get
|
||||
- update
|
||||
|
@ -23,7 +23,7 @@ spec:
|
||||
type: RuntimeDefault
|
||||
containers:
|
||||
- name: nginx-ingress-controller
|
||||
image: k8s.gcr.io/ingress-nginx/controller:v1.1.2
|
||||
image: registry.k8s.io/ingress-nginx/controller:v1.5.1
|
||||
args:
|
||||
- /nginx-ingress-controller
|
||||
- --controller-class=k8s.io/public
|
||||
|
@ -29,7 +29,7 @@ rules:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- ""
|
||||
- ""
|
||||
resources:
|
||||
- events
|
||||
verbs:
|
||||
@ -59,4 +59,11 @@ rules:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
|
||||
- apiGroups:
|
||||
- discovery.k8s.io
|
||||
resources:
|
||||
- "endpointslices"
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
|
@ -10,6 +10,7 @@ rules:
|
||||
- configmaps
|
||||
- pods
|
||||
- secrets
|
||||
- endpoints
|
||||
verbs:
|
||||
- get
|
||||
- apiGroups:
|
||||
@ -32,8 +33,11 @@ rules:
|
||||
verbs:
|
||||
- create
|
||||
- apiGroups:
|
||||
- ""
|
||||
- "coordination.k8s.io"
|
||||
resources:
|
||||
- endpoints
|
||||
- leases
|
||||
verbs:
|
||||
- create
|
||||
- get
|
||||
- update
|
||||
|
||||
|
@ -23,7 +23,7 @@ spec:
|
||||
type: RuntimeDefault
|
||||
containers:
|
||||
- name: nginx-ingress-controller
|
||||
image: k8s.gcr.io/ingress-nginx/controller:v1.1.2
|
||||
image: registry.k8s.io/ingress-nginx/controller:v1.5.1
|
||||
args:
|
||||
- /nginx-ingress-controller
|
||||
- --controller-class=k8s.io/public
|
||||
|
@ -29,7 +29,7 @@ rules:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- ""
|
||||
- ""
|
||||
resources:
|
||||
- events
|
||||
verbs:
|
||||
@ -59,4 +59,11 @@ rules:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
|
||||
- apiGroups:
|
||||
- discovery.k8s.io
|
||||
resources:
|
||||
- "endpointslices"
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
|
@ -10,6 +10,7 @@ rules:
|
||||
- configmaps
|
||||
- pods
|
||||
- secrets
|
||||
- endpoints
|
||||
verbs:
|
||||
- get
|
||||
- apiGroups:
|
||||
@ -32,8 +33,10 @@ rules:
|
||||
verbs:
|
||||
- create
|
||||
- apiGroups:
|
||||
- ""
|
||||
- "coordination.k8s.io"
|
||||
resources:
|
||||
- endpoints
|
||||
- leases
|
||||
verbs:
|
||||
- create
|
||||
- get
|
||||
- update
|
||||
|
@ -1,7 +1,7 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: ingress-controller-public
|
||||
name: nginx-ingress-controller
|
||||
namespace: ingress
|
||||
annotations:
|
||||
prometheus.io/scrape: 'true'
|
||||
@ -10,7 +10,7 @@ spec:
|
||||
type: ClusterIP
|
||||
clusterIP: 10.3.0.12
|
||||
selector:
|
||||
name: ingress-controller-public
|
||||
name: nginx-ingress-controller
|
||||
phase: prod
|
||||
ports:
|
||||
- name: http
|
||||
|
@ -23,7 +23,7 @@ spec:
|
||||
type: RuntimeDefault
|
||||
containers:
|
||||
- name: nginx-ingress-controller
|
||||
image: k8s.gcr.io/ingress-nginx/controller:v1.1.2
|
||||
image: registry.k8s.io/ingress-nginx/controller:v1.5.1
|
||||
args:
|
||||
- /nginx-ingress-controller
|
||||
- --controller-class=k8s.io/public
|
||||
|
@ -29,7 +29,7 @@ rules:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- ""
|
||||
- ""
|
||||
resources:
|
||||
- events
|
||||
verbs:
|
||||
@ -59,4 +59,11 @@ rules:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
|
||||
- apiGroups:
|
||||
- discovery.k8s.io
|
||||
resources:
|
||||
- "endpointslices"
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
|
@ -10,6 +10,7 @@ rules:
|
||||
- configmaps
|
||||
- pods
|
||||
- secrets
|
||||
- endpoints
|
||||
verbs:
|
||||
- get
|
||||
- apiGroups:
|
||||
@ -32,8 +33,10 @@ rules:
|
||||
verbs:
|
||||
- create
|
||||
- apiGroups:
|
||||
- ""
|
||||
- "coordination.k8s.io"
|
||||
resources:
|
||||
- endpoints
|
||||
- leases
|
||||
verbs:
|
||||
- create
|
||||
- get
|
||||
- update
|
||||
|
@ -23,7 +23,7 @@ spec:
|
||||
type: RuntimeDefault
|
||||
containers:
|
||||
- name: nginx-ingress-controller
|
||||
image: k8s.gcr.io/ingress-nginx/controller:v1.1.2
|
||||
image: registry.k8s.io/ingress-nginx/controller:v1.5.1
|
||||
args:
|
||||
- /nginx-ingress-controller
|
||||
- --controller-class=k8s.io/public
|
||||
|
@ -29,7 +29,7 @@ rules:
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- ""
|
||||
- ""
|
||||
resources:
|
||||
- events
|
||||
verbs:
|
||||
@ -59,4 +59,11 @@ rules:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
|
||||
- apiGroups:
|
||||
- discovery.k8s.io
|
||||
resources:
|
||||
- "endpointslices"
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
|
@ -10,6 +10,7 @@ rules:
|
||||
- configmaps
|
||||
- pods
|
||||
- secrets
|
||||
- endpoints
|
||||
verbs:
|
||||
- get
|
||||
- apiGroups:
|
||||
@ -32,8 +33,10 @@ rules:
|
||||
verbs:
|
||||
- create
|
||||
- apiGroups:
|
||||
- ""
|
||||
- "coordination.k8s.io"
|
||||
resources:
|
||||
- endpoints
|
||||
- leases
|
||||
verbs:
|
||||
- create
|
||||
- get
|
||||
- update
|
||||
|
@ -21,7 +21,7 @@ spec:
|
||||
serviceAccountName: prometheus
|
||||
containers:
|
||||
- name: prometheus
|
||||
image: quay.io/prometheus/prometheus:v2.33.5
|
||||
image: quay.io/prometheus/prometheus:v2.40.5
|
||||
args:
|
||||
- --web.listen-address=0.0.0.0:9090
|
||||
- --config.file=/etc/prometheus/prometheus.yaml
|
||||
|
@ -25,7 +25,7 @@ spec:
|
||||
serviceAccountName: kube-state-metrics
|
||||
containers:
|
||||
- name: kube-state-metrics
|
||||
image: k8s.gcr.io/kube-state-metrics/kube-state-metrics:v2.4.2
|
||||
image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.7.0
|
||||
ports:
|
||||
- name: metrics
|
||||
containerPort: 8080
|
||||
|
@ -22,19 +22,19 @@ spec:
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 65534
|
||||
runAsGroup: 65534
|
||||
fsGroup: 65534
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
hostNetwork: true
|
||||
hostPID: true
|
||||
containers:
|
||||
- name: node-exporter
|
||||
image: quay.io/prometheus/node-exporter:v1.3.1
|
||||
image: quay.io/prometheus/node-exporter:v1.5.0
|
||||
args:
|
||||
- --path.procfs=/host/proc
|
||||
- --path.sysfs=/host/sys
|
||||
- --path.rootfs=/host/root
|
||||
- --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|var/lib/docker/.+)($|/)
|
||||
- --collector.filesystem.fs-types-exclude=^(autofs|binfmt_misc|cgroup|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|mqueue|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|sysfs|tracefs)$
|
||||
ports:
|
||||
- name: metrics
|
||||
containerPort: 9100
|
||||
@ -46,6 +46,9 @@ spec:
|
||||
limits:
|
||||
cpu: 200m
|
||||
memory: 100Mi
|
||||
securityContext:
|
||||
seLinuxOptions:
|
||||
type: spc_t
|
||||
volumeMounts:
|
||||
- name: proc
|
||||
mountPath: /host/proc
|
||||
@ -55,9 +58,12 @@ spec:
|
||||
readOnly: true
|
||||
- name: root
|
||||
mountPath: /host/root
|
||||
mountPropagation: HostToContainer
|
||||
readOnly: true
|
||||
tolerations:
|
||||
- key: node-role.kubernetes.io/master
|
||||
- key: node-role.kubernetes.io/controller
|
||||
operator: Exists
|
||||
- key: node-role.kubernetes.io/control-plane
|
||||
operator: Exists
|
||||
- key: node.kubernetes.io/not-ready
|
||||
operator: Exists
|
||||
|
@ -11,7 +11,7 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster
|
||||
|
||||
## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>
|
||||
|
||||
* Kubernetes v1.23.5 (upstream)
|
||||
* Kubernetes v1.31.3 (upstream)
|
||||
* Single or multi-master, [Calico](https://www.projectcalico.org/) or [Cilium](https://github.com/cilium/cilium) or [flannel](https://github.com/coreos/flannel) networking
|
||||
* On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/), SELinux enforcing
|
||||
* Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [spot](https://typhoon.psdn.io/fedora-coreos/aws/#spot) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#hosts) customization
|
||||
|
@ -19,7 +19,7 @@ data "aws_ami" "fedora-coreos" {
|
||||
}
|
||||
|
||||
data "aws_ami" "fedora-coreos-arm" {
|
||||
count = var.arch == "arm64" ? 1 : 0
|
||||
count = var.controller_arch == "arm64" ? 1 : 0
|
||||
|
||||
most_recent = true
|
||||
owners = ["125523088429"]
|
||||
|
@ -1,6 +1,6 @@
|
||||
# Kubernetes assets (kubeconfig, manifests)
|
||||
module "bootstrap" {
|
||||
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=e5bdb6f6c67461ca3a1cd3449f4703189f14d3e4"
|
||||
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=e6a1c7bccfc45ab299b5f8149bc3840f99b30b2b"
|
||||
|
||||
cluster_name = var.cluster_name
|
||||
api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)]
|
||||
@ -9,9 +9,7 @@ module "bootstrap" {
|
||||
network_mtu = var.network_mtu
|
||||
pod_cidr = var.pod_cidr
|
||||
service_cidr = var.service_cidr
|
||||
cluster_domain_suffix = var.cluster_domain_suffix
|
||||
enable_reporting = var.enable_reporting
|
||||
enable_aggregation = var.enable_aggregation
|
||||
daemonset_tolerations = var.daemonset_tolerations
|
||||
components = var.components
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
---
|
||||
variant: fcos
|
||||
version: 1.4.0
|
||||
version: 1.5.0
|
||||
systemd:
|
||||
units:
|
||||
- name: etcd-member.service
|
||||
@ -9,15 +9,16 @@ systemd:
|
||||
[Unit]
|
||||
Description=etcd (System Container)
|
||||
Documentation=https://github.com/etcd-io/etcd
|
||||
Wants=network-online.target network.target
|
||||
Wants=network-online.target
|
||||
After=network-online.target
|
||||
[Service]
|
||||
Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.2
|
||||
Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.13
|
||||
Type=exec
|
||||
ExecStartPre=/bin/mkdir -p /var/lib/etcd
|
||||
ExecStartPre=-/usr/bin/podman rm etcd
|
||||
ExecStart=/usr/bin/podman run --name etcd \
|
||||
--env-file /etc/etcd/etcd.env \
|
||||
--log-driver k8s-file \
|
||||
--network host \
|
||||
--volume /var/lib/etcd:/var/lib/etcd:rw,Z \
|
||||
--volume /etc/ssl/etcd:/etc/ssl/certs:ro,Z \
|
||||
@ -56,7 +57,7 @@ systemd:
|
||||
After=afterburn.service
|
||||
Wants=rpc-statd.service
|
||||
[Service]
|
||||
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.23.5
|
||||
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3
|
||||
EnvironmentFile=/run/metadata/afterburn
|
||||
ExecStartPre=/bin/mkdir -p /etc/cni/net.d
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
|
||||
@ -66,15 +67,19 @@ systemd:
|
||||
ExecStartPre=/usr/bin/bash -c "grep 'certificate-authority-data' /etc/kubernetes/kubeconfig | awk '{print $2}' | base64 -d > /etc/kubernetes/ca.crt"
|
||||
ExecStartPre=-/usr/bin/podman rm kubelet
|
||||
ExecStart=/usr/bin/podman run --name kubelet \
|
||||
--log-driver k8s-file \
|
||||
--privileged \
|
||||
--pid host \
|
||||
--network host \
|
||||
--volume /etc/cni/net.d:/etc/cni/net.d:ro,z \
|
||||
--volume /etc/kubernetes:/etc/kubernetes:ro,z \
|
||||
--volume /etc/machine-id:/etc/machine-id:ro \
|
||||
--volume /usr/lib/os-release:/etc/os-release:ro \
|
||||
--volume /lib/modules:/lib/modules:ro \
|
||||
--volume /run:/run \
|
||||
--volume /sys/fs/cgroup:/sys/fs/cgroup \
|
||||
--volume /etc/selinux:/etc/selinux \
|
||||
--volume /sys/fs/selinux:/sys/fs/selinux \
|
||||
--volume /var/lib/calico:/var/lib/calico:ro \
|
||||
--volume /var/lib/containerd:/var/lib/containerd \
|
||||
--volume /var/lib/kubelet:/var/lib/kubelet:rshared,z \
|
||||
@ -82,28 +87,13 @@ systemd:
|
||||
--volume /var/run/lock:/var/run/lock:z \
|
||||
--volume /opt/cni/bin:/opt/cni/bin:z \
|
||||
$${KUBELET_IMAGE} \
|
||||
--anonymous-auth=false \
|
||||
--authentication-token-webhook \
|
||||
--authorization-mode=Webhook \
|
||||
--bootstrap-kubeconfig=/etc/kubernetes/kubeconfig \
|
||||
--cgroup-driver=systemd \
|
||||
--cgroups-per-qos=true \
|
||||
--container-runtime=remote \
|
||||
--config=/etc/kubernetes/kubelet.yaml \
|
||||
--container-runtime-endpoint=unix:///run/containerd/containerd.sock \
|
||||
--enforce-node-allocatable=pods \
|
||||
--client-ca-file=/etc/kubernetes/ca.crt \
|
||||
--cluster_dns=${cluster_dns_service_ip} \
|
||||
--cluster_domain=${cluster_domain_suffix} \
|
||||
--healthz-port=0 \
|
||||
--kubeconfig=/var/lib/kubelet/kubeconfig \
|
||||
--node-labels=node.kubernetes.io/controller="true" \
|
||||
--pod-manifest-path=/etc/kubernetes/manifests \
|
||||
--provider-id=aws:///$${AFTERBURN_AWS_AVAILABILITY_ZONE}/$${AFTERBURN_AWS_INSTANCE_ID} \
|
||||
--read-only-port=0 \
|
||||
--resolv-conf=/run/systemd/resolve/resolv.conf \
|
||||
--register-with-taints=node-role.kubernetes.io/controller=:NoSchedule \
|
||||
--rotate-certificates \
|
||||
--volume-plugin-dir=/var/lib/kubelet/volumeplugins
|
||||
--register-with-taints=node-role.kubernetes.io/controller=:NoSchedule
|
||||
ExecStop=-/usr/bin/podman stop kubelet
|
||||
Delegate=yes
|
||||
Restart=always
|
||||
@ -126,7 +116,7 @@ systemd:
|
||||
--volume /opt/bootstrap/assets:/assets:ro,Z \
|
||||
--volume /opt/bootstrap/apply:/apply:ro,Z \
|
||||
--entrypoint=/apply \
|
||||
quay.io/poseidon/kubelet:v1.23.5
|
||||
quay.io/poseidon/kubelet:v1.31.3
|
||||
ExecStartPost=/bin/touch /opt/bootstrap/bootstrap.done
|
||||
ExecStartPost=-/usr/bin/podman stop bootstrap
|
||||
storage:
|
||||
@ -141,12 +131,39 @@ storage:
|
||||
contents:
|
||||
inline: |
|
||||
${kubeconfig}
|
||||
- path: /etc/kubernetes/kubelet.yaml
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
||||
apiVersion: kubelet.config.k8s.io/v1beta1
|
||||
kind: KubeletConfiguration
|
||||
authentication:
|
||||
anonymous:
|
||||
enabled: false
|
||||
webhook:
|
||||
enabled: true
|
||||
x509:
|
||||
clientCAFile: /etc/kubernetes/ca.crt
|
||||
authorization:
|
||||
mode: Webhook
|
||||
cgroupDriver: systemd
|
||||
clusterDNS:
|
||||
- ${cluster_dns_service_ip}
|
||||
clusterDomain: cluster.local
|
||||
healthzPort: 0
|
||||
rotateCertificates: true
|
||||
shutdownGracePeriod: 45s
|
||||
shutdownGracePeriodCriticalPods: 30s
|
||||
staticPodPath: /etc/kubernetes/manifests
|
||||
readOnlyPort: 0
|
||||
resolvConf: /run/systemd/resolve/resolv.conf
|
||||
volumePluginDir: /var/lib/kubelet/volumeplugins
|
||||
- path: /opt/bootstrap/layout
|
||||
mode: 0544
|
||||
contents:
|
||||
inline: |
|
||||
#!/bin/bash -e
|
||||
mkdir -p -- auth tls/etcd tls/k8s static-manifests manifests/coredns manifests-networking
|
||||
mkdir -p -- auth tls/{etcd,k8s} static-manifests manifests/{coredns,kube-proxy,network}
|
||||
awk '/#####/ {filename=$2; next} {print > filename}' assets
|
||||
mkdir -p /etc/ssl/etcd/etcd
|
||||
mkdir -p /etc/kubernetes/pki
|
||||
@ -160,8 +177,7 @@ storage:
|
||||
mv static-manifests/* /etc/kubernetes/manifests/
|
||||
mkdir -p /opt/bootstrap/assets
|
||||
mv manifests /opt/bootstrap/assets/manifests
|
||||
mv manifests-networking/* /opt/bootstrap/assets/manifests/
|
||||
rm -rf assets auth static-manifests tls manifests-networking
|
||||
rm -rf assets auth static-manifests tls manifests
|
||||
chcon -R -u system_u -t container_file_t /etc/kubernetes/pki
|
||||
- path: /opt/bootstrap/apply
|
||||
mode: 0544
|
||||
@ -177,6 +193,11 @@ storage:
|
||||
echo "Retry applying manifests"
|
||||
sleep 5
|
||||
done
|
||||
- path: /etc/systemd/logind.conf.d/inhibitors.conf
|
||||
contents:
|
||||
inline: |
|
||||
[Login]
|
||||
InhibitDelayMaxSec=45s
|
||||
- path: /etc/sysctl.d/max-user-watches.conf
|
||||
contents:
|
||||
inline: |
|
||||
@ -221,7 +242,6 @@ storage:
|
||||
ETCD_PEER_CERT_FILE=/etc/ssl/certs/etcd/peer.crt
|
||||
ETCD_PEER_KEY_FILE=/etc/ssl/certs/etcd/peer.key
|
||||
ETCD_PEER_CLIENT_CERT_AUTH=true
|
||||
- path: /etc/fedora-coreos/iptables-legacy.stamp
|
||||
- path: /etc/containerd/config.toml
|
||||
overwrite: true
|
||||
contents:
|
@ -20,17 +20,18 @@ resource "aws_instance" "controllers" {
|
||||
tags = {
|
||||
Name = "${var.cluster_name}-controller-${count.index}"
|
||||
}
|
||||
|
||||
instance_type = var.controller_type
|
||||
ami = var.arch == "arm64" ? data.aws_ami.fedora-coreos-arm[0].image_id : data.aws_ami.fedora-coreos.image_id
|
||||
user_data = data.ct_config.controller-ignitions.*.rendered[count.index]
|
||||
ami = var.controller_arch == "arm64" ? data.aws_ami.fedora-coreos-arm[0].image_id : data.aws_ami.fedora-coreos.image_id
|
||||
|
||||
# storage
|
||||
root_block_device {
|
||||
volume_type = var.disk_type
|
||||
volume_size = var.disk_size
|
||||
iops = var.disk_iops
|
||||
volume_type = var.controller_disk_type
|
||||
volume_size = var.controller_disk_size
|
||||
iops = var.controller_disk_iops
|
||||
encrypted = true
|
||||
tags = {
|
||||
Name = "${var.cluster_name}-controller-${count.index}"
|
||||
}
|
||||
}
|
||||
|
||||
# network
|
||||
@ -38,6 +39,14 @@ resource "aws_instance" "controllers" {
|
||||
subnet_id = element(aws_subnet.public.*.id, count.index)
|
||||
vpc_security_group_ids = [aws_security_group.controller.id]
|
||||
|
||||
# boot
|
||||
user_data = data.ct_config.controllers.*.rendered[count.index]
|
||||
|
||||
# cost
|
||||
credit_specification {
|
||||
cpu_credits = var.controller_cpu_credits
|
||||
}
|
||||
|
||||
lifecycle {
|
||||
ignore_changes = [
|
||||
ami,
|
||||
@ -46,41 +55,21 @@ resource "aws_instance" "controllers" {
|
||||
}
|
||||
}
|
||||
|
||||
# Controller Ignition configs
|
||||
data "ct_config" "controller-ignitions" {
|
||||
count = var.controller_count
|
||||
content = data.template_file.controller-configs.*.rendered[count.index]
|
||||
strict = true
|
||||
snippets = var.controller_snippets
|
||||
}
|
||||
|
||||
# Controller Fedora CoreOS configs
|
||||
data "template_file" "controller-configs" {
|
||||
# Fedora CoreOS controllers
|
||||
data "ct_config" "controllers" {
|
||||
count = var.controller_count
|
||||
|
||||
template = file("${path.module}/fcc/controller.yaml")
|
||||
|
||||
vars = {
|
||||
content = templatefile("${path.module}/butane/controller.yaml", {
|
||||
# Cannot use cyclic dependencies on controllers or their DNS records
|
||||
etcd_name = "etcd${count.index}"
|
||||
etcd_domain = "${var.cluster_name}-etcd${count.index}.${var.dns_zone}"
|
||||
# etcd0=https://cluster-etcd0.example.com,etcd1=https://cluster-etcd1.example.com,...
|
||||
etcd_initial_cluster = join(",", data.template_file.etcds.*.rendered)
|
||||
etcd_initial_cluster = join(",", [
|
||||
for i in range(var.controller_count) : "etcd${i}=https://${var.cluster_name}-etcd${i}.${var.dns_zone}:2380"
|
||||
])
|
||||
kubeconfig = indent(10, module.bootstrap.kubeconfig-kubelet)
|
||||
ssh_authorized_key = var.ssh_authorized_key
|
||||
cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
|
||||
cluster_domain_suffix = var.cluster_domain_suffix
|
||||
}
|
||||
})
|
||||
strict = true
|
||||
snippets = var.controller_snippets
|
||||
}
|
||||
|
||||
data "template_file" "etcds" {
|
||||
count = var.controller_count
|
||||
template = "etcd$${index}=https://$${cluster_name}-etcd$${index}.$${dns_zone}:2380"
|
||||
|
||||
vars = {
|
||||
index = count.index
|
||||
cluster_name = var.cluster_name
|
||||
dns_zone = var.dns_zone
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -47,17 +47,25 @@ resource "aws_route" "egress-ipv6" {
resource "aws_subnet" "public" {
count = length(data.aws_availability_zones.all.names)

vpc_id = aws_vpc.network.id
availability_zone = data.aws_availability_zones.all.names[count.index]

cidr_block = cidrsubnet(var.host_cidr, 4, count.index)
ipv6_cidr_block = cidrsubnet(aws_vpc.network.ipv6_cidr_block, 8, count.index)
map_public_ip_on_launch = true
assign_ipv6_address_on_creation = true

tags = {
"Name" = "${var.cluster_name}-public-${count.index}"
}
vpc_id = aws_vpc.network.id
availability_zone = data.aws_availability_zones.all.names[count.index]

# IPv4 and IPv6 CIDR blocks
cidr_block = cidrsubnet(var.host_cidr, 4, count.index)
ipv6_cidr_block = cidrsubnet(aws_vpc.network.ipv6_cidr_block, 8, count.index)

# Assign IPv4 and IPv6 addresses to instances
map_public_ip_on_launch = true
assign_ipv6_address_on_creation = true

# Hostnames assigned to instances
# resource-name: <ec2-instance-id>.region.compute.internal
private_dns_hostname_type_on_launch = "resource-name"
enable_resource_name_dns_a_record_on_launch = true
enable_resource_name_dns_aaaa_record_on_launch = true
}
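
For illustration only (not part of the compare view): a sketch of the cidrsubnet() arithmetic above, assuming a host_cidr of 10.0.0.0/16 and three availability zones.

output "public_subnet_cidrs_example" {
  # Adding 4 prefix bits to a /16 yields /20 subnets, one per AZ index:
  # ["10.0.0.0/20", "10.0.16.0/20", "10.0.32.0/20"]
  value = [for i in range(3) : cidrsubnet("10.0.0.0/16", 4, i)]
}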

resource "aws_route_table_association" "public" {
@ -92,6 +92,30 @@ resource "aws_security_group_rule" "controller-cilium-health-self" {
|
||||
self = true
|
||||
}
|
||||
|
||||
resource "aws_security_group_rule" "controller-cilium-metrics" {
|
||||
count = var.networking == "cilium" ? 1 : 0
|
||||
|
||||
security_group_id = aws_security_group.controller.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "tcp"
|
||||
from_port = 9962
|
||||
to_port = 9965
|
||||
source_security_group_id = aws_security_group.worker.id
|
||||
}
|
||||
|
||||
resource "aws_security_group_rule" "controller-cilium-metrics-self" {
|
||||
count = var.networking == "cilium" ? 1 : 0
|
||||
|
||||
security_group_id = aws_security_group.controller.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "tcp"
|
||||
from_port = 9962
|
||||
to_port = 9965
|
||||
self = true
|
||||
}
|
||||
|
||||
# IANA VXLAN default
|
||||
resource "aws_security_group_rule" "controller-vxlan" {
|
||||
count = var.networking == "flannel" ? 1 : 0
|
||||
@ -379,6 +403,30 @@ resource "aws_security_group_rule" "worker-cilium-health-self" {
|
||||
self = true
|
||||
}
|
||||
|
||||
resource "aws_security_group_rule" "worker-cilium-metrics" {
|
||||
count = var.networking == "cilium" ? 1 : 0
|
||||
|
||||
security_group_id = aws_security_group.worker.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "tcp"
|
||||
from_port = 9962
|
||||
to_port = 9965
|
||||
source_security_group_id = aws_security_group.controller.id
|
||||
}
|
||||
|
||||
resource "aws_security_group_rule" "worker-cilium-metrics-self" {
|
||||
count = var.networking == "cilium" ? 1 : 0
|
||||
|
||||
security_group_id = aws_security_group.worker.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "tcp"
|
||||
from_port = 9962
|
||||
to_port = 9965
|
||||
self = true
|
||||
}
|
||||
|
||||
# IANA VXLAN default
|
||||
resource "aws_security_group_rule" "worker-vxlan" {
|
||||
count = var.networking == "flannel" ? 1 : 0
|
||||
|
@ -17,30 +17,6 @@ variable "dns_zone_id" {
|
||||
|
||||
# instances
|
||||
|
||||
variable "controller_count" {
|
||||
type = number
|
||||
description = "Number of controllers (i.e. masters)"
|
||||
default = 1
|
||||
}
|
||||
|
||||
variable "worker_count" {
|
||||
type = number
|
||||
description = "Number of workers"
|
||||
default = 1
|
||||
}
|
||||
|
||||
variable "controller_type" {
|
||||
type = string
|
||||
description = "EC2 instance type for controllers"
|
||||
default = "t3.small"
|
||||
}
|
||||
|
||||
variable "worker_type" {
|
||||
type = string
|
||||
description = "EC2 instance type for workers"
|
||||
default = "t3.small"
|
||||
}
|
||||
|
||||
variable "os_stream" {
|
||||
type = string
|
||||
description = "Fedora CoreOS image stream for instances (e.g. stable, testing, next)"
|
||||
@ -52,24 +28,78 @@ variable "os_stream" {
|
||||
}
|
||||
}
|
||||
|
||||
variable "disk_size" {
|
||||
variable "controller_count" {
|
||||
type = number
|
||||
description = "Number of controllers (i.e. masters)"
|
||||
default = 1
|
||||
}
|
||||
|
||||
variable "controller_type" {
|
||||
type = string
|
||||
description = "EC2 instance type for controllers"
|
||||
default = "t3.small"
|
||||
}
|
||||
|
||||
variable "controller_disk_size" {
|
||||
type = number
|
||||
description = "Size of the EBS volume in GB"
|
||||
default = 30
|
||||
}
|
||||
|
||||
variable "disk_type" {
|
||||
variable "controller_disk_type" {
|
||||
type = string
|
||||
description = "Type of the EBS volume (e.g. standard, gp2, gp3, io1)"
|
||||
default = "gp3"
|
||||
}
|
||||
|
||||
variable "disk_iops" {
|
||||
variable "controller_disk_iops" {
|
||||
type = number
|
||||
description = "IOPS of the EBS volume (e.g. 3000)"
|
||||
default = 3000
|
||||
}
|
||||
|
||||
variable "controller_cpu_credits" {
|
||||
type = string
|
||||
description = "CPU credits mode (if using a burstable instance type)"
|
||||
default = null
|
||||
}
|
||||
|
||||
variable "worker_count" {
|
||||
type = number
|
||||
description = "Number of workers"
|
||||
default = 1
|
||||
}
|
||||
|
||||
variable "worker_type" {
|
||||
type = string
|
||||
description = "EC2 instance type for workers"
|
||||
default = "t3.small"
|
||||
}
|
||||
|
||||
variable "worker_disk_size" {
|
||||
type = number
|
||||
description = "Size of the EBS volume in GB"
|
||||
default = 30
|
||||
}
|
||||
|
||||
variable "worker_disk_type" {
|
||||
type = string
|
||||
description = "Type of the EBS volume (e.g. standard, gp2, gp3, io1)"
|
||||
default = "gp3"
|
||||
}
|
||||
|
||||
variable "worker_disk_iops" {
|
||||
type = number
|
||||
description = "IOPS of the EBS volume (e.g. 3000)"
|
||||
default = 3000
|
||||
}
|
||||
|
||||
variable "worker_cpu_credits" {
|
||||
type = string
|
||||
description = "CPU credits mode (if using a burstable instance type)"
|
||||
default = null
|
||||
}
|
||||
|
||||
variable "worker_price" {
|
||||
type = number
|
||||
description = "Spot price in USD for worker instances or 0 to use on-demand instances"
|
||||
@ -134,40 +164,31 @@ EOD
|
||||
default = "10.3.0.0/16"
|
||||
}
|
||||
|
||||
variable "enable_reporting" {
|
||||
type = bool
|
||||
description = "Enable usage or analytics reporting to upstreams (Calico)"
|
||||
default = false
|
||||
}
|
||||
|
||||
variable "enable_aggregation" {
|
||||
type = bool
|
||||
description = "Enable the Kubernetes Aggregation Layer"
|
||||
default = true
|
||||
}
|
||||
|
||||
variable "worker_node_labels" {
|
||||
type = list(string)
|
||||
description = "List of initial worker node labels"
|
||||
default = []
|
||||
}
|
||||
|
||||
# unofficial, undocumented, unsupported
# advanced

variable "cluster_domain_suffix" {
variable "controller_arch" {
type = string
description = "Queries for domains with the suffix will be answered by CoreDNS. Default is cluster.local (e.g. foo.default.svc.cluster.local)"
default = "cluster.local"
description = "Controller node(s) architecture (amd64 or arm64)"
default = "amd64"
validation {
condition = contains(["amd64", "arm64"], var.controller_arch)
error_message = "The controller_arch must be amd64 or arm64."
}
}

variable "arch" {
variable "worker_arch" {
type = string
description = "Container architecture (amd64 or arm64)"
description = "Worker node(s) architecture (amd64 or arm64)"
default = "amd64"

validation {
condition = var.arch == "amd64" || var.arch == "arm64"
error_message = "The arch must be amd64 or arm64."
condition = contains(["amd64", "arm64"], var.worker_arch)
error_message = "The worker_arch must be amd64 or arm64."
}
}

@ -176,3 +197,19 @@ variable "daemonset_tolerations" {
description = "List of additional taint keys kube-system DaemonSets should tolerate (e.g. ['custom-role', 'gpu-role'])"
default = []
}

variable "components" {
description = "Configure pre-installed cluster components"
# Component configs are passed through to terraform-render-bootstrap,
# which handles type enforcement and defines defaults
# https://github.com/poseidon/terraform-render-bootstrap/blob/main/variables.tf#L95
type = object({
enable = optional(bool)
coredns = optional(map(any))
kube_proxy = optional(map(any))
flannel = optional(map(any))
calico = optional(map(any))
cilium = optional(map(any))
})
default = null
}
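
For illustration only (not part of the compare view): a hypothetical module call sketching how the new components object might be set. The module path, ref, placeholder values, and the fields honored inside each component map are assumptions; the actual semantics are defined by terraform-render-bootstrap (linked above).

module "tempest" {
  # Assumed module path and release tag
  source = "git::https://github.com/poseidon/typhoon//aws/fedora-coreos/kubernetes?ref=v1.31.3"

  # cluster (placeholder values)
  cluster_name       = "tempest"
  dns_zone           = "aws.example.com"
  dns_zone_id        = "Z3PAABBCFAKE"
  ssh_authorized_key = "ssh-ed25519 AAAA...example"

  # pre-installed components: keep defaults, but (hypothetically) skip kube-proxy
  components = {
    enable     = true
    kube_proxy = { enable = false }
  }
}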

@ -3,13 +3,11 @@
terraform {
required_version = ">= 0.13.0, < 2.0.0"
required_providers {
aws = ">= 2.23, <= 5.0"
template = "~> 2.2"
null = ">= 2.1"

aws = ">= 2.23, <= 6.0"
null = ">= 2.1"
ct = {
source = "poseidon/ct"
version = "~> 0.9"
version = "~> 0.13"
}
}
}
@ -6,20 +6,24 @@ module "workers" {
|
||||
vpc_id = aws_vpc.network.id
|
||||
subnet_ids = aws_subnet.public.*.id
|
||||
security_groups = [aws_security_group.worker.id]
|
||||
worker_count = var.worker_count
|
||||
instance_type = var.worker_type
|
||||
os_stream = var.os_stream
|
||||
arch = var.arch
|
||||
disk_size = var.disk_size
|
||||
spot_price = var.worker_price
|
||||
target_groups = var.worker_target_groups
|
||||
|
||||
# instances
|
||||
os_stream = var.os_stream
|
||||
worker_count = var.worker_count
|
||||
instance_type = var.worker_type
|
||||
arch = var.worker_arch
|
||||
disk_type = var.worker_disk_type
|
||||
disk_size = var.worker_disk_size
|
||||
disk_iops = var.worker_disk_iops
|
||||
cpu_credits = var.worker_cpu_credits
|
||||
spot_price = var.worker_price
|
||||
target_groups = var.worker_target_groups
|
||||
|
||||
# configuration
|
||||
kubeconfig = module.bootstrap.kubeconfig-kubelet
|
||||
ssh_authorized_key = var.ssh_authorized_key
|
||||
service_cidr = var.service_cidr
|
||||
cluster_domain_suffix = var.cluster_domain_suffix
|
||||
snippets = var.worker_snippets
|
||||
node_labels = var.worker_node_labels
|
||||
kubeconfig = module.bootstrap.kubeconfig-kubelet
|
||||
ssh_authorized_key = var.ssh_authorized_key
|
||||
service_cidr = var.service_cidr
|
||||
snippets = var.worker_snippets
|
||||
node_labels = var.worker_node_labels
|
||||
}
|
||||
|
||||
|
@ -1,3 +1,7 @@
|
||||
locals {
|
||||
ami_id = var.arch == "arm64" ? data.aws_ami.fedora-coreos-arm[0].image_id : data.aws_ami.fedora-coreos.image_id
|
||||
}
|
||||
|
||||
data "aws_ami" "fedora-coreos" {
|
||||
most_recent = true
|
||||
owners = ["125523088429"]
|
||||
|
@ -1,6 +1,6 @@
|
||||
---
|
||||
variant: fcos
|
||||
version: 1.4.0
|
||||
version: 1.5.0
|
||||
systemd:
|
||||
units:
|
||||
- name: containerd.service
|
||||
@ -29,7 +29,7 @@ systemd:
|
||||
After=afterburn.service
|
||||
Wants=rpc-statd.service
|
||||
[Service]
|
||||
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.23.5
|
||||
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3
|
||||
EnvironmentFile=/run/metadata/afterburn
|
||||
ExecStartPre=/bin/mkdir -p /etc/cni/net.d
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
|
||||
@ -39,15 +39,19 @@ systemd:
|
||||
ExecStartPre=/usr/bin/bash -c "grep 'certificate-authority-data' /etc/kubernetes/kubeconfig | awk '{print $2}' | base64 -d > /etc/kubernetes/ca.crt"
|
||||
ExecStartPre=-/usr/bin/podman rm kubelet
|
||||
ExecStart=/usr/bin/podman run --name kubelet \
|
||||
--log-driver k8s-file \
|
||||
--privileged \
|
||||
--pid host \
|
||||
--network host \
|
||||
--volume /etc/cni/net.d:/etc/cni/net.d:ro,z \
|
||||
--volume /etc/kubernetes:/etc/kubernetes:ro,z \
|
||||
--volume /etc/machine-id:/etc/machine-id:ro \
|
||||
--volume /usr/lib/os-release:/etc/os-release:ro \
|
||||
--volume /lib/modules:/lib/modules:ro \
|
||||
--volume /run:/run \
|
||||
--volume /sys/fs/cgroup:/sys/fs/cgroup \
|
||||
--volume /etc/selinux:/etc/selinux \
|
||||
--volume /sys/fs/selinux:/sys/fs/selinux \
|
||||
--volume /var/lib/calico:/var/lib/calico:ro \
|
||||
--volume /var/lib/containerd:/var/lib/containerd \
|
||||
--volume /var/lib/kubelet:/var/lib/kubelet:rshared,z \
|
||||
@ -55,19 +59,9 @@ systemd:
|
||||
--volume /var/run/lock:/var/run/lock:z \
|
||||
--volume /opt/cni/bin:/opt/cni/bin:z \
|
||||
$${KUBELET_IMAGE} \
|
||||
--anonymous-auth=false \
|
||||
--authentication-token-webhook \
|
||||
--authorization-mode=Webhook \
|
||||
--bootstrap-kubeconfig=/etc/kubernetes/kubeconfig \
|
||||
--cgroup-driver=systemd \
|
||||
--cgroups-per-qos=true \
|
||||
--container-runtime=remote \
|
||||
--config=/etc/kubernetes/kubelet.yaml \
|
||||
--container-runtime-endpoint=unix:///run/containerd/containerd.sock \
|
||||
--enforce-node-allocatable=pods \
|
||||
--client-ca-file=/etc/kubernetes/ca.crt \
|
||||
--cluster_dns=${cluster_dns_service_ip} \
|
||||
--cluster_domain=${cluster_domain_suffix} \
|
||||
--healthz-port=0 \
|
||||
--kubeconfig=/var/lib/kubelet/kubeconfig \
|
||||
--node-labels=node.kubernetes.io/node \
|
||||
%{~ for label in split(",", node_labels) ~}
|
||||
@ -76,31 +70,13 @@ systemd:
|
||||
%{~ for taint in split(",", node_taints) ~}
|
||||
--register-with-taints=${taint} \
|
||||
%{~ endfor ~}
|
||||
--pod-manifest-path=/etc/kubernetes/manifests \
|
||||
--provider-id=aws:///$${AFTERBURN_AWS_AVAILABILITY_ZONE}/$${AFTERBURN_AWS_INSTANCE_ID} \
|
||||
--read-only-port=0 \
|
||||
--resolv-conf=/run/systemd/resolve/resolv.conf \
|
||||
--rotate-certificates \
|
||||
--volume-plugin-dir=/var/lib/kubelet/volumeplugins
|
||||
--provider-id=aws:///$${AFTERBURN_AWS_AVAILABILITY_ZONE}/$${AFTERBURN_AWS_INSTANCE_ID}
|
||||
ExecStop=-/usr/bin/podman stop kubelet
|
||||
Delegate=yes
|
||||
Restart=always
|
||||
RestartSec=10
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
- name: delete-node.service
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Delete Kubernetes node on shutdown
|
||||
[Service]
|
||||
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.23.5
|
||||
Type=oneshot
|
||||
RemainAfterExit=true
|
||||
ExecStart=/bin/true
|
||||
ExecStop=/bin/bash -c '/usr/bin/podman run --volume /var/lib/kubelet:/var/lib/kubelet:ro,z --entrypoint /usr/local/bin/kubectl $${KUBELET_IMAGE} --kubeconfig=/var/lib/kubelet/kubeconfig delete node $HOSTNAME'
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
storage:
|
||||
directories:
|
||||
- path: /etc/kubernetes
|
||||
@ -110,6 +86,38 @@ storage:
|
||||
contents:
|
||||
inline: |
|
||||
${kubeconfig}
|
||||
- path: /etc/kubernetes/kubelet.yaml
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
||||
apiVersion: kubelet.config.k8s.io/v1beta1
|
||||
kind: KubeletConfiguration
|
||||
authentication:
|
||||
anonymous:
|
||||
enabled: false
|
||||
webhook:
|
||||
enabled: true
|
||||
x509:
|
||||
clientCAFile: /etc/kubernetes/ca.crt
|
||||
authorization:
|
||||
mode: Webhook
|
||||
cgroupDriver: systemd
|
||||
clusterDNS:
|
||||
- ${cluster_dns_service_ip}
|
||||
clusterDomain: cluster.local
|
||||
healthzPort: 0
|
||||
rotateCertificates: true
|
||||
shutdownGracePeriod: 45s
|
||||
shutdownGracePeriodCriticalPods: 30s
|
||||
staticPodPath: /etc/kubernetes/manifests
|
||||
readOnlyPort: 0
|
||||
resolvConf: /run/systemd/resolve/resolv.conf
|
||||
volumePluginDir: /var/lib/kubelet/volumeplugins
|
||||
- path: /etc/systemd/logind.conf.d/inhibitors.conf
|
||||
contents:
|
||||
inline: |
|
||||
[Login]
|
||||
InhibitDelayMaxSec=45s
|
||||
- path: /etc/sysctl.d/max-user-watches.conf
|
||||
contents:
|
||||
inline: |
|
||||
@ -133,7 +141,6 @@ storage:
|
||||
DefaultCPUAccounting=yes
|
||||
DefaultMemoryAccounting=yes
|
||||
DefaultBlockIOAccounting=yes
|
||||
- path: /etc/fedora-coreos/iptables-legacy.stamp
|
||||
- path: /etc/containerd/config.toml
|
||||
overwrite: true
|
||||
contents:
|
@ -69,6 +69,12 @@ variable "spot_price" {
|
||||
default = 0
|
||||
}
|
||||
|
||||
variable "cpu_credits" {
|
||||
type = string
|
||||
description = "CPU burst credits mode (if applicable)"
|
||||
default = null
|
||||
}
|
||||
|
||||
variable "target_groups" {
|
||||
type = list(string)
|
||||
description = "Additional target group ARNs to which instances should be added"
|
||||
@ -102,12 +108,6 @@ EOD
|
||||
default = "10.3.0.0/16"
|
||||
}
|
||||
|
||||
variable "cluster_domain_suffix" {
|
||||
type = string
|
||||
description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. foo.default.svc.cluster.local) "
|
||||
default = "cluster.local"
|
||||
}
|
||||
|
||||
variable "node_labels" {
|
||||
type = list(string)
|
||||
description = "List of initial node labels"
|
||||
@ -120,15 +120,14 @@ variable "node_taints" {
|
||||
default = []
|
||||
}
|
||||
|
||||
# unofficial, undocumented, unsupported
|
||||
# advanced
|
||||
|
||||
variable "arch" {
|
||||
type = string
|
||||
description = "Container architecture (amd64 or arm64)"
|
||||
default = "amd64"
|
||||
|
||||
validation {
|
||||
condition = var.arch == "amd64" || var.arch == "arm64"
|
||||
condition = contains(["amd64", "arm64"], var.arch)
|
||||
error_message = "The arch must be amd64 or arm64."
|
||||
}
|
||||
}
|
||||
|
@ -3,12 +3,10 @@
|
||||
terraform {
|
||||
required_version = ">= 0.13.0, < 2.0.0"
|
||||
required_providers {
|
||||
aws = ">= 2.23, <= 5.0"
|
||||
template = "~> 2.2"
|
||||
|
||||
aws = ">= 2.23, <= 6.0"
|
||||
ct = {
|
||||
source = "poseidon/ct"
|
||||
version = "~> 0.9"
|
||||
version = "~> 0.13"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,19 +1,20 @@
|
||||
# Workers AutoScaling Group
|
||||
resource "aws_autoscaling_group" "workers" {
|
||||
name = "${var.name}-worker ${aws_launch_configuration.worker.name}"
|
||||
name = "${var.name}-worker"
|
||||
|
||||
# count
|
||||
desired_capacity = var.worker_count
|
||||
min_size = var.worker_count
|
||||
max_size = var.worker_count + 2
|
||||
default_cooldown = 30
|
||||
health_check_grace_period = 30
|
||||
desired_capacity = var.worker_count
|
||||
min_size = var.worker_count
|
||||
max_size = var.worker_count + 2
|
||||
|
||||
# network
|
||||
vpc_zone_identifier = var.subnet_ids
|
||||
|
||||
# template
|
||||
launch_configuration = aws_launch_configuration.worker.name
|
||||
# instance template
|
||||
launch_template {
|
||||
id = aws_launch_template.worker.id
|
||||
version = aws_launch_template.worker.latest_version
|
||||
}
|
||||
|
||||
# target groups to which instances should be added
|
||||
target_group_arns = flatten([
|
||||
@ -22,6 +23,19 @@ resource "aws_autoscaling_group" "workers" {
|
||||
var.target_groups,
|
||||
])
|
||||
|
||||
instance_refresh {
|
||||
strategy = "Rolling"
|
||||
preferences {
|
||||
instance_warmup = 120
|
||||
min_healthy_percentage = 90
|
||||
}
|
||||
}
|
||||
# Grace period before checking new instance's health
|
||||
health_check_grace_period = 30
|
||||
# Cooldown period between scaling activities
|
||||
default_cooldown = 30
|
||||
|
||||
|
||||
lifecycle {
|
||||
# override the default destroy and replace update behavior
|
||||
create_before_destroy = true
|
||||
@ -41,24 +55,54 @@ resource "aws_autoscaling_group" "workers" {
|
||||
}
|
||||
|
||||
# Worker template
|
||||
resource "aws_launch_configuration" "worker" {
|
||||
image_id = var.arch == "arm64" ? data.aws_ami.fedora-coreos-arm[0].image_id : data.aws_ami.fedora-coreos.image_id
|
||||
instance_type = var.instance_type
|
||||
spot_price = var.spot_price > 0 ? var.spot_price : null
|
||||
enable_monitoring = false
|
||||
|
||||
user_data = data.ct_config.worker-ignition.rendered
|
||||
resource "aws_launch_template" "worker" {
|
||||
name_prefix = "${var.name}-worker"
|
||||
image_id = local.ami_id
|
||||
instance_type = var.instance_type
|
||||
|
||||
# storage
|
||||
root_block_device {
|
||||
volume_type = var.disk_type
|
||||
volume_size = var.disk_size
|
||||
iops = var.disk_iops
|
||||
encrypted = true
|
||||
ebs_optimized = true
|
||||
block_device_mappings {
|
||||
device_name = "/dev/xvda"
|
||||
ebs {
|
||||
volume_type = var.disk_type
|
||||
volume_size = var.disk_size
|
||||
iops = var.disk_iops
|
||||
encrypted = true
|
||||
delete_on_termination = true
|
||||
}
|
||||
}
|
||||
|
||||
# network
|
||||
security_groups = var.security_groups
|
||||
network_interfaces {
|
||||
associate_public_ip_address = true
|
||||
security_groups = var.security_groups
|
||||
}
|
||||
|
||||
# boot
|
||||
user_data = sensitive(base64encode(data.ct_config.worker.rendered))
|
||||
|
||||
# metadata
|
||||
metadata_options {
|
||||
http_tokens = "optional"
|
||||
}
|
||||
monitoring {
|
||||
enabled = false
|
||||
}
|
||||
|
||||
# cost
|
||||
credit_specification {
|
||||
cpu_credits = var.cpu_credits
|
||||
}
|
||||
dynamic "instance_market_options" {
|
||||
for_each = var.spot_price > 0 ? [1] : []
|
||||
content {
|
||||
market_type = "spot"
|
||||
spot_options {
|
||||
max_price = var.spot_price
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
lifecycle {
|
||||
// Override the default destroy and replace update behavior
|
||||
@ -67,24 +111,15 @@ resource "aws_launch_configuration" "worker" {
|
||||
}
|
||||
}

# Worker Ignition config
data "ct_config" "worker-ignition" {
content = data.template_file.worker-config.rendered
strict = true
snippets = var.snippets
}

# Worker Fedora CoreOS config
data "template_file" "worker-config" {
template = file("${path.module}/fcc/worker.yaml")

vars = {
# Fedora CoreOS worker
data "ct_config" "worker" {
content = templatefile("${path.module}/butane/worker.yaml", {
kubeconfig = indent(10, var.kubeconfig)
ssh_authorized_key = var.ssh_authorized_key
cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
cluster_domain_suffix = var.cluster_domain_suffix
node_labels = join(",", var.node_labels)
node_taints = join(",", var.node_taints)
}
})
strict = true
snippets = var.snippets
}
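
For illustration only (not part of the compare view): the cidrhost() call above picks the cluster DNS address; a sketch using the module's documented default service_cidr of 10.3.0.0/16.

output "cluster_dns_service_ip_example" {
  value = cidrhost("10.3.0.0/16", 10) # => "10.3.0.10"
}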
@ -11,7 +11,7 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster

## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>

* Kubernetes v1.23.5 (upstream)
* Kubernetes v1.31.3 (upstream)
* Single or multi-master, [Calico](https://www.projectcalico.org/) or [Cilium](https://github.com/cilium/cilium) or [flannel](https://github.com/coreos/flannel) networking
* On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/)
* Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [spot](https://typhoon.psdn.io/flatcar-linux/aws/#spot) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#hosts) customization
@ -1,7 +1,7 @@
|
||||
locals {
|
||||
# Pick a Flatcar Linux AMI
|
||||
# flatcar-stable -> Flatcar Linux AMI
|
||||
ami_id = var.arch == "arm64" ? data.aws_ami.flatcar-arm64[0].image_id : data.aws_ami.flatcar.image_id
|
||||
ami_id = var.controller_arch == "arm64" ? data.aws_ami.flatcar-arm64[0].image_id : data.aws_ami.flatcar.image_id
|
||||
channel = split("-", var.os_image)[1]
|
||||
}
|
||||
|
||||
@ -26,7 +26,7 @@ data "aws_ami" "flatcar" {
|
||||
}
|
||||
|
||||
data "aws_ami" "flatcar-arm64" {
|
||||
count = var.arch == "arm64" ? 1 : 0
|
||||
count = var.controller_arch == "arm64" ? 1 : 0
|
||||
|
||||
most_recent = true
|
||||
owners = ["075585003325"]
|
||||
|
@ -1,6 +1,6 @@
|
||||
# Kubernetes assets (kubeconfig, manifests)
|
||||
module "bootstrap" {
|
||||
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=e5bdb6f6c67461ca3a1cd3449f4703189f14d3e4"
|
||||
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=e6a1c7bccfc45ab299b5f8149bc3840f99b30b2b"
|
||||
|
||||
cluster_name = var.cluster_name
|
||||
api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)]
|
||||
@ -9,9 +9,7 @@ module "bootstrap" {
|
||||
network_mtu = var.network_mtu
|
||||
pod_cidr = var.pod_cidr
|
||||
service_cidr = var.service_cidr
|
||||
cluster_domain_suffix = var.cluster_domain_suffix
|
||||
enable_reporting = var.enable_reporting
|
||||
enable_aggregation = var.enable_aggregation
|
||||
daemonset_tolerations = var.daemonset_tolerations
|
||||
components = var.components
|
||||
}
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
---
|
||||
variant: flatcar
|
||||
version: 1.0.0
|
||||
systemd:
|
||||
units:
|
||||
- name: etcd-member.service
|
||||
@ -10,7 +11,7 @@ systemd:
|
||||
Requires=docker.service
|
||||
After=docker.service
|
||||
[Service]
|
||||
Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.2
|
||||
Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.13
|
||||
ExecStartPre=/usr/bin/docker run -d \
|
||||
--name etcd \
|
||||
--network host \
|
||||
@ -57,7 +58,7 @@ systemd:
|
||||
After=coreos-metadata.service
|
||||
Wants=rpc-statd.service
|
||||
[Service]
|
||||
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.23.5
|
||||
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3
|
||||
EnvironmentFile=/run/metadata/coreos
|
||||
ExecStartPre=/bin/mkdir -p /etc/cni/net.d
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
|
||||
@ -83,26 +84,13 @@ systemd:
|
||||
-v /var/log:/var/log \
|
||||
-v /opt/cni/bin:/opt/cni/bin \
|
||||
$${KUBELET_IMAGE} \
|
||||
--anonymous-auth=false \
|
||||
--authentication-token-webhook \
|
||||
--authorization-mode=Webhook \
|
||||
--bootstrap-kubeconfig=/etc/kubernetes/kubeconfig \
|
||||
--cgroup-driver=systemd \
|
||||
--container-runtime=remote \
|
||||
--config=/etc/kubernetes/kubelet.yaml \
|
||||
--container-runtime-endpoint=unix:///run/containerd/containerd.sock \
|
||||
--client-ca-file=/etc/kubernetes/ca.crt \
|
||||
--cluster_dns=${cluster_dns_service_ip} \
|
||||
--cluster_domain=${cluster_domain_suffix} \
|
||||
--healthz-port=0 \
|
||||
--kubeconfig=/var/lib/kubelet/kubeconfig \
|
||||
--node-labels=node.kubernetes.io/controller="true" \
|
||||
--pod-manifest-path=/etc/kubernetes/manifests \
|
||||
--provider-id=aws:///$${COREOS_EC2_AVAILABILITY_ZONE}/$${COREOS_EC2_INSTANCE_ID} \
|
||||
--read-only-port=0 \
|
||||
--resolv-conf=/run/systemd/resolve/resolv.conf \
|
||||
--register-with-taints=node-role.kubernetes.io/controller=:NoSchedule \
|
||||
--rotate-certificates \
|
||||
--volume-plugin-dir=/var/lib/kubelet/volumeplugins
|
||||
--register-with-taints=node-role.kubernetes.io/controller=:NoSchedule
|
||||
ExecStart=docker logs -f kubelet
|
||||
ExecStop=docker stop kubelet
|
||||
ExecStopPost=docker rm kubelet
|
||||
@ -121,7 +109,7 @@ systemd:
|
||||
Type=oneshot
|
||||
RemainAfterExit=true
|
||||
WorkingDirectory=/opt/bootstrap
|
||||
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.23.5
|
||||
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3
|
||||
ExecStart=/usr/bin/docker run \
|
||||
-v /etc/kubernetes/pki:/etc/kubernetes/pki:ro \
|
||||
-v /opt/bootstrap/assets:/assets:ro \
|
||||
@ -134,23 +122,47 @@ systemd:
|
||||
storage:
|
||||
directories:
|
||||
- path: /var/lib/etcd
|
||||
filesystem: root
|
||||
mode: 0700
|
||||
overwrite: true
|
||||
files:
|
||||
- path: /etc/kubernetes/kubeconfig
|
||||
filesystem: root
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
||||
${kubeconfig}
|
||||
- path: /etc/kubernetes/kubelet.yaml
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
||||
apiVersion: kubelet.config.k8s.io/v1beta1
|
||||
kind: KubeletConfiguration
|
||||
authentication:
|
||||
anonymous:
|
||||
enabled: false
|
||||
webhook:
|
||||
enabled: true
|
||||
x509:
|
||||
clientCAFile: /etc/kubernetes/ca.crt
|
||||
authorization:
|
||||
mode: Webhook
|
||||
cgroupDriver: systemd
|
||||
clusterDNS:
|
||||
- ${cluster_dns_service_ip}
|
||||
clusterDomain: cluster.local
|
||||
healthzPort: 0
|
||||
rotateCertificates: true
|
||||
shutdownGracePeriod: 45s
|
||||
shutdownGracePeriodCriticalPods: 30s
|
||||
staticPodPath: /etc/kubernetes/manifests
|
||||
readOnlyPort: 0
|
||||
resolvConf: /run/systemd/resolve/resolv.conf
|
||||
volumePluginDir: /var/lib/kubelet/volumeplugins
|
||||
- path: /opt/bootstrap/layout
|
||||
filesystem: root
|
||||
mode: 0544
|
||||
contents:
|
||||
inline: |
|
||||
#!/bin/bash -e
|
||||
mkdir -p -- auth tls/etcd tls/k8s static-manifests manifests/coredns manifests-networking
|
||||
mkdir -p -- auth tls/{etcd,k8s} static-manifests manifests/{coredns,kube-proxy,network}
|
||||
awk '/#####/ {filename=$2; next} {print > filename}' assets
|
||||
mkdir -p /etc/ssl/etcd/etcd
|
||||
mkdir -p /etc/kubernetes/pki
|
||||
@ -165,10 +177,8 @@ storage:
|
||||
mv static-manifests/* /etc/kubernetes/manifests/
|
||||
mkdir -p /opt/bootstrap/assets
|
||||
mv manifests /opt/bootstrap/assets/manifests
|
||||
mv manifests-networking/* /opt/bootstrap/assets/manifests/
|
||||
rm -rf assets auth static-manifests tls manifests-networking
|
||||
rm -rf assets auth static-manifests tls manifests
|
||||
- path: /opt/bootstrap/apply
|
||||
filesystem: root
|
||||
mode: 0544
|
||||
contents:
|
||||
inline: |
|
||||
@ -182,14 +192,17 @@ storage:
|
||||
echo "Retry applying manifests"
|
||||
sleep 5
|
||||
done
|
||||
- path: /etc/systemd/logind.conf.d/inhibitors.conf
|
||||
contents:
|
||||
inline: |
|
||||
[Login]
|
||||
InhibitDelayMaxSec=45s
|
||||
- path: /etc/sysctl.d/max-user-watches.conf
|
||||
filesystem: root
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
||||
fs.inotify.max_user_watches=16184
|
||||
- path: /etc/etcd/etcd.env
|
||||
filesystem: root
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
@ -20,18 +20,18 @@ resource "aws_instance" "controllers" {
|
||||
tags = {
|
||||
Name = "${var.cluster_name}-controller-${count.index}"
|
||||
}
|
||||
|
||||
instance_type = var.controller_type
|
||||
|
||||
ami = local.ami_id
|
||||
user_data = data.ct_config.controller-ignitions.*.rendered[count.index]
|
||||
ami = local.ami_id
|
||||
|
||||
# storage
|
||||
root_block_device {
|
||||
volume_type = var.disk_type
|
||||
volume_size = var.disk_size
|
||||
iops = var.disk_iops
|
||||
volume_type = var.controller_disk_type
|
||||
volume_size = var.controller_disk_size
|
||||
iops = var.controller_disk_iops
|
||||
encrypted = true
|
||||
tags = {
|
||||
Name = "${var.cluster_name}-controller-${count.index}"
|
||||
}
|
||||
}
|
||||
|
||||
# network
|
||||
@ -39,6 +39,14 @@ resource "aws_instance" "controllers" {
|
||||
subnet_id = element(aws_subnet.public.*.id, count.index)
|
||||
vpc_security_group_ids = [aws_security_group.controller.id]
|
||||
|
||||
# boot
|
||||
user_data = data.ct_config.controllers.*.rendered[count.index]
|
||||
|
||||
# cost
|
||||
credit_specification {
|
||||
cpu_credits = var.controller_cpu_credits
|
||||
}
|
||||
|
||||
lifecycle {
|
||||
ignore_changes = [
|
||||
ami,
|
||||
@ -47,41 +55,21 @@ resource "aws_instance" "controllers" {
|
||||
}
|
||||
}
|
||||
|
||||
# Controller Ignition configs
|
||||
data "ct_config" "controller-ignitions" {
|
||||
count = var.controller_count
|
||||
content = data.template_file.controller-configs.*.rendered[count.index]
|
||||
strict = true
|
||||
snippets = var.controller_snippets
|
||||
}
|
||||
|
||||
# Controller Container Linux configs
|
||||
data "template_file" "controller-configs" {
|
||||
# Flatcar Linux controllers
|
||||
data "ct_config" "controllers" {
|
||||
count = var.controller_count
|
||||
|
||||
template = file("${path.module}/cl/controller.yaml")
|
||||
|
||||
vars = {
|
||||
content = templatefile("${path.module}/butane/controller.yaml", {
|
||||
# Cannot use cyclic dependencies on controllers or their DNS records
|
||||
etcd_name = "etcd${count.index}"
|
||||
etcd_domain = "${var.cluster_name}-etcd${count.index}.${var.dns_zone}"
|
||||
# etcd0=https://cluster-etcd0.example.com,etcd1=https://cluster-etcd1.example.com,...
|
||||
etcd_initial_cluster = join(",", data.template_file.etcds.*.rendered)
|
||||
etcd_initial_cluster = join(",", [
|
||||
for i in range(var.controller_count) : "etcd${i}=https://${var.cluster_name}-etcd${i}.${var.dns_zone}:2380"
|
||||
])
|
||||
kubeconfig = indent(10, module.bootstrap.kubeconfig-kubelet)
|
||||
ssh_authorized_key = var.ssh_authorized_key
|
||||
cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
|
||||
cluster_domain_suffix = var.cluster_domain_suffix
|
||||
}
|
||||
})
|
||||
strict = true
|
||||
snippets = var.controller_snippets
|
||||
}
|
||||
|
||||
data "template_file" "etcds" {
|
||||
count = var.controller_count
|
||||
template = "etcd$${index}=https://$${cluster_name}-etcd$${index}.$${dns_zone}:2380"
|
||||
|
||||
vars = {
|
||||
index = count.index
|
||||
cluster_name = var.cluster_name
|
||||
dns_zone = var.dns_zone
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -47,17 +47,25 @@ resource "aws_route" "egress-ipv6" {
|
||||
resource "aws_subnet" "public" {
|
||||
count = length(data.aws_availability_zones.all.names)
|
||||
|
||||
vpc_id = aws_vpc.network.id
|
||||
availability_zone = data.aws_availability_zones.all.names[count.index]
|
||||
|
||||
cidr_block = cidrsubnet(var.host_cidr, 4, count.index)
|
||||
ipv6_cidr_block = cidrsubnet(aws_vpc.network.ipv6_cidr_block, 8, count.index)
|
||||
map_public_ip_on_launch = true
|
||||
assign_ipv6_address_on_creation = true
|
||||
|
||||
tags = {
|
||||
"Name" = "${var.cluster_name}-public-${count.index}"
|
||||
}
|
||||
vpc_id = aws_vpc.network.id
|
||||
availability_zone = data.aws_availability_zones.all.names[count.index]
|
||||
|
||||
# IPv4 and IPv6 CIDR blocks
|
||||
cidr_block = cidrsubnet(var.host_cidr, 4, count.index)
|
||||
ipv6_cidr_block = cidrsubnet(aws_vpc.network.ipv6_cidr_block, 8, count.index)
|
||||
|
||||
# Assign IPv4 and IPv6 addresses to instances
|
||||
map_public_ip_on_launch = true
|
||||
assign_ipv6_address_on_creation = true
|
||||
|
||||
# Hostnames assigned to instances
|
||||
# resource-name: <ec2-instance-id>.region.compute.internal
|
||||
private_dns_hostname_type_on_launch = "resource-name"
|
||||
enable_resource_name_dns_a_record_on_launch = true
|
||||
enable_resource_name_dns_aaaa_record_on_launch = true
|
||||
}
|
||||
|
||||
resource "aws_route_table_association" "public" {
|
||||
|
@ -92,6 +92,30 @@ resource "aws_security_group_rule" "controller-cilium-health-self" {
|
||||
self = true
|
||||
}
|
||||
|
||||
resource "aws_security_group_rule" "controller-cilium-metrics" {
|
||||
count = var.networking == "cilium" ? 1 : 0
|
||||
|
||||
security_group_id = aws_security_group.controller.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "tcp"
|
||||
from_port = 9962
|
||||
to_port = 9965
|
||||
source_security_group_id = aws_security_group.worker.id
|
||||
}
|
||||
|
||||
resource "aws_security_group_rule" "controller-cilium-metrics-self" {
|
||||
count = var.networking == "cilium" ? 1 : 0
|
||||
|
||||
security_group_id = aws_security_group.controller.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "tcp"
|
||||
from_port = 9962
|
||||
to_port = 9965
|
||||
self = true
|
||||
}
|
||||
|
||||
# IANA VXLAN default
|
||||
resource "aws_security_group_rule" "controller-vxlan" {
|
||||
count = var.networking == "flannel" ? 1 : 0
|
||||
@ -379,6 +403,30 @@ resource "aws_security_group_rule" "worker-cilium-health-self" {
|
||||
self = true
|
||||
}
|
||||
|
||||
resource "aws_security_group_rule" "worker-cilium-metrics" {
|
||||
count = var.networking == "cilium" ? 1 : 0
|
||||
|
||||
security_group_id = aws_security_group.worker.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "tcp"
|
||||
from_port = 9962
|
||||
to_port = 9965
|
||||
source_security_group_id = aws_security_group.controller.id
|
||||
}
|
||||
|
||||
resource "aws_security_group_rule" "worker-cilium-metrics-self" {
|
||||
count = var.networking == "cilium" ? 1 : 0
|
||||
|
||||
security_group_id = aws_security_group.worker.id
|
||||
|
||||
type = "ingress"
|
||||
protocol = "tcp"
|
||||
from_port = 9962
|
||||
to_port = 9965
|
||||
self = true
|
||||
}
|
||||
|
||||
# IANA VXLAN default
|
||||
resource "aws_security_group_rule" "worker-vxlan" {
|
||||
count = var.networking == "flannel" ? 1 : 0
|
||||
|
@ -17,30 +17,6 @@ variable "dns_zone_id" {
|
||||
|
||||
# instances
|
||||
|
||||
variable "controller_count" {
|
||||
type = number
|
||||
description = "Number of controllers (i.e. masters)"
|
||||
default = 1
|
||||
}
|
||||
|
||||
variable "worker_count" {
|
||||
type = number
|
||||
description = "Number of workers"
|
||||
default = 1
|
||||
}
|
||||
|
||||
variable "controller_type" {
|
||||
type = string
|
||||
description = "EC2 instance type for controllers"
|
||||
default = "t3.small"
|
||||
}
|
||||
|
||||
variable "worker_type" {
|
||||
type = string
|
||||
description = "EC2 instance type for workers"
|
||||
default = "t3.small"
|
||||
}
|
||||
|
||||
variable "os_image" {
|
||||
type = string
|
||||
description = "AMI channel for a Container Linux derivative (flatcar-stable, flatcar-beta, flatcar-alpha)"
|
||||
@ -52,24 +28,78 @@ variable "os_image" {
|
||||
}
|
||||
}
|
||||
|
||||
variable "disk_size" {
|
||||
variable "controller_count" {
|
||||
type = number
|
||||
description = "Number of controllers (i.e. masters)"
|
||||
default = 1
|
||||
}
|
||||
|
||||
variable "controller_type" {
|
||||
type = string
|
||||
description = "EC2 instance type for controllers"
|
||||
default = "t3.small"
|
||||
}
|
||||
|
||||
variable "controller_disk_size" {
|
||||
type = number
|
||||
description = "Size of the EBS volume in GB"
|
||||
default = 30
|
||||
}
|
||||
|
||||
variable "disk_type" {
|
||||
variable "controller_disk_type" {
|
||||
type = string
|
||||
description = "Type of the EBS volume (e.g. standard, gp2, gp3, io1)"
|
||||
default = "gp3"
|
||||
}
|
||||
|
||||
variable "disk_iops" {
|
||||
variable "controller_disk_iops" {
|
||||
type = number
|
||||
description = "IOPS of the EBS volume (e.g. 3000)"
|
||||
default = 3000
|
||||
}
|
||||
|
||||
variable "controller_cpu_credits" {
|
||||
type = string
|
||||
description = "CPU credits mode (if using a burstable instance type)"
|
||||
default = null
|
||||
}
|
||||
|
||||
variable "worker_count" {
|
||||
type = number
|
||||
description = "Number of workers"
|
||||
default = 1
|
||||
}
|
||||
|
||||
variable "worker_type" {
|
||||
type = string
|
||||
description = "EC2 instance type for workers"
|
||||
default = "t3.small"
|
||||
}
|
||||
|
||||
variable "worker_disk_size" {
|
||||
type = number
|
||||
description = "Size of the EBS volume in GB"
|
||||
default = 30
|
||||
}
|
||||
|
||||
variable "worker_disk_type" {
|
||||
type = string
|
||||
description = "Type of the EBS volume (e.g. standard, gp2, gp3, io1)"
|
||||
default = "gp3"
|
||||
}
|
||||
|
||||
variable "worker_disk_iops" {
|
||||
type = number
|
||||
description = "IOPS of the EBS volume (e.g. 3000)"
|
||||
default = 3000
|
||||
}
|
||||
|
||||
variable "worker_cpu_credits" {
|
||||
type = string
|
||||
description = "CPU credits mode (if using a burstable instance type)"
|
||||
default = null
|
||||
}
|
||||
|
||||
variable "worker_price" {
|
||||
type = number
|
||||
description = "Spot price in USD for worker instances or 0 to use on-demand instances"
|
||||
@ -134,40 +164,31 @@ EOD
|
||||
default = "10.3.0.0/16"
|
||||
}
|
||||
|
||||
variable "enable_reporting" {
|
||||
type = bool
|
||||
description = "Enable usage or analytics reporting to upstreams (Calico)"
|
||||
default = false
|
||||
}
|
||||
|
||||
variable "enable_aggregation" {
|
||||
type = bool
|
||||
description = "Enable the Kubernetes Aggregation Layer"
|
||||
default = true
|
||||
}
|
||||
|
||||
variable "worker_node_labels" {
|
||||
type = list(string)
|
||||
description = "List of initial worker node labels"
|
||||
default = []
|
||||
}
|
||||
|
||||
# unofficial, undocumented, unsupported
|
||||
# advanced
|
||||
|
||||
variable "cluster_domain_suffix" {
|
||||
variable "controller_arch" {
|
||||
type = string
|
||||
description = "Queries for domains with the suffix will be answered by CoreDNS. Default is cluster.local (e.g. foo.default.svc.cluster.local)"
|
||||
default = "cluster.local"
|
||||
description = "Controller node(s) architecture (amd64 or arm64)"
|
||||
default = "amd64"
|
||||
validation {
|
||||
condition = contains(["amd64", "arm64"], var.controller_arch)
|
||||
error_message = "The controller_arch must be amd64 or arm64."
|
||||
}
|
||||
}
|
||||
|
||||
variable "arch" {
|
||||
variable "worker_arch" {
|
||||
type = string
|
||||
description = "Container architecture (amd64 or arm64)"
|
||||
description = "Worker node(s) architecture (amd64 or arm64)"
|
||||
default = "amd64"
|
||||
|
||||
validation {
|
||||
condition = var.arch == "amd64" || var.arch == "arm64"
|
||||
error_message = "The arch must be amd64 or arm64."
|
||||
condition = contains(["amd64", "arm64"], var.worker_arch)
|
||||
error_message = "The worker_arch must be amd64 or arm64."
|
||||
}
|
||||
}
|
||||
|
||||
@ -176,3 +197,19 @@ variable "daemonset_tolerations" {
|
||||
description = "List of additional taint keys kube-system DaemonSets should tolerate (e.g. ['custom-role', 'gpu-role'])"
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "components" {
|
||||
description = "Configure pre-installed cluster components"
|
||||
# Component configs are passed through to terraform-render-bootstrap,
|
||||
# which handles type enforcement and defines defaults
|
||||
# https://github.com/poseidon/terraform-render-bootstrap/blob/main/variables.tf#L95
|
||||
type = object({
|
||||
enable = optional(bool)
|
||||
coredns = optional(map(any))
|
||||
kube_proxy = optional(map(any))
|
||||
flannel = optional(map(any))
|
||||
calico = optional(map(any))
|
||||
cilium = optional(map(any))
|
||||
})
|
||||
default = null
|
||||
}
|
||||
|
@ -3,13 +3,11 @@
|
||||
terraform {
|
||||
required_version = ">= 0.13.0, < 2.0.0"
|
||||
required_providers {
|
||||
aws = ">= 2.23, <= 5.0"
|
||||
template = "~> 2.2"
|
||||
null = ">= 2.1"
|
||||
|
||||
aws = ">= 2.23, <= 6.0"
|
||||
null = ">= 2.1"
|
||||
ct = {
|
||||
source = "poseidon/ct"
|
||||
version = "~> 0.9"
|
||||
version = "~> 0.13"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -6,20 +6,23 @@ module "workers" {
|
||||
vpc_id = aws_vpc.network.id
|
||||
subnet_ids = aws_subnet.public.*.id
|
||||
security_groups = [aws_security_group.worker.id]
|
||||
worker_count = var.worker_count
|
||||
instance_type = var.worker_type
|
||||
os_image = var.os_image
|
||||
arch = var.arch
|
||||
disk_size = var.disk_size
|
||||
spot_price = var.worker_price
|
||||
target_groups = var.worker_target_groups
|
||||
|
||||
# instances
|
||||
os_image = var.os_image
|
||||
worker_count = var.worker_count
|
||||
instance_type = var.worker_type
|
||||
arch = var.worker_arch
|
||||
disk_type = var.worker_disk_type
|
||||
disk_size = var.worker_disk_size
|
||||
disk_iops = var.worker_disk_iops
|
||||
spot_price = var.worker_price
|
||||
target_groups = var.worker_target_groups
|
||||
|
||||
# configuration
|
||||
kubeconfig = module.bootstrap.kubeconfig-kubelet
|
||||
ssh_authorized_key = var.ssh_authorized_key
|
||||
service_cidr = var.service_cidr
|
||||
cluster_domain_suffix = var.cluster_domain_suffix
|
||||
snippets = var.worker_snippets
|
||||
node_labels = var.worker_node_labels
|
||||
kubeconfig = module.bootstrap.kubeconfig-kubelet
|
||||
ssh_authorized_key = var.ssh_authorized_key
|
||||
service_cidr = var.service_cidr
|
||||
snippets = var.worker_snippets
|
||||
node_labels = var.worker_node_labels
|
||||
}
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
---
|
||||
variant: flatcar
|
||||
version: 1.0.0
|
||||
systemd:
|
||||
units:
|
||||
- name: docker.service
|
||||
@ -29,7 +30,7 @@ systemd:
|
||||
After=coreos-metadata.service
|
||||
Wants=rpc-statd.service
|
||||
[Service]
|
||||
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.23.5
|
||||
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3
|
||||
EnvironmentFile=/run/metadata/coreos
|
||||
ExecStartPre=/bin/mkdir -p /etc/cni/net.d
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
|
||||
@ -58,17 +59,9 @@ systemd:
|
||||
-v /var/log:/var/log \
|
||||
-v /opt/cni/bin:/opt/cni/bin \
|
||||
$${KUBELET_IMAGE} \
|
||||
--anonymous-auth=false \
|
||||
--authentication-token-webhook \
|
||||
--authorization-mode=Webhook \
|
||||
--bootstrap-kubeconfig=/etc/kubernetes/kubeconfig \
|
||||
--cgroup-driver=systemd \
|
||||
--container-runtime=remote \
|
||||
--config=/etc/kubernetes/kubelet.yaml \
|
||||
--container-runtime-endpoint=unix:///run/containerd/containerd.sock \
|
||||
--client-ca-file=/etc/kubernetes/ca.crt \
|
||||
--cluster_dns=${cluster_dns_service_ip} \
|
||||
--cluster_domain=${cluster_domain_suffix} \
|
||||
--healthz-port=0 \
|
||||
--kubeconfig=/var/lib/kubelet/kubeconfig \
|
||||
--node-labels=node.kubernetes.io/node \
|
||||
%{~ for label in split(",", node_labels) ~}
|
||||
@ -77,12 +70,7 @@ systemd:
|
||||
%{~ for taint in split(",", node_taints) ~}
|
||||
--register-with-taints=${taint} \
|
||||
%{~ endfor ~}
|
||||
--pod-manifest-path=/etc/kubernetes/manifests \
|
||||
--provider-id=aws:///$${COREOS_EC2_AVAILABILITY_ZONE}/$${COREOS_EC2_INSTANCE_ID} \
|
||||
--read-only-port=0 \
|
||||
--resolv-conf=/run/systemd/resolve/resolv.conf \
|
||||
--rotate-certificates \
|
||||
--volume-plugin-dir=/var/lib/kubelet/volumeplugins
|
||||
--provider-id=aws:///$${COREOS_EC2_AVAILABILITY_ZONE}/$${COREOS_EC2_INSTANCE_ID}
|
||||
ExecStart=docker logs -f kubelet
|
||||
ExecStop=docker stop kubelet
|
||||
ExecStopPost=docker rm kubelet
|
||||
@ -90,29 +78,46 @@ systemd:
|
||||
RestartSec=5
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
- name: delete-node.service
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Delete Kubernetes node on shutdown
|
||||
[Service]
|
||||
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.23.5
|
||||
Type=oneshot
|
||||
RemainAfterExit=true
|
||||
ExecStart=/bin/true
|
||||
ExecStop=/bin/bash -c '/usr/bin/docker run -v /var/lib/kubelet:/var/lib/kubelet:ro --entrypoint /usr/local/bin/kubectl $${KUBELET_IMAGE} --kubeconfig=/var/lib/kubelet/kubeconfig delete node $HOSTNAME'
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
storage:
|
||||
files:
|
||||
- path: /etc/kubernetes/kubeconfig
|
||||
filesystem: root
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
||||
${kubeconfig}
|
||||
- path: /etc/kubernetes/kubelet.yaml
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
||||
apiVersion: kubelet.config.k8s.io/v1beta1
|
||||
kind: KubeletConfiguration
|
||||
authentication:
|
||||
anonymous:
|
||||
enabled: false
|
||||
webhook:
|
||||
enabled: true
|
||||
x509:
|
||||
clientCAFile: /etc/kubernetes/ca.crt
|
||||
authorization:
|
||||
mode: Webhook
|
||||
cgroupDriver: systemd
|
||||
clusterDNS:
|
||||
- ${cluster_dns_service_ip}
|
||||
clusterDomain: cluster.local
|
||||
healthzPort: 0
|
||||
rotateCertificates: true
|
||||
shutdownGracePeriod: 45s
|
||||
shutdownGracePeriodCriticalPods: 30s
|
||||
staticPodPath: /etc/kubernetes/manifests
|
||||
readOnlyPort: 0
|
||||
resolvConf: /run/systemd/resolve/resolv.conf
|
||||
volumePluginDir: /var/lib/kubelet/volumeplugins
|
||||
- path: /etc/systemd/logind.conf.d/inhibitors.conf
|
||||
contents:
|
||||
inline: |
|
||||
[Login]
|
||||
InhibitDelayMaxSec=45s
|
||||
- path: /etc/sysctl.d/max-user-watches.conf
|
||||
filesystem: root
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
@ -69,6 +69,12 @@ variable "spot_price" {
|
||||
default = 0
|
||||
}
|
||||
|
||||
variable "cpu_credits" {
|
||||
type = string
|
||||
description = "CPU burst credits mode (if applicable)"
|
||||
default = null
|
||||
}
|
||||
|
||||
variable "target_groups" {
|
||||
type = list(string)
|
||||
description = "Additional target group ARNs to which instances should be added"
|
||||
@ -102,12 +108,6 @@ EOD
|
||||
default = "10.3.0.0/16"
|
||||
}
|
||||
|
||||
variable "cluster_domain_suffix" {
|
||||
type = string
|
||||
description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. foo.default.svc.cluster.local) "
|
||||
default = "cluster.local"
|
||||
}
|
||||
|
||||
variable "node_labels" {
|
||||
type = list(string)
|
||||
description = "List of initial node labels"
|
||||
@ -128,7 +128,7 @@ variable "arch" {
|
||||
default = "amd64"
|
||||
|
||||
validation {
|
||||
condition = var.arch == "amd64" || var.arch == "arm64"
|
||||
condition = contains(["amd64", "arm64"], var.arch)
|
||||
error_message = "The arch must be amd64 or arm64."
|
||||
}
|
||||
}
|
||||
|
@ -3,12 +3,10 @@
|
||||
terraform {
|
||||
required_version = ">= 0.13.0, < 2.0.0"
|
||||
required_providers {
|
||||
aws = ">= 2.23, <= 5.0"
|
||||
template = "~> 2.2"
|
||||
|
||||
aws = ">= 2.23, <= 6.0"
|
||||
ct = {
|
||||
source = "poseidon/ct"
|
||||
version = "~> 0.9"
|
||||
version = "~> 0.13"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,19 +1,20 @@
|
||||
# Workers AutoScaling Group
|
||||
resource "aws_autoscaling_group" "workers" {
|
||||
name = "${var.name}-worker ${aws_launch_configuration.worker.name}"
|
||||
name = "${var.name}-worker"
|
||||
|
||||
# count
|
||||
desired_capacity = var.worker_count
|
||||
min_size = var.worker_count
|
||||
max_size = var.worker_count + 2
|
||||
default_cooldown = 30
|
||||
health_check_grace_period = 30
|
||||
desired_capacity = var.worker_count
|
||||
min_size = var.worker_count
|
||||
max_size = var.worker_count + 2
|
||||
|
||||
# network
|
||||
vpc_zone_identifier = var.subnet_ids
|
||||
|
||||
# template
|
||||
launch_configuration = aws_launch_configuration.worker.name
|
||||
# instance template
|
||||
launch_template {
|
||||
id = aws_launch_template.worker.id
|
||||
version = aws_launch_template.worker.latest_version
|
||||
}
|
||||
|
||||
# target groups to which instances should be added
|
||||
target_group_arns = flatten([
|
||||
@ -22,6 +23,18 @@ resource "aws_autoscaling_group" "workers" {
|
||||
var.target_groups,
|
||||
])
|
||||
|
||||
instance_refresh {
|
||||
strategy = "Rolling"
|
||||
preferences {
|
||||
instance_warmup = 120
|
||||
min_healthy_percentage = 90
|
||||
}
|
||||
}
|
||||
# Grace period before checking new instance's health
|
||||
health_check_grace_period = 30
|
||||
# Cooldown period between scaling activities
|
||||
default_cooldown = 30
|
||||
|
||||
lifecycle {
|
||||
# override the default destroy and replace update behavior
|
||||
create_before_destroy = true
|
||||
@ -41,24 +54,54 @@ resource "aws_autoscaling_group" "workers" {
|
||||
}
|
||||
|
||||
# Worker template
|
||||
resource "aws_launch_configuration" "worker" {
|
||||
image_id = local.ami_id
|
||||
instance_type = var.instance_type
|
||||
spot_price = var.spot_price > 0 ? var.spot_price : null
|
||||
enable_monitoring = false
|
||||
|
||||
user_data = data.ct_config.worker-ignition.rendered
|
||||
resource "aws_launch_template" "worker" {
|
||||
name_prefix = "${var.name}-worker"
|
||||
image_id = local.ami_id
|
||||
instance_type = var.instance_type
|
||||
|
||||
# storage
|
||||
root_block_device {
|
||||
volume_type = var.disk_type
|
||||
volume_size = var.disk_size
|
||||
iops = var.disk_iops
|
||||
encrypted = true
|
||||
ebs_optimized = true
|
||||
block_device_mappings {
|
||||
device_name = "/dev/xvda"
|
||||
ebs {
|
||||
volume_type = var.disk_type
|
||||
volume_size = var.disk_size
|
||||
iops = var.disk_iops
|
||||
encrypted = true
|
||||
delete_on_termination = true
|
||||
}
|
||||
}
|
||||
|
||||
# network
|
||||
security_groups = var.security_groups
|
||||
network_interfaces {
|
||||
associate_public_ip_address = true
|
||||
security_groups = var.security_groups
|
||||
}
|
||||
|
||||
# boot
|
||||
user_data = sensitive(base64encode(data.ct_config.worker.rendered))
|
||||
|
||||
# metadata
|
||||
metadata_options {
|
||||
http_tokens = "optional"
|
||||
}
|
||||
monitoring {
|
||||
enabled = false
|
||||
}
|
||||
|
||||
# cost
|
||||
credit_specification {
|
||||
cpu_credits = var.cpu_credits
|
||||
}
|
||||
dynamic "instance_market_options" {
|
||||
for_each = var.spot_price > 0 ? [1] : []
|
||||
content {
|
||||
market_type = "spot"
|
||||
spot_options {
|
||||
max_price = var.spot_price
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
lifecycle {
|
||||
// Override the default destroy and replace update behavior
|
||||
@ -67,24 +110,15 @@ resource "aws_launch_configuration" "worker" {
|
||||
}
|
||||
}
|
||||
|
||||
# Worker Ignition config
|
||||
data "ct_config" "worker-ignition" {
|
||||
content = data.template_file.worker-config.rendered
|
||||
strict = true
|
||||
snippets = var.snippets
|
||||
}
|
||||
|
||||
# Worker Container Linux config
|
||||
data "template_file" "worker-config" {
|
||||
template = file("${path.module}/cl/worker.yaml")
|
||||
|
||||
vars = {
|
||||
# Flatcar Linux worker
|
||||
data "ct_config" "worker" {
|
||||
content = templatefile("${path.module}/butane/worker.yaml", {
|
||||
kubeconfig = indent(10, var.kubeconfig)
|
||||
ssh_authorized_key = var.ssh_authorized_key
|
||||
cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
|
||||
cluster_domain_suffix = var.cluster_domain_suffix
|
||||
node_labels = join(",", var.node_labels)
|
||||
node_taints = join(",", var.node_taints)
|
||||
}
|
||||
})
|
||||
strict = true
|
||||
snippets = var.snippets
|
||||
}
|
||||
|
||||
|
@ -11,7 +11,7 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster
|
||||
|
||||
## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>
|
||||
|
||||
* Kubernetes v1.23.5 (upstream)
|
||||
* Kubernetes v1.31.3 (upstream)
|
||||
* Single or multi-master, [Calico](https://www.projectcalico.org/) or [Cilium](https://github.com/cilium/cilium) or [flannel](https://github.com/coreos/flannel) networking
|
||||
* On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/), SELinux enforcing
|
||||
* Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [spot priority](https://typhoon.psdn.io/fedora-coreos/azure/#low-priority) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#hosts) customization
|
||||
|
@ -1,13 +1,12 @@
|
||||
# Kubernetes assets (kubeconfig, manifests)
|
||||
module "bootstrap" {
|
||||
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=e5bdb6f6c67461ca3a1cd3449f4703189f14d3e4"
|
||||
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=e6a1c7bccfc45ab299b5f8149bc3840f99b30b2b"
|
||||
|
||||
cluster_name = var.cluster_name
|
||||
api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)]
|
||||
etcd_servers = formatlist("%s.%s", azurerm_dns_a_record.etcds.*.name, var.dns_zone)
|
||||
|
||||
networking = var.networking
|
||||
|
||||
# only effective with Calico networking
|
||||
# we should be able to use 1450 MTU, but in practice, 1410 was needed
|
||||
network_encapsulation = "vxlan"
|
||||
@ -15,9 +14,7 @@ module "bootstrap" {
|
||||
|
||||
pod_cidr = var.pod_cidr
|
||||
service_cidr = var.service_cidr
|
||||
cluster_domain_suffix = var.cluster_domain_suffix
|
||||
enable_reporting = var.enable_reporting
|
||||
enable_aggregation = var.enable_aggregation
|
||||
daemonset_tolerations = var.daemonset_tolerations
|
||||
components = var.components
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
---
|
||||
variant: fcos
|
||||
version: 1.4.0
|
||||
version: 1.5.0
|
||||
systemd:
|
||||
units:
|
||||
- name: etcd-member.service
|
||||
@ -9,15 +9,16 @@ systemd:
|
||||
[Unit]
|
||||
Description=etcd (System Container)
|
||||
Documentation=https://github.com/etcd-io/etcd
|
||||
Wants=network-online.target network.target
|
||||
Wants=network-online.target
|
||||
After=network-online.target
|
||||
[Service]
|
||||
Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.2
|
||||
Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.13
|
||||
Type=exec
|
||||
ExecStartPre=/bin/mkdir -p /var/lib/etcd
|
||||
ExecStartPre=-/usr/bin/podman rm etcd
|
||||
ExecStart=/usr/bin/podman run --name etcd \
|
||||
--env-file /etc/etcd/etcd.env \
|
||||
--log-driver k8s-file \
|
||||
--network host \
|
||||
--volume /var/lib/etcd:/var/lib/etcd:rw,Z \
|
||||
--volume /etc/ssl/etcd:/etc/ssl/certs:ro,Z \
|
||||
@ -53,7 +54,7 @@ systemd:
|
||||
Description=Kubelet (System Container)
|
||||
Wants=rpc-statd.service
|
||||
[Service]
|
||||
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.23.5
|
||||
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3
|
||||
ExecStartPre=/bin/mkdir -p /etc/cni/net.d
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
|
||||
ExecStartPre=/bin/mkdir -p /opt/cni/bin
|
||||
@ -62,15 +63,19 @@ systemd:
|
||||
ExecStartPre=/usr/bin/bash -c "grep 'certificate-authority-data' /etc/kubernetes/kubeconfig | awk '{print $2}' | base64 -d > /etc/kubernetes/ca.crt"
|
||||
ExecStartPre=-/usr/bin/podman rm kubelet
|
||||
ExecStart=/usr/bin/podman run --name kubelet \
|
||||
--log-driver k8s-file \
|
||||
--privileged \
|
||||
--pid host \
|
||||
--network host \
|
||||
--volume /etc/cni/net.d:/etc/cni/net.d:ro,z \
|
||||
--volume /etc/kubernetes:/etc/kubernetes:ro,z \
|
||||
--volume /etc/machine-id:/etc/machine-id:ro \
|
||||
--volume /usr/lib/os-release:/etc/os-release:ro \
|
||||
--volume /lib/modules:/lib/modules:ro \
|
||||
--volume /run:/run \
|
||||
--volume /sys/fs/cgroup:/sys/fs/cgroup \
|
||||
--volume /etc/selinux:/etc/selinux \
|
||||
--volume /sys/fs/selinux:/sys/fs/selinux \
|
||||
--volume /var/lib/calico:/var/lib/calico:ro \
|
||||
--volume /var/lib/containerd:/var/lib/containerd \
|
||||
--volume /var/lib/kubelet:/var/lib/kubelet:rshared,z \
|
||||
@ -78,27 +83,12 @@ systemd:
|
||||
--volume /var/run/lock:/var/run/lock:z \
|
||||
--volume /opt/cni/bin:/opt/cni/bin:z \
|
||||
$${KUBELET_IMAGE} \
|
||||
--anonymous-auth=false \
|
||||
--authentication-token-webhook \
|
||||
--authorization-mode=Webhook \
|
||||
--bootstrap-kubeconfig=/etc/kubernetes/kubeconfig \
|
||||
--cgroup-driver=systemd \
|
||||
--cgroups-per-qos=true \
|
||||
--container-runtime=remote \
|
||||
--config=/etc/kubernetes/kubelet.yaml \
|
||||
--container-runtime-endpoint=unix:///run/containerd/containerd.sock \
|
||||
--enforce-node-allocatable=pods \
|
||||
--client-ca-file=/etc/kubernetes/ca.crt \
|
||||
--cluster_dns=${cluster_dns_service_ip} \
|
||||
--cluster_domain=${cluster_domain_suffix} \
|
||||
--healthz-port=0 \
|
||||
--kubeconfig=/var/lib/kubelet/kubeconfig \
|
||||
--node-labels=node.kubernetes.io/controller="true" \
|
||||
--pod-manifest-path=/etc/kubernetes/manifests \
|
||||
--read-only-port=0 \
|
||||
--resolv-conf=/run/systemd/resolve/resolv.conf \
|
||||
--register-with-taints=node-role.kubernetes.io/controller=:NoSchedule \
|
||||
--rotate-certificates \
|
||||
--volume-plugin-dir=/var/lib/kubelet/volumeplugins
|
||||
--register-with-taints=node-role.kubernetes.io/controller=:NoSchedule
|
||||
ExecStop=-/usr/bin/podman stop kubelet
|
||||
Delegate=yes
|
||||
Restart=always
|
||||
@ -121,7 +111,7 @@ systemd:
|
||||
--volume /opt/bootstrap/assets:/assets:ro,Z \
|
||||
--volume /opt/bootstrap/apply:/apply:ro,Z \
|
||||
--entrypoint=/apply \
|
||||
quay.io/poseidon/kubelet:v1.23.5
|
||||
quay.io/poseidon/kubelet:v1.31.3
|
||||
ExecStartPost=/bin/touch /opt/bootstrap/bootstrap.done
|
||||
ExecStartPost=-/usr/bin/podman stop bootstrap
|
||||
storage:
|
||||
@ -136,12 +126,39 @@ storage:
|
||||
contents:
|
||||
inline: |
|
||||
${kubeconfig}
|
||||
- path: /etc/kubernetes/kubelet.yaml
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
||||
apiVersion: kubelet.config.k8s.io/v1beta1
|
||||
kind: KubeletConfiguration
|
||||
authentication:
|
||||
anonymous:
|
||||
enabled: false
|
||||
webhook:
|
||||
enabled: true
|
||||
x509:
|
||||
clientCAFile: /etc/kubernetes/ca.crt
|
||||
authorization:
|
||||
mode: Webhook
|
||||
cgroupDriver: systemd
|
||||
clusterDNS:
|
||||
- ${cluster_dns_service_ip}
|
||||
clusterDomain: cluster.local
|
||||
healthzPort: 0
|
||||
rotateCertificates: true
|
||||
shutdownGracePeriod: 45s
|
||||
shutdownGracePeriodCriticalPods: 30s
|
||||
staticPodPath: /etc/kubernetes/manifests
|
||||
readOnlyPort: 0
|
||||
resolvConf: /run/systemd/resolve/resolv.conf
|
||||
volumePluginDir: /var/lib/kubelet/volumeplugins
|
||||
- path: /opt/bootstrap/layout
|
||||
mode: 0544
|
||||
contents:
|
||||
inline: |
|
||||
#!/bin/bash -e
|
||||
mkdir -p -- auth tls/etcd tls/k8s static-manifests manifests/coredns manifests-networking
|
||||
mkdir -p -- auth tls/{etcd,k8s} static-manifests manifests/{coredns,kube-proxy,network}
|
||||
awk '/#####/ {filename=$2; next} {print > filename}' assets
|
||||
mkdir -p /etc/ssl/etcd/etcd
|
||||
mkdir -p /etc/kubernetes/pki
|
||||
@ -155,8 +172,7 @@ storage:
|
||||
mv static-manifests/* /etc/kubernetes/manifests/
|
||||
mkdir -p /opt/bootstrap/assets
|
||||
mv manifests /opt/bootstrap/assets/manifests
|
||||
mv manifests-networking/* /opt/bootstrap/assets/manifests/
|
||||
rm -rf assets auth static-manifests tls manifests-networking
|
||||
rm -rf assets auth static-manifests tls manifests
|
||||
chcon -R -u system_u -t container_file_t /etc/kubernetes/pki
|
||||
- path: /opt/bootstrap/apply
|
||||
mode: 0544
|
||||
@ -172,6 +188,11 @@ storage:
|
||||
echo "Retry applying manifests"
|
||||
sleep 5
|
||||
done
|
||||
- path: /etc/systemd/logind.conf.d/inhibitors.conf
|
||||
contents:
|
||||
inline: |
|
||||
[Login]
|
||||
InhibitDelayMaxSec=45s
|
||||
- path: /etc/sysctl.d/max-user-watches.conf
|
||||
contents:
|
||||
inline: |
|
||||
@ -216,7 +237,6 @@ storage:
|
||||
ETCD_PEER_CERT_FILE=/etc/ssl/certs/etcd/peer.crt
|
||||
ETCD_PEER_KEY_FILE=/etc/ssl/certs/etcd/peer.key
|
||||
ETCD_PEER_CLIENT_CERT_AUTH=true
|
||||
- path: /etc/fedora-coreos/iptables-legacy.stamp
|
||||
- path: /etc/containerd/config.toml
|
||||
overwrite: true
|
||||
contents:
|
||||
@ -241,3 +261,4 @@ passwd:
|
||||
- name: core
|
||||
ssh_authorized_keys:
|
||||
- ${ssh_authorized_key}
|
||||
|
@ -1,25 +1,30 @@
|
||||
locals {
  # Typhoon ssh_authorized_key supports RSA or newer formats (e.g. ed25519).
  # However, Azure requires an older RSA key to pass validations. To use a
  # newer key format, pass a dummy RSA key as the azure_authorized_key and
  # delete the associated private key so it's never used.
  azure_authorized_key = var.azure_authorized_key == "" ? var.ssh_authorized_key : var.azure_authorized_key
}
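A sketch of the workaround these comments describe, assuming a hypothetical module invocation; the release ref and key strings are placeholders, not values from this diff.

```hcl
module "cluster" {
  source = "git::https://github.com/poseidon/typhoon//azure/fedora-coreos/kubernetes?ref=<release>"

  # key actually used for SSH access (any supported format, e.g. ed25519)
  ssh_authorized_key = "ssh-ed25519 AAAA... user@example"

  # throwaway RSA key that exists only to satisfy Azure's validation;
  # its private half can be deleted so it is never usable
  azure_authorized_key = "ssh-rsa AAAA... dummy@example"

  # ...other required variables omitted...
}
```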
|
||||
|
||||
# Discrete DNS records for each controller's private IPv4 for etcd usage
|
||||
resource "azurerm_dns_a_record" "etcds" {
|
||||
count = var.controller_count
|
||||
resource_group_name = var.dns_zone_group
|
||||
count = var.controller_count
|
||||
|
||||
# DNS Zone name where record should be created
|
||||
zone_name = var.dns_zone
|
||||
|
||||
zone_name = var.dns_zone
|
||||
resource_group_name = var.dns_zone_group
|
||||
# DNS record
|
||||
name = format("%s-etcd%d", var.cluster_name, count.index)
|
||||
ttl = 300
|
||||
|
||||
# private IPv4 address for etcd
|
||||
records = [azurerm_network_interface.controllers.*.private_ip_address[count.index]]
|
||||
records = [azurerm_network_interface.controllers[count.index].private_ip_address]
|
||||
}
|
||||
|
||||
# Controller availability set to spread controllers
|
||||
resource "azurerm_availability_set" "controllers" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "${var.cluster_name}-controllers"
|
||||
location = var.region
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
location = var.location
|
||||
platform_fault_domain_count = 2
|
||||
platform_update_domain_count = 4
|
||||
managed = true
|
||||
@ -27,35 +32,39 @@ resource "azurerm_availability_set" "controllers" {
|
||||
|
||||
# Controller instances
|
||||
resource "azurerm_linux_virtual_machine" "controllers" {
|
||||
count = var.controller_count
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
count = var.controller_count
|
||||
|
||||
name = "${var.cluster_name}-controller-${count.index}"
|
||||
location = var.region
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
location = var.location
|
||||
availability_set_id = azurerm_availability_set.controllers.id
|
||||
|
||||
size = var.controller_type
|
||||
custom_data = base64encode(data.ct_config.controller-ignitions.*.rendered[count.index])
|
||||
size = var.controller_type
|
||||
|
||||
# storage
|
||||
source_image_id = var.os_image
|
||||
os_disk {
|
||||
name = "${var.cluster_name}-controller-${count.index}"
|
||||
storage_account_type = var.controller_disk_type
|
||||
disk_size_gb = var.controller_disk_size
|
||||
caching = "None"
|
||||
disk_size_gb = var.disk_size
|
||||
storage_account_type = "Premium_LRS"
|
||||
}
|
||||
|
||||
# network
|
||||
network_interface_ids = [
|
||||
azurerm_network_interface.controllers.*.id[count.index]
|
||||
azurerm_network_interface.controllers[count.index].id
|
||||
]
|
||||
|
||||
# Azure requires setting admin_ssh_key, though Ignition custom_data handles it too
|
||||
# boot
|
||||
custom_data = base64encode(data.ct_config.controllers[count.index].rendered)
|
||||
boot_diagnostics {
|
||||
# defaults to a managed storage account
|
||||
}
|
||||
|
||||
# Azure requires an RSA admin_ssh_key
|
||||
admin_username = "core"
|
||||
admin_ssh_key {
|
||||
username = "core"
|
||||
public_key = var.ssh_authorized_key
|
||||
public_key = local.azure_authorized_key
|
||||
}
|
||||
|
||||
lifecycle {
|
||||
@ -66,31 +75,52 @@ resource "azurerm_linux_virtual_machine" "controllers" {
|
||||
}
|
||||
}
|
||||
|
||||
# Controller public IPv4 addresses
|
||||
resource "azurerm_public_ip" "controllers" {
|
||||
count = var.controller_count
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
# Controller node public IPv4 addresses
|
||||
resource "azurerm_public_ip" "controllers-ipv4" {
|
||||
count = var.controller_count
|
||||
|
||||
name = "${var.cluster_name}-controller-${count.index}"
|
||||
location = azurerm_resource_group.cluster.location
|
||||
sku = "Standard"
|
||||
allocation_method = "Static"
|
||||
name = "${var.cluster_name}-controller-${count.index}-ipv4"
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
location = azurerm_resource_group.cluster.location
|
||||
ip_version = "IPv4"
|
||||
sku = "Standard"
|
||||
allocation_method = "Static"
|
||||
}
|
||||
|
||||
# Controller NICs with public and private IPv4
|
||||
resource "azurerm_network_interface" "controllers" {
|
||||
count = var.controller_count
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
# Controller node public IPv6 addresses
|
||||
resource "azurerm_public_ip" "controllers-ipv6" {
|
||||
count = var.controller_count
|
||||
|
||||
name = "${var.cluster_name}-controller-${count.index}"
|
||||
location = azurerm_resource_group.cluster.location
|
||||
name = "${var.cluster_name}-controller-${count.index}-ipv6"
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
location = azurerm_resource_group.cluster.location
|
||||
ip_version = "IPv6"
|
||||
sku = "Standard"
|
||||
allocation_method = "Static"
|
||||
}
|
||||
|
||||
# Controllers' network interfaces
|
||||
resource "azurerm_network_interface" "controllers" {
|
||||
count = var.controller_count
|
||||
|
||||
name = "${var.cluster_name}-controller-${count.index}"
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
location = azurerm_resource_group.cluster.location
|
||||
|
||||
ip_configuration {
|
||||
name = "ip0"
|
||||
name = "ipv4"
|
||||
primary = true
|
||||
subnet_id = azurerm_subnet.controller.id
|
||||
private_ip_address_allocation = "Dynamic"
|
||||
# instance public IPv4
|
||||
public_ip_address_id = azurerm_public_ip.controllers.*.id[count.index]
|
||||
private_ip_address_version = "IPv4"
|
||||
public_ip_address_id = azurerm_public_ip.controllers-ipv4[count.index].id
|
||||
}
|
||||
ip_configuration {
|
||||
name = "ipv6"
|
||||
subnet_id = azurerm_subnet.controller.id
|
||||
private_ip_address_allocation = "Dynamic"
|
||||
private_ip_address_version = "IPv6"
|
||||
public_ip_address_id = azurerm_public_ip.controllers-ipv6[count.index].id
|
||||
}
|
||||
}
|
||||
|
||||
@ -103,49 +133,37 @@ resource "azurerm_network_interface_security_group_association" "controllers" {
|
||||
}
|
||||
|
||||
# Associate controller network interface with controller backend address pool
|
||||
resource "azurerm_network_interface_backend_address_pool_association" "controllers" {
|
||||
resource "azurerm_network_interface_backend_address_pool_association" "controllers-ipv4" {
|
||||
count = var.controller_count
|
||||
|
||||
network_interface_id = azurerm_network_interface.controllers[count.index].id
|
||||
ip_configuration_name = "ip0"
|
||||
backend_address_pool_id = azurerm_lb_backend_address_pool.controller.id
|
||||
ip_configuration_name = "ipv4"
|
||||
backend_address_pool_id = azurerm_lb_backend_address_pool.controller-ipv4.id
|
||||
}
|
||||
|
||||
# Controller Ignition configs
|
||||
data "ct_config" "controller-ignitions" {
|
||||
count = var.controller_count
|
||||
content = data.template_file.controller-configs.*.rendered[count.index]
|
||||
strict = true
|
||||
snippets = var.controller_snippets
|
||||
}
|
||||
|
||||
# Controller Fedora CoreOS configs
|
||||
data "template_file" "controller-configs" {
|
||||
resource "azurerm_network_interface_backend_address_pool_association" "controllers-ipv6" {
|
||||
count = var.controller_count
|
||||
|
||||
template = file("${path.module}/fcc/controller.yaml")
|
||||
network_interface_id = azurerm_network_interface.controllers[count.index].id
|
||||
ip_configuration_name = "ipv6"
|
||||
backend_address_pool_id = azurerm_lb_backend_address_pool.controller-ipv6.id
|
||||
}
|
||||
|
||||
vars = {
|
||||
# Fedora CoreOS controllers
|
||||
data "ct_config" "controllers" {
|
||||
count = var.controller_count
|
||||
content = templatefile("${path.module}/butane/controller.yaml", {
|
||||
# Cannot use cyclic dependencies on controllers or their DNS records
|
||||
etcd_name = "etcd${count.index}"
|
||||
etcd_domain = "${var.cluster_name}-etcd${count.index}.${var.dns_zone}"
|
||||
# etcd0=https://cluster-etcd0.example.com,etcd1=https://cluster-etcd1.example.com,...
|
||||
etcd_initial_cluster = join(",", data.template_file.etcds.*.rendered)
|
||||
etcd_initial_cluster = join(",", [
|
||||
for i in range(var.controller_count) : "etcd${i}=https://${var.cluster_name}-etcd${i}.${var.dns_zone}:2380"
|
||||
])
|
||||
kubeconfig = indent(10, module.bootstrap.kubeconfig-kubelet)
|
||||
ssh_authorized_key = var.ssh_authorized_key
|
||||
cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
|
||||
cluster_domain_suffix = var.cluster_domain_suffix
|
||||
}
|
||||
})
|
||||
strict = true
|
||||
snippets = var.controller_snippets
|
||||
}
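The inline for-expression above replaces the separate `template_file` data source that follows; for a hypothetical 3-controller cluster it evaluates as shown in this standalone sketch.

```hcl
# Illustration only: equivalent expression with hypothetical values.
locals {
  controller_count = 3
  cluster_name     = "tempest"
  dns_zone         = "example.com"

  # "etcd0=https://tempest-etcd0.example.com:2380,etcd1=https://tempest-etcd1.example.com:2380,etcd2=https://tempest-etcd2.example.com:2380"
  etcd_initial_cluster = join(",", [
    for i in range(local.controller_count) : "etcd${i}=https://${local.cluster_name}-etcd${i}.${local.dns_zone}:2380"
  ])
}
```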
|
||||
|
||||
data "template_file" "etcds" {
|
||||
count = var.controller_count
|
||||
template = "etcd$${index}=https://$${cluster_name}-etcd$${index}.$${dns_zone}:2380"
|
||||
|
||||
vars = {
|
||||
index = count.index
|
||||
cluster_name = var.cluster_name
|
||||
dns_zone = var.dns_zone
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,124 +1,164 @@
|
||||
# DNS record for the apiserver load balancer
|
||||
# DNS A record for the apiserver load balancer
|
||||
resource "azurerm_dns_a_record" "apiserver" {
|
||||
resource_group_name = var.dns_zone_group
|
||||
|
||||
# DNS Zone name where record should be created
|
||||
zone_name = var.dns_zone
|
||||
|
||||
zone_name = var.dns_zone
|
||||
resource_group_name = var.dns_zone_group
|
||||
# DNS record
|
||||
name = var.cluster_name
|
||||
ttl = 300
|
||||
|
||||
# IPv4 address of apiserver load balancer
|
||||
records = [azurerm_public_ip.apiserver-ipv4.ip_address]
|
||||
records = [azurerm_public_ip.frontend-ipv4.ip_address]
|
||||
}
|
||||
|
||||
# Static IPv4 address for the apiserver frontend
|
||||
resource "azurerm_public_ip" "apiserver-ipv4" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "${var.cluster_name}-apiserver-ipv4"
|
||||
location = var.region
|
||||
sku = "Standard"
|
||||
allocation_method = "Static"
|
||||
# DNS AAAA record for the apiserver load balancer
|
||||
resource "azurerm_dns_aaaa_record" "apiserver" {
|
||||
# DNS Zone name where record should be created
|
||||
zone_name = var.dns_zone
|
||||
resource_group_name = var.dns_zone_group
|
||||
# DNS record
|
||||
name = var.cluster_name
|
||||
ttl = 300
|
||||
# IPv4 address of apiserver load balancer
|
||||
records = [azurerm_public_ip.frontend-ipv6.ip_address]
|
||||
}
|
||||
|
||||
# Static IPv4 address for the ingress frontend
|
||||
resource "azurerm_public_ip" "ingress-ipv4" {
|
||||
# Static IPv4 address for the load balancer
|
||||
resource "azurerm_public_ip" "frontend-ipv4" {
|
||||
name = "${var.cluster_name}-frontend-ipv4"
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
location = var.location
|
||||
ip_version = "IPv4"
|
||||
sku = "Standard"
|
||||
allocation_method = "Static"
|
||||
}
|
||||
|
||||
name = "${var.cluster_name}-ingress-ipv4"
|
||||
location = var.region
|
||||
sku = "Standard"
|
||||
allocation_method = "Static"
|
||||
# Static IPv6 address for the load balancer
|
||||
resource "azurerm_public_ip" "frontend-ipv6" {
|
||||
name = "${var.cluster_name}-frontend-ipv6"
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
location = var.location
|
||||
ip_version = "IPv6"
|
||||
sku = "Standard"
|
||||
allocation_method = "Static"
|
||||
}
|
||||
|
||||
# Network Load Balancer for apiservers and ingress
|
||||
resource "azurerm_lb" "cluster" {
|
||||
name = var.cluster_name
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = var.cluster_name
|
||||
location = var.region
|
||||
sku = "Standard"
|
||||
location = var.location
|
||||
sku = "Standard"
|
||||
|
||||
frontend_ip_configuration {
|
||||
name = "apiserver"
|
||||
public_ip_address_id = azurerm_public_ip.apiserver-ipv4.id
|
||||
name = "frontend-ipv4"
|
||||
public_ip_address_id = azurerm_public_ip.frontend-ipv4.id
|
||||
}
|
||||
|
||||
frontend_ip_configuration {
|
||||
name = "ingress"
|
||||
public_ip_address_id = azurerm_public_ip.ingress-ipv4.id
|
||||
name = "frontend-ipv6"
|
||||
public_ip_address_id = azurerm_public_ip.frontend-ipv6.id
|
||||
}
|
||||
}
|
||||
|
||||
resource "azurerm_lb_rule" "apiserver" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "apiserver"
|
||||
resource "azurerm_lb_rule" "apiserver-ipv4" {
|
||||
name = "apiserver-ipv4"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
frontend_ip_configuration_name = "apiserver"
|
||||
frontend_ip_configuration_name = "frontend-ipv4"
|
||||
disable_outbound_snat = true
|
||||
|
||||
protocol = "Tcp"
|
||||
frontend_port = 6443
|
||||
backend_port = 6443
|
||||
backend_address_pool_ids = [azurerm_lb_backend_address_pool.controller.id]
|
||||
backend_address_pool_ids = [azurerm_lb_backend_address_pool.controller-ipv4.id]
|
||||
probe_id = azurerm_lb_probe.apiserver.id
|
||||
}
|
||||
|
||||
resource "azurerm_lb_rule" "ingress-http" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "ingress-http"
|
||||
resource "azurerm_lb_rule" "apiserver-ipv6" {
|
||||
name = "apiserver-ipv6"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
frontend_ip_configuration_name = "ingress"
|
||||
frontend_ip_configuration_name = "frontend-ipv6"
|
||||
disable_outbound_snat = true
|
||||
|
||||
protocol = "Tcp"
|
||||
frontend_port = 6443
|
||||
backend_port = 6443
|
||||
backend_address_pool_ids = [azurerm_lb_backend_address_pool.controller-ipv6.id]
|
||||
probe_id = azurerm_lb_probe.apiserver.id
|
||||
}
|
||||
|
||||
resource "azurerm_lb_rule" "ingress-http-ipv4" {
|
||||
name = "ingress-http-ipv4"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
frontend_ip_configuration_name = "frontend-ipv4"
|
||||
disable_outbound_snat = true
|
||||
|
||||
protocol = "Tcp"
|
||||
frontend_port = 80
|
||||
backend_port = 80
|
||||
backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker.id]
|
||||
backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker-ipv4.id]
|
||||
probe_id = azurerm_lb_probe.ingress.id
|
||||
}
|
||||
|
||||
resource "azurerm_lb_rule" "ingress-https" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "ingress-https"
|
||||
resource "azurerm_lb_rule" "ingress-https-ipv4" {
|
||||
name = "ingress-https-ipv4"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
frontend_ip_configuration_name = "ingress"
|
||||
frontend_ip_configuration_name = "frontend-ipv4"
|
||||
disable_outbound_snat = true
|
||||
|
||||
protocol = "Tcp"
|
||||
frontend_port = 443
|
||||
backend_port = 443
|
||||
backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker.id]
|
||||
backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker-ipv4.id]
|
||||
probe_id = azurerm_lb_probe.ingress.id
|
||||
}
|
||||
|
||||
# Worker outbound TCP/UDP SNAT
|
||||
resource "azurerm_lb_outbound_rule" "worker-outbound" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
resource "azurerm_lb_rule" "ingress-http-ipv6" {
|
||||
name = "ingress-http-ipv6"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
frontend_ip_configuration_name = "frontend-ipv6"
|
||||
disable_outbound_snat = true
|
||||
|
||||
name = "worker"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
frontend_ip_configuration {
|
||||
name = "ingress"
|
||||
}
|
||||
|
||||
protocol = "All"
|
||||
backend_address_pool_id = azurerm_lb_backend_address_pool.worker.id
|
||||
protocol = "Tcp"
|
||||
frontend_port = 80
|
||||
backend_port = 80
|
||||
backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker-ipv6.id]
|
||||
probe_id = azurerm_lb_probe.ingress.id
|
||||
}
|
||||
|
||||
resource "azurerm_lb_rule" "ingress-https-ipv6" {
|
||||
name = "ingress-https-ipv6"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
frontend_ip_configuration_name = "frontend-ipv6"
|
||||
disable_outbound_snat = true
|
||||
|
||||
protocol = "Tcp"
|
||||
frontend_port = 443
|
||||
backend_port = 443
|
||||
backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker-ipv6.id]
|
||||
probe_id = azurerm_lb_probe.ingress.id
|
||||
}
|
||||
|
||||
# Backend Address Pools
|
||||
|
||||
# Address pool of controllers
|
||||
resource "azurerm_lb_backend_address_pool" "controller" {
|
||||
name = "controller"
|
||||
resource "azurerm_lb_backend_address_pool" "controller-ipv4" {
|
||||
name = "controller-ipv4"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
}
|
||||
|
||||
resource "azurerm_lb_backend_address_pool" "controller-ipv6" {
|
||||
name = "controller-ipv6"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
}
|
||||
|
||||
# Address pool of workers
|
||||
resource "azurerm_lb_backend_address_pool" "worker" {
|
||||
name = "worker"
|
||||
resource "azurerm_lb_backend_address_pool" "worker-ipv4" {
|
||||
name = "worker-ipv4"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
}
|
||||
|
||||
resource "azurerm_lb_backend_address_pool" "worker-ipv6" {
|
||||
name = "worker-ipv6"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
}
|
||||
|
||||
@ -126,32 +166,45 @@ resource "azurerm_lb_backend_address_pool" "worker" {
|
||||
|
||||
# TCP health check for apiserver
|
||||
resource "azurerm_lb_probe" "apiserver" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "apiserver"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
protocol = "Tcp"
|
||||
port = 6443
|
||||
|
||||
# unhealthy threshold
|
||||
number_of_probes = 3
|
||||
|
||||
number_of_probes = 3
|
||||
interval_in_seconds = 5
|
||||
}
|
||||
|
||||
# HTTP health check for ingress
|
||||
resource "azurerm_lb_probe" "ingress" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "ingress"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
protocol = "Http"
|
||||
port = 10254
|
||||
request_path = "/healthz"
|
||||
|
||||
# unhealthy threshold
|
||||
number_of_probes = 3
|
||||
|
||||
number_of_probes = 3
|
||||
interval_in_seconds = 5
|
||||
}
|
||||
|
||||
# Outbound SNAT
|
||||
|
||||
resource "azurerm_lb_outbound_rule" "outbound-ipv4" {
|
||||
name = "outbound-ipv4"
|
||||
protocol = "All"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
backend_address_pool_id = azurerm_lb_backend_address_pool.worker-ipv4.id
|
||||
frontend_ip_configuration {
|
||||
name = "frontend-ipv4"
|
||||
}
|
||||
}
|
||||
|
||||
resource "azurerm_lb_outbound_rule" "outbound-ipv6" {
|
||||
name = "outbound-ipv6"
|
||||
protocol = "All"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
backend_address_pool_id = azurerm_lb_backend_address_pool.worker-ipv6.id
|
||||
frontend_ip_configuration {
|
||||
name = "frontend-ipv6"
|
||||
}
|
||||
}
|
||||
|
6
azure/fedora-coreos/kubernetes/locals.tf
Normal file
6
azure/fedora-coreos/kubernetes/locals.tf
Normal file
@ -0,0 +1,6 @@
|
||||
locals {
|
||||
backend_address_pool_ids = {
|
||||
ipv4 = [azurerm_lb_backend_address_pool.worker-ipv4.id]
|
||||
ipv6 = [azurerm_lb_backend_address_pool.worker-ipv6.id]
|
||||
}
|
||||
}
|
@@ -1,27 +1,64 @@
# Choose an IPv6 ULA subnet at random
# https://datatracker.ietf.org/doc/html/rfc4193
resource "random_id" "ula-netnum" {
  byte_length = 5 # 40 bits
}

locals {
  # fd00::/8 -> shift 40 -> 2^40 possible /48 subnets
  ula-range = cidrsubnet("fd00::/8", 40, random_id.ula-netnum.dec)
  network_cidr = {
    ipv4 = var.network_cidr.ipv4
    ipv6 = length(var.network_cidr.ipv6) > 0 ? var.network_cidr.ipv6 : [local.ula-range]
  }

  # Subdivide the virtual network into subnets
  # - controllers use netnum 0
  # - workers use netnum 1
  controller_subnets = {
    ipv4 = [for i, cidr in local.network_cidr.ipv4 : cidrsubnet(cidr, 1, 0)]
    ipv6 = [for i, cidr in local.network_cidr.ipv6 : cidrsubnet(cidr, 16, 0)]
  }
  worker_subnets = {
    ipv4 = [for i, cidr in local.network_cidr.ipv4 : cidrsubnet(cidr, 1, 1)]
    ipv6 = [for i, cidr in local.network_cidr.ipv6 : cidrsubnet(cidr, 16, 1)]
  }
  cluster_subnets = {
    ipv4 = concat(local.controller_subnets.ipv4, local.worker_subnets.ipv4)
    ipv6 = concat(local.controller_subnets.ipv6, local.worker_subnets.ipv6)
  }
}
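To make the subnet arithmetic concrete, a sketch with a fixed ULA netnum of 0 and a hypothetical 10.0.0.0/16 IPv4 VNet CIDR (neither value is taken from this diff): the 40-bit shift carves a /48 out of fd00::/8, the per-tier 16-bit shift then selects /64 subnets, and the IPv4 CIDR is split into two halves.

```hcl
# Illustration only: fixed netnum 0 and a hypothetical 10.0.0.0/16 VNet CIDR.
locals {
  example_ula = cidrsubnet("fd00::/8", 40, 0) # => "fd00::/48"

  example_controller_subnets = {
    ipv4 = [cidrsubnet("10.0.0.0/16", 1, 0)]      # => ["10.0.0.0/17"]
    ipv6 = [cidrsubnet(local.example_ula, 16, 0)] # => ["fd00::/64"]
  }
  example_worker_subnets = {
    ipv4 = [cidrsubnet("10.0.0.0/16", 1, 1)]      # => ["10.0.128.0/17"]
    ipv6 = [cidrsubnet(local.example_ula, 16, 1)] # => ["fd00:0:0:1::/64"]
  }
}
```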

# Organize cluster into a resource group
resource "azurerm_resource_group" "cluster" {
  name     = var.cluster_name
  location = var.region
  location = var.location
}

resource "azurerm_virtual_network" "network" {
  name                = var.cluster_name
  resource_group_name = azurerm_resource_group.cluster.name

  name          = var.cluster_name
  location      = azurerm_resource_group.cluster.location
  address_space = [var.host_cidr]
  location      = azurerm_resource_group.cluster.location
  address_space = concat(
    local.network_cidr.ipv4,
    local.network_cidr.ipv6
  )
}

# Subnets - separate subnets for controller and workers because Azure
# network security groups are based on IPv4 CIDR rather than instance
# tags like GCP or security group membership like AWS
# Subnets - separate subnets for controllers and workers because Azure
# network security groups are oriented around address prefixes rather
# than instance tags (GCP) or security group membership (AWS)

resource "azurerm_subnet" "controller" {
  resource_group_name = azurerm_resource_group.cluster.name

  name                 = "controller"
  resource_group_name  = azurerm_resource_group.cluster.name
  virtual_network_name = azurerm_virtual_network.network.name
  address_prefixes     = [cidrsubnet(var.host_cidr, 1, 0)]
  address_prefixes = concat(
    local.controller_subnets.ipv4,
    local.controller_subnets.ipv6,
  )
  default_outbound_access_enabled = false

}

resource "azurerm_subnet_network_security_group_association" "controller" {
@@ -30,15 +67,17 @@ resource "azurerm_subnet_network_security_group_association" "controller" {
}

resource "azurerm_subnet" "worker" {
  resource_group_name = azurerm_resource_group.cluster.name

  name                 = "worker"
  resource_group_name  = azurerm_resource_group.cluster.name
  virtual_network_name = azurerm_virtual_network.network.name
  address_prefixes     = [cidrsubnet(var.host_cidr, 1, 1)]
  address_prefixes = concat(
    local.worker_subnets.ipv4,
    local.worker_subnets.ipv6,
  )
  default_outbound_access_enabled = false
}

resource "azurerm_subnet_network_security_group_association" "worker" {
  subnet_id                 = azurerm_subnet.worker.id
  network_security_group_id = azurerm_network_security_group.worker.id
}
@@ -6,13 +6,18 @@ output "kubeconfig-admin" {

# Outputs for Kubernetes Ingress

output "ingress_static_ipv4" {
  value       = azurerm_public_ip.ingress-ipv4.ip_address
  value       = azurerm_public_ip.frontend-ipv4.ip_address
  description = "IPv4 address of the load balancer for distributing traffic to Ingress controllers"
}

output "ingress_static_ipv6" {
  value       = azurerm_public_ip.frontend-ipv6.ip_address
  description = "IPv6 address of the load balancer for distributing traffic to Ingress controllers"
}

# Outputs for worker pools

output "region" {
output "location" {
  value = azurerm_resource_group.cluster.location
}

@@ -39,13 +44,24 @@ output "kubeconfig" {

# Outputs for custom firewalling

output "worker_security_group_name" {
  value = azurerm_network_security_group.worker.name
output "controller_security_group_name" {
  description = "Network Security Group for controller nodes"
  value       = azurerm_network_security_group.controller.name
}

output "worker_address_prefix" {
  description = "Worker network subnet CIDR address (for source/destination)"
  value       = azurerm_subnet.worker.address_prefix
output "worker_security_group_name" {
  description = "Network Security Group for worker nodes"
  value       = azurerm_network_security_group.worker.name
}

output "controller_address_prefixes" {
  description = "Controller network subnet CIDR addresses (for source/destination)"
  value       = local.controller_subnets
}

output "worker_address_prefixes" {
  description = "Worker network subnet CIDR addresses (for source/destination)"
  value       = local.worker_subnets
}

# Outputs for custom load balancing
@@ -55,9 +71,12 @@ output "loadbalancer_id" {
  value = azurerm_lb.cluster.id
}

output "backend_address_pool_id" {
  description = "ID of the worker backend address pool"
  value       = azurerm_lb_backend_address_pool.worker.id
output "backend_address_pool_ids" {
  description = "IDs of the worker backend address pools"
  value = {
    ipv4 = [azurerm_lb_backend_address_pool.worker-ipv4.id]
    ipv6 = [azurerm_lb_backend_address_pool.worker-ipv6.id]
  }
}

# Outputs for debug
@ -1,198 +1,223 @@
|
||||
# Controller security group
|
||||
|
||||
resource "azurerm_network_security_group" "controller" {
|
||||
name = "${var.cluster_name}-controller"
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "${var.cluster_name}-controller"
|
||||
location = azurerm_resource_group.cluster.location
|
||||
location = azurerm_resource_group.cluster.location
|
||||
}
|
||||
|
||||
resource "azurerm_network_security_rule" "controller-icmp" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
for_each = local.controller_subnets
|
||||
|
||||
name = "allow-icmp"
|
||||
network_security_group_name = azurerm_network_security_group.controller.name
|
||||
priority = "1995"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Icmp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "*"
|
||||
source_address_prefixes = [azurerm_subnet.controller.address_prefix, azurerm_subnet.worker.address_prefix]
|
||||
destination_address_prefix = azurerm_subnet.controller.address_prefix
|
||||
name = "allow-icmp-${each.key}"
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
network_security_group_name = azurerm_network_security_group.controller.name
|
||||
priority = 1995 + (each.key == "ipv4" ? 0 : 1)
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Icmp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "*"
|
||||
source_address_prefixes = local.cluster_subnets[each.key]
|
||||
destination_address_prefixes = local.controller_subnets[each.key]
|
||||
}
|
||||
|
||||
resource "azurerm_network_security_rule" "controller-ssh" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
for_each = local.controller_subnets
|
||||
|
||||
name = "allow-ssh"
|
||||
network_security_group_name = azurerm_network_security_group.controller.name
|
||||
priority = "2000"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "22"
|
||||
source_address_prefix = "*"
|
||||
destination_address_prefix = azurerm_subnet.controller.address_prefix
|
||||
name = "allow-ssh-${each.key}"
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
network_security_group_name = azurerm_network_security_group.controller.name
|
||||
priority = 2000 + (each.key == "ipv4" ? 0 : 1)
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "22"
|
||||
source_address_prefix = "*"
|
||||
destination_address_prefixes = local.controller_subnets[each.key]
|
||||
}
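Azure requires each rule in a network security group to carry a unique priority, so the rewritten rules derive a distinct value per address family from a single base. A minimal sketch of the pattern used throughout this file (a hypothetical rule and base value, not one of the rules in this diff):

```hcl
# Illustration of the per-family priority offset: "ipv4" gets the base
# value (2500), "ipv6" gets base + 1 (2501), keeping priorities unique.
resource "azurerm_network_security_rule" "example" {
  for_each = local.controller_subnets # keys: "ipv4", "ipv6"

  name                         = "allow-example-${each.key}"
  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.controller.name
  priority                     = 2500 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Tcp"
  source_port_range            = "*"
  destination_port_range       = "8080"
  source_address_prefixes      = local.cluster_subnets[each.key]
  destination_address_prefixes = local.controller_subnets[each.key]
}
```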
|
||||
|
||||
resource "azurerm_network_security_rule" "controller-etcd" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
for_each = local.controller_subnets
|
||||
|
||||
name = "allow-etcd"
|
||||
network_security_group_name = azurerm_network_security_group.controller.name
|
||||
priority = "2005"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "2379-2380"
|
||||
source_address_prefix = azurerm_subnet.controller.address_prefix
|
||||
destination_address_prefix = azurerm_subnet.controller.address_prefix
|
||||
name = "allow-etcd-${each.key}"
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
network_security_group_name = azurerm_network_security_group.controller.name
|
||||
priority = 2005 + (each.key == "ipv4" ? 0 : 1)
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "2379-2380"
|
||||
source_address_prefixes = local.controller_subnets[each.key]
|
||||
destination_address_prefixes = local.controller_subnets[each.key]
|
||||
}
|
||||
|
||||
# Allow Prometheus to scrape etcd metrics
|
||||
resource "azurerm_network_security_rule" "controller-etcd-metrics" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
for_each = local.controller_subnets
|
||||
|
||||
name = "allow-etcd-metrics"
|
||||
network_security_group_name = azurerm_network_security_group.controller.name
|
||||
priority = "2010"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "2381"
|
||||
source_address_prefix = azurerm_subnet.worker.address_prefix
|
||||
destination_address_prefix = azurerm_subnet.controller.address_prefix
|
||||
name = "allow-etcd-metrics-${each.key}"
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
network_security_group_name = azurerm_network_security_group.controller.name
|
||||
priority = 2010 + (each.key == "ipv4" ? 0 : 1)
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "2381"
|
||||
source_address_prefixes = local.worker_subnets[each.key]
|
||||
destination_address_prefixes = local.controller_subnets[each.key]
|
||||
}
|
||||
|
||||
# Allow Prometheus to scrape kube-proxy metrics
|
||||
resource "azurerm_network_security_rule" "controller-kube-proxy" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
for_each = local.controller_subnets
|
||||
|
||||
name = "allow-kube-proxy-metrics"
|
||||
network_security_group_name = azurerm_network_security_group.controller.name
|
||||
priority = "2011"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "10249"
|
||||
source_address_prefix = azurerm_subnet.worker.address_prefix
|
||||
destination_address_prefix = azurerm_subnet.controller.address_prefix
|
||||
name = "allow-kube-proxy-metrics-${each.key}"
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
network_security_group_name = azurerm_network_security_group.controller.name
|
||||
priority = 2012 + (each.key == "ipv4" ? 0 : 1)
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "10249"
|
||||
source_address_prefixes = local.worker_subnets[each.key]
|
||||
destination_address_prefixes = local.controller_subnets[each.key]
|
||||
}
|
||||
|
||||
# Allow Prometheus to scrape kube-scheduler and kube-controller-manager metrics
|
||||
resource "azurerm_network_security_rule" "controller-kube-metrics" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
for_each = local.controller_subnets
|
||||
|
||||
name = "allow-kube-metrics"
|
||||
network_security_group_name = azurerm_network_security_group.controller.name
|
||||
priority = "2012"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "10257-10259"
|
||||
source_address_prefix = azurerm_subnet.worker.address_prefix
|
||||
destination_address_prefix = azurerm_subnet.controller.address_prefix
|
||||
name = "allow-kube-metrics-${each.key}"
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
network_security_group_name = azurerm_network_security_group.controller.name
|
||||
priority = 2014 + (each.key == "ipv4" ? 0 : 1)
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "10257-10259"
|
||||
source_address_prefixes = local.worker_subnets[each.key]
|
||||
destination_address_prefixes = local.controller_subnets[each.key]
|
||||
}
|
||||
|
||||
resource "azurerm_network_security_rule" "controller-apiserver" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
for_each = local.controller_subnets
|
||||
|
||||
name = "allow-apiserver"
|
||||
network_security_group_name = azurerm_network_security_group.controller.name
|
||||
priority = "2015"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "6443"
|
||||
source_address_prefix = "*"
|
||||
destination_address_prefix = azurerm_subnet.controller.address_prefix
|
||||
name = "allow-apiserver-${each.key}"
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
network_security_group_name = azurerm_network_security_group.controller.name
|
||||
priority = 2016 + (each.key == "ipv4" ? 0 : 1)
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "6443"
|
||||
source_address_prefix = "*"
|
||||
destination_address_prefixes = local.controller_subnets[each.key]
|
||||
}
|
||||
|
||||
resource "azurerm_network_security_rule" "controller-cilium-health" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
count = var.networking == "cilium" ? 1 : 0
|
||||
for_each = var.networking == "cilium" ? local.controller_subnets : {}
|
||||
|
||||
name = "allow-cilium-health"
|
||||
network_security_group_name = azurerm_network_security_group.controller.name
|
||||
priority = "2019"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "4240"
|
||||
source_address_prefixes = [azurerm_subnet.controller.address_prefix, azurerm_subnet.worker.address_prefix]
|
||||
destination_address_prefix = azurerm_subnet.controller.address_prefix
|
||||
name = "allow-cilium-health-${each.key}"
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
network_security_group_name = azurerm_network_security_group.controller.name
|
||||
priority = 2018 + (each.key == "ipv4" ? 0 : 1)
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "4240"
|
||||
source_address_prefixes = local.cluster_subnets[each.key]
|
||||
destination_address_prefixes = local.controller_subnets[each.key]
|
||||
}
|
||||
|
||||
resource "azurerm_network_security_rule" "controller-cilium-metrics" {
|
||||
for_each = var.networking == "cilium" ? local.controller_subnets : {}
|
||||
|
||||
name = "allow-cilium-metrics-${each.key}"
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
network_security_group_name = azurerm_network_security_group.controller.name
|
||||
priority = 2035 + (each.key == "ipv4" ? 0 : 1)
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "9962-9965"
|
||||
source_address_prefixes = local.cluster_subnets[each.key]
|
||||
destination_address_prefixes = local.controller_subnets[each.key]
|
||||
}
|
||||
|
||||
resource "azurerm_network_security_rule" "controller-vxlan" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
for_each = local.controller_subnets
|
||||
|
||||
name = "allow-vxlan"
|
||||
network_security_group_name = azurerm_network_security_group.controller.name
|
||||
priority = "2020"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Udp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "4789"
|
||||
source_address_prefixes = [azurerm_subnet.controller.address_prefix, azurerm_subnet.worker.address_prefix]
|
||||
destination_address_prefix = azurerm_subnet.controller.address_prefix
|
||||
name = "allow-vxlan-${each.key}"
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
network_security_group_name = azurerm_network_security_group.controller.name
|
||||
priority = 2020 + (each.key == "ipv4" ? 0 : 1)
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Udp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "4789"
|
||||
source_address_prefixes = local.cluster_subnets[each.key]
|
||||
destination_address_prefixes = local.controller_subnets[each.key]
|
||||
}
|
||||
|
||||
resource "azurerm_network_security_rule" "controller-linux-vxlan" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
for_each = local.controller_subnets
|
||||
|
||||
name = "allow-linux-vxlan"
|
||||
network_security_group_name = azurerm_network_security_group.controller.name
|
||||
priority = "2021"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Udp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "8472"
|
||||
source_address_prefixes = [azurerm_subnet.controller.address_prefix, azurerm_subnet.worker.address_prefix]
|
||||
destination_address_prefix = azurerm_subnet.controller.address_prefix
|
||||
name = "allow-linux-vxlan-${each.key}"
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
network_security_group_name = azurerm_network_security_group.controller.name
|
||||
priority = 2022 + (each.key == "ipv4" ? 0 : 1)
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Udp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "8472"
|
||||
source_address_prefixes = local.cluster_subnets[each.key]
|
||||
destination_address_prefixes = local.controller_subnets[each.key]
|
||||
}
|
||||
|
||||
# Allow Prometheus to scrape node-exporter daemonset
|
||||
resource "azurerm_network_security_rule" "controller-node-exporter" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
for_each = local.controller_subnets
|
||||
|
||||
name = "allow-node-exporter"
|
||||
network_security_group_name = azurerm_network_security_group.controller.name
|
||||
priority = "2025"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "9100"
|
||||
source_address_prefix = azurerm_subnet.worker.address_prefix
|
||||
destination_address_prefix = azurerm_subnet.controller.address_prefix
|
||||
name = "allow-node-exporter-${each.key}"
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
network_security_group_name = azurerm_network_security_group.controller.name
|
||||
priority = 2025 + (each.key == "ipv4" ? 0 : 1)
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "9100"
|
||||
source_address_prefixes = local.worker_subnets[each.key]
|
||||
destination_address_prefixes = local.controller_subnets[each.key]
|
||||
}
|
||||
|
||||
# Allow apiserver to access kubelet's for exec, log, port-forward
|
||||
resource "azurerm_network_security_rule" "controller-kubelet" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
for_each = local.controller_subnets
|
||||
|
||||
name = "allow-kubelet"
|
||||
name = "allow-kubelet-${each.key}"
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
network_security_group_name = azurerm_network_security_group.controller.name
|
||||
priority = "2030"
|
||||
priority = 2030 + (each.key == "ipv4" ? 0 : 1)
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "10250"
|
||||
|
||||
# allow Prometheus to scrape kubelet metrics too
|
||||
source_address_prefixes = [azurerm_subnet.controller.address_prefix, azurerm_subnet.worker.address_prefix]
|
||||
destination_address_prefix = azurerm_subnet.controller.address_prefix
|
||||
source_address_prefixes = local.cluster_subnets[each.key]
|
||||
destination_address_prefixes = local.controller_subnets[each.key]
|
||||
}
|
||||
|
||||
# Override Azure AllowVNetInBound and AllowAzureLoadBalancerInBound
|
||||
@ -231,166 +256,189 @@ resource "azurerm_network_security_rule" "controller-deny-all" {
|
||||
# Worker security group
|
||||
|
||||
resource "azurerm_network_security_group" "worker" {
|
||||
name = "${var.cluster_name}-worker"
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "${var.cluster_name}-worker"
|
||||
location = azurerm_resource_group.cluster.location
|
||||
location = azurerm_resource_group.cluster.location
|
||||
}
|
||||
|
||||
resource "azurerm_network_security_rule" "worker-icmp" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
for_each = local.worker_subnets
|
||||
|
||||
name = "allow-icmp"
|
||||
network_security_group_name = azurerm_network_security_group.worker.name
|
||||
priority = "1995"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Icmp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "*"
|
||||
source_address_prefixes = [azurerm_subnet.controller.address_prefix, azurerm_subnet.worker.address_prefix]
|
||||
destination_address_prefix = azurerm_subnet.worker.address_prefix
|
||||
name = "allow-icmp-${each.key}"
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
network_security_group_name = azurerm_network_security_group.worker.name
|
||||
priority = 1995 + (each.key == "ipv4" ? 0 : 1)
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Icmp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "*"
|
||||
source_address_prefixes = local.cluster_subnets[each.key]
|
||||
destination_address_prefixes = local.worker_subnets[each.key]
|
||||
}
|
||||
|
||||
resource "azurerm_network_security_rule" "worker-ssh" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
for_each = local.worker_subnets
|
||||
|
||||
name = "allow-ssh"
|
||||
network_security_group_name = azurerm_network_security_group.worker.name
|
||||
priority = "2000"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "22"
|
||||
source_address_prefix = azurerm_subnet.controller.address_prefix
|
||||
destination_address_prefix = azurerm_subnet.worker.address_prefix
|
||||
name = "allow-ssh-${each.key}"
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
network_security_group_name = azurerm_network_security_group.worker.name
|
||||
priority = 2000 + (each.key == "ipv4" ? 0 : 1)
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "22"
|
||||
source_address_prefixes = local.controller_subnets[each.key]
|
||||
destination_address_prefixes = local.worker_subnets[each.key]
|
||||
}
|
||||
|
||||
resource "azurerm_network_security_rule" "worker-http" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
for_each = local.worker_subnets
|
||||
|
||||
name = "allow-http"
|
||||
network_security_group_name = azurerm_network_security_group.worker.name
|
||||
priority = "2005"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "80"
|
||||
source_address_prefix = "*"
|
||||
destination_address_prefix = azurerm_subnet.worker.address_prefix
|
||||
name = "allow-http-${each.key}"
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
network_security_group_name = azurerm_network_security_group.worker.name
|
||||
priority = 2005 + (each.key == "ipv4" ? 0 : 1)
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "80"
|
||||
source_address_prefix = "*"
|
||||
destination_address_prefixes = local.worker_subnets[each.key]
|
||||
}
|
||||
|
||||
resource "azurerm_network_security_rule" "worker-https" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
for_each = local.worker_subnets
|
||||
|
||||
name = "allow-https"
|
||||
network_security_group_name = azurerm_network_security_group.worker.name
|
||||
priority = "2010"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "443"
|
||||
source_address_prefix = "*"
|
||||
destination_address_prefix = azurerm_subnet.worker.address_prefix
|
||||
name = "allow-https-${each.key}"
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
network_security_group_name = azurerm_network_security_group.worker.name
|
||||
priority = 2010 + (each.key == "ipv4" ? 0 : 1)
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "443"
|
||||
source_address_prefix = "*"
|
||||
destination_address_prefixes = local.worker_subnets[each.key]
|
||||
}
|
||||
|
||||
resource "azurerm_network_security_rule" "worker-cilium-health" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
count = var.networking == "cilium" ? 1 : 0
|
||||
for_each = var.networking == "cilium" ? local.worker_subnets : {}
|
||||
|
||||
name = "allow-cilium-health"
|
||||
network_security_group_name = azurerm_network_security_group.worker.name
|
||||
priority = "2014"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "4240"
|
||||
source_address_prefixes = [azurerm_subnet.controller.address_prefix, azurerm_subnet.worker.address_prefix]
|
||||
destination_address_prefix = azurerm_subnet.worker.address_prefix
|
||||
name = "allow-cilium-health-${each.key}"
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
network_security_group_name = azurerm_network_security_group.worker.name
|
||||
priority = 2012 + (each.key == "ipv4" ? 0 : 1)
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "4240"
|
||||
source_address_prefixes = local.cluster_subnets[each.key]
|
||||
destination_address_prefixes = local.worker_subnets[each.key]
|
||||
}
|
||||
|
||||
resource "azurerm_network_security_rule" "worker-cilium-metrics" {
|
||||
for_each = var.networking == "cilium" ? local.worker_subnets : {}
|
||||
|
||||
name = "allow-cilium-metrics-${each.key}"
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
network_security_group_name = azurerm_network_security_group.worker.name
|
||||
priority = 2014 + (each.key == "ipv4" ? 0 : 1)
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "9962-9965"
|
||||
source_address_prefixes = local.cluster_subnets[each.key]
|
||||
destination_address_prefixes = local.worker_subnets[each.key]
|
||||
}
|
||||
|
||||
resource "azurerm_network_security_rule" "worker-vxlan" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
for_each = local.worker_subnets
|
||||
|
||||
name = "allow-vxlan"
|
||||
network_security_group_name = azurerm_network_security_group.worker.name
|
||||
priority = "2015"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Udp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "4789"
|
||||
source_address_prefixes = [azurerm_subnet.controller.address_prefix, azurerm_subnet.worker.address_prefix]
|
||||
destination_address_prefix = azurerm_subnet.worker.address_prefix
|
||||
name = "allow-vxlan-${each.key}"
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
network_security_group_name = azurerm_network_security_group.worker.name
|
||||
priority = 2016 + (each.key == "ipv4" ? 0 : 1)
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Udp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "4789"
|
||||
source_address_prefixes = local.cluster_subnets[each.key]
|
||||
destination_address_prefixes = local.worker_subnets[each.key]
|
||||
}
|
||||
|
||||
resource "azurerm_network_security_rule" "worker-linux-vxlan" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
for_each = local.worker_subnets
|
||||
|
||||
name = "allow-linux-vxlan"
|
||||
network_security_group_name = azurerm_network_security_group.worker.name
|
||||
priority = "2016"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Udp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "8472"
|
||||
source_address_prefixes = [azurerm_subnet.controller.address_prefix, azurerm_subnet.worker.address_prefix]
|
||||
destination_address_prefix = azurerm_subnet.worker.address_prefix
|
||||
name = "allow-linux-vxlan-${each.key}"
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
network_security_group_name = azurerm_network_security_group.worker.name
|
||||
priority = 2018 + (each.key == "ipv4" ? 0 : 1)
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Udp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "8472"
|
||||
source_address_prefixes = local.cluster_subnets[each.key]
|
||||
destination_address_prefixes = local.worker_subnets[each.key]
|
||||
}
|
||||
|
||||
# Allow Prometheus to scrape node-exporter daemonset
|
||||
resource "azurerm_network_security_rule" "worker-node-exporter" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
for_each = local.worker_subnets
|
||||
|
||||
name = "allow-node-exporter"
|
||||
network_security_group_name = azurerm_network_security_group.worker.name
|
||||
priority = "2020"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "9100"
|
||||
source_address_prefix = azurerm_subnet.worker.address_prefix
|
||||
destination_address_prefix = azurerm_subnet.worker.address_prefix
|
||||
name = "allow-node-exporter-${each.key}"
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
network_security_group_name = azurerm_network_security_group.worker.name
|
||||
priority = 2020 + (each.key == "ipv4" ? 0 : 1)
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "9100"
|
||||
source_address_prefixes = local.worker_subnets[each.key]
|
||||
destination_address_prefixes = local.worker_subnets[each.key]
|
||||
}
|
||||
|
||||
# Allow Prometheus to scrape kube-proxy
|
||||
resource "azurerm_network_security_rule" "worker-kube-proxy" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
for_each = local.worker_subnets
|
||||
|
||||
name = "allow-kube-proxy"
|
||||
network_security_group_name = azurerm_network_security_group.worker.name
|
||||
priority = "2024"
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "10249"
|
||||
source_address_prefix = azurerm_subnet.worker.address_prefix
|
||||
destination_address_prefix = azurerm_subnet.worker.address_prefix
|
||||
name = "allow-kube-proxy-${each.key}"
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
network_security_group_name = azurerm_network_security_group.worker.name
|
||||
priority = 2024 + (each.key == "ipv4" ? 0 : 1)
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "10249"
|
||||
source_address_prefixes = local.worker_subnets[each.key]
|
||||
destination_address_prefixes = local.worker_subnets[each.key]
|
||||
}
|
||||
|
||||
# Allow apiserver to access kubelet's for exec, log, port-forward
|
||||
resource "azurerm_network_security_rule" "worker-kubelet" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
for_each = local.worker_subnets
|
||||
|
||||
name = "allow-kubelet"
|
||||
name = "allow-kubelet-${each.key}"
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
network_security_group_name = azurerm_network_security_group.worker.name
|
||||
priority = "2025"
|
||||
priority = 2026 + (each.key == "ipv4" ? 0 : 1)
|
||||
access = "Allow"
|
||||
direction = "Inbound"
|
||||
protocol = "Tcp"
|
||||
source_port_range = "*"
|
||||
destination_port_range = "10250"
|
||||
|
||||
# allow Prometheus to scrape kubelet metrics too
|
||||
source_address_prefixes = [azurerm_subnet.controller.address_prefix, azurerm_subnet.worker.address_prefix]
|
||||
destination_address_prefix = azurerm_subnet.worker.address_prefix
|
||||
source_address_prefixes = local.cluster_subnets[each.key]
|
||||
destination_address_prefixes = local.worker_subnets[each.key]
|
||||
}
|
||||
|
||||
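The rules above are stamped out per address family via for_each, and the (each.key == "ipv4" ? 0 : 1) offset keeps each family's NSG priority unique. A minimal sketch of the locals those rules reference, assuming maps keyed by "ipv4"/"ipv6" (the CIDRs here are hypothetical; the real definitions live in the cluster's network configuration):

locals {
  # Hypothetical subnet CIDRs, keyed by address family
  controller_subnets = {
    ipv4 = ["10.0.1.0/24"]
    ipv6 = ["fd9a:0:0:1::/64"]
  }
  worker_subnets = {
    ipv4 = ["10.0.2.0/24"]
    ipv6 = ["fd9a:0:0:2::/64"]
  }
  # Union of controller and worker subnets, per family
  cluster_subnets = {
    for family in ["ipv4", "ipv6"] :
    family => concat(local.controller_subnets[family], local.worker_subnets[family])
  }
}
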
# Override Azure AllowVNetInBound and AllowAzureLoadBalancerInBound
@@ -18,7 +18,7 @@ resource "null_resource" "copy-controller-secrets" {

  connection {
    type    = "ssh"
    host    = azurerm_public_ip.controllers.*.ip_address[count.index]
    host    = azurerm_public_ip.controllers-ipv4[count.index].ip_address
    user    = "core"
    timeout = "15m"
  }
@@ -45,7 +45,7 @@ resource "null_resource" "bootstrap" {

  connection {
    type    = "ssh"
    host    = azurerm_public_ip.controllers.*.ip_address[0]
    host    = azurerm_public_ip.controllers-ipv4[0].ip_address
    user    = "core"
    timeout = "15m"
  }

@@ -5,9 +5,9 @@ variable "cluster_name" {

# Azure

variable "region" {
variable "location" {
  type        = string
  description = "Azure Region (e.g. centralus , see `az account list-locations --output table`)"
  description = "Azure location (e.g. centralus , see `az account list-locations --output table`)"
}

variable "dns_zone" {
@ -22,41 +22,65 @@ variable "dns_zone_group" {
|
||||
|
||||
# instances
|
||||
|
||||
variable "os_image" {
|
||||
type = string
|
||||
description = "Fedora CoreOS image for instances"
|
||||
}
|
||||
|
||||
variable "controller_count" {
|
||||
type = number
|
||||
description = "Number of controllers (i.e. masters)"
|
||||
default = 1
|
||||
}
|
||||
|
||||
variable "worker_count" {
|
||||
type = number
|
||||
description = "Number of workers"
|
||||
default = 1
|
||||
}
|
||||
|
||||
variable "controller_type" {
|
||||
type = string
|
||||
description = "Machine type for controllers (see `az vm list-skus --location centralus`)"
|
||||
default = "Standard_B2s"
|
||||
}
|
||||
|
||||
variable "controller_disk_type" {
|
||||
type = string
|
||||
description = "Type of managed disk for controller node(s)"
|
||||
default = "Premium_LRS"
|
||||
}
|
||||
|
||||
variable "controller_disk_size" {
|
||||
type = number
|
||||
description = "Size of the managed disk in GB for controller node(s)"
|
||||
default = 30
|
||||
}
|
||||
|
||||
variable "worker_count" {
|
||||
type = number
|
||||
description = "Number of workers"
|
||||
default = 1
|
||||
}
|
||||
|
||||
variable "worker_type" {
|
||||
type = string
|
||||
description = "Machine type for workers (see `az vm list-skus --location centralus`)"
|
||||
default = "Standard_DS1_v2"
|
||||
default = "Standard_D2as_v5"
|
||||
}
|
||||
|
||||
variable "os_image" {
|
||||
variable "worker_disk_type" {
|
||||
type = string
|
||||
description = "Fedora CoreOS image for instances"
|
||||
description = "Type of managed disk for worker nodes"
|
||||
default = "Standard_LRS"
|
||||
}
|
||||
|
||||
variable "disk_size" {
|
||||
variable "worker_disk_size" {
|
||||
type = number
|
||||
description = "Size of the disk in GB"
|
||||
description = "Size of the managed disk in GB for worker nodes"
|
||||
default = 30
|
||||
}
|
||||
|
||||
variable "worker_ephemeral_disk" {
|
||||
type = bool
|
||||
description = "Use ephemeral local disk instead of managed disk (requires vm_type with local storage)"
|
||||
default = false
|
||||
}
|
||||
|
||||
variable "worker_priority" {
|
||||
type = string
|
||||
description = "Set worker priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be deallocated at any time."
|
||||
@ -82,16 +106,27 @@ variable "ssh_authorized_key" {
|
||||
description = "SSH public key for user 'core'"
|
||||
}
|
||||
|
||||
variable "azure_authorized_key" {
|
||||
type = string
|
||||
description = "Optionally, pass a dummy RSA key to satisfy Azure validations (then use an ed25519 key set above)"
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "networking" {
|
||||
type = string
|
||||
description = "Choice of networking provider (flannel, calico, or cilium)"
|
||||
default = "cilium"
|
||||
}
|
||||
|
||||
variable "host_cidr" {
|
||||
type = string
|
||||
description = "CIDR IPv4 range to assign to instances"
|
||||
default = "10.0.0.0/16"
|
||||
variable "network_cidr" {
|
||||
type = object({
|
||||
ipv4 = list(string)
|
||||
ipv6 = optional(list(string), [])
|
||||
})
|
||||
description = "Virtual network CIDR ranges"
|
||||
default = {
|
||||
ipv4 = ["10.0.0.0/16"]
|
||||
}
|
||||
}
|
||||
|
||||
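For illustration, a dual-stack caller could override the new network_cidr default like this (the CIDRs are hypothetical; ipv6 may be omitted to keep an IPv4-only virtual network):

network_cidr = {
  ipv4 = ["10.0.0.0/16"]
  ipv6 = ["fd9a:0:1::/48"]
}
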
variable "pod_cidr" {
|
||||
@ -109,34 +144,32 @@ EOD
|
||||
default = "10.3.0.0/16"
|
||||
}
|
||||
|
||||
variable "enable_reporting" {
|
||||
type = bool
|
||||
description = "Enable usage or analytics reporting to upstreams (Calico)"
|
||||
default = false
|
||||
}
|
||||
|
||||
variable "enable_aggregation" {
|
||||
type = bool
|
||||
description = "Enable the Kubernetes Aggregation Layer"
|
||||
default = true
|
||||
}
|
||||
|
||||
variable "worker_node_labels" {
|
||||
type = list(string)
|
||||
description = "List of initial worker node labels"
|
||||
default = []
|
||||
}
|
||||
|
||||
# unofficial, undocumented, unsupported
|
||||
|
||||
variable "cluster_domain_suffix" {
|
||||
type = string
|
||||
description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. foo.default.svc.cluster.local) "
|
||||
default = "cluster.local"
|
||||
}
|
||||
# advanced

variable "daemonset_tolerations" {
  type        = list(string)
  description = "List of additional taint keys kube-system DaemonSets should tolerate (e.g. ['custom-role', 'gpu-role'])"
  default     = []
}

variable "components" {
  description = "Configure pre-installed cluster components"
  # Component configs are passed through to terraform-render-bootstrap,
  # which handles type enforcement and defines defaults
  # https://github.com/poseidon/terraform-render-bootstrap/blob/main/variables.tf#L95
  type = object({
    enable     = optional(bool)
    coredns    = optional(map(any))
    kube_proxy = optional(map(any))
    flannel    = optional(map(any))
    calico     = optional(map(any))
    cilium     = optional(map(any))
  })
  default = null
}

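A hypothetical value showing only the shape of the new components variable (the keys accepted inside each component map are defined by terraform-render-bootstrap, not by this module):

components = {
  enable  = true
  coredns = null # null/omitted entries fall back to upstream defaults
}
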
@ -3,13 +3,11 @@
|
||||
terraform {
|
||||
required_version = ">= 0.13.0, < 2.0.0"
|
||||
required_providers {
|
||||
azurerm = "~> 2.8"
|
||||
template = "~> 2.2"
|
||||
null = ">= 2.1"
|
||||
|
||||
azurerm = ">= 2.8"
|
||||
null = ">= 2.1"
|
||||
ct = {
|
||||
source = "poseidon/ct"
|
||||
version = "~> 0.9"
|
||||
version = "~> 0.13"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -3,22 +3,26 @@ module "workers" {
|
||||
name = var.cluster_name
|
||||
|
||||
# Azure
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
region = azurerm_resource_group.cluster.location
|
||||
subnet_id = azurerm_subnet.worker.id
|
||||
security_group_id = azurerm_network_security_group.worker.id
|
||||
backend_address_pool_id = azurerm_lb_backend_address_pool.worker.id
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
location = azurerm_resource_group.cluster.location
|
||||
subnet_id = azurerm_subnet.worker.id
|
||||
security_group_id = azurerm_network_security_group.worker.id
|
||||
backend_address_pool_ids = local.backend_address_pool_ids
|
||||
|
||||
worker_count = var.worker_count
|
||||
vm_type = var.worker_type
|
||||
os_image = var.os_image
|
||||
priority = var.worker_priority
|
||||
# instances
|
||||
os_image = var.os_image
|
||||
worker_count = var.worker_count
|
||||
vm_type = var.worker_type
|
||||
disk_type = var.worker_disk_type
|
||||
disk_size = var.worker_disk_size
|
||||
ephemeral_disk = var.worker_ephemeral_disk
|
||||
priority = var.worker_priority
|
||||
|
||||
# configuration
|
||||
kubeconfig = module.bootstrap.kubeconfig-kubelet
|
||||
ssh_authorized_key = var.ssh_authorized_key
|
||||
service_cidr = var.service_cidr
|
||||
cluster_domain_suffix = var.cluster_domain_suffix
|
||||
snippets = var.worker_snippets
|
||||
node_labels = var.worker_node_labels
|
||||
kubeconfig = module.bootstrap.kubeconfig-kubelet
|
||||
ssh_authorized_key = var.ssh_authorized_key
|
||||
azure_authorized_key = var.azure_authorized_key
|
||||
service_cidr = var.service_cidr
|
||||
snippets = var.worker_snippets
|
||||
node_labels = var.worker_node_labels
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
---
|
||||
variant: fcos
|
||||
version: 1.4.0
|
||||
version: 1.5.0
|
||||
systemd:
|
||||
units:
|
||||
- name: containerd.service
|
||||
@ -26,7 +26,7 @@ systemd:
|
||||
Description=Kubelet (System Container)
|
||||
Wants=rpc-statd.service
|
||||
[Service]
|
||||
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.23.5
|
||||
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3
|
||||
ExecStartPre=/bin/mkdir -p /etc/cni/net.d
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
|
||||
ExecStartPre=/bin/mkdir -p /opt/cni/bin
|
||||
@ -35,15 +35,19 @@ systemd:
|
||||
ExecStartPre=/usr/bin/bash -c "grep 'certificate-authority-data' /etc/kubernetes/kubeconfig | awk '{print $2}' | base64 -d > /etc/kubernetes/ca.crt"
|
||||
ExecStartPre=-/usr/bin/podman rm kubelet
|
||||
ExecStart=/usr/bin/podman run --name kubelet \
|
||||
--log-driver k8s-file \
|
||||
--privileged \
|
||||
--pid host \
|
||||
--network host \
|
||||
--volume /etc/cni/net.d:/etc/cni/net.d:ro,z \
|
||||
--volume /etc/kubernetes:/etc/kubernetes:ro,z \
|
||||
--volume /usr/lib/os-release:/etc/os-release:ro \
|
||||
--volume /etc/machine-id:/etc/machine-id:ro \
|
||||
--volume /lib/modules:/lib/modules:ro \
|
||||
--volume /run:/run \
|
||||
--volume /sys/fs/cgroup:/sys/fs/cgroup \
|
||||
--volume /etc/selinux:/etc/selinux \
|
||||
--volume /sys/fs/selinux:/sys/fs/selinux \
|
||||
--volume /var/lib/calico:/var/lib/calico:ro \
|
||||
--volume /var/lib/containerd:/var/lib/containerd \
|
||||
--volume /var/lib/kubelet:/var/lib/kubelet:rshared,z \
|
||||
@ -51,51 +55,23 @@ systemd:
|
||||
--volume /var/run/lock:/var/run/lock:z \
|
||||
--volume /opt/cni/bin:/opt/cni/bin:z \
|
||||
$${KUBELET_IMAGE} \
|
||||
--anonymous-auth=false \
|
||||
--authentication-token-webhook \
|
||||
--authorization-mode=Webhook \
|
||||
--bootstrap-kubeconfig=/etc/kubernetes/kubeconfig \
|
||||
--cgroup-driver=systemd \
|
||||
--cgroups-per-qos=true \
|
||||
--container-runtime=remote \
|
||||
--config=/etc/kubernetes/kubelet.yaml \
|
||||
--container-runtime-endpoint=unix:///run/containerd/containerd.sock \
|
||||
--enforce-node-allocatable=pods \
|
||||
--client-ca-file=/etc/kubernetes/ca.crt \
|
||||
--cluster_dns=${cluster_dns_service_ip} \
|
||||
--cluster_domain=${cluster_domain_suffix} \
|
||||
--healthz-port=0 \
|
||||
--kubeconfig=/var/lib/kubelet/kubeconfig \
|
||||
--node-labels=node.kubernetes.io/node \
|
||||
%{~ for label in split(",", node_labels) ~}
|
||||
--node-labels=${label} \
|
||||
%{~ endfor ~}
|
||||
%{~ for taint in split(",", node_taints) ~}
|
||||
--register-with-taints=${taint} \
|
||||
%{~ endfor ~}
|
||||
--pod-manifest-path=/etc/kubernetes/manifests \
|
||||
--read-only-port=0 \
|
||||
--resolv-conf=/run/systemd/resolve/resolv.conf \
|
||||
--rotate-certificates \
|
||||
--volume-plugin-dir=/var/lib/kubelet/volumeplugins
|
||||
--node-labels=node.kubernetes.io/node
|
||||
ExecStop=-/usr/bin/podman stop kubelet
|
||||
Delegate=yes
|
||||
Restart=always
|
||||
RestartSec=10
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
- name: delete-node.service
|
||||
enabled: true
|
||||
contents: |
|
||||
[Unit]
|
||||
Description=Delete Kubernetes node on shutdown
|
||||
[Service]
|
||||
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.23.5
|
||||
Type=oneshot
|
||||
RemainAfterExit=true
|
||||
ExecStart=/bin/true
|
||||
ExecStop=/bin/bash -c '/usr/bin/podman run --volume /var/lib/kubelet:/var/lib/kubelet:ro,z --entrypoint /usr/local/bin/kubectl $${KUBELET_IMAGE} --kubeconfig=/var/lib/kubelet/kubeconfig delete node $HOSTNAME'
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
storage:
|
||||
directories:
|
||||
- path: /etc/kubernetes
|
||||
@ -105,6 +81,38 @@ storage:
|
||||
contents:
|
||||
inline: |
|
||||
${kubeconfig}
|
||||
- path: /etc/kubernetes/kubelet.yaml
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
||||
apiVersion: kubelet.config.k8s.io/v1beta1
|
||||
kind: KubeletConfiguration
|
||||
authentication:
|
||||
anonymous:
|
||||
enabled: false
|
||||
webhook:
|
||||
enabled: true
|
||||
x509:
|
||||
clientCAFile: /etc/kubernetes/ca.crt
|
||||
authorization:
|
||||
mode: Webhook
|
||||
cgroupDriver: systemd
|
||||
clusterDNS:
|
||||
- ${cluster_dns_service_ip}
|
||||
clusterDomain: cluster.local
|
||||
healthzPort: 0
|
||||
rotateCertificates: true
|
||||
shutdownGracePeriod: 45s
|
||||
shutdownGracePeriodCriticalPods: 30s
|
||||
staticPodPath: /etc/kubernetes/manifests
|
||||
readOnlyPort: 0
|
||||
resolvConf: /run/systemd/resolve/resolv.conf
|
||||
volumePluginDir: /var/lib/kubelet/volumeplugins
|
||||
- path: /etc/systemd/logind.conf.d/inhibitors.conf
|
||||
contents:
|
||||
inline: |
|
||||
[Login]
|
||||
InhibitDelayMaxSec=45s
|
||||
- path: /etc/sysctl.d/max-user-watches.conf
|
||||
contents:
|
||||
inline: |
|
||||
@ -128,7 +136,6 @@ storage:
|
||||
DefaultCPUAccounting=yes
|
||||
DefaultMemoryAccounting=yes
|
||||
DefaultBlockIOAccounting=yes
|
||||
- path: /etc/fedora-coreos/iptables-legacy.stamp
|
||||
- path: /etc/containerd/config.toml
|
||||
overwrite: true
|
||||
contents:
|
@ -5,9 +5,9 @@ variable "name" {
|
||||
|
||||
# Azure
|
||||
|
||||
variable "region" {
|
||||
variable "location" {
|
||||
type = string
|
||||
description = "Must be set to the Azure Region of cluster"
|
||||
description = "Must be set to the Azure location of cluster"
|
||||
}
|
||||
|
||||
variable "resource_group_name" {
|
||||
@ -25,9 +25,12 @@ variable "security_group_id" {
|
||||
description = "Must be set to the `worker_security_group_id` output by cluster"
|
||||
}
|
||||
|
||||
variable "backend_address_pool_id" {
|
||||
type = string
|
||||
description = "Must be set to the `worker_backend_address_pool_id` output by cluster"
|
||||
variable "backend_address_pool_ids" {
|
||||
type = object({
|
||||
ipv4 = list(string)
|
||||
ipv6 = list(string)
|
||||
})
|
||||
description = "Must be set to the `backend_address_pool_ids` output by cluster"
|
||||
}
|
||||
|
||||
# instances
|
||||
@ -41,7 +44,7 @@ variable "worker_count" {
|
||||
variable "vm_type" {
|
||||
type = string
|
||||
description = "Machine type for instances (see `az vm list-skus --location centralus`)"
|
||||
default = "Standard_DS1_v2"
|
||||
default = "Standard_D2as_v5"
|
||||
}
|
||||
|
||||
variable "os_image" {
|
||||
@ -49,6 +52,24 @@ variable "os_image" {
|
||||
description = "Fedora CoreOS image for instances"
|
||||
}
|
||||
|
||||
variable "disk_type" {
|
||||
type = string
|
||||
description = "Type of managed disk"
|
||||
default = "Standard_LRS"
|
||||
}
|
||||
|
||||
variable "disk_size" {
|
||||
type = number
|
||||
description = "Size of the managed disk in GB"
|
||||
default = 30
|
||||
}
|
||||
|
||||
variable "ephemeral_disk" {
|
||||
type = bool
|
||||
description = "Use ephemeral local disk instead of managed disk (requires vm_type with local storage)"
|
||||
default = false
|
||||
}
|
||||
|
||||
variable "priority" {
|
||||
type = string
|
||||
description = "Set priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be evicted at any time."
|
||||
@ -73,6 +94,12 @@ variable "ssh_authorized_key" {
|
||||
description = "SSH public key for user 'core'"
|
||||
}
|
||||
|
||||
variable "azure_authorized_key" {
|
||||
type = string
|
||||
description = "Optionally, pass a dummy RSA key to satisfy Azure validations (then use an ed25519 key set above)"
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "service_cidr" {
|
||||
type = string
|
||||
description = <<EOD
|
||||
@ -93,12 +120,3 @@ variable "node_taints" {
|
||||
description = "List of initial node taints"
|
||||
default = []
|
||||
}
|
||||
|
||||
# unofficial, undocumented, unsupported
|
||||
|
||||
variable "cluster_domain_suffix" {
|
||||
description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. foo.default.svc.cluster.local) "
|
||||
type = string
|
||||
default = "cluster.local"
|
||||
}
|
||||
|
||||
|
@ -3,12 +3,10 @@
|
||||
terraform {
|
||||
required_version = ">= 0.13.0, < 2.0.0"
|
||||
required_providers {
|
||||
azurerm = "~> 2.8"
|
||||
template = "~> 2.2"
|
||||
|
||||
azurerm = ">= 2.8"
|
||||
ct = {
|
||||
source = "poseidon/ct"
|
||||
version = "~> 0.9"
|
||||
version = "~> 0.13"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,28 +1,31 @@
|
||||
# Workers scale set
|
||||
resource "azurerm_linux_virtual_machine_scale_set" "workers" {
|
||||
resource_group_name = var.resource_group_name
|
||||
locals {
|
||||
azure_authorized_key = var.azure_authorized_key == "" ? var.ssh_authorized_key : var.azure_authorized_key
|
||||
}
|
||||
|
||||
name = "${var.name}-worker"
|
||||
location = var.region
|
||||
sku = var.vm_type
|
||||
instances = var.worker_count
|
||||
# instance name prefix for instances in the set
|
||||
computer_name_prefix = "${var.name}-worker"
|
||||
single_placement_group = false
|
||||
custom_data = base64encode(data.ct_config.worker-ignition.rendered)
|
||||
# Workers scale set
|
||||
resource "azurerm_orchestrated_virtual_machine_scale_set" "workers" {
|
||||
name = "${var.name}-worker"
|
||||
resource_group_name = var.resource_group_name
|
||||
location = var.location
|
||||
platform_fault_domain_count = 1
|
||||
sku_name = var.vm_type
|
||||
instances = var.worker_count
|
||||
|
||||
# storage
|
||||
source_image_id = var.os_image
|
||||
encryption_at_host_enabled = true
|
||||
source_image_id = var.os_image
|
||||
os_disk {
|
||||
storage_account_type = "Standard_LRS"
|
||||
caching = "ReadWrite"
|
||||
}
|
||||
|
||||
# Azure requires setting admin_ssh_key, though Ignition custom_data handles it too
|
||||
admin_username = "core"
|
||||
admin_ssh_key {
|
||||
username = "core"
|
||||
public_key = var.ssh_authorized_key
|
||||
storage_account_type = var.disk_type
|
||||
disk_size_gb = var.disk_size
|
||||
caching = "ReadOnly"
|
||||
# Optionally, use the ephemeral disk of the instance type (support varies)
|
||||
dynamic "diff_disk_settings" {
|
||||
for_each = var.ephemeral_disk ? [1] : []
|
||||
content {
|
||||
option = "Local"
|
||||
placement = "ResourceDisk"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# network
|
||||
@ -32,62 +35,59 @@ resource "azurerm_linux_virtual_machine_scale_set" "workers" {
|
||||
network_security_group_id = var.security_group_id
|
||||
|
||||
ip_configuration {
|
||||
name = "ip0"
|
||||
name = "ipv4"
|
||||
version = "IPv4"
|
||||
primary = true
|
||||
subnet_id = var.subnet_id
|
||||
|
||||
# backend address pool to which the NIC should be added
|
||||
load_balancer_backend_address_pool_ids = [var.backend_address_pool_id]
|
||||
load_balancer_backend_address_pool_ids = var.backend_address_pool_ids.ipv4
|
||||
}
|
||||
ip_configuration {
|
||||
name = "ipv6"
|
||||
version = "IPv6"
|
||||
subnet_id = var.subnet_id
|
||||
# backend address pool to which the NIC should be added
|
||||
load_balancer_backend_address_pool_ids = var.backend_address_pool_ids.ipv6
|
||||
}
|
||||
}
|
||||
|
||||
# boot
|
||||
user_data_base64 = base64encode(data.ct_config.worker.rendered)
|
||||
boot_diagnostics {
|
||||
# defaults to a managed storage account
|
||||
}
|
||||
|
||||
# Azure requires an RSA admin_ssh_key
|
||||
os_profile {
|
||||
linux_configuration {
|
||||
admin_username = "core"
|
||||
admin_ssh_key {
|
||||
username = "core"
|
||||
public_key = local.azure_authorized_key
|
||||
}
|
||||
computer_name_prefix = "${var.name}-worker"
|
||||
}
|
||||
}
|
||||
|
||||
# lifecycle
|
||||
upgrade_mode = "Manual"
|
||||
# eviction policy may only be set when priority is Spot
|
||||
priority = var.priority
|
||||
eviction_policy = var.priority == "Spot" ? "Delete" : null
|
||||
}
|
||||
|
||||
# Scale up or down to maintain desired number, tolerating deallocations.
|
||||
resource "azurerm_monitor_autoscale_setting" "workers" {
|
||||
resource_group_name = var.resource_group_name
|
||||
|
||||
name = "${var.name}-maintain-desired"
|
||||
location = var.region
|
||||
|
||||
# autoscale
|
||||
enabled = true
|
||||
target_resource_id = azurerm_linux_virtual_machine_scale_set.workers.id
|
||||
|
||||
profile {
|
||||
name = "default"
|
||||
|
||||
capacity {
|
||||
minimum = var.worker_count
|
||||
default = var.worker_count
|
||||
maximum = var.worker_count
|
||||
}
|
||||
termination_notification {
|
||||
enabled = true
|
||||
}
|
||||
}
|
||||
|
||||
# Worker Ignition configs
|
||||
data "ct_config" "worker-ignition" {
|
||||
content = data.template_file.worker-config.rendered
|
||||
# Fedora CoreOS worker
|
||||
data "ct_config" "worker" {
|
||||
content = templatefile("${path.module}/butane/worker.yaml", {
|
||||
kubeconfig = indent(10, var.kubeconfig)
|
||||
ssh_authorized_key = var.ssh_authorized_key
|
||||
cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
|
||||
node_labels = join(",", var.node_labels)
|
||||
node_taints = join(",", var.node_taints)
|
||||
})
|
||||
strict = true
|
||||
snippets = var.snippets
|
||||
}
|
||||
|
||||
# Worker Fedora CoreOS configs
|
||||
data "template_file" "worker-config" {
|
||||
template = file("${path.module}/fcc/worker.yaml")
|
||||
|
||||
vars = {
|
||||
kubeconfig = indent(10, var.kubeconfig)
|
||||
ssh_authorized_key = var.ssh_authorized_key
|
||||
cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
|
||||
cluster_domain_suffix = var.cluster_domain_suffix
|
||||
node_labels = join(",", var.node_labels)
|
||||
node_taints = join(",", var.node_taints)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -11,7 +11,7 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster
|
||||
|
||||
## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>
|
||||
|
||||
* Kubernetes v1.23.5 (upstream)
|
||||
* Kubernetes v1.31.3 (upstream)
|
||||
* Single or multi-master, [Calico](https://www.projectcalico.org/) or [Cilium](https://github.com/cilium/cilium) or [flannel](https://github.com/coreos/flannel) networking
|
||||
* On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/)
|
||||
* Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [low-priority](https://typhoon.psdn.io/flatcar-linux/azure/#low-priority) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#hosts) customization
|
||||
|
@ -1,13 +1,12 @@
|
||||
# Kubernetes assets (kubeconfig, manifests)
|
||||
module "bootstrap" {
|
||||
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=e5bdb6f6c67461ca3a1cd3449f4703189f14d3e4"
|
||||
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=e6a1c7bccfc45ab299b5f8149bc3840f99b30b2b"
|
||||
|
||||
cluster_name = var.cluster_name
|
||||
api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)]
|
||||
etcd_servers = formatlist("%s.%s", azurerm_dns_a_record.etcds.*.name, var.dns_zone)
|
||||
|
||||
networking = var.networking
|
||||
|
||||
# only effective with Calico networking
|
||||
# we should be able to use 1450 MTU, but in practice, 1410 was needed
|
||||
network_encapsulation = "vxlan"
|
||||
@ -15,9 +14,7 @@ module "bootstrap" {
|
||||
|
||||
pod_cidr = var.pod_cidr
|
||||
service_cidr = var.service_cidr
|
||||
cluster_domain_suffix = var.cluster_domain_suffix
|
||||
enable_reporting = var.enable_reporting
|
||||
enable_aggregation = var.enable_aggregation
|
||||
daemonset_tolerations = var.daemonset_tolerations
|
||||
components = var.components
|
||||
}
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
---
|
||||
variant: flatcar
|
||||
version: 1.0.0
|
||||
systemd:
|
||||
units:
|
||||
- name: etcd-member.service
|
||||
@ -10,7 +11,7 @@ systemd:
|
||||
Requires=docker.service
|
||||
After=docker.service
|
||||
[Service]
|
||||
Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.2
|
||||
Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.13
|
||||
ExecStartPre=/usr/bin/docker run -d \
|
||||
--name etcd \
|
||||
--network host \
|
||||
@ -55,7 +56,7 @@ systemd:
|
||||
After=docker.service
|
||||
Wants=rpc-statd.service
|
||||
[Service]
|
||||
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.23.5
|
||||
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3
|
||||
ExecStartPre=/bin/mkdir -p /etc/cni/net.d
|
||||
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
|
||||
ExecStartPre=/bin/mkdir -p /opt/cni/bin
|
||||
@ -80,25 +81,12 @@ systemd:
|
||||
-v /var/log:/var/log \
|
||||
-v /opt/cni/bin:/opt/cni/bin \
|
||||
$${KUBELET_IMAGE} \
|
||||
--anonymous-auth=false \
|
||||
--authentication-token-webhook \
|
||||
--authorization-mode=Webhook \
|
||||
--bootstrap-kubeconfig=/etc/kubernetes/kubeconfig \
|
||||
--cgroup-driver=systemd \
|
||||
--container-runtime=remote \
|
||||
--config=/etc/kubernetes/kubelet.yaml \
|
||||
--container-runtime-endpoint=unix:///run/containerd/containerd.sock \
|
||||
--client-ca-file=/etc/kubernetes/ca.crt \
|
||||
--cluster_dns=${cluster_dns_service_ip} \
|
||||
--cluster_domain=${cluster_domain_suffix} \
|
||||
--healthz-port=0 \
|
||||
--kubeconfig=/var/lib/kubelet/kubeconfig \
|
||||
--node-labels=node.kubernetes.io/controller="true" \
|
||||
--pod-manifest-path=/etc/kubernetes/manifests \
|
||||
--register-with-taints=node-role.kubernetes.io/controller=:NoSchedule \
|
||||
--read-only-port=0 \
|
||||
--resolv-conf=/run/systemd/resolve/resolv.conf \
|
||||
--rotate-certificates \
|
||||
--volume-plugin-dir=/var/lib/kubelet/volumeplugins
|
||||
--register-with-taints=node-role.kubernetes.io/controller=:NoSchedule
|
||||
ExecStart=docker logs -f kubelet
|
||||
ExecStop=docker stop kubelet
|
||||
ExecStopPost=docker rm kubelet
|
||||
@ -117,7 +105,7 @@ systemd:
|
||||
Type=oneshot
|
||||
RemainAfterExit=true
|
||||
WorkingDirectory=/opt/bootstrap
|
||||
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.23.5
|
||||
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3
|
||||
ExecStart=/usr/bin/docker run \
|
||||
-v /etc/kubernetes/pki:/etc/kubernetes/pki:ro \
|
||||
-v /opt/bootstrap/assets:/assets:ro \
|
||||
@ -130,23 +118,47 @@ systemd:
|
||||
storage:
|
||||
directories:
|
||||
- path: /var/lib/etcd
|
||||
filesystem: root
|
||||
mode: 0700
|
||||
overwrite: true
|
||||
files:
|
||||
- path: /etc/kubernetes/kubeconfig
|
||||
filesystem: root
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
||||
${kubeconfig}
|
||||
- path: /etc/kubernetes/kubelet.yaml
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
||||
apiVersion: kubelet.config.k8s.io/v1beta1
|
||||
kind: KubeletConfiguration
|
||||
authentication:
|
||||
anonymous:
|
||||
enabled: false
|
||||
webhook:
|
||||
enabled: true
|
||||
x509:
|
||||
clientCAFile: /etc/kubernetes/ca.crt
|
||||
authorization:
|
||||
mode: Webhook
|
||||
cgroupDriver: systemd
|
||||
clusterDNS:
|
||||
- ${cluster_dns_service_ip}
|
||||
clusterDomain: cluster.local
|
||||
healthzPort: 0
|
||||
rotateCertificates: true
|
||||
shutdownGracePeriod: 45s
|
||||
shutdownGracePeriodCriticalPods: 30s
|
||||
staticPodPath: /etc/kubernetes/manifests
|
||||
readOnlyPort: 0
|
||||
resolvConf: /run/systemd/resolve/resolv.conf
|
||||
volumePluginDir: /var/lib/kubelet/volumeplugins
|
||||
- path: /opt/bootstrap/layout
|
||||
filesystem: root
|
||||
mode: 0544
|
||||
contents:
|
||||
inline: |
|
||||
#!/bin/bash -e
|
||||
mkdir -p -- auth tls/etcd tls/k8s static-manifests manifests/coredns manifests-networking
|
||||
mkdir -p -- auth tls/{etcd,k8s} static-manifests manifests/{coredns,kube-proxy,network}
|
||||
awk '/#####/ {filename=$2; next} {print > filename}' assets
|
||||
mkdir -p /etc/ssl/etcd/etcd
|
||||
mkdir -p /etc/kubernetes/pki
|
||||
@ -161,10 +173,8 @@ storage:
|
||||
mv static-manifests/* /etc/kubernetes/manifests/
|
||||
mkdir -p /opt/bootstrap/assets
|
||||
mv manifests /opt/bootstrap/assets/manifests
|
||||
mv manifests-networking/* /opt/bootstrap/assets/manifests/
|
||||
rm -rf assets auth static-manifests tls manifests-networking
|
||||
rm -rf assets auth static-manifests tls manifests
|
||||
- path: /opt/bootstrap/apply
|
||||
filesystem: root
|
||||
mode: 0544
|
||||
contents:
|
||||
inline: |
|
||||
@ -178,14 +188,17 @@ storage:
|
||||
echo "Retry applying manifests"
|
||||
sleep 5
|
||||
done
|
||||
- path: /etc/systemd/logind.conf.d/inhibitors.conf
|
||||
contents:
|
||||
inline: |
|
||||
[Login]
|
||||
InhibitDelayMaxSec=45s
|
||||
- path: /etc/sysctl.d/max-user-watches.conf
|
||||
filesystem: root
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
||||
fs.inotify.max_user_watches=16184
|
||||
- path: /etc/etcd/etcd.env
|
||||
filesystem: root
|
||||
mode: 0644
|
||||
contents:
|
||||
inline: |
|
@ -1,31 +1,36 @@
|
||||
# Discrete DNS records for each controller's private IPv4 for etcd usage
|
||||
resource "azurerm_dns_a_record" "etcds" {
|
||||
count = var.controller_count
|
||||
resource_group_name = var.dns_zone_group
|
||||
|
||||
# DNS Zone name where record should be created
|
||||
zone_name = var.dns_zone
|
||||
|
||||
# DNS record
|
||||
name = format("%s-etcd%d", var.cluster_name, count.index)
|
||||
ttl = 300
|
||||
|
||||
# private IPv4 address for etcd
|
||||
records = [azurerm_network_interface.controllers.*.private_ip_address[count.index]]
|
||||
}
|
||||
|
||||
locals {
  # Container Linux derivative
  # flatcar-stable -> Flatcar Linux Stable
  channel = split("-", var.os_image)[1]
  channel      = split("-", var.os_image)[1]
  offer_suffix = var.controller_arch == "arm64" ? "corevm" : "free"
  urn          = var.controller_arch == "arm64" ? local.channel : "${local.channel}-gen2"

  # Typhoon ssh_authorized_key supports RSA or newer formats (e.g. ed25519).
  # However, Azure requires an older RSA key to pass validations. To use a
  # newer key format, pass a dummy RSA key as the azure_authorized_key and
  # delete the associated private key so it's never used.
  azure_authorized_key = var.azure_authorized_key == "" ? var.ssh_authorized_key : var.azure_authorized_key
}

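As the comment notes, a caller who prefers an ed25519 SSH key can satisfy Azure's RSA-only validation with a throwaway RSA public key. A hedged sketch of the two module inputs (key material truncated and purely illustrative):

# Real key used by Ignition/SSH
ssh_authorized_key = "ssh-ed25519 AAAAC3... user@example"
# Dummy RSA key only to pass Azure validation; discard its private half
azure_authorized_key = "ssh-rsa AAAAB3... dummy"
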
# Discrete DNS records for each controller's private IPv4 for etcd usage
|
||||
resource "azurerm_dns_a_record" "etcds" {
|
||||
count = var.controller_count
|
||||
|
||||
# DNS Zone name where record should be created
|
||||
zone_name = var.dns_zone
|
||||
resource_group_name = var.dns_zone_group
|
||||
# DNS record
|
||||
name = format("%s-etcd%d", var.cluster_name, count.index)
|
||||
ttl = 300
|
||||
# private IPv4 address for etcd
|
||||
records = [azurerm_network_interface.controllers[count.index].private_ip_address]
|
||||
}
|
||||
|
||||
# Controller availability set to spread controllers
|
||||
resource "azurerm_availability_set" "controllers" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "${var.cluster_name}-controllers"
|
||||
location = var.region
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
location = var.location
|
||||
platform_fault_domain_count = 2
|
||||
platform_update_domain_count = 4
|
||||
managed = true
|
||||
@ -33,48 +38,55 @@ resource "azurerm_availability_set" "controllers" {
|
||||
|
||||
# Controller instances
|
||||
resource "azurerm_linux_virtual_machine" "controllers" {
|
||||
count = var.controller_count
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
count = var.controller_count
|
||||
|
||||
name = "${var.cluster_name}-controller-${count.index}"
|
||||
location = var.region
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
location = var.location
|
||||
availability_set_id = azurerm_availability_set.controllers.id
|
||||
|
||||
size = var.controller_type
|
||||
custom_data = base64encode(data.ct_config.controller-ignitions.*.rendered[count.index])
|
||||
size = var.controller_type
|
||||
|
||||
# storage
|
||||
os_disk {
|
||||
name = "${var.cluster_name}-controller-${count.index}"
|
||||
storage_account_type = var.controller_disk_type
|
||||
disk_size_gb = var.controller_disk_size
|
||||
caching = "None"
|
||||
disk_size_gb = var.disk_size
|
||||
storage_account_type = "Premium_LRS"
|
||||
}
|
||||
|
||||
# Flatcar Container Linux
|
||||
source_image_reference {
|
||||
publisher = "Kinvolk"
|
||||
offer = "flatcar-container-linux-free"
|
||||
sku = local.channel
|
||||
publisher = "kinvolk"
|
||||
offer = "flatcar-container-linux-${local.offer_suffix}"
|
||||
sku = local.urn
|
||||
version = "latest"
|
||||
}
|
||||
|
||||
plan {
|
||||
name = local.channel
|
||||
publisher = "kinvolk"
|
||||
product = "flatcar-container-linux-free"
|
||||
dynamic "plan" {
|
||||
for_each = var.controller_arch == "arm64" ? [] : [1]
|
||||
content {
|
||||
publisher = "kinvolk"
|
||||
product = "flatcar-container-linux-${local.offer_suffix}"
|
||||
name = local.urn
|
||||
}
|
||||
}
|
||||
|
||||
# network
|
||||
network_interface_ids = [
|
||||
azurerm_network_interface.controllers.*.id[count.index]
|
||||
azurerm_network_interface.controllers[count.index].id
|
||||
]
|
||||
|
||||
# Azure requires setting admin_ssh_key, though Ignition custom_data handles it too
|
||||
# boot
|
||||
custom_data = base64encode(data.ct_config.controllers[count.index].rendered)
|
||||
boot_diagnostics {
|
||||
# defaults to a managed storage account
|
||||
}
|
||||
|
||||
# Azure requires an RSA admin_ssh_key
|
||||
admin_username = "core"
|
||||
admin_ssh_key {
|
||||
username = "core"
|
||||
public_key = var.ssh_authorized_key
|
||||
public_key = local.azure_authorized_key
|
||||
}
|
||||
|
||||
lifecycle {
|
||||
@ -85,31 +97,52 @@ resource "azurerm_linux_virtual_machine" "controllers" {
|
||||
}
|
||||
}
|
||||
|
||||
# Controller public IPv4 addresses
|
||||
resource "azurerm_public_ip" "controllers" {
|
||||
count = var.controller_count
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
# Controller node public IPv4 addresses
|
||||
resource "azurerm_public_ip" "controllers-ipv4" {
|
||||
count = var.controller_count
|
||||
|
||||
name = "${var.cluster_name}-controller-${count.index}"
|
||||
location = azurerm_resource_group.cluster.location
|
||||
sku = "Standard"
|
||||
allocation_method = "Static"
|
||||
name = "${var.cluster_name}-controller-${count.index}-ipv4"
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
location = azurerm_resource_group.cluster.location
|
||||
ip_version = "IPv4"
|
||||
sku = "Standard"
|
||||
allocation_method = "Static"
|
||||
}
|
||||
|
||||
# Controller NICs with public and private IPv4
|
||||
resource "azurerm_network_interface" "controllers" {
|
||||
count = var.controller_count
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
# Controller node public IPv6 addresses
|
||||
resource "azurerm_public_ip" "controllers-ipv6" {
|
||||
count = var.controller_count
|
||||
|
||||
name = "${var.cluster_name}-controller-${count.index}"
|
||||
location = azurerm_resource_group.cluster.location
|
||||
name = "${var.cluster_name}-controller-${count.index}-ipv6"
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
location = azurerm_resource_group.cluster.location
|
||||
ip_version = "IPv6"
|
||||
sku = "Standard"
|
||||
allocation_method = "Static"
|
||||
}
|
||||
|
||||
# Controllers' network interfaces
|
||||
resource "azurerm_network_interface" "controllers" {
|
||||
count = var.controller_count
|
||||
|
||||
name = "${var.cluster_name}-controller-${count.index}"
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
location = azurerm_resource_group.cluster.location
|
||||
|
||||
ip_configuration {
|
||||
name = "ip0"
|
||||
name = "ipv4"
|
||||
primary = true
|
||||
subnet_id = azurerm_subnet.controller.id
|
||||
private_ip_address_allocation = "Dynamic"
|
||||
# instance public IPv4
|
||||
public_ip_address_id = azurerm_public_ip.controllers.*.id[count.index]
|
||||
private_ip_address_version = "IPv4"
|
||||
public_ip_address_id = azurerm_public_ip.controllers-ipv4[count.index].id
|
||||
}
|
||||
ip_configuration {
|
||||
name = "ipv6"
|
||||
subnet_id = azurerm_subnet.controller.id
|
||||
private_ip_address_allocation = "Dynamic"
|
||||
private_ip_address_version = "IPv6"
|
||||
public_ip_address_id = azurerm_public_ip.controllers-ipv6[count.index].id
|
||||
}
|
||||
}
|
||||
|
||||
@ -121,50 +154,38 @@ resource "azurerm_network_interface_security_group_association" "controllers" {
|
||||
network_security_group_id = azurerm_network_security_group.controller.id
|
||||
}
|
||||
|
||||
# Associate controller network interface with controller backend address pool
|
||||
resource "azurerm_network_interface_backend_address_pool_association" "controllers" {
|
||||
# Associate controller network interface with controller backend address pools
|
||||
resource "azurerm_network_interface_backend_address_pool_association" "controllers-ipv4" {
|
||||
count = var.controller_count
|
||||
|
||||
network_interface_id = azurerm_network_interface.controllers[count.index].id
|
||||
ip_configuration_name = "ip0"
|
||||
backend_address_pool_id = azurerm_lb_backend_address_pool.controller.id
|
||||
ip_configuration_name = "ipv4"
|
||||
backend_address_pool_id = azurerm_lb_backend_address_pool.controller-ipv4.id
|
||||
}
|
||||
|
||||
# Controller Ignition configs
|
||||
data "ct_config" "controller-ignitions" {
|
||||
count = var.controller_count
|
||||
content = data.template_file.controller-configs.*.rendered[count.index]
|
||||
strict = true
|
||||
snippets = var.controller_snippets
|
||||
}
|
||||
|
||||
# Controller Container Linux configs
|
||||
data "template_file" "controller-configs" {
|
||||
resource "azurerm_network_interface_backend_address_pool_association" "controllers-ipv6" {
|
||||
count = var.controller_count
|
||||
|
||||
template = file("${path.module}/cl/controller.yaml")
|
||||
network_interface_id = azurerm_network_interface.controllers[count.index].id
|
||||
ip_configuration_name = "ipv6"
|
||||
backend_address_pool_id = azurerm_lb_backend_address_pool.controller-ipv6.id
|
||||
}
|
||||
|
||||
vars = {
|
||||
# Flatcar Linux controllers
data "ct_config" "controllers" {
  count   = var.controller_count
  content = templatefile("${path.module}/butane/controller.yaml", {
    # Cannot use cyclic dependencies on controllers or their DNS records
    etcd_name   = "etcd${count.index}"
    etcd_domain = "${var.cluster_name}-etcd${count.index}.${var.dns_zone}"
    # etcd0=https://cluster-etcd0.example.com,etcd1=https://cluster-etcd1.example.com,...
    etcd_initial_cluster = join(",", data.template_file.etcds.*.rendered)
    etcd_initial_cluster = join(",", [
      for i in range(var.controller_count) : "etcd${i}=https://${var.cluster_name}-etcd${i}.${var.dns_zone}:2380"
    ])
    kubeconfig             = indent(10, module.bootstrap.kubeconfig-kubelet)
    ssh_authorized_key     = var.ssh_authorized_key
    cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
    cluster_domain_suffix  = var.cluster_domain_suffix
  }
  })
  strict   = true
  snippets = var.controller_snippets
}

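For example, with hypothetical inputs cluster_name = "ramius", dns_zone = "azure.example.com", and controller_count = 2, the join above renders as:

# etcd0=https://ramius-etcd0.azure.example.com:2380,etcd1=https://ramius-etcd1.azure.example.com:2380
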
data "template_file" "etcds" {
|
||||
count = var.controller_count
|
||||
template = "etcd$${index}=https://$${cluster_name}-etcd$${index}.$${dns_zone}:2380"
|
||||
|
||||
vars = {
|
||||
index = count.index
|
||||
cluster_name = var.cluster_name
|
||||
dns_zone = var.dns_zone
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,124 +1,164 @@
|
||||
# DNS record for the apiserver load balancer
|
||||
# DNS A record for the apiserver load balancer
|
||||
resource "azurerm_dns_a_record" "apiserver" {
|
||||
resource_group_name = var.dns_zone_group
|
||||
|
||||
# DNS Zone name where record should be created
|
||||
zone_name = var.dns_zone
|
||||
|
||||
zone_name = var.dns_zone
|
||||
resource_group_name = var.dns_zone_group
|
||||
# DNS record
|
||||
name = var.cluster_name
|
||||
ttl = 300
|
||||
|
||||
# IPv4 address of apiserver load balancer
|
||||
records = [azurerm_public_ip.apiserver-ipv4.ip_address]
|
||||
records = [azurerm_public_ip.frontend-ipv4.ip_address]
|
||||
}
|
||||
|
||||
# Static IPv4 address for the apiserver frontend
|
||||
resource "azurerm_public_ip" "apiserver-ipv4" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "${var.cluster_name}-apiserver-ipv4"
|
||||
location = var.region
|
||||
sku = "Standard"
|
||||
allocation_method = "Static"
|
||||
# DNS AAAA record for the apiserver load balancer
|
||||
resource "azurerm_dns_aaaa_record" "apiserver" {
|
||||
# DNS Zone name where record should be created
|
||||
zone_name = var.dns_zone
|
||||
resource_group_name = var.dns_zone_group
|
||||
# DNS record
|
||||
name = var.cluster_name
|
||||
ttl = 300
|
||||
# IPv6 address of apiserver load balancer
|
||||
records = [azurerm_public_ip.frontend-ipv6.ip_address]
|
||||
}
|
||||
|
||||
# Static IPv4 address for the ingress frontend
|
||||
resource "azurerm_public_ip" "ingress-ipv4" {
|
||||
# Static IPv4 address for the load balancer
|
||||
resource "azurerm_public_ip" "frontend-ipv4" {
|
||||
name = "${var.cluster_name}-frontend-ipv4"
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
location = var.location
|
||||
ip_version = "IPv4"
|
||||
sku = "Standard"
|
||||
allocation_method = "Static"
|
||||
}
|
||||
|
||||
name = "${var.cluster_name}-ingress-ipv4"
|
||||
location = var.region
|
||||
sku = "Standard"
|
||||
allocation_method = "Static"
|
||||
# Static IPv6 address for the load balancer
|
||||
resource "azurerm_public_ip" "frontend-ipv6" {
|
||||
name = "${var.cluster_name}-frontend-ipv6"
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
location = var.location
|
||||
ip_version = "IPv6"
|
||||
sku = "Standard"
|
||||
allocation_method = "Static"
|
||||
}
|
||||
|
||||
# Network Load Balancer for apiservers and ingress
|
||||
resource "azurerm_lb" "cluster" {
|
||||
name = var.cluster_name
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = var.cluster_name
|
||||
location = var.region
|
||||
sku = "Standard"
|
||||
location = var.location
|
||||
sku = "Standard"
|
||||
|
||||
frontend_ip_configuration {
|
||||
name = "apiserver"
|
||||
public_ip_address_id = azurerm_public_ip.apiserver-ipv4.id
|
||||
name = "frontend-ipv4"
|
||||
public_ip_address_id = azurerm_public_ip.frontend-ipv4.id
|
||||
}
|
||||
|
||||
frontend_ip_configuration {
|
||||
name = "ingress"
|
||||
public_ip_address_id = azurerm_public_ip.ingress-ipv4.id
|
||||
name = "frontend-ipv6"
|
||||
public_ip_address_id = azurerm_public_ip.frontend-ipv6.id
|
||||
}
|
||||
}
|
||||
|
||||
resource "azurerm_lb_rule" "apiserver" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "apiserver"
|
||||
resource "azurerm_lb_rule" "apiserver-ipv4" {
|
||||
name = "apiserver-ipv4"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
frontend_ip_configuration_name = "apiserver"
|
||||
frontend_ip_configuration_name = "frontend-ipv4"
|
||||
disable_outbound_snat = true
|
||||
|
||||
protocol = "Tcp"
|
||||
frontend_port = 6443
|
||||
backend_port = 6443
|
||||
backend_address_pool_ids = [azurerm_lb_backend_address_pool.controller.id]
|
||||
backend_address_pool_ids = [azurerm_lb_backend_address_pool.controller-ipv4.id]
|
||||
probe_id = azurerm_lb_probe.apiserver.id
|
||||
}
|
||||
|
||||
resource "azurerm_lb_rule" "ingress-http" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "ingress-http"
|
||||
resource "azurerm_lb_rule" "apiserver-ipv6" {
|
||||
name = "apiserver-ipv6"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
frontend_ip_configuration_name = "ingress"
|
||||
frontend_ip_configuration_name = "frontend-ipv6"
|
||||
disable_outbound_snat = true
|
||||
|
||||
protocol = "Tcp"
|
||||
frontend_port = 6443
|
||||
backend_port = 6443
|
||||
backend_address_pool_ids = [azurerm_lb_backend_address_pool.controller-ipv6.id]
|
||||
probe_id = azurerm_lb_probe.apiserver.id
|
||||
}
|
||||
|
||||
resource "azurerm_lb_rule" "ingress-http-ipv4" {
|
||||
name = "ingress-http-ipv4"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
frontend_ip_configuration_name = "frontend-ipv4"
|
||||
disable_outbound_snat = true
|
||||
|
||||
protocol = "Tcp"
|
||||
frontend_port = 80
|
||||
backend_port = 80
|
||||
backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker.id]
|
||||
backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker-ipv4.id]
|
||||
probe_id = azurerm_lb_probe.ingress.id
|
||||
}
|
||||
|
||||
resource "azurerm_lb_rule" "ingress-https" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "ingress-https"
|
||||
resource "azurerm_lb_rule" "ingress-https-ipv4" {
|
||||
name = "ingress-https-ipv4"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
frontend_ip_configuration_name = "ingress"
|
||||
frontend_ip_configuration_name = "frontend-ipv4"
|
||||
disable_outbound_snat = true
|
||||
|
||||
protocol = "Tcp"
|
||||
frontend_port = 443
|
||||
backend_port = 443
|
||||
backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker.id]
|
||||
backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker-ipv4.id]
|
||||
probe_id = azurerm_lb_probe.ingress.id
|
||||
}
|
||||
|
||||
# Worker outbound TCP/UDP SNAT
|
||||
resource "azurerm_lb_outbound_rule" "worker-outbound" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
resource "azurerm_lb_rule" "ingress-http-ipv6" {
|
||||
name = "ingress-http-ipv6"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
frontend_ip_configuration_name = "frontend-ipv6"
|
||||
disable_outbound_snat = true
|
||||
|
||||
name = "worker"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
frontend_ip_configuration {
|
||||
name = "ingress"
|
||||
}
|
||||
|
||||
protocol = "All"
|
||||
backend_address_pool_id = azurerm_lb_backend_address_pool.worker.id
|
||||
protocol = "Tcp"
|
||||
frontend_port = 80
|
||||
backend_port = 80
|
||||
backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker-ipv6.id]
|
||||
probe_id = azurerm_lb_probe.ingress.id
|
||||
}
|
||||
|
||||
resource "azurerm_lb_rule" "ingress-https-ipv6" {
|
||||
name = "ingress-https-ipv6"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
frontend_ip_configuration_name = "frontend-ipv6"
|
||||
disable_outbound_snat = true
|
||||
|
||||
protocol = "Tcp"
|
||||
frontend_port = 443
|
||||
backend_port = 443
|
||||
backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker-ipv6.id]
|
||||
probe_id = azurerm_lb_probe.ingress.id
|
||||
}
|
||||
|
||||
# Backend Address Pools
|
||||
|
||||
# Address pool of controllers
|
||||
resource "azurerm_lb_backend_address_pool" "controller" {
|
||||
name = "controller"
|
||||
resource "azurerm_lb_backend_address_pool" "controller-ipv4" {
|
||||
name = "controller-ipv4"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
}
|
||||
|
||||
# Address pool of workers
|
||||
resource "azurerm_lb_backend_address_pool" "worker" {
|
||||
name = "worker"
|
||||
resource "azurerm_lb_backend_address_pool" "controller-ipv6" {
|
||||
name = "controller-ipv6"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
}
|
||||
|
||||
# Address pools for workers
|
||||
resource "azurerm_lb_backend_address_pool" "worker-ipv4" {
|
||||
name = "worker-ipv4"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
}
|
||||
|
||||
resource "azurerm_lb_backend_address_pool" "worker-ipv6" {
|
||||
name = "worker-ipv6"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
}
|
||||
|
||||
@ -126,32 +166,45 @@ resource "azurerm_lb_backend_address_pool" "worker" {
|
||||
|
||||
# TCP health check for apiserver
|
||||
resource "azurerm_lb_probe" "apiserver" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "apiserver"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
protocol = "Tcp"
|
||||
port = 6443
|
||||
|
||||
# unhealthy threshold
|
||||
number_of_probes = 3
|
||||
|
||||
number_of_probes = 3
|
||||
interval_in_seconds = 5
|
||||
}
|
||||
|
||||
# HTTP health check for ingress
|
||||
resource "azurerm_lb_probe" "ingress" {
|
||||
resource_group_name = azurerm_resource_group.cluster.name
|
||||
|
||||
name = "ingress"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
protocol = "Http"
|
||||
port = 10254
|
||||
request_path = "/healthz"
|
||||
|
||||
# unhealthy threshold
|
||||
number_of_probes = 3
|
||||
|
||||
number_of_probes = 3
|
||||
interval_in_seconds = 5
|
||||
}
|
||||
|
||||
# Outbound SNAT
|
||||
|
||||
resource "azurerm_lb_outbound_rule" "outbound-ipv4" {
|
||||
name = "outbound-ipv4"
|
||||
protocol = "All"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
backend_address_pool_id = azurerm_lb_backend_address_pool.worker-ipv4.id
|
||||
frontend_ip_configuration {
|
||||
name = "frontend-ipv4"
|
||||
}
|
||||
}
|
||||
|
||||
resource "azurerm_lb_outbound_rule" "outbound-ipv6" {
|
||||
name = "outbound-ipv6"
|
||||
protocol = "All"
|
||||
loadbalancer_id = azurerm_lb.cluster.id
|
||||
backend_address_pool_id = azurerm_lb_backend_address_pool.worker-ipv6.id
|
||||
frontend_ip_configuration {
|
||||
name = "frontend-ipv6"
|
||||
}
|
||||
}