Merge remote-tracking branch 'upstream/main'

2025-07-22 10:11:34 +02:00 · 2024-12-02 11:05:29 +01:00
parent 516517fafe 17060445f7
commit daa5fc4171
173 changed files with 4505 additions and 1838 deletions
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@ -1,10 +0,0 @@
-High level description of the change.
-
-* Specific change
-* Specific change
-
-## Testing
-
-Describe your work to validate the change works.
-
-rel: issue number (if applicable)
--- a/.github/release.yaml
+++ b/.github/release.yaml
@ -0,0 +1,12 @@
+# Changelog categories, selected by label.
+# NOTE(review): presumably consumed by GitHub's automatically generated
+# release notes — confirm the file name/location is one GitHub recognizes.
+changelog:
+  categories:
+    # Catch-all category: matches every label ('*') except the excluded ones
+    - title: Contributions
+      labels:
+        - '*'
+      exclude:
+        labels:
+          - dependencies
+          - no-release-note
+    # Entries labeled `dependencies` are listed under their own heading
+    - title: Dependencies
+      labels:
+        - dependencies
--- a/CHANGES.md
+++ b/CHANGES.md
@ -4,6 +4,214 @@ Notable changes between versions.

 ## Latest

+## v1.31.3
+
+* Kubernetes [v1.31.3](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.31.md#v1313)
+* Update CoreDNS from v1.11.3 to v1.11.4
+* Update Cilium from v1.16.3 to [v1.16.4](https://github.com/cilium/cilium/releases/tag/v1.16.4)
+
+### Deprecations
+
+* Plan to drop support for using Calico CNI, recommend everyone use the Cilium default
+
+## v1.31.2
+
+* Kubernetes [v1.31.2](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.31.md#v1312)
+* Update Cilium from v1.16.1 to [v1.16.3](https://github.com/cilium/cilium/releases/tag/v1.16.3)
+* Update flannel from v0.25.6 to [v0.26.0](https://github.com/flannel-io/flannel/releases/tag/v0.26.0)
+
+## v1.31.1
+
+* Kubernetes [v1.31.1](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.31.md#v1311)
+* Update flannel from v0.25.5 to [v0.25.6](https://github.com/flannel-io/flannel/releases/tag/v0.25.6)
+
+### Google
+
+* Add `controller_disk_type` and `worker_disk_type` variables ([#1513](https://github.com/poseidon/typhoon/pull/1513))
+* Add explicit `region` field to regional worker instance templates ([#1524](https://github.com/poseidon/typhoon/pull/1524))
+
+## v1.31.0
+
+* Kubernetes [v1.31.0](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.31.md#v1310)
+* Use Cilium kube-proxy replacement mode when `cilium` networking is chosen ([#1501](https://github.com/poseidon/typhoon/pull/1501))
+* Fix invalid flannel-cni container image for those using `flannel` networking ([#1497](https://github.com/poseidon/typhoon/pull/1497))
+
+### AWS
+
+* Use EC2 resource-based hostnames instead of IP-based hostnames ([#1499](https://github.com/poseidon/typhoon/pull/1499))
+  * The Amazon DNS server can resolve A and AAAA queries to IPv4 and IPv6 node addresses
+* Tag controller node EBS volumes with a name based on the controller node name
+
+### Google
+
+* Use `google_compute_region_instance_template` instead of `google_compute_instance_template`
+  * Google's regional instance template metadata is kept in the associated region for greater resiliency. The "global" instance templates were kept in a single region
+
+## v1.30.4
+
+* Kubernetes [v1.30.4](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.30.md#v1304)
+* Update Cilium from v1.15.7 to [v1.16.1](https://github.com/cilium/cilium/releases/tag/v1.16.1)
+* Update CoreDNS from v1.11.1 to v1.11.3
+* Remove `enable_aggregation` variable for Kubernetes Aggregation Layer, always set to true
+* Remove `cluster_domain_suffix` variable, always use "cluster.local"
+* Remove `enable_reporting` variable for analytics, always set to false
+
+## v1.30.3
+
+* Kubernetes [v1.30.3](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.30.md#v1303)
+* Update Cilium from v1.15.6 to [v1.15.7](https://github.com/cilium/cilium/releases/tag/v1.15.7)
+* Update flannel from v0.25.4 to [v0.25.5](https://github.com/flannel-io/flannel/releases/tag/v0.25.5)
+
+### AWS
+
+* Configure controller and worker disks ([#1482](https://github.com/poseidon/typhoon/pull/1482))
+  * Add `controller_disk_type`, `controller_disk_size`, and `controller_disk_iops` variables
+  * Add `worker_disk_type`, `worker_disk_size`, and `worker_disk_iops` variables
+  * Remove `disk_type`, `disk_size`, and `disk_iops` variables
+  * Fix propagating settings to worker disks, previously ignored
+* Configure CPU pricing model for burstable instance types ([#1482](https://github.com/poseidon/typhoon/pull/1482))
+  * Add `controller_cpu_credits` and `worker_cpu_credits` variables (`standard` or `unlimited`)
+* Configure controller or worker instance architecture ([#1485](https://github.com/poseidon/typhoon/pull/1485))
+  * Add `controller_arch` and `worker_arch` variables (`amd64` or `arm64`)
+  * Remove `arch` variable
+
+```diff
+module "cluster" {
+  ...
+- arch      = "amd64"
+- disk_type = "gp3"
+- disk_size = 30
+- disk_iops = 3000
+
+ controller_arch        = "amd64"
+ controller_disk_size   = 15
+ controller_cpu_credits = "standard"
+ worker_arch            = "amd64"
+ worker_disk_size       = 22
+ worker_cpu_credits     = "unlimited"
+}
+```
+
+### Azure
+
+* Configure the virtual network and subnets with IPv6 private address space
+  * Change `host_cidr` variable (string) to a `network_cidr` object with `ipv4` and `ipv6` fields that list CIDR strings. Leave the variable unset to use the defaults. (**breaking**)
+* Add support for dual-stack Kubernetes Ingress Load Balancing
+  * Add a public IPv6 frontend, 80/443 rules, and a worker-ipv6 backend pool
+  * Change the `controller_address_prefixes` output from a list of strings to an object with `ipv4` and `ipv6` fields. Most Azure resources can't accept a mix, so these are split out (**breaking**)
+  * Change the `worker_address_prefixes` output from a list of strings to an object with `ipv4` and `ipv6` fields. Most Azure resources can't accept a mix, so these are split out (**breaking**)
+  * Change the `backend_address_pool_id` output (and worker module input) from a string to an object with `ipv4` and `ipv6` fields that list ids (**breaking**)
+* Configure nodes to have outbound IPv6 internet connectivity (analogous to IPv4 SNAT)
+  * Configure controller nodes to have a public IPv6 address
+  * Configure worker nodes to use outbound rules and the load balancer for SNAT
+* Extend network security rules to allow IPv6 traffic, analogous to IPv4
+* Rename `region` variable to `location` to align with Azure platform conventions ([#1469](https://github.com/poseidon/typhoon/pull/1469))
+* Change worker pools from uniform to flexible orchestration mode ([#1473](https://github.com/poseidon/typhoon/pull/1473))
+* Add options to allow worker nodes to use ephemeral local disks ([#1473](https://github.com/poseidon/typhoon/pull/1473))
+  * Add `controller_disk_type` and `controller_disk_size` variables
+  * Add `worker_disk_type`, `worker_disk_size`, and `worker_ephemeral_disk` variables
+* Reduce the number of public IPv4 addresses needed for the Azure load balancer ([#1470](https://github.com/poseidon/typhoon/pull/1470))
+* Configure controller or worker instance architecture for Flatcar Linux ([#1485](https://github.com/poseidon/typhoon/pull/1485))
+  * Add `controller_arch` and `worker_arch` variables (`amd64` or `arm64`)
+  * Remove `arch` variable
+
+```diff
+module "cluster" {
+  ...
+- region = "centralus"
+ location = "centralus"
+  # optional
+- host_cidr = "10.0.0.0/16"
+ network_cidr = {
+   ipv4 = ["10.0.0.0/16"]
+ }
+
+  # instances
+ controller_disk_type = "StandardSSD_LRS"
+ worker_ephemeral_disk = true
+}
+```
+
+### Google Cloud
+
+* Allow configuring controller and worker disks ([#1486](https://github.com/poseidon/typhoon/pull/1486))
+  * Add `controller_disk_size` and `worker_disk_size` variables
+  * Remove `disk_size` variable
+
+## v1.30.2
+
+* Kubernetes [v1.30.2](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.30.md#v1302)
+* Update CoreDNS from v1.9.4 to v1.11.1
+* Update Cilium from v1.15.5 to [v1.15.6](https://github.com/cilium/cilium/releases/tag/v1.15.6)
+* Update flannel from v0.25.1 to [v0.25.4](https://github.com/flannel-io/flannel/releases/tag/v0.25.4)
+
+## v1.30.1
+
+* Kubernetes [v1.30.1](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.30.md#v1301)
+* Add firewall rules and security group rules for Cilium and Hubble metrics ([#1449](https://github.com/poseidon/typhoon/pull/1449))
+* Update Cilium from v1.15.3 to [v1.15.5](https://github.com/cilium/cilium/releases/tag/v1.15.5)
+* Update flannel from v0.24.4 to [v0.25.1](https://github.com/flannel-io/flannel/releases/tag/v0.25.1)
+* Introduce `components` variable to enable/disable/configure pre-installed components ([#1453](https://github.com/poseidon/typhoon/pull/1453))
+* Add Terraform modules for `coredns`, `cilium`, and `flannel` components
+
+### Azure
+
+* Add `controller_security_group_name` output for adding custom security rules ([#1450](https://github.com/poseidon/typhoon/pull/1450))
+* Add `controller_address_prefixes` output for adding custom security rules ([#1450](https://github.com/poseidon/typhoon/pull/1450))
+
+## v1.30.0
+
+* Kubernetes [v1.30.0](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.30.md#v1300)
+* Update etcd from v3.5.12 to [v3.5.13](https://github.com/etcd-io/etcd/releases/tag/v3.5.13)
+* Update Cilium from v1.15.2 to [v1.15.3](https://github.com/cilium/cilium/releases/tag/v1.15.3)
+* Update Calico from v3.27.2 to [v3.27.3](https://github.com/projectcalico/calico/releases/tag/v3.27.3)
+
+## v1.29.3
+
+* Kubernetes [v1.29.3](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.29.md#v1293)
+* Update Cilium from v1.15.1 to [v1.15.2](https://github.com/cilium/cilium/releases/tag/v1.15.2)
+* Update flannel from v0.24.2 to [v0.24.4](https://github.com/flannel-io/flannel/releases/tag/v0.24.4)
+
+## v1.29.2
+
+* Kubernetes [v1.29.2](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.29.md#v1292)
+* Update etcd from v3.5.10 to [v3.5.12](https://github.com/etcd-io/etcd/releases/tag/v3.5.12)
+* Update Cilium from v1.14.3 to [v1.15.1](https://github.com/cilium/cilium/releases/tag/v1.15.1)
+* Update Calico from v3.26.3 to [v3.27.2](https://github.com/projectcalico/calico/releases/tag/v3.27.2)
+  * Fix upstream incompatibility with Fedora CoreOS ([calico#8372](https://github.com/projectcalico/calico/issues/8372))
+* Update flannel from v0.22.2 to [v0.24.2](https://github.com/flannel-io/flannel/releases/tag/v0.24.2)
+* Add an `install_container_networking` variable (default `true`) ([#1421](https://github.com/poseidon/typhoon/pull/1421))
+  * When `true`, the chosen container `networking` provider is installed during cluster bootstrap
+  * Set `false` to self-manage the container networking provider. This allows flannel, Calico, or Cilium
+  to be managed via Terraform (like any other Kubernetes resources). Nodes will be NotReady until you
+  apply the self-managed container networking provider. This may become the default in future.
+  * Continue to set `networking` to one of the three supported container networking providers. Most
+  require custom firewall / security policies be present across nodes so they have some infra tie-ins.
+
+## v1.29.1
+
+* Kubernetes [v1.29.1](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.29.md#v1291)
+
+### AWS
+
+* Continue to support AWS IMDSv1 ([#1412](https://github.com/poseidon/typhoon/pull/1412))
+
+### Known Issues
+
+* Calico and Fedora CoreOS cannot be used together currently ([calico#8372](https://github.com/projectcalico/calico/issues/8372))
+
+## v1.29.0
+
+* Kubernetes [v1.29.0](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.29.md#v1290)
+
+### Known Issues
+
+* Calico and Fedora CoreOS cannot be used together currently ([calico#8372](https://github.com/projectcalico/calico/issues/8372))
+
+## v1.28.4
+
+* Kubernetes [v1.28.4](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.28.md#v1284)
+
 ## v1.28.3

 * Kubernetes [v1.28.3](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.28.md#v1283)
--- a/README.md
+++ b/README.md
@ -1,4 +1,9 @@
-# Typhoon [![Release](https://img.shields.io/github/v/release/poseidon/typhoon)](https://github.com/poseidon/typhoon/releases) [![Stars](https://img.shields.io/github/stars/poseidon/typhoon)](https://github.com/poseidon/typhoon/stargazers) [![Sponsors](https://img.shields.io/github/sponsors/poseidon?logo=github)](https://github.com/sponsors/poseidon) [![Mastodon](https://img.shields.io/badge/follow-news-6364ff?logo=mastodon)](https://fosstodon.org/@typhoon)
+# Typhoon
+
+[![Release](https://img.shields.io/github/v/release/poseidon/typhoon?style=flat-square)](https://github.com/poseidon/typhoon/releases)
+[![Stars](https://img.shields.io/github/stars/poseidon/typhoon?style=flat-square)](https://github.com/poseidon/typhoon/stargazers)
+[![Sponsors](https://img.shields.io/github/sponsors/poseidon?logo=github&style=flat-square)](https://github.com/sponsors/poseidon)
+[![Mastodon](https://img.shields.io/badge/follow-news-6364ff?logo=mastodon&style=flat-square)](https://fosstodon.org/@typhoon)

 <img align="right" src="https://storage.googleapis.com/poseidon/typhoon-logo.png">

@ -13,7 +18,7 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster

 ## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>

-* Kubernetes v1.28.3 (upstream)
+* Kubernetes v1.31.3 (upstream)
 * Single or multi-master, [Calico](https://www.projectcalico.org/) or [Cilium](https://github.com/cilium/cilium) or [flannel](https://github.com/coreos/flannel) networking
 * On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/), SELinux enforcing
 * Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [preemptible](https://typhoon.psdn.io/flatcar-linux/google-cloud/#preemption) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#hosts) customization
@ -21,7 +26,7 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster

 ## Modules

-Typhoon provides a Terraform Module for each supported operating system and platform.
+Typhoon provides a Terraform Module for defining a Kubernetes cluster on each supported operating system and platform.

 Typhoon is available for [Fedora CoreOS](https://getfedora.org/coreos/).

@ -52,6 +57,14 @@ Typhoon is available for [Flatcar Linux](https://www.flatcar-linux.org/releases/
 | AWS           | Flatcar Linux (ARM64) | [aws/flatcar-linux/kubernetes](aws/flatcar-linux/kubernetes) | alpha |
 | Azure         | Flatcar Linux (ARM64) | [azure/flatcar-linux/kubernetes](azure/flatcar-linux/kubernetes) | alpha |

+Typhoon also provides Terraform Modules for optionally managing individual components applied onto clusters.
+
+| Name    | Terraform Module | Status |
+|---------|------------------|--------|
+| CoreDNS | [addons/coredns](addons/coredns) | beta |
+| Cilium  | [addons/cilium](addons/cilium) | beta |
+| flannel | [addons/flannel](addons/flannel) | beta |
+
 ## Documentation

 * [Docs](https://typhoon.psdn.io)
@ -65,7 +78,7 @@ Define a Kubernetes cluster by using the Terraform module for your chosen platfo

 ```tf
 module "yavin" {
-  source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes?ref=v1.28.3"
+  source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes?ref=v1.31.3"

  # Google Cloud
  cluster_name  = "yavin"
@ -85,6 +98,7 @@ module "yavin" {
 resource "local_file" "kubeconfig-yavin" {
  content         = module.yavin.kubeconfig-admin
  filename        = "/home/user/.kube/configs/yavin-config"
+  file_permission = "0600"
 }
 ```

@ -104,9 +118,9 @@ In 4-8 minutes (varies by platform), the cluster will be ready. This Google Clou
 $ export KUBECONFIG=/home/user/.kube/configs/yavin-config
 $ kubectl get nodes
 NAME                                       ROLES    STATUS  AGE  VERSION
-yavin-controller-0.c.example-com.internal  <none>   Ready   6m   v1.28.3
-yavin-worker-jrbf.c.example-com.internal   <none>   Ready   5m   v1.28.3
-yavin-worker-mzdm.c.example-com.internal   <none>   Ready   5m   v1.28.3
+yavin-controller-0.c.example-com.internal  <none>   Ready   6m   v1.31.3
+yavin-worker-jrbf.c.example-com.internal   <none>   Ready   5m   v1.31.3
+yavin-worker-mzdm.c.example-com.internal   <none>   Ready   5m   v1.31.3
 ```

 List the pods.
@ -114,9 +128,10 @@ List the pods.
 ```
 $ kubectl get pods --all-namespaces
 NAMESPACE     NAME                                      READY  STATUS    RESTARTS  AGE
-kube-system   calico-node-1cs8z                         2/2    Running   0         6m
-kube-system   calico-node-d1l5b                         2/2    Running   0         6m
-kube-system   calico-node-sp9ps                         2/2    Running   0         6m
+kube-system   cilium-1cs8z                              1/1    Running   0         6m
+kube-system   cilium-d1l5b                              1/1    Running   0         6m
+kube-system   cilium-sp9ps                              1/1    Running   0         6m
+kube-system   cilium-operator-68d778b448-g744f          1/1    Running   0         6m
 kube-system   coredns-1187388186-zj5dl                  1/1    Running   0         6m
 kube-system   coredns-1187388186-dkh3o                  1/1    Running   0         6m
 kube-system   kube-apiserver-controller-0               1/1    Running   0         6m
--- a/addons/cilium/cluster-role-binding.tf
+++ b/addons/cilium/cluster-role-binding.tf
@ -0,0 +1,36 @@
+# Grant the "cilium-operator" ClusterRole to the "cilium-operator"
+# ServiceAccount in the kube-system namespace.
+resource "kubernetes_cluster_role_binding" "operator" {
+  metadata {
+    name = "cilium-operator"
+  }
+
+  # Identity that receives the permissions
+  subject {
+    namespace = "kube-system"
+    name      = "cilium-operator"
+    kind      = "ServiceAccount"
+  }
+
+  # Cluster-wide role being granted
+  role_ref {
+    kind      = "ClusterRole"
+    name      = "cilium-operator"
+    api_group = "rbac.authorization.k8s.io"
+  }
+}
+
+# Grant the "cilium-agent" ClusterRole to the "cilium-agent"
+# ServiceAccount in the kube-system namespace.
+resource "kubernetes_cluster_role_binding" "agent" {
+  metadata {
+    name = "cilium-agent"
+  }
+
+  # Identity that receives the permissions
+  subject {
+    namespace = "kube-system"
+    name      = "cilium-agent"
+    kind      = "ServiceAccount"
+  }
+
+  # Cluster-wide role being granted
+  role_ref {
+    kind      = "ClusterRole"
+    name      = "cilium-agent"
+    api_group = "rbac.authorization.k8s.io"
+  }
+}
+
--- a/addons/cilium/cluster-role.tf
+++ b/addons/cilium/cluster-role.tf
@ -0,0 +1,112 @@
+# Cluster-wide permissions for the Cilium operator.
+resource "kubernetes_cluster_role" "operator" {
+  metadata {
+    name = "cilium-operator"
+  }
+
+  # Find and restart [core|kube]dns pods at startup
+  rule {
+    api_groups = [""]
+    resources  = ["pods"]
+    verbs      = ["get", "list", "watch", "delete"]
+  }
+
+  # Observe nodes
+  rule {
+    api_groups = [""]
+    resources  = ["nodes"]
+    verbs      = ["list", "watch"]
+  }
+
+  # Patch nodes and their status
+  rule {
+    api_groups = [""]
+    resources  = ["nodes", "nodes/status"]
+    verbs      = ["patch"]
+  }
+
+  # Read EndpointSlices
+  rule {
+    api_groups = ["discovery.k8s.io"]
+    resources  = ["endpointslices"]
+    verbs      = ["get", "list", "watch"]
+  }
+
+  # Read Services
+  rule {
+    api_groups = [""]
+    resources  = ["services"]
+    verbs      = ["get", "list", "watch"]
+  }
+
+  # LB IP allocation for BGP
+  rule {
+    api_groups = [""]
+    resources  = ["services/status"]
+    verbs      = ["update"]
+  }
+
+  # Translate a CNP that contains `ToGroup` to its endpoints
+  rule {
+    api_groups = [""]
+    resources  = ["services", "endpoints", "namespaces"]
+    verbs      = ["get", "list", "watch"]
+  }
+
+  # Full access to the listed cilium.io resources
+  rule {
+    api_groups = ["cilium.io"]
+    verbs      = ["*"]
+    resources = [
+      "ciliumnetworkpolicies",
+      "ciliumnetworkpolicies/status",
+      "ciliumnetworkpolicies/finalizers",
+      "ciliumclusterwidenetworkpolicies",
+      "ciliumclusterwidenetworkpolicies/status",
+      "ciliumclusterwidenetworkpolicies/finalizers",
+      "ciliumendpoints",
+      "ciliumendpoints/status",
+      "ciliumendpoints/finalizers",
+      "ciliumnodes",
+      "ciliumnodes/status",
+      "ciliumnodes/finalizers",
+      "ciliumidentities",
+      "ciliumidentities/status",
+      "ciliumidentities/finalizers",
+      "ciliumlocalredirectpolicies",
+      "ciliumlocalredirectpolicies/status",
+      "ciliumlocalredirectpolicies/finalizers",
+      "ciliumendpointslices",
+      "ciliumloadbalancerippools",
+      "ciliumloadbalancerippools/status",
+      "ciliumcidrgroups",
+      "ciliuml2announcementpolicies",
+      "ciliuml2announcementpolicies/status",
+      "ciliumpodippools",
+    ]
+  }
+
+  # Create and maintain CustomResourceDefinitions
+  rule {
+    api_groups = ["apiextensions.k8s.io"]
+    resources  = ["customresourcedefinitions"]
+    verbs      = ["create", "get", "list", "update", "watch"]
+  }
+
+  # Leader election among multiple operator replicas
+  rule {
+    api_groups = ["coordination.k8s.io"]
+    resources  = ["leases"]
+    verbs      = ["create", "get", "update"]
+  }
+}
+
+# Cluster-wide permissions for the Cilium agent.
+resource "kubernetes_cluster_role" "agent" {
+  metadata {
+    name = "cilium-agent"
+  }
+
+  # Read Kubernetes NetworkPolicies
+  rule {
+    api_groups = ["networking.k8s.io"]
+    resources  = ["networkpolicies"]
+    verbs      = ["get", "list", "watch"]
+  }
+
+  # Read EndpointSlices
+  rule {
+    api_groups = ["discovery.k8s.io"]
+    resources  = ["endpointslices"]
+    verbs      = ["get", "list", "watch"]
+  }
+
+  # Read core resources
+  rule {
+    api_groups = [""]
+    resources  = ["namespaces", "services", "pods", "endpoints", "nodes"]
+    verbs      = ["get", "list", "watch"]
+  }
+
+  # Patch node status
+  rule {
+    api_groups = [""]
+    resources  = ["nodes/status"]
+    verbs      = ["patch"]
+  }
+
+  # Create and maintain CustomResourceDefinitions
+  rule {
+    api_groups = ["apiextensions.k8s.io"]
+    resources  = ["customresourcedefinitions"]
+    verbs      = ["create", "get", "list", "watch", "update"]
+  }
+
+  # Full access to the listed cilium.io resources
+  rule {
+    api_groups = ["cilium.io"]
+    verbs      = ["*"]
+    resources = [
+      "ciliumnetworkpolicies",
+      "ciliumnetworkpolicies/status",
+      "ciliumclusterwidenetworkpolicies",
+      "ciliumclusterwidenetworkpolicies/status",
+      "ciliumendpoints",
+      "ciliumendpoints/status",
+      "ciliumnodes",
+      "ciliumnodes/status",
+      "ciliumidentities",
+      "ciliumidentities/status",
+      "ciliumlocalredirectpolicies",
+      "ciliumlocalredirectpolicies/status",
+      "ciliumegressnatpolicies",
+      "ciliumendpointslices",
+      "ciliumcidrgroups",
+      "ciliuml2announcementpolicies",
+      "ciliuml2announcementpolicies/status",
+      "ciliumpodippools",
+    ]
+  }
+}
+
--- a/addons/cilium/config.tf
+++ b/addons/cilium/config.tf
@ -0,0 +1,196 @@
+# Cilium configuration, published as the "cilium" ConfigMap in kube-system.
+#
+# Inputs:
+#   var.pod_cidr      - IPv4 CIDR used for the cluster-pool IPAM
+#   var.enable_hubble - toggles the Hubble server
+resource "kubernetes_config_map" "cilium" {
+  metadata {
+    name      = "cilium"
+    namespace = "kube-system"
+  }
+  data = {
+    # Identity allocation mode selects how identities are shared between cilium
+    # nodes by setting how they are stored. The options are "crd" or "kvstore".
+    # - "crd" stores identities in kubernetes as CRDs (custom resource definition).
+    #   These can be queried with:
+    #     kubectl get ciliumid
+    # - "kvstore" stores identities in a kvstore, etcd or consul, that is
+    #   configured below. Cilium versions before 1.6 supported only the kvstore
+    #   backend. Upgrades from these older cilium versions should continue using
+    #   the kvstore by commenting out the identity-allocation-mode below, or
+    #   setting it to "kvstore".
+    identity-allocation-mode    = "crd"
+    cilium-endpoint-gc-interval = "5m0s"
+    nodes-gc-interval           = "5m0s"
+
+    # If you want to run cilium in debug mode change this value to true
+    debug = "false"
+    # The agent can be put into the following three policy enforcement modes
+    # default, always and never.
+    # https://docs.cilium.io/en/latest/policy/intro/#policy-enforcement-modes
+    enable-policy = "default"
+
+    # Prometheus
+    enable-metrics                 = "true"
+    prometheus-serve-addr          = ":9962"
+    operator-prometheus-serve-addr = ":9963"
+    proxy-prometheus-port          = "9964" # envoy
+
+    # Enable IPv4 addressing. If enabled, all endpoints are allocated an IPv4
+    # address.
+    enable-ipv4 = "true"
+
+    # Enable IPv6 addressing. If enabled, all endpoints are allocated an IPv6
+    # address.
+    enable-ipv6 = "false"
+
+    # Enable probing for a more efficient clock source for the BPF datapath
+    enable-bpf-clock-probe = "true"
+
+    # Enable use of transparent proxying mechanisms (Linux 5.7+)
+    enable-bpf-tproxy = "false"
+
+    # If you want cilium monitor to aggregate tracing for packets, set this level
+    # to "low", "medium", or "maximum". The higher the level, the fewer packets
+    # that will be seen in monitor output.
+    monitor-aggregation = "medium"
+
+    # The monitor aggregation interval governs the typical time between monitor
+    # notification events for each allowed connection.
+    #
+    # Only effective when monitor aggregation is set to "medium" or higher.
+    monitor-aggregation-interval = "5s"
+
+    # The monitor aggregation flags determine which TCP flags, upon the
+    # first observation, cause monitor notifications to be generated.
+    #
+    # Only effective when monitor aggregation is set to "medium" or higher.
+    monitor-aggregation-flags = "all"
+
+    # Specifies the ratio (0.0-1.0) of total system memory to use for dynamic
+    # sizing of the TCP CT, non-TCP CT, NAT and policy BPF maps.
+    bpf-map-dynamic-size-ratio = "0.0025"
+    # bpf-policy-map-max specifies the maximum number of entries in endpoint
+    # policy map (per endpoint)
+    bpf-policy-map-max = "16384"
+    # bpf-lb-map-max specifies the maximum number of entries in bpf lb service,
+    # backend and affinity maps.
+    bpf-lb-map-max = "65536"
+
+    # Pre-allocation of map entries allows per-packet latency to be reduced, at
+    # the expense of up-front memory allocation for the entries in the maps. The
+    # default value below will minimize memory usage in the default installation;
+    # users who are sensitive to latency may consider setting this to "true".
+    #
+    # This option was introduced in Cilium 1.4. Cilium 1.3 and earlier ignore
+    # this option and behave as though it is set to "true".
+    #
+    # If this value is modified, then during the next Cilium startup the restore
+    # of existing endpoints and tracking of ongoing connections may be disrupted.
+    # As a result, reply packets may be dropped and the load-balancing decisions
+    # for established connections may change.
+    #
+    # If this option is set to "false" during an upgrade from 1.3 or earlier to
+    # 1.4 or later, then it may cause one-time disruptions during the upgrade.
+    preallocate-bpf-maps = "false"
+
+    # Name of the cluster. Only relevant when building a mesh of clusters.
+    cluster-name = "default"
+    # Unique ID of the cluster. Must be unique across all connected clusters and
+    # in the range of 1 and 255. Only relevant when building a mesh of clusters.
+    cluster-id = "0"
+
+    # Routing between nodes
+    # routing-mode:
+    #   - tunnel (encapsulate traffic between nodes)
+    #   - native
+    # tunnel-protocol (encapsulation used in tunnel mode):
+    #   - vxlan (default)
+    #   - geneve
+    # Note: the older `tunnel` key was removed in Cilium v1.15 and replaced by
+    # `tunnel-protocol`.
+    routing-mode    = "tunnel"
+    tunnel-protocol = "vxlan"
+    # Enables L7 proxy for L7 policy enforcement and visibility
+    enable-l7-proxy = "true"
+
+    auto-direct-node-routes = "false"
+
+    # enableXTSocketFallback enables the fallback compatibility solution
+    # when the xt_socket kernel module is missing and it is needed for
+    # the datapath L7 redirection to work properly.  See documentation
+    # for details on when this can be disabled:
+    # http://docs.cilium.io/en/latest/install/system_requirements/#admin-kernel-version.
+    enable-xt-socket-fallback = "true"
+
+    # installIptablesRules enables installation of iptables rules to allow for
+    # TPROXY (L7 proxy injection), iptables based masquerading and compatibility
+    # with kube-proxy. See documentation for details on when this can be
+    # disabled.
+    install-iptables-rules = "true"
+
+    # masquerade traffic leaving the node destined for outside
+    enable-ipv4-masquerade = "true"
+    enable-ipv6-masquerade = "false"
+
+    # bpfMasquerade enables masquerading with BPF instead of iptables
+    enable-bpf-masquerade = "true"
+
+    # kube-proxy
+    kube-proxy-replacement                      = "true"
+    kube-proxy-replacement-healthz-bind-address = ":10256"
+    enable-session-affinity                     = "true"
+
+    # ClusterIPs from host namespace
+    bpf-lb-sock = "true"
+    # ClusterIPs from external nodes
+    bpf-lb-external-clusterip = "true"
+
+    # NodePort
+    enable-node-port             = "true"
+    enable-health-check-nodeport = "false"
+
+    # ExternalIPs
+    enable-external-ips = "true"
+
+    # HostPort
+    enable-host-port = "true"
+
+    # IPAM
+    ipam                        = "cluster-pool"
+    disable-cnp-status-updates  = "true"
+    cluster-pool-ipv4-cidr      = var.pod_cidr
+    cluster-pool-ipv4-mask-size = "24"
+
+    # Health
+    agent-health-port               = "9876"
+    enable-health-checking          = "true"
+    enable-endpoint-health-checking = "true"
+
+    # Identity
+    enable-well-known-identities = "false"
+    enable-remote-node-identity  = "true"
+
+    # Hubble server
+    enable-hubble                  = var.enable_hubble
+    hubble-disable-tls             = "false"
+    hubble-listen-address          = ":4244"
+    hubble-socket-path             = "/var/run/cilium/hubble.sock"
+    hubble-tls-client-ca-files     = "/var/lib/cilium/tls/hubble/client-ca.crt"
+    hubble-tls-cert-file           = "/var/lib/cilium/tls/hubble/server.crt"
+    hubble-tls-key-file            = "/var/lib/cilium/tls/hubble/server.key"
+    hubble-export-file-max-backups = "5"
+    hubble-export-file-max-size-mb = "10"
+
+    # Hubble metrics
+    hubble-metrics-server      = ":9965"
+    hubble-metrics             = "dns drop tcp flow port-distribution icmp httpV2"
+    enable-hubble-open-metrics = "false"
+
+
+    # Misc
+    enable-bandwidth-manager        = "false"
+    enable-local-redirect-policy    = "false"
+    policy-audit-mode               = "false"
+    operator-api-serve-addr         = "127.0.0.1:9234"
+    enable-l2-neigh-discovery       = "true"
+    enable-k8s-terminating-endpoint = "true"
+    enable-k8s-networkpolicy        = "true"
+    external-envoy-proxy            = "false"
+    write-cni-conf-when-ready       = "/host/etc/cni/net.d/05-cilium.conflist"
+    cni-exclusive                   = "true"
+    cni-log-file                    = "/var/run/cilium/cilium-cni.log"
+  }
+}
+
--- a/addons/cilium/daemonset.tf
+++ b/addons/cilium/daemonset.tf
@ -0,0 +1,379 @@
+# DaemonSet for the Cilium agent. Init containers run install-cni, mount-cgroup,
+# and clean-cilium-state (in that order) before the cilium-agent container starts.
+resource "kubernetes_daemonset" "cilium" {
+  # Do not wait for the rollout to complete during apply
+  wait_for_rollout = false
+
+  metadata {
+    name      = "cilium"
+    namespace = "kube-system"
+    labels = {
+      k8s-app = "cilium"
+    }
+  }
+  spec {
+    strategy {
+      type = "RollingUpdate"
+      rolling_update {
+        max_unavailable = "1"
+      }
+    }
+    # Pods are selected by the "cilium-agent" label (the DaemonSet object itself
+    # is labeled "cilium" above)
+    selector {
+      match_labels = {
+        k8s-app = "cilium-agent"
+      }
+    }
+    template {
+      metadata {
+        labels = {
+          k8s-app = "cilium-agent"
+        }
+        # Prometheus scrape annotations; 9962 matches the "metrics" container port below
+        annotations = {
+          "prometheus.io/port"   = "9962"
+          "prometheus.io/scrape" = "true"
+        }
+      }
+      spec {
+        host_network         = true
+        priority_class_name  = "system-node-critical"
+        service_account_name = "cilium-agent"
+        security_context {
+          seccomp_profile {
+            type = "RuntimeDefault"
+          }
+        }
+        # Tolerate the controller and not-ready taints, plus any extra taint keys
+        # supplied via var.daemonset_tolerations
+        toleration {
+          key      = "node-role.kubernetes.io/controller"
+          operator = "Exists"
+        }
+        toleration {
+          key      = "node.kubernetes.io/not-ready"
+          operator = "Exists"
+        }
+        dynamic "toleration" {
+          for_each = var.daemonset_tolerations
+          content {
+            key      = toleration.value
+            operator = "Exists"
+          }
+        }
+        # Token automount is enabled explicitly at the pod level
+        automount_service_account_token = true
+        enable_service_links            = false
+
+        # Cilium v1.13.1 starts installing CNI plugins in yet another init container
+        # https://github.com/cilium/cilium/pull/24075
+        init_container {
+          name    = "install-cni"
+          image   = "quay.io/cilium/cilium:v1.16.4"
+          command = ["/install-plugin.sh"]
+          security_context {
+            allow_privilege_escalation = true
+            privileged                 = true
+            capabilities {
+              drop = ["ALL"]
+            }
+          }
+          volume_mount {
+            name       = "cni-bin-dir"
+            mount_path = "/host/opt/cni/bin"
+          }
+        }
+
+        # Required to mount cgroup2 filesystem on the underlying Kubernetes node.
+        # We use nsenter command with host's cgroup and mount namespaces enabled.
+        init_container {
+          name  = "mount-cgroup"
+          image = "quay.io/cilium/cilium:v1.16.4"
+          command = [
+            "sh",
+            "-ec",
+            # The statically linked Go program binary is invoked to avoid any
+            # dependency on utilities like sh and mount that can be missing on certain
+            # distros installed on the underlying host. Copy the binary to the
+            # same directory where we install cilium cni plugin so that exec permissions
+            # are available.
+            # Note: "$${BIN_PATH}" is Terraform's escape for a literal "${BIN_PATH}",
+            # so the shell (not Terraform) expands it from the env set below.
+            "cp /usr/bin/cilium-mount /hostbin/cilium-mount && nsenter --cgroup=/hostproc/1/ns/cgroup --mount=/hostproc/1/ns/mnt \"$${BIN_PATH}/cilium-mount\" $CGROUP_ROOT; rm /hostbin/cilium-mount"
+          ]
+          env {
+            name  = "CGROUP_ROOT"
+            value = "/run/cilium/cgroupv2"
+          }
+          env {
+            name  = "BIN_PATH"
+            value = "/opt/cni/bin"
+          }
+          security_context {
+            allow_privilege_escalation = true
+            privileged                 = true
+          }
+          volume_mount {
+            name       = "hostproc"
+            mount_path = "/hostproc"
+          }
+          # Host /opt/cni/bin (the "cni-bin-dir" volume) is mounted here as /hostbin
+          volume_mount {
+            name       = "cni-bin-dir"
+            mount_path = "/hostbin"
+          }
+        }
+
+        # Runs /init-container.sh with the bpf, state, and cgroup directories mounted
+        init_container {
+          name    = "clean-cilium-state"
+          image   = "quay.io/cilium/cilium:v1.16.4"
+          command = ["/init-container.sh"]
+          security_context {
+            allow_privilege_escalation = true
+            privileged                 = true
+          }
+          volume_mount {
+            name       = "sys-fs-bpf"
+            mount_path = "/sys/fs/bpf"
+          }
+          volume_mount {
+            name       = "var-run-cilium"
+            mount_path = "/var/run/cilium"
+          }
+          # Required to mount cgroup filesystem from the host to cilium agent pod
+          volume_mount {
+            name              = "cilium-cgroup"
+            mount_path        = "/run/cilium/cgroupv2"
+            mount_propagation = "HostToContainer"
+          }
+        }
+
+        # Main agent container; configuration is read from the mounted "config" volume
+        container {
+          name    = "cilium-agent"
+          image   = "quay.io/cilium/cilium:v1.16.4"
+          command = ["cilium-agent"]
+          args = [
+            "--config-dir=/tmp/cilium/config-map"
+          ]
+          env {
+            name = "K8S_NODE_NAME"
+            value_from {
+              field_ref {
+                api_version = "v1"
+                field_path  = "spec.nodeName"
+              }
+            }
+          }
+          env {
+            name = "CILIUM_K8S_NAMESPACE"
+            value_from {
+              field_ref {
+                api_version = "v1"
+                field_path  = "metadata.namespace"
+              }
+            }
+          }
+          # API server host/port are taken from the "in-cluster" ConfigMap
+          env {
+            name = "KUBERNETES_SERVICE_HOST"
+            value_from {
+              config_map_key_ref {
+                name = "in-cluster"
+                key  = "apiserver-host"
+              }
+            }
+          }
+          env {
+            name = "KUBERNETES_SERVICE_PORT"
+            value_from {
+              config_map_key_ref {
+                name = "in-cluster"
+                key  = "apiserver-port"
+              }
+            }
+          }
+          port {
+            name           = "peer-service"
+            protocol       = "TCP"
+            container_port = 4244
+          }
+          # Metrics
+          port {
+            name           = "metrics"
+            protocol       = "TCP"
+            container_port = 9962
+          }
+          port {
+            name           = "envoy-metrics"
+            protocol       = "TCP"
+            container_port = 9964
+          }
+          port {
+            name           = "hubble-metrics"
+            protocol       = "TCP"
+            container_port = 9965
+          }
+          # Not yet used, prefer exec's
+          port {
+            name           = "health"
+            protocol       = "TCP"
+            container_port = 9876
+          }
+          lifecycle {
+            pre_stop {
+              exec {
+                command = ["/cni-uninstall.sh"]
+              }
+            }
+          }
+          security_context {
+            allow_privilege_escalation = true
+            privileged                 = true
+          }
+          # Liveness and readiness both exec `cilium status --brief`; they differ
+          # only in delay, period, and failure threshold
+          liveness_probe {
+            exec {
+              command = ["cilium", "status", "--brief"]
+            }
+            initial_delay_seconds = 120
+            timeout_seconds       = 5
+            period_seconds        = 30
+            success_threshold     = 1
+            failure_threshold     = 10
+          }
+          readiness_probe {
+            exec {
+              command = ["cilium", "status", "--brief"]
+            }
+            initial_delay_seconds = 5
+            timeout_seconds       = 5
+            period_seconds        = 20
+            success_threshold     = 1
+            failure_threshold     = 3
+          }
+          # Load kernel modules
+          volume_mount {
+            name       = "lib-modules"
+            read_only  = true
+            mount_path = "/lib/modules"
+          }
+          # Access iptables concurrently
+          volume_mount {
+            name       = "xtables-lock"
+            mount_path = "/run/xtables.lock"
+          }
+          # Keep state between restarts
+          volume_mount {
+            name       = "var-run-cilium"
+            mount_path = "/var/run/cilium"
+          }
+          volume_mount {
+            name              = "sys-fs-bpf"
+            mount_path        = "/sys/fs/bpf"
+            mount_propagation = "Bidirectional"
+          }
+          # Configuration
+          volume_mount {
+            name       = "config"
+            read_only  = true
+            mount_path = "/tmp/cilium/config-map"
+          }
+          # Install config on host
+          volume_mount {
+            name       = "cni-conf-dir"
+            mount_path = "/host/etc/cni/net.d"
+          }
+          # Hubble
+          volume_mount {
+            name       = "hubble-tls"
+            mount_path = "/var/lib/cilium/tls/hubble"
+            read_only  = true
+          }
+        }
+        termination_grace_period_seconds = 1
+
+        # Load kernel modules
+        volume {
+          name = "lib-modules"
+          host_path {
+            path = "/lib/modules"
+          }
+        }
+        # Access iptables concurrently with other processes (e.g. kube-proxy)
+        volume {
+          name = "xtables-lock"
+          host_path {
+            path = "/run/xtables.lock"
+            type = "FileOrCreate"
+          }
+        }
+        # Keep state between restarts
+        volume {
+          name = "var-run-cilium"
+          host_path {
+            path = "/var/run/cilium"
+            type = "DirectoryOrCreate"
+          }
+        }
+        # Keep state for bpf maps between restarts
+        volume {
+          name = "sys-fs-bpf"
+          host_path {
+            path = "/sys/fs/bpf"
+            type = "DirectoryOrCreate"
+          }
+        }
+        # Mount host cgroup2 filesystem
+        volume {
+          name = "hostproc"
+          host_path {
+            path = "/proc"
+            type = "Directory"
+          }
+        }
+        volume {
+          name = "cilium-cgroup"
+          host_path {
+            path = "/run/cilium/cgroupv2"
+            type = "DirectoryOrCreate"
+          }
+        }
+        # Read configuration
+        volume {
+          name = "config"
+          config_map {
+            name = "cilium"
+          }
+        }
+        # Install CNI plugin and config on host
+        volume {
+          name = "cni-bin-dir"
+          host_path {
+            path = "/opt/cni/bin"
+            type = "DirectoryOrCreate"
+          }
+        }
+        volume {
+          name = "cni-conf-dir"
+          host_path {
+            path = "/etc/cni/net.d"
+            type = "DirectoryOrCreate"
+          }
+        }
+        # Hubble TLS (optional)
+        # The Secret is marked optional, so the pod can start without it; its keys
+        # are projected to the file names the hubble-tls-* config options point at.
+        volume {
+          name = "hubble-tls"
+          projected {
+            default_mode = "0400"
+            sources {
+              secret {
+                name     = "hubble-server-certs"
+                optional = true
+                items {
+                  key  = "ca.crt"
+                  path = "client-ca.crt"
+                }
+                items {
+                  key  = "tls.crt"
+                  path = "server.crt"
+                }
+                items {
+                  key  = "tls.key"
+                  path = "server.key"
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
--- a/addons/cilium/deployment.tf
+++ b/addons/cilium/deployment.tf
@ -0,0 +1,163 @@
+# Deployment for the Cilium operator (single replica, host network).
+resource "kubernetes_deployment" "operator" {
+  # Do not wait for the rollout to complete during apply
+  wait_for_rollout = false
+  metadata {
+    name      = "cilium-operator"
+    namespace = "kube-system"
+  }
+  spec {
+    replicas = 1
+    strategy {
+      type = "RollingUpdate"
+      rolling_update {
+        max_unavailable = "1"
+      }
+    }
+    selector {
+      match_labels = {
+        name = "cilium-operator"
+      }
+    }
+    template {
+      metadata {
+        labels = {
+          name = "cilium-operator"
+        }
+        # Prometheus scrape annotations; 9963 matches the "metrics" port below
+        annotations = {
+          "prometheus.io/scrape" = "true"
+          "prometheus.io/port"   = "9963"
+        }
+      }
+      spec {
+        host_network         = true
+        priority_class_name  = "system-cluster-critical"
+        service_account_name = "cilium-operator"
+        security_context {
+          seccomp_profile {
+            type = "RuntimeDefault"
+          }
+        }
+        toleration {
+          key      = "node-role.kubernetes.io/controller"
+          operator = "Exists"
+        }
+        toleration {
+          key      = "node.kubernetes.io/not-ready"
+          operator = "Exists"
+        }
+        # Spread operator pods across hostnames (hard requirement: DoNotSchedule)
+        topology_spread_constraint {
+          max_skew           = 1
+          topology_key       = "kubernetes.io/hostname"
+          when_unsatisfiable = "DoNotSchedule"
+          label_selector {
+            match_labels = {
+              name = "cilium-operator"
+            }
+          }
+        }
+        # Token automount is enabled explicitly at the pod level
+        automount_service_account_token = true
+        enable_service_links            = false
+        container {
+          name    = "cilium-operator"
+          image   = "quay.io/cilium/operator-generic:v1.16.4"
+          command = ["cilium-operator-generic"]
+          # $(CILIUM_DEBUG) refers to the CILIUM_DEBUG env var defined below
+          args = [
+            "--config-dir=/tmp/cilium/config-map",
+            "--debug=$(CILIUM_DEBUG)"
+          ]
+          env {
+            name = "K8S_NODE_NAME"
+            value_from {
+              field_ref {
+                api_version = "v1"
+                field_path  = "spec.nodeName"
+              }
+            }
+          }
+          env {
+            name = "CILIUM_K8S_NAMESPACE"
+            value_from {
+              field_ref {
+                api_version = "v1"
+                field_path  = "metadata.namespace"
+              }
+            }
+          }
+          # API server host/port are taken from the "in-cluster" ConfigMap
+          env {
+            name = "KUBERNETES_SERVICE_HOST"
+            value_from {
+              config_map_key_ref {
+                name = "in-cluster"
+                key  = "apiserver-host"
+              }
+            }
+          }
+          env {
+            name = "KUBERNETES_SERVICE_PORT"
+            value_from {
+              config_map_key_ref {
+                name = "in-cluster"
+                key  = "apiserver-port"
+              }
+            }
+          }
+          # Optional "debug" key of the "cilium" ConfigMap
+          env {
+            name = "CILIUM_DEBUG"
+            value_from {
+              config_map_key_ref {
+                name     = "cilium"
+                key      = "debug"
+                optional = true
+              }
+            }
+          }
+          port {
+            name           = "metrics"
+            protocol       = "TCP"
+            host_port      = 9963
+            container_port = 9963
+          }
+          port {
+            name           = "health"
+            container_port = 9234
+            protocol       = "TCP"
+          }
+          # Both probes hit /healthz on 127.0.0.1:9234 (the "health" port above)
+          liveness_probe {
+            http_get {
+              scheme = "HTTP"
+              host   = "127.0.0.1"
+              port   = "9234"
+              path   = "/healthz"
+            }
+            initial_delay_seconds = 60
+            timeout_seconds       = 3
+            period_seconds        = 10
+          }
+          readiness_probe {
+            http_get {
+              scheme = "HTTP"
+              host   = "127.0.0.1"
+              port   = "9234"
+              path   = "/healthz"
+            }
+            timeout_seconds   = 3
+            period_seconds    = 15
+            failure_threshold = 5
+          }
+          # Configuration directory passed via --config-dir
+          volume_mount {
+            name       = "config"
+            read_only  = true
+            mount_path = "/tmp/cilium/config-map"
+          }
+        }
+
+        # "cilium" ConfigMap backing the config volume
+        volume {
+          name = "config"
+          config_map {
+            name = "cilium"
+          }
+        }
+      }
+    }
+  }
+}
+
--- a/addons/cilium/service-account.tf
+++ b/addons/cilium/service-account.tf
@ -0,0 +1,15 @@
+# ServiceAccount for the Cilium operator; token automount is off at the account level
+resource "kubernetes_service_account" "operator" {
+  automount_service_account_token = false
+
+  metadata {
+    namespace = "kube-system"
+    name      = "cilium-operator"
+  }
+}
+
+# ServiceAccount for the Cilium agent; token automount is off at the account level
+resource "kubernetes_service_account" "agent" {
+  automount_service_account_token = false
+
+  metadata {
+    namespace = "kube-system"
+    name      = "cilium-agent"
+  }
+}
--- a/addons/cilium/variables.tf
+++ b/addons/cilium/variables.tf
@ -0,0 +1,17 @@
+# Pod network range; consumed as cluster-pool-ipv4-cidr in the Cilium config
+variable "pod_cidr" {
+  description = "CIDR IP range to assign Kubernetes pods"
+  type        = string
+  default     = "10.2.0.0/16"
+}
+
+# Extra taint keys; each becomes an "Exists" toleration on the Cilium DaemonSet
+variable "daemonset_tolerations" {
+  description = "List of additional taint keys kube-system DaemonSets should tolerate (e.g. ['custom-role', 'gpu-role'])"
+  type        = list(string)
+  default     = []
+}
+
+# Toggle for the enable-hubble setting in the Cilium config
+variable "enable_hubble" {
+  description = "Run the embedded Hubble Server and mount hubble-server-certs Secret"
+  type        = bool
+  default     = true
+}
--- a/addons/cilium/versions.tf
+++ b/addons/cilium/versions.tf
@ -0,0 +1,8 @@
+# Provider requirements for the cilium addon module
+terraform {
+  required_providers {
+    kubernetes = {
+      version = "~> 2.8"
+      source  = "hashicorp/kubernetes"
+    }
+  }
+}
--- a/addons/coredns/cluster-role.tf
+++ b/addons/coredns/cluster-role.tf
@ -0,0 +1,37 @@
+# Read-only ClusterRole for CoreDNS
+resource "kubernetes_cluster_role" "coredns" {
+  metadata {
+    name = "system:coredns"
+  }
+
+  # Core API objects CoreDNS lists and watches
+  rule {
+    api_groups = [""]
+    resources  = ["endpoints", "services", "pods", "namespaces"]
+    verbs      = ["list", "watch"]
+  }
+
+  # Node lookups
+  rule {
+    api_groups = [""]
+    resources  = ["nodes"]
+    verbs      = ["get"]
+  }
+
+  # EndpointSlices from the discovery API group
+  rule {
+    api_groups = ["discovery.k8s.io"]
+    resources  = ["endpointslices"]
+    verbs      = ["list", "watch"]
+  }
+}
--- a/addons/coredns/config.tf
+++ b/addons/coredns/config.tf
@ -0,0 +1,30 @@
+# ConfigMap holding the CoreDNS Corefile; the coredns Deployment mounts the
+# "Corefile" key at /etc/coredns/Corefile.
+resource "kubernetes_config_map" "coredns" {
+  metadata {
+    name      = "coredns"
+    namespace = "kube-system"
+  }
+  data = {
+    # Indented heredoc; var.cluster_domain_suffix is interpolated as the
+    # zone served by the kubernetes plugin.
+    "Corefile" = <<-EOF
+      .:53 {
+          errors
+          health {
+            lameduck 5s
+          }
+          ready
+          log . {
+              class error
+          }
+          kubernetes ${var.cluster_domain_suffix} in-addr.arpa ip6.arpa {
+              pods insecure
+              fallthrough in-addr.arpa ip6.arpa
+          }
+          prometheus :9153
+          forward . /etc/resolv.conf
+          cache 30
+          loop
+          reload
+          loadbalance
+      }
+  EOF
+  }
+}
--- a/addons/coredns/deployment.tf
+++ b/addons/coredns/deployment.tf
@ -0,0 +1,151 @@
+# Deployment for CoreDNS; replica count comes from var.replicas.
+resource "kubernetes_deployment" "coredns" {
+  # Do not wait for the rollout to complete during apply
+  wait_for_rollout = false
+  metadata {
+    name      = "coredns"
+    namespace = "kube-system"
+    labels = {
+      k8s-app              = "coredns"
+      "kubernetes.io/name" = "CoreDNS"
+    }
+  }
+  spec {
+    replicas = var.replicas
+    strategy {
+      type = "RollingUpdate"
+      rolling_update {
+        max_unavailable = "1"
+      }
+    }
+    selector {
+      match_labels = {
+        k8s-app = "coredns"
+        tier    = "control-plane"
+      }
+    }
+    template {
+      metadata {
+        labels = {
+          k8s-app = "coredns"
+          tier    = "control-plane"
+        }
+      }
+      spec {
+        affinity {
+          # Soft preference for nodes carrying the controller label
+          node_affinity {
+            preferred_during_scheduling_ignored_during_execution {
+              weight = 100
+              preference {
+                match_expressions {
+                  key      = "node.kubernetes.io/controller"
+                  operator = "Exists"
+                }
+              }
+            }
+          }
+          # Soft preference to keep CoreDNS pods on different hostnames
+          pod_anti_affinity {
+            preferred_during_scheduling_ignored_during_execution {
+              weight = 100
+              pod_affinity_term {
+                label_selector {
+                  match_expressions {
+                    key      = "tier"
+                    operator = "In"
+                    values   = ["control-plane"]
+                  }
+                  match_expressions {
+                    key      = "k8s-app"
+                    operator = "In"
+                    values   = ["coredns"]
+                  }
+                }
+                topology_key = "kubernetes.io/hostname"
+              }
+            }
+          }
+        }
+        dns_policy          = "Default"
+        priority_class_name = "system-cluster-critical"
+        security_context {
+          seccomp_profile {
+            type = "RuntimeDefault"
+          }
+        }
+        service_account_name = "coredns"
+        # Allow scheduling onto nodes with the controller NoSchedule taint
+        toleration {
+          key    = "node-role.kubernetes.io/controller"
+          effect = "NoSchedule"
+        }
+        container {
+          name  = "coredns"
+          image = "registry.k8s.io/coredns/coredns:v1.12.0"
+          # Corefile path matches the "config" volume mount below
+          args  = ["-conf", "/etc/coredns/Corefile"]
+          port {
+            name           = "dns"
+            container_port = 53
+            protocol       = "UDP"
+          }
+          port {
+            name           = "dns-tcp"
+            container_port = 53
+            protocol       = "TCP"
+          }
+          port {
+            name           = "metrics"
+            container_port = 9153
+            protocol       = "TCP"
+          }
+          # Memory is both requested and limited; CPU has a request only
+          resources {
+            requests = {
+              cpu    = "100m"
+              memory = "70Mi"
+            }
+            limits = {
+              memory = "170Mi"
+            }
+          }
+          # Drop all capabilities except binding to low ports; root filesystem is read-only
+          security_context {
+            capabilities {
+              add  = ["NET_BIND_SERVICE"]
+              drop = ["all"]
+            }
+            read_only_root_filesystem = true
+          }
+          # Liveness checks /health and readiness checks /ready (the Corefile
+          # enables the health and ready plugins)
+          liveness_probe {
+            http_get {
+              path   = "/health"
+              port   = "8080"
+              scheme = "HTTP"
+            }
+            initial_delay_seconds = 60
+            timeout_seconds       = 5
+            success_threshold     = 1
+            failure_threshold     = 5
+          }
+          readiness_probe {
+            http_get {
+              path   = "/ready"
+              port   = "8181"
+              scheme = "HTTP"
+            }
+          }
+          volume_mount {
+            name       = "config"
+            mount_path = "/etc/coredns"
+            read_only  = true
+          }
+        }
+        # Project only the "Corefile" key of the coredns ConfigMap
+        volume {
+          name = "config"
+          config_map {
+            name = "coredns"
+            items {
+              key  = "Corefile"
+              path = "Corefile"
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
--- a/addons/coredns/service-account.tf
+++ b/addons/coredns/service-account.tf
@ -0,0 +1,24 @@
+# ServiceAccount for CoreDNS; token automount is off at the account level
+resource "kubernetes_service_account" "coredns" {
+  automount_service_account_token = false
+
+  metadata {
+    namespace = "kube-system"
+    name      = "coredns"
+  }
+}
+
+
+# Bind the system:coredns ClusterRole to the coredns ServiceAccount in kube-system
+resource "kubernetes_cluster_role_binding" "coredns" {
+  metadata {
+    name = "system:coredns"
+  }
+
+  subject {
+    namespace = "kube-system"
+    name      = "coredns"
+    kind      = "ServiceAccount"
+  }
+
+  role_ref {
+    name      = "system:coredns"
+    kind      = "ClusterRole"
+    api_group = "rbac.authorization.k8s.io"
+  }
+}
--- a/addons/coredns/service.tf
+++ b/addons/coredns/service.tf
@ -0,0 +1,31 @@
+# ClusterIP Service for CoreDNS on port 53 (UDP and TCP), pinned to
+# var.cluster_dns_service_ip
+resource "kubernetes_service" "coredns" {
+  metadata {
+    namespace = "kube-system"
+    name      = "coredns"
+    annotations = {
+      "prometheus.io/port"   = "9153"
+      "prometheus.io/scrape" = "true"
+    }
+    labels = {
+      "kubernetes.io/name" = "CoreDNS"
+      "k8s-app"            = "coredns"
+    }
+  }
+
+  spec {
+    selector = {
+      k8s-app = "coredns"
+    }
+    cluster_ip = var.cluster_dns_service_ip
+    type       = "ClusterIP"
+
+    port {
+      port     = 53
+      protocol = "UDP"
+      name     = "dns"
+    }
+    port {
+      port     = 53
+      protocol = "TCP"
+      name     = "dns-tcp"
+    }
+  }
+}
--- a/addons/coredns/variables.tf
+++ b/addons/coredns/variables.tf
@ -0,0 +1,15 @@
+# Number of CoreDNS pods requested by the Deployment
+variable "replicas" {
+  description = "CoreDNS replica count"
+  type        = number
+  default     = 2
+}
+
+# IP assigned as the CoreDNS Service's cluster_ip.
+# Declared as a string so a wrongly typed value fails at variable validation.
+variable "cluster_dns_service_ip" {
+  type        = string
+  description = "Must be set to `cluster_dns_service_ip` output by cluster"
+  default     = "10.3.0.10"
+}
+
+# DNS zone interpolated into the Corefile's kubernetes plugin line.
+# Declared as a string so a wrongly typed value fails at variable validation.
+variable "cluster_domain_suffix" {
+  type        = string
+  description = "Must be set to `cluster_domain_suffix` output by cluster"
+  default     = "cluster.local"
+}
--- a/addons/coredns/versions.tf
+++ b/addons/coredns/versions.tf
@ -0,0 +1,9 @@
+# Provider requirements for the coredns addon module
+terraform {
+  required_providers {
+    kubernetes = {
+      version = "~> 2.8"
+      source  = "hashicorp/kubernetes"
+    }
+  }
+}
+
--- a/addons/flannel/cluster-role-binding.tf
+++ b/addons/flannel/cluster-role-binding.tf
@ -0,0 +1,18 @@
+# Bind the flannel ClusterRole to the flannel ServiceAccount in kube-system
+resource "kubernetes_cluster_role_binding" "flannel" {
+  metadata {
+    name = "flannel"
+  }
+
+  subject {
+    namespace = "kube-system"
+    name      = "flannel"
+    kind      = "ServiceAccount"
+  }
+
+  role_ref {
+    name      = "flannel"
+    kind      = "ClusterRole"
+    api_group = "rbac.authorization.k8s.io"
+  }
+}
+
--- a/addons/flannel/cluster-role.tf
+++ b/addons/flannel/cluster-role.tf
@ -0,0 +1,24 @@
+# ClusterRole granting flannel its pod, node, and node-status permissions
+resource "kubernetes_cluster_role" "flannel" {
+  metadata {
+    name = "flannel"
+  }
+
+  # Read individual pods
+  rule {
+    api_groups = [""]
+    resources = [
+      "pods",
+    ]
+    verbs = [
+      "get",
+    ]
+  }
+
+  # List and watch nodes
+  rule {
+    api_groups = [""]
+    resources = [
+      "nodes",
+    ]
+    verbs = [
+      "list",
+      "watch",
+    ]
+  }
+
+  # Patch node status
+  rule {
+    api_groups = [""]
+    resources = [
+      "nodes/status",
+    ]
+    verbs = [
+      "patch",
+    ]
+  }
+}
+
--- a/addons/flannel/config.tf
+++ b/addons/flannel/config.tf
@ -0,0 +1,44 @@
+# ConfigMap with flannel's CNI and network configuration. The flannel DaemonSet
+# reads "cni-conf.json" through the install-cni init container's
+# CNI_NETWORK_CONFIG env var and mounts the whole map at /etc/kube-flannel/.
+resource "kubernetes_config_map" "config" {
+  metadata {
+    name      = "flannel-config"
+    namespace = "kube-system"
+    labels = {
+      k8s-app = "flannel"
+      tier    = "node"
+    }
+  }
+
+  data = {
+    # CNI plugin chain: flannel (delegating with hairpin mode and default
+    # gateway enabled) followed by portmap
+    "cni-conf.json" = <<-EOF
+      {
+        "name": "cbr0",
+        "cniVersion": "0.3.1",
+        "plugins": [
+          {
+            "type": "flannel",
+            "delegate": {
+              "hairpinMode": true,
+              "isDefaultGateway": true
+            }
+          },
+          {
+            "type": "portmap",
+            "capabilities": {
+              "portMappings": true
+            }
+          }
+        ]
+      }
+    EOF
+    # Network settings: var.pod_cidr as the network, vxlan backend on port 4789
+    "net-conf.json" = <<-EOF
+      {
+        "Network": "${var.pod_cidr}",
+        "Backend": {
+          "Type": "vxlan",
+          "Port": 4789
+        }
+      }
+    EOF
+  }
+}
+
--- a/addons/flannel/daemonset.tf
+++ b/addons/flannel/daemonset.tf
@ -0,0 +1,167 @@
+# DaemonSet for flannel. The install-cni init container writes the CNI plugin
+# and config to the host before the flannel container starts.
+# NOTE(review): wait_for_rollout is not set here, whereas the cilium DaemonSet
+# in this change sets it to false — confirm the difference is intended.
+resource "kubernetes_daemonset" "flannel" {
+  metadata {
+    name      = "flannel"
+    namespace = "kube-system"
+    labels = {
+      k8s-app = "flannel"
+    }
+  }
+  spec {
+    strategy {
+      type = "RollingUpdate"
+      rolling_update {
+        max_unavailable = "1"
+      }
+    }
+    selector {
+      match_labels = {
+        k8s-app = "flannel"
+      }
+    }
+    template {
+      metadata {
+        labels = {
+          k8s-app = "flannel"
+        }
+      }
+      spec {
+        host_network         = true
+        priority_class_name  = "system-node-critical"
+        service_account_name = "flannel"
+        security_context {
+          seccomp_profile {
+            type = "RuntimeDefault"
+          }
+        }
+        # Tolerate the controller and not-ready taints, plus any extra taint keys
+        # supplied via var.daemonset_tolerations
+        toleration {
+          key      = "node-role.kubernetes.io/controller"
+          operator = "Exists"
+        }
+        toleration {
+          key      = "node.kubernetes.io/not-ready"
+          operator = "Exists"
+        }
+        dynamic "toleration" {
+          for_each = var.daemonset_tolerations
+          content {
+            key      = toleration.value
+            operator = "Exists"
+          }
+        }
+        # Receives cni-conf.json from the flannel-config ConfigMap as an env var
+        init_container {
+          name    = "install-cni"
+          image   = "quay.io/poseidon/flannel-cni:v0.4.2"
+          command = ["/install-cni.sh"]
+          env {
+            name = "CNI_NETWORK_CONFIG"
+            value_from {
+              config_map_key_ref {
+                name = "flannel-config"
+                key  = "cni-conf.json"
+              }
+            }
+          }
+          volume_mount {
+            name       = "cni-bin-dir"
+            mount_path = "/host/opt/cni/bin/"
+          }
+          volume_mount {
+            name       = "cni-conf-dir"
+            mount_path = "/host/etc/cni/net.d"
+          }
+        }
+
+        container {
+          name  = "flannel"
+          image = "docker.io/flannel/flannel:v0.26.1"
+          # $(POD_IP) refers to the POD_IP env var defined below (status.podIP)
+          command = [
+            "/opt/bin/flanneld",
+            "--ip-masq",
+            "--kube-subnet-mgr",
+            "--iface=$(POD_IP)"
+          ]
+          env {
+            name = "POD_NAME"
+            value_from {
+              field_ref {
+                field_path = "metadata.name"
+              }
+            }
+          }
+          env {
+            name = "POD_NAMESPACE"
+            value_from {
+              field_ref {
+                field_path = "metadata.namespace"
+              }
+            }
+          }
+          env {
+            name = "POD_IP"
+            value_from {
+              field_ref {
+                field_path = "status.podIP"
+              }
+            }
+          }
+          security_context {
+            privileged = true
+          }
+          resources {
+            requests = {
+              cpu = "100m"
+            }
+          }
+          volume_mount {
+            name       = "flannel-config"
+            mount_path = "/etc/kube-flannel/"
+          }
+          volume_mount {
+            name       = "run-flannel"
+            mount_path = "/run/flannel"
+          }
+          volume_mount {
+            name       = "xtables-lock"
+            mount_path = "/run/xtables.lock"
+          }
+        }
+
+        # flannel-config ConfigMap (cni-conf.json and net-conf.json)
+        volume {
+          name = "flannel-config"
+          config_map {
+            name = "flannel-config"
+          }
+        }
+        volume {
+          name = "run-flannel"
+          host_path {
+            path = "/run/flannel"
+          }
+        }
+        # Used by install-cni
+        volume {
+          name = "cni-bin-dir"
+          host_path {
+            path = "/opt/cni/bin"
+          }
+        }
+        volume {
+          name = "cni-conf-dir"
+          host_path {
+            path = "/etc/cni/net.d"
+            type = "DirectoryOrCreate"
+          }
+        }
+        # Access iptables concurrently
+        volume {
+          name = "xtables-lock"
+          host_path {
+            path = "/run/xtables.lock"
+            type = "FileOrCreate"
+          }
+        }
+      }
+    }
+  }
+}
+
--- a/addons/flannel/service-account.tf
+++ b/addons/flannel/service-account.tf
@ -0,0 +1,7 @@
+# ServiceAccount referenced by the flannel DaemonSet pods
+resource "kubernetes_service_account" "flannel" {
+  metadata {
+    namespace = "kube-system"
+    name      = "flannel"
+  }
+}
+
--- a/addons/flannel/variables.tf
+++ b/addons/flannel/variables.tf
@ -0,0 +1,11 @@
+# Pod network range; written as "Network" in flannel's net-conf.json
+variable "pod_cidr" {
+  description = "CIDR IP range to assign Kubernetes pods"
+  type        = string
+  default     = "10.2.0.0/16"
+}
+
+# Extra taint keys; each becomes an "Exists" toleration on the flannel DaemonSet
+variable "daemonset_tolerations" {
+  description = "List of additional taint keys kube-system DaemonSets should tolerate (e.g. ['custom-role', 'gpu-role'])"
+  type        = list(string)
+  default     = []
+}
--- a/addons/flannel/versions.tf
+++ b/addons/flannel/versions.tf
@ -0,0 +1,8 @@
+# Provider requirements for the flannel addon module
+terraform {
+  required_providers {
+    kubernetes = {
+      version = "~> 2.8"
+      source  = "hashicorp/kubernetes"
+    }
+  }
+}
--- a/addons/nginx-ingress/aws/rbac/cluster-role.yaml
+++ b/addons/nginx-ingress/aws/rbac/cluster-role.yaml
@ -59,4 +59,11 @@ rules:
      - get
      - list
      - watch
-
+  - apiGroups:
+      - discovery.k8s.io
+    resources:
+      - "endpointslices"
+    verbs:
+      - get
+      - list
+      - watch
--- a/addons/nginx-ingress/azure/rbac/cluster-role.yaml
+++ b/addons/nginx-ingress/azure/rbac/cluster-role.yaml
@ -59,4 +59,11 @@ rules:
      - get
      - list
      - watch
-
+  - apiGroups:
+      - discovery.k8s.io
+    resources:
+      - "endpointslices"
+    verbs:
+      - get
+      - list
+      - watch
--- a/addons/nginx-ingress/bare-metal/rbac/cluster-role.yaml
+++ b/addons/nginx-ingress/bare-metal/rbac/cluster-role.yaml
@ -59,4 +59,11 @@ rules:
      - get
      - list
      - watch
-
+  - apiGroups:
+      - discovery.k8s.io
+    resources:
+      - "endpointslices"
+    verbs:
+      - get
+      - list
+      - watch
--- a/addons/nginx-ingress/bare-metal/service.yaml
+++ b/addons/nginx-ingress/bare-metal/service.yaml
@ -1,7 +1,7 @@
 apiVersion: v1
 kind: Service
 metadata:
-  name: ingress-controller-public
+  name: nginx-ingress-controller
  namespace: ingress
  annotations:
    prometheus.io/scrape: 'true'
@ -10,7 +10,7 @@ spec:
  type: ClusterIP
  clusterIP: 10.3.0.12
  selector:
-    name: ingress-controller-public
+    name: nginx-ingress-controller
    phase: prod
  ports:
    - name: http
--- a/addons/nginx-ingress/digital-ocean/rbac/cluster-role.yaml
+++ b/addons/nginx-ingress/digital-ocean/rbac/cluster-role.yaml
@ -59,4 +59,11 @@ rules:
      - get
      - list
      - watch
-
+  - apiGroups:
+      - discovery.k8s.io
+    resources:
+      - "endpointslices"
+    verbs:
+      - get
+      - list
+      - watch
--- a/addons/nginx-ingress/google-cloud/rbac/cluster-role.yaml
+++ b/addons/nginx-ingress/google-cloud/rbac/cluster-role.yaml
@ -59,4 +59,11 @@ rules:
      - get
      - list
      - watch
-
+  - apiGroups:
+      - discovery.k8s.io
+    resources:
+      - "endpointslices"
+    verbs:
+      - get
+      - list
+      - watch
--- a/aws/fedora-coreos/kubernetes/README.md
+++ b/aws/fedora-coreos/kubernetes/README.md
@ -11,7 +11,7 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster

 ## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>

-* Kubernetes v1.28.3 (upstream)
+* Kubernetes v1.31.3 (upstream)
 * Single or multi-master, [Calico](https://www.projectcalico.org/) or [Cilium](https://github.com/cilium/cilium) or [flannel](https://github.com/coreos/flannel) networking
 * On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/), SELinux enforcing
 * Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [spot](https://typhoon.psdn.io/fedora-coreos/aws/#spot) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#hosts) customization
--- a/aws/fedora-coreos/kubernetes/ami.tf
+++ b/aws/fedora-coreos/kubernetes/ami.tf
@ -19,7 +19,7 @@ data "aws_ami" "fedora-coreos" {
 }

 data "aws_ami" "fedora-coreos-arm" {
-  count = var.arch == "arm64" ? 1 : 0
+  count = var.controller_arch == "arm64" ? 1 : 0

  most_recent = true
  owners      = ["125523088429"]
--- a/aws/fedora-coreos/kubernetes/bootstrap.tf
+++ b/aws/fedora-coreos/kubernetes/bootstrap.tf
@ -1,6 +1,6 @@
 # Kubernetes assets (kubeconfig, manifests)
 module "bootstrap" {
-  source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=d151ab77b7ebdfb878ea110c86cc77238189f1ed"
+  source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=e6a1c7bccfc45ab299b5f8149bc3840f99b30b2b"

  cluster_name          = var.cluster_name
  api_servers           = [format("%s.%s", var.cluster_name, var.dns_zone)]
@ -9,9 +9,7 @@ module "bootstrap" {
  network_mtu           = var.network_mtu
  pod_cidr              = var.pod_cidr
  service_cidr          = var.service_cidr
-  cluster_domain_suffix = var.cluster_domain_suffix
-  enable_reporting      = var.enable_reporting
-  enable_aggregation    = var.enable_aggregation
  daemonset_tolerations = var.daemonset_tolerations
+  components            = var.components
 }

--- a/aws/fedora-coreos/kubernetes/butane/controller.yaml
+++ b/aws/fedora-coreos/kubernetes/butane/controller.yaml
@ -12,7 +12,7 @@ systemd:
        Wants=network-online.target
        After=network-online.target
        [Service]
-        Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.10
+        Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.13
        Type=exec
        ExecStartPre=/bin/mkdir -p /var/lib/etcd
        ExecStartPre=-/usr/bin/podman rm etcd
@ -57,7 +57,7 @@ systemd:
        After=afterburn.service
        Wants=rpc-statd.service
        [Service]
-        Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3
+        Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3
        EnvironmentFile=/run/metadata/afterburn
        ExecStartPre=/bin/mkdir -p /etc/cni/net.d
        ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
@ -116,7 +116,7 @@ systemd:
            --volume /opt/bootstrap/assets:/assets:ro,Z \
            --volume /opt/bootstrap/apply:/apply:ro,Z \
            --entrypoint=/apply \
-            quay.io/poseidon/kubelet:v1.28.3
+            quay.io/poseidon/kubelet:v1.31.3
        ExecStartPost=/bin/touch /opt/bootstrap/bootstrap.done
        ExecStartPost=-/usr/bin/podman stop bootstrap
 storage:
@ -149,7 +149,7 @@ storage:
          cgroupDriver: systemd
          clusterDNS:
            - ${cluster_dns_service_ip}
-          clusterDomain: ${cluster_domain_suffix}
+          clusterDomain: cluster.local
          healthzPort: 0
          rotateCertificates: true
          shutdownGracePeriod: 45s
@ -163,7 +163,7 @@ storage:
      contents:
        inline: |
          #!/bin/bash -e
-          mkdir -p -- auth tls/etcd tls/k8s static-manifests manifests/coredns manifests-networking
+          mkdir -p -- auth tls/{etcd,k8s} static-manifests manifests/{coredns,kube-proxy,network}
          awk '/#####/ {filename=$2; next} {print > filename}' assets
          mkdir -p /etc/ssl/etcd/etcd
          mkdir -p /etc/kubernetes/pki
@ -177,8 +177,7 @@ storage:
          mv static-manifests/* /etc/kubernetes/manifests/
          mkdir -p /opt/bootstrap/assets
          mv manifests /opt/bootstrap/assets/manifests
-          mv manifests-networking/* /opt/bootstrap/assets/manifests/
-          rm -rf assets auth static-manifests tls manifests-networking
+          rm -rf assets auth static-manifests tls manifests
          chcon -R -u system_u -t container_file_t /etc/kubernetes/pki
    - path: /opt/bootstrap/apply
      mode: 0544
--- a/aws/fedora-coreos/kubernetes/controllers.tf
+++ b/aws/fedora-coreos/kubernetes/controllers.tf
@ -20,18 +20,18 @@ resource "aws_instance" "controllers" {
  tags = {
    Name = "${var.cluster_name}-controller-${count.index}"
  }
-
  instance_type = var.controller_type
-  ami           = var.arch == "arm64" ? data.aws_ami.fedora-coreos-arm[0].image_id : data.aws_ami.fedora-coreos.image_id
-  user_data     = data.ct_config.controllers.*.rendered[count.index]
+  ami           = var.controller_arch == "arm64" ? data.aws_ami.fedora-coreos-arm[0].image_id : data.aws_ami.fedora-coreos.image_id

  # storage
  root_block_device {
-    volume_type = var.disk_type
-    volume_size = var.disk_size
-    iops        = var.disk_iops
+    volume_type = var.controller_disk_type
+    volume_size = var.controller_disk_size
+    iops        = var.controller_disk_iops
    encrypted   = true
-    tags        = {}
+    tags = {
+      Name = "${var.cluster_name}-controller-${count.index}"
+    }
  }

  # network
@ -39,6 +39,14 @@ resource "aws_instance" "controllers" {
  subnet_id                   = element(aws_subnet.public.*.id, count.index)
  vpc_security_group_ids      = [aws_security_group.controller.id]

+  # boot
+  user_data = data.ct_config.controllers.*.rendered[count.index]
+
+  # cost
+  credit_specification {
+    cpu_credits = var.controller_cpu_credits
+  }
+
  lifecycle {
    ignore_changes = [
      ami,
@ -61,7 +69,6 @@ data "ct_config" "controllers" {
    kubeconfig             = indent(10, module.bootstrap.kubeconfig-kubelet)
    ssh_authorized_key     = var.ssh_authorized_key
    cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
-    cluster_domain_suffix  = var.cluster_domain_suffix
  })
  strict   = true
  snippets = var.controller_snippets
--- a/aws/fedora-coreos/kubernetes/network.tf
+++ b/aws/fedora-coreos/kubernetes/network.tf
@ -47,17 +47,25 @@ resource "aws_route" "egress-ipv6" {
 resource "aws_subnet" "public" {
  count = length(data.aws_availability_zones.all.names)

-  vpc_id            = aws_vpc.network.id
-  availability_zone = data.aws_availability_zones.all.names[count.index]
-
-  cidr_block                      = cidrsubnet(var.host_cidr, 4, count.index)
-  ipv6_cidr_block                 = cidrsubnet(aws_vpc.network.ipv6_cidr_block, 8, count.index)
-  map_public_ip_on_launch         = true
-  assign_ipv6_address_on_creation = true
-
  tags = {
    "Name" = "${var.cluster_name}-public-${count.index}"
  }
+  vpc_id            = aws_vpc.network.id
+  availability_zone = data.aws_availability_zones.all.names[count.index]
+
+  # IPv4 and IPv6 CIDR blocks
+  cidr_block      = cidrsubnet(var.host_cidr, 4, count.index)
+  ipv6_cidr_block = cidrsubnet(aws_vpc.network.ipv6_cidr_block, 8, count.index)
+
+  # Assign IPv4 and IPv6 addresses to instances
+  map_public_ip_on_launch         = true
+  assign_ipv6_address_on_creation = true
+
+  # Hostnames assigned to instances
+  # resource-name: <ec2-instance-id>.region.compute.internal
+  private_dns_hostname_type_on_launch            = "resource-name"
+  enable_resource_name_dns_a_record_on_launch    = true
+  enable_resource_name_dns_aaaa_record_on_launch = true
 }

 resource "aws_route_table_association" "public" {
--- a/aws/fedora-coreos/kubernetes/security.tf
+++ b/aws/fedora-coreos/kubernetes/security.tf
@ -92,6 +92,30 @@ resource "aws_security_group_rule" "controller-cilium-health-self" {
  self      = true
 }

+resource "aws_security_group_rule" "controller-cilium-metrics" {
+  count = var.networking == "cilium" ? 1 : 0
+
+  security_group_id = aws_security_group.controller.id
+
+  type                     = "ingress"
+  protocol                 = "tcp"
+  from_port                = 9962
+  to_port                  = 9965
+  source_security_group_id = aws_security_group.worker.id
+}
+
+resource "aws_security_group_rule" "controller-cilium-metrics-self" {
+  count = var.networking == "cilium" ? 1 : 0
+
+  security_group_id = aws_security_group.controller.id
+
+  type      = "ingress"
+  protocol  = "tcp"
+  from_port = 9962
+  to_port   = 9965
+  self      = true
+}
+
 # IANA VXLAN default
 resource "aws_security_group_rule" "controller-vxlan" {
  count = var.networking == "flannel" ? 1 : 0
@ -379,6 +403,30 @@ resource "aws_security_group_rule" "worker-cilium-health-self" {
  self      = true
 }

+resource "aws_security_group_rule" "worker-cilium-metrics" {
+  count = var.networking == "cilium" ? 1 : 0
+
+  security_group_id = aws_security_group.worker.id
+
+  type                     = "ingress"
+  protocol                 = "tcp"
+  from_port                = 9962
+  to_port                  = 9965
+  source_security_group_id = aws_security_group.controller.id
+}
+
+resource "aws_security_group_rule" "worker-cilium-metrics-self" {
+  count = var.networking == "cilium" ? 1 : 0
+
+  security_group_id = aws_security_group.worker.id
+
+  type      = "ingress"
+  protocol  = "tcp"
+  from_port = 9962
+  to_port   = 9965
+  self      = true
+}
+
 # IANA VXLAN default
 resource "aws_security_group_rule" "worker-vxlan" {
  count = var.networking == "flannel" ? 1 : 0
--- a/aws/fedora-coreos/kubernetes/variables.tf
+++ b/aws/fedora-coreos/kubernetes/variables.tf
@ -17,30 +17,6 @@ variable "dns_zone_id" {

 # instances

-variable "controller_count" {
-  type        = number
-  description = "Number of controllers (i.e. masters)"
-  default     = 1
-}
-
-variable "worker_count" {
-  type        = number
-  description = "Number of workers"
-  default     = 1
-}
-
-variable "controller_type" {
-  type        = string
-  description = "EC2 instance type for controllers"
-  default     = "t3.small"
-}
-
-variable "worker_type" {
-  type        = string
-  description = "EC2 instance type for workers"
-  default     = "t3.small"
-}
-
 variable "os_stream" {
  type        = string
  description = "Fedora CoreOS image stream for instances (e.g. stable, testing, next)"
@ -52,24 +28,78 @@ variable "os_stream" {
  }
 }

-variable "disk_size" {
+variable "controller_count" {
+  type        = number
+  description = "Number of controllers (i.e. masters)"
+  default     = 1
+}
+
+variable "controller_type" {
+  type        = string
+  description = "EC2 instance type for controllers"
+  default     = "t3.small"
+}
+
+variable "controller_disk_size" {
  type        = number
  description = "Size of the EBS volume in GB"
  default     = 30
 }

-variable "disk_type" {
+variable "controller_disk_type" {
  type        = string
  description = "Type of the EBS volume (e.g. standard, gp2, gp3, io1)"
  default     = "gp3"
 }

-variable "disk_iops" {
+variable "controller_disk_iops" {
  type        = number
  description = "IOPS of the EBS volume (e.g. 3000)"
  default     = 3000
 }

+variable "controller_cpu_credits" {
+  type        = string
+  description = "CPU credits mode (if using a burstable instance type)"
+  default     = null
+}
+
+variable "worker_count" {
+  type        = number
+  description = "Number of workers"
+  default     = 1
+}
+
+variable "worker_type" {
+  type        = string
+  description = "EC2 instance type for workers"
+  default     = "t3.small"
+}
+
+variable "worker_disk_size" {
+  type        = number
+  description = "Size of the EBS volume in GB"
+  default     = 30
+}
+
+variable "worker_disk_type" {
+  type        = string
+  description = "Type of the EBS volume (e.g. standard, gp2, gp3, io1)"
+  default     = "gp3"
+}
+
+variable "worker_disk_iops" {
+  type        = number
+  description = "IOPS of the EBS volume (e.g. 3000)"
+  default     = 3000
+}
+
+variable "worker_cpu_credits" {
+  type        = string
+  description = "CPU credits mode (if using a burstable instance type)"
+  default     = null
+}
+
 variable "worker_price" {
  type        = number
  description = "Spot price in USD for worker instances or 0 to use on-demand instances"
@ -134,40 +164,31 @@ EOD
  default     = "10.3.0.0/16"
 }

-variable "enable_reporting" {
-  type        = bool
-  description = "Enable usage or analytics reporting to upstreams (Calico)"
-  default     = false
-}
-
-variable "enable_aggregation" {
-  type        = bool
-  description = "Enable the Kubernetes Aggregation Layer"
-  default     = true
-}
-
 variable "worker_node_labels" {
  type        = list(string)
  description = "List of initial worker node labels"
  default     = []
 }

-# unofficial, undocumented, unsupported
+# advanced

-variable "cluster_domain_suffix" {
+variable "controller_arch" {
  type        = string
-  description = "Queries for domains with the suffix will be answered by CoreDNS. Default is cluster.local (e.g. foo.default.svc.cluster.local)"
-  default     = "cluster.local"
+  description = "Controller node(s) architecture (amd64 or arm64)"
+  default     = "amd64"
+  validation {
+    condition     = contains(["amd64", "arm64"], var.controller_arch)
+    error_message = "The controller_arch must be amd64 or arm64."
+  }
 }

-variable "arch" {
+variable "worker_arch" {
  type        = string
-  description = "Container architecture (amd64 or arm64)"
+  description = "Worker node(s) architecture (amd64 or arm64)"
  default     = "amd64"
-
  validation {
-    condition     = var.arch == "amd64" || var.arch == "arm64"
-    error_message = "The arch must be amd64 or arm64."
+    condition     = contains(["amd64", "arm64"], var.worker_arch)
+    error_message = "The worker_arch must be amd64 or arm64."
  }
 }

@ -176,3 +197,19 @@ variable "daemonset_tolerations" {
  description = "List of additional taint keys kube-system DaemonSets should tolerate (e.g. ['custom-role', 'gpu-role'])"
  default     = []
 }
+
+variable "components" {
+  description = "Configure pre-installed cluster components"
+  # Component configs are passed through to terraform-render-bootstrap,
+  # which handles type enforcement and defines defaults
+  # https://github.com/poseidon/terraform-render-bootstrap/blob/main/variables.tf#L95
+  type = object({
+    enable     = optional(bool)
+    coredns    = optional(map(any))
+    kube_proxy = optional(map(any))
+    flannel    = optional(map(any))
+    calico     = optional(map(any))
+    cilium     = optional(map(any))
+  })
+  default = null
+}
--- a/aws/fedora-coreos/kubernetes/workers.tf
+++ b/aws/fedora-coreos/kubernetes/workers.tf
@ -6,11 +6,16 @@ module "workers" {
  vpc_id          = aws_vpc.network.id
  subnet_ids      = aws_subnet.public.*.id
  security_groups = [aws_security_group.worker.id]
+
+  # instances
+  os_stream     = var.os_stream
  worker_count  = var.worker_count
  instance_type = var.worker_type
-  os_stream       = var.os_stream
-  arch            = var.arch
-  disk_size       = var.disk_size
+  arch          = var.worker_arch
+  disk_type     = var.worker_disk_type
+  disk_size     = var.worker_disk_size
+  disk_iops     = var.worker_disk_iops
+  cpu_credits   = var.worker_cpu_credits
  spot_price    = var.worker_price
  target_groups = var.worker_target_groups

@ -18,7 +23,6 @@ module "workers" {
  kubeconfig         = module.bootstrap.kubeconfig-kubelet
  ssh_authorized_key = var.ssh_authorized_key
  service_cidr       = var.service_cidr
-  cluster_domain_suffix = var.cluster_domain_suffix
  snippets           = var.worker_snippets
  node_labels        = var.worker_node_labels
 }
--- a/aws/fedora-coreos/kubernetes/workers/butane/worker.yaml
+++ b/aws/fedora-coreos/kubernetes/workers/butane/worker.yaml
@ -29,7 +29,7 @@ systemd:
        After=afterburn.service
        Wants=rpc-statd.service
        [Service]
-        Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3
+        Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3
        EnvironmentFile=/run/metadata/afterburn
        ExecStartPre=/bin/mkdir -p /etc/cni/net.d
        ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
@ -104,7 +104,7 @@ storage:
          cgroupDriver: systemd
          clusterDNS:
            - ${cluster_dns_service_ip}
-          clusterDomain: ${cluster_domain_suffix}
+          clusterDomain: cluster.local
          healthzPort: 0
          rotateCertificates: true
          shutdownGracePeriod: 45s
--- a/aws/fedora-coreos/kubernetes/workers/variables.tf
+++ b/aws/fedora-coreos/kubernetes/workers/variables.tf
@ -69,6 +69,12 @@ variable "spot_price" {
  default     = 0
 }

+variable "cpu_credits" {
+  type        = string
+  description = "CPU burst credits mode (if applicable)"
+  default     = null
+}
+
 variable "target_groups" {
  type        = list(string)
  description = "Additional target group ARNs to which instances should be added"
@ -102,12 +108,6 @@ EOD
  default     = "10.3.0.0/16"
 }

-variable "cluster_domain_suffix" {
-  type        = string
-  description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. foo.default.svc.cluster.local) "
-  default     = "cluster.local"
-}
-
 variable "node_labels" {
  type        = list(string)
  description = "List of initial node labels"
@ -120,15 +120,14 @@ variable "node_taints" {
  default     = []
 }

-# unofficial, undocumented, unsupported
+# advanced

 variable "arch" {
  type        = string
  description = "Container architecture (amd64 or arm64)"
  default     = "amd64"
-
  validation {
-    condition     = var.arch == "amd64" || var.arch == "arm64"
+    condition     = contains(["amd64", "arm64"], var.arch)
    error_message = "The arch must be amd64 or arm64."
  }
 }
--- a/aws/fedora-coreos/kubernetes/workers/workers.tf
+++ b/aws/fedora-coreos/kubernetes/workers/workers.tf
@ -6,13 +6,11 @@ resource "aws_autoscaling_group" "workers" {
  desired_capacity = var.worker_count
  min_size         = var.worker_count
  max_size         = var.worker_count + 2
-  default_cooldown          = 30
-  health_check_grace_period = 30

  # network
  vpc_zone_identifier = var.subnet_ids

-  # template
+  # instance template
  launch_template {
    id      = aws_launch_template.worker.id
    version = aws_launch_template.worker.latest_version
@ -32,6 +30,11 @@ resource "aws_autoscaling_group" "workers" {
      min_healthy_percentage = 90
    }
  }
+  # Grace period before checking new instance's health
+  health_check_grace_period = 30
+  # Cooldown period between scaling activities
+  default_cooldown = 30
+

  lifecycle {
    # override the default destroy and replace update behavior
@ -56,11 +59,6 @@ resource "aws_launch_template" "worker" {
  name_prefix   = "${var.name}-worker"
  image_id      = local.ami_id
  instance_type = var.instance_type
-  monitoring {
-    enabled = false
-  }
-
-  user_data = sensitive(base64encode(data.ct_config.worker.rendered))

  # storage
  ebs_optimized = true
@ -76,9 +74,26 @@ resource "aws_launch_template" "worker" {
  }

  # network
-  vpc_security_group_ids = var.security_groups
+  network_interfaces {
+    associate_public_ip_address = true
+    security_groups             = var.security_groups
+  }

-  # spot
+  # boot
+  user_data = sensitive(base64encode(data.ct_config.worker.rendered))
+
+  # metadata
+  metadata_options {
+    http_tokens = "optional"
+  }
+  monitoring {
+    enabled = false
+  }
+
+  # cost
+  credit_specification {
+    cpu_credits = var.cpu_credits
+  }
  dynamic "instance_market_options" {
    for_each = var.spot_price > 0 ? [1] : []
    content {
@ -102,7 +117,6 @@ data "ct_config" "worker" {
    kubeconfig             = indent(10, var.kubeconfig)
    ssh_authorized_key     = var.ssh_authorized_key
    cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
-    cluster_domain_suffix  = var.cluster_domain_suffix
    node_labels            = join(",", var.node_labels)
    node_taints            = join(",", var.node_taints)
  })
--- a/aws/flatcar-linux/kubernetes/README.md
+++ b/aws/flatcar-linux/kubernetes/README.md
@ -11,7 +11,7 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster

 ## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>

-* Kubernetes v1.28.3 (upstream)
+* Kubernetes v1.31.3 (upstream)
 * Single or multi-master, [Calico](https://www.projectcalico.org/) or [Cilium](https://github.com/cilium/cilium) or [flannel](https://github.com/coreos/flannel) networking
 * On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/)
 * Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [spot](https://typhoon.psdn.io/flatcar-linux/aws/#spot) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#hosts) customization
--- a/aws/flatcar-linux/kubernetes/ami.tf
+++ b/aws/flatcar-linux/kubernetes/ami.tf
@ -1,7 +1,7 @@
 locals {
  # Pick a Flatcar Linux AMI
  # flatcar-stable -> Flatcar Linux AMI
-  ami_id  = var.arch == "arm64" ? data.aws_ami.flatcar-arm64[0].image_id : data.aws_ami.flatcar.image_id
+  ami_id  = var.controller_arch == "arm64" ? data.aws_ami.flatcar-arm64[0].image_id : data.aws_ami.flatcar.image_id
  channel = split("-", var.os_image)[1]
 }

@ -26,7 +26,7 @@ data "aws_ami" "flatcar" {
 }

 data "aws_ami" "flatcar-arm64" {
-  count = var.arch == "arm64" ? 1 : 0
+  count = var.controller_arch == "arm64" ? 1 : 0

  most_recent = true
  owners      = ["075585003325"]
--- a/aws/flatcar-linux/kubernetes/bootstrap.tf
+++ b/aws/flatcar-linux/kubernetes/bootstrap.tf
@ -1,6 +1,6 @@
 # Kubernetes assets (kubeconfig, manifests)
 module "bootstrap" {
-  source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=d151ab77b7ebdfb878ea110c86cc77238189f1ed"
+  source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=e6a1c7bccfc45ab299b5f8149bc3840f99b30b2b"

  cluster_name          = var.cluster_name
  api_servers           = [format("%s.%s", var.cluster_name, var.dns_zone)]
@ -9,9 +9,7 @@ module "bootstrap" {
  network_mtu           = var.network_mtu
  pod_cidr              = var.pod_cidr
  service_cidr          = var.service_cidr
-  cluster_domain_suffix = var.cluster_domain_suffix
-  enable_reporting      = var.enable_reporting
-  enable_aggregation    = var.enable_aggregation
  daemonset_tolerations = var.daemonset_tolerations
+  components            = var.components
 }

--- a/aws/flatcar-linux/kubernetes/butane/controller.yaml
+++ b/aws/flatcar-linux/kubernetes/butane/controller.yaml
@ -11,7 +11,7 @@ systemd:
        Requires=docker.service
        After=docker.service
        [Service]
-        Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.10
+        Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.13
        ExecStartPre=/usr/bin/docker run -d \
          --name etcd \
          --network host \
@ -58,7 +58,7 @@ systemd:
        After=coreos-metadata.service
        Wants=rpc-statd.service
        [Service]
-        Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3
+        Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3
        EnvironmentFile=/run/metadata/coreos
        ExecStartPre=/bin/mkdir -p /etc/cni/net.d
        ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
@ -109,7 +109,7 @@ systemd:
        Type=oneshot
        RemainAfterExit=true
        WorkingDirectory=/opt/bootstrap
-        Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3
+        Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3
        ExecStart=/usr/bin/docker run \
            -v /etc/kubernetes/pki:/etc/kubernetes/pki:ro \
            -v /opt/bootstrap/assets:/assets:ro \
@ -148,7 +148,7 @@ storage:
          cgroupDriver: systemd
          clusterDNS:
            - ${cluster_dns_service_ip}
-          clusterDomain: ${cluster_domain_suffix}
+          clusterDomain: cluster.local
          healthzPort: 0
          rotateCertificates: true
          shutdownGracePeriod: 45s
@ -162,7 +162,7 @@ storage:
      contents:
        inline: |
          #!/bin/bash -e
-          mkdir -p -- auth tls/etcd tls/k8s static-manifests manifests/coredns manifests-networking
+          mkdir -p -- auth tls/{etcd,k8s} static-manifests manifests/{coredns,kube-proxy,network}
          awk '/#####/ {filename=$2; next} {print > filename}' assets
          mkdir -p /etc/ssl/etcd/etcd
          mkdir -p /etc/kubernetes/pki
@ -177,8 +177,7 @@ storage:
          mv static-manifests/* /etc/kubernetes/manifests/
          mkdir -p /opt/bootstrap/assets
          mv manifests /opt/bootstrap/assets/manifests
-          mv manifests-networking/* /opt/bootstrap/assets/manifests/
-          rm -rf assets auth static-manifests tls manifests-networking
+          rm -rf assets auth static-manifests tls manifests
    - path: /opt/bootstrap/apply
      mode: 0544
      contents:
--- a/aws/flatcar-linux/kubernetes/controllers.tf
+++ b/aws/flatcar-linux/kubernetes/controllers.tf
@ -20,19 +20,18 @@ resource "aws_instance" "controllers" {
  tags = {
    Name = "${var.cluster_name}-controller-${count.index}"
  }
-
  instance_type = var.controller_type
-
  ami           = local.ami_id
-  user_data = data.ct_config.controllers.*.rendered[count.index]

  # storage
  root_block_device {
-    volume_type = var.disk_type
-    volume_size = var.disk_size
-    iops        = var.disk_iops
+    volume_type = var.controller_disk_type
+    volume_size = var.controller_disk_size
+    iops        = var.controller_disk_iops
    encrypted   = true
-    tags        = {}
+    tags = {
+      Name = "${var.cluster_name}-controller-${count.index}"
+    }
  }

  # network
@ -40,6 +39,14 @@ resource "aws_instance" "controllers" {
  subnet_id                   = element(aws_subnet.public.*.id, count.index)
  vpc_security_group_ids      = [aws_security_group.controller.id]

+  # boot
+  user_data = data.ct_config.controllers.*.rendered[count.index]
+
+  # cost
+  credit_specification {
+    cpu_credits = var.controller_cpu_credits
+  }
+
  lifecycle {
    ignore_changes = [
      ami,
@ -62,7 +69,6 @@ data "ct_config" "controllers" {
    kubeconfig             = indent(10, module.bootstrap.kubeconfig-kubelet)
    ssh_authorized_key     = var.ssh_authorized_key
    cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
-    cluster_domain_suffix  = var.cluster_domain_suffix
  })
  strict   = true
  snippets = var.controller_snippets
--- a/aws/flatcar-linux/kubernetes/network.tf
+++ b/aws/flatcar-linux/kubernetes/network.tf
@ -47,17 +47,25 @@ resource "aws_route" "egress-ipv6" {
 resource "aws_subnet" "public" {
  count = length(data.aws_availability_zones.all.names)

-  vpc_id            = aws_vpc.network.id
-  availability_zone = data.aws_availability_zones.all.names[count.index]
-
-  cidr_block                      = cidrsubnet(var.host_cidr, 4, count.index)
-  ipv6_cidr_block                 = cidrsubnet(aws_vpc.network.ipv6_cidr_block, 8, count.index)
-  map_public_ip_on_launch         = true
-  assign_ipv6_address_on_creation = true
-
  tags = {
    "Name" = "${var.cluster_name}-public-${count.index}"
  }
+  vpc_id            = aws_vpc.network.id
+  availability_zone = data.aws_availability_zones.all.names[count.index]
+
+  # IPv4 and IPv6 CIDR blocks
+  cidr_block      = cidrsubnet(var.host_cidr, 4, count.index)
+  ipv6_cidr_block = cidrsubnet(aws_vpc.network.ipv6_cidr_block, 8, count.index)
+
+  # Assign IPv4 and IPv6 addresses to instances
+  map_public_ip_on_launch         = true
+  assign_ipv6_address_on_creation = true
+
+  # Hostnames assigned to instances
+  # resource-name: <ec2-instance-id>.region.compute.internal
+  private_dns_hostname_type_on_launch            = "resource-name"
+  enable_resource_name_dns_a_record_on_launch    = true
+  enable_resource_name_dns_aaaa_record_on_launch = true
 }

 resource "aws_route_table_association" "public" {
--- a/aws/flatcar-linux/kubernetes/security.tf
+++ b/aws/flatcar-linux/kubernetes/security.tf
@ -92,6 +92,30 @@ resource "aws_security_group_rule" "controller-cilium-health-self" {
  self      = true
 }

+resource "aws_security_group_rule" "controller-cilium-metrics" {
+  count = var.networking == "cilium" ? 1 : 0
+
+  security_group_id = aws_security_group.controller.id
+
+  type                     = "ingress"
+  protocol                 = "tcp"
+  from_port                = 9962
+  to_port                  = 9965
+  source_security_group_id = aws_security_group.worker.id
+}
+
+resource "aws_security_group_rule" "controller-cilium-metrics-self" {
+  count = var.networking == "cilium" ? 1 : 0
+
+  security_group_id = aws_security_group.controller.id
+
+  type      = "ingress"
+  protocol  = "tcp"
+  from_port = 9962
+  to_port   = 9965
+  self      = true
+}
+
 # IANA VXLAN default
 resource "aws_security_group_rule" "controller-vxlan" {
  count = var.networking == "flannel" ? 1 : 0
@ -379,6 +403,30 @@ resource "aws_security_group_rule" "worker-cilium-health-self" {
  self      = true
 }

+resource "aws_security_group_rule" "worker-cilium-metrics" {
+  count = var.networking == "cilium" ? 1 : 0
+
+  security_group_id = aws_security_group.worker.id
+
+  type                     = "ingress"
+  protocol                 = "tcp"
+  from_port                = 9962
+  to_port                  = 9965
+  source_security_group_id = aws_security_group.controller.id
+}
+
+resource "aws_security_group_rule" "worker-cilium-metrics-self" {
+  count = var.networking == "cilium" ? 1 : 0
+
+  security_group_id = aws_security_group.worker.id
+
+  type      = "ingress"
+  protocol  = "tcp"
+  from_port = 9962
+  to_port   = 9965
+  self      = true
+}
+
 # IANA VXLAN default
 resource "aws_security_group_rule" "worker-vxlan" {
  count = var.networking == "flannel" ? 1 : 0
--- a/aws/flatcar-linux/kubernetes/variables.tf
+++ b/aws/flatcar-linux/kubernetes/variables.tf
@ -17,30 +17,6 @@ variable "dns_zone_id" {

 # instances

-variable "controller_count" {
-  type        = number
-  description = "Number of controllers (i.e. masters)"
-  default     = 1
-}
-
-variable "worker_count" {
-  type        = number
-  description = "Number of workers"
-  default     = 1
-}
-
-variable "controller_type" {
-  type        = string
-  description = "EC2 instance type for controllers"
-  default     = "t3.small"
-}
-
-variable "worker_type" {
-  type        = string
-  description = "EC2 instance type for workers"
-  default     = "t3.small"
-}
-
 variable "os_image" {
  type        = string
  description = "AMI channel for a Container Linux derivative (flatcar-stable, flatcar-beta, flatcar-alpha)"
@ -52,24 +28,78 @@ variable "os_image" {
  }
 }

-variable "disk_size" {
+variable "controller_count" {
+  type        = number
+  description = "Number of controllers (i.e. masters)"
+  default     = 1
+}
+
+variable "controller_type" {
+  type        = string
+  description = "EC2 instance type for controllers"
+  default     = "t3.small"
+}
+
+variable "controller_disk_size" {
  type        = number
  description = "Size of the EBS volume in GB"
  default     = 30
 }

-variable "disk_type" {
+variable "controller_disk_type" {
  type        = string
  description = "Type of the EBS volume (e.g. standard, gp2, gp3, io1)"
  default     = "gp3"
 }

-variable "disk_iops" {
+variable "controller_disk_iops" {
  type        = number
  description = "IOPS of the EBS volume (e.g. 3000)"
  default     = 3000
 }

+variable "controller_cpu_credits" {
+  type        = string
+  description = "CPU credits mode (if using a burstable instance type)"
+  default     = null
+}
+
+variable "worker_count" {
+  type        = number
+  description = "Number of workers"
+  default     = 1
+}
+
+variable "worker_type" {
+  type        = string
+  description = "EC2 instance type for workers"
+  default     = "t3.small"
+}
+
+variable "worker_disk_size" {
+  type        = number
+  description = "Size of the EBS volume in GB"
+  default     = 30
+}
+
+variable "worker_disk_type" {
+  type        = string
+  description = "Type of the EBS volume (e.g. standard, gp2, gp3, io1)"
+  default     = "gp3"
+}
+
+variable "worker_disk_iops" {
+  type        = number
+  description = "IOPS of the EBS volume (e.g. 3000)"
+  default     = 3000
+}
+
+variable "worker_cpu_credits" {
+  type        = string
+  description = "CPU credits mode (if using a burstable instance type)"
+  default     = null
+}
+
 variable "worker_price" {
  type        = number
  description = "Spot price in USD for worker instances or 0 to use on-demand instances"
@ -134,40 +164,31 @@ EOD
  default     = "10.3.0.0/16"
 }

-variable "enable_reporting" {
-  type        = bool
-  description = "Enable usage or analytics reporting to upstreams (Calico)"
-  default     = false
-}
-
-variable "enable_aggregation" {
-  type        = bool
-  description = "Enable the Kubernetes Aggregation Layer"
-  default     = true
-}
-
 variable "worker_node_labels" {
  type        = list(string)
  description = "List of initial worker node labels"
  default     = []
 }

-# unofficial, undocumented, unsupported
+# advanced

-variable "cluster_domain_suffix" {
+variable "controller_arch" {
  type        = string
-  description = "Queries for domains with the suffix will be answered by CoreDNS. Default is cluster.local (e.g. foo.default.svc.cluster.local)"
-  default     = "cluster.local"
+  description = "Controller node(s) architecture (amd64 or arm64)"
+  default     = "amd64"
+  validation {
+    condition     = contains(["amd64", "arm64"], var.controller_arch)
+    error_message = "The controller_arch must be amd64 or arm64."
+  }
 }

-variable "arch" {
+variable "worker_arch" {
  type        = string
-  description = "Container architecture (amd64 or arm64)"
+  description = "Worker node(s) architecture (amd64 or arm64)"
  default     = "amd64"
-
  validation {
-    condition     = var.arch == "amd64" || var.arch == "arm64"
-    error_message = "The arch must be amd64 or arm64."
+    condition     = contains(["amd64", "arm64"], var.worker_arch)
+    error_message = "The worker_arch must be amd64 or arm64."
  }
 }

@ -176,3 +197,19 @@ variable "daemonset_tolerations" {
  description = "List of additional taint keys kube-system DaemonSets should tolerate (e.g. ['custom-role', 'gpu-role'])"
  default     = []
 }
+
+variable "components" {
+  description = "Configure pre-installed cluster components"
+  # Component configs are passed through to terraform-render-bootstrap,
+  # which handles type enforcement and defines defaults
+  # https://github.com/poseidon/terraform-render-bootstrap/blob/main/variables.tf#L95
+  type = object({
+    enable     = optional(bool)
+    coredns    = optional(map(any))
+    kube_proxy = optional(map(any))
+    flannel    = optional(map(any))
+    calico     = optional(map(any))
+    cilium     = optional(map(any))
+  })
+  default = null
+}
--- a/aws/flatcar-linux/kubernetes/versions.tf
+++ b/aws/flatcar-linux/kubernetes/versions.tf
@ -7,7 +7,7 @@ terraform {
    null = ">= 2.1"
    ct = {
      source  = "poseidon/ct"
-      version = "~> 0.11"
+      version = "~> 0.13"
    }
  }
 }
--- a/aws/flatcar-linux/kubernetes/workers.tf
+++ b/aws/flatcar-linux/kubernetes/workers.tf
@ -6,11 +6,17 @@ module "workers" {
  vpc_id          = aws_vpc.network.id
  subnet_ids      = aws_subnet.public.*.id
  security_groups = [aws_security_group.worker.id]
+
+  # instances
+  os_image      = var.os_image
  worker_count  = var.worker_count
  instance_type = var.worker_type
-  os_image        = var.os_image
-  arch            = var.arch
-  disk_size       = var.disk_size
+  arch          = var.worker_arch
+  disk_type     = var.worker_disk_type
+  disk_size     = var.worker_disk_size
+  disk_iops     = var.worker_disk_iops
+  # forward the cluster-level worker_cpu_credits to the worker pool's cpu_credits input (null leaves it unset)
+  cpu_credits   = var.worker_cpu_credits
  spot_price    = var.worker_price
  target_groups = var.worker_target_groups

@ -18,7 +24,6 @@ module "workers" {
  kubeconfig         = module.bootstrap.kubeconfig-kubelet
  ssh_authorized_key = var.ssh_authorized_key
  service_cidr       = var.service_cidr
-  cluster_domain_suffix = var.cluster_domain_suffix
  snippets           = var.worker_snippets
  node_labels        = var.worker_node_labels
 }
--- a/aws/flatcar-linux/kubernetes/workers/butane/worker.yaml
+++ b/aws/flatcar-linux/kubernetes/workers/butane/worker.yaml
@ -30,7 +30,7 @@ systemd:
        After=coreos-metadata.service
        Wants=rpc-statd.service
        [Service]
-        Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3
+        Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3
        EnvironmentFile=/run/metadata/coreos
        ExecStartPre=/bin/mkdir -p /etc/cni/net.d
        ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
@ -103,7 +103,7 @@ storage:
          cgroupDriver: systemd
          clusterDNS:
            - ${cluster_dns_service_ip}
-          clusterDomain: ${cluster_domain_suffix}
+          clusterDomain: cluster.local
          healthzPort: 0
          rotateCertificates: true
          shutdownGracePeriod: 45s
--- a/aws/flatcar-linux/kubernetes/workers/variables.tf
+++ b/aws/flatcar-linux/kubernetes/workers/variables.tf
@ -69,6 +69,12 @@ variable "spot_price" {
  default     = 0
 }

+variable "cpu_credits" {
+  type        = string
+  description = "CPU burst credits mode (if applicable)"
+  default     = null
+}
+
 variable "target_groups" {
  type        = list(string)
  description = "Additional target group ARNs to which instances should be added"
@ -102,12 +108,6 @@ EOD
  default     = "10.3.0.0/16"
 }

-variable "cluster_domain_suffix" {
-  type        = string
-  description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. foo.default.svc.cluster.local) "
-  default     = "cluster.local"
-}
-
 variable "node_labels" {
  type        = list(string)
  description = "List of initial node labels"
@ -128,7 +128,7 @@ variable "arch" {
  default     = "amd64"

  validation {
-    condition     = var.arch == "amd64" || var.arch == "arm64"
+    condition     = contains(["amd64", "arm64"], var.arch)
    error_message = "The arch must be amd64 or arm64."
  }
 }
--- a/aws/flatcar-linux/kubernetes/workers/versions.tf
+++ b/aws/flatcar-linux/kubernetes/workers/versions.tf
@ -6,7 +6,7 @@ terraform {
    aws = ">= 2.23, <= 6.0"
    ct = {
      source  = "poseidon/ct"
-      version = "~> 0.11"
+      version = "~> 0.13"
    }
  }
 }
--- a/aws/flatcar-linux/kubernetes/workers/workers.tf
+++ b/aws/flatcar-linux/kubernetes/workers/workers.tf
@ -6,13 +6,11 @@ resource "aws_autoscaling_group" "workers" {
  desired_capacity = var.worker_count
  min_size         = var.worker_count
  max_size         = var.worker_count + 2
-  default_cooldown          = 30
-  health_check_grace_period = 30

  # network
  vpc_zone_identifier = var.subnet_ids

-  # template
+  # instance template
  launch_template {
    id      = aws_launch_template.worker.id
    version = aws_launch_template.worker.latest_version
@ -32,6 +30,10 @@ resource "aws_autoscaling_group" "workers" {
      min_healthy_percentage = 90
    }
  }
+  # Grace period before checking new instance's health
+  health_check_grace_period = 30
+  # Cooldown period between scaling activities
+  default_cooldown = 30

  lifecycle {
    # override the default destroy and replace update behavior
@ -56,11 +58,6 @@ resource "aws_launch_template" "worker" {
  name_prefix   = "${var.name}-worker"
  image_id      = local.ami_id
  instance_type = var.instance_type
-  monitoring {
-    enabled = false
-  }
-
-  user_data = sensitive(base64encode(data.ct_config.worker.rendered))

  # storage
  ebs_optimized = true
@ -76,9 +73,26 @@ resource "aws_launch_template" "worker" {
  }

  # network
-  vpc_security_group_ids = var.security_groups
+  network_interfaces {
+    associate_public_ip_address = true
+    security_groups             = var.security_groups
+  }

-  # spot
+  # boot
+  user_data = sensitive(base64encode(data.ct_config.worker.rendered))
+
+  # metadata
+  metadata_options {
+    http_tokens = "optional"
+  }
+  monitoring {
+    enabled = false
+  }
+
+  # cost
+  credit_specification {
+    cpu_credits = var.cpu_credits
+  }
  dynamic "instance_market_options" {
    for_each = var.spot_price > 0 ? [1] : []
    content {
@ -102,7 +116,6 @@ data "ct_config" "worker" {
    kubeconfig             = indent(10, var.kubeconfig)
    ssh_authorized_key     = var.ssh_authorized_key
    cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
-    cluster_domain_suffix  = var.cluster_domain_suffix
    node_labels            = join(",", var.node_labels)
    node_taints            = join(",", var.node_taints)
  })
--- a/azure/fedora-coreos/kubernetes/README.md
+++ b/azure/fedora-coreos/kubernetes/README.md
@ -11,7 +11,7 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster

 ## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>

-* Kubernetes v1.28.3 (upstream)
+* Kubernetes v1.31.3 (upstream)
 * Single or multi-master, [Calico](https://www.projectcalico.org/) or [Cilium](https://github.com/cilium/cilium) or [flannel](https://github.com/coreos/flannel) networking
 * On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/), SELinux enforcing
 * Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [spot priority](https://typhoon.psdn.io/fedora-coreos/azure/#low-priority) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#hosts) customization
--- a/azure/fedora-coreos/kubernetes/bootstrap.tf
+++ b/azure/fedora-coreos/kubernetes/bootstrap.tf
@ -1,13 +1,12 @@
 # Kubernetes assets (kubeconfig, manifests)
 module "bootstrap" {
-  source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=d151ab77b7ebdfb878ea110c86cc77238189f1ed"
+  source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=e6a1c7bccfc45ab299b5f8149bc3840f99b30b2b"

  cluster_name = var.cluster_name
  api_servers  = [format("%s.%s", var.cluster_name, var.dns_zone)]
  etcd_servers = formatlist("%s.%s", azurerm_dns_a_record.etcds.*.name, var.dns_zone)

  networking = var.networking
-
  # only effective with Calico networking
  # we should be able to use 1450 MTU, but in practice, 1410 was needed
  network_encapsulation = "vxlan"
@ -15,9 +14,7 @@ module "bootstrap" {

  pod_cidr              = var.pod_cidr
  service_cidr          = var.service_cidr
-  cluster_domain_suffix = var.cluster_domain_suffix
-  enable_reporting      = var.enable_reporting
-  enable_aggregation    = var.enable_aggregation
  daemonset_tolerations = var.daemonset_tolerations
+  components            = var.components
 }

--- a/azure/fedora-coreos/kubernetes/butane/controller.yaml
+++ b/azure/fedora-coreos/kubernetes/butane/controller.yaml
@ -12,7 +12,7 @@ systemd:
        Wants=network-online.target
        After=network-online.target
        [Service]
-        Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.10
+        Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.13
        Type=exec
        ExecStartPre=/bin/mkdir -p /var/lib/etcd
        ExecStartPre=-/usr/bin/podman rm etcd
@ -54,7 +54,7 @@ systemd:
        Description=Kubelet (System Container)
        Wants=rpc-statd.service
        [Service]
-        Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3
+        Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3
        ExecStartPre=/bin/mkdir -p /etc/cni/net.d
        ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
        ExecStartPre=/bin/mkdir -p /opt/cni/bin
@ -111,7 +111,7 @@ systemd:
            --volume /opt/bootstrap/assets:/assets:ro,Z \
            --volume /opt/bootstrap/apply:/apply:ro,Z \
            --entrypoint=/apply \
-            quay.io/poseidon/kubelet:v1.28.3
+            quay.io/poseidon/kubelet:v1.31.3
        ExecStartPost=/bin/touch /opt/bootstrap/bootstrap.done
        ExecStartPost=-/usr/bin/podman stop bootstrap
 storage:
@ -144,7 +144,7 @@ storage:
          cgroupDriver: systemd
          clusterDNS:
            - ${cluster_dns_service_ip}
-          clusterDomain: ${cluster_domain_suffix}
+          clusterDomain: cluster.local
          healthzPort: 0
          rotateCertificates: true
          shutdownGracePeriod: 45s
@ -158,7 +158,7 @@ storage:
      contents:
        inline: |
          #!/bin/bash -e
-          mkdir -p -- auth tls/etcd tls/k8s static-manifests manifests/coredns manifests-networking
+          mkdir -p -- auth tls/{etcd,k8s} static-manifests manifests/{coredns,kube-proxy,network}
          awk '/#####/ {filename=$2; next} {print > filename}' assets
          mkdir -p /etc/ssl/etcd/etcd
          mkdir -p /etc/kubernetes/pki
@ -172,8 +172,7 @@ storage:
          mv static-manifests/* /etc/kubernetes/manifests/
          mkdir -p /opt/bootstrap/assets
          mv manifests /opt/bootstrap/assets/manifests
-          mv manifests-networking/* /opt/bootstrap/assets/manifests/
-          rm -rf assets auth static-manifests tls manifests-networking
+          rm -rf assets auth static-manifests tls manifests
          chcon -R -u system_u -t container_file_t /etc/kubernetes/pki
    - path: /opt/bootstrap/apply
      mode: 0544
--- a/azure/fedora-coreos/kubernetes/controllers.tf
+++ b/azure/fedora-coreos/kubernetes/controllers.tf
@ -9,25 +9,22 @@ locals {
 # Discrete DNS records for each controller's private IPv4 for etcd usage
 resource "azurerm_dns_a_record" "etcds" {
  count = var.controller_count
-  resource_group_name = var.dns_zone_group

  # DNS Zone name where record should be created
  zone_name           = var.dns_zone
-
+  resource_group_name = var.dns_zone_group
  # DNS record
  name = format("%s-etcd%d", var.cluster_name, count.index)
  ttl  = 300
-
  # private IPv4 address for etcd
-  records = [azurerm_network_interface.controllers.*.private_ip_address[count.index]]
+  records = [azurerm_network_interface.controllers[count.index].private_ip_address]
 }

 # Controller availability set to spread controllers
 resource "azurerm_availability_set" "controllers" {
-  resource_group_name = azurerm_resource_group.cluster.name
-
  name                         = "${var.cluster_name}-controllers"
-  location                     = var.region
+  resource_group_name          = azurerm_resource_group.cluster.name
+  location                     = var.location
  platform_fault_domain_count  = 2
  platform_update_domain_count = 4
  managed                      = true
@ -36,30 +33,34 @@ resource "azurerm_availability_set" "controllers" {
 # Controller instances
 resource "azurerm_linux_virtual_machine" "controllers" {
  count = var.controller_count
-  resource_group_name = azurerm_resource_group.cluster.name

  name                = "${var.cluster_name}-controller-${count.index}"
-  location            = var.region
+  resource_group_name = azurerm_resource_group.cluster.name
+  location            = var.location
  availability_set_id = azurerm_availability_set.controllers.id
-
  size                = var.controller_type
-  custom_data = base64encode(data.ct_config.controllers.*.rendered[count.index])

  # storage
  source_image_id = var.os_image
  os_disk {
    name                 = "${var.cluster_name}-controller-${count.index}"
+    storage_account_type = var.controller_disk_type
+    disk_size_gb         = var.controller_disk_size
    caching              = "None"
-    disk_size_gb         = var.disk_size
-    storage_account_type = "Premium_LRS"
  }

  # network
  network_interface_ids = [
-    azurerm_network_interface.controllers.*.id[count.index]
+    azurerm_network_interface.controllers[count.index].id
  ]

-  # Azure requires setting admin_ssh_key, though Ignition custom_data handles it too
+  # boot
+  custom_data = base64encode(data.ct_config.controllers[count.index].rendered)
+  boot_diagnostics {
+    # defaults to a managed storage account
+  }
+
+  # Azure requires an RSA admin_ssh_key
  admin_username = "core"
  admin_ssh_key {
    username   = "core"
@ -74,31 +75,52 @@ resource "azurerm_linux_virtual_machine" "controllers" {
  }
 }

-# Controller public IPv4 addresses
-resource "azurerm_public_ip" "controllers" {
+# Controller node public IPv4 addresses
+resource "azurerm_public_ip" "controllers-ipv4" {
  count = var.controller_count
-  resource_group_name = azurerm_resource_group.cluster.name

-  name              = "${var.cluster_name}-controller-${count.index}"
+  name                = "${var.cluster_name}-controller-${count.index}-ipv4"
+  resource_group_name = azurerm_resource_group.cluster.name
  location            = azurerm_resource_group.cluster.location
+  ip_version          = "IPv4"
  sku                 = "Standard"
  allocation_method   = "Static"
 }

-# Controller NICs with public and private IPv4
+# Controller node public IPv6 addresses
+resource "azurerm_public_ip" "controllers-ipv6" {
+  count = var.controller_count
+
+  name                = "${var.cluster_name}-controller-${count.index}-ipv6"
+  resource_group_name = azurerm_resource_group.cluster.name
+  location            = azurerm_resource_group.cluster.location
+  ip_version          = "IPv6"
+  sku                 = "Standard"
+  allocation_method   = "Static"
+}
+
+# Controllers' network interfaces
 resource "azurerm_network_interface" "controllers" {
  count = var.controller_count
-  resource_group_name = azurerm_resource_group.cluster.name

  name                = "${var.cluster_name}-controller-${count.index}"
+  resource_group_name = azurerm_resource_group.cluster.name
  location            = azurerm_resource_group.cluster.location

  ip_configuration {
-    name                          = "ip0"
+    name                          = "ipv4"
+    primary                       = true
    subnet_id                     = azurerm_subnet.controller.id
    private_ip_address_allocation = "Dynamic"
-    # instance public IPv4
-    public_ip_address_id = azurerm_public_ip.controllers.*.id[count.index]
+    private_ip_address_version    = "IPv4"
+    public_ip_address_id          = azurerm_public_ip.controllers-ipv4[count.index].id
+  }
+  ip_configuration {
+    name                          = "ipv6"
+    subnet_id                     = azurerm_subnet.controller.id
+    private_ip_address_allocation = "Dynamic"
+    private_ip_address_version    = "IPv6"
+    public_ip_address_id          = azurerm_public_ip.controllers-ipv6[count.index].id
  }
 }

@ -111,12 +133,20 @@ resource "azurerm_network_interface_security_group_association" "controllers" {
 }

 # Associate controller network interface with controller backend address pool
-resource "azurerm_network_interface_backend_address_pool_association" "controllers" {
+resource "azurerm_network_interface_backend_address_pool_association" "controllers-ipv4" {
  count = var.controller_count

  network_interface_id    = azurerm_network_interface.controllers[count.index].id
-  ip_configuration_name   = "ip0"
-  backend_address_pool_id = azurerm_lb_backend_address_pool.controller.id
+  ip_configuration_name   = "ipv4"
+  backend_address_pool_id = azurerm_lb_backend_address_pool.controller-ipv4.id
+}
+
+resource "azurerm_network_interface_backend_address_pool_association" "controllers-ipv6" {
+  count = var.controller_count
+
+  network_interface_id    = azurerm_network_interface.controllers[count.index].id
+  ip_configuration_name   = "ipv6"
+  backend_address_pool_id = azurerm_lb_backend_address_pool.controller-ipv6.id
 }

 # Fedora CoreOS controllers
@ -133,7 +163,6 @@ data "ct_config" "controllers" {
    kubeconfig             = indent(10, module.bootstrap.kubeconfig-kubelet)
    ssh_authorized_key     = var.ssh_authorized_key
    cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
-    cluster_domain_suffix  = var.cluster_domain_suffix
  })
  strict   = true
  snippets = var.controller_snippets
--- a/azure/fedora-coreos/kubernetes/lb.tf
+++ b/azure/fedora-coreos/kubernetes/lb.tf
@ -1,116 +1,164 @@
-# DNS record for the apiserver load balancer
+# DNS A record for the apiserver load balancer
 resource "azurerm_dns_a_record" "apiserver" {
-  resource_group_name = var.dns_zone_group
-
  # DNS Zone name where record should be created
  zone_name           = var.dns_zone
-
+  resource_group_name = var.dns_zone_group
  # DNS record
  name = var.cluster_name
  ttl  = 300
-
  # IPv4 address of apiserver load balancer
-  records = [azurerm_public_ip.apiserver-ipv4.ip_address]
+  records = [azurerm_public_ip.frontend-ipv4.ip_address]
 }

-# Static IPv4 address for the apiserver frontend
-resource "azurerm_public_ip" "apiserver-ipv4" {
-  resource_group_name = azurerm_resource_group.cluster.name
+# DNS AAAA record for the apiserver load balancer
+resource "azurerm_dns_aaaa_record" "apiserver" {
+  # DNS Zone name where record should be created
+  zone_name           = var.dns_zone
+  resource_group_name = var.dns_zone_group
+  # DNS record
+  name = var.cluster_name
+  ttl  = 300
+  # IPv6 address of apiserver load balancer
+  records = [azurerm_public_ip.frontend-ipv6.ip_address]
+}

-  name              = "${var.cluster_name}-apiserver-ipv4"
-  location          = var.region
+# Static IPv4 address for the load balancer
+resource "azurerm_public_ip" "frontend-ipv4" {
+  name                = "${var.cluster_name}-frontend-ipv4"
+  resource_group_name = azurerm_resource_group.cluster.name
+  location            = var.location
+  ip_version          = "IPv4"
  sku                 = "Standard"
  allocation_method   = "Static"
 }

-# Static IPv4 address for the ingress frontend
-resource "azurerm_public_ip" "ingress-ipv4" {
+# Static IPv6 address for the load balancer
+resource "azurerm_public_ip" "frontend-ipv6" {
+  name                = "${var.cluster_name}-frontend-ipv6"
  resource_group_name = azurerm_resource_group.cluster.name
-
-  name              = "${var.cluster_name}-ingress-ipv4"
-  location          = var.region
+  location            = var.location
+  ip_version          = "IPv6"
  sku                 = "Standard"
  allocation_method   = "Static"
 }

 # Network Load Balancer for apiservers and ingress
 resource "azurerm_lb" "cluster" {
-  resource_group_name = azurerm_resource_group.cluster.name
-
  name                = var.cluster_name
-  location = var.region
+  resource_group_name = azurerm_resource_group.cluster.name
+  location            = var.location
  sku                 = "Standard"

  frontend_ip_configuration {
-    name                 = "apiserver"
-    public_ip_address_id = azurerm_public_ip.apiserver-ipv4.id
+    name                 = "frontend-ipv4"
+    public_ip_address_id = azurerm_public_ip.frontend-ipv4.id
  }

  frontend_ip_configuration {
-    name                 = "ingress"
-    public_ip_address_id = azurerm_public_ip.ingress-ipv4.id
+    name                 = "frontend-ipv6"
+    public_ip_address_id = azurerm_public_ip.frontend-ipv6.id
  }
 }

-resource "azurerm_lb_rule" "apiserver" {
-  name                           = "apiserver"
+resource "azurerm_lb_rule" "apiserver-ipv4" {
+  name                           = "apiserver-ipv4"
  loadbalancer_id                = azurerm_lb.cluster.id
-  frontend_ip_configuration_name = "apiserver"
+  frontend_ip_configuration_name = "frontend-ipv4"
+  disable_outbound_snat          = true

  protocol                 = "Tcp"
  frontend_port            = 6443
  backend_port             = 6443
-  backend_address_pool_ids = [azurerm_lb_backend_address_pool.controller.id]
+  backend_address_pool_ids = [azurerm_lb_backend_address_pool.controller-ipv4.id]
  probe_id                 = azurerm_lb_probe.apiserver.id
 }

-resource "azurerm_lb_rule" "ingress-http" {
-  name                           = "ingress-http"
+resource "azurerm_lb_rule" "apiserver-ipv6" {
+  name                           = "apiserver-ipv6"
  loadbalancer_id                = azurerm_lb.cluster.id
-  frontend_ip_configuration_name = "ingress"
+  frontend_ip_configuration_name = "frontend-ipv6"
+  disable_outbound_snat          = true
+
+  protocol                 = "Tcp"
+  frontend_port            = 6443
+  backend_port             = 6443
+  backend_address_pool_ids = [azurerm_lb_backend_address_pool.controller-ipv6.id]
+  probe_id                 = azurerm_lb_probe.apiserver.id
+}
+
+resource "azurerm_lb_rule" "ingress-http-ipv4" {
+  name                           = "ingress-http-ipv4"
+  loadbalancer_id                = azurerm_lb.cluster.id
+  frontend_ip_configuration_name = "frontend-ipv4"
  disable_outbound_snat          = true

  protocol                 = "Tcp"
  frontend_port            = 80
  backend_port             = 80
-  backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker.id]
+  backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker-ipv4.id]
  probe_id                 = azurerm_lb_probe.ingress.id
 }

-resource "azurerm_lb_rule" "ingress-https" {
-  name                           = "ingress-https"
+resource "azurerm_lb_rule" "ingress-https-ipv4" {
+  name                           = "ingress-https-ipv4"
  loadbalancer_id                = azurerm_lb.cluster.id
-  frontend_ip_configuration_name = "ingress"
+  frontend_ip_configuration_name = "frontend-ipv4"
  disable_outbound_snat          = true

  protocol                 = "Tcp"
  frontend_port            = 443
  backend_port             = 443
-  backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker.id]
+  backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker-ipv4.id]
  probe_id                 = azurerm_lb_probe.ingress.id
 }

-# Worker outbound TCP/UDP SNAT
-resource "azurerm_lb_outbound_rule" "worker-outbound" {
-  name            = "worker"
+resource "azurerm_lb_rule" "ingress-http-ipv6" {
+  name                           = "ingress-http-ipv6"
  loadbalancer_id                = azurerm_lb.cluster.id
-  frontend_ip_configuration {
-    name = "ingress"
+  frontend_ip_configuration_name = "frontend-ipv6"
+  disable_outbound_snat          = true
+
+  protocol                 = "Tcp"
+  frontend_port            = 80
+  backend_port             = 80
+  backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker-ipv6.id]
+  probe_id                 = azurerm_lb_probe.ingress.id
 }

-  protocol                = "All"
-  backend_address_pool_id = azurerm_lb_backend_address_pool.worker.id
+resource "azurerm_lb_rule" "ingress-https-ipv6" {
+  name                           = "ingress-https-ipv6"
+  loadbalancer_id                = azurerm_lb.cluster.id
+  frontend_ip_configuration_name = "frontend-ipv6"
+  disable_outbound_snat          = true
+
+  protocol                 = "Tcp"
+  frontend_port            = 443
+  backend_port             = 443
+  backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker-ipv6.id]
+  probe_id                 = azurerm_lb_probe.ingress.id
 }

+# Backend Address Pools
+
 # Address pool of controllers
-resource "azurerm_lb_backend_address_pool" "controller" {
-  name            = "controller"
+resource "azurerm_lb_backend_address_pool" "controller-ipv4" {
+  name            = "controller-ipv4"
+  loadbalancer_id = azurerm_lb.cluster.id
+}
+
+resource "azurerm_lb_backend_address_pool" "controller-ipv6" {
+  name            = "controller-ipv6"
  loadbalancer_id = azurerm_lb.cluster.id
 }

 # Address pool of workers
-resource "azurerm_lb_backend_address_pool" "worker" {
-  name            = "worker"
+resource "azurerm_lb_backend_address_pool" "worker-ipv4" {
+  name            = "worker-ipv4"
+  loadbalancer_id = azurerm_lb.cluster.id
+}
+
+resource "azurerm_lb_backend_address_pool" "worker-ipv6" {
+  name            = "worker-ipv6"
  loadbalancer_id = azurerm_lb.cluster.id
 }

@ -122,10 +170,8 @@ resource "azurerm_lb_probe" "apiserver" {
  loadbalancer_id = azurerm_lb.cluster.id
  protocol        = "Tcp"
  port            = 6443
-
  # unhealthy threshold
  number_of_probes    = 3
-
  interval_in_seconds = 5
 }

@ -136,10 +182,29 @@ resource "azurerm_lb_probe" "ingress" {
  protocol        = "Http"
  port            = 10254
  request_path    = "/healthz"
-
  # unhealthy threshold
  number_of_probes    = 3
-
  interval_in_seconds = 5
 }

+# Outbound SNAT
+
+resource "azurerm_lb_outbound_rule" "outbound-ipv4" {
+  name                    = "outbound-ipv4"
+  protocol                = "All"
+  loadbalancer_id         = azurerm_lb.cluster.id
+  backend_address_pool_id = azurerm_lb_backend_address_pool.worker-ipv4.id
+  frontend_ip_configuration {
+    name = "frontend-ipv4"
+  }
+}
+
+resource "azurerm_lb_outbound_rule" "outbound-ipv6" {
+  name                    = "outbound-ipv6"
+  protocol                = "All"
+  loadbalancer_id         = azurerm_lb.cluster.id
+  backend_address_pool_id = azurerm_lb_backend_address_pool.worker-ipv6.id
+  frontend_ip_configuration {
+    name = "frontend-ipv6"
+  }
+}
--- a/azure/fedora-coreos/kubernetes/locals.tf
+++ b/azure/fedora-coreos/kubernetes/locals.tf
@ -0,0 +1,6 @@
+locals {
+  backend_address_pool_ids = {
+    ipv4 = [azurerm_lb_backend_address_pool.worker-ipv4.id]
+    ipv6 = [azurerm_lb_backend_address_pool.worker-ipv6.id]
+  }
+}
--- a/azure/fedora-coreos/kubernetes/network.tf
+++ b/azure/fedora-coreos/kubernetes/network.tf
@ -1,27 +1,64 @@
+# Choose an IPv6 ULA subnet at random
+# https://datatracker.ietf.org/doc/html/rfc4193
+resource "random_id" "ula-netnum" {
+  byte_length = 5 # 40 bits
+}
+
+locals {
+  # fd00::/8 extended by 40 random bits -> one of 2^40 possible /48 prefixes
+  ula-range = cidrsubnet("fd00::/8", 40, random_id.ula-netnum.dec)
+  network_cidr = {
+    ipv4 = var.network_cidr.ipv4
+    ipv6 = length(var.network_cidr.ipv6) > 0 ? var.network_cidr.ipv6 : [local.ula-range]
+  }
+
+  # Subdivide the virtual network into subnets
+  # - controllers use netnum 0
+  # - workers use netnum 1
+  controller_subnets = {
+    ipv4 = [for i, cidr in local.network_cidr.ipv4 : cidrsubnet(cidr, 1, 0)]
+    ipv6 = [for i, cidr in local.network_cidr.ipv6 : cidrsubnet(cidr, 16, 0)]
+  }
+  worker_subnets = {
+    ipv4 = [for i, cidr in local.network_cidr.ipv4 : cidrsubnet(cidr, 1, 1)]
+    ipv6 = [for i, cidr in local.network_cidr.ipv6 : cidrsubnet(cidr, 16, 1)]
+  }
+  cluster_subnets = {
+    ipv4 = concat(local.controller_subnets.ipv4, local.worker_subnets.ipv4)
+    ipv6 = concat(local.controller_subnets.ipv6, local.worker_subnets.ipv6)
+  }
+}
+
 # Organize cluster into a resource group
 resource "azurerm_resource_group" "cluster" {
  name     = var.cluster_name
-  location = var.region
+  location = var.location
 }

 resource "azurerm_virtual_network" "network" {
-  resource_group_name = azurerm_resource_group.cluster.name
-
  name                = var.cluster_name
+  resource_group_name = azurerm_resource_group.cluster.name
  location            = azurerm_resource_group.cluster.location
-  address_space = [var.host_cidr]
+  address_space = concat(
+    local.network_cidr.ipv4,
+    local.network_cidr.ipv6
+  )
 }

-# Subnets - separate subnets for controller and workers because Azure
-# network security groups are based on IPv4 CIDR rather than instance
-# tags like GCP or security group membership like AWS
+# Subnets - separate subnets for controllers and workers because Azure
+# network security groups are oriented around address prefixes rather
+# than instance tags (GCP) or security group membership (AWS)

 resource "azurerm_subnet" "controller" {
-  resource_group_name = azurerm_resource_group.cluster.name
-
  name                 = "controller"
+  resource_group_name  = azurerm_resource_group.cluster.name
  virtual_network_name = azurerm_virtual_network.network.name
-  address_prefixes     = [cidrsubnet(var.host_cidr, 1, 0)]
+  address_prefixes = concat(
+    local.controller_subnets.ipv4,
+    local.controller_subnets.ipv6,
+  )
+  default_outbound_access_enabled = false
+
 }

 resource "azurerm_subnet_network_security_group_association" "controller" {
@ -30,11 +67,14 @@ resource "azurerm_subnet_network_security_group_association" "controller" {
 }

 resource "azurerm_subnet" "worker" {
-  resource_group_name = azurerm_resource_group.cluster.name
-
  name                 = "worker"
+  resource_group_name  = azurerm_resource_group.cluster.name
  virtual_network_name = azurerm_virtual_network.network.name
-  address_prefixes     = [cidrsubnet(var.host_cidr, 1, 1)]
+  address_prefixes = concat(
+    local.worker_subnets.ipv4,
+    local.worker_subnets.ipv6,
+  )
+  default_outbound_access_enabled = false
 }

 resource "azurerm_subnet_network_security_group_association" "worker" {
--- a/azure/fedora-coreos/kubernetes/outputs.tf
+++ b/azure/fedora-coreos/kubernetes/outputs.tf
@ -6,13 +6,18 @@ output "kubeconfig-admin" {
 # Outputs for Kubernetes Ingress

 output "ingress_static_ipv4" {
-  value       = azurerm_public_ip.ingress-ipv4.ip_address
+  value       = azurerm_public_ip.frontend-ipv4.ip_address
  description = "IPv4 address of the load balancer for distributing traffic to Ingress controllers"
 }

+output "ingress_static_ipv6" {
+  value       = azurerm_public_ip.frontend-ipv6.ip_address
+  description = "IPv6 address of the load balancer for distributing traffic to Ingress controllers"
+}
+
 # Outputs for worker pools

-output "region" {
+output "location" {
  value = azurerm_resource_group.cluster.location
 }

@ -39,13 +44,24 @@ output "kubeconfig" {

 # Outputs for custom firewalling

+output "controller_security_group_name" {
+  description = "Network Security Group for controller nodes"
+  value       = azurerm_network_security_group.controller.name
+}
+
 output "worker_security_group_name" {
+  description = "Network Security Group for worker nodes"
  value       = azurerm_network_security_group.worker.name
 }

+output "controller_address_prefixes" {
+  description = "Controller network subnet CIDR addresses (for source/destination)"
+  value       = local.controller_subnets
+}
+
 output "worker_address_prefixes" {
  description = "Worker network subnet CIDR addresses (for source/destination)"
-  value       = azurerm_subnet.worker.address_prefixes
+  value       = local.worker_subnets
 }

 # Outputs for custom load balancing
@ -55,9 +71,12 @@ output "loadbalancer_id" {
  value       = azurerm_lb.cluster.id
 }

-output "backend_address_pool_id" {
-  description = "ID of the worker backend address pool"
-  value       = azurerm_lb_backend_address_pool.worker.id
+output "backend_address_pool_ids" {
+  description = "IDs of the worker backend address pools"
+  value = {
+    ipv4 = [azurerm_lb_backend_address_pool.worker-ipv4.id]
+    ipv6 = [azurerm_lb_backend_address_pool.worker-ipv6.id]
+  }
 }

 # Outputs for debug
--- a/azure/fedora-coreos/kubernetes/security.tf
+++ b/azure/fedora-coreos/kubernetes/security.tf
@ -1,198 +1,223 @@
 # Controller security group

 resource "azurerm_network_security_group" "controller" {
-  resource_group_name = azurerm_resource_group.cluster.name
-
  name                = "${var.cluster_name}-controller"
+  resource_group_name = azurerm_resource_group.cluster.name
  location            = azurerm_resource_group.cluster.location
 }

 resource "azurerm_network_security_rule" "controller-icmp" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.controller_subnets

-  name                         = "allow-icmp"
+  name                         = "allow-icmp-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.controller.name
-  priority                     = "1995"
+  priority                     = 1995 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Icmp"
  source_port_range            = "*"
  destination_port_range       = "*"
-  source_address_prefixes      = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
-  destination_address_prefixes = azurerm_subnet.controller.address_prefixes
+  source_address_prefixes      = local.cluster_subnets[each.key]
+  destination_address_prefixes = local.controller_subnets[each.key]
 }

 resource "azurerm_network_security_rule" "controller-ssh" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.controller_subnets

-  name                         = "allow-ssh"
+  name                         = "allow-ssh-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.controller.name
-  priority                     = "2000"
+  priority                     = 2000 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Tcp"
  source_port_range            = "*"
  destination_port_range       = "22"
  source_address_prefix        = "*"
-  destination_address_prefixes = azurerm_subnet.controller.address_prefixes
+  destination_address_prefixes = local.controller_subnets[each.key]
 }

 resource "azurerm_network_security_rule" "controller-etcd" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.controller_subnets

-  name                         = "allow-etcd"
+  name                         = "allow-etcd-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.controller.name
-  priority                     = "2005"
+  priority                     = 2005 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Tcp"
  source_port_range            = "*"
  destination_port_range       = "2379-2380"
-  source_address_prefixes      = azurerm_subnet.controller.address_prefixes
-  destination_address_prefixes = azurerm_subnet.controller.address_prefixes
+  source_address_prefixes      = local.controller_subnets[each.key]
+  destination_address_prefixes = local.controller_subnets[each.key]
 }

 # Allow Prometheus to scrape etcd metrics
 resource "azurerm_network_security_rule" "controller-etcd-metrics" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.controller_subnets

-  name                         = "allow-etcd-metrics"
+  name                         = "allow-etcd-metrics-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.controller.name
-  priority                     = "2010"
+  priority                     = 2010 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Tcp"
  source_port_range            = "*"
  destination_port_range       = "2381"
-  source_address_prefixes      = azurerm_subnet.worker.address_prefixes
-  destination_address_prefixes = azurerm_subnet.controller.address_prefixes
+  source_address_prefixes      = local.worker_subnets[each.key]
+  destination_address_prefixes = local.controller_subnets[each.key]
 }

 # Allow Prometheus to scrape kube-proxy metrics
 resource "azurerm_network_security_rule" "controller-kube-proxy" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.controller_subnets

-  name                         = "allow-kube-proxy-metrics"
+  name                         = "allow-kube-proxy-metrics-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.controller.name
-  priority                     = "2011"
+  priority                     = 2012 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Tcp"
  source_port_range            = "*"
  destination_port_range       = "10249"
-  source_address_prefixes      = azurerm_subnet.worker.address_prefixes
-  destination_address_prefixes = azurerm_subnet.controller.address_prefixes
+  source_address_prefixes      = local.worker_subnets[each.key]
+  destination_address_prefixes = local.controller_subnets[each.key]
 }

 # Allow Prometheus to scrape kube-scheduler and kube-controller-manager metrics
 resource "azurerm_network_security_rule" "controller-kube-metrics" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.controller_subnets

-  name                         = "allow-kube-metrics"
+  name                         = "allow-kube-metrics-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.controller.name
-  priority                     = "2012"
+  priority                     = 2014 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Tcp"
  source_port_range            = "*"
  destination_port_range       = "10257-10259"
-  source_address_prefixes      = azurerm_subnet.worker.address_prefixes
-  destination_address_prefixes = azurerm_subnet.controller.address_prefixes
+  source_address_prefixes      = local.worker_subnets[each.key]
+  destination_address_prefixes = local.controller_subnets[each.key]
 }

 resource "azurerm_network_security_rule" "controller-apiserver" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.controller_subnets

-  name                         = "allow-apiserver"
+  name                         = "allow-apiserver-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.controller.name
-  priority                     = "2015"
+  priority                     = 2016 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Tcp"
  source_port_range            = "*"
  destination_port_range       = "6443"
  source_address_prefix        = "*"
-  destination_address_prefixes = azurerm_subnet.controller.address_prefixes
+  destination_address_prefixes = local.controller_subnets[each.key]
 }

 resource "azurerm_network_security_rule" "controller-cilium-health" {
-  resource_group_name = azurerm_resource_group.cluster.name
-  count               = var.networking == "cilium" ? 1 : 0
+  for_each = var.networking == "cilium" ? local.controller_subnets : {}

-  name                         = "allow-cilium-health"
+  name                         = "allow-cilium-health-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.controller.name
-  priority                     = "2019"
+  priority                     = 2018 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Tcp"
  source_port_range            = "*"
  destination_port_range       = "4240"
-  source_address_prefixes      = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
-  destination_address_prefixes = azurerm_subnet.controller.address_prefixes
+  source_address_prefixes      = local.cluster_subnets[each.key]
+  destination_address_prefixes = local.controller_subnets[each.key]
+}
+
+resource "azurerm_network_security_rule" "controller-cilium-metrics" {
+  for_each = var.networking == "cilium" ? local.controller_subnets : {}
+
+  name                         = "allow-cilium-metrics-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
+  network_security_group_name  = azurerm_network_security_group.controller.name
+  priority                     = 2035 + (each.key == "ipv4" ? 0 : 1)
+  access                       = "Allow"
+  direction                    = "Inbound"
+  protocol                     = "Tcp"
+  source_port_range            = "*"
+  destination_port_range       = "9962-9965"
+  source_address_prefixes      = local.cluster_subnets[each.key]
+  destination_address_prefixes = local.controller_subnets[each.key]
 }

 resource "azurerm_network_security_rule" "controller-vxlan" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.controller_subnets

-  name                         = "allow-vxlan"
+  name                         = "allow-vxlan-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.controller.name
-  priority                     = "2020"
+  priority                     = 2020 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Udp"
  source_port_range            = "*"
  destination_port_range       = "4789"
-  source_address_prefixes      = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
-  destination_address_prefixes = azurerm_subnet.controller.address_prefixes
+  source_address_prefixes      = local.cluster_subnets[each.key]
+  destination_address_prefixes = local.controller_subnets[each.key]
 }

 resource "azurerm_network_security_rule" "controller-linux-vxlan" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.controller_subnets

-  name                         = "allow-linux-vxlan"
+  name                         = "allow-linux-vxlan-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.controller.name
-  priority                     = "2021"
+  priority                     = 2022 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Udp"
  source_port_range            = "*"
  destination_port_range       = "8472"
-  source_address_prefixes      = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
-  destination_address_prefixes = azurerm_subnet.controller.address_prefixes
+  source_address_prefixes      = local.cluster_subnets[each.key]
+  destination_address_prefixes = local.controller_subnets[each.key]
 }

 # Allow Prometheus to scrape node-exporter daemonset
 resource "azurerm_network_security_rule" "controller-node-exporter" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.controller_subnets

-  name                         = "allow-node-exporter"
+  name                         = "allow-node-exporter-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.controller.name
-  priority                     = "2025"
+  priority                     = 2025 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Tcp"
  source_port_range            = "*"
  destination_port_range       = "9100"
-  source_address_prefixes      = azurerm_subnet.worker.address_prefixes
-  destination_address_prefixes = azurerm_subnet.controller.address_prefixes
+  source_address_prefixes      = local.worker_subnets[each.key]
+  destination_address_prefixes = local.controller_subnets[each.key]
 }

 # Allow apiserver to access kubelets for exec, log, port-forward
 resource "azurerm_network_security_rule" "controller-kubelet" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.controller_subnets

-  name                        = "allow-kubelet"
+  name                        = "allow-kubelet-${each.key}"
+  resource_group_name         = azurerm_resource_group.cluster.name
  network_security_group_name = azurerm_network_security_group.controller.name
-  priority                    = "2030"
+  priority                    = 2030 + (each.key == "ipv4" ? 0 : 1)
  access                      = "Allow"
  direction                   = "Inbound"
  protocol                    = "Tcp"
  source_port_range           = "*"
  destination_port_range      = "10250"
-
  # allow Prometheus to scrape kubelet metrics too
-  source_address_prefixes      = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
-  destination_address_prefixes = azurerm_subnet.controller.address_prefixes
+  source_address_prefixes      = local.cluster_subnets[each.key]
+  destination_address_prefixes = local.controller_subnets[each.key]
 }

 # Override Azure AllowVNetInBound and AllowAzureLoadBalancerInBound
@ -231,166 +256,189 @@ resource "azurerm_network_security_rule" "controller-deny-all" {
 # Worker security group

 resource "azurerm_network_security_group" "worker" {
-  resource_group_name = azurerm_resource_group.cluster.name
-
  name                = "${var.cluster_name}-worker"
+  resource_group_name = azurerm_resource_group.cluster.name
  location            = azurerm_resource_group.cluster.location
 }

 resource "azurerm_network_security_rule" "worker-icmp" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.worker_subnets

-  name                         = "allow-icmp"
+  name                         = "allow-icmp-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.worker.name
-  priority                     = "1995"
+  priority                     = 1995 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Icmp"
  source_port_range            = "*"
  destination_port_range       = "*"
-  source_address_prefixes      = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
-  destination_address_prefixes = azurerm_subnet.worker.address_prefixes
+  source_address_prefixes      = local.cluster_subnets[each.key]
+  destination_address_prefixes = local.worker_subnets[each.key]
 }

 resource "azurerm_network_security_rule" "worker-ssh" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.worker_subnets

-  name                         = "allow-ssh"
+  name                         = "allow-ssh-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.worker.name
-  priority                     = "2000"
+  priority                     = 2000 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Tcp"
  source_port_range            = "*"
  destination_port_range       = "22"
-  source_address_prefixes      = azurerm_subnet.controller.address_prefixes
-  destination_address_prefixes = azurerm_subnet.worker.address_prefixes
+  source_address_prefixes      = local.controller_subnets[each.key]
+  destination_address_prefixes = local.worker_subnets[each.key]
 }

 resource "azurerm_network_security_rule" "worker-http" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.worker_subnets

-  name                         = "allow-http"
+  name                         = "allow-http-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.worker.name
-  priority                     = "2005"
+  priority                     = 2005 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Tcp"
  source_port_range            = "*"
  destination_port_range       = "80"
  source_address_prefix        = "*"
-  destination_address_prefixes = azurerm_subnet.worker.address_prefixes
+  destination_address_prefixes = local.worker_subnets[each.key]
 }

 resource "azurerm_network_security_rule" "worker-https" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.worker_subnets

-  name                         = "allow-https"
+  name                         = "allow-https-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.worker.name
-  priority                     = "2010"
+  priority                     = 2010 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Tcp"
  source_port_range            = "*"
  destination_port_range       = "443"
  source_address_prefix        = "*"
-  destination_address_prefixes = azurerm_subnet.worker.address_prefixes
+  destination_address_prefixes = local.worker_subnets[each.key]
 }

 resource "azurerm_network_security_rule" "worker-cilium-health" {
-  resource_group_name = azurerm_resource_group.cluster.name
-  count               = var.networking == "cilium" ? 1 : 0
+  for_each = var.networking == "cilium" ? local.worker_subnets : {}

-  name                         = "allow-cilium-health"
+  name                         = "allow-cilium-health-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.worker.name
-  priority                     = "2014"
+  priority                     = 2012 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Tcp"
  source_port_range            = "*"
  destination_port_range       = "4240"
-  source_address_prefixes      = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
-  destination_address_prefixes = azurerm_subnet.worker.address_prefixes
+  source_address_prefixes      = local.cluster_subnets[each.key]
+  destination_address_prefixes = local.worker_subnets[each.key]
+}
+
+resource "azurerm_network_security_rule" "worker-cilium-metrics" {
+  for_each = var.networking == "cilium" ? local.worker_subnets : {}
+
+  name                         = "allow-cilium-metrics-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
+  network_security_group_name  = azurerm_network_security_group.worker.name
+  priority                     = 2014 + (each.key == "ipv4" ? 0 : 1)
+  access                       = "Allow"
+  direction                    = "Inbound"
+  protocol                     = "Tcp"
+  source_port_range            = "*"
+  destination_port_range       = "9962-9965"
+  source_address_prefixes      = local.cluster_subnets[each.key]
+  destination_address_prefixes = local.worker_subnets[each.key]
 }

 resource "azurerm_network_security_rule" "worker-vxlan" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.worker_subnets

-  name                         = "allow-vxlan"
+  name                         = "allow-vxlan-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.worker.name
-  priority                     = "2015"
+  priority                     = 2016 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Udp"
  source_port_range            = "*"
  destination_port_range       = "4789"
-  source_address_prefixes      = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
-  destination_address_prefixes = azurerm_subnet.worker.address_prefixes
+  source_address_prefixes      = local.cluster_subnets[each.key]
+  destination_address_prefixes = local.worker_subnets[each.key]
 }

 resource "azurerm_network_security_rule" "worker-linux-vxlan" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.worker_subnets

-  name                         = "allow-linux-vxlan"
+  name                         = "allow-linux-vxlan-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.worker.name
-  priority                     = "2016"
+  priority                     = 2018 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Udp"
  source_port_range            = "*"
  destination_port_range       = "8472"
-  source_address_prefixes      = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
-  destination_address_prefixes = azurerm_subnet.worker.address_prefixes
+  source_address_prefixes      = local.cluster_subnets[each.key]
+  destination_address_prefixes = local.worker_subnets[each.key]
 }

 # Allow Prometheus to scrape node-exporter daemonset
 resource "azurerm_network_security_rule" "worker-node-exporter" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.worker_subnets

-  name                         = "allow-node-exporter"
+  name                         = "allow-node-exporter-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.worker.name
-  priority                     = "2020"
+  priority                     = 2020 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Tcp"
  source_port_range            = "*"
  destination_port_range       = "9100"
-  source_address_prefixes      = azurerm_subnet.worker.address_prefixes
-  destination_address_prefixes = azurerm_subnet.worker.address_prefixes
+  source_address_prefixes      = local.worker_subnets[each.key]
+  destination_address_prefixes = local.worker_subnets[each.key]
 }

 # Allow Prometheus to scrape kube-proxy
 resource "azurerm_network_security_rule" "worker-kube-proxy" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.worker_subnets

-  name                         = "allow-kube-proxy"
+  name                         = "allow-kube-proxy-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.worker.name
-  priority                     = "2024"
+  priority                     = 2024 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Tcp"
  source_port_range            = "*"
  destination_port_range       = "10249"
-  source_address_prefixes      = azurerm_subnet.worker.address_prefixes
-  destination_address_prefixes = azurerm_subnet.worker.address_prefixes
+  source_address_prefixes      = local.worker_subnets[each.key]
+  destination_address_prefixes = local.worker_subnets[each.key]
 }

 # Allow apiserver to access kubelets for exec, log, port-forward
 resource "azurerm_network_security_rule" "worker-kubelet" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.worker_subnets

-  name                        = "allow-kubelet"
+  name                        = "allow-kubelet-${each.key}"
+  resource_group_name         = azurerm_resource_group.cluster.name
  network_security_group_name = azurerm_network_security_group.worker.name
-  priority                    = "2025"
+  priority                    = 2026 + (each.key == "ipv4" ? 0 : 1)
  access                      = "Allow"
  direction                   = "Inbound"
  protocol                    = "Tcp"
  source_port_range           = "*"
  destination_port_range      = "10250"
-
  # allow Prometheus to scrape kubelet metrics too
-  source_address_prefixes      = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
-  destination_address_prefixes = azurerm_subnet.worker.address_prefixes
+  source_address_prefixes      = local.cluster_subnets[each.key]
+  destination_address_prefixes = local.worker_subnets[each.key]
 }

 # Override Azure AllowVNetInBound and AllowAzureLoadBalancerInBound
--- a/azure/fedora-coreos/kubernetes/ssh.tf
+++ b/azure/fedora-coreos/kubernetes/ssh.tf
@ -18,7 +18,7 @@ resource "null_resource" "copy-controller-secrets" {

  connection {
    type    = "ssh"
-    host    = azurerm_public_ip.controllers.*.ip_address[count.index]
+    host    = azurerm_public_ip.controllers-ipv4[count.index].ip_address
    user    = "core"
    timeout = "15m"
  }
@ -45,7 +45,7 @@ resource "null_resource" "bootstrap" {

  connection {
    type    = "ssh"
-    host    = azurerm_public_ip.controllers.*.ip_address[0]
+    host    = azurerm_public_ip.controllers-ipv4[0].ip_address
    user    = "core"
    timeout = "15m"
  }
--- a/azure/fedora-coreos/kubernetes/variables.tf
+++ b/azure/fedora-coreos/kubernetes/variables.tf
@ -5,9 +5,9 @@ variable "cluster_name" {

 # Azure

-variable "region" {
+variable "location" {
  type        = string
-  description = "Azure Region (e.g. centralus , see `az account list-locations --output table`)"
+  description = "Azure location (e.g. centralus, see `az account list-locations --output table`)"
 }

 variable "dns_zone" {
@ -22,41 +22,65 @@ variable "dns_zone_group" {

 # instances

+variable "os_image" {
+  type        = string
+  description = "Fedora CoreOS image for instances"
+}
+
 variable "controller_count" {
  type        = number
  description = "Number of controllers (i.e. masters)"
  default     = 1
 }

-variable "worker_count" {
-  type        = number
-  description = "Number of workers"
-  default     = 1
-}
-
 variable "controller_type" {
  type        = string
  description = "Machine type for controllers (see `az vm list-skus --location centralus`)"
  default     = "Standard_B2s"
 }

+variable "controller_disk_type" {
+  type        = string
+  description = "Type of managed disk for controller node(s)"
+  default     = "Premium_LRS"
+}
+
+variable "controller_disk_size" {
+  type        = number
+  description = "Size of the managed disk in GB for controller node(s)"
+  default     = 30
+}
+
+variable "worker_count" {
+  type        = number
+  description = "Number of workers"
+  default     = 1
+}
+
 variable "worker_type" {
  type        = string
  description = "Machine type for workers (see `az vm list-skus --location centralus`)"
  default     = "Standard_D2as_v5"
 }

-variable "os_image" {
+variable "worker_disk_type" {
  type        = string
-  description = "Fedora CoreOS image for instances"
+  description = "Type of managed disk for worker nodes"
+  default     = "Standard_LRS"
 }

-variable "disk_size" {
+variable "worker_disk_size" {
  type        = number
-  description = "Size of the disk in GB"
+  description = "Size of the managed disk in GB for worker nodes"
  default     = 30
 }

+variable "worker_ephemeral_disk" {
+  type        = bool
+  description = "Use ephemeral local disk instead of managed disk (requires worker_type with local storage)"
+  default     = false
+}
+
 variable "worker_priority" {
  type        = string
  description = "Set worker priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be deallocated at any time."
@ -94,10 +118,15 @@ variable "networking" {
  default     = "cilium"
 }

-variable "host_cidr" {
-  type        = string
-  description = "CIDR IPv4 range to assign to instances"
-  default     = "10.0.0.0/16"
+variable "network_cidr" {
+  type = object({
+    ipv4 = list(string)
+    ipv6 = optional(list(string), [])
+  })
+  description = "Virtual network CIDR ranges"
+  default = {
+    ipv4 = ["10.0.0.0/16"]
+  }
 }

 variable "pod_cidr" {
@ -115,34 +144,32 @@ EOD
  default     = "10.3.0.0/16"
 }

-variable "enable_reporting" {
-  type        = bool
-  description = "Enable usage or analytics reporting to upstreams (Calico)"
-  default     = false
-}
-
-variable "enable_aggregation" {
-  type        = bool
-  description = "Enable the Kubernetes Aggregation Layer"
-  default     = true
-}
-
 variable "worker_node_labels" {
  type        = list(string)
  description = "List of initial worker node labels"
  default     = []
 }

-# unofficial, undocumented, unsupported
-
-variable "cluster_domain_suffix" {
-  type        = string
-  description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. foo.default.svc.cluster.local) "
-  default     = "cluster.local"
-}
+# advanced

 variable "daemonset_tolerations" {
  type        = list(string)
  description = "List of additional taint keys kube-system DaemonSets should tolerate (e.g. ['custom-role', 'gpu-role'])"
  default     = []
 }
+
+variable "components" {
+  description = "Configure pre-installed cluster components"
+  # Component configs are passed through to terraform-render-bootstrap,
+  # which handles type enforcement and defines defaults
+  # https://github.com/poseidon/terraform-render-bootstrap/blob/main/variables.tf#L95
+  type = object({
+    enable     = optional(bool)
+    coredns    = optional(map(any))
+    kube_proxy = optional(map(any))
+    flannel    = optional(map(any))
+    calico     = optional(map(any))
+    cilium     = optional(map(any))
+  })
+  default = null
+}
--- a/azure/fedora-coreos/kubernetes/versions.tf
+++ b/azure/fedora-coreos/kubernetes/versions.tf
@ -3,7 +3,7 @@
 terraform {
  required_version = ">= 0.13.0, < 2.0.0"
  required_providers {
-    azurerm = ">= 2.8, < 4.0"
+    azurerm = ">= 2.8"
    null    = ">= 2.1"
    ct = {
      source  = "poseidon/ct"
--- a/azure/fedora-coreos/kubernetes/workers.tf
+++ b/azure/fedora-coreos/kubernetes/workers.tf
@ -4,14 +4,18 @@ module "workers" {

  # Azure
  resource_group_name      = azurerm_resource_group.cluster.name
-  region                  = azurerm_resource_group.cluster.location
+  location                 = azurerm_resource_group.cluster.location
  subnet_id                = azurerm_subnet.worker.id
  security_group_id        = azurerm_network_security_group.worker.id
-  backend_address_pool_id = azurerm_lb_backend_address_pool.worker.id
+  backend_address_pool_ids = local.backend_address_pool_ids

+  # instances
+  os_image       = var.os_image
  worker_count   = var.worker_count
  vm_type        = var.worker_type
-  os_image     = var.os_image
+  disk_type      = var.worker_disk_type
+  disk_size      = var.worker_disk_size
+  ephemeral_disk = var.worker_ephemeral_disk
  priority       = var.worker_priority

  # configuration
@ -19,7 +23,6 @@ module "workers" {
  ssh_authorized_key   = var.ssh_authorized_key
  azure_authorized_key = var.azure_authorized_key
  service_cidr         = var.service_cidr
-  cluster_domain_suffix = var.cluster_domain_suffix
  snippets             = var.worker_snippets
  node_labels          = var.worker_node_labels
 }
--- a/azure/fedora-coreos/kubernetes/workers/butane/worker.yaml
+++ b/azure/fedora-coreos/kubernetes/workers/butane/worker.yaml
@ -26,7 +26,7 @@ systemd:
        Description=Kubelet (System Container)
        Wants=rpc-statd.service
        [Service]
-        Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3
+        Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3
        ExecStartPre=/bin/mkdir -p /etc/cni/net.d
        ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
        ExecStartPre=/bin/mkdir -p /opt/cni/bin
@ -99,7 +99,7 @@ storage:
          cgroupDriver: systemd
          clusterDNS:
            - ${cluster_dns_service_ip}
-          clusterDomain: ${cluster_domain_suffix}
+          clusterDomain: cluster.local
          healthzPort: 0
          rotateCertificates: true
          shutdownGracePeriod: 45s
--- a/azure/fedora-coreos/kubernetes/workers/variables.tf
+++ b/azure/fedora-coreos/kubernetes/workers/variables.tf
@ -5,9 +5,9 @@ variable "name" {

 # Azure

-variable "region" {
+variable "location" {
  type        = string
-  description = "Must be set to the Azure Region of cluster"
+  description = "Must be set to the Azure location of cluster"
 }

 variable "resource_group_name" {
@ -25,9 +25,12 @@ variable "security_group_id" {
  description = "Must be set to the `worker_security_group_id` output by cluster"
 }

-variable "backend_address_pool_id" {
-  type        = string
-  description = "Must be set to the `worker_backend_address_pool_id` output by cluster"
+variable "backend_address_pool_ids" {
+  type = object({
+    ipv4 = list(string)
+    ipv6 = list(string)
+  })
+  description = "Must be set to the `backend_address_pool_ids` output by cluster"
 }

 # instances
@ -49,6 +52,24 @@ variable "os_image" {
  description = "Fedora CoreOS image for instances"
 }

+variable "disk_type" {
+  type        = string
+  description = "Type of managed disk"
+  default     = "Standard_LRS"
+}
+
+variable "disk_size" {
+  type        = number
+  description = "Size of the managed disk in GB"
+  default     = 30
+}
+
+variable "ephemeral_disk" {
+  type        = bool
+  description = "Use ephemeral local disk instead of managed disk (requires vm_type with local storage)"
+  default     = false
+}
+
 variable "priority" {
  type        = string
  description = "Set priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be evicted at any time."
@ -99,12 +120,3 @@ variable "node_taints" {
  description = "List of initial node taints"
  default     = []
 }
-
-# unofficial, undocumented, unsupported
-
-variable "cluster_domain_suffix" {
-  description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. foo.default.svc.cluster.local) "
-  type        = string
-  default     = "cluster.local"
-}
-
--- a/azure/fedora-coreos/kubernetes/workers/versions.tf
+++ b/azure/fedora-coreos/kubernetes/workers/versions.tf
@ -3,7 +3,7 @@
 terraform {
  required_version = ">= 0.13.0, < 2.0.0"
  required_providers {
-    azurerm = ">= 2.8, < 4.0"
+    azurerm = ">= 2.8"
    ct = {
      source  = "poseidon/ct"
      version = "~> 0.13"
--- a/azure/fedora-coreos/kubernetes/workers/workers.tf
+++ b/azure/fedora-coreos/kubernetes/workers/workers.tf
@ -3,30 +3,29 @@ locals {
 }

 # Workers scale set
-resource "azurerm_linux_virtual_machine_scale_set" "workers" {
-  resource_group_name = var.resource_group_name
-
+resource "azurerm_orchestrated_virtual_machine_scale_set" "workers" {
  name                        = "${var.name}-worker"
-  location  = var.region
-  sku       = var.vm_type
+  resource_group_name         = var.resource_group_name
+  location                    = var.location
+  platform_fault_domain_count = 1
+  sku_name                    = var.vm_type
  instances                   = var.worker_count
-  # instance name prefix for instances in the set
-  computer_name_prefix   = "${var.name}-worker"
-  single_placement_group = false
-  custom_data            = base64encode(data.ct_config.worker.rendered)

  # storage
+  encryption_at_host_enabled = true
  source_image_id            = var.os_image
  os_disk {
-    storage_account_type = "Standard_LRS"
-    caching              = "ReadWrite"
+    storage_account_type = var.disk_type
+    disk_size_gb         = var.disk_size
+    caching              = "ReadOnly"
+    # Optionally, use the ephemeral disk of the instance type (support varies)
+    dynamic "diff_disk_settings" {
+      for_each = var.ephemeral_disk ? [1] : []
+      content {
+        option    = "Local"
+        placement = "ResourceDisk"
+      }
    }
-
-  # Azure requires setting admin_ssh_key, though Ignition custom_data handles it too
-  admin_username = "core"
-  admin_ssh_key {
-    username   = "core"
-    public_key = var.azure_authorized_key
  }

  # network
@ -36,41 +35,46 @@ resource "azurerm_linux_virtual_machine_scale_set" "workers" {
    network_security_group_id = var.security_group_id

    ip_configuration {
-      name      = "ip0"
+      name      = "ipv4"
+      version   = "IPv4"
      primary   = true
      subnet_id = var.subnet_id
-
      # backend address pool to which the NIC should be added
-      load_balancer_backend_address_pool_ids = [var.backend_address_pool_id]
+      load_balancer_backend_address_pool_ids = var.backend_address_pool_ids.ipv4
+    }
+    ip_configuration {
+      name      = "ipv6"
+      version   = "IPv6"
+      subnet_id = var.subnet_id
+      # backend address pool to which the NIC should be added
+      load_balancer_backend_address_pool_ids = var.backend_address_pool_ids.ipv6
+    }
+  }
+
+  # boot
+  user_data_base64 = base64encode(data.ct_config.worker.rendered)
+  boot_diagnostics {
+    # defaults to a managed storage account
+  }
+
+  # Azure requires an RSA admin_ssh_key
+  os_profile {
+    linux_configuration {
+      admin_username = "core"
+      admin_ssh_key {
+        username   = "core"
+        public_key = local.azure_authorized_key
+      }
+      computer_name_prefix = "${var.name}-worker"
    }
  }

  # lifecycle
-  upgrade_mode = "Manual"
  # eviction policy may only be set when priority is Spot
  priority        = var.priority
  eviction_policy = var.priority == "Spot" ? "Delete" : null
-}
-
-# Scale up or down to maintain desired number, tolerating deallocations.
-resource "azurerm_monitor_autoscale_setting" "workers" {
-  resource_group_name = var.resource_group_name
-
-  name     = "${var.name}-maintain-desired"
-  location = var.region
-
-  # autoscale
+  termination_notification {
    enabled = true
-  target_resource_id = azurerm_linux_virtual_machine_scale_set.workers.id
-
-  profile {
-    name = "default"
-
-    capacity {
-      minimum = var.worker_count
-      default = var.worker_count
-      maximum = var.worker_count
-    }
  }
 }

@ -80,7 +84,6 @@ data "ct_config" "worker" {
    kubeconfig             = indent(10, var.kubeconfig)
    ssh_authorized_key     = var.ssh_authorized_key
    cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
-    cluster_domain_suffix  = var.cluster_domain_suffix
    node_labels            = join(",", var.node_labels)
    node_taints            = join(",", var.node_taints)
  })
--- a/azure/flatcar-linux/kubernetes/README.md
+++ b/azure/flatcar-linux/kubernetes/README.md
@ -11,7 +11,7 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster

 ## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>

-* Kubernetes v1.28.3 (upstream)
+* Kubernetes v1.31.3 (upstream)
 * Single or multi-master, [Calico](https://www.projectcalico.org/) or [Cilium](https://github.com/cilium/cilium) or [flannel](https://github.com/coreos/flannel) networking
 * On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/)
 * Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [low-priority](https://typhoon.psdn.io/flatcar-linux/azure/#low-priority) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#hosts) customization
--- a/azure/flatcar-linux/kubernetes/bootstrap.tf
+++ b/azure/flatcar-linux/kubernetes/bootstrap.tf
@ -1,13 +1,12 @@
 # Kubernetes assets (kubeconfig, manifests)
 module "bootstrap" {
-  source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=d151ab77b7ebdfb878ea110c86cc77238189f1ed"
+  source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=e6a1c7bccfc45ab299b5f8149bc3840f99b30b2b"

  cluster_name = var.cluster_name
  api_servers  = [format("%s.%s", var.cluster_name, var.dns_zone)]
  etcd_servers = formatlist("%s.%s", azurerm_dns_a_record.etcds.*.name, var.dns_zone)

  networking = var.networking
-
  # only effective with Calico networking
  # we should be able to use 1450 MTU, but in practice, 1410 was needed
  network_encapsulation = "vxlan"
@ -15,9 +14,7 @@ module "bootstrap" {

  pod_cidr              = var.pod_cidr
  service_cidr          = var.service_cidr
-  cluster_domain_suffix = var.cluster_domain_suffix
-  enable_reporting      = var.enable_reporting
-  enable_aggregation    = var.enable_aggregation
  daemonset_tolerations = var.daemonset_tolerations
+  components            = var.components
 }

--- a/azure/flatcar-linux/kubernetes/butane/controller.yaml
+++ b/azure/flatcar-linux/kubernetes/butane/controller.yaml
@ -11,7 +11,7 @@ systemd:
        Requires=docker.service
        After=docker.service
        [Service]
-        Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.10
+        Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.13
        ExecStartPre=/usr/bin/docker run -d \
          --name etcd \
          --network host \
@ -56,7 +56,7 @@ systemd:
        After=docker.service
        Wants=rpc-statd.service
        [Service]
-        Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3
+        Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3
        ExecStartPre=/bin/mkdir -p /etc/cni/net.d
        ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
        ExecStartPre=/bin/mkdir -p /opt/cni/bin
@ -105,7 +105,7 @@ systemd:
        Type=oneshot
        RemainAfterExit=true
        WorkingDirectory=/opt/bootstrap
-        Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3
+        Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3
        ExecStart=/usr/bin/docker run \
            -v /etc/kubernetes/pki:/etc/kubernetes/pki:ro \
            -v /opt/bootstrap/assets:/assets:ro \
@ -144,7 +144,7 @@ storage:
          cgroupDriver: systemd
          clusterDNS:
            - ${cluster_dns_service_ip}
-          clusterDomain: ${cluster_domain_suffix}
+          clusterDomain: cluster.local
          healthzPort: 0
          rotateCertificates: true
          shutdownGracePeriod: 45s
@ -158,7 +158,7 @@ storage:
      contents:
        inline: |
          #!/bin/bash -e
-          mkdir -p -- auth tls/etcd tls/k8s static-manifests manifests/coredns manifests-networking
+          mkdir -p -- auth tls/{etcd,k8s} static-manifests manifests/{coredns,kube-proxy,network}
          awk '/#####/ {filename=$2; next} {print > filename}' assets
          mkdir -p /etc/ssl/etcd/etcd
          mkdir -p /etc/kubernetes/pki
@ -173,8 +173,7 @@ storage:
          mv static-manifests/* /etc/kubernetes/manifests/
          mkdir -p /opt/bootstrap/assets
          mv manifests /opt/bootstrap/assets/manifests
-          mv manifests-networking/* /opt/bootstrap/assets/manifests/
-          rm -rf assets auth static-manifests tls manifests-networking
+          rm -rf assets auth static-manifests tls manifests
    - path: /opt/bootstrap/apply
      mode: 0544
      contents:
--- a/azure/flatcar-linux/kubernetes/controllers.tf
+++ b/azure/flatcar-linux/kubernetes/controllers.tf
@ -1,25 +1,9 @@
-# Discrete DNS records for each controller's private IPv4 for etcd usage
-resource "azurerm_dns_a_record" "etcds" {
-  count               = var.controller_count
-  resource_group_name = var.dns_zone_group
-
-  # DNS Zone name where record should be created
-  zone_name = var.dns_zone
-
-  # DNS record
-  name = format("%s-etcd%d", var.cluster_name, count.index)
-  ttl  = 300
-
-  # private IPv4 address for etcd
-  records = [azurerm_network_interface.controllers.*.private_ip_address[count.index]]
-}
-
 locals {
  # Container Linux derivative
  # flatcar-stable -> Flatcar Linux Stable
  channel      = split("-", var.os_image)[1]
-  offer_suffix = var.arch == "arm64" ? "corevm" : "free"
-  urn          = var.arch == "arm64" ? local.channel : "${local.channel}-gen2"
+  offer_suffix = var.controller_arch == "arm64" ? "corevm" : "free"
+  urn          = var.controller_arch == "arm64" ? local.channel : "${local.channel}-gen2"

  # Typhoon ssh_authorized_key supports RSA or newer formats (e.g. ed25519).
  # However, Azure requires an older RSA key to pass validations. To use a
@ -28,12 +12,25 @@ locals {
  azure_authorized_key = var.azure_authorized_key == "" ? var.ssh_authorized_key : var.azure_authorized_key
 }

+# Discrete DNS records for each controller's private IPv4 for etcd usage
+resource "azurerm_dns_a_record" "etcds" {
+  count = var.controller_count
+
+  # DNS Zone name where record should be created
+  zone_name           = var.dns_zone
+  resource_group_name = var.dns_zone_group
+  # DNS record
+  name = format("%s-etcd%d", var.cluster_name, count.index)
+  ttl  = 300
+  # private IPv4 address for etcd
+  records = [azurerm_network_interface.controllers[count.index].private_ip_address]
+}
+
 # Controller availability set to spread controllers
 resource "azurerm_availability_set" "controllers" {
-  resource_group_name = azurerm_resource_group.cluster.name
-
  name                         = "${var.cluster_name}-controllers"
-  location                     = var.region
+  resource_group_name          = azurerm_resource_group.cluster.name
+  location                     = var.location
  platform_fault_domain_count  = 2
  platform_update_domain_count = 4
  managed                      = true
@ -42,24 +39,19 @@ resource "azurerm_availability_set" "controllers" {
 # Controller instances
 resource "azurerm_linux_virtual_machine" "controllers" {
  count = var.controller_count
-  resource_group_name = azurerm_resource_group.cluster.name

  name                = "${var.cluster_name}-controller-${count.index}"
-  location            = var.region
+  resource_group_name = azurerm_resource_group.cluster.name
+  location            = var.location
  availability_set_id = azurerm_availability_set.controllers.id
-
  size                = var.controller_type
-  custom_data = base64encode(data.ct_config.controllers.*.rendered[count.index])
-  boot_diagnostics {
-    # defaults to a managed storage account
-  }

  # storage
  os_disk {
    name                 = "${var.cluster_name}-controller-${count.index}"
+    storage_account_type = var.controller_disk_type
+    disk_size_gb         = var.controller_disk_size
    caching              = "None"
-    disk_size_gb         = var.disk_size
-    storage_account_type = "Premium_LRS"
  }

  # Flatcar Container Linux
@ -71,7 +63,7 @@ resource "azurerm_linux_virtual_machine" "controllers" {
  }

  dynamic "plan" {
-    for_each = var.arch == "arm64" ? [] : [1]
+    for_each = var.controller_arch == "arm64" ? [] : [1]
    content {
      publisher = "kinvolk"
      product   = "flatcar-container-linux-${local.offer_suffix}"
@ -84,7 +76,13 @@ resource "azurerm_linux_virtual_machine" "controllers" {
    azurerm_network_interface.controllers[count.index].id
  ]

-  # Azure requires setting admin_ssh_key, though Ignition custom_data handles it too
+  # boot
+  custom_data = base64encode(data.ct_config.controllers[count.index].rendered)
+  boot_diagnostics {
+    # defaults to a managed storage account
+  }
+
+  # Azure requires an RSA admin_ssh_key
  admin_username = "core"
  admin_ssh_key {
    username   = "core"
@ -99,31 +97,52 @@ resource "azurerm_linux_virtual_machine" "controllers" {
  }
 }

-# Controller public IPv4 addresses
-resource "azurerm_public_ip" "controllers" {
+# Controller node public IPv4 addresses
+resource "azurerm_public_ip" "controllers-ipv4" {
  count = var.controller_count
-  resource_group_name = azurerm_resource_group.cluster.name

-  name              = "${var.cluster_name}-controller-${count.index}"
+  name                = "${var.cluster_name}-controller-${count.index}-ipv4"
+  resource_group_name = azurerm_resource_group.cluster.name
  location            = azurerm_resource_group.cluster.location
+  ip_version          = "IPv4"
  sku                 = "Standard"
  allocation_method   = "Static"
 }

-# Controller NICs with public and private IPv4
+# Controller node public IPv6 addresses
+resource "azurerm_public_ip" "controllers-ipv6" {
+  count = var.controller_count
+
+  name                = "${var.cluster_name}-controller-${count.index}-ipv6"
+  resource_group_name = azurerm_resource_group.cluster.name
+  location            = azurerm_resource_group.cluster.location
+  ip_version          = "IPv6"
+  sku                 = "Standard"
+  allocation_method   = "Static"
+}
+
+# Controllers' network interfaces
 resource "azurerm_network_interface" "controllers" {
  count = var.controller_count
-  resource_group_name = azurerm_resource_group.cluster.name

  name                = "${var.cluster_name}-controller-${count.index}"
+  resource_group_name = azurerm_resource_group.cluster.name
  location            = azurerm_resource_group.cluster.location

  ip_configuration {
-    name                          = "ip0"
+    name                          = "ipv4"
+    primary                       = true
    subnet_id                     = azurerm_subnet.controller.id
    private_ip_address_allocation = "Dynamic"
-    # instance public IPv4
-    public_ip_address_id = azurerm_public_ip.controllers.*.id[count.index]
+    private_ip_address_version    = "IPv4"
+    public_ip_address_id          = azurerm_public_ip.controllers-ipv4[count.index].id
+  }
+  ip_configuration {
+    name                          = "ipv6"
+    subnet_id                     = azurerm_subnet.controller.id
+    private_ip_address_allocation = "Dynamic"
+    private_ip_address_version    = "IPv6"
+    public_ip_address_id          = azurerm_public_ip.controllers-ipv6[count.index].id
  }
 }

@ -135,13 +154,21 @@ resource "azurerm_network_interface_security_group_association" "controllers" {
  network_security_group_id = azurerm_network_security_group.controller.id
 }

-# Associate controller network interface with controller backend address pool
-resource "azurerm_network_interface_backend_address_pool_association" "controllers" {
+# Associate controller network interface with controller backend address pools
+resource "azurerm_network_interface_backend_address_pool_association" "controllers-ipv4" {
  count = var.controller_count

  network_interface_id    = azurerm_network_interface.controllers[count.index].id
-  ip_configuration_name   = "ip0"
-  backend_address_pool_id = azurerm_lb_backend_address_pool.controller.id
+  ip_configuration_name   = "ipv4"
+  backend_address_pool_id = azurerm_lb_backend_address_pool.controller-ipv4.id
+}
+
+resource "azurerm_network_interface_backend_address_pool_association" "controllers-ipv6" {
+  count = var.controller_count
+
+  network_interface_id    = azurerm_network_interface.controllers[count.index].id
+  ip_configuration_name   = "ipv6"
+  backend_address_pool_id = azurerm_lb_backend_address_pool.controller-ipv6.id
 }

 # Flatcar Linux controllers
@ -158,7 +185,6 @@ data "ct_config" "controllers" {
    kubeconfig             = indent(10, module.bootstrap.kubeconfig-kubelet)
    ssh_authorized_key     = var.ssh_authorized_key
    cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
-    cluster_domain_suffix  = var.cluster_domain_suffix
  })
  strict   = true
  snippets = var.controller_snippets
--- a/azure/flatcar-linux/kubernetes/lb.tf
+++ b/azure/flatcar-linux/kubernetes/lb.tf
@ -1,116 +1,164 @@
-# DNS record for the apiserver load balancer
+# DNS A record for the apiserver load balancer
 resource "azurerm_dns_a_record" "apiserver" {
-  resource_group_name = var.dns_zone_group
-
  # DNS Zone name where record should be created
  zone_name           = var.dns_zone
-
+  resource_group_name = var.dns_zone_group
  # DNS record
  name = var.cluster_name
  ttl  = 300
-
  # IPv4 address of apiserver load balancer
-  records = [azurerm_public_ip.apiserver-ipv4.ip_address]
+  records = [azurerm_public_ip.frontend-ipv4.ip_address]
 }

-# Static IPv4 address for the apiserver frontend
-resource "azurerm_public_ip" "apiserver-ipv4" {
-  resource_group_name = azurerm_resource_group.cluster.name
+# DNS AAAA record for the apiserver load balancer
+resource "azurerm_dns_aaaa_record" "apiserver" {
+  # DNS Zone name where record should be created
+  zone_name           = var.dns_zone
+  resource_group_name = var.dns_zone_group
+  # DNS record
+  name = var.cluster_name
+  ttl  = 300
+  # IPv6 address of apiserver load balancer
+  records = [azurerm_public_ip.frontend-ipv6.ip_address]
+}

-  name              = "${var.cluster_name}-apiserver-ipv4"
-  location          = var.region
+# Static IPv4 address for the load balancer
+resource "azurerm_public_ip" "frontend-ipv4" {
+  name                = "${var.cluster_name}-frontend-ipv4"
+  resource_group_name = azurerm_resource_group.cluster.name
+  location            = var.location
+  ip_version          = "IPv4"
  sku                 = "Standard"
  allocation_method   = "Static"
 }

-# Static IPv4 address for the ingress frontend
-resource "azurerm_public_ip" "ingress-ipv4" {
+# Static IPv6 address for the load balancer
+resource "azurerm_public_ip" "frontend-ipv6" {
+  name                = "${var.cluster_name}-frontend-ipv6"
  resource_group_name = azurerm_resource_group.cluster.name
-
-  name              = "${var.cluster_name}-ingress-ipv4"
-  location          = var.region
+  location            = var.location
+  ip_version          = "IPv6"
  sku                 = "Standard"
  allocation_method   = "Static"
 }

 # Network Load Balancer for apiservers and ingress
 resource "azurerm_lb" "cluster" {
-  resource_group_name = azurerm_resource_group.cluster.name
-
  name                = var.cluster_name
-  location = var.region
+  resource_group_name = azurerm_resource_group.cluster.name
+  location            = var.location
  sku                 = "Standard"

  frontend_ip_configuration {
-    name                 = "apiserver"
-    public_ip_address_id = azurerm_public_ip.apiserver-ipv4.id
+    name                 = "frontend-ipv4"
+    public_ip_address_id = azurerm_public_ip.frontend-ipv4.id
  }

  frontend_ip_configuration {
-    name                 = "ingress"
-    public_ip_address_id = azurerm_public_ip.ingress-ipv4.id
+    name                 = "frontend-ipv6"
+    public_ip_address_id = azurerm_public_ip.frontend-ipv6.id
  }
 }

-resource "azurerm_lb_rule" "apiserver" {
-  name                           = "apiserver"
+resource "azurerm_lb_rule" "apiserver-ipv4" {
+  name                           = "apiserver-ipv4"
  loadbalancer_id                = azurerm_lb.cluster.id
-  frontend_ip_configuration_name = "apiserver"
+  frontend_ip_configuration_name = "frontend-ipv4"
+  disable_outbound_snat          = true

  protocol                 = "Tcp"
  frontend_port            = 6443
  backend_port             = 6443
-  backend_address_pool_ids = [azurerm_lb_backend_address_pool.controller.id]
+  backend_address_pool_ids = [azurerm_lb_backend_address_pool.controller-ipv4.id]
  probe_id                 = azurerm_lb_probe.apiserver.id
 }

-resource "azurerm_lb_rule" "ingress-http" {
-  name                           = "ingress-http"
+resource "azurerm_lb_rule" "apiserver-ipv6" {
+  name                           = "apiserver-ipv6"
  loadbalancer_id                = azurerm_lb.cluster.id
-  frontend_ip_configuration_name = "ingress"
+  frontend_ip_configuration_name = "frontend-ipv6"
+  disable_outbound_snat          = true
+
+  protocol                 = "Tcp"
+  frontend_port            = 6443
+  backend_port             = 6443
+  backend_address_pool_ids = [azurerm_lb_backend_address_pool.controller-ipv6.id]
+  probe_id                 = azurerm_lb_probe.apiserver.id
+}
+
+resource "azurerm_lb_rule" "ingress-http-ipv4" {
+  name                           = "ingress-http-ipv4"
+  loadbalancer_id                = azurerm_lb.cluster.id
+  frontend_ip_configuration_name = "frontend-ipv4"
  disable_outbound_snat          = true

  protocol                 = "Tcp"
  frontend_port            = 80
  backend_port             = 80
-  backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker.id]
+  backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker-ipv4.id]
  probe_id                 = azurerm_lb_probe.ingress.id
 }

-resource "azurerm_lb_rule" "ingress-https" {
-  name                           = "ingress-https"
+resource "azurerm_lb_rule" "ingress-https-ipv4" {
+  name                           = "ingress-https-ipv4"
  loadbalancer_id                = azurerm_lb.cluster.id
-  frontend_ip_configuration_name = "ingress"
+  frontend_ip_configuration_name = "frontend-ipv4"
  disable_outbound_snat          = true

  protocol                 = "Tcp"
  frontend_port            = 443
  backend_port             = 443
-  backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker.id]
+  backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker-ipv4.id]
  probe_id                 = azurerm_lb_probe.ingress.id
 }

-# Worker outbound TCP/UDP SNAT
-resource "azurerm_lb_outbound_rule" "worker-outbound" {
-  name            = "worker"
+resource "azurerm_lb_rule" "ingress-http-ipv6" {
+  name                           = "ingress-http-ipv6"
  loadbalancer_id                = azurerm_lb.cluster.id
-  frontend_ip_configuration {
-    name = "ingress"
+  frontend_ip_configuration_name = "frontend-ipv6"
+  disable_outbound_snat          = true
+
+  protocol                 = "Tcp"
+  frontend_port            = 80
+  backend_port             = 80
+  backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker-ipv6.id]
+  probe_id                 = azurerm_lb_probe.ingress.id
 }

-  protocol                = "All"
-  backend_address_pool_id = azurerm_lb_backend_address_pool.worker.id
+resource "azurerm_lb_rule" "ingress-https-ipv6" {
+  name                           = "ingress-https-ipv6"
+  loadbalancer_id                = azurerm_lb.cluster.id
+  frontend_ip_configuration_name = "frontend-ipv6"
+  disable_outbound_snat          = true
+
+  protocol                 = "Tcp"
+  frontend_port            = 443
+  backend_port             = 443
+  backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker-ipv6.id]
+  probe_id                 = azurerm_lb_probe.ingress.id
 }

+# Backend Address Pools
+
 # Address pool of controllers
-resource "azurerm_lb_backend_address_pool" "controller" {
-  name            = "controller"
+resource "azurerm_lb_backend_address_pool" "controller-ipv4" {
+  name            = "controller-ipv4"
  loadbalancer_id = azurerm_lb.cluster.id
 }

-# Address pool of workers
-resource "azurerm_lb_backend_address_pool" "worker" {
-  name            = "worker"
+resource "azurerm_lb_backend_address_pool" "controller-ipv6" {
+  name            = "controller-ipv6"
+  loadbalancer_id = azurerm_lb.cluster.id
+}
+
+# Address pools for workers
+resource "azurerm_lb_backend_address_pool" "worker-ipv4" {
+  name            = "worker-ipv4"
+  loadbalancer_id = azurerm_lb.cluster.id
+}
+
+resource "azurerm_lb_backend_address_pool" "worker-ipv6" {
+  name            = "worker-ipv6"
  loadbalancer_id = azurerm_lb.cluster.id
 }

@ -122,10 +170,8 @@ resource "azurerm_lb_probe" "apiserver" {
  loadbalancer_id = azurerm_lb.cluster.id
  protocol        = "Tcp"
  port            = 6443
-
  # unhealthy threshold
  number_of_probes    = 3
-
  interval_in_seconds = 5
 }

@ -136,10 +182,29 @@ resource "azurerm_lb_probe" "ingress" {
  protocol        = "Http"
  port            = 10254
  request_path    = "/healthz"
-
  # unhealthy threshold
  number_of_probes    = 3
-
  interval_in_seconds = 5
 }

+# Outbound SNAT
+
+# IPv4 outbound rule: members of the worker-ipv4 backend pool get outbound
+# connectivity (all protocols) through the "frontend-ipv4" frontend of the
+# cluster load balancer.
+resource "azurerm_lb_outbound_rule" "outbound-ipv4" {
+  name                    = "outbound-ipv4"
+  protocol                = "All"
+  loadbalancer_id         = azurerm_lb.cluster.id
+  backend_address_pool_id = azurerm_lb_backend_address_pool.worker-ipv4.id
+  frontend_ip_configuration {
+    name = "frontend-ipv4"
+  }
+}
+
+# IPv6 outbound rule: members of the worker-ipv6 backend pool get outbound
+# connectivity (all protocols) through the "frontend-ipv6" frontend of the
+# cluster load balancer.
+resource "azurerm_lb_outbound_rule" "outbound-ipv6" {
+  name                    = "outbound-ipv6"
+  protocol                = "All"
+  loadbalancer_id         = azurerm_lb.cluster.id
+  backend_address_pool_id = azurerm_lb_backend_address_pool.worker-ipv6.id
+  frontend_ip_configuration {
+    name = "frontend-ipv6"
+  }
+}
--- a/azure/flatcar-linux/kubernetes/locals.tf
+++ b/azure/flatcar-linux/kubernetes/locals.tf
@ -0,0 +1,6 @@
+locals {
+  # Worker backend address pool IDs grouped by IP family; each family maps to
+  # a single-element list holding that family's worker pool ID
+  backend_address_pool_ids = {
+    ipv4 = [azurerm_lb_backend_address_pool.worker-ipv4.id]
+    ipv6 = [azurerm_lb_backend_address_pool.worker-ipv6.id]
+  }
+}
--- a/azure/flatcar-linux/kubernetes/network.tf
+++ b/azure/flatcar-linux/kubernetes/network.tf
@ -1,27 +1,63 @@
+# Choose an IPv6 ULA subnet at random
+# https://datatracker.ietf.org/doc/html/rfc4193
+# The 40 random bits become the netnum used to carve a /48 out of fd00::/8
+# (see local.ula-range)
+resource "random_id" "ula-netnum" {
+  byte_length = 5 # 40 bits
+}
+
+locals {
+  # Fallback IPv6 range: extend fd00::/8 by the 40 random bits to select one
+  # of the 2^40 possible /48 ULA networks
+  ula-range = cidrsubnet("fd00::/8", 40, random_id.ula-netnum.dec)
+
+  # Effective virtual network ranges. The random ULA /48 is only used when
+  # no IPv6 range was supplied via var.network_cidr.ipv6
+  network_cidr = {
+    ipv4 = var.network_cidr.ipv4
+    ipv6 = length(var.network_cidr.ipv6) > 0 ? var.network_cidr.ipv6 : [local.ula-range]
+  }
+
+  # Subdivide the virtual network into subnets
+  # - controllers use netnum 0
+  # - workers use netnum 1
+  # IPv4 ranges are split in half. IPv6 subnets are always sized /64 (the
+  # IPv6 subnet size Azure requires): newbits is derived from each range's
+  # own prefix length, so a /48 still gets 16 new bits while other prefix
+  # lengths (e.g. a /56) produce valid /64 subnets too
+  controller_subnets = {
+    ipv4 = [for cidr in local.network_cidr.ipv4 : cidrsubnet(cidr, 1, 0)]
+    ipv6 = [for cidr in local.network_cidr.ipv6 : cidrsubnet(cidr, 64 - tonumber(split("/", cidr)[1]), 0)]
+  }
+  worker_subnets = {
+    ipv4 = [for cidr in local.network_cidr.ipv4 : cidrsubnet(cidr, 1, 1)]
+    ipv6 = [for cidr in local.network_cidr.ipv6 : cidrsubnet(cidr, 64 - tonumber(split("/", cidr)[1]), 1)]
+  }
+
+  # Controller and worker subnets combined, per IP family
+  cluster_subnets = {
+    ipv4 = concat(local.controller_subnets.ipv4, local.worker_subnets.ipv4)
+    ipv6 = concat(local.controller_subnets.ipv6, local.worker_subnets.ipv6)
+  }
+}
+
 # Organize cluster into a resource group
 resource "azurerm_resource_group" "cluster" {
  name     = var.cluster_name
-  location = var.region
+  location = var.location
 }

 resource "azurerm_virtual_network" "network" {
-  resource_group_name = azurerm_resource_group.cluster.name
-
  name                = var.cluster_name
+  resource_group_name = azurerm_resource_group.cluster.name
  location            = azurerm_resource_group.cluster.location
-  address_space = [var.host_cidr]
+  address_space = concat(
+    local.network_cidr.ipv4,
+    local.network_cidr.ipv6
+  )
 }

-# Subnets - separate subnets for controller and workers because Azure
-# network security groups are based on IPv4 CIDR rather than instance
-# tags like GCP or security group membership like AWS
+# Subnets - separate subnets for controllers and workers because Azure
+# network security groups are oriented around address prefixes rather
+# than instance tags (GCP) or security group membership (AWS)

 resource "azurerm_subnet" "controller" {
-  resource_group_name = azurerm_resource_group.cluster.name
-
  name                 = "controller"
+  resource_group_name  = azurerm_resource_group.cluster.name
  virtual_network_name = azurerm_virtual_network.network.name
-  address_prefixes     = [cidrsubnet(var.host_cidr, 1, 0)]
+  address_prefixes = concat(
+    local.controller_subnets.ipv4,
+    local.controller_subnets.ipv6,
+  )
+  default_outbound_access_enabled = false
 }

 resource "azurerm_subnet_network_security_group_association" "controller" {
@ -30,11 +66,14 @@ resource "azurerm_subnet_network_security_group_association" "controller" {
 }

 resource "azurerm_subnet" "worker" {
-  resource_group_name = azurerm_resource_group.cluster.name
-
  name                 = "worker"
+  resource_group_name  = azurerm_resource_group.cluster.name
  virtual_network_name = azurerm_virtual_network.network.name
-  address_prefixes     = [cidrsubnet(var.host_cidr, 1, 1)]
+  address_prefixes = concat(
+    local.worker_subnets.ipv4,
+    local.worker_subnets.ipv6,
+  )
+  default_outbound_access_enabled = false
 }

 resource "azurerm_subnet_network_security_group_association" "worker" {
--- a/azure/flatcar-linux/kubernetes/outputs.tf
+++ b/azure/flatcar-linux/kubernetes/outputs.tf
@ -6,13 +6,18 @@ output "kubeconfig-admin" {
 # Outputs for Kubernetes Ingress

 output "ingress_static_ipv4" {
-  value       = azurerm_public_ip.ingress-ipv4.ip_address
+  value       = azurerm_public_ip.frontend-ipv4.ip_address
  description = "IPv4 address of the load balancer for distributing traffic to Ingress controllers"
 }

+# IPv6 sibling of ingress_static_ipv4, read from the frontend-ipv6 public IP
+output "ingress_static_ipv6" {
+  value       = azurerm_public_ip.frontend-ipv6.ip_address
+  description = "IPv6 address of the load balancer for distributing traffic to Ingress controllers"
+}
+
 # Outputs for worker pools

-output "region" {
+output "location" {
  value = azurerm_resource_group.cluster.location
 }

@ -39,13 +44,24 @@ output "kubeconfig" {

 # Outputs for custom firewalling

+# Exposes the controller NSG by name, matching worker_security_group_name
+output "controller_security_group_name" {
+  description = "Network Security Group for controller nodes"
+  value       = azurerm_network_security_group.controller.name
+}
+
 output "worker_security_group_name" {
+  description = "Network Security Group for worker nodes"
  value       = azurerm_network_security_group.worker.name
 }

+# Value is a map with "ipv4" and "ipv6" keys, each holding a list of subnet
+# CIDRs (local.controller_subnets), not a flat list of prefixes
+output "controller_address_prefixes" {
+  description = "Controller network subnet CIDR addresses (for source/destination)"
+  value       = local.controller_subnets
+}
+
 output "worker_address_prefixes" {
  description = "Worker network subnet CIDR addresses (for source/destination)"
-  value       = azurerm_subnet.worker.address_prefixes
+  value       = local.worker_subnets
 }

 # Outputs for custom load balancing
@ -55,9 +71,12 @@ output "loadbalancer_id" {
  value       = azurerm_lb.cluster.id
 }

-output "backend_address_pool_id" {
-  description = "ID of the worker backend address pool"
-  value       = azurerm_lb_backend_address_pool.worker.id
+# Worker backend address pools keyed by IP family ("ipv4", "ipv6"); each value
+# is a list of pool IDs. Reuses local.backend_address_pool_ids (locals.tf),
+# which defines the same map, so the two cannot drift apart.
+output "backend_address_pool_ids" {
+  description = "IDs of the worker backend address pools"
+  value       = local.backend_address_pool_ids
 }

 # Outputs for debug
--- a/azure/flatcar-linux/kubernetes/security.tf
+++ b/azure/flatcar-linux/kubernetes/security.tf
@ -1,198 +1,223 @@
 # Controller security group

 resource "azurerm_network_security_group" "controller" {
-  resource_group_name = azurerm_resource_group.cluster.name
-
  name                = "${var.cluster_name}-controller"
+  resource_group_name = azurerm_resource_group.cluster.name
  location            = azurerm_resource_group.cluster.location
 }

 resource "azurerm_network_security_rule" "controller-icmp" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.controller_subnets

-  name                         = "allow-icmp"
+  name                         = "allow-icmp-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.controller.name
-  priority                     = "1995"
+  priority                     = 1995 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Icmp"
  source_port_range            = "*"
  destination_port_range       = "*"
-  source_address_prefixes      = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
-  destination_address_prefixes = azurerm_subnet.controller.address_prefixes
+  source_address_prefixes      = local.cluster_subnets[each.key]
+  destination_address_prefixes = local.controller_subnets[each.key]
 }

 resource "azurerm_network_security_rule" "controller-ssh" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.controller_subnets

-  name                         = "allow-ssh"
+  name                         = "allow-ssh-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.controller.name
-  priority                     = "2000"
+  priority                     = 2000 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Tcp"
  source_port_range            = "*"
  destination_port_range       = "22"
  source_address_prefix        = "*"
-  destination_address_prefixes = azurerm_subnet.controller.address_prefixes
+  destination_address_prefixes = local.controller_subnets[each.key]
 }

 resource "azurerm_network_security_rule" "controller-etcd" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.controller_subnets

-  name                         = "allow-etcd"
+  name                         = "allow-etcd-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.controller.name
-  priority                     = "2005"
+  priority                     = 2005 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Tcp"
  source_port_range            = "*"
  destination_port_range       = "2379-2380"
-  source_address_prefixes      = azurerm_subnet.controller.address_prefixes
-  destination_address_prefixes = azurerm_subnet.controller.address_prefixes
+  source_address_prefixes      = local.controller_subnets[each.key]
+  destination_address_prefixes = local.controller_subnets[each.key]
 }

 # Allow Prometheus to scrape etcd metrics
 resource "azurerm_network_security_rule" "controller-etcd-metrics" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.controller_subnets

-  name                         = "allow-etcd-metrics"
+  name                         = "allow-etcd-metrics-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.controller.name
-  priority                     = "2010"
+  priority                     = 2010 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Tcp"
  source_port_range            = "*"
  destination_port_range       = "2381"
-  source_address_prefixes      = azurerm_subnet.worker.address_prefixes
-  destination_address_prefixes = azurerm_subnet.controller.address_prefixes
+  source_address_prefixes      = local.worker_subnets[each.key]
+  destination_address_prefixes = local.controller_subnets[each.key]
 }

 # Allow Prometheus to scrape kube-proxy metrics
 resource "azurerm_network_security_rule" "controller-kube-proxy" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.controller_subnets

-  name                         = "allow-kube-proxy-metrics"
+  name                         = "allow-kube-proxy-metrics-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.controller.name
-  priority                     = "2011"
+  priority                     = 2012 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Tcp"
  source_port_range            = "*"
  destination_port_range       = "10249"
-  source_address_prefixes      = azurerm_subnet.worker.address_prefixes
-  destination_address_prefixes = azurerm_subnet.controller.address_prefixes
+  source_address_prefixes      = local.worker_subnets[each.key]
+  destination_address_prefixes = local.controller_subnets[each.key]
 }

 # Allow Prometheus to scrape kube-scheduler and kube-controller-manager metrics
 resource "azurerm_network_security_rule" "controller-kube-metrics" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.controller_subnets

-  name                         = "allow-kube-metrics"
+  name                         = "allow-kube-metrics-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.controller.name
-  priority                     = "2012"
+  priority                     = 2014 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Tcp"
  source_port_range            = "*"
  destination_port_range       = "10257-10259"
-  source_address_prefixes      = azurerm_subnet.worker.address_prefixes
-  destination_address_prefixes = azurerm_subnet.controller.address_prefixes
+  source_address_prefixes      = local.worker_subnets[each.key]
+  destination_address_prefixes = local.controller_subnets[each.key]
 }

 resource "azurerm_network_security_rule" "controller-apiserver" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.controller_subnets

-  name                         = "allow-apiserver"
+  name                         = "allow-apiserver-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.controller.name
-  priority                     = "2015"
+  priority                     = 2016 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Tcp"
  source_port_range            = "*"
  destination_port_range       = "6443"
  source_address_prefix        = "*"
-  destination_address_prefixes = azurerm_subnet.controller.address_prefixes
+  destination_address_prefixes = local.controller_subnets[each.key]
 }

 resource "azurerm_network_security_rule" "controller-cilium-health" {
-  resource_group_name = azurerm_resource_group.cluster.name
-  count               = var.networking == "cilium" ? 1 : 0
+  for_each = var.networking == "cilium" ? local.controller_subnets : {}

-  name                         = "allow-cilium-health"
+  name                         = "allow-cilium-health-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.controller.name
-  priority                     = "2019"
+  priority                     = 2018 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Tcp"
  source_port_range            = "*"
  destination_port_range       = "4240"
-  source_address_prefixes      = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
-  destination_address_prefixes = azurerm_subnet.controller.address_prefixes
+  source_address_prefixes      = local.cluster_subnets[each.key]
+  destination_address_prefixes = local.controller_subnets[each.key]
+}
+
+# Allow cluster nodes (controller and worker subnets) to reach TCP 9962-9965
+# on controllers; per the rule name these are the Cilium metrics ports.
+# Created only when var.networking is "cilium", once per IP family key in
+# local.controller_subnets (priority 2035 for ipv4, 2036 otherwise).
+resource "azurerm_network_security_rule" "controller-cilium-metrics" {
+  for_each = var.networking == "cilium" ? local.controller_subnets : {}
+
+  name                         = "allow-cilium-metrics-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
+  network_security_group_name  = azurerm_network_security_group.controller.name
+  priority                     = 2035 + (each.key == "ipv4" ? 0 : 1)
+  access                       = "Allow"
+  direction                    = "Inbound"
+  protocol                     = "Tcp"
+  source_port_range            = "*"
+  destination_port_range       = "9962-9965"
+  source_address_prefixes      = local.cluster_subnets[each.key]
+  destination_address_prefixes = local.controller_subnets[each.key]
 }

 resource "azurerm_network_security_rule" "controller-vxlan" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.controller_subnets

-  name                         = "allow-vxlan"
+  name                         = "allow-vxlan-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.controller.name
-  priority                     = "2020"
+  priority                     = 2020 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Udp"
  source_port_range            = "*"
  destination_port_range       = "4789"
-  source_address_prefixes      = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
-  destination_address_prefixes = azurerm_subnet.controller.address_prefixes
+  source_address_prefixes      = local.cluster_subnets[each.key]
+  destination_address_prefixes = local.controller_subnets[each.key]
 }

 resource "azurerm_network_security_rule" "controller-linux-vxlan" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.controller_subnets

-  name                         = "allow-linux-vxlan"
+  name                         = "allow-linux-vxlan-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.controller.name
-  priority                     = "2021"
+  priority                     = 2022 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Udp"
  source_port_range            = "*"
  destination_port_range       = "8472"
-  source_address_prefixes      = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
-  destination_address_prefixes = azurerm_subnet.controller.address_prefixes
+  source_address_prefixes      = local.cluster_subnets[each.key]
+  destination_address_prefixes = local.controller_subnets[each.key]
 }

 # Allow Prometheus to scrape node-exporter daemonset
 resource "azurerm_network_security_rule" "controller-node-exporter" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.controller_subnets

-  name                         = "allow-node-exporter"
+  name                         = "allow-node-exporter-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.controller.name
-  priority                     = "2025"
+  priority                     = 2025 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Tcp"
  source_port_range            = "*"
  destination_port_range       = "9100"
-  source_address_prefixes      = azurerm_subnet.worker.address_prefixes
-  destination_address_prefixes = azurerm_subnet.controller.address_prefixes
+  source_address_prefixes      = local.worker_subnets[each.key]
+  destination_address_prefixes = local.controller_subnets[each.key]
 }

 # Allow apiserver to access kubelet's for exec, log, port-forward
 resource "azurerm_network_security_rule" "controller-kubelet" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.controller_subnets

-  name                        = "allow-kubelet"
+  name                        = "allow-kubelet-${each.key}"
+  resource_group_name         = azurerm_resource_group.cluster.name
  network_security_group_name = azurerm_network_security_group.controller.name
-  priority                    = "2030"
+  priority                    = 2030 + (each.key == "ipv4" ? 0 : 1)
  access                      = "Allow"
  direction                   = "Inbound"
  protocol                    = "Tcp"
  source_port_range           = "*"
  destination_port_range      = "10250"
-
  # allow Prometheus to scrape kubelet metrics too
-  source_address_prefixes      = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
-  destination_address_prefixes = azurerm_subnet.controller.address_prefixes
+  source_address_prefixes      = local.cluster_subnets[each.key]
+  destination_address_prefixes = local.controller_subnets[each.key]
 }

 # Override Azure AllowVNetInBound and AllowAzureLoadBalancerInBound
@ -231,166 +256,189 @@ resource "azurerm_network_security_rule" "controller-deny-all" {
 # Worker security group

 resource "azurerm_network_security_group" "worker" {
-  resource_group_name = azurerm_resource_group.cluster.name
-
  name                = "${var.cluster_name}-worker"
+  resource_group_name = azurerm_resource_group.cluster.name
  location            = azurerm_resource_group.cluster.location
 }

 resource "azurerm_network_security_rule" "worker-icmp" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.worker_subnets

-  name                         = "allow-icmp"
+  name                         = "allow-icmp-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.worker.name
-  priority                     = "1995"
+  priority                     = 1995 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Icmp"
  source_port_range            = "*"
  destination_port_range       = "*"
-  source_address_prefixes      = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
-  destination_address_prefixes = azurerm_subnet.worker.address_prefixes
+  source_address_prefixes      = local.cluster_subnets[each.key]
+  destination_address_prefixes = local.worker_subnets[each.key]
 }

 resource "azurerm_network_security_rule" "worker-ssh" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.worker_subnets

-  name                         = "allow-ssh"
+  name                         = "allow-ssh-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.worker.name
-  priority                     = "2000"
+  priority                     = 2000 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Tcp"
  source_port_range            = "*"
  destination_port_range       = "22"
-  source_address_prefixes      = azurerm_subnet.controller.address_prefixes
-  destination_address_prefixes = azurerm_subnet.worker.address_prefixes
+  source_address_prefixes      = local.controller_subnets[each.key]
+  destination_address_prefixes = local.worker_subnets[each.key]
 }

 resource "azurerm_network_security_rule" "worker-http" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.worker_subnets

-  name                         = "allow-http"
+  name                         = "allow-http-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.worker.name
-  priority                     = "2005"
+  priority                     = 2005 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Tcp"
  source_port_range            = "*"
  destination_port_range       = "80"
  source_address_prefix        = "*"
-  destination_address_prefixes = azurerm_subnet.worker.address_prefixes
+  destination_address_prefixes = local.worker_subnets[each.key]
 }

 resource "azurerm_network_security_rule" "worker-https" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.worker_subnets

-  name                         = "allow-https"
+  name                         = "allow-https-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.worker.name
-  priority                     = "2010"
+  priority                     = 2010 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Tcp"
  source_port_range            = "*"
  destination_port_range       = "443"
  source_address_prefix        = "*"
-  destination_address_prefixes = azurerm_subnet.worker.address_prefixes
+  destination_address_prefixes = local.worker_subnets[each.key]
 }

 resource "azurerm_network_security_rule" "worker-cilium-health" {
-  resource_group_name = azurerm_resource_group.cluster.name
-  count               = var.networking == "cilium" ? 1 : 0
+  for_each = var.networking == "cilium" ? local.worker_subnets : {}

-  name                         = "allow-cilium-health"
+  name                         = "allow-cilium-health-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.worker.name
-  priority                     = "2014"
+  priority                     = 2012 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Tcp"
  source_port_range            = "*"
  destination_port_range       = "4240"
-  source_address_prefixes      = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
-  destination_address_prefixes = azurerm_subnet.worker.address_prefixes
+  source_address_prefixes      = local.cluster_subnets[each.key]
+  destination_address_prefixes = local.worker_subnets[each.key]
+}
+
+# Allow cluster nodes (controller and worker subnets) to reach TCP 9962-9965
+# on workers; per the rule name these are the Cilium metrics ports.
+# Created only when var.networking is "cilium", once per IP family key in
+# local.worker_subnets (priority 2014 for ipv4, 2015 otherwise).
+resource "azurerm_network_security_rule" "worker-cilium-metrics" {
+  for_each = var.networking == "cilium" ? local.worker_subnets : {}
+
+  name                         = "allow-cilium-metrics-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
+  network_security_group_name  = azurerm_network_security_group.worker.name
+  priority                     = 2014 + (each.key == "ipv4" ? 0 : 1)
+  access                       = "Allow"
+  direction                    = "Inbound"
+  protocol                     = "Tcp"
+  source_port_range            = "*"
+  destination_port_range       = "9962-9965"
+  source_address_prefixes      = local.cluster_subnets[each.key]
+  destination_address_prefixes = local.worker_subnets[each.key]
 }

 resource "azurerm_network_security_rule" "worker-vxlan" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.worker_subnets

-  name                         = "allow-vxlan"
+  name                         = "allow-vxlan-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.worker.name
-  priority                     = "2015"
+  priority                     = 2016 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Udp"
  source_port_range            = "*"
  destination_port_range       = "4789"
-  source_address_prefixes      = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
-  destination_address_prefixes = azurerm_subnet.worker.address_prefixes
+  source_address_prefixes      = local.cluster_subnets[each.key]
+  destination_address_prefixes = local.worker_subnets[each.key]
 }

 resource "azurerm_network_security_rule" "worker-linux-vxlan" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.worker_subnets

-  name                         = "allow-linux-vxlan"
+  name                         = "allow-linux-vxlan-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.worker.name
-  priority                     = "2016"
+  priority                     = 2018 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Udp"
  source_port_range            = "*"
  destination_port_range       = "8472"
-  source_address_prefixes      = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
-  destination_address_prefixes = azurerm_subnet.worker.address_prefixes
+  source_address_prefixes      = local.cluster_subnets[each.key]
+  destination_address_prefixes = local.worker_subnets[each.key]
 }

 # Allow Prometheus to scrape node-exporter daemonset
 resource "azurerm_network_security_rule" "worker-node-exporter" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.worker_subnets

-  name                         = "allow-node-exporter"
+  name                         = "allow-node-exporter-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.worker.name
-  priority                     = "2020"
+  priority                     = 2020 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Tcp"
  source_port_range            = "*"
  destination_port_range       = "9100"
-  source_address_prefixes      = azurerm_subnet.worker.address_prefixes
-  destination_address_prefixes = azurerm_subnet.worker.address_prefixes
+  source_address_prefixes      = local.worker_subnets[each.key]
+  destination_address_prefixes = local.worker_subnets[each.key]
 }

 # Allow Prometheus to scrape kube-proxy
 resource "azurerm_network_security_rule" "worker-kube-proxy" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.worker_subnets

-  name                         = "allow-kube-proxy"
+  name                         = "allow-kube-proxy-${each.key}"
+  resource_group_name          = azurerm_resource_group.cluster.name
  network_security_group_name  = azurerm_network_security_group.worker.name
-  priority                     = "2024"
+  priority                     = 2024 + (each.key == "ipv4" ? 0 : 1)
  access                       = "Allow"
  direction                    = "Inbound"
  protocol                     = "Tcp"
  source_port_range            = "*"
  destination_port_range       = "10249"
-  source_address_prefixes      = azurerm_subnet.worker.address_prefixes
-  destination_address_prefixes = azurerm_subnet.worker.address_prefixes
+  source_address_prefixes      = local.worker_subnets[each.key]
+  destination_address_prefixes = local.worker_subnets[each.key]
 }

 # Allow apiserver to access kubelet's for exec, log, port-forward
 resource "azurerm_network_security_rule" "worker-kubelet" {
-  resource_group_name = azurerm_resource_group.cluster.name
+  for_each = local.worker_subnets

-  name                        = "allow-kubelet"
+  name                        = "allow-kubelet-${each.key}"
+  resource_group_name         = azurerm_resource_group.cluster.name
  network_security_group_name = azurerm_network_security_group.worker.name
-  priority                    = "2025"
+  priority                    = 2026 + (each.key == "ipv4" ? 0 : 1)
  access                      = "Allow"
  direction                   = "Inbound"
  protocol                    = "Tcp"
  source_port_range           = "*"
  destination_port_range      = "10250"
-
  # allow Prometheus to scrape kubelet metrics too
-  source_address_prefixes      = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
-  destination_address_prefixes = azurerm_subnet.worker.address_prefixes
+  source_address_prefixes      = local.cluster_subnets[each.key]
+  destination_address_prefixes = local.worker_subnets[each.key]
 }

 # Override Azure AllowVNetInBound and AllowAzureLoadBalancerInBound
--- a/azure/flatcar-linux/kubernetes/ssh.tf
+++ b/azure/flatcar-linux/kubernetes/ssh.tf
@ -18,7 +18,7 @@ resource "null_resource" "copy-controller-secrets" {

  connection {
    type    = "ssh"
-    host    = azurerm_public_ip.controllers.*.ip_address[count.index]
+    host    = azurerm_public_ip.controllers-ipv4[count.index].ip_address
    user    = "core"
    timeout = "15m"
  }
@ -45,7 +45,7 @@ resource "null_resource" "bootstrap" {

  connection {
    type    = "ssh"
-    host    = azurerm_public_ip.controllers.*.ip_address[0]
+    host    = azurerm_public_ip.controllers-ipv4[0].ip_address
    user    = "core"
    timeout = "15m"
  }
--- a/azure/flatcar-linux/kubernetes/variables.tf
+++ b/azure/flatcar-linux/kubernetes/variables.tf
@ -5,9 +5,9 @@ variable "cluster_name" {

 # Azure

-variable "region" {
+variable "location" {
  type        = string
-  description = "Azure Region (e.g. centralus , see `az account list-locations --output table`)"
+  description = "Azure location (e.g. centralus , see `az account list-locations --output table`)"
 }

 variable "dns_zone" {
@ -22,30 +22,6 @@ variable "dns_zone_group" {

 # instances

-variable "controller_count" {
-  type        = number
-  description = "Number of controllers (i.e. masters)"
-  default     = 1
-}
-
-variable "worker_count" {
-  type        = number
-  description = "Number of workers"
-  default     = 1
-}
-
-variable "controller_type" {
-  type        = string
-  description = "Machine type for controllers (see `az vm list-skus --location centralus`)"
-  default     = "Standard_B2s"
-}
-
-variable "worker_type" {
-  type        = string
-  description = "Machine type for workers (see `az vm list-skus --location centralus`)"
-  default     = "Standard_D2as_v5"
-}
-
 variable "os_image" {
  type        = string
  description = "Channel for a Container Linux derivative (flatcar-stable, flatcar-beta, flatcar-alpha)"
@ -57,12 +33,60 @@ variable "os_image" {
  }
 }

-variable "disk_size" {
+variable "controller_count" {
  type        = number
-  description = "Size of the disk in GB"
+  description = "Number of controllers (i.e. masters)"
+  default     = 1
+}
+
+variable "controller_type" {
+  type        = string
+  description = "Machine type for controllers (see `az vm list-skus --location centralus`)"
+  default     = "Standard_B2s"
+}
+
+variable "controller_disk_type" {
+  type        = string
+  description = "Type of managed disk for controller node(s)"
+  default     = "Premium_LRS"
+}
+
+variable "controller_disk_size" {
+  type        = number
+  description = "Size of the managed disk in GB for controller node(s)"
  default     = 30
 }

+variable "worker_count" {
+  type        = number
+  description = "Number of workers"
+  default     = 1
+}
+
+variable "worker_type" {
+  type        = string
+  description = "Machine type for workers (see `az vm list-skus --location centralus`)"
+  default     = "Standard_D2as_v5"
+}
+
+variable "worker_disk_type" {
+  type        = string
+  description = "Type of managed disk for worker nodes"
+  default     = "Standard_LRS"
+}
+
+variable "worker_disk_size" {
+  type        = number
+  description = "Size of the managed disk in GB for worker nodes"
+  default     = 30
+}
+
+variable "worker_ephemeral_disk" {
+  type        = bool
+  description = "Use ephemeral local disk instead of managed disk (requires vm_type with local storage)"
+  default     = false
+}
+
 variable "worker_priority" {
  type        = string
  description = "Set worker priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be deallocated at any time."
@ -100,10 +124,15 @@ variable "networking" {
  default     = "cilium"
 }

-variable "host_cidr" {
-  type        = string
-  description = "CIDR IPv4 range to assign to instances"
-  default     = "10.0.0.0/16"
+variable "network_cidr" {
+  type = object({
+    ipv4 = list(string)
+    ipv6 = optional(list(string), [])
+  })
+  description = "Virtual network CIDR ranges"
+  default = {
+    ipv4 = ["10.0.0.0/16"]
+  }
 }

 variable "pod_cidr" {
@ -121,32 +150,31 @@ EOD
  default     = "10.3.0.0/16"
 }

-variable "enable_reporting" {
-  type        = bool
-  description = "Enable usage or analytics reporting to upstreams (Calico)"
-  default     = false
-}
-
-variable "enable_aggregation" {
-  type        = bool
-  description = "Enable the Kubernetes Aggregation Layer"
-  default     = true
-}
-
 variable "worker_node_labels" {
  type        = list(string)
  description = "List of initial worker node labels"
  default     = []
 }

-variable "arch" {
-  type        = string
-  description = "Container architecture (amd64 or arm64)"
-  default     = "amd64"
+# advanced

+variable "controller_arch" {
+  type        = string
+  description = "Controller node(s) architecture (amd64 or arm64)"
+  default     = "amd64"
  validation {
-    condition     = var.arch == "amd64" || var.arch == "arm64"
-    error_message = "The arch must be amd64 or arm64."
+    condition     = contains(["amd64", "arm64"], var.controller_arch)
+    error_message = "The controller_arch must be amd64 or arm64."
+  }
+}
+
+variable "worker_arch" {
+  type        = string
+  description = "Worker node(s) architecture (amd64 or arm64)"
+  default     = "amd64"
+  validation {
+    condition     = contains(["amd64", "arm64"], var.worker_arch)
+    error_message = "The worker_arch must be amd64 or arm64."
  }
 }

@ -156,10 +184,18 @@ variable "daemonset_tolerations" {
  default     = []
 }

-# unofficial, undocumented, unsupported
-
-variable "cluster_domain_suffix" {
-  type        = string
-  description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. foo.default.svc.cluster.local) "
-  default     = "cluster.local"
+variable "components" {
+  description = "Configure pre-installed cluster components"
+  # Component configs are passed through to terraform-render-bootstrap,
+  # which handles type enforcement and defines defaults
+  # https://github.com/poseidon/terraform-render-bootstrap/blob/main/variables.tf#L95
+  type = object({
+    enable     = optional(bool)
+    coredns    = optional(map(any))
+    kube_proxy = optional(map(any))
+    flannel    = optional(map(any))
+    calico     = optional(map(any))
+    cilium     = optional(map(any))
+  })
+  default = null
 }
--- a/azure/flatcar-linux/kubernetes/versions.tf
+++ b/azure/flatcar-linux/kubernetes/versions.tf
@ -3,11 +3,11 @@
 terraform {
  required_version = ">= 0.13.0, < 2.0.0"
  required_providers {
-    azurerm = ">= 2.8, < 4.0"
+    azurerm = ">= 2.8"
    null    = ">= 2.1"
    ct = {
      source  = "poseidon/ct"
-      version = "~> 0.11"
+      version = "~> 0.13"
    }
  }
 }
--- a/azure/flatcar-linux/kubernetes/workers.tf
+++ b/azure/flatcar-linux/kubernetes/workers.tf
@ -4,14 +4,17 @@ module "workers" {

  # Azure
  resource_group_name      = azurerm_resource_group.cluster.name
-  region                  = azurerm_resource_group.cluster.location
+  location                 = azurerm_resource_group.cluster.location
  subnet_id                = azurerm_subnet.worker.id
  security_group_id        = azurerm_network_security_group.worker.id
-  backend_address_pool_id = azurerm_lb_backend_address_pool.worker.id
+  backend_address_pool_ids = local.backend_address_pool_ids

  worker_count   = var.worker_count
  vm_type        = var.worker_type
  os_image       = var.os_image
+  disk_type      = var.worker_disk_type
+  disk_size      = var.worker_disk_size
+  ephemeral_disk = var.worker_ephemeral_disk
  priority       = var.worker_priority

  # configuration
@ -19,8 +22,7 @@ module "workers" {
  ssh_authorized_key   = var.ssh_authorized_key
  azure_authorized_key = var.azure_authorized_key
  service_cidr         = var.service_cidr
-  cluster_domain_suffix = var.cluster_domain_suffix
  snippets             = var.worker_snippets
  node_labels          = var.worker_node_labels
-  arch                  = var.arch
+  arch                 = var.worker_arch
 }
--- a/azure/flatcar-linux/kubernetes/workers/butane/worker.yaml
+++ b/azure/flatcar-linux/kubernetes/workers/butane/worker.yaml
@ -28,7 +28,7 @@ systemd:
        After=docker.service
        Wants=rpc-statd.service
        [Service]
-        Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3
+        Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3
        ExecStartPre=/bin/mkdir -p /etc/cni/net.d
        ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
        ExecStartPre=/bin/mkdir -p /opt/cni/bin
@ -99,7 +99,7 @@ storage:
          cgroupDriver: systemd
          clusterDNS:
            - ${cluster_dns_service_ip}
-          clusterDomain: ${cluster_domain_suffix}
+          clusterDomain: cluster.local
          healthzPort: 0
          rotateCertificates: true
          shutdownGracePeriod: 45s
--- a/azure/flatcar-linux/kubernetes/workers/variables.tf
+++ b/azure/flatcar-linux/kubernetes/workers/variables.tf
@ -5,9 +5,9 @@ variable "name" {

 # Azure

-variable "region" {
+variable "location" {
  type        = string
-  description = "Must be set to the Azure Region of cluster"
+  description = "Must be set to the Azure location of cluster"
 }

 variable "resource_group_name" {
@ -25,9 +25,12 @@ variable "security_group_id" {
  description = "Must be set to the `worker_security_group_id` output by cluster"
 }

-variable "backend_address_pool_id" {
-  type        = string
-  description = "Must be set to the `worker_backend_address_pool_id` output by cluster"
+variable "backend_address_pool_ids" {
+  type = object({
+    ipv4 = list(string)
+    ipv6 = list(string)
+  })
+  description = "Must be set to the `backend_address_pool_ids` output by cluster"
 }

 # instances
@ -55,6 +58,24 @@ variable "os_image" {
  }
 }

+variable "disk_type" {
+  type        = string
+  description = "Type of managed disk"
+  default     = "Standard_LRS"
+}
+
+variable "disk_size" {
+  type        = number
+  description = "Size of the managed disk in GB"
+  default     = 30
+}
+
+variable "ephemeral_disk" {
+  type        = bool
+  description = "Use ephemeral local disk instead of managed disk (requires vm_type with local storage)"
+  default     = false
+}
+
 variable "priority" {
  type        = string
  description = "Set priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be evicted at any time."
@ -116,12 +137,3 @@ variable "arch" {
    error_message = "The arch must be amd64 or arm64."
  }
 }
-
-# unofficial, undocumented, unsupported
-
-variable "cluster_domain_suffix" {
-  description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. foo.default.svc.cluster.local) "
-  type        = string
-  default     = "cluster.local"
-}
-
--- a/azure/flatcar-linux/kubernetes/workers/versions.tf
+++ b/azure/flatcar-linux/kubernetes/workers/versions.tf
@ -3,10 +3,10 @@
 terraform {
  required_version = ">= 0.13.0, < 2.0.0"
  required_providers {
-    azurerm = ">= 2.8, < 4.0"
+    azurerm = ">= 2.8"
    ct = {
      source  = "poseidon/ct"
-      version = "~> 0.11"
+      version = "~> 0.13"
    }
  }
 }
--- a/azure/flatcar-linux/kubernetes/workers/workers.tf
+++ b/azure/flatcar-linux/kubernetes/workers/workers.tf
@ -8,25 +8,28 @@ locals {
 }

 # Workers scale set
-resource "azurerm_linux_virtual_machine_scale_set" "workers" {
-  resource_group_name = var.resource_group_name
-
+resource "azurerm_orchestrated_virtual_machine_scale_set" "workers" {
  name                        = "${var.name}-worker"
-  location  = var.region
-  sku       = var.vm_type
+  resource_group_name         = var.resource_group_name
+  location                    = var.location
+  platform_fault_domain_count = 1
+  sku_name                    = var.vm_type
  instances                   = var.worker_count
-  # instance name prefix for instances in the set
-  computer_name_prefix   = "${var.name}-worker"
-  single_placement_group = false
-  custom_data            = base64encode(data.ct_config.worker.rendered)
-  boot_diagnostics {
-    # defaults to a managed storage account
-  }

  # storage
+  encryption_at_host_enabled = true
  os_disk {
-    storage_account_type = "Standard_LRS"
-    caching              = "ReadWrite"
+    storage_account_type = var.disk_type
+    disk_size_gb         = var.disk_size
+    caching              = "ReadOnly"
+    # Optionally, use the ephemeral disk of the instance type (support varies)
+    dynamic "diff_disk_settings" {
+      for_each = var.ephemeral_disk ? [1] : []
+      content {
+        option    = "Local"
+        placement = "ResourceDisk"
+      }
+    }
  }

  # Flatcar Container Linux
@ -46,13 +49,6 @@ resource "azurerm_linux_virtual_machine_scale_set" "workers" {
    }
  }

-  # Azure requires setting admin_ssh_key, though Ignition custom_data handles it too
-  admin_username = "core"
-  admin_ssh_key {
-    username   = "core"
-    public_key = local.azure_authorized_key
-  }
-
  # network
  network_interface {
    name                      = "nic0"
@ -60,17 +56,41 @@ resource "azurerm_linux_virtual_machine_scale_set" "workers" {
    network_security_group_id = var.security_group_id

    ip_configuration {
-      name      = "ip0"
+      name      = "ipv4"
+      version   = "IPv4"
      primary   = true
      subnet_id = var.subnet_id
-
      # backend address pool to which the NIC should be added
-      load_balancer_backend_address_pool_ids = [var.backend_address_pool_id]
+      load_balancer_backend_address_pool_ids = var.backend_address_pool_ids.ipv4
+    }
+    ip_configuration {
+      name      = "ipv6"
+      version   = "IPv6"
+      subnet_id = var.subnet_id
+      # backend address pool to which the NIC should be added
+      load_balancer_backend_address_pool_ids = var.backend_address_pool_ids.ipv6
+    }
+  }
+
+  # boot
+  user_data_base64 = base64encode(data.ct_config.worker.rendered)
+  boot_diagnostics {
+    # defaults to a managed storage account
+  }
+
+  # Azure requires an RSA admin_ssh_key
+  os_profile {
+    linux_configuration {
+      admin_username = "core"
+      admin_ssh_key {
+        username   = "core"
+        public_key = local.azure_authorized_key
+      }
+      computer_name_prefix = "${var.name}-worker"
    }
  }

  # lifecycle
-  upgrade_mode = "Manual"
  # eviction policy may only be set when priority is Spot
  priority        = var.priority
  eviction_policy = var.priority == "Spot" ? "Delete" : null
@ -79,35 +99,12 @@ resource "azurerm_linux_virtual_machine_scale_set" "workers" {
  }
 }

-# Scale up or down to maintain desired number, tolerating deallocations.
-resource "azurerm_monitor_autoscale_setting" "workers" {
-  resource_group_name = var.resource_group_name
-
-  name     = "${var.name}-maintain-desired"
-  location = var.region
-
-  # autoscale
-  enabled            = true
-  target_resource_id = azurerm_linux_virtual_machine_scale_set.workers.id
-
-  profile {
-    name = "default"
-
-    capacity {
-      minimum = var.worker_count
-      default = var.worker_count
-      maximum = var.worker_count
-    }
-  }
-}
-
 # Flatcar Linux worker
 data "ct_config" "worker" {
  content = templatefile("${path.module}/butane/worker.yaml", {
    kubeconfig             = indent(10, var.kubeconfig)
    ssh_authorized_key     = var.ssh_authorized_key
    cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
-    cluster_domain_suffix  = var.cluster_domain_suffix
    node_labels            = join(",", var.node_labels)
    node_taints            = join(",", var.node_taints)
  })
--- a/bare-metal/fedora-coreos/kubernetes/README.md
+++ b/bare-metal/fedora-coreos/kubernetes/README.md
@ -11,7 +11,7 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster

 ## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>

-* Kubernetes v1.28.3 (upstream)
+* Kubernetes v1.31.3 (upstream)
 * Single or multi-master, [Calico](https://www.projectcalico.org/) or [Cilium](https://github.com/cilium/cilium) or [flannel](https://github.com/coreos/flannel) networking
 * On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/), SELinux enforcing
 * Advanced features like [snippets](https://typhoon.psdn.io/advanced/customization/#hosts) customization
--- a/bare-metal/fedora-coreos/kubernetes/bootstrap.tf
+++ b/bare-metal/fedora-coreos/kubernetes/bootstrap.tf
@ -1,6 +1,6 @@
 # Kubernetes assets (kubeconfig, manifests)
 module "bootstrap" {
-  source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=d151ab77b7ebdfb878ea110c86cc77238189f1ed"
+  source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=e6a1c7bccfc45ab299b5f8149bc3840f99b30b2b"

  cluster_name                    = var.cluster_name
  api_servers                     = [var.k8s_domain_name]
@ -10,9 +10,7 @@ module "bootstrap" {
  network_ip_autodetection_method = var.network_ip_autodetection_method
  pod_cidr                        = var.pod_cidr
  service_cidr                    = var.service_cidr
-  cluster_domain_suffix           = var.cluster_domain_suffix
-  enable_reporting                = var.enable_reporting
-  enable_aggregation              = var.enable_aggregation
+  components                      = var.components
 }


--- a/bare-metal/fedora-coreos/kubernetes/butane/controller.yaml
+++ b/bare-metal/fedora-coreos/kubernetes/butane/controller.yaml
@ -12,7 +12,7 @@ systemd:
        Wants=network-online.target
        After=network-online.target
        [Service]
-        Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.10
+        Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.13
        Type=exec
        ExecStartPre=/bin/mkdir -p /var/lib/etcd
        ExecStartPre=-/usr/bin/podman rm etcd
@ -53,7 +53,7 @@ systemd:
        Description=Kubelet (System Container)
        Wants=rpc-statd.service
        [Service]
-        Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3
+        Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3
        ExecStartPre=/bin/mkdir -p /etc/cni/net.d
        ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
        ExecStartPre=/bin/mkdir -p /opt/cni/bin
@ -113,7 +113,7 @@ systemd:
        Type=oneshot
        RemainAfterExit=true
        WorkingDirectory=/opt/bootstrap
-        Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3
+        Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3
        ExecStartPre=-/usr/bin/podman rm bootstrap
        ExecStart=/usr/bin/podman run --name bootstrap \
            --network host \
@ -154,7 +154,7 @@ storage:
          cgroupDriver: systemd
          clusterDNS:
            - ${cluster_dns_service_ip}
-          clusterDomain: ${cluster_domain_suffix}
+          clusterDomain: cluster.local
          healthzPort: 0
          rotateCertificates: true
          shutdownGracePeriod: 45s
@ -168,7 +168,7 @@ storage:
      contents:
        inline: |
          #!/bin/bash -e
-          mkdir -p -- auth tls/etcd tls/k8s static-manifests manifests/coredns manifests-networking
+          mkdir -p -- auth tls/{etcd,k8s} static-manifests manifests/{coredns,kube-proxy,network}
          awk '/#####/ {filename=$2; next} {print > filename}' assets
          mkdir -p /etc/ssl/etcd/etcd
          mkdir -p /etc/kubernetes/pki
@ -182,8 +182,7 @@ storage:
          mv static-manifests/* /etc/kubernetes/manifests/
          mkdir -p /opt/bootstrap/assets
          mv manifests /opt/bootstrap/assets/manifests
-          mv manifests-networking/* /opt/bootstrap/assets/manifests/
-          rm -rf assets auth static-manifests tls manifests-networking
+          rm -rf assets auth static-manifests tls manifests
          chcon -R -u system_u -t container_file_t /etc/kubernetes/pki
    - path: /opt/bootstrap/apply
      mode: 0544
--- a/bare-metal/fedora-coreos/kubernetes/profiles.tf
+++ b/bare-metal/fedora-coreos/kubernetes/profiles.tf
@ -59,7 +59,6 @@ data "ct_config" "controllers" {
    etcd_name              = var.controllers.*.name[count.index]
    etcd_initial_cluster   = join(",", formatlist("%s=https://%s:2380", var.controllers.*.name, var.controllers.*.domain))
    cluster_dns_service_ip = module.bootstrap.cluster_dns_service_ip
-    cluster_domain_suffix  = var.cluster_domain_suffix
    ssh_authorized_key     = var.ssh_authorized_key
  })
  strict   = true
--- a/bare-metal/fedora-coreos/kubernetes/variables.tf
+++ b/bare-metal/fedora-coreos/kubernetes/variables.tf
@ -139,23 +139,20 @@ variable "kernel_args" {
  default     = []
 }

-variable "enable_reporting" {
-  type        = bool
-  description = "Enable usage or analytics reporting to upstreams (Calico)"
-  default     = false
+# advanced
+
+variable "components" {
+  description = "Configure pre-installed cluster components"
+  # Component configs are passed through to terraform-render-bootstrap,
+  # which handles type enforcement and defines defaults
+  # https://github.com/poseidon/terraform-render-bootstrap/blob/main/variables.tf#L95
+  type = object({
+    enable     = optional(bool)
+    coredns    = optional(map(any))
+    kube_proxy = optional(map(any))
+    flannel    = optional(map(any))
+    calico     = optional(map(any))
+    cilium     = optional(map(any))
+  })
+  default = null
 }
-
-variable "enable_aggregation" {
-  type        = bool
-  description = "Enable the Kubernetes Aggregation Layer"
-  default     = true
-}
-
-# unofficial, undocumented, unsupported
-
-variable "cluster_domain_suffix" {
-  description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. foo.default.svc.cluster.local) "
-  type        = string
-  default     = "cluster.local"
-}
-
--- a/bare-metal/fedora-coreos/kubernetes/worker/butane/worker.yaml
+++ b/bare-metal/fedora-coreos/kubernetes/worker/butane/worker.yaml
@ -25,7 +25,7 @@ systemd:
        Description=Kubelet (System Container)
        Wants=rpc-statd.service
        [Service]
-        Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3
+        Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3
        ExecStartPre=/bin/mkdir -p /etc/cni/net.d
        ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
        ExecStartPre=/bin/mkdir -p /opt/cni/bin
@ -108,7 +108,7 @@ storage:
          cgroupDriver: systemd
          clusterDNS:
            - ${cluster_dns_service_ip}
-          clusterDomain: ${cluster_domain_suffix}
+          clusterDomain: cluster.local
          healthzPort: 0
          rotateCertificates: true
          shutdownGracePeriod: 45s
--- a/bare-metal/fedora-coreos/kubernetes/worker/matchbox.tf
+++ b/bare-metal/fedora-coreos/kubernetes/worker/matchbox.tf
@ -53,7 +53,6 @@ data "ct_config" "worker" {
    domain_name            = var.domain
    ssh_authorized_key     = var.ssh_authorized_key
    cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
-    cluster_domain_suffix  = var.cluster_domain_suffix
    node_labels            = join(",", var.node_labels)
    node_taints            = join(",", var.node_taints)
  })
--- a/bare-metal/fedora-coreos/kubernetes/worker/variables.tf
+++ b/bare-metal/fedora-coreos/kubernetes/worker/variables.tf
@ -103,9 +103,3 @@ The 1st IP will be reserved for kube_apiserver, the 10th IP will be reserved for
 EOD
  default     = "10.3.0.0/16"
 }
-
-variable "cluster_domain_suffix" {
-  description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. foo.default.svc.cluster.local) "
-  type        = string
-  default     = "cluster.local"
-}
--- a/Show More
+++ b/Show More