Add IPv6 support for Typhoon Azure clusters

* Define a dual-stack virtual network with both IPv4 and IPv6 private
address space. Change the `host_cidr` variable (string) to a `network_cidr`
variable (object) with "ipv4" and "ipv6" fields that list CIDR strings
(see the sketch after this list).
* Define dual-stack controller and worker subnets. Disable Azure
default outbound access (a deprecated fallback mechanism)
* Enable dual-stack load balancing to Kubernetes Ingress by adding
a public IPv6 frontend IP and LB rule to the load balancer.
* Enable worker outbound IPv6 connectivity through load balancer
SNAT by adding an IPv6 frontend IP and outbound rule
* Configure controller nodes with a public IPv6 address to provide
direct outbound IPv6 connectivity
* Add an IPv6 worker backend pool. Azure requires separate IPv4 and
IPv6 backend pools, though the health probe can be shared
* Extend network security group rules to cover IPv6 sources and destinations
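
Roughly, the pieces fit together as in this minimal Terraform sketch. It is
illustrative, not the module's actual code: the resource names
(`azurerm_resource_group.cluster`, `azurerm_lb.cluster`, `azurerm_lb_probe.ingress`),
the frontend names, and the `cidrsubnet` layout are assumptions, and
`default_outbound_access_enabled` needs a recent azurerm provider.

```
variable "network_cidr" {
  type = object({
    ipv4 = list(string)
    ipv6 = list(string)
  })
  description = "Virtual network IPv4 and IPv6 CIDR ranges"
}

# Dual-stack virtual network
resource "azurerm_virtual_network" "network" {
  name                = var.cluster_name
  resource_group_name = azurerm_resource_group.cluster.name
  location            = var.region
  address_space       = concat(var.network_cidr.ipv4, var.network_cidr.ipv6)
}

# Dual-stack worker subnet (Azure IPv6 subnets must be /64); the controller
# subnet is analogous. Disable Azure's deprecated default outbound access.
resource "azurerm_subnet" "worker" {
  name                 = "worker"
  resource_group_name  = azurerm_resource_group.cluster.name
  virtual_network_name = azurerm_virtual_network.network.name
  address_prefixes = concat(
    [for cidr in var.network_cidr.ipv4 : cidrsubnet(cidr, 1, 1)], # e.g. /16 -> /17
    [for cidr in var.network_cidr.ipv6 : cidrsubnet(cidr, 8, 1)], # e.g. /56 -> /64
  )
  default_outbound_access_enabled = false
}

# Public IPv6 frontend IP for Ingress (attached to the load balancer via a
# frontend_ip_configuration named "ingress-ipv6", not shown)
resource "azurerm_public_ip" "ingress_ipv6" {
  name                = "ingress-ipv6"
  resource_group_name = azurerm_resource_group.cluster.name
  location            = var.region
  sku                 = "Standard"
  ip_version          = "IPv6"
  allocation_method   = "Static"
}

# Azure requires separate IPv4 and IPv6 backend pools
resource "azurerm_lb_backend_address_pool" "worker_ipv6" {
  name            = "worker-ipv6"
  loadbalancer_id = azurerm_lb.cluster.id
}

# IPv6 Ingress rule (HTTP shown; HTTPS is analogous), sharing the health probe
resource "azurerm_lb_rule" "ingress_http_ipv6" {
  name                           = "ingress-http-ipv6"
  loadbalancer_id                = azurerm_lb.cluster.id
  frontend_ip_configuration_name = "ingress-ipv6"
  protocol                       = "Tcp"
  frontend_port                  = 80
  backend_port                   = 80
  backend_address_pool_ids       = [azurerm_lb_backend_address_pool.worker_ipv6.id]
  probe_id                       = azurerm_lb_probe.ingress.id
}

# Worker outbound IPv6 SNAT through the load balancer (assumes an
# "outbound-ipv6" frontend on the LB)
resource "azurerm_lb_outbound_rule" "worker_outbound_ipv6" {
  name                    = "worker-ipv6"
  loadbalancer_id         = azurerm_lb.cluster.id
  protocol                = "All"
  backend_address_pool_id = azurerm_lb_backend_address_pool.worker_ipv6.id
  frontend_ip_configuration {
    name = "outbound-ipv6"
  }
}
```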

Checklist:

Access to controller and worker nodes via IPv6 addresses:

  * SSH access to controller nodes via public IPv6 address
  * SSH access to worker nodes via (private) IPv6 address (via
    controller)

Outbound IPv6 connectivity from controller and worker nodes:

```
nc -6 -zv ipv6.google.com 80
Ncat: Version 7.94 ( https://nmap.org/ncat )
Ncat: Connected to [2607:f8b0:4001:c16::66]:80.
Ncat: 0 bytes sent, 0 bytes received in 0.02 seconds.
```

Serving Ingress traffic via IPv4 or IPv6 just requires setting
up A and AAAA records and running the ingress controller with
`hostNetwork: true`, since `hostPort` only forwards IPv4 traffic.
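
For example, with Azure DNS the records could be managed in Terraform. This is
a sketch only: the zone, record name, and the module's ingress IP outputs
(`module.ramius.ingress_static_ipv4/ipv6`) are assumptions, not part of this
commit.

```
resource "azurerm_dns_a_record" "ingress" {
  resource_group_name = azurerm_resource_group.dns.name
  zone_name           = azurerm_dns_zone.clusters.name
  name                = "app"
  ttl                 = 300
  records             = [module.ramius.ingress_static_ipv4]
}

resource "azurerm_dns_aaaa_record" "ingress" {
  resource_group_name = azurerm_resource_group.dns.name
  zone_name           = azurerm_dns_zone.clusters.name
  name                = "app"
  ttl                 = 300
  records             = [module.ramius.ingress_static_ipv6]
}
```
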
Dalton Hubble
2024-07-05 17:21:50 -07:00
parent 3483ed8bd5
commit 48d4973957
28 changed files with 899 additions and 560 deletions


@@ -25,9 +25,12 @@ variable "security_group_id" {
   description = "Must be set to the `worker_security_group_id` output by cluster"
 }
 
-variable "backend_address_pool_id" {
-  type = string
-  description = "Must be set to the `worker_backend_address_pool_id` output by cluster"
+variable "backend_address_pool_ids" {
+  type = object({
+    ipv4 = list(string)
+    ipv6 = list(string)
+  })
+  description = "Must be set to the `backend_address_pool_ids` output by cluster"
 }
 
 # instances
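
For callers of the workers module, the variable change means passing an object
of pool id lists instead of a single pool id. A hypothetical invocation (the
backend pool resource names are assumptions, other worker arguments elided):

```
module "workers" {
  source = "./workers"
  # ...other worker arguments unchanged...

  # previously: backend_address_pool_id = azurerm_lb_backend_address_pool.worker.id
  backend_address_pool_ids = {
    ipv4 = [azurerm_lb_backend_address_pool.worker_ipv4.id]
    ipv6 = [azurerm_lb_backend_address_pool.worker_ipv6.id]
  }
}
```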


@@ -9,19 +9,14 @@ locals {
 # Workers scale set
 resource "azurerm_linux_virtual_machine_scale_set" "workers" {
+  name                = "${var.name}-worker"
   resource_group_name = var.resource_group_name
-
-  name      = "${var.name}-worker"
-  location  = var.region
-  sku       = var.vm_type
-  instances = var.worker_count
+  location            = var.region
+  sku                 = var.vm_type
+  instances           = var.worker_count
   # instance name prefix for instances in the set
   computer_name_prefix   = "${var.name}-worker"
   single_placement_group = false
-  custom_data            = base64encode(data.ct_config.worker.rendered)
-  boot_diagnostics {
-    # defaults to a managed storage account
-  }
 
   # storage
   os_disk {
@@ -46,13 +41,6 @@ resource "azurerm_linux_virtual_machine_scale_set" "workers" {
     }
   }
 
-  # Azure requires setting admin_ssh_key, though Ignition custom_data handles it too
-  admin_username = "core"
-  admin_ssh_key {
-    username   = "core"
-    public_key = local.azure_authorized_key
-  }
-
   # network
   network_interface {
     name = "nic0"
@@ -60,13 +48,33 @@ resource "azurerm_linux_virtual_machine_scale_set" "workers" {
     network_security_group_id = var.security_group_id
     ip_configuration {
-      name      = "ip0"
+      name      = "ipv4"
+      version   = "IPv4"
       primary   = true
       subnet_id = var.subnet_id
       # backend address pool to which the NIC should be added
-      load_balancer_backend_address_pool_ids = [var.backend_address_pool_id]
+      load_balancer_backend_address_pool_ids = var.backend_address_pool_ids.ipv4
     }
+    ip_configuration {
+      name      = "ipv6"
+      version   = "IPv6"
+      subnet_id = var.subnet_id
+      # backend address pool to which the NIC should be added
+      load_balancer_backend_address_pool_ids = var.backend_address_pool_ids.ipv6
+    }
   }
 
+  # boot
+  custom_data = base64encode(data.ct_config.worker.rendered)
+  boot_diagnostics {
+    # defaults to a managed storage account
+  }
+
+  # Azure requires an RSA admin_ssh_key
+  admin_username = "core"
+  admin_ssh_key {
+    username   = "core"
+    public_key = local.azure_authorized_key
+  }
+
   # lifecycle
@@ -81,18 +89,15 @@ resource "azurerm_linux_virtual_machine_scale_set" "workers" {
 # Scale up or down to maintain desired number, tolerating deallocations.
 resource "azurerm_monitor_autoscale_setting" "workers" {
+  name                = "${var.name}-maintain-desired"
   resource_group_name = var.resource_group_name
-
-  name     = "${var.name}-maintain-desired"
-  location = var.region
+  location            = var.region
   # autoscale
   enabled            = true
   target_resource_id = azurerm_linux_virtual_machine_scale_set.workers.id
   profile {
     name = "default"
     capacity {
       minimum = var.worker_count
       default = var.worker_count