Change worker node pools from uniform to flexible orchestration mode

* Use flexible orchestration mode. Azure has started to recommend this
mode because it allows interacting with VMSS instances like regular VMs
via the CLI or via the Azure Portal
* Add options to allow worker nodes to use ephemeral local disks
  * Add `controller_disk_type` and `controller_disk_size` variables
  * Add `worker_disk_type`, `worker_disk_size`, and `worker_ephemeral_disk` variables
This commit is contained in:
Dalton Hubble 2024-07-09 07:53:41 -07:00
parent a4fab61066
commit 0d10d180f8
No known key found for this signature in database
GPG Key ID: BD34C2E3EF32B7A0
12 changed files with 206 additions and 121 deletions

View File

@ -18,6 +18,10 @@ Notable changes between versions.
* Configure worker nodes to use outbound rules and the load balancer for SNAT * Configure worker nodes to use outbound rules and the load balancer for SNAT
* Extend network security rules to allow IPv6 traffic, analogous to IPv4 * Extend network security rules to allow IPv6 traffic, analogous to IPv4
* Rename `region` variable to `location` to align with Azure platform conventions ([#1469](https://github.com/poseidon/typhoon/pull/1469)) * Rename `region` variable to `location` to align with Azure platform conventions ([#1469](https://github.com/poseidon/typhoon/pull/1469))
* Change worker pools from uniform to flexible orchestration mode ([#1473](https://github.com/poseidon/typhoon/pull/1473))
* Add options to allow worker nodes to use ephemeral local disks ([#1473](https://github.com/poseidon/typhoon/pull/1473))
* Add `controller_disk_type` and `controller_disk_size` variables
* Add `worker_disk_type`, `worker_disk_size`, and `worker_ephemeral_disk` variables
* Reduce the number of public IPv4 addresses needed for the Azure load balancer ([#1470](https://github.com/poseidon/typhoon/pull/1470)) * Reduce the number of public IPv4 addresses needed for the Azure load balancer ([#1470](https://github.com/poseidon/typhoon/pull/1470))
```diff ```diff
@ -30,6 +34,10 @@ module "cluster" {
+ network_cidr = { + network_cidr = {
+ ipv4 = ["10.0.0.0/16"] + ipv4 = ["10.0.0.0/16"]
+ } + }
# optional
+ controller_disk_type = "StandardSSD_LRS"
+ worker_ephemeral_disk = true
} }
``` ```

View File

@ -44,9 +44,9 @@ resource "azurerm_linux_virtual_machine" "controllers" {
source_image_id = var.os_image source_image_id = var.os_image
os_disk { os_disk {
name = "${var.cluster_name}-controller-${count.index}" name = "${var.cluster_name}-controller-${count.index}"
storage_account_type = var.controller_disk_type
disk_size_gb = var.controller_disk_size
caching = "None" caching = "None"
disk_size_gb = var.disk_size
storage_account_type = "Premium_LRS"
} }
# network # network

View File

@ -22,41 +22,66 @@ variable "dns_zone_group" {
# instances # instances
variable "os_image" {
type = string
description = "Fedora CoreOS image for instances"
}
variable "controller_count" { variable "controller_count" {
type = number type = number
description = "Number of controllers (i.e. masters)" description = "Number of controllers (i.e. masters)"
default = 1 default = 1
} }
variable "worker_count" {
type = number
description = "Number of workers"
default = 1
}
variable "controller_type" { variable "controller_type" {
type = string type = string
description = "Machine type for controllers (see `az vm list-skus --location centralus`)" description = "Machine type for controllers (see `az vm list-skus --location centralus`)"
default = "Standard_B2s" default = "Standard_B2s"
} }
variable "controller_disk_type" {
type = string
description = "Type of managed disk for controller node(s)"
default = "Premium_LRS"
}
variable "controller_disk_size" {
type = number
description = "Size of the managed disk in GB for controller node(s)"
default = 30
}
variable "worker_count" {
type = number
description = "Number of workers"
default = 1
}
variable "worker_type" { variable "worker_type" {
type = string type = string
description = "Machine type for workers (see `az vm list-skus --location centralus`)" description = "Machine type for workers (see `az vm list-skus --location centralus`)"
default = "Standard_D2as_v5" default = "Standard_D2as_v5"
} }
variable "os_image" { variable "worker_disk_type" {
type = string type = string
description = "Fedora CoreOS image for instances" description = "Type of managed disk for worker nodes"
default = "Standard_LRS"
} }
variable "disk_size" { variable "worker_disk_size" {
type = number type = number
description = "Size of the disk in GB" description = "Size of the managed disk in GB for worker nodes"
default = 30 default = 30
} }
variable "worker_ephemeral_disk" {
type = bool
description = "Use ephemeral local disk instead of managed disk (requires vm_type with local storage)"
default = false
}
variable "worker_priority" { variable "worker_priority" {
type = string type = string
description = "Set worker priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be deallocated at any time." description = "Set worker priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be deallocated at any time."

View File

@ -9,10 +9,13 @@ module "workers" {
security_group_id = azurerm_network_security_group.worker.id security_group_id = azurerm_network_security_group.worker.id
backend_address_pool_ids = local.backend_address_pool_ids backend_address_pool_ids = local.backend_address_pool_ids
worker_count = var.worker_count worker_count = var.worker_count
vm_type = var.worker_type vm_type = var.worker_type
os_image = var.os_image os_image = var.os_image
priority = var.worker_priority disk_type = var.worker_disk_type
disk_size = var.worker_disk_size
ephemeral_disk = var.worker_ephemeral_disk
priority = var.worker_priority
# configuration # configuration
kubeconfig = module.bootstrap.kubeconfig-kubelet kubeconfig = module.bootstrap.kubeconfig-kubelet

View File

@ -52,6 +52,24 @@ variable "os_image" {
description = "Fedora CoreOS image for instances" description = "Fedora CoreOS image for instances"
} }
variable "disk_type" {
type = string
description = "Type of managed disk"
default = "Standard_LRS"
}
variable "disk_size" {
type = number
description = "Size of the managed disk in GB"
default = 30
}
variable "ephemeral_disk" {
type = bool
description = "Use ephemeral local disk instead of managed disk (requires vm_type with local storage)"
default = false
}
variable "priority" { variable "priority" {
type = string type = string
description = "Set priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be evicted at any time." description = "Set priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be evicted at any time."

View File

@ -3,21 +3,29 @@ locals {
} }
# Workers scale set # Workers scale set
resource "azurerm_linux_virtual_machine_scale_set" "workers" { resource "azurerm_orchestrated_virtual_machine_scale_set" "workers" {
name = "${var.name}-worker" name = "${var.name}-worker"
resource_group_name = var.resource_group_name resource_group_name = var.resource_group_name
location = var.location location = var.location
sku = var.vm_type platform_fault_domain_count = 1
instances = var.worker_count sku_name = var.vm_type
# instance name prefix for instances in the set instances = var.worker_count
computer_name_prefix = "${var.name}-worker"
single_placement_group = false
# storage # storage
source_image_id = var.os_image encryption_at_host_enabled = true
source_image_id = var.os_image
os_disk { os_disk {
storage_account_type = "Standard_LRS" storage_account_type = var.disk_type
caching = "ReadWrite" disk_size_gb = var.disk_size
caching = "ReadOnly"
# Optionally, use the ephemeral disk of the instance type (support varies)
dynamic "diff_disk_settings" {
for_each = var.ephemeral_disk ? [1] : []
content {
option = "Local"
placement = "ResourceDisk"
}
}
} }
# network # network
@ -44,20 +52,24 @@ resource "azurerm_linux_virtual_machine_scale_set" "workers" {
} }
# boot # boot
custom_data = base64encode(data.ct_config.worker.rendered) user_data_base64 = base64encode(data.ct_config.worker.rendered)
boot_diagnostics { boot_diagnostics {
# defaults to a managed storage account # defaults to a managed storage account
} }
# Azure requires an RSA admin_ssh_key # Azure requires an RSA admin_ssh_key
admin_username = "core" os_profile {
admin_ssh_key { linux_configuration {
username = "core" admin_username = "core"
public_key = local.azure_authorized_key admin_ssh_key {
username = "core"
public_key = local.azure_authorized_key
}
computer_name_prefix = "${var.name}-worker"
}
} }
# lifecycle # lifecycle
upgrade_mode = "Manual"
# eviction policy may only be set when priority is Spot # eviction policy may only be set when priority is Spot
priority = var.priority priority = var.priority
eviction_policy = var.priority == "Spot" ? "Delete" : null eviction_policy = var.priority == "Spot" ? "Delete" : null
@ -66,25 +78,6 @@ resource "azurerm_linux_virtual_machine_scale_set" "workers" {
} }
} }
# Scale up or down to maintain desired number, tolerating deallocations.
resource "azurerm_monitor_autoscale_setting" "workers" {
name = "${var.name}-maintain-desired"
resource_group_name = var.resource_group_name
location = var.location
# autoscale
enabled = true
target_resource_id = azurerm_linux_virtual_machine_scale_set.workers.id
profile {
name = "default"
capacity {
minimum = var.worker_count
default = var.worker_count
maximum = var.worker_count
}
}
}
# Fedora CoreOS worker # Fedora CoreOS worker
data "ct_config" "worker" { data "ct_config" "worker" {
content = templatefile("${path.module}/butane/worker.yaml", { content = templatefile("${path.module}/butane/worker.yaml", {

View File

@ -49,9 +49,9 @@ resource "azurerm_linux_virtual_machine" "controllers" {
# storage # storage
os_disk { os_disk {
name = "${var.cluster_name}-controller-${count.index}" name = "${var.cluster_name}-controller-${count.index}"
storage_account_type = var.controller_disk_type
disk_size_gb = var.controller_disk_size
caching = "None" caching = "None"
disk_size_gb = var.disk_size
storage_account_type = "Premium_LRS"
} }
# Flatcar Container Linux # Flatcar Container Linux

View File

@ -22,30 +22,6 @@ variable "dns_zone_group" {
# instances # instances
variable "controller_count" {
type = number
description = "Number of controllers (i.e. masters)"
default = 1
}
variable "worker_count" {
type = number
description = "Number of workers"
default = 1
}
variable "controller_type" {
type = string
description = "Machine type for controllers (see `az vm list-skus --location centralus`)"
default = "Standard_B2s"
}
variable "worker_type" {
type = string
description = "Machine type for workers (see `az vm list-skus --location centralus`)"
default = "Standard_D2as_v5"
}
variable "os_image" { variable "os_image" {
type = string type = string
description = "Channel for a Container Linux derivative (flatcar-stable, flatcar-beta, flatcar-alpha)" description = "Channel for a Container Linux derivative (flatcar-stable, flatcar-beta, flatcar-alpha)"
@ -57,12 +33,60 @@ variable "os_image" {
} }
} }
variable "disk_size" { variable "controller_count" {
type = number type = number
description = "Size of the disk in GB" description = "Number of controllers (i.e. masters)"
default = 1
}
variable "controller_type" {
type = string
description = "Machine type for controllers (see `az vm list-skus --location centralus`)"
default = "Standard_B2s"
}
variable "controller_disk_type" {
type = string
description = "Type of managed disk for controller node(s)"
default = "Premium_LRS"
}
variable "controller_disk_size" {
type = number
description = "Size of the managed disk in GB for controller node(s)"
default = 30 default = 30
} }
variable "worker_count" {
type = number
description = "Number of workers"
default = 1
}
variable "worker_type" {
type = string
description = "Machine type for workers (see `az vm list-skus --location centralus`)"
default = "Standard_D2as_v5"
}
variable "worker_disk_type" {
type = string
description = "Type of managed disk for worker nodes"
default = "Standard_LRS"
}
variable "worker_disk_size" {
type = number
description = "Size of the managed disk in GB for worker nodes"
default = 30
}
variable "worker_ephemeral_disk" {
type = bool
description = "Use ephemeral local disk instead of managed disk (requires vm_type with local storage)"
default = false
}
variable "worker_priority" { variable "worker_priority" {
type = string type = string
description = "Set worker priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be deallocated at any time." description = "Set worker priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be deallocated at any time."

View File

@ -9,10 +9,13 @@ module "workers" {
security_group_id = azurerm_network_security_group.worker.id security_group_id = azurerm_network_security_group.worker.id
backend_address_pool_ids = local.backend_address_pool_ids backend_address_pool_ids = local.backend_address_pool_ids
worker_count = var.worker_count worker_count = var.worker_count
vm_type = var.worker_type vm_type = var.worker_type
os_image = var.os_image os_image = var.os_image
priority = var.worker_priority disk_type = var.worker_disk_type
disk_size = var.worker_disk_size
ephemeral_disk = var.worker_ephemeral_disk
priority = var.worker_priority
# configuration # configuration
kubeconfig = module.bootstrap.kubeconfig-kubelet kubeconfig = module.bootstrap.kubeconfig-kubelet

View File

@ -58,6 +58,24 @@ variable "os_image" {
} }
} }
variable "disk_type" {
type = string
description = "Type of managed disk"
default = "Standard_LRS"
}
variable "disk_size" {
type = number
description = "Size of the managed disk in GB"
default = 30
}
variable "ephemeral_disk" {
type = bool
description = "Use ephemeral local disk instead of managed disk (requires vm_type with local storage)"
default = false
}
variable "priority" { variable "priority" {
type = string type = string
description = "Set priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be evicted at any time." description = "Set priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be evicted at any time."

View File

@ -8,20 +8,28 @@ locals {
} }
# Workers scale set # Workers scale set
resource "azurerm_linux_virtual_machine_scale_set" "workers" { resource "azurerm_orchestrated_virtual_machine_scale_set" "workers" {
name = "${var.name}-worker" name = "${var.name}-worker"
resource_group_name = var.resource_group_name resource_group_name = var.resource_group_name
location = var.location location = var.location
sku = var.vm_type platform_fault_domain_count = 1
instances = var.worker_count sku_name = var.vm_type
# instance name prefix for instances in the set instances = var.worker_count
computer_name_prefix = "${var.name}-worker"
single_placement_group = false
# storage # storage
encryption_at_host_enabled = true
os_disk { os_disk {
storage_account_type = "Standard_LRS" storage_account_type = var.disk_type
caching = "ReadWrite" disk_size_gb = var.disk_size
caching = "ReadOnly"
# Optionally, use the ephemeral disk of the instance type (support varies)
dynamic "diff_disk_settings" {
for_each = var.ephemeral_disk ? [1] : []
content {
option = "Local"
placement = "ResourceDisk"
}
}
} }
# Flatcar Container Linux # Flatcar Container Linux
@ -65,20 +73,24 @@ resource "azurerm_linux_virtual_machine_scale_set" "workers" {
} }
# boot # boot
custom_data = base64encode(data.ct_config.worker.rendered) user_data_base64 = base64encode(data.ct_config.worker.rendered)
boot_diagnostics { boot_diagnostics {
# defaults to a managed storage account # defaults to a managed storage account
} }
# Azure requires an RSA admin_ssh_key # Azure requires an RSA admin_ssh_key
admin_username = "core" os_profile {
admin_ssh_key { linux_configuration {
username = "core" admin_username = "core"
public_key = local.azure_authorized_key admin_ssh_key {
username = "core"
public_key = local.azure_authorized_key
}
computer_name_prefix = "${var.name}-worker"
}
} }
# lifecycle # lifecycle
upgrade_mode = "Manual"
# eviction policy may only be set when priority is Spot # eviction policy may only be set when priority is Spot
priority = var.priority priority = var.priority
eviction_policy = var.priority == "Spot" ? "Delete" : null eviction_policy = var.priority == "Spot" ? "Delete" : null
@ -87,25 +99,6 @@ resource "azurerm_linux_virtual_machine_scale_set" "workers" {
} }
} }
# Scale up or down to maintain desired number, tolerating deallocations.
resource "azurerm_monitor_autoscale_setting" "workers" {
name = "${var.name}-maintain-desired"
resource_group_name = var.resource_group_name
location = var.location
# autoscale
enabled = true
target_resource_id = azurerm_linux_virtual_machine_scale_set.workers.id
profile {
name = "default"
capacity {
minimum = var.worker_count
default = var.worker_count
maximum = var.worker_count
}
}
}
# Flatcar Linux worker # Flatcar Linux worker
data "ct_config" "worker" { data "ct_config" "worker" {
content = templatefile("${path.module}/butane/worker.yaml", { content = templatefile("${path.module}/butane/worker.yaml", {

Binary file not shown.

Before

Width:  |  Height:  |  Size: 39 KiB

After

Width:  |  Height:  |  Size: 82 KiB