Change worker node pools from uniform to flexible orchestration mode

* Use flexible orchestration mode. Azure has started to recommend this
mode because it allows interacting with VMSS instances like regular VMs
via the CLI or via the Azure Portal
* Add options to allow workers nodes to use ephemeral local disks
  * Add `controller_disk_type` and `controller_disk_size` variables
  * Add `worker_disk_type`, `worker_disk_size`, and `worker_ephemeral_disk` variables
This commit is contained in:
Dalton Hubble
2024-07-09 07:53:41 -07:00
parent a4fab61066
commit 0d10d180f8
12 changed files with 206 additions and 121 deletions

View File

@ -58,6 +58,24 @@ variable "os_image" {
}
}
variable "disk_type" {
type = string
description = "Type of managed disk"
default = "Standard_LRS"
}
variable "disk_size" {
type = number
description = "Size of the managed disk in GB"
default = 30
}
variable "ephemeral_disk" {
type = bool
description = "Use ephemeral local disk instead of managed disk (requires vm_type with local storage)"
default = false
}
variable "priority" {
type = string
description = "Set priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be evicted at any time."

View File

@ -8,20 +8,28 @@ locals {
}
# Workers scale set
resource "azurerm_linux_virtual_machine_scale_set" "workers" {
name = "${var.name}-worker"
resource_group_name = var.resource_group_name
location = var.location
sku = var.vm_type
instances = var.worker_count
# instance name prefix for instances in the set
computer_name_prefix = "${var.name}-worker"
single_placement_group = false
resource "azurerm_orchestrated_virtual_machine_scale_set" "workers" {
name = "${var.name}-worker"
resource_group_name = var.resource_group_name
location = var.location
platform_fault_domain_count = 1
sku_name = var.vm_type
instances = var.worker_count
# storage
encryption_at_host_enabled = true
os_disk {
storage_account_type = "Standard_LRS"
caching = "ReadWrite"
storage_account_type = var.disk_type
disk_size_gb = var.disk_size
caching = "ReadOnly"
# Optionally, use the ephemeral disk of the instance type (support varies)
dynamic "diff_disk_settings" {
for_each = var.ephemeral_disk ? [1] : []
content {
option = "Local"
placement = "ResourceDisk"
}
}
}
# Flatcar Container Linux
@ -65,20 +73,24 @@ resource "azurerm_linux_virtual_machine_scale_set" "workers" {
}
# boot
custom_data = base64encode(data.ct_config.worker.rendered)
user_data_base64 = base64encode(data.ct_config.worker.rendered)
boot_diagnostics {
# defaults to a managed storage account
}
# Azure requires an RSA admin_ssh_key
admin_username = "core"
admin_ssh_key {
username = "core"
public_key = local.azure_authorized_key
os_profile {
linux_configuration {
admin_username = "core"
admin_ssh_key {
username = "core"
public_key = local.azure_authorized_key
}
computer_name_prefix = "${var.name}-worker"
}
}
# lifecycle
upgrade_mode = "Manual"
# eviction policy may only be set when priority is Spot
priority = var.priority
eviction_policy = var.priority == "Spot" ? "Delete" : null
@ -87,25 +99,6 @@ resource "azurerm_linux_virtual_machine_scale_set" "workers" {
}
}
# Scale up or down to maintain desired number, tolerating deallocations.
resource "azurerm_monitor_autoscale_setting" "workers" {
name = "${var.name}-maintain-desired"
resource_group_name = var.resource_group_name
location = var.location
# autoscale
enabled = true
target_resource_id = azurerm_linux_virtual_machine_scale_set.workers.id
profile {
name = "default"
capacity {
minimum = var.worker_count
default = var.worker_count
maximum = var.worker_count
}
}
}
# Flatcar Linux worker
data "ct_config" "worker" {
content = templatefile("${path.module}/butane/worker.yaml", {