diff --git a/CHANGES.md b/CHANGES.md index e2a40194..7d1e5129 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -18,6 +18,10 @@ Notable changes between versions. * Configure worker nodes to use outbound rules and the load balancer for SNAT * Extend network security rules to allow IPv6 traffic, analogous to IPv4 * Rename `region` variable to `location` to align with Azure platform conventions ([#1469](https://github.com/poseidon/typhoon/pull/1469)) +* Change worker pools from uniform to flexible orchestration mode ([#1473](https://github.com/poseidon/typhoon/pull/1473)) +* Add options to allow worker nodes to use ephemeral local disks ([#1473](https://github.com/poseidon/typhoon/pull/1473)) + * Add `controller_disk_type` and `controller_disk_size` variables + * Add `worker_disk_type`, `worker_disk_size`, and `worker_ephemeral_disk` variables * Reduce the number of public IPv4 addresses needed for the Azure load balancer ([#1470](https://github.com/poseidon/typhoon/pull/1470)) ```diff @@ -30,6 +34,10 @@ module "cluster" { + network_cidr = { + ipv4 = ["10.0.0.0/16"] + } + + # optional ++ controller_disk_type = "StandardSSD_LRS" ++ worker_ephemeral_disk = true } ``` diff --git a/azure/fedora-coreos/kubernetes/controllers.tf b/azure/fedora-coreos/kubernetes/controllers.tf index dab74257..5c4a0cfb 100644 --- a/azure/fedora-coreos/kubernetes/controllers.tf +++ b/azure/fedora-coreos/kubernetes/controllers.tf @@ -44,9 +44,9 @@ resource "azurerm_linux_virtual_machine" "controllers" { source_image_id = var.os_image os_disk { name = "${var.cluster_name}-controller-${count.index}" + storage_account_type = var.controller_disk_type + disk_size_gb = var.controller_disk_size caching = "None" - disk_size_gb = var.disk_size - storage_account_type = "Premium_LRS" } # network diff --git a/azure/fedora-coreos/kubernetes/variables.tf b/azure/fedora-coreos/kubernetes/variables.tf index a8dd877b..90323487 100644 --- a/azure/fedora-coreos/kubernetes/variables.tf +++
b/azure/fedora-coreos/kubernetes/variables.tf @@ -22,41 +22,66 @@ variable "dns_zone_group" { # instances +variable "os_image" { + type = string + description = "Fedora CoreOS image for instances" +} + + variable "controller_count" { type = number description = "Number of controllers (i.e. masters)" default = 1 } -variable "worker_count" { - type = number - description = "Number of workers" - default = 1 -} - variable "controller_type" { type = string description = "Machine type for controllers (see `az vm list-skus --location centralus`)" default = "Standard_B2s" } +variable "controller_disk_type" { + type = string + description = "Type of managed disk for controller node(s)" + default = "Premium_LRS" +} + +variable "controller_disk_size" { + type = number + description = "Size of the managed disk in GB for controller node(s)" + default = 30 +} + +variable "worker_count" { + type = number + description = "Number of workers" + default = 1 +} + variable "worker_type" { type = string description = "Machine type for workers (see `az vm list-skus --location centralus`)" default = "Standard_D2as_v5" } -variable "os_image" { +variable "worker_disk_type" { type = string - description = "Fedora CoreOS image for instances" + description = "Type of managed disk for worker nodes" + default = "Standard_LRS" } -variable "disk_size" { +variable "worker_disk_size" { type = number - description = "Size of the disk in GB" + description = "Size of the managed disk in GB for worker nodes" default = 30 } +variable "worker_ephemeral_disk" { + type = bool + description = "Use ephemeral local disk instead of managed disk (requires vm_type with local storage)" + default = false +} + variable "worker_priority" { type = string description = "Set worker priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be deallocated at any time." 
diff --git a/azure/fedora-coreos/kubernetes/workers.tf b/azure/fedora-coreos/kubernetes/workers.tf index 641ad226..e61ca3da 100644 --- a/azure/fedora-coreos/kubernetes/workers.tf +++ b/azure/fedora-coreos/kubernetes/workers.tf @@ -9,10 +9,13 @@ module "workers" { security_group_id = azurerm_network_security_group.worker.id backend_address_pool_ids = local.backend_address_pool_ids - worker_count = var.worker_count - vm_type = var.worker_type - os_image = var.os_image - priority = var.worker_priority + worker_count = var.worker_count + vm_type = var.worker_type + os_image = var.os_image + disk_type = var.worker_disk_type + disk_size = var.worker_disk_size + ephemeral_disk = var.worker_ephemeral_disk + priority = var.worker_priority # configuration kubeconfig = module.bootstrap.kubeconfig-kubelet diff --git a/azure/fedora-coreos/kubernetes/workers/variables.tf b/azure/fedora-coreos/kubernetes/workers/variables.tf index f009a8c8..d1f2d791 100644 --- a/azure/fedora-coreos/kubernetes/workers/variables.tf +++ b/azure/fedora-coreos/kubernetes/workers/variables.tf @@ -52,6 +52,24 @@ variable "os_image" { description = "Fedora CoreOS image for instances" } +variable "disk_type" { + type = string + description = "Type of managed disk" + default = "Standard_LRS" +} + +variable "disk_size" { + type = number + description = "Size of the managed disk in GB" + default = 30 +} + +variable "ephemeral_disk" { + type = bool + description = "Use ephemeral local disk instead of managed disk (requires vm_type with local storage)" + default = false +} + variable "priority" { type = string description = "Set priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be evicted at any time." 
diff --git a/azure/fedora-coreos/kubernetes/workers/workers.tf b/azure/fedora-coreos/kubernetes/workers/workers.tf index ae20c4ff..9cfa3058 100644 --- a/azure/fedora-coreos/kubernetes/workers/workers.tf +++ b/azure/fedora-coreos/kubernetes/workers/workers.tf @@ -3,21 +3,29 @@ locals { } # Workers scale set -resource "azurerm_linux_virtual_machine_scale_set" "workers" { - name = "${var.name}-worker" - resource_group_name = var.resource_group_name - location = var.location - sku = var.vm_type - instances = var.worker_count - # instance name prefix for instances in the set - computer_name_prefix = "${var.name}-worker" - single_placement_group = false +resource "azurerm_orchestrated_virtual_machine_scale_set" "workers" { + name = "${var.name}-worker" + resource_group_name = var.resource_group_name + location = var.location + platform_fault_domain_count = 1 + sku_name = var.vm_type + instances = var.worker_count # storage - source_image_id = var.os_image + encryption_at_host_enabled = true + source_image_id = var.os_image os_disk { - storage_account_type = "Standard_LRS" - caching = "ReadWrite" + storage_account_type = var.disk_type + disk_size_gb = var.disk_size + caching = "ReadOnly" + # Optionally, use the ephemeral disk of the instance type (support varies) + dynamic "diff_disk_settings" { + for_each = var.ephemeral_disk ? 
[1] : [] + content { + option = "Local" + placement = "ResourceDisk" + } + } } # network @@ -44,20 +52,24 @@ resource "azurerm_linux_virtual_machine_scale_set" "workers" { } # boot - custom_data = base64encode(data.ct_config.worker.rendered) + user_data_base64 = base64encode(data.ct_config.worker.rendered) boot_diagnostics { # defaults to a managed storage account } # Azure requires an RSA admin_ssh_key - admin_username = "core" - admin_ssh_key { - username = "core" - public_key = local.azure_authorized_key + os_profile { + linux_configuration { + admin_username = "core" + admin_ssh_key { + username = "core" + public_key = local.azure_authorized_key + } + computer_name_prefix = "${var.name}-worker" + } } # lifecycle - upgrade_mode = "Manual" # eviction policy may only be set when priority is Spot priority = var.priority eviction_policy = var.priority == "Spot" ? "Delete" : null @@ -66,25 +78,6 @@ resource "azurerm_linux_virtual_machine_scale_set" "workers" { } } -# Scale up or down to maintain desired number, tolerating deallocations. 
-resource "azurerm_monitor_autoscale_setting" "workers" { - name = "${var.name}-maintain-desired" - resource_group_name = var.resource_group_name - location = var.location - # autoscale - enabled = true - target_resource_id = azurerm_linux_virtual_machine_scale_set.workers.id - - profile { - name = "default" - capacity { - minimum = var.worker_count - default = var.worker_count - maximum = var.worker_count - } - } -} - # Fedora CoreOS worker data "ct_config" "worker" { content = templatefile("${path.module}/butane/worker.yaml", { diff --git a/azure/flatcar-linux/kubernetes/controllers.tf b/azure/flatcar-linux/kubernetes/controllers.tf index 56a352ef..a4e11729 100644 --- a/azure/flatcar-linux/kubernetes/controllers.tf +++ b/azure/flatcar-linux/kubernetes/controllers.tf @@ -49,9 +49,9 @@ resource "azurerm_linux_virtual_machine" "controllers" { # storage os_disk { name = "${var.cluster_name}-controller-${count.index}" + storage_account_type = var.controller_disk_type + disk_size_gb = var.controller_disk_size caching = "None" - disk_size_gb = var.disk_size - storage_account_type = "Premium_LRS" } # Flatcar Container Linux diff --git a/azure/flatcar-linux/kubernetes/variables.tf b/azure/flatcar-linux/kubernetes/variables.tf index 57a4e3d3..232331c4 100644 --- a/azure/flatcar-linux/kubernetes/variables.tf +++ b/azure/flatcar-linux/kubernetes/variables.tf @@ -22,30 +22,6 @@ variable "dns_zone_group" { # instances -variable "controller_count" { - type = number - description = "Number of controllers (i.e. 
masters)" - default = 1 -} - -variable "worker_count" { - type = number - description = "Number of workers" - default = 1 -} - -variable "controller_type" { - type = string - description = "Machine type for controllers (see `az vm list-skus --location centralus`)" - default = "Standard_B2s" -} - -variable "worker_type" { - type = string - description = "Machine type for workers (see `az vm list-skus --location centralus`)" - default = "Standard_D2as_v5" -} - variable "os_image" { type = string description = "Channel for a Container Linux derivative (flatcar-stable, flatcar-beta, flatcar-alpha)" @@ -57,12 +33,60 @@ variable "os_image" { } } -variable "disk_size" { +variable "controller_count" { type = number - description = "Size of the disk in GB" + description = "Number of controllers (i.e. masters)" + default = 1 +} + +variable "controller_type" { + type = string + description = "Machine type for controllers (see `az vm list-skus --location centralus`)" + default = "Standard_B2s" +} + +variable "controller_disk_type" { + type = string + description = "Type of managed disk for controller node(s)" + default = "Premium_LRS" +} + +variable "controller_disk_size" { + type = number + description = "Size of the managed disk in GB for controller node(s)" default = 30 } +variable "worker_count" { + type = number + description = "Number of workers" + default = 1 +} + +variable "worker_type" { + type = string + description = "Machine type for workers (see `az vm list-skus --location centralus`)" + default = "Standard_D2as_v5" +} + +variable "worker_disk_type" { + type = string + description = "Type of managed disk for worker nodes" + default = "Standard_LRS" +} + +variable "worker_disk_size" { + type = number + description = "Size of the managed disk in GB for worker nodes" + default = 30 +} + +variable "worker_ephemeral_disk" { + type = bool + description = "Use ephemeral local disk instead of managed disk (requires vm_type with local storage)" + default = false +} + 
variable "worker_priority" { type = string description = "Set worker priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be deallocated at any time." diff --git a/azure/flatcar-linux/kubernetes/workers.tf b/azure/flatcar-linux/kubernetes/workers.tf index cd60d447..c9c492a1 100644 --- a/azure/flatcar-linux/kubernetes/workers.tf +++ b/azure/flatcar-linux/kubernetes/workers.tf @@ -9,10 +9,13 @@ module "workers" { security_group_id = azurerm_network_security_group.worker.id backend_address_pool_ids = local.backend_address_pool_ids - worker_count = var.worker_count - vm_type = var.worker_type - os_image = var.os_image - priority = var.worker_priority + worker_count = var.worker_count + vm_type = var.worker_type + os_image = var.os_image + disk_type = var.worker_disk_type + disk_size = var.worker_disk_size + ephemeral_disk = var.worker_ephemeral_disk + priority = var.worker_priority # configuration kubeconfig = module.bootstrap.kubeconfig-kubelet diff --git a/azure/flatcar-linux/kubernetes/workers/variables.tf b/azure/flatcar-linux/kubernetes/workers/variables.tf index 6fc2fab8..67a13d85 100644 --- a/azure/flatcar-linux/kubernetes/workers/variables.tf +++ b/azure/flatcar-linux/kubernetes/workers/variables.tf @@ -58,6 +58,24 @@ variable "os_image" { } } +variable "disk_type" { + type = string + description = "Type of managed disk" + default = "Standard_LRS" +} + +variable "disk_size" { + type = number + description = "Size of the managed disk in GB" + default = 30 +} + +variable "ephemeral_disk" { + type = bool + description = "Use ephemeral local disk instead of managed disk (requires vm_type with local storage)" + default = false +} + variable "priority" { type = string description = "Set priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be evicted at any time." 
diff --git a/azure/flatcar-linux/kubernetes/workers/workers.tf b/azure/flatcar-linux/kubernetes/workers/workers.tf index fbd109fc..0d0d22e4 100644 --- a/azure/flatcar-linux/kubernetes/workers/workers.tf +++ b/azure/flatcar-linux/kubernetes/workers/workers.tf @@ -8,20 +8,28 @@ locals { } # Workers scale set -resource "azurerm_linux_virtual_machine_scale_set" "workers" { - name = "${var.name}-worker" - resource_group_name = var.resource_group_name - location = var.location - sku = var.vm_type - instances = var.worker_count - # instance name prefix for instances in the set - computer_name_prefix = "${var.name}-worker" - single_placement_group = false +resource "azurerm_orchestrated_virtual_machine_scale_set" "workers" { + name = "${var.name}-worker" + resource_group_name = var.resource_group_name + location = var.location + platform_fault_domain_count = 1 + sku_name = var.vm_type + instances = var.worker_count # storage + encryption_at_host_enabled = true os_disk { - storage_account_type = "Standard_LRS" - caching = "ReadWrite" + storage_account_type = var.disk_type + disk_size_gb = var.disk_size + caching = "ReadOnly" + # Optionally, use the ephemeral disk of the instance type (support varies) + dynamic "diff_disk_settings" { + for_each = var.ephemeral_disk ? 
[1] : [] + content { + option = "Local" + placement = "ResourceDisk" + } + } } # Flatcar Container Linux @@ -65,20 +73,24 @@ resource "azurerm_linux_virtual_machine_scale_set" "workers" { } # boot - custom_data = base64encode(data.ct_config.worker.rendered) + user_data_base64 = base64encode(data.ct_config.worker.rendered) boot_diagnostics { # defaults to a managed storage account } # Azure requires an RSA admin_ssh_key - admin_username = "core" - admin_ssh_key { - username = "core" - public_key = local.azure_authorized_key + os_profile { + linux_configuration { + admin_username = "core" + admin_ssh_key { + username = "core" + public_key = local.azure_authorized_key + } + computer_name_prefix = "${var.name}-worker" + } } # lifecycle - upgrade_mode = "Manual" # eviction policy may only be set when priority is Spot priority = var.priority eviction_policy = var.priority == "Spot" ? "Delete" : null @@ -87,25 +99,6 @@ resource "azurerm_linux_virtual_machine_scale_set" "workers" { } } -# Scale up or down to maintain desired number, tolerating deallocations. -resource "azurerm_monitor_autoscale_setting" "workers" { - name = "${var.name}-maintain-desired" - resource_group_name = var.resource_group_name - location = var.location - # autoscale - enabled = true - target_resource_id = azurerm_linux_virtual_machine_scale_set.workers.id - - profile { - name = "default" - capacity { - minimum = var.worker_count - default = var.worker_count - maximum = var.worker_count - } - } -} - # Flatcar Linux worker data "ct_config" "worker" { content = templatefile("${path.module}/butane/worker.yaml", { diff --git a/docs/img/typhoon-azure-load-balancing.png b/docs/img/typhoon-azure-load-balancing.png index 0d227f7a..beb532e1 100644 Binary files a/docs/img/typhoon-azure-load-balancing.png and b/docs/img/typhoon-azure-load-balancing.png differ