Upgrade terraform-provider-azurerm to v2.0+

* Add support for `terraform-provider-azurerm` v2.0+. Require
`terraform-provider-azurerm` v2.0+ and drop v1.x support since
the Azure provider major release is not backwards compatible
* Use Azure's new Linux VM and Linux VM Scale Set resources
* Change controller's Azure disk caching to None
* Associate subnets (in addition to NICs) with security groups
(aesthetic)
* If set, change `worker_priority` from `Low` to `Spot` (action required)

Related:

* https://www.terraform.io/docs/providers/azurerm/guides/2.0-upgrade-guide.html
This commit is contained in:
Dalton Hubble 2020-03-07 18:40:39 -08:00
parent c4683c5bad
commit 7b0ea23cdc
8 changed files with 103 additions and 105 deletions

View File

@ -14,6 +14,14 @@ Notable changes between versions.
* Fix `worker_node_labels` on Fedora CoreOS ([#651](https://github.com/poseidon/typhoon/pull/651))
* Fix automatic worker node delete on shutdown on Fedora CoreOS ([#657](https://github.com/poseidon/typhoon/pull/657))
#### Azure
* Upgrade to `terraform-provider-azurerm` [v2.0+](https://www.terraform.io/docs/providers/azurerm/guides/2.0-upgrade-guide.html) (action required)
* Switch to Azure's new Linux VM and Linux VM Scale Set resources
* If set, change `worker_priority` from `Low` to `Spot` (action required)
* Set controller's Azure disk caching to None
* Associate subnets (in addition to NICs) with security groups (aesthetic)
#### Google Cloud
* Fix `worker_node_labels` on Fedora CoreOS ([#651](https://github.com/poseidon/typhoon/pull/651))

View File

@ -32,92 +32,52 @@ resource "azurerm_availability_set" "controllers" {
}
# Controller instances
resource "azurerm_virtual_machine" "controllers" {
resource "azurerm_linux_virtual_machine" "controllers" {
count = var.controller_count
resource_group_name = azurerm_resource_group.cluster.name
name = "${var.cluster_name}-controller-${count.index}"
location = var.region
availability_set_id = azurerm_availability_set.controllers.id
vm_size = var.controller_type
# boot
storage_image_reference {
size = var.controller_type
custom_data = base64encode(data.ct_config.controller-ignitions.*.rendered[count.index])
# storage
os_disk {
name = "${var.cluster_name}-controller-${count.index}"
caching = "None"
disk_size_gb = var.disk_size
storage_account_type = "Premium_LRS"
}
source_image_reference {
publisher = "CoreOS"
offer = "CoreOS"
sku = local.channel
version = "latest"
}
# storage
storage_os_disk {
name = "${var.cluster_name}-controller-${count.index}"
create_option = "FromImage"
caching = "ReadWrite"
disk_size_gb = var.disk_size
os_type = "Linux"
managed_disk_type = "Premium_LRS"
}
# network
network_interface_ids = [azurerm_network_interface.controllers.*.id[count.index]]
network_interface_ids = [
azurerm_network_interface.controllers.*.id[count.index]
]
os_profile {
computer_name = "${var.cluster_name}-controller-${count.index}"
admin_username = "core"
custom_data = data.ct_config.controller-ignitions.*.rendered[count.index]
# Azure requires setting admin_ssh_key, though Ignition custom_data handles it too
admin_username = "core"
admin_ssh_key {
username = "core"
public_key = var.ssh_authorized_key
}
# Azure mandates setting an ssh_key, even though Ignition custom_data handles it too
os_profile_linux_config {
disable_password_authentication = true
ssh_keys {
path = "/home/core/.ssh/authorized_keys"
key_data = var.ssh_authorized_key
}
}
# lifecycle
delete_os_disk_on_termination = true
delete_data_disks_on_termination = true
lifecycle {
ignore_changes = [
storage_os_disk,
os_profile,
os_disk,
custom_data,
]
}
}
# Controller NICs with public and private IPv4
# One NIC is declared per controller (count = var.controller_count), named
# "<cluster_name>-controller-<index>".
resource "azurerm_network_interface" "controllers" {
count = var.controller_count
resource_group_name = azurerm_resource_group.cluster.name
name = "${var.cluster_name}-controller-${count.index}"
location = azurerm_resource_group.cluster.location
# security group is attached inline on the NIC in this version of the block
network_security_group_id = azurerm_network_security_group.controller.id
ip_configuration {
# "ip0" is the name the backend address pool association refers to
name = "ip0"
subnet_id = azurerm_subnet.controller.id
private_ip_address_allocation = "dynamic"
# public IPv4
# selects the public IP whose index matches this NIC's index
public_ip_address_id = azurerm_public_ip.controllers.*.id[count.index]
}
}
# Add controller NICs to the controller backend address pool
# One association is declared per controller NIC (count = var.controller_count).
resource "azurerm_network_interface_backend_address_pool_association" "controllers" {
count = var.controller_count
network_interface_id = azurerm_network_interface.controllers[count.index].id
# must match the ip_configuration name declared on the controller NIC
ip_configuration_name = "ip0"
backend_address_pool_id = azurerm_lb_backend_address_pool.controller.id
}
# Controller public IPv4 addresses
resource "azurerm_public_ip" "controllers" {
count = var.controller_count
@ -129,6 +89,40 @@ resource "azurerm_public_ip" "controllers" {
allocation_method = "Static"
}
# Network interfaces for the controllers: one NIC per controller, each with a
# dynamically allocated private IPv4 and its own public IPv4
resource "azurerm_network_interface" "controllers" {
  count = var.controller_count

  name                = "${var.cluster_name}-controller-${count.index}"
  resource_group_name = azurerm_resource_group.cluster.name
  location            = azurerm_resource_group.cluster.location

  ip_configuration {
    name                          = "ip0"
    subnet_id                     = azurerm_subnet.controller.id
    private_ip_address_allocation = "Dynamic"

    # public IPv4 with the same index as this NIC
    public_ip_address_id = azurerm_public_ip.controllers[count.index].id
  }
}
# Attach the controller security group to every controller NIC
resource "azurerm_network_interface_security_group_association" "controllers" {
  count = var.controller_count

  network_security_group_id = azurerm_network_security_group.controller.id
  network_interface_id      = azurerm_network_interface.controllers[count.index].id
}
# Register every controller NIC in the controller load balancer backend pool
resource "azurerm_network_interface_backend_address_pool_association" "controllers" {
  count = var.controller_count

  backend_address_pool_id = azurerm_lb_backend_address_pool.controller.id
  network_interface_id    = azurerm_network_interface.controllers[count.index].id

  # must match the ip_configuration name declared on the controller NIC
  ip_configuration_name = "ip0"
}
# Controller Ignition configs
data "ct_config" "controller-ignitions" {
count = var.controller_count

View File

@ -24,6 +24,11 @@ resource "azurerm_subnet" "controller" {
address_prefix = cidrsubnet(var.host_cidr, 1, 0)
}
# Associate the controller subnet with the controller security group
resource "azurerm_subnet_network_security_group_association" "controller" {
  network_security_group_id = azurerm_network_security_group.controller.id
  subnet_id                 = azurerm_subnet.controller.id
}
resource "azurerm_subnet" "worker" {
resource_group_name = azurerm_resource_group.cluster.name
@ -32,3 +37,8 @@ resource "azurerm_subnet" "worker" {
address_prefix = cidrsubnet(var.host_cidr, 1, 1)
}
# Associate the worker subnet with the worker security group
resource "azurerm_subnet_network_security_group_association" "worker" {
  network_security_group_id = azurerm_network_security_group.worker.id
  subnet_id                 = azurerm_subnet.worker.id
}

View File

@ -13,7 +13,7 @@ resource "null_resource" "copy-controller-secrets" {
depends_on = [
module.bootstrap,
azurerm_virtual_machine.controllers
azurerm_linux_virtual_machine.controllers
]
connection {

View File

@ -3,7 +3,7 @@
terraform {
required_version = "~> 0.12.6"
required_providers {
azurerm = "~> 1.27"
azurerm = "~> 2.0"
ct = "~> 0.3"
template = "~> 2.1"
null = "~> 2.1"

View File

@ -22,4 +22,3 @@ module "workers" {
clc_snippets = var.worker_clc_snippets
node_labels = var.worker_node_labels
}

View File

@ -5,53 +5,40 @@ locals {
}
# Workers scale set
resource "azurerm_virtual_machine_scale_set" "workers" {
resource "azurerm_linux_virtual_machine_scale_set" "workers" {
resource_group_name = var.resource_group_name
name = "${var.name}-workers"
name = "${var.name}-worker"
location = var.region
sku = var.vm_type
instances = var.worker_count
# instance name prefix for instances in the set
computer_name_prefix = "${var.name}-worker"
single_placement_group = false
custom_data = base64encode(data.ct_config.worker-ignition.rendered)
sku {
name = var.vm_type
tier = "standard"
capacity = var.worker_count
# storage
os_disk {
storage_account_type = "Standard_LRS"
caching = "ReadWrite"
}
# boot
storage_profile_image_reference {
source_image_reference {
publisher = "CoreOS"
offer = "CoreOS"
sku = local.channel
version = "latest"
}
# storage
storage_profile_os_disk {
create_option = "FromImage"
caching = "ReadWrite"
os_type = "linux"
managed_disk_type = "Standard_LRS"
}
os_profile {
computer_name_prefix = "${var.name}-worker-"
admin_username = "core"
custom_data = data.ct_config.worker-ignition.rendered
}
# Azure mandates setting an ssh_key, even though Ignition custom_data handles it too
os_profile_linux_config {
disable_password_authentication = true
ssh_keys {
path = "/home/core/.ssh/authorized_keys"
key_data = var.ssh_authorized_key
}
# Azure requires setting admin_ssh_key, though Ignition custom_data handles it too
admin_username = "core"
admin_ssh_key {
username = "core"
public_key = var.ssh_authorized_key
}
# network
network_profile {
network_interface {
name = "nic0"
primary = true
network_security_group_id = var.security_group_id
@ -67,10 +54,10 @@ resource "azurerm_virtual_machine_scale_set" "workers" {
}
# lifecycle
upgrade_policy_mode = "Manual"
# eviction policy may only be set when priority is Low
upgrade_mode = "Manual"
# eviction policy may only be set when priority is Spot
priority = var.priority
eviction_policy = var.priority == "Low" ? "Delete" : null
eviction_policy = var.priority == "Spot" ? "Delete" : null
}
# Scale up or down to maintain desired number, tolerating deallocations.
@ -82,7 +69,7 @@ resource "azurerm_monitor_autoscale_setting" "workers" {
# autoscale
enabled = true
target_resource_id = azurerm_virtual_machine_scale_set.workers.id
target_resource_id = azurerm_linux_virtual_machine_scale_set.workers.id
profile {
name = "default"

View File

@ -50,7 +50,7 @@ Configure the Azure provider in a `providers.tf` file.
```tf
provider "azurerm" {
version = "1.43.0"
version = "2.0.0"
}
provider "ct" {
@ -225,7 +225,7 @@ Reference the DNS zone with `azurerm_dns_zone.clusters.name` and its resource gr
| worker_type | Machine type for workers | "Standard_DS1_v2" | See below |
| os_image | Channel for a Container Linux derivative | "coreos-stable" | coreos-stable, coreos-beta, coreos-alpha |
| disk_size | Size of the disk in GB | 40 | 100 |
| worker_priority | Set priority to Low to use reduced cost surplus capacity, with the tradeoff that instances can be deallocated at any time | Regular | Low |
| worker_priority | Set priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be deallocated at any time | Regular | Spot |
| controller_clc_snippets | Controller Container Linux Config snippets | [] | [example](/advanced/customization/#usage) |
| worker_clc_snippets | Worker Container Linux Config snippets | [] | [example](/advanced/customization/#usage) |
| networking | Choice of networking provider | "calico" | "flannel" or "calico" |
@ -242,6 +242,6 @@ Check the list of valid [machine types](https://azure.microsoft.com/en-us/pricin
!!! warning
Do not choose a `controller_type` smaller than `Standard_B2s`. Smaller instances are not sufficient for running a controller.
#### Low Priority
#### Spot Priority
Add `worker_priority=Low` to use [Low Priority](https://docs.microsoft.com/en-us/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-use-low-priority) workers that run on Azure's surplus capacity at lower cost, but with the tradeoff that they can be deallocated at random. Low priority VMs are Azure's analog to AWS spot instances or GCP premptible instances.
Add `worker_priority=Spot` to use [Spot Priority](https://docs.microsoft.com/en-us/azure/virtual-machines/linux/spot-vms) workers that run on Azure's surplus capacity at lower cost, but with the tradeoff that they can be deallocated at random. Spot priority VMs are Azure's analog to AWS spot instances or GCP preemptible instances.