Configure controller and worker node architecture separately

* On platforms that support ARM64 instances, configure controller
and worker node host architectures separately
* For example, you can run arm64 controllers and amd64 workers
* Add `controller_arch` and `worker_arch` variables
* Remove `arch` variable
This commit is contained in:
Dalton Hubble 2024-07-31 15:05:57 -07:00
parent 516786d7bb
commit af27661432
11 changed files with 76 additions and 29 deletions

View File

@ -4,6 +4,12 @@ Notable changes between versions.
## Latest
### Azure
* Allow controller and worker nodes to use different CPU architectures
* Add `controller_arch` and `worker_arch` variables
* Remove the `arch` variable
## v1.30.3
* Kubernetes [v1.30.3](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.30.md#v1303)
@ -19,17 +25,23 @@ Notable changes between versions.
* Fix propagating settings to worker disks, previously ignored
* Allow configuring CPU pricing model for burstable instance types ([#1482](https://github.com/poseidon/typhoon/pull/1482))
* Add `controller_cpu_credits` and `worker_cpu_credits` variables (`standard` or `unlimited`)
* Configure controller or worker instance architecture ([#1485](https://github.com/poseidon/typhoon/pull/1485))
* Add `controller_arch` and `worker_arch` variables (`amd64` or `arm64`)
* Remove `arch` variable
```diff
module "cluster" {
...
- arch = "amd64"
- disk_type = "gp3"
- disk_size = 30
- disk_iops = 3000
+ controller_arch = "amd64"
+ controller_disk_size = 15
+ worker_disk_size = 22
+ controller_cpu_credits = "standard"
+ worker_arch = "amd64"
+ worker_disk_size = 22
+ worker_cpu_credits = "unlimited"
}
```
@ -53,6 +65,9 @@ module "cluster" {
* Add `controller_disk_type` and `controller_disk_size` variables
* Add `worker_disk_type`, `worker_disk_size`, and `worker_ephemeral_disk` variables
* Reduce the number of public IPv4 addresses needed for the Azure load balancer ([#1470](https://github.com/poseidon/typhoon/pull/1470))
* Configure controller or worker instance architecture for Flatcar Linux ([#1485](https://github.com/poseidon/typhoon/pull/1485))
* Add `controller_arch` and `worker_arch` variables (`amd64` or `arm64`)
* Remove `arch` variable
```diff
module "cluster" {
@ -65,7 +80,7 @@ module "cluster" {
+ ipv4 = ["10.0.0.0/16"]
+ }
# optional
# instances
+ controller_disk_type = "StandardSSD_LRS"
+ worker_ephemeral_disk = true
}

View File

@ -19,7 +19,7 @@ data "aws_ami" "fedora-coreos" {
}
data "aws_ami" "fedora-coreos-arm" {
count = var.arch == "arm64" ? 1 : 0
count = var.controller_arch == "arm64" ? 1 : 0
most_recent = true
owners = ["125523088429"]

View File

@ -22,7 +22,7 @@ resource "aws_instance" "controllers" {
}
instance_type = var.controller_type
ami = var.arch == "arm64" ? data.aws_ami.fedora-coreos-arm[0].image_id : data.aws_ami.fedora-coreos.image_id
ami = var.controller_arch == "arm64" ? data.aws_ami.fedora-coreos-arm[0].image_id : data.aws_ami.fedora-coreos.image_id
user_data = data.ct_config.controllers.*.rendered[count.index]
# storage

View File

@ -190,14 +190,25 @@ variable "cluster_domain_suffix" {
default = "cluster.local"
}
variable "arch" {
type = string
description = "Container architecture (amd64 or arm64)"
default = "amd64"
# advanced
variable "controller_arch" {
type = string
description = "Controller node(s) architecture (amd64 or arm64)"
default = "amd64"
validation {
condition = var.arch == "amd64" || var.arch == "arm64"
error_message = "The arch must be amd64 or arm64."
condition = contains(["amd64", "arm64"], var.controller_arch)
error_message = "The controller_arch must be amd64 or arm64."
}
}
variable "worker_arch" {
type = string
description = "Worker node(s) architecture (amd64 or arm64)"
default = "amd64"
validation {
condition = contains(["amd64", "arm64"], var.worker_arch)
error_message = "The worker_arch must be amd64 or arm64."
}
}

View File

@ -9,7 +9,7 @@ module "workers" {
worker_count = var.worker_count
instance_type = var.worker_type
os_stream = var.os_stream
arch = var.arch
arch = var.worker_arch
disk_type = var.worker_disk_type
disk_size = var.worker_disk_size
disk_iops = var.worker_disk_iops

View File

@ -1,7 +1,7 @@
locals {
# Pick a Flatcar Linux AMI
# flatcar-stable -> Flatcar Linux AMI
ami_id = var.arch == "arm64" ? data.aws_ami.flatcar-arm64[0].image_id : data.aws_ami.flatcar.image_id
ami_id = var.controller_arch == "arm64" ? data.aws_ami.flatcar-arm64[0].image_id : data.aws_ami.flatcar.image_id
channel = split("-", var.os_image)[1]
}
@ -26,7 +26,7 @@ data "aws_ami" "flatcar" {
}
data "aws_ami" "flatcar-arm64" {
count = var.arch == "arm64" ? 1 : 0
count = var.controller_arch == "arm64" ? 1 : 0
most_recent = true
owners = ["075585003325"]

View File

@ -190,17 +190,27 @@ variable "cluster_domain_suffix" {
default = "cluster.local"
}
variable "arch" {
variable "controller_arch" {
type = string
description = "Container architecture (amd64 or arm64)"
description = "Controller node(s) architecture (amd64 or arm64)"
default = "amd64"
validation {
condition = var.arch == "amd64" || var.arch == "arm64"
error_message = "The arch must be amd64 or arm64."
condition = contains(["amd64", "arm64"], var.controller_arch)
error_message = "The controller_arch must be amd64 or arm64."
}
}
variable "worker_arch" {
type = string
description = "Worker node(s) architecture (amd64 or arm64)"
default = "amd64"
validation {
condition = contains(["amd64", "arm64"], var.worker_arch)
error_message = "The worker_arch must be amd64 or arm64."
}
}
variable "daemonset_tolerations" {
type = list(string)
description = "List of additional taint keys kube-system DaemonSets should tolerate (e.g. ['custom-role', 'gpu-role'])"

View File

@ -9,7 +9,7 @@ module "workers" {
worker_count = var.worker_count
instance_type = var.worker_type
os_image = var.os_image
arch = var.arch
arch = var.worker_arch
disk_type = var.worker_disk_type
disk_size = var.worker_disk_size
disk_iops = var.worker_disk_iops

View File

@ -2,8 +2,8 @@ locals {
# Container Linux derivative
# flatcar-stable -> Flatcar Linux Stable
channel = split("-", var.os_image)[1]
offer_suffix = var.arch == "arm64" ? "corevm" : "free"
urn = var.arch == "arm64" ? local.channel : "${local.channel}-gen2"
offer_suffix = var.controller_arch == "arm64" ? "corevm" : "free"
urn = var.controller_arch == "arm64" ? local.channel : "${local.channel}-gen2"
# Typhoon ssh_authorized_key supports RSA or a newer formats (e.g. ed25519).
# However, Azure requires an older RSA key to pass validations. To use a
@ -63,7 +63,7 @@ resource "azurerm_linux_virtual_machine" "controllers" {
}
dynamic "plan" {
for_each = var.arch == "arm64" ? [] : [1]
for_each = var.controller_arch == "arm64" ? [] : [1]
content {
publisher = "kinvolk"
product = "flatcar-container-linux-${local.offer_suffix}"

View File

@ -168,14 +168,25 @@ variable "worker_node_labels" {
default = []
}
variable "arch" {
type = string
description = "Container architecture (amd64 or arm64)"
default = "amd64"
# advanced
variable "controller_arch" {
type = string
description = "Controller node(s) architecture (amd64 or arm64)"
default = "amd64"
validation {
condition = var.arch == "amd64" || var.arch == "arm64"
error_message = "The arch must be amd64 or arm64."
condition = contains(["amd64", "arm64"], var.controller_arch)
error_message = "The controller_arch must be amd64 or arm64."
}
}
variable "worker_arch" {
type = string
description = "Worker node(s) architecture (amd64 or arm64)"
default = "amd64"
validation {
condition = contains(["amd64", "arm64"], var.worker_arch)
error_message = "The worker_arch must be amd64 or arm64."
}
}

View File

@ -25,5 +25,5 @@ module "workers" {
cluster_domain_suffix = var.cluster_domain_suffix
snippets = var.worker_snippets
node_labels = var.worker_node_labels
arch = var.arch
arch = var.worker_arch
}