Add experimental Fedora CoreOS arm64 support on AWS

* Add experimental `arch` variable to Fedora CoreOS AWS,
accepting amd64 (default) or arm64 to support native
arm64/aarch64 clusters or mixed/hybrid clusters with
a worker pool of arm64 workers
* Add `daemonset_tolerations` variable to cluster module
(experimental)
* Add `node_taints` variable to workers module
* Requires flannel CNI and experimental Poseidon-built
arm64 Fedora CoreOS AMIs (published to us-east-1, us-east-2,
and us-west-1)

WARN:

* Our AMIs are experimental, may be removed at any time, and
will be removed when Fedora CoreOS publishes official arm64
AMIs. Do NOT use in production

Related:

* https://github.com/poseidon/typhoon/pull/682
This commit is contained in:
Dalton Hubble 2020-11-08 10:51:42 -08:00
parent 1113a22f61
commit 1b3a0f6ebc
15 changed files with 214 additions and 6 deletions

View File

@ -15,6 +15,15 @@ Notable changes between versions.
* Allow a snippet with a systemd dropin to set an alternate image (e.g. mirror) * Allow a snippet with a systemd dropin to set an alternate image (e.g. mirror)
* Fix local node delete oneshot on node shutdown ([#856](https://github.com/poseidon/typhoon/pull/855)) * Fix local node delete oneshot on node shutdown ([#856](https://github.com/poseidon/typhoon/pull/855))
#### AWS
* Add experimental Fedora CoreOS arm64 support ([docs](https://typhoon.psdn.io/advanced/arm64/), [#875](https://github.com/poseidon/typhoon/pull/875))
* Allow arm64 full-cluster or mixed/hybrid cluster with worker pools
* Add `arch` variable to cluster module
* Add `daemonset_tolerations` variable to cluster module
* Add `node_taints` variable to workers module
* Requires flannel CNI provider and use of experimental AMI (see docs)
### Flatcar Linux ### Flatcar Linux
* Rename `container-linux` modules to `flatcar-linux` ([#858](https://github.com/poseidon/typhoon/issues/858)) (**action required**) * Rename `container-linux` modules to `flatcar-linux` ([#858](https://github.com/poseidon/typhoon/issues/858)) (**action required**)

View File

@ -18,3 +18,27 @@ data "aws_ami" "fedora-coreos" {
values = ["Fedora CoreOS ${var.os_stream} *"] values = ["Fedora CoreOS ${var.os_stream} *"]
} }
} }
# Experimental Fedora CoreOS arm64 / aarch64 AMIs from Poseidon
# WARNING: These AMIs will be removed when Fedora CoreOS publishes arm64 AMIs
# and may be removed for any reason before then as well. Do not use.
data "aws_ami" "fedora-coreos-arm" {
most_recent = true
owners = ["099663496933"]
filter {
name = "architecture"
values = ["arm64"]
}
filter {
name = "virtualization-type"
values = ["hvm"]
}
filter {
name = "name"
values = ["fedora-coreos-*"]
}
}

View File

@ -12,6 +12,7 @@ module "bootstrap" {
cluster_domain_suffix = var.cluster_domain_suffix cluster_domain_suffix = var.cluster_domain_suffix
enable_reporting = var.enable_reporting enable_reporting = var.enable_reporting
enable_aggregation = var.enable_aggregation enable_aggregation = var.enable_aggregation
daemonset_tolerations = var.daemonset_tolerations
trusted_certs_dir = "/etc/pki/tls/certs" trusted_certs_dir = "/etc/pki/tls/certs"
} }

View File

@ -22,8 +22,7 @@ resource "aws_instance" "controllers" {
} }
instance_type = var.controller_type instance_type = var.controller_type
ami = var.arch == "arm64" ? data.aws_ami.fedora-coreos-arm.image_id : data.aws_ami.fedora-coreos.image_id
ami = data.aws_ami.fedora-coreos.image_id
user_data = data.ct_config.controller-ignitions.*.rendered[count.index] user_data = data.ct_config.controller-ignitions.*.rendered[count.index]
# storage # storage
@ -63,6 +62,7 @@ data "template_file" "controller-configs" {
vars = { vars = {
# Cannot use cyclic dependencies on controllers or their DNS records # Cannot use cyclic dependencies on controllers or their DNS records
etcd_arch = var.arch == "arm64" ? "-arm64" : ""
etcd_name = "etcd${count.index}" etcd_name = "etcd${count.index}"
etcd_domain = "${var.cluster_name}-etcd${count.index}.${var.dns_zone}" etcd_domain = "${var.cluster_name}-etcd${count.index}.${var.dns_zone}"
# etcd0=https://cluster-etcd0.example.com,etcd1=https://cluster-etcd1.example.com,... # etcd0=https://cluster-etcd0.example.com,etcd1=https://cluster-etcd1.example.com,...

View File

@ -12,7 +12,7 @@ systemd:
Wants=network-online.target network.target Wants=network-online.target network.target
After=network-online.target After=network-online.target
[Service] [Service]
Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.4.12 Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.4.12${etcd_arch}
Type=exec Type=exec
ExecStartPre=/bin/mkdir -p /var/lib/etcd ExecStartPre=/bin/mkdir -p /var/lib/etcd
ExecStartPre=-/usr/bin/podman rm etcd ExecStartPre=-/usr/bin/podman rm etcd
@ -214,6 +214,7 @@ storage:
ETCD_PEER_CERT_FILE=/etc/ssl/certs/etcd/peer.crt ETCD_PEER_CERT_FILE=/etc/ssl/certs/etcd/peer.crt
ETCD_PEER_KEY_FILE=/etc/ssl/certs/etcd/peer.key ETCD_PEER_KEY_FILE=/etc/ssl/certs/etcd/peer.key
ETCD_PEER_CLIENT_CERT_AUTH=true ETCD_PEER_CLIENT_CERT_AUTH=true
ETCD_UNSUPPORTED_ARCH=arm64
passwd: passwd:
users: users:
- name: core - name: core

View File

@ -155,3 +155,15 @@ variable "cluster_domain_suffix" {
default = "cluster.local" default = "cluster.local"
} }
variable "arch" {
type = string
description = "Container architecture (amd64 or arm64)"
default = "amd64"
}
variable "daemonset_tolerations" {
type = list(string)
description = "List of additional taint keys kube-system DaemonSets should tolerate (e.g. ['custom-role', 'gpu-role'])"
default = []
}

View File

@ -9,6 +9,7 @@ module "workers" {
worker_count = var.worker_count worker_count = var.worker_count
instance_type = var.worker_type instance_type = var.worker_type
os_stream = var.os_stream os_stream = var.os_stream
arch = var.arch
disk_size = var.disk_size disk_size = var.disk_size
spot_price = var.worker_price spot_price = var.worker_price
target_groups = var.worker_target_groups target_groups = var.worker_target_groups

View File

@ -18,3 +18,27 @@ data "aws_ami" "fedora-coreos" {
values = ["Fedora CoreOS ${var.os_stream} *"] values = ["Fedora CoreOS ${var.os_stream} *"]
} }
} }
# Experimental Fedora CoreOS arm64 / aarch64 AMIs from Poseidon
# WARNING: These AMIs will be removed when Fedora CoreOS publishes arm64 AMIs
# and may be removed for any reason before then as well. Do not use.
data "aws_ami" "fedora-coreos-arm" {
most_recent = true
owners = ["099663496933"]
filter {
name = "architecture"
values = ["arm64"]
}
filter {
name = "virtualization-type"
values = ["hvm"]
}
filter {
name = "name"
values = ["fedora-coreos-*"]
}
}

View File

@ -68,6 +68,9 @@ systemd:
%{~ for label in split(",", node_labels) ~} %{~ for label in split(",", node_labels) ~}
--node-labels=${label} \ --node-labels=${label} \
%{~ endfor ~} %{~ endfor ~}
%{~ for taint in split(",", node_taints) ~}
--register-with-taints=${taint} \
%{~ endfor ~}
--pod-manifest-path=/etc/kubernetes/manifests \ --pod-manifest-path=/etc/kubernetes/manifests \
--read-only-port=0 \ --read-only-port=0 \
--rotate-certificates \ --rotate-certificates \

View File

@ -108,3 +108,17 @@ variable "node_labels" {
description = "List of initial node labels" description = "List of initial node labels"
default = [] default = []
} }
variable "node_taints" {
type = list(string)
description = "List of initial node taints"
default = []
}
# unofficial, undocumented, unsupported
variable "arch" {
type = string
description = "Container architecture (amd64 or arm64)"
default = "amd64"
}

View File

@ -44,7 +44,7 @@ resource "aws_autoscaling_group" "workers" {
# Worker template # Worker template
resource "aws_launch_configuration" "worker" { resource "aws_launch_configuration" "worker" {
image_id = data.aws_ami.fedora-coreos.image_id image_id = var.arch == "arm64" ? data.aws_ami.fedora-coreos-arm.image_id : data.aws_ami.fedora-coreos.image_id
instance_type = var.instance_type instance_type = var.instance_type
spot_price = var.spot_price > 0 ? var.spot_price : null spot_price = var.spot_price > 0 ? var.spot_price : null
enable_monitoring = false enable_monitoring = false
@ -86,6 +86,7 @@ data "template_file" "worker-config" {
cluster_dns_service_ip = cidrhost(var.service_cidr, 10) cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
cluster_domain_suffix = var.cluster_domain_suffix cluster_domain_suffix = var.cluster_domain_suffix
node_labels = join(",", var.node_labels) node_labels = join(",", var.node_labels)
node_taints = join(",", var.node_taints)
} }
} }

116
docs/advanced/arm64.md Normal file
View File

@ -0,0 +1,116 @@
# ARM64
!!! warning
ARM64 support is experimental
Typhoon has experimental support for ARM64 with Fedora CoreOS on AWS. Full clusters can be created with ARM64 controller and worker nodes. Or worker pools of ARM64 nodes can be attached to an AMD64 cluster to create a hybrid/mixed architecture cluster.
!!! note
Currently, CNI networking must be set to flannel.
## AMIs
In lieu of official Fedora CoreOS ARM64 AMIs, Poseidon publishes experimental ARM64 AMIs to a few regions (us-east-1, us-east-2, us-west-1). These AMIs may be **removed** at any time and will be replaced when Fedora CoreOS publishes equivalents.
!!! note
AMIs are only published to a few regions, and AWS availability of ARM instance types varies.
## Cluster
Create a cluster with ARM64 controller and worker nodes. Container workloads must be `arm64` compatible and use `arm64` container images.
```tf
module "gravitas" {
source = "git::https://github.com/poseidon/typhoon//aws/fedora-coreos/kubernetes?ref=v1.19.4"
# AWS
cluster_name = "gravitas"
dns_zone = "aws.example.com"
dns_zone_id = "Z3PAABBCFAKEC0"
# configuration
ssh_authorized_key = "ssh-rsa AAAAB3Nz..."
# optional
arch = "arm64"
networking = "flannel"
worker_count = 2
worker_price = "0.0168"
controller_type = "t4g.small"
worker_type = "t4g.small"
}
```
Verify the cluster has only arm64 (`aarch64`) nodes.
```
$ kubectl get nodes -o wide
NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME
ip-10-0-12-178 Ready <none> 101s v1.19.4 10.0.12.178 <none> Fedora CoreOS 32.20201104.dev.0 5.8.17-200.fc32.aarch64 docker://19.3.11
ip-10-0-18-93 Ready <none> 102s v1.19.4 10.0.18.93 <none> Fedora CoreOS 32.20201104.dev.0 5.8.17-200.fc32.aarch64 docker://19.3.11
ip-10-0-90-10 Ready <none> 104s v1.19.4 10.0.90.10 <none> Fedora CoreOS 32.20201104.dev.0 5.8.17-200.fc32.aarch64 docker://19.3.11
```
## Hybrid
Create a hybrid/mixed arch cluster by defining an AWS cluster. Then define a [worker pool](worker-pools.md#aws) with ARM64 workers. Optional taints are added to aid in scheduling.
=== "Cluster (amd64)"
```tf
module "gravitas" {
source = "git::https://github.com/poseidon/typhoon//aws/fedora-coreos/kubernetes?ref=v1.19.4"
# AWS
cluster_name = "gravitas"
dns_zone = "aws.example.com"
dns_zone_id = "Z3PAABBCFAKEC0"
# configuration
ssh_authorized_key = "ssh-rsa AAAAB3Nz..."
# optional
networking = "flannel"
worker_count = 2
worker_price = "0.021"
daemonset_tolerations = ["arch"] # important
}
```
=== "Worker Pool (arm64)"
```tf
module "gravitas-arm64" {
source = "git::https://github.com/poseidon/typhoon//aws/fedora-coreos/kubernetes/workers?ref=v1.19.4"
# AWS
vpc_id = module.gravitas.vpc_id
subnet_ids = module.gravitas.subnet_ids
security_groups = module.gravitas.worker_security_groups
# configuration
name = "gravitas-arm64"
kubeconfig = module.gravitas.kubeconfig
ssh_authorized_key = var.ssh_authorized_key
# optional
arch = "arm64"
instance_type = "t4g.small"
spot_price = "0.0168"
node_taints = ["arch=arm64:NoSchedule"]
}
```
Verify amd64 (x86_64) and arm64 (aarch64) nodes are present.
```
$ kubectl get nodes -o wide
NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME
ip-10-0-14-73 Ready <none> 116s v1.19.4 10.0.14.73 <none> Fedora CoreOS 32.20201018.3.0 5.8.15-201.fc32.x86_64 docker://19.3.11
ip-10-0-17-167 Ready <none> 104s v1.19.4 10.0.17.167 <none> Fedora CoreOS 32.20201018.3.0 5.8.15-201.fc32.x86_64 docker://19.3.11
ip-10-0-47-166 Ready <none> 110s v1.19.4 10.0.47.166 <none> Fedora CoreOS 32.20201104.dev.0 5.8.17-200.fc32.aarch64 docker://19.3.11
ip-10-0-7-237 Ready <none> 111s v1.19.4 10.0.7.237 <none> Fedora CoreOS 32.20201018.3.0 5.8.15-201.fc32.x86_64 docker://19.3.11
```

View File

@ -2,5 +2,6 @@
Typhoon clusters offer several advanced features for skilled users. Typhoon clusters offer several advanced features for skilled users.
* [ARM64](arm64.md)
* [Customization](customization.md) * [Customization](customization.md)
* [Worker Pools](worker-pools.md) * [Worker Pools](worker-pools.md)

View File

@ -78,6 +78,7 @@ nav:
- 'FAQ': 'topics/faq.md' - 'FAQ': 'topics/faq.md'
- 'Advanced': - 'Advanced':
- 'Overview': 'advanced/overview.md' - 'Overview': 'advanced/overview.md'
- 'ARM64': 'advanced/arm64.md'
- 'Customization': 'advanced/customization.md' - 'Customization': 'advanced/customization.md'
- 'Worker Pools': 'advanced/worker-pools.md' - 'Worker Pools': 'advanced/worker-pools.md'
- 'Addons': - 'Addons':

View File

@ -1,4 +1,4 @@
mkdocs==1.1.2 mkdocs==1.1.2
mkdocs-material==6.1.0 mkdocs-material==6.1.4
pygments==2.6.1 pygments==2.6.1
pymdown-extensions==7.1.0 pymdown-extensions==7.1.0