mirror of
https://github.com/puppetmaster/typhoon.git
synced 2025-01-24 00:48:30 +01:00
Add experimental Fedora CoreOS arm64 support on AWS
* Add experimental `arch` variable to Fedora CoreOS AWS, accepting amd64 (default) or arm64 to support native arm64/aarch64 clusters or mixed/hybrid clusters with a worker pool of arm64 workers * Add `daemonset_tolerations` variable to cluster module (experimental) * Add `node_taints` variable to workers module * Requires flannel CNI and experimental Poseidon-built arm64 Fedora CoreOS AMIs (published to us-east-1, us-east-2, and us-west-1) WARN: * Our AMIs are experimental, may be removed at any time, and will be removed when Fedora CoreOS publishes official arm64 AMIs. Do NOT use in production Related: * https://github.com/poseidon/typhoon/pull/682
This commit is contained in:
parent
1113a22f61
commit
1b3a0f6ebc
@ -15,6 +15,15 @@ Notable changes between versions.
|
||||
* Allow a snippet with a systemd dropin to set an alternate image (e.g. mirror)
|
||||
* Fix local node delete oneshot on node shutdown ([#856](https://github.com/poseidon/typhoon/pull/855))
|
||||
|
||||
#### AWS
|
||||
|
||||
* Add experimental Fedora CoreOS arm64 support ([docs](https://typhoon.psdn.io/advanced/arm64/), [#875](https://github.com/poseidon/typhoon/pull/875))
|
||||
* Allow arm64 full-cluster or mixed/hybrid cluster with worker pools
|
||||
* Add `arch` variable to cluster module
|
||||
* Add `daemonset_tolerations` variable to cluster module
|
||||
* Add `node_taints` variable to workers module
|
||||
* Requires flannel CNI provider and use of experimental AMI (see docs)
|
||||
|
||||
### Flatcar Linux
|
||||
|
||||
* Rename `container-linux` modules to `flatcar-linux` ([#858](https://github.com/poseidon/typhoon/issues/858)) (**action required**)
|
||||
|
@ -18,3 +18,27 @@ data "aws_ami" "fedora-coreos" {
|
||||
values = ["Fedora CoreOS ${var.os_stream} *"]
|
||||
}
|
||||
}
|
||||
|
||||
# Experimental Fedora CoreOS arm64 / aarch64 AMIs from Poseidon
|
||||
# WARNING: These AMIs will be removed when Fedora CoreOS publishes arm64 AMIs
|
||||
# and may be removed for any reason before then as well. Do not use.
|
||||
data "aws_ami" "fedora-coreos-arm" {
|
||||
most_recent = true
|
||||
owners = ["099663496933"]
|
||||
|
||||
filter {
|
||||
name = "architecture"
|
||||
values = ["arm64"]
|
||||
}
|
||||
|
||||
filter {
|
||||
name = "virtualization-type"
|
||||
values = ["hvm"]
|
||||
}
|
||||
|
||||
filter {
|
||||
name = "name"
|
||||
values = ["fedora-coreos-*"]
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -12,6 +12,7 @@ module "bootstrap" {
|
||||
cluster_domain_suffix = var.cluster_domain_suffix
|
||||
enable_reporting = var.enable_reporting
|
||||
enable_aggregation = var.enable_aggregation
|
||||
daemonset_tolerations = var.daemonset_tolerations
|
||||
|
||||
trusted_certs_dir = "/etc/pki/tls/certs"
|
||||
}
|
||||
|
@ -22,9 +22,8 @@ resource "aws_instance" "controllers" {
|
||||
}
|
||||
|
||||
instance_type = var.controller_type
|
||||
|
||||
ami = data.aws_ami.fedora-coreos.image_id
|
||||
user_data = data.ct_config.controller-ignitions.*.rendered[count.index]
|
||||
ami = var.arch == "arm64" ? data.aws_ami.fedora-coreos-arm.image_id : data.aws_ami.fedora-coreos.image_id
|
||||
user_data = data.ct_config.controller-ignitions.*.rendered[count.index]
|
||||
|
||||
# storage
|
||||
root_block_device {
|
||||
@ -63,6 +62,7 @@ data "template_file" "controller-configs" {
|
||||
|
||||
vars = {
|
||||
# Cannot use cyclic dependencies on controllers or their DNS records
|
||||
etcd_arch = var.arch == "arm64" ? "-arm64" : ""
|
||||
etcd_name = "etcd${count.index}"
|
||||
etcd_domain = "${var.cluster_name}-etcd${count.index}.${var.dns_zone}"
|
||||
# etcd0=https://cluster-etcd0.example.com,etcd1=https://cluster-etcd1.example.com,...
|
||||
|
@ -12,7 +12,7 @@ systemd:
|
||||
Wants=network-online.target network.target
|
||||
After=network-online.target
|
||||
[Service]
|
||||
Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.4.12
|
||||
Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.4.12${etcd_arch}
|
||||
Type=exec
|
||||
ExecStartPre=/bin/mkdir -p /var/lib/etcd
|
||||
ExecStartPre=-/usr/bin/podman rm etcd
|
||||
@ -214,6 +214,7 @@ storage:
|
||||
ETCD_PEER_CERT_FILE=/etc/ssl/certs/etcd/peer.crt
|
||||
ETCD_PEER_KEY_FILE=/etc/ssl/certs/etcd/peer.key
|
||||
ETCD_PEER_CLIENT_CERT_AUTH=true
|
||||
ETCD_UNSUPPORTED_ARCH=arm64
|
||||
passwd:
|
||||
users:
|
||||
- name: core
|
||||
|
@ -155,3 +155,15 @@ variable "cluster_domain_suffix" {
|
||||
default = "cluster.local"
|
||||
}
|
||||
|
||||
variable "arch" {
|
||||
type = string
|
||||
description = "Container architecture (amd64 or arm64)"
|
||||
default = "amd64"
|
||||
}
|
||||
|
||||
variable "daemonset_tolerations" {
|
||||
type = list(string)
|
||||
description = "List of additional taint keys kube-system DaemonSets should tolerate (e.g. ['custom-role', 'gpu-role'])"
|
||||
default = []
|
||||
}
|
||||
|
||||
|
@ -9,6 +9,7 @@ module "workers" {
|
||||
worker_count = var.worker_count
|
||||
instance_type = var.worker_type
|
||||
os_stream = var.os_stream
|
||||
arch = var.arch
|
||||
disk_size = var.disk_size
|
||||
spot_price = var.worker_price
|
||||
target_groups = var.worker_target_groups
|
||||
|
@ -18,3 +18,27 @@ data "aws_ami" "fedora-coreos" {
|
||||
values = ["Fedora CoreOS ${var.os_stream} *"]
|
||||
}
|
||||
}
|
||||
|
||||
# Experimental Fedora CoreOS arm64 / aarch64 AMIs from Poseidon
|
||||
# WARNING: These AMIs will be removed when Fedora CoreOS publishes arm64 AMIs
|
||||
# and may be removed for any reason before then as well. Do not use.
|
||||
data "aws_ami" "fedora-coreos-arm" {
|
||||
most_recent = true
|
||||
owners = ["099663496933"]
|
||||
|
||||
filter {
|
||||
name = "architecture"
|
||||
values = ["arm64"]
|
||||
}
|
||||
|
||||
filter {
|
||||
name = "virtualization-type"
|
||||
values = ["hvm"]
|
||||
}
|
||||
|
||||
filter {
|
||||
name = "name"
|
||||
values = ["fedora-coreos-*"]
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -68,6 +68,9 @@ systemd:
|
||||
%{~ for label in split(",", node_labels) ~}
|
||||
--node-labels=${label} \
|
||||
%{~ endfor ~}
|
||||
%{~ for taint in split(",", node_taints) ~}
|
||||
--register-with-taints=${taint} \
|
||||
%{~ endfor ~}
|
||||
--pod-manifest-path=/etc/kubernetes/manifests \
|
||||
--read-only-port=0 \
|
||||
--rotate-certificates \
|
||||
|
@ -108,3 +108,17 @@ variable "node_labels" {
|
||||
description = "List of initial node labels"
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "node_taints" {
|
||||
type = list(string)
|
||||
description = "List of initial node taints"
|
||||
default = []
|
||||
}
|
||||
|
||||
# unofficial, undocumented, unsupported
|
||||
|
||||
variable "arch" {
|
||||
type = string
|
||||
description = "Container architecture (amd64 or arm64)"
|
||||
default = "amd64"
|
||||
}
|
||||
|
@ -44,7 +44,7 @@ resource "aws_autoscaling_group" "workers" {
|
||||
|
||||
# Worker template
|
||||
resource "aws_launch_configuration" "worker" {
|
||||
image_id = data.aws_ami.fedora-coreos.image_id
|
||||
image_id = var.arch == "arm64" ? data.aws_ami.fedora-coreos-arm.image_id : data.aws_ami.fedora-coreos.image_id
|
||||
instance_type = var.instance_type
|
||||
spot_price = var.spot_price > 0 ? var.spot_price : null
|
||||
enable_monitoring = false
|
||||
@ -86,6 +86,7 @@ data "template_file" "worker-config" {
|
||||
cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
|
||||
cluster_domain_suffix = var.cluster_domain_suffix
|
||||
node_labels = join(",", var.node_labels)
|
||||
node_taints = join(",", var.node_taints)
|
||||
}
|
||||
}
|
||||
|
||||
|
116
docs/advanced/arm64.md
Normal file
116
docs/advanced/arm64.md
Normal file
@ -0,0 +1,116 @@
|
||||
# ARM64
|
||||
|
||||
!!! warning
|
||||
ARM64 support is experimental
|
||||
|
||||
Typhoon has experimental support for ARM64 with Fedora CoreOS on AWS. Full clusters can be created with ARM64 controller and worker nodes. Or worker pools of ARM64 nodes can be attached to an AMD64 cluster to create a hybrid/mixed architecture cluster.
|
||||
|
||||
!!! note
|
||||
Currently, CNI networking must be set to flannel.
|
||||
|
||||
## AMIs
|
||||
|
||||
In lieu of official Fedora CoreOS ARM64 AMIs, Poseidon publishes experimental ARM64 AMIs to a few regions (us-east-1, us-east-2, us-west-1). These AMIs may be **removed** at any time and will be replaced when Fedora CoreOS publishes equivalents.
|
||||
|
||||
!!! note
|
||||
AMIs are only published to a few regions, and AWS availability of ARM instance types varies.
|
||||
|
||||
## Cluster
|
||||
|
||||
Create a cluster with ARM64 controller and worker nodes. Container workloads must be `arm64` compatible and use `arm64` container images.
|
||||
|
||||
```tf
|
||||
module "gravitas" {
|
||||
source = "git::https://github.com/poseidon/typhoon//aws/fedora-coreos/kubernetes?ref=v1.19.4"
|
||||
|
||||
# AWS
|
||||
cluster_name = "gravitas"
|
||||
dns_zone = "aws.example.com"
|
||||
dns_zone_id = "Z3PAABBCFAKEC0"
|
||||
|
||||
# configuration
|
||||
ssh_authorized_key = "ssh-rsa AAAAB3Nz..."
|
||||
|
||||
# optional
|
||||
arch = "arm64"
|
||||
networking = "flannel"
|
||||
worker_count = 2
|
||||
worker_price = "0.0168"
|
||||
|
||||
controller_type = "t4g.small"
|
||||
worker_type = "t4g.small"
|
||||
}
|
||||
```
|
||||
|
||||
Verify the cluster has only arm64 (`aarch64`) nodes.
|
||||
|
||||
```
|
||||
$ kubectl get nodes -o wide
|
||||
NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME
|
||||
ip-10-0-12-178 Ready <none> 101s v1.19.4 10.0.12.178 <none> Fedora CoreOS 32.20201104.dev.0 5.8.17-200.fc32.aarch64 docker://19.3.11
|
||||
ip-10-0-18-93 Ready <none> 102s v1.19.4 10.0.18.93 <none> Fedora CoreOS 32.20201104.dev.0 5.8.17-200.fc32.aarch64 docker://19.3.11
|
||||
ip-10-0-90-10 Ready <none> 104s v1.19.4 10.0.90.10 <none> Fedora CoreOS 32.20201104.dev.0 5.8.17-200.fc32.aarch64 docker://19.3.11
|
||||
```
|
||||
|
||||
## Hybrid
|
||||
|
||||
Create a hybrid/mixed arch cluster by defining an AWS cluster. Then define a [worker pool](worker-pools.md#aws) with ARM64 workers. Optional taints are added to aid in scheduling.
|
||||
|
||||
=== "Cluster (amd64)"
|
||||
|
||||
```tf
|
||||
module "gravitas" {
|
||||
source = "git::https://github.com/poseidon/typhoon//aws/fedora-coreos/kubernetes?ref=v1.19.4"
|
||||
|
||||
# AWS
|
||||
cluster_name = "gravitas"
|
||||
dns_zone = "aws.example.com"
|
||||
dns_zone_id = "Z3PAABBCFAKEC0"
|
||||
|
||||
# configuration
|
||||
ssh_authorized_key = "ssh-rsa AAAAB3Nz..."
|
||||
|
||||
# optional
|
||||
networking = "flannel"
|
||||
worker_count = 2
|
||||
worker_price = "0.021"
|
||||
|
||||
daemonset_tolerations = ["arch"] # important
|
||||
}
|
||||
```
|
||||
|
||||
=== "Worker Pool (arm64)"
|
||||
|
||||
```tf
|
||||
module "gravitas-arm64" {
|
||||
source = "git::https://github.com/poseidon/typhoon//aws/fedora-coreos/kubernetes/workers?ref=v1.19.4"
|
||||
|
||||
# AWS
|
||||
vpc_id = module.gravitas.vpc_id
|
||||
subnet_ids = module.gravitas.subnet_ids
|
||||
security_groups = module.gravitas.worker_security_groups
|
||||
|
||||
# configuration
|
||||
name = "gravitas-arm64"
|
||||
kubeconfig = module.gravitas.kubeconfig
|
||||
ssh_authorized_key = var.ssh_authorized_key
|
||||
|
||||
# optional
|
||||
arch = "arm64"
|
||||
instance_type = "t4g.small"
|
||||
spot_price = "0.0168"
|
||||
node_taints = ["arch=arm64:NoSchedule"]
|
||||
}
|
||||
```
|
||||
|
||||
Verify amd64 (x86_64) and arm64 (aarch64) nodes are present.
|
||||
|
||||
```
|
||||
$ kubectl get nodes -o wide
|
||||
NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME
|
||||
ip-10-0-14-73 Ready <none> 116s v1.19.4 10.0.14.73 <none> Fedora CoreOS 32.20201018.3.0 5.8.15-201.fc32.x86_64 docker://19.3.11
|
||||
ip-10-0-17-167 Ready <none> 104s v1.19.4 10.0.17.167 <none> Fedora CoreOS 32.20201018.3.0 5.8.15-201.fc32.x86_64 docker://19.3.11
|
||||
ip-10-0-47-166 Ready <none> 110s v1.19.4 10.0.47.166 <none> Fedora CoreOS 32.20201104.dev.0 5.8.17-200.fc32.aarch64 docker://19.3.11
|
||||
ip-10-0-7-237 Ready <none> 111s v1.19.4 10.0.7.237 <none> Fedora CoreOS 32.20201018.3.0 5.8.15-201.fc32.x86_64 docker://19.3.11
|
||||
```
|
||||
|
@ -2,5 +2,6 @@
|
||||
|
||||
Typhoon clusters offer several advanced features for skilled users.
|
||||
|
||||
* [ARM64](arm64.md)
|
||||
* [Customization](customization.md)
|
||||
* [Worker Pools](worker-pools.md)
|
||||
|
@ -78,6 +78,7 @@ nav:
|
||||
- 'FAQ': 'topics/faq.md'
|
||||
- 'Advanced':
|
||||
- 'Overview': 'advanced/overview.md'
|
||||
- 'ARM64': 'advanced/arm64.md'
|
||||
- 'Customization': 'advanced/customization.md'
|
||||
- 'Worker Pools': 'advanced/worker-pools.md'
|
||||
- 'Addons':
|
||||
|
@ -1,4 +1,4 @@
|
||||
mkdocs==1.1.2
|
||||
mkdocs-material==6.1.0
|
||||
mkdocs-material==6.1.4
|
||||
pygments==2.6.1
|
||||
pymdown-extensions==7.1.0
|
||||
|
Loading…
Reference in New Issue
Block a user