From 1caea3388c5dd306738661b56dbd2a3d484bf21b Mon Sep 17 00:00:00 2001 From: Dalton Hubble Date: Tue, 24 Jan 2023 20:37:57 -0800 Subject: [PATCH] Restructure bare-metal module to use a worker submodule * Add an internal `worker` module to the bare-metal module, to allow individual bare-metal machines to be defined and joined to an existing bare-metal cluster. This is similar to the "worker pools" modules for adding sets of nodes to cloud (AWS, GCP, Azure) clusters, but on metal, each piece of hardware is potentially unique. New: Using the new `worker` module, a Kubernetes cluster can be defined without any `workers` (i.e. just a control-plane). Use the `worker` module to define each machine that should join the bare-metal cluster and customize it in detail. This style is quite flexible and suited for clusters with hardware that varies quite a bit. ```tf module "mercury" { source = "git::https://github.com/poseidon/typhoon//bare-metal/flatcar-linux/kubernetes?ref=v1.26.2" # bare-metal cluster_name = "mercury" matchbox_http_endpoint = "http://matchbox.example.com" os_channel = "flatcar-stable" os_version = "2345.3.1" # configuration k8s_domain_name = "node1.example.com" ssh_authorized_key = "ssh-rsa AAAAB3Nz..." # machines controllers = [{ name = "node1" mac = "52:54:00:a1:9c:ae" domain = "node1.example.com" }] } ``` ```tf module "mercury-node1" { source = "git::https://github.com/poseidon/typhoon//bare-metal/flatcar-linux/kubernetes/worker?ref=v1.26.2" cluster_name = "mercury" # bare-metal matchbox_http_endpoint = "http://matchbox.example.com" os_channel = "flatcar-stable" os_version = "2345.3.1" # configuration name = "node2" mac = "52:54:00:b2:2f:86" domain = "node2.example.com" kubeconfig = module.mercury.kubeconfig ssh_authorized_key = "ssh-rsa AAAAB3Nz..." 
# optional snippets = [] node_labels = [] node_taints = [] install_disk = "/dev/vda" cached_install = false } ``` For clusters with fairly similar hardware, you may continue to define `workers` directly within the cluster definition. This reduces some repetition, but is not quite as flexible. ```tf module "mercury" { source = "git::https://github.com/poseidon/typhoon//bare-metal/flatcar-linux/kubernetes?ref=v1.26.1" # bare-metal cluster_name = "mercury" matchbox_http_endpoint = "http://matchbox.example.com" os_channel = "flatcar-stable" os_version = "2345.3.1" # configuration k8s_domain_name = "node1.example.com" ssh_authorized_key = "ssh-rsa AAAAB3Nz..." # machines controllers = [{ name = "node1" mac = "52:54:00:a1:9c:ae" domain = "node1.example.com" }] workers = [ { name = "node2", mac = "52:54:00:b2:2f:86" domain = "node2.example.com" }, { name = "node3", mac = "52:54:00:c3:61:77" domain = "node3.example.com" } ] } ``` Optional variables `snippets`, `worker_node_labels`, and `worker_node_taints` are still defined as a map from machine name to a list of snippets, labels, or taints respectively to allow some degree of per-machine customization. However, fields like `install_disk`, `kernel_args`, `cached_install` and future options will not be designed this way. 
Instead, if your machines vary it is recommended to use the new `worker` module to define each node --- CHANGES.md | 4 + bare-metal/fedora-coreos/kubernetes/groups.tf | 22 ---- .../fedora-coreos/kubernetes/outputs.tf | 7 + .../fedora-coreos/kubernetes/profiles.tf | 37 ++---- bare-metal/fedora-coreos/kubernetes/ssh.tf | 33 ----- .../{ => worker}/butane/worker.yaml | 0 .../kubernetes/worker/matchbox.tf | 63 +++++++++ .../fedora-coreos/kubernetes/worker/ssh.tf | 27 ++++ .../kubernetes/worker/variables.tf | 111 ++++++++++++++++ .../kubernetes/worker/versions.tf | 17 +++ .../fedora-coreos/kubernetes/workers.tf | 30 +++++ bare-metal/flatcar-linux/kubernetes/groups.tf | 35 ----- .../flatcar-linux/kubernetes/profiles.tf | 112 ++++++---------- bare-metal/flatcar-linux/kubernetes/ssh.tf | 34 ----- .../kubernetes/worker/butane/install.yaml | 46 +++++++ .../{ => worker}/butane/worker.yaml | 0 .../kubernetes/worker/matchbox.tf | 87 +++++++++++++ .../flatcar-linux/kubernetes/worker/ssh.tf | 27 ++++ .../kubernetes/worker/variables.tf | 120 ++++++++++++++++++ .../kubernetes/worker/versions.tf | 16 +++ .../flatcar-linux/kubernetes/workers.tf | 32 +++++ 21 files changed, 639 insertions(+), 221 deletions(-) delete mode 100644 bare-metal/fedora-coreos/kubernetes/groups.tf rename bare-metal/fedora-coreos/kubernetes/{ => worker}/butane/worker.yaml (100%) create mode 100644 bare-metal/fedora-coreos/kubernetes/worker/matchbox.tf create mode 100644 bare-metal/fedora-coreos/kubernetes/worker/ssh.tf create mode 100644 bare-metal/fedora-coreos/kubernetes/worker/variables.tf create mode 100644 bare-metal/fedora-coreos/kubernetes/worker/versions.tf create mode 100644 bare-metal/fedora-coreos/kubernetes/workers.tf delete mode 100644 bare-metal/flatcar-linux/kubernetes/groups.tf create mode 100644 bare-metal/flatcar-linux/kubernetes/worker/butane/install.yaml rename bare-metal/flatcar-linux/kubernetes/{ => worker}/butane/worker.yaml (100%) create mode 100644 
bare-metal/flatcar-linux/kubernetes/worker/matchbox.tf create mode 100644 bare-metal/flatcar-linux/kubernetes/worker/ssh.tf create mode 100644 bare-metal/flatcar-linux/kubernetes/worker/variables.tf create mode 100644 bare-metal/flatcar-linux/kubernetes/worker/versions.tf create mode 100644 bare-metal/flatcar-linux/kubernetes/workers.tf diff --git a/CHANGES.md b/CHANGES.md index 63172a20..e5463abb 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,6 +4,10 @@ Notable changes between versions. ## Latest +### Bare-Metal + +* Add a `worker` module to allow customizing individual worker nodes ([#1295](https://github.com/poseidon/typhoon/pull/1295)) + ## v1.26.1 * Kubernetes [v1.26.1](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.26.md#v1261) diff --git a/bare-metal/fedora-coreos/kubernetes/groups.tf b/bare-metal/fedora-coreos/kubernetes/groups.tf deleted file mode 100644 index 8c72aa30..00000000 --- a/bare-metal/fedora-coreos/kubernetes/groups.tf +++ /dev/null @@ -1,22 +0,0 @@ -# Match each controller or worker to a profile - -resource "matchbox_group" "controller" { - count = length(var.controllers) - name = format("%s-%s", var.cluster_name, var.controllers.*.name[count.index]) - profile = matchbox_profile.controllers.*.name[count.index] - - selector = { - mac = var.controllers.*.mac[count.index] - } -} - -resource "matchbox_group" "worker" { - count = length(var.workers) - name = format("%s-%s", var.cluster_name, var.workers.*.name[count.index]) - profile = matchbox_profile.workers.*.name[count.index] - - selector = { - mac = var.workers.*.mac[count.index] - } -} - diff --git a/bare-metal/fedora-coreos/kubernetes/outputs.tf b/bare-metal/fedora-coreos/kubernetes/outputs.tf index 67275db5..5918d0bd 100644 --- a/bare-metal/fedora-coreos/kubernetes/outputs.tf +++ b/bare-metal/fedora-coreos/kubernetes/outputs.tf @@ -3,6 +3,13 @@ output "kubeconfig-admin" { sensitive = true } +# Outputs for workers + +output "kubeconfig" { + value = 
module.bootstrap.kubeconfig-kubelet + sensitive = true +} + # Outputs for debug output "assets_dist" { diff --git a/bare-metal/fedora-coreos/kubernetes/profiles.tf b/bare-metal/fedora-coreos/kubernetes/profiles.tf index d161a39f..c5b8c650 100644 --- a/bare-metal/fedora-coreos/kubernetes/profiles.tf +++ b/bare-metal/fedora-coreos/kubernetes/profiles.tf @@ -28,6 +28,16 @@ locals { args = var.cached_install ? local.cached_args : local.remote_args } +# Match a controller to a profile by MAC +resource "matchbox_group" "controller" { + count = length(var.controllers) + name = format("%s-%s", var.cluster_name, var.controllers.*.name[count.index]) + profile = matchbox_profile.controllers.*.name[count.index] + + selector = { + mac = var.controllers.*.mac[count.index] + } +} // Fedora CoreOS controller profile resource "matchbox_profile" "controllers" { @@ -55,30 +65,3 @@ data "ct_config" "controllers" { strict = true snippets = lookup(var.snippets, var.controllers.*.name[count.index], []) } - -// Fedora CoreOS worker profile -resource "matchbox_profile" "workers" { - count = length(var.workers) - name = format("%s-worker-%s", var.cluster_name, var.workers.*.name[count.index]) - - kernel = local.kernel - initrd = local.initrd - args = concat(local.args, var.kernel_args) - - raw_ignition = data.ct_config.workers.*.rendered[count.index] -} - -# Fedora CoreOS workers -data "ct_config" "workers" { - count = length(var.workers) - content = templatefile("${path.module}/butane/worker.yaml", { - domain_name = var.workers.*.domain[count.index] - cluster_dns_service_ip = module.bootstrap.cluster_dns_service_ip - cluster_domain_suffix = var.cluster_domain_suffix - ssh_authorized_key = var.ssh_authorized_key - node_labels = join(",", lookup(var.worker_node_labels, var.workers.*.name[count.index], [])) - node_taints = join(",", lookup(var.worker_node_taints, var.workers.*.name[count.index], [])) - }) - strict = true - snippets = lookup(var.snippets, var.workers.*.name[count.index], []) 
-} diff --git a/bare-metal/fedora-coreos/kubernetes/ssh.tf b/bare-metal/fedora-coreos/kubernetes/ssh.tf index 0ca5f6ba..40ecbbf6 100644 --- a/bare-metal/fedora-coreos/kubernetes/ssh.tf +++ b/bare-metal/fedora-coreos/kubernetes/ssh.tf @@ -15,7 +15,6 @@ resource "null_resource" "copy-controller-secrets" { # matchbox groups are written, causing a deadlock. depends_on = [ matchbox_group.controller, - matchbox_group.worker, module.bootstrap, ] @@ -45,37 +44,6 @@ resource "null_resource" "copy-controller-secrets" { } } -# Secure copy kubeconfig to all workers. Activates kubelet.service -resource "null_resource" "copy-worker-secrets" { - count = length(var.workers) - - # Without depends_on, remote-exec could start and wait for machines before - # matchbox groups are written, causing a deadlock. - depends_on = [ - matchbox_group.controller, - matchbox_group.worker, - ] - - connection { - type = "ssh" - host = var.workers.*.domain[count.index] - user = "core" - timeout = "60m" - } - - provisioner "file" { - content = module.bootstrap.kubeconfig-kubelet - destination = "/home/core/kubeconfig" - } - - provisioner "remote-exec" { - inline = [ - "sudo mv /home/core/kubeconfig /etc/kubernetes/kubeconfig", - "sudo touch /etc/kubernetes", - ] - } -} - # Connect to a controller to perform one-time cluster bootstrap. resource "null_resource" "bootstrap" { # Without depends_on, this remote-exec may start before the kubeconfig copy. @@ -83,7 +51,6 @@ resource "null_resource" "bootstrap" { # while no Kubelets are running. 
depends_on = [ null_resource.copy-controller-secrets, - null_resource.copy-worker-secrets, ] connection { diff --git a/bare-metal/fedora-coreos/kubernetes/butane/worker.yaml b/bare-metal/fedora-coreos/kubernetes/worker/butane/worker.yaml similarity index 100% rename from bare-metal/fedora-coreos/kubernetes/butane/worker.yaml rename to bare-metal/fedora-coreos/kubernetes/worker/butane/worker.yaml diff --git a/bare-metal/fedora-coreos/kubernetes/worker/matchbox.tf b/bare-metal/fedora-coreos/kubernetes/worker/matchbox.tf new file mode 100644 index 00000000..7fd0bdf9 --- /dev/null +++ b/bare-metal/fedora-coreos/kubernetes/worker/matchbox.tf @@ -0,0 +1,63 @@ +locals { + remote_kernel = "https://builds.coreos.fedoraproject.org/prod/streams/${var.os_stream}/builds/${var.os_version}/x86_64/fedora-coreos-${var.os_version}-live-kernel-x86_64" + remote_initrd = [ + "--name main https://builds.coreos.fedoraproject.org/prod/streams/${var.os_stream}/builds/${var.os_version}/x86_64/fedora-coreos-${var.os_version}-live-initramfs.x86_64.img", + ] + + remote_args = [ + "initrd=main", + "coreos.live.rootfs_url=https://builds.coreos.fedoraproject.org/prod/streams/${var.os_stream}/builds/${var.os_version}/x86_64/fedora-coreos-${var.os_version}-live-rootfs.x86_64.img", + "coreos.inst.install_dev=${var.install_disk}", + "coreos.inst.ignition_url=${var.matchbox_http_endpoint}/ignition?uuid=$${uuid}&mac=$${mac:hexhyp}", + ] + + cached_kernel = "/assets/fedora-coreos/fedora-coreos-${var.os_version}-live-kernel-x86_64" + cached_initrd = [ + "/assets/fedora-coreos/fedora-coreos-${var.os_version}-live-initramfs.x86_64.img", + ] + + cached_args = [ + "initrd=main", + "coreos.live.rootfs_url=${var.matchbox_http_endpoint}/assets/fedora-coreos/fedora-coreos-${var.os_version}-live-rootfs.x86_64.img", + "coreos.inst.install_dev=${var.install_disk}", + "coreos.inst.ignition_url=${var.matchbox_http_endpoint}/ignition?uuid=$${uuid}&mac=$${mac:hexhyp}", + ] + + kernel = var.cached_install ? 
local.cached_kernel : local.remote_kernel + initrd = var.cached_install ? local.cached_initrd : local.remote_initrd + args = var.cached_install ? local.cached_args : local.remote_args +} + +// Match a worker to a profile by MAC +resource "matchbox_group" "worker" { + name = format("%s-%s", var.cluster_name, var.name) + profile = matchbox_profile.worker.name + selector = { + mac = var.mac + } +} + +// Fedora CoreOS worker profile +resource "matchbox_profile" "worker" { + name = format("%s-worker-%s", var.cluster_name, var.name) + kernel = local.kernel + initrd = local.initrd + args = concat(local.args, var.kernel_args) + + raw_ignition = data.ct_config.worker.rendered +} + +# Fedora CoreOS workers +data "ct_config" "worker" { + content = templatefile("${path.module}/butane/worker.yaml", { + domain_name = var.domain + ssh_authorized_key = var.ssh_authorized_key + cluster_dns_service_ip = cidrhost(var.service_cidr, 10) + cluster_domain_suffix = var.cluster_domain_suffix + node_labels = join(",", var.node_labels) + node_taints = join(",", var.node_taints) + }) + strict = true + snippets = var.snippets +} + diff --git a/bare-metal/fedora-coreos/kubernetes/worker/ssh.tf b/bare-metal/fedora-coreos/kubernetes/worker/ssh.tf new file mode 100644 index 00000000..58ff3d43 --- /dev/null +++ b/bare-metal/fedora-coreos/kubernetes/worker/ssh.tf @@ -0,0 +1,27 @@ +# Secure copy kubeconfig to worker. Activates kubelet.service +resource "null_resource" "copy-worker-secrets" { + # Without depends_on, remote-exec could start and wait for machines before + # matchbox groups are written, causing a deadlock. 
+ depends_on = [ + matchbox_group.worker, + ] + + connection { + type = "ssh" + host = var.domain + user = "core" + timeout = "60m" + } + + provisioner "file" { + content = var.kubeconfig + destination = "/home/core/kubeconfig" + } + + provisioner "remote-exec" { + inline = [ + "sudo mv /home/core/kubeconfig /etc/kubernetes/kubeconfig", + "sudo touch /etc/kubernetes", + ] + } +} diff --git a/bare-metal/fedora-coreos/kubernetes/worker/variables.tf b/bare-metal/fedora-coreos/kubernetes/worker/variables.tf new file mode 100644 index 00000000..fe89f21b --- /dev/null +++ b/bare-metal/fedora-coreos/kubernetes/worker/variables.tf @@ -0,0 +1,111 @@ +variable "cluster_name" { + type = string + description = "Must be set to the `cluster_name` of cluster" +} + +# bare-metal + +variable "matchbox_http_endpoint" { + type = string + description = "Matchbox HTTP read-only endpoint (e.g. http://matchbox.example.com:8080)" +} + +variable "os_stream" { + type = string + description = "Fedora CoreOS release stream (e.g. stable, testing, next)" + default = "stable" + + validation { + condition = contains(["stable", "testing", "next"], var.os_stream) + error_message = "The os_stream must be stable, testing, or next." + } +} + +variable "os_version" { + type = string + description = "Fedora CoreOS version to PXE and install (e.g. 31.20200310.3.0)" +} + +# machine + +variable "name" { + type = string + description = "Unique name for the machine (e.g. node1)" +} + +variable "mac" { + type = string + description = "MAC address (e.g. 52:54:00:a1:9c:ae)" +} + +variable "domain" { + type = string + description = "Fully qualified domain name (e.g. 
node1.example.com)" +} + +# configuration + +variable "kubeconfig" { + type = string + description = "Must be set to `kubeconfig` output by cluster" +} + +variable "ssh_authorized_key" { + type = string + description = "SSH public key for user 'core'" +} + +variable "snippets" { + type = list(string) + description = "List of Butane snippets" + default = [] +} + +variable "node_labels" { + type = list(string) + description = "List of initial node labels" + default = [] +} + +variable "node_taints" { + type = list(string) + description = "List of initial node taints" + default = [] +} + +# optional + +variable "cached_install" { + type = bool + description = "Whether Fedora CoreOS should PXE boot and install from matchbox /assets cache. Note that the admin must have downloaded the os_version into matchbox assets." + default = false +} + +variable "install_disk" { + type = string + description = "Disk device to install Fedora CoreOS (e.g. sda)" + default = "sda" +} + +variable "kernel_args" { + type = list(string) + description = "Additional kernel arguments to provide at PXE boot." 
+ default = [] +} + +# unofficial, undocumented, unsupported + +variable "service_cidr" { + type = string + description = < stable channel channel = split("-", var.os_channel)[1] -} -// Flatcar Linux install profile (from release.flatcar-linux.net) -resource "matchbox_profile" "flatcar-install" { - count = length(var.controllers) + length(var.workers) - name = format("%s-flatcar-install-%s", var.cluster_name, concat(var.controllers.*.name, var.workers.*.name)[count.index]) - - kernel = "${var.download_protocol}://${local.channel}.release.flatcar-linux.net/amd64-usr/${var.os_version}/flatcar_production_pxe.vmlinuz" - - initrd = [ + remote_kernel = "${var.download_protocol}://${local.channel}.release.flatcar-linux.net/amd64-usr/${var.os_version}/flatcar_production_pxe.vmlinuz" + remote_initrd = [ "${var.download_protocol}://${local.channel}.release.flatcar-linux.net/amd64-usr/${var.os_version}/flatcar_production_pxe_image.cpio.gz", ] - - args = flatten([ + args = [ "initrd=flatcar_production_pxe_image.cpio.gz", "flatcar.config.url=${var.matchbox_http_endpoint}/ignition?uuid=$${uuid}&mac=$${mac:hexhyp}", "flatcar.first_boot=yes", - var.kernel_args, - ]) + ] - raw_ignition = data.ct_config.install.*.rendered[count.index] -} - -// Flatcar Linux Install profile (from matchbox /assets cache) -// Note: Admin must have downloaded os_version into matchbox assets/flatcar. 
-resource "matchbox_profile" "cached-flatcar-install" { - count = length(var.controllers) + length(var.workers) - name = format("%s-cached-flatcar-linux-install-%s", var.cluster_name, concat(var.controllers.*.name, var.workers.*.name)[count.index]) - - kernel = "/assets/flatcar/${var.os_version}/flatcar_production_pxe.vmlinuz" - - initrd = [ + cached_kernel = "/assets/flatcar/${var.os_version}/flatcar_production_pxe.vmlinuz" + cached_initrd = [ "/assets/flatcar/${var.os_version}/flatcar_production_pxe_image.cpio.gz", ] - args = flatten([ - "initrd=flatcar_production_pxe_image.cpio.gz", - "flatcar.config.url=${var.matchbox_http_endpoint}/ignition?uuid=$${uuid}&mac=$${mac:hexhyp}", - "flatcar.first_boot=yes", - var.kernel_args, - ]) + kernel = var.cached_install ? local.cached_kernel : local.remote_kernel + initrd = var.cached_install ? local.cached_initrd : local.remote_initrd +} - raw_ignition = data.ct_config.cached-install.*.rendered[count.index] +# Match controllers to install profiles by MAC +resource "matchbox_group" "install" { + count = length(var.controllers) + + name = format("install-%s", var.controllers[count.index].name) + profile = matchbox_profile.install[count.index].name + selector = { + mac = concat(var.controllers.*.mac, var.workers.*.mac)[count.index] + } +} + +// Flatcar Linux install +resource "matchbox_profile" "install" { + count = length(var.controllers) + + name = format("%s-install-%s", var.cluster_name, var.controllers.*.name[count.index]) + kernel = local.kernel + initrd = local.initrd + args = concat(local.args, var.kernel_args) + + raw_ignition = data.ct_config.install[count.index].rendered } # Flatcar Linux install data "ct_config" "install" { - count = length(var.controllers) + length(var.workers) + count = length(var.controllers) + content = templatefile("${path.module}/butane/install.yaml", { os_channel = local.channel os_version = var.os_version @@ -57,25 +56,20 @@ data "ct_config" "install" { install_disk = var.install_disk 
ssh_authorized_key = var.ssh_authorized_key # only cached profile adds -b baseurl - baseurl_flag = "" + baseurl_flag = var.cached_install ? "-b ${var.matchbox_http_endpoint}/assets/flatcar" : "" }) strict = true } -# Flatcar Linux cached install -data "ct_config" "cached-install" { - count = length(var.controllers) + length(var.workers) - content = templatefile("${path.module}/butane/install.yaml", { - os_channel = local.channel - os_version = var.os_version - ignition_endpoint = format("%s/ignition", var.matchbox_http_endpoint) - mac = concat(var.controllers.*.mac, var.workers.*.mac)[count.index] - install_disk = var.install_disk - ssh_authorized_key = var.ssh_authorized_key - # profile uses -b baseurl to install from matchbox cache - baseurl_flag = "-b ${var.matchbox_http_endpoint}/assets/flatcar" - }) - strict = true +# Match each controller by MAC +resource "matchbox_group" "controller" { + count = length(var.controllers) + name = format("%s-%s", var.cluster_name, var.controllers[count.index].name) + profile = matchbox_profile.controllers[count.index].name + selector = { + mac = var.controllers[count.index].mac + os = "installed" + } } // Kubernetes Controller profiles @@ -99,25 +93,3 @@ data "ct_config" "controllers" { strict = true snippets = lookup(var.snippets, var.controllers.*.name[count.index], []) } - -// Kubernetes Worker profiles -resource "matchbox_profile" "workers" { - count = length(var.workers) - name = format("%s-worker-%s", var.cluster_name, var.workers.*.name[count.index]) - raw_ignition = data.ct_config.workers.*.rendered[count.index] -} - -# Flatcar Linux workers -data "ct_config" "workers" { - count = length(var.workers) - content = templatefile("${path.module}/butane/worker.yaml", { - domain_name = var.workers.*.domain[count.index] - cluster_dns_service_ip = module.bootstrap.cluster_dns_service_ip - cluster_domain_suffix = var.cluster_domain_suffix - ssh_authorized_key = var.ssh_authorized_key - node_labels = join(",", 
lookup(var.worker_node_labels, var.workers.*.name[count.index], [])) - node_taints = join(",", lookup(var.worker_node_taints, var.workers.*.name[count.index], [])) - }) - strict = true - snippets = lookup(var.snippets, var.workers.*.name[count.index], []) -} diff --git a/bare-metal/flatcar-linux/kubernetes/ssh.tf b/bare-metal/flatcar-linux/kubernetes/ssh.tf index 5a718c1d..c2a8388a 100644 --- a/bare-metal/flatcar-linux/kubernetes/ssh.tf +++ b/bare-metal/flatcar-linux/kubernetes/ssh.tf @@ -16,7 +16,6 @@ resource "null_resource" "copy-controller-secrets" { depends_on = [ matchbox_group.install, matchbox_group.controller, - matchbox_group.worker, module.bootstrap, ] @@ -45,37 +44,6 @@ resource "null_resource" "copy-controller-secrets" { } } -# Secure copy kubeconfig to all workers. Activates kubelet.service -resource "null_resource" "copy-worker-secrets" { - count = length(var.workers) - - # Without depends_on, remote-exec could start and wait for machines before - # matchbox groups are written, causing a deadlock. - depends_on = [ - matchbox_group.install, - matchbox_group.controller, - matchbox_group.worker, - ] - - connection { - type = "ssh" - host = var.workers.*.domain[count.index] - user = "core" - timeout = "60m" - } - - provisioner "file" { - content = module.bootstrap.kubeconfig-kubelet - destination = "/home/core/kubeconfig" - } - - provisioner "remote-exec" { - inline = [ - "sudo mv /home/core/kubeconfig /etc/kubernetes/kubeconfig", - ] - } -} - # Connect to a controller to perform one-time cluster bootstrap. resource "null_resource" "bootstrap" { # Without depends_on, this remote-exec may start before the kubeconfig copy. @@ -83,7 +51,6 @@ resource "null_resource" "bootstrap" { # while no Kubelets are running. 
depends_on = [ null_resource.copy-controller-secrets, - null_resource.copy-worker-secrets, ] connection { @@ -99,4 +66,3 @@ resource "null_resource" "bootstrap" { ] } } - diff --git a/bare-metal/flatcar-linux/kubernetes/worker/butane/install.yaml b/bare-metal/flatcar-linux/kubernetes/worker/butane/install.yaml new file mode 100644 index 00000000..fcf539a4 --- /dev/null +++ b/bare-metal/flatcar-linux/kubernetes/worker/butane/install.yaml @@ -0,0 +1,46 @@ +variant: flatcar +version: 1.0.0 +systemd: + units: + - name: installer.service + enabled: true + contents: | + [Unit] + Requires=network-online.target + After=network-online.target + [Service] + Type=simple + ExecStart=/opt/installer + [Install] + WantedBy=multi-user.target + # Avoid using the standard SSH port so terraform apply cannot SSH until + # post-install. But admins may SSH to debug disk install problems. + # After install, sshd will use port 22 and users/terraform can connect. + - name: sshd.socket + dropins: + - name: 10-sshd-port.conf + contents: | + [Socket] + ListenStream= + ListenStream=2222 +storage: + files: + - path: /opt/installer + mode: 0500 + contents: + inline: | + #!/bin/bash -ex + curl --retry 10 "${ignition_endpoint}?mac=${mac}&os=installed" -o ignition.json + flatcar-install \ + -d ${install_disk} \ + -C ${os_channel} \ + -V ${os_version} \ + ${baseurl_flag} \ + -i ignition.json + udevadm settle + systemctl reboot +passwd: + users: + - name: core + ssh_authorized_keys: + - "${ssh_authorized_key}" diff --git a/bare-metal/flatcar-linux/kubernetes/butane/worker.yaml b/bare-metal/flatcar-linux/kubernetes/worker/butane/worker.yaml similarity index 100% rename from bare-metal/flatcar-linux/kubernetes/butane/worker.yaml rename to bare-metal/flatcar-linux/kubernetes/worker/butane/worker.yaml diff --git a/bare-metal/flatcar-linux/kubernetes/worker/matchbox.tf b/bare-metal/flatcar-linux/kubernetes/worker/matchbox.tf new file mode 100644 index 00000000..37930d62 --- /dev/null +++ 
b/bare-metal/flatcar-linux/kubernetes/worker/matchbox.tf @@ -0,0 +1,87 @@ +locals { + # flatcar-stable -> stable channel + channel = split("-", var.os_channel)[1] + + remote_kernel = "${var.download_protocol}://${local.channel}.release.flatcar-linux.net/amd64-usr/${var.os_version}/flatcar_production_pxe.vmlinuz" + remote_initrd = [ + "${var.download_protocol}://${local.channel}.release.flatcar-linux.net/amd64-usr/${var.os_version}/flatcar_production_pxe_image.cpio.gz", + ] + args = flatten([ + "initrd=flatcar_production_pxe_image.cpio.gz", + "flatcar.config.url=${var.matchbox_http_endpoint}/ignition?uuid=$${uuid}&mac=$${mac:hexhyp}", + "flatcar.first_boot=yes", + var.kernel_args, + ]) + + cached_kernel = "/assets/flatcar/${var.os_version}/flatcar_production_pxe.vmlinuz" + cached_initrd = [ + "/assets/flatcar/${var.os_version}/flatcar_production_pxe_image.cpio.gz", + ] + + kernel = var.cached_install ? local.cached_kernel : local.remote_kernel + initrd = var.cached_install ? local.cached_initrd : local.remote_initrd +} + +# Match machine to an install profile by MAC +resource "matchbox_group" "install" { + name = format("install-%s", var.name) + profile = matchbox_profile.install.name + selector = { + mac = var.mac + } +} + +// Flatcar Linux install profile (remote or matchbox-cached; local.args already includes var.kernel_args) +resource "matchbox_profile" "install" { + name = format("%s-install-%s", var.cluster_name, var.name) + kernel = local.kernel + initrd = local.initrd + args = local.args + + raw_ignition = data.ct_config.install.rendered +} + +# Flatcar Linux install +data "ct_config" "install" { + content = templatefile("${path.module}/butane/install.yaml", { + os_channel = local.channel + os_version = var.os_version + ignition_endpoint = format("%s/ignition", var.matchbox_http_endpoint) + mac = var.mac + install_disk = var.install_disk + ssh_authorized_key = var.ssh_authorized_key + # only cached profile adds -b baseurl + baseurl_flag = var.cached_install ? 
"-b ${var.matchbox_http_endpoint}/assets/flatcar" : "" + }) + strict = true +} + +# Match a worker to a profile by MAC +resource "matchbox_group" "worker" { + name = format("%s-%s", var.cluster_name, var.name) + profile = matchbox_profile.worker.name + selector = { + mac = var.mac + os = "installed" + } +} + +// Flatcar Linux Worker profile +resource "matchbox_profile" "worker" { + name = format("%s-worker-%s", var.cluster_name, var.name) + raw_ignition = data.ct_config.worker.rendered +} + +# Flatcar Linux workers +data "ct_config" "worker" { + content = templatefile("${path.module}/butane/worker.yaml", { + domain_name = var.domain + ssh_authorized_key = var.ssh_authorized_key + cluster_dns_service_ip = cidrhost(var.service_cidr, 10) + cluster_domain_suffix = var.cluster_domain_suffix + node_labels = join(",", var.node_labels) + node_taints = join(",", var.node_taints) + }) + strict = true + snippets = var.snippets +} diff --git a/bare-metal/flatcar-linux/kubernetes/worker/ssh.tf b/bare-metal/flatcar-linux/kubernetes/worker/ssh.tf new file mode 100644 index 00000000..33434299 --- /dev/null +++ b/bare-metal/flatcar-linux/kubernetes/worker/ssh.tf @@ -0,0 +1,27 @@ +# Secure copy kubeconfig to worker. Activates kubelet.service +resource "null_resource" "copy-worker-secrets" { + # Without depends_on, remote-exec could start and wait for machines before + # matchbox groups are written, causing a deadlock. 
+ depends_on = [ + matchbox_group.install, + matchbox_group.worker, + ] + + connection { + type = "ssh" + host = var.domain + user = "core" + timeout = "60m" + } + + provisioner "file" { + content = var.kubeconfig + destination = "/home/core/kubeconfig" + } + + provisioner "remote-exec" { + inline = [ + "sudo mv /home/core/kubeconfig /etc/kubernetes/kubeconfig", + ] + } +} diff --git a/bare-metal/flatcar-linux/kubernetes/worker/variables.tf b/bare-metal/flatcar-linux/kubernetes/worker/variables.tf new file mode 100644 index 00000000..2293ea90 --- /dev/null +++ b/bare-metal/flatcar-linux/kubernetes/worker/variables.tf @@ -0,0 +1,120 @@ +variable "cluster_name" { + type = string + description = "Must be set to the `cluster_name` of cluster" +} + +# bare-metal + +variable "matchbox_http_endpoint" { + type = string + description = "Matchbox HTTP read-only endpoint (e.g. http://matchbox.example.com:8080)" +} + +variable "os_channel" { + type = string + description = "Channel for a Flatcar Linux (flatcar-stable, flatcar-beta, flatcar-alpha)" + + validation { + condition = contains(["flatcar-stable", "flatcar-beta", "flatcar-alpha"], var.os_channel) + error_message = "The os_channel must be flatcar-stable, flatcar-beta, or flatcar-alpha." + } +} + +variable "os_version" { + type = string + description = "Version of Flatcar Linux to PXE and install (e.g. 2079.5.1)" +} + +# machine + +variable "name" { + type = string + description = "Unique name for the machine (e.g. node1)" +} + +variable "mac" { + type = string + description = "MAC address (e.g. 52:54:00:a1:9c:ae)" +} + +variable "domain" { + type = string + description = "Fully qualified domain name (e.g. 
node1.example.com)" +} + +# configuration + +variable "kubeconfig" { + type = string + description = "Must be set to `kubeconfig` output by cluster" +} + +variable "ssh_authorized_key" { + type = string + description = "SSH public key for user 'core'" +} + +variable "snippets" { + type = list(string) + description = "List of Butane snippets" + default = [] +} + +variable "node_labels" { + type = list(string) + description = "List of initial node labels" + default = [] +} + +variable "node_taints" { + type = list(string) + description = "List of initial node taints" + default = [] +} + +# optional + +variable "download_protocol" { + type = string + description = "Protocol iPXE should use to download the kernel and initrd. Defaults to https, which requires iPXE compiled with crypto support. Unused if cached_install is true." + default = "https" +} + +variable "cached_install" { + type = bool + description = "Whether Flatcar Linux should PXE boot and install from matchbox /assets cache. Note that the admin must have downloaded the os_version into matchbox assets." + default = false +} + +variable "install_disk" { + type = string + default = "/dev/sda" + description = "Disk device to which the install profiles should install Flatcar Linux (e.g. /dev/sda)" +} + +variable "kernel_args" { + type = list(string) + description = "Additional kernel arguments to provide at PXE boot." + default = [] +} + +# unofficial, undocumented, unsupported + +variable "service_cidr" { + type = string + description = <