Restructure bare-metal module to use a worker submodule

* Add an internal `worker` module to the bare-metal module, to
allow individual bare-metal machines to be defined and joined
to an existing bare-metal cluster. This is similar to the "worker
pools" modules for adding sets of nodes to cloud (AWS, GCP, Azure)
clusters, but on metal, each piece of hardware is potentially
unique.

New: Using the new `worker` module, a Kubernetes cluster can be defined
without any `workers` (i.e. just a control-plane). Use the `worker`
module to define each machine that should join the bare-metal
cluster and customize it in detail. This style is quite flexible and
suited for clusters with hardware that varies quite a bit.

```tf
module "mercury" {
  source = "git::https://github.com/poseidon/typhoon//bare-metal/flatcar-linux/kubernetes?ref=v1.26.2"

  # bare-metal
  cluster_name            = "mercury"
  matchbox_http_endpoint  = "http://matchbox.example.com"
  os_channel              = "flatcar-stable"
  os_version              = "2345.3.1"

  # configuration
  k8s_domain_name    = "node1.example.com"
  ssh_authorized_key = "ssh-rsa AAAAB3Nz..."

  # machines
  controllers = [{
    name   = "node1"
    mac    = "52:54:00:a1:9c:ae"
    domain = "node1.example.com"
  }]
}
```

```tf
module "mercury-node2" {
  source = "git::https://github.com/poseidon/typhoon//bare-metal/flatcar-linux/kubernetes/worker?ref=v1.26.2"

  cluster_name = "mercury"

  # bare-metal
  matchbox_http_endpoint  = "http://matchbox.example.com"
  os_channel              = "flatcar-stable"
  os_version              = "2345.3.1"

  # configuration
  name               = "node2"
  mac                = "52:54:00:b2:2f:86"
  domain             = "node2.example.com"
  kubeconfig         = module.mercury.kubeconfig
  ssh_authorized_key = "ssh-rsa AAAAB3Nz..."

  # optional
  snippets       = []
  node_labels    = []
  node_taints    = []
  install_disk   = "/dev/vda"
  cached_install = false
}
```

For clusters with fairly similar hardware, you may continue to
define `workers` directly within the cluster definition. This
reduces some repetition, but is not quite as flexible.

```tf
module "mercury" {
  source = "git::https://github.com/poseidon/typhoon//bare-metal/flatcar-linux/kubernetes?ref=v1.26.1"

  # bare-metal
  cluster_name            = "mercury"
  matchbox_http_endpoint  = "http://matchbox.example.com"
  os_channel              = "flatcar-stable"
  os_version              = "2345.3.1"

  # configuration
  k8s_domain_name    = "node1.example.com"
  ssh_authorized_key = "ssh-rsa AAAAB3Nz..."

  # machines
  controllers = [{
    name   = "node1"
    mac    = "52:54:00:a1:9c:ae"
    domain = "node1.example.com"
  }]
  workers = [
    {
      name   = "node2",
      mac    = "52:54:00:b2:2f:86"
      domain = "node2.example.com"
    },
    {
      name   = "node3",
      mac    = "52:54:00:c3:61:77"
      domain = "node3.example.com"
    }
  ]
}
```

Optional variables `snippets`, `worker_node_labels`, and
`worker_node_taints` are still defined as a map from machine name
to a list of snippets, labels, or taints respectively to allow some
degree of per-machine customization. However, fields like
`install_disk`, `kernel_args`, `cached_install` and future options
will not be designed this way. Instead, if your machines vary it
is recommended to use the new `worker` module to define each node.
This commit is contained in:
Dalton Hubble
2023-01-24 20:37:57 -08:00
parent d04d88023d
commit 1caea3388c
21 changed files with 639 additions and 221 deletions

View File

@ -0,0 +1,46 @@
# Butane config for the install stage: the machine boots this config, runs
# /opt/installer to write Flatcar Linux to disk, then reboots.
variant: flatcar
version: 1.0.0
systemd:
  units:
    # Runs the installer script once the network is online.
    - name: installer.service
      enabled: true
      contents: |
        [Unit]
        Requires=network-online.target
        After=network-online.target
        [Service]
        Type=simple
        ExecStart=/opt/installer
        [Install]
        WantedBy=multi-user.target
    # During the install stage sshd listens on 2222 instead of the standard
    # port, so `terraform apply` cannot connect until the machine has been
    # installed. Admins can still SSH to port 2222 to debug disk install
    # problems. After install, sshd is back on port 22 for users/terraform.
    - name: sshd.socket
      dropins:
        - name: 10-sshd-port.conf
          contents: |
            [Socket]
            ListenStream=
            ListenStream=2222
storage:
  files:
    # Installer script: fetch the post-install Ignition config for this MAC
    # (os=installed), install Flatcar to the chosen disk, then reboot.
    - path: /opt/installer
      mode: 0500
      contents:
        inline: |
          #!/bin/bash -ex
          curl --retry 10 "${ignition_endpoint}?mac=${mac}&os=installed" -o ignition.json
          flatcar-install \
            -d ${install_disk} \
            -C ${os_channel} \
            -V ${os_version} \
            ${baseurl_flag} \
            -i ignition.json
          udevadm settle
          systemctl reboot
passwd:
  users:
    - name: core
      ssh_authorized_keys:
        - "${ssh_authorized_key}"

View File

@ -0,0 +1,140 @@
# Butane config for an installed worker: runs the kubelet in a docker
# container once a kubeconfig has been copied to /etc/kubernetes/kubeconfig.
variant: flatcar
version: 1.0.0
systemd:
  units:
    - name: docker.service
      enabled: true
    - name: locksmithd.service
      mask: true
    # Path unit that watches for the kubeconfig file. kubelet.service itself
    # is not enabled below; presumably this path unit is what starts it
    # (systemd path units activate the like-named service by default).
    - name: kubelet.path
      enabled: true
      contents: |
        [Unit]
        Description=Watch for kubeconfig
        [Path]
        PathExists=/etc/kubernetes/kubeconfig
        [Install]
        WantedBy=multi-user.target
    # Holds kubelet.service back until /etc/resolv.conf has at least one
    # non-comment, non-blank line.
    - name: wait-for-dns.service
      enabled: true
      contents: |
        [Unit]
        Description=Wait for DNS entries
        Wants=systemd-resolved.service
        Before=kubelet.service
        [Service]
        Type=oneshot
        RemainAfterExit=true
        ExecStart=/bin/sh -c 'while ! /usr/bin/grep '^[^#[:space:]]' /etc/resolv.conf > /dev/null; do sleep 1; done'
        [Install]
        RequiredBy=kubelet.service
    # NOTE(review): the unconditional --node-labels flag is repeated after the
    # template loops, presumably so the docker command always ends on a line
    # without a trailing backslash regardless of how many labels/taints are
    # rendered. Confirm before removing either occurrence.
    - name: kubelet.service
      contents: |
        [Unit]
        Description=Kubelet
        Requires=docker.service
        After=docker.service
        Wants=rpc-statd.service
        [Service]
        Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.26.1
        ExecStartPre=/bin/mkdir -p /etc/cni/net.d
        ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
        ExecStartPre=/bin/mkdir -p /opt/cni/bin
        ExecStartPre=/bin/mkdir -p /var/lib/calico
        ExecStartPre=/bin/mkdir -p /var/lib/kubelet/volumeplugins
        ExecStartPre=/usr/bin/bash -c "grep 'certificate-authority-data' /etc/kubernetes/kubeconfig | awk '{print $2}' | base64 -d > /etc/kubernetes/ca.crt"
        # Podman, rkt, or runc run container processes, whereas docker run
        # is a client to a daemon and requires workarounds to use within a
        # systemd unit. https://github.com/moby/moby/issues/6791
        ExecStartPre=/usr/bin/docker run -d \
          --name kubelet \
          --privileged \
          --pid host \
          --network host \
          -v /etc/cni/net.d:/etc/cni/net.d:ro \
          -v /etc/kubernetes:/etc/kubernetes:ro \
          -v /etc/machine-id:/etc/machine-id:ro \
          -v /usr/lib/os-release:/etc/os-release:ro \
          -v /lib/modules:/lib/modules:ro \
          -v /run:/run \
          -v /sys/fs/cgroup:/sys/fs/cgroup \
          -v /var/lib/calico:/var/lib/calico:ro \
          -v /var/lib/containerd:/var/lib/containerd \
          -v /var/lib/kubelet:/var/lib/kubelet:rshared \
          -v /var/log:/var/log \
          -v /opt/cni/bin:/opt/cni/bin \
          $${KUBELET_IMAGE} \
          --bootstrap-kubeconfig=/etc/kubernetes/kubeconfig \
          --config=/etc/kubernetes/kubelet.yaml \
          --container-runtime-endpoint=unix:///run/containerd/containerd.sock \
          --hostname-override=${domain_name} \
          --kubeconfig=/var/lib/kubelet/kubeconfig \
          --node-labels=node.kubernetes.io/node \
          %{~ for label in compact(split(",", node_labels)) ~}
          --node-labels=${label} \
          %{~ endfor ~}
          %{~ for taint in compact(split(",", node_taints)) ~}
          --register-with-taints=${taint} \
          %{~ endfor ~}
          --node-labels=node.kubernetes.io/node
        ExecStart=docker logs -f kubelet
        ExecStop=docker stop kubelet
        ExecStopPost=docker rm kubelet
        Restart=always
        RestartSec=5
        [Install]
        WantedBy=multi-user.target
storage:
  directories:
    - path: /etc/kubernetes
      mode: 0755
  files:
    - path: /etc/hostname
      mode: 0644
      contents:
        inline:
          ${domain_name}
    - path: /etc/kubernetes/kubelet.yaml
      mode: 0644
      contents:
        inline: |
          apiVersion: kubelet.config.k8s.io/v1beta1
          kind: KubeletConfiguration
          authentication:
            anonymous:
              enabled: false
            webhook:
              enabled: true
            x509:
              clientCAFile: /etc/kubernetes/ca.crt
          authorization:
            mode: Webhook
          cgroupDriver: systemd
          clusterDNS:
            - ${cluster_dns_service_ip}
          clusterDomain: ${cluster_domain_suffix}
          healthzPort: 0
          rotateCertificates: true
          shutdownGracePeriod: 45s
          shutdownGracePeriodCriticalPods: 30s
          staticPodPath: /etc/kubernetes/manifests
          readOnlyPort: 0
          resolvConf: /run/systemd/resolve/resolv.conf
          volumePluginDir: /var/lib/kubelet/volumeplugins
    - path: /etc/systemd/logind.conf.d/inhibitors.conf
      contents:
        inline: |
          [Login]
          InhibitDelayMaxSec=45s
    - path: /etc/sysctl.d/max-user-watches.conf
      mode: 0644
      contents:
        inline: |
          fs.inotify.max_user_watches=16184
passwd:
  users:
    - name: core
      ssh_authorized_keys:
        # Quoted (as in install.yaml) so that a key comment containing ": "
        # or " #" cannot change how the rendered YAML is parsed.
        - "${ssh_authorized_key}"

View File

@ -0,0 +1,87 @@
locals {
  # Flatcar release channel without the "flatcar-" prefix
  # (flatcar-stable -> stable)
  channel = split("-", var.os_channel)[1]

  # Kernel and initrd fetched from the Flatcar release server
  remote_kernel = "${var.download_protocol}://${local.channel}.release.flatcar-linux.net/amd64-usr/${var.os_version}/flatcar_production_pxe.vmlinuz"
  remote_initrd = [
    "${var.download_protocol}://${local.channel}.release.flatcar-linux.net/amd64-usr/${var.os_version}/flatcar_production_pxe_image.cpio.gz",
  ]

  # Base kernel arguments for the install profile. The $${...} sequences are
  # escaped so Terraform emits a literal ${uuid} / ${mac:hexhyp} instead of
  # interpolating them.
  #
  # User-supplied var.kernel_args are intentionally NOT included here:
  # matchbox_profile.install appends them with concat(local.args,
  # var.kernel_args), and listing them in both places put every extra
  # argument into the profile's args twice.
  args = [
    "initrd=flatcar_production_pxe_image.cpio.gz",
    "flatcar.config.url=${var.matchbox_http_endpoint}/ignition?uuid=$${uuid}&mac=$${mac:hexhyp}",
    "flatcar.first_boot=yes",
  ]

  # Kernel and initrd paths under the matchbox /assets cache
  cached_kernel = "/assets/flatcar/${var.os_version}/flatcar_production_pxe.vmlinuz"
  cached_initrd = [
    "/assets/flatcar/${var.os_version}/flatcar_production_pxe_image.cpio.gz",
  ]

  # Effective kernel/initrd: cached assets when cached_install is set,
  # otherwise the remote release server
  kernel = var.cached_install ? local.cached_kernel : local.remote_kernel
  initrd = var.cached_install ? local.cached_initrd : local.remote_initrd
}
# Assign the install profile to the machine with this MAC address
resource "matchbox_group" "install" {
  name     = "install-${var.name}"
  profile  = matchbox_profile.install.name
  selector = { mac = var.mac }
}
# Flatcar Linux install profile: boots the kernel/initrd selected in locals
# and serves the rendered install Butane config as Ignition
resource "matchbox_profile" "install" {
  name         = "${var.cluster_name}-install-${var.name}"
  kernel       = local.kernel
  initrd       = local.initrd
  args         = concat(local.args, var.kernel_args)
  raw_ignition = data.ct_config.install.rendered
}
# Render butane/install.yaml into the Ignition config used during install
data "ct_config" "install" {
  strict = true
  content = templatefile("${path.module}/butane/install.yaml", {
    os_channel         = local.channel
    os_version         = var.os_version
    ignition_endpoint  = "${var.matchbox_http_endpoint}/ignition"
    mac                = var.mac
    install_disk       = var.install_disk
    ssh_authorized_key = var.ssh_authorized_key
    # -b baseurl is passed to the installer only for cached installs
    baseurl_flag = var.cached_install ? "-b ${var.matchbox_http_endpoint}/assets/flatcar" : ""
  })
}
# Assign the worker profile to this MAC address once the machine reports
# os=installed
resource "matchbox_group" "worker" {
  name    = "${var.cluster_name}-${var.name}"
  profile = matchbox_profile.worker.name
  selector = {
    mac = var.mac
    os  = "installed"
  }
}
# Flatcar Linux worker profile: serves the rendered worker Butane config as
# Ignition
resource "matchbox_profile" "worker" {
  name         = "${var.cluster_name}-worker-${var.name}"
  raw_ignition = data.ct_config.worker.rendered
}
# Render butane/worker.yaml (plus any user snippets) into the worker's
# Ignition config
data "ct_config" "worker" {
  strict   = true
  snippets = var.snippets
  content = templatefile("${path.module}/butane/worker.yaml", {
    domain_name        = var.domain
    ssh_authorized_key = var.ssh_authorized_key
    # Cluster DNS uses host number 10 within the service CIDR
    cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
    cluster_domain_suffix  = var.cluster_domain_suffix
    # Lists are passed as comma-separated strings; the template splits them
    node_labels = join(",", var.node_labels)
    node_taints = join(",", var.node_taints)
  })
}

View File

@ -0,0 +1,27 @@
# Copy the kubeconfig to the worker over SSH. Placing it at
# /etc/kubernetes/kubeconfig activates kubelet.service.
resource "null_resource" "copy-worker-secrets" {
  # Both matchbox groups must be written first. Without depends_on,
  # remote-exec could start waiting for the machine before the groups exist,
  # causing a deadlock.
  depends_on = [matchbox_group.install, matchbox_group.worker]

  connection {
    type    = "ssh"
    host    = var.domain
    user    = "core"
    timeout = "60m"
  }

  # Upload into core's home directory first
  provisioner "file" {
    content     = var.kubeconfig
    destination = "/home/core/kubeconfig"
  }

  # Then move it into /etc/kubernetes with sudo
  provisioner "remote-exec" {
    inline = ["sudo mv /home/core/kubeconfig /etc/kubernetes/kubeconfig"]
  }
}

View File

@ -0,0 +1,120 @@
# Name of the existing cluster this worker joins; used to prefix matchbox
# profile and group names
variable "cluster_name" {
  description = "Must be set to the `cluster_name` of cluster"
  type        = string
}
# bare-metal

variable "matchbox_http_endpoint" {
  description = "Matchbox HTTP read-only endpoint (e.g. http://matchbox.example.com:8080)"
  type        = string
}

variable "os_channel" {
  description = "Channel for a Flatcar Linux (flatcar-stable, flatcar-beta, flatcar-alpha)"
  type        = string

  # Reject anything other than the three known Flatcar channels
  validation {
    condition     = contains(["flatcar-stable", "flatcar-beta", "flatcar-alpha"], var.os_channel)
    error_message = "The os_channel must be flatcar-stable, flatcar-beta, or flatcar-alpha."
  }
}

variable "os_version" {
  description = "Version of Flatcar Linux to PXE and install (e.g. 2079.5.1)"
  type        = string
}
# machine

variable "name" {
  description = "Unique name for the machine (e.g. node1)"
  type        = string
}

variable "mac" {
  description = "MAC address (e.g. 52:54:00:a1:9c:ae)"
  type        = string
}

variable "domain" {
  description = "Fully qualified domain name (e.g. node1.example.com)"
  type        = string
}
# configuration

variable "kubeconfig" {
  description = "Must be set to `kubeconfig` output by cluster"
  type        = string
}

variable "ssh_authorized_key" {
  description = "SSH public key for user 'core'"
  type        = string
}

variable "snippets" {
  description = "List of Butane snippets"
  type        = list(string)
  default     = []
}

variable "node_labels" {
  description = "List of initial node labels"
  type        = list(string)
  default     = []
}

variable "node_taints" {
  description = "List of initial node taints"
  type        = list(string)
  default     = []
}
# optional

variable "download_protocol" {
  description = "Protocol iPXE should use to download the kernel and initrd. Defaults to https, which requires iPXE compiled with crypto support. Unused if cached_install is true."
  type        = string
  default     = "https"
}

variable "cached_install" {
  description = "Whether Flatcar Linux should PXE boot and install from matchbox /assets cache. Note that the admin must have downloaded the os_version into matchbox assets."
  type        = bool
  default     = false
}

variable "install_disk" {
  description = "Disk device to which the install profiles should install Flatcar Linux (e.g. /dev/sda)"
  type        = string
  default     = "/dev/sda"
}

variable "kernel_args" {
  description = "Additional kernel arguments to provide at PXE boot."
  type        = list(string)
  default     = []
}
# unofficial, undocumented, unsupported

variable "service_cidr" {
  description = <<EOD
CIDR IPv4 range to assign Kubernetes services.
The 1st IP will be reserved for kube_apiserver, the 10th IP will be reserved for coredns.
EOD
  type        = string
  default     = "10.3.0.0/16"
}

variable "cluster_domain_suffix" {
  description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. foo.default.svc.cluster.local) "
  type        = string
  default     = "cluster.local"
}

View File

@ -0,0 +1,16 @@
# Supported Terraform releases and the providers this module requires
terraform {
  required_version = ">= 0.13.0, < 2.0.0"

  required_providers {
    # Same requirement as the shorthand `null = ">= 2.1"`, which resolves
    # to the hashicorp namespace
    null = {
      source  = "hashicorp/null"
      version = ">= 2.1"
    }

    ct = {
      source  = "poseidon/ct"
      version = "~> 0.9"
    }

    matchbox = {
      source  = "poseidon/matchbox"
      version = "~> 0.5.0"
    }
  }
}