Restructure bare-metal module to use a worker submodule

* Add an internal `worker` module to the bare-metal module, to
allow individual bare-metal machines to be defined and joined
to an existing bare-metal cluster. This is similar to the "worker
pools" modules for adding sets of nodes to cloud (AWS, GCP, Azure)
clusters, but on metal, each piece of hardware is potentially
unique

New: Using the new `worker` module, a Kubernetes cluster can be defined
without any `workers` (i.e. just a control-plane). Use the `worker`
module to define each piece machine that should join the bare-metal
cluster and customize it in detail. This style is quite flexible and
suited for clusters with hardware that varies quite a bit.

```tf
module "mercury" {
  source = "git::https://github.com/poseidon/typhoon//bare-metal/flatcar-linux/kubernetes?ref=v1.26.2"

  # bare-metal
  cluster_name            = "mercury"
  matchbox_http_endpoint  = "http://matchbox.example.com"
  os_channel              = "flatcar-stable"
  os_version              = "2345.3.1"

  # configuration
  k8s_domain_name    = "node1.example.com"
  ssh_authorized_key = "ssh-rsa AAAAB3Nz..."

  # machines
  controllers = [{
    name   = "node1"
    mac    = "52:54:00:a1:9c:ae"
    domain = "node1.example.com"
  }]
}
```

```tf
module "mercury-node1" {
  source = "git::https://github.com/poseidon/typhoon//bare-metal/flatcar-linux/kubernetes/worker?ref=v1.26.2"

  cluster_name = "mercury"

  # bare-metal
  matchbox_http_endpoint  = "http://matchbox.example.com"
  os_channel              = "flatcar-stable"
  os_version              = "2345.3.1"

  # configuration
  name               = "node2"
  mac                = "52:54:00:b2:2f:86"
  domain             = "node2.example.com"
  kubeconfig         = module.mercury.kubeconfig
  ssh_authorized_key = "ssh-rsa AAAAB3Nz..."

  # optional
  snippets       = []
  node_labels    = []
  node_tains     = []
  install_disk   = "/dev/vda"
  cached_install = false
}
```

For clusters with fairly similar hardware, you may continue to
define `workers` directly within the cluster definition. This
reduces some repetition, but is not quite as flexible.

```tf
module "mercury" {
  source = "git::https://github.com/poseidon/typhoon//bare-metal/flatcar-linux/kubernetes?ref=v1.26.1"

  # bare-metal
  cluster_name            = "mercury"
  matchbox_http_endpoint  = "http://matchbox.example.com"
  os_channel              = "flatcar-stable"
  os_version              = "2345.3.1"

  # configuration
  k8s_domain_name    = "node1.example.com"
  ssh_authorized_key = "ssh-rsa AAAAB3Nz..."

  # machines
  controllers = [{
    name   = "node1"
    mac    = "52:54:00:a1:9c:ae"
    domain = "node1.example.com"
  }]
  workers = [
    {
      name   = "node2",
      mac    = "52:54:00:b2:2f:86"
      domain = "node2.example.com"
    },
    {
      name   = "node3",
      mac    = "52:54:00:c3:61:77"
      domain = "node3.example.com"
    }
  ]
}
```

Optional variables `snippets`, `worker_node_labels`, and
`worker_node_taints` are still defined as a map from machine name
to a list of snippets, labels, or taints respectively to allow some
degree of per-machine customization. However, fields like
`install_disk`, `kernel_args`, `cached_install` and future options
will not be designed this way. Instead, if your machines vary it
is recommended to use the new `worker` module to define each node
This commit is contained in:
Dalton Hubble
2023-01-24 20:37:57 -08:00
parent d04d88023d
commit 1caea3388c
21 changed files with 639 additions and 221 deletions

View File

@ -1,140 +0,0 @@
variant: flatcar
version: 1.0.0
systemd:
units:
- name: docker.service
enabled: true
- name: locksmithd.service
mask: true
- name: kubelet.path
enabled: true
contents: |
[Unit]
Description=Watch for kubeconfig
[Path]
PathExists=/etc/kubernetes/kubeconfig
[Install]
WantedBy=multi-user.target
- name: wait-for-dns.service
enabled: true
contents: |
[Unit]
Description=Wait for DNS entries
Wants=systemd-resolved.service
Before=kubelet.service
[Service]
Type=oneshot
RemainAfterExit=true
ExecStart=/bin/sh -c 'while ! /usr/bin/grep '^[^#[:space:]]' /etc/resolv.conf > /dev/null; do sleep 1; done'
[Install]
RequiredBy=kubelet.service
- name: kubelet.service
contents: |
[Unit]
Description=Kubelet
Requires=docker.service
After=docker.service
Wants=rpc-statd.service
[Service]
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.26.1
ExecStartPre=/bin/mkdir -p /etc/cni/net.d
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
ExecStartPre=/bin/mkdir -p /opt/cni/bin
ExecStartPre=/bin/mkdir -p /var/lib/calico
ExecStartPre=/bin/mkdir -p /var/lib/kubelet/volumeplugins
ExecStartPre=/usr/bin/bash -c "grep 'certificate-authority-data' /etc/kubernetes/kubeconfig | awk '{print $2}' | base64 -d > /etc/kubernetes/ca.crt"
# Podman, rkt, or runc run container processes, whereas docker run
# is a client to a daemon and requires workarounds to use within a
# systemd unit. https://github.com/moby/moby/issues/6791
ExecStartPre=/usr/bin/docker run -d \
--name kubelet \
--privileged \
--pid host \
--network host \
-v /etc/cni/net.d:/etc/cni/net.d:ro \
-v /etc/kubernetes:/etc/kubernetes:ro \
-v /etc/machine-id:/etc/machine-id:ro \
-v /usr/lib/os-release:/etc/os-release:ro \
-v /lib/modules:/lib/modules:ro \
-v /run:/run \
-v /sys/fs/cgroup:/sys/fs/cgroup \
-v /var/lib/calico:/var/lib/calico:ro \
-v /var/lib/containerd:/var/lib/containerd \
-v /var/lib/kubelet:/var/lib/kubelet:rshared \
-v /var/log:/var/log \
-v /opt/cni/bin:/opt/cni/bin \
$${KUBELET_IMAGE} \
--bootstrap-kubeconfig=/etc/kubernetes/kubeconfig \
--config=/etc/kubernetes/kubelet.yaml \
--container-runtime-endpoint=unix:///run/containerd/containerd.sock \
--hostname-override=${domain_name} \
--kubeconfig=/var/lib/kubelet/kubeconfig \
--node-labels=node.kubernetes.io/node \
%{~ for label in compact(split(",", node_labels)) ~}
--node-labels=${label} \
%{~ endfor ~}
%{~ for taint in compact(split(",", node_taints)) ~}
--register-with-taints=${taint} \
%{~ endfor ~}
--node-labels=node.kubernetes.io/node
ExecStart=docker logs -f kubelet
ExecStop=docker stop kubelet
ExecStopPost=docker rm kubelet
Restart=always
RestartSec=5
[Install]
WantedBy=multi-user.target
storage:
directories:
- path: /etc/kubernetes
mode: 0755
files:
- path: /etc/hostname
mode: 0644
contents:
inline:
${domain_name}
- path: /etc/kubernetes/kubelet.yaml
mode: 0644
contents:
inline: |
apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
authentication:
anonymous:
enabled: false
webhook:
enabled: true
x509:
clientCAFile: /etc/kubernetes/ca.crt
authorization:
mode: Webhook
cgroupDriver: systemd
clusterDNS:
- ${cluster_dns_service_ip}
clusterDomain: ${cluster_domain_suffix}
healthzPort: 0
rotateCertificates: true
shutdownGracePeriod: 45s
shutdownGracePeriodCriticalPods: 30s
staticPodPath: /etc/kubernetes/manifests
readOnlyPort: 0
resolvConf: /run/systemd/resolve/resolv.conf
volumePluginDir: /var/lib/kubelet/volumeplugins
- path: /etc/systemd/logind.conf.d/inhibitors.conf
contents:
inline: |
[Login]
InhibitDelayMaxSec=45s
- path: /etc/sysctl.d/max-user-watches.conf
mode: 0644
contents:
inline: |
fs.inotify.max_user_watches=16184
passwd:
users:
- name: core
ssh_authorized_keys:
- ${ssh_authorized_key}