Migrate GCP, DO, Azure to static pod control plane

* Run a kube-apiserver, kube-scheduler, and kube-controller-manager
static pod on each controller node. Previously, kube-apiserver was
self-hosted as a DaemonSet across controllers and kube-scheduler
and kube-controller-manager were a Deployment (with 2 or
controller_count many replicas).
* Remove bootkube bootstrap and pivot to self-hosted
* Remove pod-checkpointer manifests (no longer needed)
This commit is contained in:
Dalton Hubble
2019-09-05 23:12:09 -07:00
parent c933bdfc26
commit db947537d1
21 changed files with 235 additions and 199 deletions

View File

@ -11,7 +11,7 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster
## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>
* Kubernetes v1.15.3 (upstream, via [kubernetes-incubator/bootkube](https://github.com/kubernetes-incubator/bootkube))
* Kubernetes v1.15.3 (upstream)
* Single or multi-master, [Calico](https://www.projectcalico.org/) or [flannel](https://github.com/coreos/flannel) networking
* On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/)
* Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [low-priority](https://typhoon.psdn.io/cl/azure/#low-priority) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#container-linux) customization

View File

@ -1,6 +1,6 @@
# Self-hosted Kubernetes assets (kubeconfig, manifests)
module "bootkube" {
source = "git::https://github.com/poseidon/terraform-render-bootkube.git?ref=98cc19f80f2c4c3ddc63fc7aea6320e74bec561a"
source = "git::https://github.com/poseidon/terraform-render-bootkube.git?ref=6e59af71138bc5f784453873074de16e7ee150eb"
cluster_name = var.cluster_name
api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)]

View File

@ -96,17 +96,28 @@ systemd:
RestartSec=10
[Install]
WantedBy=multi-user.target
- name: bootkube.service
- name: bootstrap.service
contents: |
[Unit]
Description=Bootstrap a Kubernetes cluster
ConditionPathExists=!/opt/bootkube/init_bootkube.done
Description=Kubernetes control plane
ConditionPathExists=!/opt/bootstrap/bootstrap.done
[Service]
Type=oneshot
RemainAfterExit=true
WorkingDirectory=/opt/bootkube
ExecStart=/opt/bootkube/bootkube-start
ExecStartPost=/bin/touch /opt/bootkube/init_bootkube.done
WorkingDirectory=/opt/bootstrap
ExecStartPre=-/usr/bin/bash -c 'set -x && [ -n "$(ls /opt/bootstrap/assets/manifests-*/* 2>/dev/null)" ] && mv /opt/bootstrap/assets/manifests-*/* /opt/bootstrap/assets/manifests && rm -rf /opt/bootstrap/assets/manifests-*'
ExecStart=/usr/bin/rkt run \
--trust-keys-from-https \
--volume assets,kind=host,source=/opt/bootstrap/assets \
--mount volume=assets,target=/assets \
--volume script,kind=host,source=/opt/bootstrap/apply \
--mount volume=script,target=/apply \
--insecure-options=image \
docker://k8s.gcr.io/hyperkube:v1.15.3 \
--net=host \
--dns=host \
--exec=/apply
ExecStartPost=/bin/touch /opt/bootstrap/bootstrap.done
[Install]
WantedBy=multi-user.target
storage:
@ -124,36 +135,26 @@ storage:
inline: |
KUBELET_IMAGE_URL=docker://k8s.gcr.io/hyperkube
KUBELET_IMAGE_TAG=v1.15.3
- path: /opt/bootstrap/apply
filesystem: root
mode: 0544
contents:
inline: |
#!/bin/bash -e
export KUBECONFIG=/assets/auth/kubeconfig
until kubectl version; do
echo "Waiting for static pod control plane"
sleep 5
done
until kubectl apply -f /assets/manifests -R; do
echo "Retry applying manifests"
sleep 5
done
- path: /etc/sysctl.d/max-user-watches.conf
filesystem: root
contents:
inline: |
fs.inotify.max_user_watches=16184
- path: /opt/bootkube/bootkube-start
filesystem: root
mode: 0544
user:
id: 500
group:
id: 500
contents:
inline: |
#!/bin/bash
# Wrapper for bootkube start
set -e
# Move experimental manifests
[ -n "$(ls /opt/bootkube/assets/manifests-*/* 2>/dev/null)" ] && mv /opt/bootkube/assets/manifests-*/* /opt/bootkube/assets/manifests && rm -rf /opt/bootkube/assets/manifests-*
exec /usr/bin/rkt run \
--trust-keys-from-https \
--volume assets,kind=host,source=/opt/bootkube/assets \
--mount volume=assets,target=/assets \
--volume bootstrap,kind=host,source=/etc/kubernetes \
--mount volume=bootstrap,target=/etc/kubernetes \
$${RKT_OPTS} \
quay.io/coreos/bootkube:v0.14.0 \
--net=host \
--dns=host \
--exec=/bootkube -- start --asset-dir=/assets "$@"
passwd:
users:
- name: core

View File

@ -53,6 +53,22 @@ resource "azurerm_network_security_rule" "controller-etcd-metrics" {
destination_address_prefix = azurerm_subnet.controller.address_prefix
}
# Allow Prometheus to scrape kube-scheduler and kube-controller-manager metrics
resource "azurerm_network_security_rule" "controller-kube-metrics" {
resource_group_name = azurerm_resource_group.cluster.name
name = "allow-kube-metrics"
network_security_group_name = azurerm_network_security_group.controller.name
priority = "2011"
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "10251-10252"
source_address_prefix = azurerm_subnet.worker.address_prefix
destination_address_prefix = azurerm_subnet.controller.address_prefix
}
resource "azurerm_network_security_rule" "controller-apiserver" {
resource_group_name = azurerm_resource_group.cluster.name

View File

@ -1,12 +1,15 @@
# Secure copy etcd TLS assets to controllers.
# Secure copy assets to controllers.
resource "null_resource" "copy-controller-secrets" {
count = var.controller_count
depends_on = [azurerm_virtual_machine.controllers]
depends_on = [
module.bootkube,
azurerm_virtual_machine.controllers
]
connection {
type = "ssh"
host = element(azurerm_public_ip.controllers.*.ip_address, count.index)
host = azurerm_public_ip.controllers.*.ip_address[count.index]
user = "core"
timeout = "15m"
}
@ -45,6 +48,11 @@ resource "null_resource" "copy-controller-secrets" {
content = module.bootkube.etcd_peer_key
destination = "$HOME/etcd-peer.key"
}
provisioner "file" {
source = var.asset_dir
destination = "$HOME/assets"
}
provisioner "remote-exec" {
inline = [
@ -58,18 +66,21 @@ resource "null_resource" "copy-controller-secrets" {
"sudo mv etcd-peer.key /etc/ssl/etcd/etcd/peer.key",
"sudo chown -R etcd:etcd /etc/ssl/etcd",
"sudo chmod -R 500 /etc/ssl/etcd",
"sudo mv $HOME/assets /opt/bootstrap/assets",
"sudo mkdir -p /etc/kubernetes/bootstrap-secrets",
"sudo cp -r /opt/bootstrap/assets/tls/* /etc/kubernetes/bootstrap-secrets/",
"sudo cp /opt/bootstrap/assets/auth/kubeconfig /etc/kubernetes/bootstrap-secrets/",
"sudo cp -r /opt/bootstrap/assets/static-manifests/* /etc/kubernetes/manifests/",
]
}
}
# Secure copy bootkube assets to ONE controller and start bootkube to perform
# one-time self-hosted cluster bootstrapping.
resource "null_resource" "bootkube-start" {
# Connect to a controller to perform one-time cluster bootstrap.
resource "null_resource" "bootstrap" {
depends_on = [
module.bootkube,
null_resource.copy-controller-secrets,
module.workers,
azurerm_dns_a_record.apiserver,
null_resource.copy-controller-secrets,
]
connection {
@ -79,15 +90,9 @@ resource "null_resource" "bootkube-start" {
timeout = "15m"
}
provisioner "file" {
source = var.asset_dir
destination = "$HOME/assets"
}
provisioner "remote-exec" {
inline = [
"sudo mv $HOME/assets /opt/bootkube",
"sudo systemctl start bootkube",
"sudo systemctl start bootstrap",
]
}
}