From b60a2ecdf76d88573a021c4f8673ddb19c0d42c0 Mon Sep 17 00:00:00 2001
From: Dalton Hubble
Date: Mon, 2 Sep 2019 21:10:30 -0700
Subject: [PATCH] Migrate Fedora CoreOS AWS to a static pod control plane

* Run a kube-apiserver, kube-scheduler, and kube-controller-manager
  static pod on each controller node. Previously, kube-apiserver ran as
  a self-hosted DaemonSet across controllers, while kube-scheduler and
  kube-controller-manager ran as Deployments (with 2 or controller_count
  replicas)
* Remove the bootkube bootstrap and the pivot to a self-hosted control plane
* Remove pod-checkpointer manifests (no longer needed)
---
 aws/fedora-coreos/kubernetes/README.md   |  2 +-
 aws/fedora-coreos/kubernetes/bootkube.tf |  2 +-
 .../kubernetes/fcc/controller.yaml       | 39 +++++++++++++------
 aws/fedora-coreos/kubernetes/security.tf | 22 +++++++++++
 aws/fedora-coreos/kubernetes/ssh.tf      | 32 ++++++++-------
 .../kubernetes/workers/fcc/worker.yaml   |  1 -
 docs/fedora-coreos/aws.md                | 38 ++++++++----------
 7 files changed, 87 insertions(+), 49 deletions(-)

diff --git a/aws/fedora-coreos/kubernetes/README.md b/aws/fedora-coreos/kubernetes/README.md
index c249ea1a..33421e84 100644
--- a/aws/fedora-coreos/kubernetes/README.md
+++ b/aws/fedora-coreos/kubernetes/README.md
@@ -11,7 +11,7 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster
 
 ## Features
 
-* Kubernetes v1.15.3 (upstream, via [kubernetes-incubator/bootkube](https://github.com/kubernetes-incubator/bootkube))
+* Kubernetes v1.15.3 (upstream)
 * Single or multi-master, [Calico](https://www.projectcalico.org/) or [flannel](https://github.com/coreos/flannel) networking
 * On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/)
 * Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [spot](https://typhoon.psdn.io/cl/aws/#spot) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#container-linux) customization

diff --git a/aws/fedora-coreos/kubernetes/bootkube.tf b/aws/fedora-coreos/kubernetes/bootkube.tf
index d651b51d..b11e6cf8 100644
--- a/aws/fedora-coreos/kubernetes/bootkube.tf
+++ b/aws/fedora-coreos/kubernetes/bootkube.tf
@@ -1,6 +1,6 @@
 # Self-hosted Kubernetes assets (kubeconfig, manifests)
 module "bootkube" {
-  source = "git::https://github.com/poseidon/terraform-render-bootkube.git?ref=98cc19f80f2c4c3ddc63fc7aea6320e74bec561a"
+  source = "git::https://github.com/poseidon/terraform-render-bootkube.git?ref=6e59af71138bc5f784453873074de16e7ee150eb"
 
   cluster_name = var.cluster_name
   api_servers  = [format("%s.%s", var.cluster_name, var.dns_zone)]

diff --git a/aws/fedora-coreos/kubernetes/fcc/controller.yaml b/aws/fedora-coreos/kubernetes/fcc/controller.yaml
index 047cf1b0..667583c7 100644
--- a/aws/fedora-coreos/kubernetes/fcc/controller.yaml
+++ b/aws/fedora-coreos/kubernetes/fcc/controller.yaml
@@ -107,33 +107,48 @@
         RestartSec=10
         [Install]
         WantedBy=multi-user.target
-    - name: bootkube.service
+    - name: bootstrap.service
       contents: |
         [Unit]
-        Description=Bootstrap a Kubernetes control plane
-        ConditionPathExists=!/opt/bootkube/init_bootkube.done
+        Description=Bootstrap Kubernetes control plane
+        ConditionPathExists=!/opt/bootstrap/bootstrap.done
         [Service]
         Type=oneshot
         RemainAfterExit=true
-        WorkingDirectory=/opt/bootkube
-        ExecStart=/usr/bin/bash -c 'set -x && \
-          [ -n "$(ls /opt/bootkube/assets/manifests-*/* 2>/dev/null)" ] && mv /opt/bootkube/assets/manifests-*/* /opt/bootkube/assets/manifests && rm -rf /opt/bootkube/assets/manifests-* && exec podman run --name bootkube --privileged \
+        WorkingDirectory=/opt/bootstrap
+        ExecStartPre=-/usr/bin/bash -c 'set -x && [ -n "$(ls /opt/bootstrap/assets/manifests-*/* 2>/dev/null)" ] && mv /opt/bootstrap/assets/manifests-*/* /opt/bootstrap/assets/manifests && rm -rf /opt/bootstrap/assets/manifests-*'
+        ExecStart=/usr/bin/podman run --name bootstrap \
             --network host \
-            --volume /opt/bootkube/assets:/assets \
-            --volume /etc/kubernetes:/etc/kubernetes \
-            quay.io/coreos/bootkube:v0.14.0 \
-            /bootkube start --asset-dir=/assets'
-        ExecStartPost=/bin/touch /opt/bootkube/init_bootkube.done
+            --volume /opt/bootstrap/assets:/assets:ro,Z \
+            --volume /opt/bootstrap/apply:/apply:ro,Z \
+            k8s.gcr.io/hyperkube:v1.15.3 \
+            /apply
+        ExecStartPost=/bin/touch /opt/bootstrap/bootstrap.done
+        ExecStartPost=-/usr/bin/podman stop bootstrap
 storage:
   directories:
     - path: /etc/kubernetes
-    - path: /opt/bootkube
+    - path: /opt/bootstrap
   files:
     - path: /etc/kubernetes/kubeconfig
       mode: 0644
       contents:
         inline: |
           ${kubeconfig}
+    - path: /opt/bootstrap/apply
+      mode: 0544
+      contents:
+        inline: |
+          #!/bin/bash -e
+          export KUBECONFIG=/assets/auth/kubeconfig
+          until kubectl version; do
+            echo "Waiting for static pod control plane"
+            sleep 5
+          done
+          until kubectl apply -f /assets/manifests -R; do
+            echo "Retry applying manifests"
+            sleep 5
+          done
     - path: /etc/sysctl.d/reverse-path-filter.conf
       contents:
         inline: |

diff --git a/aws/fedora-coreos/kubernetes/security.tf b/aws/fedora-coreos/kubernetes/security.tf
index aa2f84cb..ddc4e8e4 100644
--- a/aws/fedora-coreos/kubernetes/security.tf
+++ b/aws/fedora-coreos/kubernetes/security.tf
@@ -44,6 +44,28 @@ resource "aws_security_group_rule" "controller-etcd-metrics" {
   source_security_group_id = aws_security_group.worker.id
 }
 
+# Allow Prometheus to scrape kube-scheduler
+resource "aws_security_group_rule" "controller-scheduler-metrics" {
+  security_group_id = aws_security_group.controller.id
+
+  type                     = "ingress"
+  protocol                 = "tcp"
+  from_port                = 10251
+  to_port                  = 10251
+  source_security_group_id = aws_security_group.worker.id
+}
+
+# Allow Prometheus to scrape kube-controller-manager
+resource "aws_security_group_rule" "controller-manager-metrics" {
+  security_group_id = aws_security_group.controller.id
+
+  type                     = "ingress"
+  protocol                 = "tcp"
+  from_port                = 10252
+  to_port                  = 10252
+  source_security_group_id = aws_security_group.worker.id
+}
+
 resource "aws_security_group_rule" "controller-vxlan" {
   count = var.networking == "flannel" ? 1 : 0

diff --git a/aws/fedora-coreos/kubernetes/ssh.tf b/aws/fedora-coreos/kubernetes/ssh.tf
index 09e31c19..2e39779e 100644
--- a/aws/fedora-coreos/kubernetes/ssh.tf
+++ b/aws/fedora-coreos/kubernetes/ssh.tf
@@ -1,6 +1,10 @@
-# Secure copy etcd TLS assets to controllers.
+# Secure copy assets to controllers.
resource "null_resource" "copy-controller-secrets" { count = var.controller_count + + depends_on = [ + module.bootkube, + ] connection { type = "ssh" @@ -44,6 +48,11 @@ resource "null_resource" "copy-controller-secrets" { destination = "$HOME/etcd-peer.key" } + provisioner "file" { + source = var.asset_dir + destination = "$HOME/assets" + } + provisioner "remote-exec" { inline = [ "sudo mkdir -p /etc/ssl/etcd/etcd", @@ -56,18 +65,21 @@ resource "null_resource" "copy-controller-secrets" { "sudo mv etcd-peer.key /etc/ssl/etcd/etcd/peer.key", "sudo chown -R etcd:etcd /etc/ssl/etcd", "sudo chmod -R 500 /etc/ssl/etcd", + "sudo mv $HOME/assets /opt/bootstrap/assets", + "sudo mkdir -p /etc/kubernetes/bootstrap-secrets", + "sudo cp -r /opt/bootstrap/assets/tls/* /etc/kubernetes/bootstrap-secrets/", + "sudo cp /opt/bootstrap/assets/auth/kubeconfig /etc/kubernetes/bootstrap-secrets/", + "sudo cp -r /opt/bootstrap/assets/static-manifests/* /etc/kubernetes/manifests/" ] } } -# Secure copy bootkube assets to ONE controller and start bootkube to perform -# one-time self-hosted cluster bootstrapping. -resource "null_resource" "bootkube-start" { +# Connect to a controller to perform one-time cluster bootstrap. +resource "null_resource" "bootstrap" { depends_on = [ - module.bootkube, + null_resource.copy-controller-secrets, module.workers, aws_route53_record.apiserver, - null_resource.copy-controller-secrets, ] connection { @@ -77,15 +89,9 @@ resource "null_resource" "bootkube-start" { timeout = "15m" } - provisioner "file" { - source = var.asset_dir - destination = "$HOME/assets" - } - provisioner "remote-exec" { inline = [ - "sudo mv $HOME/assets /opt/bootkube", - "sudo systemctl start bootkube", + "sudo systemctl start bootstrap", ] } } diff --git a/aws/fedora-coreos/kubernetes/workers/fcc/worker.yaml b/aws/fedora-coreos/kubernetes/workers/fcc/worker.yaml index c5f1aeff..4cdcd462 100644 --- a/aws/fedora-coreos/kubernetes/workers/fcc/worker.yaml +++ b/aws/fedora-coreos/kubernetes/workers/fcc/worker.yaml @@ -78,7 +78,6 @@ systemd: storage: directories: - path: /etc/kubernetes - - path: /opt/bootkube files: - path: /etc/kubernetes/kubeconfig mode: 0644 diff --git a/docs/fedora-coreos/aws.md b/docs/fedora-coreos/aws.md index ab7d5906..67466c82 100644 --- a/docs/fedora-coreos/aws.md +++ b/docs/fedora-coreos/aws.md @@ -7,7 +7,7 @@ In this tutorial, we'll create a Kubernetes v1.15.3 cluster on AWS with Fedora C We'll declare a Kubernetes cluster using the Typhoon Terraform module. Then apply the changes to create a VPC, gateway, subnets, security groups, controller instances, worker auto-scaling group, network load balancer, and TLS assets. -Controllers are provisioned to run an `etcd-member` peer and a `kubelet` service. Workers run just a `kubelet` service. A one-time [bootkube](https://github.com/kubernetes-incubator/bootkube) bootstrap schedules the `apiserver`, `scheduler`, `controller-manager`, and `coredns` on controllers and schedules `kube-proxy` and `calico` (or `flannel`) on every node. A generated `kubeconfig` provides `kubectl` access to the cluster. +Controllers hosts are provisioned to run an `etcd-member` peer and a `kubelet` service. Worker hosts run a `kubelet` service. Controller nodes run `kube-apiserver`, `kube-scheduler`, `kube-controller-manager`, and `coredns`, while `kube-proxy` and `calico` (or `flannel`) run on every node. A generated `kubeconfig` provides `kubectl` access to the cluster. 
 
 ## Requirements
 
@@ -94,7 +94,7 @@ Reference the [variables docs](#variables) or the [variables.tf](https://github.
 
 ## ssh-agent
 
-Initial bootstrapping requires `bootkube.service` be started on one controller node. Terraform uses `ssh-agent` to automate this step. Add your SSH private key to `ssh-agent`.
+Initial bootstrapping requires `bootstrap.service` be started on one controller node. Terraform uses `ssh-agent` to automate this step. Add your SSH private key to `ssh-agent`.
 
 ```sh
 ssh-add ~/.ssh/id_rsa
@@ -121,9 +121,9 @@ Apply the changes to create the cluster.
 ```sh
 $ terraform apply
 ...
-module.aws-tempest.null_resource.bootkube-start: Still creating... (4m50s elapsed)
-module.aws-tempest.null_resource.bootkube-start: Still creating... (5m0s elapsed)
-module.aws-tempest.null_resource.bootkube-start: Creation complete after 11m8s (ID: 3961816482286168143)
+module.aws-tempest.null_resource.bootstrap: Still creating... (4m50s elapsed)
+module.aws-tempest.null_resource.bootstrap: Still creating... (5m0s elapsed)
+module.aws-tempest.null_resource.bootstrap: Creation complete after 5m8s (ID: 3961816482286168143)
 
 Apply complete! Resources: 98 added, 0 changed, 0 destroyed.
 ```
@@ -147,22 +147,18 @@ List the pods.
 
 ```
 $ kubectl get pods --all-namespaces
-NAMESPACE     NAME                                       READY   STATUS    RESTARTS   AGE
-kube-system   calico-node-1m5bf                          2/2     Running   0          34m
-kube-system   calico-node-7jmr1                          2/2     Running   0          34m
-kube-system   calico-node-bknc8                          2/2     Running   0          34m
-kube-system   coredns-1187388186-wx1lg                   1/1     Running   0          34m
-kube-system   coredns-1187388186-qjnvp                   1/1     Running   0          34m
-kube-system   kube-apiserver-4mjbk                       1/1     Running   0          34m
-kube-system   kube-controller-manager-3597210155-j2jbt   1/1     Running   1          34m
-kube-system   kube-controller-manager-3597210155-j7g7x   1/1     Running   0          34m
-kube-system   kube-proxy-14wxv                           1/1     Running   0          34m
-kube-system   kube-proxy-9vxh2                           1/1     Running   0          34m
-kube-system   kube-proxy-sbbsh                           1/1     Running   0          34m
-kube-system   kube-scheduler-3359497473-5plhf            1/1     Running   0          34m
-kube-system   kube-scheduler-3359497473-r7zg7            1/1     Running   1          34m
-kube-system   pod-checkpointer-4kxtl                     1/1     Running   0          34m
-kube-system   pod-checkpointer-4kxtl-ip-10-0-3-155       1/1     Running   0          34m
+NAMESPACE     NAME                                    READY   STATUS    RESTARTS   AGE
+kube-system   calico-node-1m5bf                       2/2     Running   0          34m
+kube-system   calico-node-7jmr1                       2/2     Running   0          34m
+kube-system   calico-node-bknc8                       2/2     Running   0          34m
+kube-system   coredns-1187388186-wx1lg                1/1     Running   0          34m
+kube-system   coredns-1187388186-qjnvp                1/1     Running   0          34m
+kube-system   kube-apiserver-ip-10-0-3-155            1/1     Running   0          34m
+kube-system   kube-controller-manager-ip-10-0-3-155   1/1     Running   0          34m
+kube-system   kube-proxy-14wxv                        1/1     Running   0          34m
+kube-system   kube-proxy-9vxh2                        1/1     Running   0          34m
+kube-system   kube-proxy-sbbsh                        1/1     Running   0          34m
+kube-system   kube-scheduler-ip-10-0-3-155            1/1     Running   1          34m
 ```
 
 ## Going Further
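
After `terraform apply` completes, the new static pod control plane can be sanity-checked directly on a controller. A minimal sketch under assumptions: the address and SSH user are illustrative, while the `/etc/kubernetes/manifests` and `/opt/bootstrap` paths come from this patch.

```sh
# SSH to a controller (address and user are illustrative)
ssh fedora@10.0.3.155

# Static pod manifests placed by the copy-controller-secrets provisioner;
# the kubelet watches this directory and runs each manifest as a static pod
ls /etc/kubernetes/manifests

# The oneshot bootstrap unit should have exited successfully, leaving its done-file
systemctl status bootstrap --no-pager
ls /opt/bootstrap/bootstrap.done

# The bootstrap container is stopped but not removed, so its apply logs remain readable
sudo podman logs bootstrap
```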
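The two new security group rules admit TCP 10251 and 10252 from workers so Prometheus (which typically runs on a worker) can scrape `kube-scheduler` and `kube-controller-manager`. A quick reachability check from a worker node, with an illustrative controller IP; in Kubernetes v1.15 these components still serve plain-HTTP metrics on those ports:

```sh
# From a worker node: kube-scheduler metrics (TCP 10251)
curl -s http://10.0.3.155:10251/metrics | head -n 3

# From a worker node: kube-controller-manager metrics (TCP 10252)
curl -s http://10.0.3.155:10252/metrics | head -n 3
```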
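If `bootstrap.service` fails partway, the container from the failed run keeps the `bootstrap` name reserved, so a plain restart would fail at `podman run`. A hypothetical manual retry (not part of this patch) might look like:

```sh
# Re-arm the oneshot unit by removing its done-file (harmless if absent)
sudo rm -f /opt/bootstrap/bootstrap.done

# Remove any leftover container so `podman run --name bootstrap` can reuse the name
sudo podman rm -f bootstrap

# Run the bootstrap again
sudo systemctl start bootstrap
```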