Migrate Container Linux AWS to static pod control plane
* Run a kube-apiserver, kube-scheduler, and kube-controller-manager static pod on each controller node. Previously, kube-apiserver was self-hosted as a DaemonSet across controllers, and kube-scheduler and kube-controller-manager were each a Deployment (with either 2 or `controller_count` replicas).
* Remove bootkube bootstrap and pivot to self-hosted
* Remove pod-checkpointer manifests (no longer needed)
This commit is contained in:
parent
21632c6674
commit
c933bdfc26
|
@ -11,7 +11,7 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster
|
||||||
|
|
||||||
## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>
|
## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>
|
||||||
|
|
||||||
* Kubernetes v1.15.3 (upstream, via [kubernetes-incubator/bootkube](https://github.com/kubernetes-incubator/bootkube))
|
* Kubernetes v1.15.3 (upstream)
|
||||||
* Single or multi-master, [Calico](https://www.projectcalico.org/) or [flannel](https://github.com/coreos/flannel) networking
|
* Single or multi-master, [Calico](https://www.projectcalico.org/) or [flannel](https://github.com/coreos/flannel) networking
|
||||||
* On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/)
|
* On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/)
|
||||||
* Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [spot](https://typhoon.psdn.io/cl/aws/#spot) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#container-linux) customization
|
* Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [spot](https://typhoon.psdn.io/cl/aws/#spot) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#container-linux) customization
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
# Self-hosted Kubernetes assets (kubeconfig, manifests)
|
# Kubernetes assets (kubeconfig, manifests)
|
||||||
module "bootkube" {
|
module "bootkube" {
|
||||||
source = "git::https://github.com/poseidon/terraform-render-bootkube.git?ref=98cc19f80f2c4c3ddc63fc7aea6320e74bec561a"
|
source = "git::https://github.com/poseidon/terraform-render-bootkube.git?ref=6e59af71138bc5f784453873074de16e7ee150eb"
|
||||||
|
|
||||||
cluster_name = var.cluster_name
|
cluster_name = var.cluster_name
|
||||||
api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)]
|
api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)]
|
||||||
|
|
|
@ -98,17 +98,28 @@ systemd:
|
||||||
RestartSec=10
|
RestartSec=10
|
||||||
[Install]
|
[Install]
|
||||||
WantedBy=multi-user.target
|
WantedBy=multi-user.target
|
||||||
- name: bootkube.service
|
- name: bootstrap.service
|
||||||
contents: |
|
contents: |
|
||||||
[Unit]
|
[Unit]
|
||||||
Description=Bootstrap a Kubernetes cluster
|
Description=Kubernetes control plane
|
||||||
ConditionPathExists=!/opt/bootkube/init_bootkube.done
|
ConditionPathExists=!/opt/bootstrap/bootstrap.done
|
||||||
[Service]
|
[Service]
|
||||||
Type=oneshot
|
Type=oneshot
|
||||||
RemainAfterExit=true
|
RemainAfterExit=true
|
||||||
WorkingDirectory=/opt/bootkube
|
WorkingDirectory=/opt/bootstrap
|
||||||
ExecStart=/opt/bootkube/bootkube-start
|
ExecStartPre=-/usr/bin/bash -c 'set -x && [ -n "$(ls /opt/bootstrap/assets/manifests-*/* 2>/dev/null)" ] && mv /opt/bootstrap/assets/manifests-*/* /opt/bootstrap/assets/manifests && rm -rf /opt/bootstrap/assets/manifests-*'
|
||||||
ExecStartPost=/bin/touch /opt/bootkube/init_bootkube.done
|
ExecStart=/usr/bin/rkt run \
|
||||||
|
--trust-keys-from-https \
|
||||||
|
--volume assets,kind=host,source=/opt/bootstrap/assets \
|
||||||
|
--mount volume=assets,target=/assets \
|
||||||
|
--volume script,kind=host,source=/opt/bootstrap/apply \
|
||||||
|
--mount volume=script,target=/apply \
|
||||||
|
--insecure-options=image \
|
||||||
|
docker://k8s.gcr.io/hyperkube:v1.15.3 \
|
||||||
|
--net=host \
|
||||||
|
--dns=host \
|
||||||
|
--exec=/apply
|
||||||
|
ExecStartPost=/bin/touch /opt/bootstrap/bootstrap.done
|
||||||
[Install]
|
[Install]
|
||||||
WantedBy=multi-user.target
|
WantedBy=multi-user.target
|
||||||
storage:
|
storage:
|
||||||
|
@ -126,36 +137,26 @@ storage:
|
||||||
inline: |
|
inline: |
|
||||||
KUBELET_IMAGE_URL=docker://k8s.gcr.io/hyperkube
|
KUBELET_IMAGE_URL=docker://k8s.gcr.io/hyperkube
|
||||||
KUBELET_IMAGE_TAG=v1.15.3
|
KUBELET_IMAGE_TAG=v1.15.3
|
||||||
|
- path: /opt/bootstrap/apply
|
||||||
|
filesystem: root
|
||||||
|
mode: 0544
|
||||||
|
contents:
|
||||||
|
inline: |
|
||||||
|
#!/bin/bash -e
|
||||||
|
export KUBECONFIG=/assets/auth/kubeconfig
|
||||||
|
until kubectl version; do
|
||||||
|
echo "Waiting for static pod control plane"
|
||||||
|
sleep 5
|
||||||
|
done
|
||||||
|
until kubectl apply -f /assets/manifests -R; do
|
||||||
|
echo "Retry applying manifests"
|
||||||
|
sleep 5
|
||||||
|
done
|
||||||
- path: /etc/sysctl.d/max-user-watches.conf
|
- path: /etc/sysctl.d/max-user-watches.conf
|
||||||
filesystem: root
|
filesystem: root
|
||||||
contents:
|
contents:
|
||||||
inline: |
|
inline: |
|
||||||
fs.inotify.max_user_watches=16184
|
fs.inotify.max_user_watches=16184
|
||||||
- path: /opt/bootkube/bootkube-start
|
|
||||||
filesystem: root
|
|
||||||
mode: 0544
|
|
||||||
user:
|
|
||||||
id: 500
|
|
||||||
group:
|
|
||||||
id: 500
|
|
||||||
contents:
|
|
||||||
inline: |
|
|
||||||
#!/bin/bash
|
|
||||||
# Wrapper for bootkube start
|
|
||||||
set -e
|
|
||||||
# Move experimental manifests
|
|
||||||
[ -n "$(ls /opt/bootkube/assets/manifests-*/* 2>/dev/null)" ] && mv /opt/bootkube/assets/manifests-*/* /opt/bootkube/assets/manifests && rm -rf /opt/bootkube/assets/manifests-*
|
|
||||||
exec /usr/bin/rkt run \
|
|
||||||
--trust-keys-from-https \
|
|
||||||
--volume assets,kind=host,source=/opt/bootkube/assets \
|
|
||||||
--mount volume=assets,target=/assets \
|
|
||||||
--volume bootstrap,kind=host,source=/etc/kubernetes \
|
|
||||||
--mount volume=bootstrap,target=/etc/kubernetes \
|
|
||||||
$${RKT_OPTS} \
|
|
||||||
quay.io/coreos/bootkube:v0.14.0 \
|
|
||||||
--net=host \
|
|
||||||
--dns=host \
|
|
||||||
--exec=/bootkube -- start --asset-dir=/assets "$@"
|
|
||||||
passwd:
|
passwd:
|
||||||
users:
|
users:
|
||||||
- name: core
|
- name: core
|
||||||
|
|
|
@ -33,6 +33,28 @@ resource "aws_security_group_rule" "controller-etcd" {
|
||||||
self = true
|
self = true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Allow Prometheus to scrape kube-scheduler
|
||||||
|
resource "aws_security_group_rule" "controller-scheduler-metrics" {
|
||||||
|
security_group_id = aws_security_group.controller.id
|
||||||
|
|
||||||
|
type = "ingress"
|
||||||
|
protocol = "tcp"
|
||||||
|
from_port = 10251
|
||||||
|
to_port = 10251
|
||||||
|
source_security_group_id = aws_security_group.worker.id
|
||||||
|
}
|
||||||
|
|
||||||
|
# Allow Prometheus to scrape kube-controller-manager
|
||||||
|
resource "aws_security_group_rule" "controller-manager-metrics" {
|
||||||
|
security_group_id = aws_security_group.controller.id
|
||||||
|
|
||||||
|
type = "ingress"
|
||||||
|
protocol = "tcp"
|
||||||
|
from_port = 10252
|
||||||
|
to_port = 10252
|
||||||
|
source_security_group_id = aws_security_group.worker.id
|
||||||
|
}
|
||||||
|
|
||||||
# Allow Prometheus to scrape etcd metrics
|
# Allow Prometheus to scrape etcd metrics
|
||||||
resource "aws_security_group_rule" "controller-etcd-metrics" {
|
resource "aws_security_group_rule" "controller-etcd-metrics" {
|
||||||
security_group_id = aws_security_group.controller.id
|
security_group_id = aws_security_group.controller.id
|
||||||
|
|
|
@ -1,10 +1,14 @@
|
||||||
# Secure copy etcd TLS assets to controllers.
|
# Secure copy assets to controllers.
|
||||||
resource "null_resource" "copy-controller-secrets" {
|
resource "null_resource" "copy-controller-secrets" {
|
||||||
count = var.controller_count
|
count = var.controller_count
|
||||||
|
|
||||||
|
depends_on = [
|
||||||
|
module.bootkube,
|
||||||
|
]
|
||||||
|
|
||||||
connection {
|
connection {
|
||||||
type = "ssh"
|
type = "ssh"
|
||||||
host = element(aws_instance.controllers.*.public_ip, count.index)
|
host = aws_instance.controllers.*.public_ip[count.index]
|
||||||
user = "core"
|
user = "core"
|
||||||
timeout = "15m"
|
timeout = "15m"
|
||||||
}
|
}
|
||||||
|
@ -43,6 +47,11 @@ resource "null_resource" "copy-controller-secrets" {
|
||||||
content = module.bootkube.etcd_peer_key
|
content = module.bootkube.etcd_peer_key
|
||||||
destination = "$HOME/etcd-peer.key"
|
destination = "$HOME/etcd-peer.key"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
provisioner "file" {
|
||||||
|
source = var.asset_dir
|
||||||
|
destination = "$HOME/assets"
|
||||||
|
}
|
||||||
|
|
||||||
provisioner "remote-exec" {
|
provisioner "remote-exec" {
|
||||||
inline = [
|
inline = [
|
||||||
|
@ -56,18 +65,21 @@ resource "null_resource" "copy-controller-secrets" {
|
||||||
"sudo mv etcd-peer.key /etc/ssl/etcd/etcd/peer.key",
|
"sudo mv etcd-peer.key /etc/ssl/etcd/etcd/peer.key",
|
||||||
"sudo chown -R etcd:etcd /etc/ssl/etcd",
|
"sudo chown -R etcd:etcd /etc/ssl/etcd",
|
||||||
"sudo chmod -R 500 /etc/ssl/etcd",
|
"sudo chmod -R 500 /etc/ssl/etcd",
|
||||||
|
"sudo mv $HOME/assets /opt/bootstrap/assets",
|
||||||
|
"sudo mkdir -p /etc/kubernetes/bootstrap-secrets",
|
||||||
|
"sudo cp -r /opt/bootstrap/assets/tls/* /etc/kubernetes/bootstrap-secrets/",
|
||||||
|
"sudo cp /opt/bootstrap/assets/auth/kubeconfig /etc/kubernetes/bootstrap-secrets/",
|
||||||
|
"sudo cp -r /opt/bootstrap/assets/static-manifests/* /etc/kubernetes/manifests/",
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
# Secure copy bootkube assets to ONE controller and start bootkube to perform
|
# Connect to a controller to perform one-time cluster bootstrap.
|
||||||
# one-time self-hosted cluster bootstrapping.
|
resource "null_resource" "bootstrap" {
|
||||||
resource "null_resource" "bootkube-start" {
|
|
||||||
depends_on = [
|
depends_on = [
|
||||||
module.bootkube,
|
null_resource.copy-controller-secrets,
|
||||||
module.workers,
|
module.workers,
|
||||||
aws_route53_record.apiserver,
|
aws_route53_record.apiserver,
|
||||||
null_resource.copy-controller-secrets,
|
|
||||||
]
|
]
|
||||||
|
|
||||||
connection {
|
connection {
|
||||||
|
@ -77,15 +89,9 @@ resource "null_resource" "bootkube-start" {
|
||||||
timeout = "15m"
|
timeout = "15m"
|
||||||
}
|
}
|
||||||
|
|
||||||
provisioner "file" {
|
|
||||||
source = var.asset_dir
|
|
||||||
destination = "$HOME/assets"
|
|
||||||
}
|
|
||||||
|
|
||||||
provisioner "remote-exec" {
|
provisioner "remote-exec" {
|
||||||
inline = [
|
inline = [
|
||||||
"sudo mv $HOME/assets /opt/bootkube",
|
"sudo systemctl start bootstrap",
|
||||||
"sudo systemctl start bootkube",
|
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,7 +4,7 @@ In this tutorial, we'll create a Kubernetes v1.15.3 cluster on AWS with Containe
|
||||||
|
|
||||||
We'll declare a Kubernetes cluster using the Typhoon Terraform module. Then apply the changes to create a VPC, gateway, subnets, security groups, controller instances, worker auto-scaling group, network load balancer, and TLS assets.
|
We'll declare a Kubernetes cluster using the Typhoon Terraform module. Then apply the changes to create a VPC, gateway, subnets, security groups, controller instances, worker auto-scaling group, network load balancer, and TLS assets.
|
||||||
|
|
||||||
Controllers are provisioned to run an `etcd-member` peer and a `kubelet` service. Workers run just a `kubelet` service. A one-time [bootkube](https://github.com/kubernetes-incubator/bootkube) bootstrap schedules the `apiserver`, `scheduler`, `controller-manager`, and `coredns` on controllers and schedules `kube-proxy` and `calico` (or `flannel`) on every node. A generated `kubeconfig` provides `kubectl` access to the cluster.
|
Controller hosts are provisioned to run an `etcd-member` peer and a `kubelet` service. Worker hosts run a `kubelet` service. Controller nodes run `kube-apiserver`, `kube-scheduler`, `kube-controller-manager`, and `coredns`, while `kube-proxy` and `calico` (or `flannel`) run on every node. A generated `kubeconfig` provides `kubectl` access to the cluster.
|
||||||
|
|
||||||
## Requirements
|
## Requirements
|
||||||
|
|
||||||
|
@ -91,7 +91,7 @@ Reference the [variables docs](#variables) or the [variables.tf](https://github.
|
||||||
|
|
||||||
## ssh-agent
|
## ssh-agent
|
||||||
|
|
||||||
Initial bootstrapping requires `bootkube.service` be started on one controller node. Terraform uses `ssh-agent` to automate this step. Add your SSH private key to `ssh-agent`.
|
Initial bootstrapping requires that `bootstrap.service` be started on one controller node. Terraform uses `ssh-agent` to automate this step. Add your SSH private key to `ssh-agent`.
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
ssh-add ~/.ssh/id_rsa
|
ssh-add ~/.ssh/id_rsa
|
||||||
|
@ -118,9 +118,9 @@ Apply the changes to create the cluster.
|
||||||
```sh
|
```sh
|
||||||
$ terraform apply
|
$ terraform apply
|
||||||
...
|
...
|
||||||
module.aws-tempest.null_resource.bootkube-start: Still creating... (4m50s elapsed)
|
module.aws-tempest.null_resource.bootstrap: Still creating... (4m50s elapsed)
|
||||||
module.aws-tempest.null_resource.bootkube-start: Still creating... (5m0s elapsed)
|
module.aws-tempest.null_resource.bootstrap: Still creating... (5m0s elapsed)
|
||||||
module.aws-tempest.null_resource.bootkube-start: Creation complete after 11m8s (ID: 3961816482286168143)
|
module.aws-tempest.null_resource.bootstrap: Creation complete after 11m8s (ID: 3961816482286168143)
|
||||||
|
|
||||||
Apply complete! Resources: 98 added, 0 changed, 0 destroyed.
|
Apply complete! Resources: 98 added, 0 changed, 0 destroyed.
|
||||||
```
|
```
|
||||||
|
@ -150,16 +150,12 @@ kube-system calico-node-7jmr1 2/2 Running 0
|
||||||
kube-system calico-node-bknc8 2/2 Running 0 34m
|
kube-system calico-node-bknc8 2/2 Running 0 34m
|
||||||
kube-system coredns-1187388186-wx1lg 1/1 Running 0 34m
|
kube-system coredns-1187388186-wx1lg 1/1 Running 0 34m
|
||||||
kube-system coredns-1187388186-qjnvp 1/1 Running 0 34m
|
kube-system coredns-1187388186-qjnvp 1/1 Running 0 34m
|
||||||
kube-system kube-apiserver-4mjbk 1/1 Running 0 34m
|
kube-system kube-apiserver-ip-10-0-3-155 1/1 Running 0 34m
|
||||||
kube-system kube-controller-manager-3597210155-j2jbt 1/1 Running 1 34m
|
kube-system kube-controller-manager-ip-10-0-3-155 1/1 Running 0 34m
|
||||||
kube-system kube-controller-manager-3597210155-j7g7x 1/1 Running 0 34m
|
|
||||||
kube-system kube-proxy-14wxv 1/1 Running 0 34m
|
kube-system kube-proxy-14wxv 1/1 Running 0 34m
|
||||||
kube-system kube-proxy-9vxh2 1/1 Running 0 34m
|
kube-system kube-proxy-9vxh2 1/1 Running 0 34m
|
||||||
kube-system kube-proxy-sbbsh 1/1 Running 0 34m
|
kube-system kube-proxy-sbbsh 1/1 Running 0 34m
|
||||||
kube-system kube-scheduler-3359497473-5plhf 1/1 Running 0 34m
|
kube-system kube-scheduler-ip-10-0-3-155 1/1 Running 1 34m
|
||||||
kube-system kube-scheduler-3359497473-r7zg7 1/1 Running 1 34m
|
|
||||||
kube-system pod-checkpointer-4kxtl 1/1 Running 0 34m
|
|
||||||
kube-system pod-checkpointer-4kxtl-ip-10-0-3-155 1/1 Running 0 33m
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Going Further
|
## Going Further
|
||||||
|
|
Loading…
Reference in New Issue