From c111924913ee641ead3731c8ee01f1b6e28191a5 Mon Sep 17 00:00:00 2001 From: Dalton Hubble Date: Mon, 26 Jun 2017 21:55:39 -0700 Subject: [PATCH] Add dghubble/pegasus GCE Kubernetes Terraform module --- .../cl/bootkube-controller.yaml.tmpl | 148 ++++++++++++++++++ gce-bootkube-controller/controller.tf | 85 ++++++++++ gce-bootkube-controller/network.tf | 61 ++++++++ gce-bootkube-controller/variables.tf | 97 ++++++++++++ .../cl/bootkube-worker.yaml.tmpl | 132 ++++++++++++++++ gce-bootkube-worker/network.tf | 45 ++++++ gce-bootkube-worker/outputs.tf | 3 + gce-bootkube-worker/variables.tf | 82 ++++++++++ gce-bootkube-worker/worker.tf | 87 ++++++++++ gce-bootkube/bootkube.tf | 12 ++ gce-bootkube/cluster.tf | 44 ++++++ gce-bootkube/network.tf | 46 ++++++ gce-bootkube/outputs.tf | 3 + gce-bootkube/ssh.tf | 25 +++ gce-bootkube/variables.tf | 87 ++++++++++ 15 files changed, 957 insertions(+) create mode 100644 gce-bootkube-controller/cl/bootkube-controller.yaml.tmpl create mode 100644 gce-bootkube-controller/controller.tf create mode 100644 gce-bootkube-controller/network.tf create mode 100644 gce-bootkube-controller/variables.tf create mode 100644 gce-bootkube-worker/cl/bootkube-worker.yaml.tmpl create mode 100644 gce-bootkube-worker/network.tf create mode 100644 gce-bootkube-worker/outputs.tf create mode 100644 gce-bootkube-worker/variables.tf create mode 100644 gce-bootkube-worker/worker.tf create mode 100644 gce-bootkube/bootkube.tf create mode 100644 gce-bootkube/cluster.tf create mode 100644 gce-bootkube/network.tf create mode 100644 gce-bootkube/outputs.tf create mode 100644 gce-bootkube/ssh.tf create mode 100644 gce-bootkube/variables.tf diff --git a/gce-bootkube-controller/cl/bootkube-controller.yaml.tmpl b/gce-bootkube-controller/cl/bootkube-controller.yaml.tmpl new file mode 100644 index 00000000..2fc503c0 --- /dev/null +++ b/gce-bootkube-controller/cl/bootkube-controller.yaml.tmpl @@ -0,0 +1,148 @@ +--- +systemd: + units: + - name: docker.service + enable: true + - name: locksmithd.service + dropins: + - name: 40-etcd-lock.conf + contents: | + [Service] + Environment="REBOOT_STRATEGY=etcd-lock" + Environment="LOCKSMITHD_ENDPOINT=http://${k8s_etcd_service_ip}:2379" + - name: wait-for-dns.service + enable: true + contents: | + [Unit] + Description=Wait for DNS entries + Wants=systemd-resolved.service + Before=kubelet.service + [Service] + Type=oneshot + RemainAfterExit=true + ExecStart=/bin/sh -c 'while ! /usr/bin/grep '^[^#[:space:]]' /etc/resolv.conf > /dev/null; do sleep 1; done' + [Install] + RequiredBy=kubelet.service + - name: kubelet.service + enable: true + contents: | + [Unit] + Description=Kubelet via Hyperkube ACI + [Service] + EnvironmentFile=/etc/kubernetes/kubelet.env + Environment="RKT_RUN_ARGS=--uuid-file-save=/var/run/kubelet-pod.uuid \ + --volume=resolv,kind=host,source=/etc/resolv.conf \ + --mount volume=resolv,target=/etc/resolv.conf \ + --volume var-lib-cni,kind=host,source=/var/lib/cni \ + --mount volume=var-lib-cni,target=/var/lib/cni \ + --volume var-log,kind=host,source=/var/log \ + --mount volume=var-log,target=/var/log" + ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests + ExecStartPre=/bin/mkdir -p /etc/kubernetes/cni/net.d + ExecStartPre=/bin/mkdir -p /etc/kubernetes/checkpoint-secrets + ExecStartPre=/bin/mkdir -p /etc/kubernetes/inactive-manifests + ExecStartPre=/bin/mkdir -p /var/lib/cni + ExecStartPre=/usr/bin/bash -c "grep 'certificate-authority-data' /etc/kubernetes/kubeconfig | awk '{print $2}' | base64 -d > /etc/kubernetes/ca.crt" + ExecStartPre=-/usr/bin/rkt rm --uuid-file=/var/run/kubelet-pod.uuid + ExecStart=/usr/lib/coreos/kubelet-wrapper \ + --kubeconfig=/etc/kubernetes/kubeconfig \ + --require-kubeconfig \ + --client-ca-file=/etc/kubernetes/ca.crt \ + --anonymous-auth=false \ + --cni-conf-dir=/etc/kubernetes/cni/net.d \ + --network-plugin=cni \ + --lock-file=/var/run/lock/kubelet.lock \ + --exit-on-lock-contention \ + --pod-manifest-path=/etc/kubernetes/manifests \ + --allow-privileged \ + --node-labels=node-role.kubernetes.io/master \ + --register-with-taints=node-role.kubernetes.io/master=:NoSchedule \ + --cluster_dns=${k8s_dns_service_ip} \ + --cluster_domain=cluster.local + ExecStop=-/usr/bin/rkt stop --uuid-file=/var/run/kubelet-pod.uuid + Restart=always + RestartSec=10 + [Install] + WantedBy=multi-user.target + - name: bootkube.service + contents: | + [Unit] + Description=Bootstrap a Kubernetes cluster + ConditionPathExists=!/opt/bootkube/init_bootkube.done + [Service] + Type=oneshot + RemainAfterExit=true + WorkingDirectory=/opt/bootkube + ExecStart=/opt/bootkube/bootkube-start + ExecStartPost=/bin/touch /opt/bootkube/init_bootkube.done + [Install] + WantedBy=multi-user.target +storage: + files: + - path: /etc/kubernetes/kubeconfig + filesystem: root + mode: 0644 + contents: + inline: | + apiVersion: v1 + kind: Config + clusters: + - name: local + cluster: + server: ${kubeconfig_server} + certificate-authority-data: ${kubeconfig_ca_cert} + users: + - name: kubelet + user: + client-certificate-data: ${kubeconfig_kubelet_cert} + client-key-data: ${kubeconfig_kubelet_key} + contexts: + - context: + cluster: local + user: kubelet + - path: /etc/kubernetes/kubelet.env + filesystem: root + mode: 0644 + contents: + inline: | + KUBELET_IMAGE_URL=quay.io/coreos/hyperkube + KUBELET_IMAGE_TAG=v1.6.4_coreos.0 + - path: /etc/sysctl.d/max-user-watches.conf + filesystem: root + contents: + inline: | + fs.inotify.max_user_watches=16184 + - path: /opt/bootkube/bootkube-start + filesystem: root + mode: 0544 + user: + id: 500 + group: + id: 500 + contents: + inline: | + #!/bin/bash + # Wrapper for bootkube start + set -e + # Move experimental manifests + [ -d /opt/bootkube/assets/experimental/manifests ] && mv /opt/bootkube/assets/experimental/manifests/* /opt/bootkube/assets/manifests && rm -r /opt/bootkube/assets/experimental/manifests + [ -d /opt/bootkube/assets/experimental/bootstrap-manifests ] && mv /opt/bootkube/assets/experimental/bootstrap-manifests/* /opt/bootkube/assets/bootstrap-manifests && rm -r /opt/bootkube/assets/experimental/bootstrap-manifests + BOOTKUBE_ACI="$${BOOTKUBE_ACI:-quay.io/coreos/bootkube}" + BOOTKUBE_VERSION="$${BOOTKUBE_VERSION:-v0.4.4}" + BOOTKUBE_ASSETS="$${BOOTKUBE_ASSETS:-/opt/bootkube/assets}" + exec /usr/bin/rkt run \ + --trust-keys-from-https \ + --volume assets,kind=host,source=$${BOOTKUBE_ASSETS} \ + --mount volume=assets,target=/assets \ + --volume bootstrap,kind=host,source=/etc/kubernetes \ + --mount volume=bootstrap,target=/etc/kubernetes \ + $${RKT_OPTS} \ + $${BOOTKUBE_ACI}:$${BOOTKUBE_VERSION} \ + --net=host \ + --dns=host \ + --exec=/bootkube -- start --asset-dir=/assets "$@" +passwd: + users: + - name: core + ssh_authorized_keys: + - "${ssh_authorized_keys}" diff --git a/gce-bootkube-controller/controller.tf b/gce-bootkube-controller/controller.tf new file mode 100644 index 00000000..57f0d701 --- /dev/null +++ b/gce-bootkube-controller/controller.tf @@ -0,0 +1,85 @@ +# Managed Instance Group +resource "google_compute_instance_group_manager" "controllers" { + name = "${var.cluster_name}-controller-group" + description = "Compute instance group of ${var.cluster_name} controllers" + + # Instance name prefix for instances in the group + base_instance_name = "${var.cluster_name}-controller" + instance_template = "${google_compute_instance_template.controller.self_link}" + update_strategy = "RESTART" + zone = "${var.zone}" + target_size = "${var.count}" + + # Target pool instances in the group should be added into + target_pools = [ + "${google_compute_target_pool.controllers.self_link}", + ] +} + +# bootkube-controller Container Linux config +data "template_file" "controller_config" { + template = "${file("${path.module}/cl/bootkube-controller.yaml.tmpl")}" + + vars = { + k8s_dns_service_ip = "${cidrhost(var.service_cidr, 10)}" + k8s_etcd_service_ip = "${cidrhost(var.service_cidr, 15)}" + ssh_authorized_keys = "${var.ssh_authorized_key}" + kubeconfig_ca_cert = "${var.kubeconfig_ca_cert}" + kubeconfig_kubelet_cert = "${var.kubeconfig_kubelet_cert}" + kubeconfig_kubelet_key = "${var.kubeconfig_kubelet_key}" + kubeconfig_server = "${var.kubeconfig_server}" + } +} + +data "ct_config" "controller_ign" { + content = "${data.template_file.controller_config.rendered}" + pretty_print = false +} + +resource "google_compute_instance_template" "controller" { + name_prefix = "${var.cluster_name}-controller-" + description = "bootkube-controller Instance template" + machine_type = "${var.machine_type}" + + metadata { + user-data = "${data.ct_config.controller_ign.rendered}" + } + + scheduling { + automatic_restart = "${var.preemptible ? false : true}" + preemptible = "${var.preemptible}" + } + + # QUIRK: Undocumented field defaults to true if not set + automatic_restart = "${var.preemptible ? false : true}" + + disk { + auto_delete = true + boot = true + source_image = "${var.os_image}" + disk_size_gb = "${var.disk_size}" + } + + network_interface { + network = "${var.network}" + + # Ephemeral external IP + access_config = {} + } + + can_ip_forward = true + + service_account { + scopes = [ + "storage-ro", + "compute-rw", + "datastore", + "userinfo-email", + ] + } + + lifecycle { + # To update an Instance Template, Terraform should replace the existing resource + create_before_destroy = true + } +} diff --git a/gce-bootkube-controller/network.tf b/gce-bootkube-controller/network.tf new file mode 100644 index 00000000..292645a6 --- /dev/null +++ b/gce-bootkube-controller/network.tf @@ -0,0 +1,61 @@ +# DNS record set to the network load balancer over controllers +resource "google_dns_record_set" "k8s_dns" { + # Managed DNS Zone name + managed_zone = "${var.dns_base_zone_name}" + + # Name of the DNS record + #name = "${format("%s.%s.", var.cluster_name, var.dns_base_zone)}" + name = "${var.k8s_domain_name}." + + type = "A" + ttl = 300 + + # compute instance public IP + rrdatas = ["${google_compute_address.controllers-ip.address}"] +} + +# Static IP for the Network Load Balancer +resource "google_compute_address" "controllers-ip" { + name = "${var.cluster_name}-controllers-ip" +} + +# Network Load Balancer (i.e. forwarding rules) +resource "google_compute_forwarding_rule" "controller-https-rule" { + name = "${var.cluster_name}-controller-https-rule" + ip_address = "${google_compute_address.controllers-ip.address}" + port_range = "443" + target = "${google_compute_target_pool.controllers.self_link}" +} + +resource "google_compute_forwarding_rule" "controller-ssh-rule" { + name = "${var.cluster_name}-controller-ssh-rule" + ip_address = "${google_compute_address.controllers-ip.address}" + port_range = "22" + target = "${google_compute_target_pool.controllers.self_link}" +} + +# Network Load Balancer target pool of instances. +resource "google_compute_target_pool" "controllers" { + name = "${var.cluster_name}-controller-pool" + + health_checks = [ + "${google_compute_http_health_check.ingress.name}", + ] + + session_affinity = "NONE" +} + +# Kubelet HTTP Health Check +resource "google_compute_http_health_check" "ingress" { + name = "${var.cluster_name}-kubelet-health" + description = "Health check Kubelet health host port" + + timeout_sec = 5 + check_interval_sec = 5 + + healthy_threshold = 2 + unhealthy_threshold = 4 + + port = 10255 + request_path = "/healthz" +} diff --git a/gce-bootkube-controller/variables.tf b/gce-bootkube-controller/variables.tf new file mode 100644 index 00000000..ce8af7cd --- /dev/null +++ b/gce-bootkube-controller/variables.tf @@ -0,0 +1,97 @@ +variable "cluster_name" { + type = "string" + description = "Unique cluster name" +} + +variable "ssh_authorized_key" { + type = "string" + description = "SSH public key for logging in as user 'core'" +} + +variable "network" { + type = "string" + description = "Name of the network to attach to the compute instance interfaces" +} + +variable "dns_base_zone" { + type = "string" + description = "Google Cloud DNS Zone value to create etcd/k8s subdomains (e.g. dghubble.io)" +} + +variable "dns_base_zone_name" { + type = "string" + description = "Google Cloud DNS Zone name to create etcd/k8s subdomains (e.g. dghubble-io)" +} + +variable "k8s_domain_name" { + type = "string" + description = "Controller DNS name which resolves to the controller instance. Kubectl and workers use TLS client credentials to communicate via this endpoint." +} + +# instances + +variable "count" { + type = "string" + description = "Number of controller compute instances the instance group should manage" +} + +variable "zone" { + type = "string" + description = "Google zone that compute instances in the group should be created in (e.g. gcloud compute zones list)" +} + +variable "machine_type" { + type = "string" + description = "Machine type for compute instances (e.g. gcloud compute machine-types list)" +} + +variable "os_image" { + type = "string" + description = "OS image from which to initialize the disk (e.g. gcloud compute images list)" +} + +variable "disk_size" { + type = "string" + default = "40" + description = "The size of the disk in gigabytes." +} + +variable "preemptible" { + type = "string" + default = "false" + description = "If enabled, Compute Engine will terminate instances randomly within 24 hours" +} + +// configuration + +variable "service_cidr" { + description = < /dev/null; do sleep 1; done' + [Install] + RequiredBy=kubelet.service + - name: kubelet.service + enable: true + contents: | + [Unit] + Description=Kubelet via Hyperkube ACI + [Service] + EnvironmentFile=/etc/kubernetes/kubelet.env + Environment="RKT_RUN_ARGS=--uuid-file-save=/var/run/kubelet-pod.uuid \ + --volume=resolv,kind=host,source=/etc/resolv.conf \ + --mount volume=resolv,target=/etc/resolv.conf \ + --volume var-lib-cni,kind=host,source=/var/lib/cni \ + --mount volume=var-lib-cni,target=/var/lib/cni \ + --volume var-log,kind=host,source=/var/log \ + --mount volume=var-log,target=/var/log" + ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests + ExecStartPre=/bin/mkdir -p /etc/kubernetes/cni/net.d + ExecStartPre=/bin/mkdir -p /etc/kubernetes/checkpoint-secrets + ExecStartPre=/bin/mkdir -p /etc/kubernetes/inactive-manifests + ExecStartPre=/bin/mkdir -p /var/lib/cni + ExecStartPre=/usr/bin/bash -c "grep 'certificate-authority-data' /etc/kubernetes/kubeconfig | awk '{print $2}' | base64 -d > /etc/kubernetes/ca.crt" + ExecStartPre=-/usr/bin/rkt rm --uuid-file=/var/run/kubelet-pod.uuid + ExecStart=/usr/lib/coreos/kubelet-wrapper \ + --kubeconfig=/etc/kubernetes/kubeconfig \ + --require-kubeconfig \ + --client-ca-file=/etc/kubernetes/ca.crt \ + --anonymous-auth=false \ + --cni-conf-dir=/etc/kubernetes/cni/net.d \ + --network-plugin=cni \ + --lock-file=/var/run/lock/kubelet.lock \ + --exit-on-lock-contention \ + --pod-manifest-path=/etc/kubernetes/manifests \ + --allow-privileged \ + --node-labels=node-role.kubernetes.io/node \ + --cluster_dns=${k8s_dns_service_ip} \ + --cluster_domain=cluster.local + ExecStop=-/usr/bin/rkt stop --uuid-file=/var/run/kubelet-pod.uuid + Restart=always + RestartSec=5 + [Install] + WantedBy=multi-user.target + - name: delete-node.service + enable: true + contents: | + [Unit] + Description=Waiting to delete Kubernetes node on shutdown + [Service] + Type=oneshot + RemainAfterExit=true + ExecStart=/bin/true + ExecStop=/etc/kubernetes/delete-node + [Install] + WantedBy=multi-user.target +storage: + files: + - path: /etc/kubernetes/kubeconfig + filesystem: root + mode: 0644 + contents: + inline: | + apiVersion: v1 + kind: Config + clusters: + - name: local + cluster: + server: ${kubeconfig_server} + certificate-authority-data: ${kubeconfig_ca_cert} + users: + - name: kubelet + user: + client-certificate-data: ${kubeconfig_kubelet_cert} + client-key-data: ${kubeconfig_kubelet_key} + contexts: + - context: + cluster: local + user: kubelet + - path: /etc/kubernetes/kubelet.env + filesystem: root + mode: 0644 + contents: + inline: | + KUBELET_IMAGE_URL=quay.io/coreos/hyperkube + KUBELET_IMAGE_TAG=v1.6.4_coreos.0 + - path: /etc/sysctl.d/max-user-watches.conf + filesystem: root + contents: + inline: | + fs.inotify.max_user_watches=16184 + - path: /etc/kubernetes/delete-node + filesystem: root + mode: 0744 + contents: + inline: | + #!/bin/bash + set -e + exec /usr/bin/rkt run \ + --trust-keys-from-https \ + --volume config,kind=host,source=/etc/kubernetes \ + --mount volume=config,target=/etc/kubernetes \ + quay.io/coreos/hyperkube:v1.6.4_coreos.0 \ + --net=host \ + --dns=host \ + --exec=/kubectl -- --kubeconfig=/etc/kubernetes/kubeconfig delete node $(hostname) +passwd: + users: + - name: core + ssh_authorized_keys: + - "${ssh_authorized_key}" diff --git a/gce-bootkube-worker/network.tf b/gce-bootkube-worker/network.tf new file mode 100644 index 00000000..7dcd1403 --- /dev/null +++ b/gce-bootkube-worker/network.tf @@ -0,0 +1,45 @@ +# Static IP for the Network Load Balancer +resource "google_compute_address" "ingress-ip" { + name = "${var.cluster_name}-ingress-ip" +} + +# Network Load Balancer (i.e. forwarding rules) +resource "google_compute_forwarding_rule" "worker-http-lb" { + name = "${var.cluster_name}-worker-http-rule" + ip_address = "${google_compute_address.ingress-ip.address}" + port_range = "80" + target = "${google_compute_target_pool.workers.self_link}" +} + +resource "google_compute_forwarding_rule" "worker-https-lb" { + name = "${var.cluster_name}-worker-https-rule" + ip_address = "${google_compute_address.ingress-ip.address}" + port_range = "443" + target = "${google_compute_target_pool.workers.self_link}" +} + +# Network Load Balancer target pool of instances. +resource "google_compute_target_pool" "workers" { + name = "${var.cluster_name}-worker-pool" + + health_checks = [ + "${google_compute_http_health_check.ingress.name}", + ] + + session_affinity = "NONE" +} + +# Ingress HTTP Health Check +resource "google_compute_http_health_check" "ingress" { + name = "${var.cluster_name}-ingress-health" + description = "Health check Ingress controller health host port" + + timeout_sec = 5 + check_interval_sec = 5 + + healthy_threshold = 2 + unhealthy_threshold = 4 + + port = 10254 + request_path = "/healthz" +} diff --git a/gce-bootkube-worker/outputs.tf b/gce-bootkube-worker/outputs.tf new file mode 100644 index 00000000..9f3801d9 --- /dev/null +++ b/gce-bootkube-worker/outputs.tf @@ -0,0 +1,3 @@ +output "ingress_static_ip" { + value = "${google_compute_address.ingress-ip.address}" +} diff --git a/gce-bootkube-worker/variables.tf b/gce-bootkube-worker/variables.tf new file mode 100644 index 00000000..b3d0bbdd --- /dev/null +++ b/gce-bootkube-worker/variables.tf @@ -0,0 +1,82 @@ +variable "cluster_name" { + type = "string" + description = "Unique cluster name" +} + +variable "ssh_authorized_key" { + type = "string" + description = "SSH public key for logging in as user 'core'" +} + +variable "network" { + type = "string" + description = "Name of the network to attach to the compute instance interfaces" +} + +# instances + +variable "count" { + type = "string" + description = "Number of worker compute instances the instance group should manage" +} + +variable "zone" { + type = "string" + description = "Google zone that compute instances in the group should be created in (e.g. gcloud compute zones list)" +} + +variable "machine_type" { + type = "string" + description = "Machine type for compute instances (e.g. gcloud compute machine-types list)" +} + +variable "os_image" { + type = "string" + description = "OS image from which to initialize the disk (e.g. gcloud compute images list)" +} + +variable "disk_size" { + type = "string" + default = "40" + description = "The size of the disk in gigabytes." +} + +variable "preemptible" { + type = "string" + default = "false" + description = "If enabled, Compute Engine will terminate instances randomly within 24 hours" +} + +# configuration + +variable "service_cidr" { + description = <