Run etcd cluster on-host, across controllers on Google Cloud

* Change controllers from a managed group to individual instances
* Create discrete DNS records pointing to each controller's private IP for etcd
* Change etcd to run on-host, across controllers (etcd-member.service)
* Reduce time to bootstrap a cluster
* Deprecate self-hosted-etcd on the Google Cloud platform
Dalton Hubble
2017-11-05 11:01:50 -08:00
parent ae07a21e3d
commit 7b38271212
17 changed files with 212 additions and 93 deletions
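
Concretely, each controller gets a stable etcd DNS name derived from its index (etcd0, etcd1, ...). A minimal sketch, using the hypothetical cluster name "yavin" and DNS zone "example.com" (neither is part of this commit), of the record names the diffs below construct with format():

# Hypothetical values for illustration only.
output "etcd0_record_name" {
  # Evaluates to "yavin-etcd0.example.com."
  value = "${format("%s-etcd%d.%s.", "yavin", 0, "example.com")}"
}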


@@ -1,6 +1,29 @@
 ---
 systemd:
   units:
+    - name: etcd-member.service
+      enable: true
+      dropins:
+        - name: 40-etcd-cluster.conf
+          contents: |
+            [Service]
+            Environment="ETCD_IMAGE_TAG=v3.2.0"
+            Environment="ETCD_NAME=${etcd_name}"
+            Environment="ETCD_ADVERTISE_CLIENT_URLS=https://${etcd_domain}:2379"
+            Environment="ETCD_INITIAL_ADVERTISE_PEER_URLS=https://${etcd_domain}:2380"
+            Environment="ETCD_LISTEN_CLIENT_URLS=https://0.0.0.0:2379"
+            Environment="ETCD_LISTEN_PEER_URLS=https://0.0.0.0:2380"
+            Environment="ETCD_INITIAL_CLUSTER=${etcd_initial_cluster}"
+            Environment="ETCD_STRICT_RECONFIG_CHECK=true"
+            Environment="ETCD_SSL_DIR=/etc/ssl/etcd"
+            Environment="ETCD_TRUSTED_CA_FILE=/etc/ssl/certs/etcd/server-ca.crt"
+            Environment="ETCD_CERT_FILE=/etc/ssl/certs/etcd/server.crt"
+            Environment="ETCD_KEY_FILE=/etc/ssl/certs/etcd/server.key"
+            Environment="ETCD_CLIENT_CERT_AUTH=true"
+            Environment="ETCD_PEER_TRUSTED_CA_FILE=/etc/ssl/certs/etcd/peer-ca.crt"
+            Environment="ETCD_PEER_CERT_FILE=/etc/ssl/certs/etcd/peer.crt"
+            Environment="ETCD_PEER_KEY_FILE=/etc/ssl/certs/etcd/peer.key"
+            Environment="ETCD_PEER_CLIENT_CERT_AUTH=true"
     - name: docker.service
       enable: true
     - name: locksmithd.service
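
The ${etcd_name}, ${etcd_domain}, and ${etcd_initial_cluster} placeholders above are filled in by Terraform's template rendering, shown in the next diff. A self-contained sketch of that substitution, using a hypothetical inline template and made-up values (etcd0, yavin-etcd0.example.com) rather than the real controller.yaml.tmpl:

# Hypothetical inline template; $${...} escapes Terraform's own interpolation
# so the template provider performs the substitution instead.
data "template_file" "etcd_dropin_example" {
  template = "Environment=\"ETCD_NAME=$${etcd_name}\"\nEnvironment=\"ETCD_ADVERTISE_CLIENT_URLS=https://$${etcd_domain}:2379\""

  vars = {
    etcd_name   = "etcd0"
    etcd_domain = "yavin-etcd0.example.com"
  }
}

# Renders to:
#   Environment="ETCD_NAME=etcd0"
#   Environment="ETCD_ADVERTISE_CLIENT_URLS=https://yavin-etcd0.example.com:2379"
output "etcd_dropin_example" {
  value = "${data.template_file.etcd_dropin_example.rendered}"
}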

View File

@@ -1,55 +1,38 @@
-# Managed Instance Group
-resource "google_compute_instance_group_manager" "controllers" {
-  name        = "${var.cluster_name}-controller-group"
-  description = "Compute instance group of ${var.cluster_name} controllers"
+# Discrete DNS records for each controller's private IPv4 for etcd usage.
+resource "google_dns_record_set" "etcds" {
+  count = "${var.count}"
 
-  # Instance name prefix for instances in the group
-  base_instance_name = "${var.cluster_name}-controller"
-  instance_template  = "${google_compute_instance_template.controller.self_link}"
-  update_strategy    = "RESTART"
-  zone               = "${var.zone}"
-  target_size        = "${var.count}"
+  # DNS Zone name where record should be created
+  managed_zone = "${var.dns_zone_name}"
 
-  # Target pool instances in the group should be added into
-  target_pools = [
-    "${google_compute_target_pool.controllers.self_link}",
-  ]
+  # DNS record
+  name = "${format("%s-etcd%d.%s.", var.cluster_name, count.index, var.dns_zone)}"
+  type = "A"
+  ttl  = 300
+
+  # private IPv4 address for etcd
+  rrdatas = ["${element(google_compute_instance.controllers.*.network_interface.0.address, count.index)}"]
 }
 
-# Controller Container Linux Config
-data "template_file" "controller_config" {
-  template = "${file("${path.module}/cl/controller.yaml.tmpl")}"
+# Controller instances
+resource "google_compute_instance" "controllers" {
+  count = "${var.count}"
 
-  vars = {
-    k8s_dns_service_ip      = "${cidrhost(var.service_cidr, 10)}"
-    k8s_etcd_service_ip     = "${cidrhost(var.service_cidr, 15)}"
-    ssh_authorized_key      = "${var.ssh_authorized_key}"
-    kubeconfig_ca_cert      = "${var.kubeconfig_ca_cert}"
-    kubeconfig_kubelet_cert = "${var.kubeconfig_kubelet_cert}"
-    kubeconfig_kubelet_key  = "${var.kubeconfig_kubelet_key}"
-    kubeconfig_server       = "${var.kubeconfig_server}"
-  }
-}
-
-data "ct_config" "controller_ign" {
-  content      = "${data.template_file.controller_config.rendered}"
-  pretty_print = false
-}
-
-resource "google_compute_instance_template" "controller" {
-  name_prefix  = "${var.cluster_name}-controller-"
-  description  = "Controller Instance template"
+  name         = "${var.cluster_name}-controller-${count.index}"
+  zone         = "${var.zone}"
   machine_type = "${var.machine_type}"
 
   metadata {
-    user-data = "${data.ct_config.controller_ign.rendered}"
+    user-data = "${element(data.ct_config.controller_ign.*.rendered, count.index)}"
   }
 
-  disk {
-    auto_delete  = true
-    boot         = true
-    source_image = "${var.os_image}"
-    disk_size_gb = "${var.disk_size}"
+  boot_disk {
+    auto_delete = true
+
+    initialize_params {
+      image = "${var.os_image}"
+      size  = "${var.disk_size}"
+    }
   }
 
   network_interface {
@@ -60,9 +43,44 @@ resource "google_compute_instance_template" "controller" {
   }
 
   can_ip_forward = true
-
-  lifecycle {
-    # To update an Instance Template, Terraform should replace the existing resource
-    create_before_destroy = true
-  }
 }
+
+# Controller Container Linux Config
+data "template_file" "controller_config" {
+  count = "${var.count}"
+
+  template = "${file("${path.module}/cl/controller.yaml.tmpl")}"
+
+  vars = {
+    # Cannot use cyclic dependencies on controllers or their DNS records
+    etcd_name   = "etcd${count.index}"
+    etcd_domain = "${var.cluster_name}-etcd${count.index}.${var.dns_zone}"
+
+    # etcd0=https://cluster-etcd0.example.com,etcd1=https://cluster-etcd1.example.com,...
+    etcd_initial_cluster = "${join(",", formatlist("%s=https://%s:2380", null_resource.repeat.*.triggers.name, null_resource.repeat.*.triggers.domain))}"
+
+    k8s_dns_service_ip      = "${cidrhost(var.service_cidr, 10)}"
+    ssh_authorized_key      = "${var.ssh_authorized_key}"
+    kubeconfig_ca_cert      = "${var.kubeconfig_ca_cert}"
+    kubeconfig_kubelet_cert = "${var.kubeconfig_kubelet_cert}"
+    kubeconfig_kubelet_key  = "${var.kubeconfig_kubelet_key}"
+    kubeconfig_server       = "${var.kubeconfig_server}"
+  }
+}
+
+# Horrible hack to generate a Terraform list of a desired length without dependencies.
+# Ideal ${repeat("etcd", 3) -> ["etcd", "etcd", "etcd"]}
+resource "null_resource" "repeat" {
+  count = "${var.count}"
+
+  triggers {
+    name   = "etcd${count.index}"
+    domain = "${var.cluster_name}-etcd${count.index}.${var.dns_zone}"
+  }
+}
+
+data "ct_config" "controller_ign" {
+  count        = "${var.count}"
+  content      = "${element(data.template_file.controller_config.*.rendered, count.index)}"
+  pretty_print = false
+}
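
The "repeat" hack above exists because the etcd peer list must be computable before the controller instances and their DNS records exist, which rules out referencing them directly. A self-contained sketch in the module's 0.11-era HCL style, with hypothetical default values ("yavin", "example.com", count 3), that can be applied on its own to see what etcd_initial_cluster ends up holding:

variable "cluster_name" {
  default = "yavin"
}

variable "dns_zone" {
  default = "example.com"
}

variable "count" {
  default = 3
}

# Each null_resource carries a name/domain pair in its triggers map,
# giving a list of the desired length with no dependency on real instances.
resource "null_resource" "repeat" {
  count = "${var.count}"

  triggers {
    name   = "etcd${count.index}"
    domain = "${var.cluster_name}-etcd${count.index}.${var.dns_zone}"
  }
}

# Prints:
#   etcd0=https://yavin-etcd0.example.com:2380,etcd1=https://yavin-etcd1.example.com:2380,etcd2=https://yavin-etcd2.example.com:2380
output "etcd_initial_cluster" {
  value = "${join(",", formatlist("%s=https://%s:2380", null_resource.repeat.*.triggers.name, null_resource.repeat.*.triggers.domain))}"
}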


@@ -1,4 +1,9 @@
-# Controller Network Load balancer DNS record
+# Static IPv4 address for the Network Load Balancer
+resource "google_compute_address" "controllers-ip" {
+  name = "${var.cluster_name}-controllers-ip"
+}
+
+# DNS record for the Network Load Balancer
 resource "google_dns_record_set" "controllers" {
   # DNS Zone name where record should be created
   managed_zone = "${var.dns_zone_name}"
@@ -12,12 +17,7 @@ resource "google_dns_record_set" "controllers" {
   rrdatas = ["${google_compute_address.controllers-ip.address}"]
 }
 
-# Static IP for the Network Load Balancer
-resource "google_compute_address" "controllers-ip" {
-  name = "${var.cluster_name}-controllers-ip"
-}
-
-# Network Load Balancer (i.e. forwarding rules)
+# Network Load Balancer (i.e. forwarding rule)
 resource "google_compute_forwarding_rule" "controller-https-rule" {
   name       = "${var.cluster_name}-controller-https-rule"
   ip_address = "${google_compute_address.controllers-ip.address}"
@@ -25,26 +25,23 @@ resource "google_compute_forwarding_rule" "controller-https-rule" {
   target = "${google_compute_target_pool.controllers.self_link}"
 }
 
-resource "google_compute_forwarding_rule" "controller-ssh-rule" {
-  name       = "${var.cluster_name}-controller-ssh-rule"
-  ip_address = "${google_compute_address.controllers-ip.address}"
-  port_range = "22"
-  target     = "${google_compute_target_pool.controllers.self_link}"
-}
-
-# Network Load Balancer target pool of instances.
+# Target pool of instances for the controller(s) Network Load Balancer
 resource "google_compute_target_pool" "controllers" {
   name = "${var.cluster_name}-controller-pool"
 
+  instances = [
+    "${google_compute_instance.controllers.*.self_link}",
+  ]
+
   health_checks = [
-    "${google_compute_http_health_check.ingress.name}",
+    "${google_compute_http_health_check.kubelet.name}",
  ]
 
   session_affinity = "NONE"
 }
 
 # Kubelet HTTP Health Check
-resource "google_compute_http_health_check" "ingress" {
+resource "google_compute_http_health_check" "kubelet" {
   name        = "${var.cluster_name}-kubelet-health"
   description = "Health check Kubelet health host port"
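
The hunk is truncated at the description line; a google_compute_http_health_check also needs a port and request path. A hedged sketch of how the renamed resource might be completed — the port and request_path below are assumptions for illustration, not taken from this diff:

# Hypothetical completion; port and request_path are assumed, not shown
# in the truncated hunk above.
resource "google_compute_http_health_check" "kubelet" {
  name        = "${var.cluster_name}-kubelet-health"
  description = "Health check Kubelet health host port"

  port         = 10255
  request_path = "/healthz"
}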


@@ -0,0 +1,11 @@
+output "etcd_fqdns" {
+  value = ["${null_resource.repeat.*.triggers.domain}"]
+}
+
+output "ipv4_public" {
+  value = ["${google_compute_instance.controllers.*.network_interface.0.access_config.0.assigned_nat_ip}"]
+}
+
+output "ipv4_private" {
+  value = ["${google_compute_instance.controllers.*.network_interface.0.address}"]
+}
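
These new outputs expose the etcd domains and controller addresses to the calling configuration. A sketch of consuming them — the module name "google-cloud-yavin" and the source ref are illustrative, and the required cluster variables are elided:

# Hypothetical wiring from a root Terraform configuration.
module "google-cloud-yavin" {
  source = "git::https://github.com/poseidon/typhoon//google-cloud/container-linux/kubernetes"

  # ...required cluster variables go here...
}

# e.g. feed the public addresses into extra DNS records or firewall rules
output "controller_public_ipv4" {
  value = ["${module.google-cloud-yavin.ipv4_public}"]
}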