From 805dd772a82d2cac647e1399400b34d4fcd157f1 Mon Sep 17 00:00:00 2001
From: Dalton Hubble
Date: Sun, 5 Nov 2017 22:36:50 -0800
Subject: [PATCH] Run etcd cluster on-host, across controllers on AWS

* Change controllers from an auto-scaling group to heterogeneous EC2 instances
* Create DNS records for each controller's private IP for etcd
* Change etcd to run on-host, across controllers (etcd-member.service)
* Reduce time to bootstrap a cluster
* Deprecate self-hosted etcd on the AWS platform
---
 CHANGES.md                                         |  6 ++
 aws/container-linux/kubernetes/bootkube.tf         | 17 ++--
 .../kubernetes/cl/controller.yaml.tmpl             | 23 ++++++
 aws/container-linux/kubernetes/controllers.tf      | 81 ++++++++++---------
 aws/container-linux/kubernetes/elb.tf              | 11 +--
 aws/container-linux/kubernetes/ssh.tf              | 73 ++++++++++++++++-
 .../container-linux/kubernetes/controllers.tf      |  2 +-
 docs/aws.md                                        | 14 +---
 docs/topics/performance.md                         |  2 +-
 .../kubernetes/controllers/controllers.tf          |  2 +-
 10 files changed, 161 insertions(+), 70 deletions(-)

diff --git a/CHANGES.md b/CHANGES.md
index 90a795f2..18fb1ee2 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -15,6 +15,12 @@ Notable changes between versions.
 * Remove the `zone` required variable
 * Remove the `controller_preemptible` optional variable
 
+#### AWS
+
+* Reduce time to bootstrap a cluster
+* Change etcd to run on-host, across controllers (etcd-member.service)
+* Remove support for self-hosted etcd
+
 ## v1.8.2
 
 * Kubernetes v1.8.2

diff --git a/aws/container-linux/kubernetes/bootkube.tf b/aws/container-linux/kubernetes/bootkube.tf
index aca04af4..f8b8afe9 100644
--- a/aws/container-linux/kubernetes/bootkube.tf
+++ b/aws/container-linux/kubernetes/bootkube.tf
@@ -2,13 +2,12 @@
 module "bootkube" {
   source = "git::https://github.com/poseidon/terraform-render-bootkube.git?ref=v0.8.1"
 
-  cluster_name                  = "${var.cluster_name}"
-  api_servers                   = ["${format("%s.%s", var.cluster_name, var.dns_zone)}"]
-  etcd_servers                  = ["http://127.0.0.1:2379"]
-  asset_dir                     = "${var.asset_dir}"
-  networking                    = "${var.networking}"
-  network_mtu                   = "${var.network_mtu}"
-  pod_cidr                      = "${var.pod_cidr}"
-  service_cidr                  = "${var.service_cidr}"
-  experimental_self_hosted_etcd = "true"
+  cluster_name = "${var.cluster_name}"
+  api_servers  = ["${format("%s.%s", var.cluster_name, var.dns_zone)}"]
+  etcd_servers = ["${aws_route53_record.etcds.*.fqdn}"]
+  asset_dir    = "${var.asset_dir}"
+  networking   = "${var.networking}"
+  network_mtu  = "${var.network_mtu}"
+  pod_cidr     = "${var.pod_cidr}"
+  service_cidr = "${var.service_cidr}"
 }

diff --git a/aws/container-linux/kubernetes/cl/controller.yaml.tmpl b/aws/container-linux/kubernetes/cl/controller.yaml.tmpl
index c68f81d7..b11daf63 100644
--- a/aws/container-linux/kubernetes/cl/controller.yaml.tmpl
+++ b/aws/container-linux/kubernetes/cl/controller.yaml.tmpl
@@ -1,6 +1,29 @@
 ---
 systemd:
   units:
+    - name: etcd-member.service
+      enable: true
+      dropins:
+        - name: 40-etcd-cluster.conf
+          contents: |
+            [Service]
+            Environment="ETCD_IMAGE_TAG=v3.2.0"
+            Environment="ETCD_NAME=${etcd_name}"
+            Environment="ETCD_ADVERTISE_CLIENT_URLS=https://${etcd_domain}:2379"
+            Environment="ETCD_INITIAL_ADVERTISE_PEER_URLS=https://${etcd_domain}:2380"
+            Environment="ETCD_LISTEN_CLIENT_URLS=https://0.0.0.0:2379"
+            Environment="ETCD_LISTEN_PEER_URLS=https://0.0.0.0:2380"
+            Environment="ETCD_INITIAL_CLUSTER=${etcd_initial_cluster}"
+            Environment="ETCD_STRICT_RECONFIG_CHECK=true"
+            Environment="ETCD_SSL_DIR=/etc/ssl/etcd"
+            Environment="ETCD_TRUSTED_CA_FILE=/etc/ssl/certs/etcd/server-ca.crt"
+            Environment="ETCD_CERT_FILE=/etc/ssl/certs/etcd/server.crt"
+            Environment="ETCD_KEY_FILE=/etc/ssl/certs/etcd/server.key"
+            Environment="ETCD_CLIENT_CERT_AUTH=true"
+            Environment="ETCD_PEER_TRUSTED_CA_FILE=/etc/ssl/certs/etcd/peer-ca.crt"
+            Environment="ETCD_PEER_CERT_FILE=/etc/ssl/certs/etcd/peer.crt"
+            Environment="ETCD_PEER_KEY_FILE=/etc/ssl/certs/etcd/peer.key"
+            Environment="ETCD_PEER_CLIENT_CERT_AUTH=true"
     - name: docker.service
       enable: true
     - name: locksmithd.service

diff --git a/aws/container-linux/kubernetes/controllers.tf b/aws/container-linux/kubernetes/controllers.tf
index b6ad2ffd..990bc631 100644
--- a/aws/container-linux/kubernetes/controllers.tf
+++ b/aws/container-linux/kubernetes/controllers.tf
@@ -1,39 +1,30 @@
-# Controllers AutoScaling Group
-resource "aws_autoscaling_group" "controllers" {
-  name           = "${var.cluster_name}-controller"
-  load_balancers = ["${aws_elb.controllers.id}"]
+# Discrete DNS records for each controller's private IPv4 for etcd usage
+resource "aws_route53_record" "etcds" {
+  count = "${var.controller_count}"
 
-  # count
-  desired_capacity = "${var.controller_count}"
-  min_size         = "${var.controller_count}"
-  max_size         = "${var.controller_count}"
+  # DNS Zone where record should be created
+  zone_id = "${var.dns_zone_id}"
 
-  # network
-  vpc_zone_identifier = ["${aws_subnet.public.*.id}"]
+  name = "${format("%s-etcd%d.%s.", var.cluster_name, count.index, var.dns_zone)}"
+  type = "A"
+  ttl  = 300
 
-  # template
-  launch_configuration = "${aws_launch_configuration.controller.name}"
-
-  lifecycle {
-    # override the default destroy and replace update behavior
-    create_before_destroy = true
-    ignore_changes        = ["image_id"]
-  }
-
-  tags = [{
-    key                 = "Name"
-    value               = "${var.cluster_name}-controller"
-    propagate_at_launch = true
-  }]
+  # private IPv4 address for etcd
+  records = ["${element(aws_instance.controllers.*.private_ip, count.index)}"]
 }
 
-# Controller template
-resource "aws_launch_configuration" "controller" {
-  name_prefix   = "${var.cluster_name}-controller-template-"
-  image_id      = "${data.aws_ami.coreos.image_id}"
+# Controller instances
+resource "aws_instance" "controllers" {
+  count = "${var.controller_count}"
+
+  tags = {
+    Name = "${var.cluster_name}-controller-${count.index}"
+  }
+
   instance_type = "${var.controller_type}"
-  user_data     = "${data.ct_config.controller_ign.rendered}"
+  ami           = "${data.aws_ami.coreos.image_id}"
+  user_data     = "${element(data.ct_config.controller_ign.*.rendered, count.index)}"
 
   # storage
   root_block_device {
@@ -43,21 +34,25 @@ resource "aws_launch_configuration" "controller" {
 
   # network
   associate_public_ip_address = true
-  security_groups             = ["${aws_security_group.controller.id}"]
-
-  lifecycle {
-    // Override the default destroy and replace update behavior
-    create_before_destroy = true
-  }
+  subnet_id                   = "${element(aws_subnet.public.*.id, count.index)}"
+  vpc_security_group_ids      = ["${aws_security_group.controller.id}"]
 }
 
 # Controller Container Linux Config
 data "template_file" "controller_config" {
+  count = "${var.controller_count}"
+
   template = "${file("${path.module}/cl/controller.yaml.tmpl")}"
 
   vars = {
+    # Cannot use cyclic dependencies on controllers or their DNS records
+    etcd_name   = "etcd${count.index}"
+    etcd_domain = "${var.cluster_name}-etcd${count.index}.${var.dns_zone}"
+
+    # etcd0=https://cluster-etcd0.example.com,etcd1=https://cluster-etcd1.example.com,...
+    etcd_initial_cluster = "${join(",", formatlist("%s=https://%s:2380", null_resource.repeat.*.triggers.name, null_resource.repeat.*.triggers.domain))}"
+
     k8s_dns_service_ip      = "${cidrhost(var.service_cidr, 10)}"
-    k8s_etcd_service_ip     = "${cidrhost(var.service_cidr, 15)}"
     ssh_authorized_key      = "${var.ssh_authorized_key}"
     kubeconfig_ca_cert      = "${module.bootkube.ca_cert}"
     kubeconfig_kubelet_cert = "${module.bootkube.kubelet_cert}"
@@ -66,8 +61,20 @@ data "template_file" "controller_config" {
   }
 }
 
+# Horrible hack to generate a Terraform list of a desired length without dependencies.
+# Ideal: ${repeat("etcd", 3)} -> ["etcd", "etcd", "etcd"]
+resource null_resource "repeat" {
+  count = "${var.controller_count}"
+
+  triggers {
+    name   = "etcd${count.index}"
+    domain = "${var.cluster_name}-etcd${count.index}.${var.dns_zone}"
+  }
+}
+
 data "ct_config" "controller_ign" {
-  content = "${data.template_file.controller_config.rendered}"
+  count        = "${var.controller_count}"
+  content      = "${element(data.template_file.controller_config.*.rendered, count.index)}"
   pretty_print = false
 }

diff --git a/aws/container-linux/kubernetes/elb.tf b/aws/container-linux/kubernetes/elb.tf
index 51f526f5..60f6d1a2 100644
--- a/aws/container-linux/kubernetes/elb.tf
+++ b/aws/container-linux/kubernetes/elb.tf
@@ -19,13 +19,6 @@ resource "aws_elb" "controllers" {
   subnets         = ["${aws_subnet.public.*.id}"]
   security_groups = ["${aws_security_group.controller.id}"]
 
-  listener {
-    lb_port           = 22
-    lb_protocol       = "tcp"
-    instance_port     = 22
-    instance_protocol = "tcp"
-  }
-
   listener {
     lb_port     = 443
     lb_protocol = "tcp"
@@ -33,6 +26,8 @@ resource "aws_elb" "controllers" {
     instance_protocol = "tcp"
   }
 
+  instances = ["${aws_instance.controllers.*.id}"]
+
   # Kubelet HTTP health check
   health_check {
     target              = "HTTP:10255/healthz"
@@ -42,7 +37,7 @@ resource "aws_elb" "controllers" {
     interval            = 6
   }
 
-  idle_timeout                = 1800
+  idle_timeout                = 3600
   connection_draining         = true
   connection_draining_timeout = 300
 }

diff --git a/aws/container-linux/kubernetes/ssh.tf b/aws/container-linux/kubernetes/ssh.tf
index 8b9b89dd..3630b661 100644
--- a/aws/container-linux/kubernetes/ssh.tf
+++ b/aws/container-linux/kubernetes/ssh.tf
@@ -1,12 +1,79 @@
+# Secure copy etcd TLS assets and kubeconfig to controllers. Activates kubelet.service
+resource "null_resource" "copy-secrets" {
+  count = "${var.controller_count}"
+
+  connection {
+    type    = "ssh"
+    host    = "${element(aws_instance.controllers.*.public_ip, count.index)}"
+    user    = "core"
+    timeout = "15m"
+  }
+
+  provisioner "file" {
+    content     = "${module.bootkube.kubeconfig}"
+    destination = "$HOME/kubeconfig"
+  }
+
+  provisioner "file" {
+    content     = "${module.bootkube.etcd_ca_cert}"
+    destination = "$HOME/etcd-client-ca.crt"
+  }
+
+  provisioner "file" {
+    content     = "${module.bootkube.etcd_client_cert}"
+    destination = "$HOME/etcd-client.crt"
+  }
+
+  provisioner "file" {
+    content     = "${module.bootkube.etcd_client_key}"
+    destination = "$HOME/etcd-client.key"
+  }
+
+  provisioner "file" {
+    content     = "${module.bootkube.etcd_server_cert}"
+    destination = "$HOME/etcd-server.crt"
+  }
+
+  provisioner "file" {
+    content     = "${module.bootkube.etcd_server_key}"
+    destination = "$HOME/etcd-server.key"
+  }
+
+  provisioner "file" {
+    content     = "${module.bootkube.etcd_peer_cert}"
+    destination = "$HOME/etcd-peer.crt"
+  }
+
+  provisioner "file" {
+    content     = "${module.bootkube.etcd_peer_key}"
+    destination = "$HOME/etcd-peer.key"
+  }
+
+  provisioner "remote-exec" {
+    inline = [
+      "sudo mkdir -p /etc/ssl/etcd/etcd",
+      "sudo mv etcd-client* /etc/ssl/etcd/",
+      "sudo cp /etc/ssl/etcd/etcd-client-ca.crt /etc/ssl/etcd/etcd/server-ca.crt",
+      "sudo mv etcd-server.crt /etc/ssl/etcd/etcd/server.crt",
+      "sudo mv etcd-server.key /etc/ssl/etcd/etcd/server.key",
+      "sudo cp /etc/ssl/etcd/etcd-client-ca.crt /etc/ssl/etcd/etcd/peer-ca.crt",
+      "sudo mv etcd-peer.crt /etc/ssl/etcd/etcd/peer.crt",
+      "sudo mv etcd-peer.key /etc/ssl/etcd/etcd/peer.key",
+      "sudo chown -R etcd:etcd /etc/ssl/etcd",
+      "sudo chmod -R 500 /etc/ssl/etcd",
+      "sudo mv /home/core/kubeconfig /etc/kubernetes/kubeconfig",
+    ]
+  }
+}
+
 # Secure copy bootkube assets to ONE controller and start bootkube to perform
 # one-time self-hosted cluster bootstrapping.
 resource "null_resource" "bootkube-start" {
-  depends_on = ["module.bootkube", "aws_autoscaling_group.controllers"]
+  depends_on = ["module.bootkube", "null_resource.copy-secrets", "aws_route53_record.controllers"]
 
-  # TODO: SSH to a controller's IP instead of waiting on DNS resolution
   connection {
     type    = "ssh"
-    host    = "${aws_route53_record.controllers.fqdn}"
+    host    = "${aws_instance.controllers.0.public_ip}"
     user    = "core"
     timeout = "15m"
   }

diff --git a/digital-ocean/container-linux/kubernetes/controllers.tf b/digital-ocean/container-linux/kubernetes/controllers.tf
index 243168e2..4355bef9 100644
--- a/digital-ocean/container-linux/kubernetes/controllers.tf
+++ b/digital-ocean/container-linux/kubernetes/controllers.tf
@@ -14,7 +14,7 @@ resource "digitalocean_record" "controllers" {
   value = "${element(digitalocean_droplet.controllers.*.ipv4_address, count.index)}"
 }
 
-# Discrete DNS records for each controller's private IPv4 for etcd usage.
+# Discrete DNS records for each controller's private IPv4 for etcd usage
 resource "digitalocean_record" "etcds" {
   count = "${var.controller_count}"

diff --git a/docs/aws.md b/docs/aws.md
index 68826e18..833d959a 100644
--- a/docs/aws.md
+++ b/docs/aws.md
@@ -4,7 +4,7 @@ In this tutorial, we'll create a Kubernetes v1.8.2 cluster on AWS.
 
 We'll declare a Kubernetes cluster in Terraform using the Typhoon Terraform module. On apply, a VPC, gateway, subnets, auto-scaling groups of controllers and workers, network load balancers for controllers and workers, and security groups will be created.
 
-Controllers and workers are provisioned to run a `kubelet`. A one-time [bootkube](https://github.com/kubernetes-incubator/bootkube) bootstrap schedules `etcd`, `apiserver`, `scheduler`, `controller-manager`, and `kube-dns` on controllers and runs `kube-proxy` and `calico` or `flannel` on each node. A generated `kubeconfig` provides `kubectl` access to the cluster.
+Controllers and workers are provisioned to run a `kubelet`. A one-time [bootkube](https://github.com/kubernetes-incubator/bootkube) bootstrap schedules an `apiserver`, `scheduler`, `controller-manager`, and `kube-dns` on controllers and runs `kube-proxy` and `calico` or `flannel` on each node. A generated `kubeconfig` provides `kubectl` access to the cluster.
 
 !!! warning "Alpha"
     Typhoon Kubernetes clusters on AWS are marked as "alpha".
@@ -140,17 +140,14 @@ Apply the changes to create the cluster.
 ```sh
 $ terraform apply
 ...
-module.aws-tempest.null_resource.bootkube-start: Still creating... (10m50s elapsed)
-module.aws-tempest.null_resource.bootkube-start: Still creating... (11m0s elapsed)
-module.aws-tempest.null_resource.bootkube-start: Creation complete after 11m8s (ID: 3961816482286168143)
+module.aws-tempest.null_resource.bootkube-start: Still creating... (4m50s elapsed)
+module.aws-tempest.null_resource.bootkube-start: Still creating... (5m0s elapsed)
+module.aws-tempest.null_resource.bootkube-start: Creation complete after 5m8s (ID: 3961816482286168143)
 
 Apply complete! Resources: 98 added, 0 changed, 0 destroyed.
 ```
 
-In 10-20 minutes, the Kubernetes cluster will be ready.
-
-!!! bug ""
-    Typhoon clusters on AWS take much longer to create than clusters on other platforms. This is related to DNS resolution time to the ASG, which will be resolved in a future version that uses static controllers.
+In 5-10 minutes, the Kubernetes cluster will be ready.
 
 ## Verify
 
 $ kubectl get pods --all-namespaces
 NAMESPACE     NAME                                       READY   STATUS    RESTARTS   AGE
 kube-system   calico-node-1m5bf                          2/2     Running   0          34m
 kube-system   calico-node-7jmr1                          2/2     Running   0          34m
 kube-system   calico-node-bknc8                          2/2     Running   0          34m
-kube-system   etcd-operator-2287495111-br512             1/1     Running   1          34m
 kube-system   kube-apiserver-4mjbk                       1/1     Running   0          34m
 kube-system   kube-controller-manager-3597210155-j2jbt   1/1     Running   1          34m
 kube-system   kube-controller-manager-3597210155-j7g7x   1/1     Running   0          34m
 kube-system   kube-dns-1187388186-wx1lg                  3/3     Running   0          34m
-kube-system   kube-etcd-0000                             1/1     Running   0          32m
-kube-system   kube-etcd-network-checkpointer-dt5pt       1/1     Running   0          34m
 kube-system   kube-proxy-14wxv                           1/1     Running   0          34m
 kube-system   kube-proxy-9vxh2                           1/1     Running   0          34m
 kube-system   kube-proxy-sbbsh                           1/1     Running   0          34m

diff --git a/docs/topics/performance.md b/docs/topics/performance.md
index cce0dc68..65b0199d 100644
--- a/docs/topics/performance.md
+++ b/docs/topics/performance.md
@@ -6,7 +6,7 @@ Provisioning times vary based on the platform. Sampling the time to create (appl
 
 | Platform      | Apply     | Destroy      |
 |---------------|-----------|--------------|
-| AWS           | 20 min    | 8 min 10 sec |
+| AWS           | 5 min     | 5 min        |
 | Bare-Metal    | 10-14 min | NA           |
 | Digital Ocean | 3 min 30 sec | 20 sec    |
 | Google Cloud  | 4 min     | 4 min 30 sec |

diff --git a/google-cloud/container-linux/kubernetes/controllers/controllers.tf b/google-cloud/container-linux/kubernetes/controllers/controllers.tf
index b29d64bd..da063be3 100644
--- a/google-cloud/container-linux/kubernetes/controllers/controllers.tf
+++ b/google-cloud/container-linux/kubernetes/controllers/controllers.tf
@@ -1,4 +1,4 @@
-# Discrete DNS records for each controller's private IPv4 for etcd usage.
+# Discrete DNS records for each controller's private IPv4 for etcd usage
 resource "google_dns_record_set" "etcds" {
   count = "${var.count}"
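
Reviewer note: the `null_resource.repeat` trick exists because each controller's Container Linux Config must embed the full `ETCD_INITIAL_CLUSTER` string before any `aws_instance` or `aws_route53_record` resource exists, so the names and domains are derived from `controller_count` alone. Below is a minimal standalone sketch (not part of the patch) of how those trigger lists expand via `formatlist` and `join`; the cluster name `tempest`, zone `example.com`, and the 3-controller values are hypothetical placeholders, not the module's real inputs.

```hcl
# Hypothetical stand-ins for null_resource.repeat.*.triggers.name / .domain
# on a 3-controller cluster named "tempest" with dns_zone "example.com".
variable "etcd_names" {
  type    = "list"
  default = ["etcd0", "etcd1", "etcd2"]
}

variable "etcd_domains" {
  type    = "list"
  default = ["tempest-etcd0.example.com", "tempest-etcd1.example.com", "tempest-etcd2.example.com"]
}

# formatlist pairs each name with its domain element-wise; join flattens the
# result into the single comma-separated string etcd expects:
#   etcd0=https://tempest-etcd0.example.com:2380,etcd1=https://tempest-etcd1.example.com:2380,etcd2=https://tempest-etcd2.example.com:2380
output "etcd_initial_cluster" {
  value = "${join(",", formatlist("%s=https://%s:2380", var.etcd_names, var.etcd_domains))}"
}
```

Running `terraform apply` on this sketch and reading the `etcd_initial_cluster` output reproduces the string injected into each controller's 40-etcd-cluster.conf dropin, which is why every controller can start etcd-member.service with an identical member list even though the instances and DNS records are created in the same apply.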