diff --git a/azure/container-linux/kubernetes/bootkube.tf b/azure/container-linux/kubernetes/bootkube.tf new file mode 100644 index 00000000..cdf2a05b --- /dev/null +++ b/azure/container-linux/kubernetes/bootkube.tf @@ -0,0 +1,13 @@ +# Self-hosted Kubernetes assets (kubeconfig, manifests) +module "bootkube" { + source = "git::https://github.com/poseidon/terraform-render-bootkube.git?ref=70c28399703cb4ec8930394682400d90d733e5a5" + + cluster_name = "${var.cluster_name}" + api_servers = ["${format("%s.%s", var.cluster_name, var.dns_zone)}"] + etcd_servers = ["${formatlist("%s.%s", azurerm_dns_a_record.etcds.*.name, var.dns_zone)}"] + asset_dir = "${var.asset_dir}" + networking = "flannel" + pod_cidr = "${var.pod_cidr}" + service_cidr = "${var.service_cidr}" + cluster_domain_suffix = "${var.cluster_domain_suffix}" +} diff --git a/azure/container-linux/kubernetes/cl/controller.yaml.tmpl b/azure/container-linux/kubernetes/cl/controller.yaml.tmpl new file mode 100644 index 00000000..ebbd9bc4 --- /dev/null +++ b/azure/container-linux/kubernetes/cl/controller.yaml.tmpl @@ -0,0 +1,163 @@ +--- +systemd: + units: + - name: etcd-member.service + enable: true + dropins: + - name: 40-etcd-cluster.conf + contents: | + [Service] + Environment="ETCD_IMAGE_TAG=v3.3.9" + Environment="ETCD_NAME=${etcd_name}" + Environment="ETCD_ADVERTISE_CLIENT_URLS=https://${etcd_domain}:2379" + Environment="ETCD_INITIAL_ADVERTISE_PEER_URLS=https://${etcd_domain}:2380" + Environment="ETCD_LISTEN_CLIENT_URLS=https://0.0.0.0:2379" + Environment="ETCD_LISTEN_PEER_URLS=https://0.0.0.0:2380" + Environment="ETCD_LISTEN_METRICS_URLS=http://0.0.0.0:2381" + Environment="ETCD_INITIAL_CLUSTER=${etcd_initial_cluster}" + Environment="ETCD_STRICT_RECONFIG_CHECK=true" + Environment="ETCD_SSL_DIR=/etc/ssl/etcd" + Environment="ETCD_TRUSTED_CA_FILE=/etc/ssl/certs/etcd/server-ca.crt" + Environment="ETCD_CERT_FILE=/etc/ssl/certs/etcd/server.crt" + Environment="ETCD_KEY_FILE=/etc/ssl/certs/etcd/server.key" + Environment="ETCD_CLIENT_CERT_AUTH=true" + Environment="ETCD_PEER_TRUSTED_CA_FILE=/etc/ssl/certs/etcd/peer-ca.crt" + Environment="ETCD_PEER_CERT_FILE=/etc/ssl/certs/etcd/peer.crt" + Environment="ETCD_PEER_KEY_FILE=/etc/ssl/certs/etcd/peer.key" + Environment="ETCD_PEER_CLIENT_CERT_AUTH=true" + - name: docker.service + enable: true + - name: locksmithd.service + mask: true + - name: wait-for-dns.service + enable: true + contents: | + [Unit] + Description=Wait for DNS entries + Wants=systemd-resolved.service + Before=kubelet.service + [Service] + Type=oneshot + RemainAfterExit=true + ExecStart=/bin/sh -c 'while ! 
/usr/bin/grep '^[^#[:space:]]' /etc/resolv.conf > /dev/null; do sleep 1; done' + [Install] + RequiredBy=kubelet.service + RequiredBy=etcd-member.service + - name: kubelet.service + enable: true + contents: | + [Unit] + Description=Kubelet via Hyperkube + Wants=rpc-statd.service + [Service] + EnvironmentFile=/etc/kubernetes/kubelet.env + Environment="RKT_RUN_ARGS=--uuid-file-save=/var/cache/kubelet-pod.uuid \ + --volume=resolv,kind=host,source=/etc/resolv.conf \ + --mount volume=resolv,target=/etc/resolv.conf \ + --volume var-lib-cni,kind=host,source=/var/lib/cni \ + --mount volume=var-lib-cni,target=/var/lib/cni \ + --volume var-lib-calico,kind=host,source=/var/lib/calico \ + --mount volume=var-lib-calico,target=/var/lib/calico \ + --volume opt-cni-bin,kind=host,source=/opt/cni/bin \ + --mount volume=opt-cni-bin,target=/opt/cni/bin \ + --volume var-log,kind=host,source=/var/log \ + --mount volume=var-log,target=/var/log \ + --insecure-options=image" + ExecStartPre=/bin/mkdir -p /opt/cni/bin + ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests + ExecStartPre=/bin/mkdir -p /etc/kubernetes/cni/net.d + ExecStartPre=/bin/mkdir -p /etc/kubernetes/checkpoint-secrets + ExecStartPre=/bin/mkdir -p /etc/kubernetes/inactive-manifests + ExecStartPre=/bin/mkdir -p /var/lib/cni + ExecStartPre=/bin/mkdir -p /var/lib/calico + ExecStartPre=/bin/mkdir -p /var/lib/kubelet/volumeplugins + ExecStartPre=/usr/bin/bash -c "grep 'certificate-authority-data' /etc/kubernetes/kubeconfig | awk '{print $2}' | base64 -d > /etc/kubernetes/ca.crt" + ExecStartPre=-/usr/bin/rkt rm --uuid-file=/var/cache/kubelet-pod.uuid + ExecStart=/usr/lib/coreos/kubelet-wrapper \ + --anonymous-auth=false \ + --authentication-token-webhook \ + --authorization-mode=Webhook \ + --client-ca-file=/etc/kubernetes/ca.crt \ + --cluster_dns=${k8s_dns_service_ip} \ + --cluster_domain=${cluster_domain_suffix} \ + --cni-conf-dir=/etc/kubernetes/cni/net.d \ + --exit-on-lock-contention \ + --kubeconfig=/etc/kubernetes/kubeconfig \ + --lock-file=/var/run/lock/kubelet.lock \ + --network-plugin=cni \ + --node-labels=node-role.kubernetes.io/master \ + --node-labels=node-role.kubernetes.io/controller="true" \ + --pod-manifest-path=/etc/kubernetes/manifests \ + --register-with-taints=node-role.kubernetes.io/master=:NoSchedule \ + --volume-plugin-dir=/var/lib/kubelet/volumeplugins + ExecStop=-/usr/bin/rkt stop --uuid-file=/var/cache/kubelet-pod.uuid + Restart=always + RestartSec=10 + [Install] + WantedBy=multi-user.target + - name: bootkube.service + contents: | + [Unit] + Description=Bootstrap a Kubernetes cluster + ConditionPathExists=!/opt/bootkube/init_bootkube.done + [Service] + Type=oneshot + RemainAfterExit=true + WorkingDirectory=/opt/bootkube + ExecStart=/opt/bootkube/bootkube-start + ExecStartPost=/bin/touch /opt/bootkube/init_bootkube.done + [Install] + WantedBy=multi-user.target +storage: + files: + - path: /etc/kubernetes/kubeconfig + filesystem: root + mode: 0644 + contents: + inline: | + ${kubeconfig} + - path: /etc/kubernetes/kubelet.env + filesystem: root + mode: 0644 + contents: + inline: | + KUBELET_IMAGE_URL=docker://k8s.gcr.io/hyperkube + KUBELET_IMAGE_TAG=v1.11.2 + - path: /etc/sysctl.d/max-user-watches.conf + filesystem: root + contents: + inline: | + fs.inotify.max_user_watches=16184 + - path: /opt/bootkube/bootkube-start + filesystem: root + mode: 0544 + user: + id: 500 + group: + id: 500 + contents: + inline: | + #!/bin/bash + # Wrapper for bootkube start + set -e + # Move experimental manifests + [ -n "$(ls 
/opt/bootkube/assets/manifests-*/* 2>/dev/null)" ] && mv /opt/bootkube/assets/manifests-*/* /opt/bootkube/assets/manifests && rm -rf /opt/bootkube/assets/manifests-* + BOOTKUBE_ACI="$${BOOTKUBE_ACI:-quay.io/coreos/bootkube}" + BOOTKUBE_VERSION="$${BOOTKUBE_VERSION:-v0.13.0}" + BOOTKUBE_ASSETS="$${BOOTKUBE_ASSETS:-/opt/bootkube/assets}" + exec /usr/bin/rkt run \ + --trust-keys-from-https \ + --volume assets,kind=host,source=$${BOOTKUBE_ASSETS} \ + --mount volume=assets,target=/assets \ + --volume bootstrap,kind=host,source=/etc/kubernetes \ + --mount volume=bootstrap,target=/etc/kubernetes \ + $${RKT_OPTS} \ + $${BOOTKUBE_ACI}:$${BOOTKUBE_VERSION} \ + --net=host \ + --dns=host \ + --exec=/bootkube -- start --asset-dir=/assets "$@" +passwd: + users: + - name: core + ssh_authorized_keys: + - "${ssh_authorized_key}" diff --git a/azure/container-linux/kubernetes/controllers.tf b/azure/container-linux/kubernetes/controllers.tf new file mode 100644 index 00000000..0f3c3efb --- /dev/null +++ b/azure/container-linux/kubernetes/controllers.tf @@ -0,0 +1,163 @@ +# Discrete DNS records for each controller's private IPv4 for etcd usage +resource "azurerm_dns_a_record" "etcds" { + count = "${var.controller_count}" + resource_group_name = "${var.dns_zone_group}" + + # DNS Zone name where record should be created + zone_name = "${var.dns_zone}" + + # DNS record + name = "${format("%s-etcd%d", var.cluster_name, count.index)}" + ttl = 300 + + # private IPv4 address for etcd + records = ["${element(azurerm_network_interface.controllers.*.private_ip_address, count.index)}"] +} + +locals { + # Channel for a Container Linux derivative + # coreos-stable -> Container Linux Stable + channel = "${element(split("-", var.os_image), 1)}" +} + +# Controller availability set to spread controllers +resource "azurerm_availability_set" "controllers" { + resource_group_name = "${azurerm_resource_group.cluster.name}" + + name = "${var.cluster_name}-controllers" + location = "${var.region}" + platform_fault_domain_count = 2 + platform_update_domain_count = 4 + managed = true +} + +# Controller instances +resource "azurerm_virtual_machine" "controllers" { + count = "${var.controller_count}" + resource_group_name = "${azurerm_resource_group.cluster.name}" + + name = "${var.cluster_name}-controller-${count.index}" + location = "${var.region}" + availability_set_id = "${azurerm_availability_set.controllers.id}" + vm_size = "${var.controller_type}" + + # boot + storage_image_reference { + publisher = "CoreOS" + offer = "CoreOS" + sku = "${local.channel}" + version = "latest" + } + + # storage + storage_os_disk { + name = "${var.cluster_name}-controller-${count.index}" + create_option = "FromImage" + caching = "ReadWrite" + disk_size_gb = "${var.disk_size}" + os_type = "Linux" + managed_disk_type = "Premium_LRS" + } + + # network + network_interface_ids = ["${element(azurerm_network_interface.controllers.*.id, count.index)}"] + + os_profile { + computer_name = "${var.cluster_name}-controller-${count.index}" + admin_username = "core" + custom_data = "${element(data.ct_config.controller-ignitions.*.rendered, count.index)}" + } + + # Azure mandates setting an ssh_key, even though Ignition custom_data handles it too + os_profile_linux_config { + disable_password_authentication = true + + ssh_keys { + path = "/home/core/.ssh/authorized_keys" + key_data = "${var.ssh_authorized_key}" + } + } + + # lifecycle + delete_os_disk_on_termination = true + delete_data_disks_on_termination = true + + lifecycle { + ignore_changes = [ + 
"storage_os_disk", + ] + } +} + +# Controller NICs with public and private IPv4 +resource "azurerm_network_interface" "controllers" { + count = "${var.controller_count}" + resource_group_name = "${azurerm_resource_group.cluster.name}" + + name = "${var.cluster_name}-controller-${count.index}" + location = "${azurerm_resource_group.cluster.location}" + network_security_group_id = "${azurerm_network_security_group.controller.id}" + + ip_configuration { + name = "ip0" + subnet_id = "${azurerm_subnet.controller.id}" + private_ip_address_allocation = "dynamic" + + # public IPv4 + public_ip_address_id = "${element(azurerm_public_ip.controllers.*.id, count.index)}" + + # backend address pool to which the NIC should be added + load_balancer_backend_address_pools_ids = ["${azurerm_lb_backend_address_pool.controller.id}"] + } +} + +# Controller public IPv4 addresses +resource "azurerm_public_ip" "controllers" { + count = "${var.controller_count}" + resource_group_name = "${azurerm_resource_group.cluster.name}" + + name = "${var.cluster_name}-controller-${count.index}" + location = "${azurerm_resource_group.cluster.location}" + sku = "Standard" + public_ip_address_allocation = "static" +} + +# Controller Ignition configs +data "ct_config" "controller-ignitions" { + count = "${var.controller_count}" + content = "${element(data.template_file.controller-configs.*.rendered, count.index)}" + pretty_print = false + snippets = ["${var.controller_clc_snippets}"] +} + +# Controller Container Linux configs +data "template_file" "controller-configs" { + count = "${var.controller_count}" + + template = "${file("${path.module}/cl/controller.yaml.tmpl")}" + + vars = { + # Cannot use cyclic dependencies on controllers or their DNS records + etcd_name = "etcd${count.index}" + etcd_domain = "${var.cluster_name}-etcd${count.index}.${var.dns_zone}" + + # etcd0=https://cluster-etcd0.example.com,etcd1=https://cluster-etcd1.example.com,... 
+ etcd_initial_cluster = "${join(",", data.template_file.etcds.*.rendered)}" + + kubeconfig = "${indent(10, module.bootkube.kubeconfig)}" + ssh_authorized_key = "${var.ssh_authorized_key}" + k8s_dns_service_ip = "${cidrhost(var.service_cidr, 10)}" + cluster_domain_suffix = "${var.cluster_domain_suffix}" + } +} + +data "template_file" "etcds" { + count = "${var.controller_count}" + template = "etcd$${index}=https://$${cluster_name}-etcd$${index}.$${dns_zone}:2380" + + vars { + index = "${count.index}" + cluster_name = "${var.cluster_name}" + dns_zone = "${var.dns_zone}" + } +} diff --git a/azure/container-linux/kubernetes/lb.tf b/azure/container-linux/kubernetes/lb.tf new file mode 100644 index 00000000..9e005231 --- /dev/null +++ b/azure/container-linux/kubernetes/lb.tf @@ -0,0 +1,129 @@ +# DNS record for the apiserver load balancer +resource "azurerm_dns_a_record" "apiserver" { + resource_group_name = "${var.dns_zone_group}" + + # DNS Zone name where record should be created + zone_name = "${var.dns_zone}" + + # DNS record + name = "${var.cluster_name}" + ttl = 300 + + # IPv4 address of apiserver load balancer + records = ["${azurerm_public_ip.lb-ipv4.ip_address}"] +} + +# Static IPv4 address for the cluster load balancer +resource "azurerm_public_ip" "lb-ipv4" { + resource_group_name = "${azurerm_resource_group.cluster.name}" + + name = "${var.cluster_name}-lb-ipv4" + location = "${var.region}" + sku = "Standard" + public_ip_address_allocation = "static" +} + +# Network Load Balancer for apiservers and ingress +resource "azurerm_lb" "cluster" { + resource_group_name = "${azurerm_resource_group.cluster.name}" + + name = "${var.cluster_name}" + location = "${var.region}" + sku = "Standard" + + frontend_ip_configuration { + name = "public" + public_ip_address_id = "${azurerm_public_ip.lb-ipv4.id}" + } +} + +resource "azurerm_lb_rule" "apiserver" { + resource_group_name = "${azurerm_resource_group.cluster.name}" + + name = "apiserver" + loadbalancer_id = "${azurerm_lb.cluster.id}" + frontend_ip_configuration_name = "public" + + protocol = "Tcp" + frontend_port = 6443 + backend_port = 6443 + backend_address_pool_id = "${azurerm_lb_backend_address_pool.controller.id}" + probe_id = "${azurerm_lb_probe.apiserver.id}" +} + +resource "azurerm_lb_rule" "ingress-http" { + resource_group_name = "${azurerm_resource_group.cluster.name}" + + name = "ingress-http" + loadbalancer_id = "${azurerm_lb.cluster.id}" + frontend_ip_configuration_name = "public" + + protocol = "Tcp" + frontend_port = 80 + backend_port = 80 + backend_address_pool_id = "${azurerm_lb_backend_address_pool.worker.id}" + probe_id = "${azurerm_lb_probe.ingress.id}" +} + +resource "azurerm_lb_rule" "ingress-https" { + resource_group_name = "${azurerm_resource_group.cluster.name}" + + name = "ingress-https" + loadbalancer_id = "${azurerm_lb.cluster.id}" + frontend_ip_configuration_name = "public" + + protocol = "Tcp" + frontend_port = 443 + backend_port = 443 + backend_address_pool_id = "${azurerm_lb_backend_address_pool.worker.id}" + probe_id = "${azurerm_lb_probe.ingress.id}" +} + +# Address pool of controllers +resource "azurerm_lb_backend_address_pool" "controller" { + resource_group_name = "${azurerm_resource_group.cluster.name}" + + name = "controller" + loadbalancer_id = "${azurerm_lb.cluster.id}" +} + +# Address pool of workers +resource "azurerm_lb_backend_address_pool" "worker" { + resource_group_name = "${azurerm_resource_group.cluster.name}" + + name = "worker" + loadbalancer_id = "${azurerm_lb.cluster.id}" +} + +# Health 
checks / probes + +# TCP health check for apiserver +resource "azurerm_lb_probe" "apiserver" { + resource_group_name = "${azurerm_resource_group.cluster.name}" + + name = "apiserver" + loadbalancer_id = "${azurerm_lb.cluster.id}" + protocol = "Tcp" + port = 6443 + + # unhealthy threshold + number_of_probes = 3 + + interval_in_seconds = 5 +} + +# HTTP health check for ingress +resource "azurerm_lb_probe" "ingress" { + resource_group_name = "${azurerm_resource_group.cluster.name}" + + name = "ingress" + loadbalancer_id = "${azurerm_lb.cluster.id}" + protocol = "Http" + port = 10254 + request_path = "/healthz" + + # unhealthy threshold + number_of_probes = 3 + + interval_in_seconds = 5 +} diff --git a/azure/container-linux/kubernetes/network.tf b/azure/container-linux/kubernetes/network.tf new file mode 100644 index 00000000..da67a3ed --- /dev/null +++ b/azure/container-linux/kubernetes/network.tf @@ -0,0 +1,33 @@ +# Organize cluster into a resource group +resource "azurerm_resource_group" "cluster" { + name = "${var.cluster_name}" + location = "${var.region}" +} + +resource "azurerm_virtual_network" "network" { + resource_group_name = "${azurerm_resource_group.cluster.name}" + + name = "${var.cluster_name}" + location = "${azurerm_resource_group.cluster.location}" + address_space = ["${var.host_cidr}"] +} + +# Subnets - separate subnets for controller and workers because Azure +# network security groups are based on IPv4 CIDR rather than instance +# tags like GCP or security group membership like AWS + +resource "azurerm_subnet" "controller" { + resource_group_name = "${azurerm_resource_group.cluster.name}" + + name = "controller" + virtual_network_name = "${azurerm_virtual_network.network.name}" + address_prefix = "${cidrsubnet(var.host_cidr, 1, 0)}" +} + +resource "azurerm_subnet" "worker" { + resource_group_name = "${azurerm_resource_group.cluster.name}" + + name = "worker" + virtual_network_name = "${azurerm_virtual_network.network.name}" + address_prefix = "${cidrsubnet(var.host_cidr, 1, 1)}" +} diff --git a/azure/container-linux/kubernetes/security.tf b/azure/container-linux/kubernetes/security.tf new file mode 100644 index 00000000..9967b9b7 --- /dev/null +++ b/azure/container-linux/kubernetes/security.tf @@ -0,0 +1,319 @@ +# Controller security group + +resource "azurerm_network_security_group" "controller" { + resource_group_name = "${azurerm_resource_group.cluster.name}" + + name = "${var.cluster_name}-controller" + location = "${azurerm_resource_group.cluster.location}" +} + +resource "azurerm_network_security_rule" "controller-ssh" { + resource_group_name = "${azurerm_resource_group.cluster.name}" + + name = "allow-ssh" + network_security_group_name = "${azurerm_network_security_group.controller.name}" + priority = "2000" + access = "Allow" + direction = "Inbound" + protocol = "Tcp" + source_port_range = "*" + destination_port_range = "22" + source_address_prefix = "*" + destination_address_prefix = "${azurerm_subnet.controller.address_prefix}" +} + +resource "azurerm_network_security_rule" "controller-etcd" { + resource_group_name = "${azurerm_resource_group.cluster.name}" + + name = "allow-etcd" + network_security_group_name = "${azurerm_network_security_group.controller.name}" + priority = "2005" + access = "Allow" + direction = "Inbound" + protocol = "Tcp" + source_port_range = "*" + destination_port_range = "2379-2380" + source_address_prefix = "${azurerm_subnet.controller.address_prefix}" + destination_address_prefix = "${azurerm_subnet.controller.address_prefix}" +} + +# 
Allow Prometheus to scrape etcd metrics +resource "azurerm_network_security_rule" "controller-etcd-metrics" { + resource_group_name = "${azurerm_resource_group.cluster.name}" + + name = "allow-etcd-metrics" + network_security_group_name = "${azurerm_network_security_group.controller.name}" + priority = "2010" + access = "Allow" + direction = "Inbound" + protocol = "Tcp" + source_port_range = "*" + destination_port_range = "2381" + source_address_prefix = "${azurerm_subnet.worker.address_prefix}" + destination_address_prefix = "${azurerm_subnet.controller.address_prefix}" +} + +resource "azurerm_network_security_rule" "controller-apiserver" { + resource_group_name = "${azurerm_resource_group.cluster.name}" + + name = "allow-apiserver" + network_security_group_name = "${azurerm_network_security_group.controller.name}" + priority = "2015" + access = "Allow" + direction = "Inbound" + protocol = "Tcp" + source_port_range = "*" + destination_port_range = "6443" + source_address_prefix = "*" + destination_address_prefix = "${azurerm_subnet.controller.address_prefix}" +} + +resource "azurerm_network_security_rule" "controller-flannel" { + resource_group_name = "${azurerm_resource_group.cluster.name}" + + name = "allow-flannel" + network_security_group_name = "${azurerm_network_security_group.controller.name}" + priority = "2020" + access = "Allow" + direction = "Inbound" + protocol = "Udp" + source_port_range = "*" + destination_port_range = "8472" + source_address_prefixes = ["${azurerm_subnet.controller.address_prefix}", "${azurerm_subnet.worker.address_prefix}"] + destination_address_prefix = "${azurerm_subnet.controller.address_prefix}" +} + +# Allow Prometheus to scrape node-exporter daemonset +resource "azurerm_network_security_rule" "controller-node-exporter" { + resource_group_name = "${azurerm_resource_group.cluster.name}" + + name = "allow-node-exporter" + network_security_group_name = "${azurerm_network_security_group.controller.name}" + priority = "2025" + access = "Allow" + direction = "Inbound" + protocol = "Tcp" + source_port_range = "*" + destination_port_range = "9100" + source_address_prefix = "${azurerm_subnet.worker.address_prefix}" + destination_address_prefix = "${azurerm_subnet.controller.address_prefix}" +} + +# Allow apiserver to access kubelet's for exec, log, port-forward +resource "azurerm_network_security_rule" "controller-kubelet" { + resource_group_name = "${azurerm_resource_group.cluster.name}" + + name = "allow-kubelet" + network_security_group_name = "${azurerm_network_security_group.controller.name}" + priority = "2030" + access = "Allow" + direction = "Inbound" + protocol = "Tcp" + source_port_range = "*" + destination_port_range = "10250" + + # allow Prometheus to scrape kubelet metrics too + source_address_prefixes = ["${azurerm_subnet.controller.address_prefix}", "${azurerm_subnet.worker.address_prefix}"] + destination_address_prefix = "${azurerm_subnet.controller.address_prefix}" +} + +# Allow heapster / metrics-server to scrape kubelet read-only +resource "azurerm_network_security_rule" "controller-kubelet-read" { + resource_group_name = "${azurerm_resource_group.cluster.name}" + + name = "allow-kubelet-read" + network_security_group_name = "${azurerm_network_security_group.controller.name}" + priority = "2035" + access = "Allow" + direction = "Inbound" + protocol = "Tcp" + source_port_range = "*" + destination_port_range = "10255" + source_address_prefix = "${azurerm_subnet.worker.address_prefix}" + destination_address_prefix = 
"${azurerm_subnet.controller.address_prefix}" +} + +# Override Azure AllowVNetInBound and AllowAzureLoadBalancerInBound +# https://docs.microsoft.com/en-us/azure/virtual-network/security-overview#default-security-rules + +resource "azurerm_network_security_rule" "controller-allow-loadblancer" { + resource_group_name = "${azurerm_resource_group.cluster.name}" + + name = "allow-loadbalancer" + network_security_group_name = "${azurerm_network_security_group.controller.name}" + priority = "3000" + access = "Allow" + direction = "Inbound" + protocol = "*" + source_port_range = "*" + destination_port_range = "*" + source_address_prefix = "AzureLoadBalancer" + destination_address_prefix = "*" +} + +resource "azurerm_network_security_rule" "controller-deny-all" { + resource_group_name = "${azurerm_resource_group.cluster.name}" + + name = "deny-all" + network_security_group_name = "${azurerm_network_security_group.controller.name}" + priority = "3005" + access = "Deny" + direction = "Inbound" + protocol = "*" + source_port_range = "*" + destination_port_range = "*" + source_address_prefix = "*" + destination_address_prefix = "*" +} + +# Worker security group + +resource "azurerm_network_security_group" "worker" { + resource_group_name = "${azurerm_resource_group.cluster.name}" + + name = "${var.cluster_name}-worker" + location = "${azurerm_resource_group.cluster.location}" +} + +resource "azurerm_network_security_rule" "worker-ssh" { + resource_group_name = "${azurerm_resource_group.cluster.name}" + + name = "allow-ssh" + network_security_group_name = "${azurerm_network_security_group.worker.name}" + priority = "2000" + access = "Allow" + direction = "Inbound" + protocol = "Tcp" + source_port_range = "*" + destination_port_range = "22" + source_address_prefix = "${azurerm_subnet.controller.address_prefix}" + destination_address_prefix = "${azurerm_subnet.worker.address_prefix}" +} + +resource "azurerm_network_security_rule" "worker-http" { + resource_group_name = "${azurerm_resource_group.cluster.name}" + + name = "allow-http" + network_security_group_name = "${azurerm_network_security_group.worker.name}" + priority = "2005" + access = "Allow" + direction = "Inbound" + protocol = "Tcp" + source_port_range = "*" + destination_port_range = "80" + source_address_prefix = "*" + destination_address_prefix = "${azurerm_subnet.worker.address_prefix}" +} + +resource "azurerm_network_security_rule" "worker-https" { + resource_group_name = "${azurerm_resource_group.cluster.name}" + + name = "allow-https" + network_security_group_name = "${azurerm_network_security_group.worker.name}" + priority = "2010" + access = "Allow" + direction = "Inbound" + protocol = "Tcp" + source_port_range = "*" + destination_port_range = "443" + source_address_prefix = "*" + destination_address_prefix = "${azurerm_subnet.worker.address_prefix}" +} + +resource "azurerm_network_security_rule" "worker-flannel" { + resource_group_name = "${azurerm_resource_group.cluster.name}" + + name = "allow-flannel" + network_security_group_name = "${azurerm_network_security_group.worker.name}" + priority = "2015" + access = "Allow" + direction = "Inbound" + protocol = "Udp" + source_port_range = "*" + destination_port_range = "8472" + source_address_prefixes = ["${azurerm_subnet.controller.address_prefix}", "${azurerm_subnet.worker.address_prefix}"] + destination_address_prefix = "${azurerm_subnet.worker.address_prefix}" +} + +# Allow Prometheus to scrape node-exporter daemonset +resource "azurerm_network_security_rule" "worker-node-exporter" { + 
resource_group_name = "${azurerm_resource_group.cluster.name}" + + name = "allow-node-exporter" + network_security_group_name = "${azurerm_network_security_group.worker.name}" + priority = "2020" + access = "Allow" + direction = "Inbound" + protocol = "Tcp" + source_port_range = "*" + destination_port_range = "9100" + source_address_prefix = "${azurerm_subnet.worker.address_prefix}" + destination_address_prefix = "${azurerm_subnet.worker.address_prefix}" +} + +# Allow apiserver to access kubelet's for exec, log, port-forward +resource "azurerm_network_security_rule" "worker-kubelet" { + resource_group_name = "${azurerm_resource_group.cluster.name}" + + name = "allow-kubelet" + network_security_group_name = "${azurerm_network_security_group.worker.name}" + priority = "2025" + access = "Allow" + direction = "Inbound" + protocol = "Tcp" + source_port_range = "*" + destination_port_range = "10250" + + # allow Prometheus to scrape kubelet metrics too + source_address_prefixes = ["${azurerm_subnet.controller.address_prefix}", "${azurerm_subnet.worker.address_prefix}"] + destination_address_prefix = "${azurerm_subnet.worker.address_prefix}" +} + +# Allow heapster / metrics-server to scrape kubelet read-only +resource "azurerm_network_security_rule" "worker-kubelet-read" { + resource_group_name = "${azurerm_resource_group.cluster.name}" + + name = "allow-kubelet-read" + network_security_group_name = "${azurerm_network_security_group.worker.name}" + priority = "2030" + access = "Allow" + direction = "Inbound" + protocol = "Tcp" + source_port_range = "*" + destination_port_range = "10255" + source_address_prefix = "${azurerm_subnet.worker.address_prefix}" + destination_address_prefix = "${azurerm_subnet.worker.address_prefix}" +} + +# Override Azure AllowVNetInBound and AllowAzureLoadBalancerInBound +# https://docs.microsoft.com/en-us/azure/virtual-network/security-overview#default-security-rules + +resource "azurerm_network_security_rule" "worker-allow-loadblancer" { + resource_group_name = "${azurerm_resource_group.cluster.name}" + + name = "allow-loadbalancer" + network_security_group_name = "${azurerm_network_security_group.worker.name}" + priority = "3000" + access = "Allow" + direction = "Inbound" + protocol = "*" + source_port_range = "*" + destination_port_range = "*" + source_address_prefix = "AzureLoadBalancer" + destination_address_prefix = "*" +} + +resource "azurerm_network_security_rule" "worker-deny-all" { + resource_group_name = "${azurerm_resource_group.cluster.name}" + + name = "deny-all" + network_security_group_name = "${azurerm_network_security_group.worker.name}" + priority = "3005" + access = "Deny" + direction = "Inbound" + protocol = "*" + source_port_range = "*" + destination_port_range = "*" + source_address_prefix = "*" + destination_address_prefix = "*" +} diff --git a/azure/container-linux/kubernetes/ssh.tf b/azure/container-linux/kubernetes/ssh.tf new file mode 100644 index 00000000..5c8aef26 --- /dev/null +++ b/azure/container-linux/kubernetes/ssh.tf @@ -0,0 +1,95 @@ +# Secure copy etcd TLS assets to controllers. 
+resource "null_resource" "copy-controller-secrets" { + count = "${var.controller_count}" + + depends_on = [ + "azurerm_virtual_machine.controllers", + ] + + connection { + type = "ssh" + host = "${element(azurerm_public_ip.controllers.*.ip_address, count.index)}" + user = "core" + timeout = "15m" + } + + provisioner "file" { + content = "${module.bootkube.etcd_ca_cert}" + destination = "$HOME/etcd-client-ca.crt" + } + + provisioner "file" { + content = "${module.bootkube.etcd_client_cert}" + destination = "$HOME/etcd-client.crt" + } + + provisioner "file" { + content = "${module.bootkube.etcd_client_key}" + destination = "$HOME/etcd-client.key" + } + + provisioner "file" { + content = "${module.bootkube.etcd_server_cert}" + destination = "$HOME/etcd-server.crt" + } + + provisioner "file" { + content = "${module.bootkube.etcd_server_key}" + destination = "$HOME/etcd-server.key" + } + + provisioner "file" { + content = "${module.bootkube.etcd_peer_cert}" + destination = "$HOME/etcd-peer.crt" + } + + provisioner "file" { + content = "${module.bootkube.etcd_peer_key}" + destination = "$HOME/etcd-peer.key" + } + + provisioner "remote-exec" { + inline = [ + "sudo mkdir -p /etc/ssl/etcd/etcd", + "sudo mv etcd-client* /etc/ssl/etcd/", + "sudo cp /etc/ssl/etcd/etcd-client-ca.crt /etc/ssl/etcd/etcd/server-ca.crt", + "sudo mv etcd-server.crt /etc/ssl/etcd/etcd/server.crt", + "sudo mv etcd-server.key /etc/ssl/etcd/etcd/server.key", + "sudo cp /etc/ssl/etcd/etcd-client-ca.crt /etc/ssl/etcd/etcd/peer-ca.crt", + "sudo mv etcd-peer.crt /etc/ssl/etcd/etcd/peer.crt", + "sudo mv etcd-peer.key /etc/ssl/etcd/etcd/peer.key", + "sudo chown -R etcd:etcd /etc/ssl/etcd", + "sudo chmod -R 500 /etc/ssl/etcd", + ] + } +} + +# Secure copy bootkube assets to ONE controller and start bootkube to perform +# one-time self-hosted cluster bootstrapping. +resource "null_resource" "bootkube-start" { + depends_on = [ + "module.bootkube", + "module.workers", + "azurerm_dns_a_record.apiserver", + "null_resource.copy-controller-secrets", + ] + + connection { + type = "ssh" + host = "${element(azurerm_public_ip.controllers.*.ip_address, 0)}" + user = "core" + timeout = "15m" + } + + provisioner "file" { + source = "${var.asset_dir}" + destination = "$HOME/assets" + } + + provisioner "remote-exec" { + inline = [ + "sudo mv $HOME/assets /opt/bootkube", + "sudo systemctl start bootkube", + ] + } +} diff --git a/azure/container-linux/kubernetes/variables.tf b/azure/container-linux/kubernetes/variables.tf new file mode 100644 index 00000000..748a41a0 --- /dev/null +++ b/azure/container-linux/kubernetes/variables.tf @@ -0,0 +1,117 @@ +variable "cluster_name" { + type = "string" + description = "Unique cluster name (prepended to dns_zone)" +} + +# Azure + +variable "region" { + type = "string" + description = "Azure Region (e.g. centralus , see `az account list-locations --output table`)" +} + +variable "dns_zone" { + type = "string" + description = "Azure DNS Zone (e.g. azure.example.com)" +} + +variable "dns_zone_group" { + type = "string" + description = "Resource group where the Azure DNS Zone resides (e.g. global)" +} + +# instances + +variable "controller_count" { + type = "string" + default = "1" + description = "Number of controllers (i.e. 
masters)" +} + +variable "worker_count" { + type = "string" + default = "1" + description = "Number of workers" +} + +variable "controller_type" { + type = "string" + default = "Standard_DS1_v2" + description = "Machine type for controllers (see `az vm list-skus --location centralus`)" +} + +variable "worker_type" { + type = "string" + default = "Standard_F1" + description = "Machine type for workers (see `az vm list-skus --location centralus`)" +} + +variable "os_image" { + type = "string" + default = "coreos-stable" + description = "Channel for a Container Linux derivative (coreos-stable, coreos-beta, coreos-alpha)" +} + +variable "disk_size" { + type = "string" + default = "40" + description = "Size of the disk in GB" +} + +variable "worker_priority" { + type = "string" + default = "Regular" + description = "Set worker priority to Low to use reduced cost surplus capacity, with the tradeoff that instances can be evicted at any time." +} + +variable "controller_clc_snippets" { + type = "list" + description = "Controller Container Linux Config snippets" + default = [] +} + +variable "worker_clc_snippets" { + type = "list" + description = "Worker Container Linux Config snippets" + default = [] +} + +# configuration + +variable "ssh_authorized_key" { + type = "string" + description = "SSH public key for user 'core'" +} + +variable "asset_dir" { + description = "Path to a directory where generated assets should be placed (contains secrets)" + type = "string" +} + +variable "host_cidr" { + description = "CIDR IPv4 range to assign to EC2 nodes" + type = "string" + default = "10.0.0.0/16" +} + +variable "pod_cidr" { + description = "CIDR IPv4 range to assign Kubernetes pods" + type = "string" + default = "10.2.0.0/16" +} + +variable "service_cidr" { + description = < /dev/null; do sleep 1; done' + [Install] + RequiredBy=kubelet.service + - name: kubelet.service + enable: true + contents: | + [Unit] + Description=Kubelet via Hyperkube + Wants=rpc-statd.service + [Service] + EnvironmentFile=/etc/kubernetes/kubelet.env + Environment="RKT_RUN_ARGS=--uuid-file-save=/var/cache/kubelet-pod.uuid \ + --volume=resolv,kind=host,source=/etc/resolv.conf \ + --mount volume=resolv,target=/etc/resolv.conf \ + --volume var-lib-cni,kind=host,source=/var/lib/cni \ + --mount volume=var-lib-cni,target=/var/lib/cni \ + --volume var-lib-calico,kind=host,source=/var/lib/calico \ + --mount volume=var-lib-calico,target=/var/lib/calico \ + --volume opt-cni-bin,kind=host,source=/opt/cni/bin \ + --mount volume=opt-cni-bin,target=/opt/cni/bin \ + --volume var-log,kind=host,source=/var/log \ + --mount volume=var-log,target=/var/log \ + --insecure-options=image" + ExecStartPre=/bin/mkdir -p /opt/cni/bin + ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests + ExecStartPre=/bin/mkdir -p /etc/kubernetes/cni/net.d + ExecStartPre=/bin/mkdir -p /var/lib/cni + ExecStartPre=/bin/mkdir -p /var/lib/calico + ExecStartPre=/bin/mkdir -p /var/lib/kubelet/volumeplugins + ExecStartPre=/usr/bin/bash -c "grep 'certificate-authority-data' /etc/kubernetes/kubeconfig | awk '{print $2}' | base64 -d > /etc/kubernetes/ca.crt" + ExecStartPre=-/usr/bin/rkt rm --uuid-file=/var/cache/kubelet-pod.uuid + ExecStart=/usr/lib/coreos/kubelet-wrapper \ + --anonymous-auth=false \ + --authentication-token-webhook \ + --authorization-mode=Webhook \ + --client-ca-file=/etc/kubernetes/ca.crt \ + --cluster_dns=${k8s_dns_service_ip} \ + --cluster_domain=${cluster_domain_suffix} \ + --cni-conf-dir=/etc/kubernetes/cni/net.d \ + --exit-on-lock-contention \ + 
--kubeconfig=/etc/kubernetes/kubeconfig \ + --lock-file=/var/run/lock/kubelet.lock \ + --network-plugin=cni \ + --node-labels=node-role.kubernetes.io/node \ + --pod-manifest-path=/etc/kubernetes/manifests \ + --volume-plugin-dir=/var/lib/kubelet/volumeplugins + ExecStop=-/usr/bin/rkt stop --uuid-file=/var/cache/kubelet-pod.uuid + Restart=always + RestartSec=5 + [Install] + WantedBy=multi-user.target + - name: delete-node.service + enable: true + contents: | + [Unit] + Description=Waiting to delete Kubernetes node on shutdown + [Service] + Type=oneshot + RemainAfterExit=true + ExecStart=/bin/true + ExecStop=/etc/kubernetes/delete-node + [Install] + WantedBy=multi-user.target +storage: + files: + - path: /etc/kubernetes/kubeconfig + filesystem: root + mode: 0644 + contents: + inline: | + ${kubeconfig} + - path: /etc/kubernetes/kubelet.env + filesystem: root + mode: 0644 + contents: + inline: | + KUBELET_IMAGE_URL=docker://k8s.gcr.io/hyperkube + KUBELET_IMAGE_TAG=v1.11.2 + - path: /etc/sysctl.d/max-user-watches.conf + filesystem: root + contents: + inline: | + fs.inotify.max_user_watches=16184 + - path: /etc/kubernetes/delete-node + filesystem: root + mode: 0744 + contents: + inline: | + #!/bin/bash + set -e + exec /usr/bin/rkt run \ + --trust-keys-from-https \ + --volume config,kind=host,source=/etc/kubernetes \ + --mount volume=config,target=/etc/kubernetes \ + --insecure-options=image \ + docker://k8s.gcr.io/hyperkube:v1.11.2 \ + --net=host \ + --dns=host \ + --exec=/kubectl -- --kubeconfig=/etc/kubernetes/kubeconfig delete node $(hostname) +passwd: + users: + - name: core + ssh_authorized_keys: + - "${ssh_authorized_key}" diff --git a/azure/container-linux/kubernetes/workers/ingress.tf b/azure/container-linux/kubernetes/workers/ingress.tf new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/azure/container-linux/kubernetes/workers/ingress.tf @@ -0,0 +1 @@ + diff --git a/azure/container-linux/kubernetes/workers/variables.tf b/azure/container-linux/kubernetes/workers/variables.tf new file mode 100644 index 00000000..a33a5beb --- /dev/null +++ b/azure/container-linux/kubernetes/workers/variables.tf @@ -0,0 +1,97 @@ +variable "name" { + type = "string" + description = "Unique name for the worker pool" +} + +# Azure + +variable "region" { + type = "string" + description = "Must be set to the Azure Region of cluster" +} + +variable "resource_group_name" { + type = "string" + description = "Must be set to the resource group name of cluster" +} + +variable "subnet_id" { + type = "string" + description = "Must be set to the `worker_subnet_id` output by cluster" +} + +variable "security_group_id" { + type = "string" + description = "Must be set to the `worker_security_group_id` output by cluster" +} + +variable "backend_address_pool_id" { + type = "string" + description = "Must be set to the `worker_backend_address_pool_id` output by cluster" +} + +# instances + +variable "count" { + type = "string" + default = "1" + description = "Number of instances" +} + +variable "vm_type" { + type = "string" + default = "Standard_DS1_v2" + description = "Machine type for instances (see `az vm list-skus --location centralus`)" +} + +variable "os_image" { + type = "string" + default = "coreos-stable" + description = "Channel for a Container Linux derivative (coreos-stable, coreos-beta, coreos-alpha)" +} + +variable "disk_size" { + type = "string" + default = "40" + description = "Size of the disk in GB" +} + +variable "priority" { + type = "string" + default = "Regular" + description = "Set priority 
to Low to use reduced cost surplus capacity, with the tradeoff that instances can be evicted at any time." +} + +variable "clc_snippets" { + type = "list" + description = "Container Linux Config snippets" + default = [] +} + +# configuration + +variable "kubeconfig" { + type = "string" + description = "Must be set to `kubeconfig` output by cluster" +} + +variable "ssh_authorized_key" { + type = "string" + description = "SSH public key for user 'core'" +} + +variable "service_cidr" { + description = < Container Linux Stable + channel = "${element(split("-", var.os_image), 1)}" +} + +# Workers scale set +resource "azurerm_virtual_machine_scale_set" "workers" { + resource_group_name = "${var.resource_group_name}" + + name = "${var.name}-workers" + location = "${var.region}" + + sku { + name = "${var.vm_type}" + tier = "standard" + capacity = "${var.count}" + } + + # boot + storage_profile_image_reference { + publisher = "CoreOS" + offer = "CoreOS" + sku = "${local.channel}" + version = "latest" + } + + # storage + storage_profile_os_disk { + create_option = "FromImage" + caching = "ReadWrite" + os_type = "linux" + managed_disk_type = "Standard_LRS" + } + + os_profile { + computer_name_prefix = "${var.name}-worker-" + admin_username = "core" + + # Required by Azure, but password auth is disabled below + admin_password = "" + custom_data = "${element(data.ct_config.worker-ignitions.*.rendered, count.index)}" + } + + # Azure mandates setting an ssh_key, even though Ignition custom_data handles it too + os_profile_linux_config { + disable_password_authentication = true + + ssh_keys { + path = "/home/core/.ssh/authorized_keys" + key_data = "${var.ssh_authorized_key}" + } + } + + # network + network_profile { + name = "nic0" + primary = true + network_security_group_id = "${var.security_group_id}" + + ip_configuration { + name = "ip0" + subnet_id = "${var.subnet_id}" + + # backend address pool to which the NIC should be added + load_balancer_backend_address_pool_ids = ["${var.backend_address_pool_id}"] + } + } + + # lifecycle + priority = "${var.priority}" + upgrade_policy_mode = "Manual" +} + +# Scale up or down to maintain desired number, tolerating deallocations. +resource "azurerm_autoscale_setting" "workers" { + resource_group_name = "${var.resource_group_name}" + + name = "maintain-desired" + location = "${var.region}" + + # autoscale + enabled = true + target_resource_id = "${azurerm_virtual_machine_scale_set.workers.id}" + + profile { + name = "default" + + capacity { + minimum = "${var.count}" + default = "${var.count}" + maximum = "${var.count}" + } + } +} + +# Worker Ignition configs +data "ct_config" "worker-ignitions" { + content = "${data.template_file.worker-configs.rendered}" + pretty_print = false + snippets = ["${var.clc_snippets}"] +} + +# Worker Container Linux configs +data "template_file" "worker-configs" { + template = "${file("${path.module}/cl/worker.yaml.tmpl")}" + + vars = { + kubeconfig = "${indent(10, var.kubeconfig)}" + ssh_authorized_key = "${var.ssh_authorized_key}" + k8s_dns_service_ip = "${cidrhost(var.service_cidr, 10)}" + cluster_domain_suffix = "${var.cluster_domain_suffix}" + } +}
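Usage sketch (editor's addition, not part of the diff): the new azure/container-linux/kubernetes module could be instantiated from a root Terraform configuration roughly as follows. The cluster name, DNS zone, resource group, SSH key, and asset path below are illustrative assumptions; the variable names match the variables.tf added above.

module "azure-example" {
  # Path of the module directory introduced in this diff, assuming the
  # repository root is the Terraform working directory.
  source = "./azure/container-linux/kubernetes"

  # Azure
  region         = "centralus"
  dns_zone       = "azure.example.com"   # hypothetical Azure DNS zone
  dns_zone_group = "example-zones"       # hypothetical resource group holding that zone

  # cluster
  cluster_name       = "example"
  ssh_authorized_key = "ssh-rsa AAAAB3Nz..."                    # placeholder public key for user 'core'
  asset_dir          = "/home/user/.secrets/clusters/example"   # generated assets (kubeconfig, manifests, TLS) land here

  # optional instance settings (defaults are defined in variables.tf)
  controller_count = 1
  worker_count     = 2
  worker_type      = "Standard_F1"
}

Per the descriptions in workers/variables.tf, additional worker pools would be attached by pointing the workers submodule at the cluster module's worker_subnet_id, worker_security_group_id, worker_backend_address_pool_id, and kubeconfig outputs.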