diff --git a/CHANGES.md b/CHANGES.md index 3af78824..dbe4aa21 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,6 +4,8 @@ Notable changes between versions. ## Latest +* Introduce Typhoon for Azure as alpha ([#288](https://github.com/poseidon/typhoon/pull/288)) + * Special thanks @justaugustus for an earlier variant * Update Calico from v3.1.3 to v3.2.1 ([#278](https://github.com/poseidon/typhoon/pull/278)) #### AWS diff --git a/addons/nginx-ingress/azure/0-namespace.yaml b/addons/nginx-ingress/azure/0-namespace.yaml new file mode 100644 index 00000000..56903e1f --- /dev/null +++ b/addons/nginx-ingress/azure/0-namespace.yaml @@ -0,0 +1,6 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: ingress + labels: + name: ingress diff --git a/addons/nginx-ingress/azure/default-backend/deployment.yaml b/addons/nginx-ingress/azure/default-backend/deployment.yaml new file mode 100644 index 00000000..786968e0 --- /dev/null +++ b/addons/nginx-ingress/azure/default-backend/deployment.yaml @@ -0,0 +1,40 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: default-backend + namespace: ingress +spec: + replicas: 1 + selector: + matchLabels: + name: default-backend + phase: prod + template: + metadata: + labels: + name: default-backend + phase: prod + spec: + containers: + - name: default-backend + # Any image is permissible as long as: + # 1. It serves a 404 page at / + # 2. 
It serves 200 on a /healthz endpoint + image: k8s.gcr.io/defaultbackend:1.4 + ports: + - containerPort: 8080 + resources: + limits: + cpu: 10m + memory: 20Mi + requests: + cpu: 10m + memory: 20Mi + livenessProbe: + httpGet: + path: /healthz + port: 8080 + scheme: HTTP + initialDelaySeconds: 30 + timeoutSeconds: 5 + terminationGracePeriodSeconds: 60 diff --git a/addons/nginx-ingress/azure/default-backend/service.yaml b/addons/nginx-ingress/azure/default-backend/service.yaml new file mode 100644 index 00000000..87997aba --- /dev/null +++ b/addons/nginx-ingress/azure/default-backend/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: default-backend + namespace: ingress +spec: + type: ClusterIP + selector: + name: default-backend + phase: prod + ports: + - name: http + protocol: TCP + port: 80 + targetPort: 8080 diff --git a/addons/nginx-ingress/azure/deployment.yaml b/addons/nginx-ingress/azure/deployment.yaml new file mode 100644 index 00000000..e56e73d5 --- /dev/null +++ b/addons/nginx-ingress/azure/deployment.yaml @@ -0,0 +1,77 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: nginx-ingress-controller + namespace: ingress +spec: + replicas: 2 + strategy: + rollingUpdate: + maxUnavailable: 1 + selector: + matchLabels: + name: nginx-ingress-controller + phase: prod + template: + metadata: + labels: + name: nginx-ingress-controller + phase: prod + spec: + nodeSelector: + node-role.kubernetes.io/node: "" + containers: + - name: nginx-ingress-controller + image: quay.io/kubernetes-ingress-controller/nginx-ingress-controller:0.17.1 + args: + - /nginx-ingress-controller + - --default-backend-service=$(POD_NAMESPACE)/default-backend + - --ingress-class=public + # use downward API + env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + ports: + - name: http + containerPort: 80 + hostPort: 80 + - name: https + containerPort: 443 + 
hostPort: 443 + - name: health + containerPort: 10254 + hostPort: 10254 + livenessProbe: + failureThreshold: 3 + httpGet: + path: /healthz + port: 10254 + scheme: HTTP + initialDelaySeconds: 10 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + readinessProbe: + failureThreshold: 3 + httpGet: + path: /healthz + port: 10254 + scheme: HTTP + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + securityContext: + capabilities: + add: + - NET_BIND_SERVICE + drop: + - ALL + runAsUser: 33 # www-data + restartPolicy: Always + terminationGracePeriodSeconds: 60 diff --git a/addons/nginx-ingress/azure/rbac/cluster-role-binding.yaml b/addons/nginx-ingress/azure/rbac/cluster-role-binding.yaml new file mode 100644 index 00000000..3be57109 --- /dev/null +++ b/addons/nginx-ingress/azure/rbac/cluster-role-binding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: ingress +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: ingress +subjects: + - kind: ServiceAccount + namespace: ingress + name: default diff --git a/addons/nginx-ingress/azure/rbac/cluster-role.yaml b/addons/nginx-ingress/azure/rbac/cluster-role.yaml new file mode 100644 index 00000000..9fee9fde --- /dev/null +++ b/addons/nginx-ingress/azure/rbac/cluster-role.yaml @@ -0,0 +1,51 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: ingress +rules: + - apiGroups: + - "" + resources: + - configmaps + - endpoints + - nodes + - pods + - secrets + verbs: + - list + - watch + - apiGroups: + - "" + resources: + - nodes + verbs: + - get + - apiGroups: + - "" + resources: + - services + verbs: + - get + - list + - watch + - apiGroups: + - "extensions" + resources: + - ingresses + verbs: + - get + - list + - watch + - apiGroups: + - "" + resources: + - events + verbs: + - create + - patch + - apiGroups: + - "extensions" + resources: + - ingresses/status + verbs: + - update diff --git 
a/addons/nginx-ingress/azure/rbac/role-binding.yaml b/addons/nginx-ingress/azure/rbac/role-binding.yaml new file mode 100644 index 00000000..46b5cece --- /dev/null +++ b/addons/nginx-ingress/azure/rbac/role-binding.yaml @@ -0,0 +1,13 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: ingress + namespace: ingress +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: ingress +subjects: + - kind: ServiceAccount + namespace: ingress + name: default diff --git a/addons/nginx-ingress/azure/rbac/role.yaml b/addons/nginx-ingress/azure/rbac/role.yaml new file mode 100644 index 00000000..84a6065f --- /dev/null +++ b/addons/nginx-ingress/azure/rbac/role.yaml @@ -0,0 +1,41 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: ingress + namespace: ingress +rules: + - apiGroups: + - "" + resources: + - configmaps + - pods + - secrets + verbs: + - get + - apiGroups: + - "" + resources: + - configmaps + resourceNames: + # Defaults to "&lt;election-id&gt;-&lt;ingress-class&gt;" + # Here: "&lt;ingress-controller-leader&gt;-&lt;public&gt;" + # This has to be adapted if you change either parameter + # when launching the nginx-ingress-controller. 
+ - "ingress-controller-leader-public" + verbs: + - get + - update + - apiGroups: + - "" + resources: + - configmaps + verbs: + - create + - apiGroups: + - "" + resources: + - endpoints + verbs: + - get + - create + - update diff --git a/addons/nginx-ingress/azure/service.yaml b/addons/nginx-ingress/azure/service.yaml new file mode 100644 index 00000000..fb81064f --- /dev/null +++ b/addons/nginx-ingress/azure/service.yaml @@ -0,0 +1,22 @@ +apiVersion: v1 +kind: Service +metadata: + name: nginx-ingress-controller + namespace: ingress + annotations: + prometheus.io/scrape: 'true' + prometheus.io/port: '10254' +spec: + type: ClusterIP + selector: + name: nginx-ingress-controller + phase: prod + ports: + - name: http + protocol: TCP + port: 80 + targetPort: 80 + - name: https + protocol: TCP + port: 443 + targetPort: 443 diff --git a/azure/container-linux/kubernetes/workers/variables.tf b/azure/container-linux/kubernetes/workers/variables.tf index 181fe892..1b0bd6d7 100644 --- a/azure/container-linux/kubernetes/workers/variables.tf +++ b/azure/container-linux/kubernetes/workers/variables.tf @@ -40,7 +40,7 @@ variable "count" { variable "vm_type" { type = "string" - default = "Standard_DS1_v2" + default = "Standard_F1" description = "Machine type for instances (see `az vm list-skus --location centralus`)" } diff --git a/docs/addons/ingress.md b/docs/addons/ingress.md index 33fb4aa6..1c4281de 100644 --- a/docs/addons/ingress.md +++ b/docs/addons/ingress.md @@ -4,7 +4,7 @@ Nginx Ingress controller pods accept and demultiplex HTTP, HTTPS, TCP, or UDP tr ## AWS -On AWS, a network load balancer (NLB) distributes traffic across a target group of worker nodes running an Ingress controller deployment on host ports 80 and 443. Firewall rules allow traffic to ports 80 and 443. Health check rules ensure only workers with a health Ingress controller receive traffic. 
+On AWS, a network load balancer (NLB) distributes traffic across a target group of worker nodes running an Ingress controller deployment. Security group rules allow traffic to ports 80 and 443. Health checks ensure only workers with a healthy Ingress controller receive traffic. Create the Ingress controller deployment, service, RBAC roles, RBAC bindings, default backend, and namespace. @@ -35,44 +35,17 @@ resource "google_dns_record_set" "some-application" { } ``` -## Digital Ocean +## Azure -On Digital Ocean, a DNS A record (e.g. `nemo-workers.example.com`) resolves to each worker[^1] running an Ingress controller DaemonSet on host ports 80 and 443. Firewall rules allow IPv4 and IPv6 traffic to ports 80 and 443. - -Create the Ingress controller daemonset, service, RBAC roles, RBAC bindings, default backend, and namespace. - -``` -kubectl apply -R -f addons/nginx-ingress/digital-ocean -``` - -For each application, add a CNAME record resolving to the worker(s) DNS record. Use the Typhoon module's output `workers_dns` to find the worker DNS value. For example, you might use Terraform to manage a Google Cloud DNS record: - -```tf -resource "google_dns_record_set" "some-application" { - # DNS zone name - managed_zone = "example-zone" - - # DNS record - name = "app.example.com." - type = "CNAME" - ttl = 300 - rrdatas = ["${module.digital-ocean-nemo.workers_dns}."] -} -``` - -[^1]: Digital Ocean does offers load balancers. We've opted not to use them to keep the Digital Ocean setup simple and cheap for developers. - -## Google Cloud - -On Google Cloud, a network load balancer distributes traffic across worker nodes (i.e. a target pool of backends) running an Ingress controller deployment on host ports 80 and 443. Firewall rules allow traffic to ports 80 and 443. Health check rules ensure the target pool only includes worker nodes with a healthy Nginx Ingress controller. 
+On Azure, a load balancer distributes traffic across a backend pool of worker nodes running an Ingress controller deployment. Security group rules allow traffic to ports 80 and 443. Health probes ensure only workers with a healthy Ingress controller receive traffic. Create the Ingress controller deployment, service, RBAC roles, RBAC bindings, default backend, and namespace. ``` -kubectl apply -R -f addons/nginx-ingress/google-cloud +kubectl apply -R -f addons/nginx-ingress/azure ``` -For each application, add a DNS record resolving to the network load balancer's IPv4 address. +For each application, add a DNS record resolving to the load balancer's IPv4 address. ``` app1.example.com -> 11.22.33.44 @@ -80,7 +53,7 @@ aap2.example.com -> 11.22.33.44 app3.example.com -> 11.22.33.44 ``` -Find the IPv4 address with `gcloud compute addresses list` or use the Typhoon module's output `ingress_static_ipv4`. For example, you might use Terraform to manage a Google Cloud DNS record: +Find the load balancer's IPv4 address with the Azure console or use the Typhoon module's output `ingress_static_ipv4`. For example, you might use Terraform to manage a Google Cloud DNS record: ```tf resource "google_dns_record_set" "some-application" { @@ -91,7 +64,7 @@ resource "google_dns_record_set" "some-application" { name = "app.example.com." type = "A" ttl = 300 - rrdatas = ["${module.google-cloud-yavin.ingress_static_ipv4}"] + rrdatas = ["${module.azure-ramius.ingress_static_ipv4}"] } ``` @@ -125,3 +98,63 @@ resource "google_dns_record_set" "some-application" { rrdatas = ["SOME-WAN-IP"] } ``` + +## Digital Ocean + +On Digital Ocean, a DNS A record (e.g. `nemo-workers.example.com`) resolves to each worker[^1] running an Ingress controller DaemonSet on host ports 80 and 443. Firewall rules allow IPv4 and IPv6 traffic to ports 80 and 443. + +Create the Ingress controller daemonset, service, RBAC roles, RBAC bindings, default backend, and namespace. 
+ +``` +kubectl apply -R -f addons/nginx-ingress/digital-ocean +``` + +For each application, add a CNAME record resolving to the worker(s) DNS record. Use the Typhoon module's output `workers_dns` to find the worker DNS value. For example, you might use Terraform to manage a Google Cloud DNS record: + +```tf +resource "google_dns_record_set" "some-application" { + # DNS zone name + managed_zone = "example-zone" + + # DNS record + name = "app.example.com." + type = "CNAME" + ttl = 300 + rrdatas = ["${module.digital-ocean-nemo.workers_dns}."] +} +``` + +[^1]: Digital Ocean does offer load balancers. We've opted not to use them to keep the Digital Ocean setup simple and cheap for developers. + +## Google Cloud + +On Google Cloud, a TCP Proxy load balancer distributes traffic across a backend service of worker nodes running an Ingress controller deployment. Firewall rules allow traffic to ports 80 and 443. Health check rules ensure only workers with a healthy Ingress controller receive traffic. + +Create the Ingress controller deployment, service, RBAC roles, RBAC bindings, default backend, and namespace. + +``` +kubectl apply -R -f addons/nginx-ingress/google-cloud +``` + +For each application, add a DNS record resolving to the load balancer's IPv4 address. + +``` +app1.example.com -> 11.22.33.44 +aap2.example.com -> 11.22.33.44 +app3.example.com -> 11.22.33.44 +``` + +Find the IPv4 address with `gcloud compute addresses list` or use the Typhoon module's output `ingress_static_ipv4`. For example, you might use Terraform to manage a Google Cloud DNS record: + +```tf +resource "google_dns_record_set" "some-application" { + # DNS zone name + managed_zone = "example-zone" + + # DNS record + name = "app.example.com." 
+ type = "A" + ttl = 300 + rrdatas = ["${module.google-cloud-yavin.ingress_static_ipv4}"] +} +``` diff --git a/docs/advanced/worker-pools.md b/docs/advanced/worker-pools.md index 591ac8be..ecf06d52 100644 --- a/docs/advanced/worker-pools.md +++ b/docs/advanced/worker-pools.md @@ -1,11 +1,12 @@ # Worker Pools -Typhoon AWS and Google Cloud allow additional groups of workers to be defined and joined to a cluster. For example, add worker pools of instances with different types, disk sizes, Container Linux channels, or preemptibility modes. +Typhoon AWS, Azure, and Google Cloud allow additional groups of workers to be defined and joined to a cluster. For example, add worker pools of instances with different types, disk sizes, Container Linux channels, or preemptibility modes. Internal Terraform Modules: * `aws/container-linux/kubernetes/workers` * `aws/fedora-atomic/kubernetes/workers` +* `azure/container-linux/kubernetes/workers` * `google-cloud/container-linux/kubernetes/workers` * `google-cloud/fedora-atomic/kubernetes/workers` @@ -31,6 +32,7 @@ module "tempest-worker-pool" { kubeconfig = "${module.aws-tempest.kubeconfig}" ssh_authorized_key = "${var.ssh_authorized_key}" + # optional count = 2 instance_type = "m5.large" os_image = "coreos-beta" @@ -43,7 +45,7 @@ Apply the change. terraform apply ``` -Verify an auto-scaling group of workers join the cluster within a few minutes. +Verify an auto-scaling group of workers joins the cluster within a few minutes. 
### Variables @@ -53,10 +55,10 @@ The AWS internal `workers` module supports a number of [variables](https://githu | Name | Description | Example | |:-----|:------------|:--------| +| name | Unique name (distinct from cluster name) | "tempest-m5s" | | vpc_id | Must be set to `vpc_id` output by cluster | "${module.cluster.vpc_id}" | | subnet_ids | Must be set to `subnet_ids` output by cluster | "${module.cluster.subnet_ids}" | | security_groups | Must be set to `worker_security_groups` output by cluster | "${module.cluster.worker_security_groups}" | -| name | Unique name (distinct from cluster name) | "tempest-m5s" | | kubeconfig | Must be set to `kubeconfig` output by cluster | "${module.cluster.kubeconfig}" | | ssh_authorized_key | SSH public key for user 'core' | "ssh-rsa AAAAB3NZ..." | @@ -74,6 +76,76 @@ The AWS internal `workers` module supports a number of [variables](https://githu Check the list of valid [instance types](https://aws.amazon.com/ec2/instance-types/) or per-region and per-type [spot prices](https://aws.amazon.com/ec2/spot/pricing/). +## Azure + +Create a cluster following the Azure [tutorial](../cl/azure.md#cluster). Define a worker pool using the Azure internal `workers` module. + +```tf +module "ramius-worker-pool" { + source = "git::https://github.com/poseidon/typhoon//azure/container-linux/kubernetes/workers?ref=v1.11.3" + + providers = { + azurerm = "azurerm.default" + } + + # Azure + region = "${module.azure-ramius.region}" + resource_group_name = "${module.azure-ramius.resource_group_name}" + subnet_id = "${module.azure-ramius.subnet_id}" + security_group_id = "${module.azure-ramius.security_group_id}" + backend_address_pool_id = "${module.azure-ramius.backend_address_pool_id}" + + # configuration + name = "ramius-low-priority" + kubeconfig = "${module.azure-ramius.kubeconfig}" + ssh_authorized_key = "${var.ssh_authorized_key}" + + # optional + count = 2 + vm_type = "Standard_F4" + priority = "Low" +} +``` + +Apply the change. 
+ +``` +terraform apply +``` + +Verify a scale set of workers joins the cluster within a few minutes. + +### Variables + +The Azure internal `workers` module supports a number of [variables](https://github.com/poseidon/typhoon/blob/master/azure/container-linux/kubernetes/workers/variables.tf). + +#### Required + +| Name | Description | Example | +|:-----|:------------|:--------| +| name | Unique name (distinct from cluster name) | "ramius-f4" | +| region | Must be set to `region` output by cluster | "${module.cluster.region}" | +| resource_group_name | Must be set to `resource_group_name` output by cluster | "${module.cluster.resource_group_name}" | +| subnet_id | Must be set to `subnet_id` output by cluster | "${module.cluster.subnet_id}" | +| security_group_id | Must be set to `security_group_id` output by cluster | "${module.cluster.security_group_id}" | +| backend_address_pool_id | Must be set to `backend_address_pool_id` output by cluster | "${module.cluster.backend_address_pool_id}" | +| kubeconfig | Must be set to `kubeconfig` output by cluster | "${module.cluster.kubeconfig}" | +| ssh_authorized_key | SSH public key for user 'core' | "ssh-rsa AAAAB3NZ..." | + +#### Optional + +| Name | Description | Default | Example | +|:-----|:------------|:--------|:--------| +| count | Number of instances | 1 | 3 | +| vm_type | Machine type for instances | "Standard_F1" | See below | +| os_image | Channel for a Container Linux derivative | coreos-stable | coreos-stable, coreos-beta, coreos-alpha | +| priority | Set priority to Low to use reduced cost surplus capacity, with the tradeoff that instances can be deallocated at any time | Regular | Low | +| clc_snippets | Container Linux Config snippets | [] | [example](/advanced/customization/#usage) | +| service_cidr | CIDR IPv4 range to assign to Kubernetes services | "10.3.0.0/16" | "10.3.0.0/24" | +| cluster_domain_suffix | FQDN suffix for Kubernetes services answered by coredns. 
| "cluster.local" | "k8s.example.com" | + +Check the list of valid [machine types](https://azure.microsoft.com/en-us/pricing/details/virtual-machines/linux/) and their [specs](https://docs.microsoft.com/en-us/azure/virtual-machines/linux/sizes-general). Use `az vm list-skus` to get the identifier. + ## Google Cloud Create a cluster following the Google Cloud [tutorial](../cl/google-cloud.md#cluster). Define a worker pool using the Google Cloud internal `workers` module. @@ -87,7 +159,7 @@ module "yavin-worker-pool" { } # Google Cloud - region = "us-central1" + region = "europe-west2" network = "${module.google-cloud-yavin.network_name}" cluster_name = "yavin" @@ -96,6 +168,7 @@ module "yavin-worker-pool" { kubeconfig = "${module.google-cloud-yavin.kubeconfig}" ssh_authorized_key = "${var.ssh_authorized_key}" + # optional count = 2 machine_type = "n1-standard-16" os_image = "coreos-beta" @@ -129,13 +202,15 @@ The Google Cloud internal `workers` module supports a number of [variables](http | Name | Description | Example | |:-----|:------------|:--------| -| region | Must be set to `region` of cluster | "us-central1" | -| network | Must be set to `network_name` output by cluster | "${module.cluster.network_name}" | | name | Unique name (distinct from cluster name) | "yavin-16x" | +| region | Region for the worker pool instances. May differ from the cluster's region | "europe-west2" | +| network | Must be set to `network_name` output by cluster | "${module.cluster.network_name}" | | cluster_name | Must be set to `cluster_name` of cluster | "yavin" | | kubeconfig | Must be set to `kubeconfig` output by cluster | "${module.cluster.kubeconfig}" | | ssh_authorized_key | SSH public key for user 'core' | "ssh-rsa AAAAB3NZ..." | +Check the list of regions [docs](https://cloud.google.com/compute/docs/regions-zones/regions-zones) or with `gcloud compute regions list`. 
+ #### Optional | Name | Description | Default | Example | diff --git a/docs/cl/azure.md b/docs/cl/azure.md index 543e36f3..3ff6d1b9 100644 --- a/docs/cl/azure.md +++ b/docs/cl/azure.md @@ -1,7 +1,7 @@ # Azure !!! danger - Typhoon for Azure is alpha. Expect rough edges and changes. + Typhoon for Azure is alpha. For production, use AWS, Google Cloud, or bare-metal. As Azure matures, check [errata](https://github.com/poseidon/typhoon/wiki/Errata) for known shortcomings. In this tutorial, we'll create a Kubernetes v1.11.2 cluster on Azure with Container Linux.