From 147c21a4bd69fb92e763f99ea4bb5738bdd1af65 Mon Sep 17 00:00:00 2001 From: Dalton Hubble Date: Mon, 6 May 2019 00:38:23 -0700 Subject: [PATCH] Allow Calico networking on Azure and DigitalOcean * Introduce "calico" as a `networking` option on Azure and DigitalOcean using Calico's new VXLAN support (similar to flannel). Flannel remains the default on these platforms for now. * Historically, DigitalOcean and Azure only allowed Flannel as the CNI provider, since those platforms don't support IPIP traffic that was previously required for Calico. * Looking forward, it's desirable for Calico to become the default across Typhoon clusters, since it provides NetworkPolicy and a consistent experience * No changes to AWS, GCP, or bare-metal where Calico remains the default CNI provider. On these platforms, IPIP mode will always be used, since it's available and more performant than VXLAN --- CHANGES.md | 13 +++++++++++-- azure/container-linux/kubernetes/bootkube.tf | 17 ++++++++++++----- azure/container-linux/kubernetes/variables.tf | 6 ++++++ .../container-linux/kubernetes/bootkube.tf | 17 +++++++++++------ digital-ocean/container-linux/kubernetes/ssh.tf | 1 + .../container-linux/kubernetes/variables.tf | 6 ++++++ digital-ocean/fedora-atomic/kubernetes/ssh.tf | 1 + docs/cl/azure.md | 1 + docs/cl/digital-ocean.md | 1 + docs/topics/performance.md | 13 ++++++------- 10 files changed, 56 insertions(+), 20 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index ef612115..2c77ce71 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -7,14 +7,23 @@ Notable changes between versions. 
* Kubernetes [v1.14.2](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG-1.14.md#v1142) * Update etcd from v3.3.12 to [v3.3.13](https://github.com/etcd-io/etcd/releases/tag/v3.3.13) * Upgrade Calico from v3.6.1 to [v3.7.2](https://docs.projectcalico.org/v3.7/release-notes/) -* Change flannel port from 8472 (kernel default) to 4789 (IANA VXLAN) +* Change flannel VXLAN port from 8472 (kernel default) to 4789 (IANA VXLAN) #### AWS -* Only set internal VXLAN rules when `networking` is flannel (default: calico) +* Only set internal VXLAN rules when `networking` is "flannel" (default: calico) + +#### Azure + +* Allow choosing Calico as the network provider (experimental) ([#472](https://github.com/poseidon/typhoon/pull/472)) + * Add a `networking` variable accepting "flannel" (default) or "calico" + * Use VXLAN encapsulation since Azure doesn't support IPIP #### DigitalOcean +* Allow choosing Calico as the network provider (experimental) ([#472](https://github.com/poseidon/typhoon/pull/472)) + * Add a `networking` variable accepting "flannel" (default) or "calico" + * Use VXLAN encapsulation since DigitalOcean doesn't support IPIP * Add explicit ordering between firewall rule creation and secure copying Kubelet credentials ([#469](https://github.com/poseidon/typhoon/pull/469)) * Fix race scenario if copies to nodes were before rule creation, blocking cluster creation diff --git a/azure/container-linux/kubernetes/bootkube.tf b/azure/container-linux/kubernetes/bootkube.tf index 8912a0b7..0898fb1c 100644 --- a/azure/container-linux/kubernetes/bootkube.tf +++ b/azure/container-linux/kubernetes/bootkube.tf @@ -2,11 +2,18 @@ module "bootkube" { source = "git::https://github.com/poseidon/terraform-render-bootkube.git?ref=85571f6dae3522e2a7de01b7e0a3f7e3a9359641/" - cluster_name = "${var.cluster_name}" - api_servers = ["${format("%s.%s", var.cluster_name, var.dns_zone)}"] - etcd_servers = ["${formatlist("%s.%s", azurerm_dns_a_record.etcds.*.name, var.dns_zone)}"] - 
asset_dir = "${var.asset_dir}" - networking = "flannel" + cluster_name = "${var.cluster_name}" + api_servers = ["${format("%s.%s", var.cluster_name, var.dns_zone)}"] + etcd_servers = ["${formatlist("%s.%s", azurerm_dns_a_record.etcds.*.name, var.dns_zone)}"] + asset_dir = "${var.asset_dir}" + + networking = "${var.networking}" + + # only effective with Calico networking + # we should be able to use 1450 MTU, but in practice, 1410 was needed + network_encapsulation = "vxlan" + network_mtu = "1410" + pod_cidr = "${var.pod_cidr}" service_cidr = "${var.service_cidr}" cluster_domain_suffix = "${var.cluster_domain_suffix}" diff --git a/azure/container-linux/kubernetes/variables.tf b/azure/container-linux/kubernetes/variables.tf index b5e378dc..90a14574 100644 --- a/azure/container-linux/kubernetes/variables.tf +++ b/azure/container-linux/kubernetes/variables.tf @@ -88,6 +88,12 @@ variable "asset_dir" { type = "string" } +variable "networking" { + description = "Choice of networking provider (flannel or calico)" + type = "string" + default = "flannel" +} + variable "host_cidr" { description = "CIDR IPv4 range to assign to instances" type = "string" diff --git a/digital-ocean/container-linux/kubernetes/bootkube.tf b/digital-ocean/container-linux/kubernetes/bootkube.tf index 32f08424..f229de76 100644 --- a/digital-ocean/container-linux/kubernetes/bootkube.tf +++ b/digital-ocean/container-linux/kubernetes/bootkube.tf @@ -2,12 +2,17 @@ module "bootkube" { source = "git::https://github.com/poseidon/terraform-render-bootkube.git?ref=85571f6dae3522e2a7de01b7e0a3f7e3a9359641/" - cluster_name = "${var.cluster_name}" - api_servers = ["${format("%s.%s", var.cluster_name, var.dns_zone)}"] - etcd_servers = "${digitalocean_record.etcds.*.fqdn}" - asset_dir = "${var.asset_dir}" - networking = "flannel" - network_mtu = 1440 + cluster_name = "${var.cluster_name}" + api_servers = ["${format("%s.%s", var.cluster_name, var.dns_zone)}"] + etcd_servers = "${digitalocean_record.etcds.*.fqdn}" + 
asset_dir = "${var.asset_dir}" + + networking = "${var.networking}" + + # only effective with Calico networking + network_encapsulation = "vxlan" + network_mtu = "1450" + pod_cidr = "${var.pod_cidr}" service_cidr = "${var.service_cidr}" cluster_domain_suffix = "${var.cluster_domain_suffix}" diff --git a/digital-ocean/container-linux/kubernetes/ssh.tf b/digital-ocean/container-linux/kubernetes/ssh.tf index 98740a22..1816f564 100644 --- a/digital-ocean/container-linux/kubernetes/ssh.tf +++ b/digital-ocean/container-linux/kubernetes/ssh.tf @@ -1,6 +1,7 @@ # Secure copy etcd TLS assets and kubeconfig to controllers. Activates kubelet.service resource "null_resource" "copy-controller-secrets" { count = "${var.controller_count}" + depends_on = [ "digitalocean_firewall.rules", ] diff --git a/digital-ocean/container-linux/kubernetes/variables.tf b/digital-ocean/container-linux/kubernetes/variables.tf index 9606fed0..bd8b4195 100644 --- a/digital-ocean/container-linux/kubernetes/variables.tf +++ b/digital-ocean/container-linux/kubernetes/variables.tf @@ -71,6 +71,12 @@ variable "asset_dir" { type = "string" } +variable "networking" { + description = "Choice of networking provider (flannel or calico)" + type = "string" + default = "flannel" +} + variable "pod_cidr" { description = "CIDR IPv4 range to assign Kubernetes pods" type = "string" diff --git a/digital-ocean/fedora-atomic/kubernetes/ssh.tf b/digital-ocean/fedora-atomic/kubernetes/ssh.tf index 156a4894..77ae4aba 100644 --- a/digital-ocean/fedora-atomic/kubernetes/ssh.tf +++ b/digital-ocean/fedora-atomic/kubernetes/ssh.tf @@ -1,6 +1,7 @@ # Secure copy etcd TLS assets and kubeconfig to controllers. 
Activates kubelet.service resource "null_resource" "copy-controller-secrets" { count = "${var.controller_count}" + depends_on = [ "digitalocean_firewall.rules", ] diff --git a/docs/cl/azure.md b/docs/cl/azure.md index 2a12f80d..97d09d72 100644 --- a/docs/cl/azure.md +++ b/docs/cl/azure.md @@ -253,6 +253,7 @@ Reference the DNS zone with `"${azurerm_dns_zone.clusters.name}"` and its resour | worker_priority | Set priority to Low to use reduced cost surplus capacity, with the tradeoff that instances can be deallocated at any time | Regular | Low | | controller_clc_snippets | Controller Container Linux Config snippets | [] | [example](/advanced/customization/#usage) | | worker_clc_snippets | Worker Container Linux Config snippets | [] | [example](/advanced/customization/#usage) | +| networking | Choice of networking provider | "flannel" | "flannel" or "calico" (experimental) | | host_cidr | CIDR IPv4 range to assign to instances | "10.0.0.0/16" | "10.0.0.0/20" | | pod_cidr | CIDR IPv4 range to assign to Kubernetes pods | "10.2.0.0/16" | "10.22.0.0/16" | | service_cidr | CIDR IPv4 range to assign to Kubernetes services | "10.3.0.0/16" | "10.3.0.0/24" | diff --git a/docs/cl/digital-ocean.md b/docs/cl/digital-ocean.md index 90e26b4f..faf7f042 100644 --- a/docs/cl/digital-ocean.md +++ b/docs/cl/digital-ocean.md @@ -253,6 +253,7 @@ Digital Ocean requires the SSH public key be uploaded to your account, so you ma | image | Container Linux image for instances | "coreos-stable" | coreos-stable, coreos-beta, coreos-alpha | | controller_clc_snippets | Controller Container Linux Config snippets | [] | [example](/advanced/customization/) | | worker_clc_snippets | Worker Container Linux Config snippets | [] | [example](/advanced/customization/) | +| networking | Choice of networking provider | "flannel" | "flannel" or "calico" (experimental) | | pod_cidr | CIDR IPv4 range to assign to Kubernetes pods | "10.2.0.0/16" | "10.22.0.0/16" | | service_cidr | CIDR IPv4 range to assign to 
Kubernetes services | "10.3.0.0/16" | "10.3.0.0/24" | | cluster_domain_suffix | FQDN suffix for Kubernetes services answered by coredns. | "cluster.local" | "k8s.example.com" | diff --git a/docs/topics/performance.md b/docs/topics/performance.md index 20f32f42..d7aa60c0 100644 --- a/docs/topics/performance.md +++ b/docs/topics/performance.md @@ -26,20 +26,19 @@ Network performance varies based on the platform and CNI plugin. `iperf` was use |----------------------------|-------:|-------------:|-------------:| | AWS (flannel) | 5 Gb/s | 4.94 Gb/s | 4.89 Gb/s | | AWS (calico, MTU 1480) | 5 Gb/s | 4.94 Gb/s | 4.42 Gb/s | -| AWS (calico, MTU 8981) | 5 Gb/s | 4.94 Gb/s | 4.75 Gb/s | -| Azure (flannel) | Varies | 749 Mb/s | 680 Mb/s | +| AWS (calico, MTU 8981) | 5 Gb/s | 4.94 Gb/s | 4.90 Gb/s | +| Azure (flannel) | Varies | 749 Mb/s | 650 Mb/s | +| Azure (calico) | Varies | 749 Mb/s | 650 Mb/s | | Bare-Metal (flannel) | 1 Gb/s | 940 Mb/s | 903 Mb/s | | Bare-Metal (calico) | 1 Gb/s | 940 Mb/s | 931 Mb/s | -| Bare-Metal (flannel, bond) | 3 Gb/s | 2.3 Gb/s | 1.17 Gb/s | -| Bare-Metal (calico, bond) | 3 Gb/s | 2.3 Gb/s | 1.17 Gb/s | -| Digital Ocean | 2 Gb/s | 1.97 Gb/s | 1.64 Gb/s | +| Digital Ocean (flannel) | Varies | 1.97 Gb/s | 1.20 Gb/s | +| Digital Ocean (calico) | Varies | 1.97 Gb/s | 1.20 Gb/s | | Google Cloud (flannel) | 2 Gb/s | 1.94 Gb/s | 1.76 Gb/s | | Google Cloud (calico) | 2 Gb/s | 1.94 Gb/s | 1.81 Gb/s | Notes: * Calico and Flannel have comparable performance. Platform and configuration differences dominate. -* AWS and Azure node bandwidth (i.e. upper bound) depends greatly on machine type +* Azure and DigitalOcean network performance can be quite variable or depend on machine type * Only [certain AWS EC2 instance types](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/network_mtu.html#jumbo_frame_instances) allow jumbo frames. This is why the default MTU on AWS must be 1480. -* Neither CNI provider seems to be able to leverage bonded NICs well (bare-metal)