Merge remote-tracking branch 'upstream/main'

This commit is contained in:
Philippe Caseiro 2024-12-02 11:05:29 +01:00
commit daa5fc4171
173 changed files with 4505 additions and 1838 deletions

View File

@ -1,10 +0,0 @@
High level description of the change.
* Specific change
* Specific change
## Testing
Describe your work to validate the change works.
rel: issue number (if applicable)

12
.github/release.yaml vendored Normal file
View File

@ -0,0 +1,12 @@
changelog:
categories:
- title: Contributions
labels:
- '*'
exclude:
labels:
- dependencies
- no-release-note
- title: Dependencies
labels:
- dependencies

View File

@ -4,6 +4,214 @@ Notable changes between versions.
## Latest
## v1.31.3
* Kubernetes [v1.31.2](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.31.md#v1312)
* Update CoreDNS from v1.11.3 to v1.11.4
* Update Cilium from v1.16.3 to [v1.16.4](https://github.com/cilium/cilium/releases/tag/v1.16.4)
### Deprecations
* Plan to drop support for using Calico CNI, recommend everyone use the Cilium default
## v1.31.2
* Kubernetes [v1.31.2](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.31.md#v1312)
* Update Cilium from v1.16.1 to [v1.16.3](https://github.com/cilium/cilium/releases/tag/v1.16.3)
* Update flannel from v0.25.6 to [v0.26.0](https://github.com/flannel-io/flannel/releases/tag/v0.26.0)
## v1.31.1
* Kubernetes [v1.31.1](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.31.md#v1311)
* Update flannel from v0.25.5 to [v0.25.6](https://github.com/flannel-io/flannel/releases/tag/v0.25.6)
### Google
* Add `controller_disk_type` and `worker_disk_type` variables ([#1513](https://github.com/poseidon/typhoon/pull/1513))
* Add explicit `region` field to regional worker instance templates ([#1524](https://github.com/poseidon/typhoon/pull/1524))
## v1.31.0
* Kubernetes [v1.31.0](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.31.md#v1310)
* Use Cilium kube-proxy replacement mode when `cilium` networking is chosen ([#1501](https://github.com/poseidon/typhoon/pull/1501))
* Fix invalid flannel-cni container image for those using `flannel` networking ([#1497](https://github.com/poseidon/typhoon/pull/1497))
### AWS
* Use EC2 resource-based hostnames instead of IP-based hostnames ([#1499](https://github.com/poseidon/typhoon/pull/1499))
* The Amazon DNS server can resolve A and AAAA queries to IPv4 and IPv6 node addresses
* Tag controller node EBS volumes with a name based on the controller node name
### Google
* Use `google_compute_region_instance_template` instead of `google_compute_instance_template`
* Google's regional instance template metadata is kept in the associated region for greater resiliency. The "global" instance templates were kept in a single region
## v1.30.4
* Kubernetes [v1.30.4](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.30.md#v1304)
* Update Cilium from v1.15.7 to [v1.16.1](https://github.com/cilium/cilium/releases/tag/v1.16.1)
* Update CoreDNS from v1.11.1 to v1.11.3
* Remove `enable_aggregation` variable for Kubernetes Aggregation Layer, always set to true
* Remove `cluster_domain_suffix` variable, always use "cluster.local"
* Remove `enable_reporting` variable for analytics, always set to false
## v1.30.3
* Kubernetes [v1.30.3](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.30.md#v1303)
* Update Cilium from v1.15.6 to [v1.15.7](https://github.com/cilium/cilium/releases/tag/v1.15.7)
* Update flannel from v0.25.4 to [v0.25.5](https://github.com/flannel-io/flannel/releases/tag/v0.25.5)
### AWS
* Configure controller and worker disks ([#1482](https://github.com/poseidon/typhoon/pull/1482))
* Add `controller_disk_type`, `controller_disk_size`, and `controller_disk_iops` variables
* Add `worker_disk_type`, `worker_disk_size`, and `worker_disk_iops` variables
* Remove `disk_type`, `disk_size`, and `disk_iops` variables
* Fix propagating settings to worker disks, previously ignored
* Configure CPU pricing model for burstable instance types ([#1482](https://github.com/poseidon/typhoon/pull/1482))
* Add `controller_cpu_credits` and `worker_cpu_credits` variables (`standard` or `unlimited`)
* Configure controller or worker instance architecture ([#1485](https://github.com/poseidon/typhoon/pull/1485))
* Add `controller_arch` and `worker_arch` variables (`amd64` or `arm64`)
* Remove `arch` variable
```diff
module "cluster" {
...
- arch = "amd64"
- disk_type = "gp3"
- disk_size = 30
- disk_iops = 3000
+ controller_arch = "amd64"
+ controller_disk_size = 15
+ controller_cpu_credits = "standard"
+ worker_arch = "amd64"
+ worker_disk_size = 22
+ worker_cpu_credits = "unlimited"
}
```
### Azure
* Configure the virtual network and subnets with IPv6 private address space
* Change `host_cidr` variable (string) to a `network_cidr` object with `ipv4` and `ipv6` fields that list CIDR strings. Leave the variable unset to use the defaults. (**breaking**)
* Add support for dual-stack Kubernetes Ingress Load Balancing
* Add a public IPv6 frontend, 80/443 rules, and a worker-ipv6 backend pool
* Change the `controller_address_prefixes` output from a list of strings to an object with `ipv4` and `ipv6` fields. Most Azure resources can't accept a mix, so these are split out (**breaking**)
* Change the `worker_address_prefixes` output from a list of strings to an object with `ipv4` and `ipv6` fields. Most Azure resources can't accept a mix, so these are split out (**breaking**)
* Change the `backend_address_pool_id` output (and worker module input) from a string to an object with `ipv4` and `ipv6` fields that list ids (**breaking**)
* Configure nodes to have outbound IPv6 internet connectivity (analogous to IPv4 SNAT)
* Configure controller nodes to have a public IPv6 address
* Configure worker nodes to use outbound rules and the load balancer for SNAT
* Extend network security rules to allow IPv6 traffic, analogous to IPv4
* Rename `region` variable to `location` to align with Azure platform conventions ([#1469](https://github.com/poseidon/typhoon/pull/1469))
* Change worker pools from uniform to flexible orchestration mode ([#1473](https://github.com/poseidon/typhoon/pull/1473))
* Add options to allow workers nodes to use ephemeral local disks ([#1473](https://github.com/poseidon/typhoon/pull/1473))
* Add `controller_disk_type` and `controller_disk_size` variables
* Add `worker_disk_type`, `worker_disk_size`, and `worker_ephemeral_disk` variables
* Reduce the number of public IPv4 addresses needed for the Azure load balancer ([#1470](https://github.com/poseidon/typhoon/pull/1470))
* Configure controller or worker instance architecture for Flatcar Linux ([#1485](https://github.com/poseidon/typhoon/pull/1485))
* Add `controller_arch` and `worker_arch` variables (`amd64` or `arm64`)
* Remove `arch` variable
```diff
module "cluster" {
...
- region = "centralus"
+ location = "centralus"
# optional
- host_cidr = "10.0.0.0/16"
+ network_cidr = {
+ ipv4 = ["10.0.0.0/16"]
+ }
# instances
+ controller_disk_type = "StandardSSD_LRS"
+ worker_ephemeral_disk = true
}
```
### Google Cloud
* Allow configuring controller and worker disks ([#1486](https://github.com/poseidon/typhoon/pull/1486))
* Add `controller_disk_size` and `worker_disk_size` variables
* Remove `disk_size` variable
## v1.30.2
* Kubernetes [v1.30.2](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.30.md#v1302)
* Update CoreDNS from v1.9.4 to v1.11.1
* Update Cilium from v1.15.5 to [v1.15.6](https://github.com/cilium/cilium/releases/tag/v1.15.6)
* Update flannel from v0.25.1 to [v0.25.4](https://github.com/flannel-io/flannel/releases/tag/v0.25.4)
## v1.30.1
* Kubernetes [v1.30.1](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.30.md#v1301)
* Add firewall rules and security group rules for Cilium and Hubble metrics ([#1449](https://github.com/poseidon/typhoon/pull/1449))
* Update Cilium from v1.15.3 to [v1.15.5](https://github.com/cilium/cilium/releases/tag/v1.15.5)
* Update flannel from v0.24.4 to [v0.25.1](https://github.com/flannel-io/flannel/releases/tag/v0.25.1)
* Introduce `components` variabe to enable/disable/configure pre-installed components ([#1453](https://github.com/poseidon/typhoon/pull/1453))
* Add Terraform modules for `coredns`, `cilium`, and `flannel` components
### Azure
* Add `controller_security_group_name` output for adding custom security rules ([#1450](https://github.com/poseidon/typhoon/pull/1450))
* Add `controller_address_prefixes` output for adding custom security rules ([#1450](https://github.com/poseidon/typhoon/pull/1450))
## v1.30.0
* Kubernetes [v1.30.0](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.30.md#v1300)
* Update etcd from v3.5.12 to [v3.5.13](https://github.com/etcd-io/etcd/releases/tag/v3.5.13)
* Update Cilium from v1.15.2 to [v1.15.3](https://github.com/cilium/cilium/releases/tag/v1.15.3)
* Update Calico from v3.27.2 to [v3.27.3](https://github.com/projectcalico/calico/releases/tag/v3.27.3)
## v1.29.3
* Kubernetes [v1.29.3](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.29.md#v1293)
* Update Cilium from v1.15.1 to [v1.15.2](https://github.com/cilium/cilium/releases/tag/v1.15.2)
* Update flannel from v0.24.2 to [v0.24.4](https://github.com/flannel-io/flannel/releases/tag/v0.24.4)
## v1.29.2
* Kubernetes [v1.29.2](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.29.md#v1292)
* Update etcd from v3.5.10 to [v3.5.12](https://github.com/etcd-io/etcd/releases/tag/v3.5.12)
* Update Cilium from v1.14.3 to [v1.15.1](https://github.com/cilium/cilium/releases/tag/v1.15.1)
* Update Calico from v3.26.3 to [v3.27.2](https://github.com/projectcalico/calico/releases/tag/v3.27.2)
* Fix upstream incompatibility with Fedora CoreOS ([calico#8372](https://github.com/projectcalico/calico/issues/8372))
* Update flannel from v0.22.2 to [v0.24.2](https://github.com/flannel-io/flannel/releases/tag/v0.24.2)
* Add an `install_container_networking` variable (default `true`) ([#1421](https://github.com/poseidon/typhoon/pull/1421))
* When `true`, the chosen container `networking` provider is installed during cluster bootstrap
* Set `false` to self-manage the container networking provider. This allows flannel, Calico, or Cilium
to be managed via Terraform (like any other Kubernetes resources). Nodes will be NotReady until you
apply the self-managed container networking provider. This may become the default in future.
* Continue to set `networking` to one of the three supported container networking providers. Most
require custom firewall / security policies be present across nodes so they have some infra tie-ins.
## v1.29.1
* Kubernetes [v1.29.1](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.29.md#v1291)
### AWS
* Continue to support AWS IMDSv1 ([#1412](https://github.com/poseidon/typhoon/pull/1412))
### Known Issues
* Calico and Fedora CoreOS cannot be used together currently ([calico#8372](https://github.com/projectcalico/calico/issues/8372))
## v1.29.0
* Kubernetes [v1.29.0](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.29.md#v1290)
### Known Issues
* Calico and Fedora CoreOS cannot be used together currently ([calico#8372](https://github.com/projectcalico/calico/issues/8372))
## v1.28.4
* Kubernetes [v1.28.4](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.28.md#v1284)
## v1.28.3
* Kubernetes [v1.28.3](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.28.md#v1283)

View File

@ -1,4 +1,9 @@
# Typhoon [![Release](https://img.shields.io/github/v/release/poseidon/typhoon)](https://github.com/poseidon/typhoon/releases) [![Stars](https://img.shields.io/github/stars/poseidon/typhoon)](https://github.com/poseidon/typhoon/stargazers) [![Sponsors](https://img.shields.io/github/sponsors/poseidon?logo=github)](https://github.com/sponsors/poseidon) [![Mastodon](https://img.shields.io/badge/follow-news-6364ff?logo=mastodon)](https://fosstodon.org/@typhoon)
# Typhoon
[![Release](https://img.shields.io/github/v/release/poseidon/typhoon?style=flat-square)](https://github.com/poseidon/typhoon/releases)
[![Stars](https://img.shields.io/github/stars/poseidon/typhoon?style=flat-square)](https://github.com/poseidon/typhoon/stargazers)
[![Sponsors](https://img.shields.io/github/sponsors/poseidon?logo=github&style=flat-square)](https://github.com/sponsors/poseidon)
[![Mastodon](https://img.shields.io/badge/follow-news-6364ff?logo=mastodon&style=flat-square)](https://fosstodon.org/@typhoon)
<img align="right" src="https://storage.googleapis.com/poseidon/typhoon-logo.png">
@ -13,7 +18,7 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster
## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>
* Kubernetes v1.28.3 (upstream)
* Kubernetes v1.31.3 (upstream)
* Single or multi-master, [Calico](https://www.projectcalico.org/) or [Cilium](https://github.com/cilium/cilium) or [flannel](https://github.com/coreos/flannel) networking
* On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/), SELinux enforcing
* Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [preemptible](https://typhoon.psdn.io/flatcar-linux/google-cloud/#preemption) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#hosts) customization
@ -21,7 +26,7 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster
## Modules
Typhoon provides a Terraform Module for each supported operating system and platform.
Typhoon provides a Terraform Module for defining a Kubernetes cluster on each supported operating system and platform.
Typhoon is available for [Fedora CoreOS](https://getfedora.org/coreos/).
@ -52,6 +57,14 @@ Typhoon is available for [Flatcar Linux](https://www.flatcar-linux.org/releases/
| AWS | Flatcar Linux (ARM64) | [aws/flatcar-linux/kubernetes](aws/flatcar-linux/kubernetes) | alpha |
| Azure | Flatcar Linux (ARM64) | [azure/flatcar-linux/kubernetes](azure/flatcar-linux/kubernetes) | alpha |
Typhoon also provides Terraform Modules for optionally managing individual components applied onto clusters.
| Name | Terraform Module | Status |
|---------|------------------|--------|
| CoreDNS | [addons/coredns](addons/coredns) | beta |
| Cilium | [addons/cilium](addons/cilium) | beta |
| flannel | [addons/flannel](addons/flannel) | beta |
## Documentation
* [Docs](https://typhoon.psdn.io)
@ -65,7 +78,7 @@ Define a Kubernetes cluster by using the Terraform module for your chosen platfo
```tf
module "yavin" {
source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes?ref=v1.28.3"
source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes?ref=v1.31.3"
# Google Cloud
cluster_name = "yavin"
@ -85,6 +98,7 @@ module "yavin" {
resource "local_file" "kubeconfig-yavin" {
content = module.yavin.kubeconfig-admin
filename = "/home/user/.kube/configs/yavin-config"
file_permission = "0600"
}
```
@ -104,9 +118,9 @@ In 4-8 minutes (varies by platform), the cluster will be ready. This Google Clou
$ export KUBECONFIG=/home/user/.kube/configs/yavin-config
$ kubectl get nodes
NAME ROLES STATUS AGE VERSION
yavin-controller-0.c.example-com.internal <none> Ready 6m v1.28.3
yavin-worker-jrbf.c.example-com.internal <none> Ready 5m v1.28.3
yavin-worker-mzdm.c.example-com.internal <none> Ready 5m v1.28.3
yavin-controller-0.c.example-com.internal <none> Ready 6m v1.31.3
yavin-worker-jrbf.c.example-com.internal <none> Ready 5m v1.31.3
yavin-worker-mzdm.c.example-com.internal <none> Ready 5m v1.31.3
```
List the pods.
@ -114,9 +128,10 @@ List the pods.
```
$ kubectl get pods --all-namespaces
NAMESPACE NAME READY STATUS RESTARTS AGE
kube-system calico-node-1cs8z 2/2 Running 0 6m
kube-system calico-node-d1l5b 2/2 Running 0 6m
kube-system calico-node-sp9ps 2/2 Running 0 6m
kube-system cilium-1cs8z 1/1 Running 0 6m
kube-system cilium-d1l5b 1/1 Running 0 6m
kube-system cilium-sp9ps 1/1 Running 0 6m
kube-system cilium-operator-68d778b448-g744f 1/1 Running 0 6m
kube-system coredns-1187388186-zj5dl 1/1 Running 0 6m
kube-system coredns-1187388186-dkh3o 1/1 Running 0 6m
kube-system kube-apiserver-controller-0 1/1 Running 0 6m

View File

@ -0,0 +1,36 @@
resource "kubernetes_cluster_role_binding" "operator" {
metadata {
name = "cilium-operator"
}
role_ref {
api_group = "rbac.authorization.k8s.io"
kind = "ClusterRole"
name = "cilium-operator"
}
subject {
kind = "ServiceAccount"
name = "cilium-operator"
namespace = "kube-system"
}
}
resource "kubernetes_cluster_role_binding" "agent" {
metadata {
name = "cilium-agent"
}
role_ref {
api_group = "rbac.authorization.k8s.io"
kind = "ClusterRole"
name = "cilium-agent"
}
subject {
kind = "ServiceAccount"
name = "cilium-agent"
namespace = "kube-system"
}
}

View File

@ -0,0 +1,112 @@
resource "kubernetes_cluster_role" "operator" {
metadata {
name = "cilium-operator"
}
# detect and restart [core|kube]dns pods on startup
rule {
verbs = ["get", "list", "watch", "delete"]
api_groups = [""]
resources = ["pods"]
}
rule {
verbs = ["list", "watch"]
api_groups = [""]
resources = ["nodes"]
}
rule {
verbs = ["patch"]
api_groups = [""]
resources = ["nodes", "nodes/status"]
}
rule {
verbs = ["get", "list", "watch"]
api_groups = ["discovery.k8s.io"]
resources = ["endpointslices"]
}
rule {
verbs = ["get", "list", "watch"]
api_groups = [""]
resources = ["services"]
}
# Perform LB IP allocation for BGP
rule {
verbs = ["update"]
api_groups = [""]
resources = ["services/status"]
}
# Perform the translation of a CNP that contains `ToGroup` to its endpoints
rule {
verbs = ["get", "list", "watch"]
api_groups = [""]
resources = ["services", "endpoints", "namespaces"]
}
rule {
verbs = ["*"]
api_groups = ["cilium.io"]
resources = ["ciliumnetworkpolicies", "ciliumnetworkpolicies/status", "ciliumnetworkpolicies/finalizers", "ciliumclusterwidenetworkpolicies", "ciliumclusterwidenetworkpolicies/status", "ciliumclusterwidenetworkpolicies/finalizers", "ciliumendpoints", "ciliumendpoints/status", "ciliumendpoints/finalizers", "ciliumnodes", "ciliumnodes/status", "ciliumnodes/finalizers", "ciliumidentities", "ciliumidentities/status", "ciliumidentities/finalizers", "ciliumlocalredirectpolicies", "ciliumlocalredirectpolicies/status", "ciliumlocalredirectpolicies/finalizers", "ciliumendpointslices", "ciliumloadbalancerippools", "ciliumloadbalancerippools/status", "ciliumcidrgroups", "ciliuml2announcementpolicies", "ciliuml2announcementpolicies/status", "ciliumpodippools"]
}
rule {
verbs = ["create", "get", "list", "update", "watch"]
api_groups = ["apiextensions.k8s.io"]
resources = ["customresourcedefinitions"]
}
# Cilium leader elects if among multiple operator replicas
rule {
verbs = ["create", "get", "update"]
api_groups = ["coordination.k8s.io"]
resources = ["leases"]
}
}
resource "kubernetes_cluster_role" "agent" {
metadata {
name = "cilium-agent"
}
rule {
verbs = ["get", "list", "watch"]
api_groups = ["networking.k8s.io"]
resources = ["networkpolicies"]
}
rule {
verbs = ["get", "list", "watch"]
api_groups = ["discovery.k8s.io"]
resources = ["endpointslices"]
}
rule {
verbs = ["get", "list", "watch"]
api_groups = [""]
resources = ["namespaces", "services", "pods", "endpoints", "nodes"]
}
rule {
verbs = ["patch"]
api_groups = [""]
resources = ["nodes/status"]
}
rule {
verbs = ["create", "get", "list", "watch", "update"]
api_groups = ["apiextensions.k8s.io"]
resources = ["customresourcedefinitions"]
}
rule {
verbs = ["*"]
api_groups = ["cilium.io"]
resources = ["ciliumnetworkpolicies", "ciliumnetworkpolicies/status", "ciliumclusterwidenetworkpolicies", "ciliumclusterwidenetworkpolicies/status", "ciliumendpoints", "ciliumendpoints/status", "ciliumnodes", "ciliumnodes/status", "ciliumidentities", "ciliumidentities/status", "ciliumlocalredirectpolicies", "ciliumlocalredirectpolicies/status", "ciliumegressnatpolicies", "ciliumendpointslices", "ciliumcidrgroups", "ciliuml2announcementpolicies", "ciliuml2announcementpolicies/status", "ciliumpodippools"]
}
}

196
addons/cilium/config.tf Normal file
View File

@ -0,0 +1,196 @@
resource "kubernetes_config_map" "cilium" {
metadata {
name = "cilium"
namespace = "kube-system"
}
data = {
# Identity allocation mode selects how identities are shared between cilium
# nodes by setting how they are stored. The options are "crd" or "kvstore".
# - "crd" stores identities in kubernetes as CRDs (custom resource definition).
# These can be queried with:
# kubectl get ciliumid
# - "kvstore" stores identities in a kvstore, etcd or consul, that is
# configured below. Cilium versions before 1.6 supported only the kvstore
# backend. Upgrades from these older cilium versions should continue using
# the kvstore by commenting out the identity-allocation-mode below, or
# setting it to "kvstore".
identity-allocation-mode = "crd"
cilium-endpoint-gc-interval = "5m0s"
nodes-gc-interval = "5m0s"
# If you want to run cilium in debug mode change this value to true
debug = "false"
# The agent can be put into the following three policy enforcement modes
# default, always and never.
# https://docs.cilium.io/en/latest/policy/intro/#policy-enforcement-modes
enable-policy = "default"
# Prometheus
enable-metrics = "true"
prometheus-serve-addr = ":9962"
operator-prometheus-serve-addr = ":9963"
proxy-prometheus-port = "9964" # envoy
# Enable IPv4 addressing. If enabled, all endpoints are allocated an IPv4
# address.
enable-ipv4 = "true"
# Enable IPv6 addressing. If enabled, all endpoints are allocated an IPv6
# address.
enable-ipv6 = "false"
# Enable probing for a more efficient clock source for the BPF datapath
enable-bpf-clock-probe = "true"
# Enable use of transparent proxying mechanisms (Linux 5.7+)
enable-bpf-tproxy = "false"
# If you want cilium monitor to aggregate tracing for packets, set this level
# to "low", "medium", or "maximum". The higher the level, the less packets
# that will be seen in monitor output.
monitor-aggregation = "medium"
# The monitor aggregation interval governs the typical time between monitor
# notification events for each allowed connection.
#
# Only effective when monitor aggregation is set to "medium" or higher.
monitor-aggregation-interval = "5s"
# The monitor aggregation flags determine which TCP flags which, upon the
# first observation, cause monitor notifications to be generated.
#
# Only effective when monitor aggregation is set to "medium" or higher.
monitor-aggregation-flags = "all"
# Specifies the ratio (0.0-1.0) of total system memory to use for dynamic
# sizing of the TCP CT, non-TCP CT, NAT and policy BPF maps.
bpf-map-dynamic-size-ratio = "0.0025"
# bpf-policy-map-max specified the maximum number of entries in endpoint
# policy map (per endpoint)
bpf-policy-map-max = "16384"
# bpf-lb-map-max specifies the maximum number of entries in bpf lb service,
# backend and affinity maps.
bpf-lb-map-max = "65536"
# Pre-allocation of map entries allows per-packet latency to be reduced, at
# the expense of up-front memory allocation for the entries in the maps. The
# default value below will minimize memory usage in the default installation;
# users who are sensitive to latency may consider setting this to "true".
#
# This option was introduced in Cilium 1.4. Cilium 1.3 and earlier ignore
# this option and behave as though it is set to "true".
#
# If this value is modified, then during the next Cilium startup the restore
# of existing endpoints and tracking of ongoing connections may be disrupted.
# As a result, reply packets may be dropped and the load-balancing decisions
# for established connections may change.
#
# If this option is set to "false" during an upgrade from 1.3 or earlier to
# 1.4 or later, then it may cause one-time disruptions during the upgrade.
preallocate-bpf-maps = "false"
# Name of the cluster. Only relevant when building a mesh of clusters.
cluster-name = "default"
# Unique ID of the cluster. Must be unique across all conneted clusters and
# in the range of 1 and 255. Only relevant when building a mesh of clusters.
cluster-id = "0"
# Encapsulation mode for communication between nodes
# Possible values:
# - disabled
# - vxlan (default)
# - geneve
routing-mode = "tunnel"
tunnel = "vxlan"
# Enables L7 proxy for L7 policy enforcement and visibility
enable-l7-proxy = "true"
auto-direct-node-routes = "false"
# enableXTSocketFallback enables the fallback compatibility solution
# when the xt_socket kernel module is missing and it is needed for
# the datapath L7 redirection to work properly. See documentation
# for details on when this can be disabled:
# http://docs.cilium.io/en/latest/install/system_requirements/#admin-kernel-version.
enable-xt-socket-fallback = "true"
# installIptablesRules enables installation of iptables rules to allow for
# TPROXY (L7 proxy injection), itpables based masquerading and compatibility
# with kube-proxy. See documentation for details on when this can be
# disabled.
install-iptables-rules = "true"
# masquerade traffic leaving the node destined for outside
enable-ipv4-masquerade = "true"
enable-ipv6-masquerade = "false"
# bpfMasquerade enables masquerading with BPF instead of iptables
enable-bpf-masquerade = "true"
# kube-proxy
kube-proxy-replacement = "true"
kube-proxy-replacement-healthz-bind-address = ":10256"
enable-session-affinity = "true"
# ClusterIPs from host namespace
bpf-lb-sock = "true"
# ClusterIPs from external nodes
bpf-lb-external-clusterip = "true"
# NodePort
enable-node-port = "true"
enable-health-check-nodeport = "false"
# ExternalIPs
enable-external-ips = "true"
# HostPort
enable-host-port = "true"
# IPAM
ipam = "cluster-pool"
disable-cnp-status-updates = "true"
cluster-pool-ipv4-cidr = "${var.pod_cidr}"
cluster-pool-ipv4-mask-size = "24"
# Health
agent-health-port = "9876"
enable-health-checking = "true"
enable-endpoint-health-checking = "true"
# Identity
enable-well-known-identities = "false"
enable-remote-node-identity = "true"
# Hubble server
enable-hubble = var.enable_hubble
hubble-disable-tls = "false"
hubble-listen-address = ":4244"
hubble-socket-path = "/var/run/cilium/hubble.sock"
hubble-tls-client-ca-files = "/var/lib/cilium/tls/hubble/client-ca.crt"
hubble-tls-cert-file = "/var/lib/cilium/tls/hubble/server.crt"
hubble-tls-key-file = "/var/lib/cilium/tls/hubble/server.key"
hubble-export-file-max-backups = "5"
hubble-export-file-max-size-mb = "10"
# Hubble metrics
hubble-metrics-server = ":9965"
hubble-metrics = "dns drop tcp flow port-distribution icmp httpV2"
enable-hubble-open-metrics = "false"
# Misc
enable-bandwidth-manager = "false"
enable-local-redirect-policy = "false"
policy-audit-mode = "false"
operator-api-serve-addr = "127.0.0.1:9234"
enable-l2-neigh-discovery = "true"
enable-k8s-terminating-endpoint = "true"
enable-k8s-networkpolicy = "true"
external-envoy-proxy = "false"
write-cni-conf-when-ready = "/host/etc/cni/net.d/05-cilium.conflist"
cni-exclusive = "true"
cni-log-file = "/var/run/cilium/cilium-cni.log"
}
}

379
addons/cilium/daemonset.tf Normal file
View File

@ -0,0 +1,379 @@
resource "kubernetes_daemonset" "cilium" {
wait_for_rollout = false
metadata {
name = "cilium"
namespace = "kube-system"
labels = {
k8s-app = "cilium"
}
}
spec {
strategy {
type = "RollingUpdate"
rolling_update {
max_unavailable = "1"
}
}
selector {
match_labels = {
k8s-app = "cilium-agent"
}
}
template {
metadata {
labels = {
k8s-app = "cilium-agent"
}
annotations = {
"prometheus.io/port" = "9962"
"prometheus.io/scrape" = "true"
}
}
spec {
host_network = true
priority_class_name = "system-node-critical"
service_account_name = "cilium-agent"
security_context {
seccomp_profile {
type = "RuntimeDefault"
}
}
toleration {
key = "node-role.kubernetes.io/controller"
operator = "Exists"
}
toleration {
key = "node.kubernetes.io/not-ready"
operator = "Exists"
}
dynamic "toleration" {
for_each = var.daemonset_tolerations
content {
key = toleration.value
operator = "Exists"
}
}
automount_service_account_token = true
enable_service_links = false
# Cilium v1.13.1 starts installing CNI plugins in yet another init container
# https://github.com/cilium/cilium/pull/24075
init_container {
name = "install-cni"
image = "quay.io/cilium/cilium:v1.16.4"
command = ["/install-plugin.sh"]
security_context {
allow_privilege_escalation = true
privileged = true
capabilities {
drop = ["ALL"]
}
}
volume_mount {
name = "cni-bin-dir"
mount_path = "/host/opt/cni/bin"
}
}
# Required to mount cgroup2 filesystem on the underlying Kubernetes node.
# We use nsenter command with host's cgroup and mount namespaces enabled.
init_container {
name = "mount-cgroup"
image = "quay.io/cilium/cilium:v1.16.4"
command = [
"sh",
"-ec",
# The statically linked Go program binary is invoked to avoid any
# dependency on utilities like sh and mount that can be missing on certain
# distros installed on the underlying host. Copy the binary to the
# same directory where we install cilium cni plugin so that exec permissions
# are available.
"cp /usr/bin/cilium-mount /hostbin/cilium-mount && nsenter --cgroup=/hostproc/1/ns/cgroup --mount=/hostproc/1/ns/mnt \"$${BIN_PATH}/cilium-mount\" $CGROUP_ROOT; rm /hostbin/cilium-mount"
]
env {
name = "CGROUP_ROOT"
value = "/run/cilium/cgroupv2"
}
env {
name = "BIN_PATH"
value = "/opt/cni/bin"
}
security_context {
allow_privilege_escalation = true
privileged = true
}
volume_mount {
name = "hostproc"
mount_path = "/hostproc"
}
volume_mount {
name = "cni-bin-dir"
mount_path = "/hostbin"
}
}
init_container {
name = "clean-cilium-state"
image = "quay.io/cilium/cilium:v1.16.4"
command = ["/init-container.sh"]
security_context {
allow_privilege_escalation = true
privileged = true
}
volume_mount {
name = "sys-fs-bpf"
mount_path = "/sys/fs/bpf"
}
volume_mount {
name = "var-run-cilium"
mount_path = "/var/run/cilium"
}
# Required to mount cgroup filesystem from the host to cilium agent pod
volume_mount {
name = "cilium-cgroup"
mount_path = "/run/cilium/cgroupv2"
mount_propagation = "HostToContainer"
}
}
container {
name = "cilium-agent"
image = "quay.io/cilium/cilium:v1.16.4"
command = ["cilium-agent"]
args = [
"--config-dir=/tmp/cilium/config-map"
]
env {
name = "K8S_NODE_NAME"
value_from {
field_ref {
api_version = "v1"
field_path = "spec.nodeName"
}
}
}
env {
name = "CILIUM_K8S_NAMESPACE"
value_from {
field_ref {
api_version = "v1"
field_path = "metadata.namespace"
}
}
}
env {
name = "KUBERNETES_SERVICE_HOST"
value_from {
config_map_key_ref {
name = "in-cluster"
key = "apiserver-host"
}
}
}
env {
name = "KUBERNETES_SERVICE_PORT"
value_from {
config_map_key_ref {
name = "in-cluster"
key = "apiserver-port"
}
}
}
port {
name = "peer-service"
protocol = "TCP"
container_port = 4244
}
# Metrics
port {
name = "metrics"
protocol = "TCP"
container_port = 9962
}
port {
name = "envoy-metrics"
protocol = "TCP"
container_port = 9964
}
port {
name = "hubble-metrics"
protocol = "TCP"
container_port = 9965
}
# Not yet used, prefer exec's
port {
name = "health"
protocol = "TCP"
container_port = 9876
}
lifecycle {
pre_stop {
exec {
command = ["/cni-uninstall.sh"]
}
}
}
security_context {
allow_privilege_escalation = true
privileged = true
}
liveness_probe {
exec {
command = ["cilium", "status", "--brief"]
}
initial_delay_seconds = 120
timeout_seconds = 5
period_seconds = 30
success_threshold = 1
failure_threshold = 10
}
readiness_probe {
exec {
command = ["cilium", "status", "--brief"]
}
initial_delay_seconds = 5
timeout_seconds = 5
period_seconds = 20
success_threshold = 1
failure_threshold = 3
}
# Load kernel modules
volume_mount {
name = "lib-modules"
read_only = true
mount_path = "/lib/modules"
}
# Access iptables concurrently
volume_mount {
name = "xtables-lock"
mount_path = "/run/xtables.lock"
}
# Keep state between restarts
volume_mount {
name = "var-run-cilium"
mount_path = "/var/run/cilium"
}
volume_mount {
name = "sys-fs-bpf"
mount_path = "/sys/fs/bpf"
mount_propagation = "Bidirectional"
}
# Configuration
volume_mount {
name = "config"
read_only = true
mount_path = "/tmp/cilium/config-map"
}
# Install config on host
volume_mount {
name = "cni-conf-dir"
mount_path = "/host/etc/cni/net.d"
}
# Hubble
volume_mount {
name = "hubble-tls"
mount_path = "/var/lib/cilium/tls/hubble"
read_only = true
}
}
termination_grace_period_seconds = 1
# Load kernel modules
volume {
name = "lib-modules"
host_path {
path = "/lib/modules"
}
}
# Access iptables concurrently with other processes (e.g. kube-proxy)
volume {
name = "xtables-lock"
host_path {
path = "/run/xtables.lock"
type = "FileOrCreate"
}
}
# Keep state between restarts
volume {
name = "var-run-cilium"
host_path {
path = "/var/run/cilium"
type = "DirectoryOrCreate"
}
}
# Keep state for bpf maps between restarts
volume {
name = "sys-fs-bpf"
host_path {
path = "/sys/fs/bpf"
type = "DirectoryOrCreate"
}
}
# Mount host cgroup2 filesystem
volume {
name = "hostproc"
host_path {
path = "/proc"
type = "Directory"
}
}
volume {
name = "cilium-cgroup"
host_path {
path = "/run/cilium/cgroupv2"
type = "DirectoryOrCreate"
}
}
# Read configuration
volume {
name = "config"
config_map {
name = "cilium"
}
}
# Install CNI plugin and config on host
volume {
name = "cni-bin-dir"
host_path {
path = "/opt/cni/bin"
type = "DirectoryOrCreate"
}
}
volume {
name = "cni-conf-dir"
host_path {
path = "/etc/cni/net.d"
type = "DirectoryOrCreate"
}
}
# Hubble TLS (optional)
volume {
name = "hubble-tls"
projected {
default_mode = "0400"
sources {
secret {
name = "hubble-server-certs"
optional = true
items {
key = "ca.crt"
path = "client-ca.crt"
}
items {
key = "tls.crt"
path = "server.crt"
}
items {
key = "tls.key"
path = "server.key"
}
}
}
}
}
}
}
}
}

163
addons/cilium/deployment.tf Normal file
View File

@ -0,0 +1,163 @@
resource "kubernetes_deployment" "operator" {
wait_for_rollout = false
metadata {
name = "cilium-operator"
namespace = "kube-system"
}
spec {
replicas = 1
strategy {
type = "RollingUpdate"
rolling_update {
max_unavailable = "1"
}
}
selector {
match_labels = {
name = "cilium-operator"
}
}
template {
metadata {
labels = {
name = "cilium-operator"
}
annotations = {
"prometheus.io/scrape" = "true"
"prometheus.io/port" = "9963"
}
}
spec {
host_network = true
priority_class_name = "system-cluster-critical"
service_account_name = "cilium-operator"
security_context {
seccomp_profile {
type = "RuntimeDefault"
}
}
toleration {
key = "node-role.kubernetes.io/controller"
operator = "Exists"
}
toleration {
key = "node.kubernetes.io/not-ready"
operator = "Exists"
}
topology_spread_constraint {
max_skew = 1
topology_key = "kubernetes.io/hostname"
when_unsatisfiable = "DoNotSchedule"
label_selector {
match_labels = {
name = "cilium-operator"
}
}
}
automount_service_account_token = true
enable_service_links = false
container {
name = "cilium-operator"
image = "quay.io/cilium/operator-generic:v1.16.4"
command = ["cilium-operator-generic"]
args = [
"--config-dir=/tmp/cilium/config-map",
"--debug=$(CILIUM_DEBUG)"
]
env {
name = "K8S_NODE_NAME"
value_from {
field_ref {
api_version = "v1"
field_path = "spec.nodeName"
}
}
}
env {
name = "CILIUM_K8S_NAMESPACE"
value_from {
field_ref {
api_version = "v1"
field_path = "metadata.namespace"
}
}
}
env {
name = "KUBERNETES_SERVICE_HOST"
value_from {
config_map_key_ref {
name = "in-cluster"
key = "apiserver-host"
}
}
}
env {
name = "KUBERNETES_SERVICE_PORT"
value_from {
config_map_key_ref {
name = "in-cluster"
key = "apiserver-port"
}
}
}
env {
name = "CILIUM_DEBUG"
value_from {
config_map_key_ref {
name = "cilium"
key = "debug"
optional = true
}
}
}
port {
name = "metrics"
protocol = "TCP"
host_port = 9963
container_port = 9963
}
port {
name = "health"
container_port = 9234
protocol = "TCP"
}
liveness_probe {
http_get {
scheme = "HTTP"
host = "127.0.0.1"
port = "9234"
path = "/healthz"
}
initial_delay_seconds = 60
timeout_seconds = 3
period_seconds = 10
}
readiness_probe {
http_get {
scheme = "HTTP"
host = "127.0.0.1"
port = "9234"
path = "/healthz"
}
timeout_seconds = 3
period_seconds = 15
failure_threshold = 5
}
volume_mount {
name = "config"
read_only = true
mount_path = "/tmp/cilium/config-map"
}
}
volume {
name = "config"
config_map {
name = "cilium"
}
}
}
}
}
}

View File

@ -0,0 +1,15 @@
resource "kubernetes_service_account" "operator" {
metadata {
name = "cilium-operator"
namespace = "kube-system"
}
automount_service_account_token = false
}
resource "kubernetes_service_account" "agent" {
metadata {
name = "cilium-agent"
namespace = "kube-system"
}
automount_service_account_token = false
}

View File

@ -0,0 +1,17 @@
variable "pod_cidr" {
type = string
description = "CIDR IP range to assign Kubernetes pods"
default = "10.2.0.0/16"
}
variable "daemonset_tolerations" {
type = list(string)
description = "List of additional taint keys kube-system DaemonSets should tolerate (e.g. ['custom-role', 'gpu-role'])"
default = []
}
variable "enable_hubble" {
type = bool
description = "Run the embedded Hubble Server and mount hubble-server-certs Secret"
default = true
}

View File

@ -0,0 +1,8 @@
terraform {
required_providers {
kubernetes = {
source = "hashicorp/kubernetes"
version = "~> 2.8"
}
}
}

View File

@ -0,0 +1,37 @@
resource "kubernetes_cluster_role" "coredns" {
metadata {
name = "system:coredns"
}
rule {
api_groups = [""]
resources = [
"endpoints",
"services",
"pods",
"namespaces",
]
verbs = [
"list",
"watch",
]
}
rule {
api_groups = [""]
resources = [
"nodes",
]
verbs = [
"get",
]
}
rule {
api_groups = ["discovery.k8s.io"]
resources = [
"endpointslices",
]
verbs = [
"list",
"watch",
]
}
}

30
addons/coredns/config.tf Normal file
View File

@ -0,0 +1,30 @@
resource "kubernetes_config_map" "coredns" {
metadata {
name = "coredns"
namespace = "kube-system"
}
data = {
"Corefile" = <<-EOF
.:53 {
errors
health {
lameduck 5s
}
ready
log . {
class error
}
kubernetes ${var.cluster_domain_suffix} in-addr.arpa ip6.arpa {
pods insecure
fallthrough in-addr.arpa ip6.arpa
}
prometheus :9153
forward . /etc/resolv.conf
cache 30
loop
reload
loadbalance
}
EOF
}
}

View File

@ -0,0 +1,151 @@
resource "kubernetes_deployment" "coredns" {
wait_for_rollout = false
metadata {
name = "coredns"
namespace = "kube-system"
labels = {
k8s-app = "coredns"
"kubernetes.io/name" = "CoreDNS"
}
}
spec {
replicas = var.replicas
strategy {
type = "RollingUpdate"
rolling_update {
max_unavailable = "1"
}
}
selector {
match_labels = {
k8s-app = "coredns"
tier = "control-plane"
}
}
template {
metadata {
labels = {
k8s-app = "coredns"
tier = "control-plane"
}
}
spec {
affinity {
node_affinity {
preferred_during_scheduling_ignored_during_execution {
weight = 100
preference {
match_expressions {
key = "node.kubernetes.io/controller"
operator = "Exists"
}
}
}
}
pod_anti_affinity {
preferred_during_scheduling_ignored_during_execution {
weight = 100
pod_affinity_term {
label_selector {
match_expressions {
key = "tier"
operator = "In"
values = ["control-plane"]
}
match_expressions {
key = "k8s-app"
operator = "In"
values = ["coredns"]
}
}
topology_key = "kubernetes.io/hostname"
}
}
}
}
dns_policy = "Default"
priority_class_name = "system-cluster-critical"
security_context {
seccomp_profile {
type = "RuntimeDefault"
}
}
service_account_name = "coredns"
toleration {
key = "node-role.kubernetes.io/controller"
effect = "NoSchedule"
}
container {
name = "coredns"
image = "registry.k8s.io/coredns/coredns:v1.12.0"
args = ["-conf", "/etc/coredns/Corefile"]
port {
name = "dns"
container_port = 53
protocol = "UDP"
}
port {
name = "dns-tcp"
container_port = 53
protocol = "TCP"
}
port {
name = "metrics"
container_port = 9153
protocol = "TCP"
}
resources {
requests = {
cpu = "100m"
memory = "70Mi"
}
limits = {
memory = "170Mi"
}
}
security_context {
capabilities {
add = ["NET_BIND_SERVICE"]
drop = ["all"]
}
read_only_root_filesystem = true
}
liveness_probe {
http_get {
path = "/health"
port = "8080"
scheme = "HTTP"
}
initial_delay_seconds = 60
timeout_seconds = 5
success_threshold = 1
failure_threshold = 5
}
readiness_probe {
http_get {
path = "/ready"
port = "8181"
scheme = "HTTP"
}
}
volume_mount {
name = "config"
mount_path = "/etc/coredns"
read_only = true
}
}
volume {
name = "config"
config_map {
name = "coredns"
items {
key = "Corefile"
path = "Corefile"
}
}
}
}
}
}
}

View File

@ -0,0 +1,24 @@
resource "kubernetes_service_account" "coredns" {
metadata {
name = "coredns"
namespace = "kube-system"
}
automount_service_account_token = false
}
resource "kubernetes_cluster_role_binding" "coredns" {
metadata {
name = "system:coredns"
}
role_ref {
api_group = "rbac.authorization.k8s.io"
kind = "ClusterRole"
name = "system:coredns"
}
subject {
kind = "ServiceAccount"
name = "coredns"
namespace = "kube-system"
}
}

31
addons/coredns/service.tf Normal file
View File

@ -0,0 +1,31 @@
resource "kubernetes_service" "coredns" {
metadata {
name = "coredns"
namespace = "kube-system"
labels = {
"k8s-app" = "coredns"
"kubernetes.io/name" = "CoreDNS"
}
annotations = {
"prometheus.io/scrape" = "true"
"prometheus.io/port" = "9153"
}
}
spec {
type = "ClusterIP"
cluster_ip = var.cluster_dns_service_ip
selector = {
k8s-app = "coredns"
}
port {
name = "dns"
protocol = "UDP"
port = 53
}
port {
name = "dns-tcp"
protocol = "TCP"
port = 53
}
}
}

View File

@ -0,0 +1,15 @@
variable "replicas" {
type = number
description = "CoreDNS replica count"
default = 2
}
variable "cluster_dns_service_ip" {
description = "Must be set to `cluster_dns_service_ip` output by cluster"
default = "10.3.0.10"
}
variable "cluster_domain_suffix" {
description = "Must be set to `cluster_domain_suffix` output by cluster"
default = "cluster.local"
}

View File

@ -0,0 +1,9 @@
terraform {
required_providers {
kubernetes = {
source = "hashicorp/kubernetes"
version = "~> 2.8"
}
}
}

View File

@ -0,0 +1,18 @@
resource "kubernetes_cluster_role_binding" "flannel" {
metadata {
name = "flannel"
}
role_ref {
api_group = "rbac.authorization.k8s.io"
kind = "ClusterRole"
name = "flannel"
}
subject {
kind = "ServiceAccount"
name = "flannel"
namespace = "kube-system"
}
}

View File

@ -0,0 +1,24 @@
resource "kubernetes_cluster_role" "flannel" {
metadata {
name = "flannel"
}
rule {
api_groups = [""]
resources = ["pods"]
verbs = ["get"]
}
rule {
api_groups = [""]
resources = ["nodes"]
verbs = ["list", "watch"]
}
rule {
api_groups = [""]
resources = ["nodes/status"]
verbs = ["patch"]
}
}

44
addons/flannel/config.tf Normal file
View File

@ -0,0 +1,44 @@
resource "kubernetes_config_map" "config" {
metadata {
name = "flannel-config"
namespace = "kube-system"
labels = {
k8s-app = "flannel"
tier = "node"
}
}
data = {
"cni-conf.json" = <<-EOF
{
"name": "cbr0",
"cniVersion": "0.3.1",
"plugins": [
{
"type": "flannel",
"delegate": {
"hairpinMode": true,
"isDefaultGateway": true
}
},
{
"type": "portmap",
"capabilities": {
"portMappings": true
}
}
]
}
EOF
"net-conf.json" = <<-EOF
{
"Network": "${var.pod_cidr}",
"Backend": {
"Type": "vxlan",
"Port": 4789
}
}
EOF
}
}

167
addons/flannel/daemonset.tf Normal file
View File

@ -0,0 +1,167 @@
resource "kubernetes_daemonset" "flannel" {
metadata {
name = "flannel"
namespace = "kube-system"
labels = {
k8s-app = "flannel"
}
}
spec {
strategy {
type = "RollingUpdate"
rolling_update {
max_unavailable = "1"
}
}
selector {
match_labels = {
k8s-app = "flannel"
}
}
template {
metadata {
labels = {
k8s-app = "flannel"
}
}
spec {
host_network = true
priority_class_name = "system-node-critical"
service_account_name = "flannel"
security_context {
seccomp_profile {
type = "RuntimeDefault"
}
}
toleration {
key = "node-role.kubernetes.io/controller"
operator = "Exists"
}
toleration {
key = "node.kubernetes.io/not-ready"
operator = "Exists"
}
dynamic "toleration" {
for_each = var.daemonset_tolerations
content {
key = toleration.value
operator = "Exists"
}
}
init_container {
name = "install-cni"
image = "quay.io/poseidon/flannel-cni:v0.4.2"
command = ["/install-cni.sh"]
env {
name = "CNI_NETWORK_CONFIG"
value_from {
config_map_key_ref {
name = "flannel-config"
key = "cni-conf.json"
}
}
}
volume_mount {
name = "cni-bin-dir"
mount_path = "/host/opt/cni/bin/"
}
volume_mount {
name = "cni-conf-dir"
mount_path = "/host/etc/cni/net.d"
}
}
container {
name = "flannel"
image = "docker.io/flannel/flannel:v0.26.1"
command = [
"/opt/bin/flanneld",
"--ip-masq",
"--kube-subnet-mgr",
"--iface=$(POD_IP)"
]
env {
name = "POD_NAME"
value_from {
field_ref {
field_path = "metadata.name"
}
}
}
env {
name = "POD_NAMESPACE"
value_from {
field_ref {
field_path = "metadata.namespace"
}
}
}
env {
name = "POD_IP"
value_from {
field_ref {
field_path = "status.podIP"
}
}
}
security_context {
privileged = true
}
resources {
requests = {
cpu = "100m"
}
}
volume_mount {
name = "flannel-config"
mount_path = "/etc/kube-flannel/"
}
volume_mount {
name = "run-flannel"
mount_path = "/run/flannel"
}
volume_mount {
name = "xtables-lock"
mount_path = "/run/xtables.lock"
}
}
volume {
name = "flannel-config"
config_map {
name = "flannel-config"
}
}
volume {
name = "run-flannel"
host_path {
path = "/run/flannel"
}
}
# Used by install-cni
volume {
name = "cni-bin-dir"
host_path {
path = "/opt/cni/bin"
}
}
volume {
name = "cni-conf-dir"
host_path {
path = "/etc/cni/net.d"
type = "DirectoryOrCreate"
}
}
# Acces iptables concurrently
volume {
name = "xtables-lock"
host_path {
path = "/run/xtables.lock"
type = "FileOrCreate"
}
}
}
}
}
}

View File

@ -0,0 +1,7 @@
resource "kubernetes_service_account" "flannel" {
metadata {
name = "flannel"
namespace = "kube-system"
}
}

View File

@ -0,0 +1,11 @@
variable "pod_cidr" {
type = string
description = "CIDR IP range to assign Kubernetes pods"
default = "10.2.0.0/16"
}
variable "daemonset_tolerations" {
type = list(string)
description = "List of additional taint keys kube-system DaemonSets should tolerate (e.g. ['custom-role', 'gpu-role'])"
default = []
}

View File

@ -0,0 +1,8 @@
terraform {
required_providers {
kubernetes = {
source = "hashicorp/kubernetes"
version = "~> 2.8"
}
}
}

View File

@ -59,4 +59,11 @@ rules:
- get
- list
- watch
- apiGroups:
- discovery.k8s.io
resources:
- "endpointslices"
verbs:
- get
- list
- watch

View File

@ -59,4 +59,11 @@ rules:
- get
- list
- watch
- apiGroups:
- discovery.k8s.io
resources:
- "endpointslices"
verbs:
- get
- list
- watch

View File

@ -59,4 +59,11 @@ rules:
- get
- list
- watch
- apiGroups:
- discovery.k8s.io
resources:
- "endpointslices"
verbs:
- get
- list
- watch

View File

@ -1,7 +1,7 @@
apiVersion: v1
kind: Service
metadata:
name: ingress-controller-public
name: nginx-ingress-controller
namespace: ingress
annotations:
prometheus.io/scrape: 'true'
@ -10,7 +10,7 @@ spec:
type: ClusterIP
clusterIP: 10.3.0.12
selector:
name: ingress-controller-public
name: nginx-ingress-controller
phase: prod
ports:
- name: http

View File

@ -59,4 +59,11 @@ rules:
- get
- list
- watch
- apiGroups:
- discovery.k8s.io
resources:
- "endpointslices"
verbs:
- get
- list
- watch

View File

@ -59,4 +59,11 @@ rules:
- get
- list
- watch
- apiGroups:
- discovery.k8s.io
resources:
- "endpointslices"
verbs:
- get
- list
- watch

View File

@ -11,7 +11,7 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster
## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>
* Kubernetes v1.28.3 (upstream)
* Kubernetes v1.31.3 (upstream)
* Single or multi-master, [Calico](https://www.projectcalico.org/) or [Cilium](https://github.com/cilium/cilium) or [flannel](https://github.com/coreos/flannel) networking
* On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/), SELinux enforcing
* Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [spot](https://typhoon.psdn.io/fedora-coreos/aws/#spot) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#hosts) customization

View File

@ -19,7 +19,7 @@ data "aws_ami" "fedora-coreos" {
}
data "aws_ami" "fedora-coreos-arm" {
count = var.arch == "arm64" ? 1 : 0
count = var.controller_arch == "arm64" ? 1 : 0
most_recent = true
owners = ["125523088429"]

View File

@ -1,6 +1,6 @@
# Kubernetes assets (kubeconfig, manifests)
module "bootstrap" {
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=d151ab77b7ebdfb878ea110c86cc77238189f1ed"
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=e6a1c7bccfc45ab299b5f8149bc3840f99b30b2b"
cluster_name = var.cluster_name
api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)]
@ -9,9 +9,7 @@ module "bootstrap" {
network_mtu = var.network_mtu
pod_cidr = var.pod_cidr
service_cidr = var.service_cidr
cluster_domain_suffix = var.cluster_domain_suffix
enable_reporting = var.enable_reporting
enable_aggregation = var.enable_aggregation
daemonset_tolerations = var.daemonset_tolerations
components = var.components
}

View File

@ -12,7 +12,7 @@ systemd:
Wants=network-online.target
After=network-online.target
[Service]
Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.10
Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.13
Type=exec
ExecStartPre=/bin/mkdir -p /var/lib/etcd
ExecStartPre=-/usr/bin/podman rm etcd
@ -57,7 +57,7 @@ systemd:
After=afterburn.service
Wants=rpc-statd.service
[Service]
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3
EnvironmentFile=/run/metadata/afterburn
ExecStartPre=/bin/mkdir -p /etc/cni/net.d
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
@ -116,7 +116,7 @@ systemd:
--volume /opt/bootstrap/assets:/assets:ro,Z \
--volume /opt/bootstrap/apply:/apply:ro,Z \
--entrypoint=/apply \
quay.io/poseidon/kubelet:v1.28.3
quay.io/poseidon/kubelet:v1.31.3
ExecStartPost=/bin/touch /opt/bootstrap/bootstrap.done
ExecStartPost=-/usr/bin/podman stop bootstrap
storage:
@ -149,7 +149,7 @@ storage:
cgroupDriver: systemd
clusterDNS:
- ${cluster_dns_service_ip}
clusterDomain: ${cluster_domain_suffix}
clusterDomain: cluster.local
healthzPort: 0
rotateCertificates: true
shutdownGracePeriod: 45s
@ -163,7 +163,7 @@ storage:
contents:
inline: |
#!/bin/bash -e
mkdir -p -- auth tls/etcd tls/k8s static-manifests manifests/coredns manifests-networking
mkdir -p -- auth tls/{etcd,k8s} static-manifests manifests/{coredns,kube-proxy,network}
awk '/#####/ {filename=$2; next} {print > filename}' assets
mkdir -p /etc/ssl/etcd/etcd
mkdir -p /etc/kubernetes/pki
@ -177,8 +177,7 @@ storage:
mv static-manifests/* /etc/kubernetes/manifests/
mkdir -p /opt/bootstrap/assets
mv manifests /opt/bootstrap/assets/manifests
mv manifests-networking/* /opt/bootstrap/assets/manifests/
rm -rf assets auth static-manifests tls manifests-networking
rm -rf assets auth static-manifests tls manifests
chcon -R -u system_u -t container_file_t /etc/kubernetes/pki
- path: /opt/bootstrap/apply
mode: 0544

View File

@ -20,18 +20,18 @@ resource "aws_instance" "controllers" {
tags = {
Name = "${var.cluster_name}-controller-${count.index}"
}
instance_type = var.controller_type
ami = var.arch == "arm64" ? data.aws_ami.fedora-coreos-arm[0].image_id : data.aws_ami.fedora-coreos.image_id
user_data = data.ct_config.controllers.*.rendered[count.index]
ami = var.controller_arch == "arm64" ? data.aws_ami.fedora-coreos-arm[0].image_id : data.aws_ami.fedora-coreos.image_id
# storage
root_block_device {
volume_type = var.disk_type
volume_size = var.disk_size
iops = var.disk_iops
volume_type = var.controller_disk_type
volume_size = var.controller_disk_size
iops = var.controller_disk_iops
encrypted = true
tags = {}
tags = {
Name = "${var.cluster_name}-controller-${count.index}"
}
}
# network
@ -39,6 +39,14 @@ resource "aws_instance" "controllers" {
subnet_id = element(aws_subnet.public.*.id, count.index)
vpc_security_group_ids = [aws_security_group.controller.id]
# boot
user_data = data.ct_config.controllers.*.rendered[count.index]
# cost
credit_specification {
cpu_credits = var.controller_cpu_credits
}
lifecycle {
ignore_changes = [
ami,
@ -61,7 +69,6 @@ data "ct_config" "controllers" {
kubeconfig = indent(10, module.bootstrap.kubeconfig-kubelet)
ssh_authorized_key = var.ssh_authorized_key
cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
cluster_domain_suffix = var.cluster_domain_suffix
})
strict = true
snippets = var.controller_snippets

View File

@ -47,17 +47,25 @@ resource "aws_route" "egress-ipv6" {
resource "aws_subnet" "public" {
count = length(data.aws_availability_zones.all.names)
vpc_id = aws_vpc.network.id
availability_zone = data.aws_availability_zones.all.names[count.index]
cidr_block = cidrsubnet(var.host_cidr, 4, count.index)
ipv6_cidr_block = cidrsubnet(aws_vpc.network.ipv6_cidr_block, 8, count.index)
map_public_ip_on_launch = true
assign_ipv6_address_on_creation = true
tags = {
"Name" = "${var.cluster_name}-public-${count.index}"
}
vpc_id = aws_vpc.network.id
availability_zone = data.aws_availability_zones.all.names[count.index]
# IPv4 and IPv6 CIDR blocks
cidr_block = cidrsubnet(var.host_cidr, 4, count.index)
ipv6_cidr_block = cidrsubnet(aws_vpc.network.ipv6_cidr_block, 8, count.index)
# Assign IPv4 and IPv6 addresses to instances
map_public_ip_on_launch = true
assign_ipv6_address_on_creation = true
# Hostnames assigned to instances
# resource-name: <ec2-instance-id>.region.compute.internal
private_dns_hostname_type_on_launch = "resource-name"
enable_resource_name_dns_a_record_on_launch = true
enable_resource_name_dns_aaaa_record_on_launch = true
}
resource "aws_route_table_association" "public" {

View File

@ -92,6 +92,30 @@ resource "aws_security_group_rule" "controller-cilium-health-self" {
self = true
}
resource "aws_security_group_rule" "controller-cilium-metrics" {
count = var.networking == "cilium" ? 1 : 0
security_group_id = aws_security_group.controller.id
type = "ingress"
protocol = "tcp"
from_port = 9962
to_port = 9965
source_security_group_id = aws_security_group.worker.id
}
resource "aws_security_group_rule" "controller-cilium-metrics-self" {
count = var.networking == "cilium" ? 1 : 0
security_group_id = aws_security_group.controller.id
type = "ingress"
protocol = "tcp"
from_port = 9962
to_port = 9965
self = true
}
# IANA VXLAN default
resource "aws_security_group_rule" "controller-vxlan" {
count = var.networking == "flannel" ? 1 : 0
@ -379,6 +403,30 @@ resource "aws_security_group_rule" "worker-cilium-health-self" {
self = true
}
resource "aws_security_group_rule" "worker-cilium-metrics" {
count = var.networking == "cilium" ? 1 : 0
security_group_id = aws_security_group.worker.id
type = "ingress"
protocol = "tcp"
from_port = 9962
to_port = 9965
source_security_group_id = aws_security_group.controller.id
}
resource "aws_security_group_rule" "worker-cilium-metrics-self" {
count = var.networking == "cilium" ? 1 : 0
security_group_id = aws_security_group.worker.id
type = "ingress"
protocol = "tcp"
from_port = 9962
to_port = 9965
self = true
}
# IANA VXLAN default
resource "aws_security_group_rule" "worker-vxlan" {
count = var.networking == "flannel" ? 1 : 0

View File

@ -17,30 +17,6 @@ variable "dns_zone_id" {
# instances
variable "controller_count" {
type = number
description = "Number of controllers (i.e. masters)"
default = 1
}
variable "worker_count" {
type = number
description = "Number of workers"
default = 1
}
variable "controller_type" {
type = string
description = "EC2 instance type for controllers"
default = "t3.small"
}
variable "worker_type" {
type = string
description = "EC2 instance type for workers"
default = "t3.small"
}
variable "os_stream" {
type = string
description = "Fedora CoreOS image stream for instances (e.g. stable, testing, next)"
@ -52,24 +28,78 @@ variable "os_stream" {
}
}
variable "disk_size" {
variable "controller_count" {
type = number
description = "Number of controllers (i.e. masters)"
default = 1
}
variable "controller_type" {
type = string
description = "EC2 instance type for controllers"
default = "t3.small"
}
variable "controller_disk_size" {
type = number
description = "Size of the EBS volume in GB"
default = 30
}
variable "disk_type" {
variable "controller_disk_type" {
type = string
description = "Type of the EBS volume (e.g. standard, gp2, gp3, io1)"
default = "gp3"
}
variable "disk_iops" {
variable "controller_disk_iops" {
type = number
description = "IOPS of the EBS volume (e.g. 3000)"
default = 3000
}
variable "controller_cpu_credits" {
type = string
description = "CPU credits mode (if using a burstable instance type)"
default = null
}
variable "worker_count" {
type = number
description = "Number of workers"
default = 1
}
variable "worker_type" {
type = string
description = "EC2 instance type for workers"
default = "t3.small"
}
variable "worker_disk_size" {
type = number
description = "Size of the EBS volume in GB"
default = 30
}
variable "worker_disk_type" {
type = string
description = "Type of the EBS volume (e.g. standard, gp2, gp3, io1)"
default = "gp3"
}
variable "worker_disk_iops" {
type = number
description = "IOPS of the EBS volume (e.g. 3000)"
default = 3000
}
variable "worker_cpu_credits" {
type = string
description = "CPU credits mode (if using a burstable instance type)"
default = null
}
variable "worker_price" {
type = number
description = "Spot price in USD for worker instances or 0 to use on-demand instances"
@ -134,40 +164,31 @@ EOD
default = "10.3.0.0/16"
}
variable "enable_reporting" {
type = bool
description = "Enable usage or analytics reporting to upstreams (Calico)"
default = false
}
variable "enable_aggregation" {
type = bool
description = "Enable the Kubernetes Aggregation Layer"
default = true
}
variable "worker_node_labels" {
type = list(string)
description = "List of initial worker node labels"
default = []
}
# unofficial, undocumented, unsupported
# advanced
variable "cluster_domain_suffix" {
variable "controller_arch" {
type = string
description = "Queries for domains with the suffix will be answered by CoreDNS. Default is cluster.local (e.g. foo.default.svc.cluster.local)"
default = "cluster.local"
description = "Controller node(s) architecture (amd64 or arm64)"
default = "amd64"
validation {
condition = contains(["amd64", "arm64"], var.controller_arch)
error_message = "The controller_arch must be amd64 or arm64."
}
}
variable "arch" {
variable "worker_arch" {
type = string
description = "Container architecture (amd64 or arm64)"
description = "Worker node(s) architecture (amd64 or arm64)"
default = "amd64"
validation {
condition = var.arch == "amd64" || var.arch == "arm64"
error_message = "The arch must be amd64 or arm64."
condition = contains(["amd64", "arm64"], var.worker_arch)
error_message = "The worker_arch must be amd64 or arm64."
}
}
@ -176,3 +197,19 @@ variable "daemonset_tolerations" {
description = "List of additional taint keys kube-system DaemonSets should tolerate (e.g. ['custom-role', 'gpu-role'])"
default = []
}
variable "components" {
description = "Configure pre-installed cluster components"
# Component configs are passed through to terraform-render-bootstrap,
# which handles type enforcement and defines defaults
# https://github.com/poseidon/terraform-render-bootstrap/blob/main/variables.tf#L95
type = object({
enable = optional(bool)
coredns = optional(map(any))
kube_proxy = optional(map(any))
flannel = optional(map(any))
calico = optional(map(any))
cilium = optional(map(any))
})
default = null
}

View File

@ -6,11 +6,16 @@ module "workers" {
vpc_id = aws_vpc.network.id
subnet_ids = aws_subnet.public.*.id
security_groups = [aws_security_group.worker.id]
# instances
os_stream = var.os_stream
worker_count = var.worker_count
instance_type = var.worker_type
os_stream = var.os_stream
arch = var.arch
disk_size = var.disk_size
arch = var.worker_arch
disk_type = var.worker_disk_type
disk_size = var.worker_disk_size
disk_iops = var.worker_disk_iops
cpu_credits = var.worker_cpu_credits
spot_price = var.worker_price
target_groups = var.worker_target_groups
@ -18,7 +23,6 @@ module "workers" {
kubeconfig = module.bootstrap.kubeconfig-kubelet
ssh_authorized_key = var.ssh_authorized_key
service_cidr = var.service_cidr
cluster_domain_suffix = var.cluster_domain_suffix
snippets = var.worker_snippets
node_labels = var.worker_node_labels
}

View File

@ -29,7 +29,7 @@ systemd:
After=afterburn.service
Wants=rpc-statd.service
[Service]
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3
EnvironmentFile=/run/metadata/afterburn
ExecStartPre=/bin/mkdir -p /etc/cni/net.d
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
@ -104,7 +104,7 @@ storage:
cgroupDriver: systemd
clusterDNS:
- ${cluster_dns_service_ip}
clusterDomain: ${cluster_domain_suffix}
clusterDomain: cluster.local
healthzPort: 0
rotateCertificates: true
shutdownGracePeriod: 45s

View File

@ -69,6 +69,12 @@ variable "spot_price" {
default = 0
}
variable "cpu_credits" {
type = string
description = "CPU burst credits mode (if applicable)"
default = null
}
variable "target_groups" {
type = list(string)
description = "Additional target group ARNs to which instances should be added"
@ -102,12 +108,6 @@ EOD
default = "10.3.0.0/16"
}
variable "cluster_domain_suffix" {
type = string
description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. foo.default.svc.cluster.local) "
default = "cluster.local"
}
variable "node_labels" {
type = list(string)
description = "List of initial node labels"
@ -120,15 +120,14 @@ variable "node_taints" {
default = []
}
# unofficial, undocumented, unsupported
# advanced
variable "arch" {
type = string
description = "Container architecture (amd64 or arm64)"
default = "amd64"
validation {
condition = var.arch == "amd64" || var.arch == "arm64"
condition = contains(["amd64", "arm64"], var.arch)
error_message = "The arch must be amd64 or arm64."
}
}

View File

@ -6,13 +6,11 @@ resource "aws_autoscaling_group" "workers" {
desired_capacity = var.worker_count
min_size = var.worker_count
max_size = var.worker_count + 2
default_cooldown = 30
health_check_grace_period = 30
# network
vpc_zone_identifier = var.subnet_ids
# template
# instance template
launch_template {
id = aws_launch_template.worker.id
version = aws_launch_template.worker.latest_version
@ -32,6 +30,11 @@ resource "aws_autoscaling_group" "workers" {
min_healthy_percentage = 90
}
}
# Grace period before checking new instance's health
health_check_grace_period = 30
# Cooldown period between scaling activities
default_cooldown = 30
lifecycle {
# override the default destroy and replace update behavior
@ -56,11 +59,6 @@ resource "aws_launch_template" "worker" {
name_prefix = "${var.name}-worker"
image_id = local.ami_id
instance_type = var.instance_type
monitoring {
enabled = false
}
user_data = sensitive(base64encode(data.ct_config.worker.rendered))
# storage
ebs_optimized = true
@ -76,9 +74,26 @@ resource "aws_launch_template" "worker" {
}
# network
vpc_security_group_ids = var.security_groups
network_interfaces {
associate_public_ip_address = true
security_groups = var.security_groups
}
# spot
# boot
user_data = sensitive(base64encode(data.ct_config.worker.rendered))
# metadata
metadata_options {
http_tokens = "optional"
}
monitoring {
enabled = false
}
# cost
credit_specification {
cpu_credits = var.cpu_credits
}
dynamic "instance_market_options" {
for_each = var.spot_price > 0 ? [1] : []
content {
@ -102,7 +117,6 @@ data "ct_config" "worker" {
kubeconfig = indent(10, var.kubeconfig)
ssh_authorized_key = var.ssh_authorized_key
cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
cluster_domain_suffix = var.cluster_domain_suffix
node_labels = join(",", var.node_labels)
node_taints = join(",", var.node_taints)
})

View File

@ -11,7 +11,7 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster
## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>
* Kubernetes v1.28.3 (upstream)
* Kubernetes v1.31.3 (upstream)
* Single or multi-master, [Calico](https://www.projectcalico.org/) or [Cilium](https://github.com/cilium/cilium) or [flannel](https://github.com/coreos/flannel) networking
* On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/)
* Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [spot](https://typhoon.psdn.io/flatcar-linux/aws/#spot) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#hosts) customization

View File

@ -1,7 +1,7 @@
locals {
# Pick a Flatcar Linux AMI
# flatcar-stable -> Flatcar Linux AMI
ami_id = var.arch == "arm64" ? data.aws_ami.flatcar-arm64[0].image_id : data.aws_ami.flatcar.image_id
ami_id = var.controller_arch == "arm64" ? data.aws_ami.flatcar-arm64[0].image_id : data.aws_ami.flatcar.image_id
channel = split("-", var.os_image)[1]
}
@ -26,7 +26,7 @@ data "aws_ami" "flatcar" {
}
data "aws_ami" "flatcar-arm64" {
count = var.arch == "arm64" ? 1 : 0
count = var.controller_arch == "arm64" ? 1 : 0
most_recent = true
owners = ["075585003325"]

View File

@ -1,6 +1,6 @@
# Kubernetes assets (kubeconfig, manifests)
module "bootstrap" {
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=d151ab77b7ebdfb878ea110c86cc77238189f1ed"
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=e6a1c7bccfc45ab299b5f8149bc3840f99b30b2b"
cluster_name = var.cluster_name
api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)]
@ -9,9 +9,7 @@ module "bootstrap" {
network_mtu = var.network_mtu
pod_cidr = var.pod_cidr
service_cidr = var.service_cidr
cluster_domain_suffix = var.cluster_domain_suffix
enable_reporting = var.enable_reporting
enable_aggregation = var.enable_aggregation
daemonset_tolerations = var.daemonset_tolerations
components = var.components
}

View File

@ -11,7 +11,7 @@ systemd:
Requires=docker.service
After=docker.service
[Service]
Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.10
Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.13
ExecStartPre=/usr/bin/docker run -d \
--name etcd \
--network host \
@ -58,7 +58,7 @@ systemd:
After=coreos-metadata.service
Wants=rpc-statd.service
[Service]
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3
EnvironmentFile=/run/metadata/coreos
ExecStartPre=/bin/mkdir -p /etc/cni/net.d
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
@ -109,7 +109,7 @@ systemd:
Type=oneshot
RemainAfterExit=true
WorkingDirectory=/opt/bootstrap
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3
ExecStart=/usr/bin/docker run \
-v /etc/kubernetes/pki:/etc/kubernetes/pki:ro \
-v /opt/bootstrap/assets:/assets:ro \
@ -148,7 +148,7 @@ storage:
cgroupDriver: systemd
clusterDNS:
- ${cluster_dns_service_ip}
clusterDomain: ${cluster_domain_suffix}
clusterDomain: cluster.local
healthzPort: 0
rotateCertificates: true
shutdownGracePeriod: 45s
@ -162,7 +162,7 @@ storage:
contents:
inline: |
#!/bin/bash -e
mkdir -p -- auth tls/etcd tls/k8s static-manifests manifests/coredns manifests-networking
mkdir -p -- auth tls/{etcd,k8s} static-manifests manifests/{coredns,kube-proxy,network}
awk '/#####/ {filename=$2; next} {print > filename}' assets
mkdir -p /etc/ssl/etcd/etcd
mkdir -p /etc/kubernetes/pki
@ -177,8 +177,7 @@ storage:
mv static-manifests/* /etc/kubernetes/manifests/
mkdir -p /opt/bootstrap/assets
mv manifests /opt/bootstrap/assets/manifests
mv manifests-networking/* /opt/bootstrap/assets/manifests/
rm -rf assets auth static-manifests tls manifests-networking
rm -rf assets auth static-manifests tls manifests
- path: /opt/bootstrap/apply
mode: 0544
contents:

View File

@ -20,19 +20,18 @@ resource "aws_instance" "controllers" {
tags = {
Name = "${var.cluster_name}-controller-${count.index}"
}
instance_type = var.controller_type
ami = local.ami_id
user_data = data.ct_config.controllers.*.rendered[count.index]
# storage
root_block_device {
volume_type = var.disk_type
volume_size = var.disk_size
iops = var.disk_iops
volume_type = var.controller_disk_type
volume_size = var.controller_disk_size
iops = var.controller_disk_iops
encrypted = true
tags = {}
tags = {
Name = "${var.cluster_name}-controller-${count.index}"
}
}
# network
@ -40,6 +39,14 @@ resource "aws_instance" "controllers" {
subnet_id = element(aws_subnet.public.*.id, count.index)
vpc_security_group_ids = [aws_security_group.controller.id]
# boot
user_data = data.ct_config.controllers.*.rendered[count.index]
# cost
credit_specification {
cpu_credits = var.controller_cpu_credits
}
lifecycle {
ignore_changes = [
ami,
@ -62,7 +69,6 @@ data "ct_config" "controllers" {
kubeconfig = indent(10, module.bootstrap.kubeconfig-kubelet)
ssh_authorized_key = var.ssh_authorized_key
cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
cluster_domain_suffix = var.cluster_domain_suffix
})
strict = true
snippets = var.controller_snippets

View File

@ -47,17 +47,25 @@ resource "aws_route" "egress-ipv6" {
resource "aws_subnet" "public" {
count = length(data.aws_availability_zones.all.names)
vpc_id = aws_vpc.network.id
availability_zone = data.aws_availability_zones.all.names[count.index]
cidr_block = cidrsubnet(var.host_cidr, 4, count.index)
ipv6_cidr_block = cidrsubnet(aws_vpc.network.ipv6_cidr_block, 8, count.index)
map_public_ip_on_launch = true
assign_ipv6_address_on_creation = true
tags = {
"Name" = "${var.cluster_name}-public-${count.index}"
}
vpc_id = aws_vpc.network.id
availability_zone = data.aws_availability_zones.all.names[count.index]
# IPv4 and IPv6 CIDR blocks
cidr_block = cidrsubnet(var.host_cidr, 4, count.index)
ipv6_cidr_block = cidrsubnet(aws_vpc.network.ipv6_cidr_block, 8, count.index)
# Assign IPv4 and IPv6 addresses to instances
map_public_ip_on_launch = true
assign_ipv6_address_on_creation = true
# Hostnames assigned to instances
# resource-name: <ec2-instance-id>.region.compute.internal
private_dns_hostname_type_on_launch = "resource-name"
enable_resource_name_dns_a_record_on_launch = true
enable_resource_name_dns_aaaa_record_on_launch = true
}
resource "aws_route_table_association" "public" {

View File

@ -92,6 +92,30 @@ resource "aws_security_group_rule" "controller-cilium-health-self" {
self = true
}
resource "aws_security_group_rule" "controller-cilium-metrics" {
count = var.networking == "cilium" ? 1 : 0
security_group_id = aws_security_group.controller.id
type = "ingress"
protocol = "tcp"
from_port = 9962
to_port = 9965
source_security_group_id = aws_security_group.worker.id
}
resource "aws_security_group_rule" "controller-cilium-metrics-self" {
count = var.networking == "cilium" ? 1 : 0
security_group_id = aws_security_group.controller.id
type = "ingress"
protocol = "tcp"
from_port = 9962
to_port = 9965
self = true
}
# IANA VXLAN default
resource "aws_security_group_rule" "controller-vxlan" {
count = var.networking == "flannel" ? 1 : 0
@ -379,6 +403,30 @@ resource "aws_security_group_rule" "worker-cilium-health-self" {
self = true
}
resource "aws_security_group_rule" "worker-cilium-metrics" {
count = var.networking == "cilium" ? 1 : 0
security_group_id = aws_security_group.worker.id
type = "ingress"
protocol = "tcp"
from_port = 9962
to_port = 9965
source_security_group_id = aws_security_group.controller.id
}
resource "aws_security_group_rule" "worker-cilium-metrics-self" {
count = var.networking == "cilium" ? 1 : 0
security_group_id = aws_security_group.worker.id
type = "ingress"
protocol = "tcp"
from_port = 9962
to_port = 9965
self = true
}
# IANA VXLAN default
resource "aws_security_group_rule" "worker-vxlan" {
count = var.networking == "flannel" ? 1 : 0

View File

@ -17,30 +17,6 @@ variable "dns_zone_id" {
# instances
variable "controller_count" {
type = number
description = "Number of controllers (i.e. masters)"
default = 1
}
variable "worker_count" {
type = number
description = "Number of workers"
default = 1
}
variable "controller_type" {
type = string
description = "EC2 instance type for controllers"
default = "t3.small"
}
variable "worker_type" {
type = string
description = "EC2 instance type for workers"
default = "t3.small"
}
variable "os_image" {
type = string
description = "AMI channel for a Container Linux derivative (flatcar-stable, flatcar-beta, flatcar-alpha)"
@ -52,24 +28,78 @@ variable "os_image" {
}
}
variable "disk_size" {
variable "controller_count" {
type = number
description = "Number of controllers (i.e. masters)"
default = 1
}
variable "controller_type" {
type = string
description = "EC2 instance type for controllers"
default = "t3.small"
}
variable "controller_disk_size" {
type = number
description = "Size of the EBS volume in GB"
default = 30
}
variable "disk_type" {
variable "controller_disk_type" {
type = string
description = "Type of the EBS volume (e.g. standard, gp2, gp3, io1)"
default = "gp3"
}
variable "disk_iops" {
variable "controller_disk_iops" {
type = number
description = "IOPS of the EBS volume (e.g. 3000)"
default = 3000
}
variable "controller_cpu_credits" {
type = string
description = "CPU credits mode (if using a burstable instance type)"
default = null
}
variable "worker_count" {
type = number
description = "Number of workers"
default = 1
}
variable "worker_type" {
type = string
description = "EC2 instance type for workers"
default = "t3.small"
}
variable "worker_disk_size" {
type = number
description = "Size of the EBS volume in GB"
default = 30
}
variable "worker_disk_type" {
type = string
description = "Type of the EBS volume (e.g. standard, gp2, gp3, io1)"
default = "gp3"
}
variable "worker_disk_iops" {
type = number
description = "IOPS of the EBS volume (e.g. 3000)"
default = 3000
}
variable "worker_cpu_credits" {
type = string
description = "CPU credits mode (if using a burstable instance type)"
default = null
}
variable "worker_price" {
type = number
description = "Spot price in USD for worker instances or 0 to use on-demand instances"
@ -134,40 +164,31 @@ EOD
default = "10.3.0.0/16"
}
variable "enable_reporting" {
type = bool
description = "Enable usage or analytics reporting to upstreams (Calico)"
default = false
}
variable "enable_aggregation" {
type = bool
description = "Enable the Kubernetes Aggregation Layer"
default = true
}
variable "worker_node_labels" {
type = list(string)
description = "List of initial worker node labels"
default = []
}
# unofficial, undocumented, unsupported
# advanced
variable "cluster_domain_suffix" {
variable "controller_arch" {
type = string
description = "Queries for domains with the suffix will be answered by CoreDNS. Default is cluster.local (e.g. foo.default.svc.cluster.local)"
default = "cluster.local"
description = "Controller node(s) architecture (amd64 or arm64)"
default = "amd64"
validation {
condition = contains(["amd64", "arm64"], var.controller_arch)
error_message = "The controller_arch must be amd64 or arm64."
}
}
variable "arch" {
variable "worker_arch" {
type = string
description = "Container architecture (amd64 or arm64)"
description = "Worker node(s) architecture (amd64 or arm64)"
default = "amd64"
validation {
condition = var.arch == "amd64" || var.arch == "arm64"
error_message = "The arch must be amd64 or arm64."
condition = contains(["amd64", "arm64"], var.worker_arch)
error_message = "The worker_arch must be amd64 or arm64."
}
}
@ -176,3 +197,19 @@ variable "daemonset_tolerations" {
description = "List of additional taint keys kube-system DaemonSets should tolerate (e.g. ['custom-role', 'gpu-role'])"
default = []
}
variable "components" {
description = "Configure pre-installed cluster components"
# Component configs are passed through to terraform-render-bootstrap,
# which handles type enforcement and defines defaults
# https://github.com/poseidon/terraform-render-bootstrap/blob/main/variables.tf#L95
type = object({
enable = optional(bool)
coredns = optional(map(any))
kube_proxy = optional(map(any))
flannel = optional(map(any))
calico = optional(map(any))
cilium = optional(map(any))
})
default = null
}

View File

@ -7,7 +7,7 @@ terraform {
null = ">= 2.1"
ct = {
source = "poseidon/ct"
version = "~> 0.11"
version = "~> 0.13"
}
}
}

View File

@ -6,11 +6,15 @@ module "workers" {
vpc_id = aws_vpc.network.id
subnet_ids = aws_subnet.public.*.id
security_groups = [aws_security_group.worker.id]
# instances
os_image = var.os_image
worker_count = var.worker_count
instance_type = var.worker_type
os_image = var.os_image
arch = var.arch
disk_size = var.disk_size
arch = var.worker_arch
disk_type = var.worker_disk_type
disk_size = var.worker_disk_size
disk_iops = var.worker_disk_iops
spot_price = var.worker_price
target_groups = var.worker_target_groups
@ -18,7 +22,6 @@ module "workers" {
kubeconfig = module.bootstrap.kubeconfig-kubelet
ssh_authorized_key = var.ssh_authorized_key
service_cidr = var.service_cidr
cluster_domain_suffix = var.cluster_domain_suffix
snippets = var.worker_snippets
node_labels = var.worker_node_labels
}

View File

@ -30,7 +30,7 @@ systemd:
After=coreos-metadata.service
Wants=rpc-statd.service
[Service]
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3
EnvironmentFile=/run/metadata/coreos
ExecStartPre=/bin/mkdir -p /etc/cni/net.d
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
@ -103,7 +103,7 @@ storage:
cgroupDriver: systemd
clusterDNS:
- ${cluster_dns_service_ip}
clusterDomain: ${cluster_domain_suffix}
clusterDomain: cluster.local
healthzPort: 0
rotateCertificates: true
shutdownGracePeriod: 45s

View File

@ -69,6 +69,12 @@ variable "spot_price" {
default = 0
}
variable "cpu_credits" {
type = string
description = "CPU burst credits mode (if applicable)"
default = null
}
variable "target_groups" {
type = list(string)
description = "Additional target group ARNs to which instances should be added"
@ -102,12 +108,6 @@ EOD
default = "10.3.0.0/16"
}
variable "cluster_domain_suffix" {
type = string
description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. foo.default.svc.cluster.local) "
default = "cluster.local"
}
variable "node_labels" {
type = list(string)
description = "List of initial node labels"
@ -128,7 +128,7 @@ variable "arch" {
default = "amd64"
validation {
condition = var.arch == "amd64" || var.arch == "arm64"
condition = contains(["amd64", "arm64"], var.arch)
error_message = "The arch must be amd64 or arm64."
}
}

View File

@ -6,7 +6,7 @@ terraform {
aws = ">= 2.23, <= 6.0"
ct = {
source = "poseidon/ct"
version = "~> 0.11"
version = "~> 0.13"
}
}
}

View File

@ -6,13 +6,11 @@ resource "aws_autoscaling_group" "workers" {
desired_capacity = var.worker_count
min_size = var.worker_count
max_size = var.worker_count + 2
default_cooldown = 30
health_check_grace_period = 30
# network
vpc_zone_identifier = var.subnet_ids
# template
# instance template
launch_template {
id = aws_launch_template.worker.id
version = aws_launch_template.worker.latest_version
@ -32,6 +30,10 @@ resource "aws_autoscaling_group" "workers" {
min_healthy_percentage = 90
}
}
# Grace period before checking new instance's health
health_check_grace_period = 30
# Cooldown period between scaling activities
default_cooldown = 30
lifecycle {
# override the default destroy and replace update behavior
@ -56,11 +58,6 @@ resource "aws_launch_template" "worker" {
name_prefix = "${var.name}-worker"
image_id = local.ami_id
instance_type = var.instance_type
monitoring {
enabled = false
}
user_data = sensitive(base64encode(data.ct_config.worker.rendered))
# storage
ebs_optimized = true
@ -76,9 +73,26 @@ resource "aws_launch_template" "worker" {
}
# network
vpc_security_group_ids = var.security_groups
network_interfaces {
associate_public_ip_address = true
security_groups = var.security_groups
}
# spot
# boot
user_data = sensitive(base64encode(data.ct_config.worker.rendered))
# metadata
metadata_options {
http_tokens = "optional"
}
monitoring {
enabled = false
}
# cost
credit_specification {
cpu_credits = var.cpu_credits
}
dynamic "instance_market_options" {
for_each = var.spot_price > 0 ? [1] : []
content {
@ -102,7 +116,6 @@ data "ct_config" "worker" {
kubeconfig = indent(10, var.kubeconfig)
ssh_authorized_key = var.ssh_authorized_key
cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
cluster_domain_suffix = var.cluster_domain_suffix
node_labels = join(",", var.node_labels)
node_taints = join(",", var.node_taints)
})

View File

@ -11,7 +11,7 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster
## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>
* Kubernetes v1.28.3 (upstream)
* Kubernetes v1.31.3 (upstream)
* Single or multi-master, [Calico](https://www.projectcalico.org/) or [Cilium](https://github.com/cilium/cilium) or [flannel](https://github.com/coreos/flannel) networking
* On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/), SELinux enforcing
* Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [spot priority](https://typhoon.psdn.io/fedora-coreos/azure/#low-priority) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#hosts) customization

View File

@ -1,13 +1,12 @@
# Kubernetes assets (kubeconfig, manifests)
module "bootstrap" {
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=d151ab77b7ebdfb878ea110c86cc77238189f1ed"
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=e6a1c7bccfc45ab299b5f8149bc3840f99b30b2b"
cluster_name = var.cluster_name
api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)]
etcd_servers = formatlist("%s.%s", azurerm_dns_a_record.etcds.*.name, var.dns_zone)
networking = var.networking
# only effective with Calico networking
# we should be able to use 1450 MTU, but in practice, 1410 was needed
network_encapsulation = "vxlan"
@ -15,9 +14,7 @@ module "bootstrap" {
pod_cidr = var.pod_cidr
service_cidr = var.service_cidr
cluster_domain_suffix = var.cluster_domain_suffix
enable_reporting = var.enable_reporting
enable_aggregation = var.enable_aggregation
daemonset_tolerations = var.daemonset_tolerations
components = var.components
}

View File

@ -12,7 +12,7 @@ systemd:
Wants=network-online.target
After=network-online.target
[Service]
Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.10
Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.13
Type=exec
ExecStartPre=/bin/mkdir -p /var/lib/etcd
ExecStartPre=-/usr/bin/podman rm etcd
@ -54,7 +54,7 @@ systemd:
Description=Kubelet (System Container)
Wants=rpc-statd.service
[Service]
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3
ExecStartPre=/bin/mkdir -p /etc/cni/net.d
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
ExecStartPre=/bin/mkdir -p /opt/cni/bin
@ -111,7 +111,7 @@ systemd:
--volume /opt/bootstrap/assets:/assets:ro,Z \
--volume /opt/bootstrap/apply:/apply:ro,Z \
--entrypoint=/apply \
quay.io/poseidon/kubelet:v1.28.3
quay.io/poseidon/kubelet:v1.31.3
ExecStartPost=/bin/touch /opt/bootstrap/bootstrap.done
ExecStartPost=-/usr/bin/podman stop bootstrap
storage:
@ -144,7 +144,7 @@ storage:
cgroupDriver: systemd
clusterDNS:
- ${cluster_dns_service_ip}
clusterDomain: ${cluster_domain_suffix}
clusterDomain: cluster.local
healthzPort: 0
rotateCertificates: true
shutdownGracePeriod: 45s
@ -158,7 +158,7 @@ storage:
contents:
inline: |
#!/bin/bash -e
mkdir -p -- auth tls/etcd tls/k8s static-manifests manifests/coredns manifests-networking
mkdir -p -- auth tls/{etcd,k8s} static-manifests manifests/{coredns,kube-proxy,network}
awk '/#####/ {filename=$2; next} {print > filename}' assets
mkdir -p /etc/ssl/etcd/etcd
mkdir -p /etc/kubernetes/pki
@ -172,8 +172,7 @@ storage:
mv static-manifests/* /etc/kubernetes/manifests/
mkdir -p /opt/bootstrap/assets
mv manifests /opt/bootstrap/assets/manifests
mv manifests-networking/* /opt/bootstrap/assets/manifests/
rm -rf assets auth static-manifests tls manifests-networking
rm -rf assets auth static-manifests tls manifests
chcon -R -u system_u -t container_file_t /etc/kubernetes/pki
- path: /opt/bootstrap/apply
mode: 0544

View File

@ -9,25 +9,22 @@ locals {
# Discrete DNS records for each controller's private IPv4 for etcd usage
resource "azurerm_dns_a_record" "etcds" {
count = var.controller_count
resource_group_name = var.dns_zone_group
# DNS Zone name where record should be created
zone_name = var.dns_zone
resource_group_name = var.dns_zone_group
# DNS record
name = format("%s-etcd%d", var.cluster_name, count.index)
ttl = 300
# private IPv4 address for etcd
records = [azurerm_network_interface.controllers.*.private_ip_address[count.index]]
records = [azurerm_network_interface.controllers[count.index].private_ip_address]
}
# Controller availability set to spread controllers
resource "azurerm_availability_set" "controllers" {
resource_group_name = azurerm_resource_group.cluster.name
name = "${var.cluster_name}-controllers"
location = var.region
resource_group_name = azurerm_resource_group.cluster.name
location = var.location
platform_fault_domain_count = 2
platform_update_domain_count = 4
managed = true
@ -36,30 +33,34 @@ resource "azurerm_availability_set" "controllers" {
# Controller instances
resource "azurerm_linux_virtual_machine" "controllers" {
count = var.controller_count
resource_group_name = azurerm_resource_group.cluster.name
name = "${var.cluster_name}-controller-${count.index}"
location = var.region
resource_group_name = azurerm_resource_group.cluster.name
location = var.location
availability_set_id = azurerm_availability_set.controllers.id
size = var.controller_type
custom_data = base64encode(data.ct_config.controllers.*.rendered[count.index])
# storage
source_image_id = var.os_image
os_disk {
name = "${var.cluster_name}-controller-${count.index}"
storage_account_type = var.controller_disk_type
disk_size_gb = var.controller_disk_size
caching = "None"
disk_size_gb = var.disk_size
storage_account_type = "Premium_LRS"
}
# network
network_interface_ids = [
azurerm_network_interface.controllers.*.id[count.index]
azurerm_network_interface.controllers[count.index].id
]
# Azure requires setting admin_ssh_key, though Ignition custom_data handles it too
# boot
custom_data = base64encode(data.ct_config.controllers[count.index].rendered)
boot_diagnostics {
# defaults to a managed storage account
}
# Azure requires an RSA admin_ssh_key
admin_username = "core"
admin_ssh_key {
username = "core"
@ -74,31 +75,52 @@ resource "azurerm_linux_virtual_machine" "controllers" {
}
}
# Controller public IPv4 addresses
resource "azurerm_public_ip" "controllers" {
# Controller node public IPv4 addresses
resource "azurerm_public_ip" "controllers-ipv4" {
count = var.controller_count
resource_group_name = azurerm_resource_group.cluster.name
name = "${var.cluster_name}-controller-${count.index}"
name = "${var.cluster_name}-controller-${count.index}-ipv4"
resource_group_name = azurerm_resource_group.cluster.name
location = azurerm_resource_group.cluster.location
ip_version = "IPv4"
sku = "Standard"
allocation_method = "Static"
}
# Controller NICs with public and private IPv4
# Controller node public IPv6 addresses
resource "azurerm_public_ip" "controllers-ipv6" {
count = var.controller_count
name = "${var.cluster_name}-controller-${count.index}-ipv6"
resource_group_name = azurerm_resource_group.cluster.name
location = azurerm_resource_group.cluster.location
ip_version = "IPv6"
sku = "Standard"
allocation_method = "Static"
}
# Controllers' network interfaces
resource "azurerm_network_interface" "controllers" {
count = var.controller_count
resource_group_name = azurerm_resource_group.cluster.name
name = "${var.cluster_name}-controller-${count.index}"
resource_group_name = azurerm_resource_group.cluster.name
location = azurerm_resource_group.cluster.location
ip_configuration {
name = "ip0"
name = "ipv4"
primary = true
subnet_id = azurerm_subnet.controller.id
private_ip_address_allocation = "Dynamic"
# instance public IPv4
public_ip_address_id = azurerm_public_ip.controllers.*.id[count.index]
private_ip_address_version = "IPv4"
public_ip_address_id = azurerm_public_ip.controllers-ipv4[count.index].id
}
ip_configuration {
name = "ipv6"
subnet_id = azurerm_subnet.controller.id
private_ip_address_allocation = "Dynamic"
private_ip_address_version = "IPv6"
public_ip_address_id = azurerm_public_ip.controllers-ipv6[count.index].id
}
}
@ -111,12 +133,20 @@ resource "azurerm_network_interface_security_group_association" "controllers" {
}
# Associate controller network interface with controller backend address pool
resource "azurerm_network_interface_backend_address_pool_association" "controllers" {
resource "azurerm_network_interface_backend_address_pool_association" "controllers-ipv4" {
count = var.controller_count
network_interface_id = azurerm_network_interface.controllers[count.index].id
ip_configuration_name = "ip0"
backend_address_pool_id = azurerm_lb_backend_address_pool.controller.id
ip_configuration_name = "ipv4"
backend_address_pool_id = azurerm_lb_backend_address_pool.controller-ipv4.id
}
resource "azurerm_network_interface_backend_address_pool_association" "controllers-ipv6" {
count = var.controller_count
network_interface_id = azurerm_network_interface.controllers[count.index].id
ip_configuration_name = "ipv6"
backend_address_pool_id = azurerm_lb_backend_address_pool.controller-ipv6.id
}
# Fedora CoreOS controllers
@ -133,7 +163,6 @@ data "ct_config" "controllers" {
kubeconfig = indent(10, module.bootstrap.kubeconfig-kubelet)
ssh_authorized_key = var.ssh_authorized_key
cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
cluster_domain_suffix = var.cluster_domain_suffix
})
strict = true
snippets = var.controller_snippets

View File

@ -1,116 +1,164 @@
# DNS record for the apiserver load balancer
# DNS A record for the apiserver load balancer
resource "azurerm_dns_a_record" "apiserver" {
resource_group_name = var.dns_zone_group
# DNS Zone name where record should be created
zone_name = var.dns_zone
resource_group_name = var.dns_zone_group
# DNS record
name = var.cluster_name
ttl = 300
# IPv4 address of apiserver load balancer
records = [azurerm_public_ip.apiserver-ipv4.ip_address]
records = [azurerm_public_ip.frontend-ipv4.ip_address]
}
# Static IPv4 address for the apiserver frontend
resource "azurerm_public_ip" "apiserver-ipv4" {
resource_group_name = azurerm_resource_group.cluster.name
# DNS AAAA record for the apiserver load balancer
resource "azurerm_dns_aaaa_record" "apiserver" {
# DNS Zone name where record should be created
zone_name = var.dns_zone
resource_group_name = var.dns_zone_group
# DNS record
name = var.cluster_name
ttl = 300
# IPv4 address of apiserver load balancer
records = [azurerm_public_ip.frontend-ipv6.ip_address]
}
name = "${var.cluster_name}-apiserver-ipv4"
location = var.region
# Static IPv4 address for the load balancer
resource "azurerm_public_ip" "frontend-ipv4" {
name = "${var.cluster_name}-frontend-ipv4"
resource_group_name = azurerm_resource_group.cluster.name
location = var.location
ip_version = "IPv4"
sku = "Standard"
allocation_method = "Static"
}
# Static IPv4 address for the ingress frontend
resource "azurerm_public_ip" "ingress-ipv4" {
# Static IPv6 address for the load balancer
resource "azurerm_public_ip" "frontend-ipv6" {
name = "${var.cluster_name}-frontend-ipv6"
resource_group_name = azurerm_resource_group.cluster.name
name = "${var.cluster_name}-ingress-ipv4"
location = var.region
location = var.location
ip_version = "IPv6"
sku = "Standard"
allocation_method = "Static"
}
# Network Load Balancer for apiservers and ingress
resource "azurerm_lb" "cluster" {
resource_group_name = azurerm_resource_group.cluster.name
name = var.cluster_name
location = var.region
resource_group_name = azurerm_resource_group.cluster.name
location = var.location
sku = "Standard"
frontend_ip_configuration {
name = "apiserver"
public_ip_address_id = azurerm_public_ip.apiserver-ipv4.id
name = "frontend-ipv4"
public_ip_address_id = azurerm_public_ip.frontend-ipv4.id
}
frontend_ip_configuration {
name = "ingress"
public_ip_address_id = azurerm_public_ip.ingress-ipv4.id
name = "frontend-ipv6"
public_ip_address_id = azurerm_public_ip.frontend-ipv6.id
}
}
resource "azurerm_lb_rule" "apiserver" {
name = "apiserver"
resource "azurerm_lb_rule" "apiserver-ipv4" {
name = "apiserver-ipv4"
loadbalancer_id = azurerm_lb.cluster.id
frontend_ip_configuration_name = "apiserver"
frontend_ip_configuration_name = "frontend-ipv4"
disable_outbound_snat = true
protocol = "Tcp"
frontend_port = 6443
backend_port = 6443
backend_address_pool_ids = [azurerm_lb_backend_address_pool.controller.id]
backend_address_pool_ids = [azurerm_lb_backend_address_pool.controller-ipv4.id]
probe_id = azurerm_lb_probe.apiserver.id
}
resource "azurerm_lb_rule" "ingress-http" {
name = "ingress-http"
resource "azurerm_lb_rule" "apiserver-ipv6" {
name = "apiserver-ipv6"
loadbalancer_id = azurerm_lb.cluster.id
frontend_ip_configuration_name = "ingress"
frontend_ip_configuration_name = "frontend-ipv6"
disable_outbound_snat = true
protocol = "Tcp"
frontend_port = 6443
backend_port = 6443
backend_address_pool_ids = [azurerm_lb_backend_address_pool.controller-ipv6.id]
probe_id = azurerm_lb_probe.apiserver.id
}
resource "azurerm_lb_rule" "ingress-http-ipv4" {
name = "ingress-http-ipv4"
loadbalancer_id = azurerm_lb.cluster.id
frontend_ip_configuration_name = "frontend-ipv4"
disable_outbound_snat = true
protocol = "Tcp"
frontend_port = 80
backend_port = 80
backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker.id]
backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker-ipv4.id]
probe_id = azurerm_lb_probe.ingress.id
}
resource "azurerm_lb_rule" "ingress-https" {
name = "ingress-https"
resource "azurerm_lb_rule" "ingress-https-ipv4" {
name = "ingress-https-ipv4"
loadbalancer_id = azurerm_lb.cluster.id
frontend_ip_configuration_name = "ingress"
frontend_ip_configuration_name = "frontend-ipv4"
disable_outbound_snat = true
protocol = "Tcp"
frontend_port = 443
backend_port = 443
backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker.id]
backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker-ipv4.id]
probe_id = azurerm_lb_probe.ingress.id
}
# Worker outbound TCP/UDP SNAT
resource "azurerm_lb_outbound_rule" "worker-outbound" {
name = "worker"
resource "azurerm_lb_rule" "ingress-http-ipv6" {
name = "ingress-http-ipv6"
loadbalancer_id = azurerm_lb.cluster.id
frontend_ip_configuration {
name = "ingress"
frontend_ip_configuration_name = "frontend-ipv6"
disable_outbound_snat = true
protocol = "Tcp"
frontend_port = 80
backend_port = 80
backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker-ipv6.id]
probe_id = azurerm_lb_probe.ingress.id
}
protocol = "All"
backend_address_pool_id = azurerm_lb_backend_address_pool.worker.id
resource "azurerm_lb_rule" "ingress-https-ipv6" {
name = "ingress-https-ipv6"
loadbalancer_id = azurerm_lb.cluster.id
frontend_ip_configuration_name = "frontend-ipv6"
disable_outbound_snat = true
protocol = "Tcp"
frontend_port = 443
backend_port = 443
backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker-ipv6.id]
probe_id = azurerm_lb_probe.ingress.id
}
# Backend Address Pools
# Address pool of controllers
resource "azurerm_lb_backend_address_pool" "controller" {
name = "controller"
resource "azurerm_lb_backend_address_pool" "controller-ipv4" {
name = "controller-ipv4"
loadbalancer_id = azurerm_lb.cluster.id
}
resource "azurerm_lb_backend_address_pool" "controller-ipv6" {
name = "controller-ipv6"
loadbalancer_id = azurerm_lb.cluster.id
}
# Address pool of workers
resource "azurerm_lb_backend_address_pool" "worker" {
name = "worker"
resource "azurerm_lb_backend_address_pool" "worker-ipv4" {
name = "worker-ipv4"
loadbalancer_id = azurerm_lb.cluster.id
}
resource "azurerm_lb_backend_address_pool" "worker-ipv6" {
name = "worker-ipv6"
loadbalancer_id = azurerm_lb.cluster.id
}
@ -122,10 +170,8 @@ resource "azurerm_lb_probe" "apiserver" {
loadbalancer_id = azurerm_lb.cluster.id
protocol = "Tcp"
port = 6443
# unhealthy threshold
number_of_probes = 3
interval_in_seconds = 5
}
@ -136,10 +182,29 @@ resource "azurerm_lb_probe" "ingress" {
protocol = "Http"
port = 10254
request_path = "/healthz"
# unhealthy threshold
number_of_probes = 3
interval_in_seconds = 5
}
# Outbound SNAT
resource "azurerm_lb_outbound_rule" "outbound-ipv4" {
name = "outbound-ipv4"
protocol = "All"
loadbalancer_id = azurerm_lb.cluster.id
backend_address_pool_id = azurerm_lb_backend_address_pool.worker-ipv4.id
frontend_ip_configuration {
name = "frontend-ipv4"
}
}
resource "azurerm_lb_outbound_rule" "outbound-ipv6" {
name = "outbound-ipv6"
protocol = "All"
loadbalancer_id = azurerm_lb.cluster.id
backend_address_pool_id = azurerm_lb_backend_address_pool.worker-ipv6.id
frontend_ip_configuration {
name = "frontend-ipv6"
}
}

View File

@ -0,0 +1,6 @@
locals {
backend_address_pool_ids = {
ipv4 = [azurerm_lb_backend_address_pool.worker-ipv4.id]
ipv6 = [azurerm_lb_backend_address_pool.worker-ipv6.id]
}
}

View File

@ -1,27 +1,64 @@
# Choose an IPv6 ULA subnet at random
# https://datatracker.ietf.org/doc/html/rfc4193
resource "random_id" "ula-netnum" {
byte_length = 5 # 40 bits
}
locals {
# fd00::/8 -> shift 40 -> 2^40 possible /48 subnets
ula-range = cidrsubnet("fd00::/8", 40, random_id.ula-netnum.dec)
network_cidr = {
ipv4 = var.network_cidr.ipv4
ipv6 = length(var.network_cidr.ipv6) > 0 ? var.network_cidr.ipv6 : [local.ula-range]
}
# Subdivide the virtual network into subnets
# - controllers use netnum 0
# - workers use netnum 1
controller_subnets = {
ipv4 = [for i, cidr in local.network_cidr.ipv4 : cidrsubnet(cidr, 1, 0)]
ipv6 = [for i, cidr in local.network_cidr.ipv6 : cidrsubnet(cidr, 16, 0)]
}
worker_subnets = {
ipv4 = [for i, cidr in local.network_cidr.ipv4 : cidrsubnet(cidr, 1, 1)]
ipv6 = [for i, cidr in local.network_cidr.ipv6 : cidrsubnet(cidr, 16, 1)]
}
cluster_subnets = {
ipv4 = concat(local.controller_subnets.ipv4, local.worker_subnets.ipv4)
ipv6 = concat(local.controller_subnets.ipv6, local.worker_subnets.ipv6)
}
}
# Organize cluster into a resource group
resource "azurerm_resource_group" "cluster" {
name = var.cluster_name
location = var.region
location = var.location
}
resource "azurerm_virtual_network" "network" {
resource_group_name = azurerm_resource_group.cluster.name
name = var.cluster_name
resource_group_name = azurerm_resource_group.cluster.name
location = azurerm_resource_group.cluster.location
address_space = [var.host_cidr]
address_space = concat(
local.network_cidr.ipv4,
local.network_cidr.ipv6
)
}
# Subnets - separate subnets for controller and workers because Azure
# network security groups are based on IPv4 CIDR rather than instance
# tags like GCP or security group membership like AWS
# Subnets - separate subnets for controllers and workers because Azure
# network security groups are oriented around address prefixes rather
# than instance tags (GCP) or security group membership (AWS)
resource "azurerm_subnet" "controller" {
resource_group_name = azurerm_resource_group.cluster.name
name = "controller"
resource_group_name = azurerm_resource_group.cluster.name
virtual_network_name = azurerm_virtual_network.network.name
address_prefixes = [cidrsubnet(var.host_cidr, 1, 0)]
address_prefixes = concat(
local.controller_subnets.ipv4,
local.controller_subnets.ipv6,
)
default_outbound_access_enabled = false
}
resource "azurerm_subnet_network_security_group_association" "controller" {
@ -30,11 +67,14 @@ resource "azurerm_subnet_network_security_group_association" "controller" {
}
resource "azurerm_subnet" "worker" {
resource_group_name = azurerm_resource_group.cluster.name
name = "worker"
resource_group_name = azurerm_resource_group.cluster.name
virtual_network_name = azurerm_virtual_network.network.name
address_prefixes = [cidrsubnet(var.host_cidr, 1, 1)]
address_prefixes = concat(
local.worker_subnets.ipv4,
local.worker_subnets.ipv6,
)
default_outbound_access_enabled = false
}
resource "azurerm_subnet_network_security_group_association" "worker" {

View File

@ -6,13 +6,18 @@ output "kubeconfig-admin" {
# Outputs for Kubernetes Ingress
output "ingress_static_ipv4" {
value = azurerm_public_ip.ingress-ipv4.ip_address
value = azurerm_public_ip.frontend-ipv4.ip_address
description = "IPv4 address of the load balancer for distributing traffic to Ingress controllers"
}
output "ingress_static_ipv6" {
value = azurerm_public_ip.frontend-ipv6.ip_address
description = "IPv6 address of the load balancer for distributing traffic to Ingress controllers"
}
# Outputs for worker pools
output "region" {
output "location" {
value = azurerm_resource_group.cluster.location
}
@ -39,13 +44,24 @@ output "kubeconfig" {
# Outputs for custom firewalling
output "controller_security_group_name" {
description = "Network Security Group for controller nodes"
value = azurerm_network_security_group.controller.name
}
output "worker_security_group_name" {
description = "Network Security Group for worker nodes"
value = azurerm_network_security_group.worker.name
}
output "controller_address_prefixes" {
description = "Controller network subnet CIDR addresses (for source/destination)"
value = local.controller_subnets
}
output "worker_address_prefixes" {
description = "Worker network subnet CIDR addresses (for source/destination)"
value = azurerm_subnet.worker.address_prefixes
value = local.worker_subnets
}
# Outputs for custom load balancing
@ -55,9 +71,12 @@ output "loadbalancer_id" {
value = azurerm_lb.cluster.id
}
output "backend_address_pool_id" {
description = "ID of the worker backend address pool"
value = azurerm_lb_backend_address_pool.worker.id
output "backend_address_pool_ids" {
description = "IDs of the worker backend address pools"
value = {
ipv4 = [azurerm_lb_backend_address_pool.worker-ipv4.id]
ipv6 = [azurerm_lb_backend_address_pool.worker-ipv6.id]
}
}
# Outputs for debug

View File

@ -1,198 +1,223 @@
# Controller security group
resource "azurerm_network_security_group" "controller" {
resource_group_name = azurerm_resource_group.cluster.name
name = "${var.cluster_name}-controller"
resource_group_name = azurerm_resource_group.cluster.name
location = azurerm_resource_group.cluster.location
}
resource "azurerm_network_security_rule" "controller-icmp" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.controller_subnets
name = "allow-icmp"
name = "allow-icmp-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.controller.name
priority = "1995"
priority = 1995 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Icmp"
source_port_range = "*"
destination_port_range = "*"
source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
destination_address_prefixes = azurerm_subnet.controller.address_prefixes
source_address_prefixes = local.cluster_subnets[each.key]
destination_address_prefixes = local.controller_subnets[each.key]
}
resource "azurerm_network_security_rule" "controller-ssh" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.controller_subnets
name = "allow-ssh"
name = "allow-ssh-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.controller.name
priority = "2000"
priority = 2000 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "22"
source_address_prefix = "*"
destination_address_prefixes = azurerm_subnet.controller.address_prefixes
destination_address_prefixes = local.controller_subnets[each.key]
}
resource "azurerm_network_security_rule" "controller-etcd" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.controller_subnets
name = "allow-etcd"
name = "allow-etcd-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.controller.name
priority = "2005"
priority = 2005 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "2379-2380"
source_address_prefixes = azurerm_subnet.controller.address_prefixes
destination_address_prefixes = azurerm_subnet.controller.address_prefixes
source_address_prefixes = local.controller_subnets[each.key]
destination_address_prefixes = local.controller_subnets[each.key]
}
# Allow Prometheus to scrape etcd metrics
resource "azurerm_network_security_rule" "controller-etcd-metrics" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.controller_subnets
name = "allow-etcd-metrics"
name = "allow-etcd-metrics-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.controller.name
priority = "2010"
priority = 2010 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "2381"
source_address_prefixes = azurerm_subnet.worker.address_prefixes
destination_address_prefixes = azurerm_subnet.controller.address_prefixes
source_address_prefixes = local.worker_subnets[each.key]
destination_address_prefixes = local.controller_subnets[each.key]
}
# Allow Prometheus to scrape kube-proxy metrics
resource "azurerm_network_security_rule" "controller-kube-proxy" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.controller_subnets
name = "allow-kube-proxy-metrics"
name = "allow-kube-proxy-metrics-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.controller.name
priority = "2011"
priority = 2012 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "10249"
source_address_prefixes = azurerm_subnet.worker.address_prefixes
destination_address_prefixes = azurerm_subnet.controller.address_prefixes
source_address_prefixes = local.worker_subnets[each.key]
destination_address_prefixes = local.controller_subnets[each.key]
}
# Allow Prometheus to scrape kube-scheduler and kube-controller-manager metrics
resource "azurerm_network_security_rule" "controller-kube-metrics" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.controller_subnets
name = "allow-kube-metrics"
name = "allow-kube-metrics-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.controller.name
priority = "2012"
priority = 2014 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "10257-10259"
source_address_prefixes = azurerm_subnet.worker.address_prefixes
destination_address_prefixes = azurerm_subnet.controller.address_prefixes
source_address_prefixes = local.worker_subnets[each.key]
destination_address_prefixes = local.controller_subnets[each.key]
}
resource "azurerm_network_security_rule" "controller-apiserver" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.controller_subnets
name = "allow-apiserver"
name = "allow-apiserver-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.controller.name
priority = "2015"
priority = 2016 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "6443"
source_address_prefix = "*"
destination_address_prefixes = azurerm_subnet.controller.address_prefixes
destination_address_prefixes = local.controller_subnets[each.key]
}
resource "azurerm_network_security_rule" "controller-cilium-health" {
resource_group_name = azurerm_resource_group.cluster.name
count = var.networking == "cilium" ? 1 : 0
for_each = var.networking == "cilium" ? local.controller_subnets : {}
name = "allow-cilium-health"
name = "allow-cilium-health-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.controller.name
priority = "2019"
priority = 2018 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "4240"
source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
destination_address_prefixes = azurerm_subnet.controller.address_prefixes
source_address_prefixes = local.cluster_subnets[each.key]
destination_address_prefixes = local.controller_subnets[each.key]
}
resource "azurerm_network_security_rule" "controller-cilium-metrics" {
for_each = var.networking == "cilium" ? local.controller_subnets : {}
name = "allow-cilium-metrics-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.controller.name
priority = 2035 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "9962-9965"
source_address_prefixes = local.cluster_subnets[each.key]
destination_address_prefixes = local.controller_subnets[each.key]
}
resource "azurerm_network_security_rule" "controller-vxlan" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.controller_subnets
name = "allow-vxlan"
name = "allow-vxlan-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.controller.name
priority = "2020"
priority = 2020 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Udp"
source_port_range = "*"
destination_port_range = "4789"
source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
destination_address_prefixes = azurerm_subnet.controller.address_prefixes
source_address_prefixes = local.cluster_subnets[each.key]
destination_address_prefixes = local.controller_subnets[each.key]
}
resource "azurerm_network_security_rule" "controller-linux-vxlan" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.controller_subnets
name = "allow-linux-vxlan"
name = "allow-linux-vxlan-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.controller.name
priority = "2021"
priority = 2022 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Udp"
source_port_range = "*"
destination_port_range = "8472"
source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
destination_address_prefixes = azurerm_subnet.controller.address_prefixes
source_address_prefixes = local.cluster_subnets[each.key]
destination_address_prefixes = local.controller_subnets[each.key]
}
# Allow Prometheus to scrape node-exporter daemonset
resource "azurerm_network_security_rule" "controller-node-exporter" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.controller_subnets
name = "allow-node-exporter"
name = "allow-node-exporter-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.controller.name
priority = "2025"
priority = 2025 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "9100"
source_address_prefixes = azurerm_subnet.worker.address_prefixes
destination_address_prefixes = azurerm_subnet.controller.address_prefixes
source_address_prefixes = local.worker_subnets[each.key]
destination_address_prefixes = local.controller_subnets[each.key]
}
# Allow apiserver to access kubelet's for exec, log, port-forward
resource "azurerm_network_security_rule" "controller-kubelet" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.controller_subnets
name = "allow-kubelet"
name = "allow-kubelet-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.controller.name
priority = "2030"
priority = 2030 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "10250"
# allow Prometheus to scrape kubelet metrics too
source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
destination_address_prefixes = azurerm_subnet.controller.address_prefixes
source_address_prefixes = local.cluster_subnets[each.key]
destination_address_prefixes = local.controller_subnets[each.key]
}
# Override Azure AllowVNetInBound and AllowAzureLoadBalancerInBound
@ -231,166 +256,189 @@ resource "azurerm_network_security_rule" "controller-deny-all" {
# Worker security group
resource "azurerm_network_security_group" "worker" {
resource_group_name = azurerm_resource_group.cluster.name
name = "${var.cluster_name}-worker"
resource_group_name = azurerm_resource_group.cluster.name
location = azurerm_resource_group.cluster.location
}
resource "azurerm_network_security_rule" "worker-icmp" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.worker_subnets
name = "allow-icmp"
name = "allow-icmp-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.worker.name
priority = "1995"
priority = 1995 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Icmp"
source_port_range = "*"
destination_port_range = "*"
source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
destination_address_prefixes = azurerm_subnet.worker.address_prefixes
source_address_prefixes = local.cluster_subnets[each.key]
destination_address_prefixes = local.worker_subnets[each.key]
}
resource "azurerm_network_security_rule" "worker-ssh" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.worker_subnets
name = "allow-ssh"
name = "allow-ssh-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.worker.name
priority = "2000"
priority = 2000 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "22"
source_address_prefixes = azurerm_subnet.controller.address_prefixes
destination_address_prefixes = azurerm_subnet.worker.address_prefixes
source_address_prefixes = local.controller_subnets[each.key]
destination_address_prefixes = local.worker_subnets[each.key]
}
resource "azurerm_network_security_rule" "worker-http" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.worker_subnets
name = "allow-http"
name = "allow-http-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.worker.name
priority = "2005"
priority = 2005 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "80"
source_address_prefix = "*"
destination_address_prefixes = azurerm_subnet.worker.address_prefixes
destination_address_prefixes = local.worker_subnets[each.key]
}
resource "azurerm_network_security_rule" "worker-https" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.worker_subnets
name = "allow-https"
name = "allow-https-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.worker.name
priority = "2010"
priority = 2010 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "443"
source_address_prefix = "*"
destination_address_prefixes = azurerm_subnet.worker.address_prefixes
destination_address_prefixes = local.worker_subnets[each.key]
}
resource "azurerm_network_security_rule" "worker-cilium-health" {
resource_group_name = azurerm_resource_group.cluster.name
count = var.networking == "cilium" ? 1 : 0
for_each = var.networking == "cilium" ? local.worker_subnets : {}
name = "allow-cilium-health"
name = "allow-cilium-health-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.worker.name
priority = "2014"
priority = 2012 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "4240"
source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
destination_address_prefixes = azurerm_subnet.worker.address_prefixes
source_address_prefixes = local.cluster_subnets[each.key]
destination_address_prefixes = local.worker_subnets[each.key]
}
resource "azurerm_network_security_rule" "worker-cilium-metrics" {
for_each = var.networking == "cilium" ? local.worker_subnets : {}
name = "allow-cilium-metrics-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.worker.name
priority = 2014 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "9962-9965"
source_address_prefixes = local.cluster_subnets[each.key]
destination_address_prefixes = local.worker_subnets[each.key]
}
resource "azurerm_network_security_rule" "worker-vxlan" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.worker_subnets
name = "allow-vxlan"
name = "allow-vxlan-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.worker.name
priority = "2015"
priority = 2016 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Udp"
source_port_range = "*"
destination_port_range = "4789"
source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
destination_address_prefixes = azurerm_subnet.worker.address_prefixes
source_address_prefixes = local.cluster_subnets[each.key]
destination_address_prefixes = local.worker_subnets[each.key]
}
resource "azurerm_network_security_rule" "worker-linux-vxlan" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.worker_subnets
name = "allow-linux-vxlan"
name = "allow-linux-vxlan-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.worker.name
priority = "2016"
priority = 2018 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Udp"
source_port_range = "*"
destination_port_range = "8472"
source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
destination_address_prefixes = azurerm_subnet.worker.address_prefixes
source_address_prefixes = local.cluster_subnets[each.key]
destination_address_prefixes = local.worker_subnets[each.key]
}
# Allow Prometheus to scrape node-exporter daemonset
resource "azurerm_network_security_rule" "worker-node-exporter" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.worker_subnets
name = "allow-node-exporter"
name = "allow-node-exporter-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.worker.name
priority = "2020"
priority = 2020 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "9100"
source_address_prefixes = azurerm_subnet.worker.address_prefixes
destination_address_prefixes = azurerm_subnet.worker.address_prefixes
source_address_prefixes = local.worker_subnets[each.key]
destination_address_prefixes = local.worker_subnets[each.key]
}
# Allow Prometheus to scrape kube-proxy
resource "azurerm_network_security_rule" "worker-kube-proxy" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.worker_subnets
name = "allow-kube-proxy"
name = "allow-kube-proxy-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.worker.name
priority = "2024"
priority = 2024 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "10249"
source_address_prefixes = azurerm_subnet.worker.address_prefixes
destination_address_prefixes = azurerm_subnet.worker.address_prefixes
source_address_prefixes = local.worker_subnets[each.key]
destination_address_prefixes = local.worker_subnets[each.key]
}
# Allow apiserver to access kubelet's for exec, log, port-forward
resource "azurerm_network_security_rule" "worker-kubelet" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.worker_subnets
name = "allow-kubelet"
name = "allow-kubelet-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.worker.name
priority = "2025"
priority = 2026 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "10250"
# allow Prometheus to scrape kubelet metrics too
source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
destination_address_prefixes = azurerm_subnet.worker.address_prefixes
source_address_prefixes = local.cluster_subnets[each.key]
destination_address_prefixes = local.worker_subnets[each.key]
}
# Override Azure AllowVNetInBound and AllowAzureLoadBalancerInBound

View File

@ -18,7 +18,7 @@ resource "null_resource" "copy-controller-secrets" {
connection {
type = "ssh"
host = azurerm_public_ip.controllers.*.ip_address[count.index]
host = azurerm_public_ip.controllers-ipv4[count.index].ip_address
user = "core"
timeout = "15m"
}
@ -45,7 +45,7 @@ resource "null_resource" "bootstrap" {
connection {
type = "ssh"
host = azurerm_public_ip.controllers.*.ip_address[0]
host = azurerm_public_ip.controllers-ipv4[0].ip_address
user = "core"
timeout = "15m"
}

View File

@ -5,9 +5,9 @@ variable "cluster_name" {
# Azure
variable "region" {
variable "location" {
type = string
description = "Azure Region (e.g. centralus , see `az account list-locations --output table`)"
description = "Azure location (e.g. centralus , see `az account list-locations --output table`)"
}
variable "dns_zone" {
@ -22,41 +22,65 @@ variable "dns_zone_group" {
# instances
variable "os_image" {
type = string
description = "Fedora CoreOS image for instances"
}
variable "controller_count" {
type = number
description = "Number of controllers (i.e. masters)"
default = 1
}
variable "worker_count" {
type = number
description = "Number of workers"
default = 1
}
variable "controller_type" {
type = string
description = "Machine type for controllers (see `az vm list-skus --location centralus`)"
default = "Standard_B2s"
}
variable "controller_disk_type" {
type = string
description = "Type of managed disk for controller node(s)"
default = "Premium_LRS"
}
variable "controller_disk_size" {
type = number
description = "Size of the managed disk in GB for controller node(s)"
default = 30
}
variable "worker_count" {
type = number
description = "Number of workers"
default = 1
}
variable "worker_type" {
type = string
description = "Machine type for workers (see `az vm list-skus --location centralus`)"
default = "Standard_D2as_v5"
}
variable "os_image" {
variable "worker_disk_type" {
type = string
description = "Fedora CoreOS image for instances"
description = "Type of managed disk for worker nodes"
default = "Standard_LRS"
}
variable "disk_size" {
variable "worker_disk_size" {
type = number
description = "Size of the disk in GB"
description = "Size of the managed disk in GB for worker nodes"
default = 30
}
variable "worker_ephemeral_disk" {
type = bool
description = "Use ephemeral local disk instead of managed disk (requires vm_type with local storage)"
default = false
}
variable "worker_priority" {
type = string
description = "Set worker priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be deallocated at any time."
@ -94,10 +118,15 @@ variable "networking" {
default = "cilium"
}
variable "host_cidr" {
type = string
description = "CIDR IPv4 range to assign to instances"
default = "10.0.0.0/16"
variable "network_cidr" {
type = object({
ipv4 = list(string)
ipv6 = optional(list(string), [])
})
description = "Virtual network CIDR ranges"
default = {
ipv4 = ["10.0.0.0/16"]
}
}
variable "pod_cidr" {
@ -115,34 +144,32 @@ EOD
default = "10.3.0.0/16"
}
variable "enable_reporting" {
type = bool
description = "Enable usage or analytics reporting to upstreams (Calico)"
default = false
}
variable "enable_aggregation" {
type = bool
description = "Enable the Kubernetes Aggregation Layer"
default = true
}
variable "worker_node_labels" {
type = list(string)
description = "List of initial worker node labels"
default = []
}
# unofficial, undocumented, unsupported
variable "cluster_domain_suffix" {
type = string
description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. foo.default.svc.cluster.local) "
default = "cluster.local"
}
# advanced
variable "daemonset_tolerations" {
type = list(string)
description = "List of additional taint keys kube-system DaemonSets should tolerate (e.g. ['custom-role', 'gpu-role'])"
default = []
}
variable "components" {
description = "Configure pre-installed cluster components"
# Component configs are passed through to terraform-render-bootstrap,
# which handles type enforcement and defines defaults
# https://github.com/poseidon/terraform-render-bootstrap/blob/main/variables.tf#L95
type = object({
enable = optional(bool)
coredns = optional(map(any))
kube_proxy = optional(map(any))
flannel = optional(map(any))
calico = optional(map(any))
cilium = optional(map(any))
})
default = null
}

View File

@ -3,7 +3,7 @@
terraform {
required_version = ">= 0.13.0, < 2.0.0"
required_providers {
azurerm = ">= 2.8, < 4.0"
azurerm = ">= 2.8"
null = ">= 2.1"
ct = {
source = "poseidon/ct"

View File

@ -4,14 +4,18 @@ module "workers" {
# Azure
resource_group_name = azurerm_resource_group.cluster.name
region = azurerm_resource_group.cluster.location
location = azurerm_resource_group.cluster.location
subnet_id = azurerm_subnet.worker.id
security_group_id = azurerm_network_security_group.worker.id
backend_address_pool_id = azurerm_lb_backend_address_pool.worker.id
backend_address_pool_ids = local.backend_address_pool_ids
# instances
os_image = var.os_image
worker_count = var.worker_count
vm_type = var.worker_type
os_image = var.os_image
disk_type = var.worker_disk_type
disk_size = var.worker_disk_size
ephemeral_disk = var.worker_ephemeral_disk
priority = var.worker_priority
# configuration
@ -19,7 +23,6 @@ module "workers" {
ssh_authorized_key = var.ssh_authorized_key
azure_authorized_key = var.azure_authorized_key
service_cidr = var.service_cidr
cluster_domain_suffix = var.cluster_domain_suffix
snippets = var.worker_snippets
node_labels = var.worker_node_labels
}

View File

@ -26,7 +26,7 @@ systemd:
Description=Kubelet (System Container)
Wants=rpc-statd.service
[Service]
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3
ExecStartPre=/bin/mkdir -p /etc/cni/net.d
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
ExecStartPre=/bin/mkdir -p /opt/cni/bin
@ -99,7 +99,7 @@ storage:
cgroupDriver: systemd
clusterDNS:
- ${cluster_dns_service_ip}
clusterDomain: ${cluster_domain_suffix}
clusterDomain: cluster.local
healthzPort: 0
rotateCertificates: true
shutdownGracePeriod: 45s

View File

@ -5,9 +5,9 @@ variable "name" {
# Azure
variable "region" {
variable "location" {
type = string
description = "Must be set to the Azure Region of cluster"
description = "Must be set to the Azure location of cluster"
}
variable "resource_group_name" {
@ -25,9 +25,12 @@ variable "security_group_id" {
description = "Must be set to the `worker_security_group_id` output by cluster"
}
variable "backend_address_pool_id" {
type = string
description = "Must be set to the `worker_backend_address_pool_id` output by cluster"
variable "backend_address_pool_ids" {
type = object({
ipv4 = list(string)
ipv6 = list(string)
})
description = "Must be set to the `backend_address_pool_ids` output by cluster"
}
# instances
@ -49,6 +52,24 @@ variable "os_image" {
description = "Fedora CoreOS image for instances"
}
variable "disk_type" {
type = string
description = "Type of managed disk"
default = "Standard_LRS"
}
variable "disk_size" {
type = number
description = "Size of the managed disk in GB"
default = 30
}
variable "ephemeral_disk" {
type = bool
description = "Use ephemeral local disk instead of managed disk (requires vm_type with local storage)"
default = false
}
variable "priority" {
type = string
description = "Set priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be evicted at any time."
@ -99,12 +120,3 @@ variable "node_taints" {
description = "List of initial node taints"
default = []
}
# unofficial, undocumented, unsupported
variable "cluster_domain_suffix" {
description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. foo.default.svc.cluster.local) "
type = string
default = "cluster.local"
}

View File

@ -3,7 +3,7 @@
terraform {
required_version = ">= 0.13.0, < 2.0.0"
required_providers {
azurerm = ">= 2.8, < 4.0"
azurerm = ">= 2.8"
ct = {
source = "poseidon/ct"
version = "~> 0.13"

View File

@ -3,30 +3,29 @@ locals {
}
# Workers scale set
resource "azurerm_linux_virtual_machine_scale_set" "workers" {
resource_group_name = var.resource_group_name
resource "azurerm_orchestrated_virtual_machine_scale_set" "workers" {
name = "${var.name}-worker"
location = var.region
sku = var.vm_type
resource_group_name = var.resource_group_name
location = var.location
platform_fault_domain_count = 1
sku_name = var.vm_type
instances = var.worker_count
# instance name prefix for instances in the set
computer_name_prefix = "${var.name}-worker"
single_placement_group = false
custom_data = base64encode(data.ct_config.worker.rendered)
# storage
encryption_at_host_enabled = true
source_image_id = var.os_image
os_disk {
storage_account_type = "Standard_LRS"
caching = "ReadWrite"
storage_account_type = var.disk_type
disk_size_gb = var.disk_size
caching = "ReadOnly"
# Optionally, use the ephemeral disk of the instance type (support varies)
dynamic "diff_disk_settings" {
for_each = var.ephemeral_disk ? [1] : []
content {
option = "Local"
placement = "ResourceDisk"
}
}
# Azure requires setting admin_ssh_key, though Ignition custom_data handles it too
admin_username = "core"
admin_ssh_key {
username = "core"
public_key = var.azure_authorized_key
}
# network
@ -36,41 +35,46 @@ resource "azurerm_linux_virtual_machine_scale_set" "workers" {
network_security_group_id = var.security_group_id
ip_configuration {
name = "ip0"
name = "ipv4"
version = "IPv4"
primary = true
subnet_id = var.subnet_id
# backend address pool to which the NIC should be added
load_balancer_backend_address_pool_ids = [var.backend_address_pool_id]
load_balancer_backend_address_pool_ids = var.backend_address_pool_ids.ipv4
}
ip_configuration {
name = "ipv6"
version = "IPv6"
subnet_id = var.subnet_id
# backend address pool to which the NIC should be added
load_balancer_backend_address_pool_ids = var.backend_address_pool_ids.ipv6
}
}
# boot
user_data_base64 = base64encode(data.ct_config.worker.rendered)
boot_diagnostics {
# defaults to a managed storage account
}
# Azure requires an RSA admin_ssh_key
os_profile {
linux_configuration {
admin_username = "core"
admin_ssh_key {
username = "core"
public_key = local.azure_authorized_key
}
computer_name_prefix = "${var.name}-worker"
}
}
# lifecycle
upgrade_mode = "Manual"
# eviction policy may only be set when priority is Spot
priority = var.priority
eviction_policy = var.priority == "Spot" ? "Delete" : null
}
# Scale up or down to maintain desired number, tolerating deallocations.
resource "azurerm_monitor_autoscale_setting" "workers" {
resource_group_name = var.resource_group_name
name = "${var.name}-maintain-desired"
location = var.region
# autoscale
termination_notification {
enabled = true
target_resource_id = azurerm_linux_virtual_machine_scale_set.workers.id
profile {
name = "default"
capacity {
minimum = var.worker_count
default = var.worker_count
maximum = var.worker_count
}
}
}
@ -80,7 +84,6 @@ data "ct_config" "worker" {
kubeconfig = indent(10, var.kubeconfig)
ssh_authorized_key = var.ssh_authorized_key
cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
cluster_domain_suffix = var.cluster_domain_suffix
node_labels = join(",", var.node_labels)
node_taints = join(",", var.node_taints)
})

View File

@ -11,7 +11,7 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster
## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>
* Kubernetes v1.28.3 (upstream)
* Kubernetes v1.31.3 (upstream)
* Single or multi-master, [Calico](https://www.projectcalico.org/) or [Cilium](https://github.com/cilium/cilium) or [flannel](https://github.com/coreos/flannel) networking
* On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/)
* Advanced features like [worker pools](https://typhoon.psdn.io/advanced/worker-pools/), [low-priority](https://typhoon.psdn.io/flatcar-linux/azure/#low-priority) workers, and [snippets](https://typhoon.psdn.io/advanced/customization/#hosts) customization

View File

@ -1,13 +1,12 @@
# Kubernetes assets (kubeconfig, manifests)
module "bootstrap" {
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=d151ab77b7ebdfb878ea110c86cc77238189f1ed"
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=e6a1c7bccfc45ab299b5f8149bc3840f99b30b2b"
cluster_name = var.cluster_name
api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)]
etcd_servers = formatlist("%s.%s", azurerm_dns_a_record.etcds.*.name, var.dns_zone)
networking = var.networking
# only effective with Calico networking
# we should be able to use 1450 MTU, but in practice, 1410 was needed
network_encapsulation = "vxlan"
@ -15,9 +14,7 @@ module "bootstrap" {
pod_cidr = var.pod_cidr
service_cidr = var.service_cidr
cluster_domain_suffix = var.cluster_domain_suffix
enable_reporting = var.enable_reporting
enable_aggregation = var.enable_aggregation
daemonset_tolerations = var.daemonset_tolerations
components = var.components
}

View File

@ -11,7 +11,7 @@ systemd:
Requires=docker.service
After=docker.service
[Service]
Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.10
Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.13
ExecStartPre=/usr/bin/docker run -d \
--name etcd \
--network host \
@ -56,7 +56,7 @@ systemd:
After=docker.service
Wants=rpc-statd.service
[Service]
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3
ExecStartPre=/bin/mkdir -p /etc/cni/net.d
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
ExecStartPre=/bin/mkdir -p /opt/cni/bin
@ -105,7 +105,7 @@ systemd:
Type=oneshot
RemainAfterExit=true
WorkingDirectory=/opt/bootstrap
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3
ExecStart=/usr/bin/docker run \
-v /etc/kubernetes/pki:/etc/kubernetes/pki:ro \
-v /opt/bootstrap/assets:/assets:ro \
@ -144,7 +144,7 @@ storage:
cgroupDriver: systemd
clusterDNS:
- ${cluster_dns_service_ip}
clusterDomain: ${cluster_domain_suffix}
clusterDomain: cluster.local
healthzPort: 0
rotateCertificates: true
shutdownGracePeriod: 45s
@ -158,7 +158,7 @@ storage:
contents:
inline: |
#!/bin/bash -e
mkdir -p -- auth tls/etcd tls/k8s static-manifests manifests/coredns manifests-networking
mkdir -p -- auth tls/{etcd,k8s} static-manifests manifests/{coredns,kube-proxy,network}
awk '/#####/ {filename=$2; next} {print > filename}' assets
mkdir -p /etc/ssl/etcd/etcd
mkdir -p /etc/kubernetes/pki
@ -173,8 +173,7 @@ storage:
mv static-manifests/* /etc/kubernetes/manifests/
mkdir -p /opt/bootstrap/assets
mv manifests /opt/bootstrap/assets/manifests
mv manifests-networking/* /opt/bootstrap/assets/manifests/
rm -rf assets auth static-manifests tls manifests-networking
rm -rf assets auth static-manifests tls manifests
- path: /opt/bootstrap/apply
mode: 0544
contents:

View File

@ -1,25 +1,9 @@
# Discrete DNS records for each controller's private IPv4 for etcd usage
resource "azurerm_dns_a_record" "etcds" {
count = var.controller_count
resource_group_name = var.dns_zone_group
# DNS Zone name where record should be created
zone_name = var.dns_zone
# DNS record
name = format("%s-etcd%d", var.cluster_name, count.index)
ttl = 300
# private IPv4 address for etcd
records = [azurerm_network_interface.controllers.*.private_ip_address[count.index]]
}
locals {
# Container Linux derivative
# flatcar-stable -> Flatcar Linux Stable
channel = split("-", var.os_image)[1]
offer_suffix = var.arch == "arm64" ? "corevm" : "free"
urn = var.arch == "arm64" ? local.channel : "${local.channel}-gen2"
offer_suffix = var.controller_arch == "arm64" ? "corevm" : "free"
urn = var.controller_arch == "arm64" ? local.channel : "${local.channel}-gen2"
# Typhoon ssh_authorized_key supports RSA or a newer formats (e.g. ed25519).
# However, Azure requires an older RSA key to pass validations. To use a
@ -28,12 +12,25 @@ locals {
azure_authorized_key = var.azure_authorized_key == "" ? var.ssh_authorized_key : var.azure_authorized_key
}
# Discrete DNS records for each controller's private IPv4 for etcd usage
resource "azurerm_dns_a_record" "etcds" {
count = var.controller_count
# DNS Zone name where record should be created
zone_name = var.dns_zone
resource_group_name = var.dns_zone_group
# DNS record
name = format("%s-etcd%d", var.cluster_name, count.index)
ttl = 300
# private IPv4 address for etcd
records = [azurerm_network_interface.controllers[count.index].private_ip_address]
}
# Controller availability set to spread controllers
resource "azurerm_availability_set" "controllers" {
resource_group_name = azurerm_resource_group.cluster.name
name = "${var.cluster_name}-controllers"
location = var.region
resource_group_name = azurerm_resource_group.cluster.name
location = var.location
platform_fault_domain_count = 2
platform_update_domain_count = 4
managed = true
@ -42,24 +39,19 @@ resource "azurerm_availability_set" "controllers" {
# Controller instances
resource "azurerm_linux_virtual_machine" "controllers" {
count = var.controller_count
resource_group_name = azurerm_resource_group.cluster.name
name = "${var.cluster_name}-controller-${count.index}"
location = var.region
resource_group_name = azurerm_resource_group.cluster.name
location = var.location
availability_set_id = azurerm_availability_set.controllers.id
size = var.controller_type
custom_data = base64encode(data.ct_config.controllers.*.rendered[count.index])
boot_diagnostics {
# defaults to a managed storage account
}
# storage
os_disk {
name = "${var.cluster_name}-controller-${count.index}"
storage_account_type = var.controller_disk_type
disk_size_gb = var.controller_disk_size
caching = "None"
disk_size_gb = var.disk_size
storage_account_type = "Premium_LRS"
}
# Flatcar Container Linux
@ -71,7 +63,7 @@ resource "azurerm_linux_virtual_machine" "controllers" {
}
dynamic "plan" {
for_each = var.arch == "arm64" ? [] : [1]
for_each = var.controller_arch == "arm64" ? [] : [1]
content {
publisher = "kinvolk"
product = "flatcar-container-linux-${local.offer_suffix}"
@ -84,7 +76,13 @@ resource "azurerm_linux_virtual_machine" "controllers" {
azurerm_network_interface.controllers[count.index].id
]
# Azure requires setting admin_ssh_key, though Ignition custom_data handles it too
# boot
custom_data = base64encode(data.ct_config.controllers[count.index].rendered)
boot_diagnostics {
# defaults to a managed storage account
}
# Azure requires an RSA admin_ssh_key
admin_username = "core"
admin_ssh_key {
username = "core"
@ -99,31 +97,52 @@ resource "azurerm_linux_virtual_machine" "controllers" {
}
}
# Controller public IPv4 addresses
resource "azurerm_public_ip" "controllers" {
# Controller node public IPv4 addresses
resource "azurerm_public_ip" "controllers-ipv4" {
count = var.controller_count
resource_group_name = azurerm_resource_group.cluster.name
name = "${var.cluster_name}-controller-${count.index}"
name = "${var.cluster_name}-controller-${count.index}-ipv4"
resource_group_name = azurerm_resource_group.cluster.name
location = azurerm_resource_group.cluster.location
ip_version = "IPv4"
sku = "Standard"
allocation_method = "Static"
}
# Controller NICs with public and private IPv4
# Controller node public IPv6 addresses
resource "azurerm_public_ip" "controllers-ipv6" {
count = var.controller_count
name = "${var.cluster_name}-controller-${count.index}-ipv6"
resource_group_name = azurerm_resource_group.cluster.name
location = azurerm_resource_group.cluster.location
ip_version = "IPv6"
sku = "Standard"
allocation_method = "Static"
}
# Controllers' network interfaces
resource "azurerm_network_interface" "controllers" {
count = var.controller_count
resource_group_name = azurerm_resource_group.cluster.name
name = "${var.cluster_name}-controller-${count.index}"
resource_group_name = azurerm_resource_group.cluster.name
location = azurerm_resource_group.cluster.location
ip_configuration {
name = "ip0"
name = "ipv4"
primary = true
subnet_id = azurerm_subnet.controller.id
private_ip_address_allocation = "Dynamic"
# instance public IPv4
public_ip_address_id = azurerm_public_ip.controllers.*.id[count.index]
private_ip_address_version = "IPv4"
public_ip_address_id = azurerm_public_ip.controllers-ipv4[count.index].id
}
ip_configuration {
name = "ipv6"
subnet_id = azurerm_subnet.controller.id
private_ip_address_allocation = "Dynamic"
private_ip_address_version = "IPv6"
public_ip_address_id = azurerm_public_ip.controllers-ipv6[count.index].id
}
}
@ -135,13 +154,21 @@ resource "azurerm_network_interface_security_group_association" "controllers" {
network_security_group_id = azurerm_network_security_group.controller.id
}
# Associate controller network interface with controller backend address pool
resource "azurerm_network_interface_backend_address_pool_association" "controllers" {
# Associate controller network interface with controller backend address pools
resource "azurerm_network_interface_backend_address_pool_association" "controllers-ipv4" {
count = var.controller_count
network_interface_id = azurerm_network_interface.controllers[count.index].id
ip_configuration_name = "ip0"
backend_address_pool_id = azurerm_lb_backend_address_pool.controller.id
ip_configuration_name = "ipv4"
backend_address_pool_id = azurerm_lb_backend_address_pool.controller-ipv4.id
}
resource "azurerm_network_interface_backend_address_pool_association" "controllers-ipv6" {
count = var.controller_count
network_interface_id = azurerm_network_interface.controllers[count.index].id
ip_configuration_name = "ipv6"
backend_address_pool_id = azurerm_lb_backend_address_pool.controller-ipv6.id
}
# Flatcar Linux controllers
@ -158,7 +185,6 @@ data "ct_config" "controllers" {
kubeconfig = indent(10, module.bootstrap.kubeconfig-kubelet)
ssh_authorized_key = var.ssh_authorized_key
cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
cluster_domain_suffix = var.cluster_domain_suffix
})
strict = true
snippets = var.controller_snippets

View File

@ -1,116 +1,164 @@
# DNS record for the apiserver load balancer
# DNS A record for the apiserver load balancer
resource "azurerm_dns_a_record" "apiserver" {
resource_group_name = var.dns_zone_group
# DNS Zone name where record should be created
zone_name = var.dns_zone
resource_group_name = var.dns_zone_group
# DNS record
name = var.cluster_name
ttl = 300
# IPv4 address of apiserver load balancer
records = [azurerm_public_ip.apiserver-ipv4.ip_address]
records = [azurerm_public_ip.frontend-ipv4.ip_address]
}
# Static IPv4 address for the apiserver frontend
resource "azurerm_public_ip" "apiserver-ipv4" {
resource_group_name = azurerm_resource_group.cluster.name
# DNS AAAA record for the apiserver load balancer
resource "azurerm_dns_aaaa_record" "apiserver" {
# DNS Zone name where record should be created
zone_name = var.dns_zone
resource_group_name = var.dns_zone_group
# DNS record
name = var.cluster_name
ttl = 300
# IPv6 address of apiserver load balancer
records = [azurerm_public_ip.frontend-ipv6.ip_address]
}
name = "${var.cluster_name}-apiserver-ipv4"
location = var.region
# Static IPv4 address for the load balancer
resource "azurerm_public_ip" "frontend-ipv4" {
name = "${var.cluster_name}-frontend-ipv4"
resource_group_name = azurerm_resource_group.cluster.name
location = var.location
ip_version = "IPv4"
sku = "Standard"
allocation_method = "Static"
}
# Static IPv4 address for the ingress frontend
resource "azurerm_public_ip" "ingress-ipv4" {
# Static IPv6 address for the load balancer
resource "azurerm_public_ip" "frontend-ipv6" {
name = "${var.cluster_name}-frontend-ipv6"
resource_group_name = azurerm_resource_group.cluster.name
name = "${var.cluster_name}-ingress-ipv4"
location = var.region
location = var.location
ip_version = "IPv6"
sku = "Standard"
allocation_method = "Static"
}
# Network Load Balancer for apiservers and ingress
resource "azurerm_lb" "cluster" {
resource_group_name = azurerm_resource_group.cluster.name
name = var.cluster_name
location = var.region
resource_group_name = azurerm_resource_group.cluster.name
location = var.location
sku = "Standard"
frontend_ip_configuration {
name = "apiserver"
public_ip_address_id = azurerm_public_ip.apiserver-ipv4.id
name = "frontend-ipv4"
public_ip_address_id = azurerm_public_ip.frontend-ipv4.id
}
frontend_ip_configuration {
name = "ingress"
public_ip_address_id = azurerm_public_ip.ingress-ipv4.id
name = "frontend-ipv6"
public_ip_address_id = azurerm_public_ip.frontend-ipv6.id
}
}
resource "azurerm_lb_rule" "apiserver" {
name = "apiserver"
resource "azurerm_lb_rule" "apiserver-ipv4" {
name = "apiserver-ipv4"
loadbalancer_id = azurerm_lb.cluster.id
frontend_ip_configuration_name = "apiserver"
frontend_ip_configuration_name = "frontend-ipv4"
disable_outbound_snat = true
protocol = "Tcp"
frontend_port = 6443
backend_port = 6443
backend_address_pool_ids = [azurerm_lb_backend_address_pool.controller.id]
backend_address_pool_ids = [azurerm_lb_backend_address_pool.controller-ipv4.id]
probe_id = azurerm_lb_probe.apiserver.id
}
resource "azurerm_lb_rule" "ingress-http" {
name = "ingress-http"
resource "azurerm_lb_rule" "apiserver-ipv6" {
name = "apiserver-ipv6"
loadbalancer_id = azurerm_lb.cluster.id
frontend_ip_configuration_name = "ingress"
frontend_ip_configuration_name = "frontend-ipv6"
disable_outbound_snat = true
protocol = "Tcp"
frontend_port = 6443
backend_port = 6443
backend_address_pool_ids = [azurerm_lb_backend_address_pool.controller-ipv6.id]
probe_id = azurerm_lb_probe.apiserver.id
}
resource "azurerm_lb_rule" "ingress-http-ipv4" {
name = "ingress-http-ipv4"
loadbalancer_id = azurerm_lb.cluster.id
frontend_ip_configuration_name = "frontend-ipv4"
disable_outbound_snat = true
protocol = "Tcp"
frontend_port = 80
backend_port = 80
backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker.id]
backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker-ipv4.id]
probe_id = azurerm_lb_probe.ingress.id
}
resource "azurerm_lb_rule" "ingress-https" {
name = "ingress-https"
resource "azurerm_lb_rule" "ingress-https-ipv4" {
name = "ingress-https-ipv4"
loadbalancer_id = azurerm_lb.cluster.id
frontend_ip_configuration_name = "ingress"
frontend_ip_configuration_name = "frontend-ipv4"
disable_outbound_snat = true
protocol = "Tcp"
frontend_port = 443
backend_port = 443
backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker.id]
backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker-ipv4.id]
probe_id = azurerm_lb_probe.ingress.id
}
# Worker outbound TCP/UDP SNAT
resource "azurerm_lb_outbound_rule" "worker-outbound" {
name = "worker"
resource "azurerm_lb_rule" "ingress-http-ipv6" {
name = "ingress-http-ipv6"
loadbalancer_id = azurerm_lb.cluster.id
frontend_ip_configuration {
name = "ingress"
frontend_ip_configuration_name = "frontend-ipv6"
disable_outbound_snat = true
protocol = "Tcp"
frontend_port = 80
backend_port = 80
backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker-ipv6.id]
probe_id = azurerm_lb_probe.ingress.id
}
protocol = "All"
backend_address_pool_id = azurerm_lb_backend_address_pool.worker.id
resource "azurerm_lb_rule" "ingress-https-ipv6" {
name = "ingress-https-ipv6"
loadbalancer_id = azurerm_lb.cluster.id
frontend_ip_configuration_name = "frontend-ipv6"
disable_outbound_snat = true
protocol = "Tcp"
frontend_port = 443
backend_port = 443
backend_address_pool_ids = [azurerm_lb_backend_address_pool.worker-ipv6.id]
probe_id = azurerm_lb_probe.ingress.id
}
# Backend Address Pools
# Address pool of controllers
resource "azurerm_lb_backend_address_pool" "controller" {
name = "controller"
resource "azurerm_lb_backend_address_pool" "controller-ipv4" {
name = "controller-ipv4"
loadbalancer_id = azurerm_lb.cluster.id
}
# Address pool of workers
resource "azurerm_lb_backend_address_pool" "worker" {
name = "worker"
resource "azurerm_lb_backend_address_pool" "controller-ipv6" {
name = "controller-ipv6"
loadbalancer_id = azurerm_lb.cluster.id
}
# Address pools for workers
resource "azurerm_lb_backend_address_pool" "worker-ipv4" {
name = "worker-ipv4"
loadbalancer_id = azurerm_lb.cluster.id
}
resource "azurerm_lb_backend_address_pool" "worker-ipv6" {
name = "worker-ipv6"
loadbalancer_id = azurerm_lb.cluster.id
}
@ -122,10 +170,8 @@ resource "azurerm_lb_probe" "apiserver" {
loadbalancer_id = azurerm_lb.cluster.id
protocol = "Tcp"
port = 6443
# unhealthy threshold
number_of_probes = 3
interval_in_seconds = 5
}
@ -136,10 +182,29 @@ resource "azurerm_lb_probe" "ingress" {
protocol = "Http"
port = 10254
request_path = "/healthz"
# unhealthy threshold
number_of_probes = 3
interval_in_seconds = 5
}
# Outbound SNAT
resource "azurerm_lb_outbound_rule" "outbound-ipv4" {
name = "outbound-ipv4"
protocol = "All"
loadbalancer_id = azurerm_lb.cluster.id
backend_address_pool_id = azurerm_lb_backend_address_pool.worker-ipv4.id
frontend_ip_configuration {
name = "frontend-ipv4"
}
}
resource "azurerm_lb_outbound_rule" "outbound-ipv6" {
name = "outbound-ipv6"
protocol = "All"
loadbalancer_id = azurerm_lb.cluster.id
backend_address_pool_id = azurerm_lb_backend_address_pool.worker-ipv6.id
frontend_ip_configuration {
name = "frontend-ipv6"
}
}

View File

@ -0,0 +1,6 @@
locals {
backend_address_pool_ids = {
ipv4 = [azurerm_lb_backend_address_pool.worker-ipv4.id]
ipv6 = [azurerm_lb_backend_address_pool.worker-ipv6.id]
}
}

View File

@ -1,27 +1,63 @@
# Choose an IPv6 ULA subnet at random
# https://datatracker.ietf.org/doc/html/rfc4193
resource "random_id" "ula-netnum" {
byte_length = 5 # 40 bits
}
locals {
# fd00::/8 -> shift 40 -> 2^40 possible /48 subnets
ula-range = cidrsubnet("fd00::/8", 40, random_id.ula-netnum.dec)
network_cidr = {
ipv4 = var.network_cidr.ipv4
ipv6 = length(var.network_cidr.ipv6) > 0 ? var.network_cidr.ipv6 : [local.ula-range]
}
# Subdivide the virtual network into subnets
# - controllers use netnum 0
# - workers use netnum 1
controller_subnets = {
ipv4 = [for i, cidr in local.network_cidr.ipv4 : cidrsubnet(cidr, 1, 0)]
ipv6 = [for i, cidr in local.network_cidr.ipv6 : cidrsubnet(cidr, 16, 0)]
}
worker_subnets = {
ipv4 = [for i, cidr in local.network_cidr.ipv4 : cidrsubnet(cidr, 1, 1)]
ipv6 = [for i, cidr in local.network_cidr.ipv6 : cidrsubnet(cidr, 16, 1)]
}
cluster_subnets = {
ipv4 = concat(local.controller_subnets.ipv4, local.worker_subnets.ipv4)
ipv6 = concat(local.controller_subnets.ipv6, local.worker_subnets.ipv6)
}
}
# Organize cluster into a resource group
resource "azurerm_resource_group" "cluster" {
name = var.cluster_name
location = var.region
location = var.location
}
resource "azurerm_virtual_network" "network" {
resource_group_name = azurerm_resource_group.cluster.name
name = var.cluster_name
resource_group_name = azurerm_resource_group.cluster.name
location = azurerm_resource_group.cluster.location
address_space = [var.host_cidr]
address_space = concat(
local.network_cidr.ipv4,
local.network_cidr.ipv6
)
}
# Subnets - separate subnets for controller and workers because Azure
# network security groups are based on IPv4 CIDR rather than instance
# tags like GCP or security group membership like AWS
# Subnets - separate subnets for controllers and workers because Azure
# network security groups are oriented around address prefixes rather
# than instance tags (GCP) or security group membership (AWS)
resource "azurerm_subnet" "controller" {
resource_group_name = azurerm_resource_group.cluster.name
name = "controller"
resource_group_name = azurerm_resource_group.cluster.name
virtual_network_name = azurerm_virtual_network.network.name
address_prefixes = [cidrsubnet(var.host_cidr, 1, 0)]
address_prefixes = concat(
local.controller_subnets.ipv4,
local.controller_subnets.ipv6,
)
default_outbound_access_enabled = false
}
resource "azurerm_subnet_network_security_group_association" "controller" {
@ -30,11 +66,14 @@ resource "azurerm_subnet_network_security_group_association" "controller" {
}
resource "azurerm_subnet" "worker" {
resource_group_name = azurerm_resource_group.cluster.name
name = "worker"
resource_group_name = azurerm_resource_group.cluster.name
virtual_network_name = azurerm_virtual_network.network.name
address_prefixes = [cidrsubnet(var.host_cidr, 1, 1)]
address_prefixes = concat(
local.worker_subnets.ipv4,
local.worker_subnets.ipv6,
)
default_outbound_access_enabled = false
}
resource "azurerm_subnet_network_security_group_association" "worker" {

View File

@ -6,13 +6,18 @@ output "kubeconfig-admin" {
# Outputs for Kubernetes Ingress
output "ingress_static_ipv4" {
value = azurerm_public_ip.ingress-ipv4.ip_address
value = azurerm_public_ip.frontend-ipv4.ip_address
description = "IPv4 address of the load balancer for distributing traffic to Ingress controllers"
}
output "ingress_static_ipv6" {
value = azurerm_public_ip.frontend-ipv6.ip_address
description = "IPv6 address of the load balancer for distributing traffic to Ingress controllers"
}
# Outputs for worker pools
output "region" {
output "location" {
value = azurerm_resource_group.cluster.location
}
@ -39,13 +44,24 @@ output "kubeconfig" {
# Outputs for custom firewalling
output "controller_security_group_name" {
description = "Network Security Group for controller nodes"
value = azurerm_network_security_group.controller.name
}
output "worker_security_group_name" {
description = "Network Security Group for worker nodes"
value = azurerm_network_security_group.worker.name
}
output "controller_address_prefixes" {
description = "Controller network subnet CIDR addresses (for source/destination)"
value = local.controller_subnets
}
output "worker_address_prefixes" {
description = "Worker network subnet CIDR addresses (for source/destination)"
value = azurerm_subnet.worker.address_prefixes
value = local.worker_subnets
}
# Outputs for custom load balancing
@ -55,9 +71,12 @@ output "loadbalancer_id" {
value = azurerm_lb.cluster.id
}
output "backend_address_pool_id" {
description = "ID of the worker backend address pool"
value = azurerm_lb_backend_address_pool.worker.id
output "backend_address_pool_ids" {
description = "IDs of the worker backend address pools"
value = {
ipv4 = [azurerm_lb_backend_address_pool.worker-ipv4.id]
ipv6 = [azurerm_lb_backend_address_pool.worker-ipv6.id]
}
}
# Outputs for debug

View File

@ -1,198 +1,223 @@
# Controller security group
resource "azurerm_network_security_group" "controller" {
resource_group_name = azurerm_resource_group.cluster.name
name = "${var.cluster_name}-controller"
resource_group_name = azurerm_resource_group.cluster.name
location = azurerm_resource_group.cluster.location
}
resource "azurerm_network_security_rule" "controller-icmp" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.controller_subnets
name = "allow-icmp"
name = "allow-icmp-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.controller.name
priority = "1995"
priority = 1995 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Icmp"
source_port_range = "*"
destination_port_range = "*"
source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
destination_address_prefixes = azurerm_subnet.controller.address_prefixes
source_address_prefixes = local.cluster_subnets[each.key]
destination_address_prefixes = local.controller_subnets[each.key]
}
resource "azurerm_network_security_rule" "controller-ssh" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.controller_subnets
name = "allow-ssh"
name = "allow-ssh-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.controller.name
priority = "2000"
priority = 2000 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "22"
source_address_prefix = "*"
destination_address_prefixes = azurerm_subnet.controller.address_prefixes
destination_address_prefixes = local.controller_subnets[each.key]
}
resource "azurerm_network_security_rule" "controller-etcd" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.controller_subnets
name = "allow-etcd"
name = "allow-etcd-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.controller.name
priority = "2005"
priority = 2005 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "2379-2380"
source_address_prefixes = azurerm_subnet.controller.address_prefixes
destination_address_prefixes = azurerm_subnet.controller.address_prefixes
source_address_prefixes = local.controller_subnets[each.key]
destination_address_prefixes = local.controller_subnets[each.key]
}
# Allow Prometheus to scrape etcd metrics
resource "azurerm_network_security_rule" "controller-etcd-metrics" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.controller_subnets
name = "allow-etcd-metrics"
name = "allow-etcd-metrics-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.controller.name
priority = "2010"
priority = 2010 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "2381"
source_address_prefixes = azurerm_subnet.worker.address_prefixes
destination_address_prefixes = azurerm_subnet.controller.address_prefixes
source_address_prefixes = local.worker_subnets[each.key]
destination_address_prefixes = local.controller_subnets[each.key]
}
# Allow Prometheus to scrape kube-proxy metrics
resource "azurerm_network_security_rule" "controller-kube-proxy" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.controller_subnets
name = "allow-kube-proxy-metrics"
name = "allow-kube-proxy-metrics-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.controller.name
priority = "2011"
priority = 2012 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "10249"
source_address_prefixes = azurerm_subnet.worker.address_prefixes
destination_address_prefixes = azurerm_subnet.controller.address_prefixes
source_address_prefixes = local.worker_subnets[each.key]
destination_address_prefixes = local.controller_subnets[each.key]
}
# Allow Prometheus to scrape kube-scheduler and kube-controller-manager metrics
resource "azurerm_network_security_rule" "controller-kube-metrics" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.controller_subnets
name = "allow-kube-metrics"
name = "allow-kube-metrics-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.controller.name
priority = "2012"
priority = 2014 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "10257-10259"
source_address_prefixes = azurerm_subnet.worker.address_prefixes
destination_address_prefixes = azurerm_subnet.controller.address_prefixes
source_address_prefixes = local.worker_subnets[each.key]
destination_address_prefixes = local.controller_subnets[each.key]
}
resource "azurerm_network_security_rule" "controller-apiserver" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.controller_subnets
name = "allow-apiserver"
name = "allow-apiserver-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.controller.name
priority = "2015"
priority = 2016 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "6443"
source_address_prefix = "*"
destination_address_prefixes = azurerm_subnet.controller.address_prefixes
destination_address_prefixes = local.controller_subnets[each.key]
}
resource "azurerm_network_security_rule" "controller-cilium-health" {
resource_group_name = azurerm_resource_group.cluster.name
count = var.networking == "cilium" ? 1 : 0
for_each = var.networking == "cilium" ? local.controller_subnets : {}
name = "allow-cilium-health"
name = "allow-cilium-health-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.controller.name
priority = "2019"
priority = 2018 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "4240"
source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
destination_address_prefixes = azurerm_subnet.controller.address_prefixes
source_address_prefixes = local.cluster_subnets[each.key]
destination_address_prefixes = local.controller_subnets[each.key]
}
resource "azurerm_network_security_rule" "controller-cilium-metrics" {
for_each = var.networking == "cilium" ? local.controller_subnets : {}
name = "allow-cilium-metrics-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.controller.name
priority = 2035 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "9962-9965"
source_address_prefixes = local.cluster_subnets[each.key]
destination_address_prefixes = local.controller_subnets[each.key]
}
resource "azurerm_network_security_rule" "controller-vxlan" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.controller_subnets
name = "allow-vxlan"
name = "allow-vxlan-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.controller.name
priority = "2020"
priority = 2020 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Udp"
source_port_range = "*"
destination_port_range = "4789"
source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
destination_address_prefixes = azurerm_subnet.controller.address_prefixes
source_address_prefixes = local.cluster_subnets[each.key]
destination_address_prefixes = local.controller_subnets[each.key]
}
resource "azurerm_network_security_rule" "controller-linux-vxlan" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.controller_subnets
name = "allow-linux-vxlan"
name = "allow-linux-vxlan-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.controller.name
priority = "2021"
priority = 2022 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Udp"
source_port_range = "*"
destination_port_range = "8472"
source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
destination_address_prefixes = azurerm_subnet.controller.address_prefixes
source_address_prefixes = local.cluster_subnets[each.key]
destination_address_prefixes = local.controller_subnets[each.key]
}
# Allow Prometheus to scrape node-exporter daemonset
resource "azurerm_network_security_rule" "controller-node-exporter" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.controller_subnets
name = "allow-node-exporter"
name = "allow-node-exporter-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.controller.name
priority = "2025"
priority = 2025 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "9100"
source_address_prefixes = azurerm_subnet.worker.address_prefixes
destination_address_prefixes = azurerm_subnet.controller.address_prefixes
source_address_prefixes = local.worker_subnets[each.key]
destination_address_prefixes = local.controller_subnets[each.key]
}
# Allow apiserver to access kubelet's for exec, log, port-forward
resource "azurerm_network_security_rule" "controller-kubelet" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.controller_subnets
name = "allow-kubelet"
name = "allow-kubelet-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.controller.name
priority = "2030"
priority = 2030 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "10250"
# allow Prometheus to scrape kubelet metrics too
source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
destination_address_prefixes = azurerm_subnet.controller.address_prefixes
source_address_prefixes = local.cluster_subnets[each.key]
destination_address_prefixes = local.controller_subnets[each.key]
}
# Override Azure AllowVNetInBound and AllowAzureLoadBalancerInBound
@ -231,166 +256,189 @@ resource "azurerm_network_security_rule" "controller-deny-all" {
# Worker security group
resource "azurerm_network_security_group" "worker" {
resource_group_name = azurerm_resource_group.cluster.name
name = "${var.cluster_name}-worker"
resource_group_name = azurerm_resource_group.cluster.name
location = azurerm_resource_group.cluster.location
}
resource "azurerm_network_security_rule" "worker-icmp" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.worker_subnets
name = "allow-icmp"
name = "allow-icmp-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.worker.name
priority = "1995"
priority = 1995 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Icmp"
source_port_range = "*"
destination_port_range = "*"
source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
destination_address_prefixes = azurerm_subnet.worker.address_prefixes
source_address_prefixes = local.cluster_subnets[each.key]
destination_address_prefixes = local.worker_subnets[each.key]
}
resource "azurerm_network_security_rule" "worker-ssh" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.worker_subnets
name = "allow-ssh"
name = "allow-ssh-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.worker.name
priority = "2000"
priority = 2000 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "22"
source_address_prefixes = azurerm_subnet.controller.address_prefixes
destination_address_prefixes = azurerm_subnet.worker.address_prefixes
source_address_prefixes = local.controller_subnets[each.key]
destination_address_prefixes = local.worker_subnets[each.key]
}
resource "azurerm_network_security_rule" "worker-http" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.worker_subnets
name = "allow-http"
name = "allow-http-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.worker.name
priority = "2005"
priority = 2005 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "80"
source_address_prefix = "*"
destination_address_prefixes = azurerm_subnet.worker.address_prefixes
destination_address_prefixes = local.worker_subnets[each.key]
}
resource "azurerm_network_security_rule" "worker-https" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.worker_subnets
name = "allow-https"
name = "allow-https-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.worker.name
priority = "2010"
priority = 2010 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "443"
source_address_prefix = "*"
destination_address_prefixes = azurerm_subnet.worker.address_prefixes
destination_address_prefixes = local.worker_subnets[each.key]
}
resource "azurerm_network_security_rule" "worker-cilium-health" {
resource_group_name = azurerm_resource_group.cluster.name
count = var.networking == "cilium" ? 1 : 0
for_each = var.networking == "cilium" ? local.worker_subnets : {}
name = "allow-cilium-health"
name = "allow-cilium-health-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.worker.name
priority = "2014"
priority = 2012 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "4240"
source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
destination_address_prefixes = azurerm_subnet.worker.address_prefixes
source_address_prefixes = local.cluster_subnets[each.key]
destination_address_prefixes = local.worker_subnets[each.key]
}
resource "azurerm_network_security_rule" "worker-cilium-metrics" {
for_each = var.networking == "cilium" ? local.worker_subnets : {}
name = "allow-cilium-metrics-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.worker.name
priority = 2014 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "9962-9965"
source_address_prefixes = local.cluster_subnets[each.key]
destination_address_prefixes = local.worker_subnets[each.key]
}
resource "azurerm_network_security_rule" "worker-vxlan" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.worker_subnets
name = "allow-vxlan"
name = "allow-vxlan-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.worker.name
priority = "2015"
priority = 2016 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Udp"
source_port_range = "*"
destination_port_range = "4789"
source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
destination_address_prefixes = azurerm_subnet.worker.address_prefixes
source_address_prefixes = local.cluster_subnets[each.key]
destination_address_prefixes = local.worker_subnets[each.key]
}
resource "azurerm_network_security_rule" "worker-linux-vxlan" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.worker_subnets
name = "allow-linux-vxlan"
name = "allow-linux-vxlan-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.worker.name
priority = "2016"
priority = 2018 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Udp"
source_port_range = "*"
destination_port_range = "8472"
source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
destination_address_prefixes = azurerm_subnet.worker.address_prefixes
source_address_prefixes = local.cluster_subnets[each.key]
destination_address_prefixes = local.worker_subnets[each.key]
}
# Allow Prometheus to scrape node-exporter daemonset
resource "azurerm_network_security_rule" "worker-node-exporter" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.worker_subnets
name = "allow-node-exporter"
name = "allow-node-exporter-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.worker.name
priority = "2020"
priority = 2020 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "9100"
source_address_prefixes = azurerm_subnet.worker.address_prefixes
destination_address_prefixes = azurerm_subnet.worker.address_prefixes
source_address_prefixes = local.worker_subnets[each.key]
destination_address_prefixes = local.worker_subnets[each.key]
}
# Allow Prometheus to scrape kube-proxy
resource "azurerm_network_security_rule" "worker-kube-proxy" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.worker_subnets
name = "allow-kube-proxy"
name = "allow-kube-proxy-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.worker.name
priority = "2024"
priority = 2024 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "10249"
source_address_prefixes = azurerm_subnet.worker.address_prefixes
destination_address_prefixes = azurerm_subnet.worker.address_prefixes
source_address_prefixes = local.worker_subnets[each.key]
destination_address_prefixes = local.worker_subnets[each.key]
}
# Allow apiserver to access kubelet's for exec, log, port-forward
resource "azurerm_network_security_rule" "worker-kubelet" {
resource_group_name = azurerm_resource_group.cluster.name
for_each = local.worker_subnets
name = "allow-kubelet"
name = "allow-kubelet-${each.key}"
resource_group_name = azurerm_resource_group.cluster.name
network_security_group_name = azurerm_network_security_group.worker.name
priority = "2025"
priority = 2026 + (each.key == "ipv4" ? 0 : 1)
access = "Allow"
direction = "Inbound"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "10250"
# allow Prometheus to scrape kubelet metrics too
source_address_prefixes = concat(azurerm_subnet.controller.address_prefixes, azurerm_subnet.worker.address_prefixes)
destination_address_prefixes = azurerm_subnet.worker.address_prefixes
source_address_prefixes = local.cluster_subnets[each.key]
destination_address_prefixes = local.worker_subnets[each.key]
}
# Override Azure AllowVNetInBound and AllowAzureLoadBalancerInBound

View File

@ -18,7 +18,7 @@ resource "null_resource" "copy-controller-secrets" {
connection {
type = "ssh"
host = azurerm_public_ip.controllers.*.ip_address[count.index]
host = azurerm_public_ip.controllers-ipv4[count.index].ip_address
user = "core"
timeout = "15m"
}
@ -45,7 +45,7 @@ resource "null_resource" "bootstrap" {
connection {
type = "ssh"
host = azurerm_public_ip.controllers.*.ip_address[0]
host = azurerm_public_ip.controllers-ipv4[0].ip_address
user = "core"
timeout = "15m"
}

View File

@ -5,9 +5,9 @@ variable "cluster_name" {
# Azure
variable "region" {
variable "location" {
type = string
description = "Azure Region (e.g. centralus , see `az account list-locations --output table`)"
description = "Azure location (e.g. centralus , see `az account list-locations --output table`)"
}
variable "dns_zone" {
@ -22,30 +22,6 @@ variable "dns_zone_group" {
# instances
variable "controller_count" {
type = number
description = "Number of controllers (i.e. masters)"
default = 1
}
variable "worker_count" {
type = number
description = "Number of workers"
default = 1
}
variable "controller_type" {
type = string
description = "Machine type for controllers (see `az vm list-skus --location centralus`)"
default = "Standard_B2s"
}
variable "worker_type" {
type = string
description = "Machine type for workers (see `az vm list-skus --location centralus`)"
default = "Standard_D2as_v5"
}
variable "os_image" {
type = string
description = "Channel for a Container Linux derivative (flatcar-stable, flatcar-beta, flatcar-alpha)"
@ -57,12 +33,60 @@ variable "os_image" {
}
}
variable "disk_size" {
variable "controller_count" {
type = number
description = "Size of the disk in GB"
description = "Number of controllers (i.e. masters)"
default = 1
}
variable "controller_type" {
type = string
description = "Machine type for controllers (see `az vm list-skus --location centralus`)"
default = "Standard_B2s"
}
variable "controller_disk_type" {
type = string
description = "Type of managed disk for controller node(s)"
default = "Premium_LRS"
}
variable "controller_disk_size" {
type = number
description = "Size of the managed disk in GB for controller node(s)"
default = 30
}
variable "worker_count" {
type = number
description = "Number of workers"
default = 1
}
variable "worker_type" {
type = string
description = "Machine type for workers (see `az vm list-skus --location centralus`)"
default = "Standard_D2as_v5"
}
variable "worker_disk_type" {
type = string
description = "Type of managed disk for worker nodes"
default = "Standard_LRS"
}
variable "worker_disk_size" {
type = number
description = "Size of the managed disk in GB for worker nodes"
default = 30
}
variable "worker_ephemeral_disk" {
type = bool
description = "Use ephemeral local disk instead of managed disk (requires vm_type with local storage)"
default = false
}
variable "worker_priority" {
type = string
description = "Set worker priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be deallocated at any time."
@ -100,10 +124,15 @@ variable "networking" {
default = "cilium"
}
variable "host_cidr" {
type = string
description = "CIDR IPv4 range to assign to instances"
default = "10.0.0.0/16"
variable "network_cidr" {
type = object({
ipv4 = list(string)
ipv6 = optional(list(string), [])
})
description = "Virtual network CIDR ranges"
default = {
ipv4 = ["10.0.0.0/16"]
}
}
variable "pod_cidr" {
@ -121,32 +150,31 @@ EOD
default = "10.3.0.0/16"
}
variable "enable_reporting" {
type = bool
description = "Enable usage or analytics reporting to upstreams (Calico)"
default = false
}
variable "enable_aggregation" {
type = bool
description = "Enable the Kubernetes Aggregation Layer"
default = true
}
variable "worker_node_labels" {
type = list(string)
description = "List of initial worker node labels"
default = []
}
variable "arch" {
type = string
description = "Container architecture (amd64 or arm64)"
default = "amd64"
# advanced
variable "controller_arch" {
type = string
description = "Controller node(s) architecture (amd64 or arm64)"
default = "amd64"
validation {
condition = var.arch == "amd64" || var.arch == "arm64"
error_message = "The arch must be amd64 or arm64."
condition = contains(["amd64", "arm64"], var.controller_arch)
error_message = "The controller_arch must be amd64 or arm64."
}
}
variable "worker_arch" {
type = string
description = "Worker node(s) architecture (amd64 or arm64)"
default = "amd64"
validation {
condition = contains(["amd64", "arm64"], var.worker_arch)
error_message = "The worker_arch must be amd64 or arm64."
}
}
@ -156,10 +184,18 @@ variable "daemonset_tolerations" {
default = []
}
# unofficial, undocumented, unsupported
variable "cluster_domain_suffix" {
type = string
description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. foo.default.svc.cluster.local) "
default = "cluster.local"
variable "components" {
description = "Configure pre-installed cluster components"
# Component configs are passed through to terraform-render-bootstrap,
# which handles type enforcement and defines defaults
# https://github.com/poseidon/terraform-render-bootstrap/blob/main/variables.tf#L95
type = object({
enable = optional(bool)
coredns = optional(map(any))
kube_proxy = optional(map(any))
flannel = optional(map(any))
calico = optional(map(any))
cilium = optional(map(any))
})
default = null
}

View File

@ -3,11 +3,11 @@
terraform {
required_version = ">= 0.13.0, < 2.0.0"
required_providers {
azurerm = ">= 2.8, < 4.0"
azurerm = ">= 2.8"
null = ">= 2.1"
ct = {
source = "poseidon/ct"
version = "~> 0.11"
version = "~> 0.13"
}
}
}

View File

@ -4,14 +4,17 @@ module "workers" {
# Azure
resource_group_name = azurerm_resource_group.cluster.name
region = azurerm_resource_group.cluster.location
location = azurerm_resource_group.cluster.location
subnet_id = azurerm_subnet.worker.id
security_group_id = azurerm_network_security_group.worker.id
backend_address_pool_id = azurerm_lb_backend_address_pool.worker.id
backend_address_pool_ids = local.backend_address_pool_ids
worker_count = var.worker_count
vm_type = var.worker_type
os_image = var.os_image
disk_type = var.worker_disk_type
disk_size = var.worker_disk_size
ephemeral_disk = var.worker_ephemeral_disk
priority = var.worker_priority
# configuration
@ -19,8 +22,7 @@ module "workers" {
ssh_authorized_key = var.ssh_authorized_key
azure_authorized_key = var.azure_authorized_key
service_cidr = var.service_cidr
cluster_domain_suffix = var.cluster_domain_suffix
snippets = var.worker_snippets
node_labels = var.worker_node_labels
arch = var.arch
arch = var.worker_arch
}

View File

@ -28,7 +28,7 @@ systemd:
After=docker.service
Wants=rpc-statd.service
[Service]
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3
ExecStartPre=/bin/mkdir -p /etc/cni/net.d
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
ExecStartPre=/bin/mkdir -p /opt/cni/bin
@ -99,7 +99,7 @@ storage:
cgroupDriver: systemd
clusterDNS:
- ${cluster_dns_service_ip}
clusterDomain: ${cluster_domain_suffix}
clusterDomain: cluster.local
healthzPort: 0
rotateCertificates: true
shutdownGracePeriod: 45s

View File

@ -5,9 +5,9 @@ variable "name" {
# Azure
variable "region" {
variable "location" {
type = string
description = "Must be set to the Azure Region of cluster"
description = "Must be set to the Azure location of cluster"
}
variable "resource_group_name" {
@ -25,9 +25,12 @@ variable "security_group_id" {
description = "Must be set to the `worker_security_group_id` output by cluster"
}
variable "backend_address_pool_id" {
type = string
description = "Must be set to the `worker_backend_address_pool_id` output by cluster"
variable "backend_address_pool_ids" {
type = object({
ipv4 = list(string)
ipv6 = list(string)
})
description = "Must be set to the `backend_address_pool_ids` output by cluster"
}
# instances
@ -55,6 +58,24 @@ variable "os_image" {
}
}
variable "disk_type" {
type = string
description = "Type of managed disk"
default = "Standard_LRS"
}
variable "disk_size" {
type = number
description = "Size of the managed disk in GB"
default = 30
}
variable "ephemeral_disk" {
type = bool
description = "Use ephemeral local disk instead of managed disk (requires vm_type with local storage)"
default = false
}
variable "priority" {
type = string
description = "Set priority to Spot to use reduced cost surplus capacity, with the tradeoff that instances can be evicted at any time."
@ -116,12 +137,3 @@ variable "arch" {
error_message = "The arch must be amd64 or arm64."
}
}
# unofficial, undocumented, unsupported
variable "cluster_domain_suffix" {
description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. foo.default.svc.cluster.local) "
type = string
default = "cluster.local"
}

View File

@ -3,10 +3,10 @@
terraform {
required_version = ">= 0.13.0, < 2.0.0"
required_providers {
azurerm = ">= 2.8, < 4.0"
azurerm = ">= 2.8"
ct = {
source = "poseidon/ct"
version = "~> 0.11"
version = "~> 0.13"
}
}
}

View File

@ -8,25 +8,28 @@ locals {
}
# Workers scale set
resource "azurerm_linux_virtual_machine_scale_set" "workers" {
resource_group_name = var.resource_group_name
resource "azurerm_orchestrated_virtual_machine_scale_set" "workers" {
name = "${var.name}-worker"
location = var.region
sku = var.vm_type
resource_group_name = var.resource_group_name
location = var.location
platform_fault_domain_count = 1
sku_name = var.vm_type
instances = var.worker_count
# instance name prefix for instances in the set
computer_name_prefix = "${var.name}-worker"
single_placement_group = false
custom_data = base64encode(data.ct_config.worker.rendered)
boot_diagnostics {
# defaults to a managed storage account
}
# storage
encryption_at_host_enabled = true
os_disk {
storage_account_type = "Standard_LRS"
caching = "ReadWrite"
storage_account_type = var.disk_type
disk_size_gb = var.disk_size
caching = "ReadOnly"
# Optionally, use the ephemeral disk of the instance type (support varies)
dynamic "diff_disk_settings" {
for_each = var.ephemeral_disk ? [1] : []
content {
option = "Local"
placement = "ResourceDisk"
}
}
}
# Flatcar Container Linux
@ -46,13 +49,6 @@ resource "azurerm_linux_virtual_machine_scale_set" "workers" {
}
}
# Azure requires setting admin_ssh_key, though Ignition custom_data handles it too
admin_username = "core"
admin_ssh_key {
username = "core"
public_key = local.azure_authorized_key
}
# network
network_interface {
name = "nic0"
@ -60,17 +56,41 @@ resource "azurerm_linux_virtual_machine_scale_set" "workers" {
network_security_group_id = var.security_group_id
ip_configuration {
name = "ip0"
name = "ipv4"
version = "IPv4"
primary = true
subnet_id = var.subnet_id
# backend address pool to which the NIC should be added
load_balancer_backend_address_pool_ids = [var.backend_address_pool_id]
load_balancer_backend_address_pool_ids = var.backend_address_pool_ids.ipv4
}
ip_configuration {
name = "ipv6"
version = "IPv6"
subnet_id = var.subnet_id
# backend address pool to which the NIC should be added
load_balancer_backend_address_pool_ids = var.backend_address_pool_ids.ipv6
}
}
# boot
user_data_base64 = base64encode(data.ct_config.worker.rendered)
boot_diagnostics {
# defaults to a managed storage account
}
# Azure requires an RSA admin_ssh_key
os_profile {
linux_configuration {
admin_username = "core"
admin_ssh_key {
username = "core"
public_key = local.azure_authorized_key
}
computer_name_prefix = "${var.name}-worker"
}
}
# lifecycle
upgrade_mode = "Manual"
# eviction policy may only be set when priority is Spot
priority = var.priority
eviction_policy = var.priority == "Spot" ? "Delete" : null
@ -79,35 +99,12 @@ resource "azurerm_linux_virtual_machine_scale_set" "workers" {
}
}
# Scale up or down to maintain desired number, tolerating deallocations.
resource "azurerm_monitor_autoscale_setting" "workers" {
resource_group_name = var.resource_group_name
name = "${var.name}-maintain-desired"
location = var.region
# autoscale
enabled = true
target_resource_id = azurerm_linux_virtual_machine_scale_set.workers.id
profile {
name = "default"
capacity {
minimum = var.worker_count
default = var.worker_count
maximum = var.worker_count
}
}
}
# Flatcar Linux worker
data "ct_config" "worker" {
content = templatefile("${path.module}/butane/worker.yaml", {
kubeconfig = indent(10, var.kubeconfig)
ssh_authorized_key = var.ssh_authorized_key
cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
cluster_domain_suffix = var.cluster_domain_suffix
node_labels = join(",", var.node_labels)
node_taints = join(",", var.node_taints)
})

View File

@ -11,7 +11,7 @@ Typhoon distributes upstream Kubernetes, architectural conventions, and cluster
## Features <a href="https://www.cncf.io/certification/software-conformance/"><img align="right" src="https://storage.googleapis.com/poseidon/certified-kubernetes.png"></a>
* Kubernetes v1.28.3 (upstream)
* Kubernetes v1.31.3 (upstream)
* Single or multi-master, [Calico](https://www.projectcalico.org/) or [Cilium](https://github.com/cilium/cilium) or [flannel](https://github.com/coreos/flannel) networking
* On-cluster etcd with TLS, [RBAC](https://kubernetes.io/docs/admin/authorization/rbac/)-enabled, [network policy](https://kubernetes.io/docs/concepts/services-networking/network-policies/), SELinux enforcing
* Advanced features like [snippets](https://typhoon.psdn.io/advanced/customization/#hosts) customization

View File

@ -1,6 +1,6 @@
# Kubernetes assets (kubeconfig, manifests)
module "bootstrap" {
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=d151ab77b7ebdfb878ea110c86cc77238189f1ed"
source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=e6a1c7bccfc45ab299b5f8149bc3840f99b30b2b"
cluster_name = var.cluster_name
api_servers = [var.k8s_domain_name]
@ -10,9 +10,7 @@ module "bootstrap" {
network_ip_autodetection_method = var.network_ip_autodetection_method
pod_cidr = var.pod_cidr
service_cidr = var.service_cidr
cluster_domain_suffix = var.cluster_domain_suffix
enable_reporting = var.enable_reporting
enable_aggregation = var.enable_aggregation
components = var.components
}

View File

@ -12,7 +12,7 @@ systemd:
Wants=network-online.target
After=network-online.target
[Service]
Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.10
Environment=ETCD_IMAGE=quay.io/coreos/etcd:v3.5.13
Type=exec
ExecStartPre=/bin/mkdir -p /var/lib/etcd
ExecStartPre=-/usr/bin/podman rm etcd
@ -53,7 +53,7 @@ systemd:
Description=Kubelet (System Container)
Wants=rpc-statd.service
[Service]
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3
ExecStartPre=/bin/mkdir -p /etc/cni/net.d
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
ExecStartPre=/bin/mkdir -p /opt/cni/bin
@ -113,7 +113,7 @@ systemd:
Type=oneshot
RemainAfterExit=true
WorkingDirectory=/opt/bootstrap
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3
ExecStartPre=-/usr/bin/podman rm bootstrap
ExecStart=/usr/bin/podman run --name bootstrap \
--network host \
@ -154,7 +154,7 @@ storage:
cgroupDriver: systemd
clusterDNS:
- ${cluster_dns_service_ip}
clusterDomain: ${cluster_domain_suffix}
clusterDomain: cluster.local
healthzPort: 0
rotateCertificates: true
shutdownGracePeriod: 45s
@ -168,7 +168,7 @@ storage:
contents:
inline: |
#!/bin/bash -e
mkdir -p -- auth tls/etcd tls/k8s static-manifests manifests/coredns manifests-networking
mkdir -p -- auth tls/{etcd,k8s} static-manifests manifests/{coredns,kube-proxy,network}
awk '/#####/ {filename=$2; next} {print > filename}' assets
mkdir -p /etc/ssl/etcd/etcd
mkdir -p /etc/kubernetes/pki
@ -182,8 +182,7 @@ storage:
mv static-manifests/* /etc/kubernetes/manifests/
mkdir -p /opt/bootstrap/assets
mv manifests /opt/bootstrap/assets/manifests
mv manifests-networking/* /opt/bootstrap/assets/manifests/
rm -rf assets auth static-manifests tls manifests-networking
rm -rf assets auth static-manifests tls manifests
chcon -R -u system_u -t container_file_t /etc/kubernetes/pki
- path: /opt/bootstrap/apply
mode: 0544

View File

@ -59,7 +59,6 @@ data "ct_config" "controllers" {
etcd_name = var.controllers.*.name[count.index]
etcd_initial_cluster = join(",", formatlist("%s=https://%s:2380", var.controllers.*.name, var.controllers.*.domain))
cluster_dns_service_ip = module.bootstrap.cluster_dns_service_ip
cluster_domain_suffix = var.cluster_domain_suffix
ssh_authorized_key = var.ssh_authorized_key
})
strict = true

View File

@ -139,23 +139,20 @@ variable "kernel_args" {
default = []
}
variable "enable_reporting" {
type = bool
description = "Enable usage or analytics reporting to upstreams (Calico)"
default = false
# advanced
variable "components" {
description = "Configure pre-installed cluster components"
# Component configs are passed through to terraform-render-bootstrap,
# which handles type enforcement and defines defaults
# https://github.com/poseidon/terraform-render-bootstrap/blob/main/variables.tf#L95
type = object({
enable = optional(bool)
coredns = optional(map(any))
kube_proxy = optional(map(any))
flannel = optional(map(any))
calico = optional(map(any))
cilium = optional(map(any))
})
default = null
}
variable "enable_aggregation" {
type = bool
description = "Enable the Kubernetes Aggregation Layer"
default = true
}
# unofficial, undocumented, unsupported
variable "cluster_domain_suffix" {
description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. foo.default.svc.cluster.local) "
type = string
default = "cluster.local"
}

View File

@ -25,7 +25,7 @@ systemd:
Description=Kubelet (System Container)
Wants=rpc-statd.service
[Service]
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.28.3
Environment=KUBELET_IMAGE=quay.io/poseidon/kubelet:v1.31.3
ExecStartPre=/bin/mkdir -p /etc/cni/net.d
ExecStartPre=/bin/mkdir -p /etc/kubernetes/manifests
ExecStartPre=/bin/mkdir -p /opt/cni/bin
@ -108,7 +108,7 @@ storage:
cgroupDriver: systemd
clusterDNS:
- ${cluster_dns_service_ip}
clusterDomain: ${cluster_domain_suffix}
clusterDomain: cluster.local
healthzPort: 0
rotateCertificates: true
shutdownGracePeriod: 45s

View File

@ -53,7 +53,6 @@ data "ct_config" "worker" {
domain_name = var.domain
ssh_authorized_key = var.ssh_authorized_key
cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
cluster_domain_suffix = var.cluster_domain_suffix
node_labels = join(",", var.node_labels)
node_taints = join(",", var.node_taints)
})

View File

@ -103,9 +103,3 @@ The 1st IP will be reserved for kube_apiserver, the 10th IP will be reserved for
EOD
default = "10.3.0.0/16"
}
variable "cluster_domain_suffix" {
description = "Queries for domains with the suffix will be answered by coredns. Default is cluster.local (e.g. foo.default.svc.cluster.local) "
type = string
default = "cluster.local"
}

Some files were not shown because too many files have changed in this diff Show More