Allow custom initial node taints on worker pool nodes
* Add `node_taints` variable to worker modules to set custom initial node taints on cloud platforms that support auto-scaling worker pools of heterogeneous nodes (i.e. AWS, Azure, GCP)
* Worker pools could use custom `node_labels` to allow workloads to select among differentiated nodes, while custom `node_taints` allows a worker pool's nodes to be tainted as special to prevent scheduling, except by workloads that explicitly tolerate the taint
* Expose `daemonset_tolerations` in AWS, Azure, and GCP kubernetes cluster modules, to determine whether `kube-system` components should tolerate the custom taint (advanced use covered in docs)

Rel: #550, #663
Closes #429
parent d73621c838
commit 084e8bea49
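In practice the two settings pair up: a cluster lists the taint keys its `kube-system` DaemonSets should tolerate, while a worker pool registers its nodes with a matching taint. A minimal sketch using the Google Cloud modules (the `yavin` module names and values are illustrative, mirroring the docs added below; required arguments are elided):

```tf
# Cluster: kube-system DaemonSets (e.g. flannel, Calico, Cilium) tolerate taints keyed "role"
module "yavin" {
  source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes?ref=v1.21.0"
  # ... required cluster arguments elided ...
  daemonset_tolerations = ["role"]
}

# Worker pool: nodes register tainted, so only workloads tolerating
# role=gpu:NoSchedule may schedule onto them
module "yavin-pool" {
  source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes/workers?ref=v1.21.0"
  # ... required pool arguments elided ...
  node_taints = ["role=gpu:NoSchedule"]
}
```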

CHANGES.md

@@ -6,6 +6,24 @@ Notable changes between versions.

 * Kubernetes [v1.21.0](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.21.md#v1210)

+### AWS
+
+* Allow setting custom initial node taints on worker pools ([#968](https://github.com/poseidon/typhoon/pull/968))
+  * Add `node_taints` variable to internal `workers` pool module to set initial node taints
+  * Add `daemonset_tolerations` so `kube-system` DaemonSets can tolerate custom taints
+
+### Azure
+
+* Allow setting custom initial node taints on worker pools ([#968](https://github.com/poseidon/typhoon/pull/968))
+  * Add `node_taints` variable to internal `workers` pool module to set initial node taints
+  * Add `daemonset_tolerations` so `kube-system` DaemonSets can tolerate custom taints
+
+### Google Cloud
+
+* Allow setting custom initial node taints on worker pools ([#968](https://github.com/poseidon/typhoon/pull/968))
+  * Add `node_taints` variable to internal `workers` pool module to set initial node taints
+  * Add `daemonset_tolerations` so `kube-system` DaemonSets can tolerate custom taints
+
 ### Addons

 * Update kube-state-metrics from v2.0.0-rc.0 to [v2.0.0-rc.1](https://github.com/kubernetes/kube-state-metrics/releases/tag/v2.0.0-rc.1)

@@ -176,4 +176,3 @@ variable "daemonset_tolerations" {
   description = "List of additional taint keys kube-system DaemonSets should tolerate (e.g. ['custom-role', 'gpu-role'])"
   default     = []
 }
-

@@ -12,5 +12,6 @@ module "bootstrap" {
   cluster_domain_suffix = var.cluster_domain_suffix
   enable_reporting      = var.enable_reporting
   enable_aggregation    = var.enable_aggregation
+  daemonset_tolerations = var.daemonset_tolerations
 }

@@ -160,3 +160,8 @@ variable "cluster_domain_suffix" {
   default = "cluster.local"
 }
+
+variable "daemonset_tolerations" {
+  type        = list(string)
+  description = "List of additional taint keys kube-system DaemonSets should tolerate (e.g. ['custom-role', 'gpu-role'])"
+  default     = []
+}

@@ -73,6 +73,9 @@ systemd:
           %{~ for label in split(",", node_labels) ~}
           --node-labels=${label} \
           %{~ endfor ~}
+          %{~ for taint in split(",", node_taints) ~}
+          --register-with-taints=${taint} \
+          %{~ endfor ~}
           --pod-manifest-path=/etc/kubernetes/manifests \
           --provider-id=aws:///$${COREOS_EC2_AVAILABILITY_ZONE}/$${COREOS_EC2_INSTANCE_ID} \
           --read-only-port=0 \

@@ -113,3 +113,9 @@ variable "node_labels" {
   description = "List of initial node labels"
   default     = []
 }
+
+variable "node_taints" {
+  type        = list(string)
+  description = "List of initial node taints"
+  default     = []
+}

@@ -86,6 +86,7 @@ data "template_file" "worker-config" {
     cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
     cluster_domain_suffix  = var.cluster_domain_suffix
     node_labels            = join(",", var.node_labels)
+    node_taints            = join(",", var.node_taints)
   }
 }
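The plumbing is deliberately simple: the workers module comma-joins `var.node_taints` into a single template string, and the kubelet template splits it back apart, emitting one `--register-with-taints` flag per taint. A self-contained sketch of that round trip (the values are hypothetical):

```tf
locals {
  # hypothetical pool taints
  node_taints = ["role=gpu:NoSchedule", "tier=experimental:NoExecute"]

  # what the workers module hands to the template, per the diff above
  node_taints_joined = join(",", local.node_taints)
}

# what the template's %{ for taint in split(",", node_taints) } loop renders
output "kubelet_taint_flags" {
  value = [for taint in split(",", local.node_taints_joined) : "--register-with-taints=${taint}"]
}
```

One consequence of the comma-join is that an individual taint string cannot itself contain a comma.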

@@ -18,6 +18,7 @@ module "bootstrap" {
   cluster_domain_suffix = var.cluster_domain_suffix
   enable_reporting      = var.enable_reporting
   enable_aggregation    = var.enable_aggregation
+  daemonset_tolerations = var.daemonset_tolerations

   # Fedora CoreOS
   trusted_certs_dir = "/etc/pki/tls/certs"

@@ -135,3 +135,8 @@ variable "cluster_domain_suffix" {
   default = "cluster.local"
 }
+
+variable "daemonset_tolerations" {
+  type        = list(string)
+  description = "List of additional taint keys kube-system DaemonSets should tolerate (e.g. ['custom-role', 'gpu-role'])"
+  default     = []
+}

@@ -67,6 +67,9 @@ systemd:
          %{~ for label in split(",", node_labels) ~}
          --node-labels=${label} \
          %{~ endfor ~}
+         %{~ for taint in split(",", node_taints) ~}
+         --register-with-taints=${taint} \
+         %{~ endfor ~}
          --pod-manifest-path=/etc/kubernetes/manifests \
          --read-only-port=0 \
          --rotate-certificates \

@@ -88,6 +88,12 @@ variable "node_labels" {
   default = []
 }
+
+variable "node_taints" {
+  type        = list(string)
+  description = "List of initial node taints"
+  default     = []
+}

 # unofficial, undocumented, unsupported

 variable "cluster_domain_suffix" {

@@ -87,6 +87,7 @@ data "template_file" "worker-config" {
     cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
     cluster_domain_suffix  = var.cluster_domain_suffix
     node_labels            = join(",", var.node_labels)
+    node_taints            = join(",", var.node_taints)
   }
 }

@@ -18,5 +18,6 @@ module "bootstrap" {
   cluster_domain_suffix = var.cluster_domain_suffix
   enable_reporting      = var.enable_reporting
   enable_aggregation    = var.enable_aggregation
+  daemonset_tolerations = var.daemonset_tolerations
 }

@@ -141,3 +141,8 @@ variable "cluster_domain_suffix" {
   default = "cluster.local"
 }
+
+variable "daemonset_tolerations" {
+  type        = list(string)
+  description = "List of additional taint keys kube-system DaemonSets should tolerate (e.g. ['custom-role', 'gpu-role'])"
+  default     = []
+}

@@ -70,6 +70,9 @@ systemd:
          %{~ for label in split(",", node_labels) ~}
          --node-labels=${label} \
          %{~ endfor ~}
+         %{~ for taint in split(",", node_taints) ~}
+         --register-with-taints=${taint} \
+         %{~ endfor ~}
          --pod-manifest-path=/etc/kubernetes/manifests \
          --read-only-port=0 \
          --rotate-certificates \

@@ -94,6 +94,12 @@ variable "node_labels" {
   default = []
 }
+
+variable "node_taints" {
+  type        = list(string)
+  description = "List of initial node taints"
+  default     = []
+}

 # unofficial, undocumented, unsupported

 variable "cluster_domain_suffix" {

@@ -105,6 +105,7 @@ data "template_file" "worker-config" {
     cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
     cluster_domain_suffix  = var.cluster_domain_suffix
     node_labels            = join(",", var.node_labels)
+    node_taints            = join(",", var.node_taints)
   }
 }

@@ -21,7 +21,7 @@ Create a cluster with ARM64 controller and worker nodes. Container workloads mus

 ```tf
 module "gravitas" {
-  source = "git::https://github.com/poseidon/typhoon//aws/fedora-coreos/kubernetes?ref=v1.19.4"
+  source = "git::https://github.com/poseidon/typhoon//aws/fedora-coreos/kubernetes?ref=v1.21.0"

   # AWS
   cluster_name = "gravitas"

@@ -47,9 +47,9 @@ Verify the cluster has only arm64 (`aarch64`) nodes.
 ```
 $ kubectl get nodes -o wide
 NAME            STATUS  ROLES   AGE   VERSION  INTERNAL-IP  EXTERNAL-IP  OS-IMAGE                         KERNEL-VERSION           CONTAINER-RUNTIME
-ip-10-0-12-178  Ready   <none>  101s  v1.19.4  10.0.12.178  <none>       Fedora CoreOS 32.20201104.dev.0  5.8.17-200.fc32.aarch64  docker://19.3.11
-ip-10-0-18-93   Ready   <none>  102s  v1.19.4  10.0.18.93   <none>       Fedora CoreOS 32.20201104.dev.0  5.8.17-200.fc32.aarch64  docker://19.3.11
-ip-10-0-90-10   Ready   <none>  104s  v1.19.4  10.0.90.10   <none>       Fedora CoreOS 32.20201104.dev.0  5.8.17-200.fc32.aarch64  docker://19.3.11
+ip-10-0-12-178  Ready   <none>  101s  v1.21.0  10.0.12.178  <none>       Fedora CoreOS 32.20201104.dev.0  5.8.17-200.fc32.aarch64  docker://19.3.11
+ip-10-0-18-93   Ready   <none>  102s  v1.21.0  10.0.18.93   <none>       Fedora CoreOS 32.20201104.dev.0  5.8.17-200.fc32.aarch64  docker://19.3.11
+ip-10-0-90-10   Ready   <none>  104s  v1.21.0  10.0.90.10   <none>       Fedora CoreOS 32.20201104.dev.0  5.8.17-200.fc32.aarch64  docker://19.3.11
 ```

 ## Hybrid

@@ -60,7 +60,7 @@ Create a hybrid/mixed arch cluster by defining an AWS cluster. Then define a [wo

 ```tf
 module "gravitas" {
-  source = "git::https://github.com/poseidon/typhoon//aws/fedora-coreos/kubernetes?ref=v1.19.4"
+  source = "git::https://github.com/poseidon/typhoon//aws/fedora-coreos/kubernetes?ref=v1.21.0"

   # AWS
   cluster_name = "gravitas"

@@ -83,7 +83,7 @@ Create a hybrid/mixed arch cluster by defining an AWS cluster. Then define a [wo

 ```tf
 module "gravitas-arm64" {
-  source = "git::https://github.com/poseidon/typhoon//aws/fedora-coreos/kubernetes/workers?ref=v1.19.4"
+  source = "git::https://github.com/poseidon/typhoon//aws/fedora-coreos/kubernetes/workers?ref=v1.21.0"

   # AWS
   vpc_id = module.gravitas.vpc_id

@@ -108,9 +108,9 @@ Verify amd64 (x86_64) and arm64 (aarch64) nodes are present.
 ```
 $ kubectl get nodes -o wide
 NAME            STATUS  ROLES   AGE   VERSION  INTERNAL-IP  EXTERNAL-IP  OS-IMAGE                         KERNEL-VERSION           CONTAINER-RUNTIME
-ip-10-0-14-73   Ready   <none>  116s  v1.19.4  10.0.14.73   <none>       Fedora CoreOS 32.20201018.3.0    5.8.15-201.fc32.x86_64   docker://19.3.11
-ip-10-0-17-167  Ready   <none>  104s  v1.19.4  10.0.17.167  <none>       Fedora CoreOS 32.20201018.3.0    5.8.15-201.fc32.x86_64   docker://19.3.11
-ip-10-0-47-166  Ready   <none>  110s  v1.19.4  10.0.47.166  <none>       Fedora CoreOS 32.20201104.dev.0  5.8.17-200.fc32.aarch64  docker://19.3.11
-ip-10-0-7-237   Ready   <none>  111s  v1.19.4  10.0.7.237   <none>       Fedora CoreOS 32.20201018.3.0    5.8.15-201.fc32.x86_64   docker://19.3.11
+ip-10-0-14-73   Ready   <none>  116s  v1.21.0  10.0.14.73   <none>       Fedora CoreOS 32.20201018.3.0    5.8.15-201.fc32.x86_64   docker://19.3.11
+ip-10-0-17-167  Ready   <none>  104s  v1.21.0  10.0.17.167  <none>       Fedora CoreOS 32.20201018.3.0    5.8.15-201.fc32.x86_64   docker://19.3.11
+ip-10-0-47-166  Ready   <none>  110s  v1.21.0  10.0.47.166  <none>       Fedora CoreOS 32.20201104.dev.0  5.8.17-200.fc32.aarch64  docker://19.3.11
+ip-10-0-7-237   Ready   <none>  111s  v1.21.0  10.0.7.237   <none>       Fedora CoreOS 32.20201018.3.0    5.8.15-201.fc32.x86_64   docker://19.3.11
 ```

@@ -0,0 +1,134 @@
+# Nodes
+
+Typhoon clusters consist of controller node(s) and a (default) set of worker nodes.
+
+## Overview
+
+Typhoon nodes use the standard set of Kubernetes node labels.
+
+```yaml
+Labels: kubernetes.io/arch=amd64
+        kubernetes.io/hostname=node-name
+        kubernetes.io/os=linux
+```
+
+Controller node(s) are labeled to allow node selection (for rare components that run on controllers) and tainted to prevent ordinary workloads running on controllers.
+
+```yaml
+Labels: node.kubernetes.io/controller=true
+Taints: node-role.kubernetes.io/controller:NoSchedule
+```
+
+Worker nodes are labeled to allow node selection and untainted. Workloads will schedule on worker nodes by default, barring any contraindications.
+
+```yaml
+Labels: node.kubernetes.io/node=
+Taints: <none>
+```
+
+On auto-scaling cloud platforms, you may add [worker pools](/advanced/worker-pools) with different groups of nodes with their own labels and taints. On platforms like bare-metal, with heterogeneous machines, you may manage node labels and taints per node.
+
+## Node Labels
+
+Add custom initial worker node labels to default workers or worker pool nodes to allow workloads to select among nodes that differ.
+
+=== "Cluster"
+
+    ```tf
+    module "yavin" {
+      source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes?ref=v1.21.0"
+
+      # Google Cloud
+      cluster_name  = "yavin"
+      region        = "us-central1"
+      dns_zone      = "example.com"
+      dns_zone_name = "example-zone"
+
+      # configuration
+      ssh_authorized_key = local.ssh_key
+
+      # optional
+      worker_count       = 2
+      worker_node_labels = ["pool=default"]
+    }
+    ```
+
+=== "Worker Pool"
+
+    ```tf
+    module "yavin-pool" {
+      source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes/workers?ref=v1.21.0"
+
+      # Google Cloud
+      cluster_name = "yavin"
+      region       = "europe-west2"
+      network      = module.yavin.network_name
+
+      # configuration
+      name               = "yavin-16x"
+      kubeconfig         = module.yavin.kubeconfig
+      ssh_authorized_key = local.ssh_key
+
+      # optional
+      worker_count = 1
+      machine_type = "n1-standard-16"
+      node_labels  = ["pool=big"]
+    }
+    ```
+
+In the example above, the two default workers would be labeled `pool: default` and the additional worker would be labeled `pool: big`.
+
+## Node Taints
+
+Add custom initial taints on worker pool nodes to indicate a node is unique and should only schedule workloads that explicitly tolerate a given taint key.
+
+!!! warning
+    Since taints prevent workloads scheduling onto a node, you must decide whether `kube-system` DaemonSets (e.g. flannel, Calico, Cilium) should tolerate your custom taint by setting `daemonset_tolerations`. If you don't list your custom taint(s), important components won't run on these nodes.
+
+=== "Cluster"
+
+    ```tf
+    module "yavin" {
+      source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes?ref=v1.21.0"
+
+      # Google Cloud
+      cluster_name  = "yavin"
+      region        = "us-central1"
+      dns_zone      = "example.com"
+      dns_zone_name = "example-zone"
+
+      # configuration
+      ssh_authorized_key = local.ssh_key
+
+      # optional
+      worker_count          = 2
+      daemonset_tolerations = ["role"]
+    }
+    ```
+
+=== "Worker Pool"
+
+    ```tf
+    module "yavin-pool" {
+      source = "git::https://github.com/poseidon/typhoon//google-cloud/fedora-coreos/kubernetes/workers?ref=v1.21.0"
+
+      # Google Cloud
+      cluster_name = "yavin"
+      region       = "europe-west2"
+      network      = module.yavin.network_name
+
+      # configuration
+      name               = "yavin-16x"
+      kubeconfig         = module.yavin.kubeconfig
+      ssh_authorized_key = local.ssh_key
+
+      # optional
+      worker_count      = 1
+      accelerator_type  = "nvidia-tesla-p100"
+      accelerator_count = 1
+      node_taints       = ["role=gpu:NoSchedule"]
+    }
+    ```
+
+In the example above, the additional worker would be tainted with `role=gpu:NoSchedule` to prevent workloads scheduling, but `kube-system` components like flannel, Calico, or Cilium would tolerate that custom taint to run there.
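To complete the picture from the workload side, a Deployment must both select the labeled pool and tolerate its taint. A sketch using the Terraform kubernetes provider (the resource name, image, and the assumption that the pool also sets `node_labels = ["pool=big"]` are illustrative, not part of this commit):

```tf
resource "kubernetes_deployment" "gpu_job" {
  metadata {
    name = "gpu-job"
  }

  spec {
    replicas = 1

    selector {
      match_labels = { app = "gpu-job" }
    }

    template {
      metadata {
        labels = { app = "gpu-job" }
      }

      spec {
        # Target the pool (assumes the pool also set node_labels = ["pool=big"]).
        node_selector = { pool = "big" }

        # Tolerate the pool's custom taint, role=gpu:NoSchedule.
        toleration {
          key      = "role"
          operator = "Equal"
          value    = "gpu"
          effect   = "NoSchedule"
        }

        container {
          name  = "worker"
          image = "nvidia/cuda:11.0-base"
        }
      }
    }
  }
}
```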

@@ -99,6 +99,7 @@ The AWS internal `workers` module supports a number of [variables](https://githu
 | snippets | Fedora CoreOS or Container Linux Config snippets | [] | [examples](/advanced/customization/) |
 | service_cidr | Must match `service_cidr` of cluster | "10.3.0.0/16" | "10.3.0.0/24" |
 | node_labels | List of initial node labels | [] | ["worker-pool=foo"] |
+| node_taints | List of initial node taints | [] | ["role=gpu:NoSchedule"] |

 Check the list of valid [instance types](https://aws.amazon.com/ec2/instance-types/) or per-region and per-type [spot prices](https://aws.amazon.com/ec2/spot/pricing/).
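Concretely, an AWS pool for GPU work might combine these variables (a sketch; the `tempest` module names, instance type, and values are placeholders, with required arguments following the usual AWS worker pool pattern):

```tf
module "tempest-gpu" {
  source = "git::https://github.com/poseidon/typhoon//aws/fedora-coreos/kubernetes/workers?ref=v1.21.0"

  # AWS
  vpc_id          = module.tempest.vpc_id
  subnet_ids      = module.tempest.subnet_ids
  security_groups = module.tempest.worker_security_groups

  # configuration
  name               = "tempest-gpu"
  kubeconfig         = module.tempest.kubeconfig
  ssh_authorized_key = local.ssh_key

  # optional
  worker_count  = 2
  instance_type = "g4dn.xlarge"
  node_labels   = ["pool=gpu"]
  node_taints   = ["role=gpu:NoSchedule"]
}
```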

@@ -194,6 +195,7 @@ The Azure internal `workers` module supports a number of [variables](https://git
 | snippets | Container Linux Config snippets | [] | [examples](/advanced/customization/) |
 | service_cidr | CIDR IPv4 range to assign to Kubernetes services | "10.3.0.0/16" | "10.3.0.0/24" |
 | node_labels | List of initial node labels | [] | ["worker-pool=foo"] |
+| node_taints | List of initial node taints | [] | ["role=gpu:NoSchedule"] |

 Check the list of valid [machine types](https://azure.microsoft.com/en-us/pricing/details/virtual-machines/linux/) and their [specs](https://docs.microsoft.com/en-us/azure/virtual-machines/linux/sizes-general). Use `az vm list-skus` to get the identifier.

@@ -297,6 +299,7 @@ Check the list of regions [docs](https://cloud.google.com/compute/docs/regions-z
 | snippets | Container Linux Config snippets | [] | [examples](/advanced/customization/) |
 | service_cidr | Must match `service_cidr` of cluster | "10.3.0.0/16" | "10.3.0.0/24" |
 | node_labels | List of initial node labels | [] | ["worker-pool=foo"] |
+| node_taints | List of initial node taints | [] | ["role=gpu:NoSchedule"] |

 Check the list of valid [machine types](https://cloud.google.com/compute/docs/machine-types).

@@ -12,6 +12,7 @@ module "bootstrap" {
   cluster_domain_suffix = var.cluster_domain_suffix
   enable_reporting      = var.enable_reporting
   enable_aggregation    = var.enable_aggregation
+  daemonset_tolerations = var.daemonset_tolerations

   trusted_certs_dir = "/etc/pki/tls/certs"

@@ -136,3 +136,8 @@ variable "cluster_domain_suffix" {
   default = "cluster.local"
 }
+
+variable "daemonset_tolerations" {
+  type        = list(string)
+  description = "List of additional taint keys kube-system DaemonSets should tolerate (e.g. ['custom-role', 'gpu-role'])"
+  default     = []
+}

@@ -67,6 +67,9 @@ systemd:
          %{~ for label in split(",", node_labels) ~}
          --node-labels=${label} \
          %{~ endfor ~}
+         %{~ for taint in split(",", node_taints) ~}
+         --register-with-taints=${taint} \
+         %{~ endfor ~}
          --pod-manifest-path=/etc/kubernetes/manifests \
          --read-only-port=0 \
          --rotate-certificates \

@@ -90,6 +90,12 @@ variable "node_labels" {
   default = []
 }
+
+variable "node_taints" {
+  type        = list(string)
+  description = "List of initial node taints"
+  default     = []
+}

 # unofficial, undocumented, unsupported, temporary

 variable "cluster_domain_suffix" {

@@ -89,6 +89,7 @@ data "template_file" "worker-config" {
     cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
     cluster_domain_suffix  = var.cluster_domain_suffix
     node_labels            = join(",", var.node_labels)
+    node_taints            = join(",", var.node_taints)
   }
 }

@@ -12,6 +12,7 @@ module "bootstrap" {
   cluster_domain_suffix = var.cluster_domain_suffix
   enable_reporting      = var.enable_reporting
   enable_aggregation    = var.enable_aggregation
+  daemonset_tolerations = var.daemonset_tolerations

   // temporary
   external_apiserver_port = 443

@@ -130,3 +130,8 @@ variable "cluster_domain_suffix" {
   default = "cluster.local"
 }
+
+variable "daemonset_tolerations" {
+  type        = list(string)
+  description = "List of additional taint keys kube-system DaemonSets should tolerate (e.g. ['custom-role', 'gpu-role'])"
+  default     = []
+}

@@ -70,6 +70,9 @@ systemd:
          %{~ for label in split(",", node_labels) ~}
          --node-labels=${label} \
          %{~ endfor ~}
+         %{~ for taint in split(",", node_taints) ~}
+         --register-with-taints=${taint} \
+         %{~ endfor ~}
          --pod-manifest-path=/etc/kubernetes/manifests \
          --read-only-port=0 \
          --rotate-certificates \

@@ -84,6 +84,12 @@ variable "node_labels" {
   default = []
 }
+
+variable "node_taints" {
+  type        = list(string)
+  description = "List of initial node taints"
+  default     = []
+}

 # unofficial, undocumented, unsupported, temporary

 variable "cluster_domain_suffix" {

@@ -86,6 +86,7 @@ data "template_file" "worker-config" {
     cluster_dns_service_ip = cidrhost(var.service_cidr, 10)
     cluster_domain_suffix  = var.cluster_domain_suffix
     node_labels            = join(",", var.node_labels)
+    node_taints            = join(",", var.node_taints)
   }
 }

@@ -80,6 +80,7 @@ nav:
     - 'Overview': 'advanced/overview.md'
     - 'ARM64': 'advanced/arm64.md'
     - 'Customization': 'advanced/customization.md'
+    - 'Nodes': 'advanced/nodes.md'
     - 'Worker Pools': 'advanced/worker-pools.md'
 - 'Addons':
     - 'Overview': 'addons/overview.md'