mirror of
https://github.com/puppetmaster/typhoon.git
synced 2025-07-17 03:01:35 +02:00
Adjust Google Cloud worker health checks to use kube-proxy healthz
* Change the workers managed instance group to health check nodes via an HTTP probe of the kube-proxy port 10256 /healthz endpoint
* Advantages: kube-proxy is a lower-value target than Kubelet (in case there were bugs in firewalls), it's more representative than health checking Kubelet (Kubelet must run AND the kube-proxy DaemonSet must be healthy), and it's already used by kube-proxy liveness probes (better discoverability via kubectl or alerts on pods crashlooping)
* Another motivator is that GKE clusters also use kube-proxy port 10256 checks to assess node health
This commit is contained in:
@ -196,13 +196,13 @@ resource "google_compute_firewall" "allow-ingress" {
|
||||
target_tags = ["${var.cluster_name}-worker"]
|
||||
}
|
||||
|
||||
resource "google_compute_firewall" "google-kubelet-health-checks" {
|
||||
name = "${var.cluster_name}-kubelet-health"
|
||||
resource "google_compute_firewall" "google-worker-health-checks" {
|
||||
name = "${var.cluster_name}-worker-health"
|
||||
network = google_compute_network.network.name
|
||||
|
||||
allow {
|
||||
protocol = "tcp"
|
||||
ports = [10250]
|
||||
ports = [10256]
|
||||
}
|
||||
|
||||
# https://cloud.google.com/compute/docs/instance-groups/autohealing-instances-in-migs
|
||||
|
@ -36,14 +36,14 @@ resource "google_compute_region_instance_group_manager" "workers" {
|
||||
|
||||
auto_healing_policies {
|
||||
health_check = google_compute_health_check.worker.id
|
||||
initial_delay_sec = 120
|
||||
initial_delay_sec = 300
|
||||
}
|
||||
}
|
||||
|
||||
# Health check for worker node (HTTP probe of kube-proxy /healthz on port 10256)
|
||||
resource "google_compute_health_check" "worker" {
|
||||
name = "${var.name}-kubelet-health"
|
||||
description = "Health check for worker Kubelet"
|
||||
name = "${var.name}-worker-health"
|
||||
description = "Health check for worker node"
|
||||
|
||||
timeout_sec = 20
|
||||
check_interval_sec = 30
|
||||
@ -51,8 +51,9 @@ resource "google_compute_health_check" "worker" {
|
||||
healthy_threshold = 1
|
||||
unhealthy_threshold = 6
|
||||
|
||||
ssl_health_check {
|
||||
port = "10250"
|
||||
http_health_check {
|
||||
port = "10256"
|
||||
request_path = "/healthz"
|
||||
}
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user