Enable kube-proxy metrics and allow Prometheus scrapes

* Configure kube-proxy with `--metrics-bind-address=0.0.0.0` (the default is
  127.0.0.1) so metrics are served on 0.0.0.0:10249 (see the sketch below)
* Add firewall rules allowing Prometheus (which runs on a worker node) to
  scrape kube-proxy endpoints on controllers and workers
* Add a `clusterIP: None` (headless) Service for kube-proxy endpoint discovery
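
For context, a minimal sketch of how the flag lands on the kube-proxy DaemonSet. The actual manifest lives in terraform-render-bootstrap (picked up via the module `ref` bumps below), so the surrounding fields, image tag, and other flag values here are illustrative assumptions, not the rendered manifest:

```yaml
# Illustrative kube-proxy DaemonSet fragment (not the exact bootstrap manifest).
# Binding metrics to 0.0.0.0 exposes 0.0.0.0:10249 beyond the loopback interface,
# which is what lets an off-host Prometheus scrape it.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: kube-proxy
  namespace: kube-system
spec:
  selector:
    matchLabels:
      k8s-app: kube-proxy
  template:
    metadata:
      labels:
        k8s-app: kube-proxy
    spec:
      hostNetwork: true
      containers:
        - name: kube-proxy
          image: k8s.gcr.io/kube-proxy:v1.17.0         # tag is illustrative
          command:
            - kube-proxy
            - --kubeconfig=/etc/kubernetes/kubeconfig  # path is illustrative
            - --metrics-bind-address=0.0.0.0           # default 127.0.0.1; metrics on :10249
```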
Dalton Hubble 2019-12-29 12:21:49 -08:00
parent b2eb3e05d0
commit 43e05b9131
16 changed files with 153 additions and 33 deletions

View File

@ -8,10 +8,13 @@ Notable changes between versions.
* Update Calico from v3.10.2 to v3.11.1 ([#604](https://github.com/poseidon/typhoon/pull/604))
* Inline Kubelet service on Container Linux nodes ([#606](https://github.com/poseidon/typhoon/pull/606))
* Disable unused Kubelet `127.0.0.1:10248` healthz listener ([#607](https://github.com/poseidon/typhoon/pull/607))
* Enable kube-proxy metrics and allow Prometheus scrapes
  * Allow TCP/10249 traffic with worker node sources
#### Addons
* Update Prometheus from v2.14.0 to [v2.15.1](https://github.com/prometheus/prometheus/releases/tag/v2.15.1)
  * Add discovery for kube-proxy service endpoints
* Update kube-state-metrics from v1.8.0 to v1.9.0
* Update Grafana from v6.5.1 to v6.5.2

View File

@ -1,3 +1,4 @@
# Allow Prometheus to scrape service endpoints
apiVersion: v1
kind: Service
metadata:
@ -7,7 +8,6 @@ metadata:
    prometheus.io/scrape: 'true'
spec:
  type: ClusterIP
  # service is created to allow prometheus to scrape endpoints
  clusterIP: None
  selector:
    k8s-app: kube-controller-manager

View File

@ -0,0 +1,19 @@
# Allow Prometheus to scrape service endpoints
apiVersion: v1
kind: Service
metadata:
  name: kube-proxy
  namespace: kube-system
  annotations:
    prometheus.io/scrape: 'true'
    prometheus.io/port: '10249'
spec:
  type: ClusterIP
  clusterIP: None
  selector:
    k8s-app: kube-proxy
  ports:
  - name: metrics
    protocol: TCP
    port: 10249
    targetPort: 10249
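
The `prometheus.io/scrape` and `prometheus.io/port` annotations above are what the Prometheus addon's endpoint discovery keys off ("Add discovery for kube-proxy service endpoints" in the changelog). A sketch of the kind of endpoints-role scrape job that honors these annotations; the job name and exact relabelings are illustrative, not the addon's verbatim config:

```yaml
# Illustrative Prometheus scrape job using the Kubernetes endpoints role.
# Services annotated prometheus.io/scrape: 'true' are kept, and the
# prometheus.io/port annotation overrides the scrape port (10249 for kube-proxy).
- job_name: 'kubernetes-service-endpoints'
  kubernetes_sd_configs:
    - role: endpoints
  relabel_configs:
    # Only scrape endpoints whose Service opted in via annotation
    - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
      action: keep
      regex: true
    # Honor the annotated port, e.g. 10249 for kube-proxy
    - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
      action: replace
      regex: '([^:]+)(?::\d+)?;(\d+)'
      replacement: '$1:$2'
      target_label: __address__
```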

View File

@ -1,3 +1,4 @@
# Allow Prometheus to scrape service endpoints
apiVersion: v1
kind: Service
metadata:
@ -7,7 +8,6 @@ metadata:
    prometheus.io/scrape: 'true'
spec:
  type: ClusterIP
  # service is created to allow prometheus to scrape endpoints
  clusterIP: None
  selector:
    k8s-app: kube-scheduler

View File

@ -1,6 +1,6 @@
# Kubernetes assets (kubeconfig, manifests)
module "bootstrap" {
  source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=c8c21deb7682c2a83a1b86ff6ed88f3e5a20262d"
  source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=ac4b7af57012d477cd53bd74ce632ac581e807e1"

  cluster_name = var.cluster_name
  api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)]

View File

@ -33,6 +33,28 @@ resource "aws_security_group_rule" "controller-etcd" {
  self = true
}

# Allow Prometheus to scrape etcd metrics
resource "aws_security_group_rule" "controller-etcd-metrics" {
  security_group_id = aws_security_group.controller.id
  type = "ingress"
  protocol = "tcp"
  from_port = 2381
  to_port = 2381
  source_security_group_id = aws_security_group.worker.id
}

# Allow Prometheus to scrape kube-proxy
resource "aws_security_group_rule" "kube-proxy-metrics" {
  security_group_id = aws_security_group.controller.id
  type = "ingress"
  protocol = "tcp"
  from_port = 10249
  to_port = 10249
  source_security_group_id = aws_security_group.worker.id
}

# Allow Prometheus to scrape kube-scheduler
resource "aws_security_group_rule" "controller-scheduler-metrics" {
  security_group_id = aws_security_group.controller.id

@ -55,17 +77,6 @@ resource "aws_security_group_rule" "controller-manager-metrics" {
  source_security_group_id = aws_security_group.worker.id
}

# Allow Prometheus to scrape etcd metrics
resource "aws_security_group_rule" "controller-etcd-metrics" {
  security_group_id = aws_security_group.controller.id
  type = "ingress"
  protocol = "tcp"
  from_port = 2381
  to_port = 2381
  source_security_group_id = aws_security_group.worker.id
}

resource "aws_security_group_rule" "controller-vxlan" {
  count = var.networking == "flannel" ? 1 : 0

@ -281,14 +292,15 @@ resource "aws_security_group_rule" "worker-node-exporter" {
  self = true
}

resource "aws_security_group_rule" "ingress-health" {
  security_group_id = aws_security_group.worker.id
  type = "ingress"
  protocol = "tcp"
  from_port = 10254
  to_port = 10254
  cidr_blocks = ["0.0.0.0/0"]
}

# Allow Prometheus to scrape kube-proxy
resource "aws_security_group_rule" "worker-kube-proxy" {
  security_group_id = aws_security_group.worker.id
  type = "ingress"
  protocol = "tcp"
  from_port = 10249
  to_port = 10249
  self = true
}

# Allow apiserver to access kubelets for exec, log, port-forward

@ -313,6 +325,16 @@ resource "aws_security_group_rule" "worker-kubelet-self" {
  self = true
}

resource "aws_security_group_rule" "ingress-health" {
  security_group_id = aws_security_group.worker.id
  type = "ingress"
  protocol = "tcp"
  from_port = 10254
  to_port = 10254
  cidr_blocks = ["0.0.0.0/0"]
}

resource "aws_security_group_rule" "worker-bgp" {
  security_group_id = aws_security_group.worker.id

View File

@ -1,6 +1,6 @@
# Kubernetes assets (kubeconfig, manifests)
module "bootstrap" {
  source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=c8c21deb7682c2a83a1b86ff6ed88f3e5a20262d"
  source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=ac4b7af57012d477cd53bd74ce632ac581e807e1"

  cluster_name = var.cluster_name
  api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)]

View File

@ -44,6 +44,17 @@ resource "aws_security_group_rule" "controller-etcd-metrics" {
  source_security_group_id = aws_security_group.worker.id
}

# Allow Prometheus to scrape kube-proxy
resource "aws_security_group_rule" "kube-proxy-metrics" {
  security_group_id = aws_security_group.controller.id
  type = "ingress"
  protocol = "tcp"
  from_port = 10249
  to_port = 10249
  source_security_group_id = aws_security_group.worker.id
}

# Allow Prometheus to scrape kube-scheduler
resource "aws_security_group_rule" "controller-scheduler-metrics" {
  security_group_id = aws_security_group.controller.id

@ -281,14 +292,15 @@ resource "aws_security_group_rule" "worker-node-exporter" {
  self = true
}

resource "aws_security_group_rule" "ingress-health" {
  security_group_id = aws_security_group.worker.id
  type = "ingress"
  protocol = "tcp"
  from_port = 10254
  to_port = 10254
  cidr_blocks = ["0.0.0.0/0"]
}

# Allow Prometheus to scrape kube-proxy
resource "aws_security_group_rule" "worker-kube-proxy" {
  security_group_id = aws_security_group.worker.id
  type = "ingress"
  protocol = "tcp"
  from_port = 10249
  to_port = 10249
  self = true
}

# Allow apiserver to access kubelets for exec, log, port-forward

@ -313,6 +325,16 @@ resource "aws_security_group_rule" "worker-kubelet-self" {
  self = true
}

resource "aws_security_group_rule" "ingress-health" {
  security_group_id = aws_security_group.worker.id
  type = "ingress"
  protocol = "tcp"
  from_port = 10254
  to_port = 10254
  cidr_blocks = ["0.0.0.0/0"]
}

resource "aws_security_group_rule" "worker-bgp" {
  security_group_id = aws_security_group.worker.id

View File

@ -1,6 +1,6 @@
# Kubernetes assets (kubeconfig, manifests)
module "bootstrap" {
  source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=c8c21deb7682c2a83a1b86ff6ed88f3e5a20262d"
  source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=ac4b7af57012d477cd53bd74ce632ac581e807e1"

  cluster_name = var.cluster_name
  api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)]

View File

@ -53,13 +53,29 @@ resource "azurerm_network_security_rule" "controller-etcd-metrics" {
  destination_address_prefix = azurerm_subnet.controller.address_prefix
}

# Allow Prometheus to scrape kube-proxy metrics
resource "azurerm_network_security_rule" "controller-kube-proxy" {
  resource_group_name = azurerm_resource_group.cluster.name
  name = "allow-kube-proxy-metrics"
  network_security_group_name = azurerm_network_security_group.controller.name
  priority = "2011"
  access = "Allow"
  direction = "Inbound"
  protocol = "Tcp"
  source_port_range = "*"
  destination_port_range = "10249"
  source_address_prefix = azurerm_subnet.worker.address_prefix
  destination_address_prefix = azurerm_subnet.controller.address_prefix
}

# Allow Prometheus to scrape kube-scheduler and kube-controller-manager metrics
resource "azurerm_network_security_rule" "controller-kube-metrics" {
  resource_group_name = azurerm_resource_group.cluster.name
  name = "allow-kube-metrics"
  network_security_group_name = azurerm_network_security_group.controller.name
  priority = "2011"
  priority = "2012"
  access = "Allow"
  direction = "Inbound"
  protocol = "Tcp"

@ -251,6 +267,22 @@ resource "azurerm_network_security_rule" "worker-node-exporter" {
  destination_address_prefix = azurerm_subnet.worker.address_prefix
}

# Allow Prometheus to scrape kube-proxy
resource "azurerm_network_security_rule" "worker-kube-proxy" {
  resource_group_name = azurerm_resource_group.cluster.name
  name = "allow-kube-proxy"
  network_security_group_name = azurerm_network_security_group.worker.name
  priority = "2024"
  access = "Allow"
  direction = "Inbound"
  protocol = "Tcp"
  source_port_range = "*"
  destination_port_range = "10249"
  source_address_prefix = azurerm_subnet.worker.address_prefix
  destination_address_prefix = azurerm_subnet.worker.address_prefix
}

# Allow apiserver to access kubelet's for exec, log, port-forward
resource "azurerm_network_security_rule" "worker-kubelet" {
  resource_group_name = azurerm_resource_group.cluster.name

View File

@ -1,6 +1,6 @@
# Kubernetes assets (kubeconfig, manifests)
module "bootstrap" {
  source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=c8c21deb7682c2a83a1b86ff6ed88f3e5a20262d"
  source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=ac4b7af57012d477cd53bd74ce632ac581e807e1"

  cluster_name = var.cluster_name
  api_servers = [var.k8s_domain_name]

View File

@ -1,6 +1,6 @@
# Kubernetes assets (kubeconfig, manifests)
module "bootstrap" {
  source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=c8c21deb7682c2a83a1b86ff6ed88f3e5a20262d"
  source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=ac4b7af57012d477cd53bd74ce632ac581e807e1"

  cluster_name = var.cluster_name
  api_servers = [var.k8s_domain_name]

View File

@ -1,6 +1,6 @@
# Kubernetes assets (kubeconfig, manifests)
module "bootstrap" {
  source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=c8c21deb7682c2a83a1b86ff6ed88f3e5a20262d"
  source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=ac4b7af57012d477cd53bd74ce632ac581e807e1"

  cluster_name = var.cluster_name
  api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)]

View File

@ -16,12 +16,20 @@ resource "digitalocean_firewall" "rules" {
    source_tags = [digitalocean_tag.controllers.name, digitalocean_tag.workers.name]
  }

  # Allow Prometheus to scrape node-exporter
  inbound_rule {
    protocol = "tcp"
    port_range = "9100"
    source_tags = [digitalocean_tag.workers.name]
  }

  # Allow Prometheus to scrape kube-proxy
  inbound_rule {
    protocol = "tcp"
    port_range = "10249"
    source_tags = [digitalocean_tag.workers.name]
  }

  inbound_rule {
    protocol = "tcp"
    port_range = "10250"

View File

@ -1,6 +1,6 @@
# Kubernetes assets (kubeconfig, manifests)
module "bootstrap" {
  source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=c8c21deb7682c2a83a1b86ff6ed88f3e5a20262d"
  source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=ac4b7af57012d477cd53bd74ce632ac581e807e1"

  cluster_name = var.cluster_name
  api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)]

View File

@ -126,6 +126,20 @@ resource "google_compute_firewall" "internal-node-exporter" {
target_tags = ["${var.cluster_name}-controller", "${var.cluster_name}-worker"] target_tags = ["${var.cluster_name}-controller", "${var.cluster_name}-worker"]
} }
# Allow Prometheus to scrape kube-proxy metrics
resource "google_compute_firewall" "internal-kube-proxy" {
name = "${var.cluster_name}-internal-kube-proxy"
network = google_compute_network.network.name
allow {
protocol = "tcp"
ports = [10249]
}
source_tags = ["${var.cluster_name}-worker"]
target_tags = ["${var.cluster_name}-controller", "${var.cluster_name}-worker"]
}
# Allow apiserver to access kubelets for exec, log, port-forward # Allow apiserver to access kubelets for exec, log, port-forward
resource "google_compute_firewall" "internal-kubelet" { resource "google_compute_firewall" "internal-kubelet" {
name = "${var.cluster_name}-internal-kubelet" name = "${var.cluster_name}-internal-kubelet"