From cbef202eec5d781a9e54cb73a41614058490510b Mon Sep 17 00:00:00 2001 From: Dalton Hubble Date: Tue, 10 Aug 2021 21:08:49 -0700 Subject: [PATCH] Update Prometheus discovery of kube components * Kubernetes v1.22.0 disabled kube-controller-manager insecure port, which was used internally for Prometheus metrics scraping * Configure Prometheus to discover and scrape endpoints for kube-scheduler and kube-controller-manager via the authenticated https ports, via bearer token * Change firewall ports to allow Prometheus (on worker nodes) to scrape kube-scheduler and kube-controller-manager targets that run on controller(s) with hostNetwork * Disable the insecure port on kube-scheduler --- CHANGES.md | 2 + addons/prometheus/config.yaml | 42 +++++++++++++++++++ .../discovery/kube-controller-manager.yaml | 8 ++-- .../prometheus/discovery/kube-scheduler.yaml | 8 ++-- aws/fedora-coreos/kubernetes/bootstrap.tf | 2 +- aws/fedora-coreos/kubernetes/security.tf | 8 ++-- aws/flatcar-linux/kubernetes/bootstrap.tf | 2 +- aws/flatcar-linux/kubernetes/security.tf | 8 ++-- azure/fedora-coreos/kubernetes/bootstrap.tf | 2 +- azure/fedora-coreos/kubernetes/security.tf | 2 +- azure/flatcar-linux/kubernetes/bootstrap.tf | 2 +- azure/flatcar-linux/kubernetes/security.tf | 2 +- .../fedora-coreos/kubernetes/bootstrap.tf | 2 +- .../flatcar-linux/kubernetes/bootstrap.tf | 2 +- .../fedora-coreos/kubernetes/bootstrap.tf | 2 +- .../fedora-coreos/kubernetes/network.tf | 2 +- .../flatcar-linux/kubernetes/bootstrap.tf | 2 +- .../flatcar-linux/kubernetes/network.tf | 2 +- .../fedora-coreos/kubernetes/bootstrap.tf | 2 +- .../fedora-coreos/kubernetes/network.tf | 2 +- .../flatcar-linux/kubernetes/bootstrap.tf | 2 +- .../flatcar-linux/kubernetes/network.tf | 2 +- 22 files changed, 74 insertions(+), 34 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 3a4320d8..6675ffe3 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -7,6 +7,8 @@ Notable changes between versions. 
## v1.22.0 * Kubernetes [v1.22.0](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.22.md#v1220) +* Switch `kube-controller-manager` and `kube-scheduler` to use secure port only + * Update Prometheus config to discover endpoints and use a bearer token to scrape ### Fedora CoreOS diff --git a/addons/prometheus/config.yaml b/addons/prometheus/config.yaml index b5d91091..8d693ad5 100644 --- a/addons/prometheus/config.yaml +++ b/addons/prometheus/config.yaml @@ -72,6 +72,48 @@ data: regex: apiserver_request_duration_seconds_count;.+ action: drop + # Scrape config for kube-controller-manager endpoints. + # + # kube-controller-manager service endpoints can be discovered by using the + # `endpoints` role and relabelling to keep only endpoints associated with + # kube-system/kube-controller-manager and the `metrics` port (scraped over https). + - job_name: 'kube-controller-manager' + kubernetes_sd_configs: + - role: endpoints + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + relabel_configs: + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] + action: keep + regex: kube-system;kube-controller-manager;metrics + - replacement: kube-controller-manager + action: replace + target_label: job + + # Scrape config for kube-scheduler endpoints. + # + # kube-scheduler service endpoints can be discovered by using the `endpoints` + # role and relabelling to keep only endpoints associated with + # kube-system/kube-scheduler and the `metrics` port (scraped over https). 
+ - job_name: 'kube-scheduler' + kubernetes_sd_configs: + - role: endpoints + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + relabel_configs: + - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] + action: keep + regex: kube-system;kube-scheduler;metrics + - replacement: kube-scheduler + action: replace + target_label: job + # Scrape config for node (i.e. kubelet) /metrics (e.g. 'kubelet_'). Explore # metrics from a node by scraping kubelet (127.0.0.1:10250/metrics). - job_name: 'kubelet' diff --git a/addons/prometheus/discovery/kube-controller-manager.yaml b/addons/prometheus/discovery/kube-controller-manager.yaml index 1dabf724..63ef60ca 100644 --- a/addons/prometheus/discovery/kube-controller-manager.yaml +++ b/addons/prometheus/discovery/kube-controller-manager.yaml @@ -1,11 +1,9 @@ -# Allow Prometheus to scrape service endpoints +# Allow Prometheus to discover service endpoints apiVersion: v1 kind: Service metadata: name: kube-controller-manager namespace: kube-system - annotations: - prometheus.io/scrape: 'true' spec: type: ClusterIP clusterIP: None @@ -14,5 +12,5 @@ spec: ports: - name: metrics protocol: TCP - port: 10252 - targetPort: 10252 + port: 10257 + targetPort: 10257 diff --git a/addons/prometheus/discovery/kube-scheduler.yaml b/addons/prometheus/discovery/kube-scheduler.yaml index 0032cf1b..9d6f3967 100644 --- a/addons/prometheus/discovery/kube-scheduler.yaml +++ b/addons/prometheus/discovery/kube-scheduler.yaml @@ -1,11 +1,9 @@ -# Allow Prometheus to scrape service endpoints +# Allow Prometheus to discover service endpoints apiVersion: v1 kind: Service metadata: name: kube-scheduler namespace: kube-system - annotations: - prometheus.io/scrape: 'true' spec: type: ClusterIP clusterIP: None @@ -14,5 +12,5 @@ spec: ports: - name: metrics 
protocol: TCP - port: 10251 - targetPort: 10251 + port: 10259 + targetPort: 10259 diff --git a/aws/fedora-coreos/kubernetes/bootstrap.tf b/aws/fedora-coreos/kubernetes/bootstrap.tf index 94f444ca..981480c2 100644 --- a/aws/fedora-coreos/kubernetes/bootstrap.tf +++ b/aws/fedora-coreos/kubernetes/bootstrap.tf @@ -1,6 +1,6 @@ # Kubernetes assets (kubeconfig, manifests) module "bootstrap" { - source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=b766ff2346921a4f5587a45b948b5c79969357ae" + source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=b5f5d843ec9babcd2eeea98b8edcef972a5c178d" cluster_name = var.cluster_name api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)] diff --git a/aws/fedora-coreos/kubernetes/security.tf b/aws/fedora-coreos/kubernetes/security.tf index 5a19930d..3d94cd66 100644 --- a/aws/fedora-coreos/kubernetes/security.tf +++ b/aws/fedora-coreos/kubernetes/security.tf @@ -201,8 +201,8 @@ resource "aws_security_group_rule" "controller-scheduler-metrics" { type = "ingress" protocol = "tcp" - from_port = 10251 - to_port = 10251 + from_port = 10259 + to_port = 10259 source_security_group_id = aws_security_group.worker.id } @@ -212,8 +212,8 @@ resource "aws_security_group_rule" "controller-manager-metrics" { type = "ingress" protocol = "tcp" - from_port = 10252 - to_port = 10252 + from_port = 10257 + to_port = 10257 source_security_group_id = aws_security_group.worker.id } diff --git a/aws/flatcar-linux/kubernetes/bootstrap.tf b/aws/flatcar-linux/kubernetes/bootstrap.tf index ef7c59a7..71705f1e 100644 --- a/aws/flatcar-linux/kubernetes/bootstrap.tf +++ b/aws/flatcar-linux/kubernetes/bootstrap.tf @@ -1,6 +1,6 @@ # Kubernetes assets (kubeconfig, manifests) module "bootstrap" { - source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=b766ff2346921a4f5587a45b948b5c79969357ae" + source = 
"git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=b5f5d843ec9babcd2eeea98b8edcef972a5c178d" cluster_name = var.cluster_name api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)] diff --git a/aws/flatcar-linux/kubernetes/security.tf b/aws/flatcar-linux/kubernetes/security.tf index 5a19930d..3d94cd66 100644 --- a/aws/flatcar-linux/kubernetes/security.tf +++ b/aws/flatcar-linux/kubernetes/security.tf @@ -201,8 +201,8 @@ resource "aws_security_group_rule" "controller-scheduler-metrics" { type = "ingress" protocol = "tcp" - from_port = 10251 - to_port = 10251 + from_port = 10259 + to_port = 10259 source_security_group_id = aws_security_group.worker.id } @@ -212,8 +212,8 @@ resource "aws_security_group_rule" "controller-manager-metrics" { type = "ingress" protocol = "tcp" - from_port = 10252 - to_port = 10252 + from_port = 10257 + to_port = 10257 source_security_group_id = aws_security_group.worker.id } diff --git a/azure/fedora-coreos/kubernetes/bootstrap.tf b/azure/fedora-coreos/kubernetes/bootstrap.tf index d0d3e669..60744b12 100644 --- a/azure/fedora-coreos/kubernetes/bootstrap.tf +++ b/azure/fedora-coreos/kubernetes/bootstrap.tf @@ -1,6 +1,6 @@ # Kubernetes assets (kubeconfig, manifests) module "bootstrap" { - source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=b766ff2346921a4f5587a45b948b5c79969357ae" + source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=b5f5d843ec9babcd2eeea98b8edcef972a5c178d" cluster_name = var.cluster_name api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)] diff --git a/azure/fedora-coreos/kubernetes/security.tf b/azure/fedora-coreos/kubernetes/security.tf index c258ec2d..e106f3ba 100644 --- a/azure/fedora-coreos/kubernetes/security.tf +++ b/azure/fedora-coreos/kubernetes/security.tf @@ -95,7 +95,7 @@ resource "azurerm_network_security_rule" "controller-kube-metrics" { direction = "Inbound" protocol = "Tcp" source_port_range = "*" - 
destination_port_range = "10251-10252" + destination_port_range = "10257-10259" source_address_prefix = azurerm_subnet.worker.address_prefix destination_address_prefix = azurerm_subnet.controller.address_prefix } diff --git a/azure/flatcar-linux/kubernetes/bootstrap.tf b/azure/flatcar-linux/kubernetes/bootstrap.tf index 5edf06dd..6bfd03f6 100644 --- a/azure/flatcar-linux/kubernetes/bootstrap.tf +++ b/azure/flatcar-linux/kubernetes/bootstrap.tf @@ -1,6 +1,6 @@ # Kubernetes assets (kubeconfig, manifests) module "bootstrap" { - source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=b766ff2346921a4f5587a45b948b5c79969357ae" + source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=b5f5d843ec9babcd2eeea98b8edcef972a5c178d" cluster_name = var.cluster_name api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)] diff --git a/azure/flatcar-linux/kubernetes/security.tf b/azure/flatcar-linux/kubernetes/security.tf index c258ec2d..e106f3ba 100644 --- a/azure/flatcar-linux/kubernetes/security.tf +++ b/azure/flatcar-linux/kubernetes/security.tf @@ -95,7 +95,7 @@ resource "azurerm_network_security_rule" "controller-kube-metrics" { direction = "Inbound" protocol = "Tcp" source_port_range = "*" - destination_port_range = "10251-10252" + destination_port_range = "10257-10259" source_address_prefix = azurerm_subnet.worker.address_prefix destination_address_prefix = azurerm_subnet.controller.address_prefix } diff --git a/bare-metal/fedora-coreos/kubernetes/bootstrap.tf b/bare-metal/fedora-coreos/kubernetes/bootstrap.tf index d044f172..036496dd 100644 --- a/bare-metal/fedora-coreos/kubernetes/bootstrap.tf +++ b/bare-metal/fedora-coreos/kubernetes/bootstrap.tf @@ -1,6 +1,6 @@ # Kubernetes assets (kubeconfig, manifests) module "bootstrap" { - source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=b766ff2346921a4f5587a45b948b5c79969357ae" + source = 
"git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=b5f5d843ec9babcd2eeea98b8edcef972a5c178d" cluster_name = var.cluster_name api_servers = [var.k8s_domain_name] diff --git a/bare-metal/flatcar-linux/kubernetes/bootstrap.tf b/bare-metal/flatcar-linux/kubernetes/bootstrap.tf index 37aeeca7..551260c9 100644 --- a/bare-metal/flatcar-linux/kubernetes/bootstrap.tf +++ b/bare-metal/flatcar-linux/kubernetes/bootstrap.tf @@ -1,6 +1,6 @@ # Kubernetes assets (kubeconfig, manifests) module "bootstrap" { - source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=b766ff2346921a4f5587a45b948b5c79969357ae" + source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=b5f5d843ec9babcd2eeea98b8edcef972a5c178d" cluster_name = var.cluster_name api_servers = [var.k8s_domain_name] diff --git a/digital-ocean/fedora-coreos/kubernetes/bootstrap.tf b/digital-ocean/fedora-coreos/kubernetes/bootstrap.tf index a83cfcf6..ec0fe9b2 100644 --- a/digital-ocean/fedora-coreos/kubernetes/bootstrap.tf +++ b/digital-ocean/fedora-coreos/kubernetes/bootstrap.tf @@ -1,6 +1,6 @@ # Kubernetes assets (kubeconfig, manifests) module "bootstrap" { - source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=b766ff2346921a4f5587a45b948b5c79969357ae" + source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=b5f5d843ec9babcd2eeea98b8edcef972a5c178d" cluster_name = var.cluster_name api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)] diff --git a/digital-ocean/fedora-coreos/kubernetes/network.tf b/digital-ocean/fedora-coreos/kubernetes/network.tf index 0d3438bb..0d506e58 100644 --- a/digital-ocean/fedora-coreos/kubernetes/network.tf +++ b/digital-ocean/fedora-coreos/kubernetes/network.tf @@ -116,7 +116,7 @@ resource "digitalocean_firewall" "controllers" { # kube-scheduler metrics, kube-controller-manager metrics inbound_rule { protocol = "tcp" - port_range = "10251-10252" + port_range = "10257-10259" 
source_tags = [digitalocean_tag.workers.name] } } diff --git a/digital-ocean/flatcar-linux/kubernetes/bootstrap.tf b/digital-ocean/flatcar-linux/kubernetes/bootstrap.tf index a5910ad8..79369284 100644 --- a/digital-ocean/flatcar-linux/kubernetes/bootstrap.tf +++ b/digital-ocean/flatcar-linux/kubernetes/bootstrap.tf @@ -1,6 +1,6 @@ # Kubernetes assets (kubeconfig, manifests) module "bootstrap" { - source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=b766ff2346921a4f5587a45b948b5c79969357ae" + source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=b5f5d843ec9babcd2eeea98b8edcef972a5c178d" cluster_name = var.cluster_name api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)] diff --git a/digital-ocean/flatcar-linux/kubernetes/network.tf b/digital-ocean/flatcar-linux/kubernetes/network.tf index 0d3438bb..0d506e58 100644 --- a/digital-ocean/flatcar-linux/kubernetes/network.tf +++ b/digital-ocean/flatcar-linux/kubernetes/network.tf @@ -116,7 +116,7 @@ resource "digitalocean_firewall" "controllers" { # kube-scheduler metrics, kube-controller-manager metrics inbound_rule { protocol = "tcp" - port_range = "10251-10252" + port_range = "10257-10259" source_tags = [digitalocean_tag.workers.name] } } diff --git a/google-cloud/fedora-coreos/kubernetes/bootstrap.tf b/google-cloud/fedora-coreos/kubernetes/bootstrap.tf index aff89116..f2114c7c 100644 --- a/google-cloud/fedora-coreos/kubernetes/bootstrap.tf +++ b/google-cloud/fedora-coreos/kubernetes/bootstrap.tf @@ -1,6 +1,6 @@ # Kubernetes assets (kubeconfig, manifests) module "bootstrap" { - source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=b766ff2346921a4f5587a45b948b5c79969357ae" + source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=b5f5d843ec9babcd2eeea98b8edcef972a5c178d" cluster_name = var.cluster_name api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)] diff --git 
a/google-cloud/fedora-coreos/kubernetes/network.tf b/google-cloud/fedora-coreos/kubernetes/network.tf index 67c6afde..e7c3090c 100644 --- a/google-cloud/fedora-coreos/kubernetes/network.tf +++ b/google-cloud/fedora-coreos/kubernetes/network.tf @@ -55,7 +55,7 @@ resource "google_compute_firewall" "internal-kube-metrics" { allow { protocol = "tcp" - ports = [10251, 10252] + ports = [10257, 10259] } source_tags = ["${var.cluster_name}-worker"] diff --git a/google-cloud/flatcar-linux/kubernetes/bootstrap.tf b/google-cloud/flatcar-linux/kubernetes/bootstrap.tf index 3b16c6cf..d803f152 100644 --- a/google-cloud/flatcar-linux/kubernetes/bootstrap.tf +++ b/google-cloud/flatcar-linux/kubernetes/bootstrap.tf @@ -1,6 +1,6 @@ # Kubernetes assets (kubeconfig, manifests) module "bootstrap" { - source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=b766ff2346921a4f5587a45b948b5c79969357ae" + source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=b5f5d843ec9babcd2eeea98b8edcef972a5c178d" cluster_name = var.cluster_name api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)] diff --git a/google-cloud/flatcar-linux/kubernetes/network.tf b/google-cloud/flatcar-linux/kubernetes/network.tf index 67c6afde..e7c3090c 100644 --- a/google-cloud/flatcar-linux/kubernetes/network.tf +++ b/google-cloud/flatcar-linux/kubernetes/network.tf @@ -55,7 +55,7 @@ resource "google_compute_firewall" "internal-kube-metrics" { allow { protocol = "tcp" - ports = [10251, 10252] + ports = [10257, 10259] } source_tags = ["${var.cluster_name}-worker"]