diff --git a/CHANGES.md b/CHANGES.md index 5f9f8792..a2c9b3be 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -8,10 +8,13 @@ Notable changes between versions. * Update Calico from v3.10.2 to v3.11.1 ([#604](https://github.com/poseidon/typhoon/pull/604)) * Inline Kubelet service on Container Linux nodes ([#606](https://github.com/poseidon/typhoon/pull/606)) * Disable unused Kubelet `127.0.0.1:10248` healthz listener ([#607](https://github.com/poseidon/typhoon/pull/607)) +* Enable kube-proxy metrics and allow Prometheus scrapes + * Allow TCP/10249 traffic with worker node sources #### Addons * Update Prometheus from v2.14.0 to [v2.15.1](https://github.com/prometheus/prometheus/releases/tag/v2.15.1) + * Add discovery for kube-proxy service endpoints * Update kube-state-metrics from v1.8.0 to v1.9.0 * Update Grafana from v6.5.1 to v6.5.2 diff --git a/addons/prometheus/discovery/kube-controller-manager.yaml b/addons/prometheus/discovery/kube-controller-manager.yaml index 19faa6cf..1dabf724 100644 --- a/addons/prometheus/discovery/kube-controller-manager.yaml +++ b/addons/prometheus/discovery/kube-controller-manager.yaml @@ -1,3 +1,4 @@ +# Allow Prometheus to scrape service endpoints apiVersion: v1 kind: Service metadata: @@ -7,7 +8,6 @@ metadata: prometheus.io/scrape: 'true' spec: type: ClusterIP - # service is created to allow prometheus to scrape endpoints clusterIP: None selector: k8s-app: kube-controller-manager diff --git a/addons/prometheus/discovery/kube-proxy.yaml b/addons/prometheus/discovery/kube-proxy.yaml new file mode 100644 index 00000000..9c49bef2 --- /dev/null +++ b/addons/prometheus/discovery/kube-proxy.yaml @@ -0,0 +1,19 @@ +# Allow Prometheus to scrape service endpoints +apiVersion: v1 +kind: Service +metadata: + name: kube-proxy + namespace: kube-system + annotations: + prometheus.io/scrape: 'true' + prometheus.io/port: '10249' +spec: + type: ClusterIP + clusterIP: None + selector: + k8s-app: kube-proxy + ports: + - name: metrics + protocol: TCP + port: 10249 + targetPort: 10249 diff --git a/addons/prometheus/discovery/kube-scheduler.yaml b/addons/prometheus/discovery/kube-scheduler.yaml index 80fd1913..0032cf1b 100644 --- a/addons/prometheus/discovery/kube-scheduler.yaml +++ b/addons/prometheus/discovery/kube-scheduler.yaml @@ -1,3 +1,4 @@ +# Allow Prometheus to scrape service endpoints apiVersion: v1 kind: Service metadata: @@ -7,7 +8,6 @@ metadata: prometheus.io/scrape: 'true' spec: type: ClusterIP - # service is created to allow prometheus to scrape endpoints clusterIP: None selector: k8s-app: kube-scheduler diff --git a/aws/container-linux/kubernetes/bootstrap.tf b/aws/container-linux/kubernetes/bootstrap.tf index ae850c6b..5b9f7c98 100644 --- a/aws/container-linux/kubernetes/bootstrap.tf +++ b/aws/container-linux/kubernetes/bootstrap.tf @@ -1,6 +1,6 @@ # Kubernetes assets (kubeconfig, manifests) module "bootstrap" { - source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=c8c21deb7682c2a83a1b86ff6ed88f3e5a20262d" + source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=ac4b7af57012d477cd53bd74ce632ac581e807e1" cluster_name = var.cluster_name api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)] diff --git a/aws/container-linux/kubernetes/security.tf b/aws/container-linux/kubernetes/security.tf index 706f80f8..60727af8 100644 --- a/aws/container-linux/kubernetes/security.tf +++ b/aws/container-linux/kubernetes/security.tf @@ -33,6 +33,28 @@ resource "aws_security_group_rule" "controller-etcd" { self = true } +# Allow Prometheus to scrape etcd metrics +resource "aws_security_group_rule" "controller-etcd-metrics" { + security_group_id = aws_security_group.controller.id + + type = "ingress" + protocol = "tcp" + from_port = 2381 + to_port = 2381 + source_security_group_id = aws_security_group.worker.id +} + +# Allow Prometheus to scrape kube-proxy +resource "aws_security_group_rule" "kube-proxy-metrics" { + security_group_id = aws_security_group.controller.id + + type = "ingress" + protocol = "tcp" + from_port = 10249 + to_port = 10249 + source_security_group_id = aws_security_group.worker.id +} + # Allow Prometheus to scrape kube-scheduler resource "aws_security_group_rule" "controller-scheduler-metrics" { security_group_id = aws_security_group.controller.id @@ -55,17 +77,6 @@ resource "aws_security_group_rule" "controller-manager-metrics" { source_security_group_id = aws_security_group.worker.id } -# Allow Prometheus to scrape etcd metrics -resource "aws_security_group_rule" "controller-etcd-metrics" { - security_group_id = aws_security_group.controller.id - - type = "ingress" - protocol = "tcp" - from_port = 2381 - to_port = 2381 - source_security_group_id = aws_security_group.worker.id -} - resource "aws_security_group_rule" "controller-vxlan" { count = var.networking == "flannel" ? 1 : 0 @@ -281,14 +292,15 @@ resource "aws_security_group_rule" "worker-node-exporter" { self = true } -resource "aws_security_group_rule" "ingress-health" { +# Allow Prometheus to scrape kube-proxy +resource "aws_security_group_rule" "worker-kube-proxy" { security_group_id = aws_security_group.worker.id - type = "ingress" - protocol = "tcp" - from_port = 10254 - to_port = 10254 - cidr_blocks = ["0.0.0.0/0"] + type = "ingress" + protocol = "tcp" + from_port = 10249 + to_port = 10249 + self = true } # Allow apiserver to access kubelets for exec, log, port-forward @@ -313,6 +325,16 @@ resource "aws_security_group_rule" "worker-kubelet-self" { self = true } +resource "aws_security_group_rule" "ingress-health" { + security_group_id = aws_security_group.worker.id + + type = "ingress" + protocol = "tcp" + from_port = 10254 + to_port = 10254 + cidr_blocks = ["0.0.0.0/0"] +} + resource "aws_security_group_rule" "worker-bgp" { security_group_id = aws_security_group.worker.id diff --git a/aws/fedora-coreos/kubernetes/bootstrap.tf b/aws/fedora-coreos/kubernetes/bootstrap.tf index 7d60bb94..b2015fa3 100644 --- a/aws/fedora-coreos/kubernetes/bootstrap.tf +++ b/aws/fedora-coreos/kubernetes/bootstrap.tf @@ -1,6 +1,6 @@ # Kubernetes assets (kubeconfig, manifests) module "bootstrap" { - source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=c8c21deb7682c2a83a1b86ff6ed88f3e5a20262d" + source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=ac4b7af57012d477cd53bd74ce632ac581e807e1" cluster_name = var.cluster_name api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)] diff --git a/aws/fedora-coreos/kubernetes/security.tf b/aws/fedora-coreos/kubernetes/security.tf index ddc4e8e4..60727af8 100644 --- a/aws/fedora-coreos/kubernetes/security.tf +++ b/aws/fedora-coreos/kubernetes/security.tf @@ -44,6 +44,17 @@ resource "aws_security_group_rule" "controller-etcd-metrics" { source_security_group_id = aws_security_group.worker.id } +# Allow Prometheus to scrape kube-proxy +resource "aws_security_group_rule" "kube-proxy-metrics" { + security_group_id = aws_security_group.controller.id + + type = "ingress" + protocol = "tcp" + from_port = 10249 + to_port = 10249 + source_security_group_id = aws_security_group.worker.id +} + # Allow Prometheus to scrape kube-scheduler resource "aws_security_group_rule" "controller-scheduler-metrics" { security_group_id = aws_security_group.controller.id @@ -281,14 +292,15 @@ resource "aws_security_group_rule" "worker-node-exporter" { self = true } -resource "aws_security_group_rule" "ingress-health" { +# Allow Prometheus to scrape kube-proxy +resource "aws_security_group_rule" "worker-kube-proxy" { security_group_id = aws_security_group.worker.id - type = "ingress" - protocol = "tcp" - from_port = 10254 - to_port = 10254 - cidr_blocks = ["0.0.0.0/0"] + type = "ingress" + protocol = "tcp" + from_port = 10249 + to_port = 10249 + self = true } # Allow apiserver to access kubelets for exec, log, port-forward @@ -313,6 +325,16 @@ resource "aws_security_group_rule" "worker-kubelet-self" { self = true } +resource "aws_security_group_rule" "ingress-health" { + security_group_id = aws_security_group.worker.id + + type = "ingress" + protocol = "tcp" + from_port = 10254 + to_port = 10254 + cidr_blocks = ["0.0.0.0/0"] +} + resource "aws_security_group_rule" "worker-bgp" { security_group_id = aws_security_group.worker.id diff --git a/azure/container-linux/kubernetes/bootstrap.tf b/azure/container-linux/kubernetes/bootstrap.tf index 218b050e..f7607daa 100644 --- a/azure/container-linux/kubernetes/bootstrap.tf +++ b/azure/container-linux/kubernetes/bootstrap.tf @@ -1,6 +1,6 @@ # Kubernetes assets (kubeconfig, manifests) module "bootstrap" { - source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=c8c21deb7682c2a83a1b86ff6ed88f3e5a20262d" + source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=ac4b7af57012d477cd53bd74ce632ac581e807e1" cluster_name = var.cluster_name api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)] diff --git a/azure/container-linux/kubernetes/security.tf b/azure/container-linux/kubernetes/security.tf index c2e97307..feb6fef5 100644 --- a/azure/container-linux/kubernetes/security.tf +++ b/azure/container-linux/kubernetes/security.tf @@ -53,13 +53,29 @@ resource "azurerm_network_security_rule" "controller-etcd-metrics" { destination_address_prefix = azurerm_subnet.controller.address_prefix } +# Allow Prometheus to scrape kube-proxy metrics +resource "azurerm_network_security_rule" "controller-kube-proxy" { + resource_group_name = azurerm_resource_group.cluster.name + + name = "allow-kube-proxy-metrics" + network_security_group_name = azurerm_network_security_group.controller.name + priority = "2011" + access = "Allow" + direction = "Inbound" + protocol = "Tcp" + source_port_range = "*" + destination_port_range = "10249" + source_address_prefix = azurerm_subnet.worker.address_prefix + destination_address_prefix = azurerm_subnet.controller.address_prefix +} + # Allow Prometheus to scrape kube-scheduler and kube-controller-manager metrics resource "azurerm_network_security_rule" "controller-kube-metrics" { resource_group_name = azurerm_resource_group.cluster.name name = "allow-kube-metrics" network_security_group_name = azurerm_network_security_group.controller.name - priority = "2011" + priority = "2012" access = "Allow" direction = "Inbound" protocol = "Tcp" @@ -251,6 +267,22 @@ resource "azurerm_network_security_rule" "worker-node-exporter" { destination_address_prefix = azurerm_subnet.worker.address_prefix } +# Allow Prometheus to scrape kube-proxy +resource "azurerm_network_security_rule" "worker-kube-proxy" { + resource_group_name = azurerm_resource_group.cluster.name + + name = "allow-kube-proxy" + network_security_group_name = azurerm_network_security_group.worker.name + priority = "2024" + access = "Allow" + direction = "Inbound" + protocol = "Tcp" + source_port_range = "*" + destination_port_range = "10249" + source_address_prefix = azurerm_subnet.worker.address_prefix + destination_address_prefix = azurerm_subnet.worker.address_prefix +} + # Allow apiserver to access kubelet's for exec, log, port-forward resource "azurerm_network_security_rule" "worker-kubelet" { resource_group_name = azurerm_resource_group.cluster.name diff --git a/bare-metal/container-linux/kubernetes/bootstrap.tf b/bare-metal/container-linux/kubernetes/bootstrap.tf index 7def8550..5e8c940b 100644 --- a/bare-metal/container-linux/kubernetes/bootstrap.tf +++ b/bare-metal/container-linux/kubernetes/bootstrap.tf @@ -1,6 +1,6 @@ # Kubernetes assets (kubeconfig, manifests) module "bootstrap" { - source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=c8c21deb7682c2a83a1b86ff6ed88f3e5a20262d" + source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=ac4b7af57012d477cd53bd74ce632ac581e807e1" cluster_name = var.cluster_name api_servers = [var.k8s_domain_name] diff --git a/bare-metal/fedora-coreos/kubernetes/bootstrap.tf b/bare-metal/fedora-coreos/kubernetes/bootstrap.tf index bc236462..8bbb756d 100644 --- a/bare-metal/fedora-coreos/kubernetes/bootstrap.tf +++ b/bare-metal/fedora-coreos/kubernetes/bootstrap.tf @@ -1,6 +1,6 @@ # Kubernetes assets (kubeconfig, manifests) module "bootstrap" { - source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=c8c21deb7682c2a83a1b86ff6ed88f3e5a20262d" + source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=ac4b7af57012d477cd53bd74ce632ac581e807e1" cluster_name = var.cluster_name api_servers = [var.k8s_domain_name] diff --git a/digital-ocean/container-linux/kubernetes/bootstrap.tf b/digital-ocean/container-linux/kubernetes/bootstrap.tf index ebb1f80a..7c195c7d 100644 --- a/digital-ocean/container-linux/kubernetes/bootstrap.tf +++ b/digital-ocean/container-linux/kubernetes/bootstrap.tf @@ -1,6 +1,6 @@ # Kubernetes assets (kubeconfig, manifests) module "bootstrap" { - source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=c8c21deb7682c2a83a1b86ff6ed88f3e5a20262d" + source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=ac4b7af57012d477cd53bd74ce632ac581e807e1" cluster_name = var.cluster_name api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)] diff --git a/digital-ocean/container-linux/kubernetes/network.tf b/digital-ocean/container-linux/kubernetes/network.tf index 6a8d4095..bc543485 100644 --- a/digital-ocean/container-linux/kubernetes/network.tf +++ b/digital-ocean/container-linux/kubernetes/network.tf @@ -16,12 +16,20 @@ resource "digitalocean_firewall" "rules" { source_tags = [digitalocean_tag.controllers.name, digitalocean_tag.workers.name] } + # Allow Prometheus to scrape node-exporter inbound_rule { protocol = "tcp" port_range = "9100" source_tags = [digitalocean_tag.workers.name] } + # Allow Prometheus to scrape kube-proxy + inbound_rule { + protocol = "tcp" + port_range = "10249" + source_tags = [digitalocean_tag.workers.name] + } + inbound_rule { protocol = "tcp" port_range = "10250" diff --git a/google-cloud/container-linux/kubernetes/bootstrap.tf b/google-cloud/container-linux/kubernetes/bootstrap.tf index 9eeeb979..f6b385f7 100644 --- a/google-cloud/container-linux/kubernetes/bootstrap.tf +++ b/google-cloud/container-linux/kubernetes/bootstrap.tf @@ -1,6 +1,6 @@ # Kubernetes assets (kubeconfig, manifests) module "bootstrap" { - source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=c8c21deb7682c2a83a1b86ff6ed88f3e5a20262d" + source = "git::https://github.com/poseidon/terraform-render-bootstrap.git?ref=ac4b7af57012d477cd53bd74ce632ac581e807e1" cluster_name = var.cluster_name api_servers = [format("%s.%s", var.cluster_name, var.dns_zone)] diff --git a/google-cloud/container-linux/kubernetes/network.tf b/google-cloud/container-linux/kubernetes/network.tf index 8d0bbefc..bd7067d7 100644 --- a/google-cloud/container-linux/kubernetes/network.tf +++ b/google-cloud/container-linux/kubernetes/network.tf @@ -126,6 +126,20 @@ resource "google_compute_firewall" "internal-node-exporter" { target_tags = ["${var.cluster_name}-controller", "${var.cluster_name}-worker"] } +# Allow Prometheus to scrape kube-proxy metrics +resource "google_compute_firewall" "internal-kube-proxy" { + name = "${var.cluster_name}-internal-kube-proxy" + network = google_compute_network.network.name + + allow { + protocol = "tcp" + ports = [10249] + } + + source_tags = ["${var.cluster_name}-worker"] + target_tags = ["${var.cluster_name}-controller", "${var.cluster_name}-worker"] +} + # Allow apiserver to access kubelets for exec, log, port-forward resource "google_compute_firewall" "internal-kubelet" { name = "${var.cluster_name}-internal-kubelet"