aws: Switch EC2 instances to use resource-based hostnames

* Use EC2 resource-based hostnames instead of IP-based hostnames. The Amazon
DNS server can resolve A and AAAA queries for these hostnames to the nodes' IPv4 and IPv6 addresses.
* For example, nodes used to be named like `ip-10-11-12-13.us-west-2.compute.internal`
but going forward are named by instance id, like `i-0123456789abcdef.us-west-2.compute.internal` (in us-east-1, the domain suffix is `ec2.internal`)
* Tag controller node EBS volumes with a name based on the controller node name
This commit is contained in:
Dalton Hubble 2024-08-22 20:02:53 -07:00
parent effa13c141
commit 808b8a948f
No known key found for this signature in database
GPG Key ID: BD34C2E3EF32B7A0
7 changed files with 90 additions and 50 deletions

View File

@ -9,6 +9,11 @@ Notable changes between versions.
* Kubernetes [v1.31.0](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.31.md#v1310)
* Fix invalid flannel-cni container image for those using flannel networking
### AWS
* Use EC2 resource-based hostnames instead of IP-based hostnames. The Amazon DNS server can resolve A and AAAA queries for these hostnames to the nodes' IPv4 and IPv6 addresses.
* Tag controller node EBS volumes with a name based on the controller node name
## v1.30.4
* Kubernetes [v1.30.4](https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.30.md#v1304)

View File

@ -20,10 +20,8 @@ resource "aws_instance" "controllers" {
tags = {
Name = "${var.cluster_name}-controller-${count.index}"
}
instance_type = var.controller_type
ami = var.controller_arch == "arm64" ? data.aws_ami.fedora-coreos-arm[0].image_id : data.aws_ami.fedora-coreos.image_id
user_data = data.ct_config.controllers.*.rendered[count.index]
# storage
root_block_device {
@ -31,7 +29,9 @@ resource "aws_instance" "controllers" {
volume_size = var.controller_disk_size
iops = var.controller_disk_iops
encrypted = true
tags = {}
tags = {
Name = "${var.cluster_name}-controller-${count.index}"
}
}
# network
@ -39,6 +39,10 @@ resource "aws_instance" "controllers" {
subnet_id = element(aws_subnet.public.*.id, count.index)
vpc_security_group_ids = [aws_security_group.controller.id]
# boot
user_data = data.ct_config.controllers.*.rendered[count.index]
# cost
credit_specification {
cpu_credits = var.controller_cpu_credits
}

View File

@ -47,17 +47,25 @@ resource "aws_route" "egress-ipv6" {
resource "aws_subnet" "public" {
count = length(data.aws_availability_zones.all.names)
vpc_id = aws_vpc.network.id
availability_zone = data.aws_availability_zones.all.names[count.index]
cidr_block = cidrsubnet(var.host_cidr, 4, count.index)
ipv6_cidr_block = cidrsubnet(aws_vpc.network.ipv6_cidr_block, 8, count.index)
map_public_ip_on_launch = true
assign_ipv6_address_on_creation = true
tags = {
"Name" = "${var.cluster_name}-public-${count.index}"
}
vpc_id = aws_vpc.network.id
availability_zone = data.aws_availability_zones.all.names[count.index]
# IPv4 and IPv6 CIDR blocks
cidr_block = cidrsubnet(var.host_cidr, 4, count.index)
ipv6_cidr_block = cidrsubnet(aws_vpc.network.ipv6_cidr_block, 8, count.index)
# Assign IPv4 and IPv6 addresses to instances
map_public_ip_on_launch = true
assign_ipv6_address_on_creation = true
# Hostnames assigned to instances
# resource-name: <ec2-instance-id>.region.compute.internal
private_dns_hostname_type_on_launch = "resource-name"
enable_resource_name_dns_a_record_on_launch = true
enable_resource_name_dns_aaaa_record_on_launch = true
}
resource "aws_route_table_association" "public" {

View File

@ -3,16 +3,14 @@ resource "aws_autoscaling_group" "workers" {
name = "${var.name}-worker"
# count
desired_capacity = var.worker_count
min_size = var.worker_count
max_size = var.worker_count + 2
default_cooldown = 30
health_check_grace_period = 30
desired_capacity = var.worker_count
min_size = var.worker_count
max_size = var.worker_count + 2
# network
vpc_zone_identifier = var.subnet_ids
# template
# instance template
launch_template {
id = aws_launch_template.worker.id
version = aws_launch_template.worker.latest_version
@ -32,6 +30,11 @@ resource "aws_autoscaling_group" "workers" {
min_healthy_percentage = 90
}
}
# Grace period before checking new instance's health
health_check_grace_period = 30
# Cooldown period between scaling activities
default_cooldown = 30
lifecycle {
# override the default destroy and replace update behavior
@ -56,11 +59,6 @@ resource "aws_launch_template" "worker" {
name_prefix = "${var.name}-worker"
image_id = local.ami_id
instance_type = var.instance_type
monitoring {
enabled = false
}
user_data = sensitive(base64encode(data.ct_config.worker.rendered))
# storage
ebs_optimized = true
@ -76,14 +74,26 @@ resource "aws_launch_template" "worker" {
}
# network
vpc_security_group_ids = var.security_groups
network_interfaces {
associate_public_ip_address = true
security_groups = var.security_groups
}
# boot
user_data = sensitive(base64encode(data.ct_config.worker.rendered))
# metadata
metadata_options {
http_tokens = "optional"
}
monitoring {
enabled = false
}
# spot
# cost
credit_specification {
cpu_credits = var.cpu_credits
}
dynamic "instance_market_options" {
for_each = var.spot_price > 0 ? [1] : []
content {
@ -94,10 +104,6 @@ resource "aws_launch_template" "worker" {
}
}
credit_specification {
cpu_credits = var.cpu_credits
}
lifecycle {
// Override the default destroy and replace update behavior
create_before_destroy = true

View File

@ -20,11 +20,8 @@ resource "aws_instance" "controllers" {
tags = {
Name = "${var.cluster_name}-controller-${count.index}"
}
instance_type = var.controller_type
ami = local.ami_id
user_data = data.ct_config.controllers.*.rendered[count.index]
ami = local.ami_id
# storage
root_block_device {
@ -32,7 +29,9 @@ resource "aws_instance" "controllers" {
volume_size = var.controller_disk_size
iops = var.controller_disk_iops
encrypted = true
tags = {}
tags = {
Name = "${var.cluster_name}-controller-${count.index}"
}
}
# network
@ -40,6 +39,10 @@ resource "aws_instance" "controllers" {
subnet_id = element(aws_subnet.public.*.id, count.index)
vpc_security_group_ids = [aws_security_group.controller.id]
# boot
user_data = data.ct_config.controllers.*.rendered[count.index]
# cost
credit_specification {
cpu_credits = var.controller_cpu_credits
}

View File

@ -47,17 +47,25 @@ resource "aws_route" "egress-ipv6" {
resource "aws_subnet" "public" {
count = length(data.aws_availability_zones.all.names)
vpc_id = aws_vpc.network.id
availability_zone = data.aws_availability_zones.all.names[count.index]
cidr_block = cidrsubnet(var.host_cidr, 4, count.index)
ipv6_cidr_block = cidrsubnet(aws_vpc.network.ipv6_cidr_block, 8, count.index)
map_public_ip_on_launch = true
assign_ipv6_address_on_creation = true
tags = {
"Name" = "${var.cluster_name}-public-${count.index}"
}
vpc_id = aws_vpc.network.id
availability_zone = data.aws_availability_zones.all.names[count.index]
# IPv4 and IPv6 CIDR blocks
cidr_block = cidrsubnet(var.host_cidr, 4, count.index)
ipv6_cidr_block = cidrsubnet(aws_vpc.network.ipv6_cidr_block, 8, count.index)
# Assign IPv4 and IPv6 addresses to instances
map_public_ip_on_launch = true
assign_ipv6_address_on_creation = true
# Hostnames assigned to instances
# resource-name: <ec2-instance-id>.region.compute.internal
private_dns_hostname_type_on_launch = "resource-name"
enable_resource_name_dns_a_record_on_launch = true
enable_resource_name_dns_aaaa_record_on_launch = true
}
resource "aws_route_table_association" "public" {

View File

@ -3,16 +3,14 @@ resource "aws_autoscaling_group" "workers" {
name = "${var.name}-worker"
# count
desired_capacity = var.worker_count
min_size = var.worker_count
max_size = var.worker_count + 2
default_cooldown = 30
health_check_grace_period = 30
desired_capacity = var.worker_count
min_size = var.worker_count
max_size = var.worker_count + 2
# network
vpc_zone_identifier = var.subnet_ids
# template
# instance template
launch_template {
id = aws_launch_template.worker.id
version = aws_launch_template.worker.latest_version
@ -32,6 +30,10 @@ resource "aws_autoscaling_group" "workers" {
min_healthy_percentage = 90
}
}
# Grace period before checking new instance's health
health_check_grace_period = 30
# Cooldown period between scaling activities
default_cooldown = 30
lifecycle {
# override the default destroy and replace update behavior
@ -60,8 +62,6 @@ resource "aws_launch_template" "worker" {
enabled = false
}
user_data = sensitive(base64encode(data.ct_config.worker.rendered))
# storage
ebs_optimized = true
block_device_mappings {
@ -76,7 +76,13 @@ resource "aws_launch_template" "worker" {
}
# network
vpc_security_group_ids = var.security_groups
network_interfaces {
associate_public_ip_address = true
security_groups = var.security_groups
}
# boot
user_data = sensitive(base64encode(data.ct_config.worker.rendered))
# metadata
metadata_options {