From c098936e6a815ef23281066b6c07bebd25d761c7 Mon Sep 17 00:00:00 2001 From: Alexander Amiri Date: Thu, 12 Mar 2026 15:20:25 +0100 Subject: [PATCH 1/2] Speed up ECS deploys: reduce deregistration delay and health check interval - Reduce deregistration_delay from 300s (AWS default) to 30s - Reduce health check interval from 30s to 10s - Reduce unhealthy_threshold from 3 to 2 - Wire deregistration_delay through registry (app.yaml: routing.deregistration_delay) Deploy time drops from ~6 min to ~1 min: 20s health check + 30s drain. --- scripts/registry.py | 1 + terraform/modules/service-routing/main.tf | 16 +++++++++------- terraform/modules/service-routing/variables.tf | 6 ++++++ 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/scripts/registry.py b/scripts/registry.py index 9f2301a..b69eb64 100644 --- a/scripts/registry.py +++ b/scripts/registry.py @@ -104,6 +104,7 @@ "route53_zone_id": "ref:platform.route53_zone_id", "alb_dns_name": "ref:platform.alb_dns_name", "alb_zone_id": "ref:platform.alb_zone_id", + "deregistration_delay": "yaml:routing.deregistration_delay|default:30", }, "rename": "routing", "output_map": { diff --git a/terraform/modules/service-routing/main.tf b/terraform/modules/service-routing/main.tf index bc6abe1..4ca1344 100644 --- a/terraform/modules/service-routing/main.tf +++ b/terraform/modules/service-routing/main.tf @@ -3,19 +3,21 @@ ################################################################################ resource "aws_lb_target_group" "this" { - name = "${var.project}-${var.name}" - port = var.port - protocol = "HTTP" - vpc_id = var.vpc_id - target_type = "ip" + name = "${var.project}-${var.name}" + port = var.port + protocol = "HTTP" + vpc_id = var.vpc_id + target_type = "ip" + deregistration_delay = var.deregistration_delay + load_balancing_algorithm_type = "round_robin" health_check { path = var.health_check_path protocol = "HTTP" healthy_threshold = 2 - unhealthy_threshold = 3 + unhealthy_threshold = 2 timeout = 5 - 
interval = 30 + interval = 10 matcher = var.health_check_matcher } diff --git a/terraform/modules/service-routing/variables.tf b/terraform/modules/service-routing/variables.tf index 7e60aae..8d307bb 100644 --- a/terraform/modules/service-routing/variables.tf +++ b/terraform/modules/service-routing/variables.tf @@ -65,3 +65,9 @@ variable "alb_zone_id" { description = "ALB hosted zone ID for the Route53 alias target" type = string } + +variable "deregistration_delay" { + description = "Seconds to wait for in-flight requests before deregistering targets" + type = number + default = 30 +} From d2aadc715a5ff0bd5b6ed30d968d7ff590bacb28 Mon Sep 17 00:00:00 2001 From: Alexander Amiri Date: Thu, 12 Mar 2026 15:22:09 +0100 Subject: [PATCH 2/2] Fix ecs-deploy to use latest task def revision, not service's current The deploy script was reading the task definition from the running service, which missed structural changes from Terraform (like readonlyRootFilesystem). Now passes the task definition family name ($SERVICE, which must match the family name) to describe-task-definition, which returns the latest ACTIVE revision and therefore includes Terraform's changes, before swapping the image tag. --- scripts/ecs-deploy.sh | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/scripts/ecs-deploy.sh b/scripts/ecs-deploy.sh index 3470aec..f39fd44 100644 --- a/scripts/ecs-deploy.sh +++ b/scripts/ecs-deploy.sh @@ -10,11 +10,9 @@ set -e SCRIPT_DIR=$(dirname "$0") export ECR_URI="${ACCOUNT_ID}.dkr.ecr.${REGION}.amazonaws.com/${ECR_REPO}:${IMAGE_TAG}" -TASK_DEF_ARN=$(aws ecs describe-services \ - --cluster "$CLUSTER" --services "$SERVICE" \ - --query 'services[0].taskDefinition' --output text) - -aws ecs describe-task-definition --task-definition "$TASK_DEF_ARN" \ +# Use the latest family revision (includes structural changes from Terraform) +# rather than the revision currently running on the service. +aws ecs describe-task-definition --task-definition "$SERVICE" \ --query 'taskDefinition' > task-def.json sh "$SCRIPT_DIR/update-task-def.sh" task-def.json task-def-new.json