From 9a63b31e7e7d70b8e60fcc2b61ce7a6a3326ee3a Mon Sep 17 00:00:00 2001
From: Alexander Amiri <alexander.amiri@piano.io>
Date: Thu, 26 Mar 2026 22:43:11 +0100
Subject: [PATCH] Add CUR 2.0 cost analytics, Athena querying, and billing
 protection
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- New cost-analytics module: CUR 2.0 export (daily Parquet with resource IDs),
  Glue database + crawler, Athena workgroup, CloudWatch billing alarm ($200),
  account-level budget with 80%/100% notifications ($500)
- Shared athena.py utility for synchronous Athena queries with graceful degradation
- Weekly cost report: resource-level drilldown (top 10 resources), enriched LLM narrative
- Daily spike check: per-spike resource breakdown (top 5 resources per spiking service)
- Lambda IAM policies updated for Athena/Glue/S3 access
- Lambda timeouts increased (cost-report 60→120s, daily-check 60→90s)
---
 terraform/lambda-src/cost_report/handler.py   | 125 ++++++-
 .../lambda-src/daily_cost_check/handler.py    |  66 ++++
 terraform/lambda-src/shared/athena.py         |  93 +++++
 terraform/platform/cost-analytics/main.tf     | 343 ++++++++++++++++++
 terraform/platform/cost-analytics/outputs.tf  |  24 ++
 .../platform/cost-analytics/variables.tf      |  31 ++
 terraform/platform/lambdas/main.tf            | 104 +++++-
 terraform/platform/lambdas/variables.tf       |  27 ++
 terraform/platform/main.tf                    |  27 +-
 9 files changed, 833 insertions(+), 7 deletions(-)
 create mode 100644 terraform/lambda-src/shared/athena.py
 create mode 100644 terraform/platform/cost-analytics/main.tf
 create mode 100644 terraform/platform/cost-analytics/outputs.tf
 create mode 100644 terraform/platform/cost-analytics/variables.tf

diff --git a/terraform/lambda-src/cost_report/handler.py b/terraform/lambda-src/cost_report/handler.py
index 8b59cee..632f56f 100644
--- a/terraform/lambda-src/cost_report/handler.py
+++ b/terraform/lambda-src/cost_report/handler.py
@@ -7,6 +7,7 @@
 
 import boto3
 from botocore.config import Config
+from shared.athena import run_query
 from shared.constants import USD_TO_NOK, risk_emoji
 from shared.slack import get_webhook_url, post_to_slack
 
@@ -16,6 +17,9 @@
 ssm = boto3.client("ssm")
 
 COST_WEBHOOK_PARAM = os.environ["COST_WEBHOOK_PARAM"]
+CUR_DATABASE = os.environ.get("CUR_DATABASE", "")
+CUR_TABLE = os.environ.get("CUR_TABLE", "")
+ATHENA_WORKGROUP = os.environ.get("ATHENA_WORKGROUP", "")
 
 # ---------------------------------------------------------------------------
 # Service categorisation — pattern-based, not hardcoded lists
@@ -315,7 +319,7 @@ def _llm_structured(prompt, tool_config):
 
 
 def generate_narrative(this_week, prev_week, mtd, tw_total, pw_total,
-                       mtd_total, projected, month_name):
+                       mtd_total, projected, month_name, resource_summary=""):
     """Generate executive summary. Returns dict with summary/notable or None."""
     top_services = sorted(this_week.items(), key=lambda x: x[1], reverse=True)[:8]
     svc_summary = ", ".join(f"{s}: ${c:.2f}" for s, c in top_services)
@@ -341,8 +345,9 @@ def generate_narrative(this_week, prev_week, mtd, tw_total, pw_total,
 
 Top services: {svc_summary}
 Biggest movers WoW: {mover_lines or 'no significant changes'}
+{resource_summary}
 
-Be specific about which services drove changes. Set notable=true only if there is a meaningful change worth highlighting, false if costs are stable week-over-week."""
+Be specific about which services and resources drove changes. Set notable=true only if there is a meaningful change worth highlighting, false if costs are stable week-over-week."""
     return _llm_structured(prompt, _NARRATIVE_TOOL)
 
 
@@ -375,6 +380,104 @@ def analyze_spike(this_week, prev_week, tw_total, pw_total):
     return _llm_structured(prompt, _SPIKE_TOOL)
 
 
+# ---------------------------------------------------------------------------
+# CUR resource-level drilldown via Athena
+# ---------------------------------------------------------------------------
+def _friendly_resource_id(resource_id):
+    """Shorten an ARN/resource ID to a readable name; falsy input gives a placeholder."""
+    if not resource_id:
+        return "(no resource ID)"
+    # Match ":::" exactly: "::" also occurs in region-less ARNs (e.g. IAM) and must fall through
+    if ":::" in resource_id:
+        # S3 bucket: arn:aws:s3:::bucket-name → bucket-name
+        return resource_id.rsplit(":::", 1)[-1]
+    if "/" in resource_id:
+        # Up to 3 path segments → last segment; deeper paths → last two, joined by "/"
+        parts = resource_id.split("/")
+        return parts[-1] if len(parts) <= 3 else "/".join(parts[-2:])
+    if ":" in resource_id:
+        return resource_id.rsplit(":", 1)[-1]
+    return resource_id
+
+
+def get_resource_drilldown(week_start, week_end):
+    """Query CUR via Athena for the week's top resources. Returns {"top_resources": rows} or None."""
+    if not (CUR_DATABASE and CUR_TABLE and ATHENA_WORKGROUP):
+        return None
+    # The week may straddle a month/year boundary, so prune on every partition it touches
+    months = sorted({(d.year, d.month) for d in (week_start, week_end)})
+    partitions = " OR ".join(f"(year = '{y}' AND month = '{m:02d}')" for y, m in months)
+
+    # Top 10 resources overall
+    top_query = f"""
+    SELECT line_item_resource_id,
+           line_item_product_code,
+           COALESCE(resource_tags_user_team, '') as team,
+           SUM(CAST(line_item_unblended_cost AS double)) as total_cost
+    FROM "{CUR_DATABASE}"."{CUR_TABLE}"
+    WHERE ({partitions})
+      AND line_item_usage_start_date >= TIMESTAMP '{week_start}'
+      AND line_item_usage_start_date < TIMESTAMP '{week_end + timedelta(days=1)}'
+      AND line_item_resource_id != ''
+      AND line_item_line_item_type = 'Usage'
+    GROUP BY line_item_resource_id, line_item_product_code,
+             COALESCE(resource_tags_user_team, '')
+    HAVING SUM(CAST(line_item_unblended_cost AS double)) >= 0.01
+    ORDER BY total_cost DESC
+    LIMIT 10
+    """
+
+    top_resources = run_query(CUR_DATABASE, top_query, ATHENA_WORKGROUP)
+    if not top_resources:
+        return None
+
+    return {"top_resources": top_resources}
+
+
+def build_resource_drilldown_blocks(drilldown):
+    """Build Block Kit blocks (heading section + table) for the resource-level cost drilldown."""
+    blocks = []
+    top = drilldown.get("top_resources", [])
+    if not top:
+        return blocks
+
+    blocks.append({
+        "type": "section",
+        "text": {"type": "mrkdwn", "text": ":mag: *Resource-Level Drilldown*"}
+    })
+
+    header = [
+        {"type": "raw_text", "text": "Resource"},
+        {"type": "raw_text", "text": "Service"},
+        {"type": "raw_text", "text": "Team"},
+        {"type": "raw_text", "text": "Cost"},
+    ]
+    rows = [header]
+    # The CUR query COALESCEs a missing team tag to '' — use `or` so the fallbacks actually apply
+    for item in top:
+        cost = float(item.get("total_cost") or 0)
+        nok = cost * USD_TO_NOK
+        rows.append([
+            {"type": "raw_text", "text": _friendly_resource_id(item.get("line_item_resource_id", ""))},
+            {"type": "raw_text", "text": item.get("line_item_product_code", "")},
+            {"type": "raw_text", "text": item.get("team") or "(untagged)"},
+            {"type": "raw_text", "text": f"${cost:.2f} (~{nok:.0f} NOK)"},
+        ])
+
+    blocks.append({
+        "type": "table",
+        "column_settings": [
+            {"is_wrapped": True},
+            {},
+            {},
+            {"align": "right"},
+        ],
+        "rows": rows,
+    })
+
+    return blocks
+
+
 # ---------------------------------------------------------------------------
 # Block Kit builder
 # ---------------------------------------------------------------------------
@@ -493,6 +596,14 @@ def build_blocks(this_week, prev_week, mtd, prev_mtd, project_costs, team_costs,
 
     blocks.append({"type": "divider"})
 
+    # Resource-level drilldown from CUR (graceful — skipped if unavailable)
+    drilldown = get_resource_drilldown(tw_start, tw_end)
+    if drilldown:
+        blocks.extend(build_resource_drilldown_blocks(drilldown))
+        blocks.append({"type": "divider"})
+    else:
+        logger.info("CUR resource drilldown unavailable — skipping")
+
     # LLM spike root cause (shown as a section — important)
     spike_result = analyze_spike(this_week, prev_week, tw_total, pw_total)
     if spike_result:
@@ -506,9 +617,19 @@ def build_blocks(this_week, prev_week, mtd, prev_mtd, project_costs, team_costs,
         blocks.append({"type": "divider"})
 
     # LLM narrative + footer as context blocks
+    resource_summary = ""
+    if drilldown:
+        top_res = drilldown.get("top_resources", [])[:5]
+        resource_summary = "\nTop resources by cost: " + ", ".join(
+            f"{_friendly_resource_id(r.get('line_item_resource_id',''))} "
+            f"({r.get('line_item_product_code','')}, {r.get('team','?')}): "
+            f"${float(r.get('total_cost',0)):.2f}"
+            for r in top_res
+        )
     narrative_result = generate_narrative(
         this_week, prev_week, mtd,
         tw_total, pw_total, mtd_total, projected, curr_month_name,
+        resource_summary=resource_summary,
     )
 
     ce_url = cost_explorer_url(tw_start, tw_end)
diff --git a/terraform/lambda-src/daily_cost_check/handler.py b/terraform/lambda-src/daily_cost_check/handler.py
index d958c27..cb2619f 100644
--- a/terraform/lambda-src/daily_cost_check/handler.py
+++ b/terraform/lambda-src/daily_cost_check/handler.py
@@ -5,6 +5,7 @@
 from datetime import datetime, timedelta, timezone
 
 import boto3
+from shared.athena import run_query
 from shared.constants import USD_TO_NOK
 from shared.slack import get_webhook_url, post_to_slack
 
@@ -17,6 +18,9 @@
 SPIKE_THRESHOLD = float(os.environ.get("SPIKE_THRESHOLD", "1.2"))  # 20% above average
 # Minimum daily spend (USD) to qualify as a spike — filters noise on tiny amounts
 MIN_SPIKE_AMOUNT = float(os.environ.get("MIN_SPIKE_AMOUNT", "1.00"))
+CUR_DATABASE = os.environ.get("CUR_DATABASE", "")
+CUR_TABLE = os.environ.get("CUR_TABLE", "")
+ATHENA_WORKGROUP = os.environ.get("ATHENA_WORKGROUP", "")
 
 
 # ---------------------------------------------------------------------------
@@ -129,6 +133,54 @@ def cost_explorer_url(start, end, service=None):
     return f"{base}?{urllib.parse.urlencode(params)}"
 
 
+# ---------------------------------------------------------------------------
+# CUR resource drilldown for spiking services
+# ---------------------------------------------------------------------------
+def get_spike_resources(day, service):
+    """Query CUR for the top 5 resources of a spiking service on `day`. Returns list or []."""
+    if not (CUR_DATABASE and CUR_TABLE and ATHENA_WORKGROUP):
+        return []
+    # Double any single quote so the service name cannot break out of the SQL string literal
+    service_sql = service.replace("'", "''")
+    year, month = str(day.year), f"{day.month:02d}"
+    next_day = day + timedelta(days=1)
+    # NOTE(review): the filter is on line_item_product_code — confirm callers pass a CUR product code
+    query = f"""
+    SELECT line_item_resource_id,
+           line_item_usage_type,
+           COALESCE(resource_tags_user_team, '') as team,
+           SUM(CAST(line_item_unblended_cost AS double)) as total_cost
+    FROM "{CUR_DATABASE}"."{CUR_TABLE}"
+    WHERE year = '{year}' AND month = '{month}'
+      AND line_item_usage_start_date >= TIMESTAMP '{day}'
+      AND line_item_usage_start_date < TIMESTAMP '{next_day}'
+      AND line_item_product_code = '{service_sql}'
+      AND line_item_resource_id != ''
+      AND line_item_line_item_type = 'Usage'
+    GROUP BY line_item_resource_id, line_item_usage_type,
+             COALESCE(resource_tags_user_team, '')
+    HAVING SUM(CAST(line_item_unblended_cost AS double)) >= 0.01
+    ORDER BY total_cost DESC
+    LIMIT 5
+    """
+
+    return run_query(CUR_DATABASE, query, ATHENA_WORKGROUP)
+
+
+def _friendly_resource_id(resource_id):
+    """Return the short trailing name of an ARN or path-like resource ID."""
+    if not resource_id:
+        return "(no resource ID)"
+    _, sep, tail = resource_id.rpartition(":::")
+    if sep:
+        return tail
+    segments = resource_id.split("/")
+    if len(segments) > 1:
+        return "/".join(segments[-2:]) if len(segments) > 3 else segments[-1]
+    # No ":::" and no "/": keep what follows the last colon (the whole ID if there is none)
+    return resource_id.rpartition(":")[2]
+
+
 # ---------------------------------------------------------------------------
 # Spike detection
 # ---------------------------------------------------------------------------
@@ -237,6 +289,16 @@ def build_alert_blocks(spikes, spike_details, yesterday_date):
             )
             detail_parts.append(f"*By team:* {team_lines}")
 
+        resources = detail.get("resources")
+        if resources:
+            res_lines = "\n".join(
+                f"  \u2022 {_friendly_resource_id(r.get('line_item_resource_id', ''))}: "
+                f"${float(r.get('total_cost', 0)):.2f}"
+                f"{' (' + r['team'] + ')' if r.get('team') else ''}"
+                for r in resources
+            )
+            detail_parts.append(f"*Top resources:*\n{res_lines}")
+
         ce_url = detail.get("url")
         if ce_url:
             detail_parts.append(f"<{ce_url}|View in Cost Explorer>")
@@ -295,6 +357,10 @@ def handler(event, context):
             detail["team_tags"] = get_tag_breakdown(ce, yesterday, svc, tag_key="team")
         except Exception as e:
             logger.warning("Team tag query failed for %s: %s", svc, e)
+        try:
+            detail["resources"] = get_spike_resources(yesterday, svc)
+        except Exception as e:
+            logger.warning("CUR resource query failed for %s: %s", svc, e)
         spike_details[svc] = detail
 
     blocks = build_alert_blocks(spikes, spike_details, yesterday)
diff --git a/terraform/lambda-src/shared/athena.py b/terraform/lambda-src/shared/athena.py
new file mode 100644
index 0000000..a0134d1
--- /dev/null
+++ b/terraform/lambda-src/shared/athena.py
@@ -0,0 +1,93 @@
+"""Shared Athena query utility for CUR cost analytics."""
+
+import logging
+import time
+
+import boto3
+
+logger = logging.getLogger(__name__)
+
+
+def run_query(database, query, workgroup, timeout_seconds=30):
+    """Execute an Athena query synchronously and return rows as list of dicts.
+
+    Values are the raw VarCharValue strings ("" when absent). On timeout or query error
+    a warning is logged and [] is returned; a paging error returns the rows read so far.
+    """
+    athena = boto3.client("athena")
+
+    try:
+        start = athena.start_query_execution(
+            QueryString=query,
+            QueryExecutionContext={"Database": database},
+            WorkGroup=workgroup,
+        )
+        execution_id = start["QueryExecutionId"]
+    except Exception as e:
+        logger.warning("Athena start_query_execution failed: %s", e)
+        return []
+
+    # Poll until done; the while/else branch runs only if the deadline passes without a break
+    deadline = time.monotonic() + timeout_seconds
+    while time.monotonic() < deadline:
+        try:
+            status = athena.get_query_execution(QueryExecutionId=execution_id)
+            state = status["QueryExecution"]["Status"]["State"]
+        except Exception as e:
+            logger.warning("Athena get_query_execution failed: %s", e)
+            return []
+
+        if state == "SUCCEEDED":
+            break
+        if state in ("FAILED", "CANCELLED"):
+            reason = status["QueryExecution"]["Status"].get(
+                "StateChangeReason", "unknown"
+            )
+            logger.warning("Athena query %s: %s", state, reason)
+            return []
+
+        time.sleep(1)
+    else:
+        logger.warning("Athena query timed out after %ds", timeout_seconds)
+        try:
+            athena.stop_query_execution(QueryExecutionId=execution_id)
+        except Exception as e:
+            logger.debug("Athena stop_query_execution failed (best effort): %s", e)
+        return []
+
+    # Fetch results with pagination
+    rows = []
+    columns = None
+    next_token = None
+
+    while True:
+        kwargs = {"QueryExecutionId": execution_id}
+        if next_token:
+            kwargs["NextToken"] = next_token
+
+        try:
+            result = athena.get_query_results(**kwargs)
+        except Exception as e:
+            logger.warning("Athena get_query_results failed: %s", e)
+            return rows
+
+        result_set = result["ResultSet"]
+
+        if columns is None:
+            columns = [
+                col["Name"] for col in result_set["ResultSetMetadata"]["ColumnInfo"]
+            ]
+            # First page includes the header row — skip it
+            data_rows = result_set["Rows"][1:]
+        else:
+            data_rows = result_set["Rows"]
+
+        for row in data_rows:
+            values = [d.get("VarCharValue", "") for d in row["Data"]]
+            rows.append(dict(zip(columns, values)))
+
+        next_token = result.get("NextToken")
+        if not next_token:
+            break
+
+    return rows
diff --git a/terraform/platform/cost-analytics/main.tf b/terraform/platform/cost-analytics/main.tf
new file mode 100644
index 0000000..0ca0e66
--- /dev/null
+++ b/terraform/platform/cost-analytics/main.tf
@@ -0,0 +1,343 @@
+################################################################################
+# Cost Analytics — CUR, Athena, Glue, billing protection
+#
+# Resources:
+#   S3 buckets          CUR data + Athena query results
+#   CUR report          Daily Parquet export with resource IDs
+#   Glue database       Catalog for CUR data
+#   Glue crawler        Auto-discovers CUR Parquet schema daily
+#   Athena workgroup    Scoped workgroup for cost queries
+#   Billing alarm       CloudWatch alarm on EstimatedCharges (~4-6h delay)
+#   Account budget      Monthly budget with notifications at 80% and 100%
+################################################################################
+
+terraform {
+  required_providers {
+    aws = {
+      source                = "hashicorp/aws"
+      configuration_aliases = [aws, aws.us_east_1]
+    }
+  }
+}
+
+locals {
+  cur_bucket_name    = "${var.project}-cur-${var.aws_account_id}"
+  athena_bucket_name = "${var.project}-athena-results-${var.aws_account_id}"
+  glue_database_name = "${var.project}_cur"
+  crawler_role_name  = "${var.project}-cur-crawler"
+  athena_workgroup   = "${var.project}-cost-analytics"
+  cur_report_name    = "${var.project}-cur"
+  cur_s3_prefix      = "cur"
+}
+
+################################################################################
+# S3 — CUR data bucket
+################################################################################
+
+resource "aws_s3_bucket" "cur_data" {
+  bucket = local.cur_bucket_name
+}
+
+resource "aws_s3_bucket_public_access_block" "cur_data" {
+  bucket                  = aws_s3_bucket.cur_data.id
+  block_public_acls       = true
+  block_public_policy     = true
+  ignore_public_acls      = true
+  restrict_public_buckets = true
+}
+
+resource "aws_s3_bucket_server_side_encryption_configuration" "cur_data" {
+  bucket = aws_s3_bucket.cur_data.id
+
+  rule {
+    apply_server_side_encryption_by_default {
+      sse_algorithm = "AES256"
+    }
+  }
+}
+
+resource "aws_s3_bucket_lifecycle_configuration" "cur_data" {
+  bucket = aws_s3_bucket.cur_data.id
+
+  rule {
+    id     = "archive-and-expire"
+    status = "Enabled"
+    filter {}
+
+    transition {
+      days          = 90
+      storage_class = "STANDARD_IA"
+    }
+
+    expiration {
+      days = 365
+    }
+  }
+}
+
+resource "aws_s3_bucket_policy" "cur_data" {
+  bucket = aws_s3_bucket.cur_data.id
+
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [
+      {
+        Sid       = "AllowDataExportsDelivery"
+        Effect    = "Allow"
+        Principal = { Service = "bcm-data-exports.amazonaws.com" }
+        Action = [
+          "s3:PutObject",
+          "s3:GetBucketPolicy",
+        ]
+        Resource = [
+          aws_s3_bucket.cur_data.arn,
+          "${aws_s3_bucket.cur_data.arn}/*",
+        ]
+        Condition = {
+          StringEquals = {
+            "aws:SourceAccount" = var.aws_account_id
+          }
+        }
+      },
+    ]
+  })
+}
+
+################################################################################
+# S3 — Athena query results bucket
+################################################################################
+
+resource "aws_s3_bucket" "athena_results" {
+  bucket = local.athena_bucket_name
+}
+
+resource "aws_s3_bucket_public_access_block" "athena_results" {
+  bucket                  = aws_s3_bucket.athena_results.id
+  block_public_acls       = true
+  block_public_policy     = true
+  ignore_public_acls      = true
+  restrict_public_buckets = true
+}
+
+resource "aws_s3_bucket_server_side_encryption_configuration" "athena_results" {
+  bucket = aws_s3_bucket.athena_results.id
+
+  rule {
+    apply_server_side_encryption_by_default {
+      sse_algorithm = "AES256"
+    }
+  }
+}
+
+resource "aws_s3_bucket_lifecycle_configuration" "athena_results" {
+  bucket = aws_s3_bucket.athena_results.id
+
+  rule {
+    id     = "expire-query-results"
+    status = "Enabled"
+    filter {}
+
+    expiration {
+      days = 7
+    }
+  }
+}
+
+################################################################################
+# CUR 2.0 export via Data Exports (must be in us-east-1)
+################################################################################
+
+resource "aws_bcmdataexports_export" "cur" {
+  provider = aws.us_east_1
+
+  export {
+    name = local.cur_report_name
+
+    data_query {
+      query_statement = "SELECT identity_line_item_id, identity_time_interval, bill_invoice_id, bill_invoicing_entity, bill_billing_entity, bill_bill_type, bill_payer_account_id, bill_billing_period_start_date, bill_billing_period_end_date, line_item_usage_account_id, line_item_line_item_type, line_item_usage_start_date, line_item_usage_end_date, line_item_product_code, line_item_usage_type, line_item_operation, line_item_availability_zone, line_item_resource_id, line_item_usage_amount, line_item_normalization_factor, line_item_normalized_usage_amount, line_item_currency_code, line_item_unblended_rate, line_item_unblended_cost, line_item_blended_rate, line_item_blended_cost, line_item_line_item_description, product_product_name, product_region, pricing_unit, pricing_public_on_demand_cost, pricing_public_on_demand_rate, pricing_term, pricing_offering_class, resource_tags_user_team, resource_tags_user_service, resource_tags_user_environment, resource_tags_user_repo, resource_tags_user_managed_by FROM COST_AND_USAGE_REPORT"
+
+      table_configurations = {
+        COST_AND_USAGE_REPORT = {
+          TIME_GRANULARITY                      = "DAILY"
+          INCLUDE_RESOURCES                     = "TRUE"
+          INCLUDE_MANUAL_DISCOUNT_COMPATIBILITY = "FALSE"
+          INCLUDE_SPLIT_COST_ALLOCATION_DATA    = "FALSE"
+        }
+      }
+    }
+
+    destination_configurations {
+      s3_destination {
+        s3_bucket = aws_s3_bucket.cur_data.id
+        s3_prefix = local.cur_s3_prefix
+        s3_region = var.region
+
+        s3_output_configurations {
+          overwrite   = "OVERWRITE_REPORT"
+          format      = "PARQUET"
+          compression = "PARQUET"
+          output_type = "CUSTOM"
+        }
+      }
+    }
+
+    refresh_cadence {
+      frequency = "SYNCHRONOUS"
+    }
+  }
+}
+
+################################################################################
+# Glue — catalog database + crawler
+################################################################################
+
+resource "aws_glue_catalog_database" "cur" {
+  name = local.glue_database_name
+}
+
+resource "aws_iam_role" "cur_crawler" {
+  name = local.crawler_role_name
+
+  assume_role_policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [{
+      Effect    = "Allow"
+      Principal = { Service = "glue.amazonaws.com" }
+      Action    = "sts:AssumeRole"
+    }]
+  })
+}
+
+resource "aws_iam_role_policy_attachment" "cur_crawler_glue" {
+  role       = aws_iam_role.cur_crawler.name
+  policy_arn = "arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole"
+}
+
+resource "aws_iam_role_policy" "cur_crawler_s3" {
+  name = "${local.crawler_role_name}-s3"
+  role = aws_iam_role.cur_crawler.id
+
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [{
+      Sid    = "ReadCURData"
+      Effect = "Allow"
+      Action = [
+        "s3:GetObject",
+        "s3:ListBucket",
+      ]
+      Resource = [
+        aws_s3_bucket.cur_data.arn,
+        "${aws_s3_bucket.cur_data.arn}/*",
+      ]
+    }]
+  })
+}
+
+resource "aws_glue_crawler" "cur" {
+  name          = "${var.project}-cur-crawler"
+  database_name = aws_glue_catalog_database.cur.name
+  role          = aws_iam_role.cur_crawler.arn
+  schedule      = "cron(0 6 * * ? *)" # Daily at 06:00 UTC, before 08:00 reports
+
+  s3_target {
+    path = "s3://${aws_s3_bucket.cur_data.id}/${local.cur_s3_prefix}/"
+  }
+
+  schema_change_policy {
+    update_behavior = "UPDATE_IN_DATABASE"
+    delete_behavior = "DELETE_FROM_DATABASE"
+  }
+
+  configuration = jsonencode({
+    Version = 1.0
+    Grouping = {
+      TableGroupingPolicy = "CombineCompatibleSchemas"
+    }
+  })
+}
+
+################################################################################
+# Athena workgroup
+################################################################################
+
+resource "aws_athena_workgroup" "cur" {
+  name = local.athena_workgroup
+
+  configuration {
+    enforce_workgroup_configuration = true
+    bytes_scanned_cutoff_per_query  = 104857600 # 100 MB safety limit
+
+    result_configuration {
+      output_location = "s3://${aws_s3_bucket.athena_results.id}/"
+
+      encryption_configuration {
+        encryption_option = "SSE_S3"
+      }
+    }
+  }
+}
+
+################################################################################
+# Billing protection — CloudWatch billing alarm (us-east-1)
+################################################################################
+
+# SNS topic in us-east-1 — CloudWatch billing alarms can only target same-region SNS.
+# A Lambda forwarder (us-east-1 → eu-central-1) can be added later for Slack integration.
+# For now, the budget notifications below go directly to the main alerts topic.
+resource "aws_sns_topic" "billing_alarm" {
+  provider = aws.us_east_1
+  name     = "${var.project}-billing-alarm"
+}
+
+resource "aws_cloudwatch_metric_alarm" "billing" {
+  provider = aws.us_east_1
+
+  alarm_name          = "${var.project}-billing-alarm"
+  alarm_description   = "Account estimated charges exceeded $${var.billing_alarm_threshold_usd}"
+  comparison_operator = "GreaterThanThreshold"
+  evaluation_periods  = 1
+  metric_name         = "EstimatedCharges"
+  namespace           = "AWS/Billing"
+  period              = 21600 # 6 hours
+  statistic           = "Maximum"
+  threshold           = var.billing_alarm_threshold_usd
+  treat_missing_data  = "missing"
+
+  dimensions = {
+    Currency = "USD"
+  }
+
+  alarm_actions = [aws_sns_topic.billing_alarm.arn]
+  ok_actions    = [aws_sns_topic.billing_alarm.arn]
+}
+
+################################################################################
+# Billing protection — account-level AWS Budget (notifications only)
+################################################################################
+
+resource "aws_budgets_budget" "account" {
+  name         = "${var.project}-account-monthly"
+  budget_type  = "COST"
+  limit_amount = tostring(var.account_budget_usd)
+  limit_unit   = "USD"
+  time_unit    = "MONTHLY"
+
+  # 80% warning
+  notification {
+    comparison_operator       = "GREATER_THAN"
+    notification_type         = "ACTUAL"
+    threshold                 = 80
+    threshold_type            = "PERCENTAGE"
+    subscriber_sns_topic_arns = [var.alerts_topic_arn]
+  }
+
+  # 100% critical
+  notification {
+    comparison_operator       = "GREATER_THAN"
+    notification_type         = "ACTUAL"
+    threshold                 = 100
+    threshold_type            = "PERCENTAGE"
+    subscriber_sns_topic_arns = [var.alerts_topic_arn]
+  }
+}
diff --git a/terraform/platform/cost-analytics/outputs.tf b/terraform/platform/cost-analytics/outputs.tf
new file mode 100644
index 0000000..64d1160
--- /dev/null
+++ b/terraform/platform/cost-analytics/outputs.tf
@@ -0,0 +1,24 @@
+output "glue_database_name" {
+  description = "Glue catalog database name for CUR data"
+  value       = aws_glue_catalog_database.cur.name
+}
+
+output "glue_table_name" {
+  description = "Glue catalog table name (discovered by crawler)"
+  value       = "${var.project}_cur"
+}
+
+output "athena_workgroup_name" {
+  description = "Athena workgroup name for cost queries"
+  value       = aws_athena_workgroup.cur.name
+}
+
+output "athena_results_bucket_arn" {
+  description = "ARN of the S3 bucket for Athena query results"
+  value       = aws_s3_bucket.athena_results.arn
+}
+
+output "cur_data_bucket_arn" {
+  description = "ARN of the S3 bucket containing CUR Parquet data"
+  value       = aws_s3_bucket.cur_data.arn
+}
diff --git a/terraform/platform/cost-analytics/variables.tf b/terraform/platform/cost-analytics/variables.tf
new file mode 100644
index 0000000..d6e4312
--- /dev/null
+++ b/terraform/platform/cost-analytics/variables.tf
@@ -0,0 +1,31 @@
+variable "project" {
+  description = "Project name used for resource naming"
+  type        = string
+}
+
+variable "region" {
+  description = "AWS region for Glue/Athena resources"
+  type        = string
+}
+
+variable "aws_account_id" {
+  description = "AWS account ID"
+  type        = string
+}
+
+variable "alerts_topic_arn" {
+  description = "ARN of the javabin-alerts SNS topic for budget notifications"
+  type        = string
+}
+
+variable "billing_alarm_threshold_usd" {
+  description = "CloudWatch billing alarm threshold in USD"
+  type        = number
+  default     = 200
+}
+
+variable "account_budget_usd" {
+  description = "Account-level monthly budget in USD — notifications at 80% and 100% (no enforcement action)"
+  type        = number
+  default     = 500
+}
diff --git a/terraform/platform/lambdas/main.tf b/terraform/platform/lambdas/main.tf
index 2156008..5894fbd 100644
--- a/terraform/platform/lambdas/main.tf
+++ b/terraform/platform/lambdas/main.tf
@@ -69,6 +69,10 @@ data "archive_file" "cost_report" {
     content  = file("${local.lambda_src_path}/shared/constants.py")
     filename = "shared/constants.py"
   }
+  source {
+    content  = file("${local.lambda_src_path}/shared/athena.py")
+    filename = "shared/athena.py"
+  }
 }
 
 data "archive_file" "daily_cost_check" {
@@ -92,6 +96,10 @@ data "archive_file" "daily_cost_check" {
     content  = file("${local.lambda_src_path}/shared/constants.py")
     filename = "shared/constants.py"
   }
+  source {
+    content  = file("${local.lambda_src_path}/shared/athena.py")
+    filename = "shared/athena.py"
+  }
 }
 
 data "archive_file" "compliance_reporter" {
@@ -313,6 +321,49 @@ resource "aws_iam_role_policy" "cost_report" {
           }
         }
       },
+      {
+        Sid    = "AthenaQuery"
+        Effect = "Allow"
+        Action = [
+          "athena:StartQueryExecution",
+          "athena:GetQueryExecution",
+          "athena:GetQueryResults",
+          "athena:StopQueryExecution",
+        ]
+        Resource = "arn:aws:athena:${var.region}:${var.aws_account_id}:workgroup/${var.athena_workgroup}"
+      },
+      {
+        Sid    = "GlueReadCatalog"
+        Effect = "Allow"
+        Action = [
+          "glue:GetDatabase",
+          "glue:GetTable",
+          "glue:GetPartitions",
+        ]
+        Resource = [
+          "arn:aws:glue:${var.region}:${var.aws_account_id}:catalog",
+          "arn:aws:glue:${var.region}:${var.aws_account_id}:database/${var.cur_glue_database}",
+          "arn:aws:glue:${var.region}:${var.aws_account_id}:table/${var.cur_glue_database}/${var.cur_glue_table}",
+        ]
+      },
+      {
+        Sid    = "S3ReadCURData"
+        Effect = "Allow"
+        Action = ["s3:GetObject", "s3:ListBucket"]
+        Resource = [
+          var.cur_data_bucket_arn,
+          "${var.cur_data_bucket_arn}/*",
+        ]
+      },
+      {
+        Sid    = "S3WriteAthenaResults"
+        Effect = "Allow"
+        Action = ["s3:PutObject", "s3:GetObject", "s3:GetBucketLocation", "s3:AbortMultipartUpload", "s3:ListBucket"]
+        Resource = [
+          var.athena_results_bucket_arn,
+          "${var.athena_results_bucket_arn}/*",
+        ]
+      },
     ]
   })
 }
@@ -355,6 +406,49 @@ resource "aws_iam_role_policy" "daily_cost_check" {
         Action   = "ce:GetCostAndUsage"
         Resource = "*"
       },
+      {
+        Sid    = "AthenaQuery" # start, poll, read and cancel queries; limited to the single workgroup below
+        Effect = "Allow"
+        Action = [
+          "athena:StartQueryExecution",
+          "athena:GetQueryExecution",
+          "athena:GetQueryResults",
+          "athena:StopQueryExecution",
+        ]
+        Resource = "arn:aws:athena:${var.region}:${var.aws_account_id}:workgroup/${var.athena_workgroup}"
+      },
+      {
+        Sid    = "GlueReadCatalog" # read-only Glue Data Catalog lookups for the CUR database and table
+        Effect = "Allow"
+        Action = [
+          "glue:GetDatabase",
+          "glue:GetTable",
+          "glue:GetPartitions",
+        ]
+        Resource = [ # catalog, database and table ARNs are all listed; nothing outside the CUR table is readable
+          "arn:aws:glue:${var.region}:${var.aws_account_id}:catalog",
+          "arn:aws:glue:${var.region}:${var.aws_account_id}:database/${var.cur_glue_database}",
+          "arn:aws:glue:${var.region}:${var.aws_account_id}:table/${var.cur_glue_database}/${var.cur_glue_table}",
+        ]
+      },
+      {
+        Sid    = "S3ReadCURData" # read-only access to the CUR data bucket (no write actions granted)
+        Effect = "Allow"
+        Action = ["s3:GetObject", "s3:ListBucket"]
+        Resource = [ # bucket ARN serves the bucket-level action, the "/*" ARN the object-level one
+          var.cur_data_bucket_arn,
+          "${var.cur_data_bucket_arn}/*",
+        ]
+      },
+      {
+        Sid    = "S3WriteAthenaResults" # read/write on the Athena query-results bucket
+        Effect = "Allow"
+        Action = ["s3:PutObject", "s3:GetObject", "s3:GetBucketLocation", "s3:AbortMultipartUpload", "s3:ListBucket"]
+        Resource = [ # both forms are needed: the action list mixes bucket-level and object-level actions
+          var.athena_results_bucket_arn,
+          "${var.athena_results_bucket_arn}/*",
+        ]
+      },
     ]
   })
 }
@@ -695,7 +789,7 @@ resource "aws_lambda_function" "cost_report" {
   role             = aws_iam_role.cost_report.arn
   handler          = "handler.handler"
   runtime          = "python3.12"
-  timeout          = 60
+  timeout          = 120
   memory_size      = 256
   filename         = data.archive_file.cost_report.output_path
   source_code_hash = data.archive_file.cost_report.output_base64sha256
@@ -704,6 +798,9 @@ resource "aws_lambda_function" "cost_report" {
     variables = {
       COST_WEBHOOK_PARAM = "/javabin/slack/platform-cost-alerts-webhook"
       DEPLOY_REGION      = var.region
+      CUR_DATABASE       = var.cur_glue_database
+      CUR_TABLE          = var.cur_glue_table
+      ATHENA_WORKGROUP   = var.athena_workgroup
     }
   }
 }
@@ -713,7 +810,7 @@ resource "aws_lambda_function" "daily_cost_check" {
   role             = aws_iam_role.daily_cost_check.arn
   handler          = "handler.handler"
   runtime          = "python3.12"
-  timeout          = 60
+  timeout          = 90
   memory_size      = 128
   filename         = data.archive_file.daily_cost_check.output_path
   source_code_hash = data.archive_file.daily_cost_check.output_base64sha256
@@ -721,6 +818,9 @@ resource "aws_lambda_function" "daily_cost_check" {
   environment {
     variables = {
       COST_WEBHOOK_PARAM = "/javabin/slack/platform-cost-alerts-webhook"
+      CUR_DATABASE       = var.cur_glue_database
+      CUR_TABLE          = var.cur_glue_table
+      ATHENA_WORKGROUP   = var.athena_workgroup
     }
   }
 }
diff --git a/terraform/platform/lambdas/variables.tf b/terraform/platform/lambdas/variables.tf
index 9e3d348..ffc97cd 100644
--- a/terraform/platform/lambdas/variables.tf
+++ b/terraform/platform/lambdas/variables.tf
@@ -95,3 +95,47 @@ variable "developer_boundary_arn" {
   type        = string
 }
 
+# --- Cost analytics (CUR / Athena) ---
+# Passed from the cost-analytics module outputs. Inside this module they feed
+# the CUR_DATABASE / CUR_TABLE / ATHENA_WORKGROUP Lambda environment variables
+# and scope the Athena, Glue and S3 statements of the cost Lambdas' IAM policies.
+
+variable "cur_glue_database" {
+  description = "Glue catalog database name for CUR data"
+  type        = string
+}
+
+variable "cur_glue_table" {
+  description = "Glue catalog table name for CUR data"
+  type        = string
+}
+
+variable "athena_workgroup" {
+  description = "Athena workgroup name for cost queries"
+  type        = string
+}
+
+variable "athena_results_bucket_arn" {
+  description = "ARN of the S3 bucket for Athena query results"
+  type        = string
+
+  # The IAM policies use this value both as-is and with "/*" appended, so it
+  # must be a bare bucket ARN: not a bucket name and not an object ARN.
+  validation {
+    condition     = can(regex("^arn:aws[a-z-]*:s3:::[^/]+$", var.athena_results_bucket_arn))
+    error_message = "The athena_results_bucket_arn value must be an S3 bucket ARN such as arn:aws:s3:::my-bucket, without a trailing slash or object key."
+  }
+}
+
+variable "cur_data_bucket_arn" {
+  description = "ARN of the S3 bucket containing CUR Parquet data"
+  type        = string
+
+  # The IAM policies use this value both as-is and with "/*" appended, so it
+  # must be a bare bucket ARN: not a bucket name and not an object ARN.
+  validation {
+    condition     = can(regex("^arn:aws[a-z-]*:s3:::[^/]+$", var.cur_data_bucket_arn))
+    error_message = "The cur_data_bucket_arn value must be an S3 bucket ARN such as arn:aws:s3:::my-bucket, without a trailing slash or object key."
+  }
+}
+
diff --git a/terraform/platform/main.tf b/terraform/platform/main.tf
index 6f7a931..2eca0f1 100644
--- a/terraform/platform/main.tf
+++ b/terraform/platform/main.tf
@@ -9,9 +9,10 @@
 #   ingress     ALB, ACM wildcard cert, Route53 DNS
 #   iam         GitHub OIDC, CI roles, permission boundary, ECS execution role
 #   compute     ECS cluster, public ECR images
-#   monitoring  SNS topics, EventBridge rules, Config, GuardDuty, Security Hub
-#   lambdas     slack-alert, cost-report, daily-cost-check, auto-tagger
-#   identity    Cognito pools (Identity Center is in terraform/org/)
+#   monitoring       SNS topics, EventBridge rules, Config, GuardDuty, Security Hub
+#   cost-analytics   CUR, Athena, Glue, billing alarms, account budget
+#   lambdas          slack-alert, cost-report, daily-cost-check, auto-tagger
+#   identity         Cognito pools (Identity Center is in terraform/org/)
 ################################################################################
 
 module "networking" {
@@ -53,6 +54,21 @@ module "monitoring" {
   aws_account_id = var.aws_account_id
 }
 
+module "cost_analytics" {
+  source                      = "./cost-analytics" # CUR, Athena, Glue, billing alarms, account budget
+  project                     = var.project
+  region                      = var.region
+  aws_account_id              = var.aws_account_id
+  alerts_topic_arn            = module.monitoring.alerts_topic_arn # topic exported by the monitoring module; presumably the alarm/budget notification target - confirm in ./cost-analytics
+  billing_alarm_threshold_usd = 200 # USD threshold for the CloudWatch billing alarm
+  account_budget_usd          = 500 # USD limit for the account-level budget
+
+  providers = { # the module receives both the default provider and the us_east_1 alias
+    aws           = aws
+    aws.us_east_1 = aws.us_east_1 # NOTE(review): presumably for billing resources that only exist in us-east-1 - confirm in ./cost-analytics
+  }
+}
+
 module "lambdas" {
   source                         = "./lambdas"
   project                        = var.project
@@ -74,6 +90,11 @@ module "lambdas" {
   route53_zone_id                = module.ingress.route53_zone_id
   org_boundary_arn               = module.iam.org_boundary_arn
   developer_boundary_arn         = module.iam.developer_boundary_arn
+  cur_glue_database              = module.cost_analytics.glue_database_name
+  cur_glue_table                 = module.cost_analytics.glue_table_name
+  athena_workgroup               = module.cost_analytics.athena_workgroup_name
+  athena_results_bucket_arn      = module.cost_analytics.athena_results_bucket_arn
+  cur_data_bucket_arn            = module.cost_analytics.cur_data_bucket_arn
 }
 
 module "identity" {