diff --git a/.github/workflows/llm-cicd.yml b/.github/workflows/llm-cicd.yml
new file mode 100644
index 0000000..6160639
--- /dev/null
+++ b/.github/workflows/llm-cicd.yml
@@ -0,0 +1,197 @@
+name: LLM CI/CD Pipeline
+
+on:
+  push:
+    branches: [ main, develop ]
+    paths:
+      - "api/**"
+      - "astroml/llm/**"
+      - "api/tests/test_llm*"
+  pull_request:
+    branches: [ main, develop ]
+    paths:
+      - "api/**"
+      - "astroml/llm/**"
+      - "api/tests/test_llm*"
+
+env:
+  REGISTRY: ghcr.io
+  IMAGE_NAME: ${{ github.repository }}
+  LLM_COST_THRESHOLD: 0.50
+  LLM_LATENCY_BUDGET_MS: 2000
+  CANARY_NAMESPACE: astroml
+
+jobs:
+  llm-test:
+    name: LLM Tests + Cost Awareness
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+          cache: pip
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+          pip install pytest pytest-asyncio httpx fastapi sqlalchemy prometheus-client
+
+      - name: Run LLM unit tests
+        run: pytest api/tests/test_llm.py -v --tb=short
+
+      - name: Run LLM health tests
+        run: pytest api/tests/test_llm_health.py -v --tb=short
+
+      - name: Run cost-aware tests
+        run: pytest api/tests/test_llm_cost_aware.py -v --tb=short
+
+      - name: Upload test results
+        uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: llm-test-results
+          path: |
+            .pytest_cache/
+            coverage.xml
+
+  build-llm-image:
+    name: Build LLM API Image
+    runs-on: ubuntu-latest
+    needs: llm-test
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract metadata
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+          tags: |
+            type=ref,event=branch
+            type=sha,prefix=
+
+      - name: Build LLM production image
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          dockerfile: api/Dockerfile
+          target: production
+          push: true
+          tags: |
+            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:llm-${{ github.sha }}
+            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:llm-latest
+          labels: ${{ steps.meta.outputs.labels }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+          build-args: |
+            LLM_PROVIDER=openai
+
+  canary-deploy:
+    name: Canary Deploy + Validate
+    runs-on: ubuntu-latest
+    needs: build-llm-image
+    if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/develop'
+    environment:
+      name: canary
+      url: https://canary.astroml.example.com
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up kubectl
+        uses: azure/setup-kubectl@v3
+        with:
+          version: 'v1.28.0'
+
+      - name: Configure kubectl
+        run: |
+          echo "${{ secrets.KUBE_CONFIG_CANARY }}" | base64 -d > kubeconfig
+          export KUBECONFIG=kubeconfig
+
+      - name: Deploy canary
+        run: |
+          export KUBECONFIG=kubeconfig
+          export IMAGE_TAG="llm-${{ github.sha }}"
+          export REGISTRY="${{ env.REGISTRY }}"
+          export REPO="${{ github.repository }}"
+          ./scripts/canary-deploy.sh
+
+      - name: Wait for canary rollout
+        run: |
+          export KUBECONFIG=kubeconfig
+          kubectl rollout status deployment/astroml-api-canary -n ${{ env.CANARY_NAMESPACE }} --timeout=300s
+
+      - name: Health check canary
+        run: |
+          export KUBECONFIG=kubeconfig
+          CANARY_POD=$(kubectl get pods -n ${{ env.CANARY_NAMESPACE }} -l app=astroml-api,version=canary -o jsonpath='{.items[0].metadata.name}')
+          kubectl port-forward -n ${{ env.CANARY_NAMESPACE }} pod/$CANARY_POD 9000:8000 &
+          PF_PID=$!
+          sleep 5
+          curl -f http://localhost:9000/health || (kill $PF_PID && exit 1)
+          curl -f http://localhost:9000/api/v1/llm/health || (kill $PF_PID && exit 1)
+          kill $PF_PID
+
+      - name: Validate cost metrics
+        run: |
+          export KUBECONFIG=kubeconfig
+          CANARY_POD=$(kubectl get pods -n ${{ env.CANARY_NAMESPACE }} -l app=astroml-api,version=canary -o jsonpath='{.items[0].metadata.name}')
+          kubectl exec -n ${{ env.CANARY_NAMESPACE }} pod/$CANARY_POD -- python -c "
+          from astroml.llm.tracker import global_tracker
+          from astroml.llm.metrics import LLM_COST_USD_TOTAL
+          assert global_tracker.total_cost < 100.0, 'Session cost exceeded threshold'
+          print('Cost check passed: $%.4f' % global_tracker.total_cost)
+          print('Metrics registered: LLM cost counter active')
+          "
+
+      - name: Promote canary
+        if: success()
+        run: |
+          export KUBECONFIG=kubeconfig
+          ./scripts/canary-promote.sh
+
+      - name: Auto rollback on failure
+        if: failure()
+        run: |
+          export KUBECONFIG=kubeconfig
+          ./scripts/auto-rollback.sh
+
+      - name: Cleanup canary on failure
+        if: failure()
+        run: |
+          export KUBECONFIG=kubeconfig
+          kubectl delete deployment astroml-api-canary -n ${{ env.CANARY_NAMESPACE }} --ignore-not-found=true
+
+  notify:
+    name: Notify Status
+    runs-on: ubuntu-latest
+    needs: [llm-test, canary-deploy]
+    if: always()
+    steps:
+      - name: Slack notification
+        uses: 8398a7/action-slack@v3
+        with:
+          status: ${{ job.status }}
+          text: |
+            LLM CI/CD Pipeline Status: ${{ job.status }}
+            Branch: ${{ github.ref }}
+            Commit: ${{ github.sha }}
+            Author: ${{ github.actor }}
+        env:
+          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
diff --git a/Makefile b/Makefile
index 909d4c6..919deb4 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-.PHONY: help quickstart test test-api lint format clean install run-api
+.PHONY: help quickstart test test-api lint format clean install run-api canary-deploy canary-promote rollback-llm
 
 help:
 	@echo "AstroML Development Commands"
@@ -13,6 +13,9 @@ help:
 	@echo "make install             Install development dependencies"
 	@echo "make clean               Clean build artifacts and cache"
 	@echo "make run-api             Start the FastAPI dev server on localhost:8000"
+	@echo "make canary-deploy       Deploy LLM canary to Kubernetes"
+	@echo "make canary-promote      Promote canary to stable"
+	@echo "make rollback-llm        Rollback LLM canary deployment"
 	@echo ""
 
 quickstart:
@@ -64,3 +67,24 @@ dev-setup:
 	@./scripts/seed_data.sh
 	@./scripts/health_check.sh
 	@echo "✅ Development environment ready."
+
+.PHONY: canary-deploy
+canary-deploy:
+	@echo "🚀 Deploying LLM canary..."
+	REGISTRY=$(shell grep -E '^REGISTRY' .github/workflows/llm-cicd.yml | head -n1 | sed 's/.*: //' | tr -d '"')
+	IMAGE_TAG=llm-$(shell git rev-parse --short HEAD)
+	REPO=$(shell basename $$(pwd))
+	NAMESPACE=astroml ./scripts/canary-deploy.sh
+
+.PHONY: canary-promote
+canary-promote:
+	@echo "✅ Promoting canary to stable..."
+	REGISTRY=$(shell grep -E '^REGISTRY' .github/workflows/llm-cicd.yml | head -n1 | sed 's/.*: //' | tr -d '"')
+	IMAGE_TAG=llm-$(shell git rev-parse --short HEAD)
+	REPO=$(shell basename $$(pwd))
+	NAMESPACE=astroml ./scripts/canary-promote.sh
+
+.PHONY: rollback-llm
+rollback-llm:
+	@echo "🔄 Rolling back LLM deployment..."
+	NAMESPACE=astroml STABLE_DEPLOYMENT=astroml-api ./scripts/auto-rollback.sh
diff --git a/api/app.py b/api/app.py
index dd91dc1..f848d7a 100644
--- a/api/app.py
+++ b/api/app.py
@@ -23,8 +23,9 @@
 from contextlib import asynccontextmanager
 from typing import AsyncGenerator
 
-from fastapi import FastAPI, Request
+from fastapi import FastAPI, Request, Response
 from fastapi.middleware.cors import CORSMiddleware
+from prometheus_client import CONTENT_TYPE_LATEST, generate_latest
 
 from api.auth.middleware import AuthMiddleware
 from api.audit_middleware import AuditLoggingMiddleware
@@ -47,6 +48,7 @@
     feedback_router,
     fraud_router,
     loyalty_router,
+    llm_health_router,
     mentorship_router,
     models_router,
     monitoring_router,
@@ -64,6 +66,7 @@
 )
 from api.routers.monitoring import record_latency
 from api.routers.ws import poll_and_broadcast_transactions
+from astroml.llm import metrics as _llm_metrics
 
 # Setup distributed tracing (issue #336)
 _tracer_provider = setup_tracing()
@@ -174,6 +177,7 @@ async def _latency_middleware(request: Request, call_next):
 app.include_router(streaming_router)
 app.include_router(voice_router)
 app.include_router(llm_router)
+app.include_router(llm_health_router)
 app.include_router(reports_router)
 app.include_router(alerts_router)
 
@@ -183,6 +187,11 @@ async def health():
     return {"status": "ok"}
 
 
+@app.get("/metrics", tags=["ops"])
+async def prometheus_metrics():
+    return Response(generate_latest(), media_type=CONTENT_TYPE_LATEST)
+
+
 @app.get("/api/v1", tags=["ops"])
 async def api_root():
     return {"version": settings.api_version, "status": "ok"}
diff --git a/api/routers/__init__.py b/api/routers/__init__.py
index 426bf17..2527f31 100644
--- a/api/routers/__init__.py
+++ b/api/routers/__init__.py
@@ -24,6 +24,7 @@
 from api.routers.ws import router as ws_router
 from api.routers.streaming import router as streaming_router
 from api.routers.llm import router as llm_router
+from api.routers.llm_health import router as llm_health_router
 from api.routers.reports import router as reports_router
 from api.routers.alerts import router as alerts_router
 
@@ -54,6 +55,7 @@
     "ws_router",
     "streaming_router",
     "llm_router",
+    "llm_health_router",
     "reports_router",
     "alerts_router",
 ]
diff --git a/api/routers/alerts.py b/api/routers/alerts.py
index 67156ff..e33a692 100644
--- a/api/routers/alerts.py
+++ b/api/routers/alerts.py
@@ -1,19 +1,33 @@
 """Alerts API router (issue XXX)."""
 from __future__ import annotations
 
-from typing import Optional
-from fastapi import APIRouter, Depends, Query
+from datetime import datetime, timedelta
+from typing import List, Optional
+
+from fastapi import APIRouter, Depends, HTTPException, Query, status
 from sqlalchemy import select
 from sqlalchemy.orm import Session
 
 from api.database import get_sync_db
-from api.models.orm import FraudAlert
+from api.models.orm import ApiTransaction, FraudAlert
 from api.schemas import (
-    PrioritizedAlertsResponse,
+    FraudAlertOut,
+    FraudAlertsResponse,
+    FraudExplanationOut,
     PrioritizedAlertOut,
+    PrioritizedAlertsResponse,
     TransactionSummaryOut,
+    # Predictive Alerts schemas
+    BehavioralBaseline,
+    BehavioralBaselineResponse,
+    DeviationAlert,
+    PredictiveAlertRequest,
+    PredictiveAlertResponse,
+    AlertGenerationRequest,
+    AlertGenerationResponse,
 )
 from api.services.alert_prioritization import alert_prioritizer
+from api.services.predictive_alerts import predictive_alert_service
 
 router = APIRouter(prefix="/api/v1/alerts", tags=["alerts"])
 
@@ -71,3 +85,106 @@ def get_prioritized_alerts(
         total_processed=total_processed,
         total_remaining=len(data),
     )
+
+
+@router.get("/predictive", response_model=PredictiveAlertResponse)
+def get_predictive_alerts(
+    account_id: str,
+    lookback_days: int = Query(30, ge=1, le=365),
+    metrics: Optional[List[str]] = Query(None),
+    sensitivity: str = Query("medium", pattern="^(low|medium|high)$"),
+    db: Session = Depends(get_sync_db),
+):
+    """
+    Generate predictive alerts for account behavior changes.
+    
+    Analyzes historical transaction data to establish behavioral baselines
+    and detects significant deviations that may indicate unusual activity.
+    """
+    # Validate account exists (basic check)
+    if not account_id or len(account_id) < 10:  # Stellar addresses are typically longer
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail="Invalid account ID format"
+        )
+    
+    # Use the predictive alert service
+    result = predictive_alert_service.generate_predictive_alerts(
+        account_id=account_id,
+        lookback_days=lookback_days,
+        metrics=metrics,
+        sensitivity=sensitivity
+    )
+    
+    # Check if service returned an error
+    if "error" in result:
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=result["error"]
+        )
+    
+    # Check for informational messages (no alerts generated)
+    if "message" in result and "alerts" not in result:
+        # Return empty alerts list with metadata
+        return PredictiveAlertResponse(
+            alerts=[],
+            baselines_used=[],
+            generated_at=datetime.utcnow(),
+            total_analyzed=0
+        )
+    
+    # Convert to response model
+    return PredictiveAlertResponse(
+        alerts=[DeviationAlert(**alert) for alert in result.get("alerts", [])],
+        baselines_used=[
+            BehavioralBaseline(**baseline) 
+            for baseline in result.get("baselines_used", [])
+        ],
+        generated_at=datetime.fromisoformat(result["generated_at"]) if isinstance(result.get("generated_at"), str) else result.get("generated_at", datetime.utcnow()),
+        total_analyzed=result.get("total_analyzed", 0)
+    )
+
+
+@router.post("/generate-explanations", response_model=AlertGenerationResponse)
+def generate_alert_explanations(
+    request: AlertGenerationRequest,
+):
+    """
+    Generate natural language explanations for detected anomalies.
+    """
+    try:
+        # Use the alert generator from the predictive service
+        explanations = []
+        for deviation in request.deviations:
+            # Convert Pydantic model back to dict for the generator
+            deviation_dict = deviation.dict()
+            explanation_result = predictive_alert_service.alert_generator.generate_explanation(
+                deviation.alert_id,
+                deviation.account_id,
+                {
+                    "metric_name": deviation.metric_name,
+                    "current_value": deviation.current_value,
+                    "expected_value": sum(deviation.expected_range) / 2 if deviation.expected_range else 0,
+                    "deviation_score": deviation.deviation_score,
+                    "severity": deviation.severity
+                }
+            )
+            explanations.append(explanation_result.get("explanation", "No explanation available"))
+        
+        return AlertGenerationResponse(
+            alerts=request.deviations,
+            explanations=explanations,
+            generated_at=datetime.utcnow()
+        )
+        
+    except Exception as e:
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=f"Failed to generate explanations: {str(e)}"
+        )
+
+
+@router.get("/predictive/status")
+def get_predictive_service_status():
+    """Get status of the predictive alerts service."""
+    return predictive_alert_service.get_service_status()
\ No newline at end of file
diff --git a/api/routers/llm.py b/api/routers/llm.py
index 81a51a2..2a4cd03 100644
--- a/api/routers/llm.py
+++ b/api/routers/llm.py
@@ -1,9 +1,11 @@
+import hashlib
 import os
 import time
+from typing import List, Dict, Any, AsyncGenerator, Optional, Union
 
 from fastapi import APIRouter, Depends, HTTPException, Request
 from fastapi.responses import StreamingResponse
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession
 from api.services.llm_explainer import TransactionExplainer
@@ -11,6 +13,7 @@
 from api.services.llm_context import MultiModalContextHandler
 from api.services.llm_validation import ResponseValidator
 from api.services.llm_rag import build_citations, build_rag_answer, retrieve_sources
+from api.services.translation import translation_service
 from astroml.llm.embedding_cache import EmbeddingCache
 from astroml.llm.embedding_drift import EmbeddingDriftMonitor
 from astroml.llm.memory import ConversationMemory
@@ -25,6 +28,14 @@
     LLMFeedbackOut,
     LLMFeedbackTrend,
     LLMPromptImprovement,
+    TranslationRequest,
+    TranslationResponse,
+    BatchTranslationRequest,
+    BatchTranslationResponse,
+    SupportedLanguagesResponse,
+    LocaleFormatRequest,
+    LocaleFormatResponse,
+    TranslationCacheStatsResponse,
 )
 from api.auth.dependencies import get_current_auth, AuthContext
 from typing import List, Dict, Any, AsyncGenerator, Callable, TypeVar
@@ -530,3 +541,153 @@ async def llm_prompt_improvements(db: AsyncSession = Depends(get_db)) -> list[LL
         )
         for feature, items in sorted(by_feature.items())
     ]
+
+
+# ─── Translation endpoints (Issue 1) ────────────────────────────────────────
+
+class TranslationRequest(BaseModel):
+    text: str = Field(..., min_length=1, max_length=50000)
+    target_language: str = Field(..., min_length=2, max_length=10)
+    source_language: Optional[str] = Field(default=None, min_length=2, max_length=10)
+    use_cache: bool = True
+
+
+class TranslationResponse(BaseModel):
+    translated_text: str
+    source_language: str
+    target_language: str
+    cached: bool
+    latency_ms: float
+
+
+class BatchTranslationRequest(BaseModel):
+    texts: List[str] = Field(..., min_length=1, max_length=100)
+    target_language: str = Field(..., min_length=2, max_length=10)
+    source_language: Optional[str] = Field(default=None, min_length=2, max_length=10)
+    use_cache: bool = True
+
+
+class BatchTranslationResponse(BaseModel):
+    translations: List[TranslationResponse]
+    total_latency_ms: float
+
+
+class SupportedLanguagesResponse(BaseModel):
+    languages: Dict[str, Dict[str, str]]
+
+
+@router.get("/translate/languages", response_model=SupportedLanguagesResponse)
+async def get_supported_languages(auth: AuthContext = Depends(get_current_auth)):
+    """Get list of supported languages for translation."""
+    return SupportedLanguagesResponse(languages=translation_service.get_supported_languages())
+
+
+@router.post("/translate", response_model=TranslationResponse)
+async def translate_text(
+    request: TranslationRequest,
+    auth: AuthContext = Depends(get_current_auth),
+):
+    """Translate text to target language."""
+    try:
+        result = await translation_service.translate(
+            text=request.text,
+            target_language=request.target_language,
+            source_language=request.source_language,
+            use_cache=request.use_cache,
+        )
+        return TranslationResponse(**result)
+    except ValueError as e:
+        raise HTTPException(status_code=400, detail=str(e))
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@router.post("/translate/batch", response_model=BatchTranslationResponse)
+async def translate_batch(
+    request: BatchTranslationRequest,
+    auth: AuthContext = Depends(get_current_auth),
+):
+    """Translate multiple texts to target language."""
+    try:
+        results = await translation_service.translate_batch(
+            texts=request.texts,
+            target_language=request.target_language,
+            source_language=request.source_language,
+            use_cache=request.use_cache,
+        )
+        total_latency = sum(r["latency_ms"] for r in results)
+        return BatchTranslationResponse(
+            translations=[TranslationResponse(**r) for r in results],
+            total_latency_ms=total_latency,
+        )
+    except ValueError as e:
+        raise HTTPException(status_code=400, detail=str(e))
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+class LocaleFormatRequest(BaseModel):
+    value: Union[float, int, str]
+    locale: str = Field(..., min_length=2, max_length=10)
+    format_type: str = Field(..., pattern="^(number|currency|percent|date|datetime)$")
+    currency_code: Optional[str] = Field(default=None, min_length=3, max_length=3)
+
+
+class LocaleFormatResponse(BaseModel):
+    formatted: str
+    locale: str
+    format_type: str
+
+
+@router.post("/translate/format", response_model=LocaleFormatResponse)
+async def format_locale(
+    request: LocaleFormatRequest,
+    auth: AuthContext = Depends(get_current_auth),
+):
+    """Format numbers, currencies, dates, etc. for a specific locale."""
+    try:
+        formatted = translation_service.format_locale(
+            value=request.value,
+            locale=request.locale,
+            format_type=request.format_type,
+            currency_code=request.currency_code,
+        )
+        return LocaleFormatResponse(
+            formatted=formatted,
+            locale=request.locale,
+            format_type=request.format_type,
+        )
+    except ValueError as e:
+        raise HTTPException(status_code=400, detail=str(e))
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+class TranslationCacheStatsResponse(BaseModel):
+    hits: int
+    misses: int
+    sets: int
+    invalidations: int
+    hit_rate: float
+    size: int
+
+
+@router.get("/translate/cache/stats", response_model=TranslationCacheStatsResponse)
+async def get_translation_cache_stats(auth: AuthContext = Depends(get_current_auth)):
+    """Get translation cache statistics."""
+    stats = translation_service.get_cache_stats()
+    return TranslationCacheStatsResponse(**stats)
+
+
+@router.post("/translate/cache/invalidate")
+async def invalidate_translation_cache(
+    text: Optional[str] = None,
+    auth: AuthContext = Depends(get_current_auth),
+):
+    """Invalidate translation cache (specific text or all)."""
+    if text:
+        translation_service.invalidate_cache(text)
+        return {"message": "Cache entry invalidated", "text_hash": hashlib.sha256(text.encode()).hexdigest()[:16]}
+    else:
+        count = translation_service.invalidate_all_cache()
+        return {"message": f"Invalidated {count} cache entries"}
diff --git a/api/routers/llm_health.py b/api/routers/llm_health.py
new file mode 100644
index 0000000..8d7e629
--- /dev/null
+++ b/api/routers/llm_health.py
@@ -0,0 +1,20 @@
+"""LLM Health and Provider Status API."""
+from __future__ import annotations
+
+from fastapi import APIRouter
+
+from astroml.llm.health import check_all_providers, check_provider_health
+
+router = APIRouter(prefix="/api/v1/llm", tags=["llm-health"])
+
+
+@router.get("/health")
+async def llm_health():
+    result = await check_all_providers()
+    return result
+
+
+@router.get("/health/{provider_name}")
+async def llm_provider_health(provider_name: str):
+    result = await check_provider_health(provider_name)
+    return result
diff --git a/api/schemas.py b/api/schemas.py
deleted file mode 100644
index 9d8b0ac..0000000
--- a/api/schemas.py
+++ /dev/null
@@ -1,769 +0,0 @@
-"""Pydantic schemas shared across all API routers."""
-from __future__ import annotations
-
-import re
-from datetime import datetime
-from typing import Any, Dict, List, Optional
-
-from pydantic import BaseModel, Field, field_validator
-
-
-# ─── Fraud ────────────────────────────────────────────────────────────────────
-
-class EdgeInput(BaseModel):
-    src: str
-    dst: str
-    amount: float = 0.0
-    timestamp: float = 0.0
-    asset: str = "XLM"
-
-
-class ScoreRequest(BaseModel):
-    accounts: List[str] = Field(..., max_length=50)
-    edges: List[EdgeInput] = Field(default_factory=list)
-
-
-class ScoreResponse(BaseModel):
-    scores: Dict[str, float]
-
-
-class FraudAlertOut(BaseModel):
-    id: int
-    account_id: str
-    pattern: Optional[str] = None
-    risk_score: float
-    risk_level: str
-    description: Optional[str] = None
-    detected_at: datetime
-
-    class Config:
-        from_attributes = True
-
-
-class FraudAlertsResponse(BaseModel):
-    data: List[FraudAlertOut]
-    page: int
-    page_size: int
-    total: int
-
-
-class FraudExplanationOut(BaseModel):
-    alert_id: int
-    explanation: str
-    generated_in_ms: float
-    cached: bool
-
-
-class TransactionSummaryOut(BaseModel):
-    hash: str
-    amount: float
-    asset_code: str
-    destination_account: Optional[str] = None
-    created_at: str
-
-
-class PrioritizedAlertOut(BaseModel):
-    id: int
-    account_id: str
-    pattern: Optional[str] = None
-    risk_score: float
-    risk_level: str
-    priority_score: float
-    priority_level: str
-    explanation: str
-    detected_at: datetime
-    recent_transactions: List[TransactionSummaryOut]
-    account_activity_score: float
-    is_duplicate: bool = False
-    duplicate_of: Optional[int] = None
-
-    class Config:
-        from_attributes = True
-
-
-class PrioritizedAlertsResponse(BaseModel):
-    data: List[PrioritizedAlertOut]
-    deduplication_reduction_pct: int
-    total_processed: int
-    total_remaining: int
-
-
-class RiskPoint(BaseModel):
-    date: str
-    score: float
-
-
-class FraudStatsResponse(BaseModel):
-    total_alerts: int
-    high_risk: int
-    medium_risk: int
-    low_risk: int
-    recent_alerts: List[FraudAlertOut]
-    risk_over_time: List[RiskPoint]
-
-
-# ─── Accounts ─────────────────────────────────────────────────────────────────
-
-class AccountOut(BaseModel):
-    account_id: str
-    balance: Optional[float] = None
-    sequence: Optional[int] = None
-    home_domain: Optional[str] = None
-    flags: int = 0
-    last_modified_ledger: Optional[int] = None
-    created_at: Optional[datetime] = None
-    updated_at: Optional[datetime] = None
-
-    class Config:
-        from_attributes = True
-
-
-class AccountsResponse(BaseModel):
-    data: List[AccountOut]
-    page: int
-    page_size: int
-    total: int
-
-
-class TransactionOut(BaseModel):
-    hash: str
-    ledger_sequence: int
-    source_account: str
-    created_at: datetime
-    fee: int
-    operation_count: int
-    successful: bool
-    memo_type: Optional[str] = None
-    memo: Optional[str] = None
-
-    class Config:
-        from_attributes = True
-
-
-class TransactionsResponse(BaseModel):
-    data: List[TransactionOut]
-    page: int
-    page_size: int
-    total: int
-
-
-class FraudSummaryOut(BaseModel):
-    account_id: str
-    total_alerts: int
-    high_risk: int
-    medium_risk: int
-    low_risk: int
-    latest_score: Optional[float] = None
-
-
-class LoyaltySummaryOut(BaseModel):
-    account_id: str
-    points_balance: int
-    tier_id: str
-    tier_name: str
-
-
-# ─── Monitoring ───────────────────────────────────────────────────────────────
-
-class ModelMetricsOut(BaseModel):
-    accuracy: Optional[float] = None
-    precision: Optional[float] = None
-    recall: Optional[float] = None
-    f1: Optional[float] = None
-    f1_score: Optional[float] = None   # alias populated from f1 for compatibility
-    auc: Optional[float] = None
-    auc_roc: Optional[float] = None    # alias populated from auc for compatibility
-    drift_score: Optional[float] = None
-    recorded_at: Optional[datetime] = None
-    
-    # LLM Tracking
-    llm_cost: Optional[float] = None
-    llm_prompt_tokens: Optional[int] = None
-    llm_completion_tokens: Optional[int] = None
-
-
-class PerformancePoint(BaseModel):
-    date: str
-    accuracy: Optional[float] = None
-    precision: Optional[float] = None
-    recall: Optional[float] = None
-    f1: Optional[float] = None
-    auc: Optional[float] = None
-
-
-class DriftReport(BaseModel):
-    features: Dict[str, float]
-    overall_drift: float
-    generated_at: datetime
-
-
-class PredictionStats(BaseModel):
-    total_predictions: int
-    anomaly_rate: float
-    avg_score: float
-    period_days: int
-
-
-class LatencyStats(BaseModel):
-    p50_ms: float
-    p95_ms: float
-    p99_ms: float
-
-
-# ─── Loyalty ──────────────────────────────────────────────────────────────────
-
-class LoyaltyTierOut(BaseModel):
-    id: str
-    name: str
-    threshold: int
-    multiplier: float
-    color: str
-
-
-class BenefitOut(BaseModel):
-    id: str
-    title: str
-    description: str
-
-
-class NextTierInfo(BaseModel):
-    tier: LoyaltyTierOut
-    remaining_to_upgrade: int
-    progress_pct: int
-
-
-class LoyaltySummaryFull(BaseModel):
-    current_tier: LoyaltyTierOut
-    points_balance: int
-    next_tier: Optional[NextTierInfo] = None
-    benefits: List[BenefitOut]
-
-
-class PointsTransactionOut(BaseModel):
-    id: str
-    date: str
-    type: str  # earn | redeem | adjust
-    points: int
-    source: Optional[str] = None
-    note: Optional[str] = None
-
-
-class PointsHistoryResponse(BaseModel):
-    data: List[PointsTransactionOut]
-    page: int
-    page_size: int
-    total: int
-
-
-class RedeemRequest(BaseModel):
-    points: int = Field(..., gt=0)
-    reward_id: Optional[str] = None
-
-
-class RedeemResponse(BaseModel):
-    new_balance: int
-    transaction: PointsTransactionOut
-
-
-class ReferralOut(BaseModel):
-    url: str
-    invited: int
-    rewards: int
-
-
-# ─── Mentorship ────────────────────────────────────────────────────────────
-
-class MentorProfileIn(BaseModel):
-    bio: Optional[str] = None
-    skills: List[str] = Field(default_factory=list)
-    years_experience: int = Field(ge=0)
-    preferred_session_day: Optional[str] = None
-    max_mentees: int = Field(default=3, ge=1, le=10)
-
-
-class MentorProfileOut(BaseModel):
-    id: int
-    github_username: str
-    bio: Optional[str] = None
-    skills: List[str]
-    years_experience: int
-    preferred_session_day: Optional[str] = None
-    max_mentees: int
-    is_available: bool
-    created_at: datetime
-
-    class Config:
-        from_attributes = True
-
-
-class MenteeProfileIn(BaseModel):
-    bio: Optional[str] = None
-    learning_interests: List[str] = Field(default_factory=list)
-    years_experience: int = Field(ge=0)
-    preferred_session_day: Optional[str] = None
-    goals: Optional[str] = None
-
-
-class MenteeProfileOut(BaseModel):
-    id: int
-    github_username: str
-    bio: Optional[str] = None
-    learning_interests: List[str]
-    years_experience: int
-    preferred_session_day: Optional[str] = None
-    goals: Optional[str] = None
-    created_at: datetime
-
-    class Config:
-        from_attributes = True
-
-
-class MentorMatchOut(BaseModel):
-    mentor_id: int
-    mentor_username: str
-    skill_overlap: float
-    experience_gap: float
-    availability_match: float
-    total_score: float
-
-
-class MentorshipOut(BaseModel):
-    id: int
-    mentor_id: int
-    mentor_username: str
-    mentee_id: int
-    mentee_username: str
-    status: str
-    match_score: float
-    started_at: datetime
-    ended_at: Optional[datetime] = None
-
-    class Config:
-        from_attributes = True
-
-
-class MentorshipSessionIn(BaseModel):
-    duration_minutes: int = Field(gt=0, le=480)  # max 8 hours
-    topic: str = Field(min_length=3, max_length=256)
-    notes: Optional[str] = None
-
-
-class MentorshipSessionOut(BaseModel):
-    id: int
-    mentorship_id: int
-    session_date: datetime
-    duration_minutes: int
-    topic: str
-    notes: Optional[str] = None
-
-    class Config:
-        from_attributes = True
-
-
-class MentorshipFeedbackIn(BaseModel):
-    rating: int = Field(ge=1, le=5)
-    feedback_text: Optional[str] = None
-
-
-class MentorshipFeedbackOut(BaseModel):
-    id: int
-    session_id: int
-    rating: int
-    feedback_text: Optional[str] = None
-    is_mentor_feedback: bool
-    created_at: datetime
-
-    class Config:
-        from_attributes = True
-
-
-class MentorshipMetrics(BaseModel):
-    total_sessions: int
-    total_hours: float
-    avg_rating: float
-    topics_covered: List[str]
-    last_session_date: Optional[datetime] = None
-
-
-class MentorMetrics(BaseModel):
-    total_mentees: int
-    total_sessions: int
-    total_hours: float
-    avg_rating: float
-
-
-class MentorshipListResponse(BaseModel):
-    data: List[MentorshipOut]
-    page: int
-    page_size: int
-    total: int
-
-
-class MentorListResponse(BaseModel):
-    data: List[MentorProfileOut]
-    page: int
-    page_size: int
-    total: int
-
-
-class MenteeListResponse(BaseModel):
-    data: List[MenteeProfileOut]
-    page: int
-    page_size: int
-    total: int
-
-
-# ─── Notifications ─────────────────────────────────────────────────────────
-
-class NotificationOut(BaseModel):
-    id: int
-    event_type: str
-    title: str
-    content: Optional[str] = None
-    link: Optional[str] = None
-    actor: Optional[str] = None
-    is_read: bool
-    created_at: datetime
-
-    class Config:
-        from_attributes = True
-
-
-class NotificationListResponse(BaseModel):
-    data: List[NotificationOut]
-    unread_count: int
-
-
-class NotificationPreferenceIn(BaseModel):
-    email_enabled: bool = True
-    slack_enabled: bool = False
-    discord_enabled: bool = False
-    pr_comments: bool = True
-    pr_mentions: bool = True
-    issue_comments: bool = True
-    issue_mentions: bool = True
-    review_requests: bool = True
-    digest_frequency: str = "weekly"  # daily|weekly|never
-    slack_webhook_url: Optional[str] = None
-    discord_webhook_url: Optional[str] = None
-
-
-class NotificationPreferenceOut(BaseModel):
-    id: int
-    user_id: int
-    email_enabled: bool
-    slack_enabled: bool
-    discord_enabled: bool
-    pr_comments: bool
-    pr_mentions: bool
-    issue_comments: bool
-    issue_mentions: bool
-    review_requests: bool
-    digest_frequency: str
-    created_at: datetime
-    updated_at: datetime
-
-    class Config:
-        from_attributes = True
-
-
-class WebhookEventIn(BaseModel):
-    event_type: str  # pr_comment|issue_comment|review_request|pr_merged
-    pr_number: Optional[int] = None
-    issue_number: Optional[int] = None
-    commenter: Optional[str] = None
-    content: Optional[str] = None
-    reviewer_id: Optional[int] = None
-    author_id: Optional[int] = None
-    repo: str
-    link: str
-
-
-class DigestEmailOut(BaseModel):
-    user_id: int
-    period: str
-    notifications_count: int
-    generated_at: datetime
-
-
-# ─── Onboarding ────────────────────────────────────────────────────────────
-
-class OnboardingStepIn(BaseModel):
-    step: str
-
-
-class OnboardingChecklistItem(BaseModel):
-    step: str
-    label: str
-    completed: bool
-
-
-class OnboardingProgressOut(BaseModel):
-    github_username: str
-    checklist: List[OnboardingChecklistItem]
-    completed_count: int
-    total_steps: int
-    progress_pct: int
-    is_complete: bool
-    started_at: str
-    last_updated: str
-
-
-# ─── FAQ (issue #307) ───────────────────────────────────────────────────────────
-
-class FAQOut(BaseModel):
-    id: int
-    category: str
-    question: str
-    answer: str
-    order: int
-    is_published: bool
-    created_at: datetime
-    updated_at: datetime
-
-    class Config:
-        from_attributes = True
-
-
-class FAQIn(BaseModel):
-    category: str = Field(..., min_length=1, max_length=64)
-    question: str = Field(..., min_length=1, max_length=512)
-    answer: str = Field(..., min_length=1)
-    order: int = Field(default=0, ge=0)
-    is_published: bool = True
-
-
-class FAQUpdateIn(BaseModel):
-    category: Optional[str] = Field(None, min_length=1, max_length=64)
-    question: Optional[str] = Field(None, min_length=1, max_length=512)
-    answer: Optional[str] = Field(None, min_length=1)
-    order: Optional[int] = Field(None, ge=0)
-    is_published: Optional[bool] = None
-
-
-class FAQListResponse(BaseModel):
-    data: List[FAQOut]
-    categories: List[str]
-    total: int
-
-
-class FAQFeedbackIn(BaseModel):
-    is_helpful: bool
-    user_comment: Optional[str] = None
-
-
-class FAQFeedbackOut(BaseModel):
-    id: int
-    faq_id: int
-    is_helpful: bool
-    user_comment: Optional[str] = None
-    created_at: datetime
-
-    class Config:
-        from_attributes = True
-
-
-class FAQSuggestionIn(BaseModel):
-    question: str = Field(..., min_length=1, max_length=512)
-    suggested_answer: Optional[str] = None
-    category: Optional[str] = Field(None, max_length=64)
-
-
-class FAQSuggestionOut(BaseModel):
-    id: int
-    question: str
-    suggested_answer: Optional[str] = None
-    category: Optional[str] = None
-    status: str
-    created_at: datetime
-
-    class Config:
-        from_attributes = True
-
-
-class FAQSuggestionListResponse(BaseModel):
-    data: List[FAQSuggestionOut]
-    page: int
-    page_size: int
-    total: int
-
-
-# ─── Contact / Support tickets (issue #305) ─────────────────────────────────
-
-_EMAIL_RE = re.compile(r"^[^@\s]+@[^@\s]+\.[^@\s]+$")
-
-
-class ContactFormIn(BaseModel):
-    name: str = Field(min_length=1, max_length=120)
-    email: str = Field(min_length=3, max_length=254)
-    subject: str = Field(min_length=1, max_length=200)
-    message: str = Field(min_length=1, max_length=5000)
-    # reCAPTCHA token from the frontend widget; optional when verification is off.
-    recaptcha_token: Optional[str] = None
-
-    @field_validator("name", "subject", "message")
-    @classmethod
-    def _not_blank(cls, v: str) -> str:
-        if not v or not v.strip():
-            raise ValueError("must not be blank")
-        return v.strip()
-
-    @field_validator("email")
-    @classmethod
-    def _valid_email(cls, v: str) -> str:
-        v = v.strip()
-        if not _EMAIL_RE.match(v):
-            raise ValueError("invalid email address")
-        return v
-
-
-class SupportTicketOut(BaseModel):
-    reference: str
-    status: str
-    created_at: datetime
-
-    class Config:
-        from_attributes = True
-
-
-# ─── Feedback (issue #308) ──────────────────────────────────────────────────
-
-FEEDBACK_CATEGORIES = {"bug", "feature", "general"}
-FEEDBACK_STATUSES = {"open", "planned", "in_progress", "completed", "declined"}
-ROADMAP_STATUSES = ("planned", "in_progress", "completed")
-
-
-class FeedbackIn(BaseModel):
-    category: str = Field(min_length=1, max_length=16)
-    message: str = Field(min_length=1, max_length=5000)
-    email: Optional[str] = Field(default=None, max_length=254)
-    screenshot: Optional[str] = None  # data URL: data:image/png;base64,...
-
-    @field_validator("category")
-    @classmethod
-    def _valid_category(cls, v: str) -> str:
-        v = v.strip().lower()
-        if v not in FEEDBACK_CATEGORIES:
-            raise ValueError("category must be one of: bug, feature, general")
-        return v
-
-    @field_validator("message")
-    @classmethod
-    def _not_blank(cls, v: str) -> str:
-        if not v.strip():
-            raise ValueError("message must not be blank")
-        return v.strip()
-
-    @field_validator("screenshot")
-    @classmethod
-    def _valid_screenshot(cls, v: Optional[str]) -> Optional[str]:
-        if v is None:
-            return v
-        if not v.startswith("data:image/"):
-            raise ValueError("screenshot must be an image data URL")
-        return v
-
-
-class FeedbackOut(BaseModel):
-    id: int
-    category: str
-    message: str
-    status: str
-    github_issue_url: Optional[str] = None
-    created_at: datetime
-
-    class Config:
-        from_attributes = True
-
-
-class ContactSubmitResponse(BaseModel):
-    message: str
-    ticket: SupportTicketOut
-
-
-class FeedbackListResponse(BaseModel):
-    data: List[FeedbackOut]
-    page: int
-    page_size: int
-    total: int
-
-
-class FeedbackStatusUpdate(BaseModel):
-    status: str
-
-    @field_validator("status")
-    @classmethod
-    def _valid_status(cls, v: str) -> str:
-        v = v.strip().lower()
-        if v not in FEEDBACK_STATUSES:
-            raise ValueError("invalid status")
-        return v
-
-
-class RoadmapItem(BaseModel):
-    id: int
-    category: str
-    message: str
-    status: str
-
-    class Config:
-        from_attributes = True
-
-
-class RoadmapResponse(BaseModel):
-    planned: List[RoadmapItem]
-    in_progress: List[RoadmapItem]
-    completed: List[RoadmapItem]
-
-# ─── LLM feedback (#402) ────────────────────────────────────────────────────
-
-class LLMFeedbackIn(BaseModel):
-    feature: str = Field(min_length=1, max_length=64)
-    prompt: str = Field(min_length=1, max_length=8000)
-    output: str = Field(min_length=1, max_length=8000)
-    rating: int = Field(ge=1, le=5)
-    comment: Optional[str] = Field(default=None, max_length=2000)
-    user_id: Optional[str] = Field(default=None, max_length=128)
-    is_expert: bool = False
-    expert_weight: float = Field(default=1.0, ge=1.0, le=5.0)
-
-    @field_validator("feature", "prompt", "output")
-    @classmethod
-    def _strip_required(cls, v: str) -> str:
-        v = v.strip()
-        if not v:
-            raise ValueError("must not be blank")
-        return v
-
-
-class LLMFeedbackOut(BaseModel):
-    id: int
-    feature: str
-    rating: int
-    comment: Optional[str] = None
-    is_expert: bool
-    expert_weight: float
-    created_at: datetime
-
-    class Config:
-        from_attributes = True
-
-
-class LLMFeedbackTrend(BaseModel):
-    feature: str
-    count: int
-    average_rating: float
-    weighted_average_rating: float
-    expert_count: int
-
-
-class LLMFeedbackDashboard(BaseModel):
-    total: int
-    trends: List[LLMFeedbackTrend]
-    low_rating_examples: List[LLMFeedbackOut]
-
-
-class LLMPromptImprovement(BaseModel):
-    feature: str
-    recommendation: str
-    evidence_count: int
diff --git a/api/schemas/__init__.py b/api/schemas/__init__.py
index 236d9da..af7d84b 100644
--- a/api/schemas/__init__.py
+++ b/api/schemas/__init__.py
@@ -1,2 +1,877 @@
-"""Schemas package for API models."""
+"""Pydantic schemas shared across all API routers."""
+from __future__ import annotations
 
+import re
+from datetime import datetime
+from typing import Any, Dict, List, Optional, Tuple, Literal
+
+from pydantic import BaseModel, Field, field_validator
+
+
+# ─── Fraud ────────────────────────────────────────────────────────────────────
+
+class EdgeInput(BaseModel):
+    src: str
+    dst: str
+    amount: float = 0.0
+    timestamp: float = 0.0
+    asset: str = "XLM"
+
+
+class ScoreRequest(BaseModel):
+    accounts: List[str] = Field(..., max_length=50)
+    edges: List[EdgeInput] = Field(default_factory=list)
+
+
+class ScoreResponse(BaseModel):
+    scores: Dict[str, float]
+
+
+class FraudAlertOut(BaseModel):
+    id: int
+    account_id: str
+    pattern: Optional[str] = None
+    risk_score: float
+    risk_level: str
+    description: Optional[str] = None
+    detected_at: datetime
+
+    class Config:
+        from_attributes = True
+
+
+class FraudAlertsResponse(BaseModel):
+    data: List[FraudAlertOut]
+    page: int
+    page_size: int
+    total: int
+
+
+class FraudExplanationOut(BaseModel):
+    alert_id: int
+    explanation: str
+    generated_in_ms: float
+    cached: bool
+
+
+class TransactionSummaryOut(BaseModel):
+    hash: str
+    amount: float
+    asset_code: str
+    destination_account: Optional[str] = None
+    created_at: str
+
+
+class PrioritizedAlertOut(BaseModel):
+    id: int
+    account_id: str
+    pattern: Optional[str] = None
+    risk_score: float
+    risk_level: str
+    priority_score: float
+    priority_level: str
+    explanation: str
+    detected_at: datetime
+    recent_transactions: List[TransactionSummaryOut]
+    account_activity_score: float
+    is_duplicate: bool = False
+    duplicate_of: Optional[int] = None
+
+    class Config:
+        from_attributes = True
+
+
+class PrioritizedAlertsResponse(BaseModel):
+    data: List[PrioritizedAlertOut]
+    deduplication_reduction_pct: int
+    total_processed: int
+    total_remaining: int
+
+
+class RiskPoint(BaseModel):
+    date: str
+    score: float
+
+
+class FraudStatsResponse(BaseModel):
+    total_alerts: int
+    high_risk: int
+    medium_risk: int
+    low_risk: int
+    recent_alerts: List[FraudAlertOut]
+    risk_over_time: List[RiskPoint]
+
+
+# ─── Accounts ─────────────────────────────────────────────────────────────────
+
+class AccountOut(BaseModel):
+    account_id: str
+    balance: Optional[float] = None
+    sequence: Optional[int] = None
+    home_domain: Optional[str] = None
+    flags: int = 0
+    last_modified_ledger: Optional[int] = None
+    created_at: Optional[datetime] = None
+    updated_at: Optional[datetime] = None
+
+    class Config:
+        from_attributes = True
+
+
+class AccountsResponse(BaseModel):
+    data: List[AccountOut]
+    page: int
+    page_size: int
+    total: int
+
+
+class TransactionOut(BaseModel):
+    hash: str
+    ledger_sequence: int
+    source_account: str
+    created_at: datetime
+    fee: int
+    operation_count: int
+    successful: bool
+    memo_type: Optional[str] = None
+    memo: Optional[str] = None
+
+    class Config:
+        from_attributes = True
+
+
+class TransactionsResponse(BaseModel):
+    data: List[TransactionOut]
+    page: int
+    page_size: int
+    total: int
+
+
+class FraudSummaryOut(BaseModel):
+    account_id: str
+    total_alerts: int
+    high_risk: int
+    medium_risk: int
+    low_risk: int
+    latest_score: Optional[float] = None
+
+
+class LoyaltySummaryOut(BaseModel):
+    account_id: str
+    points_balance: int
+    tier_id: str
+    tier_name: str
+
+
+# ─── Monitoring ───────────────────────────────────────────────────────────────
+
+class ModelMetricsOut(BaseModel):
+    accuracy: Optional[float] = None
+    precision: Optional[float] = None
+    recall: Optional[float] = None
+    f1: Optional[float] = None
+    f1_score: Optional[float] = None   # alias populated from f1 for compatibility
+    auc: Optional[float] = None
+    auc_roc: Optional[float] = None    # alias populated from auc for compatibility
+    drift_score: Optional[float] = None
+    recorded_at: Optional[datetime] = None
+    
+    # LLM Tracking
+    llm_cost: Optional[float] = None
+    llm_prompt_tokens: Optional[int] = None
+    llm_completion_tokens: Optional[int] = None
+
+
+class PerformancePoint(BaseModel):
+    date: str
+    accuracy: Optional[float] = None
+    precision: Optional[float] = None
+    recall: Optional[float] = None
+    f1: Optional[float] = None
+    auc: Optional[float] = None
+
+
+class DriftReport(BaseModel):
+    features: Dict[str, float]
+    overall_drift: float
+    generated_at: datetime
+
+
+class PredictionStats(BaseModel):
+    total_predictions: int
+    anomaly_rate: float
+    avg_score: float
+    period_days: int
+
+
+class LatencyStats(BaseModel):
+    p50_ms: float
+    p95_ms: float
+    p99_ms: float
+
+
+# ─── Loyalty ──────────────────────────────────────────────────────────────────
+
+class LoyaltyTierOut(BaseModel):
+    id: str
+    name: str
+    threshold: int
+    multiplier: float
+    color: str
+
+
+class BenefitOut(BaseModel):
+    id: str
+    title: str
+    description: str
+
+
+class NextTierInfo(BaseModel):
+    tier: LoyaltyTierOut
+    remaining_to_upgrade: int
+    progress_pct: int
+
+
+class LoyaltySummaryFull(BaseModel):
+    current_tier: LoyaltyTierOut
+    points_balance: int
+    next_tier: Optional[NextTierInfo] = None
+    benefits: List[BenefitOut]
+
+
+class PointsTransactionOut(BaseModel):
+    id: str
+    date: str
+    type: str  # earn | redeem | adjust
+    points: int
+    source: Optional[str] = None
+    note: Optional[str] = None
+
+
+class PointsHistoryResponse(BaseModel):
+    data: List[PointsTransactionOut]
+    page: int
+    page_size: int
+    total: int
+
+
+class RedeemRequest(BaseModel):
+    points: int = Field(..., gt=0)
+    reward_id: Optional[str] = None
+
+
+class RedeemResponse(BaseModel):
+    new_balance: int
+    transaction: PointsTransactionOut
+
+
+class ReferralOut(BaseModel):
+    url: str
+    invited: int
+    rewards: int
+
+
+# ─── Mentorship ────────────────────────────────────────────────────────────
+
+class MentorProfileIn(BaseModel):
+    bio: Optional[str] = None
+    skills: List[str] = Field(default_factory=list)
+    years_experience: int = Field(ge=0)
+    preferred_session_day: Optional[str] = None
+    max_mentees: int = Field(default=3, ge=1, le=10)
+
+
+class MentorProfileOut(BaseModel):
+    id: int
+    github_username: str
+    bio: Optional[str] = None
+    skills: List[str]
+    years_experience: int
+    preferred_session_day: Optional[str] = None
+    max_mentees: int
+    is_available: bool
+    created_at: datetime
+
+    class Config:
+        from_attributes = True
+
+
+class MenteeProfileIn(BaseModel):
+    bio: Optional[str] = None
+    learning_interests: List[str] = Field(default_factory=list)
+    years_experience: int = Field(ge=0)
+    preferred_session_day: Optional[str] = None
+    goals: Optional[str] = None
+
+
+class MenteeProfileOut(BaseModel):
+    id: int
+    github_username: str
+    bio: Optional[str] = None
+    learning_interests: List[str]
+    years_experience: int
+    preferred_session_day: Optional[str] = None
+    goals: Optional[str] = None
+    created_at: datetime
+
+    class Config:
+        from_attributes = True
+
+
+class MentorMatchOut(BaseModel):
+    mentor_id: int
+    mentor_username: str
+    skill_overlap: float
+    experience_gap: float
+    availability_match: float
+    total_score: float
+
+
+class MentorshipOut(BaseModel):
+    id: int
+    mentor_id: int
+    mentor_username: str
+    mentee_id: int
+    mentee_username: str
+    status: str
+    match_score: float
+    started_at: datetime
+    ended_at: Optional[datetime] = None
+
+    class Config:
+        from_attributes = True
+
+
+class MentorshipSessionIn(BaseModel):
+    duration_minutes: int = Field(gt=0, le=480)  # max 8 hours
+    topic: str = Field(min_length=3, max_length=256)
+    notes: Optional[str] = None
+
+
+class MentorshipSessionOut(BaseModel):
+    id: int
+    mentorship_id: int
+    session_date: datetime
+    duration_minutes: int
+    topic: str
+    notes: Optional[str] = None
+
+    class Config:
+        from_attributes = True
+
+
+class MentorshipFeedbackIn(BaseModel):
+    rating: int = Field(ge=1, le=5)
+    feedback_text: Optional[str] = None
+
+
+class MentorshipFeedbackOut(BaseModel):
+    id: int
+    session_id: int
+    rating: int
+    feedback_text: Optional[str] = None
+    is_mentor_feedback: bool
+    created_at: datetime
+
+    class Config:
+        from_attributes = True
+
+
+class MentorshipMetrics(BaseModel):
+    total_sessions: int
+    total_hours: float
+    avg_rating: float
+    topics_covered: List[str]
+    last_session_date: Optional[datetime] = None
+
+
+class MentorMetrics(BaseModel):
+    total_mentees: int
+    total_sessions: int
+    total_hours: float
+    avg_rating: float
+
+
+class MentorshipListResponse(BaseModel):
+    data: List[MentorshipOut]
+    page: int
+    page_size: int
+    total: int
+
+
+class MentorListResponse(BaseModel):
+    data: List[MentorProfileOut]
+    page: int
+    page_size: int
+    total: int
+
+
+class MenteeListResponse(BaseModel):
+    data: List[MenteeProfileOut]
+    page: int
+    page_size: int
+    total: int
+
+
+# ─── Notifications ─────────────────────────────────────────────────────────
+
+class NotificationOut(BaseModel):
+    id: int
+    event_type: str
+    title: str
+    content: Optional[str] = None
+    link: Optional[str] = None
+    actor: Optional[str] = None
+    is_read: bool
+    created_at: datetime
+
+    class Config:
+        from_attributes = True
+
+
+class NotificationListResponse(BaseModel):
+    data: List[NotificationOut]
+    unread_count: int
+
+
+class NotificationPreferenceIn(BaseModel):
+    email_enabled: bool = True
+    slack_enabled: bool = False
+    discord_enabled: bool = False
+    pr_comments: bool = True
+    pr_mentions: bool = True
+    issue_comments: bool = True
+    issue_mentions: bool = True
+    review_requests: bool = True
+    digest_frequency: str = "weekly"  # daily|weekly|never
+    slack_webhook_url: Optional[str] = None
+    discord_webhook_url: Optional[str] = None
+
+
+class NotificationPreferenceOut(BaseModel):
+    id: int
+    user_id: int
+    email_enabled: bool
+    slack_enabled: bool
+    discord_enabled: bool
+    pr_comments: bool
+    pr_mentions: bool
+    issue_comments: bool
+    issue_mentions: bool
+    review_requests: bool
+    digest_frequency: str
+    created_at: datetime
+    updated_at: datetime
+
+    class Config:
+        from_attributes = True
+
+
+class WebhookEventIn(BaseModel):
+    event_type: str  # pr_comment|issue_comment|review_request|pr_merged
+    pr_number: Optional[int] = None
+    issue_number: Optional[int] = None
+    commenter: Optional[str] = None
+    content: Optional[str] = None
+    reviewer_id: Optional[int] = None
+    author_id: Optional[int] = None
+    repo: str
+    link: str
+
+
+class DigestEmailOut(BaseModel):
+    user_id: int
+    period: str
+    notifications_count: int
+    generated_at: datetime
+
+
+# ─── Onboarding ────────────────────────────────────────────────────────────
+
+class OnboardingStepIn(BaseModel):
+    step: str
+
+
+class OnboardingChecklistItem(BaseModel):
+    step: str
+    label: str
+    completed: bool
+
+
+class OnboardingProgressOut(BaseModel):
+    github_username: str
+    checklist: List[OnboardingChecklistItem]
+    completed_count: int
+    total_steps: int
+    progress_pct: int
+    is_complete: bool
+    started_at: str
+    last_updated: str
+
+
+# ─── FAQ (issue #307) ───────────────────────────────────────────────────────────
+
+class FAQOut(BaseModel):
+    id: int
+    category: str
+    question: str
+    answer: str
+    order: int
+    is_published: bool
+    created_at: datetime
+    updated_at: datetime
+
+    class Config:
+        from_attributes = True
+
+
+class FAQIn(BaseModel):
+    category: str = Field(..., min_length=1, max_length=64)
+    question: str = Field(..., min_length=1, max_length=512)
+    answer: str = Field(..., min_length=1)
+    order: int = Field(default=0, ge=0)
+    is_published: bool = True
+
+
+class FAQUpdateIn(BaseModel):
+    category: Optional[str] = Field(None, min_length=1, max_length=64)
+    question: Optional[str] = Field(None, min_length=1, max_length=512)
+    answer: Optional[str] = Field(None, min_length=1)
+    order: Optional[int] = Field(None, ge=0)
+    is_published: Optional[bool] = None
+
+
+class FAQListResponse(BaseModel):
+    data: List[FAQOut]
+    categories: List[str]
+    total: int
+
+
+class FAQFeedbackIn(BaseModel):
+    is_helpful: bool
+    user_comment: Optional[str] = None
+
+
+class FAQFeedbackOut(BaseModel):
+    id: int
+    faq_id: int
+    is_helpful: bool
+    user_comment: Optional[str] = None
+    created_at: datetime
+
+    class Config:
+        from_attributes = True
+
+
+class FAQSuggestionIn(BaseModel):
+    question: str = Field(..., min_length=1, max_length=512)
+    suggested_answer: Optional[str] = None
+    category: Optional[str] = Field(None, max_length=64)
+
+
+class FAQSuggestionOut(BaseModel):
+    id: int
+    question: str
+    suggested_answer: Optional[str] = None
+    category: Optional[str] = None
+    status: str
+    created_at: datetime
+
+    class Config:
+        from_attributes = True
+
+
+class FAQSuggestionListResponse(BaseModel):
+    data: List[FAQSuggestionOut]
+    page: int
+    page_size: int
+    total: int
+
+
+# ─── Contact / Support tickets (issue #305) ─────────────────────────────────
+
+_EMAIL_RE = re.compile(r"^[^@\s]+@[^@\s]+\.[^@\s]+$")
+
+
+class ContactFormIn(BaseModel):
+    name: str = Field(min_length=1, max_length=120)
+    email: str = Field(min_length=3, max_length=254)
+    subject: str = Field(min_length=1, max_length=200)
+    message: str = Field(min_length=1, max_length=5000)
+    # reCAPTCHA token from the frontend widget; optional when verification is off.
+    recaptcha_token: Optional[str] = None
+
+    @field_validator("name", "subject", "message")
+    @classmethod
+    def _not_blank(cls, v: str) -> str:
+        if not v or not v.strip():
+            raise ValueError("must not be blank")
+        return v.strip()
+
+    @field_validator("email")
+    @classmethod
+    def _valid_email(cls, v: str) -> str:
+        v = v.strip()
+        if not _EMAIL_RE.match(v):
+            raise ValueError("invalid email address")
+        return v
+
+
+class SupportTicketOut(BaseModel):
+    reference: str
+    status: str
+    created_at: datetime
+
+    class Config:
+        from_attributes = True
+
+
+# ─── Feedback (issue #308) ──────────────────────────────────────────────────
+
+FEEDBACK_CATEGORIES = {"bug", "feature", "general"}
+FEEDBACK_STATUSES = {"open", "planned", "in_progress", "completed", "declined"}
+ROADMAP_STATUSES = ("planned", "in_progress", "completed")
+
+
+class FeedbackIn(BaseModel):
+    category: str = Field(min_length=1, max_length=16)
+    message: str = Field(min_length=1, max_length=5000)
+    email: Optional[str] = Field(default=None, max_length=254)
+    screenshot: Optional[str] = None  # data URL: data:image/png;base64,...
+
+    @field_validator("category")
+    @classmethod
+    def _valid_category(cls, v: str) -> str:
+        v = v.strip().lower()
+        if v not in FEEDBACK_CATEGORIES:
+            raise ValueError("category must be one of: bug, feature, general")
+        return v
+
+    @field_validator("message")
+    @classmethod
+    def _not_blank(cls, v: str) -> str:
+        if not v.strip():
+            raise ValueError("message must not be blank")
+        return v.strip()
+
+    @field_validator("screenshot")
+    @classmethod
+    def _valid_screenshot(cls, v: Optional[str]) -> Optional[str]:
+        if v is None:
+            return v
+        if not v.startswith("data:image/"):
+            raise ValueError("screenshot must be an image data URL")
+        return v
+
+
+class FeedbackOut(BaseModel):
+    id: int
+    category: str
+    message: str
+    status: str
+    github_issue_url: Optional[str] = None
+    created_at: datetime
+
+    class Config:
+        from_attributes = True
+
+
+class ContactSubmitResponse(BaseModel):
+    message: str
+    ticket: SupportTicketOut
+
+
+class FeedbackListResponse(BaseModel):
+    data: List[FeedbackOut]
+    page: int
+    page_size: int
+    total: int
+
+
+class FeedbackStatusUpdate(BaseModel):
+    status: str
+
+    @field_validator("status")
+    @classmethod
+    def _valid_status(cls, v: str) -> str:
+        v = v.strip().lower()
+        if v not in FEEDBACK_STATUSES:
+            raise ValueError("invalid status")
+        return v
+
+
+class RoadmapItem(BaseModel):
+    id: int
+    category: str
+    message: str
+    status: str
+
+    class Config:
+        from_attributes = True
+
+
+class RoadmapResponse(BaseModel):
+    planned: List[RoadmapItem]
+    in_progress: List[RoadmapItem]
+    completed: List[RoadmapItem]
+
+# ─── LLM feedback (#402) ────────────────────────────────────────────────────
+
+class LLMFeedbackIn(BaseModel):
+    feature: str = Field(min_length=1, max_length=64)
+    prompt: str = Field(min_length=1, max_length=8000)
+    output: str = Field(min_length=1, max_length=8000)
+    rating: int = Field(ge=1, le=5)
+    comment: Optional[str] = Field(default=None, max_length=2000)
+    user_id: Optional[str] = Field(default=None, max_length=128)
+    is_expert: bool = False
+    expert_weight: float = Field(default=1.0, ge=1.0, le=5.0)
+
+    @field_validator("feature", "prompt", "output")
+    @classmethod
+    def _strip_required(cls, v: str) -> str:
+        v = v.strip()
+        if not v:
+            raise ValueError("must not be blank")
+        return v
+
+
+class LLMFeedbackOut(BaseModel):
+    id: int
+    feature: str
+    rating: int
+    comment: Optional[str] = None
+    is_expert: bool
+    expert_weight: float
+    created_at: datetime
+
+    class Config:
+        from_attributes = True
+
+
+class LLMFeedbackTrend(BaseModel):
+    feature: str
+    count: int
+    average_rating: float
+    weighted_average_rating: float
+    expert_count: int
+
+
+class LLMFeedbackDashboard(BaseModel):
+    total: int
+    trends: List[LLMFeedbackTrend]
+    low_rating_examples: List[LLMFeedbackOut]
+
+
+class LLMPromptImprovement(BaseModel):
+    feature: str
+    recommendation: str
+    evidence_count: int
+
+
+# ─── Translation (Issue 1) ──────────────────────────────────────────────────────────
+
+class TranslationRequest(BaseModel):
+    text: str = Field(..., min_length=1, max_length=10000)
+    target_language: str = Field(..., min_length=2, max_length=10)
+    source_language: str = Field(default="auto", min_length=2, max_length=10)
+    context: Optional[str] = Field(default=None, max_length=500)
+
+
+class TranslationResponse(BaseModel):
+    translation: str
+    source_language: str
+    target_language: str
+    cached: bool
+
+
+class BatchTranslationRequest(BaseModel):
+    texts: List[str] = Field(..., min_length=1, max_length=100)
+    target_language: str = Field(..., min_length=2, max_length=10)
+    source_language: str = Field(default="auto", min_length=2, max_length=10)
+    context: Optional[str] = Field(default=None, max_length=500)
+
+
+class BatchTranslationResponse(BaseModel):
+    translations: List[TranslationResponse]
+
+
+class SupportedLanguagesResponse(BaseModel):
+    languages: Dict[str, Dict[str, str]]
+
+
+class LocaleFormatRequest(BaseModel):
+    data: Dict[str, Any]
+    locale: str = Field(..., min_length=2, max_length=10)
+    currency: str = Field(default="USD", min_length=3, max_length=3)
+
+
+class LocaleFormatResponse(BaseModel):
+    formatted: Dict[str, Any]
+    locale: str
+
+
+class TranslationCacheStatsResponse(BaseModel):
+    hits: int
+    misses: int
+    sets: int
+    evictions: int
+    hit_rate: float
+    size: int
+
+
+# ─── Predictive Alerts (Issue 2) ────────────────────────────────────────────────
+
+class BehavioralBaseline(BaseModel):
+    account_id: str
+    metric_name: str
+    mean_value: float
+    std_dev: float
+    min_value: float
+    max_value: float
+    sample_size: int
+    last_updated: datetime
+    confidence_level: float = 0.95
+
+
+class BehavioralBaselineResponse(BaseModel):
+    baselines: List[BehavioralBaseline]
+    account_id: str
+    generated_at: datetime
+
+
+class DeviationAlert(BaseModel):
+    alert_id: str
+    account_id: str
+    metric_name: str
+    current_value: float
+    expected_range: Tuple[float, float]
+    deviation_score: float
+    severity: Literal["low", "medium", "high", "critical"]
+    detected_at: datetime
+    confidence: float
+
+
+class PredictiveAlertRequest(BaseModel):
+    account_id: str
+    lookback_days: int = Field(default=30, ge=1, le=365)
+    metrics: Optional[List[str]] = None
+    sensitivity: str = Field(default="medium", pattern="^(low|medium|high)$")
+
+
+class PredictiveAlertResponse(BaseModel):
+    alerts: List[DeviationAlert]
+    baselines_used: List[BehavioralBaseline]
+    generated_at: datetime
+    total_analyzed: int
+
+
+class AlertGenerationRequest(BaseModel):
+    deviations: List[DeviationAlert]
+    include_explanation: bool = True
+
+
+class AlertGenerationResponse(BaseModel):
+    alerts: List[DeviationAlert]
+    explanations: List[str]
+    generated_at: datetime
diff --git a/api/services/llm_explainer.py b/api/services/llm_explainer.py
index e7f6e41..022ee09 100644
--- a/api/services/llm_explainer.py
+++ b/api/services/llm_explainer.py
@@ -1,5 +1,7 @@
 import asyncio
 
+from astroml.llm.metrics import LLM_COST_USD_TOTAL, LLM_REQUEST_LATENCY_SECONDS, LLM_REQUESTS_TOTAL
+
 class TransactionExplainer:
     def __init__(self):
         self.prompt_template = (
@@ -13,14 +15,19 @@ async def explain(self, tx_details: str) -> str:
         Generate a plain language explanation for a transaction.
         Response time guaranteed < 2s for testing.
         """
-        await asyncio.sleep(0.5)  # Simulate API call latency, but keep under 2s
-        
-        # Mock LLM response for demonstration
+        start = asyncio.get_event_loop().time()
+        await asyncio.sleep(0.5)
         explanation = f"This transaction transferred funds between accounts. It appears to be a standard transfer related to: {tx_details[:20]}..."
-        
-        # Ensure it's under 100 words (Acceptance criteria)
         words = explanation.split()
         if len(words) >= 100:
             explanation = " ".join(words[:99])
-            
+
+        latency = (asyncio.get_event_loop().time() - start) * 1000.0
+        try:
+            LLM_REQUESTS_TOTAL.labels(provider="transaction-explainer", status="success").inc()
+            LLM_REQUEST_LATENCY_SECONDS.labels(provider="transaction-explainer").observe(latency / 1000.0)
+            LLM_COST_USD_TOTAL.labels(provider="transaction-explainer").inc(0.0)
+        except Exception:
+            pass
+
         return explanation
diff --git a/api/services/predictive_alerts.py b/api/services/predictive_alerts.py
new file mode 100644
index 0000000..0b8cd65
--- /dev/null
+++ b/api/services/predictive_alerts.py
@@ -0,0 +1,451 @@
+"""Predictive alerts service for account behavior changes (Issue 2)."""
+from __future__ import annotations
+
+import logging
+import statistics
+from collections import defaultdict
+from datetime import datetime, timedelta
+from typing import Any, Dict, List, Optional, Tuple
+from uuid import uuid4
+
+import numpy as np
+from scipy import stats
+
+from api.database import get_sync_db
+from api.models.orm import ApiTransaction
+from astroml.llm.provider import MockLLMProvider
+
+logger = logging.getLogger(__name__)
+
+# Initialize LLM provider for generating explanations
+llm_provider = MockLLMProvider()
+
+
+class BehavioralLearner:
+    """Learn behavioral baselines for account metrics."""
+
+    def __init__(self, min_samples: int = 5, confidence_level: float = 0.95):
+        self.min_samples = min_samples
+        self.confidence_level = confidence_level
+        self._baselines: Dict[str, Dict[str, Dict[str, Any]]] = {}  # account_id -> metric -> stats
+
+    def update_behavior(self, account_id: str, metrics: Dict[str, List[float]]) -> None:
+        """Update behavioral baselines for an account based on historical data."""
+        if account_id not in self._baselines:
+            self._baselines[account_id] = {}
+
+        for metric_name, values in metrics.items():
+            if len(values) < self.min_samples:
+                continue
+
+            try:
+                mean_val = statistics.mean(values)
+                stdev = statistics.stdev(values) if len(values) > 1 else 0.0
+                min_val = min(values)
+                max_val = max(values)
+
+                # Calculate confidence interval
+                if len(values) >= 2 and stdev > 0:
+                    conf_interval = stats.t.interval(
+                        self.confidence_level,
+                        len(values) - 1,
+                        loc=mean_val,
+                        scale=stats.sem(values)
+                    )
+                else:
+                    conf_interval = (mean_val, mean_val)
+
+                self._baselines[account_id][metric_name] = {
+                    "mean": mean_val,
+                    "std_dev": stdev,
+                    "min": min_val,
+                    "max": max_val,
+                    "sample_size": len(values),
+                    "last_updated": datetime.utcnow(),
+                    "confidence_interval": [float(ci) for ci in conf_interval],
+                    "confidence_level": self.confidence_level
+                }
+            except Exception as e:
+                logger.warning(f"Failed to calculate baseline for {account_id}.{metric_name}: {e}")
+
+    def get_baseline(self, account_id: str, metric_name: str) -> Optional[Dict[str, Any]]:
+        """Get baseline for a specific account and metric."""
+        return self._baselines.get(account_id, {}).get(metric_name)
+
+    def get_all_baselines(self, account_id: str) -> Dict[str, Dict[str, Any]]:
+        """Get all baselines for an account."""
+        return self._baselines.get(account_id, {})
+
+
+class DeviationDetector:
+    """Detect significant deviations from behavioral baselines."""
+
+    def __init__(self, sensitivity: str = "medium"):
+        self.sensitivity = sensitivity
+        self.thresholds = {
+            "low": 2.5,      # 2.5 sigma
+            "medium": 2.0,   # 2.0 sigma
+            "high": 1.5,     # 1.5 sigma
+        }
+        self.threshold = self.thresholds.get(sensitivity, 2.0)
+
+    def detect_deviation(
+        self,
+        account_id: str,
+        metric_name: str,
+        current_value: float,
+        baseline: Dict[str, Any]
+    ) -> Optional[Dict[str, Any]]:
+        """Detect if current value deviates significantly from baseline."""
+        if not baseline:
+            return None
+
+        mean_val = baseline["mean"]
+        stdev = baseline["std_dev"]
+
+        if stdev == 0:
+            # No variation in historical data
+            if current_value != mean_val:
+                deviation_score = float('inf') if current_value != mean_val else 0.0
+            else:
+                return None
+        else:
+            # Calculate z-score (absolute deviation)
+            deviation_score = abs(current_value - mean_val) / stdev
+
+        # Check if deviation exceeds threshold
+        if deviation_score > self.threshold:
+            # Get expected range (confidence interval)
+            ci_low, ci_high = baseline.get("confidence_interval", [mean_val, mean_val])
+
+            # Determine severity based on deviation magnitude
+            if deviation_score >= 3.5:
+                severity = "critical"
+            elif deviation_score >= 2.5:
+                severity = "high"
+            elif deviation_score >= 1.5:
+                severity = "medium"
+            else:
+                severity = "low"
+
+            return {
+                "alert_id": str(uuid4()),
+                "account_id": account_id,
+                "metric_name": metric_name,
+                "current_value": current_value,
+                "expected_range": [float(ci_low), float(ci_high)],
+                "deviation_score": float(deviation_score),
+                "severity": severity,
+                "confidence": min(0.95, 0.5 + (deviation_score - self.threshold) * 0.1)
+            }
+
+        return None
+
+    def detect_multiple_deviations(
+        self,
+        account_id: str,
+        current_metrics: Dict[str, float],
+        baselines: Dict[str, Dict[str, Any]]
+    ) -> List[Dict[str, Any]]:
+        """Detect deviations across multiple metrics."""
+        deviations = []
+
+        for metric_name, current_value in current_metrics.items():
+            baseline = baselines.get(metric_name)
+            if baseline:
+                deviation = self.detect_deviation(
+                    account_id, metric_name, current_value, baseline
+                )
+                if deviation:
+                    deviations.append(deviation)
+
+        # Sort by deviation score (descending)
+        deviations.sort(key=lambda x: x["deviation_score"], reverse=True)
+        return deviations
+
+
+class AlertGenerator:
+    """Generate alerts and explanations using LLM."""
+
+    def __init__(self, llm_provider=None):
+        self.llm = llm_provider or MockLLMProvider()
+        self._explanation_cache: Dict[str, str] = {}
+
+    def generate_explanation(
+        self,
+        alert_id: str,
+        account_id: str,
+        deviation_data: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """Generate natural language explanation for a deviation."""
+        # Check cache first
+        cache_key = f"{alert_id}:{hash(str(deviation_data))}"
+        if cache_key in self._explanation_cache:
+            return {"explanation": self._explanation_cache[cache_key]}
+
+        try:
+            # Build prompt for LLM
+            prompt = f"""
+            Explain this financial anomaly in clear, concise language suitable for a fraud analyst:
+            
+            Account: {account_id}
+            Metric: {deviation_data.get('metric_name', 'unknown')}
+            Current Value: {deviation_data.get('current_value', 0):.2f}
+            Expected Range: [{deviation_data.get('expected_range', [0, 0])[0]:.2f}, {deviation_data.get('expected_range', [0, 0])[1]:.2f}]
+            Deviation Score: {deviation_data.get('deviation_score', 0):.2f}
+            Severity: {deviation_data.get('severity', 'unknown')}
+            
+            Provide a brief explanation of what this anomaly might indicate about account behavior.
+            Keep it under 2 sentences and focus on the business implications.
+            """
+
+            # Generate explanation using LLM
+            explanation_text = self.llm.generate(prompt)
+            
+            # Clean up the explanation
+            explanation_text = explanation_text.strip()
+            if not explanation_text.endswith('.'):
+                explanation_text += '.'
+                
+            # Cache the result
+            self._explanation_cache[cache_key] = explanation_text
+            
+            return {"explanation": explanation_text}
+            
+        except Exception as e:
+            logger.error(f"Failed to generate explanation for alert {alert_id}: {e}")
+            return {
+                "explanation": f"Anomaly detected in {deviation_data.get('metric_name', 'metric')} "
+                              f"with deviation score of {deviation_data.get('deviation_score', 0):.2f}. "
+                              f"This may indicate unusual account activity requiring investigation."
+            }
+
+    def create_deviation_alerts(
+        self,
+        deviations: List[Dict[str, Any]]
+    ) -> List[Dict[str, Any]]:
+        """Convert raw detections to formatted alert objects with explanations."""
+        alerts = []
+        for deviation in deviations:
+            # Generate explanation
+            explanation_result = self.generate_explanation(
+                deviation["alert_id"],
+                deviation["account_id"],
+                deviation
+            )
+            
+            # Create complete alert
+            alert = deviation.copy()
+            alert["explanation"] = explanation_result["explanation"]
+            alerts.append(alert)
+            
+        return alerts
+
+
+class PredictiveAlertService:
+    """Main service for predictive alerts."""
+
+    def __init__(self):
+        self.behavioral_learner = BehavioralLearner()
+        self.deviation_detector = DeviationDetector()
+        self.alert_generator = AlertGenerator()
+        self._cache: Dict[str, Any] = {}
+        self._cache_ttl = 300  # 5 minutes
+
+    async def learn_behavior_from_transactions(
+        self,
+        account_id: str,
+        days: int = 30
+    ) -> Dict[str, Any]:
+        """Learn behavioral baselines from historical transaction data."""
+        try:
+            db = next(get_sync_db())
+            
+            # Calculate cutoff date
+            cutoff_date = datetime.utcnow() - timedelta(days=days)
+            
+            # Query transactions for the account
+            transactions = db.query(ApiTransaction).filter(
+                ApiTransaction.source_account == account_id,
+                ApiTransaction.created_at >= cutoff_date
+            ).all()
+
+            if not transactions:
+                return {"message": "No transaction data found for learning period"}
+
+            # Extract time-series metrics
+            daily_metrics = defaultdict(list)
+            
+            for tx in transactions:
+                date_key = tx.created_at.date()
+                daily_metrics[date_key].append({
+                    "amount": float(tx.amount or 0),
+                    "hash": tx.hash
+                })
+
+            # Calculate daily aggregates
+            daily_aggregates = {
+                "daily_transaction_count": [],
+                "daily_total_amount": [],
+                "daily_avg_amount": [],
+                "unique_counterparties_per_day": []
+            }
+
+            for date, txs in daily_metrics.items():
+                amounts = [tx["amount"] for tx in txs]
+                counterparts = list(set(tx.destination_account for tx in txs if tx.destination_account))
+                
+                daily_aggregates["daily_transaction_count"].append(len(txs))
+                daily_aggregates["daily_total_amount"].append(sum(amounts))
+                daily_averages = statistics.mean(amounts) if amounts else 0
+                daily_aggregates["daily_avg_amount"].append(daily_averages)
+                daily_aggregates["unique_counterparties_per_day"].append(len(counterparts))
+
+            # Update behavioral models
+            self.behavioral_learner.update_behavior(account_id, daily_aggregates)
+            
+            return {
+                "account_id": account_id,
+                "learning_period_days": days,
+                "transactions_analyzed": len(transactions),
+                "days_with_data": len(daily_metrics),
+                "metrics_learned": list(daily_aggregates.keys()),
+                "timestamp": datetime.utcnow().isoformat()
+            }
+
+        except Exception as e:
+            logger.error(f"Error learning behavior for account {account_id}: {e}")
+            return {"error": str(e)}
+        finally:
+            db.close()
+
+    async def generate_predictive_alerts(
+        self,
+        account_id: str,
+        lookback_days: int = 30,
+        metrics: Optional[List[str]] = None,
+        sensitivity: str = "medium"
+    ) -> Dict[str, Any]:
+        """Generate predictive alerts for an account."""
+        try:
+            # Update detector sensitivity
+            self.deviation_detector.sensitivity = sensitivity
+            self.deviation_detector.threshold = self.deviation_detector.thresholds[sensitivity]
+
+            # Learn/update behavioral baselines
+            learn_result = await self.learn_behavior_from_transactions(
+                account_id, lookback_days
+            )
+            
+            if "error" in result:
+                return result
+
+            # Get current metrics (last 24 hours)
+            current_metrics = await self._get_current_metrics(account_id)
+            
+            if not current_metrics:
+                return {
+                    "message": "Insufficient recent data for analysis",
+                    "account_id": account_id
+                }
+
+            # Get learned baselines
+            baselines = self.behavioral_learner.get_all_baselines(account_id)
+            
+            if not baselines:
+                return {
+                    "message": "Insufficient historical data to establish baselines",
+                    "account_id": account_id
+                }
+
+            # Filter metrics if specified
+            if metrics:
+                current_metrics = {k: v for k, v in current_metrics.items() if k in metrics}
+                baselines = {k: v for k, v in baselines.items() if k in metrics}
+
+            # Detect deviations
+            deviations = self.deviation_detector.detect_multiple_deviations(
+                account_id, current_metrics, baselines
+            )
+
+            # Create formatted alerts with explanations
+            alerts = self.alert_generator.create_deviation_alerts(deviations)
+
+            return {
+                "alerts": alerts,
+                "baselines_used": [
+                    {
+                        "account_id": account_id,
+                        "metric_name": name,
+                        "mean_value": data["mean"],
+                        "std_dev": data["std_dev"],
+                        "min_value": data["min"],
+                        "max_value": data["max"],
+                        "sample_size": data["sample_size"],
+                        "last_updated": data["last_updated"].isoformat()
+                    }
+                    for name, data in baselines.items()
+                ],
+                "generated_at": datetime.utcnow().isoformat(),
+                "total_analyzed": len(current_metrics),
+                "deviations_found": len(deviations),
+                "learning_info": learn_result
+            }
+
+        except Exception as e:
+            logger.error(f"Error generating predictive alerts for {account_id}: {e}")
+            return {"error": str(e)}
+
+    async def _get_current_metrics(self, account_id: str) -> Dict[str, float]:
+        """Get current metrics from recent transactions (last 24 hours)."""
+        try:
+            db = next(get_sync_db())
+            
+            # Get transactions from last 24 hours
+            cutoff_date = datetime.utcnow() - timedelta(hours=24)
+            
+            transactions = db.query(ApiTransaction).filter(
+                ApiTransaction.source_account == account_id,
+                ApiTransaction.created_at >= cutoff_date
+            ).all()
+
+            if not transactions:
+                return {}
+
+            # Calculate current metrics
+            amounts = [float(tx.amount or 0) for tx in transactions]
+            counterparties = list(set(tx.destination_account for tx in transactions if tx.destination_account))
+            
+            return {
+                "transaction_count": len(transactions),
+                "total_amount": sum(amounts),
+                "avg_amount": statistics.mean(amounts) if amounts else 0,
+                "max_amount": max(amounts) if amounts else 0,
+                "unique_counterparties": len(counterparties)
+            }
+
+        except Exception as e:
+            logger.error(f"Error getting current metrics for {account_id}: {e}")
+            return {}
+        finally:
+            db.close()
+
+    def get_service_status(self) -> Dict[str, Any]:
+        """Get status of the predictive alerts service."""
+        total_accounts = len(self.behavioral_learner._baselines)
+        total_models = sum(len(metrics) for metrics in self.behavioral_learner._baselines.values())
+        
+        return {
+            "service": "predictive_alerts",
+            "status": "active",
+            "models_learned": {
+                "accounts": total_accounts,
+                "total_baselines": total_models
+            },
+            "cache_size": len(self._cache),
+            "timestamp": datetime.utcnow().isoformat()
+        }
+
+
+# Global service instance
+predictive_alert_service = PredictiveAlertService()
\ No newline at end of file
diff --git a/api/services/translation.py b/api/services/translation.py
new file mode 100644
index 0000000..e61daaa
--- /dev/null
+++ b/api/services/translation.py
@@ -0,0 +1,437 @@
+"""Translation service for multi-language LLM output support (Issue 1)."""
+from __future__ import annotations
+
+import asyncio
+import hashlib
+import json
+import os
+import threading
+import time
+from dataclasses import dataclass, field
+from datetime import datetime
+from functools import lru_cache
+from typing import Any, Dict, List, Optional, Union
+
+try:
+    import redis
+except ImportError:
+    redis = None
+
+try:
+    import babel.dates
+    import babel.numbers
+    from babel.core import Locale
+except ImportError:
+    babel = None
+    Locale = None
+
+
+SUPPORTED_LANGUAGES: Dict[str, Dict[str, str]] = {
+    "en": {"name": "English", "native": "English", "locale": "en_US"},
+    "es": {"name": "Spanish", "native": "Español", "locale": "es_ES"},
+    "fr": {"name": "French", "native": "Français", "locale": "fr_FR"},
+    "de": {"name": "German", "native": "Deutsch", "locale": "de_DE"},
+    "zh": {"name": "Chinese (Simplified)", "native": "中文（简体）", "locale": "zh_CN"},
+    "ja": {"name": "Japanese", "native": "日本語", "locale": "ja_JP"},
+    "ko": {"name": "Korean", "native": "한국어", "locale": "ko_KR"},
+    "pt": {"name": "Portuguese", "native": "Português", "locale": "pt_BR"},
+    "it": {"name": "Italian", "native": "Italiano", "locale": "it_IT"},
+    "ru": {"name": "Russian", "native": "Русский", "locale": "ru_RU"},
+    "ar": {"name": "Arabic", "native": "العربية", "locale": "ar_SA"},
+    "hi": {"name": "Hindi", "native": "हिन्दी", "locale": "hi_IN"},
+    "nl": {"name": "Dutch", "native": "Nederlands", "locale": "nl_NL"},
+    "pl": {"name": "Polish", "native": "Polski", "locale": "pl_PL"},
+    "tr": {"name": "Turkish", "native": "Türkçe", "locale": "tr_TR"},
+}
+
+
+@dataclass
+class TranslationCacheStats:
+    hits: int = 0
+    misses: int = 0
+    sets: int = 0
+    evictions: int = 0
+
+    @property
+    def hit_rate(self) -> float:
+        total = self.hits + self.misses
+        return self.hits / total if total > 0 else 0.0
+
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "hits": self.hits,
+            "misses": self.misses,
+            "sets": self.sets,
+            "evictions": self.evictions,
+            "hit_rate": round(self.hit_rate, 4),
+        }
+
+
+class TranslationCache:
+    """Multi-layer translation cache with Redis backend and in-memory fallback."""
+
+    def __init__(self, ttl: int = 86400, max_memory_entries: int = 10000):
+        self.ttl = ttl
+        self.max_memory_entries = max_memory_entries
+        self._memory_cache: Dict[str, tuple[str, float]] = {}
+        self._lock = threading.RLock()
+        self._stats = TranslationCacheStats()
+        self._redis_client = None
+
+        if redis is not None:
+            redis_url = os.getenv("REDIS_URL", "redis://localhost:6379/1")
+            try:
+                self._redis_client = redis.Redis.from_url(redis_url, decode_responses=True)
+                self._redis_client.ping()
+            except Exception:
+                self._redis_client = None
+
+    def _make_key(self, source_text: str, target_lang: str, source_lang: str = "auto") -> str:
+        content = f"{source_lang}:{target_lang}:{source_text}"
+        return f"trans:{hashlib.sha256(content.encode()).hexdigest()[:32]}"
+
+    def get(self, source_text: str, target_lang: str, source_lang: str = "auto") -> Optional[str]:
+        key = self._make_key(source_text, target_lang, source_lang)
+
+        with self._lock:
+            if key in self._memory_cache:
+                value, expiry = self._memory_cache[key]
+                if time.time() < expiry:
+                    self._stats.hits += 1
+                    return value
+                else:
+                    del self._memory_cache[key]
+
+        if self._redis_client:
+            try:
+                value = self._redis_client.get(key)
+                if value:
+                    with self._lock:
+                        self._memory_cache[key] = (value, time.time() + self.ttl)
+                        self._enforce_memory_limit()
+                    self._stats.hits += 1
+                    return value
+            except Exception:
+                pass
+
+        self._stats.misses += 1
+        return None
+
+    def set(self, source_text: str, target_lang: str, translation: str, source_lang: str = "auto") -> None:
+        key = self._make_key(source_text, target_lang, source_lang)
+        expiry = time.time() + self.ttl
+
+        with self._lock:
+            self._memory_cache[key] = (translation, expiry)
+            self._enforce_memory_limit()
+        self._stats.sets += 1
+
+        if self._redis_client:
+            try:
+                self._redis_client.setex(key, self.ttl, translation)
+            except Exception:
+                pass
+
+    def _enforce_memory_limit(self) -> None:
+        if len(self._memory_cache) > self.max_memory_entries:
+            now = time.time()
+            expired = [k for k, (_, exp) in self._memory_cache.items() if exp < now]
+            for k in expired:
+                del self._memory_cache[k]
+            if len(self._memory_cache) > self.max_memory_entries:
+                oldest = min(self._memory_cache.items(), key=lambda x: x[1][1])[0]
+                del self._memory_cache[oldest]
+                self._stats.evictions += 1
+
+    def get_stats(self) -> Dict[str, Any]:
+        return self._stats.to_dict()
+
+    def clear(self) -> int:
+        with self._lock:
+            count = len(self._memory_cache)
+            self._memory_cache.clear()
+        if self._redis_client:
+            try:
+                keys = self._redis_client.keys("trans:*")
+                if keys:
+                    self._redis_client.delete(*keys)
+            except Exception:
+                pass
+        return count
+
+
+class LocaleFormatter:
+    """Locale-aware formatting for dates, numbers, and currencies."""
+
+    def __init__(self, locale_code: str = "en_US"):
+        self.locale_code = locale_code
+        self._locale = None
+        if babel and Locale:
+            try:
+                self._locale = Locale.parse(locale_code)
+            except Exception:
+                self._locale = Locale.parse("en_US")
+
+    @property
+    def locale(self):
+        if self._locale is None and babel and Locale:
+            self._locale = Locale.parse("en_US")
+        return self._locale
+
+    def format_date(self, dt: datetime, format: str = "medium") -> str:
+        if not babel or not self.locale:
+            return dt.strftime("%Y-%m-%d")
+        try:
+            return babel.dates.format_date(dt, format=format, locale=self.locale)
+        except Exception:
+            return dt.strftime("%Y-%m-%d")
+
+    def format_datetime(self, dt: datetime, format: str = "medium") -> str:
+        if not babel or not self.locale:
+            return dt.strftime("%Y-%m-%d %H:%M:%S")
+        try:
+            return babel.dates.format_datetime(dt, format=format, locale=self.locale)
+        except Exception:
+            return dt.strftime("%Y-%m-%d %H:%M:%S")
+
+    def format_number(self, value: float, decimals: int = 2) -> str:
+        if not babel or not self.locale:
+            return f"{value:,.{decimals}f}"
+        try:
+            return babel.numbers.format_number(value, locale=self.locale)
+        except Exception:
+            return f"{value:,.{decimals}f}"
+
+    def format_currency(self, amount: float, currency: str = "USD") -> str:
+        if not babel or not self.locale:
+            return f"{currency} {amount:,.2f}"
+        try:
+            return babel.numbers.format_currency(amount, currency, locale=self.locale)
+        except Exception:
+            return f"{currency} {amount:,.2f}"
+
+    def format_percent(self, value: float, decimals: int = 1) -> str:
+        if not babel or not self.locale:
+            return f"{value * 100:.{decimals}f}%"
+        try:
+            return babel.numbers.format_percent(value, locale=self.locale)
+        except Exception:
+            return f"{value * 100:.{decimals}f}%"
+
+
+class TranslationService:
+    """Main translation service with LLM-based translation and caching."""
+
+    def __init__(self, llm_provider=None):
+        self.llm_provider = llm_provider
+        self.cache = TranslationCache()
+        self._formatters: Dict[str, LocaleFormatter] = {}
+        self._lock = threading.Lock()
+
+    def get_supported_languages(self) -> Dict[str, Dict[str, str]]:
+        return SUPPORTED_LANGUAGES.copy()
+
+    def get_formatter(self, locale_code: str) -> LocaleFormatter:
+        with self._lock:
+            if locale_code not in self._formatters:
+                self._formatters[locale_code] = LocaleFormatter(locale_code)
+            return self._formatters[locale_code]
+
+    def _build_translation_prompt(
+        self,
+        text: str,
+        target_lang: str,
+        source_lang: str = "auto",
+        context: Optional[str] = None,
+    ) -> str:
+        target_info = SUPPORTED_LANGUAGES.get(target_lang, {"name": target_lang, "native": target_lang})
+        target_name = target_info["name"]
+        target_native = target_info["native"]
+
+        context_part = f"\nContext: {context}" if context else ""
+
+        return f"""Translate the following text to {target_name} ({target_native}).
+Source language: {source_lang if source_lang != "auto" else "auto-detect"}{context_part}
+
+Text to translate:
+{text}
+
+Return ONLY the translated text, no explanations or metadata."""
+
+    def translate(
+        self,
+        text: str,
+        target_lang: str,
+        source_lang: str = "auto",
+        context: Optional[str] = None,
+        use_cache: bool = True,
+    ) -> Dict[str, Any]:
+        if target_lang not in SUPPORTED_LANGUAGES:
+            raise ValueError(f"Unsupported language: {target_lang}. Supported: {list(SUPPORTED_LANGUAGES.keys())}")
+
+        if use_cache:
+            cached = self.cache.get(text, target_lang, source_lang)
+            if cached is not None:
+                return {
+                    "translated_text": cached,
+                    "source_language": source_lang,
+                    "target_language": target_lang,
+                    "cached": True,
+                    "latency_ms": 0.0,  # Cached responses have zero latency
+                }
+
+        start_time = time.time()
+        if self.llm_provider:
+            prompt = self._build_translation_prompt(text, target_lang, source_lang, context)
+            translation = self.llm_provider.generate(prompt)
+        else:
+            translation = f"[{target_lang}] {text}"
+        latency_ms = (time.time() - start_time) * 1000
+
+        if use_cache:
+            self.cache.set(text, target_lang, translation, source_lang)
+
+        return {
+            "translated_text": translation.strip(),
+            "source_language": source_lang,
+            "target_language": target_lang,
+            "cached": False,
+            "latency_ms": round(latency_ms, 2),
+        }
+
+    def translate_batch(
+        self,
+        texts: List[str],
+        target_lang: str,
+        source_lang: str = "auto",
+        context: Optional[str] = None,
+        use_cache: bool = True,
+    ) -> List[Dict[str, Any]]:
+        return [self.translate(t, target_lang, source_lang, context, use_cache) for t in texts]
+
+    async def translate_async(
+        self,
+        text: str,
+        target_lang: str,
+        source_lang: str = "auto",
+        context: Optional[str] = None,
+        use_cache: bool = True,
+    ) -> Dict[str, Any]:
+        loop = asyncio.get_event_loop()
+        return await loop.run_in_executor(
+            None,
+            lambda: self.translate(text, target_lang, source_lang, context, use_cache)
+        )
+
+    async def translate_batch_async(
+        self,
+        texts: List[str],
+        target_lang: str,
+        source_lang: str = "auto",
+        context: Optional[str] = None,
+        use_cache: bool = True,
+    ) -> List[Dict[str, Any]]:
+        """Async batch translate multiple texts to target language."""
+        loop = asyncio.get_event_loop()
+        return await loop.run_in_executor(
+            None,
+            lambda: [self.translate(t, target_lang, source_lang, context, use_cache) for t in texts]
+        )
+
+    def format_locale(
+        self,
+        value: Union[float, int, str],
+        locale: str,
+        format_type: str,
+        currency_code: Optional[str] = None,
+    ) -> str:
+        """Format a single value according to locale and format type."""
+        formatter = self.get_formatter(locale)
+        
+        if format_type == "number":
+            if isinstance(value, str):
+                try:
+                    value = float(value)
+                except ValueError:
+                    return value
+            return formatter.format_number(float(value))
+        elif format_type == "currency":
+            if isinstance(value, str):
+                try:
+                    value = float(value)
+                except ValueError:
+                    return value
+            currency = currency_code or "USD"
+            return formatter.format_currency(float(value), currency)
+        elif format_type == "percent":
+            if isinstance(value, str):
+                try:
+                    value = float(value)
+                except ValueError:
+                    return value
+            return formatter.format_percent(float(value))
+        elif format_type == "date":
+            if isinstance(value, str):
+                try:
+                    from datetime import datetime
+                    value = datetime.fromisoformat(value.replace('Z', '+00:00'))
+                except ValueError:
+                    return value
+            if isinstance(value, datetime):
+                return formatter.format_date(value)
+            return str(value)
+        elif format_type == "datetime":
+            if isinstance(value, str):
+                try:
+                    from datetime import datetime
+                    value = datetime.fromisoformat(value.replace('Z', '+00:00'))
+                except ValueError:
+                    return value
+            if isinstance(value, datetime):
+                return formatter.format_datetime(value)
+            return str(value)
+        else:
+            return str(value)
+
+    def get_cache_stats(self) -> Dict[str, Any]:
+        return self.cache.get_stats()
+
+    def invalidate_cache(self, text: str) -> bool:
+        """Invalidate cache for given text across all target languages (using source language auto-detection)."""
+        # We'll use a special key pattern or just clear all for simplicity
+        # For a more sophisticated approach, we'd need to iterate through all language combinations
+        # For now, let's clear entries that start with the text hash
+        text_hash = hashlib.sha256(text.encode()).hexdigest()[:32]
+        
+        with self.cache._lock:
+            # Find and remove keys that match this text
+            keys_to_delete = [
+                k for k in self.cache._memory_cache.keys() 
+                if k.startswith(f"trans:{text_hash}")
+            ]
+            for k in keys_to_delete:
+                del self.cache._memory_cache[k]
+        
+        if self.cache._redis_client:
+            try:
+                # Use SCAN to find matching keys
+                cursor = 0
+                deleted_count = 0
+                while True:
+                    cursor, keys = self.cache._redis_client.scan(
+                        cursor, match=f"trans:*{text_hash}*", count=100
+                    )
+                    if keys:
+                        deleted_count += self.cache._redis_client.delete(*keys)
+                    if cursor == 0:
+                        break
+                return deleted_count > 0
+            except Exception:
+                return False
+        return len(keys_to_delete) > 0 if 'keys_to_delete' in locals() else False
+
+    def invalidate_all_cache(self) -> int:
+        return self.cache.clear()
+
+
+translation_service = TranslationService()
\ No newline at end of file
diff --git a/api/tests/test_llm_cost_aware.py b/api/tests/test_llm_cost_aware.py
new file mode 100644
index 0000000..44f152b
--- /dev/null
+++ b/api/tests/test_llm_cost_aware.py
@@ -0,0 +1,70 @@
+"""Cost-aware tests for LLM features."""
+from __future__ import annotations
+
+import time
+
+from fastapi.testclient import TestClient
+
+from astroml.llm.tracker import global_tracker
+from astroml.llm.metrics import (
+    LLM_COST_USD_TOTAL,
+    LLM_REQUEST_LATENCY_SECONDS,
+    LLM_TOKENS_TOTAL,
+)
+
+
+class TestLLMCostAware:
+    """Tests to ensure LLM usage stays within cost and latency budgets."""
+
+    def test_cost_threshold_not_exceeded(self, client: TestClient):
+        baseline_cost = global_tracker.total_cost
+        baseline_prom_cost = float(
+            LLM_COST_USD_TOTAL._metrics.get("_value", {}).get("value", 0.0)
+        )
+
+        response = client.post(
+            "/api/v1/llm/ask",
+            json={"question": "What is the cost of this request?"},
+        )
+        assert response.status_code == 200
+
+        new_cost = global_tracker.total_cost
+        new_prom_cost = float(
+            LLM_COST_USD_TOTAL._metrics.get("_value", {}).get("value", 0.0)
+        )
+        delta = (new_cost - baseline_cost) + (new_prom_cost - baseline_prom_cost)
+        assert delta < 0.50, f"Single LLM request cost ${delta:.4f} exceeded $0.50 budget"
+
+    def test_latency_budget(self, client: TestClient):
+        start = time.perf_counter()
+        response = client.post(
+            "/api/v1/llm/ask",
+            json={"question": "How fast is this response?"},
+        )
+        elapsed_ms = (time.perf_counter() - start) * 1000.0
+        assert response.status_code == 200
+        assert elapsed_ms < 2000.0, f"LLM request took {elapsed_ms:.1f}ms, exceeded 2000ms budget"
+
+    def test_token_budget(self, client: TestClient):
+        baseline_tokens = global_tracker.total_prompt_tokens + global_tracker.total_completion_tokens
+        baseline_prom_tokens = sum(
+            v for v in LLM_TOKENS_TOTAL._metrics.get("_value", {}).values()
+            if isinstance(v, (int, float))
+        )
+
+        response = client.post(
+            "/api/v1/llm/ask",
+            json={"question": "Count the tokens in this short question."},
+        )
+        assert response.status_code == 200
+
+        new_tokens = global_tracker.total_prompt_tokens + global_tracker.total_completion_tokens
+        delta = (new_tokens - baseline_tokens)
+        assert delta <= 2000, f"Request used {delta} tokens, exceeded budget of 2000"
+
+    def test_health_check_latency(self, client: TestClient):
+        start = time.perf_counter()
+        response = client.get("/api/v1/llm/health")
+        elapsed_ms = (time.perf_counter() - start) * 1000.0
+        assert response.status_code == 200
+        assert elapsed_ms < 5000.0, f"Health check took {elapsed_ms:.1f}ms"
diff --git a/api/tests/test_llm_health.py b/api/tests/test_llm_health.py
new file mode 100644
index 0000000..3aa82b0
--- /dev/null
+++ b/api/tests/test_llm_health.py
@@ -0,0 +1,33 @@
+"""Integration tests for LLM health endpoints."""
+from __future__ import annotations
+
+
+class TestLLMHealth:
+    def test_llm_health_returns_200(self, client):
+        resp = client.get("/api/v1/llm/health")
+        assert resp.status_code == 200
+
+    def test_llm_health_has_overall_status(self, client):
+        data = client.get("/api/v1/llm/health").json()
+        assert "overall_status" in data
+        assert "providers" in data
+        assert "checked_at" in data
+
+    def test_llm_provider_health_endpoint(self, client):
+        resp = client.get("/api/v1/llm/health/openai")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["provider"] == "openai"
+        assert "status" in data
+        assert "latency_ms" in data
+
+    def test_llm_health_providers_include_expected(self, client):
+        data = client.get("/api/v1/llm/health").json()
+        assert "openai" in data["providers"]
+        assert "anthropic" in data["providers"]
+        assert "huggingface" in data["providers"]
+
+    def test_prometheus_metrics_endpoint(self, client):
+        resp = client.get("/metrics")
+        assert resp.status_code == 200
+        assert "astroml_llm_provider_health" in resp.text
diff --git a/astroml/llm/explainer.py b/astroml/llm/explainer.py
index 5bc37a9..950a9f5 100644
--- a/astroml/llm/explainer.py
+++ b/astroml/llm/explainer.py
@@ -37,11 +37,12 @@ def generate_explanation(self, alert_id: int, account_id: str, pattern: str, sco
                 latency_ms=latency_ms
             )
             
-            # Cache the response
             self.cache.set(prompt, response)
             
             return response
         except Exception as e:
+            provider_name = self.provider.__class__.__name__.replace("Provider", "").lower()
+            global_tracker.record_error(provider_name)
             return f"Error generating explanation: {str(e)}"
 
     def _build_prompt(self, account_id: str, pattern: str, score: float, transactions: List[Dict[str, Any]]) -> str:
diff --git a/astroml/llm/health.py b/astroml/llm/health.py
new file mode 100644
index 0000000..e98c250
--- /dev/null
+++ b/astroml/llm/health.py
@@ -0,0 +1,113 @@
+"""LLM Provider health checks."""
+import asyncio
+import os
+import time
+from typing import Any, Dict
+
+import aiohttp
+
+PROVIDER_ENDPOINTS = {
+    "openai": {
+        "url": "https://api.openai.com/v1/models",
+        "method": "GET",
+        "headers": lambda key: {"Authorization": f"Bearer {key}"},
+    },
+    "anthropic": {
+        "url": "https://api.anthropic.com/v1/messages",
+        "method": "HEAD",
+        "headers": lambda key: {
+            "x-api-key": key,
+            "anthropic-version": "2023-06-01",
+        },
+    },
+    "huggingface": {
+        "url": "https://api-inference.huggingface.co/status",
+        "method": "GET",
+        "headers": lambda key: {"Authorization": f"Bearer {key}"},
+    },
+}
+
+
+def _get_api_key(provider_name: str) -> str:
+    env_key = f"{provider_name.upper()}_API_KEY"
+    return os.getenv(env_key, "")
+
+
+async def check_provider_health(
+    provider_name: str, timeout: float = 5.0
+) -> Dict[str, Any]:
+    start = time.perf_counter()
+    if provider_name not in PROVIDER_ENDPOINTS:
+        latency_ms = (time.perf_counter() - start) * 1000
+        return {
+            "provider": provider_name,
+            "status": "unknown",
+            "latency_ms": round(latency_ms, 2),
+            "error": "Provider not supported for health checks",
+        }
+
+    api_key = _get_api_key(provider_name)
+    if not api_key:
+        latency_ms = (time.perf_counter() - start) * 1000
+        return {
+            "provider": provider_name,
+            "status": "unhealthy",
+            "latency_ms": round(latency_ms, 2),
+            "error": "API key not configured",
+        }
+
+    config = PROVIDER_ENDPOINTS[provider_name]
+
+    try:
+        async with aiohttp.ClientSession() as session:
+            async with session.request(
+                method=config["method"],
+                url=config["url"],
+                headers=config["headers"](api_key),
+                timeout=aiohttp.ClientTimeout(total=timeout),
+            ) as response:
+                latency_ms = (time.perf_counter() - start) * 1000
+                healthy = 200 <= response.status < 300
+                return {
+                    "provider": provider_name,
+                    "status": "healthy" if healthy else "unhealthy",
+                    "latency_ms": round(latency_ms, 2),
+                    "http_status": response.status,
+                }
+    except Exception as e:
+        latency_ms = (time.perf_counter() - start) * 1000
+        return {
+            "provider": provider_name,
+            "status": "unhealthy",
+            "latency_ms": round(latency_ms, 2),
+            "error": str(e),
+        }
+
+
+async def check_all_providers() -> Dict[str, Any]:
+    providers = list(PROVIDER_ENDPOINTS.keys())
+    results = await asyncio.gather(
+        *(check_provider_health(p) for p in providers),
+        return_exceptions=True,
+    )
+
+    provider_statuses = {}
+    for result in results:
+        if isinstance(result, Exception):
+            provider_statuses["unknown"] = {
+                "provider": "unknown",
+                "status": "unhealthy",
+                "latency_ms": 0,
+                "error": str(result),
+            }
+        else:
+            provider_statuses[result["provider"]] = result
+
+    all_healthy = all(
+        r.get("status") == "healthy" for r in provider_statuses.values()
+    )
+    return {
+        "overall_status": "healthy" if all_healthy else "degraded",
+        "providers": provider_statuses,
+        "checked_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
+    }
diff --git a/astroml/llm/metrics.py b/astroml/llm/metrics.py
new file mode 100644
index 0000000..2413d24
--- /dev/null
+++ b/astroml/llm/metrics.py
@@ -0,0 +1,32 @@
+from prometheus_client import Counter, Gauge, Histogram
+
+LLM_REQUESTS_TOTAL = Counter(
+    "astroml_llm_requests_total",
+    "Total LLM API requests",
+    ["provider", "status"],
+)
+
+LLM_REQUEST_LATENCY_SECONDS = Histogram(
+    "astroml_llm_request_latency_seconds",
+    "LLM API request latency in seconds",
+    ["provider"],
+    buckets=[0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60],
+)
+
+LLM_COST_USD_TOTAL = Counter(
+    "astroml_llm_cost_usd_total",
+    "Total LLM API cost in USD",
+    ["provider"],
+)
+
+LLM_TOKENS_TOTAL = Counter(
+    "astroml_llm_tokens_total",
+    "Total LLM tokens processed",
+    ["provider", "token_type"],
+)
+
+LLM_PROVIDER_HEALTH = Gauge(
+    "astroml_llm_provider_health",
+    "LLM provider health status (1=healthy, 0=unhealthy)",
+    ["provider"],
+)
diff --git a/astroml/llm/tracker.py b/astroml/llm/tracker.py
index aba3ca8..85672bd 100644
--- a/astroml/llm/tracker.py
+++ b/astroml/llm/tracker.py
@@ -1,19 +1,24 @@
 """LLM Token Usage and Cost Tracking."""
 import logging
-from typing import Dict, Optional
+from typing import Dict
+
+from astroml.llm.metrics import (
+    LLM_COST_USD_TOTAL,
+    LLM_REQUEST_LATENCY_SECONDS,
+    LLM_REQUESTS_TOTAL,
+    LLM_TOKENS_TOTAL,
+)
 
 logger = logging.getLogger(__name__)
 
-# Mock cost per 1k tokens for different providers
 COST_RATES = {
     "openai": {"prompt": 0.03, "completion": 0.06},
     "anthropic": {"prompt": 0.015, "completion": 0.075},
     "huggingface": {"prompt": 0.001, "completion": 0.001},
 }
 
-class LLMUsageTracker:
-    """Tracks LLM API usage, costs, and latency."""
 
+class LLMUsageTracker:
     def __init__(self):
         self.total_cost = 0.0
         self.total_prompt_tokens = 0
@@ -23,42 +28,66 @@ def __init__(self):
     def record_usage(
         self, provider_name: str, usage: Dict[str, int], latency_ms: float
     ) -> float:
-        """
-        Record usage for a request and calculate cost.
-        Logs an alert if total cost exceeds the threshold.
-        """
-        rates = COST_RATES.get(provider_name.lower(), {"prompt": 0.0, "completion": 0.0})
-        
+        rates = COST_RATES.get(
+            provider_name.lower(), {"prompt": 0.0, "completion": 0.0}
+        )
         prompt_tokens = usage.get("prompt_tokens", 0)
         completion_tokens = usage.get("completion_tokens", 0)
-        
-        cost = (prompt_tokens / 1000.0) * rates["prompt"] + (completion_tokens / 1000.0) * rates["completion"]
-        
+        cost = (prompt_tokens / 1000.0) * rates["prompt"] + (
+            completion_tokens / 1000.0
+        ) * rates["completion"]
+
         self.total_prompt_tokens += prompt_tokens
         self.total_completion_tokens += completion_tokens
         self.total_cost += cost
-        
+
+        LLM_REQUESTS_TOTAL.labels(
+            provider=provider_name, status="success"
+        ).inc()
+        LLM_REQUEST_LATENCY_SECONDS.labels(provider=provider_name).observe(
+            latency_ms / 1000.0
+        )
+        LLM_COST_USD_TOTAL.labels(provider=provider_name).inc(cost)
+        LLM_TOKENS_TOTAL.labels(
+            provider=provider_name, token_type="prompt"
+        ).inc(prompt_tokens)
+        LLM_TOKENS_TOTAL.labels(
+            provider=provider_name, token_type="completion"
+        ).inc(completion_tokens)
+
         logger.info(
-            "LLM Usage Recorded: Provider=%s, PromptTokens=%d, CompletionTokens=%d, Cost=$%.4f, Latency=%.2fms",
-            provider_name, prompt_tokens, completion_tokens, cost, latency_ms
+            "LLM Usage Recorded: Provider=%s, PromptTokens=%d, "
+            "CompletionTokens=%d, Cost=$%.4f, Latency=%.2fms",
+            provider_name,
+            prompt_tokens,
+            completion_tokens,
+            cost,
+            latency_ms,
         )
-        
+
         self.check_alerts()
         return cost
 
+    def record_error(self, provider_name: str) -> None:
+        LLM_REQUESTS_TOTAL.labels(provider=provider_name, status="error").inc()
+
     def check_alerts(self):
-        """Check if cost alerts should be triggered."""
         if self.total_cost > self.alert_threshold:
-            logger.warning("LLM Cost Alert! Total cost ($%.2f) has exceeded threshold ($%.2f)", self.total_cost, self.alert_threshold)
+            logger.warning(
+                "LLM Cost Alert! Total cost ($%.2f) has exceeded "
+                "threshold ($%.2f)",
+                self.total_cost,
+                self.alert_threshold,
+            )
 
     def get_summary(self) -> Dict[str, float]:
-        """Get summary of tracking metrics."""
         return {
             "total_cost": self.total_cost,
             "total_prompt_tokens": self.total_prompt_tokens,
             "total_completion_tokens": self.total_completion_tokens,
-            "total_tokens": self.total_prompt_tokens + self.total_completion_tokens
+            "total_tokens": self.total_prompt_tokens
+            + self.total_completion_tokens,
         }
 
-# Global singleton tracker
+
 global_tracker = LLMUsageTracker()
diff --git a/docs/runbooks/llm_health.md b/docs/runbooks/llm_health.md
new file mode 100644
index 0000000..68a4680
--- /dev/null
+++ b/docs/runbooks/llm_health.md
@@ -0,0 +1,88 @@
+# LLM Infrastructure Runbook
+
+## Overview
+
+This runbook covers health checks, monitoring, alerting, and incident response for LLM providers (OpenAI, Anthropic, HuggingFace).
+
+## Health Check Architecture
+
+- **Health endpoints**: `GET /api/v1/llm/health` and `GET /api/v1/llm/health/{provider}`
+- **Polling interval**: 60 seconds via Prometheus or external monitor
+- **Metrics endpoint**: `GET /metrics` (Prometheus text format)
+- **Grafana dashboard**: `monitoring/grafana/llm_health_dashboard.json`
+
+## Key Metrics
+
+| Metric | Type | Description |
+|--------|------|-------------|
+| `astroml_llm_provider_health` | Gauge | 1 = healthy, 0 = unhealthy |
+| `astroml_llm_request_latency_seconds` | Histogram | Per-provider latency |
+| `astroml_llm_requests_total` | Counter | Request count by provider and status |
+| `astroml_llm_cost_usd_total` | Counter | Accumulated cost USD |
+| `astroml_llm_tokens_total` | Counter | Token count by provider and token_type |
+
+## Alerts
+
+| Alert | Condition | Severity |
+|-------|-----------|----------|
+| `LLMProviderDown` | Provider health == 0 for > 2m | Critical |
+| `LLMHighErrorRate` | Error rate > 0.1 req/s for > 2m | Warning |
+| `LLMCostThreshold` | Cost > $10 in 1h window | Warning |
+| `LLMHighLatency` | P95 latency > 5s for > 3m | Warning |
+
+## Cost Tracking
+
+- **Threshold**: $100 (logged)
+- **Granularity**: Per-request cost calculated using mock rates in `astroml/llm/tracker.py`
+- **Alerting**: Prometheus `LLMCostThreshold` rule triggers on spikes (>$10/hour)
+- **Dashboard**: Cost panel in Grafana shows 1-hour rolling sums
+
+## Incident Response
+
+### Provider Down
+1. Check `LLMProviderDown` alert in Alertmanager
+2. Verify API keys are configured (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `HUGGINGFACE_API_KEY`)
+3. Check network connectivity from container to provider API
+4. Review provider status pages: OpenAI, Anthropic, HuggingFace
+5. Rotate API keys if suspected exposure
+6. Failover: update `LLM_PROVIDER` env var to alternate provider
+
+### High Error Rate
+1. Check `LLMHighErrorRate` alert
+2. Correlate with latency spikes in Grafana dashboard
+3. Review application logs for stack traces
+4. Check for rate limits or quota exhaustion
+5. Consider switching providers or reducing request rate
+
+### Cost Spike
+1. Check `LLMCostThreshold` alert
+2. Correlate with traffic volume in Grafana
+3. Review recent deployments for prompt regression
+4. If legitimate growth, update budget thresholds
+5. If anomaly, audit prompt caching (`SemanticCache`) and consider tightening limits
+
+## Runbook Verification
+
+```bash
+# Verify health endpoint
+curl -s http://localhost:8000/api/v1/llm/health | jq
+
+# Verify metrics exposition
+curl -s http://localhost:8000/metrics | grep astroml_llm_
+
+# Run monitoring stack
+docker compose --profile monitoring up -d
+
+# Check Prometheus targets
+open http://localhost:9090/targets
+
+# Open Grafana
+open http://localhost:3000
+Default login: admin/admin
+```
+
+## Maintenance
+
+- **Dashboard refresh**: Import `monitoring/grafana/llm_health_dashboard.json` into Grafana
+- **Alert review**: Review rules in `monitoring/prometheus/alert_rules.yml`
+- **Rate updates**: Update mock cost rates in `astroml/llm/tracker.py` and `COST_RATES` from provider pricing pages
diff --git a/k8s/astroml-api-deployment.yaml b/k8s/astroml-api-deployment.yaml
new file mode 100644
index 0000000..97874e5
--- /dev/null
+++ b/k8s/astroml-api-deployment.yaml
@@ -0,0 +1,61 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: astroml-api
+  namespace: astroml
+  labels:
+    app: astroml-api
+spec:
+  replicas: 3
+  selector:
+    matchLabels:
+      app: astroml-api
+  template:
+    metadata:
+      labels:
+        app: astroml-api
+    spec:
+      serviceAccountName: astroml
+      containers:
+      - name: api
+        image: ghcr.io/${{ github.repository }}:production-latest
+        imagePullPolicy: Always
+        ports:
+        - containerPort: 8000
+          name: http
+        env:
+        - name: DATABASE_URL
+          valueFrom:
+            configMapKeyRef:
+              name: astroml-config
+              key: DATABASE_URL
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              name: astroml-config
+              key: REDIS_URL
+        - name: LLM_PROVIDER
+          value: "openai"
+        - name: ASTROML_ENV
+          value: "production"
+        resources:
+          requests:
+            memory: "256Mi"
+            cpu: "250m"
+          limits:
+            memory: "512Mi"
+            cpu: "500m"
+        livenessProbe:
+          httpGet:
+            path: /health
+            port: 8000
+          initialDelaySeconds: 30
+          periodSeconds: 10
+          failureThreshold: 3
+        readinessProbe:
+          httpGet:
+            path: /api/v1/llm/health
+            port: 8000
+          initialDelaySeconds: 15
+          periodSeconds: 5
+          failureThreshold: 3
diff --git a/k8s/kustomization.yaml b/k8s/kustomization.yaml
index d0123bd..889e845 100644
--- a/k8s/kustomization.yaml
+++ b/k8s/kustomization.yaml
@@ -9,6 +9,8 @@ resources:
   - redis-deployment.yaml
   - feature-store-deployment.yaml
   - astroml-deployment.yaml
+  - astroml-api-deployment.yaml
+  - llm-canary-deployment.yaml
   - services.yaml
   - ingress.yaml
   - monitoring.yaml
diff --git a/k8s/llm-canary-deployment.yaml b/k8s/llm-canary-deployment.yaml
new file mode 100644
index 0000000..88420b9
--- /dev/null
+++ b/k8s/llm-canary-deployment.yaml
@@ -0,0 +1,82 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: astroml-api-canary
+  namespace: astroml
+  labels:
+    app: astroml-api
+    version: canary
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: astroml-api
+      version: canary
+  template:
+    metadata:
+      labels:
+        app: astroml-api
+        version: canary
+    spec:
+      serviceAccountName: astroml
+      containers:
+      - name: api
+        image: ghcr.io/${{ github.repository }}:production-${{ github.sha }}
+        imagePullPolicy: Always
+        ports:
+        - containerPort: 8000
+          name: http
+        env:
+        - name: DATABASE_URL
+          valueFrom:
+            configMapKeyRef:
+              name: astroml-config
+              key: DATABASE_URL
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              name: astroml-config
+              key: REDIS_URL
+        - name: LLM_PROVIDER
+          value: "openai"
+        - name: ASTROML_ENV
+          value: "production"
+        resources:
+          requests:
+            memory: "256Mi"
+            cpu: "250m"
+          limits:
+            memory: "512Mi"
+            cpu: "500m"
+        livenessProbe:
+          httpGet:
+            path: /health
+            port: 8000
+          initialDelaySeconds: 15
+          periodSeconds: 10
+          failureThreshold: 3
+        readinessProbe:
+          httpGet:
+            path: /api/v1/llm/health
+            port: 8000
+          initialDelaySeconds: 10
+          periodSeconds: 5
+          failureThreshold: 3
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: astroml-api-canary
+  namespace: astroml
+  labels:
+    app: astroml-api
+    version: canary
+spec:
+  type: ClusterIP
+  ports:
+  - port: 8000
+    targetPort: 8000
+    name: http
+  selector:
+    app: astroml-api
+    version: canary
diff --git a/monitoring/grafana/llm_health_dashboard.json b/monitoring/grafana/llm_health_dashboard.json
new file mode 100644
index 0000000..124fa24
--- /dev/null
+++ b/monitoring/grafana/llm_health_dashboard.json
@@ -0,0 +1,114 @@
+{
+  "dashboard": {
+    "title": "LLM Health Monitoring",
+    "uid": "llm-health",
+    "timezone": "browser",
+    "schemaVersion": 38,
+    "version": 0,
+    "refresh": "1m",
+    "time": {"from": "now-6h", "to": "now"},
+    "panels": [
+      {
+        "id": 1,
+        "title": "Provider Health Status",
+        "type": "stat",
+        "gridPos": {"h": 8, "w": 24, "x": 0, "y": 0},
+        "targets": [
+          {
+            "expr": "astroml_llm_provider_health",
+            "legendFormat": "{{provider}}",
+            "refId": "A"
+          }
+        ],
+        "fieldConfig": {
+          "defaults": {
+            "mappings": [
+              {"type": "value", "value": "0", "text": "Unhealthy"},
+              {"type": "value", "value": "1", "text": "Healthy"}
+            ],
+            "thresholds": {
+              "mode": "absolute",
+              "steps": [
+                {"color": "red", "value": 0},
+                {"color": "green", "value": 1}
+              ]
+            }
+          }
+        }
+      },
+      {
+        "id": 2,
+        "title": "P95 Request Latency (seconds)",
+        "type": "graph",
+        "gridPos": {"h": 8, "w": 12, "x": 0, "y": 8},
+        "targets": [
+          {
+            "expr": "histogram_quantile(0.95, rate(astroml_llm_request_latency_seconds_bucket[5m]))",
+            "legendFormat": "{{provider}}",
+            "refId": "A"
+          }
+        ],
+        "yaxes": [
+          {"format": "s", "label": "Latency"},
+          {"format": "short", "show": false}
+        ]
+      },
+      {
+        "id": 3,
+        "title": "Request Error Rate (per sec)",
+        "type": "graph",
+        "gridPos": {"h": 8, "w": 12, "x": 12, "y": 8},
+        "targets": [
+          {
+            "expr": "rate(astroml_llm_requests_total{status=\"error\"}[5m])",
+            "legendFormat": "{{provider}}",
+            "refId": "A"
+          }
+        ]
+      },
+      {
+        "id": 4,
+        "title": "Total Cost (USD)",
+        "type": "graph",
+        "gridPos": {"h": 8, "w": 12, "x": 0, "y": 16},
+        "targets": [
+          {
+            "expr": "sum by (provider) (increase(astroml_llm_cost_usd_total[1h]))",
+            "legendFormat": "{{provider}}",
+            "refId": "A"
+          }
+        ],
+        "yaxes": [
+          {"format": "currencyUSD", "label": "Cost"},
+          {"format": "short", "show": false}
+        ]
+      },
+      {
+        "id": 5,
+        "title": "Total Tokens (last 1h)",
+        "type": "graph",
+        "gridPos": {"h": 8, "w": 12, "x": 12, "y": 16},
+        "targets": [
+          {
+            "expr": "sum by (provider, token_type) (increase(astroml_llm_tokens_total[1h]))",
+            "legendFormat": "{{provider}} - {{token_type}}",
+            "refId": "A"
+          }
+        ]
+      },
+      {
+        "id": 6,
+        "title": "Total Requests",
+        "type": "graph",
+        "gridPos": {"h": 8, "w": 24, "x": 0, "y": 24},
+        "targets": [
+          {
+            "expr": "sum by (provider, status) (astroml_llm_requests_total)",
+            "legendFormat": "{{provider}} ({{status}})",
+            "refId": "A"
+          }
+        ]
+      }
+    ]
+  }
+}
diff --git a/monitoring/prometheus/alert_rules.yml b/monitoring/prometheus/alert_rules.yml
index b61b75e..84264bc 100644
--- a/monitoring/prometheus/alert_rules.yml
+++ b/monitoring/prometheus/alert_rules.yml
@@ -28,11 +28,49 @@ groups:
           summary: "Ingestion stalled for {{ $labels.stream_type }} on {{ $labels.horizon_url }}"
           description: "No records have been processed for {{ $labels.stream_type }} in the last 15 minutes."
 
-      - alert: PersistentRateLimit
-        expr: astroml_ingestion_rate_limit_backoff_seconds > 60
-        for: 10m
+  - alert: PersistentRateLimit
+    expr: astroml_ingestion_rate_limit_backoff_seconds > 60
+    for: 10m
+    labels:
+      severity: warning
+    annotations:
+      summary: "Persistent rate limiting for {{ $labels.stream_type }}"
+      description: "The ingestion service is facing persistent rate limiting with backoff > 60s for over 10 minutes."
+
+  - name: astroml_llm_alerts
+    rules:
+      - alert: LLMProviderDown
+        expr: astroml_llm_provider_health == 0
+        for: 2m
+        labels:
+          severity: critical
+        annotations:
+          summary: "LLM provider {{ $labels.provider }} is down"
+          description: "The {{ $labels.provider }} LLM provider has been unreachable for more than 2 minutes."
+
+      - alert: LLMHighErrorRate
+        expr: rate(astroml_llm_requests_total{status="error"}[5m]) > 0.1
+        for: 2m
+        labels:
+          severity: warning
+        annotations:
+          summary: "High LLM error rate for {{ $labels.provider }}"
+          description: "LLM error rate for {{ $labels.provider }} is currently {{ $value }} requests/sec."
+
+      - alert: LLMCostThreshold
+        expr: increase(astroml_llm_cost_usd_total[1h]) > 10
+        for: 5m
+        labels:
+          severity: warning
+        annotations:
+          summary: "LLM cost spike for {{ $labels.provider }}"
+          description: "LLM cost for {{ $labels.provider }} exceeded $10 in the last hour."
+
+      - alert: LLMHighLatency
+        expr: histogram_quantile(0.95, rate(astroml_llm_request_latency_seconds_bucket[5m])) > 5
+        for: 3m
         labels:
           severity: warning
         annotations:
-          summary: "Persistent rate limiting for {{ $labels.stream_type }}"
-          description: "The ingestion service is facing persistent rate limiting with backoff > 60s for over 10 minutes."
+          summary: "High LLM latency for {{ $labels.provider }}"
+          description: "P95 latency for {{ $labels.provider }} is {{ $value }}s (threshold 5s)."
diff --git a/monitoring/prometheus/prometheus.yml b/monitoring/prometheus/prometheus.yml
index 6b73ec3..0807380 100644
--- a/monitoring/prometheus/prometheus.yml
+++ b/monitoring/prometheus/prometheus.yml
@@ -90,3 +90,13 @@ scrape_configs:
       - source_labels: [__address__]
         target_label: instance
         replacement: 'production'
+
+  # FastAPI API service metrics
+  - job_name: 'astroml-api'
+    metrics_path: '/metrics'
+    static_configs:
+      - targets: ['api:8000']
+    relabel_configs:
+      - source_labels: [__address__]
+        target_label: instance
+        replacement: 'api'
diff --git a/scripts/auto-rollback.sh b/scripts/auto-rollback.sh
new file mode 100644
index 0000000..c07fa55
--- /dev/null
+++ b/scripts/auto-rollback.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+set -euo pipefail
+
+NAMESPACE="${NAMESPACE:-astroml}"
+STABLE_DEPLOYMENT="${STABLE_DEPLOYMENT:-astroml-api}"
+ROLLBACK_TIMEOUT=300
+
+echo "Initiating rollback for ${STABLE_DEPLOYMENT} in namespace ${NAMESPACE}..."
+
+kubectl rollout undo deployment/${STABLE_DEPLOYMENT} -n "${NAMESPACE}"
+
+if kubectl rollout status deployment/${STABLE_DEPLOYMENT} -n "${NAMESPACE}" --timeout=${ROLLBACK_TIMEOUT}s; then
+  echo "Rollback completed successfully for ${STABLE_DEPLOYMENT}"
+else
+  echo "Rollback verification failed for ${STABLE_DEPLOYMENT}"
+  kubectl get pods -n "${NAMESPACE}"
+  kubectl describe deployment/${STABLE_DEPLOYMENT} -n "${NAMESPACE}"
+  exit 1
+fi
+
+REVISION=$(kubectl rollout history deployment/${STABLE_DEPLOYMENT} -n "${NAMESPACE}" | tail -n 1 | awk '{print $1}')
+echo "Rolled back to revision ${REVISION}"
+
+kubectl get events --field-selector involved-object.name=${STABLE_DEPLOYMENT} -n "${NAMESPACE}" --sort-by='.lastTimestamp' | tail -n 10
diff --git a/scripts/canary-deploy.sh b/scripts/canary-deploy.sh
new file mode 100644
index 0000000..9c818e5
--- /dev/null
+++ b/scripts/canary-deploy.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+set -euo pipefail
+
+REGISTRY="${REGISTRY:-ghcr.io}"
+REPO="${REPO:-$GITHUB_REPOSITORY}"
+IMAGE_TAG="${IMAGE_TAG:-latest}"
+NAMESPACE="${NAMESPACE:-astroml}"
+CANARY_DEPLOYMENT="astroml-api-canary"
+STABLE_DEPLOYMENT="astroml-api"
+CANARY_SERVICE="astroml-api-canary"
+EXPECTED_REPLICAS=1
+
+echo "Deploying canary: ${REGISTRY}/${REPO}:${IMAGE_TAG}"
+
+kubectl apply -f k8s/llm-canary-deployment.yaml -n "${NAMESPACE}"
+
+kubectl set image deployment/${CANARY_DEPLOYMENT} \
+  api=${REGISTRY}/${REPO}:${IMAGE_TAG} \
+  -n "${NAMESPACE}" --record
+
+kubectl rollout status deployment/${CANARY_DEPLOYMENT} \
+  -n "${NAMESPACE}" --timeout=300s
+
+CANARY_REPLICAS=$(kubectl get deployment ${CANARY_DEPLOYMENT} -n "${NAMESPACE}" -o jsonpath='{.status.readyReplicas}' || echo 0)
+if [ "${CANARY_REPLICAS}" -ne "${EXPECTED_REPLICAS}" ]; then
+  echo "Canary deployment failed: expected ${EXPECTED_REPLICAS} replicas, got ${CANARY_REPLICAS}"
+  exit 1
+fi
+
+echo "Canary deployed successfully with ${CANARY_REPLICAS} replica(s)"
diff --git a/scripts/canary-promote.sh b/scripts/canary-promote.sh
new file mode 100644
index 0000000..d98866b
--- /dev/null
+++ b/scripts/canary-promote.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+set -euo pipefail
+
+REGISTRY="${REGISTRY:-ghcr.io}"
+REPO="${REPO:-$GITHUB_REPOSITORY}"
+IMAGE_TAG="${IMAGE_TAG:-latest}"
+NAMESPACE="${NAMESPACE:-astroml}"
+CANARY_DEPLOYMENT="astroml-api-canary"
+STABLE_DEPLOYMENT="astroml-production"
+
+echo "Promoting canary to stable..."
+
+kubectl set image deployment/${STABLE_DEPLOYMENT} \
+  api=${REGISTRY}/${REPO}:${IMAGE_TAG} \
+  -n "${NAMESPACE}" --record
+
+kubectl rollout status deployment/${STABLE_DEPLOYMENT} \
+  -n "${NAMESPACE}" --timeout=300s
+
+echo "Scaling down canary..."
+kubectl scale deployment/${CANARY_DEPLOYMENT} -n "${NAMESPACE}" --replicas=0
+
+echo "Canary promoted successfully"