Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions api/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,18 @@
validation_router,
ws_router,
streaming_router,
llm_usage_router,
llm_cache_metrics_router,
llm_router,
reports_router,
alerts_router,
)



from api.routers.monitoring import record_latency


from api.routers.ws import poll_and_broadcast_transactions

# Setup distributed tracing (issue #336)
Expand Down Expand Up @@ -167,12 +174,16 @@ async def _latency_middleware(request: Request, call_next):
app.include_router(chat_router)
app.include_router(ws_router)
app.include_router(streaming_router)
app.include_router(llm_usage_router)
app.include_router(llm_cache_metrics_router)

app.include_router(llm_router)
app.include_router(reports_router)
app.include_router(alerts_router)


@app.get("/health", tags=["ops"])
async def health():
    """Report that the API process is up.

    Returns:
        The constant JSON object ``{"status": "ok"}``. Nothing else is
        inspected when building the response.
    """
    payload = {"status": "ok"}
    return payload

Expand Down
6 changes: 6 additions & 0 deletions api/routers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,15 @@
from api.routers.validation import router as validation_router
from api.routers.ws import router as ws_router
from api.routers.streaming import router as streaming_router
from api.routers.llm_usage import router as llm_usage_router
from api.routers.llm_cache_metrics import router as llm_cache_metrics_router

from api.routers.llm import router as llm_router
from api.routers.reports import router as reports_router
from api.routers.alerts import router as alerts_router

__all__ = [

"accounts_router",
"audit_router",
"backup_router",
Expand All @@ -48,7 +52,9 @@
"validation_router",
"ws_router",
"streaming_router",
"llm_usage_router",
"llm_router",
"reports_router",
"alerts_router",
]

19 changes: 19 additions & 0 deletions api/routers/llm_cache_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"""LLM semantic cache metrics endpoints."""

from __future__ import annotations

from typing import Any, Dict

from fastapi import APIRouter

from astroml.cache.redis_cache import RedisCache
from astroml.llm.llm_cached_client import get_semantic_cache_metrics

router = APIRouter(prefix="/api/v1/llm", tags=["llm"])


@router.get("/cache/semantic/metrics", response_model=Dict[str, Any])
def semantic_cache_metrics():
    """Expose semantic cache hit/miss counts and average lookup latency.

    The values are whatever ``get_semantic_cache_metrics`` reports for the
    Redis-backed cache handed to it; this endpoint adds no processing.

    Returns:
        The metrics mapping produced by ``get_semantic_cache_metrics``.
    """
    # NOTE(review): a fresh RedisCache is constructed on every request —
    # confirm that construction is cheap (or pooled) before heavy polling.
    cache = RedisCache()
    metrics = get_semantic_cache_metrics(redis_cache=cache)
    return metrics

41 changes: 41 additions & 0 deletions api/routers/llm_usage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""LLM usage and cost monitoring endpoints.

These endpoints expose:
- recent LLM call events (all calls logged)
- rolling cost summaries

Prometheus metrics are emitted by ``LLMUsageTracker``.
"""

from __future__ import annotations

from typing import Any, Dict, List, Optional

from fastapi import APIRouter, Query

from astroml.tracking.llm_usage_tracker import default_llm_usage_tracker

router = APIRouter(prefix="/api/v1/llm", tags=["llm"])


@router.get("/usage/recent", response_model=List[Dict[str, Any]])
def recent_llm_usage(limit: int = Query(100, ge=1, le=1000)):
    """List the latest LLM calls recorded by the default usage tracker.

    Args:
        limit: Maximum number of calls to return. Declared as a query
            parameter bounded to 1..1000, defaulting to 100.

    Returns:
        Whatever ``default_llm_usage_tracker.recent_calls`` yields for the
        requested limit.
    """
    tracker = default_llm_usage_tracker
    calls = tracker.recent_calls(limit=limit)
    return calls


@router.get("/usage/summary", response_model=Dict[str, Any])
def usage_summary():
    """Summarise call count, cost and token totals over recent LLM calls.

    Only the events returned by the tracker's ``recent_calls`` (requested
    with a limit of 5000) are considered, so the totals cover that recent
    window rather than all-time usage.

    Returns:
        A dict with ``total_calls``, ``total_cost_usd`` (rounded to six
        decimal places), ``total_tokens`` and a ``window`` description.
    """
    recent = default_llm_usage_tracker.recent_calls(limit=5000)
    call_count = len(recent)

    # Missing or falsy fields (None, 0, "") count as zero.
    costs = [float(event.get("cost_usd", 0.0) or 0.0) for event in recent]
    tokens = [int(event.get("total_tokens", 0) or 0) for event in recent]

    summary = {
        "total_calls": call_count,
        "total_cost_usd": round(sum(costs), 6),
        "total_tokens": sum(tokens),
        "window": "in-memory-recent (up to last 5000 events)",
    }
    return summary

Loading