OpenGradient · kylexqian · Apr 21, 2026 · Apr 20, 2026 · Apr 20, 2026 · Apr 20, 2026
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -15,4 +15,7 @@ jobs:
       - uses: actions/checkout@v4
       - uses: astral-sh/setup-uv@v5
       - name: Run unit tests
-        run: uv run --group test pytest tee_gateway/test/test_tool_forwarding.py tee_gateway/test/test_tee_core.py tests/test_pricing.py -v --import-mode=importlib
+        run: uv run --group test pytest tee_gateway/test/test_tool_forwarding.py tee_gateway/test/test_tee_core.py tee_gateway/test/test_price_feed.py tests/test_pricing.py -v --import-mode=importlib
+        # To also run integration tests (real CoinGecko network calls), add:
+        # env:
+        #   RUN_INTEGRATION_TESTS: "1"
diff --git a/tee_gateway/__main__.py b/tee_gateway/__main__.py
@@ -34,9 +34,10 @@
 from x402.server import x402ResourceServerSync
 from x402.session import SessionStore
 import x402.http.middleware.flask as x402_flask
-import types as _types
 
-from .util import dynamic_session_cost_calculator
+from .util import calculate_session_cost
+from .model_registry import get_model_config
+from .price_feed import OPGPriceFeed
 from .definitions import (
     EVM_PAYMENT_ADDRESS,
     BASE_MAINNET_NETWORK,
@@ -107,6 +108,13 @@ def _shutdown_heartbeat():
 
 atexit.register(_shutdown_heartbeat)
 
+# ---------------------------------------------------------------------------
+# OPG price feed — start before x402 middleware so the first request can be
+# priced correctly.  Runs as a daemon thread; no cleanup needed on exit.
+# ---------------------------------------------------------------------------
+_price_feed = OPGPriceFeed()
+_price_feed.start()
-atexit.register(_shutdown_heartbeat)
-
-# ---------------------------------------------------------------------------
-# OPG price feed — start before x402 middleware so the first request can be
-# priced correctly.  Runs as a daemon thread; no cleanup needed on exit.
-# ---------------------------------------------------------------------------
-_price_feed = OPGPriceFeed()
-_price_feed.start()
+def _start_price_feed_async():
+    """Launch ``_price_feed.start()`` on a daemon thread and return at once.
+
+    Any exception raised by ``start()`` is logged with its traceback on the
+    worker thread; it never propagates to the caller.
+    """
+
+    def _start_and_log_failure():
+        # Outermost frame of the worker thread: log the failure here rather
+        # than letting the exception escape the thread.
+        try:
+            _price_feed.start()
+        except Exception:
+            logger.exception("Failed to start OPG price feed")
+
+    starter = threading.Thread(
+        name="opg-price-feed-startup",
+        target=_start_and_log_failure,
+        daemon=True,
+    )
+    starter.start()
+
+
+atexit.register(_shutdown_heartbeat)
+
+# ---------------------------------------------------------------------------
+# OPG price feed — kicked off before the x402 middleware is created.
+# Initialization runs on a background thread so upstream outages do not block
+# process startup, which means an early request may arrive before a price is
+# available. The feed itself continues to run in the background; no cleanup
+# is needed on exit.
+# ---------------------------------------------------------------------------
+_price_feed = OPGPriceFeed()
+_start_price_feed_async()
-atexit.register(_shutdown_heartbeat)
-
-# ---------------------------------------------------------------------------
-# OPG price feed — start before x402 middleware so the first request can be
-# priced correctly.  Runs as a daemon thread; no cleanup needed on exit.
-# ---------------------------------------------------------------------------
-_price_feed = OPGPriceFeed()
-_price_feed.start()
+def _start_price_feed_async():
+    """Launch ``_price_feed.start()`` on a daemon thread and return at once.
+
+    Any exception raised by ``start()`` is logged with its traceback on the
+    worker thread; it never propagates to the caller.
+    """
+
+    def _start_and_log_failure():
+        # Outermost frame of the worker thread: log the failure here rather
+        # than letting the exception escape the thread.
+        try:
+            _price_feed.start()
+        except Exception:
+            logger.exception("Failed to start OPG price feed")
+
+    starter = threading.Thread(
+        name="opg-price-feed-startup",
+        target=_start_and_log_failure,
+        daemon=True,
+    )
+    starter.start()
+
+
+atexit.register(_shutdown_heartbeat)
+
+# ---------------------------------------------------------------------------
+# OPG price feed — kicked off before the x402 middleware is created.
+# Initialization runs on a background thread so upstream outages do not block
+# process startup, which means an early request may arrive before a price is
+# available. The feed itself continues to run in the background; no cleanup
+# is needed on exit.
+# ---------------------------------------------------------------------------
+_price_feed = OPGPriceFeed()
+_start_price_feed_async()
+
 facilitator = HTTPFacilitatorClientSync(FacilitatorConfig(url=FACILITATOR_URL))
 server = x402ResourceServerSync(facilitator)
 store = SessionStore()
@@ -303,6 +311,7 @@ def health():
         "status": "OK",
         "version": "1.0.0",
         "tee_enabled": True,
+        "price_feed": _price_feed.get_status(),
     }, 200
 
 
@@ -374,109 +383,66 @@ def _patched_read_body_bytes(environ):
 
 x402_flask._read_body_bytes = _patched_read_body_bytes
 
+
+def _session_cost_calculator(ctx: dict) -> int:
+    """Compute the post-inference session cost, logging loudly on failure.
+
+    Delegates to ``calculate_session_cost`` with the live price getter.  By
+    the time this runs the response has already been sent to the client.
+    Predictable failures (unknown price, unknown model) are blocked by the
+    pre-inference gate, so an exception here indicates a provider-side error
+    (e.g. missing usage field in the LLM response).  The x402 middleware
+    swallows the exception in close(), so the client is not charged; the
+    CRITICAL log makes sure such provider errors are never silently missed.
+
+    Raises:
+        Exception: whatever ``calculate_session_cost`` raised, re-raised
+            unchanged after logging.
+    """
+    try:
+        cost = calculate_session_cost(ctx, _price_feed.get_price)
+    except Exception as err:
+        logger.critical(
+            "Post-inference cost calculation failed (provider error) — "
+            "client was NOT charged: %s",
+            err,
+            exc_info=True,
+        )
+        raise
+    else:
+        return cost
+
+
 _payment_mw = payment_middleware(
     application,
     routes=routes,
     server=server,
     session_store=store,
     cost_per_request=100000000000000,  # static precheck/fallback estimate
     session_idle_timeout=100,
-    session_cost_calculator=dynamic_session_cost_calculator,
+    session_cost_calculator=_session_cost_calculator,
 )
 
 # ---------------------------------------------------------------------------
-# Strict cost-resolution patch
-#
-# Why this exists
-# ---------------
-# The upstream x402 PaymentMiddleware._resolve_session_request_cost wraps the
-# call to the session_cost_calculator in a broad try/except.  If the calculator
-# raises (e.g. ValueError for an unrecognised model name, KeyError for missing
-# usage data), the exception is swallowed and the middleware silently falls back
-# to the static session maximum (CHAT_COMPLETIONS_OPG_SESSION_MAX_SPEND /
-# CHAT_COMPLETIONS_USDC_AMOUNT).  That silent fallback means:
-#   • The client is charged the full pre-check cap instead of actual usage.
-#   • The server has no visible indication that pricing failed.
+# Pre-inference pricing gate
 #
-# The fix
-# -------
-# We replace _resolve_session_request_cost with our own implementation that is
-# identical to upstream, except the cost-calculator call is NOT wrapped in a
-# try/except.  Any exception from dynamic_session_cost_calculator() therefore
-# propagates up through the middleware and Flask, producing a proper HTTP 500
-# response to the client instead of an incorrect silent charge.
+# In the upto session scheme the response is streamed to the client before
+# cost is settled, so a post-inference pricing failure cannot be surfaced as
+# an HTTP error.  Instead we validate everything that can be checked up-front
+# and reject the request early if pricing would fail:
+#   1. Price feed has a valid OPG/USD price (CoinGecko fetch succeeded).
+#   2. The requested model is in the registry (has a known per-token price).
 # ---------------------------------------------------------------------------
 
 
-def _strict_resolve_session_request_cost(
-    self,
-    *,
-    method: str,
-    path: str,
-    request_body_bytes: bytes,
-    response_body_bytes: bytes,
-    payment_payload: object,
-    payment_requirements: object,
-    status_code: int | None,
-    output_object: object = None,
-    is_streaming: bool = False,
-) -> int:
-    """Replacement for PaymentMiddleware._resolve_session_request_cost.
-
-    Identical to the upstream implementation except that exceptions raised by
-    the dynamic cost calculator are NOT caught.  This means a request whose
-    cost cannot be determined (unknown model, missing usage data, etc.) will
-    result in a 500 error rather than silently falling back to the static cap
-    amount and charging the user an incorrect amount.
-    """
-    from x402.http.middleware.flask import _parse_json_bytes as _x402_parse_json  # noqa: PLC0415
-
-    default_cost = self._get_session_cost(payment_requirements)
-    if not self._should_charge_response(status_code):
-        return default_cost
-    if not callable(self._session_cost_calculator):
-        return default_cost
-
-    request_object = _x402_parse_json(request_body_bytes)
-    response_object = (
-        output_object
-        if output_object is not None
-        else _x402_parse_json(response_body_bytes)
-    )
-
-    callback_context = {
-        "method": method,
-        "path": path,
-        "status_code": status_code,
-        "is_streaming": is_streaming,
-        "request_body_bytes": request_body_bytes,
-        "response_body_bytes": response_body_bytes,
-        "request_json": request_object
-        if isinstance(request_object, (dict, list))
-        else None,
-        "response_json": response_object
-        if isinstance(response_object, (dict, list))
-        else None,
-        "response_object": response_object,
-        "payment_payload": payment_payload,
-        "payment_requirements": payment_requirements,
-        "default_cost": default_cost,
-    }
-
-    # Do NOT catch exceptions here — let them propagate so the request fails
-    # with a 500 rather than silently charging the static fallback amount.
-    dynamic_cost = self._session_cost_calculator(callback_context)
-    if dynamic_cost is None:
-        raise ValueError(
-            f"dynamic_session_cost_calculator returned None for {method} {path}; "
-            "cannot determine request cost"
-        )
-    return self._coerce_non_negative_int(dynamic_cost)
-
+@application.before_request
+def _check_pricing_ready():
+    """Reject inference requests up-front when they could not be priced.
+
+    Registered as a Flask ``before_request`` hook; it only acts on the two
+    inference routes and lets every other path through untouched.
+
+    Returns:
+        ``None`` to let the request proceed, or a ``(response, status)``
+        tuple that short-circuits it:
+          * 503 when ``_price_feed.get_price()`` raises ``ValueError``;
+          * 400 when the body's ``model`` is not a string, or when
+            ``get_model_config`` rejects it with ``ValueError``.
+    """
+    if request.path not in ("/v1/chat/completions", "/v1/completions"):
+        return None
+    try:
+        _price_feed.get_price()
+    except ValueError as exc:
+        logger.warning(
+            "Rejecting inference request — price feed unavailable: %s", exc
+        )
+        return jsonify({"error": f"Pricing unavailable: {exc}"}), 503
+
+    # get_json() returns whatever the body decodes to.  A JSON array, string
+    # or number has no .get(), so only a JSON object is inspected for "model";
+    # anything else is treated like a body without a model instead of
+    # crashing the hook with AttributeError (HTTP 500).
+    body = request.get_json(silent=True, cache=True)
+    model = body.get("model") if isinstance(body, dict) else None
+    if not model:
+        # Missing/empty model: nothing to validate here, as before.
+        return None
+    if not isinstance(model, str):
+        # A list/dict/number can never name a registry entry; answer 400
+        # rather than letting the lookup fail with a non-ValueError.
+        return jsonify({"error": "'model' must be a string"}), 400
+    try:
+        get_model_config(model)
+    except ValueError:
+        return jsonify({"error": f"Model '{model}' is not supported"}), 400
+    return None
 
-_payment_mw._resolve_session_request_cost = _types.MethodType(  # type: ignore[method-assign, attr-defined]
-    _strict_resolve_session_request_cost, _payment_mw
-)
 
 logger.info("x402 payment middleware initialized")
 

diff --git a/tee_gateway/price_feed/__init__.py b/tee_gateway/price_feed/__init__.py
@@ -0,0 +1,7 @@
+from .config import PriceFeedConfig
+from .feed import OPGPriceFeed
+
+__all__ = [
+    "OPGPriceFeed",
+    "PriceFeedConfig",
+]
diff --git a/tee_gateway/price_feed/config.py b/tee_gateway/price_feed/config.py
@@ -0,0 +1,52 @@
+"""
+Configuration constants and dataclass for the OPG price feed.
+"""
+
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from decimal import Decimal
+
+
+# ---------------------------------------------------------------------------
+# CoinGecko API
+# ---------------------------------------------------------------------------
+COINGECKO_BASE_URL = "https://api.coingecko.com/api/v3"
+COINGECKO_PLATFORM = "base"  # Base mainnet platform identifier on CoinGecko
+FETCH_TIMEOUT = 10  # seconds per HTTP request
+
+# ---------------------------------------------------------------------------
+# Refresh / retry defaults
+# ---------------------------------------------------------------------------
+DEFAULT_REFRESH_INTERVAL = 300  # 5 minutes between background refresh cycles
+DEFAULT_MAX_RETRIES = 3  # attempts per refresh cycle before giving up
+DEFAULT_RETRY_DELAY = 10  # seconds between retry attempts within a cycle
+
+# ---------------------------------------------------------------------------
+# TGE (Token Generation Event) fallback
+# ---------------------------------------------------------------------------
+# Before the TGE cutover, OPG is not yet listed on CoinGecko.  Return a fixed
+# fallback price so inference requests can be priced immediately at launch.
+# After the cutover, the live CoinGecko price is used.
+#
+# The cutover is a timezone-aware UTC datetime: compare it only against other
+# aware datetimes (e.g. datetime.now(timezone.utc)); comparing it with a naive
+# datetime raises TypeError.
+TGE_CUTOVER_UTC = datetime(2026, 4, 21, 12, 30, 0, tzinfo=timezone.utc)
+# Built from a string so the Decimal is exactly 0.10 (a float literal would
+# carry binary rounding error into the price).
+TGE_FALLBACK_PRICE_USD = Decimal("0.10")
+
+# ---------------------------------------------------------------------------
+# Stale-price thresholds
+# ---------------------------------------------------------------------------
+# get_price() logs WARNING when last successful fetch is older than
+# STALE_WARNING_MULTIPLIER × refresh_interval seconds.
+STALE_WARNING_MULTIPLIER = 2
+
+# get_price() raises ValueError when last successful fetch is older than
+# STALE_PRICE_MAX_AGE seconds — at this point the cached price is considered
+# too outdated to use for billing.
+STALE_PRICE_MAX_AGE = 4 * 60 * 60  # 4 hours
+
+
+@dataclass(frozen=True)
+class PriceFeedConfig:
+    """Runtime configuration for the OPG price feed background service.
+
+    Frozen dataclass: fields cannot be reassigned after construction.  Each
+    field defaults to the matching module-level ``DEFAULT_*`` constant.
+    """
+
+    # Seconds between background refresh cycles.
+    refresh_interval: int = DEFAULT_REFRESH_INTERVAL
+    # Fetch attempts per refresh cycle before giving up.
+    max_retries: int = DEFAULT_MAX_RETRIES
+    # Seconds to wait between retry attempts within a cycle.
+    retry_delay: float = DEFAULT_RETRY_DELAY