Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
353 changes: 353 additions & 0 deletions 0001-Adding-basic-anthropic-Support.patch

Large diffs are not rendered by default.

191 changes: 185 additions & 6 deletions poetry.lock

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ google-genai = "^0.3.0"
filelock = "^3.13.0"
numpy = "^2.3.4"
rich = "^14.2.0"
langchain-anthropic = "^0.3.0"
langchain-core = "^0.3.0"
anthropic = "^0.39.0"

[tool.poetry.group.dev.dependencies]
pytest = "^7.3"
Expand Down
56 changes: 50 additions & 6 deletions src/api_interceptors/api_interceptor_httpx.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,20 @@
import httpx
from typing import Optional, Callable, Any
from src.api_interceptors.api_interceptor_parser import UnifiedAPIMetricsExtractor
from src.models.api_limit_configs_models.api_limit_metrics_model import APIMetrics
from src.api_metrics.api_metrics_processor import APIMetricsProcessor

def create_throttled_httpx_response(request, api_metrics):
"""Create a mock HTTPX response for throttled requests"""
# Simple message matching GRPC format
throttled_message = f"Request throttled for provider: {api_metrics.api_provider}"

mock_response = httpx.Response(
status_code=429, # Too Many Requests
content=throttled_message.encode(),
request=request
)
return mock_response

class HTTPXInterceptor:
def __init__(self):
Expand All @@ -13,12 +29,40 @@ def enable(self) -> None:
interceptor = self

def sync_interceptor(self_client, request, **kwargs):
print(f"[HTTPX] {request.method} {request.url}")
print(f"[HEADERS] {dict(request.headers)}")
if request.content:
print(f"[BODY] {request.content}")
response = interceptor.original_send_sync(self_client, request, **kwargs)
print(f"[RESPONSE] {response.status_code}")
if request.content and request.headers:
api_metrics = APIMetrics()
api_metrics_processor = APIMetricsProcessor()
api_metrics = UnifiedAPIMetricsExtractor.from_httpx_interceptor_request(
request_headers=request.headers,
request_body=request.content
)
check_throttle = api_metrics_processor.throttle_api_metrics(api_metrics)
if check_throttle:
print(f"Throttling Request for provider : {api_metrics.api_provider}")
throttled_response = create_throttled_httpx_response(
request=request,
api_metrics=api_metrics
)
return throttled_response
try :
response = interceptor.original_send_sync(self_client, request, **kwargs)
if api_metrics.api_provider and response.status_code == 200:
print(f"[API Provider] {api_metrics.api_provider}")
print(f"[Model] {api_metrics.model}")
api_metrics_response = UnifiedAPIMetricsExtractor.from_httpx_interceptor_response(
api_metric = api_metrics,
response = response.content
)
print(f"[Input Tokens] {api_metrics_response.input_tokens}")
print(f"[Output Tokens] {api_metrics_response.output_tokens}")
if not api_metrics_response:
raise ValueError("Failed to Parse Token Information")
# updating metrics
check_update = api_metrics_processor.update_api_metrics(api_metrics_response)
if not check_update:
raise ValueError("Failed to Update Metrics Information")
except Exception as e:
print(f"[HTTPX ERROR] {e}")
return response

async def async_interceptor(self_client, request, **kwargs):
Expand Down
15 changes: 8 additions & 7 deletions src/api_interceptors/api_interceptor_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
import json
import grpc.aio

KNOWN_API_PROVIDERS = {
KNOWN_PROVIDERS = {
"langchain-google-genai",
"langchain-anthropic",
"langchain-openai"
"anthropic",
"openai"
}

MODEL_FIELD = {
Expand Down Expand Up @@ -40,9 +40,10 @@ def extract_api_provider(user_agent: Optional[str], headers: Optional[Dict[str,
print(f"Headers not currently processed : {headers}")

if header:
header_prefix = header.split('/', 1)[0]
if header_prefix in KNOWN_API_PROVIDERS:
return header_prefix
lower_header = header.lower()
for known_provider in KNOWN_PROVIDERS:
if known_provider in lower_header:
return known_provider
else:
print(f"Header Prefix not currently processed:{header}")
return None
Expand Down Expand Up @@ -171,7 +172,7 @@ def from_grpc_interceptor_response(
@classmethod
def from_httpx_interceptor_request(
cls,
request_headers: Dict[str, str],
request_headers: Optional[Dict[str, str]],
request_body: Optional[bytes]
) -> APIMetrics:
"""
Expand Down
19 changes: 15 additions & 4 deletions src/main.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from src.api_interceptors.api_interceptor_manager import InterceptorManager
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_anthropic import ChatAnthropic
import time

config = {
Expand Down Expand Up @@ -37,7 +38,7 @@
interceptor = InterceptorManager()
interceptor.update_api_limit_config(config)
interceptor.enable()

"""
model = "models/gemini-2.5-flash-lite"
llm = ChatGoogleGenerativeAI(model=model)
messages = [
Expand All @@ -48,16 +49,26 @@
for i in range(6):
try:
ai_msg = llm.invoke(messages)

# Verify response is valid
if not hasattr(ai_msg, 'usage_metadata'):
raise ValueError(f"Request {i}: No usage_metadata found")

print(f"Request {i}: Success - {ai_msg.content[:50]}")
time.sleep(2)

except Exception as e:
print(f"Request {i} failed unexpectedly: {type(e).__name__}: {str(e)}")
raise
"""
# interceptor.disable()
llm = ChatAnthropic(
model="claude-haiku-4-5-20251001",
max_tokens=1024,
temperature=0.7
)

response = llm.invoke("1+1=")
print(response.content)

interceptor.disable()