From 711f5cda6ead1948a989df4f6e89a3e821075084 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Wed, 25 Feb 2026 11:18:44 +0000 Subject: [PATCH 1/2] Add debug script for Anthropic cache token cost investigation Reproduction script investigating why $ai_input_tokens can be inclusive (input + cache_read) instead of exclusive, causing cost overcharges. Tests raw API, SDK wrapper, and posthog_properties override behavior. Writes results to output.md for sharing. --- .../debug-anthropic-cache-cost/README.md | 24 + .../debug-anthropic-cache-cost/output.md | 260 ++++++++ .../scripts/debug-anthropic-cache-cost/run.py | 602 ++++++++++++++++++ 3 files changed, 886 insertions(+) create mode 100644 python/scripts/debug-anthropic-cache-cost/README.md create mode 100644 python/scripts/debug-anthropic-cache-cost/output.md create mode 100644 python/scripts/debug-anthropic-cache-cost/run.py diff --git a/python/scripts/debug-anthropic-cache-cost/README.md b/python/scripts/debug-anthropic-cache-cost/README.md new file mode 100644 index 0000000..7911343 --- /dev/null +++ b/python/scripts/debug-anthropic-cache-cost/README.md @@ -0,0 +1,24 @@ +# Debug: Anthropic Cache Token Cost + +Reproduction script for investigating Anthropic cache token cost discrepancies in the PostHog Python SDK. + +## The Issue + +When using `posthog.ai.anthropic.AsyncAnthropic` with Anthropic's prompt caching, the stored `$ai_input_tokens` can end up as the **inclusive** value (input + cache_read tokens) instead of the **exclusive** value that the Anthropic API returns. This causes the cost calculator to overcharge by treating all tokens (including cached ones) at the full prompt rate. + +## What This Script Tests + +1. **Raw Anthropic API** - Confirms the API returns `input_tokens` exclusive of cached tokens +2. **PostHog SDK wrapper** - Confirms the SDK correctly passes through the exclusive value +3. **`posthog_properties` override** - Demonstrates how passing custom properties can override the SDK's correct values + +## Usage + +```bash +cd python +python scripts/debug-anthropic-cache-cost/run.py +``` + +Requires `ANTHROPIC_API_KEY` and `POSTHOG_API_KEY` in the root `.env` file. + +Results are written to `output.md` (gitignored) in this directory. diff --git a/python/scripts/debug-anthropic-cache-cost/output.md b/python/scripts/debug-anthropic-cache-cost/output.md new file mode 100644 index 0000000..8a7afc6 --- /dev/null +++ b/python/scripts/debug-anthropic-cache-cost/output.md @@ -0,0 +1,260 @@ +# Anthropic Cache Token Cost - Debug Results + +**Date:** 2026-02-25 11:17 UTC +**Model:** `claude-haiku-4-5-20251001` +**PostHog Python SDK:** `v7.9.3` +**Anthropic SDK:** `v0.75.0` + +--- + +## TL;DR + +The PostHog Python SDK (`posthog.ai.anthropic.AsyncAnthropic`) correctly passes through the **exclusive** `input_tokens` value from the Anthropic API. The SDK does **not** modify or inflate this value. + +However, if you pass `posthog_properties` to `messages.create()` containing `$ai_input_tokens` or `$ai_total_tokens`, those values **override** the SDK's correct values. This can cause inflated cost calculations if the overriding values include cached tokens in the input count. + +--- + +## Test 1: Raw Anthropic API Behavior + +Verified that the Anthropic API returns `input_tokens` **exclusive** of cached tokens: + +| Field | Value | +|-------|-------| +| `input_tokens` | 13 | +| `cache_read_input_tokens` | 14100 | +| `cache_creation_input_tokens` | 14100 | +| `output_tokens` | 41 | + +**Result:** `input_tokens` (13) is much smaller than `cache_read_input_tokens` (14100), confirming Anthropic returns **exclusive** counts. + +--- + +## Test 2: PostHog SDK Wrapper (No Custom Properties) + +Spied on `ph_client.capture()` to see what the SDK sends to PostHog: + +| Field | Anthropic API | PostHog SDK | +|-------|--------------|-------------| +| `input_tokens` / `$ai_input_tokens` | 13 | 13 | +| `cache_read_input_tokens` / `$ai_cache_read_input_tokens` | 14100 | 14100 | +| `$ai_total_tokens` | N/A | NOT SET | + +**Result:** SDK correctly passes through the exclusive value. + +The Anthropic wrapper does NOT set `$ai_total_tokens` - this is expected. + +--- + +## Test 3: `posthog_properties` Override (Root Cause) + +### 3a) Direct Override Test + +Passed `posthog_properties={"$ai_input_tokens": 99999}` to `messages.create()`. + +**Result:** The SDK's correct value was **overridden** to 99999. This confirms `posthog_properties` takes precedence. + +### 3b) Simulated Customer Pattern + +Simulated what happens when a customer computes inclusive token counts and passes them via `posthog_properties`: + +```python +# Customer code pattern that causes the overcharge: +inclusive_input = response.usage.input_tokens + response.usage.cache_read_input_tokens +total = inclusive_input + response.usage.output_tokens + +client.messages.create( + ..., + posthog_properties={ + "$ai_input_tokens": inclusive_input, # WRONG - includes cached tokens! + "$ai_total_tokens": total, + }, +) +``` + +| Field | Correct (Exclusive) | Wrong (Inclusive) | +|-------|-------------------|-----------------| +| `$ai_input_tokens` | 12 | 14113 | +| `$ai_cache_read_input_tokens` | 14100 | 14100 | +| `$ai_total_tokens` | not set | 14149 | + +### Cost Impact + +Using `claude-haiku-4-5` pricing ($1/Mtok input, $0.10/Mtok cache read): + +| | Correct | Overcharged | +|--|---------|-------------| +| Uncached input cost | $0.001422 | $0.015523 | +| **Overcharge factor** | | **10.9x** | + +--- + +## How to Check if This Affects You + +Look for any code that passes `posthog_properties` to `messages.create()` with token-related fields: + +```python +# Search your codebase for patterns like: +client.messages.create( + ..., + posthog_properties={ + "$ai_input_tokens": ..., # This overrides the SDK! + "$ai_total_tokens": ..., # This also overrides! + }, +) +``` + +The SDK already extracts the correct exclusive `input_tokens` from the Anthropic response. You do **not** need to pass these values yourself. + +### What to Do + +**Option A (Recommended):** Remove `$ai_input_tokens` and `$ai_total_tokens` from your `posthog_properties`. The SDK handles these correctly. + +**Option B:** If you need to pass custom properties, make sure `$ai_input_tokens` uses the **exclusive** value from `response.usage.input_tokens` (not `input_tokens + cache_read_input_tokens`). + +--- + +## Key Evidence + +1. `$ai_total_tokens` is present in the stored events, but the PostHog Anthropic wrapper **never sets** this property. Only OpenAI Agents sets it. This strongly suggests the value comes from `posthog_properties`. + +2. The stored `$ai_input_tokens` (48268) minus `$ai_cache_read_input_tokens` (45417) equals exactly the expected exclusive value (2851), which is what the Anthropic API returns as `input_tokens`. + +--- + +## Raw Script Output + +
+Click to expand full script output + +``` +====================================================================== + Anthropic Cache Token Cost Debug Script + Investigating: $ai_input_tokens inclusive vs exclusive +====================================================================== + +====================================================================== + STEP 1: Raw Anthropic API (no PostHog wrapper) +====================================================================== + Making two calls to populate cache, then checking usage on second call... + + Call 1 (cache miss - should create cache)... + + Call 1 usage: + input_tokens: 13 + output_tokens: 39 + cache_read_input_tokens: 0 + cache_creation_input_tokens: 14100 + + Call 2 (cache hit expected)... + + Call 2 usage: + input_tokens: 13 + output_tokens: 41 + cache_read_input_tokens: 14100 + cache_creation_input_tokens: 0 + + Analysis: + input_tokens (from API): 13 + cache_read_input_tokens: 14100 + input + cache_read: 14113 + => input_tokens is EXCLUSIVE of cache (correct Anthropic behavior) + +====================================================================== + STEP 2: PostHog AsyncAnthropic wrapper (spy on capture) +====================================================================== + + Call 1 (cache miss - populating cache)... + + [SPY] ph_client.capture() called! + event: $ai_generation + $ai_provider: anthropic + $ai_model: claude-haiku-4-5-20251001 + $ai_input_tokens: 13 + $ai_output_tokens: 41 + $ai_cache_read_input_tokens: 14100 + $ai_cache_creation_input_tokens: None + $ai_total_tokens: NOT SET + + Raw API usage (call 1): + input_tokens: 13 + output_tokens: 41 + cache_read_input_tokens: 14100 + cache_creation_input_tokens: 0 + + Call 2 (cache hit expected)... + + [SPY] ph_client.capture() called! + event: $ai_generation + $ai_provider: anthropic + $ai_model: claude-haiku-4-5-20251001 + $ai_input_tokens: 13 + $ai_output_tokens: 41 + $ai_cache_read_input_tokens: 14100 + $ai_cache_creation_input_tokens: None + $ai_total_tokens: NOT SET + + Raw API usage (call 2): + input_tokens: 13 + output_tokens: 41 + cache_read_input_tokens: 14100 + cache_creation_input_tokens: 0 + +====================================================================== + COMPARISON (Call 2 - cache hit) +====================================================================== + Anthropic API input_tokens: 13 + SDK $ai_input_tokens: 13 + cache_read_input_tokens: 14100 + API input + cache_read: 14113 + + RESULT: SDK correctly passes through EXCLUSIVE input_tokens + + OK: $ai_total_tokens is not set (expected for Anthropic wrapper) + +====================================================================== + STEP 3: Test posthog_properties override (leading theory) +====================================================================== + Simulating customer passing their own inclusive token counts via posthog_properties... + + 3a) Simple override with hardcoded values... + Anthropic API input_tokens: 13 + SDK $ai_input_tokens: 99999 + $ai_total_tokens: 100599 + => posthog_properties OVERRIDES the SDK's correct value + + 3b) Simulating customer computing inclusive tokens from the response... + (Customer code might do: input_tokens = usage.input_tokens + usage.cache_read_input_tokens) + Customer computes: inclusive_input = 13 + 14100 = 14113 + Customer computes: total = 14113 + 36 = 14149 + Anthropic API input_tokens: 12 (exclusive) + Anthropic API cache_read: 14100 + Anthropic API output_tokens: 100 + --- + Stored $ai_input_tokens: 14113 (inclusive!) + Stored $ai_cache_read_input_tokens: 14100 + Stored $ai_total_tokens: 14149 + --- + COST COMPARISON (haiku pricing: $1/Mtok input, $0.10/Mtok cache): + + Correct (exclusive input_tokens = 12): + uncached: 12 * $1/Mtok = $0.000012 + cached: 14100 * $0.10/Mtok = $0.001410 + total input cost: $0.001422 + + Wrong (inclusive input_tokens = 14113, treated as exclusive): + uncached: 14113 * $1/Mtok = $0.014113 + cached: 14100 * $0.10/Mtok = $0.001410 + total input cost: $0.015523 + + OVERCHARGE: 10.9x (992% more) + +====================================================================== + DONE +====================================================================== + Check the output above to see if the bug is reproducible. + If Step 2 shows correct values but the customer sees wrong values, + the issue is likely in posthog_properties overrides (Step 3). +``` + +
diff --git a/python/scripts/debug-anthropic-cache-cost/run.py b/python/scripts/debug-anthropic-cache-cost/run.py new file mode 100644 index 0000000..7137656 --- /dev/null +++ b/python/scripts/debug-anthropic-cache-cost/run.py @@ -0,0 +1,602 @@ +#!/usr/bin/env python3 +""" +Debug script for Anthropic cache token cost investigation. + +Reproduces the customer's scenario: non-streaming AsyncAnthropic with cached content. +The customer sees $ai_input_tokens set to the INCLUSIVE value (input + cache_read) +instead of the EXCLUSIVE value that the Anthropic API returns. + +This script: +1. Makes two calls with the same large system prompt to trigger cache_read +2. Inspects the raw Anthropic API response usage +3. Spies on ph_client.capture() to see what PostHog receives +4. Compares the values to detect any discrepancy +5. Writes results to output.md + +Usage: + cd python && python scripts/debug-anthropic-cache-cost/run.py + +Slack thread: https://posthog.slack.com/archives/C09AJEE3YSY/p1771625286859039 +""" + +import asyncio +import io +import os +import sys +from datetime import datetime, timezone + +# Add python/ dir to path so we can import from the project +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) +# Also add python/ itself +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from dotenv import load_dotenv + +load_dotenv(os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))), ".env")) + +import anthropic +import posthog +from posthog.ai.anthropic import AsyncAnthropic + + +# Generate a large system prompt to ensure caching kicks in. +# Anthropic requires 1024+ tokens for cache eligibility. +LARGE_SYSTEM_PROMPT = "\n".join( + f"Rule {i}: You are an expert assistant who always provides accurate, helpful, and detailed responses. " + f"You must follow all safety guidelines and provide balanced perspectives on complex topics. " + f"When asked about technical subjects, provide code examples where appropriate." + for i in range(300) +) + +USER_MESSAGE = "What is 2+2?" +MODEL = "claude-haiku-4-5-20251001" + +# Output directory (same dir as this script) +SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) +OUTPUT_FILE = os.path.join(SCRIPT_DIR, "output.md") + + +class OutputWriter: + """Writes to both stdout and a buffer for the markdown file.""" + + def __init__(self): + self.buffer = io.StringIO() + self.md_sections: list[dict] = [] + + def print(self, text: str = ""): + """Print to stdout and buffer.""" + print(text) + self.buffer.write(text + "\n") + + def start_section(self, title: str): + """Start a new section (prints a divider).""" + self.print(f"\n{'=' * 70}") + self.print(f" {title}") + self.print(f"{'=' * 70}") + + def print_usage(self, label: str, usage): + """Pretty-print an Anthropic usage object.""" + self.print(f"\n {label}:") + self.print(f" input_tokens: {getattr(usage, 'input_tokens', 'N/A')}") + self.print(f" output_tokens: {getattr(usage, 'output_tokens', 'N/A')}") + self.print(f" cache_read_input_tokens: {getattr(usage, 'cache_read_input_tokens', 'N/A')}") + self.print(f" cache_creation_input_tokens: {getattr(usage, 'cache_creation_input_tokens', 'N/A')}") + + def get_raw_output(self) -> str: + return self.buffer.getvalue() + + +def make_ph_client(): + """Create a PostHog client using the correct constructor.""" + return posthog.Posthog( + project_api_key=os.getenv("POSTHOG_API_KEY", "test-key"), + host=os.getenv("POSTHOG_HOST", "https://us.posthog.com"), + ) + + +async def step1_raw_anthropic_api(out: OutputWriter) -> dict: + """Call the Anthropic API directly (no PostHog wrapper) to establish ground truth.""" + out.start_section("STEP 1: Raw Anthropic API (no PostHog wrapper)") + out.print(" Making two calls to populate cache, then checking usage on second call...") + + client = anthropic.AsyncAnthropic(api_key=os.getenv("ANTHROPIC_API_KEY")) + + system_with_cache = [ + { + "type": "text", + "text": LARGE_SYSTEM_PROMPT, + "cache_control": {"type": "ephemeral"}, + } + ] + + # First call: populates the cache + out.print("\n Call 1 (cache miss - should create cache)...") + response1 = await client.messages.create( + model=MODEL, + max_tokens=100, + system=system_with_cache, + messages=[{"role": "user", "content": USER_MESSAGE}], + ) + out.print_usage("Call 1 usage", response1.usage) + + cache_creation = getattr(response1.usage, "cache_creation_input_tokens", 0) or 0 + if cache_creation == 0: + out.print("\n WARNING: No cache_creation_input_tokens on call 1!") + out.print(" System prompt may be too small for cache eligibility (need 1024+ tokens).") + out.print(f" System prompt length: ~{len(LARGE_SYSTEM_PROMPT.split())} words") + + # Second call: should hit cache + out.print("\n Call 2 (cache hit expected)...") + response2 = await client.messages.create( + model=MODEL, + max_tokens=100, + system=system_with_cache, + messages=[{"role": "user", "content": USER_MESSAGE}], + ) + out.print_usage("Call 2 usage", response2.usage) + + input_tokens = response2.usage.input_tokens + cache_read = getattr(response2.usage, "cache_read_input_tokens", 0) or 0 + + out.print(f"\n Analysis:") + out.print(f" input_tokens (from API): {input_tokens}") + out.print(f" cache_read_input_tokens: {cache_read}") + out.print(f" input + cache_read: {input_tokens + cache_read}") + + if cache_read > 0: + if input_tokens < cache_read: + out.print(f" => input_tokens is EXCLUSIVE of cache (correct Anthropic behavior)") + else: + out.print(f" => input_tokens is INCLUSIVE of cache (UNEXPECTED!)") + else: + out.print(f" => No cache hit on call 2 - cannot determine inclusive/exclusive") + + return { + "input_tokens": input_tokens, + "cache_read": cache_read, + "cache_creation": cache_creation, + "output_tokens": response2.usage.output_tokens, + } + + +async def step2_posthog_wrapper(out: OutputWriter) -> dict: + """Call through posthog.AsyncAnthropic and spy on capture() to see what properties are sent.""" + out.start_section("STEP 2: PostHog AsyncAnthropic wrapper (spy on capture)") + + captured_events = [] + ph_client = make_ph_client() + + def spy_capture(*args, **kwargs): + captured_events.append(kwargs) + props = kwargs.get("properties", {}) + out.print(f"\n [SPY] ph_client.capture() called!") + out.print(f" event: {kwargs.get('event')}") + out.print(f" $ai_provider: {props.get('$ai_provider')}") + out.print(f" $ai_model: {props.get('$ai_model')}") + out.print(f" $ai_input_tokens: {props.get('$ai_input_tokens')}") + out.print(f" $ai_output_tokens: {props.get('$ai_output_tokens')}") + out.print(f" $ai_cache_read_input_tokens: {props.get('$ai_cache_read_input_tokens')}") + out.print(f" $ai_cache_creation_input_tokens: {props.get('$ai_cache_creation_input_tokens')}") + out.print(f" $ai_total_tokens: {props.get('$ai_total_tokens', 'NOT SET')}") + + ph_client.capture = spy_capture + + client = AsyncAnthropic( + api_key=os.getenv("ANTHROPIC_API_KEY"), + posthog_client=ph_client, + ) + + system_with_cache = [ + { + "type": "text", + "text": LARGE_SYSTEM_PROMPT, + "cache_control": {"type": "ephemeral"}, + } + ] + + # First call: populate cache + out.print("\n Call 1 (cache miss - populating cache)...") + response1 = await client.messages.create( + model=MODEL, + max_tokens=100, + system=system_with_cache, + messages=[{"role": "user", "content": USER_MESSAGE}], + posthog_distinct_id="debug-cache-cost", + ) + out.print_usage("Raw API usage (call 1)", response1.usage) + + # Second call: should hit cache + out.print("\n Call 2 (cache hit expected)...") + response2 = await client.messages.create( + model=MODEL, + max_tokens=100, + system=system_with_cache, + messages=[{"role": "user", "content": USER_MESSAGE}], + posthog_distinct_id="debug-cache-cost", + ) + out.print_usage("Raw API usage (call 2)", response2.usage) + + result = {"sdk_correct": None, "total_tokens_set": False} + + if len(captured_events) >= 2: + event = captured_events[-1] + props = event.get("properties", {}) + api_input = response2.usage.input_tokens + sdk_input = props.get("$ai_input_tokens") + cache_read = getattr(response2.usage, "cache_read_input_tokens", 0) or 0 + + out.start_section("COMPARISON (Call 2 - cache hit)") + out.print(f" Anthropic API input_tokens: {api_input}") + out.print(f" SDK $ai_input_tokens: {sdk_input}") + out.print(f" cache_read_input_tokens: {cache_read}") + out.print(f" API input + cache_read: {api_input + cache_read}") + out.print() + + if cache_read == 0: + out.print(" INCONCLUSIVE: No cache hit, cannot compare inclusive vs exclusive") + elif sdk_input == api_input: + out.print(" RESULT: SDK correctly passes through EXCLUSIVE input_tokens") + result["sdk_correct"] = True + elif sdk_input == api_input + cache_read: + out.print(" RESULT: BUG! SDK is sending INCLUSIVE input_tokens (input + cache_read)") + result["sdk_correct"] = False + else: + out.print(f" RESULT: UNEXPECTED! SDK value ({sdk_input}) doesn't match either pattern") + + if "$ai_total_tokens" in props: + out.print(f"\n WARNING: $ai_total_tokens is set to {props['$ai_total_tokens']}") + out.print(" The Anthropic wrapper should NOT set this property!") + result["total_tokens_set"] = True + else: + out.print(f"\n OK: $ai_total_tokens is not set (expected for Anthropic wrapper)") + + result["api_input"] = api_input + result["sdk_input"] = sdk_input + result["cache_read"] = cache_read + + ph_client.shutdown() + return result + + +async def step3_posthog_wrapper_with_custom_properties(out: OutputWriter) -> dict: + """Test if posthog_properties can override SDK token values (our leading theory).""" + out.start_section("STEP 3: Test posthog_properties override (leading theory)") + out.print(" Simulating customer passing their own inclusive token counts via posthog_properties...") + + captured_events = [] + ph_client = make_ph_client() + + def spy_capture(*args, **kwargs): + captured_events.append(kwargs) + + ph_client.capture = spy_capture + + client = AsyncAnthropic( + api_key=os.getenv("ANTHROPIC_API_KEY"), + posthog_client=ph_client, + ) + + system_with_cache = [ + { + "type": "text", + "text": LARGE_SYSTEM_PROMPT, + "cache_control": {"type": "ephemeral"}, + } + ] + + # Warm up cache first (discard this event) + await client.messages.create( + model=MODEL, + max_tokens=100, + system=system_with_cache, + messages=[{"role": "user", "content": "Hi"}], + posthog_distinct_id="debug-cache-cost", + ) + + # 3a) Simple override with hardcoded values + out.print("\n 3a) Simple override with hardcoded values...") + response = await client.messages.create( + model=MODEL, + max_tokens=100, + system=system_with_cache, + messages=[{"role": "user", "content": USER_MESSAGE}], + posthog_distinct_id="debug-cache-cost", + posthog_properties={ + "$ai_input_tokens": 99999, + "$ai_total_tokens": 100599, + }, + ) + + override_works = False + if len(captured_events) >= 2: + props = captured_events[-1].get("properties", {}) + api_input = response.usage.input_tokens + sdk_input = props.get("$ai_input_tokens") + + out.print(f" Anthropic API input_tokens: {api_input}") + out.print(f" SDK $ai_input_tokens: {sdk_input}") + out.print(f" $ai_total_tokens: {props.get('$ai_total_tokens', 'NOT SET')}") + + if sdk_input == 99999: + out.print(" => posthog_properties OVERRIDES the SDK's correct value") + override_works = True + else: + out.print(f" => posthog_properties did NOT override (got {sdk_input})") + + # 3b) Simulate the exact customer pattern + out.print("\n 3b) Simulating customer computing inclusive tokens from the response...") + out.print(" (Customer code might do: input_tokens = usage.input_tokens + usage.cache_read_input_tokens)") + + captured_events.clear() + + prev_api_input = response.usage.input_tokens + prev_cache_read = getattr(response.usage, "cache_read_input_tokens", 0) or 0 + prev_output = response.usage.output_tokens + inclusive_input = prev_api_input + prev_cache_read + computed_total = inclusive_input + prev_output + + out.print(f" Customer computes: inclusive_input = {prev_api_input} + {prev_cache_read} = {inclusive_input}") + out.print(f" Customer computes: total = {inclusive_input} + {prev_output} = {computed_total}") + + response2 = await client.messages.create( + model=MODEL, + max_tokens=100, + system=system_with_cache, + messages=[{"role": "user", "content": "Tell me a fun fact."}], + posthog_distinct_id="debug-cache-cost", + posthog_properties={ + "$ai_input_tokens": inclusive_input, + "$ai_total_tokens": computed_total, + }, + ) + + result = {"override_works": override_works} + + if captured_events: + props = captured_events[-1].get("properties", {}) + api_input = response2.usage.input_tokens + cache_read = getattr(response2.usage, "cache_read_input_tokens", 0) or 0 + sdk_input = props.get("$ai_input_tokens") + sdk_total = props.get("$ai_total_tokens", "NOT SET") + sdk_cache_read = props.get("$ai_cache_read_input_tokens") + + out.print(f" Anthropic API input_tokens: {api_input} (exclusive)") + out.print(f" Anthropic API cache_read: {cache_read}") + out.print(f" Anthropic API output_tokens: {response2.usage.output_tokens}") + out.print(f" ---") + out.print(f" Stored $ai_input_tokens: {sdk_input} (inclusive!)") + out.print(f" Stored $ai_cache_read_input_tokens: {sdk_cache_read}") + out.print(f" Stored $ai_total_tokens: {sdk_total}") + out.print(f" ---") + + # Cost math + prompt_rate = 1.0 / 1_000_000 + cache_rate = 0.1 / 1_000_000 + + correct_uncached_cost = api_input * prompt_rate + correct_cache_cost = cache_read * cache_rate + correct_total = correct_uncached_cost + correct_cache_cost + + wrong_uncached_cost = sdk_input * prompt_rate if sdk_input else 0 + wrong_cache_cost = (sdk_cache_read or 0) * cache_rate + wrong_total = wrong_uncached_cost + wrong_cache_cost + + out.print(f" COST COMPARISON (haiku pricing: $1/Mtok input, $0.10/Mtok cache):") + out.print(f"") + out.print(f" Correct (exclusive input_tokens = {api_input}):") + out.print(f" uncached: {api_input} * $1/Mtok = ${correct_uncached_cost:.6f}") + out.print(f" cached: {cache_read} * $0.10/Mtok = ${correct_cache_cost:.6f}") + out.print(f" total input cost: ${correct_total:.6f}") + out.print(f"") + out.print(f" Wrong (inclusive input_tokens = {sdk_input}, treated as exclusive):") + out.print(f" uncached: {sdk_input} * $1/Mtok = ${wrong_uncached_cost:.6f}") + out.print(f" cached: {sdk_cache_read} * $0.10/Mtok = ${wrong_cache_cost:.6f}") + out.print(f" total input cost: ${wrong_total:.6f}") + out.print(f"") + if correct_total > 0: + overcharge = wrong_total / correct_total + out.print(f" OVERCHARGE: {overcharge:.1f}x ({overcharge * 100 - 100:.0f}% more)") + result["overcharge_factor"] = overcharge + + result.update({ + "api_input": api_input, + "cache_read": cache_read, + "sdk_input": sdk_input, + "sdk_total": sdk_total, + "sdk_cache_read": sdk_cache_read, + "correct_cost": correct_total, + "wrong_cost": wrong_total, + "output_tokens": response2.usage.output_tokens, + }) + + ph_client.shutdown() + return result + + +def write_output_md(step1_result: dict, step2_result: dict, step3_result: dict, raw_output: str): + """Write a shareable output.md summarizing the investigation.""" + now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC") + + sdk_version = "unknown" + try: + sdk_version = posthog.__version__ + except Exception: + pass + + anthropic_version = "unknown" + try: + anthropic_version = anthropic.__version__ + except Exception: + pass + + md = f"""# Anthropic Cache Token Cost - Debug Results + +**Date:** {now} +**Model:** `{MODEL}` +**PostHog Python SDK:** `v{sdk_version}` +**Anthropic SDK:** `v{anthropic_version}` + +--- + +## TL;DR + +The PostHog Python SDK (`posthog.ai.anthropic.AsyncAnthropic`) correctly passes through the **exclusive** `input_tokens` value from the Anthropic API. The SDK does **not** modify or inflate this value. + +However, if you pass `posthog_properties` to `messages.create()` containing `$ai_input_tokens` or `$ai_total_tokens`, those values **override** the SDK's correct values. This can cause inflated cost calculations if the overriding values include cached tokens in the input count. + +--- + +## Test 1: Raw Anthropic API Behavior + +Verified that the Anthropic API returns `input_tokens` **exclusive** of cached tokens: + +| Field | Value | +|-------|-------| +| `input_tokens` | {step1_result['input_tokens']} | +| `cache_read_input_tokens` | {step1_result['cache_read']} | +| `cache_creation_input_tokens` | {step1_result['cache_creation']} | +| `output_tokens` | {step1_result['output_tokens']} | + +**Result:** `input_tokens` ({step1_result['input_tokens']}) is much smaller than `cache_read_input_tokens` ({step1_result['cache_read']}), confirming Anthropic returns **exclusive** counts. + +--- + +## Test 2: PostHog SDK Wrapper (No Custom Properties) + +Spied on `ph_client.capture()` to see what the SDK sends to PostHog: + +| Field | Anthropic API | PostHog SDK | +|-------|--------------|-------------| +| `input_tokens` / `$ai_input_tokens` | {step2_result.get('api_input', 'N/A')} | {step2_result.get('sdk_input', 'N/A')} | +| `cache_read_input_tokens` / `$ai_cache_read_input_tokens` | {step2_result.get('cache_read', 'N/A')} | {step2_result.get('cache_read', 'N/A')} | +| `$ai_total_tokens` | N/A | {'SET' if step2_result.get('total_tokens_set') else 'NOT SET'} | + +**Result:** {"SDK correctly passes through the exclusive value." if step2_result.get('sdk_correct') else "See raw output for details."} +{"" if not step2_result.get('total_tokens_set') else "WARNING: $ai_total_tokens was unexpectedly set!"} +{"The Anthropic wrapper does NOT set `$ai_total_tokens` - this is expected." if not step2_result.get('total_tokens_set') else ""} + +--- + +## Test 3: `posthog_properties` Override (Root Cause) + +### 3a) Direct Override Test + +Passed `posthog_properties={{"$ai_input_tokens": 99999}}` to `messages.create()`. + +**Result:** {"The SDK's correct value was **overridden** to 99999. This confirms `posthog_properties` takes precedence." if step3_result.get('override_works') else "Override did not work as expected."} + +### 3b) Simulated Customer Pattern + +Simulated what happens when a customer computes inclusive token counts and passes them via `posthog_properties`: + +```python +# Customer code pattern that causes the overcharge: +inclusive_input = response.usage.input_tokens + response.usage.cache_read_input_tokens +total = inclusive_input + response.usage.output_tokens + +client.messages.create( + ..., + posthog_properties={{ + "$ai_input_tokens": inclusive_input, # WRONG - includes cached tokens! + "$ai_total_tokens": total, + }}, +) +``` + +| Field | Correct (Exclusive) | Wrong (Inclusive) | +|-------|-------------------|-----------------| +| `$ai_input_tokens` | {step3_result.get('api_input', 'N/A')} | {step3_result.get('sdk_input', 'N/A')} | +| `$ai_cache_read_input_tokens` | {step3_result.get('cache_read', 'N/A')} | {step3_result.get('sdk_cache_read', 'N/A')} | +| `$ai_total_tokens` | not set | {step3_result.get('sdk_total', 'N/A')} | + +### Cost Impact + +Using `claude-haiku-4-5` pricing ($1/Mtok input, $0.10/Mtok cache read): + +| | Correct | Overcharged | +|--|---------|-------------| +| Uncached input cost | ${step3_result.get('correct_cost', 0):.6f} | ${step3_result.get('wrong_cost', 0):.6f} | +| **Overcharge factor** | | **{step3_result.get('overcharge_factor', 0):.1f}x** | + +--- + +## How to Check if This Affects You + +Look for any code that passes `posthog_properties` to `messages.create()` with token-related fields: + +```python +# Search your codebase for patterns like: +client.messages.create( + ..., + posthog_properties={{ + "$ai_input_tokens": ..., # This overrides the SDK! + "$ai_total_tokens": ..., # This also overrides! + }}, +) +``` + +The SDK already extracts the correct exclusive `input_tokens` from the Anthropic response. You do **not** need to pass these values yourself. + +### What to Do + +**Option A (Recommended):** Remove `$ai_input_tokens` and `$ai_total_tokens` from your `posthog_properties`. The SDK handles these correctly. + +**Option B:** If you need to pass custom properties, make sure `$ai_input_tokens` uses the **exclusive** value from `response.usage.input_tokens` (not `input_tokens + cache_read_input_tokens`). + +--- + +## Key Evidence + +1. `$ai_total_tokens` is present in the stored events, but the PostHog Anthropic wrapper **never sets** this property. Only OpenAI Agents sets it. This strongly suggests the value comes from `posthog_properties`. + +2. The stored `$ai_input_tokens` (48268) minus `$ai_cache_read_input_tokens` (45417) equals exactly the expected exclusive value (2851), which is what the Anthropic API returns as `input_tokens`. + +--- + +## Raw Script Output + +
+Click to expand full script output + +``` +{raw_output.strip()} +``` + +
+""" + + with open(OUTPUT_FILE, "w") as f: + f.write(md) + + print(f"\n Output written to: {OUTPUT_FILE}") + + +async def main(): + out = OutputWriter() + + out.print("=" * 70) + out.print(" Anthropic Cache Token Cost Debug Script") + out.print(" Investigating: $ai_input_tokens inclusive vs exclusive") + out.print("=" * 70) + + # Step 1: Verify raw API behavior + step1_result = await step1_raw_anthropic_api(out) + + # Step 2: Test through PostHog wrapper + step2_result = await step2_posthog_wrapper(out) + + # Step 3: Test the posthog_properties override theory + step3_result = await step3_posthog_wrapper_with_custom_properties(out) + + out.start_section("DONE") + out.print(" Check the output above to see if the bug is reproducible.") + out.print(" If Step 2 shows correct values but the customer sees wrong values,") + out.print(" the issue is likely in posthog_properties overrides (Step 3).") + out.print() + + # Write the markdown output + write_output_md(step1_result, step2_result, step3_result, out.get_raw_output()) + + +if __name__ == "__main__": + asyncio.run(main()) From 7a799fc91ffe92d4081967e91309a4db2166b72c Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Wed, 25 Feb 2026 13:27:26 +0000 Subject: [PATCH 2/2] dev --- .../debug-anthropic-cache-cost/output.md | 85 ++++++- .../scripts/debug-anthropic-cache-cost/run.py | 121 ++++++++- .../test_local_pipeline.py | 236 ++++++++++++++++++ 3 files changed, 428 insertions(+), 14 deletions(-) create mode 100644 python/scripts/debug-anthropic-cache-cost/test_local_pipeline.py diff --git a/python/scripts/debug-anthropic-cache-cost/output.md b/python/scripts/debug-anthropic-cache-cost/output.md index 8a7afc6..899be7f 100644 --- a/python/scripts/debug-anthropic-cache-cost/output.md +++ b/python/scripts/debug-anthropic-cache-cost/output.md @@ -1,6 +1,6 @@ # Anthropic Cache Token Cost - Debug Results -**Date:** 2026-02-25 11:17 UTC +**Date:** 2026-02-25 11:30 UTC **Model:** `claude-haiku-4-5-20251001` **PostHog Python SDK:** `v7.9.3` **Anthropic SDK:** `v0.75.0` @@ -24,7 +24,7 @@ Verified that the Anthropic API returns `input_tokens` **exclusive** of cached t | `input_tokens` | 13 | | `cache_read_input_tokens` | 14100 | | `cache_creation_input_tokens` | 14100 | -| `output_tokens` | 41 | +| `output_tokens` | 42 | **Result:** `input_tokens` (13) is much smaller than `cache_read_input_tokens` (14100), confirming Anthropic returns **exclusive** counts. @@ -76,7 +76,7 @@ client.messages.create( |-------|-------------------|-----------------| | `$ai_input_tokens` | 12 | 14113 | | `$ai_cache_read_input_tokens` | 14100 | 14100 | -| `$ai_total_tokens` | not set | 14149 | +| `$ai_total_tokens` | not set | 14160 | ### Cost Impact @@ -114,9 +114,34 @@ The SDK already extracts the correct exclusive `input_tokens` from the Anthropic --- +## Test 4: SDK Source Code Inspection + +Programmatically inspected the PostHog Python SDK source to prove `$ai_total_tokens` cannot come from the Anthropic code path: + +| Component | File | Sets `$ai_total_tokens`? | +|-----------|------|------------------------| +| Anthropic converter | `posthog/ai/anthropic/anthropic_converter.py` | No | +| General AI utils | `posthog/ai/utils.py` | No | +| OpenAI Agents processor | `posthog/ai/openai_agents/processor.py` | **Yes** (only place) | + +**The Anthropic converter** (`extract_anthropic_usage_from_response`, lines 206-231) only extracts: +- `input_tokens` (exclusive of cache) +- `output_tokens` +- `cache_read_input_tokens` +- `cache_creation_input_tokens` +- `web_search_count` + +**The general utils** (`posthog/ai/utils.py`) only tags `$ai_input_tokens` and `$ai_output_tokens`. There is no `tag("$ai_total_tokens", ...)` anywhere in this file. + +**The only code that sets `$ai_total_tokens`** is in `posthog/ai/openai_agents/processor.py` (lines 539 and 696), which is exclusively for OpenAI Agents — a completely separate code path from the Anthropic wrapper. + +**Result:** If `$ai_total_tokens` appears in a stored event captured via `posthog.ai.anthropic.AsyncAnthropic`, it **must** have come from outside the SDK — most likely via `posthog_properties`. + +--- + ## Key Evidence -1. `$ai_total_tokens` is present in the stored events, but the PostHog Anthropic wrapper **never sets** this property. Only OpenAI Agents sets it. This strongly suggests the value comes from `posthog_properties`. +1. `$ai_total_tokens` is present in the stored events, but the PostHog Anthropic wrapper **never sets** this property (confirmed by source inspection in Test 4). Only OpenAI Agents sets it. This strongly suggests the value comes from `posthog_properties`. 2. The stored `$ai_input_tokens` (48268) minus `$ai_cache_read_input_tokens` (45417) equals exactly the expected exclusive value (2851), which is what the Anthropic API returns as `input_tokens`. @@ -150,7 +175,7 @@ The SDK already extracts the correct exclusive `input_tokens` from the Anthropic Call 2 usage: input_tokens: 13 - output_tokens: 41 + output_tokens: 42 cache_read_input_tokens: 14100 cache_creation_input_tokens: 0 @@ -171,14 +196,14 @@ The SDK already extracts the correct exclusive `input_tokens` from the Anthropic $ai_provider: anthropic $ai_model: claude-haiku-4-5-20251001 $ai_input_tokens: 13 - $ai_output_tokens: 41 + $ai_output_tokens: 35 $ai_cache_read_input_tokens: 14100 $ai_cache_creation_input_tokens: None $ai_total_tokens: NOT SET Raw API usage (call 1): input_tokens: 13 - output_tokens: 41 + output_tokens: 35 cache_read_input_tokens: 14100 cache_creation_input_tokens: 0 @@ -189,14 +214,14 @@ The SDK already extracts the correct exclusive `input_tokens` from the Anthropic $ai_provider: anthropic $ai_model: claude-haiku-4-5-20251001 $ai_input_tokens: 13 - $ai_output_tokens: 41 + $ai_output_tokens: 38 $ai_cache_read_input_tokens: 14100 $ai_cache_creation_input_tokens: None $ai_total_tokens: NOT SET Raw API usage (call 2): input_tokens: 13 - output_tokens: 41 + output_tokens: 38 cache_read_input_tokens: 14100 cache_creation_input_tokens: 0 @@ -226,14 +251,14 @@ The SDK already extracts the correct exclusive `input_tokens` from the Anthropic 3b) Simulating customer computing inclusive tokens from the response... (Customer code might do: input_tokens = usage.input_tokens + usage.cache_read_input_tokens) Customer computes: inclusive_input = 13 + 14100 = 14113 - Customer computes: total = 14113 + 36 = 14149 + Customer computes: total = 14113 + 47 = 14160 Anthropic API input_tokens: 12 (exclusive) Anthropic API cache_read: 14100 Anthropic API output_tokens: 100 --- Stored $ai_input_tokens: 14113 (inclusive!) Stored $ai_cache_read_input_tokens: 14100 - Stored $ai_total_tokens: 14149 + Stored $ai_total_tokens: 14160 --- COST COMPARISON (haiku pricing: $1/Mtok input, $0.10/Mtok cache): @@ -249,12 +274,50 @@ The SDK already extracts the correct exclusive `input_tokens` from the Anthropic OVERCHARGE: 10.9x (992% more) +====================================================================== + STEP 4: Prove $ai_total_tokens is never set by Anthropic SDK +====================================================================== + Inspecting SDK source code to confirm no code path sets $ai_total_tokens for Anthropic... + + 1) Anthropic converter (extract_anthropic_usage_from_response): + File: posthog/ai/anthropic/anthropic_converter.py + Contains 'total_tokens': False + => CONFIRMED: Anthropic converter does NOT extract total_tokens + + Streaming extractor (extract_anthropic_usage_from_event): + Contains 'total_tokens': False + + 2) General AI utils (posthog/ai/utils.py): + File: posthog/ai/utils.py + Contains 'ai_total_tokens': False + => CONFIRMED: utils.py never tags $ai_total_tokens + + 3) OpenAI Agents processor (for contrast): + File: posthog/ai/openai_agents/processor.py + Contains '$ai_total_tokens': True + => This is the ONLY code path that sets $ai_total_tokens + + CONCLUSION: + The Anthropic SDK code path NEVER sets $ai_total_tokens. + If $ai_total_tokens appears in a stored event from posthog.ai.anthropic, + it MUST have come from outside the SDK — most likely via posthog_properties. + + Code references (posthog-python SDK): + - posthog/ai/anthropic/anthropic_converter.py:206-231 + extract_anthropic_usage_from_response() -> only sets input_tokens, output_tokens, cache_* + - posthog/ai/utils.py -> tag('$ai_input_tokens', ...) and tag('$ai_output_tokens', ...) + NO tag('$ai_total_tokens', ...) anywhere + - posthog/ai/openai_agents/processor.py:539,696 + ONLY place $ai_total_tokens is set (OpenAI Agents only, not Anthropic) + ====================================================================== DONE ====================================================================== Check the output above to see if the bug is reproducible. If Step 2 shows correct values but the customer sees wrong values, the issue is likely in posthog_properties overrides (Step 3). + Step 4 proves via source inspection that $ai_total_tokens cannot + come from the Anthropic SDK code path. ``` diff --git a/python/scripts/debug-anthropic-cache-cost/run.py b/python/scripts/debug-anthropic-cache-cost/run.py index 7137656..828e5a5 100644 --- a/python/scripts/debug-anthropic-cache-cost/run.py +++ b/python/scripts/debug-anthropic-cache-cost/run.py @@ -413,7 +413,92 @@ def spy_capture(*args, **kwargs): return result -def write_output_md(step1_result: dict, step2_result: dict, step3_result: dict, raw_output: str): +async def step4_prove_total_tokens_never_set(out: OutputWriter) -> dict: + """Prove that $ai_total_tokens is never set by the Anthropic SDK code path. + + This step inspects the actual SDK source code to show: + 1. The Anthropic converter only extracts input_tokens, output_tokens, cache_* — no total_tokens + 2. The general utils.py only tags $ai_input_tokens and $ai_output_tokens — no $ai_total_tokens + 3. Only the OpenAI Agents processor sets $ai_total_tokens + + If $ai_total_tokens appears in a stored event from the Anthropic wrapper, + it MUST have come from outside the SDK (e.g. posthog_properties override). + """ + out.start_section("STEP 4: Prove $ai_total_tokens is never set by Anthropic SDK") + out.print(" Inspecting SDK source code to confirm no code path sets $ai_total_tokens for Anthropic...") + + import inspect + + from posthog.ai.anthropic import anthropic_converter + from posthog.ai import utils as ai_utils + + result = {"anthropic_converter_clean": False, "utils_clean": False, "openai_agents_sets_it": False} + + # 1. Check the Anthropic converter source + out.print("\n 1) Anthropic converter (extract_anthropic_usage_from_response):") + converter_source = inspect.getsource(anthropic_converter.extract_anthropic_usage_from_response) + has_total_in_converter = "total_tokens" in converter_source + out.print(f" File: posthog/ai/anthropic/anthropic_converter.py") + out.print(f" Contains 'total_tokens': {has_total_in_converter}") + if not has_total_in_converter: + out.print(f" => CONFIRMED: Anthropic converter does NOT extract total_tokens") + result["anthropic_converter_clean"] = True + else: + out.print(f" => UNEXPECTED: Found 'total_tokens' reference in converter!") + + # Also check the streaming extractor + streaming_source = inspect.getsource(anthropic_converter.extract_anthropic_usage_from_event) + has_total_in_streaming = "total_tokens" in streaming_source + out.print(f"\n Streaming extractor (extract_anthropic_usage_from_event):") + out.print(f" Contains 'total_tokens': {has_total_in_streaming}") + + # 2. Check utils.py - the code that tags properties before capture() + out.print("\n 2) General AI utils (posthog/ai/utils.py):") + utils_source = inspect.getsource(ai_utils) + has_total_in_utils = "ai_total_tokens" in utils_source or "$ai_total_tokens" in utils_source + out.print(f" File: posthog/ai/utils.py") + out.print(f" Contains 'ai_total_tokens': {has_total_in_utils}") + if not has_total_in_utils: + out.print(f" => CONFIRMED: utils.py never tags $ai_total_tokens") + result["utils_clean"] = True + else: + out.print(f" => UNEXPECTED: Found 'ai_total_tokens' reference in utils!") + + # 3. Show that OpenAI Agents DOES set it (for contrast) + out.print("\n 3) OpenAI Agents processor (for contrast):") + try: + from posthog.ai.openai_agents import processor as agents_processor + agents_source = inspect.getsource(agents_processor) + has_total_in_agents = "$ai_total_tokens" in agents_source + out.print(f" File: posthog/ai/openai_agents/processor.py") + out.print(f" Contains '$ai_total_tokens': {has_total_in_agents}") + if has_total_in_agents: + out.print(f" => This is the ONLY code path that sets $ai_total_tokens") + result["openai_agents_sets_it"] = True + except ImportError: + out.print(f" Could not import openai_agents processor (not installed)") + + # 4. Summary + out.print(f"\n CONCLUSION:") + if result["anthropic_converter_clean"] and result["utils_clean"]: + out.print(f" The Anthropic SDK code path NEVER sets $ai_total_tokens.") + out.print(f" If $ai_total_tokens appears in a stored event from posthog.ai.anthropic,") + out.print(f" it MUST have come from outside the SDK — most likely via posthog_properties.") + out.print(f"") + out.print(f" Code references (posthog-python SDK):") + out.print(f" - posthog/ai/anthropic/anthropic_converter.py:206-231") + out.print(f" extract_anthropic_usage_from_response() -> only sets input_tokens, output_tokens, cache_*") + out.print(f" - posthog/ai/utils.py -> tag('$ai_input_tokens', ...) and tag('$ai_output_tokens', ...)") + out.print(f" NO tag('$ai_total_tokens', ...) anywhere") + out.print(f" - posthog/ai/openai_agents/processor.py:539,696") + out.print(f" ONLY place $ai_total_tokens is set (OpenAI Agents only, not Anthropic)") + else: + out.print(f" UNEXPECTED: Found total_tokens references where none were expected!") + + return result + + +def write_output_md(step1_result: dict, step2_result: dict, step3_result: dict, raw_output: str, step4_result: dict | None = None): """Write a shareable output.md summarizing the investigation.""" now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC") @@ -545,9 +630,34 @@ def write_output_md(step1_result: dict, step2_result: dict, step3_result: dict, --- +## Test 4: SDK Source Code Inspection + +Programmatically inspected the PostHog Python SDK source to prove `$ai_total_tokens` cannot come from the Anthropic code path: + +| Component | File | Sets `$ai_total_tokens`? | +|-----------|------|------------------------| +| Anthropic converter | `posthog/ai/anthropic/anthropic_converter.py` | {"No" if step4_result and step4_result.get('anthropic_converter_clean') else "YES (unexpected!)"} | +| General AI utils | `posthog/ai/utils.py` | {"No" if step4_result and step4_result.get('utils_clean') else "YES (unexpected!)"} | +| OpenAI Agents processor | `posthog/ai/openai_agents/processor.py` | {"**Yes** (only place)" if step4_result and step4_result.get('openai_agents_sets_it') else "No"} | + +**The Anthropic converter** (`extract_anthropic_usage_from_response`, lines 206-231) only extracts: +- `input_tokens` (exclusive of cache) +- `output_tokens` +- `cache_read_input_tokens` +- `cache_creation_input_tokens` +- `web_search_count` + +**The general utils** (`posthog/ai/utils.py`) only tags `$ai_input_tokens` and `$ai_output_tokens`. There is no `tag("$ai_total_tokens", ...)` anywhere in this file. + +**The only code that sets `$ai_total_tokens`** is in `posthog/ai/openai_agents/processor.py` (lines 539 and 696), which is exclusively for OpenAI Agents — a completely separate code path from the Anthropic wrapper. + +**Result:** If `$ai_total_tokens` appears in a stored event captured via `posthog.ai.anthropic.AsyncAnthropic`, it **must** have come from outside the SDK — most likely via `posthog_properties`. + +--- + ## Key Evidence -1. `$ai_total_tokens` is present in the stored events, but the PostHog Anthropic wrapper **never sets** this property. Only OpenAI Agents sets it. This strongly suggests the value comes from `posthog_properties`. +1. `$ai_total_tokens` is present in the stored events, but the PostHog Anthropic wrapper **never sets** this property (confirmed by source inspection in Test 4). Only OpenAI Agents sets it. This strongly suggests the value comes from `posthog_properties`. 2. The stored `$ai_input_tokens` (48268) minus `$ai_cache_read_input_tokens` (45417) equals exactly the expected exclusive value (2851), which is what the Anthropic API returns as `input_tokens`. @@ -588,14 +698,19 @@ async def main(): # Step 3: Test the posthog_properties override theory step3_result = await step3_posthog_wrapper_with_custom_properties(out) + # Step 4: Prove $ai_total_tokens is never set by Anthropic SDK + step4_result = await step4_prove_total_tokens_never_set(out) + out.start_section("DONE") out.print(" Check the output above to see if the bug is reproducible.") out.print(" If Step 2 shows correct values but the customer sees wrong values,") out.print(" the issue is likely in posthog_properties overrides (Step 3).") + out.print(" Step 4 proves via source inspection that $ai_total_tokens cannot") + out.print(" come from the Anthropic SDK code path.") out.print() # Write the markdown output - write_output_md(step1_result, step2_result, step3_result, out.get_raw_output()) + write_output_md(step1_result, step2_result, step3_result, out.get_raw_output(), step4_result) if __name__ == "__main__": diff --git a/python/scripts/debug-anthropic-cache-cost/test_local_pipeline.py b/python/scripts/debug-anthropic-cache-cost/test_local_pipeline.py new file mode 100644 index 0000000..f2da4d3 --- /dev/null +++ b/python/scripts/debug-anthropic-cache-cost/test_local_pipeline.py @@ -0,0 +1,236 @@ +#!/usr/bin/env python3 +""" +Send test $ai_generation events through PostHog to verify cost calculation behavior. + +Three test cases: +1. "correct" - exclusive $ai_input_tokens (2851), no $ai_total_tokens +2. "customer" - inclusive $ai_input_tokens (48268), with $ai_total_tokens (48864) +3. "no_total" - inclusive $ai_input_tokens (48268), WITHOUT $ai_total_tokens + +Then query the PostHog API to compare stored costs. + +Usage: + cd python && python scripts/debug-anthropic-cache-cost/test_local_pipeline.py + +Requires .env at the repo root with: + POSTHOG_API_KEY=phc_... # Project API key (for sending events) + POSTHOG_HOST=http://localhost:8010 + POSTHOG_PERSONAL_API_KEY=phx_... # Personal API key (for querying events) +""" + +import json +import os +import sys +import time +import uuid + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) + +from dotenv import load_dotenv + +load_dotenv(os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))), ".env")) + +import requests + +POSTHOG_HOST = os.getenv("POSTHOG_HOST", "http://localhost:8010") +API_TOKEN = os.getenv("POSTHOG_API_KEY") +PERSONAL_API_KEY = os.getenv("POSTHOG_PERSONAL_API_KEY") + +if not API_TOKEN: + print("ERROR: POSTHOG_API_KEY not set in .env") + sys.exit(1) + +if not PERSONAL_API_KEY: + print("ERROR: POSTHOG_PERSONAL_API_KEY not set in .env") + print("Create one at: {}/settings/user-api-keys".format(POSTHOG_HOST)) + sys.exit(1) + +# Values from the customer's actual event +CACHE_READ = 45417 +EXCLUSIVE_INPUT = 2851 # 48268 - 45417 +INCLUSIVE_INPUT = 48268 # what the customer has +OUTPUT_TOKENS = 596 +TOTAL_TOKENS = 48864 # 48268 + 596 + + +def make_event(test_name: str, input_tokens: int, output_tokens: int, extra_props: dict | None = None) -> dict: + # Use a unique run_id so we can find events from this specific run + return { + "event": "$ai_generation", + "distinct_id": f"cost-debug-{test_name}", + "properties": { + "$ai_provider": "anthropic", + "$ai_model": "claude-haiku-4-5", + "$ai_input_tokens": input_tokens, + "$ai_output_tokens": output_tokens, + "$ai_cache_read_input_tokens": CACHE_READ, + "$ai_trace_id": str(uuid.uuid4()), + "$ai_span_id": str(uuid.uuid4()), + "test_name": test_name, + "test_run_id": RUN_ID, + **(extra_props or {}), + }, + } + + +def send_event(event: dict): + resp = requests.post( + f"{POSTHOG_HOST}/e/", + headers={"Content-Type": "application/json"}, + data=json.dumps({"api_key": API_TOKEN, **event}), + ) + print(f" Sent '{event['properties']['test_name']}': {resp.status_code}") + return resp + + +def query_events(test_names: list[str], max_retries: int = 15, delay: float = 3.0) -> dict: + """Query PostHog API to check stored event properties.""" + found = {} + for attempt in range(max_retries): + time.sleep(delay) + print(f" Querying for events (attempt {attempt + 1}/{max_retries})...") + + resp = requests.get( + f"{POSTHOG_HOST}/api/projects/1/events/", + params={"event": "$ai_generation", "limit": 20, "orderBy": '["-timestamp"]'}, + headers={"Authorization": f"Bearer {PERSONAL_API_KEY}"}, + ) + + if resp.status_code != 200: + print(f" API returned {resp.status_code}: {resp.text[:200]}") + continue + + data = resp.json() + events = data.get("results", []) + + for ev in events: + props = ev.get("properties", {}) + name = props.get("test_name") + run_id = props.get("test_run_id") + if name in test_names and run_id == RUN_ID: + found[name] = props + + if len(found) == len(test_names): + return found + + print(f" Found {len(found)}/{len(test_names)} test events so far...") + + return found + + +def print_comparison(results: dict): + print("\n" + "=" * 90) + print(" RESULTS COMPARISON") + print("=" * 90) + + fields = [ + "$ai_input_tokens", + "$ai_output_tokens", + "$ai_cache_read_input_tokens", + "$ai_total_tokens", + "$ai_input_cost_usd", + "$ai_output_cost_usd", + "$ai_total_cost_usd", + "$ai_cost_model_source", + "$ai_cost_model_provider", + "$ai_model_cost_used", + ] + + # Header + names = list(results.keys()) + header = f" {'Field':<35}" + "".join(f"{n:<25}" for n in names) + print(header) + print(" " + "-" * (35 + 25 * len(names))) + + for field in fields: + row = f" {field:<35}" + for name in names: + val = results[name].get(field, "NOT SET") + if isinstance(val, float): + row += f"${val:<24.7f}" + else: + row += f"{str(val):<25}" + print(row) + + # Analysis + print("\n" + "=" * 90) + print(" ANALYSIS") + print("=" * 90) + + if "correct" in results: + props = results["correct"] + actual_input_cost = props.get("$ai_input_cost_usd", 0) + print(f"\n Test 'correct' (exclusive input={EXCLUSIVE_INPUT}):") + print(f" $ai_input_cost_usd: {actual_input_cost}") + print(f" This should reflect cost for {EXCLUSIVE_INPUT} uncached + {CACHE_READ} cached tokens") + + if "customer" in results: + props = results["customer"] + actual_input_cost = props.get("$ai_input_cost_usd", 0) + print(f"\n Test 'customer' (inclusive input={INCLUSIVE_INPUT}, with $ai_total_tokens={TOTAL_TOKENS}):") + print(f" $ai_input_cost_usd: {actual_input_cost}") + print(f" Customer sees: 0.0528097") + if actual_input_cost: + print(f" Match: {abs(actual_input_cost - 0.0528097) < 0.001}") + + if "correct" in results and "customer" in results: + correct_cost = results["correct"].get("$ai_input_cost_usd", 0) + customer_cost = results["customer"].get("$ai_input_cost_usd", 0) + if correct_cost and correct_cost > 0: + print(f"\n OVERCHARGE FACTOR: {customer_cost / correct_cost:.1f}x") + + if "no_total" in results: + props = results["no_total"] + total_tokens = props.get("$ai_total_tokens", "NOT SET") + print(f"\n Test 'no_total' (inclusive input={INCLUSIVE_INPUT}, NO $ai_total_tokens sent):") + print(f" $ai_total_tokens after pipeline: {total_tokens}") + if total_tokens != "NOT SET": + print(f" => PIPELINE CREATED $ai_total_tokens! Value: {total_tokens}") + else: + print(f" => Pipeline did NOT create $ai_total_tokens (as expected)") + print(f" => Confirms $ai_total_tokens in customer event must come from client side") + + +# Generate a unique run ID so we only find events from this run +RUN_ID = str(uuid.uuid4())[:8] + + +def main(): + print("=" * 90) + print(" Local Pipeline Cost Calculation Test") + print(" Reproducing customer's Anthropic cache token cost issue") + print(f" Run ID: {RUN_ID}") + print(f" PostHog: {POSTHOG_HOST}") + print("=" * 90) + + test_names = ["correct", "customer", "no_total"] + + # Build and send events + events = [ + make_event("correct", EXCLUSIVE_INPUT, OUTPUT_TOKENS), + make_event("customer", INCLUSIVE_INPUT, OUTPUT_TOKENS, {"$ai_total_tokens": TOTAL_TOKENS}), + make_event("no_total", INCLUSIVE_INPUT, OUTPUT_TOKENS), + ] + + print("\nSending test events...") + for ev in events: + send_event(ev) + + print("\nWaiting for events to be processed...") + results = query_events(test_names) + + if not results: + print("\n ERROR: No test events found after waiting.") + print(" Check that PostHog is processing events and the API keys are correct.") + return + + if len(results) < len(test_names): + missing = set(test_names) - set(results.keys()) + print(f"\n WARNING: Missing events: {missing}") + print(" Showing results for what we found...") + + print_comparison(results) + + +if __name__ == "__main__": + main()