diff --git a/tee_gateway/model_registry.py b/tee_gateway/model_registry.py index 4027215..cc41cf8 100644 --- a/tee_gateway/model_registry.py +++ b/tee_gateway/model_registry.py @@ -30,6 +30,25 @@ class SupportedModel(Enum): input_price_usd=Decimal("0.000002"), output_price_usd=Decimal("0.000008"), ) + GPT_4_1_MINI = ModelConfig( + provider="openai", + api_name="gpt-4.1-mini", + input_price_usd=Decimal("0.0000004"), + output_price_usd=Decimal("0.0000016"), + ) + GPT_4_1_NANO = ModelConfig( + provider="openai", + api_name="gpt-4.1-nano", + input_price_usd=Decimal("0.0000001"), + output_price_usd=Decimal("0.0000004"), + ) + O3 = ModelConfig( + provider="openai", + api_name="o3", + input_price_usd=Decimal("0.00001"), + output_price_usd=Decimal("0.00004"), + force_temperature=1.0, + ) O4_MINI = ModelConfig( provider="openai", api_name="o4-mini", @@ -55,6 +74,30 @@ class SupportedModel(Enum): input_price_usd=Decimal("0.00000175"), output_price_usd=Decimal("0.000014"), ) + GPT_5_4 = ModelConfig( + provider="openai", + api_name="gpt-5.4", + input_price_usd=Decimal("0.0000025"), + output_price_usd=Decimal("0.000015"), + ) + GPT_5_4_MINI = ModelConfig( + provider="openai", + api_name="gpt-5.4-mini", + input_price_usd=Decimal("0.00000075"), + output_price_usd=Decimal("0.0000045"), + ) + GPT_5_4_NANO = ModelConfig( + provider="openai", + api_name="gpt-5.4-nano", + input_price_usd=Decimal("0.0000002"), + output_price_usd=Decimal("0.00000125"), + ) + GPT_5_5 = ModelConfig( + provider="openai", + api_name="gpt-5.5", + input_price_usd=Decimal("0.000005"), + output_price_usd=Decimal("0.00003"), + ) # ── Anthropic ─────────────────────────────────────────────────────── CLAUDE_SONNET_4_5 = ModelConfig( @@ -87,8 +130,17 @@ class SupportedModel(Enum): input_price_usd=Decimal("0.000005"), output_price_usd=Decimal("0.000025"), ) + CLAUDE_OPUS_4_7 = ModelConfig( + provider="anthropic", + api_name="claude-opus-4-7", + input_price_usd=Decimal("0.000005"), + output_price_usd=Decimal("0.000025"), + ) # ── Google Gemini ─────────────────────────────────────────────────── + # Note: gemini-2.5-flash, gemini-2.5-pro, and gemini-2.5-flash-lite are scheduled + # for deprecation on June 17, 2026 (flash-lite: July 22, 2026). Use the Gemini 3 + # replacements below for new integrations. GEMINI_2_5_FLASH = ModelConfig( provider="google", api_name="gemini-2.5-flash", @@ -116,6 +168,20 @@ class SupportedModel(Enum): input_price_usd=Decimal("0.0000005"), output_price_usd=Decimal("0.000003"), ) + GEMINI_3_1_PRO_PREVIEW = ModelConfig( + provider="google", + api_name="gemini-3.1-pro-preview", + input_price_usd=Decimal("0.000002"), + output_price_usd=Decimal("0.000012"), + thinking_budget=128, + ) + GEMINI_3_1_FLASH_LITE_PREVIEW = ModelConfig( + provider="google", + api_name="gemini-3.1-flash-lite-preview", + input_price_usd=Decimal("0.00000025"), + output_price_usd=Decimal("0.0000015"), + thinking_budget=0, + ) # ── xAI Grok ──────────────────────────────────────────────────────── GROK_4 = ModelConfig( @@ -142,14 +208,26 @@ class SupportedModel(Enum): input_price_usd=Decimal("0.0000002"), output_price_usd=Decimal("0.0000005"), ) + GROK_4_20_REASONING = ModelConfig( + provider="x-ai", + api_name="grok-4.20-reasoning", + input_price_usd=Decimal("0.000002"), + output_price_usd=Decimal("0.000006"), + ) + GROK_4_20_NON_REASONING = ModelConfig( + provider="x-ai", + api_name="grok-4.20-non-reasoning", + input_price_usd=Decimal("0.000002"), + output_price_usd=Decimal("0.000006"), + ) + GROK_CODE_FAST_1 = ModelConfig( + provider="x-ai", + api_name="grok-code-fast-1", + input_price_usd=Decimal("0.0000002"), + output_price_usd=Decimal("0.0000015"), + ) # ── Legacy models (not in current SDK — retained for older SDK versions) ── - CLAUDE_4_0_SONNET = ModelConfig( - provider="anthropic", - api_name="claude-sonnet-4-0", - input_price_usd=Decimal("0.000003"), - output_price_usd=Decimal("0.000015"), - ) GROK_3_MINI = ModelConfig( provider="x-ai", api_name="grok-3-mini", @@ -170,30 +248,44 @@ class SupportedModel(Enum): # OpenAI "gpt-4.1-2025-04-14": SupportedModel.GPT_4_1, "gpt-4.1": SupportedModel.GPT_4_1, + "gpt-4.1-mini": SupportedModel.GPT_4_1_MINI, + "gpt-4.1-mini-2025-04-14": SupportedModel.GPT_4_1_MINI, + "gpt-4.1-nano": SupportedModel.GPT_4_1_NANO, + "gpt-4.1-nano-2025-04-14": SupportedModel.GPT_4_1_NANO, + "o3": SupportedModel.O3, + "o3-2025-04-16": SupportedModel.O3, "o4-mini": SupportedModel.O4_MINI, "gpt-5": SupportedModel.GPT_5, "gpt-5-mini": SupportedModel.GPT_5_MINI, "gpt-5.2": SupportedModel.GPT_5_2, + "gpt-5.4": SupportedModel.GPT_5_4, + "gpt-5.4-mini": SupportedModel.GPT_5_4_MINI, + "gpt-5.4-nano": SupportedModel.GPT_5_4_NANO, + "gpt-5.5": SupportedModel.GPT_5_5, # Anthropic "claude-sonnet-4-5": SupportedModel.CLAUDE_SONNET_4_5, "claude-sonnet-4-6": SupportedModel.CLAUDE_SONNET_4_6, "claude-haiku-4-5": SupportedModel.CLAUDE_HAIKU_4_5, "claude-opus-4-5": SupportedModel.CLAUDE_OPUS_4_5, "claude-opus-4-6": SupportedModel.CLAUDE_OPUS_4_6, + "claude-opus-4-7": SupportedModel.CLAUDE_OPUS_4_7, # Google "gemini-2.5-flash": SupportedModel.GEMINI_2_5_FLASH, "gemini-2.5-pro": SupportedModel.GEMINI_2_5_PRO, "gemini-2.5-flash-lite": SupportedModel.GEMINI_2_5_FLASH_LITE, "gemini-3-flash-preview": SupportedModel.GEMINI_3_FLASH_PREVIEW, + "gemini-3.1-pro-preview": SupportedModel.GEMINI_3_1_PRO_PREVIEW, + "gemini-3.1-flash-lite-preview": SupportedModel.GEMINI_3_1_FLASH_LITE_PREVIEW, # xAI "grok-4": SupportedModel.GROK_4, "grok-4-fast": SupportedModel.GROK_4_FAST, "grok-4-1-fast": SupportedModel.GROK_4_1_FAST, "grok-4.1-fast": SupportedModel.GROK_4_1_FAST, "grok-4-1-fast-non-reasoning": SupportedModel.GROK_4_1_FAST_NON_REASONING, + "grok-4.20-reasoning": SupportedModel.GROK_4_20_REASONING, + "grok-4.20-non-reasoning": SupportedModel.GROK_4_20_NON_REASONING, + "grok-code-fast-1": SupportedModel.GROK_CODE_FAST_1, # Legacy — not in current SDK, retained for older SDK versions - "claude-sonnet-4-0": SupportedModel.CLAUDE_4_0_SONNET, - "claude-4.0-sonnet": SupportedModel.CLAUDE_4_0_SONNET, # alternate dot notation "grok-3-mini-beta": SupportedModel.GROK_3_MINI, # old beta alias "grok-3-mini": SupportedModel.GROK_3_MINI, "grok-3-beta": SupportedModel.GROK_3, # old beta alias diff --git a/tests/test_pricing.py b/tests/test_pricing.py index 5419782..4033711 100644 --- a/tests/test_pricing.py +++ b/tests/test_pricing.py @@ -94,12 +94,6 @@ def test_claude_sonnet_4_6_resolves(self): self.assertEqual(cfg.input_price_usd, Decimal("0.000003")) self.assertEqual(cfg.output_price_usd, Decimal("0.000015")) - def test_claude_sonnet_4_0_hyphen_resolves(self): - """claude-sonnet-4-0 (legacy) must still resolve for older SDK versions.""" - cfg = get_model_config("claude-sonnet-4-0") - self.assertEqual(cfg, get_model_config("claude-4.0-sonnet")) - self.assertEqual(cfg.provider, "anthropic") - # ── Anthropic Haiku ───────────────────────────────────────────────────── def test_claude_haiku_4_5_resolves(self): @@ -148,6 +142,55 @@ def test_gpt_5_2_resolves(self): cfg = get_model_config("gpt-5.2") self.assertEqual(cfg.provider, "openai") + def test_gpt_4_1_mini_resolves(self): + cfg = get_model_config("gpt-4.1-mini") + self.assertEqual(cfg.provider, "openai") + self.assertEqual(cfg.input_price_usd, Decimal("0.0000004")) + self.assertEqual(cfg.output_price_usd, Decimal("0.0000016")) + + def test_gpt_4_1_mini_dated_resolves(self): + cfg = get_model_config("gpt-4.1-mini-2025-04-14") + self.assertEqual(cfg, get_model_config("gpt-4.1-mini")) + + def test_gpt_4_1_nano_resolves(self): + cfg = get_model_config("gpt-4.1-nano") + self.assertEqual(cfg.provider, "openai") + self.assertEqual(cfg.input_price_usd, Decimal("0.0000001")) + self.assertEqual(cfg.output_price_usd, Decimal("0.0000004")) + + def test_gpt_4_1_nano_dated_resolves(self): + cfg = get_model_config("gpt-4.1-nano-2025-04-14") + self.assertEqual(cfg, get_model_config("gpt-4.1-nano")) + + def test_o3_resolves(self): + cfg = get_model_config("o3") + self.assertEqual(cfg.provider, "openai") + self.assertEqual(cfg.force_temperature, 1.0) + + def test_o3_dated_resolves(self): + cfg = get_model_config("o3-2025-04-16") + self.assertEqual(cfg, get_model_config("o3")) + + def test_gpt_5_4_resolves(self): + cfg = get_model_config("gpt-5.4") + self.assertEqual(cfg.provider, "openai") + self.assertEqual(cfg.input_price_usd, Decimal("0.0000025")) + self.assertEqual(cfg.output_price_usd, Decimal("0.000015")) + + def test_gpt_5_4_mini_resolves(self): + cfg = get_model_config("gpt-5.4-mini") + self.assertEqual(cfg.provider, "openai") + + def test_gpt_5_4_nano_resolves(self): + cfg = get_model_config("gpt-5.4-nano") + self.assertEqual(cfg.provider, "openai") + + def test_gpt_5_5_resolves(self): + cfg = get_model_config("gpt-5.5") + self.assertEqual(cfg.provider, "openai") + self.assertEqual(cfg.input_price_usd, Decimal("0.000005")) + self.assertEqual(cfg.output_price_usd, Decimal("0.00003")) + # ── Google ────────────────────────────────────────────────────────────── def test_gemini_2_5_flash_resolves(self): @@ -167,6 +210,20 @@ def test_gemini_3_flash_preview_resolves(self): cfg = get_model_config("gemini-3-flash-preview") self.assertEqual(cfg.provider, "google") + def test_gemini_3_1_pro_preview_resolves(self): + cfg = get_model_config("gemini-3.1-pro-preview") + self.assertEqual(cfg.provider, "google") + self.assertEqual(cfg.input_price_usd, Decimal("0.000002")) + self.assertEqual(cfg.output_price_usd, Decimal("0.000012")) + self.assertEqual(cfg.thinking_budget, 128) + + def test_gemini_3_1_flash_lite_preview_resolves(self): + cfg = get_model_config("gemini-3.1-flash-lite-preview") + self.assertEqual(cfg.provider, "google") + self.assertEqual(cfg.input_price_usd, Decimal("0.00000025")) + self.assertEqual(cfg.output_price_usd, Decimal("0.0000015")) + self.assertEqual(cfg.thinking_budget, 0) + # ── xAI Grok ──────────────────────────────────────────────────────────── def test_grok_4_resolves(self): @@ -193,6 +250,30 @@ def test_grok_3_resolves(self): cfg = get_model_config("grok-3") self.assertEqual(cfg.provider, "x-ai") + def test_grok_4_20_reasoning_resolves(self): + cfg = get_model_config("grok-4.20-reasoning") + self.assertEqual(cfg.provider, "x-ai") + self.assertEqual(cfg.input_price_usd, Decimal("0.000002")) + self.assertEqual(cfg.output_price_usd, Decimal("0.000006")) + + def test_grok_4_20_non_reasoning_resolves(self): + cfg = get_model_config("grok-4.20-non-reasoning") + self.assertEqual(cfg.provider, "x-ai") + self.assertEqual(cfg.input_price_usd, Decimal("0.000002")) + self.assertEqual(cfg.output_price_usd, Decimal("0.000006")) + + def test_grok_code_fast_1_resolves(self): + cfg = get_model_config("grok-code-fast-1") + self.assertEqual(cfg.provider, "x-ai") + self.assertEqual(cfg.input_price_usd, Decimal("0.0000002")) + self.assertEqual(cfg.output_price_usd, Decimal("0.0000015")) + + def test_claude_opus_4_7_resolves(self): + cfg = get_model_config("claude-opus-4-7") + self.assertEqual(cfg.provider, "anthropic") + self.assertEqual(cfg.input_price_usd, Decimal("0.000005")) + self.assertEqual(cfg.output_price_usd, Decimal("0.000025")) + # ── Errors ─────────────────────────────────────────────────────────────── def test_unknown_model_raises(self): @@ -238,6 +319,55 @@ def test_o4_mini_cost(self): expected = _expected_cost_opg("o4-mini", 2000, 1000) self.assertEqual(cost, expected) + def test_gpt_4_1_mini_cost(self): + cost = self._calc("gpt-4.1-mini", 1000, 500) + expected = _expected_cost_opg("gpt-4.1-mini", 1000, 500) + self.assertEqual(cost, expected) + # 1000*0.0000004 + 500*0.0000016 = 0.0004 + 0.0008 = 0.0012 USD = 1.2e15 wei + self.assertEqual(cost, 1_200_000_000_000_000) + + def test_gpt_4_1_nano_cost(self): + cost = self._calc("gpt-4.1-nano", 1000, 500) + expected = _expected_cost_opg("gpt-4.1-nano", 1000, 500) + self.assertEqual(cost, expected) + # 1000*0.0000001 + 500*0.0000004 = 0.0001 + 0.0002 = 0.0003 USD = 3e14 wei + self.assertEqual(cost, 300_000_000_000_000) + + def test_o3_cost(self): + cost = self._calc("o3", 1000, 500) + expected = _expected_cost_opg("o3", 1000, 500) + self.assertEqual(cost, expected) + # 1000*0.00001 + 500*0.00004 = 0.01 + 0.02 = 0.03 USD = 3e16 wei + self.assertEqual(cost, 30_000_000_000_000_000) + + def test_gpt_5_4_cost(self): + cost = self._calc("gpt-5.4", 1000, 500) + expected = _expected_cost_opg("gpt-5.4", 1000, 500) + self.assertEqual(cost, expected) + # 1000*0.0000025 + 500*0.000015 = 0.0025 + 0.0075 = 0.01 USD = 1e16 wei + self.assertEqual(cost, 10_000_000_000_000_000) + + def test_gpt_5_4_mini_cost(self): + cost = self._calc("gpt-5.4-mini", 1000, 500) + expected = _expected_cost_opg("gpt-5.4-mini", 1000, 500) + self.assertEqual(cost, expected) + # 1000*0.00000075 + 500*0.0000045 = 0.00075 + 0.00225 = 0.003 USD = 3e15 wei + self.assertEqual(cost, 3_000_000_000_000_000) + + def test_gpt_5_4_nano_cost(self): + cost = self._calc("gpt-5.4-nano", 1000, 500) + expected = _expected_cost_opg("gpt-5.4-nano", 1000, 500) + self.assertEqual(cost, expected) + # 1000*0.0000002 + 500*0.00000125 = 0.0002 + 0.000625 = 0.000825 USD = 8.25e14 wei + self.assertEqual(cost, 825_000_000_000_000) + + def test_gpt_5_5_cost(self): + cost = self._calc("gpt-5.5", 1000, 500) + expected = _expected_cost_opg("gpt-5.5", 1000, 500) + self.assertEqual(cost, expected) + # 1000*0.000005 + 500*0.00003 = 0.005 + 0.015 = 0.02 USD = 2e16 wei + self.assertEqual(cost, 20_000_000_000_000_000) + # ── Anthropic Sonnet ──────────────────────────────────────────────────── def test_claude_sonnet_4_5_cost(self): @@ -251,14 +381,6 @@ def test_claude_sonnet_4_6_cost(self): cost = self._calc("claude-sonnet-4-6", 1000, 500) self.assertEqual(cost, self._calc("claude-sonnet-4-5", 1000, 500)) - def test_claude_sonnet_4_0_cost(self): - """claude-sonnet-4-0 (legacy) must produce correct pricing.""" - cost = self._calc("claude-sonnet-4-0", 1000, 500) - expected = _expected_cost_opg("claude-sonnet-4-0", 1000, 500) - self.assertEqual(cost, expected) - # Same price tier as claude-sonnet-4-5 - self.assertEqual(cost, 10_500_000_000_000_000) - # ── Anthropic Haiku ───────────────────────────────────────────────────── def test_claude_haiku_4_5_cost(self): @@ -281,6 +403,13 @@ def test_claude_opus_4_6_cost(self): cost = self._calc("claude-opus-4-6", 1000, 500) self.assertEqual(cost, self._calc("claude-opus-4-5", 1000, 500)) + def test_claude_opus_4_7_cost(self): + cost = self._calc("claude-opus-4-7", 1000, 500) + expected = _expected_cost_opg("claude-opus-4-7", 1000, 500) + self.assertEqual(cost, expected) + # Same price tier as opus-4-5/4-6: 1000*0.000005 + 500*0.000025 = 0.0175 USD + self.assertEqual(cost, 17_500_000_000_000_000) + # ── Google Gemini ──────────────────────────────────────────────────────── def test_gemini_2_5_flash_cost(self): @@ -307,6 +436,20 @@ def test_gemini_3_flash_preview_cost(self): expected = _expected_cost_opg("gemini-3-flash-preview", 1000, 500) self.assertEqual(cost, expected) + def test_gemini_3_1_pro_preview_cost(self): + cost = self._calc("gemini-3.1-pro-preview", 1000, 500) + expected = _expected_cost_opg("gemini-3.1-pro-preview", 1000, 500) + self.assertEqual(cost, expected) + # 1000*0.000002 + 500*0.000012 = 0.002 + 0.006 = 0.008 USD = 8e15 wei + self.assertEqual(cost, 8_000_000_000_000_000) + + def test_gemini_3_1_flash_lite_preview_cost(self): + cost = self._calc("gemini-3.1-flash-lite-preview", 1000, 500) + expected = _expected_cost_opg("gemini-3.1-flash-lite-preview", 1000, 500) + self.assertEqual(cost, expected) + # 1000*0.00000025 + 500*0.0000015 = 0.00025 + 0.00075 = 0.001 USD = 1e15 wei + self.assertEqual(cost, 1_000_000_000_000_000) + # ── xAI Grok ──────────────────────────────────────────────────────────── def test_grok_4_cost(self): @@ -327,6 +470,24 @@ def test_grok_4_1_fast_cost(self): cost = self._calc("grok-4-1-fast", 1000, 500) self.assertEqual(cost, self._calc("grok-4-fast", 1000, 500)) + def test_grok_4_20_reasoning_cost(self): + cost = self._calc("grok-4.20-reasoning", 1000, 500) + expected = _expected_cost_opg("grok-4.20-reasoning", 1000, 500) + self.assertEqual(cost, expected) + # 1000*0.000002 + 500*0.000006 = 0.002 + 0.003 = 0.005 USD = 5e15 wei + self.assertEqual(cost, 5_000_000_000_000_000) + + def test_grok_4_20_non_reasoning_cost(self): + cost = self._calc("grok-4.20-non-reasoning", 1000, 500) + self.assertEqual(cost, self._calc("grok-4.20-reasoning", 1000, 500)) + + def test_grok_code_fast_1_cost(self): + cost = self._calc("grok-code-fast-1", 1000, 500) + expected = _expected_cost_opg("grok-code-fast-1", 1000, 500) + self.assertEqual(cost, expected) + # 1000*0.0000002 + 500*0.0000015 = 0.0002 + 0.00075 = 0.00095 USD = 9.5e14 wei + self.assertEqual(cost, 950_000_000_000_000) + def test_grok_3_mini_cost(self): cost = self._calc("grok-3-mini", 1000, 500) expected = _expected_cost_opg("grok-3-mini", 1000, 500) @@ -435,16 +596,6 @@ def test_model_name_case_insensitive(self): ) self.assertEqual(cost_lower, cost_upper) - def test_sonnet_4_0_hyphen_vs_dot_same_cost(self): - """claude-sonnet-4-0 and claude-4.0-sonnet are the same model.""" - cost_hyphen = calculate_session_cost( - _ctx("claude-sonnet-4-0", 1000, 500), _get_price - ) - cost_dot = calculate_session_cost( - _ctx("claude-4.0-sonnet", 1000, 500), _get_price - ) - self.assertEqual(cost_hyphen, cost_dot) - if __name__ == "__main__": unittest.main()