diff --git a/CHANGELOG.md b/CHANGELOG.md index aa48422..72b866d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ All notable changes for the ContextGuard plugin are documented here. ## [Unreleased] - Extended Batch 1 token-savings advisory reports with cache-score amortization risk fields, tool-prune deferred-schema proxy accounting, and a benchmark measurement-baseline contract while preserving local-only/no-savings-claim boundaries. +- Clarified cache-score amortization output for cache-read multipliers above uncached cost by reporting a bounded `max_profitable_reuses` instead of a monotonic break-even reuse count. ## [0.4.10] - 2026-06-14 diff --git a/context-guard-kit/cache_score.py b/context-guard-kit/cache_score.py index c330c9d..4ae8f20 100755 --- a/context-guard-kit/cache_score.py +++ b/context-guard-kit/cache_score.py @@ -26,6 +26,7 @@ DEFAULT_EXPECTED_REUSES = 1 MAX_EXPECTED_REUSES = 1_000_000 MAX_CACHE_MULTIPLIER = 1_000_000.0 +SAVINGS_EPSILON = 1e-12 PROVIDER_MINIMUM_CACHEABLE_TOKENS = { # Provider and model minimums move over time. These defaults are advisory # and can be overridden with --minimum-cacheable-tokens. @@ -279,6 +280,28 @@ def json_shape_warnings(text: str) -> tuple[str, list[dict[str, Any]]]: return "json", warnings +def read_premium_relative_savings(reuses: int, *, write_multiplier: float, read_multiplier: float) -> float: + return (1.0 - write_multiplier) + (reuses * (1.0 - read_multiplier)) + + +def max_profitable_read_premium_reuses(*, write_multiplier: float, read_multiplier: float) -> int: + """Return the largest reuse count with strictly positive relative savings.""" + candidate = max(0, int(math.floor((1.0 - write_multiplier) / (read_multiplier - 1.0)))) + while candidate > 0 and read_premium_relative_savings( + candidate, + write_multiplier=write_multiplier, + read_multiplier=read_multiplier, + ) <= SAVINGS_EPSILON: + candidate -= 1 + while read_premium_relative_savings( + candidate + 1, + write_multiplier=write_multiplier, + read_multiplier=read_multiplier, + ) > SAVINGS_EPSILON: + candidate += 1 + return candidate + + def build_amortization_report( *, eligible: bool, @@ -296,6 +319,7 @@ def build_amortization_report( """ supplied = cache_write_multiplier is not None and cache_read_multiplier is not None break_even_reuses: int | None = None + max_profitable_reuses: int | None = None expected_uncached_relative_cost: float | None = None expected_cached_relative_cost: float | None = None expected_relative_savings: float | None = None @@ -330,14 +354,28 @@ def build_amortization_report( break_even_reuses = 0 status = "already_break_even_on_write" risk = "low" - elif cache_read_multiplier > 1.0 and cache_write_multiplier <= 1.0 and expected_reuses == 0: - break_even_reuses = 0 - status = "already_break_even_on_write" - risk = "low" - elif cache_read_multiplier > 1.0 and expected_relative_savings >= 0: - break_even_reuses = 0 if cache_write_multiplier <= 1.0 else None - status = "amortizes_with_expected_reuses" - risk = "medium" + elif cache_read_multiplier > 1.0: + if cache_write_multiplier < 1.0: + max_profitable_reuses = max_profitable_read_premium_reuses( + write_multiplier=cache_write_multiplier, + read_multiplier=cache_read_multiplier, + ) + if expected_relative_savings < -SAVINGS_EPSILON: + status = "no_read_discount" + risk = "high" + elif expected_reuses == 0: + if expected_relative_savings > SAVINGS_EPSILON: + status = "write_discount_only_no_expected_reads" + risk = "low" + else: + status = "break_even_only_no_expected_reads" + risk = "medium" + elif abs(expected_relative_savings) <= SAVINGS_EPSILON: + status = "break_even_only_with_limited_reuses" + risk = "medium" + else: + status = "positive_only_with_limited_reuses" + risk = "medium" else: status = "no_read_discount" risk = "high" @@ -347,6 +385,7 @@ def build_amortization_report( "expected_reuses_semantics": "future_cache_reads_after_initial_write", "cacheable_prefix_tokens": prefix_tokens, "break_even_reuses": break_even_reuses, + "max_profitable_reuses": max_profitable_reuses, "status": status, "risk": risk, "cache_write_multiplier": cache_write_multiplier, @@ -356,7 +395,7 @@ def build_amortization_report( "expected_relative_savings": expected_relative_savings, "multiplier_baseline": "uncached_prefix_input_cost_equals_1.0", "user_supplied_multipliers": supplied, - "formula": "expected_cached=write_multiplier + expected_reuses*read_multiplier; expected_uncached=1 + expected_reuses; break_even=ceil((write_multiplier - 1.0)/(1.0-read_multiplier)) only when read_multiplier<1", + "formula": "expected_cached=write_multiplier + expected_reuses*read_multiplier; expected_uncached=1 + expected_reuses; break_even=ceil((write_multiplier - 1.0)/(1.0-read_multiplier)) only when read_multiplier<1; max_profitable_reuses is the largest integer reuse count with expected_uncached-expected_cached > 0, only when read_multiplier>1 and write_multiplier<1", "claim_boundary": { "advisory_only": True, "provider_pricing_defaults_included": False, @@ -459,7 +498,8 @@ def render_text(report: dict[str, Any]) -> str: f"warnings: {warning_codes}\n" f"amortization: {amortization.get('status', 'unknown')} " f"(risk={amortization.get('risk', 'unknown')}, " - f"break_even_reuses={amortization.get('break_even_reuses')})\n" + f"break_even_reuses={amortization.get('break_even_reuses')}, " + f"max_profitable_reuses={amortization.get('max_profitable_reuses')})\n" "claim boundary: advisory static lint only; not a measured provider cache hit or cost saving.\n" ) diff --git a/plugins/context-guard/bin/context-guard-cache-score b/plugins/context-guard/bin/context-guard-cache-score index c330c9d..4ae8f20 100755 --- a/plugins/context-guard/bin/context-guard-cache-score +++ b/plugins/context-guard/bin/context-guard-cache-score @@ -26,6 +26,7 @@ TOKEN_PROXY_CHARS_PER_TOKEN = 4 DEFAULT_EXPECTED_REUSES = 1 MAX_EXPECTED_REUSES = 1_000_000 MAX_CACHE_MULTIPLIER = 1_000_000.0 +SAVINGS_EPSILON = 1e-12 PROVIDER_MINIMUM_CACHEABLE_TOKENS = { # Provider and model minimums move over time. These defaults are advisory # and can be overridden with --minimum-cacheable-tokens. @@ -279,6 +280,28 @@ def json_shape_warnings(text: str) -> tuple[str, list[dict[str, Any]]]: return "json", warnings +def read_premium_relative_savings(reuses: int, *, write_multiplier: float, read_multiplier: float) -> float: + return (1.0 - write_multiplier) + (reuses * (1.0 - read_multiplier)) + + +def max_profitable_read_premium_reuses(*, write_multiplier: float, read_multiplier: float) -> int: + """Return the largest reuse count with strictly positive relative savings.""" + candidate = max(0, int(math.floor((1.0 - write_multiplier) / (read_multiplier - 1.0)))) + while candidate > 0 and read_premium_relative_savings( + candidate, + write_multiplier=write_multiplier, + read_multiplier=read_multiplier, + ) <= SAVINGS_EPSILON: + candidate -= 1 + while read_premium_relative_savings( + candidate + 1, + write_multiplier=write_multiplier, + read_multiplier=read_multiplier, + ) > SAVINGS_EPSILON: + candidate += 1 + return candidate + + def build_amortization_report( *, eligible: bool, @@ -296,6 +319,7 @@ def build_amortization_report( """ supplied = cache_write_multiplier is not None and cache_read_multiplier is not None break_even_reuses: int | None = None + max_profitable_reuses: int | None = None expected_uncached_relative_cost: float | None = None expected_cached_relative_cost: float | None = None expected_relative_savings: float | None = None @@ -330,14 +354,28 @@ def build_amortization_report( break_even_reuses = 0 status = "already_break_even_on_write" risk = "low" - elif cache_read_multiplier > 1.0 and cache_write_multiplier <= 1.0 and expected_reuses == 0: - break_even_reuses = 0 - status = "already_break_even_on_write" - risk = "low" - elif cache_read_multiplier > 1.0 and expected_relative_savings >= 0: - break_even_reuses = 0 if cache_write_multiplier <= 1.0 else None - status = "amortizes_with_expected_reuses" - risk = "medium" + elif cache_read_multiplier > 1.0: + if cache_write_multiplier < 1.0: + max_profitable_reuses = max_profitable_read_premium_reuses( + write_multiplier=cache_write_multiplier, + read_multiplier=cache_read_multiplier, + ) + if expected_relative_savings < -SAVINGS_EPSILON: + status = "no_read_discount" + risk = "high" + elif expected_reuses == 0: + if expected_relative_savings > SAVINGS_EPSILON: + status = "write_discount_only_no_expected_reads" + risk = "low" + else: + status = "break_even_only_no_expected_reads" + risk = "medium" + elif abs(expected_relative_savings) <= SAVINGS_EPSILON: + status = "break_even_only_with_limited_reuses" + risk = "medium" + else: + status = "positive_only_with_limited_reuses" + risk = "medium" else: status = "no_read_discount" risk = "high" @@ -347,6 +385,7 @@ def build_amortization_report( "expected_reuses_semantics": "future_cache_reads_after_initial_write", "cacheable_prefix_tokens": prefix_tokens, "break_even_reuses": break_even_reuses, + "max_profitable_reuses": max_profitable_reuses, "status": status, "risk": risk, "cache_write_multiplier": cache_write_multiplier, @@ -356,7 +395,7 @@ def build_amortization_report( "expected_relative_savings": expected_relative_savings, "multiplier_baseline": "uncached_prefix_input_cost_equals_1.0", "user_supplied_multipliers": supplied, - "formula": "expected_cached=write_multiplier + expected_reuses*read_multiplier; expected_uncached=1 + expected_reuses; break_even=ceil((write_multiplier - 1.0)/(1.0-read_multiplier)) only when read_multiplier<1", + "formula": "expected_cached=write_multiplier + expected_reuses*read_multiplier; expected_uncached=1 + expected_reuses; break_even=ceil((write_multiplier - 1.0)/(1.0-read_multiplier)) only when read_multiplier<1; max_profitable_reuses is the largest integer reuse count with expected_uncached-expected_cached > 0, only when read_multiplier>1 and write_multiplier<1", "claim_boundary": { "advisory_only": True, "provider_pricing_defaults_included": False, @@ -459,7 +498,8 @@ def render_text(report: dict[str, Any]) -> str: f"warnings: {warning_codes}\n" f"amortization: {amortization.get('status', 'unknown')} " f"(risk={amortization.get('risk', 'unknown')}, " - f"break_even_reuses={amortization.get('break_even_reuses')})\n" + f"break_even_reuses={amortization.get('break_even_reuses')}, " + f"max_profitable_reuses={amortization.get('max_profitable_reuses')})\n" "claim boundary: advisory static lint only; not a measured provider cache hit or cost saving.\n" ) diff --git a/tests/test_context_guard_kit.py b/tests/test_context_guard_kit.py index c5c8335..a40f514 100644 --- a/tests/test_context_guard_kit.py +++ b/tests/test_context_guard_kit.py @@ -10352,6 +10352,7 @@ def test_cache_score_reports_static_prefix_and_claim_boundary(self): self.assertEqual(amortization["cache_write_multiplier"], 1.25) self.assertEqual(amortization["cache_read_multiplier"], 0.1) self.assertEqual(amortization["break_even_reuses"], 1) + self.assertIsNone(amortization["max_profitable_reuses"]) self.assertEqual(amortization["status"], "amortizes_with_expected_reuses") self.assertEqual(amortization["risk"], "low") self.assertAlmostEqual(amortization["expected_uncached_relative_cost"], 4.0) @@ -10380,10 +10381,101 @@ def test_cache_score_reports_static_prefix_and_claim_boundary(self): self.assertEqual(premium["status"], "no_read_discount") self.assertEqual(premium["risk"], "high") self.assertIsNone(premium["break_even_reuses"]) + self.assertEqual(premium["max_profitable_reuses"], 0) self.assertAlmostEqual(premium["expected_uncached_relative_cost"], 2.0) self.assertAlmostEqual(premium["expected_cached_relative_cost"], 2.5) self.assertLess(premium["expected_relative_savings"], 0) + limited_premium_proc = self._run_cache_score( + script, + "--provider", + "openai", + "--expected-reuses", + "4", + "--cache-write-multiplier", + "0.5", + "--cache-read-multiplier", + "1.1", + "--json", + input_data=prompt, + ) + limited_premium = json.loads(limited_premium_proc.stdout)["amortization"] + self.assertEqual(limited_premium["status"], "positive_only_with_limited_reuses") + self.assertEqual(limited_premium["risk"], "medium") + self.assertIsNone(limited_premium["break_even_reuses"]) + self.assertEqual(limited_premium["max_profitable_reuses"], 4) + self.assertGreater(limited_premium["expected_relative_savings"], 0) + limited_text = self._run_cache_score( + script, + "--provider", + "openai", + "--expected-reuses", + "4", + "--cache-write-multiplier", + "0.5", + "--cache-read-multiplier", + "1.1", + input_data=prompt, + ) + self.assertIn("positive_only_with_limited_reuses", limited_text.stdout) + self.assertIn("max_profitable_reuses=4", limited_text.stdout) + + exact_break_even_proc = self._run_cache_score( + script, + "--provider", + "openai", + "--expected-reuses", + "1", + "--cache-write-multiplier", + "0.5", + "--cache-read-multiplier", + "1.5", + "--json", + input_data=prompt, + ) + exact_break_even = json.loads(exact_break_even_proc.stdout)["amortization"] + self.assertEqual(exact_break_even["status"], "break_even_only_with_limited_reuses") + self.assertEqual(exact_break_even["risk"], "medium") + self.assertEqual(exact_break_even["max_profitable_reuses"], 0) + self.assertAlmostEqual(exact_break_even["expected_relative_savings"], 0.0) + + decimal_break_even_proc = self._run_cache_score( + script, + "--provider", + "openai", + "--expected-reuses", + "2", + "--cache-write-multiplier", + "0.2", + "--cache-read-multiplier", + "1.4", + "--json", + input_data=prompt, + ) + decimal_break_even = json.loads(decimal_break_even_proc.stdout)["amortization"] + self.assertEqual(decimal_break_even["status"], "break_even_only_with_limited_reuses") + self.assertEqual(decimal_break_even["max_profitable_reuses"], 1) + self.assertAlmostEqual(decimal_break_even["expected_relative_savings"], 0.0) + + no_read_break_even_proc = self._run_cache_score( + script, + "--provider", + "openai", + "--expected-reuses", + "0", + "--cache-write-multiplier", + "1", + "--cache-read-multiplier", + "2", + "--json", + input_data=prompt, + ) + no_read_break_even = json.loads(no_read_break_even_proc.stdout)["amortization"] + self.assertEqual(no_read_break_even["status"], "break_even_only_no_expected_reads") + self.assertEqual(no_read_break_even["risk"], "medium") + self.assertIsNone(no_read_break_even["max_profitable_reuses"]) + self.assertAlmostEqual(no_read_break_even["expected_relative_savings"], 0.0) + def test_cache_score_json_order_provider_thresholds_and_help(self): request = { "tools": [