Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ All notable changes for the ContextGuard plugin are documented here.
## [Unreleased]

- Extended Batch 1 token-savings advisory reports with cache-score amortization risk fields, tool-prune deferred-schema proxy accounting, and a benchmark measurement-baseline contract while preserving local-only/no-savings-claim boundaries.
- Clarified cache-score amortization output for cache-read multipliers above uncached cost by reporting a bounded `max_profitable_reuses` instead of a monotonic break-even reuse count.

## [0.4.10] - 2026-06-14

Expand Down
60 changes: 50 additions & 10 deletions context-guard-kit/cache_score.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
DEFAULT_EXPECTED_REUSES = 1
MAX_EXPECTED_REUSES = 1_000_000
MAX_CACHE_MULTIPLIER = 1_000_000.0
# Tolerance used when comparing relative savings against zero, so that
# floating-point rounding around an exact break-even (e.g. 0.2 + 2 * 1.4 vs 3)
# is classified as break-even rather than as a tiny profit or loss.
SAVINGS_EPSILON = 1e-12
PROVIDER_MINIMUM_CACHEABLE_TOKENS = {
# Provider and model minimums move over time. These defaults are advisory
# and can be overridden with --minimum-cacheable-tokens.
Expand Down Expand Up @@ -279,6 +280,28 @@ def json_shape_warnings(text: str) -> tuple[str, list[dict[str, Any]]]:
return "json", warnings


def read_premium_relative_savings(reuses: int, *, write_multiplier: float, read_multiplier: float) -> float:
    """Return relative savings of caching after *reuses* cache reads.

    Costs are expressed relative to an uncached prefix cost of 1.0, so the
    result equals ``(1 + reuses) - (write_multiplier + reuses * read_multiplier)``
    regrouped into a one-off write term plus a per-read term.  A positive
    value means caching is cheaper than not caching.
    """
    # One-off saving (or premium, if negative) from the initial cache write.
    write_term = 1.0 - write_multiplier
    # Saving (or premium, if negative) contributed by each later cache read.
    per_read_term = 1.0 - read_multiplier
    return write_term + (reuses * per_read_term)


def max_profitable_read_premium_reuses(*, write_multiplier: float, read_multiplier: float) -> int:
    """Return the largest reuse count with strictly positive relative savings.

    Applies to the "read premium" case, where a cache read costs more than an
    uncached read (``read_multiplier > 1``).  Savings then shrink with every
    additional reuse, so a finite maximum exists.  "Strictly positive" means
    greater than ``SAVINGS_EPSILON``, so float noise around an exact
    break-even is not counted as profit.

    Args:
        write_multiplier: Cache-write cost relative to an uncached cost of 1.0.
        read_multiplier: Cache-read cost relative to an uncached cost of 1.0.
            Must be greater than 1.0.

    Returns:
        The largest positive integer reuse count whose relative savings exceed
        ``SAVINGS_EPSILON``, or 0 when no positive reuse count qualifies.

    Raises:
        ValueError: If ``read_multiplier`` is not greater than 1.0 (NaN
            included).  Without a read premium the savings never decrease, so
            no maximum exists; unguarded, this input would divide by zero or
            loop forever.
    """
    # `not >` rather than `<=` so that NaN is rejected as well.
    if not read_multiplier > 1.0:
        raise ValueError(
            f"read_multiplier must be greater than 1.0 for a bounded reuse count, got {read_multiplier!r}"
        )
    if write_multiplier >= 1.0:
        # No write discount to spend: every positive reuse count loses money.
        return 0

    # Closed-form estimate of the break-even point; the two loops below correct
    # it for floating-point rounding and for the strict-positivity tolerance.
    candidate = max(0, int(math.floor((1.0 - write_multiplier) / (read_multiplier - 1.0))))
    # Step down while the estimate itself is not strictly profitable.
    while candidate > 0 and read_premium_relative_savings(
        candidate,
        write_multiplier=write_multiplier,
        read_multiplier=read_multiplier,
    ) <= SAVINGS_EPSILON:
        candidate -= 1
    # Step up while the next reuse count is still strictly profitable.
    while read_premium_relative_savings(
        candidate + 1,
        write_multiplier=write_multiplier,
        read_multiplier=read_multiplier,
    ) > SAVINGS_EPSILON:
        candidate += 1
    return candidate


def build_amortization_report(
*,
eligible: bool,
Expand All @@ -296,6 +319,7 @@ def build_amortization_report(
"""
supplied = cache_write_multiplier is not None and cache_read_multiplier is not None
break_even_reuses: int | None = None
max_profitable_reuses: int | None = None
expected_uncached_relative_cost: float | None = None
expected_cached_relative_cost: float | None = None
expected_relative_savings: float | None = None
Expand Down Expand Up @@ -330,14 +354,28 @@ def build_amortization_report(
break_even_reuses = 0
status = "already_break_even_on_write"
risk = "low"
elif cache_read_multiplier > 1.0 and cache_write_multiplier <= 1.0 and expected_reuses == 0:
break_even_reuses = 0
status = "already_break_even_on_write"
risk = "low"
elif cache_read_multiplier > 1.0 and expected_relative_savings >= 0:
break_even_reuses = 0 if cache_write_multiplier <= 1.0 else None
status = "amortizes_with_expected_reuses"
risk = "medium"
elif cache_read_multiplier > 1.0:
if cache_write_multiplier < 1.0:
max_profitable_reuses = max_profitable_read_premium_reuses(
write_multiplier=cache_write_multiplier,
read_multiplier=cache_read_multiplier,
)
if expected_relative_savings < -SAVINGS_EPSILON:
status = "no_read_discount"
risk = "high"
elif expected_reuses == 0:
if expected_relative_savings > SAVINGS_EPSILON:
status = "write_discount_only_no_expected_reads"
risk = "low"
else:
status = "break_even_only_no_expected_reads"
risk = "medium"
elif abs(expected_relative_savings) <= SAVINGS_EPSILON:
status = "break_even_only_with_limited_reuses"
risk = "medium"
else:
status = "positive_only_with_limited_reuses"
risk = "medium"
else:
status = "no_read_discount"
risk = "high"
Expand All @@ -347,6 +385,7 @@ def build_amortization_report(
"expected_reuses_semantics": "future_cache_reads_after_initial_write",
"cacheable_prefix_tokens": prefix_tokens,
"break_even_reuses": break_even_reuses,
"max_profitable_reuses": max_profitable_reuses,
"status": status,
"risk": risk,
"cache_write_multiplier": cache_write_multiplier,
Expand All @@ -356,7 +395,7 @@ def build_amortization_report(
"expected_relative_savings": expected_relative_savings,
"multiplier_baseline": "uncached_prefix_input_cost_equals_1.0",
"user_supplied_multipliers": supplied,
"formula": "expected_cached=write_multiplier + expected_reuses*read_multiplier; expected_uncached=1 + expected_reuses; break_even=ceil((write_multiplier - 1.0)/(1.0-read_multiplier)) only when read_multiplier<1",
"formula": "expected_cached=write_multiplier + expected_reuses*read_multiplier; expected_uncached=1 + expected_reuses; break_even=ceil((write_multiplier - 1.0)/(1.0-read_multiplier)) only when read_multiplier<1; max_profitable_reuses is the largest integer reuse count with expected_uncached-expected_cached > 0, only when read_multiplier>1 and write_multiplier<1",
"claim_boundary": {
"advisory_only": True,
"provider_pricing_defaults_included": False,
Expand Down Expand Up @@ -459,7 +498,8 @@ def render_text(report: dict[str, Any]) -> str:
f"warnings: {warning_codes}\n"
f"amortization: {amortization.get('status', 'unknown')} "
f"(risk={amortization.get('risk', 'unknown')}, "
f"break_even_reuses={amortization.get('break_even_reuses')})\n"
f"break_even_reuses={amortization.get('break_even_reuses')}, "
f"max_profitable_reuses={amortization.get('max_profitable_reuses')})\n"
"claim boundary: advisory static lint only; not a measured provider cache hit or cost saving.\n"
)

Expand Down
60 changes: 50 additions & 10 deletions plugins/context-guard/bin/context-guard-cache-score
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ TOKEN_PROXY_CHARS_PER_TOKEN = 4
DEFAULT_EXPECTED_REUSES = 1
MAX_EXPECTED_REUSES = 1_000_000
MAX_CACHE_MULTIPLIER = 1_000_000.0
# Tolerance used when comparing relative savings against zero, so that
# floating-point rounding around an exact break-even (e.g. 0.2 + 2 * 1.4 vs 3)
# is classified as break-even rather than as a tiny profit or loss.
SAVINGS_EPSILON = 1e-12
PROVIDER_MINIMUM_CACHEABLE_TOKENS = {
# Provider and model minimums move over time. These defaults are advisory
# and can be overridden with --minimum-cacheable-tokens.
Expand Down Expand Up @@ -279,6 +280,28 @@ def json_shape_warnings(text: str) -> tuple[str, list[dict[str, Any]]]:
return "json", warnings


def read_premium_relative_savings(reuses: int, *, write_multiplier: float, read_multiplier: float) -> float:
    """Return relative savings of caching after *reuses* cache reads.

    Costs are expressed relative to an uncached prefix cost of 1.0, so the
    result equals ``(1 + reuses) - (write_multiplier + reuses * read_multiplier)``
    regrouped into a one-off write term plus a per-read term.  A positive
    value means caching is cheaper than not caching.
    """
    # One-off saving (or premium, if negative) from the initial cache write.
    write_term = 1.0 - write_multiplier
    # Saving (or premium, if negative) contributed by each later cache read.
    per_read_term = 1.0 - read_multiplier
    return write_term + (reuses * per_read_term)


def max_profitable_read_premium_reuses(*, write_multiplier: float, read_multiplier: float) -> int:
    """Return the largest reuse count with strictly positive relative savings.

    Applies to the "read premium" case, where a cache read costs more than an
    uncached read (``read_multiplier > 1``).  Savings then shrink with every
    additional reuse, so a finite maximum exists.  "Strictly positive" means
    greater than ``SAVINGS_EPSILON``, so float noise around an exact
    break-even is not counted as profit.

    Args:
        write_multiplier: Cache-write cost relative to an uncached cost of 1.0.
        read_multiplier: Cache-read cost relative to an uncached cost of 1.0.
            Must be greater than 1.0.

    Returns:
        The largest positive integer reuse count whose relative savings exceed
        ``SAVINGS_EPSILON``, or 0 when no positive reuse count qualifies.

    Raises:
        ValueError: If ``read_multiplier`` is not greater than 1.0 (NaN
            included).  Without a read premium the savings never decrease, so
            no maximum exists; unguarded, this input would divide by zero or
            loop forever.
    """
    # `not >` rather than `<=` so that NaN is rejected as well.
    if not read_multiplier > 1.0:
        raise ValueError(
            f"read_multiplier must be greater than 1.0 for a bounded reuse count, got {read_multiplier!r}"
        )
    if write_multiplier >= 1.0:
        # No write discount to spend: every positive reuse count loses money.
        return 0

    # Closed-form estimate of the break-even point; the two loops below correct
    # it for floating-point rounding and for the strict-positivity tolerance.
    candidate = max(0, int(math.floor((1.0 - write_multiplier) / (read_multiplier - 1.0))))
    # Step down while the estimate itself is not strictly profitable.
    while candidate > 0 and read_premium_relative_savings(
        candidate,
        write_multiplier=write_multiplier,
        read_multiplier=read_multiplier,
    ) <= SAVINGS_EPSILON:
        candidate -= 1
    # Step up while the next reuse count is still strictly profitable.
    while read_premium_relative_savings(
        candidate + 1,
        write_multiplier=write_multiplier,
        read_multiplier=read_multiplier,
    ) > SAVINGS_EPSILON:
        candidate += 1
    return candidate


def build_amortization_report(
*,
eligible: bool,
Expand All @@ -296,6 +319,7 @@ def build_amortization_report(
"""
supplied = cache_write_multiplier is not None and cache_read_multiplier is not None
break_even_reuses: int | None = None
max_profitable_reuses: int | None = None
expected_uncached_relative_cost: float | None = None
expected_cached_relative_cost: float | None = None
expected_relative_savings: float | None = None
Expand Down Expand Up @@ -330,14 +354,28 @@ def build_amortization_report(
break_even_reuses = 0
status = "already_break_even_on_write"
risk = "low"
elif cache_read_multiplier > 1.0 and cache_write_multiplier <= 1.0 and expected_reuses == 0:
break_even_reuses = 0
status = "already_break_even_on_write"
risk = "low"
elif cache_read_multiplier > 1.0 and expected_relative_savings >= 0:
break_even_reuses = 0 if cache_write_multiplier <= 1.0 else None
status = "amortizes_with_expected_reuses"
risk = "medium"
elif cache_read_multiplier > 1.0:
if cache_write_multiplier < 1.0:
max_profitable_reuses = max_profitable_read_premium_reuses(
write_multiplier=cache_write_multiplier,
read_multiplier=cache_read_multiplier,
)
if expected_relative_savings < -SAVINGS_EPSILON:
status = "no_read_discount"
risk = "high"
elif expected_reuses == 0:
if expected_relative_savings > SAVINGS_EPSILON:
status = "write_discount_only_no_expected_reads"
risk = "low"
else:
status = "break_even_only_no_expected_reads"
risk = "medium"
elif abs(expected_relative_savings) <= SAVINGS_EPSILON:
status = "break_even_only_with_limited_reuses"
risk = "medium"
else:
status = "positive_only_with_limited_reuses"
risk = "medium"
else:
status = "no_read_discount"
risk = "high"
Expand All @@ -347,6 +385,7 @@ def build_amortization_report(
"expected_reuses_semantics": "future_cache_reads_after_initial_write",
"cacheable_prefix_tokens": prefix_tokens,
"break_even_reuses": break_even_reuses,
"max_profitable_reuses": max_profitable_reuses,
"status": status,
"risk": risk,
"cache_write_multiplier": cache_write_multiplier,
Expand All @@ -356,7 +395,7 @@ def build_amortization_report(
"expected_relative_savings": expected_relative_savings,
"multiplier_baseline": "uncached_prefix_input_cost_equals_1.0",
"user_supplied_multipliers": supplied,
"formula": "expected_cached=write_multiplier + expected_reuses*read_multiplier; expected_uncached=1 + expected_reuses; break_even=ceil((write_multiplier - 1.0)/(1.0-read_multiplier)) only when read_multiplier<1",
"formula": "expected_cached=write_multiplier + expected_reuses*read_multiplier; expected_uncached=1 + expected_reuses; break_even=ceil((write_multiplier - 1.0)/(1.0-read_multiplier)) only when read_multiplier<1; max_profitable_reuses is the largest integer reuse count with expected_uncached-expected_cached > 0, only when read_multiplier>1 and write_multiplier<1",
"claim_boundary": {
"advisory_only": True,
"provider_pricing_defaults_included": False,
Expand Down Expand Up @@ -459,7 +498,8 @@ def render_text(report: dict[str, Any]) -> str:
f"warnings: {warning_codes}\n"
f"amortization: {amortization.get('status', 'unknown')} "
f"(risk={amortization.get('risk', 'unknown')}, "
f"break_even_reuses={amortization.get('break_even_reuses')})\n"
f"break_even_reuses={amortization.get('break_even_reuses')}, "
f"max_profitable_reuses={amortization.get('max_profitable_reuses')})\n"
"claim boundary: advisory static lint only; not a measured provider cache hit or cost saving.\n"
)

Expand Down
92 changes: 92 additions & 0 deletions tests/test_context_guard_kit.py
Original file line number Diff line number Diff line change
Expand Up @@ -10352,6 +10352,7 @@ def test_cache_score_reports_static_prefix_and_claim_boundary(self):
self.assertEqual(amortization["cache_write_multiplier"], 1.25)
self.assertEqual(amortization["cache_read_multiplier"], 0.1)
self.assertEqual(amortization["break_even_reuses"], 1)
self.assertIsNone(amortization["max_profitable_reuses"])
self.assertEqual(amortization["status"], "amortizes_with_expected_reuses")
self.assertEqual(amortization["risk"], "low")
self.assertAlmostEqual(amortization["expected_uncached_relative_cost"], 4.0)
Expand Down Expand Up @@ -10380,10 +10381,101 @@ def test_cache_score_reports_static_prefix_and_claim_boundary(self):
self.assertEqual(premium["status"], "no_read_discount")
self.assertEqual(premium["risk"], "high")
self.assertIsNone(premium["break_even_reuses"])
self.assertEqual(premium["max_profitable_reuses"], 0)
self.assertAlmostEqual(premium["expected_uncached_relative_cost"], 2.0)
self.assertAlmostEqual(premium["expected_cached_relative_cost"], 2.5)
self.assertLess(premium["expected_relative_savings"], 0)

limited_premium_proc = self._run_cache_score(
script,
"--provider",
"openai",
"--expected-reuses",
"4",
"--cache-write-multiplier",
"0.5",
"--cache-read-multiplier",
"1.1",
"--json",
input_data=prompt,
)
limited_premium = json.loads(limited_premium_proc.stdout)["amortization"]
self.assertEqual(limited_premium["status"], "positive_only_with_limited_reuses")
self.assertEqual(limited_premium["risk"], "medium")
self.assertIsNone(limited_premium["break_even_reuses"])
self.assertEqual(limited_premium["max_profitable_reuses"], 4)
self.assertGreater(limited_premium["expected_relative_savings"], 0)
limited_text = self._run_cache_score(
script,
"--provider",
"openai",
"--expected-reuses",
"4",
"--cache-write-multiplier",
"0.5",
"--cache-read-multiplier",
"1.1",
input_data=prompt,
)
self.assertIn("positive_only_with_limited_reuses", limited_text.stdout)
self.assertIn("max_profitable_reuses=4", limited_text.stdout)

exact_break_even_proc = self._run_cache_score(
script,
"--provider",
"openai",
"--expected-reuses",
"1",
"--cache-write-multiplier",
"0.5",
"--cache-read-multiplier",
"1.5",
"--json",
input_data=prompt,
)
exact_break_even = json.loads(exact_break_even_proc.stdout)["amortization"]
self.assertEqual(exact_break_even["status"], "break_even_only_with_limited_reuses")
self.assertEqual(exact_break_even["risk"], "medium")
self.assertEqual(exact_break_even["max_profitable_reuses"], 0)
self.assertAlmostEqual(exact_break_even["expected_relative_savings"], 0.0)

decimal_break_even_proc = self._run_cache_score(
script,
"--provider",
"openai",
"--expected-reuses",
"2",
"--cache-write-multiplier",
"0.2",
"--cache-read-multiplier",
"1.4",
"--json",
input_data=prompt,
)
decimal_break_even = json.loads(decimal_break_even_proc.stdout)["amortization"]
self.assertEqual(decimal_break_even["status"], "break_even_only_with_limited_reuses")
self.assertEqual(decimal_break_even["max_profitable_reuses"], 1)
self.assertAlmostEqual(decimal_break_even["expected_relative_savings"], 0.0)

no_read_break_even_proc = self._run_cache_score(
script,
"--provider",
"openai",
"--expected-reuses",
"0",
"--cache-write-multiplier",
"1",
"--cache-read-multiplier",
"2",
"--json",
input_data=prompt,
)
no_read_break_even = json.loads(no_read_break_even_proc.stdout)["amortization"]
self.assertEqual(no_read_break_even["status"], "break_even_only_no_expected_reads")
self.assertEqual(no_read_break_even["risk"], "medium")
self.assertIsNone(no_read_break_even["max_profitable_reuses"])
self.assertAlmostEqual(no_read_break_even["expected_relative_savings"], 0.0)

def test_cache_score_json_order_provider_thresholds_and_help(self):
request = {
"tools": [
Expand Down