From b4a8688d374b967277e4cabd639ff7bcde92110a Mon Sep 17 00:00:00 2001 From: Codex Date: Fri, 13 Mar 2026 12:29:21 +0000 Subject: [PATCH] fix(issue-4): use LLM-aware estimator for circuit breaker cost resolution --- spec/unit/features/rule_engine.feature | 18 ++++++++++ spec/unit/rule_engine_spec.lua | 50 +++++++++++++++++++++++--- src/fairvisor/rule_engine.lua | 7 +++- 3 files changed, 70 insertions(+), 5 deletions(-) diff --git a/spec/unit/features/rule_engine.feature b/spec/unit/features/rule_engine.feature index a2a2f4b..8657fd9 100644 --- a/spec/unit/features/rule_engine.feature +++ b/spec/unit/features/rule_engine.feature @@ -132,6 +132,24 @@ Feature: Rule evaluation engine orchestration And kill switch check was skipped And decision does not expose override headers + Rule: Circuit Breaker Cost Resolution + Scenario: AC-12 Circuit breaker uses LLM estimator for cost + Given the rule engine test environment is reset + And fixture policy with circuit breaker and token_bucket_llm rule + And the llm prompt estimate is 120 + And the request context max_tokens is 300 + When I evaluate the request + Then llm prompt estimation was called + And circuit breaker was checked with cost 420 + + Scenario: AC-12b Circuit breaker uses default_max_completion when max_tokens missing + Given the rule engine test environment is reset + And fixture policy with circuit breaker and token_bucket_llm rule + And the llm prompt estimate is 120 + When I evaluate the request + Then llm prompt estimation was called + And circuit breaker was checked with cost 620 + Rule: Audit event emission Scenario: Decision events are emitted for every evaluation Given the rule engine test environment is reset diff --git a/spec/unit/rule_engine_spec.lua b/spec/unit/rule_engine_spec.lua index 37061ff..6b81814 100644 --- a/spec/unit/rule_engine_spec.lua +++ b/spec/unit/rule_engine_spec.lua @@ -195,15 +195,15 @@ local function _setup_engine(ctx) } local circuit_breaker = { - check = function(_dict, _config, _key, _cost, _now) + check = function(_dict, _config, _key, cost, _now) ctx.calls[#ctx.calls + 1] = "circuit_check" + ctx.last_circuit_cost = cost if ctx.circuit_tripped then return { tripped = true, retry_after = 30 } end return { tripped = false } end, } - local kill_switch = { check = function(_kill_switches, _descriptors, _path, _now) ctx.calls[#ctx.calls + 1] = "kill_switch_check" @@ -235,8 +235,14 @@ local function _setup_engine(ctx) ctx.calls[#ctx.calls + 1] = "llm_check" return { allowed = true } end, + estimate_prompt_tokens = function(_config, _request_context) + ctx.calls[#ctx.calls + 1] = "llm_estimate" + return ctx.llm_prompt_estimate or 0 + end, + build_error_response = function(_reason, _extra) + return '{"error":"mock"}' + end, } - local health = { inc = function(_self, name, labels, value) ctx.metrics[#ctx.metrics + 1] = { @@ -552,10 +558,46 @@ runner:given("^fixture kill switch override skips kill switch$", function(ctx) ctx.rule_results.allow_rule = { allowed = true, limit = 100, remaining = 90, reset = 1 } end) +runner:given("^the llm prompt estimate is (%d+)$", function(ctx, estimate) + ctx.llm_prompt_estimate = tonumber(estimate) +end) + +runner:given("^the request context max_tokens is (%d+)$", function(ctx, max_tokens) + ctx.request_context.max_tokens = tonumber(max_tokens) +end) + +runner:given("^fixture policy with circuit breaker and token_bucket_llm rule$", function(ctx) + ctx.matching_policy_ids = { "p_llm" } + ctx.request_context._descriptors["jwt:org_id"] = "org-llm" + ctx.bundle.policies_by_id.p_llm = { + id = "p_llm", + spec = { + mode = "enforce", + circuit_breaker = { enabled = true, threshold = 10, window_seconds = 60 }, + rules = { + { + name = "llm_rule", + algorithm = "token_bucket_llm", + limit_keys = { "jwt:org_id" }, + algorithm_config = { tokens_per_minute = 1000, default_max_completion = 500 } + } + } + } + } +end) + +runner:then_("^llm prompt estimation was called$", function(ctx) + assert.is_true(_contains(ctx.calls, "llm_estimate")) +end) + +runner:then_("^circuit breaker was checked with cost (%d+)$", function(ctx, expected_cost) + assert.is_true(_contains(ctx.calls, "circuit_check")) + assert.equals(tonumber(expected_cost), ctx.last_circuit_cost) +end) + runner:when("^I evaluate the request$", function(ctx) ctx.decision = ctx.engine.evaluate(ctx.request_context, ctx.bundle) end) - runner:then_("^decision action is \"([^\"]+)\"$", function(ctx, action) assert.equals(action, ctx.decision.action) end) diff --git a/src/fairvisor/rule_engine.lua b/src/fairvisor/rule_engine.lua index 6701b63..8808fdb 100644 --- a/src/fairvisor/rule_engine.lua +++ b/src/fairvisor/rule_engine.lua @@ -266,7 +266,12 @@ local function _resolve_request_cost(policy, request_context) end if rule.algorithm == "token_bucket_llm" then - return request_context and request_context.max_tokens or 1 + local prompt = _call(_llm_limiter.estimate_prompt_tokens, 0, config, request_context) + local max_completion = config.default_max_completion or 1000 + if request_context and type(request_context.max_tokens) == "number" and request_context.max_tokens > 0 then + max_completion = request_context.max_tokens + end + return prompt + max_completion end return 1