fairvisor · levleontiev · Mar 13, 2026
@@ -132,6 +132,24 @@ Feature: Rule evaluation engine orchestration
       And kill switch check was skipped
       And decision does not expose override headers
 
+  Rule: Circuit breaker cost resolution
+    Scenario: AC-12 Circuit breaker uses LLM estimator for cost
+      Given the rule engine test environment is reset
+      And fixture policy with circuit breaker and token_bucket_llm rule
+      And the llm prompt estimate is 120
+      And the request context max_tokens is 300
+      When I evaluate the request
+      Then llm prompt estimation was called
+      And circuit breaker was checked with cost 420
+
+    Scenario: AC-12b Circuit breaker uses default_max_completion when max_tokens is missing
+      Given the rule engine test environment is reset
+      And fixture policy with circuit breaker and token_bucket_llm rule
+      And the llm prompt estimate is 120
+      When I evaluate the request
+      Then llm prompt estimation was called
+      And circuit breaker was checked with cost 620
+
   Rule: Audit event emission
     Scenario: Decision events are emitted for every evaluation
       Given the rule engine test environment is reset

@@ -195,15 +195,15 @@ local function _setup_engine(ctx)
   }
 
   local circuit_breaker = {
-    check = function(_dict, _config, _key, _cost, _now)
+    check = function(_dict, _config, _key, cost, _now) -- mock check: records the call and the cost it was given
       ctx.calls[#ctx.calls + 1] = "circuit_check"
+      ctx.last_circuit_cost = cost -- kept on ctx so a later step can assert on the cost passed in
       if ctx.circuit_tripped then
         return { tripped = true, retry_after = 30 }
       end
       return { tripped = false }
     end,
   }
-
   local kill_switch = {
     check = function(_kill_switches, _descriptors, _path, _now)
       ctx.calls[#ctx.calls + 1] = "kill_switch_check"
@@ -235,8 +235,14 @@ local function _setup_engine(ctx)
       ctx.calls[#ctx.calls + 1] = "llm_check"
       return { allowed = true }
     end,
+    -- Mock estimator: appends "llm_estimate" to the call log and yields the
+    -- estimate stored on ctx, or 0 when none has been set.
+    estimate_prompt_tokens = function(_config, _request_context)
+      local call_log = ctx.calls
+      call_log[#call_log + 1] = "llm_estimate"
+      local estimate = ctx.llm_prompt_estimate
+      if not estimate then
+        return 0
+      end
+      return estimate
+    end,
+    -- Mock error body: ignores both arguments and always yields the same JSON string.
+    build_error_response = function(_reason, _extra)
+      local mock_body = '{"error":"mock"}'
+      return mock_body
+    end,
   }
-
   local health = {
     inc = function(_self, name, labels, value)
       ctx.metrics[#ctx.metrics + 1] = {
@@ -552,10 +558,46 @@ runner:given("^fixture kill switch override skips kill switch$", function(ctx)
   ctx.rule_results.allow_rule = { allowed = true, limit = 100, remaining = 90, reset = 1 }
 end)
 
+-- Given-step: stores the scenario's prompt-token estimate on the context.
+runner:given("^the llm prompt estimate is (%d+)$", function(ctx, captured)
+  local estimate = tonumber(captured) -- the captured digits are converted to a number
+  ctx.llm_prompt_estimate = estimate
+end)
+
+-- Given-step: sets max_tokens on the request context from the captured digits.
+runner:given("^the request context max_tokens is (%d+)$", function(ctx, captured)
+  local requested = tonumber(captured)
+  ctx.request_context.max_tokens = requested
+end)
+
+-- Given-step: registers policy "p_llm" (enforce mode, circuit breaker enabled,
+-- one token_bucket_llm rule keyed on jwt:org_id) and makes it the only
+-- matching policy for the request.
+runner:given("^fixture policy with circuit breaker and token_bucket_llm rule$", function(ctx)
+  local policy_id = "p_llm"
+  local llm_rule = {
+    name = "llm_rule",
+    algorithm = "token_bucket_llm",
+    limit_keys = { "jwt:org_id" },
+    algorithm_config = { tokens_per_minute = 1000, default_max_completion = 500 },
+  }
+  local breaker = { enabled = true, threshold = 10, window_seconds = 60 }
+
+  -- The three context writes below stay in their original order.
+  ctx.matching_policy_ids = { policy_id }
+  ctx.request_context._descriptors["jwt:org_id"] = "org-llm"
+  ctx.bundle.policies_by_id[policy_id] = {
+    id = policy_id,
+    spec = {
+      mode = "enforce",
+      circuit_breaker = breaker,
+      rules = { llm_rule },
+    },
+  }
+end)
+
+-- Then-step: passes only if the call log contains an "llm_estimate" entry.
+runner:then_("^llm prompt estimation was called$", function(ctx)
+  local marker = "llm_estimate"
+  assert.is_true(_contains(ctx.calls, marker))
+end)
+
+-- Then-step: requires a "circuit_check" entry in the call log and that the
+-- cost recorded on ctx equals the number given in the step text.
+runner:then_("^circuit breaker was checked with cost (%d+)$", function(ctx, captured)
+  local marker = "circuit_check"
+  assert.is_true(_contains(ctx.calls, marker))
+
+  local expected = tonumber(captured)
+  local recorded = ctx.last_circuit_cost
+  assert.equals(expected, recorded)
+end)
+
 -- When-step: runs the engine on the scenario's request context and bundle,
 -- keeping the first returned value as ctx.decision.
 runner:when("^I evaluate the request$", function(ctx)
   local engine = ctx.engine
   local decision = engine.evaluate(ctx.request_context, ctx.bundle)
   ctx.decision = decision
 end)
-
 -- Then-step: compares the quoted action from the step text with the decision's action.
 runner:then_("^decision action is \"([^\"]+)\"$", function(ctx, action)
   local actual = ctx.decision.action
   assert.equals(action, actual)
 end)

@@ -266,7 +266,12 @@ local function _resolve_request_cost(policy, request_context)
   end
 
   if rule.algorithm == "token_bucket_llm" then
-    return request_context and request_context.max_tokens or 1
+    local prompt = _call(_llm_limiter.estimate_prompt_tokens, 0, config, request_context)
+    local max_completion = config.default_max_completion or 1000
+    if request_context and type(request_context.max_tokens) == "number" and request_context.max_tokens > 0 then
+      max_completion = request_context.max_tokens
+    end
+    return prompt + max_completion
   end
 
   return 1