Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions spec/unit/features/rule_engine.feature
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,24 @@ Feature: Rule evaluation engine orchestration
And kill switch check was skipped
And decision does not expose override headers

Rule: Circuit Breaker Cost Resolution
Scenario: AC-12 Circuit breaker uses LLM estimator for cost
Given the rule engine test environment is reset
And fixture policy with circuit breaker and token_bucket_llm rule
And the llm prompt estimate is 120
And the request context max_tokens is 300
When I evaluate the request
Then llm prompt estimation was called
And circuit breaker was checked with cost 420

Scenario: AC-12b Circuit breaker uses default_max_completion when max_tokens missing
Given the rule engine test environment is reset
And fixture policy with circuit breaker and token_bucket_llm rule
And the llm prompt estimate is 120
When I evaluate the request
Then llm prompt estimation was called
And circuit breaker was checked with cost 620

Rule: Audit event emission
Scenario: Decision events are emitted for every evaluation
Given the rule engine test environment is reset
Expand Down
50 changes: 46 additions & 4 deletions spec/unit/rule_engine_spec.lua
Original file line number Diff line number Diff line change
Expand Up @@ -195,15 +195,15 @@ local function _setup_engine(ctx)
}

local circuit_breaker = {
check = function(_dict, _config, _key, _cost, _now)
check = function(_dict, _config, _key, cost, _now)
ctx.calls[#ctx.calls + 1] = "circuit_check"
ctx.last_circuit_cost = cost
if ctx.circuit_tripped then
return { tripped = true, retry_after = 30 }
end
return { tripped = false }
end,
}

local kill_switch = {
check = function(_kill_switches, _descriptors, _path, _now)
ctx.calls[#ctx.calls + 1] = "kill_switch_check"
Expand Down Expand Up @@ -235,8 +235,14 @@ local function _setup_engine(ctx)
ctx.calls[#ctx.calls + 1] = "llm_check"
return { allowed = true }
end,
estimate_prompt_tokens = function(_config, _request_context)
ctx.calls[#ctx.calls + 1] = "llm_estimate"
return ctx.llm_prompt_estimate or 0
end,
build_error_response = function(_reason, _extra)
return '{"error":"mock"}'
end,
}

local health = {
inc = function(_self, name, labels, value)
ctx.metrics[#ctx.metrics + 1] = {
Expand Down Expand Up @@ -552,10 +558,46 @@ runner:given("^fixture kill switch override skips kill switch$", function(ctx)
ctx.rule_results.allow_rule = { allowed = true, limit = 100, remaining = 90, reset = 1 }
end)

-- Step: store the prompt-token estimate that the mocked
-- `estimate_prompt_tokens` reads back from the scenario context.
runner:given("^the llm prompt estimate is (%d+)$", function(ctx, raw_estimate)
  -- Captures arrive as strings; convert once before storing.
  local estimate = tonumber(raw_estimate)
  ctx.llm_prompt_estimate = estimate
end)

-- Step: set `max_tokens` on the request context that is later passed to
-- the engine's `evaluate` call.
runner:given("^the request context max_tokens is (%d+)$", function(ctx, raw_max_tokens)
  local request_context = ctx.request_context
  -- The captured digits are a string; the context field is set as a number.
  request_context.max_tokens = tonumber(raw_max_tokens)
end)

-- Step: install a single enforce-mode policy ("p_llm") that has a circuit
-- breaker enabled and one `token_bucket_llm` rule keyed on "jwt:org_id".
runner:given("^fixture policy with circuit breaker and token_bucket_llm rule$", function(ctx)
  -- The only rule of the policy: an LLM token bucket with a default
  -- completion budget of 500.
  local llm_rule = {
    name = "llm_rule",
    algorithm = "token_bucket_llm",
    limit_keys = { "jwt:org_id" },
    algorithm_config = { tokens_per_minute = 1000, default_max_completion = 500 },
  }

  local policy = {
    id = "p_llm",
    spec = {
      mode = "enforce",
      circuit_breaker = { enabled = true, threshold = 10, window_seconds = 60 },
      rules = { llm_rule },
    },
  }

  -- Same assignment order as before: matching ids, descriptor, then policy.
  ctx.matching_policy_ids = { "p_llm" }
  ctx.request_context._descriptors["jwt:org_id"] = "org-llm"
  ctx.bundle.policies_by_id.p_llm = policy
end)

-- Step: assert that the "llm_estimate" marker is present in the recorded
-- call log.
runner:then_("^llm prompt estimation was called$", function(ctx)
  local recorded_calls = ctx.calls
  assert.is_true(_contains(recorded_calls, "llm_estimate"))
end)

-- Step: assert that the circuit breaker was consulted and that the cost
-- recorded in `ctx.last_circuit_cost` equals the expected one.
runner:then_("^circuit breaker was checked with cost (%d+)$", function(ctx, expected_cost)
  -- First make sure a check happened at all.
  local recorded_calls = ctx.calls
  assert.is_true(_contains(recorded_calls, "circuit_check"))

  -- Then compare the numeric cost; the capture is a string.
  local wanted_cost = tonumber(expected_cost)
  assert.equals(wanted_cost, ctx.last_circuit_cost)
end)

-- Step: run the engine on the scenario's request context and bundle, and
-- keep the first returned value as `ctx.decision` for later assertions.
runner:when("^I evaluate the request$", function(ctx)
  local engine = ctx.engine
  local decision = engine.evaluate(ctx.request_context, ctx.bundle)
  ctx.decision = decision
end)

-- Step: assert that the stored decision's `action` equals the quoted value
-- from the scenario text.
runner:then_("^decision action is \"([^\"]+)\"$", function(ctx, action)
  local decision = ctx.decision
  assert.equals(action, decision.action)
end)
Expand Down
7 changes: 6 additions & 1 deletion src/fairvisor/rule_engine.lua
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,12 @@ local function _resolve_request_cost(policy, request_context)
end

if rule.algorithm == "token_bucket_llm" then
return request_context and request_context.max_tokens or 1
local prompt = _call(_llm_limiter.estimate_prompt_tokens, 0, config, request_context)
local max_completion = config.default_max_completion or 1000
if request_context and type(request_context.max_tokens) == "number" and request_context.max_tokens > 0 then
max_completion = request_context.max_tokens
end
return prompt + max_completion
end

return 1
Expand Down
Loading