Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ export type {
FailureContext,
RoutingDecision,
RouterEvent,
RouterCacheStats,
ApiKeyConfig
} from "./router/types";

Expand Down
99 changes: 98 additions & 1 deletion src/router/modelRouter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@ import {
ModelRouterOptions,
FailureContext,
RoutingDecision,
RoutingStrategy
RoutingStrategy,
RouterCacheStats
} from "./types";
import { errorDetector } from "./errorDetector";
import { fallbackStrategy, contextStrategy, costStrategy } from "./routingStrategies";
Expand All @@ -17,12 +18,17 @@ export class ModelRouter {
private fallbackMap: Record<string, string[]>;
private maxRetries: number;
private crossProviderEnabled: boolean;
private maxDecisionCacheSize: number;
private decisionCache: Map<string, RoutingDecision> = new Map();
private cacheHits = 0;
private cacheMisses = 0;

constructor(options: ModelRouterOptions) {
this.strategy = options.strategy;
this.fallbackMap = options.fallbackMap || {};
this.maxRetries = options.maxRetries ?? 1;
this.crossProviderEnabled = options.enableCrossProvider ?? false;
this.maxDecisionCacheSize = options.decisionCacheSize ?? 128;

// Register API keys if provided
if (options.apiKeys) {
Expand All @@ -46,6 +52,15 @@ export class ModelRouter {
);
}

if (
!Number.isInteger(this.maxDecisionCacheSize) ||
this.maxDecisionCacheSize < 0
) {
throw new Error(
"TokenFirewall Router: decisionCacheSize must be a non-negative integer"
);
}

if (this.strategy === "fallback") {
if (Object.keys(this.fallbackMap).length === 0) {
throw new Error(
Expand Down Expand Up @@ -90,6 +105,12 @@ export class ModelRouter {

// Detect failure type
const failureType = errorDetector.detectFailureType(context.error);
const cacheKey = this.createDecisionCacheKey(context, failureType);
const cachedDecision = this.getCachedDecision(cacheKey);

if (cachedDecision) {
return cachedDecision;
}

// Select routing strategy
const decision = this.selectStrategy(context, failureType);
Expand Down Expand Up @@ -117,6 +138,8 @@ export class ModelRouter {
};
}

this.setCachedDecision(cacheKey, decision);

return decision;
}

Expand Down Expand Up @@ -164,4 +187,78 @@ export class ModelRouter {
public isCrossProviderEnabled(): boolean {
  // The flag is initialised in the constructor from options.enableCrossProvider (false when omitted).
  const { crossProviderEnabled } = this;
  return crossProviderEnabled;
}

/**
 * Forget every cached routing decision and zero the hit/miss counters,
 * so subsequent stats describe only activity that happens after this call.
 */
public clearRoutingDecisionCache(): void {
  this.cacheHits = this.cacheMisses = 0;
  this.decisionCache.clear();
}

/**
 * Take a snapshot of the routing decision cache.
 * @returns the number of cached entries, the configured upper bound, and the
 * hit/miss counters as they stand at the time of the call
 */
public getRoutingCacheStats(): RouterCacheStats {
  const { decisionCache, maxDecisionCacheSize, cacheHits, cacheMisses } = this;
  const snapshot: RouterCacheStats = {
    size: decisionCache.size,
    maxSize: maxDecisionCacheSize,
    hits: cacheHits,
    misses: cacheMisses
  };
  return snapshot;
}

/**
 * Derive the cache key for a failure from the router strategy, the detected
 * failure type, and the model/provider/retry fields of the context.
 *
 * attemptedModels is copied and sorted so the key does not depend on the order
 * in which models were tried. requestBody is deliberately left out because
 * prompts may contain secrets.
 *
 * NOTE(review): if any routing strategy reads requestBody when choosing a
 * model, two different requests will share a key here — confirm against the
 * strategy implementations.
 *
 * @param context failure context being routed
 * @param failureType failure classification for context.error
 * @returns JSON string usable as a Map key
 */
private createDecisionCacheKey(
  context: FailureContext,
  failureType: string
): string {
  const { originalModel, provider, retryCount, attemptedModels } = context;

  // Sort a copy; the caller's array must not be reordered.
  const sortedAttempts = Array.from(attemptedModels);
  sortedAttempts.sort();

  const keyParts = {
    strategy: this.strategy,
    failureType,
    originalModel,
    provider,
    retryCount,
    attemptedModels: sortedAttempts
  };
  return JSON.stringify(keyParts);
}

/**
 * Look up a cached decision and update the hit/miss counters.
 *
 * A hit re-inserts the entry so it becomes the most recently used one in the
 * Map's insertion order. Nothing is counted when the cache is disabled
 * (maxDecisionCacheSize of 0).
 *
 * @param cacheKey key produced for the current failure context
 * @returns a shallow copy of the cached decision, or null when absent/disabled
 */
private getCachedDecision(cacheKey: string): RoutingDecision | null {
  const cachingDisabled = this.maxDecisionCacheSize === 0;
  if (cachingDisabled) {
    return null;
  }

  const entry = this.decisionCache.get(cacheKey);
  if (entry) {
    // Delete-then-set moves the entry to the end of the Map's iteration order.
    this.decisionCache.delete(cacheKey);
    this.decisionCache.set(cacheKey, entry);
    this.cacheHits += 1;
    // Hand back a copy so callers cannot mutate the stored decision's fields.
    return { ...entry };
  }

  this.cacheMisses += 1;
  return null;
}

/**
 * Store a routing decision under cacheKey, keeping the cache within
 * maxDecisionCacheSize entries.
 *
 * An existing entry for the same key is replaced and becomes the most recent
 * one. When a new key would exceed the bound, the entry at the front of the
 * Map's insertion order (the least recently inserted/used) is dropped first.
 * Does nothing when the cache is disabled (maxDecisionCacheSize of 0).
 *
 * @param cacheKey key produced for the current failure context
 * @param decision decision to remember; a shallow copy is stored
 */
private setCachedDecision(cacheKey: string, decision: RoutingDecision): void {
  if (this.maxDecisionCacheSize === 0) {
    return;
  }

  if (this.decisionCache.has(cacheKey)) {
    // Remove first so the re-insert below lands at the end of the order.
    this.decisionCache.delete(cacheKey);
  } else if (this.decisionCache.size >= this.maxDecisionCacheSize) {
    const oldestKey = this.decisionCache.keys().next().value;
    // Compare against undefined rather than truthiness: an empty-string key is
    // a legitimate Map key and must still be evictable, otherwise the cache
    // could grow past its bound.
    if (oldestKey !== undefined) {
      this.decisionCache.delete(oldestKey);
    }
  }

  // Store a copy so later mutation of the caller's object cannot alter the cache.
  this.decisionCache.set(cacheKey, { ...decision });
}
}
16 changes: 16 additions & 0 deletions src/router/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ export interface ModelRouterOptions {
fallbackMap?: Record<string, string[]>;
/** Maximum number of retry attempts (default: 1) */
maxRetries?: number;
/** Maximum cached routing decisions per router instance (default: 128, 0 disables cache) */
decisionCacheSize?: number;
/** API keys for cross-provider fallback */
apiKeys?: ApiKeyConfig;
/** Enable cross-provider fallback (default: false) */
Expand Down Expand Up @@ -75,6 +77,20 @@ export interface RoutingDecision {
reason: string;
}

/**
 * Runtime stats for the router decision cache.
 *
 * This is the shape returned by ModelRouter.getRoutingCacheStats(); each field
 * is a plain number copied at the time of the call.
 */
export interface RouterCacheStats {
/** Number of routing decisions currently held in the cache */
size: number;
/** Configured upper bound on cached decisions (decisionCacheSize); 0 means the cache is disabled */
maxSize: number;
/** Lookups that found a cached decision, counted since router creation or the last cache clear */
hits: number;
/**
 * Lookups that found no cached decision, counted since router creation or
 * the last cache clear. Lookups made while the cache is disabled are not counted.
 */
misses: number;
}

/**
* Router event for logging
*/
Expand Down
98 changes: 98 additions & 0 deletions tests/routing-performance.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/**
* Routing performance regression tests.
*
* Run: node tests/routing-performance.test.js
*/

const { createModelRouter } = require("../dist/index.js");

function assert(condition, message) {
if (!condition) {
throw new Error(message);
}
}

/**
 * Build the failure context used by these tests: an HTTP 429 error from the
 * "openai" provider on the first attempt, with a fixed single-message request.
 *
 * @param originalModel model name that "failed"; also the sole entry in attemptedModels
 * @returns a fresh context object (new nested objects/arrays on every call)
 */
function createFailureContext(originalModel) {
  const error = { status: 429 };
  const userMessage = { role: "user", content: "Summarize this document" };
  const requestBody = { messages: [userMessage] };
  const attemptedModels = [originalModel];

  return {
    error,
    originalModel,
    requestBody,
    provider: "openai",
    retryCount: 0,
    attemptedModels
  };
}

/**
 * Invoke `callback` `iterations` times back-to-back and report the mean
 * wall-clock time per invocation.
 *
 * @param callback zero-argument function to time; its return value is ignored
 * @param iterations number of invocations; must be a positive integer
 * @returns mean elapsed time per invocation, in milliseconds
 * @throws {RangeError} when `iterations` is not a positive integer. Zero would
 *   divide by zero (NaN/Infinity), and a fractional value would make the loop
 *   count disagree with the divisor.
 */
function measureAverageMs(callback, iterations) {
  if (!Number.isInteger(iterations) || iterations <= 0) {
    throw new RangeError(
      `iterations must be a positive integer, got ${iterations}`
    );
  }

  const start = process.hrtime.bigint();
  for (let index = 0; index < iterations; index++) {
    callback();
  }
  // hrtime.bigint() reports nanoseconds; convert the delta to milliseconds.
  const elapsedMs = Number(process.hrtime.bigint() - start) / 1_000_000;
  return elapsedMs / iterations;
}

/**
 * Route the same failure repeatedly through a fallback router and check that,
 * after the first call, decisions come from the cache: one cached entry, one
 * miss, at least 5000 hits, and a small average per-call time.
 */
function testRepeatedFallbackDecisionsUseCache() {
  const repeatCount = 5000;
  const routerOptions = {
    strategy: "fallback",
    fallbackMap: {
      "gpt-4o": ["gpt-4o-mini", "gpt-4.1-mini"]
    },
    maxRetries: 2
  };
  const router = createModelRouter(routerOptions);
  const failure = createFailureContext("gpt-4o");

  // First call populates the cache.
  const initial = router.handleFailure(failure);
  assert(initial.retry === true, "first decision should retry");
  assert(initial.nextModel === "gpt-4o-mini", "first fallback should be selected");

  // Every later call with the identical context must yield the same model.
  const routeAgain = () => {
    const { nextModel } = router.handleFailure(failure);
    assert(nextModel === "gpt-4o-mini", "cached decision should remain stable");
  };
  const averageMs = measureAverageMs(routeAgain, repeatCount);

  const { size, hits, misses } = router.getRoutingCacheStats();
  assert(size === 1, "router should cache one repeated decision");
  assert(hits >= repeatCount, "repeated decisions should hit the cache");
  assert(misses === 1, "only the initial decision should miss");
  assert(averageMs < 10, `average routing overhead should stay below 10ms, got ${averageMs}ms`);

  console.log(`Repeated fallback routing average: ${averageMs.toFixed(4)}ms`);
}

/**
 * Fill a router whose decision cache holds two entries with three distinct
 * failures and check that:
 *  - the cache never exceeds its configured bound,
 *  - the entry dropped to make room is the oldest one (not merely "some" entry),
 *  - clearRoutingDecisionCache() empties the cache and resets both counters.
 */
function testDecisionCacheSizeIsBounded() {
  const router = createModelRouter({
    strategy: "fallback",
    fallbackMap: {
      "gpt-a": ["gpt-a-mini"],
      "gpt-b": ["gpt-b-mini"],
      "gpt-c": ["gpt-c-mini"]
    },
    maxRetries: 2,
    decisionCacheSize: 2
  });

  router.handleFailure(createFailureContext("gpt-a"));
  router.handleFailure(createFailureContext("gpt-b"));
  router.handleFailure(createFailureContext("gpt-c"));

  const stats = router.getRoutingCacheStats();
  assert(stats.size === 2, "cache should evict the oldest decision when full");
  assert(stats.maxSize === 2, "cache stats should expose the configured bound");

  // Size alone cannot tell WHICH entry was dropped. Probe with counter deltas:
  // the newest key must still be a hit, the oldest key must now be a miss.
  router.handleFailure(createFailureContext("gpt-c"));
  const afterNewest = router.getRoutingCacheStats();
  assert(
    afterNewest.hits === stats.hits + 1,
    "most recently cached decision should still be served from the cache"
  );

  router.handleFailure(createFailureContext("gpt-a"));
  const afterOldest = router.getRoutingCacheStats();
  assert(
    afterOldest.misses === afterNewest.misses + 1,
    "oldest decision should have been evicted from the cache"
  );
  assert(afterOldest.size === 2, "cache should stay within its bound after re-insertion");

  router.clearRoutingDecisionCache();
  const clearedStats = router.getRoutingCacheStats();
  assert(clearedStats.size === 0, "clear should remove cached decisions");
  assert(clearedStats.hits === 0, "clear should reset hit counter");
  assert(clearedStats.misses === 0, "clear should reset miss counter");
}

/**
 * Execute every routing performance test in order; the first failing
 * assertion throws and aborts the script before the success line is printed.
 */
function run() {
  const suite = [
    testRepeatedFallbackDecisionsUseCache,
    testDecisionCacheSizeIsBounded
  ];
  for (const testCase of suite) {
    testCase();
  }
  console.log("Routing performance tests passed");
}

run();