diff --git a/src/index.ts b/src/index.ts index c769e99..dedf1e0 100644 --- a/src/index.ts +++ b/src/index.ts @@ -249,6 +249,7 @@ export type { FailureContext, RoutingDecision, RouterEvent, + RouterCacheStats, ApiKeyConfig } from "./router/types"; diff --git a/src/router/modelRouter.ts b/src/router/modelRouter.ts index 4b6f8ea..3cb6d45 100644 --- a/src/router/modelRouter.ts +++ b/src/router/modelRouter.ts @@ -2,7 +2,8 @@ import { ModelRouterOptions, FailureContext, RoutingDecision, - RoutingStrategy + RoutingStrategy, + RouterCacheStats } from "./types"; import { errorDetector } from "./errorDetector"; import { fallbackStrategy, contextStrategy, costStrategy } from "./routingStrategies"; @@ -17,12 +18,17 @@ export class ModelRouter { private fallbackMap: Record; private maxRetries: number; private crossProviderEnabled: boolean; + private maxDecisionCacheSize: number; + private decisionCache: Map = new Map(); + private cacheHits = 0; + private cacheMisses = 0; constructor(options: ModelRouterOptions) { this.strategy = options.strategy; this.fallbackMap = options.fallbackMap || {}; this.maxRetries = options.maxRetries ?? 1; this.crossProviderEnabled = options.enableCrossProvider ?? false; + this.maxDecisionCacheSize = options.decisionCacheSize ?? 128; // Register API keys if provided if (options.apiKeys) { @@ -46,6 +52,15 @@ export class ModelRouter { ); } + if ( + !Number.isInteger(this.maxDecisionCacheSize) || + this.maxDecisionCacheSize < 0 + ) { + throw new Error( + "TokenFirewall Router: decisionCacheSize must be a non-negative integer" + ); + } + if (this.strategy === "fallback") { if (Object.keys(this.fallbackMap).length === 0) { throw new Error( @@ -90,6 +105,12 @@ export class ModelRouter { // Detect failure type const failureType = errorDetector.detectFailureType(context.error); + const cacheKey = this.createDecisionCacheKey(context, failureType); + const cachedDecision = this.getCachedDecision(cacheKey); + + if (cachedDecision) { + return cachedDecision; + } // Select routing strategy const decision = this.selectStrategy(context, failureType); @@ -117,6 +138,8 @@ export class ModelRouter { }; } + this.setCachedDecision(cacheKey, decision); + return decision; } @@ -164,4 +187,78 @@ export class ModelRouter { public isCrossProviderEnabled(): boolean { return this.crossProviderEnabled; } + + /** + * Clear cached routing decisions and reset cache counters + */ + public clearRoutingDecisionCache(): void { + this.decisionCache.clear(); + this.cacheHits = 0; + this.cacheMisses = 0; + } + + /** + * Inspect current routing decision cache usage + * @returns cache size, configured bound, and hit/miss counters + */ + public getRoutingCacheStats(): RouterCacheStats { + return { + size: this.decisionCache.size, + maxSize: this.maxDecisionCacheSize, + hits: this.cacheHits, + misses: this.cacheMisses + }; + } + + /** + * Build a stable, non-sensitive cache key from routing inputs. + * requestBody is intentionally excluded because prompts may contain secrets. + */ + private createDecisionCacheKey( + context: FailureContext, + failureType: string + ): string { + return JSON.stringify({ + strategy: this.strategy, + failureType, + originalModel: context.originalModel, + provider: context.provider, + retryCount: context.retryCount, + attemptedModels: [...context.attemptedModels].sort() + }); + } + + private getCachedDecision(cacheKey: string): RoutingDecision | null { + if (this.maxDecisionCacheSize === 0) { + return null; + } + + const cached = this.decisionCache.get(cacheKey); + if (!cached) { + this.cacheMisses++; + return null; + } + + this.decisionCache.delete(cacheKey); + this.decisionCache.set(cacheKey, cached); + this.cacheHits++; + return { ...cached }; + } + + private setCachedDecision(cacheKey: string, decision: RoutingDecision): void { + if (this.maxDecisionCacheSize === 0) { + return; + } + + if (this.decisionCache.has(cacheKey)) { + this.decisionCache.delete(cacheKey); + } else if (this.decisionCache.size >= this.maxDecisionCacheSize) { + const oldestKey = this.decisionCache.keys().next().value; + if (oldestKey) { + this.decisionCache.delete(oldestKey); + } + } + + this.decisionCache.set(cacheKey, { ...decision }); + } } diff --git a/src/router/types.ts b/src/router/types.ts index 8e333f4..074be2c 100644 --- a/src/router/types.ts +++ b/src/router/types.ts @@ -39,6 +39,8 @@ export interface ModelRouterOptions { fallbackMap?: Record; /** Maximum number of retry attempts (default: 1) */ maxRetries?: number; + /** Maximum cached routing decisions per router instance (default: 128, 0 disables cache) */ + decisionCacheSize?: number; /** API keys for cross-provider fallback */ apiKeys?: ApiKeyConfig; /** Enable cross-provider fallback (default: false) */ @@ -75,6 +77,20 @@ export interface RoutingDecision { reason: string; } +/** + * Runtime stats for the router decision cache + */ +export interface RouterCacheStats { + /** Number of currently cached routing decisions */ + size: number; + /** Maximum decisions retained by this router */ + maxSize: number; + /** Cache hits since router creation or last clear */ + hits: number; + /** Cache misses since router creation or last clear */ + misses: number; +} + /** * Router event for logging */ diff --git a/tests/routing-performance.test.js b/tests/routing-performance.test.js new file mode 100644 index 0000000..1ae448d --- /dev/null +++ b/tests/routing-performance.test.js @@ -0,0 +1,98 @@ +/** + * Routing performance regression tests. + * + * Run: node tests/routing-performance.test.js + */ + +const { createModelRouter } = require("../dist/index.js"); + +function assert(condition, message) { + if (!condition) { + throw new Error(message); + } +} + +function createFailureContext(originalModel) { + return { + error: { status: 429 }, + originalModel, + requestBody: { + messages: [{ role: "user", content: "Summarize this document" }] + }, + provider: "openai", + retryCount: 0, + attemptedModels: [originalModel] + }; +} + +function measureAverageMs(callback, iterations) { + const start = process.hrtime.bigint(); + for (let index = 0; index < iterations; index++) { + callback(); + } + const elapsedMs = Number(process.hrtime.bigint() - start) / 1_000_000; + return elapsedMs / iterations; +} + +function testRepeatedFallbackDecisionsUseCache() { + const router = createModelRouter({ + strategy: "fallback", + fallbackMap: { + "gpt-4o": ["gpt-4o-mini", "gpt-4.1-mini"] + }, + maxRetries: 2 + }); + const context = createFailureContext("gpt-4o"); + + const firstDecision = router.handleFailure(context); + assert(firstDecision.retry === true, "first decision should retry"); + assert(firstDecision.nextModel === "gpt-4o-mini", "first fallback should be selected"); + + const averageMs = measureAverageMs(() => { + const decision = router.handleFailure(context); + assert(decision.nextModel === "gpt-4o-mini", "cached decision should remain stable"); + }, 5000); + + const stats = router.getRoutingCacheStats(); + assert(stats.size === 1, "router should cache one repeated decision"); + assert(stats.hits >= 5000, "repeated decisions should hit the cache"); + assert(stats.misses === 1, "only the initial decision should miss"); + assert(averageMs < 10, `average routing overhead should stay below 10ms, got ${averageMs}ms`); + + console.log(`Repeated fallback routing average: ${averageMs.toFixed(4)}ms`); +} + +function testDecisionCacheSizeIsBounded() { + const router = createModelRouter({ + strategy: "fallback", + fallbackMap: { + "gpt-a": ["gpt-a-mini"], + "gpt-b": ["gpt-b-mini"], + "gpt-c": ["gpt-c-mini"] + }, + maxRetries: 2, + decisionCacheSize: 2 + }); + + router.handleFailure(createFailureContext("gpt-a")); + router.handleFailure(createFailureContext("gpt-b")); + router.handleFailure(createFailureContext("gpt-c")); + + const stats = router.getRoutingCacheStats(); + assert(stats.size === 2, "cache should evict the oldest decision when full"); + assert(stats.maxSize === 2, "cache stats should expose the configured bound"); + + router.clearRoutingDecisionCache(); + const clearedStats = router.getRoutingCacheStats(); + assert(clearedStats.size === 0, "clear should remove cached decisions"); + assert(clearedStats.hits === 0, "clear should reset hit counter"); + assert(clearedStats.misses === 0, "clear should reset miss counter"); +} + +function run() { + testRepeatedFallbackDecisionsUseCache(); + testDecisionCacheSizeIsBounded(); + console.log("Routing performance tests passed"); +} + +run();