From 26ab140bc403965046b9d02172e0ea6b96541e4d Mon Sep 17 00:00:00 2001 From: Matic Jurglic Date: Wed, 25 Mar 2026 10:16:46 +0100 Subject: [PATCH 1/5] Run saveUsageCost in the background to avoid blocking proxy responses The request forward handler was awaiting saveUsageCost before returning the response. This function polls OpenRouter's generation cost API with exponential backoff (1s, 2s, 4s, 8s...) because cost data is often not immediately available, adding 10-15+ seconds of latency. Run it in the background instead so responses return immediately. Co-Authored-By: Claude Opus 4.6 (1M context) --- packages/billing/ai-billing.ts | 25 +++++++ .../handlers/handle-request-forward.ts | 47 +++++++++---- .../realm-server/lib/credit-strategies.ts | 22 ++++++ .../tests/request-forward-test.ts | 68 ++++++++----------- 4 files changed, 109 insertions(+), 53 deletions(-) diff --git a/packages/billing/ai-billing.ts b/packages/billing/ai-billing.ts index 50709fe8e03..7c7b5fe8be7 100644 --- a/packages/billing/ai-billing.ts +++ b/packages/billing/ai-billing.ts @@ -73,6 +73,31 @@ export async function validateAICredits( }; } +export async function spendUsageCost( + dbAdapter: DBAdapter, + matrixUserId: string, + costInUsd: number, +) { + try { + let creditsConsumed = Math.round(costInUsd * CREDITS_PER_USD); + let user = await getUserByMatrixUserId(dbAdapter, matrixUserId); + + if (!user) { + throw new Error( + `should not happen: user with matrix id ${matrixUserId} not found in the users table`, + ); + } + + await spendCredits(dbAdapter, user.id, creditsConsumed); + } catch (err) { + log.error( + `Failed to spend usage cost (matrixUserId: ${matrixUserId}, costInUsd: ${costInUsd}):`, + err, + ); + Sentry.captureException(err); + } +} + export async function saveUsageCost( dbAdapter: DBAdapter, matrixUserId: string, diff --git a/packages/realm-server/handlers/handle-request-forward.ts b/packages/realm-server/handlers/handle-request-forward.ts index 76b9de68710..3ec4a2feec2 100644 --- a/packages/realm-server/handlers/handle-request-forward.ts +++ b/packages/realm-server/handlers/handle-request-forward.ts @@ -13,6 +13,10 @@ import * as Sentry from '@sentry/node'; const log = logger('request-forward'); +// Track pending cost-saving promises per user so we can ensure the previous +// request's cost has been recorded before allowing a new one +const pendingCostPromises = new Map>(); + async function handleStreamingRequest( ctxt: Koa.Context, url: string, @@ -61,13 +65,11 @@ async function handleStreamingRequest( // Handle end of stream if (data === '[DONE]') { if (generationId) { - // Create a mock response object with the generation ID for the credit strategy - const mockResponse = { id: generationId }; - await endpointConfig.creditStrategy.saveUsageCost( - dbAdapter, - matrixUserId, - mockResponse, - ); + // Save cost in the background so we don't block the stream on OpenRouter's generation cost API + const costPromise = endpointConfig.creditStrategy + .saveUsageCost(dbAdapter, matrixUserId, { id: generationId }) + .finally(() => pendingCostPromises.delete(matrixUserId)); + pendingCostPromises.set(matrixUserId, costPromise); } ctxt.res.write(`data: [DONE]\n\n`); return 'stop'; @@ -328,7 +330,22 @@ export default function handleRequestForward({ return; } - // 4. Check user has sufficient credits using credit strategy + // 4. Wait for any pending cost from a previous request to be recorded + const pendingCost = pendingCostPromises.get(matrixUserId); + if (pendingCost) { + try { + await pendingCost; + } catch (e) { + log.error('Error waiting for pending cost:', e); + await sendResponseForSystemError( + ctxt, + 'There was an error saving your Boxel credits usage. Try again or contact support if the problem persists.', + ); + return; + } + } + + // 5. Check user has sufficient credits using credit strategy const creditValidation = await destinationConfig.creditStrategy.validateCredits( dbAdapter, @@ -469,12 +486,14 @@ export default function handleRequestForward({ const responseData = await externalResponse.json(); - // 6. Calculate and deduct credits using credit strategy - await destinationConfig.creditStrategy.saveUsageCost( - dbAdapter, - matrixUserId, - responseData, - ); + // 6. Deduct credits in the background using the cost from the response + const costInUsd = responseData?.usage?.cost; + if (costInUsd != null) { + const costPromise = destinationConfig.creditStrategy + .spendUsageCost(dbAdapter, matrixUserId, costInUsd) + .finally(() => pendingCostPromises.delete(matrixUserId)); + pendingCostPromises.set(matrixUserId, costPromise); + } // 7. Return response const response = new Response(JSON.stringify(responseData), { diff --git a/packages/realm-server/lib/credit-strategies.ts b/packages/realm-server/lib/credit-strategies.ts index 304b52fa358..e97c90bdc02 100644 --- a/packages/realm-server/lib/credit-strategies.ts +++ b/packages/realm-server/lib/credit-strategies.ts @@ -6,6 +6,7 @@ import { validateAICredits, extractGenerationIdFromResponse, saveUsageCost as saveUsageCostFromBilling, + spendUsageCost as spendUsageCostFromBilling, } from '@cardstack/billing/ai-billing'; export interface CreditStrategy { @@ -23,6 +24,11 @@ export interface CreditStrategy { matrixUserId: string, response: any, ): Promise; + spendUsageCost( + dbAdapter: DBAdapter, + matrixUserId: string, + costInUsd: number, + ): Promise; } // Default AI Bot Credit Strategy (reused from AI bot) @@ -62,6 +68,14 @@ export class OpenRouterCreditStrategy implements CreditStrategy { ); } } + + async spendUsageCost( + dbAdapter: DBAdapter, + matrixUserId: string, + costInUsd: number, + ): Promise { + await spendUsageCostFromBilling(dbAdapter, matrixUserId, costInUsd); + } } // No Credit Strategy (for free endpoints) @@ -82,6 +96,14 @@ export class NoCreditStrategy implements CreditStrategy { ): Promise { // No-op for no-credit strategy } + + async spendUsageCost( + _dbAdapter: DBAdapter, + _matrixUserId: string, + _costInUsd: number, + ): Promise { + // No-op for no-credit strategy + } } // Credit Strategy Factory diff --git a/packages/realm-server/tests/request-forward-test.ts b/packages/realm-server/tests/request-forward-test.ts index 59ecd121b30..6d7d584337a 100644 --- a/packages/realm-server/tests/request-forward-test.ts +++ b/packages/realm-server/tests/request-forward-test.ts @@ -134,34 +134,20 @@ module(basename(__filename), function () { const originalFetch = global.fetch; const mockFetch = sinon.stub(global, 'fetch'); - // Mock OpenRouter response + // Mock OpenRouter response (includes usage.cost so credits can be + // deducted directly without polling the generation cost API) const mockOpenRouterResponse = { id: 'gen-test-123', choices: [{ text: 'Test response from OpenRouter' }], - usage: { total_tokens: 150 }, + usage: { total_tokens: 150, cost: 0.003 }, }; - // Mock generation cost API response - const mockCostResponse = { - data: { - id: 'gen-test-123', - total_cost: 0.003, - total_tokens: 150, - model: 'openai/gpt-3.5-turbo', - }, - }; - - // Set up fetch to return different responses based on URL + // Set up fetch to return OpenRouter response mockFetch.callsFake( async (input: string | URL | Request, _init?: RequestInit) => { const url = typeof input === 'string' ? input : input.toString(); - if (url.includes('/generation?id=')) { - return new Response(JSON.stringify(mockCostResponse), { - status: 200, - headers: { 'content-type': 'application/json' }, - }); - } else if (url.includes('/chat/completions')) { + if (url.includes('/chat/completions')) { return new Response(JSON.stringify(mockOpenRouterResponse), { status: 200, headers: { 'content-type': 'application/json' }, @@ -207,36 +193,40 @@ module(basename(__filename), function () { // Verify fetch was called correctly (allowing unrelated fetches) const calls = mockFetch.getCalls(); - const chatCallIndex = calls.findIndex((call) => { + const chatCall = calls.find((call) => { const url = call.args[0]; const href = typeof url === 'string' ? url : url?.toString(); return Boolean(href && href.includes('/chat/completions')); }); - const generationCallIndex = calls.findIndex((call) => { - const url = call.args[0]; - const href = typeof url === 'string' ? url : url?.toString(); - return Boolean(href && href.includes('/generation?id=')); - }); - assert.true(chatCallIndex >= 0, 'Fetch should call chat completions'); - assert.true( - generationCallIndex >= 0, - 'Fetch should call generation cost API', - ); - assert.true( - chatCallIndex < generationCallIndex, - 'Generation cost should be fetched after chat completions', - ); + assert.ok(chatCall, 'Fetch should call chat completions'); // Verify authorization header was set correctly - const firstCallHeaders = calls[chatCallIndex].args[1] - ?.headers as Record; - // Note: The actual authorization header will include the JWT token, not the API key - // The API key is added by the proxy handler, not the test + const chatCallHeaders = chatCall!.args[1]?.headers as Record< + string, + string + >; assert.true( - firstCallHeaders?.Authorization?.startsWith('Bearer '), + chatCallHeaders?.Authorization?.startsWith('Bearer '), 'Should set authorization header', ); + + // Verify credits were deducted (0.003 USD * 1000 = 3 credits) + // Allow a tick for the background cost saving to complete + await new Promise((resolve) => setTimeout(resolve, 50)); + const user = await getUserByMatrixUserId( + dbAdapter, + '@testuser:localhost', + ); + const remainingCredits = await sumUpCreditsLedger(dbAdapter, { + creditType: ['extra_credit', 'extra_credit_used'], + userId: user!.id, + }); + assert.strictEqual( + remainingCredits, + 47, + 'Credits should be deducted (50 - 3 = 47)', + ); } finally { mockFetch.restore(); global.fetch = originalFetch; From 6c0caed78b6358590efaaaec9db687a11d67c88c Mon Sep 17 00:00:00 2001 From: Matic Jurglic Date: Wed, 25 Mar 2026 14:01:55 +0100 Subject: [PATCH 2/5] Chain per-user cost promises to prevent race conditions When multiple requests overlap for the same user, the previous .finally() could delete a newer promise from the map. Now promises are chained sequentially per user, and .finally() only deletes if the map still points at the same promise. Also falls back to saveUsageCost (background polling) when the response doesn't include usage.cost inline. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../handlers/handle-request-forward.ts | 62 ++++++++++++++++--- 1 file changed, 52 insertions(+), 10 deletions(-) diff --git a/packages/realm-server/handlers/handle-request-forward.ts b/packages/realm-server/handlers/handle-request-forward.ts index 3ec4a2feec2..0cb866bb5fa 100644 --- a/packages/realm-server/handlers/handle-request-forward.ts +++ b/packages/realm-server/handlers/handle-request-forward.ts @@ -65,10 +65,23 @@ async function handleStreamingRequest( // Handle end of stream if (data === '[DONE]') { if (generationId) { - // Save cost in the background so we don't block the stream on OpenRouter's generation cost API - const costPromise = endpointConfig.creditStrategy - .saveUsageCost(dbAdapter, matrixUserId, { id: generationId }) - .finally(() => pendingCostPromises.delete(matrixUserId)); + // Save cost in the background so we don't block the stream on OpenRouter's generation cost API. + // Chain per-user promises so costs are recorded sequentially. + const previousPromise = + pendingCostPromises.get(matrixUserId) ?? Promise.resolve(); + const costPromise = previousPromise + .then(() => + endpointConfig.creditStrategy.saveUsageCost( + dbAdapter, + matrixUserId, + { id: generationId }, + ), + ) + .finally(() => { + if (pendingCostPromises.get(matrixUserId) === costPromise) { + pendingCostPromises.delete(matrixUserId); + } + }); pendingCostPromises.set(matrixUserId, costPromise); } ctxt.res.write(`data: [DONE]\n\n`); @@ -486,14 +499,43 @@ export default function handleRequestForward({ const responseData = await externalResponse.json(); - // 6. Deduct credits in the background using the cost from the response + // 6. Deduct credits in the background using the cost from the response, + // or fall back to saveUsageCost when the cost is not provided. const costInUsd = responseData?.usage?.cost; - if (costInUsd != null) { - const costPromise = destinationConfig.creditStrategy - .spendUsageCost(dbAdapter, matrixUserId, costInUsd) - .finally(() => pendingCostPromises.delete(matrixUserId)); - pendingCostPromises.set(matrixUserId, costPromise); + const previousPromise = + pendingCostPromises.get(matrixUserId) ?? Promise.resolve(); + let costPromise: Promise; + + if (typeof costInUsd === 'number' && Number.isFinite(costInUsd) && costInUsd > 0) { + costPromise = previousPromise + .then(() => + destinationConfig.creditStrategy.spendUsageCost( + dbAdapter, + matrixUserId, + costInUsd, + ), + ) + .finally(() => { + if (pendingCostPromises.get(matrixUserId) === costPromise) { + pendingCostPromises.delete(matrixUserId); + } + }); + } else { + costPromise = previousPromise + .then(() => + destinationConfig.creditStrategy.saveUsageCost( + dbAdapter, + matrixUserId, + responseData, + ), + ) + .finally(() => { + if (pendingCostPromises.get(matrixUserId) === costPromise) { + pendingCostPromises.delete(matrixUserId); + } + }); } + pendingCostPromises.set(matrixUserId, costPromise); // 7. Return response const response = new Response(JSON.stringify(responseData), { From e0926a180d2049f5aef9c9d1d10f1657637018e9 Mon Sep 17 00:00:00 2001 From: Matic Jurglic Date: Wed, 25 Mar 2026 14:02:01 +0100 Subject: [PATCH 3/5] Validate costInUsd input in spendUsageCost Guard against non-finite, negative, or non-number values from external API responses before attempting to spend credits. Co-Authored-By: Claude Opus 4.6 (1M context) --- packages/billing/ai-billing.ts | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/packages/billing/ai-billing.ts b/packages/billing/ai-billing.ts index 7c7b5fe8be7..97197b90415 100644 --- a/packages/billing/ai-billing.ts +++ b/packages/billing/ai-billing.ts @@ -79,6 +79,17 @@ export async function spendUsageCost( costInUsd: number, ) { try { + if ( + typeof costInUsd !== 'number' || + !Number.isFinite(costInUsd) || + costInUsd < 0 + ) { + log.warn( + `Invalid costInUsd value: ${costInUsd} for user ${matrixUserId}, skipping`, + ); + return; + } + let creditsConsumed = Math.round(costInUsd * CREDITS_PER_USD); let user = await getUserByMatrixUserId(dbAdapter, matrixUserId); From e2efba5edcd6cea2ab32803b6069096a43c8a0fa Mon Sep 17 00:00:00 2001 From: Matic Jurglic Date: Wed, 25 Mar 2026 14:02:08 +0100 Subject: [PATCH 4/5] Replace setTimeout with waitUntil polling in request-forward test Avoids flaky test failures on slow CI by polling the credit ledger instead of sleeping an arbitrary 50ms. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../tests/request-forward-test.ts | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/packages/realm-server/tests/request-forward-test.ts b/packages/realm-server/tests/request-forward-test.ts index 6d7d584337a..19785176d62 100644 --- a/packages/realm-server/tests/request-forward-test.ts +++ b/packages/realm-server/tests/request-forward-test.ts @@ -15,6 +15,7 @@ import { insertPlan, realmSecretSeed, createVirtualNetwork, + waitUntil, } from './helpers'; import { createJWT as createRealmServerJWT } from '../utils/jwt'; import { @@ -212,20 +213,19 @@ module(basename(__filename), function () { ); // Verify credits were deducted (0.003 USD * 1000 = 3 credits) - // Allow a tick for the background cost saving to complete - await new Promise((resolve) => setTimeout(resolve, 50)); const user = await getUserByMatrixUserId( dbAdapter, '@testuser:localhost', ); - const remainingCredits = await sumUpCreditsLedger(dbAdapter, { - creditType: ['extra_credit', 'extra_credit_used'], - userId: user!.id, - }); - assert.strictEqual( - remainingCredits, - 47, - 'Credits should be deducted (50 - 3 = 47)', + await waitUntil( + async () => { + const credits = await sumUpCreditsLedger(dbAdapter, { + creditType: ['extra_credit', 'extra_credit_used'], + userId: user!.id, + }); + return credits === 47; + }, + { timeoutMessage: 'Credits should be deducted (50 - 3 = 47)' }, ); } finally { mockFetch.restore(); From 4aa3e47584168b49ecf104f699ce6cbb8a3206d1 Mon Sep 17 00:00:00 2001 From: Matic Jurglic Date: Wed, 25 Mar 2026 14:29:22 +0100 Subject: [PATCH 5/5] Lint fix --- packages/realm-server/handlers/handle-request-forward.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/packages/realm-server/handlers/handle-request-forward.ts b/packages/realm-server/handlers/handle-request-forward.ts index 0cb866bb5fa..84971433bd9 100644 --- a/packages/realm-server/handlers/handle-request-forward.ts +++ b/packages/realm-server/handlers/handle-request-forward.ts @@ -506,7 +506,11 @@ export default function handleRequestForward({ pendingCostPromises.get(matrixUserId) ?? Promise.resolve(); let costPromise: Promise; - if (typeof costInUsd === 'number' && Number.isFinite(costInUsd) && costInUsd > 0) { + if ( + typeof costInUsd === 'number' && + Number.isFinite(costInUsd) && + costInUsd > 0 + ) { costPromise = previousPromise .then(() => destinationConfig.creditStrategy.spendUsageCost(