From 57b5d398f1719a91342e7d25a6b8d2ca1976bc81 Mon Sep 17 00:00:00 2001 From: "kiloconnect[bot]" <240665456+kiloconnect[bot]@users.noreply.github.com> Date: Sat, 2 May 2026 07:35:55 +0000 Subject: [PATCH] refactor(rate-limit): key free-model limit on user id for authenticated requests Authenticated free-model requests are now rate-limited per user id regardless of feature or source IP. Anonymous requests continue to be rate-limited per IP, counting only anonymous usage so they aren't skewed by authenticated users on shared IPs. This removes the feature/Cloudflare-IP special case that existed for cloud-agent, code-review and app-builder. --- .../src/app/api/openrouter/[...path]/route.ts | 34 ++++--------------- apps/web/src/lib/feature-detection.test.ts | 21 +----------- apps/web/src/lib/feature-detection.ts | 15 -------- apps/web/src/lib/free-model-rate-limiter.ts | 11 +++--- 4 files changed, 12 insertions(+), 69 deletions(-) diff --git a/apps/web/src/app/api/openrouter/[...path]/route.ts b/apps/web/src/app/api/openrouter/[...path]/route.ts index 62d0f161a6..1b015c4728 100644 --- a/apps/web/src/app/api/openrouter/[...path]/route.ts +++ b/apps/web/src/app/api/openrouter/[...path]/route.ts @@ -3,12 +3,7 @@ import { type NextRequest } from 'next/server'; import { isOpenCodeBasedClient, stripRequiredPrefix } from '@/lib/utils'; import { applyTrackingIds } from '@/lib/ai-gateway/providerHash'; import { extractPromptInfo as extractChatCompletionsPromptInfo } from '@/lib/ai-gateway/processUsage'; -import { - validateFeatureHeader, - FEATURE_HEADER, - isUserRateLimitedFeature, - type FeatureValue, -} from '@/lib/feature-detection'; +import { validateFeatureHeader, FEATURE_HEADER } from '@/lib/feature-detection'; import type { OpenRouterChatCompletionRequest, GatewayResponsesRequest, @@ -79,7 +74,6 @@ import { } from '@/lib/ai-gateway/o11y/api-metrics.server'; import { normalizeModelId } from '@/lib/ai-gateway/model-utils'; import { isForbiddenFreeModel } from '@/lib/ai-gateway/forbidden-free-models'; -import { isCloudflareIP } from '@/lib/cloudflare-ip'; import { isKiloAutoModel, KILO_AUTO_FREE_MODEL } from '@/lib/ai-gateway/kilo-auto'; import { applyResolvedAutoModel } from '@/lib/ai-gateway/kilo-auto/resolution'; import { fixOpenCodeDuplicateReasoning } from '@/lib/ai-gateway/providers/fixOpenCodeDuplicateReasoning'; @@ -132,24 +126,11 @@ function extractPromptInfo(requestBodyParsed: GatewayRequest): PromptInfo { } async function resolveRateLimit( - feature: FeatureValue | null, ipAddress: string, authPromise: Promise<{ user: { id: string } | null }> -): Promise< - | NextResponseType - | { result: { allowed: boolean; requestCount: number }; subject: string } -> { - if (isUserRateLimitedFeature(feature) && isCloudflareIP(ipAddress)) { - const { user } = await authPromise; - if (!user) { - return NextResponse.json( - { - error: 'Authentication required for this feature', - error_type: ProxyErrorType.authentication_required, - }, - { status: 401 } - ); - } +): Promise<{ result: { allowed: boolean; requestCount: number }; subject: string }> { + const { user } = await authPromise; + if (user) { return { result: await checkFreeModelRateLimitByUser(user.id), subject: `user: ${user.id}`, @@ -272,14 +253,11 @@ export async function POST(request: NextRequest): Promise { test('returns null for null input', () => { @@ -19,22 +19,3 @@ describe('validateFeatureHeader', () => { expect(validateFeatureHeader(' Cloud-Agent ')).toBe('cloud-agent'); }); }); - -describe('isUserRateLimitedFeature', () => { - test('returns true for server-side products', () => { - expect(isUserRateLimitedFeature('cloud-agent')).toBe(true); - expect(isUserRateLimitedFeature('code-review')).toBe(true); - expect(isUserRateLimitedFeature('app-builder')).toBe(true); - }); - - test('returns false for client-side products', () => { - expect(isUserRateLimitedFeature('vscode-extension')).toBe(false); - expect(isUserRateLimitedFeature('jetbrains-extension')).toBe(false); - expect(isUserRateLimitedFeature('cli')).toBe(false); - expect(isUserRateLimitedFeature('direct-gateway')).toBe(false); - }); - - test('returns false for null', () => { - expect(isUserRateLimitedFeature(null)).toBe(false); - }); -}); diff --git a/apps/web/src/lib/feature-detection.ts b/apps/web/src/lib/feature-detection.ts index 188ed61eca..21af48ff76 100644 --- a/apps/web/src/lib/feature-detection.ts +++ b/apps/web/src/lib/feature-detection.ts @@ -48,18 +48,3 @@ export function validateFeatureHeader(headerValue: string | null): FeatureValue const result = featureSchema.safeParse(headerValue.trim().toLowerCase()); return result.success ? result.data : null; } - -/** - * Server-side products that rate-limit free models per user instead of per IP. - * These products share infrastructure IPs, so IP-based limits would be too restrictive. - */ -const USER_RATE_LIMITED_FEATURES: ReadonlySet = new Set([ - 'cloud-agent', - 'code-review', - 'app-builder', -]); - -export function isUserRateLimitedFeature(feature: FeatureValue | null): boolean { - if (!feature) return false; - return USER_RATE_LIMITED_FEATURES.has(feature); -} diff --git a/apps/web/src/lib/free-model-rate-limiter.ts b/apps/web/src/lib/free-model-rate-limiter.ts index 425d48777c..19fa097ac0 100644 --- a/apps/web/src/lib/free-model-rate-limiter.ts +++ b/apps/web/src/lib/free-model-rate-limiter.ts @@ -53,13 +53,14 @@ async function getModelUsageSinceTimeByUser( } /** - * Check if an IP address is within the free model rate limit. - * This applies to ALL free model requests, both anonymous and authenticated. + * Check if an anonymous IP address is within the free model rate limit. + * Only counts anonymous (unauthenticated) requests — authenticated requests + * are rate-limited per user via checkFreeModelRateLimitByUser. */ export async function checkFreeModelRateLimit(ipAddress: string): Promise { const windowStart = new Date(Date.now() - FREE_MODEL_RATE_LIMIT_WINDOW_HOURS * 60 * 60 * 1000); - const requestCount = await getModelUsageSinceTime(windowStart, ipAddress); + const requestCount = await getModelUsageSinceTime(windowStart, ipAddress, true); return { allowed: requestCount < FREE_MODEL_MAX_REQUESTS_PER_WINDOW, @@ -68,9 +69,7 @@ export async function checkFreeModelRateLimit(ipAddress: string): Promise { const windowStart = new Date(Date.now() - FREE_MODEL_RATE_LIMIT_WINDOW_HOURS * 60 * 60 * 1000);