From 57b5d398f1719a91342e7d25a6b8d2ca1976bc81 Mon Sep 17 00:00:00 2001
From: "kiloconnect[bot]" <240665456+kiloconnect[bot]@users.noreply.github.com>
Date: Sat, 2 May 2026 07:35:55 +0000
Subject: [PATCH] refactor(rate-limit): key free-model limit on user id for
 authenticated requests

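Authenticated free-model requests are now rate-limited per user ID, regardless
of feature or source IP. Anonymous requests continue to be rate-limited per IP,
but the per-IP count now includes only anonymous usage, so it is not skewed by
authenticated users on shared IPs. This removes the feature/Cloudflare-IP
special case that existed for cloud-agent, code-review and app-builder,
including its 401 response for unauthenticated requests; the rate-limit check
now always awaits authentication before choosing the per-user or per-IP limit.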
---
 .../src/app/api/openrouter/[...path]/route.ts | 34 ++++---------------
 apps/web/src/lib/feature-detection.test.ts    | 21 +-----------
 apps/web/src/lib/feature-detection.ts         | 15 --------
 apps/web/src/lib/free-model-rate-limiter.ts   | 11 +++---
 4 files changed, 12 insertions(+), 69 deletions(-)
diff --git a/apps/web/src/app/api/openrouter/[...path]/route.ts b/apps/web/src/app/api/openrouter/[...path]/route.ts
index 62d0f161a6..1b015c4728 100644
--- a/apps/web/src/app/api/openrouter/[...path]/route.ts
+++ b/apps/web/src/app/api/openrouter/[...path]/route.ts
@@ -3,12 +3,7 @@ import { type NextRequest } from 'next/server';
 import { isOpenCodeBasedClient, stripRequiredPrefix } from '@/lib/utils';
 import { applyTrackingIds } from '@/lib/ai-gateway/providerHash';
 import { extractPromptInfo as extractChatCompletionsPromptInfo } from '@/lib/ai-gateway/processUsage';
-import {
-  validateFeatureHeader,
-  FEATURE_HEADER,
-  isUserRateLimitedFeature,
-  type FeatureValue,
-} from '@/lib/feature-detection';
+import { validateFeatureHeader, FEATURE_HEADER } from '@/lib/feature-detection';
 import type {
   OpenRouterChatCompletionRequest,
   GatewayResponsesRequest,
@@ -79,7 +74,6 @@ import {
 } from '@/lib/ai-gateway/o11y/api-metrics.server';
 import { normalizeModelId } from '@/lib/ai-gateway/model-utils';
 import { isForbiddenFreeModel } from '@/lib/ai-gateway/forbidden-free-models';
-import { isCloudflareIP } from '@/lib/cloudflare-ip';
 import { isKiloAutoModel, KILO_AUTO_FREE_MODEL } from '@/lib/ai-gateway/kilo-auto';
 import { applyResolvedAutoModel } from '@/lib/ai-gateway/kilo-auto/resolution';
 import { fixOpenCodeDuplicateReasoning } from '@/lib/ai-gateway/providers/fixOpenCodeDuplicateReasoning';
@@ -132,24 +126,11 @@ function extractPromptInfo(requestBodyParsed: GatewayRequest): PromptInfo {
 }
 
 async function resolveRateLimit(
-  feature: FeatureValue | null,
   ipAddress: string,
   authPromise: Promise<{ user: { id: string } | null }>
-): Promise<
-  | NextResponseType<unknown>
-  | { result: { allowed: boolean; requestCount: number }; subject: string }
-> {
-  if (isUserRateLimitedFeature(feature) && isCloudflareIP(ipAddress)) {
-    const { user } = await authPromise;
-    if (!user) {
-      return NextResponse.json(
-        {
-          error: 'Authentication required for this feature',
-          error_type: ProxyErrorType.authentication_required,
-        },
-        { status: 401 }
-      );
-    }
+): Promise<{ result: { allowed: boolean; requestCount: number }; subject: string }> {
+  const { user } = await authPromise;
+  if (user) {
     return {
       result: await checkFreeModelRateLimitByUser(user.id),
       subject: `user: ${user.id}`,
@@ -272,14 +253,11 @@ export async function POST(request: NextRequest): Promise<NextResponseType<unkno
   }
 
   // For FREE models: check rate limit, log at start.
-  // Server-side products (cloud-agent, code-review, app-builder) rate-limit
-  // per user when the request comes from Cloudflare IPs (Kilo infrastructure).
-  // All other products rate-limit per IP (fast pre-auth path).
+  // Authenticated requests rate-limit per user; anonymous requests rate-limit per IP.
   const isRateLimitedFreeModelRequest =
     isKiloExclusiveFreeModel(originalModelIdLowerCased) || autoModel === KILO_AUTO_FREE_MODEL.id;
   if (isRateLimitedFreeModelRequest) {
-    const rateLimit = await resolveRateLimit(feature, ipAddress, authPromise);
-    if (rateLimit instanceof NextResponse) return rateLimit;
+    const rateLimit = await resolveRateLimit(ipAddress, authPromise);
 
     if (!rateLimit.result.allowed) {
       console.warn(
diff --git a/apps/web/src/lib/feature-detection.test.ts b/apps/web/src/lib/feature-detection.test.ts
index 7a27acc354..1104dbe1ca 100644
--- a/apps/web/src/lib/feature-detection.test.ts
+++ b/apps/web/src/lib/feature-detection.test.ts
@@ -1,5 +1,5 @@
 import { describe, test, expect } from '@jest/globals';
-import { isUserRateLimitedFeature, validateFeatureHeader } from './feature-detection';
+import { validateFeatureHeader } from './feature-detection';
 
 describe('validateFeatureHeader', () => {
   test('returns null for null input', () => {
@@ -19,22 +19,3 @@ describe('validateFeatureHeader', () => {
     expect(validateFeatureHeader('  Cloud-Agent  ')).toBe('cloud-agent');
   });
 });
-
-describe('isUserRateLimitedFeature', () => {
-  test('returns true for server-side products', () => {
-    expect(isUserRateLimitedFeature('cloud-agent')).toBe(true);
-    expect(isUserRateLimitedFeature('code-review')).toBe(true);
-    expect(isUserRateLimitedFeature('app-builder')).toBe(true);
-  });
-
-  test('returns false for client-side products', () => {
-    expect(isUserRateLimitedFeature('vscode-extension')).toBe(false);
-    expect(isUserRateLimitedFeature('jetbrains-extension')).toBe(false);
-    expect(isUserRateLimitedFeature('cli')).toBe(false);
-    expect(isUserRateLimitedFeature('direct-gateway')).toBe(false);
-  });
-
-  test('returns false for null', () => {
-    expect(isUserRateLimitedFeature(null)).toBe(false);
-  });
-});
diff --git a/apps/web/src/lib/feature-detection.ts b/apps/web/src/lib/feature-detection.ts
index 188ed61eca..21af48ff76 100644
--- a/apps/web/src/lib/feature-detection.ts
+++ b/apps/web/src/lib/feature-detection.ts
@@ -48,18 +48,3 @@ export function validateFeatureHeader(headerValue: string | null): FeatureValue
   const result = featureSchema.safeParse(headerValue.trim().toLowerCase());
   return result.success ? result.data : null;
 }
-
-/**
- * Server-side products that rate-limit free models per user instead of per IP.
- * These products share infrastructure IPs, so IP-based limits would be too restrictive.
- */
-const USER_RATE_LIMITED_FEATURES: ReadonlySet<FeatureValue> = new Set([
-  'cloud-agent',
-  'code-review',
-  'app-builder',
-]);
-
-export function isUserRateLimitedFeature(feature: FeatureValue | null): boolean {
-  if (!feature) return false;
-  return USER_RATE_LIMITED_FEATURES.has(feature);
-}
diff --git a/apps/web/src/lib/free-model-rate-limiter.ts b/apps/web/src/lib/free-model-rate-limiter.ts
index 425d48777c..19fa097ac0 100644
--- a/apps/web/src/lib/free-model-rate-limiter.ts
+++ b/apps/web/src/lib/free-model-rate-limiter.ts
@@ -53,13 +53,14 @@ async function getModelUsageSinceTimeByUser(
 }
 
 /**
- * Check if an IP address is within the free model rate limit.
- * This applies to ALL free model requests, both anonymous and authenticated.
+ * Check if anonymous requests from an IP address are within the free model rate limit.
+ * Only counts anonymous (unauthenticated) requests — authenticated requests
+ * are rate-limited per user via checkFreeModelRateLimitByUser.
  */
 export async function checkFreeModelRateLimit(ipAddress: string): Promise<RateLimitResult> {
   const windowStart = new Date(Date.now() - FREE_MODEL_RATE_LIMIT_WINDOW_HOURS * 60 * 60 * 1000);
 
-  const requestCount = await getModelUsageSinceTime(windowStart, ipAddress);
+  const requestCount = await getModelUsageSinceTime(windowStart, ipAddress, true);
 
   return {
     allowed: requestCount < FREE_MODEL_MAX_REQUESTS_PER_WINDOW,
@@ -68,9 +69,7 @@ export async function checkFreeModelRateLimit(ipAddress: string): Promise<RateLi
 }
 
 /**
- * Check if a user is within the free model rate limit.
- * Used for server-side products (cloud-agent, code-review, app-builder)
- * where all requests share infrastructure IPs.
+ * Check if an authenticated user is within the free model rate limit.
  */
 export async function checkFreeModelRateLimitByUser(kiloUserId: string): Promise<RateLimitResult> {
   const windowStart = new Date(Date.now() - FREE_MODEL_RATE_LIMIT_WINDOW_HOURS * 60 * 60 * 1000);