Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 6 additions & 28 deletions apps/web/src/app/api/openrouter/[...path]/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,7 @@ import { type NextRequest } from 'next/server';
import { isOpenCodeBasedClient, stripRequiredPrefix } from '@/lib/utils';
import { applyTrackingIds } from '@/lib/ai-gateway/providerHash';
import { extractPromptInfo as extractChatCompletionsPromptInfo } from '@/lib/ai-gateway/processUsage';
import {
validateFeatureHeader,
FEATURE_HEADER,
isUserRateLimitedFeature,
type FeatureValue,
} from '@/lib/feature-detection';
import { validateFeatureHeader, FEATURE_HEADER } from '@/lib/feature-detection';
import type {
OpenRouterChatCompletionRequest,
GatewayResponsesRequest,
Expand Down Expand Up @@ -79,7 +74,6 @@ import {
} from '@/lib/ai-gateway/o11y/api-metrics.server';
import { normalizeModelId } from '@/lib/ai-gateway/model-utils';
import { isForbiddenFreeModel } from '@/lib/ai-gateway/forbidden-free-models';
import { isCloudflareIP } from '@/lib/cloudflare-ip';
import { isKiloAutoModel, KILO_AUTO_FREE_MODEL } from '@/lib/ai-gateway/kilo-auto';
import { applyResolvedAutoModel } from '@/lib/ai-gateway/kilo-auto/resolution';
import { fixOpenCodeDuplicateReasoning } from '@/lib/ai-gateway/providers/fixOpenCodeDuplicateReasoning';
Expand Down Expand Up @@ -132,24 +126,11 @@ function extractPromptInfo(requestBodyParsed: GatewayRequest): PromptInfo {
}

async function resolveRateLimit(
feature: FeatureValue | null,
ipAddress: string,
authPromise: Promise<{ user: { id: string } | null }>
): Promise<
| NextResponseType<unknown>
| { result: { allowed: boolean; requestCount: number }; subject: string }
> {
if (isUserRateLimitedFeature(feature) && isCloudflareIP(ipAddress)) {
const { user } = await authPromise;
if (!user) {
return NextResponse.json(
{
error: 'Authentication required for this feature',
error_type: ProxyErrorType.authentication_required,
},
{ status: 401 }
);
}
): Promise<{ result: { allowed: boolean; requestCount: number }; subject: string }> {
const { user } = await authPromise;
if (user) {
return {
result: await checkFreeModelRateLimitByUser(user.id),
subject: `user: ${user.id}`,
Expand Down Expand Up @@ -272,14 +253,11 @@ export async function POST(request: NextRequest): Promise<NextResponseType<unkno
}

// For FREE models: check rate limit, log at start.
// Server-side products (cloud-agent, code-review, app-builder) rate-limit
// per user when the request comes from Cloudflare IPs (Kilo infrastructure).
// All other products rate-limit per IP (fast pre-auth path).
// Authenticated requests rate-limit per user; anonymous requests rate-limit per IP.
const isRateLimitedFreeModelRequest =
isKiloExclusiveFreeModel(originalModelIdLowerCased) || autoModel === KILO_AUTO_FREE_MODEL.id;
if (isRateLimitedFreeModelRequest) {
const rateLimit = await resolveRateLimit(feature, ipAddress, authPromise);
if (rateLimit instanceof NextResponse) return rateLimit;
const rateLimit = await resolveRateLimit(ipAddress, authPromise);

if (!rateLimit.result.allowed) {
console.warn(
Expand Down
21 changes: 1 addition & 20 deletions apps/web/src/lib/feature-detection.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { describe, test, expect } from '@jest/globals';
import { isUserRateLimitedFeature, validateFeatureHeader } from './feature-detection';
import { validateFeatureHeader } from './feature-detection';

describe('validateFeatureHeader', () => {
test('returns null for null input', () => {
Expand All @@ -19,22 +19,3 @@ describe('validateFeatureHeader', () => {
expect(validateFeatureHeader(' Cloud-Agent ')).toBe('cloud-agent');
});
});

// Covers the three input classes of isUserRateLimitedFeature:
// server-side products, client-side products, and a missing feature.
describe('isUserRateLimitedFeature', () => {
  test('returns true for server-side products', () => {
    // Same checks, same order as before — just driven from a literal list.
    for (const serverSideFeature of ['cloud-agent', 'code-review', 'app-builder'] as const) {
      expect(isUserRateLimitedFeature(serverSideFeature)).toBe(true);
    }
  });

  test('returns false for client-side products', () => {
    const clientSideFeatures = [
      'vscode-extension',
      'jetbrains-extension',
      'cli',
      'direct-gateway',
    ] as const;
    for (const clientSideFeature of clientSideFeatures) {
      expect(isUserRateLimitedFeature(clientSideFeature)).toBe(false);
    }
  });

  test('returns false for null', () => {
    const noFeature = null;
    expect(isUserRateLimitedFeature(noFeature)).toBe(false);
  });
});
15 changes: 0 additions & 15 deletions apps/web/src/lib/feature-detection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,18 +48,3 @@ export function validateFeatureHeader(headerValue: string | null): FeatureValue
const result = featureSchema.safeParse(headerValue.trim().toLowerCase());
return result.success ? result.data : null;
}

/**
 * Features whose free-model traffic is rate-limited per user rather than per IP.
 *
 * These are server-side products that share infrastructure IPs, so an IP-based
 * limit would be too restrictive for them.
 */
const USER_RATE_LIMITED_FEATURES: ReadonlySet<FeatureValue> = new Set([
  'cloud-agent',
  'code-review',
  'app-builder',
]);

/**
 * Reports whether `feature` is one of the per-user rate-limited features.
 *
 * @param feature - A feature value, or `null` when none is available.
 * @returns `true` only when `feature` is a member of
 *   `USER_RATE_LIMITED_FEATURES`; `false` for `null` or any other falsy value.
 */
export function isUserRateLimitedFeature(feature: FeatureValue | null): boolean {
  // Falsy input short-circuits to false without consulting the set.
  return feature ? USER_RATE_LIMITED_FEATURES.has(feature) : false;
}
11 changes: 5 additions & 6 deletions apps/web/src/lib/free-model-rate-limiter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,14 @@ async function getModelUsageSinceTimeByUser(
}

/**
* Check if an IP address is within the free model rate limit.
* This applies to ALL free model requests, both anonymous and authenticated.
* Check if an anonymous IP address is within the free model rate limit.
* Only counts anonymous (unauthenticated) requests — authenticated requests
* are rate-limited per user via checkFreeModelRateLimitByUser.
*/
export async function checkFreeModelRateLimit(ipAddress: string): Promise<RateLimitResult> {
const windowStart = new Date(Date.now() - FREE_MODEL_RATE_LIMIT_WINDOW_HOURS * 60 * 60 * 1000);

const requestCount = await getModelUsageSinceTime(windowStart, ipAddress);
const requestCount = await getModelUsageSinceTime(windowStart, ipAddress, true);

return {
allowed: requestCount < FREE_MODEL_MAX_REQUESTS_PER_WINDOW,
Expand All @@ -68,9 +69,7 @@ export async function checkFreeModelRateLimit(ipAddress: string): Promise<RateLi
}

/**
* Check if a user is within the free model rate limit.
* Used for server-side products (cloud-agent, code-review, app-builder)
* where all requests share infrastructure IPs.
* Check if an authenticated user is within the free model rate limit.
*/
export async function checkFreeModelRateLimitByUser(kiloUserId: string): Promise<RateLimitResult> {
const windowStart = new Date(Date.now() - FREE_MODEL_RATE_LIMIT_WINDOW_HOURS * 60 * 60 * 1000);
Expand Down