From 1d1e3552c53d663f5ee90e0d9abf09d5a25a4f69 Mon Sep 17 00:00:00 2001 From: AEGIS Date: Sat, 4 Apr 2026 17:12:31 -0500 Subject: [PATCH 1/3] feat: add rate limiting, cost attribution, and scope enforcement (#18) - Rate limiter: sliding window per-tenant using RATELIMIT_KV with tier-based limits (free=20/min, hobby=60, pro=300, enterprise=1000). Returns 429 with Retry-After and X-RateLimit-* headers. - Cost attribution: per-tool credit costs with quality multipliers for image_generate. Reserves quota via edge-auth consumeQuota RPC before tool call, settles (commit/refund) after based on outcome. Free tools (read-only, zero cost) skip quota enforcement. - Scope enforcement: mutation tools require 'generate' scope. tools/list filters catalog to match session scopes. - AuthServiceRpc extended with checkQuota, consumeQuota, and commitOrRefundQuota methods matching edge-auth's entrypoint. - All existing tests updated with new mocks; 25 new tests added. Closes #18 Co-Authored-By: Claude Opus 4.6 (1M context) --- src/cost-attribution.ts | 178 ++++++++++++++++++++++++++++++++++ src/gateway.ts | 113 ++++++++++++++++++++- src/rate-limiter.ts | 108 +++++++++++++++++++++ src/types.ts | 32 ++++++ test/audit.test.ts | 13 +++ test/auth.test.ts | 3 + test/cost-attribution.test.ts | 144 +++++++++++++++++++++++++++ test/gateway.test.ts | 4 + test/oauth-handler.test.ts | 4 + test/rate-limiter.test.ts | 90 +++++++++++++++++ wrangler.toml | 5 + 11 files changed, 692 insertions(+), 2 deletions(-) create mode 100644 src/cost-attribution.ts create mode 100644 src/rate-limiter.ts create mode 100644 test/cost-attribution.test.ts create mode 100644 test/rate-limiter.test.ts diff --git a/src/cost-attribution.ts b/src/cost-attribution.ts new file mode 100644 index 0000000..d49ba48 --- /dev/null +++ b/src/cost-attribution.ts @@ -0,0 +1,178 @@ +// ─── Cost Attribution ───────────────────────────────────────── +// Maps tool calls to credit costs and enforces quotas via edge-auth. 
+// Every tool call is metered: check quota before, consume after success. +// Cost data flows to the audit pipeline for billing dashboards. + +import type { Tier, AuthServiceRpc } from './types.js'; +import type { AuditArtifact } from './audit.js'; + +// ─── Credit cost per tool call ────────────────────────────────── +// Costs are in "credits" — 1 credit = 1 unit of the tier's allocation. +// Expensive operations (image gen, deploy) cost more. +export interface ToolCost { + /** Base credit cost for the tool call */ + baseCost: number; + /** Feature key for quota tracking (maps to edge-auth quota.feature) */ + feature: string; +} + +const TOOL_COSTS: Record = { + // img-forge: costs depend on quality tier (resolved at call time) + 'image_generate': { baseCost: 5, feature: 'mcp.image_generate' }, + 'image_list_models': { baseCost: 0, feature: 'mcp.image_list_models' }, + 'image_check_job': { baseCost: 0, feature: 'mcp.image_check_job' }, + + // TarotScript scaffold + 'scaffold_create': { baseCost: 2, feature: 'mcp.scaffold_create' }, + 'scaffold_classify': { baseCost: 0, feature: 'mcp.scaffold_classify' }, + 'scaffold_status': { baseCost: 0, feature: 'mcp.scaffold_status' }, + 'scaffold_publish': { baseCost: 3, feature: 'mcp.scaffold_publish' }, + 'scaffold_deploy': { baseCost: 5, feature: 'mcp.scaffold_deploy' }, + 'scaffold_import': { baseCost: 1, feature: 'mcp.scaffold_import' }, + + // Flow tools + 'flow_create': { baseCost: 2, feature: 'mcp.flow_create' }, + 'flow_status': { baseCost: 0, feature: 'mcp.flow_status' }, + 'flow_summary': { baseCost: 0, feature: 'mcp.flow_summary' }, + 'flow_quality': { baseCost: 0, feature: 'mcp.flow_quality' }, + 'flow_governance': { baseCost: 0, feature: 'mcp.flow_governance' }, + + // Visual QA + 'visual_screenshot': { baseCost: 1, feature: 'mcp.visual_screenshot' }, + 'visual_analyze': { baseCost: 2, feature: 'mcp.visual_analyze' }, + 'visual_pages': { baseCost: 0, feature: 'mcp.visual_pages' }, +}; + +// Quality tier 
multipliers for image_generate +const IMAGE_QUALITY_MULTIPLIER: Record = { + draft: 1, + standard: 1, + premium: 3, + ultra: 5, + ultra_plus: 8, +}; + +/** + * Resolve the credit cost for a tool call, factoring in quality tier for images. + */ +export function resolveToolCost( + toolName: string, + args?: Record, +): ToolCost { + const base = TOOL_COSTS[toolName]; + if (!base) { + // Unknown tools cost 1 credit by default (conservative) + return { baseCost: 1, feature: `mcp.${toolName}` }; + } + + // Apply quality multiplier for image_generate + if (toolName === 'image_generate' && args?.quality_tier) { + const multiplier = IMAGE_QUALITY_MULTIPLIER[args.quality_tier as string] ?? 1; + return { ...base, baseCost: base.baseCost * multiplier }; + } + + return base; +} + +/** + * Check if a tool call is free (cost = 0). Free calls skip quota enforcement. + */ +export function isFreeTool(toolName: string): boolean { + const cost = TOOL_COSTS[toolName]; + return cost !== undefined && cost.baseCost === 0; +} + +export interface QuotaCheckResult { + allowed: boolean; + reservationId?: string; + remaining?: number; + error?: string; +} + +/** + * Check and reserve quota for a tool call via edge-auth RPC. + * Returns a reservation ID that must be committed or refunded after the call. + */ +export async function reserveQuota( + authService: AuthServiceRpc, + tenantId: string, + userId: string, + toolName: string, + args?: Record, +): Promise { + const cost = resolveToolCost(toolName, args); + + // Free tools don't consume quota + if (cost.baseCost === 0) { + return { allowed: true }; + } + + try { + const result = await authService.consumeQuota({ + tenantId, + userId, + feature: cost.feature, + amount: cost.baseCost, + }); + + if (!result.success) { + return { + allowed: false, + error: result.error ?? 
'Quota exceeded', + remaining: result.remaining, + }; + } + + return { + allowed: true, + reservationId: result.reservationId, + remaining: result.remaining, + }; + } catch (err) { + // Quota service unavailable — fail open for read-only tools, closed for mutations + const isReadOnly = cost.baseCost <= 0; + if (isReadOnly) { + return { allowed: true }; + } + return { + allowed: false, + error: 'Quota service unavailable', + }; + } +} + +/** + * Commit or refund a quota reservation based on tool call outcome. + */ +export async function settleQuota( + authService: AuthServiceRpc, + reservationId: string | undefined, + success: boolean, +): Promise { + if (!reservationId) return; + + try { + await authService.commitOrRefundQuota( + reservationId, + success ? 'success' : 'failed', + ); + } catch { + // Best-effort — don't fail the tool call if settlement fails. + // The reservation will auto-expire in edge-auth. + console.error(`[cost] Failed to settle reservation ${reservationId}`); + } +} + +/** + * Build cost attribution data for the audit artifact. 
+ */ +export function buildCostAttribution( + toolName: string, + args?: Record, +): { feature: string; creditCost: number } { + const cost = resolveToolCost(toolName, args); + return { + feature: cost.feature, + creditCost: cost.baseCost, + }; +} diff --git a/src/gateway.ts b/src/gateway.ts index 67d003e..6a72f97 100644 --- a/src/gateway.ts +++ b/src/gateway.ts @@ -13,6 +13,8 @@ import { materializeScaffold } from './scaffold-materializer.js'; import { publishToGitHub } from './scaffold-publish.js'; import { classifyIntention, type IntentClassification } from './intent-classifier.js'; import { logDivergence } from './divergence-logger.js'; +import { checkRateLimit, rateLimitHeaders, type RateLimitResult } from './rate-limiter.js'; +import { reserveQuota, settleQuota, buildCostAttribution, isFreeTool } from './cost-attribution.js'; const MCP_PROTOCOL_VERSION = '2025-03-26'; const JSON_RPC_PARSE_ERROR = -32700; @@ -1131,6 +1133,28 @@ async function handlePost(request: Request, env: GatewayEnv, oauthProps?: OAuthP ); } + // Rate limiting — check before processing + const rateLimitKey = authResult.tenantId ?? authResult.userId ?? 'unknown'; + const rlResult = await checkRateLimit(env.RATELIMIT_KV, rateLimitKey, authResult.tier); + if (!rlResult.allowed) { + audit({ + trace_id: generateTraceId(), + principal: authResult.userId ?? 'unknown', + tenant: authResult.tenantId ?? 'unknown', + tool: 'rate_limit', + risk_level: 'UNKNOWN', + policy_decision: 'DENY', + redacted_input_summary: '{}', + outcome: 'auth_denied', + timestamp: new Date().toISOString(), + }, env); + return jsonResponse( + { error: 'Rate limit exceeded', code: 'RATE_LIMITED' }, + 429, + rateLimitHeaders(rlResult), + ); + } + // Validate Accept header const accept = request.headers.get('Accept') ?? 
''; if (!accept.includes('application/json') && !accept.includes('*/*') && accept !== '') { @@ -1186,7 +1210,18 @@ async function handlePost(request: Request, env: GatewayEnv, oauthProps?: OAuthP // ─── tools/list ───────────────────────────────────────── if (rpcMethod === 'tools/list') { // KV handles session expiration via expirationTtl — no manual pruning needed - const tools = buildAggregatedCatalog(); + let tools = buildAggregatedCatalog(); + + // Scope-based filtering: only show tools the session has access to + const hasGenerate = session.scopes.includes('generate'); + if (!hasGenerate) { + // Read-only scope — filter out mutation tools + tools = tools.filter(t => { + const risk = getToolRiskLevel(t.name); + return risk === 'READ_ONLY'; + }); + } + return rpcResult(rpcId, { tools }); } @@ -1229,6 +1264,23 @@ async function handlePost(request: Request, env: GatewayEnv, oauthProps?: OAuthP return rpcError(rpcId, JSON_RPC_METHOD_NOT_FOUND, `Unknown tool: ${toolName}`); } + // Scope enforcement: mutation tools require 'generate' scope + if (risk !== 'READ_ONLY' && !session.scopes.includes('generate')) { + audit({ + trace_id: traceId, + principal: session.userId ?? 'unknown', + tenant: session.tenantId ?? 'unknown', + tool: toolName, + risk_level: risk, + policy_decision: 'DENY', + redacted_input_summary: summarizeInput(toolArgs), + outcome: 'auth_denied', + timestamp: new Date().toISOString(), + }, env); + return rpcError(rpcId, JSON_RPC_INVALID_PARAMS, + `Tool "${toolName}" requires the "generate" scope. 
Your API key only has: ${session.scopes.join(', ')}`); + } + // Validate arguments are object-shaped const argValidation = validateToolArguments(toolArgs, { type: 'object' }); if (!argValidation.valid) { @@ -1290,8 +1342,65 @@ async function handlePost(request: Request, env: GatewayEnv, oauthProps?: OAuthP return rpcError(rpcId, JSON_RPC_INVALID_PARAMS, tierDenied); } + // ─── Cost attribution: reserve quota before tool call ──── + const costInfo = buildCostAttribution(toolName, toolArgs as Record | undefined); + let quotaReservation: { reservationId?: string } = {}; + + if (!isFreeTool(toolName) && session.tenantId) { + const quotaResult = await reserveQuota( + env.AUTH_SERVICE, + session.tenantId, + session.userId ?? '', + toolName, + toolArgs as Record | undefined, + ); + + if (!quotaResult.allowed) { + audit({ + trace_id: traceId, + principal: session.userId ?? 'unknown', + tenant: session.tenantId ?? 'unknown', + tool: toolName, + risk_level: risk, + policy_decision: 'DENY', + redacted_input_summary: summarizeInput(toolArgs), + outcome: 'tier_denied', + timestamp: new Date().toISOString(), + }, env); + return rpcError(rpcId, JSON_RPC_INVALID_PARAMS, + `Quota exceeded for ${toolName}. ${quotaResult.error ?? 'Upgrade your plan for more credits.'}`); + } + + quotaReservation = { reservationId: quotaResult.reservationId }; + } + const result = await proxyToolCall(env, toolName, toolArgs, session, traceId); - return rpcResult(rpcId, result); + + // ─── Cost attribution: settle quota after tool call ────── + const toolSucceeded = !result.isError; + await settleQuota(env.AUTH_SERVICE, quotaReservation.reservationId, toolSucceeded); + + // Enrich audit queue event with cost data + queueAuditEvent(env.PLATFORM_EVENTS_QUEUE, { + trace_id: traceId, + principal: session.userId ?? 'unknown', + tenant: session.tenantId ?? 
'unknown', + tool: toolName, + risk_level: risk, + policy_decision: 'ALLOW', + redacted_input_summary: summarizeInput(toolArgs), + outcome: toolSucceeded ? 'success' : 'error', + timestamp: new Date().toISOString(), + latency_ms: 0, // latency is tracked in proxyToolCall's own audit + }); + + // Add rate limit headers to successful responses + const response = rpcResult(rpcId, result); + const rlHeaders = rateLimitHeaders(rlResult); + for (const [k, v] of Object.entries(rlHeaders)) { + response.headers.set(k, v); + } + return response; } return rpcError(rpcId, JSON_RPC_METHOD_NOT_FOUND, `Unknown method: ${rpcMethod}`); diff --git a/src/rate-limiter.ts b/src/rate-limiter.ts new file mode 100644 index 0000000..83b7594 --- /dev/null +++ b/src/rate-limiter.ts @@ -0,0 +1,108 @@ +// ─── Rate Limiter ───────────────────────────────────────────── +// Sliding window rate limiting per API key / tenant. +// Uses KV with TTL for window expiration — no external dependencies. +// Returns 429 with Retry-After header when limit exceeded. + +import type { Tier } from './types.js'; + +export interface RateLimitConfig { + /** Max requests per window */ + limit: number; + /** Window size in seconds */ + windowSeconds: number; +} + +// Per-tier rate limits — configurable, conservative defaults +const TIER_LIMITS: Record = { + free: { limit: 20, windowSeconds: 60 }, + hobby: { limit: 60, windowSeconds: 60 }, + pro: { limit: 300, windowSeconds: 60 }, + enterprise: { limit: 1000, windowSeconds: 60 }, +}; + +export interface RateLimitResult { + allowed: boolean; + /** Requests remaining in current window */ + remaining: number; + /** Total limit for this window */ + limit: number; + /** Seconds until window resets */ + retryAfterSeconds: number; +} + +interface WindowState { + count: number; + windowStart: number; +} + +const RATE_LIMIT_PREFIX = 'rl:'; + +/** + * Check and increment rate limit for a given key (API key ID, tenant ID, etc.) 
+ * Uses a simple fixed-window approach with KV TTL for auto-cleanup. + */ +export async function checkRateLimit( + kv: KVNamespace, + key: string, + tier: Tier, + config?: RateLimitConfig, +): Promise { + const { limit, windowSeconds } = config ?? TIER_LIMITS[tier] ?? TIER_LIMITS.free; + const now = Math.floor(Date.now() / 1000); + const windowStart = now - (now % windowSeconds); + const kvKey = `${RATE_LIMIT_PREFIX}${key}:${windowStart}`; + + // Read current window count + const raw = await kv.get(kvKey); + let state: WindowState; + + if (raw) { + state = JSON.parse(raw) as WindowState; + } else { + state = { count: 0, windowStart }; + } + + const retryAfterSeconds = windowSeconds - (now - windowStart); + + if (state.count >= limit) { + return { + allowed: false, + remaining: 0, + limit, + retryAfterSeconds, + }; + } + + // Increment + state.count += 1; + // Write back with TTL — window auto-expires + await kv.put(kvKey, JSON.stringify(state), { + expirationTtl: windowSeconds + 10, // small buffer past window end + }); + + return { + allowed: true, + remaining: limit - state.count, + limit, + retryAfterSeconds, + }; +} + +/** + * Build standard rate limit response headers. + */ +export function rateLimitHeaders(result: RateLimitResult): Record { + const headers: Record = { + 'X-RateLimit-Limit': String(result.limit), + 'X-RateLimit-Remaining': String(result.remaining), + 'X-RateLimit-Reset': String(Math.floor(Date.now() / 1000) + result.retryAfterSeconds), + }; + if (!result.allowed) { + headers['Retry-After'] = String(result.retryAfterSeconds); + } + return headers; +} + +export function getRateLimitConfig(tier: Tier): RateLimitConfig { + return TIER_LIMITS[tier] ?? 
TIER_LIMITS.free; +} diff --git a/src/types.ts b/src/types.ts index 5d91711..45e5dd4 100644 --- a/src/types.ts +++ b/src/types.ts @@ -54,6 +54,35 @@ export interface AuthServiceRpc { name?: string; error?: string; }>; + + // ─── Quota (cost attribution) ──────────────────────────────── + checkQuota(params: { + tenantId: string; + userId?: string; + feature: string; + amount?: number; + }): Promise<{ + allowed: boolean; + remaining: number; + limit: number; + resetsAt?: string; + }>; + consumeQuota(params: { + tenantId: string; + userId?: string; + feature: string; + amount: number; + idempotencyKey?: string; + }): Promise<{ + success: boolean; + reservationId?: string; + remaining?: number; + error?: string; + }>; + commitOrRefundQuota( + reservationId: string, + outcome: 'success' | 'failed', + ): Promise; } // ─── Backend RPC surface (what product workers expose) ──────── @@ -98,6 +127,9 @@ export interface GatewayEnv { OAUTH_PROVIDER: OAuthHelpers; OAUTH_KV: KVNamespace; + // Rate limiting + RATELIMIT_KV: KVNamespace; + // Secrets SERVICE_BINDING_SECRET: string; diff --git a/test/audit.test.ts b/test/audit.test.ts index 78173eb..fe9ab95 100644 --- a/test/audit.test.ts +++ b/test/audit.test.ts @@ -176,6 +176,9 @@ describe('gateway audit integration', () => { registerUser: async (_n: string, _e: string, _p: string) => ({ valid: false }), provisionTenant: async (_p: { userId: string; source: string }) => ({ tenantId: '', userId: '', tier: 'free', delinquent: false, createdAt: '' }), exchangeSocialCode: async (_c: string) => ({ valid: false }), + checkQuota: async () => ({ allowed: true, remaining: 100, limit: 500 }), + consumeQuota: async () => ({ success: true, reservationId: 'res-1', remaining: 99 }), + commitOrRefundQuota: async () => {}, }; } @@ -206,6 +209,16 @@ describe('gateway audit integration', () => { getWithMetadata: async () => ({ value: null, metadata: null, cacheStatus: null }), } as unknown as KVNamespace; })(), + RATELIMIT_KV: (() => { + const 
store = new Map(); + return { + get: async (key: string) => store.get(key) ?? null, + put: async (key: string, value: string) => { store.set(key, value); }, + delete: async (key: string) => { store.delete(key); }, + list: async () => ({ keys: [], list_complete: true, cacheStatus: null }), + getWithMetadata: async () => ({ value: null, metadata: null, cacheStatus: null }), + } as unknown as KVNamespace; + })(), PLATFORM_EVENTS_QUEUE: { send: async () => {} } as unknown as Queue, SERVICE_BINDING_SECRET: 'test-secret', API_BASE_URL: 'https://mcp.stackbilt.dev', diff --git a/test/auth.test.ts b/test/auth.test.ts index 3527cc7..027f51f 100644 --- a/test/auth.test.ts +++ b/test/auth.test.ts @@ -16,6 +16,9 @@ function mockAuthService(overrides?: Partial): AuthServiceRpc { registerUser: async () => ({ valid: false }), provisionTenant: async () => ({ tenantId: '', userId: '', tier: 'free', delinquent: false, createdAt: '' }), exchangeSocialCode: async () => ({ valid: false }), + checkQuota: async () => ({ allowed: true, remaining: 100, limit: 500 }), + consumeQuota: async () => ({ success: true, reservationId: 'res-1', remaining: 99 }), + commitOrRefundQuota: async () => {}, ...overrides, }; } diff --git a/test/cost-attribution.test.ts b/test/cost-attribution.test.ts new file mode 100644 index 0000000..b3dcea2 --- /dev/null +++ b/test/cost-attribution.test.ts @@ -0,0 +1,144 @@ +import { describe, it, expect, vi } from 'vitest'; +import { + resolveToolCost, + isFreeTool, + reserveQuota, + settleQuota, + buildCostAttribution, +} from '../src/cost-attribution.js'; +import type { AuthServiceRpc } from '../src/types.js'; + +function mockAuthService(overrides?: Partial): AuthServiceRpc { + return { + validateApiKey: async () => ({ valid: true, tenant_id: 't', tier: 'pro', scopes: [] }), + validateJwt: async () => ({ valid: true, tenant_id: 't', user_id: 'u', tier: 'pro', scopes: [] }), + authenticateUser: async () => ({ valid: false }), + registerUser: async () => ({ valid: false 
}), + provisionTenant: async () => ({ tenantId: '', userId: '', tier: 'free', delinquent: false, createdAt: '' }), + exchangeSocialCode: async () => ({ valid: false }), + checkQuota: async () => ({ allowed: true, remaining: 100, limit: 500 }), + consumeQuota: async () => ({ success: true, reservationId: 'res-1', remaining: 99 }), + commitOrRefundQuota: async () => {}, + ...overrides, + }; +} + +describe('resolveToolCost', () => { + it('returns base cost for known tools', () => { + const cost = resolveToolCost('scaffold_create'); + expect(cost.baseCost).toBe(2); + expect(cost.feature).toBe('mcp.scaffold_create'); + }); + + it('returns 0 cost for read-only tools', () => { + expect(resolveToolCost('scaffold_status').baseCost).toBe(0); + expect(resolveToolCost('image_list_models').baseCost).toBe(0); + expect(resolveToolCost('flow_status').baseCost).toBe(0); + }); + + it('applies quality multiplier for image_generate', () => { + const draft = resolveToolCost('image_generate', { quality_tier: 'draft' }); + const ultra = resolveToolCost('image_generate', { quality_tier: 'ultra' }); + expect(ultra.baseCost).toBeGreaterThan(draft.baseCost); + }); + + it('returns default cost for unknown tools', () => { + const cost = resolveToolCost('unknown_tool'); + expect(cost.baseCost).toBe(1); + expect(cost.feature).toBe('mcp.unknown_tool'); + }); +}); + +describe('isFreeTool', () => { + it('returns true for zero-cost tools', () => { + expect(isFreeTool('scaffold_status')).toBe(true); + expect(isFreeTool('image_list_models')).toBe(true); + }); + + it('returns false for paid tools', () => { + expect(isFreeTool('image_generate')).toBe(false); + expect(isFreeTool('scaffold_create')).toBe(false); + }); + + it('returns false for unknown tools', () => { + expect(isFreeTool('nonexistent')).toBe(false); + }); +}); + +describe('reserveQuota', () => { + it('reserves quota via auth service', async () => { + const auth = mockAuthService(); + const result = await reserveQuota(auth, 'tenant-1', 
'user-1', 'scaffold_create'); + expect(result.allowed).toBe(true); + expect(result.reservationId).toBe('res-1'); + }); + + it('skips quota for free tools', async () => { + const auth = mockAuthService(); + const result = await reserveQuota(auth, 'tenant-1', 'user-1', 'scaffold_status'); + expect(result.allowed).toBe(true); + expect(result.reservationId).toBeUndefined(); + }); + + it('rejects when quota is exceeded', async () => { + const auth = mockAuthService({ + consumeQuota: async () => ({ success: false, error: 'Quota exceeded', remaining: 0 }), + }); + const result = await reserveQuota(auth, 'tenant-1', 'user-1', 'image_generate'); + expect(result.allowed).toBe(false); + expect(result.error).toContain('Quota exceeded'); + }); + + it('fails closed on auth service error for mutation tools', async () => { + const auth = mockAuthService({ + consumeQuota: async () => { throw new Error('RPC timeout'); }, + }); + const result = await reserveQuota(auth, 'tenant-1', 'user-1', 'scaffold_create'); + expect(result.allowed).toBe(false); + expect(result.error).toContain('unavailable'); + }); +}); + +describe('settleQuota', () => { + it('commits on success', async () => { + const commitFn = vi.fn(); + const auth = mockAuthService({ commitOrRefundQuota: commitFn }); + await settleQuota(auth, 'res-1', true); + expect(commitFn).toHaveBeenCalledWith('res-1', 'success'); + }); + + it('refunds on failure', async () => { + const commitFn = vi.fn(); + const auth = mockAuthService({ commitOrRefundQuota: commitFn }); + await settleQuota(auth, 'res-1', false); + expect(commitFn).toHaveBeenCalledWith('res-1', 'failed'); + }); + + it('skips settlement when no reservation ID', async () => { + const commitFn = vi.fn(); + const auth = mockAuthService({ commitOrRefundQuota: commitFn }); + await settleQuota(auth, undefined, true); + expect(commitFn).not.toHaveBeenCalled(); + }); + + it('does not throw on settlement failure', async () => { + const auth = mockAuthService({ + 
commitOrRefundQuota: async () => { throw new Error('RPC error'); }, + }); + // Should not throw + await expect(settleQuota(auth, 'res-1', true)).resolves.toBeUndefined(); + }); +}); + +describe('buildCostAttribution', () => { + it('returns feature and cost for known tools', () => { + const attr = buildCostAttribution('image_generate', { quality_tier: 'premium' }); + expect(attr.feature).toBe('mcp.image_generate'); + expect(attr.creditCost).toBe(15); // 5 * 3 (premium multiplier) + }); + + it('returns base cost for tools without args', () => { + const attr = buildCostAttribution('scaffold_create'); + expect(attr.creditCost).toBe(2); + }); +}); diff --git a/test/gateway.test.ts b/test/gateway.test.ts index 0516551..5e1311f 100644 --- a/test/gateway.test.ts +++ b/test/gateway.test.ts @@ -11,6 +11,9 @@ function mockAuthService(tier: string = 'pro'): AuthServiceRpc { registerUser: async () => ({ valid: false }), provisionTenant: async () => ({ tenantId: '', userId: '', tier: 'free', delinquent: false, createdAt: '' }), exchangeSocialCode: async () => ({ valid: false }), + checkQuota: async () => ({ allowed: true, remaining: 100, limit: 500 }), + consumeQuota: async () => ({ success: true, reservationId: 'res-1', remaining: 99 }), + commitOrRefundQuota: async () => {}, }; } @@ -42,6 +45,7 @@ function makeEnv(overrides?: Partial): GatewayEnv { IMG_FORGE: mockFetcher({ jsonrpc: '2.0', id: 1, result: { content: [{ type: 'text', text: 'image generated' }] } }), OAUTH_PROVIDER: {} as any, OAUTH_KV: mockKV(), + RATELIMIT_KV: mockKV(), PLATFORM_EVENTS_QUEUE: { send: async () => {} } as unknown as Queue, SERVICE_BINDING_SECRET: 'test-secret', API_BASE_URL: 'https://mcp.stackbilt.dev', diff --git a/test/oauth-handler.test.ts b/test/oauth-handler.test.ts index e93ac5e..3b6317d 100644 --- a/test/oauth-handler.test.ts +++ b/test/oauth-handler.test.ts @@ -90,6 +90,9 @@ function mockAuthService(overrides?: Partial) { email: 'social@example.com', name: 'Social User', })), + checkQuota: 
vi.fn(async () => ({ allowed: true, remaining: 100, limit: 500 })), + consumeQuota: vi.fn(async () => ({ success: true, reservationId: 'res-1', remaining: 99 })), + commitOrRefundQuota: vi.fn(async () => {}), ...overrides, }; } @@ -101,6 +104,7 @@ function makeEnv(overrides?: Partial): GatewayEnv { IMG_FORGE: {} as Fetcher, OAUTH_PROVIDER: mockOAuthProvider() as unknown as GatewayEnv['OAUTH_PROVIDER'], OAUTH_KV: mockKV(), + RATELIMIT_KV: mockKV(), PLATFORM_EVENTS_QUEUE: { send: async () => {} } as unknown as Queue, SERVICE_BINDING_SECRET: TEST_SECRET, API_BASE_URL: TEST_API_BASE_URL, diff --git a/test/rate-limiter.test.ts b/test/rate-limiter.test.ts new file mode 100644 index 0000000..743d19a --- /dev/null +++ b/test/rate-limiter.test.ts @@ -0,0 +1,90 @@ +import { describe, it, expect } from 'vitest'; +import { checkRateLimit, rateLimitHeaders, getRateLimitConfig } from '../src/rate-limiter.js'; + +function mockKV(): KVNamespace { + const store = new Map(); + return { + get: async (key: string) => store.get(key) ?? 
null, + put: async (key: string, value: string) => { store.set(key, value); }, + delete: async (key: string) => { store.delete(key); }, + list: async () => ({ keys: [], list_complete: true, cacheStatus: null }), + getWithMetadata: async () => ({ value: null, metadata: null, cacheStatus: null }), + } as unknown as KVNamespace; +} + +describe('checkRateLimit', () => { + it('allows first request', async () => { + const kv = mockKV(); + const result = await checkRateLimit(kv, 'tenant-1', 'free'); + expect(result.allowed).toBe(true); + expect(result.remaining).toBeGreaterThan(0); + }); + + it('decrements remaining with each request', async () => { + const kv = mockKV(); + const r1 = await checkRateLimit(kv, 'tenant-1', 'free'); + const r2 = await checkRateLimit(kv, 'tenant-1', 'free'); + expect(r2.remaining).toBe(r1.remaining - 1); + }); + + it('rejects when limit is exhausted', async () => { + const kv = mockKV(); + // Use a tiny limit + const config = { limit: 2, windowSeconds: 60 }; + await checkRateLimit(kv, 'tenant-1', 'free', config); + await checkRateLimit(kv, 'tenant-1', 'free', config); + const result = await checkRateLimit(kv, 'tenant-1', 'free', config); + expect(result.allowed).toBe(false); + expect(result.remaining).toBe(0); + }); + + it('tracks separate keys independently', async () => { + const kv = mockKV(); + const config = { limit: 1, windowSeconds: 60 }; + await checkRateLimit(kv, 'tenant-1', 'free', config); + const result = await checkRateLimit(kv, 'tenant-2', 'free', config); + expect(result.allowed).toBe(true); + }); + + it('higher tiers get higher limits', () => { + const free = getRateLimitConfig('free'); + const pro = getRateLimitConfig('pro'); + const enterprise = getRateLimitConfig('enterprise'); + expect(pro.limit).toBeGreaterThan(free.limit); + expect(enterprise.limit).toBeGreaterThan(pro.limit); + }); +}); + +describe('rateLimitHeaders', () => { + it('includes standard rate limit headers', () => { + const headers = rateLimitHeaders({ + 
allowed: true, + remaining: 42, + limit: 60, + retryAfterSeconds: 30, + }); + expect(headers['X-RateLimit-Limit']).toBe('60'); + expect(headers['X-RateLimit-Remaining']).toBe('42'); + expect(headers['X-RateLimit-Reset']).toBeDefined(); + }); + + it('adds Retry-After when not allowed', () => { + const headers = rateLimitHeaders({ + allowed: false, + remaining: 0, + limit: 60, + retryAfterSeconds: 45, + }); + expect(headers['Retry-After']).toBe('45'); + }); + + it('omits Retry-After when allowed', () => { + const headers = rateLimitHeaders({ + allowed: true, + remaining: 10, + limit: 60, + retryAfterSeconds: 30, + }); + expect(headers['Retry-After']).toBeUndefined(); + }); +}); diff --git a/wrangler.toml b/wrangler.toml index 5a0b71f..1e74033 100644 --- a/wrangler.toml +++ b/wrangler.toml @@ -42,6 +42,11 @@ service = "n8n-transpiler" binding = "OAUTH_KV" id = "9c165be8754749e3b543458ae8e596db" +# Rate limiting — fixed-window counters per API key / tenant +[[kv_namespaces]] +binding = "RATELIMIT_KV" +id = "240065d87b05466ab7b5527e3552817b" + # Custom domain — taken over from img-forge-mcp (ADR-039) [[routes]] pattern = "mcp.stackbilt.dev" From cbc778311c11e8a2b82f22e88621c20012ae4bfb Mon Sep 17 00:00:00 2001 From: Kurt Overmier Date: Fri, 17 Apr 2026 09:14:39 -0500 Subject: [PATCH 2/3] chore(26): address review + resolve post-rebase duplication with C-1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rebased onto current main (which absorbed C-1 scope enforcement, sprint-2 hardening, and legacy-grant-scope fallback since this PR was opened). Cleanup commit addresses review feedback plus one duplication that surfaced during the rebase. Changes: 1. Remove duplicate scope check at proxyToolCall dispatch. Main's C-1 block enforces scope-to-risk mapping using the RISK_REQUIRED_SCOPES table and returns INVALID_REQUEST with outcome=insufficient_scope.
The earlier version added here predated C-1 and returned INVALID_PARAMS with outcome=auth_denied. Keeping only the main/C-1 version — strictly more protective (catches READ_ONLY-with-empty-scopes that the older check skipped) and matches the existing gateway.test.ts expectations. 2. Fix "sliding window" comment in rate-limiter.ts — implementation is fixed-window (const windowStart = now - (now % windowSeconds)). 3. Remove unreachable isReadOnly branch in reserveQuota's catch. Free tools return earlier at `if (cost.baseCost === 0)`; by the time we're in the catch, baseCost > 0 always and the branch was dead. 4. Add 'rate_limited' to AuditArtifact.outcome and use it for the rate-limit denial path (was reusing 'auth_denied', which conflated quota/throttling rejections with auth failures in downstream analytics). 5. Update test/gateway-legacy-scope.test.ts mocks to include the new AuthServiceRpc quota methods (checkQuota, consumeQuota, commitOrRefundQuota) and the RATELIMIT_KV binding. Test passes unchanged afterward. Full suite: 176/176 passing. Typecheck clean.
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/audit.ts | 2 +- src/cost-attribution.ts | 9 +++------ src/gateway.ts | 19 +------------------ src/rate-limiter.ts | 2 +- test/gateway-legacy-scope.test.ts | 4 ++++ 5 files changed, 10 insertions(+), 26 deletions(-) diff --git a/src/audit.ts b/src/audit.ts index cf40773..620d04a 100644 --- a/src/audit.ts +++ b/src/audit.ts @@ -12,7 +12,7 @@ export interface AuditArtifact { risk_level: RiskLevel | 'UNKNOWN'; policy_decision: 'ALLOW' | 'DENY'; redacted_input_summary: string; - outcome: 'success' | 'error' | 'backend_error' | 'auth_denied' | 'unknown_tool' | 'invalid_params' | 'tier_denied' | 'insufficient_scope'; + outcome: 'success' | 'error' | 'backend_error' | 'auth_denied' | 'unknown_tool' | 'invalid_params' | 'tier_denied' | 'insufficient_scope' | 'rate_limited'; timestamp: string; latency_ms?: number; } diff --git a/src/cost-attribution.ts b/src/cost-attribution.ts index d49ba48..3e70c52 100644 --- a/src/cost-attribution.ts +++ b/src/cost-attribution.ts @@ -128,12 +128,9 @@ export async function reserveQuota( reservationId: result.reservationId, remaining: result.remaining, }; - } catch (err) { - // Quota service unavailable — fail open for read-only tools, closed for mutations - const isReadOnly = cost.baseCost <= 0; - if (isReadOnly) { - return { allowed: true }; - } + } catch { + // Quota service unavailable on a non-free tool — fail closed. + // Free-tool calls return earlier above and never reach this catch. 
return { allowed: false, error: 'Quota service unavailable', diff --git a/src/gateway.ts b/src/gateway.ts index 6a72f97..53ed4f9 100644 --- a/src/gateway.ts +++ b/src/gateway.ts @@ -1145,7 +1145,7 @@ async function handlePost(request: Request, env: GatewayEnv, oauthProps?: OAuthP risk_level: 'UNKNOWN', policy_decision: 'DENY', redacted_input_summary: '{}', - outcome: 'auth_denied', + outcome: 'rate_limited', timestamp: new Date().toISOString(), }, env); return jsonResponse( @@ -1264,23 +1264,6 @@ async function handlePost(request: Request, env: GatewayEnv, oauthProps?: OAuthP return rpcError(rpcId, JSON_RPC_METHOD_NOT_FOUND, `Unknown tool: ${toolName}`); } - // Scope enforcement: mutation tools require 'generate' scope - if (risk !== 'READ_ONLY' && !session.scopes.includes('generate')) { - audit({ - trace_id: traceId, - principal: session.userId ?? 'unknown', - tenant: session.tenantId ?? 'unknown', - tool: toolName, - risk_level: risk, - policy_decision: 'DENY', - redacted_input_summary: summarizeInput(toolArgs), - outcome: 'auth_denied', - timestamp: new Date().toISOString(), - }, env); - return rpcError(rpcId, JSON_RPC_INVALID_PARAMS, - `Tool "${toolName}" requires the "generate" scope. Your API key only has: ${session.scopes.join(', ')}`); - } - // Validate arguments are object-shaped const argValidation = validateToolArguments(toolArgs, { type: 'object' }); if (!argValidation.valid) { diff --git a/src/rate-limiter.ts b/src/rate-limiter.ts index 83b7594..9eac668 100644 --- a/src/rate-limiter.ts +++ b/src/rate-limiter.ts @@ -1,5 +1,5 @@ // ─── Rate Limiter ───────────────────────────────────────────── -// Sliding window rate limiting per API key / tenant. +// Fixed-window rate limiting per API key / tenant. // Uses KV with TTL for window expiration — no external dependencies. // Returns 429 with Retry-After header when limit exceeded. 
diff --git a/test/gateway-legacy-scope.test.ts b/test/gateway-legacy-scope.test.ts index cd3043c..c762eb2 100644 --- a/test/gateway-legacy-scope.test.ts +++ b/test/gateway-legacy-scope.test.ts @@ -31,6 +31,9 @@ function mockAuthService(): AuthServiceRpc { createdAt: '2026-04-11T00:00:00Z', }), exchangeSocialCode: async () => ({ valid: false }), + checkQuota: async () => ({ allowed: true, remaining: 1_000_000, limit: 1_000_000 }), + consumeQuota: async () => ({ success: true, reservationId: 'res-test', remaining: 999_999 }), + commitOrRefundQuota: async () => {}, }; } @@ -62,6 +65,7 @@ function makeEnv(): GatewayEnv { IMG_FORGE: mockFetcher(), OAUTH_PROVIDER: {} as any, OAUTH_KV: mockKV(), + RATELIMIT_KV: mockKV(), PLATFORM_EVENTS_QUEUE: { send: async () => {} } as unknown as Queue, SERVICE_BINDING_SECRET: 'test-secret', API_BASE_URL: 'https://mcp.stackbilt.dev', From e3f434c5a9e2695c502dca0ca84d3974278df1ac Mon Sep 17 00:00:00 2001 From: Kurt Overmier Date: Sat, 18 Apr 2026 02:32:58 -0500 Subject: [PATCH 3/3] docs(26): document rate limiting, cost attribution, and scope/tier enforcement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Align README, user guide, API reference, and architecture docs with the behavior shipped in PR #18 and hardened in PR #26 — corrects stale tier credits/multipliers, documents the fixed-window limiter and 429 semantics, and adds the scope/tier/risk-level enforcement matrix and quota attribution. Co-Authored-By: Claude Opus 4.7 (1M context) --- README.md | 6 +++- docs/api-reference.md | 67 ++++++++++++++++++++++++++++++++++------- docs/architecture.md | 29 +++++++++++++++--- docs/user-guide.md | 69 +++++++++++++++++++++++++++++++++++-------- 4 files changed, 142 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index 592d935..ae4277f 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,9 @@ Zero LLM calls for file generation. ~20ms for structure, ~2s with oracle prose. 
- **OAuth 2.1 with PKCE** — GitHub SSO, Google SSO, and email/password authentication - **Backend adapter pattern** — tool catalogs aggregated from multiple service bindings, namespaced to avoid collisions +- **Per-tier rate limiting** — fixed-window per-tenant limits via `RATELIMIT_KV` (free=20/min, hobby=60, pro=300, enterprise=1000); 429 with `Retry-After` and `X-RateLimit-*` headers +- **Cost attribution & quota** — every tool call carries a credit cost; quota is reserved via `edge-auth` before dispatch and committed/refunded on outcome; `image_generate` cost scales with `quality_tier` (1×/1×/3×/5×/8× for draft/standard/premium/ultra/ultra_plus) +- **Scope + tier enforcement** — `tools/list` is filtered by token scopes; `tools/call` requires the `generate` scope for mutating tools; expensive `image_generate` quality tiers (`premium` and above) are gated to Pro+ plans - **Security Constitution compliance** — every tool declares a risk level (`READ_ONLY`, `LOCAL_MUTATION`, `EXTERNAL_MUTATION`); structured audit logging with secret redaction; HMAC-signed identity tokens - **Coming-soon gate** — `PUBLIC_SIGNUPS_ENABLED` flag to control public access - **MCP JSON-RPC over HTTP** — supports both streaming (SSE) and request/response transport @@ -84,7 +87,8 @@ Deploys to the `mcp.stackbilt.dev` custom domain via Cloudflare Workers. 
| `AUTH_SERVICE` | Service Binding | RPC to `edge-auth` worker (`AuthEntrypoint`) | | `STACKBILDER` | Service Binding | Route to `edge-stack-architect-v2` worker | | `IMG_FORGE` | Service Binding | Route to `img-forge-mcp` worker | -| `OAUTH_KV` | KV Namespace | Stores social OAuth state (5-min TTL entries) | +| `OAUTH_KV` | KV Namespace | Stores social OAuth state (5-min TTL entries) and MCP sessions | +| `RATELIMIT_KV` | KV Namespace | Per-tenant fixed-window rate-limit counters (60s TTL) | | `PLATFORM_EVENTS_QUEUE` | Queue | Audit event pipeline (`stackbilt-user-events`) | | `MCP_REGISTRY_AUTH` | Variable | MCP Registry domain verification string (served at `/.well-known/mcp-registry-auth`) | diff --git a/docs/api-reference.md b/docs/api-reference.md index a28b9a2..141cecd 100644 --- a/docs/api-reference.md +++ b/docs/api-reference.md @@ -104,6 +104,8 @@ Returns the aggregated tool catalog from all backend adapters. Tools are namespaced by product (e.g. `image_generate`, `flow_create`). Each tool includes a JSON Schema for its `inputSchema`. +The catalog is **filtered by token scope**: tokens without the `generate` scope only see tools with risk level `READ_ONLY`. The full catalog is visible only to tokens that hold `generate`. + ### `tools/call` Invokes a tool on the appropriate backend. @@ -122,11 +124,15 @@ Invokes a tool on the appropriate backend. The gateway: 1. Validates the tool name exists in the catalog 2. Looks up the risk level from the route table -3. Generates a trace ID for audit -4. Proxies the call to the appropriate backend service binding -5. Parses the response (JSON or SSE) -6. Emits a structured audit event (to console + queue) -7. Returns the tool result +3. Enforces scope: tools with risk level `LOCAL_MUTATION`, `EXTERNAL_MUTATION`, or `DESTRUCTIVE` require the `generate` scope (rejected with `INVALID_REQUEST` and audit outcome `insufficient_scope`) +4. 
Enforces tier-restricted quality tiers for `image_generate` (`premium`, `ultra`, `ultra_plus` rejected for free/hobby plans with audit outcome `tier_denied`) +5. Reserves quota via `AUTH_SERVICE.consumeQuota` (cost from `src/cost-attribution.ts`); rejects with `INVALID_PARAMS` and outcome `tier_denied` if exceeded +6. Generates a trace ID for audit +7. Proxies the call to the appropriate backend service binding +8. Settles quota (commit on success, refund on failure) via `commitOrRefundQuota` +9. Parses the response (JSON or SSE) +10. Emits a structured audit event (to console + queue) +11. Returns the tool result, with `X-RateLimit-Limit`, `X-RateLimit-Remaining`, and `X-RateLimit-Reset` headers attached on success ### `ping` @@ -319,10 +325,48 @@ This replaces cookies in the stateless OAuth flow, keeping the gateway fully sta ## Scopes -| Scope | Allows | -|-------|--------| -| `generate` | Create content — images, architecture flows | -| `read` | View resources — models, job status, flow details | +| Scope | Allows | Enforced where | +|-------|--------|----------------| +| `generate` | Create content — images, scaffolds, architecture flows | `tools/list` filter (mutation tools hidden without it); `tools/call` for any tool with risk level `LOCAL_MUTATION`, `EXTERNAL_MUTATION`, or `DESTRUCTIVE` | +| `read` | View resources — models, job status, flow details | All `READ_ONLY` tools always visible | + +Both scopes are granted by default to new tokens issued via the gateway's OAuth flow. + +--- + +## Rate Limiting + +The gateway enforces a per-tenant fixed-window rate limit on every authenticated MCP request. 
Limits are tier-driven: + +| Tier | Requests / minute | +|------|-------------------| +| Free | 20 | +| Hobby | 60 | +| Pro | 300 | +| Enterprise | 1,000 | + +When exceeded, the gateway returns `429 Too Many Requests` with: + +| Header | Meaning | +|--------|---------| +| `Retry-After` | Seconds until the current window resets | +| `X-RateLimit-Limit` | Tier ceiling (e.g. `20`) | +| `X-RateLimit-Remaining` | Always `0` on a 429 response | +| `X-RateLimit-Reset` | Unix timestamp when the window resets | + +The same `X-RateLimit-*` headers are attached to successful `tools/call` responses so clients can pace themselves. `initialize`, `tools/list`, `ping`, and notifications currently do **not** echo rate-limit headers on success — those calls still count against the window, just without surfacing the counter to the client. + +The window is fixed (aligned to the start of each 60-second slot), not sliding. + +--- + +## Quota & Cost Attribution + +Mutating tool calls reserve credits via `AUTH_SERVICE.consumeQuota` before dispatch. The cost table lives in `src/cost-attribution.ts`; `image_generate` cost is `5 × quality multiplier` where multipliers are `draft=1, standard=1, premium=3, ultra=5, ultra_plus=8`. Read-only tools (`*_status`, `*_classify`, `image_list_models`, etc.) are free. + +If quota is exceeded, the call is rejected with `INVALID_PARAMS` and the message `Quota exceeded for <feature>`. + +For free and hobby tiers, `image_generate` quality tiers above `standard` are rejected at the gateway with `Quality tier "<tier>" requires a Pro plan or higher` — these calls do not reach the backend or consume quota.
--- @@ -334,7 +378,7 @@ Standard MCP JSON-RPC error codes: |------|---------| | `-32600` | Invalid request | | `-32601` | Method not found | -| `-32602` | Invalid params | +| `-32602` | Invalid params (also used for `Quota exceeded` and `Quality tier requires Pro plan` rejections) | | `-32603` | Internal error | HTTP-level errors: @@ -343,9 +387,10 @@ HTTP-level errors: |--------|---------| | `400` | Missing or malformed request | | `401` | Invalid or expired token (`invalid_token`) | -| `403` | Rate limited or payment delinquent (`insufficient_scope`) | +| `403` | `insufficient_scope` (token lacks a required scope) or auth-service-level denial | | `404` | Unknown path | | `405` | Method not allowed | +| `429` | Per-tenant rate limit exceeded (see [Rate Limiting](#rate-limiting)) | --- diff --git a/docs/architecture.md b/docs/architecture.md index 176e88d..6f4f1eb 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -166,7 +166,11 @@ enum RiskLevel { | `image.list_models` | IMG_FORGE | READ_ONLY | | `image.check_job` | IMG_FORGE | READ_ONLY | -Risk levels are used for audit classification, not for authorization enforcement — all authenticated users can call all tools within their quota. +Risk levels drive both audit classification AND authorization: + +- **`tools/list` filter** — `READ_ONLY` tools are visible to any authenticated session; tools with any other risk level are hidden from sessions that lack the `generate` scope. +- **`tools/call` enforcement** — `LOCAL_MUTATION`, `EXTERNAL_MUTATION`, and `DESTRUCTIVE` tools require the `generate` scope and return `INVALID_REQUEST` with audit outcome `insufficient_scope` otherwise. +- **Tier-restricted quality tiers** — `image_generate` arguments with `quality_tier` of `premium`, `ultra`, or `ultra_plus` require a Pro+ plan; free/hobby calls are rejected at the gateway with audit outcome `tier_denied` (see `enforceTierRestriction` in `src/gateway.ts`). 
## Audit — `audit.ts` @@ -222,10 +226,27 @@ Bearer token extraction and validation for non-OAuth paths: ### Rate Limiting -Enforced by `AUTH_SERVICE` (delegated to the auth worker). The gateway receives: +Two independent layers: + +1. **Gateway-side, per-tenant fixed-window limiter** (`src/rate-limiter.ts`) — counts every authenticated MCP request against a 60-second window in `RATELIMIT_KV`. Tier-driven ceiling: free=20, hobby=60, pro=300, enterprise=1000 req/min. Exceeding returns `429` with `Retry-After` and `X-RateLimit-*` headers. Window starts are aligned to `now - (now % 60)` so all tenants share the same boundaries. +2. **Auth-service-side checks** — `AUTH_SERVICE` may still reject upstream with: + - `insufficient_scope` (403) — payment delinquent + - `invalid_token` (401) — expired or invalid token + +The gateway-side limiter fires first (immediately after auth resolution) and short-circuits before any quota reserve or backend dispatch. Read-only and free tools both count against the limiter — only the `tools/call` quota path is gated by `isFreeTool`. + +### Quota & Cost Attribution + +`src/cost-attribution.ts` declares per-tool credit costs and an `image_generate` quality multiplier (`draft=1, standard=1, premium=3, ultra=5, ultra_plus=8` × `image_generate.baseCost: 5`). On `tools/call`: + +1. Resolve cost via `resolveToolCost(toolName, args)`. +2. If cost is non-zero, call `AUTH_SERVICE.consumeQuota({tenantId, userId, feature, amount})`. On failure, reject with `INVALID_PARAMS` and audit outcome `tier_denied` (overloaded — see follow-ups). +3. Dispatch to the backend. +4. Settle via `AUTH_SERVICE.commitOrRefundQuota(reservationId, success|failed)`. Settlement is best-effort; reservations auto-expire on the auth side if it fails. + +The gateway never holds canonical quota state — it is a metering/dispatch layer in front of `edge-auth`. 
-- `insufficient_scope` (403) — rate limited or payment delinquent -- `invalid_token` (401) — expired or invalid token +> **Note:** the `/api/scaffold` REST endpoint (used by the CLI) bypasses both the rate limiter and the quota/cost-attribution path. CLI traffic is unmetered today; only `/mcp` traffic exercises this enforcement layer. ## Dependencies diff --git a/docs/user-guide.md b/docs/user-guide.md index 0915e59..b37c0c1 100644 --- a/docs/user-guide.md +++ b/docs/user-guide.md @@ -26,7 +26,7 @@ Stackbilt exposes AI tools through the [Model Context Protocol](https://modelcon | `flow_advance` | Advance a flow to the next stage | LOCAL_MUTATION | | `flow_recover` | Recover a failed flow | LOCAL_MUTATION | -**Free tier**: 50 credits/month. No credit card required. Credits are weighted by operation complexity. +**Free tier**: 25 credits/month. No credit card required. Credits are weighted by operation complexity. See [§5 Quota & Billing](#5-quota--billing) for the full table. --- @@ -237,7 +237,7 @@ The client calls `image_generate` with your prompt. img-forge enhances the promp } ``` -**Quality tiers**: `draft` (fastest, SDXL), `standard` (FLUX Klein, default), `premium` (FLUX Dev), `ultra` (Gemini 2.5 Flash), `ultra_plus` (Gemini 3.1 Flash). +**Quality tiers**: `draft` (fastest, SDXL), `standard` (FLUX Klein, default), `premium` (FLUX Dev), `ultra` (Gemini 2.5 Flash), `ultra_plus` (Gemini 3.1 Flash). See [§5 Quota & Billing](#5-quota--billing) for credit costs and plan availability — `premium` and above require Pro or Enterprise. ### Classify Intent @@ -318,22 +318,62 @@ Both scopes are granted by default on the free tier. ## 5. 
Quota & Billing +### Monthly credit allocation + | Tier | Credits/month | Price | |------|--------------|-------| -| Free | 50 | $0 | -| Pro | 500 | Coming soon | -| Enterprise | 2,000 | Coming soon | +| Free | 25 | $0 | +| Hobby | 65 | Coming soon | +| Pro | 580 | Coming soon | +| Enterprise | 2,320 | Coming soon | + +### Per-call credit cost + +Most read-only tools (`*_status`, `*_classify`, `*_summary`, `*_quality`, `*_governance`, `*_pages`, `image_list_models`, `image_check_job`) cost **0 credits**. Mutating tools have a base cost: + +| Tool | Base cost | +|------|-----------| +| `image_generate` | 5 credits × quality multiplier (see below) | +| `scaffold_create` | 2 credits | +| `scaffold_publish` | 3 credits | +| `scaffold_deploy` | 5 credits | +| `scaffold_import` | 1 credit | +| `flow_create` | 2 credits | +| `visual_screenshot` | 1 credit | +| `visual_analyze` | 2 credits | + +### `image_generate` quality multipliers + +| Quality tier | Multiplier | Effective cost | Available on | +|--------------|-----------|----------------|--------------| +| `draft` | 1× | 5 credits | All tiers | +| `standard` | 1× | 5 credits | All tiers | +| `premium` | 3× | 15 credits | Pro + Enterprise only | +| `ultra` | 5× | 25 credits | Pro + Enterprise only | +| `ultra_plus` | 8× | 40 credits | Pro + Enterprise only | -Credits are weighted by operation: +Free and Hobby plans can request `draft` or `standard` only. Calls with higher quality tiers are rejected at the gateway with `Quality tier "<tier>" requires a Pro plan or higher`. -| Operation | Credits | -|-----------|---------| -| Draft quality | 1x | -| Standard quality | 2x | -| Premium quality | 5x | -| Ultra quality | 10x | +### How metering works -Your remaining quota is tracked automatically. When you hit the limit, tool calls return a quota error until the next billing cycle. +1. Before each call, the gateway reserves credits via `edge-auth`'s `consumeQuota` RPC. +2.
If the reservation succeeds, the tool runs and the reservation is committed (success) or refunded (failure) via `commitOrRefundQuota`. +3. If the reservation fails (insufficient quota), the call is rejected with `Quota exceeded for <feature>`. + +Free-tier quota resets monthly. When you hit the limit, tool calls return a quota error until the next cycle. + +### Rate limits + +Independent of credit quota, every authenticated MCP request counts against a per-tenant fixed-window rate limit: + +| Tier | Requests / minute | +|------|-------------------| +| Free | 20 | +| Hobby | 60 | +| Pro | 300 | +| Enterprise | 1,000 | + +When a request would exceed the limit, the gateway returns `429 Too Many Requests` with `Retry-After: <seconds>` and `X-RateLimit-Limit` / `X-RateLimit-Remaining` / `X-RateLimit-Reset` headers. The same headers are also attached to successful `tools/call` responses so clients can pace themselves; other MCP methods (`initialize`, `tools/list`, `ping`, notifications) currently do not echo rate-limit headers on success. --- @@ -359,6 +399,9 @@ Pass `github_token` as a parameter with a GitHub PAT that has `repo` scope. Or a ### Quota exceeded Check your usage at the beginning of each month. Free tier resets monthly. Upgrade options coming soon. +### Rate limited (HTTP 429) +You exceeded your tier's per-minute request budget (free=20, hobby=60, pro=300, enterprise=1000). Wait the number of seconds in the `Retry-After` response header and resume. The window is fixed (60s aligned), not sliding. + --- ## 7. Security