From 1d1e3552c53d663f5ee90e0d9abf09d5a25a4f69 Mon Sep 17 00:00:00 2001
From: AEGIS <aegis@stackbilt.dev>
Date: Sat, 4 Apr 2026 17:12:31 -0500
Subject: [PATCH 1/3] feat: add rate limiting, cost attribution, and scope
 enforcement (#18)

- Rate limiter: fixed window per-tenant using RATELIMIT_KV with
  tier-based limits (free=20/min, hobby=60, pro=300, enterprise=1000).
  Returns 429 with Retry-After and X-RateLimit-* headers.

- Cost attribution: per-tool credit costs with quality multipliers
  for image_generate. Reserves quota via edge-auth consumeQuota RPC
  before tool call, settles (commit/refund) after based on outcome.
  Free tools (read-only, zero cost) skip quota enforcement.

- Scope enforcement: mutation tools require 'generate' scope.
  tools/list filters catalog to match session scopes.

- AuthServiceRpc extended with checkQuota, consumeQuota, and
  commitOrRefundQuota methods matching edge-auth's entrypoint.

- All existing tests updated with new mocks; 25 new tests added.

Closes #18

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/cost-attribution.ts       | 178 ++++++++++++++++++++++++++++++++++
 src/gateway.ts                | 113 ++++++++++++++++++++-
 src/rate-limiter.ts           | 108 +++++++++++++++++++++
 src/types.ts                  |  32 ++++++
 test/audit.test.ts            |  13 +++
 test/auth.test.ts             |   3 +
 test/cost-attribution.test.ts | 144 +++++++++++++++++++++++++++
 test/gateway.test.ts          |   4 +
 test/oauth-handler.test.ts    |   4 +
 test/rate-limiter.test.ts     |  90 +++++++++++++++++
 wrangler.toml                 |   5 +
 11 files changed, 692 insertions(+), 2 deletions(-)
 create mode 100644 src/cost-attribution.ts
 create mode 100644 src/rate-limiter.ts
 create mode 100644 test/cost-attribution.test.ts
 create mode 100644 test/rate-limiter.test.ts

diff --git a/src/cost-attribution.ts b/src/cost-attribution.ts
new file mode 100644
index 0000000..d49ba48
--- /dev/null
+++ b/src/cost-attribution.ts
@@ -0,0 +1,178 @@
+// ─── Cost Attribution ─────────────────────────────────────────
+// Maps tool calls to credit costs and enforces quotas via edge-auth.
+// Every tool call is metered: reserve quota before, commit or refund after.
+// Cost data flows to the audit pipeline for billing dashboards.
+
+import type { Tier, AuthServiceRpc } from './types.js';
+import type { AuditArtifact } from './audit.js';
+
+// ─── Credit cost per tool call ──────────────────────────────────
+// Costs are in "credits" — 1 credit = 1 unit of the tier's allocation.
+// Expensive operations (image gen, deploy) cost more.
+export interface ToolCost {
+  /** Base credit cost for the tool call */
+  baseCost: number;
+  /** Feature key for quota tracking (maps to edge-auth quota.feature) */
+  feature: string;
+}
+
+const TOOL_COSTS: Record<string, ToolCost> = {
+  // img-forge: costs depend on quality tier (resolved at call time)
+  'image_generate':    { baseCost: 5,  feature: 'mcp.image_generate' },
+  'image_list_models': { baseCost: 0,  feature: 'mcp.image_list_models' },
+  'image_check_job':   { baseCost: 0,  feature: 'mcp.image_check_job' },
+
+  // TarotScript scaffold
+  'scaffold_create':   { baseCost: 2,  feature: 'mcp.scaffold_create' },
+  'scaffold_classify': { baseCost: 0,  feature: 'mcp.scaffold_classify' },
+  'scaffold_status':   { baseCost: 0,  feature: 'mcp.scaffold_status' },
+  'scaffold_publish':  { baseCost: 3,  feature: 'mcp.scaffold_publish' },
+  'scaffold_deploy':   { baseCost: 5,  feature: 'mcp.scaffold_deploy' },
+  'scaffold_import':   { baseCost: 1,  feature: 'mcp.scaffold_import' },
+
+  // Flow tools
+  'flow_create':       { baseCost: 2,  feature: 'mcp.flow_create' },
+  'flow_status':       { baseCost: 0,  feature: 'mcp.flow_status' },
+  'flow_summary':      { baseCost: 0,  feature: 'mcp.flow_summary' },
+  'flow_quality':      { baseCost: 0,  feature: 'mcp.flow_quality' },
+  'flow_governance':   { baseCost: 0,  feature: 'mcp.flow_governance' },
+
+  // Visual QA
+  'visual_screenshot': { baseCost: 1,  feature: 'mcp.visual_screenshot' },
+  'visual_analyze':    { baseCost: 2,  feature: 'mcp.visual_analyze' },
+  'visual_pages':      { baseCost: 0,  feature: 'mcp.visual_pages' },
+};
+
+// Quality tier multipliers for image_generate
+const IMAGE_QUALITY_MULTIPLIER: Record<string, number> = {
+  draft:      1,
+  standard:   1,
+  premium:    3,
+  ultra:      5,
+  ultra_plus: 8,
+};
+
+/**
+ * Resolve the credit cost for a tool call (own-key lookups only), scaling image_generate by quality tier.
+ */
+export function resolveToolCost(
+  toolName: string,
+  args?: Record<string, unknown>,
+): ToolCost {
+  const base = Object.prototype.hasOwnProperty.call(TOOL_COSTS, toolName) ? TOOL_COSTS[toolName] : undefined;
+  if (!base) {
+    // Unknown tools (incl. inherited names like "constructor") cost 1 credit by default (conservative)
+    return { baseCost: 1, feature: `mcp.${toolName}` };
+  }
+
+  // Apply quality multiplier for image_generate; non-numeric lookups (unknown or inherited keys) fall back to 1
+  if (toolName === 'image_generate' && args?.quality_tier) {
+    const found = IMAGE_QUALITY_MULTIPLIER[args.quality_tier as string];
+    return { ...base, baseCost: base.baseCost * (typeof found === 'number' ? found : 1) };
+  }
+
+  return base;
+}
+
+/**
+ * Check if a tool call is free (cost = 0). Free calls skip quota enforcement.
+ */
+export function isFreeTool(toolName: string): boolean {
+  const cost = TOOL_COSTS[toolName];
+  return cost !== undefined && cost.baseCost === 0;
+}
+
+export interface QuotaCheckResult {
+  allowed: boolean;
+  reservationId?: string;
+  remaining?: number;
+  error?: string;
+}
+
+/**
+ * Check and reserve quota for a tool call via edge-auth RPC.
+ * Returns a reservation ID that must be committed or refunded after the call.
+ */
+export async function reserveQuota(
+  authService: AuthServiceRpc,
+  tenantId: string,
+  userId: string,
+  toolName: string,
+  args?: Record<string, unknown>,
+): Promise<QuotaCheckResult> {
+  const cost = resolveToolCost(toolName, args);
+
+  // Free tools don't consume quota
+  if (cost.baseCost === 0) {
+    return { allowed: true };
+  }
+
+  try {
+    const result = await authService.consumeQuota({
+      tenantId,
+      userId,
+      feature: cost.feature,
+      amount: cost.baseCost,
+    });
+
+    if (!result.success) {
+      return {
+        allowed: false,
+        error: result.error ?? 'Quota exceeded',
+        remaining: result.remaining,
+      };
+    }
+
+    return {
+      allowed: true,
+      reservationId: result.reservationId,
+      remaining: result.remaining,
+    };
+  } catch (err) {
+    // Quota service unavailable — fail open for read-only tools, closed for mutations
+    const isReadOnly = cost.baseCost <= 0;
+    if (isReadOnly) {
+      return { allowed: true };
+    }
+    return {
+      allowed: false,
+      error: 'Quota service unavailable',
+    };
+  }
+}
+
+/**
+ * Commit or refund a quota reservation based on tool call outcome.
+ */
+export async function settleQuota(
+  authService: AuthServiceRpc,
+  reservationId: string | undefined,
+  success: boolean,
+): Promise<void> {
+  if (!reservationId) return;
+
+  try {
+    await authService.commitOrRefundQuota(
+      reservationId,
+      success ? 'success' : 'failed',
+    );
+  } catch {
+    // Best-effort — don't fail the tool call if settlement fails.
+    // The reservation will auto-expire in edge-auth.
+    console.error(`[cost] Failed to settle reservation ${reservationId}`);
+  }
+}
+
+/**
+ * Build cost attribution data for the audit artifact.
+ */
+export function buildCostAttribution(
+  toolName: string,
+  args?: Record<string, unknown>,
+): { feature: string; creditCost: number } {
+  const cost = resolveToolCost(toolName, args);
+  return {
+    feature: cost.feature,
+    creditCost: cost.baseCost,
+  };
+}
diff --git a/src/gateway.ts b/src/gateway.ts
index 67d003e..6a72f97 100644
--- a/src/gateway.ts
+++ b/src/gateway.ts
@@ -13,6 +13,8 @@ import { materializeScaffold } from './scaffold-materializer.js';
 import { publishToGitHub } from './scaffold-publish.js';
 import { classifyIntention, type IntentClassification } from './intent-classifier.js';
 import { logDivergence } from './divergence-logger.js';
+import { checkRateLimit, rateLimitHeaders, type RateLimitResult } from './rate-limiter.js';
+import { reserveQuota, settleQuota, buildCostAttribution, isFreeTool } from './cost-attribution.js';
 
 const MCP_PROTOCOL_VERSION = '2025-03-26';
 const JSON_RPC_PARSE_ERROR = -32700;
@@ -1131,6 +1133,28 @@ async function handlePost(request: Request, env: GatewayEnv, oauthProps?: OAuthP
     );
   }
 
+  // Rate limiting — check before processing
+  const rateLimitKey = authResult.tenantId ?? authResult.userId ?? 'unknown';
+  const rlResult = await checkRateLimit(env.RATELIMIT_KV, rateLimitKey, authResult.tier);
+  if (!rlResult.allowed) {
+    audit({
+      trace_id: generateTraceId(),
+      principal: authResult.userId ?? 'unknown',
+      tenant: authResult.tenantId ?? 'unknown',
+      tool: 'rate_limit',
+      risk_level: 'UNKNOWN',
+      policy_decision: 'DENY',
+      redacted_input_summary: '{}',
+      outcome: 'auth_denied',
+      timestamp: new Date().toISOString(),
+    }, env);
+    return jsonResponse(
+      { error: 'Rate limit exceeded', code: 'RATE_LIMITED' },
+      429,
+      rateLimitHeaders(rlResult),
+    );
+  }
+
   // Validate Accept header
   const accept = request.headers.get('Accept') ?? '';
   if (!accept.includes('application/json') && !accept.includes('*/*') && accept !== '') {
@@ -1186,7 +1210,18 @@ async function handlePost(request: Request, env: GatewayEnv, oauthProps?: OAuthP
   // ─── tools/list ─────────────────────────────────────────
   if (rpcMethod === 'tools/list') {
     // KV handles session expiration via expirationTtl — no manual pruning needed
-    const tools = buildAggregatedCatalog();
+    let tools = buildAggregatedCatalog();
+
+    // Scope-based filtering: only show tools the session has access to
+    const hasGenerate = session.scopes.includes('generate');
+    if (!hasGenerate) {
+      // Read-only scope — filter out mutation tools
+      tools = tools.filter(t => {
+        const risk = getToolRiskLevel(t.name);
+        return risk === 'READ_ONLY';
+      });
+    }
+
     return rpcResult(rpcId, { tools });
   }
 
@@ -1229,6 +1264,23 @@ async function handlePost(request: Request, env: GatewayEnv, oauthProps?: OAuthP
       return rpcError(rpcId, JSON_RPC_METHOD_NOT_FOUND, `Unknown tool: ${toolName}`);
     }
 
+    // Scope enforcement: mutation tools require 'generate' scope
+    if (risk !== 'READ_ONLY' && !session.scopes.includes('generate')) {
+      audit({
+        trace_id: traceId,
+        principal: session.userId ?? 'unknown',
+        tenant: session.tenantId ?? 'unknown',
+        tool: toolName,
+        risk_level: risk,
+        policy_decision: 'DENY',
+        redacted_input_summary: summarizeInput(toolArgs),
+        outcome: 'auth_denied',
+        timestamp: new Date().toISOString(),
+      }, env);
+      return rpcError(rpcId, JSON_RPC_INVALID_PARAMS,
+        `Tool "${toolName}" requires the "generate" scope. Your API key only has: ${session.scopes.join(', ')}`);
+    }
+
     // Validate arguments are object-shaped
     const argValidation = validateToolArguments(toolArgs, { type: 'object' });
     if (!argValidation.valid) {
@@ -1290,8 +1342,65 @@ async function handlePost(request: Request, env: GatewayEnv, oauthProps?: OAuthP
       return rpcError(rpcId, JSON_RPC_INVALID_PARAMS, tierDenied);
     }
 
+    // ─── Cost attribution: reserve quota before tool call ────
+    const costInfo = buildCostAttribution(toolName, toolArgs as Record<string, unknown> | undefined);
+    let quotaReservation: { reservationId?: string } = {};
+
+    if (!isFreeTool(toolName) && session.tenantId) {
+      const quotaResult = await reserveQuota(
+        env.AUTH_SERVICE,
+        session.tenantId,
+        session.userId ?? '',
+        toolName,
+        toolArgs as Record<string, unknown> | undefined,
+      );
+
+      if (!quotaResult.allowed) {
+        audit({
+          trace_id: traceId,
+          principal: session.userId ?? 'unknown',
+          tenant: session.tenantId ?? 'unknown',
+          tool: toolName,
+          risk_level: risk,
+          policy_decision: 'DENY',
+          redacted_input_summary: summarizeInput(toolArgs),
+          outcome: 'tier_denied',
+          timestamp: new Date().toISOString(),
+        }, env);
+        return rpcError(rpcId, JSON_RPC_INVALID_PARAMS,
+          `Quota exceeded for ${toolName}. ${quotaResult.error ?? 'Upgrade your plan for more credits.'}`);
+      }
+
+      quotaReservation = { reservationId: quotaResult.reservationId };
+    }
+
     const result = await proxyToolCall(env, toolName, toolArgs, session, traceId);
-    return rpcResult(rpcId, result);
+
+    // ─── Cost attribution: settle quota after tool call ──────
+    const toolSucceeded = !result.isError;
+    await settleQuota(env.AUTH_SERVICE, quotaReservation.reservationId, toolSucceeded);
+
+    // Queue the audit event for this call (TODO(review): costInfo computed above is not attached to it)
+    queueAuditEvent(env.PLATFORM_EVENTS_QUEUE, {
+      trace_id: traceId,
+      principal: session.userId ?? 'unknown',
+      tenant: session.tenantId ?? 'unknown',
+      tool: toolName,
+      risk_level: risk,
+      policy_decision: 'ALLOW',
+      redacted_input_summary: summarizeInput(toolArgs),
+      outcome: toolSucceeded ? 'success' : 'error',
+      timestamp: new Date().toISOString(),
+      latency_ms: 0, // latency is tracked in proxyToolCall's own audit
+    });
+
+    // Add rate limit headers to successful responses
+    const response = rpcResult(rpcId, result);
+    const rlHeaders = rateLimitHeaders(rlResult);
+    for (const [k, v] of Object.entries(rlHeaders)) {
+      response.headers.set(k, v);
+    }
+    return response;
   }
 
   return rpcError(rpcId, JSON_RPC_METHOD_NOT_FOUND, `Unknown method: ${rpcMethod}`);
diff --git a/src/rate-limiter.ts b/src/rate-limiter.ts
new file mode 100644
index 0000000..83b7594
--- /dev/null
+++ b/src/rate-limiter.ts
@@ -0,0 +1,108 @@
+// ─── Rate Limiter ─────────────────────────────────────────────
+// Sliding window rate limiting per API key / tenant.
+// Uses KV with TTL for window expiration — no external dependencies.
+// Returns 429 with Retry-After header when limit exceeded.
+
+import type { Tier } from './types.js';
+
+export interface RateLimitConfig {
+  /** Max requests per window */
+  limit: number;
+  /** Window size in seconds */
+  windowSeconds: number;
+}
+
+// Per-tier rate limits — configurable, conservative defaults
+const TIER_LIMITS: Record<Tier, RateLimitConfig> = {
+  free:       { limit: 20,   windowSeconds: 60 },
+  hobby:     { limit: 60,   windowSeconds: 60 },
+  pro:        { limit: 300,  windowSeconds: 60 },
+  enterprise: { limit: 1000, windowSeconds: 60 },
+};
+
+export interface RateLimitResult {
+  allowed: boolean;
+  /** Requests remaining in current window */
+  remaining: number;
+  /** Total limit for this window */
+  limit: number;
+  /** Seconds until window resets */
+  retryAfterSeconds: number;
+}
+
+interface WindowState {
+  count: number;
+  windowStart: number;
+}
+
+const RATE_LIMIT_PREFIX = 'rl:';
+
+/**
+ * Check and increment rate limit for a given key (API key ID, tenant ID, etc.)
+ * Uses a simple fixed-window approach with KV TTL for auto-cleanup.
+ */
+export async function checkRateLimit(
+  kv: KVNamespace,
+  key: string,
+  tier: Tier,
+  config?: RateLimitConfig,
+): Promise<RateLimitResult> {
+  const { limit, windowSeconds } = config ?? TIER_LIMITS[tier] ?? TIER_LIMITS.free;
+  const now = Math.floor(Date.now() / 1000);
+  const windowStart = now - (now % windowSeconds);
+  const kvKey = `${RATE_LIMIT_PREFIX}${key}:${windowStart}`;
+
+  // Read current window count
+  const raw = await kv.get(kvKey);
+  let state: WindowState;
+
+  if (raw) {
+    state = JSON.parse(raw) as WindowState;
+  } else {
+    state = { count: 0, windowStart };
+  }
+
+  const retryAfterSeconds = windowSeconds - (now - windowStart);
+
+  if (state.count >= limit) {
+    return {
+      allowed: false,
+      remaining: 0,
+      limit,
+      retryAfterSeconds,
+    };
+  }
+
+  // Increment
+  state.count += 1;
+  // Write back with TTL — window auto-expires
+  await kv.put(kvKey, JSON.stringify(state), {
+    expirationTtl: windowSeconds + 10, // small buffer past window end
+  });
+
+  return {
+    allowed: true,
+    remaining: limit - state.count,
+    limit,
+    retryAfterSeconds,
+  };
+}
+
+/**
+ * Build standard rate limit response headers.
+ */
+export function rateLimitHeaders(result: RateLimitResult): Record<string, string> {
+  const headers: Record<string, string> = {
+    'X-RateLimit-Limit': String(result.limit),
+    'X-RateLimit-Remaining': String(result.remaining),
+    'X-RateLimit-Reset': String(Math.floor(Date.now() / 1000) + result.retryAfterSeconds),
+  };
+  if (!result.allowed) {
+    headers['Retry-After'] = String(result.retryAfterSeconds);
+  }
+  return headers;
+}
+
+export function getRateLimitConfig(tier: Tier): RateLimitConfig {
+  return TIER_LIMITS[tier] ?? TIER_LIMITS.free;
+}
diff --git a/src/types.ts b/src/types.ts
index 5d91711..45e5dd4 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -54,6 +54,35 @@ export interface AuthServiceRpc {
     name?: string;
     error?: string;
   }>;
+
+  // ─── Quota (cost attribution) ────────────────────────────────
+  checkQuota(params: {
+    tenantId: string;
+    userId?: string;
+    feature: string;
+    amount?: number;
+  }): Promise<{
+    allowed: boolean;
+    remaining: number;
+    limit: number;
+    resetsAt?: string;
+  }>;
+  consumeQuota(params: {
+    tenantId: string;
+    userId?: string;
+    feature: string;
+    amount: number;
+    idempotencyKey?: string;
+  }): Promise<{
+    success: boolean;
+    reservationId?: string;
+    remaining?: number;
+    error?: string;
+  }>;
+  commitOrRefundQuota(
+    reservationId: string,
+    outcome: 'success' | 'failed',
+  ): Promise<void>;
 }
 
 // ─── Backend RPC surface (what product workers expose) ────────
@@ -98,6 +127,9 @@ export interface GatewayEnv {
   OAUTH_PROVIDER: OAuthHelpers;
   OAUTH_KV: KVNamespace;
 
+  // Rate limiting
+  RATELIMIT_KV: KVNamespace;
+
   // Secrets
   SERVICE_BINDING_SECRET: string;
 
diff --git a/test/audit.test.ts b/test/audit.test.ts
index 78173eb..fe9ab95 100644
--- a/test/audit.test.ts
+++ b/test/audit.test.ts
@@ -176,6 +176,9 @@ describe('gateway audit integration', () => {
       registerUser: async (_n: string, _e: string, _p: string) => ({ valid: false }),
       provisionTenant: async (_p: { userId: string; source: string }) => ({ tenantId: '', userId: '', tier: 'free', delinquent: false, createdAt: '' }),
       exchangeSocialCode: async (_c: string) => ({ valid: false }),
+      checkQuota: async () => ({ allowed: true, remaining: 100, limit: 500 }),
+      consumeQuota: async () => ({ success: true, reservationId: 'res-1', remaining: 99 }),
+      commitOrRefundQuota: async () => {},
     };
   }
 
@@ -206,6 +209,16 @@ describe('gateway audit integration', () => {
           getWithMetadata: async () => ({ value: null, metadata: null, cacheStatus: null }),
         } as unknown as KVNamespace;
       })(),
+      RATELIMIT_KV: (() => {
+        const store = new Map<string, string>();
+        return {
+          get: async (key: string) => store.get(key) ?? null,
+          put: async (key: string, value: string) => { store.set(key, value); },
+          delete: async (key: string) => { store.delete(key); },
+          list: async () => ({ keys: [], list_complete: true, cacheStatus: null }),
+          getWithMetadata: async () => ({ value: null, metadata: null, cacheStatus: null }),
+        } as unknown as KVNamespace;
+      })(),
       PLATFORM_EVENTS_QUEUE: { send: async () => {} } as unknown as Queue,
       SERVICE_BINDING_SECRET: 'test-secret',
       API_BASE_URL: 'https://mcp.stackbilt.dev',
diff --git a/test/auth.test.ts b/test/auth.test.ts
index 3527cc7..027f51f 100644
--- a/test/auth.test.ts
+++ b/test/auth.test.ts
@@ -16,6 +16,9 @@ function mockAuthService(overrides?: Partial<AuthServiceRpc>): AuthServiceRpc {
     registerUser: async () => ({ valid: false }),
     provisionTenant: async () => ({ tenantId: '', userId: '', tier: 'free', delinquent: false, createdAt: '' }),
     exchangeSocialCode: async () => ({ valid: false }),
+    checkQuota: async () => ({ allowed: true, remaining: 100, limit: 500 }),
+    consumeQuota: async () => ({ success: true, reservationId: 'res-1', remaining: 99 }),
+    commitOrRefundQuota: async () => {},
     ...overrides,
   };
 }
diff --git a/test/cost-attribution.test.ts b/test/cost-attribution.test.ts
new file mode 100644
index 0000000..b3dcea2
--- /dev/null
+++ b/test/cost-attribution.test.ts
@@ -0,0 +1,144 @@
+import { describe, it, expect, vi } from 'vitest';
+import {
+  resolveToolCost,
+  isFreeTool,
+  reserveQuota,
+  settleQuota,
+  buildCostAttribution,
+} from '../src/cost-attribution.js';
+import type { AuthServiceRpc } from '../src/types.js';
+
+function mockAuthService(overrides?: Partial<AuthServiceRpc>): AuthServiceRpc {
+  return {
+    validateApiKey: async () => ({ valid: true, tenant_id: 't', tier: 'pro', scopes: [] }),
+    validateJwt: async () => ({ valid: true, tenant_id: 't', user_id: 'u', tier: 'pro', scopes: [] }),
+    authenticateUser: async () => ({ valid: false }),
+    registerUser: async () => ({ valid: false }),
+    provisionTenant: async () => ({ tenantId: '', userId: '', tier: 'free', delinquent: false, createdAt: '' }),
+    exchangeSocialCode: async () => ({ valid: false }),
+    checkQuota: async () => ({ allowed: true, remaining: 100, limit: 500 }),
+    consumeQuota: async () => ({ success: true, reservationId: 'res-1', remaining: 99 }),
+    commitOrRefundQuota: async () => {},
+    ...overrides,
+  };
+}
+
+describe('resolveToolCost', () => {
+  it('returns base cost for known tools', () => {
+    const cost = resolveToolCost('scaffold_create');
+    expect(cost.baseCost).toBe(2);
+    expect(cost.feature).toBe('mcp.scaffold_create');
+  });
+
+  it('returns 0 cost for read-only tools', () => {
+    expect(resolveToolCost('scaffold_status').baseCost).toBe(0);
+    expect(resolveToolCost('image_list_models').baseCost).toBe(0);
+    expect(resolveToolCost('flow_status').baseCost).toBe(0);
+  });
+
+  it('applies quality multiplier for image_generate', () => {
+    const draft = resolveToolCost('image_generate', { quality_tier: 'draft' });
+    const ultra = resolveToolCost('image_generate', { quality_tier: 'ultra' });
+    expect(ultra.baseCost).toBeGreaterThan(draft.baseCost);
+  });
+
+  it('returns default cost for unknown tools', () => {
+    const cost = resolveToolCost('unknown_tool');
+    expect(cost.baseCost).toBe(1);
+    expect(cost.feature).toBe('mcp.unknown_tool');
+  });
+});
+
+describe('isFreeTool', () => {
+  it('returns true for zero-cost tools', () => {
+    expect(isFreeTool('scaffold_status')).toBe(true);
+    expect(isFreeTool('image_list_models')).toBe(true);
+  });
+
+  it('returns false for paid tools', () => {
+    expect(isFreeTool('image_generate')).toBe(false);
+    expect(isFreeTool('scaffold_create')).toBe(false);
+  });
+
+  it('returns false for unknown tools', () => {
+    expect(isFreeTool('nonexistent')).toBe(false);
+  });
+});
+
+describe('reserveQuota', () => {
+  it('reserves quota via auth service', async () => {
+    const auth = mockAuthService();
+    const result = await reserveQuota(auth, 'tenant-1', 'user-1', 'scaffold_create');
+    expect(result.allowed).toBe(true);
+    expect(result.reservationId).toBe('res-1');
+  });
+
+  it('skips quota for free tools', async () => {
+    const auth = mockAuthService();
+    const result = await reserveQuota(auth, 'tenant-1', 'user-1', 'scaffold_status');
+    expect(result.allowed).toBe(true);
+    expect(result.reservationId).toBeUndefined();
+  });
+
+  it('rejects when quota is exceeded', async () => {
+    const auth = mockAuthService({
+      consumeQuota: async () => ({ success: false, error: 'Quota exceeded', remaining: 0 }),
+    });
+    const result = await reserveQuota(auth, 'tenant-1', 'user-1', 'image_generate');
+    expect(result.allowed).toBe(false);
+    expect(result.error).toContain('Quota exceeded');
+  });
+
+  it('fails closed on auth service error for mutation tools', async () => {
+    const auth = mockAuthService({
+      consumeQuota: async () => { throw new Error('RPC timeout'); },
+    });
+    const result = await reserveQuota(auth, 'tenant-1', 'user-1', 'scaffold_create');
+    expect(result.allowed).toBe(false);
+    expect(result.error).toContain('unavailable');
+  });
+});
+
+describe('settleQuota', () => {
+  it('commits on success', async () => {
+    const commitFn = vi.fn();
+    const auth = mockAuthService({ commitOrRefundQuota: commitFn });
+    await settleQuota(auth, 'res-1', true);
+    expect(commitFn).toHaveBeenCalledWith('res-1', 'success');
+  });
+
+  it('refunds on failure', async () => {
+    const commitFn = vi.fn();
+    const auth = mockAuthService({ commitOrRefundQuota: commitFn });
+    await settleQuota(auth, 'res-1', false);
+    expect(commitFn).toHaveBeenCalledWith('res-1', 'failed');
+  });
+
+  it('skips settlement when no reservation ID', async () => {
+    const commitFn = vi.fn();
+    const auth = mockAuthService({ commitOrRefundQuota: commitFn });
+    await settleQuota(auth, undefined, true);
+    expect(commitFn).not.toHaveBeenCalled();
+  });
+
+  it('does not throw on settlement failure', async () => {
+    const auth = mockAuthService({
+      commitOrRefundQuota: async () => { throw new Error('RPC error'); },
+    });
+    // Should not throw
+    await expect(settleQuota(auth, 'res-1', true)).resolves.toBeUndefined();
+  });
+});
+
+describe('buildCostAttribution', () => {
+  it('returns feature and cost for known tools', () => {
+    const attr = buildCostAttribution('image_generate', { quality_tier: 'premium' });
+    expect(attr.feature).toBe('mcp.image_generate');
+    expect(attr.creditCost).toBe(15); // 5 * 3 (premium multiplier)
+  });
+
+  it('returns base cost for tools without args', () => {
+    const attr = buildCostAttribution('scaffold_create');
+    expect(attr.creditCost).toBe(2);
+  });
+});
diff --git a/test/gateway.test.ts b/test/gateway.test.ts
index 0516551..5e1311f 100644
--- a/test/gateway.test.ts
+++ b/test/gateway.test.ts
@@ -11,6 +11,9 @@ function mockAuthService(tier: string = 'pro'): AuthServiceRpc {
     registerUser: async () => ({ valid: false }),
     provisionTenant: async () => ({ tenantId: '', userId: '', tier: 'free', delinquent: false, createdAt: '' }),
     exchangeSocialCode: async () => ({ valid: false }),
+    checkQuota: async () => ({ allowed: true, remaining: 100, limit: 500 }),
+    consumeQuota: async () => ({ success: true, reservationId: 'res-1', remaining: 99 }),
+    commitOrRefundQuota: async () => {},
   };
 }
 
@@ -42,6 +45,7 @@ function makeEnv(overrides?: Partial<GatewayEnv>): GatewayEnv {
     IMG_FORGE: mockFetcher({ jsonrpc: '2.0', id: 1, result: { content: [{ type: 'text', text: 'image generated' }] } }),
     OAUTH_PROVIDER: {} as any,
     OAUTH_KV: mockKV(),
+    RATELIMIT_KV: mockKV(),
     PLATFORM_EVENTS_QUEUE: { send: async () => {} } as unknown as Queue,
     SERVICE_BINDING_SECRET: 'test-secret',
     API_BASE_URL: 'https://mcp.stackbilt.dev',
diff --git a/test/oauth-handler.test.ts b/test/oauth-handler.test.ts
index e93ac5e..3b6317d 100644
--- a/test/oauth-handler.test.ts
+++ b/test/oauth-handler.test.ts
@@ -90,6 +90,9 @@ function mockAuthService(overrides?: Partial<GatewayEnv['AUTH_SERVICE']>) {
       email: 'social@example.com',
       name: 'Social User',
     })),
+    checkQuota: vi.fn(async () => ({ allowed: true, remaining: 100, limit: 500 })),
+    consumeQuota: vi.fn(async () => ({ success: true, reservationId: 'res-1', remaining: 99 })),
+    commitOrRefundQuota: vi.fn(async () => {}),
     ...overrides,
   };
 }
@@ -101,6 +104,7 @@ function makeEnv(overrides?: Partial<GatewayEnv>): GatewayEnv {
     IMG_FORGE: {} as Fetcher,
     OAUTH_PROVIDER: mockOAuthProvider() as unknown as GatewayEnv['OAUTH_PROVIDER'],
     OAUTH_KV: mockKV(),
+    RATELIMIT_KV: mockKV(),
     PLATFORM_EVENTS_QUEUE: { send: async () => {} } as unknown as Queue,
     SERVICE_BINDING_SECRET: TEST_SECRET,
     API_BASE_URL: TEST_API_BASE_URL,
diff --git a/test/rate-limiter.test.ts b/test/rate-limiter.test.ts
new file mode 100644
index 0000000..743d19a
--- /dev/null
+++ b/test/rate-limiter.test.ts
@@ -0,0 +1,90 @@
+import { describe, it, expect } from 'vitest';
+import { checkRateLimit, rateLimitHeaders, getRateLimitConfig } from '../src/rate-limiter.js';
+
+function mockKV(): KVNamespace {
+  const store = new Map<string, string>();
+  return {
+    get: async (key: string) => store.get(key) ?? null,
+    put: async (key: string, value: string) => { store.set(key, value); },
+    delete: async (key: string) => { store.delete(key); },
+    list: async () => ({ keys: [], list_complete: true, cacheStatus: null }),
+    getWithMetadata: async () => ({ value: null, metadata: null, cacheStatus: null }),
+  } as unknown as KVNamespace;
+}
+
+describe('checkRateLimit', () => {
+  it('allows first request', async () => {
+    const kv = mockKV();
+    const result = await checkRateLimit(kv, 'tenant-1', 'free');
+    expect(result.allowed).toBe(true);
+    expect(result.remaining).toBeGreaterThan(0);
+  });
+
+  it('decrements remaining with each request', async () => {
+    const kv = mockKV();
+    const r1 = await checkRateLimit(kv, 'tenant-1', 'free');
+    const r2 = await checkRateLimit(kv, 'tenant-1', 'free');
+    expect(r2.remaining).toBe(r1.remaining - 1);
+  });
+
+  it('rejects when limit is exhausted', async () => {
+    const kv = mockKV();
+    // Use a tiny limit
+    const config = { limit: 2, windowSeconds: 60 };
+    await checkRateLimit(kv, 'tenant-1', 'free', config);
+    await checkRateLimit(kv, 'tenant-1', 'free', config);
+    const result = await checkRateLimit(kv, 'tenant-1', 'free', config);
+    expect(result.allowed).toBe(false);
+    expect(result.remaining).toBe(0);
+  });
+
+  it('tracks separate keys independently', async () => {
+    const kv = mockKV();
+    const config = { limit: 1, windowSeconds: 60 };
+    await checkRateLimit(kv, 'tenant-1', 'free', config);
+    const result = await checkRateLimit(kv, 'tenant-2', 'free', config);
+    expect(result.allowed).toBe(true);
+  });
+
+  it('higher tiers get higher limits', () => {
+    const free = getRateLimitConfig('free');
+    const pro = getRateLimitConfig('pro');
+    const enterprise = getRateLimitConfig('enterprise');
+    expect(pro.limit).toBeGreaterThan(free.limit);
+    expect(enterprise.limit).toBeGreaterThan(pro.limit);
+  });
+});
+
+describe('rateLimitHeaders', () => {
+  it('includes standard rate limit headers', () => {
+    const headers = rateLimitHeaders({
+      allowed: true,
+      remaining: 42,
+      limit: 60,
+      retryAfterSeconds: 30,
+    });
+    expect(headers['X-RateLimit-Limit']).toBe('60');
+    expect(headers['X-RateLimit-Remaining']).toBe('42');
+    expect(headers['X-RateLimit-Reset']).toBeDefined();
+  });
+
+  it('adds Retry-After when not allowed', () => {
+    const headers = rateLimitHeaders({
+      allowed: false,
+      remaining: 0,
+      limit: 60,
+      retryAfterSeconds: 45,
+    });
+    expect(headers['Retry-After']).toBe('45');
+  });
+
+  it('omits Retry-After when allowed', () => {
+    const headers = rateLimitHeaders({
+      allowed: true,
+      remaining: 10,
+      limit: 60,
+      retryAfterSeconds: 30,
+    });
+    expect(headers['Retry-After']).toBeUndefined();
+  });
+});
diff --git a/wrangler.toml b/wrangler.toml
index 5a0b71f..1e74033 100644
--- a/wrangler.toml
+++ b/wrangler.toml
@@ -42,6 +42,11 @@ service = "n8n-transpiler"
 binding = "OAUTH_KV"
 id = "9c165be8754749e3b543458ae8e596db"
 
+# Rate limiting — fixed-window counters per API key / tenant
+[[kv_namespaces]]
+binding = "RATELIMIT_KV"
+id = "240065d87b05466ab7b5527e3552817b"
+
 # Custom domain — taken over from img-forge-mcp (ADR-039)
 [[routes]]
 pattern = "mcp.stackbilt.dev"

From cbc778311c11e8a2b82f22e88621c20012ae4bfb Mon Sep 17 00:00:00 2001
From: Kurt Overmier <kurt@stackbilt.dev>
Date: Fri, 17 Apr 2026 09:14:39 -0500
Subject: [PATCH 2/3] chore(26): address review + resolve post-rebase
 duplication with C-1
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rebased onto current main (which absorbed C-1 scope enforcement, sprint-2
hardening, and legacy-grant-scope fallback since this PR was opened).
Cleanup commit addresses review feedback plus one duplication that surfaced
during the rebase.

Changes:

1. Remove duplicate scope check at proxyToolCall dispatch. Main's C-1 block
   enforces scope-to-risk mapping using the RISK_REQUIRED_SCOPES table and
   returns INVALID_REQUEST with outcome=insufficient_scope. The earlier
   version added here predated C-1 and returned INVALID_PARAMS with
   outcome=auth_denied. Keeping only the main/C-1 version — strictly more
   protective (catches READ_ONLY-with-empty-scopes that the older check
   skipped) and matches the existing gateway.test.ts expectations.

2. Fix "sliding window" comment in rate-limiter.ts — implementation is
   fixed-window (const windowStart = now - (now % windowSeconds)).

3. Remove unreachable isReadOnly branch in reserveQuota's catch. Free
   tools return earlier at `if (cost.baseCost === 0)`; by the time we're
   in the catch, baseCost > 0 always and the branch was dead.

4. Add 'rate_limited' to AuditArtifact.outcome and use it for the rate-
   limit denial path (was reusing 'auth_denied', which conflated
   quota/throttling rejections with auth failures in downstream analytics).

5. Update test/gateway-legacy-scope.test.ts mocks to include the new
   AuthServiceRpc quota methods (checkQuota, consumeQuota,
   commitOrRefundQuota) and the RATELIMIT_KV binding. Test passes unchanged
   afterward.

Full suite: 176/176 passing. Typecheck clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/audit.ts                      |  2 +-
 src/cost-attribution.ts           |  9 +++------
 src/gateway.ts                    | 19 +------------------
 src/rate-limiter.ts               |  2 +-
 test/gateway-legacy-scope.test.ts |  4 ++++
 5 files changed, 10 insertions(+), 26 deletions(-)

diff --git a/src/audit.ts b/src/audit.ts
index cf40773..620d04a 100644
--- a/src/audit.ts
+++ b/src/audit.ts
@@ -12,7 +12,7 @@ export interface AuditArtifact {
   risk_level: RiskLevel | 'UNKNOWN';
   policy_decision: 'ALLOW' | 'DENY';
   redacted_input_summary: string;
-  outcome: 'success' | 'error' | 'backend_error' | 'auth_denied' | 'unknown_tool' | 'invalid_params' | 'tier_denied' | 'insufficient_scope';
+  outcome: 'success' | 'error' | 'backend_error' | 'auth_denied' | 'unknown_tool' | 'invalid_params' | 'tier_denied' | 'insufficient_scope' | 'rate_limited';
   timestamp: string;
   latency_ms?: number;
 }
diff --git a/src/cost-attribution.ts b/src/cost-attribution.ts
index d49ba48..3e70c52 100644
--- a/src/cost-attribution.ts
+++ b/src/cost-attribution.ts
@@ -128,12 +128,9 @@ export async function reserveQuota(
       reservationId: result.reservationId,
       remaining: result.remaining,
     };
-  } catch (err) {
-    // Quota service unavailable — fail open for read-only tools, closed for mutations
-    const isReadOnly = cost.baseCost <= 0;
-    if (isReadOnly) {
-      return { allowed: true };
-    }
+  } catch {
+    // Quota service unavailable on a non-free tool — fail closed.
+    // Free-tool calls return earlier above and never reach this catch.
     return {
       allowed: false,
       error: 'Quota service unavailable',
diff --git a/src/gateway.ts b/src/gateway.ts
index 6a72f97..53ed4f9 100644
--- a/src/gateway.ts
+++ b/src/gateway.ts
@@ -1145,7 +1145,7 @@ async function handlePost(request: Request, env: GatewayEnv, oauthProps?: OAuthP
       risk_level: 'UNKNOWN',
       policy_decision: 'DENY',
       redacted_input_summary: '{}',
-      outcome: 'auth_denied',
+      outcome: 'rate_limited',
       timestamp: new Date().toISOString(),
     }, env);
     return jsonResponse(
@@ -1264,23 +1264,6 @@ async function handlePost(request: Request, env: GatewayEnv, oauthProps?: OAuthP
       return rpcError(rpcId, JSON_RPC_METHOD_NOT_FOUND, `Unknown tool: ${toolName}`);
     }
 
-    // Scope enforcement: mutation tools require 'generate' scope
-    if (risk !== 'READ_ONLY' && !session.scopes.includes('generate')) {
-      audit({
-        trace_id: traceId,
-        principal: session.userId ?? 'unknown',
-        tenant: session.tenantId ?? 'unknown',
-        tool: toolName,
-        risk_level: risk,
-        policy_decision: 'DENY',
-        redacted_input_summary: summarizeInput(toolArgs),
-        outcome: 'auth_denied',
-        timestamp: new Date().toISOString(),
-      }, env);
-      return rpcError(rpcId, JSON_RPC_INVALID_PARAMS,
-        `Tool "${toolName}" requires the "generate" scope. Your API key only has: ${session.scopes.join(', ')}`);
-    }
-
     // Validate arguments are object-shaped
     const argValidation = validateToolArguments(toolArgs, { type: 'object' });
     if (!argValidation.valid) {
diff --git a/src/rate-limiter.ts b/src/rate-limiter.ts
index 83b7594..9eac668 100644
--- a/src/rate-limiter.ts
+++ b/src/rate-limiter.ts
@@ -1,5 +1,5 @@
 // ─── Rate Limiter ─────────────────────────────────────────────
-// Sliding window rate limiting per API key / tenant.
+// Fixed-window rate limiting per API key / tenant.
 // Uses KV with TTL for window expiration — no external dependencies.
 // Returns 429 with Retry-After header when limit exceeded.
 
diff --git a/test/gateway-legacy-scope.test.ts b/test/gateway-legacy-scope.test.ts
index cd3043c..c762eb2 100644
--- a/test/gateway-legacy-scope.test.ts
+++ b/test/gateway-legacy-scope.test.ts
@@ -31,6 +31,9 @@ function mockAuthService(): AuthServiceRpc {
       createdAt: '2026-04-11T00:00:00Z',
     }),
     exchangeSocialCode: async () => ({ valid: false }),
+    checkQuota: async () => ({ allowed: true, remaining: 1_000_000, limit: 1_000_000 }),
+    consumeQuota: async () => ({ success: true, reservationId: 'res-test', remaining: 999_999 }),
+    commitOrRefundQuota: async () => {},
   };
 }
 
@@ -62,6 +65,7 @@ function makeEnv(): GatewayEnv {
     IMG_FORGE: mockFetcher(),
     OAUTH_PROVIDER: {} as any,
     OAUTH_KV: mockKV(),
+    RATELIMIT_KV: mockKV(),
     PLATFORM_EVENTS_QUEUE: { send: async () => {} } as unknown as Queue,
     SERVICE_BINDING_SECRET: 'test-secret',
     API_BASE_URL: 'https://mcp.stackbilt.dev',

From e3f434c5a9e2695c502dca0ca84d3974278df1ac Mon Sep 17 00:00:00 2001
From: Kurt Overmier <kurt@stackbilt.dev>
Date: Sat, 18 Apr 2026 02:32:58 -0500
Subject: [PATCH 3/3] docs(26): document rate limiting, cost attribution, and
 scope/tier enforcement
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Align README, user guide, API reference, and architecture docs with the
behavior shipped in PR #18 and hardened in PR #26 — corrects stale tier
credits/multipliers, documents the fixed-window limiter and 429 semantics,
and adds the scope/tier/risk-level enforcement matrix and quota attribution.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 README.md             |  6 +++-
 docs/api-reference.md | 67 ++++++++++++++++++++++++++++++++++-------
 docs/architecture.md  | 29 +++++++++++++++---
 docs/user-guide.md    | 69 +++++++++++++++++++++++++++++++++++--------
 4 files changed, 142 insertions(+), 29 deletions(-)

diff --git a/README.md b/README.md
index 592d935..ae4277f 100644
--- a/README.md
+++ b/README.md
@@ -42,6 +42,9 @@ Zero LLM calls for file generation. ~20ms for structure, ~2s with oracle prose.
 
 - **OAuth 2.1 with PKCE** — GitHub SSO, Google SSO, and email/password authentication
 - **Backend adapter pattern** — tool catalogs aggregated from multiple service bindings, namespaced to avoid collisions
+- **Per-tier rate limiting** — fixed-window per-tenant limits via `RATELIMIT_KV` (free=20/min, hobby=60, pro=300, enterprise=1000); 429 with `Retry-After` and `X-RateLimit-*` headers
+- **Cost attribution & quota** — every tool call carries a credit cost; quota is reserved via `edge-auth` before dispatch and committed/refunded on outcome; `image_generate` cost scales with `quality_tier` (1×/1×/3×/5×/8× for draft/standard/premium/ultra/ultra_plus)
+- **Scope + tier enforcement** — `tools/list` is filtered by token scopes; `tools/call` requires the `generate` scope for mutating tools; expensive `image_generate` quality tiers (`premium` and above) are gated to Pro+ plans
 - **Security Constitution compliance** — every tool declares a risk level (`READ_ONLY`, `LOCAL_MUTATION`, `EXTERNAL_MUTATION`); structured audit logging with secret redaction; HMAC-signed identity tokens
 - **Coming-soon gate** — `PUBLIC_SIGNUPS_ENABLED` flag to control public access
 - **MCP JSON-RPC over HTTP** — supports both streaming (SSE) and request/response transport
@@ -84,7 +87,8 @@ Deploys to the `mcp.stackbilt.dev` custom domain via Cloudflare Workers.
 | `AUTH_SERVICE` | Service Binding | RPC to `edge-auth` worker (`AuthEntrypoint`) |
 | `STACKBILDER` | Service Binding | Route to `edge-stack-architect-v2` worker |
 | `IMG_FORGE` | Service Binding | Route to `img-forge-mcp` worker |
-| `OAUTH_KV` | KV Namespace | Stores social OAuth state (5-min TTL entries) |
+| `OAUTH_KV` | KV Namespace | Stores social OAuth state (5-min TTL entries) and MCP sessions |
+| `RATELIMIT_KV` | KV Namespace | Per-tenant fixed-window rate-limit counters (60s TTL) |
 | `PLATFORM_EVENTS_QUEUE` | Queue | Audit event pipeline (`stackbilt-user-events`) |
 | `MCP_REGISTRY_AUTH` | Variable | MCP Registry domain verification string (served at `/.well-known/mcp-registry-auth`) |
 
diff --git a/docs/api-reference.md b/docs/api-reference.md
index a28b9a2..141cecd 100644
--- a/docs/api-reference.md
+++ b/docs/api-reference.md
@@ -104,6 +104,8 @@ Returns the aggregated tool catalog from all backend adapters.
 
 Tools are namespaced by product (e.g. `image_generate`, `flow_create`). Each tool includes a JSON Schema for its `inputSchema`.
 
+The catalog is **filtered by token scope**: tokens without the `generate` scope see only tools with risk level `READ_ONLY`. The full catalog is visible only to tokens that hold `generate`.
+
 ### `tools/call`
 
 Invokes a tool on the appropriate backend.
@@ -122,11 +124,15 @@ Invokes a tool on the appropriate backend.
 The gateway:
 1. Validates the tool name exists in the catalog
 2. Looks up the risk level from the route table
-3. Generates a trace ID for audit
-4. Proxies the call to the appropriate backend service binding
-5. Parses the response (JSON or SSE)
-6. Emits a structured audit event (to console + queue)
-7. Returns the tool result
+3. Enforces scope: tools with risk level `LOCAL_MUTATION`, `EXTERNAL_MUTATION`, or `DESTRUCTIVE` require the `generate` scope (rejected with `INVALID_REQUEST` and audit outcome `insufficient_scope`)
+4. Enforces tier-restricted quality tiers for `image_generate` (`premium`, `ultra`, `ultra_plus` rejected for free/hobby plans with audit outcome `tier_denied`)
+5. Reserves quota via `AUTH_SERVICE.consumeQuota` (cost from `src/cost-attribution.ts`); rejects with `INVALID_PARAMS` and outcome `tier_denied` if exceeded
+6. Generates a trace ID for audit
+7. Proxies the call to the appropriate backend service binding
+8. Settles quota (commit on success, refund on failure) via `commitOrRefundQuota`
+9. Parses the response (JSON or SSE)
+10. Emits a structured audit event (to console + queue)
+11. Returns the tool result, with `X-RateLimit-Limit`, `X-RateLimit-Remaining`, and `X-RateLimit-Reset` headers attached on success
 
 ### `ping`
 
@@ -319,10 +325,48 @@ This replaces cookies in the stateless OAuth flow, keeping the gateway fully sta
 
 ## Scopes
 
-| Scope | Allows |
-|-------|--------|
-| `generate` | Create content — images, architecture flows |
-| `read` | View resources — models, job status, flow details |
+| Scope | Allows | Enforced where |
+|-------|--------|----------------|
+| `generate` | Create content — images, scaffolds, architecture flows | `tools/list` filter (mutation tools hidden without it); `tools/call` for any tool with risk level `LOCAL_MUTATION`, `EXTERNAL_MUTATION`, or `DESTRUCTIVE` |
+| `read` | View resources — models, job status, flow details | All `READ_ONLY` tools always visible |
+
+Both scopes are granted by default to new tokens issued via the gateway's OAuth flow.
+
+---
+
+## Rate Limiting
+
+The gateway enforces a per-tenant fixed-window rate limit on every authenticated MCP request. Limits are tier-driven:
+
+| Tier | Requests / minute |
+|------|-------------------|
+| Free | 20 |
+| Hobby | 60 |
+| Pro | 300 |
+| Enterprise | 1,000 |
+
+When exceeded, the gateway returns `429 Too Many Requests` with:
+
+| Header | Meaning |
+|--------|---------|
+| `Retry-After` | Seconds until the current window resets |
+| `X-RateLimit-Limit` | Tier ceiling (e.g. `20`) |
+| `X-RateLimit-Remaining` | Always `0` on a 429 response |
+| `X-RateLimit-Reset` | Unix timestamp when the window resets |
+
+The same `X-RateLimit-*` headers are attached to successful `tools/call` responses so clients can pace themselves. `initialize`, `tools/list`, `ping`, and notifications currently do **not** echo rate-limit headers on success — those calls still count against the window, just without surfacing the counter to the client.
+
+The window is fixed (aligned to the start of each 60-second slot), not sliding.
+
+---
+
+## Quota & Cost Attribution
+
+Mutating tool calls reserve credits via `AUTH_SERVICE.consumeQuota` before dispatch. The cost table lives in `src/cost-attribution.ts`; `image_generate` cost is `5 × quality multiplier` where multipliers are `draft=1, standard=1, premium=3, ultra=5, ultra_plus=8`. Read-only tools (`*_status`, `*_classify`, `image_list_models`, etc.) are free.
+
+If quota is exceeded, the call is rejected with `INVALID_PARAMS` and the message `Quota exceeded for <tool>`.
+
+For free and hobby tiers, `image_generate` quality tiers above `standard` are rejected at the gateway with `Quality tier "<x>" requires a Pro plan or higher` — these calls do not reach the backend or consume quota.
 
 ---
 
@@ -334,7 +378,7 @@ Standard MCP JSON-RPC error codes:
 |------|---------|
 | `-32600` | Invalid request |
 | `-32601` | Method not found |
-| `-32602` | Invalid params |
+| `-32602` | Invalid params (also used for `Quota exceeded` and `Quality tier requires Pro plan` rejections) |
 | `-32603` | Internal error |
 
 HTTP-level errors:
@@ -343,9 +387,10 @@ HTTP-level errors:
 |--------|---------|
 | `400` | Missing or malformed request |
 | `401` | Invalid or expired token (`invalid_token`) |
-| `403` | Rate limited or payment delinquent (`insufficient_scope`) |
+| `403` | `insufficient_scope` (token lacks a required scope) or auth-service-level denial |
 | `404` | Unknown path |
 | `405` | Method not allowed |
+| `429` | Per-tenant rate limit exceeded (see [Rate Limiting](#rate-limiting)) |
 
 ---
 
diff --git a/docs/architecture.md b/docs/architecture.md
index 176e88d..6f4f1eb 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -166,7 +166,11 @@ enum RiskLevel {
 | `image.list_models` | IMG_FORGE | READ_ONLY |
 | `image.check_job` | IMG_FORGE | READ_ONLY |
 
-Risk levels are used for audit classification, not for authorization enforcement — all authenticated users can call all tools within their quota.
+Risk levels drive both audit classification AND authorization:
+
+- **`tools/list` filter** — `READ_ONLY` tools are visible to any authenticated session; tools with any other risk level are hidden from sessions that lack the `generate` scope.
+- **`tools/call` enforcement** — `LOCAL_MUTATION`, `EXTERNAL_MUTATION`, and `DESTRUCTIVE` tools require the `generate` scope and return `INVALID_REQUEST` with audit outcome `insufficient_scope` otherwise.
+- **Tier-restricted quality tiers** — `image_generate` arguments with `quality_tier` of `premium`, `ultra`, or `ultra_plus` require a Pro+ plan; free/hobby calls are rejected at the gateway with audit outcome `tier_denied` (see `enforceTierRestriction` in `src/gateway.ts`).
 
 ## Audit — `audit.ts`
 
@@ -222,10 +226,27 @@ Bearer token extraction and validation for non-OAuth paths:
 
 ### Rate Limiting
 
-Enforced by `AUTH_SERVICE` (delegated to the auth worker). The gateway receives:
+Two independent layers:
+
+1. **Gateway-side, per-tenant fixed-window limiter** (`src/rate-limiter.ts`) — counts every authenticated MCP request against a 60-second window in `RATELIMIT_KV`. Tier-driven ceiling: free=20, hobby=60, pro=300, enterprise=1000 req/min. Exceeding returns `429` with `Retry-After` and `X-RateLimit-*` headers. Window starts are aligned to `now - (now % 60)` so all tenants share the same boundaries.
+2. **Auth-service-side checks** — `AUTH_SERVICE` may still reject upstream with:
+   - `insufficient_scope` (403) — payment delinquent
+   - `invalid_token` (401) — expired or invalid token
+
+The gateway-side limiter fires first (immediately after auth resolution) and short-circuits before any quota reserve or backend dispatch. Read-only and free tools both count against the limiter — only the `tools/call` quota path is gated by `isFreeTool`.
+
+### Quota & Cost Attribution
+
+`src/cost-attribution.ts` declares per-tool credit costs and an `image_generate` quality multiplier (`draft=1, standard=1, premium=3, ultra=5, ultra_plus=8` × `image_generate.baseCost: 5`). On `tools/call`:
+
+1. Resolve cost via `resolveToolCost(toolName, args)`.
+2. If cost is non-zero, call `AUTH_SERVICE.consumeQuota({tenantId, userId, feature, amount})`. On failure, reject with `INVALID_PARAMS` and audit outcome `tier_denied` (this outcome is overloaded: it is also emitted for quality-tier gating — see follow-ups).
+3. Dispatch to the backend.
+4. Settle via `AUTH_SERVICE.commitOrRefundQuota(reservationId, success|failed)`. Settlement is best-effort; reservations auto-expire on the auth side if the settlement call fails.
+
+The gateway never holds canonical quota state — it is a metering/dispatch layer in front of `edge-auth`.
 
-- `insufficient_scope` (403) — rate limited or payment delinquent
-- `invalid_token` (401) — expired or invalid token
+> **Note:** the `/api/scaffold` REST endpoint (used by the CLI) bypasses both the rate limiter and the quota/cost-attribution path. CLI traffic is unmetered today; only `/mcp` traffic exercises this enforcement layer.
 
 ## Dependencies
 
diff --git a/docs/user-guide.md b/docs/user-guide.md
index 0915e59..b37c0c1 100644
--- a/docs/user-guide.md
+++ b/docs/user-guide.md
@@ -26,7 +26,7 @@ Stackbilt exposes AI tools through the [Model Context Protocol](https://modelcon
 | `flow_advance` | Advance a flow to the next stage | LOCAL_MUTATION |
 | `flow_recover` | Recover a failed flow | LOCAL_MUTATION |
 
-**Free tier**: 50 credits/month. No credit card required. Credits are weighted by operation complexity.
+**Free tier**: 25 credits/month. No credit card required. Credits are weighted by operation complexity. See [§5 Quota & Billing](#5-quota--billing) for the full table.
 
 ---
 
@@ -237,7 +237,7 @@ The client calls `image_generate` with your prompt. img-forge enhances the promp
 }
 ```
 
-**Quality tiers**: `draft` (fastest, SDXL), `standard` (FLUX Klein, default), `premium` (FLUX Dev), `ultra` (Gemini 2.5 Flash), `ultra_plus` (Gemini 3.1 Flash).
+**Quality tiers**: `draft` (fastest, SDXL), `standard` (FLUX Klein, default), `premium` (FLUX Dev), `ultra` (Gemini 2.5 Flash), `ultra_plus` (Gemini 3.1 Flash). See [§5 Quota & Billing](#5-quota--billing) for credit costs and plan availability — `premium` and above require Pro or Enterprise.
 
 ### Classify Intent
 
@@ -318,22 +318,62 @@ Both scopes are granted by default on the free tier.
 
 ## 5. Quota & Billing
 
+### Monthly credit allocation
+
 | Tier | Credits/month | Price |
 |------|--------------|-------|
-| Free | 50 | $0 |
-| Pro | 500 | Coming soon |
-| Enterprise | 2,000 | Coming soon |
+| Free | 25 | $0 |
+| Hobby | 65 | Coming soon |
+| Pro | 580 | Coming soon |
+| Enterprise | 2,320 | Coming soon |
+
+### Per-call credit cost
+
+Most read-only tools (`*_status`, `*_classify`, `*_summary`, `*_quality`, `*_governance`, `*_pages`, `image_list_models`, `image_check_job`) cost **0 credits**. Mutating tools have a base cost:
+
+| Tool | Base cost |
+|------|-----------|
+| `image_generate` | 5 credits × quality multiplier (see below) |
+| `scaffold_create` | 2 credits |
+| `scaffold_publish` | 3 credits |
+| `scaffold_deploy` | 5 credits |
+| `scaffold_import` | 1 credit |
+| `flow_create` | 2 credits |
+| `visual_screenshot` | 1 credit |
+| `visual_analyze` | 2 credits |
+
+### `image_generate` quality multipliers
+
+| Quality tier | Multiplier | Effective cost | Available on |
+|--------------|-----------|----------------|--------------|
+| `draft` | 1× | 5 credits | All tiers |
+| `standard` | 1× | 5 credits | All tiers |
+| `premium` | 3× | 15 credits | Pro + Enterprise only |
+| `ultra` | 5× | 25 credits | Pro + Enterprise only |
+| `ultra_plus` | 8× | 40 credits | Pro + Enterprise only |
 
-Credits are weighted by operation:
+Free and Hobby plans can request `draft` or `standard` only. Calls with higher quality tiers are rejected at the gateway with `Quality tier "<x>" requires a Pro plan or higher`.
 
-| Operation | Credits |
-|-----------|---------|
-| Draft quality | 1x |
-| Standard quality | 2x |
-| Premium quality | 5x |
-| Ultra quality | 10x |
+### How metering works
 
-Your remaining quota is tracked automatically. When you hit the limit, tool calls return a quota error until the next billing cycle.
+1. Before each call, the gateway reserves credits via `edge-auth`'s `consumeQuota` RPC.
+2. If the reservation succeeds, the tool runs and the reservation is committed (success) or refunded (failure) via `commitOrRefundQuota`.
+3. If the reservation fails (insufficient quota), the call is rejected with `Quota exceeded for <tool>`.
+
+Free-tier quota resets monthly. When you hit the limit, tool calls return a quota error until the next cycle.
+
+### Rate limits
+
+Independent of credit quota, every authenticated MCP request counts against a per-tenant fixed-window rate limit:
+
+| Tier | Requests / minute |
+|------|-------------------|
+| Free | 20 |
+| Hobby | 60 |
+| Pro | 300 |
+| Enterprise | 1,000 |
+
+When a request would exceed the limit, the gateway returns `429 Too Many Requests` with `Retry-After: <seconds>` and `X-RateLimit-Limit` / `X-RateLimit-Remaining` / `X-RateLimit-Reset` headers. The same headers are also attached to successful `tools/call` responses so clients can pace themselves; other MCP methods (`initialize`, `tools/list`, `ping`, notifications) currently do not echo rate-limit headers on success.
 
 ---
 
@@ -359,6 +399,9 @@ Pass `github_token` as a parameter with a GitHub PAT that has `repo` scope. Or a
 ### Quota exceeded
 Check your usage at the beginning of each month. Free tier resets monthly. Upgrade options coming soon.
 
+### Rate limited (HTTP 429)
+You exceeded your tier's per-minute request budget (free=20, hobby=60, pro=300, enterprise=1000). Wait for the number of seconds given in the `Retry-After` response header, then retry. The window is fixed (aligned to 60-second boundaries), not sliding.
+
 ---
 
 ## 7. Security