thunderbird · raivieiraadriano92 · Jun 18, 2026
diff --git a/backend/src/api/powersync.test.ts b/backend/src/api/powersync.test.ts
@@ -28,8 +28,6 @@ const powersyncSettings: Settings = {
   mistralApiKey: '',
   anthropicApiKey: '',
   exaApiKey: '',
-  thunderboltInferenceUrl: '',
-  thunderboltInferenceApiKey: '',
   tinfoilApiKey: '',
   tinfoilEnclaveUrl: 'https://inference.tinfoil.sh/v1',
   monitoringToken: '',

diff --git a/backend/src/config/settings.ts b/backend/src/config/settings.ts
@@ -14,8 +14,6 @@ const settingsSchema = z
     mistralApiKey: z.string().default(''),
     anthropicApiKey: z.string().default(''),
     exaApiKey: z.string().default(''),
-    thunderboltInferenceUrl: z.string().default(''),
-    thunderboltInferenceApiKey: z.string().default(''),
     tinfoilApiKey: z.string().default(''),
     // Include the `/v1` API prefix — Tinfoil's OpenAI-compatible endpoints live
     // under `/v1/chat/completions`, `/v1/models`, etc.
@@ -148,8 +146,6 @@ const parseSettings = (): Settings => {
     mistralApiKey: process.env.MISTRAL_API_KEY || '',
     anthropicApiKey: process.env.ANTHROPIC_API_KEY || '',
     exaApiKey: process.env.EXA_API_KEY || '',
-    thunderboltInferenceUrl: process.env.THUNDERBOLT_INFERENCE_URL || '',
-    thunderboltInferenceApiKey: process.env.THUNDERBOLT_INFERENCE_API_KEY || '',
     tinfoilApiKey: process.env.TINFOIL_API_KEY || '',
     tinfoilEnclaveUrl: process.env.TINFOIL_ENCLAVE_URL || 'https://inference.tinfoil.sh/v1',
     monitoringToken: process.env.MONITORING_TOKEN || '',

diff --git a/backend/src/inference/client.ts b/backend/src/inference/client.ts
@@ -7,7 +7,7 @@ import { getPostHogClient, isPostHogConfigured } from '@/posthog/client'
 import { OpenAI as PostHogOpenAI } from '@posthog/ai'
 import OpenAI from 'openai'
 
-export type InferenceProvider = 'fireworks' | 'thunderbolt' | 'mistral' | 'anthropic'
+export type InferenceProvider = 'fireworks' | 'mistral' | 'anthropic'
 
 type InferenceClient = {
   client: OpenAI | PostHogOpenAI
@@ -19,11 +19,6 @@ type InferenceClient = {
  */
 let fireworksClient: OpenAI | PostHogOpenAI | null = null
 
-/**
- * Lazily initialized Thunderbolt client
- */
-let thunderboltClient: OpenAI | PostHogOpenAI | null = null
-
 /**
  * Lazily initialized Mistral client
  */
@@ -70,42 +65,6 @@ const getFireworksClient = (fetchFn?: typeof fetch): OpenAI | PostHogOpenAI => {
   return client
 }
 
-/**
- * Get the Thunderbolt inference client for gpt-oss
- */
-const getThunderboltClient = (fetchFn?: typeof fetch): OpenAI | PostHogOpenAI => {
-  // Don't use cache when fetchFn is provided (primarily for testing)
-  if (thunderboltClient && !fetchFn) {
-    return thunderboltClient
-  }
-
-  const settings = getSettings()
-
-  if (!settings.thunderboltInferenceUrl || !settings.thunderboltInferenceApiKey) {
-    throw new Error('Thunderbolt inference URL or API key not configured')
-  }
-
-  const params = {
-    apiKey: settings.thunderboltInferenceApiKey,
-    baseURL: settings.thunderboltInferenceUrl,
-    ...(fetchFn && { fetch: fetchFn }),
-  }
-
-  const client = isPostHogConfigured()
-    ? new PostHogOpenAI({
-        ...params,
-        posthog: getPostHogClient(fetchFn),
-      })
-    : new OpenAI(params)
-
-  // Only cache if no custom fetchFn was provided
-  if (!fetchFn) {
-    thunderboltClient = client
-  }
-
-  return client
-}
-
 /**
  * Get the Mistral AI client using OpenAI-compatible API
  */
@@ -180,7 +139,6 @@ const getAnthropicClient = (fetchFn?: typeof fetch): OpenAI | PostHogOpenAI => {
  */
 export const getInferenceClient = (provider: InferenceProvider, fetchFn?: typeof fetch): InferenceClient => {
   const clientMap: Record<InferenceProvider, () => OpenAI | PostHogOpenAI> = {
-    thunderbolt: () => getThunderboltClient(fetchFn),
     mistral: () => getMistralClient(fetchFn),
     anthropic: () => getAnthropicClient(fetchFn),
     fireworks: () => getFireworksClient(fetchFn),
@@ -200,7 +158,6 @@ export const getInferenceClient = (provider: InferenceProvider, fetchFn?: typeof
  */
 export const clearInferenceClientCache = () => {
   fireworksClient = null
-  thunderboltClient = null
   mistralClient = null
   anthropicClient = null
 }

diff --git a/backend/src/inference/posthog-privacy.test.ts b/backend/src/inference/posthog-privacy.test.ts
@@ -42,8 +42,6 @@ describe('Inference Routes - PostHog Privacy Integration', () => {
       POSTHOG_API_KEY: process.env.POSTHOG_API_KEY,
       POSTHOG_HOST: process.env.POSTHOG_HOST,
       FIREWORKS_API_KEY: process.env.FIREWORKS_API_KEY,
-      THUNDERBOLT_INFERENCE_URL: process.env.THUNDERBOLT_INFERENCE_URL,
-      THUNDERBOLT_INFERENCE_API_KEY: process.env.THUNDERBOLT_INFERENCE_API_KEY,
     }
 
     capturedFetches = []

diff --git a/backend/src/inference/routes.test.ts b/backend/src/inference/routes.test.ts
@@ -121,37 +121,6 @@ describe('Inference Routes', () => {
       expect(createSSEStreamSpy).toHaveBeenCalledWith(mockCompletion)
     })
 
-    it('should route gpt-oss-120b model to thunderbolt provider', async () => {
-      getInferenceClientSpy.mockReturnValue({
-        client: mockOpenAIClient as unknown as OpenAI,
-        provider: 'thunderbolt',
-      })
-
-      const mockCompletion = createMockStream()
-      mockCreateCompletion.mockImplementation(() => Promise.resolve(mockCompletion))
-
-      const gptOssRequest = {
-        ...validRequestBody,
-        model: 'gpt-oss-120b',
-      }
-
-      const response = await app.handle(
-        new Request('http://localhost/chat/completions', {
-          method: 'POST',
-          headers: { 'Content-Type': 'application/json' },
-          body: JSON.stringify(gptOssRequest),
-        }),
-      )
-
-      expect(response.status).toBe(200)
-      expect(getInferenceClientSpy).toHaveBeenCalledWith('thunderbolt')
-      expect(mockCreateCompletion).toHaveBeenCalledWith(
-        expect.objectContaining({
-          model: 'openai/gpt-oss-120b',
-        }),
-      )
-    })
-
     it('should route mistral models to mistral provider', async () => {
       const mockCompletion = createMockStream()
       mockCreateCompletion.mockImplementation(() => Promise.resolve(mockCompletion))
@@ -229,42 +198,6 @@ describe('Inference Routes', () => {
       isPostHogConfiguredSpy.mockReturnValue(false)
     })
 
-    it('should include correct provider in PostHog properties for gpt-oss-120b', async () => {
-      isPostHogConfiguredSpy.mockReturnValue(true)
-      getInferenceClientSpy.mockReturnValue({
-        client: mockOpenAIClient as unknown as OpenAI,
-        provider: 'thunderbolt',
-      })
-
-      const mockCompletion = createMockStream()
-      mockCreateCompletion.mockImplementation(() => Promise.resolve(mockCompletion))
-
-      const gptOssRequest = {
-        ...validRequestBody,
-        model: 'gpt-oss-120b',
-      }
-
-      const response = await app.handle(
-        new Request('http://localhost/chat/completions', {
-          method: 'POST',
-          headers: { 'Content-Type': 'application/json' },
-          body: JSON.stringify(gptOssRequest),
-        }),
-      )
-
-      expect(response.status).toBe(200)
-      expect(mockCreateCompletion).toHaveBeenCalledWith(
-        expect.objectContaining({
-          posthogProperties: expect.objectContaining({
-            model_provider: 'thunderbolt',
-          }),
-        }),
-      )
-
-      // Reset for other tests
-      isPostHogConfiguredSpy.mockReturnValue(false)
-    })
-
     it('should reject non-streaming requests', async () => {
       const nonStreamingRequest = {
         ...validRequestBody,
@@ -330,7 +263,7 @@ describe('Inference Routes', () => {
     })
 
     it('should validate all supported models', () => {
-      const expectedModels = ['gpt-oss-120b', 'mistral-medium-3.1', 'mistral-large-3', 'sonnet-4.5', 'opus-4.8']
+      const expectedModels = ['mistral-medium-3.1', 'mistral-large-3', 'sonnet-4.5', 'opus-4.8']
       expect(Object.keys(supportedModels)).toEqual(expectedModels)
     })
 

diff --git a/backend/src/inference/routes.ts b/backend/src/inference/routes.ts
@@ -27,10 +27,6 @@ type ModelConfig = {
 }
 
 export const supportedModels: Record<string, ModelConfig> = {
-  'gpt-oss-120b': {
-    provider: 'thunderbolt',
-    internalName: 'openai/gpt-oss-120b',
-  },
   'mistral-medium-3.1': {
     provider: 'mistral',
     internalName: 'mistral-medium-2508',

diff --git a/backend/src/test-utils/settings.ts b/backend/src/test-utils/settings.ts
@@ -14,8 +14,6 @@ export const createTestSettings = (overrides: Partial<Settings> = {}): Settings
   mistralApiKey: '',
   anthropicApiKey: '',
   exaApiKey: '',
-  thunderboltInferenceUrl: '',
-  thunderboltInferenceApiKey: '',
   tinfoilApiKey: '',
   tinfoilEnclaveUrl: 'https://inference.tinfoil.sh/v1',
   monitoringToken: '',

diff --git a/src/ai/eval/README.md b/src/ai/eval/README.md
@@ -5,23 +5,20 @@ Embedded E2E test runner that validates AI response quality across all models an
 ## Quick Start
 
 ```bash
-# Run all 135 scenarios (3 models x 3 modes x 15 prompts)
+# Run all scenarios
 bun run eval
 
-# Test only GPT-OSS
-EVAL_MODELS=gpt-oss bun run eval
+# Test only Opus
+EVAL_MODELS=opus bun run eval
 
 # Test only Chat mode across all models
 EVAL_MODES=chat bun run eval
 
 # Verbose mode — shows the full system prompt and model response for each scenario
-EVAL_MODELS=gpt-oss EVAL_MODES=chat bun run eval -- --verbose
+EVAL_MODELS=opus EVAL_MODES=chat bun run eval -- --verbose
 
-# Test GPT-OSS in Search mode only
-EVAL_MODELS=gpt-oss EVAL_MODES=search bun run eval
-
-# Test Mistral and Sonnet in Chat and Search modes
-EVAL_MODELS=mistral,sonnet EVAL_MODES=chat,search bun run eval
+# Test Opus in Search mode only
+EVAL_MODELS=opus EVAL_MODES=search bun run eval
 ```
 
 > **Prerequisite**: The backend must be running at `localhost:8000` (or whatever `cloud_url` is configured). The eval runner makes real API calls to the models.
@@ -55,20 +52,20 @@ Each scenario checks a combination of criteria depending on the mode:
 Thunderbolt AI Eval Runner
 ========================================
 Scenarios: 15
-Models: gpt-oss
+Models: opus
 Modes: chat
 Parallel: 3 (one per model)
 Timeout: 120000ms per scenario
 ========================================
 
-Starting batch: gpt-oss
+Starting batch: opus
 
---- GPT-OSS (15 scenarios) ---
-  PASS gpt-oss/chat/C1 (2.1s)
-  PASS gpt-oss/chat/C2 (4.3s)
-  PASS gpt-oss/chat/C3 (1.8s)
-  FAIL gpt-oss/chat/C4 (60.0s) — Empty response — no text output produced
-  PASS gpt-oss/chat/C5 (1.2s)
+--- OPUS (15 scenarios) ---
+  PASS opus/chat/C1 (2.1s)
+  PASS opus/chat/C2 (4.3s)
+  PASS opus/chat/C3 (1.8s)
+  FAIL opus/chat/C4 (60.0s) — Empty response — no text output produced
+  PASS opus/chat/C5 (1.2s)
   ...
 
 ============================================================
@@ -78,17 +75,17 @@ EVAL REPORT
 Overall: 12/15 passed (80%)
 
 By Model:
-  gpt-oss: 12/15 (80%)
+  opus: 12/15 (80%)
 
 By Mode:
   chat: 12/15 (80%)
 
 Failures (3):
-  FAIL gpt-oss/chat/C4
+  FAIL opus/chat/C4
     - Empty response — no text output produced
-  FAIL gpt-oss/chat/C11
+  FAIL opus/chat/C11
     - Insufficient citations: 0 found, 2 required
-  FAIL gpt-oss/chat/C15
+  FAIL opus/chat/C15
     - Empty response — no text output produced
 
 ============================================================
@@ -100,7 +97,7 @@ Report saved to: evals/eval-results.md
 
 | Variable                 | Default                 | Example           | Description                     |
 | ------------------------ | ----------------------- | ----------------- | ------------------------------- |
-| `EVAL_MODELS`            | all                     | `gpt-oss,mistral` | Which models to test            |
+| `EVAL_MODELS`            | all                     | `opus`            | Which models to test            |
 | `EVAL_MODES`             | all                     | `chat,search`     | Which modes to test             |
 | `EVAL_SCENARIO_PARALLEL` | `3`                     | `1`               | Concurrent scenarios per worker |
 | `EVAL_TIMEOUT`           | `120000`                | `60000`           | Timeout per scenario (ms)       |
@@ -116,12 +113,12 @@ Report saved to: evals/eval-results.md
 Example with detailed report:
 
 ```
-$ EVAL_MODELS=gpt-oss EVAL_MODES=chat bun run eval -- --detailed
+$ EVAL_MODELS=opus EVAL_MODES=chat bun run eval -- --detailed
 
 # The markdown report at evals/eval-results.md will include:
 ## Failures
 
-### gpt-oss/chat/C4
+### opus/chat/C4
 
 - **Prompt**: Compare the iPhone 16 Pro and Samsung Galaxy S25 Ultra
 - **Duration**: 60.0s
@@ -134,10 +131,10 @@ $ EVAL_MODELS=gpt-oss EVAL_MODES=chat bun run eval -- --detailed
 Example with verbose:
 
 ```
-$ EVAL_MODELS=gpt-oss EVAL_MODES=chat bun run eval -- --verbose
+$ EVAL_MODELS=opus EVAL_MODES=chat bun run eval -- --verbose
 
---- SYSTEM PROMPT (gpt-oss/chat/C1) ---
-You are an executive assistant using the **GPT OSS** model...
+--- SYSTEM PROMPT (opus/chat/C1) ---
+You are an executive assistant using the **Opus 4.8** model...
 # Principles
 ...
 # Active Mode (follow these instructions)
@@ -146,9 +143,9 @@ Make quick decisions—don't overthink...
 What are the top 3 news stories today?
 --- END PROMPT ---
 
-  PASS gpt-oss/chat/C1 (2.1s)
+  PASS opus/chat/C1 (2.1s)
 
---- RESPONSE (gpt-oss/chat/C1) ---
+--- RESPONSE (opus/chat/C1) ---
 Here are the three leading stories on AP News for February 16, 2026:
 - **Europeans push back at the U.S...** [1]
 - **"First feline" Larry marks 15 years...** [2]
@@ -160,9 +157,7 @@ Here are the three leading stories on AP News for February 16, 2026:
 
 Use these names in `EVAL_MODELS`:
 
-- `gpt-oss` — GPT OSS 120B (self-hosted)
-- `mistral` — Mistral Medium 3.1
-- `sonnet` — Sonnet 4.5
+- `opus` — Opus 4.8
 
 ### Mode names
 
@@ -174,7 +169,7 @@ Use these names in `EVAL_MODES`:
 
 ## Scenarios
 
-135 total scenarios: 15 prompts per mode, tested against each of 3 models.
+15 prompts per mode, each run against every registered model (total scenarios = 15 × modes × models).
 
 **Chat mode** covers: news queries, product recommendations, factual lookups, comparisons, multi-part travel queries, medical info, stock market data, and more.