diff --git a/backend/src/api/powersync.test.ts b/backend/src/api/powersync.test.ts index 51cd7968f..e775668a0 100644 --- a/backend/src/api/powersync.test.ts +++ b/backend/src/api/powersync.test.ts @@ -28,8 +28,6 @@ const powersyncSettings: Settings = { mistralApiKey: '', anthropicApiKey: '', exaApiKey: '', - thunderboltInferenceUrl: '', - thunderboltInferenceApiKey: '', tinfoilApiKey: '', tinfoilEnclaveUrl: 'https://inference.tinfoil.sh/v1', monitoringToken: '', diff --git a/backend/src/config/settings.ts b/backend/src/config/settings.ts index b0e973f4b..b6cca1cb7 100644 --- a/backend/src/config/settings.ts +++ b/backend/src/config/settings.ts @@ -14,8 +14,6 @@ const settingsSchema = z mistralApiKey: z.string().default(''), anthropicApiKey: z.string().default(''), exaApiKey: z.string().default(''), - thunderboltInferenceUrl: z.string().default(''), - thunderboltInferenceApiKey: z.string().default(''), tinfoilApiKey: z.string().default(''), // Include the `/v1` API prefix — Tinfoil's OpenAI-compatible endpoints live // under `/v1/chat/completions`, `/v1/models`, etc. 
@@ -148,8 +146,6 @@ const parseSettings = (): Settings => { mistralApiKey: process.env.MISTRAL_API_KEY || '', anthropicApiKey: process.env.ANTHROPIC_API_KEY || '', exaApiKey: process.env.EXA_API_KEY || '', - thunderboltInferenceUrl: process.env.THUNDERBOLT_INFERENCE_URL || '', - thunderboltInferenceApiKey: process.env.THUNDERBOLT_INFERENCE_API_KEY || '', tinfoilApiKey: process.env.TINFOIL_API_KEY || '', tinfoilEnclaveUrl: process.env.TINFOIL_ENCLAVE_URL || 'https://inference.tinfoil.sh/v1', monitoringToken: process.env.MONITORING_TOKEN || '', diff --git a/backend/src/inference/client.ts b/backend/src/inference/client.ts index 0b53e8ae2..006bce913 100644 --- a/backend/src/inference/client.ts +++ b/backend/src/inference/client.ts @@ -7,7 +7,7 @@ import { getPostHogClient, isPostHogConfigured } from '@/posthog/client' import { OpenAI as PostHogOpenAI } from '@posthog/ai' import OpenAI from 'openai' -export type InferenceProvider = 'fireworks' | 'thunderbolt' | 'mistral' | 'anthropic' +export type InferenceProvider = 'fireworks' | 'mistral' | 'anthropic' type InferenceClient = { client: OpenAI | PostHogOpenAI @@ -19,11 +19,6 @@ type InferenceClient = { */ let fireworksClient: OpenAI | PostHogOpenAI | null = null -/** - * Lazily initialized Thunderbolt client - */ -let thunderboltClient: OpenAI | PostHogOpenAI | null = null - /** * Lazily initialized Mistral client */ @@ -70,42 +65,6 @@ const getFireworksClient = (fetchFn?: typeof fetch): OpenAI | PostHogOpenAI => { return client } -/** - * Get the Thunderbolt inference client for gpt-oss - */ -const getThunderboltClient = (fetchFn?: typeof fetch): OpenAI | PostHogOpenAI => { - // Don't use cache when fetchFn is provided (primarily for testing) - if (thunderboltClient && !fetchFn) { - return thunderboltClient - } - - const settings = getSettings() - - if (!settings.thunderboltInferenceUrl || !settings.thunderboltInferenceApiKey) { - throw new Error('Thunderbolt inference URL or API key not configured') - } - - const 
params = { - apiKey: settings.thunderboltInferenceApiKey, - baseURL: settings.thunderboltInferenceUrl, - ...(fetchFn && { fetch: fetchFn }), - } - - const client = isPostHogConfigured() - ? new PostHogOpenAI({ - ...params, - posthog: getPostHogClient(fetchFn), - }) - : new OpenAI(params) - - // Only cache if no custom fetchFn was provided - if (!fetchFn) { - thunderboltClient = client - } - - return client -} - /** * Get the Mistral AI client using OpenAI-compatible API */ @@ -180,7 +139,6 @@ const getAnthropicClient = (fetchFn?: typeof fetch): OpenAI | PostHogOpenAI => { */ export const getInferenceClient = (provider: InferenceProvider, fetchFn?: typeof fetch): InferenceClient => { const clientMap: Record<InferenceProvider, () => OpenAI | PostHogOpenAI> = { - thunderbolt: () => getThunderboltClient(fetchFn), mistral: () => getMistralClient(fetchFn), anthropic: () => getAnthropicClient(fetchFn), fireworks: () => getFireworksClient(fetchFn), @@ -200,7 +158,6 @@ export const getInferenceClient = (provider: InferenceProvider, fetchFn?: typeof */ export const clearInferenceClientCache = () => { fireworksClient = null - thunderboltClient = null mistralClient = null anthropicClient = null } diff --git a/backend/src/inference/posthog-privacy.test.ts b/backend/src/inference/posthog-privacy.test.ts index ae6383f2e..379744a24 100644 --- a/backend/src/inference/posthog-privacy.test.ts +++ b/backend/src/inference/posthog-privacy.test.ts @@ -42,8 +42,6 @@ describe('Inference Routes - PostHog Privacy Integration', () => { POSTHOG_API_KEY: process.env.POSTHOG_API_KEY, POSTHOG_HOST: process.env.POSTHOG_HOST, FIREWORKS_API_KEY: process.env.FIREWORKS_API_KEY, - THUNDERBOLT_INFERENCE_URL: process.env.THUNDERBOLT_INFERENCE_URL, - THUNDERBOLT_INFERENCE_API_KEY: process.env.THUNDERBOLT_INFERENCE_API_KEY, } capturedFetches = [] diff --git a/backend/src/inference/routes.test.ts b/backend/src/inference/routes.test.ts index 663f730fa..cbb109b22 100644 --- a/backend/src/inference/routes.test.ts +++ 
b/backend/src/inference/routes.test.ts @@ -121,37 +121,6 @@ describe('Inference Routes', () => { expect(createSSEStreamSpy).toHaveBeenCalledWith(mockCompletion) }) - it('should route gpt-oss-120b model to thunderbolt provider', async () => { - getInferenceClientSpy.mockReturnValue({ - client: mockOpenAIClient as unknown as OpenAI, - provider: 'thunderbolt', - }) - - const mockCompletion = createMockStream() - mockCreateCompletion.mockImplementation(() => Promise.resolve(mockCompletion)) - - const gptOssRequest = { - ...validRequestBody, - model: 'gpt-oss-120b', - } - - const response = await app.handle( - new Request('http://localhost/chat/completions', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify(gptOssRequest), - }), - ) - - expect(response.status).toBe(200) - expect(getInferenceClientSpy).toHaveBeenCalledWith('thunderbolt') - expect(mockCreateCompletion).toHaveBeenCalledWith( - expect.objectContaining({ - model: 'openai/gpt-oss-120b', - }), - ) - }) - it('should route mistral models to mistral provider', async () => { const mockCompletion = createMockStream() mockCreateCompletion.mockImplementation(() => Promise.resolve(mockCompletion)) @@ -229,42 +198,6 @@ describe('Inference Routes', () => { isPostHogConfiguredSpy.mockReturnValue(false) }) - it('should include correct provider in PostHog properties for gpt-oss-120b', async () => { - isPostHogConfiguredSpy.mockReturnValue(true) - getInferenceClientSpy.mockReturnValue({ - client: mockOpenAIClient as unknown as OpenAI, - provider: 'thunderbolt', - }) - - const mockCompletion = createMockStream() - mockCreateCompletion.mockImplementation(() => Promise.resolve(mockCompletion)) - - const gptOssRequest = { - ...validRequestBody, - model: 'gpt-oss-120b', - } - - const response = await app.handle( - new Request('http://localhost/chat/completions', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify(gptOssRequest), - }), - ) - - 
expect(response.status).toBe(200) - expect(mockCreateCompletion).toHaveBeenCalledWith( - expect.objectContaining({ - posthogProperties: expect.objectContaining({ - model_provider: 'thunderbolt', - }), - }), - ) - - // Reset for other tests - isPostHogConfiguredSpy.mockReturnValue(false) - }) - it('should reject non-streaming requests', async () => { const nonStreamingRequest = { ...validRequestBody, @@ -330,7 +263,7 @@ describe('Inference Routes', () => { }) it('should validate all supported models', () => { - const expectedModels = ['gpt-oss-120b', 'mistral-medium-3.1', 'mistral-large-3', 'sonnet-4.5', 'opus-4.8'] + const expectedModels = ['mistral-medium-3.1', 'mistral-large-3', 'sonnet-4.5', 'opus-4.8'] expect(Object.keys(supportedModels)).toEqual(expectedModels) }) diff --git a/backend/src/inference/routes.ts b/backend/src/inference/routes.ts index ac01aadc0..918a04de9 100644 --- a/backend/src/inference/routes.ts +++ b/backend/src/inference/routes.ts @@ -27,10 +27,6 @@ type ModelConfig = { } export const supportedModels: Record = { - 'gpt-oss-120b': { - provider: 'thunderbolt', - internalName: 'openai/gpt-oss-120b', - }, 'mistral-medium-3.1': { provider: 'mistral', internalName: 'mistral-medium-2508', diff --git a/backend/src/test-utils/settings.ts b/backend/src/test-utils/settings.ts index 3f02959e4..768e1ca16 100644 --- a/backend/src/test-utils/settings.ts +++ b/backend/src/test-utils/settings.ts @@ -14,8 +14,6 @@ export const createTestSettings = (overrides: Partial = {}): Settings mistralApiKey: '', anthropicApiKey: '', exaApiKey: '', - thunderboltInferenceUrl: '', - thunderboltInferenceApiKey: '', tinfoilApiKey: '', tinfoilEnclaveUrl: 'https://inference.tinfoil.sh/v1', monitoringToken: '', diff --git a/src/ai/eval/README.md b/src/ai/eval/README.md index 0528bdb0c..364aeab56 100644 --- a/src/ai/eval/README.md +++ b/src/ai/eval/README.md @@ -5,23 +5,20 @@ Embedded E2E test runner that validates AI response quality across all models an ## Quick Start ```bash 
-# Run all 135 scenarios (3 models x 3 modes x 15 prompts) +# Run all scenarios bun run eval -# Test only GPT-OSS -EVAL_MODELS=gpt-oss bun run eval +# Test only Opus +EVAL_MODELS=opus bun run eval # Test only Chat mode across all models EVAL_MODES=chat bun run eval # Verbose mode — shows the full system prompt and model response for each scenario -EVAL_MODELS=gpt-oss EVAL_MODES=chat bun run eval -- --verbose +EVAL_MODELS=opus EVAL_MODES=chat bun run eval -- --verbose -# Test GPT-OSS in Search mode only -EVAL_MODELS=gpt-oss EVAL_MODES=search bun run eval - -# Test Mistral and Sonnet in Chat and Search modes -EVAL_MODELS=mistral,sonnet EVAL_MODES=chat,search bun run eval +# Test Opus in Search mode only +EVAL_MODELS=opus EVAL_MODES=search bun run eval ``` > **Prerequisite**: The backend must be running at `localhost:8000` (or whatever `cloud_url` is configured). The eval runner makes real API calls to the models. @@ -55,20 +52,20 @@ Each scenario checks a combination of criteria depending on the mode: Thunderbolt AI Eval Runner ======================================== Scenarios: 15 -Models: gpt-oss +Models: opus Modes: chat Parallel: 3 (one per model) Timeout: 120000ms per scenario ======================================== -Starting batch: gpt-oss +Starting batch: opus ---- GPT-OSS (15 scenarios) --- - PASS gpt-oss/chat/C1 (2.1s) - PASS gpt-oss/chat/C2 (4.3s) - PASS gpt-oss/chat/C3 (1.8s) - FAIL gpt-oss/chat/C4 (60.0s) — Empty response — no text output produced - PASS gpt-oss/chat/C5 (1.2s) +--- OPUS (15 scenarios) --- + PASS opus/chat/C1 (2.1s) + PASS opus/chat/C2 (4.3s) + PASS opus/chat/C3 (1.8s) + FAIL opus/chat/C4 (60.0s) — Empty response — no text output produced + PASS opus/chat/C5 (1.2s) ... 
============================================================ @@ -78,17 +75,17 @@ EVAL REPORT Overall: 12/15 passed (80%) By Model: - gpt-oss: 12/15 (80%) + opus: 12/15 (80%) By Mode: chat: 12/15 (80%) Failures (3): - FAIL gpt-oss/chat/C4 + FAIL opus/chat/C4 - Empty response — no text output produced - FAIL gpt-oss/chat/C11 + FAIL opus/chat/C11 - Insufficient citations: 0 found, 2 required - FAIL gpt-oss/chat/C15 + FAIL opus/chat/C15 - Empty response — no text output produced ============================================================ @@ -100,7 +97,7 @@ Report saved to: evals/eval-results.md | Variable | Default | Example | Description | | ------------------------ | ----------------------- | ----------------- | ------------------------------- | -| `EVAL_MODELS` | all | `gpt-oss,mistral` | Which models to test | +| `EVAL_MODELS` | all | `opus` | Which models to test | | `EVAL_MODES` | all | `chat,search` | Which modes to test | | `EVAL_SCENARIO_PARALLEL` | `3` | `1` | Concurrent scenarios per worker | | `EVAL_TIMEOUT` | `120000` | `60000` | Timeout per scenario (ms) | @@ -116,12 +113,12 @@ Report saved to: evals/eval-results.md Example with detailed report: ``` -$ EVAL_MODELS=gpt-oss EVAL_MODES=chat bun run eval -- --detailed +$ EVAL_MODELS=opus EVAL_MODES=chat bun run eval -- --detailed # The markdown report at evals/eval-results.md will include: ## Failures -### gpt-oss/chat/C4 +### opus/chat/C4 - **Prompt**: Compare the iPhone 16 Pro and Samsung Galaxy S25 Ultra - **Duration**: 60.0s @@ -134,10 +131,10 @@ $ EVAL_MODELS=gpt-oss EVAL_MODES=chat bun run eval -- --detailed Example with verbose: ``` -$ EVAL_MODELS=gpt-oss EVAL_MODES=chat bun run eval -- --verbose +$ EVAL_MODELS=opus EVAL_MODES=chat bun run eval -- --verbose ---- SYSTEM PROMPT (gpt-oss/chat/C1) --- -You are an executive assistant using the **GPT OSS** model... +--- SYSTEM PROMPT (opus/chat/C1) --- +You are an executive assistant using the **Opus 4.8** model... # Principles ... 
# Active Mode (follow these instructions) @@ -146,9 +143,9 @@ Make quick decisions—don't overthink... What are the top 3 news stories today? --- END PROMPT --- - PASS gpt-oss/chat/C1 (2.1s) + PASS opus/chat/C1 (2.1s) ---- RESPONSE (gpt-oss/chat/C1) --- +--- RESPONSE (opus/chat/C1) --- Here are the three leading stories on AP News for February 16, 2026: - **Europeans push back at the U.S...** [1] - **"First feline" Larry marks 15 years...** [2] @@ -160,9 +157,7 @@ Here are the three leading stories on AP News for February 16, 2026: Use these names in `EVAL_MODELS`: -- `gpt-oss` — GPT OSS 120B (self-hosted) -- `mistral` — Mistral Medium 3.1 -- `sonnet` — Sonnet 4.5 +- `opus` — Opus 4.8 ### Mode names @@ -174,7 +169,7 @@ Use these names in `EVAL_MODES`: ## Scenarios -135 total scenarios: 15 prompts per mode, tested against each of 3 models. +15 prompts per mode, tested against each registered model. **Chat mode** covers: news queries, product recommendations, factual lookups, comparisons, multi-part travel queries, medical info, stock market data, and more. diff --git a/src/ai/eval/debug-single.ts b/src/ai/eval/debug-single.ts index 42f534b2a..8ed981b61 100644 --- a/src/ai/eval/debug-single.ts +++ b/src/ai/eval/debug-single.ts @@ -9,7 +9,7 @@ import { aiFetchStreamingResponse } from '@/ai/fetch' import { setupTestDatabase, teardownTestDatabase } from '@/dal/test-utils' import { getLocalSetting } from '@/stores/local-settings-store' -import { defaultModelGptOss120b } from '@/defaults/models' +import { defaultModelOpus48 } from '@/defaults/models' import { defaultModeChat } from '@/defaults/modes' import { isSsoMode } from '@/lib/auth-mode' import { getAuthToken } from '@/lib/auth-token' @@ -26,7 +26,7 @@ const run = async () => { await setupTestDatabase() console.log('[1/5] Database ready.\n') - const modelId = defaultModelGptOss120b.id + const modelId = defaultModelOpus48.id const prompt = "What's the current price of Bitcoin?" 
const body = JSON.stringify({ @@ -34,7 +34,7 @@ const run = async () => { id: uuidv7(), }) - console.log(`[2/5] Model: ${defaultModelGptOss120b.name} (${modelId})`) + console.log(`[2/5] Model: ${defaultModelOpus48.name} (${modelId})`) console.log(`[2/5] Mode: ${defaultModeChat.name}`) console.log(`[2/5] Prompt: "${prompt}"\n`) diff --git a/src/ai/eval/scenarios.ts b/src/ai/eval/scenarios.ts index 71a34dfca..c28b34880 100644 --- a/src/ai/eval/scenarios.ts +++ b/src/ai/eval/scenarios.ts @@ -2,13 +2,10 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -import { defaultModelGptOss120b, defaultModelOpus48 } from '@/defaults/models' +import { defaultModelOpus48 } from '@/defaults/models' import type { EvalCriteria, EvalScenario } from './types' -const models = [ - { name: 'gpt-oss', id: defaultModelGptOss120b.id }, - { name: 'opus', id: defaultModelOpus48.id }, -] as const +const models = [{ name: 'opus', id: defaultModelOpus48.id }] as const /** Default criteria applied to all Chat mode scenarios */ const chatCriteria: EvalCriteria = { diff --git a/src/ai/fetch.ts b/src/ai/fetch.ts index 3285e01c1..499450f8d 100644 --- a/src/ai/fetch.ts +++ b/src/ai/fetch.ts @@ -259,7 +259,7 @@ export const createModel = async (modelConfig: Model, getProxyFetch: () => Fetch } ssoFetch.preconnect = fetch.preconnect const providerFetch: typeof fetch = sso && !hasRealToken ? 
ssoFetch : fetch - // GPT OSS (vendor: 'openai') uses createOpenAI with .chat() to force Chat Completions API + // OpenAI-vendor thunderbolt models use createOpenAI with .chat() to force Chat Completions API // (AI SDK 5 defaults createOpenAI to Responses API which our backend doesn't support) if (modelConfig.vendor === 'openai') { const provider = createOpenAI({ baseURL: cloudUrl, apiKey: token, fetch: providerFetch }) diff --git a/src/components/ui/searchable-menu/searchable-menu.stories.tsx b/src/components/ui/searchable-menu/searchable-menu.stories.tsx index f89329536..f884a1ed7 100644 --- a/src/components/ui/searchable-menu/searchable-menu.stories.tsx +++ b/src/components/ui/searchable-menu/searchable-menu.stories.tsx @@ -62,7 +62,7 @@ const groupedItems: SearchableMenuGroup[] = [ id: 'provided', label: 'Provided Models', items: [ - { id: 'gpt-oss', label: 'GPT-OSS 120B', description: 'Fast' }, + { id: 'opus-4.8', label: 'Opus 4.8', description: 'Top-tier reasoning' }, { id: 'qwen3', label: 'Qwen3 Instruct', description: 'Balance between privacy and power' }, ], }, @@ -109,7 +109,7 @@ export const WithIcons: Story = { export const GroupedItems: Story = { args: { items: groupedItems, - value: 'gpt-oss', + value: 'opus-4.8', onValueChange: (id) => console.log('Selected:', id), }, parameters: { @@ -216,7 +216,7 @@ export const Interactive: Story = { } const InteractiveGroupedTemplate = () => { - const [value, setValue] = useState('gpt-oss') + const [value, setValue] = useState('opus-4.8') return ( { describe('resetModelProfileToDefault', () => { it('should restore default values for a known model', async () => { const db = getDb() - const { defaultModelProfileGptOss120b } = await import('@/defaults/model-profiles') + const { defaultModelProfileOpus48 } = await import('@/defaults/model-profiles') // Insert the actual default model first to satisfy FK constraint await db.insert(modelsTable).values({ - id: defaultModelGptOss120b.id, - provider: 
defaultModelGptOss120b.provider, - name: defaultModelGptOss120b.name, - model: defaultModelGptOss120b.model, - isSystem: defaultModelGptOss120b.isSystem, - enabled: defaultModelGptOss120b.enabled, + id: defaultModelOpus48.id, + provider: defaultModelOpus48.provider, + name: defaultModelOpus48.name, + model: defaultModelOpus48.model, + isSystem: defaultModelOpus48.isSystem, + enabled: defaultModelOpus48.enabled, }) // Insert a profile with modified values await db.insert(modelProfilesTable).values({ - modelId: defaultModelGptOss120b.id, + modelId: defaultModelOpus48.id, temperature: 0.99, maxSteps: 1, deletedAt: new Date().toISOString(), }) // Reset to defaults - await resetModelProfileToDefault(getDb(), defaultModelGptOss120b.id) + await resetModelProfileToDefault(getDb(), defaultModelOpus48.id) - const profile = await getModelProfile(getDb(), defaultModelGptOss120b.id) + const profile = await getModelProfile(getDb(), defaultModelOpus48.id) expect(profile).not.toBe(null) - expect(profile?.temperature).toBe(defaultModelProfileGptOss120b.temperature) - expect(profile?.maxSteps).toBe(defaultModelProfileGptOss120b.maxSteps) - expect(profile?.maxAttempts).toBe(defaultModelProfileGptOss120b.maxAttempts) - expect(profile?.nudgeThreshold).toBe(defaultModelProfileGptOss120b.nudgeThreshold) + expect(profile?.temperature).toBe(defaultModelProfileOpus48.temperature) + expect(profile?.maxSteps).toBe(defaultModelProfileOpus48.maxSteps) + expect(profile?.maxAttempts).toBe(defaultModelProfileOpus48.maxAttempts) + expect(profile?.nudgeThreshold).toBe(defaultModelProfileOpus48.nudgeThreshold) expect(profile?.deletedAt).toBe(null) }) @@ -268,31 +268,31 @@ describe('Model Profiles DAL', () => { describe('createDefaultModelProfile', () => { it('should create a profile for a known default model', async () => { const db = getDb() - const { defaultModelProfileGptOss120b, hashModelProfile } = await import('@/defaults/model-profiles') + const { defaultModelProfileOpus48, hashModelProfile } 
= await import('@/defaults/model-profiles') await db.insert(modelsTable).values({ - id: defaultModelGptOss120b.id, - provider: defaultModelGptOss120b.provider, - name: defaultModelGptOss120b.name, - model: defaultModelGptOss120b.model, - isSystem: defaultModelGptOss120b.isSystem, - enabled: defaultModelGptOss120b.enabled, + id: defaultModelOpus48.id, + provider: defaultModelOpus48.provider, + name: defaultModelOpus48.name, + model: defaultModelOpus48.model, + isSystem: defaultModelOpus48.isSystem, + enabled: defaultModelOpus48.enabled, }) - await createDefaultModelProfile(getDb(), defaultModelGptOss120b.id) + await createDefaultModelProfile(getDb(), defaultModelOpus48.id) - const profile = await getModelProfile(getDb(), defaultModelGptOss120b.id) + const profile = await getModelProfile(getDb(), defaultModelOpus48.id) expect(profile).not.toBe(null) - expect(profile?.modelId).toBe(defaultModelGptOss120b.id) - expect(profile?.temperature).toBe(defaultModelProfileGptOss120b.temperature) + expect(profile?.modelId).toBe(defaultModelOpus48.id) + expect(profile?.temperature).toBe(defaultModelProfileOpus48.temperature) // Should store the defaultHash const rawProfile = await db .select() .from(modelProfilesTable) - .where(eq(modelProfilesTable.modelId, defaultModelGptOss120b.id)) + .where(eq(modelProfilesTable.modelId, defaultModelOpus48.id)) .get() - expect(rawProfile?.defaultHash).toBe(hashModelProfile(defaultModelProfileGptOss120b)) + expect(rawProfile?.defaultHash).toBe(hashModelProfile(defaultModelProfileOpus48)) }) it('should do nothing for a model without seed data', async () => { @@ -318,24 +318,24 @@ describe('Model Profiles DAL', () => { const db = getDb() await db.insert(modelsTable).values({ - id: defaultModelGptOss120b.id, - provider: defaultModelGptOss120b.provider, - name: defaultModelGptOss120b.name, - model: defaultModelGptOss120b.model, - isSystem: defaultModelGptOss120b.isSystem, - enabled: defaultModelGptOss120b.enabled, + id: defaultModelOpus48.id, + 
provider: defaultModelOpus48.provider, + name: defaultModelOpus48.name, + model: defaultModelOpus48.model, + isSystem: defaultModelOpus48.isSystem, + enabled: defaultModelOpus48.enabled, }) // Insert a custom profile first await db.insert(modelProfilesTable).values({ - modelId: defaultModelGptOss120b.id, + modelId: defaultModelOpus48.id, temperature: 0.99, }) // Calling createDefaultModelProfile should not overwrite - await createDefaultModelProfile(getDb(), defaultModelGptOss120b.id) + await createDefaultModelProfile(getDb(), defaultModelOpus48.id) - const profile = await getModelProfile(getDb(), defaultModelGptOss120b.id) + const profile = await getModelProfile(getDb(), defaultModelOpus48.id) expect(profile?.temperature).toBe(0.99) }) }) diff --git a/src/dal/models.test.ts b/src/dal/models.test.ts index f114562ec..172951ec8 100644 --- a/src/dal/models.test.ts +++ b/src/dal/models.test.ts @@ -15,7 +15,7 @@ import { import { afterAll, beforeAll, beforeEach, describe, expect, it } from 'bun:test' import { eq } from 'drizzle-orm' import { v7 as uuidv7 } from 'uuid' -import { defaultModelGptOss120b, hashModel } from '@/defaults/models' +import { defaultModelOpus48, hashModel } from '@/defaults/models' import { isModelModified } from '@/defaults/utils' import type { Model } from '@/types' import { @@ -933,7 +933,7 @@ describe('Models DAL', () => { describe('resetModelToDefault', () => { it('restores default fields and refreshes defaultHash', async () => { const db = getDb() - const defaultModel = defaultModelGptOss120b + const defaultModel = defaultModelOpus48 await db.insert(modelsTable).values({ ...defaultModel, @@ -956,7 +956,7 @@ describe('Models DAL', () => { it('clears the local-only api key on reset', async () => { const db = getDb() - const defaultModel = defaultModelGptOss120b + const defaultModel = defaultModelOpus48 await db.insert(modelsTable).values({ ...defaultModel }) await db.insert(modelsSecretsTable).values({ modelId: defaultModel.id, apiKey: 
'sk-user-supplied' }) @@ -973,7 +973,7 @@ describe('Models DAL', () => { it('preserves the row userId (does not overwrite with null from the default template)', async () => { const db = getDb() - const defaultModel = defaultModelGptOss120b + const defaultModel = defaultModelOpus48 // The default template carries `userId: null`. A row that has already // been synced has a real user_id — reset must not overwrite it, otherwise @@ -1046,22 +1046,22 @@ describe('Models DAL', () => { it('should auto-create a default profile for a known seeded model', async () => { const db = getDb() - // Create a model with the same ID as a seeded default (GPT-OSS) + // Create a model with the same ID as a seeded default (Opus 4.8) await createModel(getDb(), { - id: defaultModelGptOss120b.id, + id: defaultModelOpus48.id, provider: 'thunderbolt', - name: 'GPT OSS', - model: 'gpt-oss-120b', + name: 'Opus 4.8', + model: 'opus-4.8', }) // Verify a profile was auto-created const profile = await db .select() .from(modelProfilesTable) - .where(eq(modelProfilesTable.modelId, defaultModelGptOss120b.id)) + .where(eq(modelProfilesTable.modelId, defaultModelOpus48.id)) .get() expect(profile).not.toBeUndefined() - expect(profile?.temperature).toBe(0.3) + expect(profile?.temperature).toBe(0.2) }) it('should not create a profile for an unknown model ID', async () => { diff --git a/src/defaults/automations.ts b/src/defaults/automations.ts index 84b10c7e1..d5b546ad4 100644 --- a/src/defaults/automations.ts +++ b/src/defaults/automations.ts @@ -4,7 +4,7 @@ import { hashValues } from '@/lib/utils' import type { Prompt } from '@/types' -import { defaultModelGptOss120b } from './models' +import { defaultModelOpus48 } from './models' /** * Compute hash of user-editable fields for a prompt @@ -24,7 +24,7 @@ export const defaultAutomationDailyBrief: Prompt = { deletedAt: null, defaultHash: null, userId: null, - modelId: defaultModelGptOss120b.id, + modelId: defaultModelOpus48.id, prompt: `Create a daily brief 
with the following sections. Do not ask me for any missing information - just skip sections for which you are missing information or tools. 1. If you know my location, show me the 7-day forecast. If not, skip this section. @@ -68,7 +68,7 @@ export const defaultAutomationImportantEmails: Prompt = { deletedAt: null, defaultHash: null, userId: null, - modelId: defaultModelGptOss120b.id, + modelId: defaultModelOpus48.id, prompt: `Review my inbox and summarize the 5 most important emails that need my attention today. Include sender, subject, and why each is important.`, } diff --git a/src/defaults/model-profiles.test.ts b/src/defaults/model-profiles.test.ts index 373ee8ee9..68c279d8f 100644 --- a/src/defaults/model-profiles.test.ts +++ b/src/defaults/model-profiles.test.ts @@ -77,8 +77,8 @@ describe('hashModelProfile', () => { }) describe('defaultModelProfiles', () => { - test('contains five profiles', () => { - expect(defaultModelProfiles).toHaveLength(5) + test('contains four profiles', () => { + expect(defaultModelProfiles).toHaveLength(4) }) test('each profile has a non-null modelId', () => { diff --git a/src/defaults/model-profiles.ts b/src/defaults/model-profiles.ts index a4ea2b0bd..6861d3c6a 100644 --- a/src/defaults/model-profiles.ts +++ b/src/defaults/model-profiles.ts @@ -5,7 +5,6 @@ export { defaultModelProfileDeepseekV4Pro, defaultModelProfileGlm51, - defaultModelProfileGptOss120b, defaultModelProfileKimiK26, defaultModelProfileOpus48, defaultModelProfiles, diff --git a/src/defaults/model-profiles/gpt-oss.ts b/src/defaults/model-profiles/gpt-oss.ts deleted file mode 100644 index 5dc6ce4e2..000000000 --- a/src/defaults/model-profiles/gpt-oss.ts +++ /dev/null @@ -1,42 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ - -import type { ModelProfile } from '@/types' -import { defaultModelGptOss120b } from '@/defaults/models' - -export const defaultModelProfileGptOss120b: ModelProfile = { - modelId: defaultModelGptOss120b.id, - temperature: 0.3, - maxSteps: 8, - maxAttempts: 4, - nudgeThreshold: 5, - useSystemMessageModeDeveloper: 1, - providerOptions: { systemMessageMode: 'developer' }, - toolsOverride: `After calling tools, you MUST write a text response for the user. Never finish with only tool calls and no text. If tool results are unclear, summarize what you found anyway. -The user may write in any language. Regardless of the language, you MUST always use tools to find current information before responding.`, - linkPreviewsOverride: null, - chatModeAddendum: `Important: Each distinct fact or claim must have its own [N] citation. For multi-part questions, use a different source for each part when possible. Aim for at least 2 citations in your response.`, - searchModeAddendum: `CRITICAL: Your response MUST use tags with a url attribute containing the FULL URL. -Example of CORRECT output: - - - -Do NOT output just citation numbers like [1] [2] — those render as tiny badges, not rich preview cards. -Do NOT omit the url attribute — without url will NOT render a preview card. -Every link preview MUST have a url="https://..." attribute with the full page URL you fetched.`, - researchModeAddendum: `For research mode: every time you use information from a tool result, you MUST add [N] at the end of that sentence. Use a DIFFERENT [N] for each distinct source. Your final response needs at least 5 unique [N] citations — if you have fewer, go back and add citations to facts you missed. - -CITATION CHECK: Before finishing your response, count your [N] citations. If you have fewer than 5 unique numbers, add more citations to facts that came from your tool results. 
Every paragraph should have at least one [N].`, - citationReinforcementEnabled: 0, - citationReinforcementPrompt: null, - nudgeFinalStep: `This is your last step — tools are no longer available. You must write your final answer now. Summarize the key facts from your tool results and present them clearly to the user. Do not leave the response empty.`, - nudgePreventive: `You have gathered substantial information. Start composing your response — you can still make a few more tool calls if needed, but begin writing your answer.`, - nudgeRetry: `Your previous attempt produced no visible text. This is a retry — write your answer now using the information already gathered from tools. The user is waiting for a response.`, - nudgeSearchFinalStep: `This is your last step — tools are no longer available. Output your results now using tags. Each must have a url attribute with the full URL. Do not leave the response empty.`, - nudgeSearchPreventive: `You have enough search results. Start writing your widgets — you can still make a few more tool calls if needed.`, - nudgeSearchRetry: `Your previous attempt produced no visible text. Output for each result you found. 
The url attribute is required.`, - deletedAt: null, - defaultHash: null, - userId: null, -} diff --git a/src/defaults/model-profiles/index.ts b/src/defaults/model-profiles/index.ts index 1d2eabc72..69a0e3828 100644 --- a/src/defaults/model-profiles/index.ts +++ b/src/defaults/model-profiles/index.ts @@ -6,13 +6,11 @@ import { hashValues } from '@/lib/utils' import type { ModelProfile } from '@/types' import { defaultModelProfileDeepseekV4Pro } from './deepseek' import { defaultModelProfileGlm51 } from './glm' -import { defaultModelProfileGptOss120b } from './gpt-oss' import { defaultModelProfileKimiK26 } from './kimi' import { defaultModelProfileOpus48 } from './opus' export { defaultModelProfileDeepseekV4Pro } from './deepseek' export { defaultModelProfileGlm51 } from './glm' -export { defaultModelProfileGptOss120b } from './gpt-oss' export { defaultModelProfileKimiK26 } from './kimi' export { defaultModelProfileOpus48 } from './opus' @@ -47,7 +45,6 @@ export const hashModelProfile = (profile: ModelProfile): string => /** All default model profiles for iteration */ export const defaultModelProfiles: ReadonlyArray<ModelProfile> = [ - defaultModelProfileGptOss120b, defaultModelProfileOpus48, defaultModelProfileDeepseekV4Pro, defaultModelProfileKimiK26, diff --git a/src/defaults/models.ts b/src/defaults/models.ts index 637fd4f83..c215e3496 100644 --- a/src/defaults/models.ts +++ b/src/defaults/models.ts @@ -32,26 +32,6 @@ export const hashModel = (model: Model): string => { * * Each model is exported individually so it can be referenced by automations */ -export const defaultModelGptOss120b: Model = { - id: 'd045a4c0-3f93-4f30-a608-24e07856e11d', - name: 'GPT OSS', - provider: 'thunderbolt', - model: 'gpt-oss-120b', - isSystem: 1, - enabled: 1, - isConfidential: 1, - contextWindow: 131072, - toolUsage: 1, - startWithReasoning: 0, - supportsParallelToolCalls: 1, - deletedAt: null, - apiKey: null, - url: null, - defaultHash: null, - vendor: 'openai', - description: 'Fast', - userId: 
null, -} /** * Opus 4.8 reuses the row id originally assigned to Sonnet 4.5 (and inherited by 4.7). @@ -146,7 +126,6 @@ export const defaultModelGlm51: Model = { * "Provided" group of the model picker. Reorder freely. */ export const defaultModels: ReadonlyArray<Model> = [ - defaultModelGptOss120b, defaultModelOpus48, defaultModelDeepseekV4Pro, defaultModelKimiK26,