From b12416b691846dbe17f3c73956ed3c60b517ecc9 Mon Sep 17 00:00:00 2001 From: Anthony Mikinka Date: Fri, 6 Mar 2026 12:12:43 -0800 Subject: [PATCH 1/2] feat(model): update default model to Qwen3-Coder-30B-A3B-Instruct-GGUF - Change default modelName from gpt-oss-mxp4 to Qwen3-Coder-30B-A3B-Instruct-GGUF - Update extractionModelName to match - Include pending UI changes from main.tsx Co-Authored-By: Claude Sonnet 4.5 --- src/database/repositories/SettingsRepository.ts | 4 ++-- src/ui/main.tsx | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/database/repositories/SettingsRepository.ts b/src/database/repositories/SettingsRepository.ts index b95ee5f..92a05c4 100644 --- a/src/database/repositories/SettingsRepository.ts +++ b/src/database/repositories/SettingsRepository.ts @@ -80,8 +80,8 @@ export class SettingsRepository { private async createDefaultInterviewerSettings(): Promise<InterviewerSettings> { const defaults: InterviewerSettings = { modelProvider: 'lemonade-server', - modelName: 'gpt-oss-mxp4', - extractionModelName: 'gpt-oss-mxp4', + modelName: 'Qwen3-Coder-30B-A3B-Instruct-GGUF', + extractionModelName: 'Qwen3-Coder-30B-A3B-Instruct-GGUF', temperature: 0.7, maxTokens: 2000, interviewStyle: 'conversational', diff --git a/src/ui/main.tsx b/src/ui/main.tsx index fbdeaef..e3a17bd 100644 --- a/src/ui/main.tsx +++ b/src/ui/main.tsx @@ -1,14 +1,14 @@ import React from 'react'; import ReactDOM from 'react-dom/client'; -import { BrowserRouter } from 'react-router-dom'; +import { HashRouter } from 'react-router-dom'; import App from './App'; import './index.css'; ReactDOM.createRoot(document.getElementById('root')!).render( <React.StrictMode> {/* @ts-expect-error: Suppressing future flag type error until types catch up */} - <BrowserRouter> + <HashRouter> <App /> - </BrowserRouter> + </HashRouter> </React.StrictMode>, ); From bd63bea21e68502298193942e28a813134330042 Mon Sep 17 00:00:00 2001 From: Anthony Mikinka Date: Fri, 6 Mar 2026 12:18:03 -0800 Subject: [PATCH 2/2] fix(extraction): use extraction model setting for all extraction tasks Problem: - 
StructuredExtractionService was ignoring extractionModel parameter - LemonadeClient always used settings.modelName regardless of task - Small models (Llama-3.2-1B) were failing on extraction tasks - Job detail extraction was returning empty template instead of data Solution: - Add optional 'model' parameter to LemonadeClient.sendMessage() - Pass extractionModel to all sendMessage calls in StructuredExtractionService - Now extraction tasks use the configured extractionModelName - With Qwen3-Coder-30B-A3B-Instruct-GGUF, extractions should succeed Files changed: - LemonadeClient.ts: Accept model override in sendMessage options - StructuredExtractionService.ts: Use extractionModel for all requests Co-Authored-By: Claude Sonnet 4.5 --- src/services/LemonadeClient.ts | 8 +++++--- src/services/StructuredExtractionService.ts | 3 +++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/services/LemonadeClient.ts b/src/services/LemonadeClient.ts index 99180cc..17173b1 100644 --- a/src/services/LemonadeClient.ts +++ b/src/services/LemonadeClient.ts @@ -97,7 +97,7 @@ export class LemonadeClient { /** * Send a message and get AI response */ - async sendMessage(conversationHistory: Message[], options?: { maxTokens?: number; maxInputTokens?: number }): Promise<string> { + async sendMessage(conversationHistory: Message[], options?: { maxTokens?: number; maxInputTokens?: number; model?: string }): Promise<string> { try { // Check server connection first if (!this.isConnected) { @@ -132,8 +132,10 @@ export class LemonadeClient { const sentInputChars = truncatedHistory.reduce((s, m) => s + m.content.length, 0); const wasTruncated = truncatedHistory.length < conversationHistory.length; + const modelToUse = options?.model ?? 
this.settings.modelName; + console.log(`[LLM:sendMessage] ── Request ─────────────────────────────────`); - console.log(`[LLM:sendMessage] model=${this.settings.modelName}`); + console.log(`[LLM:sendMessage] model=${modelToUse}`); console.log(`[LLM:sendMessage] maxInputTokens=${maxInputTokens}, maxOutputTokens=${options?.maxTokens ?? this.settings.maxTokens}`); console.log(`[LLM:sendMessage] messages: ${conversationHistory.length} total → ${truncatedHistory.length} sent${wasTruncated ? ' (TRUNCATED)' : ''}`); console.log(`[LLM:sendMessage] input chars: ${totalInputChars} total → ${sentInputChars} sent (~${Math.round(sentInputChars/4)} tokens)`); @@ -151,7 +153,7 @@ export class LemonadeClient { // consume tokens for chain-of-thought before producing visible content. const maxTokens = options?.maxTokens ?? this.settings.maxTokens; const completion = await this.client.chat.completions.create({ - model: this.settings.modelName, + model: modelToUse, messages: messages, temperature: this.settings.temperature, max_tokens: maxTokens, diff --git a/src/services/StructuredExtractionService.ts b/src/services/StructuredExtractionService.ts index a0618bf..77119df 100644 --- a/src/services/StructuredExtractionService.ts +++ b/src/services/StructuredExtractionService.ts @@ -60,6 +60,7 @@ export class StructuredExtractionService { const response = await this.lemonadeClient.sendMessage(messages, { maxTokens: 2048, + model: this.extractionModel, }); console.log('[StructuredExtractionService] Feedback extraction raw response:', response.substring(0, 500)); @@ -135,6 +136,7 @@ export class StructuredExtractionService { const response = await this.lemonadeClient.sendMessage(messages, { maxTokens: 1024, + model: this.extractionModel, }); const parsed = this.parseJSON(response); @@ -206,6 +208,7 @@ export class StructuredExtractionService { // 2048 gives sufficient headroom for thinking + a compact JSON response. 
const response = await this.lemonadeClient.sendMessage(messages, { maxTokens: 2048, + model: this.extractionModel, }); const parsed = this.parseJSON(response);