From b12416b691846dbe17f3c73956ed3c60b517ecc9 Mon Sep 17 00:00:00 2001 From: Anthony Mikinka Date: Fri, 6 Mar 2026 12:12:43 -0800 Subject: [PATCH 1/2] feat(model): update default model to Qwen3-Coder-30B-A3B-Instruct-GGUF - Change default modelName from gpt-oss-mxp4 to Qwen3-Coder-30B-A3B-Instruct-GGUF - Update extractionModelName to match - Include pending UI changes from main.tsx Co-Authored-By: Claude Sonnet 4.5 --- src/database/repositories/SettingsRepository.ts | 4 ++-- src/ui/main.tsx | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/database/repositories/SettingsRepository.ts b/src/database/repositories/SettingsRepository.ts index b95ee5f..92a05c4 100644 --- a/src/database/repositories/SettingsRepository.ts +++ b/src/database/repositories/SettingsRepository.ts @@ -80,8 +80,8 @@ export class SettingsRepository { private async createDefaultInterviewerSettings(): Promise<InterviewerSettings> { const defaults: InterviewerSettings = { modelProvider: 'lemonade-server', - modelName: 'gpt-oss-mxp4', - extractionModelName: 'gpt-oss-mxp4', + modelName: 'Qwen3-Coder-30B-A3B-Instruct-GGUF', + extractionModelName: 'Qwen3-Coder-30B-A3B-Instruct-GGUF', temperature: 0.7, maxTokens: 2000, interviewStyle: 'conversational', diff --git a/src/ui/main.tsx b/src/ui/main.tsx index fbdeaef..e3a17bd 100644 --- a/src/ui/main.tsx +++ b/src/ui/main.tsx @@ -1,14 +1,14 @@ import React from 'react'; import ReactDOM from 'react-dom/client'; -import { BrowserRouter } from 'react-router-dom'; +import { HashRouter } from 'react-router-dom'; import App from './App'; import './index.css'; ReactDOM.createRoot(document.getElementById('root')!).render( <React.StrictMode> {/* @ts-expect-error: Suppressing future flag type error until types catch up */} - <BrowserRouter> + <HashRouter> <App /> - </BrowserRouter> + </HashRouter> </React.StrictMode>, ); From bd63bea21e68502298193942e28a813134330042 Mon Sep 17 00:00:00 2001 From: Anthony Mikinka Date: Fri, 6 Mar 2026 12:18:03 -0800 Subject: [PATCH 2/2] fix(extraction): use extraction model setting for all extraction tasks Problem: - 
StructuredExtractionService was ignoring extractionModel parameter - LemonadeClient always used settings.modelName regardless of task - Small models (Llama-3.2-1B) were failing on extraction tasks - Job detail extraction was returning empty template instead of data Solution: - Add optional 'model' parameter to LemonadeClient.sendMessage() - Pass extractionModel to all sendMessage calls in StructuredExtractionService - Now extraction tasks use the configured extractionModelName - With Qwen3-Coder-30B-A3B-Instruct-GGUF, extractions should succeed Files changed: - LemonadeClient.ts: Accept model override in sendMessage options - StructuredExtractionService.ts: Use extractionModel for all requests Co-Authored-By: Claude Sonnet 4.5 --- src/services/LemonadeClient.ts | 8 +++++--- src/services/StructuredExtractionService.ts | 3 +++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/services/LemonadeClient.ts b/src/services/LemonadeClient.ts index 99180cc..17173b1 100644 --- a/src/services/LemonadeClient.ts +++ b/src/services/LemonadeClient.ts @@ -97,7 +97,7 @@ export class LemonadeClient { /** * Send a message and get AI response */ - async sendMessage(conversationHistory: Message[], options?: { maxTokens?: number; maxInputTokens?: number }): Promise<string> { + async sendMessage(conversationHistory: Message[], options?: { maxTokens?: number; maxInputTokens?: number; model?: string }): Promise<string> { try { // Check server connection first if (!this.isConnected) { @@ -132,8 +132,10 @@ export class LemonadeClient { const sentInputChars = truncatedHistory.reduce((s, m) => s + m.content.length, 0); const wasTruncated = truncatedHistory.length < conversationHistory.length; + const modelToUse = options?.model ?? 
this.settings.modelName; + console.log(`[LLM:sendMessage] ── Request ─────────────────────────────────`); - console.log(`[LLM:sendMessage] model=${this.settings.modelName}`); + console.log(`[LLM:sendMessage] model=${modelToUse}`); console.log(`[LLM:sendMessage] maxInputTokens=${maxInputTokens}, maxOutputTokens=${options?.maxTokens ?? this.settings.maxTokens}`); console.log(`[LLM:sendMessage] messages: ${conversationHistory.length} total → ${truncatedHistory.length} sent${wasTruncated ? ' (TRUNCATED)' : ''}`); console.log(`[LLM:sendMessage] input chars: ${totalInputChars} total → ${sentInputChars} sent (~${Math.round(sentInputChars/4)} tokens)`); @@ -151,7 +153,7 @@ export class LemonadeClient { // consume tokens for chain-of-thought before producing visible content. const maxTokens = options?.maxTokens ?? this.settings.maxTokens; const completion = await this.client.chat.completions.create({ - model: this.settings.modelName, + model: modelToUse, messages: messages, temperature: this.settings.temperature, max_tokens: maxTokens, diff --git a/src/services/StructuredExtractionService.ts b/src/services/StructuredExtractionService.ts index a0618bf..77119df 100644 --- a/src/services/StructuredExtractionService.ts +++ b/src/services/StructuredExtractionService.ts @@ -60,6 +60,7 @@ export class StructuredExtractionService { const response = await this.lemonadeClient.sendMessage(messages, { maxTokens: 2048, + model: this.extractionModel, }); console.log('[StructuredExtractionService] Feedback extraction raw response:', response.substring(0, 500)); @@ -135,6 +136,7 @@ export class StructuredExtractionService { const response = await this.lemonadeClient.sendMessage(messages, { maxTokens: 1024, + model: this.extractionModel, }); const parsed = this.parseJSON(response); @@ -206,6 +208,7 @@ export class StructuredExtractionService { // 2048 gives sufficient headroom for thinking + a compact JSON response. 
const response = await this.lemonadeClient.sendMessage(messages, { maxTokens: 2048, + model: this.extractionModel, }); const parsed = this.parseJSON(response);