From d53978ba3a4773962f5d5a1d997f52502ec5a2b3 Mon Sep 17 00:00:00 2001 From: tpikachu Date: Wed, 1 Jul 2026 09:23:21 -0500 Subject: [PATCH 1/3] feat: single Answer Format control + natural, anti-AI answers (v1.2 #1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Collapse the two overlapping answer-control axes into one. Delete AnswerStyle (default/star/technical/conversational — muddy + underused) and promote length into a single AnswerFormat = key_points | explanation | detailed: - key_points: terse glanceable bullets (cap 220 tok). - explanation (NEW): a natural, flowing first-person explanation (cap 340 tok). - detailed: thorough, with one concrete example (cap 800 tok). answer.ts merges STYLE_INSTRUCTION + LENGTH_INSTRUCTION into FORMAT_INSTRUCTION, adds a naturalness / anti-AI directive (contractions, varied sentence length, banned corporate/AI tells + hedging, first-person — no fake "um"/"uh"), and keeps the citation + fabrication-guard rules. The single `format` field is threaded through AnswerPrefs, sessionManager, session.ipc, preload, mockManager, the store, and the pages; the Cue Card's two controls collapse into one 3-way Format toggle. Removed Profile.answerStyle — the answer_style DB column is kept (default 'concise') but unused, so NO migration. Fixes an e2e regression (session.start old positional args) caught by the adversarial review; typecheck couldn't see it (inside page.evaluate). Verified: typecheck · 104 unit · build green. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/05-IPC-MAP.md | 8 +- docs/06-OPENAI-SERVICE.md | 15 ++-- docs/sessions/2026-07-01.md | 47 +++++++++++ e2e/data-integrity.spec.ts | 1 - e2e/error-handling.spec.ts | 3 +- src/main/db/repositories/profiles.repo.ts | 11 +-- src/main/ipc/profiles.ipc.ts | 10 +-- src/main/ipc/session.ipc.ts | 19 ++--- src/main/services/mock/mockManager.ts | 3 +- src/main/services/openai/answer.test.ts | 24 ++++-- src/main/services/openai/answer.ts | 68 ++++++++-------- src/main/services/samples/sampleData.ts | 1 - src/main/services/session/sessionManager.ts | 52 +++++-------- src/preload/index.ts | 18 ++--- .../dashboard/pages/InterviewPage.tsx | 3 +- src/renderer/dashboard/pages/ProfilesPage.tsx | 1 - src/renderer/overlay/Overlay.tsx | 77 +++++++------------ src/renderer/store/useLiveSession.ts | 8 +- src/shared/ipc.ts | 5 +- src/shared/types.ts | 14 ++-- 20 files changed, 187 insertions(+), 201 deletions(-) create mode 100644 docs/sessions/2026-07-01.md diff --git a/docs/05-IPC-MAP.md b/docs/05-IPC-MAP.md index b1cb937..b6f181a 100644 --- a/docs/05-IPC-MAP.md +++ b/docs/05-IPC-MAP.md @@ -104,8 +104,8 @@ résumé, persisted, and indexed as `story` chunks so they ground live answers. ### session | Channel | Request | Response | |---|---|---| -| `session:start` | `{ profileId, interviewType, answerStyle, jobId, answerLength }` | `Session` (`answerStyle` = format/tone; `answerLength` = key_points\|detailed) | -| `session:resume` | `{ sessionId, answerStyle?, answerLength? }` | `Session` (re-activate an existing session row and continue it; interview type is restored from the row — one session per interview, type is dynamic) | +| `session:start` | `{ profileId, interviewType, jobId, answerFormat }` | `Session` (`answerFormat` = key_points\|explanation\|detailed — the single answer control) | +| `session:resume` | `{ sessionId, answerFormat? 
}` | `Session` (re-activate an existing session row and continue it; interview type is restored from the row — one session per interview, type is dynamic) | | `session:stop` | `{ sessionId }` | `Session` | | `session:toggle-pause` | `{ sessionId }` | `{ paused }` | | `session:toggle-pause-active` | — | `{ paused, active }` (global shortcut target — toggles the live session) | @@ -120,7 +120,7 @@ résumé, persisted, and indexed as `story` chunks so they ground live answers. | `session:ask` | `{ sessionId, questionText }` | `{ questionId }` (manual ask; answer streams) | | `session:ask-active` | `{ questionText }` | `{ ok }` (Cue Card "Ask" box — manual ask for the active session, no id) | | `session:set-interview-type` | `{ sessionId, interviewType }` | `{ ok }` (set the session-level type — chosen by the user in the save prompt at stop) | -| `session:set-answer-prefs` | `{ interviewType?, style?, length?, pronunciation? }` | `{ interviewType, style, length, pronunciation }` (live Cue Card controls; acts on the active session. Switching `interviewType` is dynamic — it persists onto the session row + reframes later answers) | +| `session:set-answer-prefs` | `{ interviewType?, format?, pronunciation? }` | `{ interviewType, format, pronunciation }` (live Cue Card controls; acts on the active session. 
Switching `interviewType` is dynamic — it persists onto the session row + reframes later answers) | | `session:set-answering` | `{ enabled }` | `{ enabled, answered }` (coding "listen-only" toggle: when disabled, the interviewer is still transcribed but not auto-answered; enabling it also answers the question they just asked) | | `session:regenerate` | — | `{ regenerated }` (re-answer the last question for the active session) | | `session:clear-answer` | — | `{ cleared }` (abort the in-flight answer for the active session) | @@ -195,7 +195,7 @@ Channel constants live in `EVENTS` (`src/shared/ipc.ts`); payload types are in | `session:answer-done` | `{ questionId }` | overlay | | `session:answer-reset` | `{ questionId }` | overlay (regenerate: clear the Cue Card answer but keep the transcript — no new question row/line) | | `session:client-info` | `ClientInfo \| null` | overlay (active interview: company/title/notes + profileName + grounding flags hasResume/hasJd/hasCompany, for the Cue Card header + session bar + ⓘ panel; `null` clears on stop) | -| `session:answer-prefs` | `AnswerPrefs` (`{ interviewType, style, length, pronunciation }`) | overlay (seeds the Cue Card answer-control toggles) | +| `session:answer-prefs` | `AnswerPrefs` (`{ interviewType, format, pronunciation }`) | overlay (seeds the Cue Card answer-control toggles) | | `session:audio-level` | `{ level }` (0-1 RMS, ~12/sec) | overlay (drives the Cue Card mic meter; computed in `feedRealtimeAudio` since the stream lives in the dashboard renderer) | | `session:save-prompt` | `SavePrompt` (`{ sessionId, interviewType, jobTitle, questionCount }`) | dashboard (a session just stopped → prompt save-or-discard + pick the type) | | `session:context` | `{ questionId, question, chunks }` | dashboard (debug: retrieved chunks) | diff --git a/docs/06-OPENAI-SERVICE.md b/docs/06-OPENAI-SERVICE.md index 20b39dd..3a24176 100644 --- a/docs/06-OPENAI-SERVICE.md +++ b/docs/06-OPENAI-SERVICE.md @@ -94,11 +94,13 @@ 
Small/fast model. Returns `{ text, type, confidence, strategy }`. Also used as a cheap "is this actually a question?" gate before answer generation. ### answer.ts — `streamAnswer(input) => AsyncIterable` -Input: `{ question, contextChunks, profile, style, length, pronunciation, interviewType, signal? }`. +Input: `{ question, contextChunks, profile, format, pronunciation, interviewType, signal? }`. Builds a **grounding** prompt: - System: persona + rules ("ground answers in provided context; never invent experience; if no relevant experience, give a transferable-skills answer and - set a risk warning"); LENGTH is a hard constraint. + set a risk warning"); FORMAT is a hard constraint; plus a **naturalness / anti-AI-tone** + directive (contractions, varied sentence length, no corporate/AI tells or hedging — must + read 100% human, never AI-generated). - **Grounded / proof-linked answers:** `buildContext` numbers the chunks `[1] (resume) …`; the prompt makes the model cite those numbers inline after each grounded claim (e.g. `…cut p99 latency 40% [1]`). The Cue Card renders the cited `[i]` as source chips @@ -106,10 +108,13 @@ Builds a **grounding** prompt: for anything the context can't support the model must not invent it — it leads with `⚠`, says it's not in the candidate's background, and pivots to a cited transferable framing. -- User: question + retrieved context + profile summary + the chosen format/length, +- User: question + retrieved context + profile summary + the chosen answer format, plus optional pronunciation hints for rare/technical terms. -- `length` (`key_points` | `detailed`) also sets a hard `max_output_tokens` ceiling - (220 / 800) so "key points" can never drift long regardless of the prompt. +- `format` — the single answer control (v1.2): `key_points` (terse bullets) | `explanation` + (a natural, flowing first-person explanation) | `detailed` (thorough, with one example). 
+ It also sets a hard `max_output_tokens` ceiling (220 / 340 / 800) so "key points" can never + drift long regardless of the prompt. (The old format/tone × length split — `star`/`technical`/ + `conversational` — was removed.) Streams tokens (`{type:'delta', token}`), then a `usage` event, then a structured `meta` event `{ talkingPoints[], resumeMatch, star?, clarifyingQuestion?, riskWarning?, followupQuestion }`. **Status:** the prose answer + token usage are live; the meta pass diff --git a/docs/sessions/2026-07-01.md b/docs/sessions/2026-07-01.md new file mode 100644 index 0000000..48d678f --- /dev/null +++ b/docs/sessions/2026-07-01.md @@ -0,0 +1,47 @@ +# 2026-07-01 + +Started **v1.2** (branch `feat/prompt-overhaul`) — a prompt/answer overhaul planned with the +user in three increments: (1) Answer Format + naturalness, (2) Coding solver, (3) Pronunciation. +(v1.1.0 shipped first: the five trainer features + competency coverage, released via a +`v1.1.0` tag → GitHub Release.) + +## Increment 1 — Answer Format + naturalness + +Collapsed the two overlapping answer-control axes into ONE. The old **Answer Format** (tone: +`default`/`star`/`technical`/`conversational`) was muddy and underused, so it's gone; the length +control is promoted into the new single **Answer Format**: + +- `AnswerFormat = 'key_points' | 'explanation' | 'detailed'` (replaces `AnswerStyle` + `AnswerLength`). + - **key_points** — terse, glanceable bullets (~60w, cap 220 tok). + - **explanation** *(new)* — a natural, flowing first-person explanation (~90–130w, cap 340 tok). + - **detailed** — thorough, with one concrete example (~150–220w, cap 800 tok). +- `answer.ts`: merged `STYLE_INSTRUCTION` + `LENGTH_INSTRUCTION` → `FORMAT_INSTRUCTION`, and + `LENGTH_MAX_TOKENS` → `FORMAT_MAX_TOKENS`. 
Added a **naturalness / anti-AI directive** to the + system prompt: contractions, varied sentence length, banned corporate/AI tells + hedging, + first-person, confident — but explicitly *not* fake disfluency ("um"/"uh"). Citation + + fabrication-guard rules kept intact. +- Threaded the single `format` field through everything: `AnswerPrefs` (dropped `style`+`length`), + `sessionManager` (`LiveState.answerFormat`; goLive/start/resume/setAnswerPrefs/generateAnswer + + the `answerPrefs` broadcast), `session.ipc` zod, preload, `mockManager`, `useLiveSession.startNew`, + InterviewPage/ProfilesPage/sampleData. Cue Card: the two controls collapse into **one 3-way + Format toggle** (Key points / Explanation / Detailed). +- Removed `Profile.answerStyle`. The `profiles.answer_style` DB column is **kept** (NOT NULL default + `'concise'`) but no longer read/written — **no migration**. + +**Adversarial review** (3 dimensions → refute-verify) found no app-code bugs but caught a real +**e2e regression**: `error-handling.spec.ts` still called `api.session.start` with the old 5-arg +signature, so after the positional reorder `answerFormat` became `null` and zod rejected it +(`.default` only fills `undefined`, not `null`). Fixed that call + two stale `answerStyle` fields in +e2e specs. (Typecheck couldn't catch it — it lives inside a `page.evaluate` string typed as `any`.) + +**Naturalness validation:** the anti-AI tone is prompt-only and subjective — best judged live in +`npm run dev` with a real key; carefully engineered here (no auto-spike since the answer path can't +run headlessly under vitest). + +Verified: `typecheck` · 104 unit (+2) · `build` green. + +## Next (v1.2, same branch) +- Increment 2 — Coding solver: language picker (default JS), commented + optimal solution, + explanation-style by default, résumé/JD-free (already is). 
+- Increment 3 — Pronunciation: default ON + a structured guide panel (Part of speech / Singular / + Pronunciation) for hard words, separate from the natural answer. diff --git a/e2e/data-integrity.spec.ts b/e2e/data-integrity.spec.ts index 02dbc93..eefe159 100644 --- a/e2e/data-integrity.spec.ts +++ b/e2e/data-integrity.spec.ts @@ -11,7 +11,6 @@ const newProfile = { targetRole: 'SWE', targetCompany: null, interviewType: 'general', - answerStyle: 'default', language: 'en', resumeText: null, jdText: null, diff --git a/e2e/error-handling.spec.ts b/e2e/error-handling.spec.ts index 6c125ea..59c8491 100644 --- a/e2e/error-handling.spec.ts +++ b/e2e/error-handling.spec.ts @@ -19,12 +19,11 @@ test('a failed answer surfaces an error and clears the streaming state (B1/B2)', targetRole: 'SWE', targetCompany: null, interviewType: 'general', - answerStyle: 'default', language: 'en', resumeText: null, jdText: null, }); - const session = await api.session.start(profile.id, 'general', 'default', null, 'key_points'); + const session = await api.session.start(profile.id, 'general', null, 'key_points'); // Listen BEFORE asking. answerDone firing on a failed ask is the core B1 fix // (the Cue Card card stops spinning); sessionError proves the failure isn't silent. diff --git a/src/main/db/repositories/profiles.repo.ts b/src/main/db/repositories/profiles.repo.ts index 5f373cb..1cc821d 100644 --- a/src/main/db/repositories/profiles.repo.ts +++ b/src/main/db/repositories/profiles.repo.ts @@ -4,12 +4,8 @@ import type { Profile, ProfileInput } from '@shared/types'; type Row = typeof schema.profiles.$inferSelect; -/** Map legacy answer-style values (length is now a separate axis) to a valid - * format. Older profiles stored 'concise'/'detailed'. */ -function toAnswerStyle(v: string): Profile['answerStyle'] { - return v === 'star' || v === 'technical' || v === 'conversational' ? 
v : 'default'; -} - +// The `answer_style` DB column is retained (NOT NULL default 'concise') but no longer +// read or written — the answer format is a single live Cue Card control now (v1.2). function toProfile(r: Row): Profile { return { id: r.id, @@ -17,7 +13,6 @@ function toProfile(r: Row): Profile { targetRole: r.targetRole, targetCompany: r.targetCompany, interviewType: r.interviewType as Profile['interviewType'], - answerStyle: toAnswerStyle(r.answerStyle), language: r.language, resumeText: r.resumeText, jdText: r.jdText, @@ -53,7 +48,6 @@ export const profilesRepo = { targetRole: input.targetRole, targetCompany: input.targetCompany, interviewType: input.interviewType, - answerStyle: input.answerStyle, language: input.language, resumeText: input.resumeText, jdText: input.jdText, @@ -69,7 +63,6 @@ export const profilesRepo = { targetRole: patch.targetRole, targetCompany: patch.targetCompany, interviewType: patch.interviewType, - answerStyle: patch.answerStyle, language: patch.language, resumeText: patch.resumeText, jdText: patch.jdText, diff --git a/src/main/ipc/profiles.ipc.ts b/src/main/ipc/profiles.ipc.ts index 053e85f..3ea771c 100644 --- a/src/main/ipc/profiles.ipc.ts +++ b/src/main/ipc/profiles.ipc.ts @@ -12,19 +12,12 @@ const interviewType = z.enum([ 'sales', 'general', ]); -// Accept legacy length values ('concise'/'detailed') on the wire and fold them -// into the format axis, so old clients/profiles don't fail validation. -const answerStyle = z - .enum(['default', 'star', 'technical', 'conversational', 'concise', 'detailed']) - .transform((v) => (v === 'concise' || v === 'detailed' ? 'default' : v)); - const profileInput = z.object({ name: z.string().min(1), targetRole: z.string().default(''), targetCompany: z.string().nullable().default(null), - // Interview type & answer style are chosen per run now; kept optional/legacy. + // Interview type is chosen per run now; kept optional/legacy on the profile. 
interviewType: interviewType.default('general'), - answerStyle: answerStyle.default('default'), language: z.string().default('en'), resumeText: z.string().nullable().default(null), jdText: z.string().nullable().default(null), @@ -60,7 +53,6 @@ export function registerProfilesIpc(): void { targetRole: src.targetRole, targetCompany: src.targetCompany, interviewType: src.interviewType, - answerStyle: src.answerStyle, language: src.language, resumeText: src.resumeText, jdText: src.jdText, diff --git a/src/main/ipc/session.ipc.ts b/src/main/ipc/session.ipc.ts index c516eeb..335920a 100644 --- a/src/main/ipc/session.ipc.ts +++ b/src/main/ipc/session.ipc.ts @@ -15,8 +15,7 @@ const interviewType = z.enum([ 'sales', 'general', ]); -const answerStyle = z.enum(['default', 'star', 'technical', 'conversational']); -const answerLength = z.enum(['key_points', 'detailed']); +const answerFormat = z.enum(['key_points', 'explanation', 'detailed']); export function registerSessionIpc(): void { handle( @@ -24,23 +23,20 @@ export function registerSessionIpc(): void { z.object({ profileId: z.string().min(1), interviewType, - answerStyle: answerStyle.default('default'), jobId: z.string().nullable().default(null), - answerLength: answerLength.default('key_points'), + answerFormat: answerFormat.default('key_points'), }), - ({ profileId, interviewType: t, answerStyle: s, jobId, answerLength: len }) => - sessionManager.start(profileId, t, s, jobId, len), + ({ profileId, interviewType: t, jobId, answerFormat: f }) => + sessionManager.start(profileId, t, jobId, f), ); handle( IPC.session.resume, z.object({ sessionId: z.string().min(1), - answerStyle: answerStyle.default('default'), - answerLength: answerLength.default('key_points'), + answerFormat: answerFormat.default('key_points'), }), - ({ sessionId, answerStyle: s, answerLength: len }) => - sessionManager.resume(sessionId, s, len), + ({ sessionId, answerFormat: f }) => sessionManager.resume(sessionId, f), ); handle(IPC.session.stop, 
z.object({ sessionId: z.string().min(1) }), ({ sessionId }) => @@ -90,8 +86,7 @@ export function registerSessionIpc(): void { IPC.session.setAnswerPrefs, z.object({ interviewType: interviewType.optional(), - style: answerStyle.optional(), - length: answerLength.optional(), + format: answerFormat.optional(), pronunciation: z.boolean().optional(), }), (prefs) => sessionManager.setAnswerPrefs(prefs), diff --git a/src/main/services/mock/mockManager.ts b/src/main/services/mock/mockManager.ts index 5be442e..def47c1 100644 --- a/src/main/services/mock/mockManager.ts +++ b/src/main/services/mock/mockManager.ts @@ -86,8 +86,7 @@ export const mockManager = { profileId, jobId, interviewType, - answerStyle: 'default', - answerLength: 'key_points', + answerFormat: 'key_points', language: profile.language, isMock: true, }); diff --git a/src/main/services/openai/answer.test.ts b/src/main/services/openai/answer.test.ts index bebd5bf..8bfa2f0 100644 --- a/src/main/services/openai/answer.test.ts +++ b/src/main/services/openai/answer.test.ts @@ -33,8 +33,7 @@ function baseInput(over: Partial[0]> = {}) { question: 'Tell me about a hard bug.', contextChunks: [{ id: 'c1', sourceType: 'resume' as const, content: 'Fixed a race condition', score: 0.8 }], profile, - style: 'default' as const, - length: 'key_points' as const, + format: 'key_points' as const, pronunciation: false, interviewType: 'behavioral' as const, ...over, @@ -55,14 +54,20 @@ beforeEach(() => { describe('streamAnswer — request body', () => { it('caps key_points at 220 output tokens', async () => { - await collect(streamAnswer(baseInput({ length: 'key_points' }))); + await collect(streamAnswer(baseInput({ format: 'key_points' }))); expect(h.lastBody!.max_output_tokens).toBe(220); expect(userPrompt()).toContain('KEY POINTS'); expect(userPrompt()).toContain('~60 words'); }); + it('caps explanation at 340 output tokens', async () => { + await collect(streamAnswer(baseInput({ format: 'explanation' }))); + 
expect(h.lastBody!.max_output_tokens).toBe(340); + expect(userPrompt()).toContain('EXPLANATION'); + }); + it('caps detailed at 800 output tokens', async () => { - await collect(streamAnswer(baseInput({ length: 'detailed' }))); + await collect(streamAnswer(baseInput({ format: 'detailed' }))); expect(h.lastBody!.max_output_tokens).toBe(800); expect(userPrompt()).toContain('DETAILED'); }); @@ -75,11 +80,18 @@ describe('streamAnswer — request body', () => { }); it('injects the chosen format and interview type', async () => { - await collect(streamAnswer(baseInput({ style: 'star', interviewType: 'coding' }))); - expect(userPrompt()).toContain('STAR'); + await collect(streamAnswer(baseInput({ format: 'explanation', interviewType: 'coding' }))); + expect(userPrompt()).toContain('EXPLANATION'); expect(userPrompt()).toContain('Interview type: coding'); }); + it('instructs a human, anti-AI tone in the system prompt', async () => { + await collect(streamAnswer(baseInput())); + const system = String((h.lastBody!.input as { role: string; content: string }[])[0].content); + expect(system).toMatch(/human/i); + expect(system).toMatch(/As an AI/i); // it's in the BANNED list + }); + it('embeds retrieved context tagged by source', async () => { await collect(streamAnswer(baseInput())); expect(userPrompt()).toContain('(resume) Fixed a race condition'); diff --git a/src/main/services/openai/answer.ts b/src/main/services/openai/answer.ts index 73b12e5..64bfbfa 100644 --- a/src/main/services/openai/answer.ts +++ b/src/main/services/openai/answer.ts @@ -1,52 +1,42 @@ import { openai } from './client'; import { model } from './models'; -import type { - AnswerLength, - AnswerStyle, - InterviewType, - Profile, - RetrievedChunk, -} from '@shared/types'; +import type { AnswerFormat, InterviewType, Profile, RetrievedChunk } from '@shared/types'; export interface AnswerInput { question: string; contextChunks: RetrievedChunk[]; profile: Profile; - style: AnswerStyle; - length: AnswerLength; + 
/** The single answer control (v1.2): key_points | explanation | detailed. */ + format: AnswerFormat; /** Annotate rare/technical/foreign terms with a quick phonetic respelling. */ pronunciation: boolean; interviewType: InterviewType; signal?: AbortSignal; } -/** Human-readable instruction for each length, injected into the prompt. */ -const LENGTH_INSTRUCTION: Record<AnswerLength, string> = { +/** Human-readable instruction per answer FORMAT, injected into the prompt. */ +const FORMAT_INSTRUCTION: Record<AnswerFormat, string> = { key_points: - 'LENGTH = KEY POINTS (STRICT). This is a glanceable cue to speak FROM, not a full answer. ' + - 'Hard cap: ~60 words TOTAL. Format: one short opening line (≤12 words), then 2–3 terse ' + - 'bullets of a few words each — keywords/phrases, not sentences. No paragraphs. No preamble. ' + - 'If a bullet reads like a full sentence, cut it down. Shorter is better.', + 'FORMAT = KEY POINTS (STRICT). A glanceable cue to speak FROM, not a full answer. ' + + 'Hard cap: ~60 words TOTAL. One short opening line (≤12 words), then 2–3 terse bullets of a ' + + 'few words each — keywords/phrases, not sentences. No paragraphs, no preamble. Shorter is better.', + explanation: + "FORMAT = EXPLANATION. A natural, flowing first-person answer (~90–130 words) — the way you'd " + + 'actually talk it through with someone. Connected sentences, NOT bullets. Lead with the point, ' + + 'then the how/why with one specific detail from the context. Warm and direct, never a lecture.', detailed: - 'LENGTH = DETAILED. A thorough, well-structured spoken answer (~120–200 words) with specifics ' + + 'FORMAT = DETAILED. A thorough, well-structured spoken answer (~150–220 words) with specifics ' + 'and one concrete example drawn from the context. 
Natural spoken language, not an essay.', }; -/** Hard output ceiling per length — the model literally cannot exceed this, so +/** Hard output ceiling per format — the model literally cannot exceed this, so * "key points" can never drift into a long answer regardless of the prompt. */ -const LENGTH_MAX_TOKENS: Record<AnswerLength, number> = { +const FORMAT_MAX_TOKENS: Record<AnswerFormat, number> = { key_points: 220, + explanation: 340, detailed: 800, }; -/** Format/tone instruction per style. */ -const STYLE_INSTRUCTION: Record<AnswerStyle, string> = { - default: 'Format: a clear, direct spoken answer.', - star: 'Format: STAR — frame the answer as Situation, Task, Action, Result.', - technical: 'Format: technical — precise, correct terminology; lead with the core concept.', - conversational: 'Format: conversational — warm, natural, first-person, like talking to a person.', -}; - export type AnswerEvent = | { type: 'delta'; token: string } | { @@ -64,8 +54,14 @@ const SYSTEM = `You are a live interview copilot. The candidate reads your outpu speaking in a real interview, so it must be instantly skimmable and spoken in their first-person voice ("I led…", not "The candidate led…"). Rules: -- LENGTH is a HARD constraint. Obey the requested length EXACTLY — even if you have more +- FORMAT is a HARD constraint. Obey the requested format EXACTLY — even if you have more to say. When unsure, be shorter. Never pad. (KEY POINTS especially must stay tiny.) +- SOUND 100% HUMAN — never AI-generated. Write the way a sharp person actually speaks: use + contractions ("I've", "didn't", "we're"), vary sentence length, get straight to the point. + BANNED (AI/corporate tells): "As an AI", "I'd be happy to", "It's worth noting", "Furthermore", + "Moreover", "In today's … world", "leverage", "delve", "robust", "seamless", and hedging like + "I believe/I think/arguably/potentially". Don't restate the question. Lead with the answer, + confidently. Natural ≠ disfluent — do NOT fake "um"/"uh". - CITE YOUR SOURCES. 
The CONTEXT items are NUMBERED [1], [2], …. Immediately after each claim drawn from the context, cite its number(s) inline, e.g. "cut p99 latency ~40% [1]" or "[2][3]". Cite only real context numbers; never invent a citation. @@ -75,9 +71,10 @@ Rules: - FABRICATION GUARD: if the context can't support what's asked, do NOT make it up. Begin the answer with "⚠", state in one short clause that it's not in their background, then pivot to a grounded, cited, transferable-skills framing (this is the riskWarning case). -- Then follow the requested FORMAT and the interview type. -- Formatting: lead with the single most important line; **bold** only true key terms; - prefer short bullets over dense paragraphs; no meta-commentary or headers.`; +- Match the interview type. +- Formatting: lead with the single most important line; **bold** only true key terms; use + bullets for KEY POINTS and connected sentences for EXPLANATION/DETAILED; no headers or + meta-commentary.`; function buildContext(chunks: RetrievedChunk[]): string { if (chunks.length === 0) return '(no relevant profile context found)'; @@ -91,8 +88,7 @@ function buildContext(chunks: RetrievedChunk[]): string { export async function* streamAnswer(input: AnswerInput): AsyncGenerator { const userPrompt = [ `Interview type: ${input.interviewType}`, - LENGTH_INSTRUCTION[input.length], - STYLE_INSTRUCTION[input.style], + FORMAT_INSTRUCTION[input.format], input.pronunciation ? 
'Pronunciation: for rare, technical, or foreign terms, add a simple phonetic respelling in ' + 'parentheses the FIRST time each appears — lowercase syllables joined by hyphens, with the ' + @@ -106,9 +102,9 @@ export async function* streamAnswer(input: AnswerInput): AsyncGenerator invoke(IPC.session.start, { profileId, interviewType, answerStyle, jobId, answerLength }), - resume: (sessionId: string, answerStyle = 'default', answerLength = 'key_points') => - invoke(IPC.session.resume, { sessionId, answerStyle, answerLength }), - setAnswerPrefs: (prefs: { - interviewType?: string; - style?: string; - length?: string; - pronunciation?: boolean; - }) => - invoke<{ interviewType: string; style: string; length: string; pronunciation: boolean }>( + answerFormat = 'key_points', + ) => invoke(IPC.session.start, { profileId, interviewType, jobId, answerFormat }), + resume: (sessionId: string, answerFormat = 'key_points') => + invoke(IPC.session.resume, { sessionId, answerFormat }), + setAnswerPrefs: (prefs: { interviewType?: string; format?: string; pronunciation?: boolean }) => + invoke<{ interviewType: string; format: string; pronunciation: boolean }>( IPC.session.setAnswerPrefs, prefs, ), diff --git a/src/renderer/dashboard/pages/InterviewPage.tsx b/src/renderer/dashboard/pages/InterviewPage.tsx index 69cb99e..94cb691 100644 --- a/src/renderer/dashboard/pages/InterviewPage.tsx +++ b/src/renderer/dashboard/pages/InterviewPage.tsx @@ -156,8 +156,7 @@ export default function InterviewPage() { profileId, jobId: job.id, interviewType: 'general', - answerStyle: 'default', - answerLength: 'key_points', + answerFormat: 'key_points', source: a.source, micDeviceId: a.micDeviceId, }); diff --git a/src/renderer/dashboard/pages/ProfilesPage.tsx b/src/renderer/dashboard/pages/ProfilesPage.tsx index 0761365..9d34f09 100644 --- a/src/renderer/dashboard/pages/ProfilesPage.tsx +++ b/src/renderer/dashboard/pages/ProfilesPage.tsx @@ -41,7 +41,6 @@ export default function ProfilesPage() { 
targetRole, targetCompany: null, interviewType: 'general', - answerStyle: 'default', language: 'en', resumeText: null, jdText: null, diff --git a/src/renderer/overlay/Overlay.tsx b/src/renderer/overlay/Overlay.tsx index 5c41078..eea5968 100644 --- a/src/renderer/overlay/Overlay.tsx +++ b/src/renderer/overlay/Overlay.tsx @@ -3,9 +3,8 @@ import { useEffect, useRef, useState } from 'react'; import { api } from '../lib/api'; import type { AnswerPrefs, ClientInfo } from '@shared/ipc'; import type { - AnswerLength, + AnswerFormat, AnswerMetaEvent, - AnswerStyle, AppSettings, ContextSentEvent, InterviewType, @@ -84,8 +83,7 @@ export default function Overlay() { const [showClient, setShowClient] = useState(false); // Live answer controls (mirrored to the active session via setAnswerPrefs). const [interviewType, setInterviewType] = useState('general'); - const [format, setFormat] = useState('default'); - const [length, setLength] = useState('key_points'); + const [answerFormat, setAnswerFormat] = useState('key_points'); const [pronunciation, setPronunciation] = useState(false); // Coding sessions default to listen-only (don't auto-answer the interviewer, so a // generated coding answer isn't replaced). This toggle (coding-only) flips it on. 
@@ -234,8 +232,7 @@ export default function Overlay() { }), api.events.onAnswerPrefs((p) => { setInterviewType(p.interviewType); - setFormat(p.style); - setLength(p.length); + setAnswerFormat(p.format); setPronunciation(p.pronunciation); }), api.events.onAudioLevel((p) => { @@ -310,14 +307,9 @@ export default function Overlay() { await api.session.setAnswerPrefs({ interviewType: t }); if (question) await api.session.regenerate(); }; - const changeFormat = async (f: AnswerStyle) => { - setFormat(f); - await api.session.setAnswerPrefs({ style: f }); - if (question) await api.session.regenerate(); - }; - const changeLength = async (l: AnswerLength) => { - setLength(l); - await api.session.setAnswerPrefs({ length: l }); + const changeFormat = async (f: AnswerFormat) => { + setAnswerFormat(f); + await api.session.setAnswerPrefs({ format: f }); if (question) await api.session.regenerate(); }; const togglePronunciation = async () => { @@ -664,44 +656,29 @@ export default function Overlay() { ))} - - Length + Format - - + {( + [ + ['key_points', 'Key points', 'Short, glanceable key points'], + ['explanation', 'Explanation', 'A natural, spoken explanation'], + ['detailed', 'Detailed', 'Thorough, with a concrete example'], + ] as const + ).map(([value, label, title]) => ( + + ))} {interviewType === 'coding' && ( diff --git a/src/renderer/store/useLiveSession.ts b/src/renderer/store/useLiveSession.ts index 70cc65d..3621a25 100644 --- a/src/renderer/store/useLiveSession.ts +++ b/src/renderer/store/useLiveSession.ts @@ -35,8 +35,7 @@ interface LiveSessionState { startNew: (a: { profileId: string; interviewType: string; - answerStyle: string; - answerLength: string; + answerFormat: string; jobId: string | null; source: AudioSource; micDeviceId?: string | null; @@ -178,7 +177,7 @@ export const useLiveSession = create((set, get) => { pendingSave: null, clearPendingSave: () => set({ pendingSave: null }), - startNew: async ({ profileId, interviewType, answerStyle, answerLength, 
jobId, source, micDeviceId }) => { + startNew: async ({ profileId, interviewType, answerFormat, jobId, source, micDeviceId }) => { // Acquire audio FIRST: if the user denies the mic or cancels the system-audio // picker, we never create a session that displays "live" with nothing flowing. let stream: MediaStream; @@ -191,9 +190,8 @@ export const useLiveSession = create((set, get) => { const s = (await api.session.start( profileId, interviewType, - answerStyle, jobId, - answerLength, + answerFormat, )) as Session; lineId = 0; set({ session: s, transcript: [], interim: '', paused: false, micError: null, sessionError: null }); diff --git a/src/shared/ipc.ts b/src/shared/ipc.ts index b183c71..7abe22d 100644 --- a/src/shared/ipc.ts +++ b/src/shared/ipc.ts @@ -1,6 +1,6 @@ // Single source of truth for IPC channel names. See docs/05-IPC-MAP.md. -import type { AnswerLength, AnswerStyle, InterviewType } from './types'; +import type { AnswerFormat, InterviewType } from './types'; /** Request/response channels (ipcRenderer.invoke <-> ipcMain.handle). */ export const IPC = { @@ -190,8 +190,7 @@ export interface ClientInfo { /** Live answer preferences pushed to the Cue Card so its toggles stay in sync. */ export interface AnswerPrefs { interviewType: InterviewType; - style: AnswerStyle; - length: AnswerLength; + format: AnswerFormat; pronunciation: boolean; } diff --git a/src/shared/types.ts b/src/shared/types.ts index 774cbd5..16608f8 100644 --- a/src/shared/types.ts +++ b/src/shared/types.ts @@ -9,12 +9,13 @@ export type InterviewType = | 'sales' | 'general'; -/** Answer FORMAT / tone — chosen per round. Orthogonal to length. */ -export type AnswerStyle = 'default' | 'star' | 'technical' | 'conversational'; - -/** Answer LENGTH / depth — a live Cue Card toggle, independent of format. - * `key_points`: short, key-point-focused but natural. `detailed`: thorough. 
*/ -export type AnswerLength = 'key_points' | 'detailed'; +/** Answer FORMAT — the single live Cue Card control (v1.2; replaces the old + * format/tone × length split). All three read 100% human, never AI-generated. + * - `key_points`: short, glanceable — a terse opener + a few key-point bullets. + * - `explanation`: a natural, flowing first-person explanation, like talking it + * through with someone. + * - `detailed`: thorough, with one concrete example. */ +export type AnswerFormat = 'key_points' | 'explanation' | 'detailed'; export type DocumentKind = 'resume' | 'jd' | 'note' | 'other'; export type ChunkSource = 'resume' | 'jd' | 'note' | 'company' | 'story'; @@ -38,7 +39,6 @@ export interface Profile { targetRole: string; targetCompany: string | null; interviewType: InterviewType; - answerStyle: AnswerStyle; language: string; resumeText: string | null; jdText: string | null; From 4787faa5d827793910524c57a9893b72dd537791 Mon Sep 17 00:00:00 2001 From: tpikachu Date: Wed, 1 Jul 2026 09:37:03 -0500 Subject: [PATCH 2/3] =?UTF-8?q?feat:=20coding=20solver=20=E2=80=94=20langu?= =?UTF-8?q?age=20picker,=20commented=20+=20optimal=20solution=20(v1.2=20#2?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Upgrade the coding solver (screenshot/clipboard mode). CODING_RULES becomes codingRules(language): - Writes the solution in a chosen language (default javascript), with REQUIRED clear inline comments. - Explanation-first delivery: a natural Approach paragraph, then the code. - Keeps the OPTIMALITY mandate + explicit time/space complexity. - Stays résumé/JD-free (a coding problem is unrelated to the profile). solveFromOcr(text, language) / solveFromImages(dataUrls, language) build the system prompt per-call; codingMode threads a persisted `codingLanguage` setting (default javascript) into every solve. 
New AppSettings.codingLanguage + SETTINGS_KEYS.codingLanguage (in APP_SETTING_KEYS), persisted via settings.set; a 12-language dropdown in the Cue Card coding controls (mirrors the model/effort pickers). Adversarial review: no findings. Verified: typecheck · 104 unit · build green. Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/06-OPENAI-SERVICE.md | 18 +++++++---- docs/sessions/2026-07-01.md | 22 +++++++++++-- src/main/db/repositories/settings.repo.ts | 2 ++ src/main/ipc/settings.ipc.ts | 4 +++ src/main/services/capture/codingMode.test.ts | 8 ++++- src/main/services/capture/codingMode.ts | 14 ++++++-- src/main/services/openai/coding.ts | 10 +++--- src/main/services/openai/codingPrompt.ts | 25 ++++++++++---- src/main/services/openai/vision.ts | 8 ++--- src/renderer/overlay/Overlay.tsx | 34 +++++++++++++++++++- src/shared/types.ts | 1 + 11 files changed, 118 insertions(+), 28 deletions(-) diff --git a/docs/06-OPENAI-SERVICE.md b/docs/06-OPENAI-SERVICE.md index 3a24176..8ba9600 100644 --- a/docs/06-OPENAI-SERVICE.md +++ b/docs/06-OPENAI-SERVICE.md @@ -130,13 +130,19 @@ chunked path and mock-answer audio). Realtime API session for delta-level STT latency; PCM is streamed one-way via `session:realtime-audio`. Event parsing lives in `realtimeEvents.ts`. -### coding.ts — `solveFromOcr(text)`, vision.ts — `solveFromImages(dataUrls[])` +### coding.ts — `solveFromOcr(text, language)`, vision.ts — `solveFromImages(dataUrls[], language)` Given a coding problem as text (clipboard/selection) or as one-or-more screenshots, -streams: approach, edge cases, time/space complexity, solution outline (and code). -Both paths use the same `coding` model + `reasoningParam('coding')`. A long problem -spans several viewports, so `solveFromImages` sends all captured screenshots in ONE -request (instruction-first, scroll order, `detail:'high'`) and the model reconstructs -them — the buffer + thumbnail strip live in `capture/codingMode.ts` (see `capture:add-region`/`solve-buffer`). 
+streams (explanation-first): a natural **Approach** paragraph, complexity, edge cases, +then the **optimal** solution as commented, runnable code. The shared prompt is +`codingRules(language)` (`codingPrompt.ts`): mandates the optimal solution + stated +time/space complexity, writes the code in the chosen `language` (default `javascript`, +a live Cue Card picker persisted as the `codingLanguage` setting), and requires clear +inline comments. Deliberately **résumé/JD-free** — a coding problem is unrelated to the +candidate's profile. Both paths use the same `coding` model + `reasoningParam('coding')`. +A long problem spans several viewports, so `solveFromImages` sends all captured screenshots +in ONE request (instruction-first, scroll order, `detail:'high'`) and the model reconstructs +them — the buffer + thumbnail strip + the `codingLanguage` lookup live in +`capture/codingMode.ts` (see `capture:add-region`/`solve-buffer`). ### interviewer.ts — `generateQuestion(...)` & tts.ts — `speak(text, voice)` Power the mock-interview mode: `generateQuestion` produces the next question and diff --git a/docs/sessions/2026-07-01.md b/docs/sessions/2026-07-01.md index 48d678f..7122e53 100644 --- a/docs/sessions/2026-07-01.md +++ b/docs/sessions/2026-07-01.md @@ -40,8 +40,26 @@ run headlessly under vitest). Verified: `typecheck` · 104 unit (+2) · `build` green. +## Increment 2 — Coding solver + +Upgraded the coding SOLVER (screenshot/clipboard mode; live-coding deferred to a later version). +- Shared prompt: `CODING_RULES` const → `codingRules(language)`. Now writes the solution in a + chosen **language** (default `javascript`), REQUIRES clear inline **comments**, delivers + **explanation-first** (a natural Approach paragraph, then the code), and keeps the OPTIMALITY + + explicit time/space complexity mandate. Confirmed **résumé/JD-free**. 
+- `solveFromOcr(text, language)` / `solveFromImages(dataUrls, language)` build their system prompt + per-call; `codingMode.ts` reads a persisted `codingLanguage` setting and threads it into all + three solve entry points. +- New `AppSettings.codingLanguage` (`SETTINGS_KEYS.codingLanguage`, in `APP_SETTING_KEYS` so a + factory reset clears it), default `'javascript'`, zod `z.string().min(1).max(40)`. A **Language** + dropdown (12 languages) in the Cue Card coding-solver controls, seeded from settings + persisted + via `api.settings.set({ codingLanguage })` — mirrors the existing model/effort pickers. + +**Adversarial review** (2 dimensions → refute-verify): **no findings** — the increment is clean +(callers are compiler-enforced to pass `language`; the settings round-trip + prompt hold together). + +Verified: `typecheck` · 104 unit · `build` green. + ## Next (v1.2, same branch) -- Increment 2 — Coding solver: language picker (default JS), commented + optimal solution, - explanation-style by default, résumé/JD-free (already is). - Increment 3 — Pronunciation: default ON + a structured guide panel (Part of speech / Singular / Pronunciation) for hard words, separate from the natural answer. diff --git a/src/main/db/repositories/settings.repo.ts b/src/main/db/repositories/settings.repo.ts index 7c72fce..8d95198 100644 --- a/src/main/db/repositories/settings.repo.ts +++ b/src/main/db/repositories/settings.repo.ts @@ -56,6 +56,7 @@ export const SETTINGS_KEYS = { overlayBounds: 'overlay_bounds', audioPrefs: 'audio_prefs', hideTaskbarIcon: 'hide_taskbar_icon', + codingLanguage: 'coding_language', } as const; /** Non-secret settings cleared by a factory reset (everything except the API key). 
*/ @@ -71,4 +72,5 @@ const APP_SETTING_KEYS: string[] = [ SETTINGS_KEYS.overlayBounds, SETTINGS_KEYS.audioPrefs, SETTINGS_KEYS.hideTaskbarIcon, + SETTINGS_KEYS.codingLanguage, ]; diff --git a/src/main/ipc/settings.ipc.ts b/src/main/ipc/settings.ipc.ts index 56a14b5..af707ad 100644 --- a/src/main/ipc/settings.ipc.ts +++ b/src/main/ipc/settings.ipc.ts @@ -35,6 +35,7 @@ function readSettings(): AppSettings { reasoningEffortDefaults: { ...defaultEfforts }, overlay: settingsRepo.getJson(SETTINGS_KEYS.overlayPrefs, defaultOverlay), audio: settingsRepo.getJson(SETTINGS_KEYS.audioPrefs, defaultAudio), + codingLanguage: settingsRepo.get(SETTINGS_KEYS.codingLanguage) || 'javascript', privacyMode: settingsRepo.get(SETTINGS_KEYS.privacyMode) !== '0', hideTaskbarIcon: settingsRepo.get(SETTINGS_KEYS.hideTaskbarIcon) === '1', dataConsentAck: settingsRepo.get(SETTINGS_KEYS.dataConsentAck) === '1', @@ -61,6 +62,7 @@ const settingsPatch = z.object({ micDeviceId: z.string().nullable(), }) .optional(), + codingLanguage: z.string().min(1).max(40).optional(), dataConsentAck: z.boolean().optional(), tourDone: z.boolean().optional(), hideTaskbarIcon: z.boolean().optional(), @@ -84,6 +86,8 @@ export function registerSettingsIpc(): void { broadcast(EVENTS.overlayApplySettings, patch.overlay, ['overlay']); } if (patch.audio) settingsRepo.setJson(SETTINGS_KEYS.audioPrefs, patch.audio); + if (patch.codingLanguage !== undefined) + settingsRepo.set(SETTINGS_KEYS.codingLanguage, patch.codingLanguage); if (patch.hideTaskbarIcon !== undefined) { settingsRepo.set(SETTINGS_KEYS.hideTaskbarIcon, patch.hideTaskbarIcon ? 
'1' : '0'); getMainWindow()?.setSkipTaskbar(patch.hideTaskbarIcon); diff --git a/src/main/services/capture/codingMode.test.ts b/src/main/services/capture/codingMode.test.ts index 85131ee..fa261a0 100644 --- a/src/main/services/capture/codingMode.test.ts +++ b/src/main/services/capture/codingMode.test.ts @@ -14,6 +14,12 @@ vi.mock('../openai/vision', () => ({ // codingMode imports normalizeOpenAIError from the client, which transitively loads // electron (app.isPackaged) — stub it so the import chain stays node-safe. vi.mock('../openai/client', () => ({ normalizeOpenAIError: (e: unknown) => String(e) })); +// codingMode reads the coding language from settings.repo (→ db → better-sqlite3), +// which can't load under the node test env — stub it (get → null ⇒ 'javascript' default). +vi.mock('../../db/repositories/settings.repo', () => ({ + SETTINGS_KEYS: { codingLanguage: 'coding_language' }, + settingsRepo: { get: () => null }, +})); import { addCapture, clearCaptures, solveCaptures } from './codingMode'; import { broadcast } from '../../ipc/broadcast'; @@ -63,7 +69,7 @@ describe('multi-image capture buffer', () => { addCapture('img-2'); await solveCaptures(); expect(solveFromImages).toHaveBeenCalledTimes(1); - expect(solveFromImages).toHaveBeenCalledWith(['img-1', 'img-2']); + expect(solveFromImages).toHaveBeenCalledWith(['img-1', 'img-2'], 'javascript'); expect(lastBufferImages()).toEqual([]); // buffer cleared after solving }); }); diff --git a/src/main/services/capture/codingMode.ts b/src/main/services/capture/codingMode.ts index a927f6a..63c8e1f 100644 --- a/src/main/services/capture/codingMode.ts +++ b/src/main/services/capture/codingMode.ts @@ -6,6 +6,11 @@ import { solveFromImages } from '../openai/vision'; import { normalizeOpenAIError } from '../openai/client'; import type { AnswerEvent } from '../openai/answer'; import { showOverlay } from '../../windows/overlayWindow'; +import { SETTINGS_KEYS, settingsRepo } from '../../db/repositories/settings.repo'; + 
+/** The programming language the solver writes solutions in (Cue Card setting; JS default). */ +const codingLanguage = (): string => + settingsRepo.get(SETTINGS_KEYS.codingLanguage) || 'javascript'; // Accumulated problem screenshots for the current solve. A long problem scrolls // past one viewport, so the user captures several (scroll → capture → repeat) and @@ -41,7 +46,7 @@ export function solveCaptures(): Promise { images.length > 1 ? `Coding problem (${images.length} screenshots)` : 'Coding problem (from screenshot)'; - return streamToOverlay(solveFromImages(images), label); + return streamToOverlay(solveFromImages(images, codingLanguage()), label); } async function streamToOverlay(gen: AsyncGenerator, label: string): Promise { @@ -65,12 +70,15 @@ async function streamToOverlay(gen: AsyncGenerator, label: string): /** Stream a coding solution from plain text (clipboard). */ export function runCodingSolve(text: string): Promise { - return streamToOverlay(solveFromOcr(text), 'Coding problem (from clipboard)'); + return streamToOverlay(solveFromOcr(text, codingLanguage()), 'Coding problem (from clipboard)'); } /** Stream a coding solution from a single screenshot/region image (OpenAI vision). 
*/ export function runCodingSolveFromImage(dataUrl: string): Promise { - return streamToOverlay(solveFromImages([dataUrl]), 'Coding problem (from screenshot)'); + return streamToOverlay( + solveFromImages([dataUrl], codingLanguage()), + 'Coding problem (from screenshot)', + ); } /** diff --git a/src/main/services/openai/coding.ts b/src/main/services/openai/coding.ts index fa994e0..e56f6f6 100644 --- a/src/main/services/openai/coding.ts +++ b/src/main/services/openai/coding.ts @@ -1,23 +1,23 @@ import { openai } from './client'; import { model, reasoningParam } from './models'; -import { CODING_RULES } from './codingPrompt'; +import { codingRules } from './codingPrompt'; import type { AnswerEvent } from './answer'; -const SYSTEM = `You solve a coding/technical problem given as plain text.\n${CODING_RULES}`; - /** Stream a coding-mode answer from clipboard/OCR'd problem text. Uses the dedicated * 'coding' model (a reasoning model by default) — the same solver as the screenshot - * path, so both stay consistently smart. */ + * path, so both stay consistently smart. Solution written in `language`. */ export async function* solveFromOcr( text: string, + language: string, signal?: AbortSignal, ): AsyncGenerator { + const system = `You solve a coding/technical problem given as plain text.\n${codingRules(language)}`; const stream = await openai().responses.stream( { model: model('coding'), ...reasoningParam('coding'), input: [ - { role: 'system', content: SYSTEM }, + { role: 'system', content: system }, { role: 'user', content: text.slice(0, 12_000) }, ], }, diff --git a/src/main/services/openai/codingPrompt.ts b/src/main/services/openai/codingPrompt.ts index 4f04816..c39b6b9 100644 --- a/src/main/services/openai/codingPrompt.ts +++ b/src/main/services/openai/codingPrompt.ts @@ -1,9 +1,11 @@ /** * Shared system prompt for coding/algorithmic problem solving (clipboard text and - * screenshot/vision paths). 
The hard requirement is OPTIMALITY: the answer must - * be the best-known algorithm for the problem, never the first thing that works. + * screenshot/vision paths). Hard requirements: OPTIMALITY, the solution written in the + * chosen LANGUAGE with clear inline COMMENTS, and an explanation-first delivery. + * Deliberately résumé/JD-free — a coding problem is unrelated to the candidate's profile. */ -export const CODING_RULES = `You are an expert competitive programmer and senior software engineer. +export function codingRules(language: string): string { + return `You are an expert competitive programmer and senior software engineer. OPTIMALITY — this is the single most important rule: - Always produce the OPTIMAL solution: the best achievable time complexity (and, for @@ -19,12 +21,23 @@ OPTIMALITY — this is the single most important rule: window, binary search (incl. on the answer), monotonic stack/queue, heaps, union-find, prefix sums, greedy with proof, dynamic programming (with the tightest state), or the appropriate graph algorithm. + +LANGUAGE: +- Write the solution in ${language}. Idiomatic, clean, and runnable as-is. +- The code MUST carry clear inline comments — explain the key steps, the core + invariant, and why the chosen data structure/algorithm works. Comment to teach, + not to narrate every trivial line. + +DELIVERY — explanation-first: +- Explain the approach the way you'd walk a peer through it: a short, natural, plain + paragraph (the idea + why it's optimal) BEFORE the code. No robotic bullet dumps. - The code must be correct, handle edge cases, and run as-is. If the problem statement is ambiguous, state the assumption you optimize under, then solve. FORMAT — clean markdown with short bold section labels, in this order: -**Problem**, **Optimal approach**, **Complexity**, **Edge cases**, **Solution**. -Use bullet lists for points and fenced code blocks (\`\`\`lang) for any code. 
Be -concise but complete.`; +**Problem**, **Approach**, **Complexity**, **Edge cases**, **Solution**. +Write **Approach** as a natural explanatory paragraph; put the code in a fenced code +block (tagged with the language) with inline comments. Be concise but complete.`; +} diff --git a/src/main/services/openai/vision.ts b/src/main/services/openai/vision.ts index 3a60c53..34fdebe 100644 --- a/src/main/services/openai/vision.ts +++ b/src/main/services/openai/vision.ts @@ -1,10 +1,8 @@ import { openai } from './client'; import { model, reasoningParam } from './models'; -import { CODING_RULES } from './codingPrompt'; +import { codingRules } from './codingPrompt'; import type { AnswerEvent } from './answer'; -const SYSTEM = `You are shown a screenshot containing a coding/technical interview problem (and possibly code). Read it carefully, transcribe the problem accurately, then solve it.\n${CODING_RULES}`; - /** * Solve a problem from ONE OR MORE screenshots using the 'coding' model (multimodal, * a reasoning model by default). A long LeetCode-style problem scrolls past one @@ -15,8 +13,10 @@ const SYSTEM = `You are shown a screenshot containing a coding/technical intervi */ export async function* solveFromImages( dataUrls: string[], + language: string, signal?: AbortSignal, ): AsyncGenerator { + const system = `You are shown a screenshot containing a coding/technical interview problem (and possibly code). Read it carefully, transcribe the problem accurately, then solve it.\n${codingRules(language)}`; const intro = dataUrls.length > 1 ? 
`The following ${dataUrls.length} images are consecutive, top-to-bottom (possibly ` + @@ -36,7 +36,7 @@ export async function* solveFromImages( model: model('coding'), ...reasoningParam('coding'), input: [ - { role: 'system', content: SYSTEM }, + { role: 'system', content: system }, { role: 'user', content }, ], }, diff --git a/src/renderer/overlay/Overlay.tsx b/src/renderer/overlay/Overlay.tsx index eea5968..4490541 100644 --- a/src/renderer/overlay/Overlay.tsx +++ b/src/renderer/overlay/Overlay.tsx @@ -1,7 +1,7 @@ import type React from 'react'; import { useEffect, useRef, useState } from 'react'; import { api } from '../lib/api'; -import type { AnswerPrefs, ClientInfo } from '@shared/ipc'; +import type { ClientInfo } from '@shared/ipc'; import type { AnswerFormat, AnswerMetaEvent, @@ -98,6 +98,7 @@ export default function Overlay() { // Switchable live so a hard problem can be bumped to a stronger model on the spot. const [codingModel, setCodingModel] = useState(''); const [codingEffort, setCodingEffort] = useState(''); + const [codingLanguage, setCodingLanguage] = useState('javascript'); const [codingDefaults, setCodingDefaults] = useState({ model: 'gpt-5-mini', effort: 'low' }); // The full override maps, so saving the coding pick doesn't clobber other tasks'. const modelsRef = useRef>({}); @@ -252,6 +253,7 @@ export default function Overlay() { effortsRef.current = ss.reasoningEfforts ?? {}; setCodingModel(ss.models?.coding ?? ''); setCodingEffort(ss.reasoningEfforts?.coding ?? ''); + setCodingLanguage(ss.codingLanguage ?? 'javascript'); setCodingDefaults({ model: ss.modelDefaults?.coding ?? 'gpt-5-mini', effort: ss.reasoningEffortDefaults?.coding ?? 'low', @@ -991,6 +993,36 @@ export default function Overlay() {

Coding solver

+