diff --git a/docs/05-IPC-MAP.md b/docs/05-IPC-MAP.md index b1cb937..b6f181a 100644 --- a/docs/05-IPC-MAP.md +++ b/docs/05-IPC-MAP.md @@ -104,8 +104,8 @@ résumé, persisted, and indexed as `story` chunks so they ground live answers. ### session | Channel | Request | Response | |---|---|---| -| `session:start` | `{ profileId, interviewType, answerStyle, jobId, answerLength }` | `Session` (`answerStyle` = format/tone; `answerLength` = key_points\|detailed) | -| `session:resume` | `{ sessionId, answerStyle?, answerLength? }` | `Session` (re-activate an existing session row and continue it; interview type is restored from the row — one session per interview, type is dynamic) | +| `session:start` | `{ profileId, interviewType, jobId, answerFormat }` | `Session` (`answerFormat` = key_points\|explanation\|detailed — the single answer control) | +| `session:resume` | `{ sessionId, answerFormat? }` | `Session` (re-activate an existing session row and continue it; interview type is restored from the row — one session per interview, type is dynamic) | | `session:stop` | `{ sessionId }` | `Session` | | `session:toggle-pause` | `{ sessionId }` | `{ paused }` | | `session:toggle-pause-active` | — | `{ paused, active }` (global shortcut target — toggles the live session) | @@ -120,7 +120,7 @@ résumé, persisted, and indexed as `story` chunks so they ground live answers. | `session:ask` | `{ sessionId, questionText }` | `{ questionId }` (manual ask; answer streams) | | `session:ask-active` | `{ questionText }` | `{ ok }` (Cue Card "Ask" box — manual ask for the active session, no id) | | `session:set-interview-type` | `{ sessionId, interviewType }` | `{ ok }` (set the session-level type — chosen by the user in the save prompt at stop) | -| `session:set-answer-prefs` | `{ interviewType?, style?, length?, pronunciation? }` | `{ interviewType, style, length, pronunciation }` (live Cue Card controls; acts on the active session. 
Switching `interviewType` is dynamic — it persists onto the session row + reframes later answers) | +| `session:set-answer-prefs` | `{ interviewType?, format?, pronunciation? }` | `{ interviewType, format, pronunciation }` (live Cue Card controls; acts on the active session. Switching `interviewType` is dynamic — it persists onto the session row + reframes later answers) | | `session:set-answering` | `{ enabled }` | `{ enabled, answered }` (coding "listen-only" toggle: when disabled, the interviewer is still transcribed but not auto-answered; enabling it also answers the question they just asked) | | `session:regenerate` | — | `{ regenerated }` (re-answer the last question for the active session) | | `session:clear-answer` | — | `{ cleared }` (abort the in-flight answer for the active session) | @@ -195,7 +195,7 @@ Channel constants live in `EVENTS` (`src/shared/ipc.ts`); payload types are in | `session:answer-done` | `{ questionId }` | overlay | | `session:answer-reset` | `{ questionId }` | overlay (regenerate: clear the Cue Card answer but keep the transcript — no new question row/line) | | `session:client-info` | `ClientInfo \| null` | overlay (active interview: company/title/notes + profileName + grounding flags hasResume/hasJd/hasCompany, for the Cue Card header + session bar + ⓘ panel; `null` clears on stop) | -| `session:answer-prefs` | `AnswerPrefs` (`{ interviewType, style, length, pronunciation }`) | overlay (seeds the Cue Card answer-control toggles) | +| `session:answer-prefs` | `AnswerPrefs` (`{ interviewType, format, pronunciation }`) | overlay (seeds the Cue Card answer-control toggles) | | `session:audio-level` | `{ level }` (0-1 RMS, ~12/sec) | overlay (drives the Cue Card mic meter; computed in `feedRealtimeAudio` since the stream lives in the dashboard renderer) | | `session:save-prompt` | `SavePrompt` (`{ sessionId, interviewType, jobTitle, questionCount }`) | dashboard (a session just stopped → prompt save-or-discard + pick the type) | | 
`session:context` | `{ questionId, question, chunks }` | dashboard (debug: retrieved chunks) | diff --git a/docs/06-OPENAI-SERVICE.md b/docs/06-OPENAI-SERVICE.md index 20b39dd..a8df879 100644 --- a/docs/06-OPENAI-SERVICE.md +++ b/docs/06-OPENAI-SERVICE.md @@ -94,11 +94,13 @@ Small/fast model. Returns `{ text, type, confidence, strategy }`. Also used as a cheap "is this actually a question?" gate before answer generation. ### answer.ts — `streamAnswer(input) => AsyncIterable` -Input: `{ question, contextChunks, profile, style, length, pronunciation, interviewType, signal? }`. +Input: `{ question, contextChunks, profile, format, pronunciation, interviewType, signal? }`. Builds a **grounding** prompt: - System: persona + rules ("ground answers in provided context; never invent experience; if no relevant experience, give a transferable-skills answer and - set a risk warning"); LENGTH is a hard constraint. + set a risk warning"); FORMAT is a hard constraint; plus a **naturalness / anti-AI-tone** + directive (contractions, varied sentence length, no corporate/AI tells or hedging — must + read 100% human, never AI-generated). - **Grounded / proof-linked answers:** `buildContext` numbers the chunks `[1] (resume) …`; the prompt makes the model cite those numbers inline after each grounded claim (e.g. `…cut p99 latency 40% [1]`). The Cue Card renders the cited `[i]` as source chips @@ -106,10 +108,19 @@ Builds a **grounding** prompt: for anything the context can't support the model must not invent it — it leads with `⚠`, says it's not in the candidate's background, and pivots to a cited transferable framing. -- User: question + retrieved context + profile summary + the chosen format/length, - plus optional pronunciation hints for rare/technical terms. -- `length` (`key_points` | `detailed`) also sets a hard `max_output_tokens` ceiling - (220 / 800) so "key points" can never drift long regardless of the prompt. 
+- User: question + retrieved context + profile summary + the chosen answer format. +- **Pronunciation guide** (v1.2, ON by default, live-toggleable): the answer stays clean + (no inline respellings); instead, if any words are genuinely hard, the model appends a + `[[PRONUNCIATION]]` section with one pipe-delimited line per word + (`word | part of speech | singular | respelling`). The Cue Card splits this out + (`overlay/pronunciation.ts` `splitPronunciation`, tolerant of model-output variance) and + renders a structured "🗣 How to say it" panel below the answer. Adds +160 `max_output_tokens` + headroom so the guide never eats the answer. +- `format` — the single answer control (v1.2): `key_points` (terse bullets) | `explanation` + (a natural, flowing first-person explanation) | `detailed` (thorough, with one example). + It also sets a hard `max_output_tokens` ceiling (220 / 340 / 800) so "key points" can never + drift long regardless of the prompt. (The old format/tone × length split — `star`/`technical`/ + `conversational` — was removed.) Streams tokens (`{type:'delta', token}`), then a `usage` event, then a structured `meta` event `{ talkingPoints[], resumeMatch, star?, clarifyingQuestion?, riskWarning?, followupQuestion }`. **Status:** the prose answer + token usage are live; the meta pass @@ -125,13 +136,19 @@ chunked path and mock-answer audio). Realtime API session for delta-level STT latency; PCM is streamed one-way via `session:realtime-audio`. Event parsing lives in `realtimeEvents.ts`. -### coding.ts — `solveFromOcr(text)`, vision.ts — `solveFromImages(dataUrls[])` +### coding.ts — `solveFromOcr(text, language)`, vision.ts — `solveFromImages(dataUrls[], language)` Given a coding problem as text (clipboard/selection) or as one-or-more screenshots, -streams: approach, edge cases, time/space complexity, solution outline (and code). -Both paths use the same `coding` model + `reasoningParam('coding')`. 
A long problem -spans several viewports, so `solveFromImages` sends all captured screenshots in ONE -request (instruction-first, scroll order, `detail:'high'`) and the model reconstructs -them — the buffer + thumbnail strip live in `capture/codingMode.ts` (see `capture:add-region`/`solve-buffer`). +streams (explanation-first): a natural **Approach** paragraph, complexity, edge cases, +then the **optimal** solution as commented, runnable code. The shared prompt is +`codingRules(language)` (`codingPrompt.ts`): mandates the optimal solution + stated +time/space complexity, writes the code in the chosen `language` (default `javascript`, +a live Cue Card picker persisted as the `codingLanguage` setting), and requires clear +inline comments. Deliberately **résumé/JD-free** — a coding problem is unrelated to the +candidate's profile. Both paths use the same `coding` model + `reasoningParam('coding')`. +A long problem spans several viewports, so `solveFromImages` sends all captured screenshots +in ONE request (instruction-first, scroll order, `detail:'high'`) and the model reconstructs +them — the buffer + thumbnail strip + the `codingLanguage` lookup live in +`capture/codingMode.ts` (see `capture:add-region`/`solve-buffer`). ### interviewer.ts — `generateQuestion(...)` & tts.ts — `speak(text, voice)` Power the mock-interview mode: `generateQuestion` produces the next question and diff --git a/docs/sessions/2026-07-01.md b/docs/sessions/2026-07-01.md new file mode 100644 index 0000000..6deeafe --- /dev/null +++ b/docs/sessions/2026-07-01.md @@ -0,0 +1,92 @@ +# 2026-07-01 + +Started **v1.2** (branch `feat/prompt-overhaul`) — a prompt/answer overhaul planned with the +user in three increments: (1) Answer Format + naturalness, (2) Coding solver, (3) Pronunciation. +(v1.1.0 shipped first: the five trainer features + competency coverage, released via a +`v1.1.0` tag → GitHub Release.) 
+ +## Increment 1 — Answer Format + naturalness + +Collapsed the two overlapping answer-control axes into ONE. The old **Answer Format** (tone: +`default`/`star`/`technical`/`conversational`) was muddy and underused, so it's gone; the length +control is promoted into the new single **Answer Format**: + +- `AnswerFormat = 'key_points' | 'explanation' | 'detailed'` (replaces `AnswerStyle` + `AnswerLength`). + - **key_points** — terse, glanceable bullets (~60w, cap 220 tok). + - **explanation** *(new)* — a natural, flowing first-person explanation (~90–130w, cap 340 tok). + - **detailed** — thorough, with one concrete example (~150–220w, cap 800 tok). +- `answer.ts`: merged `STYLE_INSTRUCTION` + `LENGTH_INSTRUCTION` → `FORMAT_INSTRUCTION`, and + `LENGTH_MAX_TOKENS` → `FORMAT_MAX_TOKENS`. Added a **naturalness / anti-AI directive** to the + system prompt: contractions, varied sentence length, banned corporate/AI tells + hedging, + first-person, confident — but explicitly *not* fake disfluency ("um"/"uh"). Citation + + fabrication-guard rules kept intact. +- Threaded the single `format` field through everything: `AnswerPrefs` (dropped `style`+`length`), + `sessionManager` (`LiveState.answerFormat`; goLive/start/resume/setAnswerPrefs/generateAnswer + + the `answerPrefs` broadcast), `session.ipc` zod, preload, `mockManager`, `useLiveSession.startNew`, + InterviewPage/ProfilesPage/sampleData. Cue Card: the two controls collapse into **one 3-way + Format toggle** (Key points / Explanation / Detailed). +- Removed `Profile.answerStyle`. The `profiles.answer_style` DB column is **kept** (NOT NULL default + `'concise'`) but no longer read/written — **no migration**. 
+ +**Adversarial review** (3 dimensions → refute-verify) found no app-code bugs but caught a real +**e2e regression**: `error-handling.spec.ts` still called `api.session.start` with the old 5-arg +signature, so after the positional reorder `answerFormat` became `null` and zod rejected it +(`.default` only fills `undefined`, not `null`). Fixed that call + two stale `answerStyle` fields in +e2e specs. (Typecheck couldn't catch it — it lives inside a `page.evaluate` string typed as `any`.) + +**Naturalness validation:** the anti-AI tone is prompt-only and subjective — best judged live in +`npm run dev` with a real key; carefully engineered here (no auto-spike since the answer path can't +run headlessly under vitest). + +Verified: `typecheck` · 104 unit (+2) · `build` green. + +## Increment 2 — Coding solver + +Upgraded the coding SOLVER (screenshot/clipboard mode; live-coding deferred to a later version). +- Shared prompt: `CODING_RULES` const → `codingRules(language)`. Now writes the solution in a + chosen **language** (default `javascript`), REQUIRES clear inline **comments**, delivers + **explanation-first** (a natural Approach paragraph, then the code), and keeps the OPTIMALITY + + explicit time/space complexity mandate. Confirmed **résumé/JD-free**. +- `solveFromOcr(text, language)` / `solveFromImages(dataUrls, language)` build their system prompt + per-call; `codingMode.ts` reads a persisted `codingLanguage` setting and threads it into all + three solve entry points. +- New `AppSettings.codingLanguage` (`SETTINGS_KEYS.codingLanguage`, in `APP_SETTING_KEYS` so a + factory reset clears it), default `'javascript'`, zod `z.string().min(1).max(40)`. A **Language** + dropdown (12 languages) in the Cue Card coding-solver controls, seeded from settings + persisted + via `api.settings.set({ codingLanguage })` — mirrors the existing model/effort pickers. 
+ +**Adversarial review** (2 dimensions → refute-verify): **no findings** — the increment is clean +(callers are compiler-enforced to pass `language`; the settings round-trip + prompt hold together). + +Verified: `typecheck` · 104 unit · `build` green. + +## Increment 3 — Pronunciation: default-on + structured guide panel + +Pronunciation is now **ON by default** (still live-toggleable). Instead of inline respellings +(which made answers read oddly), the answer stays clean and the model appends a structured guide: +a `[[PRONUNCIATION]]` marker then one pipe-delimited line per hard word +(`word | part of speech | singular | respelling`). The Cue Card splits it out and renders a +teal **"🗣 How to say it"** panel below the answer. `answer.ts` gives +160 `max_output_tokens` +headroom when pronunciation is on so the guide never eats the answer. + +- `goLive` defaults `pronunciation: true` (live-state + the answerPrefs broadcast); Overlay's + toggle initial state matches. +- Parsing extracted to `overlay/pronunciation.ts` (`splitPronunciation`) so it's unit-testable. + +**Adversarial review** (2 dimensions → refute-verify) found **5 real parser-robustness gaps** — +all model-output-variance issues in `splitPronunciation`: +1. *(med)* a 3-field line (model dropped the optional singular) dropped the whole word. +2. *(low)* case-sensitive `[[PRON` match → a Title-cased marker leaked the raw guide into the answer. +3. *(low; covers findings 3 and 5)* a bare partial marker (`[[`, `[[P`, `[[PR`, `[[PRO`) flickered into the body mid-stream. +4. *(low)* the "no singular" placeholder was only suppressed for the exact em-dash. + +**Fix:** rewrote `splitPronunciation` to be tolerant — respelling is the LAST field (2/3/4-field +lines all work), marker match is case/space-insensitive AND strips trailing partial prefixes, and +`—/–/-/n/a/none` placeholders normalize to empty. Locked with `pronunciation.test.ts` (+8) covering +each finding. 
+ +Verified: `typecheck` · 113 unit (+9 across v1.2 #3) · `build` green. + +## v1.2 status +All three increments done on `feat/prompt-overhaul` (Answer Format + naturalness · Coding solver · +Pronunciation). Ready for one PR. Version/changelog bump deferred until the user asks (would be v1.2.0). diff --git a/e2e/data-integrity.spec.ts b/e2e/data-integrity.spec.ts index 02dbc93..eefe159 100644 --- a/e2e/data-integrity.spec.ts +++ b/e2e/data-integrity.spec.ts @@ -11,7 +11,6 @@ const newProfile = { targetRole: 'SWE', targetCompany: null, interviewType: 'general', - answerStyle: 'default', language: 'en', resumeText: null, jdText: null, diff --git a/e2e/error-handling.spec.ts b/e2e/error-handling.spec.ts index 6c125ea..59c8491 100644 --- a/e2e/error-handling.spec.ts +++ b/e2e/error-handling.spec.ts @@ -19,12 +19,11 @@ test('a failed answer surfaces an error and clears the streaming state (B1/B2)', targetRole: 'SWE', targetCompany: null, interviewType: 'general', - answerStyle: 'default', language: 'en', resumeText: null, jdText: null, }); - const session = await api.session.start(profile.id, 'general', 'default', null, 'key_points'); + const session = await api.session.start(profile.id, 'general', null, 'key_points'); // Listen BEFORE asking. answerDone firing on a failed ask is the core B1 fix // (the Cue Card card stops spinning); sessionError proves the failure isn't silent. diff --git a/src/main/db/repositories/profiles.repo.ts b/src/main/db/repositories/profiles.repo.ts index 5f373cb..1cc821d 100644 --- a/src/main/db/repositories/profiles.repo.ts +++ b/src/main/db/repositories/profiles.repo.ts @@ -4,12 +4,8 @@ import type { Profile, ProfileInput } from '@shared/types'; type Row = typeof schema.profiles.$inferSelect; -/** Map legacy answer-style values (length is now a separate axis) to a valid - * format. Older profiles stored 'concise'/'detailed'. 
*/ -function toAnswerStyle(v: string): Profile['answerStyle'] { - return v === 'star' || v === 'technical' || v === 'conversational' ? v : 'default'; -} - +// The `answer_style` DB column is retained (NOT NULL default 'concise') but no longer +// read or written — the answer format is a single live Cue Card control now (v1.2). function toProfile(r: Row): Profile { return { id: r.id, @@ -17,7 +13,6 @@ function toProfile(r: Row): Profile { targetRole: r.targetRole, targetCompany: r.targetCompany, interviewType: r.interviewType as Profile['interviewType'], - answerStyle: toAnswerStyle(r.answerStyle), language: r.language, resumeText: r.resumeText, jdText: r.jdText, @@ -53,7 +48,6 @@ export const profilesRepo = { targetRole: input.targetRole, targetCompany: input.targetCompany, interviewType: input.interviewType, - answerStyle: input.answerStyle, language: input.language, resumeText: input.resumeText, jdText: input.jdText, @@ -69,7 +63,6 @@ export const profilesRepo = { targetRole: patch.targetRole, targetCompany: patch.targetCompany, interviewType: patch.interviewType, - answerStyle: patch.answerStyle, language: patch.language, resumeText: patch.resumeText, jdText: patch.jdText, diff --git a/src/main/db/repositories/settings.repo.ts b/src/main/db/repositories/settings.repo.ts index 7c72fce..8d95198 100644 --- a/src/main/db/repositories/settings.repo.ts +++ b/src/main/db/repositories/settings.repo.ts @@ -56,6 +56,7 @@ export const SETTINGS_KEYS = { overlayBounds: 'overlay_bounds', audioPrefs: 'audio_prefs', hideTaskbarIcon: 'hide_taskbar_icon', + codingLanguage: 'coding_language', } as const; /** Non-secret settings cleared by a factory reset (everything except the API key). 
*/ @@ -71,4 +72,5 @@ const APP_SETTING_KEYS: string[] = [ SETTINGS_KEYS.overlayBounds, SETTINGS_KEYS.audioPrefs, SETTINGS_KEYS.hideTaskbarIcon, + SETTINGS_KEYS.codingLanguage, ]; diff --git a/src/main/ipc/profiles.ipc.ts b/src/main/ipc/profiles.ipc.ts index 053e85f..3ea771c 100644 --- a/src/main/ipc/profiles.ipc.ts +++ b/src/main/ipc/profiles.ipc.ts @@ -12,19 +12,12 @@ const interviewType = z.enum([ 'sales', 'general', ]); -// Accept legacy length values ('concise'/'detailed') on the wire and fold them -// into the format axis, so old clients/profiles don't fail validation. -const answerStyle = z - .enum(['default', 'star', 'technical', 'conversational', 'concise', 'detailed']) - .transform((v) => (v === 'concise' || v === 'detailed' ? 'default' : v)); - const profileInput = z.object({ name: z.string().min(1), targetRole: z.string().default(''), targetCompany: z.string().nullable().default(null), - // Interview type & answer style are chosen per run now; kept optional/legacy. + // Interview type is chosen per run now; kept optional/legacy on the profile. 
interviewType: interviewType.default('general'), - answerStyle: answerStyle.default('default'), language: z.string().default('en'), resumeText: z.string().nullable().default(null), jdText: z.string().nullable().default(null), @@ -60,7 +53,6 @@ export function registerProfilesIpc(): void { targetRole: src.targetRole, targetCompany: src.targetCompany, interviewType: src.interviewType, - answerStyle: src.answerStyle, language: src.language, resumeText: src.resumeText, jdText: src.jdText, diff --git a/src/main/ipc/session.ipc.ts b/src/main/ipc/session.ipc.ts index c516eeb..335920a 100644 --- a/src/main/ipc/session.ipc.ts +++ b/src/main/ipc/session.ipc.ts @@ -15,8 +15,7 @@ const interviewType = z.enum([ 'sales', 'general', ]); -const answerStyle = z.enum(['default', 'star', 'technical', 'conversational']); -const answerLength = z.enum(['key_points', 'detailed']); +const answerFormat = z.enum(['key_points', 'explanation', 'detailed']); export function registerSessionIpc(): void { handle( @@ -24,23 +23,20 @@ export function registerSessionIpc(): void { z.object({ profileId: z.string().min(1), interviewType, - answerStyle: answerStyle.default('default'), jobId: z.string().nullable().default(null), - answerLength: answerLength.default('key_points'), + answerFormat: answerFormat.default('key_points'), }), - ({ profileId, interviewType: t, answerStyle: s, jobId, answerLength: len }) => - sessionManager.start(profileId, t, s, jobId, len), + ({ profileId, interviewType: t, jobId, answerFormat: f }) => + sessionManager.start(profileId, t, jobId, f), ); handle( IPC.session.resume, z.object({ sessionId: z.string().min(1), - answerStyle: answerStyle.default('default'), - answerLength: answerLength.default('key_points'), + answerFormat: answerFormat.default('key_points'), }), - ({ sessionId, answerStyle: s, answerLength: len }) => - sessionManager.resume(sessionId, s, len), + ({ sessionId, answerFormat: f }) => sessionManager.resume(sessionId, f), ); handle(IPC.session.stop, 
z.object({ sessionId: z.string().min(1) }), ({ sessionId }) => @@ -90,8 +86,7 @@ export function registerSessionIpc(): void { IPC.session.setAnswerPrefs, z.object({ interviewType: interviewType.optional(), - style: answerStyle.optional(), - length: answerLength.optional(), + format: answerFormat.optional(), pronunciation: z.boolean().optional(), }), (prefs) => sessionManager.setAnswerPrefs(prefs), diff --git a/src/main/ipc/settings.ipc.ts b/src/main/ipc/settings.ipc.ts index 56a14b5..af707ad 100644 --- a/src/main/ipc/settings.ipc.ts +++ b/src/main/ipc/settings.ipc.ts @@ -35,6 +35,7 @@ function readSettings(): AppSettings { reasoningEffortDefaults: { ...defaultEfforts }, overlay: settingsRepo.getJson(SETTINGS_KEYS.overlayPrefs, defaultOverlay), audio: settingsRepo.getJson(SETTINGS_KEYS.audioPrefs, defaultAudio), + codingLanguage: settingsRepo.get(SETTINGS_KEYS.codingLanguage) || 'javascript', privacyMode: settingsRepo.get(SETTINGS_KEYS.privacyMode) !== '0', hideTaskbarIcon: settingsRepo.get(SETTINGS_KEYS.hideTaskbarIcon) === '1', dataConsentAck: settingsRepo.get(SETTINGS_KEYS.dataConsentAck) === '1', @@ -61,6 +62,7 @@ const settingsPatch = z.object({ micDeviceId: z.string().nullable(), }) .optional(), + codingLanguage: z.string().min(1).max(40).optional(), dataConsentAck: z.boolean().optional(), tourDone: z.boolean().optional(), hideTaskbarIcon: z.boolean().optional(), @@ -84,6 +86,8 @@ export function registerSettingsIpc(): void { broadcast(EVENTS.overlayApplySettings, patch.overlay, ['overlay']); } if (patch.audio) settingsRepo.setJson(SETTINGS_KEYS.audioPrefs, patch.audio); + if (patch.codingLanguage !== undefined) + settingsRepo.set(SETTINGS_KEYS.codingLanguage, patch.codingLanguage); if (patch.hideTaskbarIcon !== undefined) { settingsRepo.set(SETTINGS_KEYS.hideTaskbarIcon, patch.hideTaskbarIcon ? 
'1' : '0'); getMainWindow()?.setSkipTaskbar(patch.hideTaskbarIcon); diff --git a/src/main/services/capture/codingMode.test.ts b/src/main/services/capture/codingMode.test.ts index 85131ee..fa261a0 100644 --- a/src/main/services/capture/codingMode.test.ts +++ b/src/main/services/capture/codingMode.test.ts @@ -14,6 +14,12 @@ vi.mock('../openai/vision', () => ({ // codingMode imports normalizeOpenAIError from the client, which transitively loads // electron (app.isPackaged) — stub it so the import chain stays node-safe. vi.mock('../openai/client', () => ({ normalizeOpenAIError: (e: unknown) => String(e) })); +// codingMode reads the coding language from settings.repo (→ db → better-sqlite3), +// which can't load under the node test env — stub it (get → null ⇒ 'javascript' default). +vi.mock('../../db/repositories/settings.repo', () => ({ + SETTINGS_KEYS: { codingLanguage: 'coding_language' }, + settingsRepo: { get: () => null }, +})); import { addCapture, clearCaptures, solveCaptures } from './codingMode'; import { broadcast } from '../../ipc/broadcast'; @@ -63,7 +69,7 @@ describe('multi-image capture buffer', () => { addCapture('img-2'); await solveCaptures(); expect(solveFromImages).toHaveBeenCalledTimes(1); - expect(solveFromImages).toHaveBeenCalledWith(['img-1', 'img-2']); + expect(solveFromImages).toHaveBeenCalledWith(['img-1', 'img-2'], 'javascript'); expect(lastBufferImages()).toEqual([]); // buffer cleared after solving }); }); diff --git a/src/main/services/capture/codingMode.ts b/src/main/services/capture/codingMode.ts index a927f6a..63c8e1f 100644 --- a/src/main/services/capture/codingMode.ts +++ b/src/main/services/capture/codingMode.ts @@ -6,6 +6,11 @@ import { solveFromImages } from '../openai/vision'; import { normalizeOpenAIError } from '../openai/client'; import type { AnswerEvent } from '../openai/answer'; import { showOverlay } from '../../windows/overlayWindow'; +import { SETTINGS_KEYS, settingsRepo } from '../../db/repositories/settings.repo'; + 
+/** The programming language the solver writes solutions in (Cue Card setting; JS default). */ +const codingLanguage = (): string => + settingsRepo.get(SETTINGS_KEYS.codingLanguage) || 'javascript'; // Accumulated problem screenshots for the current solve. A long problem scrolls // past one viewport, so the user captures several (scroll → capture → repeat) and @@ -41,7 +46,7 @@ export function solveCaptures(): Promise { images.length > 1 ? `Coding problem (${images.length} screenshots)` : 'Coding problem (from screenshot)'; - return streamToOverlay(solveFromImages(images), label); + return streamToOverlay(solveFromImages(images, codingLanguage()), label); } async function streamToOverlay(gen: AsyncGenerator, label: string): Promise { @@ -65,12 +70,15 @@ async function streamToOverlay(gen: AsyncGenerator, label: string): /** Stream a coding solution from plain text (clipboard). */ export function runCodingSolve(text: string): Promise { - return streamToOverlay(solveFromOcr(text), 'Coding problem (from clipboard)'); + return streamToOverlay(solveFromOcr(text, codingLanguage()), 'Coding problem (from clipboard)'); } /** Stream a coding solution from a single screenshot/region image (OpenAI vision). 
*/ export function runCodingSolveFromImage(dataUrl: string): Promise { - return streamToOverlay(solveFromImages([dataUrl]), 'Coding problem (from screenshot)'); + return streamToOverlay( + solveFromImages([dataUrl], codingLanguage()), + 'Coding problem (from screenshot)', + ); } /** diff --git a/src/main/services/mock/mockManager.ts b/src/main/services/mock/mockManager.ts index 5be442e..def47c1 100644 --- a/src/main/services/mock/mockManager.ts +++ b/src/main/services/mock/mockManager.ts @@ -86,8 +86,7 @@ export const mockManager = { profileId, jobId, interviewType, - answerStyle: 'default', - answerLength: 'key_points', + answerFormat: 'key_points', language: profile.language, isMock: true, }); diff --git a/src/main/services/openai/answer.test.ts b/src/main/services/openai/answer.test.ts index bebd5bf..95419cc 100644 --- a/src/main/services/openai/answer.test.ts +++ b/src/main/services/openai/answer.test.ts @@ -33,8 +33,7 @@ function baseInput(over: Partial[0]> = {}) { question: 'Tell me about a hard bug.', contextChunks: [{ id: 'c1', sourceType: 'resume' as const, content: 'Fixed a race condition', score: 0.8 }], profile, - style: 'default' as const, - length: 'key_points' as const, + format: 'key_points' as const, pronunciation: false, interviewType: 'behavioral' as const, ...over, @@ -55,31 +54,51 @@ beforeEach(() => { describe('streamAnswer — request body', () => { it('caps key_points at 220 output tokens', async () => { - await collect(streamAnswer(baseInput({ length: 'key_points' }))); + await collect(streamAnswer(baseInput({ format: 'key_points' }))); expect(h.lastBody!.max_output_tokens).toBe(220); expect(userPrompt()).toContain('KEY POINTS'); expect(userPrompt()).toContain('~60 words'); }); + it('caps explanation at 340 output tokens', async () => { + await collect(streamAnswer(baseInput({ format: 'explanation' }))); + expect(h.lastBody!.max_output_tokens).toBe(340); + expect(userPrompt()).toContain('EXPLANATION'); + }); + it('caps detailed at 800 output 
tokens', async () => { - await collect(streamAnswer(baseInput({ length: 'detailed' }))); + await collect(streamAnswer(baseInput({ format: 'detailed' }))); expect(h.lastBody!.max_output_tokens).toBe(800); expect(userPrompt()).toContain('DETAILED'); }); - it('includes the pronunciation instruction only when enabled', async () => { + it('includes the structured pronunciation-guide instruction only when enabled', async () => { await collect(streamAnswer(baseInput({ pronunciation: true }))); expect(userPrompt()).toMatch(/phonetic respelling/i); + expect(userPrompt()).toContain('[[PRONUNCIATION]]'); // structured guide marker await collect(streamAnswer(baseInput({ pronunciation: false }))); expect(userPrompt()).not.toMatch(/phonetic respelling/i); + expect(userPrompt()).not.toContain('[[PRONUNCIATION]]'); + }); + + it('gives pronunciation headroom above the format token cap', async () => { + await collect(streamAnswer(baseInput({ format: 'key_points', pronunciation: true }))); + expect(h.lastBody!.max_output_tokens).toBe(220 + 160); }); it('injects the chosen format and interview type', async () => { - await collect(streamAnswer(baseInput({ style: 'star', interviewType: 'coding' }))); - expect(userPrompt()).toContain('STAR'); + await collect(streamAnswer(baseInput({ format: 'explanation', interviewType: 'coding' }))); + expect(userPrompt()).toContain('EXPLANATION'); expect(userPrompt()).toContain('Interview type: coding'); }); + it('instructs a human, anti-AI tone in the system prompt', async () => { + await collect(streamAnswer(baseInput())); + const system = String((h.lastBody!.input as { role: string; content: string }[])[0].content); + expect(system).toMatch(/human/i); + expect(system).toMatch(/As an AI/i); // it's in the BANNED list + }); + it('embeds retrieved context tagged by source', async () => { await collect(streamAnswer(baseInput())); expect(userPrompt()).toContain('(resume) Fixed a race condition'); diff --git a/src/main/services/openai/answer.ts 
b/src/main/services/openai/answer.ts index 73b12e5..78dd530 100644 --- a/src/main/services/openai/answer.ts +++ b/src/main/services/openai/answer.ts @@ -1,52 +1,42 @@ import { openai } from './client'; import { model } from './models'; -import type { - AnswerLength, - AnswerStyle, - InterviewType, - Profile, - RetrievedChunk, -} from '@shared/types'; +import type { AnswerFormat, InterviewType, Profile, RetrievedChunk } from '@shared/types'; export interface AnswerInput { question: string; contextChunks: RetrievedChunk[]; profile: Profile; - style: AnswerStyle; - length: AnswerLength; + /** The single answer control (v1.2): key_points | explanation | detailed. */ + format: AnswerFormat; /** Annotate rare/technical/foreign terms with a quick phonetic respelling. */ pronunciation: boolean; interviewType: InterviewType; signal?: AbortSignal; } -/** Human-readable instruction for each length, injected into the prompt. */ -const LENGTH_INSTRUCTION: Record = { +/** Human-readable instruction per answer FORMAT, injected into the prompt. */ +const FORMAT_INSTRUCTION: Record = { key_points: - 'LENGTH = KEY POINTS (STRICT). This is a glanceable cue to speak FROM, not a full answer. ' + - 'Hard cap: ~60 words TOTAL. Format: one short opening line (≤12 words), then 2–3 terse ' + - 'bullets of a few words each — keywords/phrases, not sentences. No paragraphs. No preamble. ' + - 'If a bullet reads like a full sentence, cut it down. Shorter is better.', + 'FORMAT = KEY POINTS (STRICT). A glanceable cue to speak FROM, not a full answer. ' + + 'Hard cap: ~60 words TOTAL. One short opening line (≤12 words), then 2–3 terse bullets of a ' + + 'few words each — keywords/phrases, not sentences. No paragraphs, no preamble. Shorter is better.', + explanation: + "FORMAT = EXPLANATION. A natural, flowing first-person answer (~90–130 words) — the way you'd " + + 'actually talk it through with someone. Connected sentences, NOT bullets. 
Lead with the point, ' + + 'then the how/why with one specific detail from the context. Warm and direct, never a lecture.', detailed: - 'LENGTH = DETAILED. A thorough, well-structured spoken answer (~120–200 words) with specifics ' + + 'FORMAT = DETAILED. A thorough, well-structured spoken answer (~150–220 words) with specifics ' + 'and one concrete example drawn from the context. Natural spoken language, not an essay.', }; -/** Hard output ceiling per length — the model literally cannot exceed this, so +/** Hard output ceiling per format — the model literally cannot exceed this, so * "key points" can never drift into a long answer regardless of the prompt. */ -const LENGTH_MAX_TOKENS: Record = { +const FORMAT_MAX_TOKENS: Record = { key_points: 220, + explanation: 340, detailed: 800, }; -/** Format/tone instruction per style. */ -const STYLE_INSTRUCTION: Record = { - default: 'Format: a clear, direct spoken answer.', - star: 'Format: STAR — frame the answer as Situation, Task, Action, Result.', - technical: 'Format: technical — precise, correct terminology; lead with the core concept.', - conversational: 'Format: conversational — warm, natural, first-person, like talking to a person.', -}; - export type AnswerEvent = | { type: 'delta'; token: string } | { @@ -64,8 +54,14 @@ const SYSTEM = `You are a live interview copilot. The candidate reads your outpu speaking in a real interview, so it must be instantly skimmable and spoken in their first-person voice ("I led…", not "The candidate led…"). Rules: -- LENGTH is a HARD constraint. Obey the requested length EXACTLY — even if you have more +- FORMAT is a HARD constraint. Obey the requested format EXACTLY — even if you have more to say. When unsure, be shorter. Never pad. (KEY POINTS especially must stay tiny.) +- SOUND 100% HUMAN — never AI-generated. Write the way a sharp person actually speaks: use + contractions ("I've", "didn't", "we're"), vary sentence length, get straight to the point. 
+ BANNED (AI/corporate tells): "As an AI", "I'd be happy to", "It's worth noting", "Furthermore", + "Moreover", "In today's … world", "leverage", "delve", "robust", "seamless", and hedging like + "I believe/I think/arguably/potentially". Don't restate the question. Lead with the answer, + confidently. Natural ≠ disfluent — do NOT fake "um"/"uh". - CITE YOUR SOURCES. The CONTEXT items are NUMBERED [1], [2], …. Immediately after each claim drawn from the context, cite its number(s) inline, e.g. "cut p99 latency ~40% [1]" or "[2][3]". Cite only real context numbers; never invent a citation. @@ -75,9 +71,10 @@ Rules: - FABRICATION GUARD: if the context can't support what's asked, do NOT make it up. Begin the answer with "⚠", state in one short clause that it's not in their background, then pivot to a grounded, cited, transferable-skills framing (this is the riskWarning case). -- Then follow the requested FORMAT and the interview type. -- Formatting: lead with the single most important line; **bold** only true key terms; - prefer short bullets over dense paragraphs; no meta-commentary or headers.`; +- Match the interview type. +- Formatting: lead with the single most important line; **bold** only true key terms; use + bullets for KEY POINTS and connected sentences for EXPLANATION/DETAILED; no headers or + meta-commentary.`; function buildContext(chunks: RetrievedChunk[]): string { if (chunks.length === 0) return '(no relevant profile context found)'; @@ -91,13 +88,16 @@ function buildContext(chunks: RetrievedChunk[]): string { export async function* streamAnswer(input: AnswerInput): AsyncGenerator { const userPrompt = [ `Interview type: ${input.interviewType}`, - LENGTH_INSTRUCTION[input.length], - STYLE_INSTRUCTION[input.style], + FORMAT_INSTRUCTION[input.format], input.pronunciation - ? 
'Pronunciation: for rare, technical, or foreign terms, add a simple phonetic respelling in ' + - 'parentheses the FIRST time each appears — lowercase syllables joined by hyphens, with the ' + - 'STRESSED syllable in CAPITALS, e.g. "regulations (reg-yuh-LAY-shunz)", ' + - '"Kubernetes (koo-ber-NET-eez)", "Nguyen (WIN)". No IPA symbols. Common words need none.' + ? 'PRONUNCIATION GUIDE: keep the ANSWER itself clean — do NOT put respellings inline. ' + + 'AFTER the answer, if any words in it are genuinely hard to pronounce (rare, technical, ' + + 'foreign, or proper nouns), add a final section: a line containing exactly ' + + '[[PRONUNCIATION]], then ONE line per hard word formatted as ' + + '`word | part of speech | singular form (or — if n/a) | phonetic respelling`. ' + + 'Respelling = lowercase syllables joined by hyphens with the STRESSED syllable in CAPITALS ' + + '(e.g. "regulations | noun, plural | regulation | reg-yuh-LAY-shunz"). No IPA. Only include ' + + 'genuinely hard words; if none, omit the section entirely.' : '', `Candidate role target: ${input.profile.targetRole} @ ${input.profile.targetCompany ?? 'n/a'}`, '', @@ -106,9 +106,9 @@ export async function* streamAnswer(input: AnswerInput): AsyncGenerator { + const system = `You solve a coding/technical problem given as plain text.\n${codingRules(language)}`; const stream = await openai().responses.stream( { model: model('coding'), ...reasoningParam('coding'), input: [ - { role: 'system', content: SYSTEM }, + { role: 'system', content: system }, { role: 'user', content: text.slice(0, 12_000) }, ], }, diff --git a/src/main/services/openai/codingPrompt.ts b/src/main/services/openai/codingPrompt.ts index 4f04816..c39b6b9 100644 --- a/src/main/services/openai/codingPrompt.ts +++ b/src/main/services/openai/codingPrompt.ts @@ -1,9 +1,11 @@ /** * Shared system prompt for coding/algorithmic problem solving (clipboard text and - * screenshot/vision paths). 
The hard requirement is OPTIMALITY: the answer must - * be the best-known algorithm for the problem, never the first thing that works. + * screenshot/vision paths). Hard requirements: OPTIMALITY, the solution written in the + * chosen LANGUAGE with clear inline COMMENTS, and an explanation-first delivery. + * Deliberately résumé/JD-free — a coding problem is unrelated to the candidate's profile. */ -export const CODING_RULES = `You are an expert competitive programmer and senior software engineer. +export function codingRules(language: string): string { + return `You are an expert competitive programmer and senior software engineer. OPTIMALITY — this is the single most important rule: - Always produce the OPTIMAL solution: the best achievable time complexity (and, for @@ -19,12 +21,23 @@ OPTIMALITY — this is the single most important rule: window, binary search (incl. on the answer), monotonic stack/queue, heaps, union-find, prefix sums, greedy with proof, dynamic programming (with the tightest state), or the appropriate graph algorithm. + +LANGUAGE: +- Write the solution in ${language}. Idiomatic, clean, and runnable as-is. +- The code MUST carry clear inline comments — explain the key steps, the core + invariant, and why the chosen data structure/algorithm works. Comment to teach, + not to narrate every trivial line. + +DELIVERY — explanation-first: +- Explain the approach the way you'd walk a peer through it: a short, natural, plain + paragraph (the idea + why it's optimal) BEFORE the code. No robotic bullet dumps. - The code must be correct, handle edge cases, and run as-is. If the problem statement is ambiguous, state the assumption you optimize under, then solve. FORMAT — clean markdown with short bold section labels, in this order: -**Problem**, **Optimal approach**, **Complexity**, **Edge cases**, **Solution**. -Use bullet lists for points and fenced code blocks (\`\`\`lang) for any code. 
Be -concise but complete.`; +**Problem**, **Approach**, **Complexity**, **Edge cases**, **Solution**. +Write **Approach** as a natural explanatory paragraph; put the code in a fenced code +block (tagged with the language) with inline comments. Be concise but complete.`; +} diff --git a/src/main/services/openai/vision.ts b/src/main/services/openai/vision.ts index 3a60c53..34fdebe 100644 --- a/src/main/services/openai/vision.ts +++ b/src/main/services/openai/vision.ts @@ -1,10 +1,8 @@ import { openai } from './client'; import { model, reasoningParam } from './models'; -import { CODING_RULES } from './codingPrompt'; +import { codingRules } from './codingPrompt'; import type { AnswerEvent } from './answer'; -const SYSTEM = `You are shown a screenshot containing a coding/technical interview problem (and possibly code). Read it carefully, transcribe the problem accurately, then solve it.\n${CODING_RULES}`; - /** * Solve a problem from ONE OR MORE screenshots using the 'coding' model (multimodal, * a reasoning model by default). A long LeetCode-style problem scrolls past one @@ -15,8 +13,10 @@ const SYSTEM = `You are shown a screenshot containing a coding/technical intervi */ export async function* solveFromImages( dataUrls: string[], + language: string, signal?: AbortSignal, ): AsyncGenerator { + const system = `You are shown a screenshot containing a coding/technical interview problem (and possibly code). Read it carefully, transcribe the problem accurately, then solve it.\n${codingRules(language)}`; const intro = dataUrls.length > 1 ? 
`The following ${dataUrls.length} images are consecutive, top-to-bottom (possibly ` + @@ -36,7 +36,7 @@ export async function* solveFromImages( model: model('coding'), ...reasoningParam('coding'), input: [ - { role: 'system', content: SYSTEM }, + { role: 'system', content: system }, { role: 'user', content }, ], }, diff --git a/src/main/services/samples/sampleData.ts b/src/main/services/samples/sampleData.ts index f990a37..2a1eab9 100644 --- a/src/main/services/samples/sampleData.ts +++ b/src/main/services/samples/sampleData.ts @@ -130,7 +130,6 @@ export async function loadSampleData(): Promise<{ profileId: string; jobs: numbe targetRole: 'Senior Software Engineer', targetCompany: null, interviewType: 'general', - answerStyle: 'default', language: 'en', resumeText: SAMPLE_RESUME, jdText: null, diff --git a/src/main/services/session/sessionManager.ts b/src/main/services/session/sessionManager.ts index 2a7086d..3d29c3d 100644 --- a/src/main/services/session/sessionManager.ts +++ b/src/main/services/session/sessionManager.ts @@ -14,7 +14,7 @@ import { RealtimeTranscriber } from '../openai/realtime'; import { getOverlayWindow, showOverlay } from '../../windows/overlayWindow'; import { getMainWindow } from '../../windows/mainWindow'; import { log } from '../security/logger'; -import type { AnswerLength, AnswerStyle, InterviewType, Session } from '@shared/types'; +import type { AnswerFormat, InterviewType, Session } from '@shared/types'; /** A question we answered, kept so the Cue Card can re-generate it (e.g. 
after * toggling length/format/pronunciation) by reusing the SAME question row — no @@ -29,8 +29,7 @@ interface LiveState { profileId: string; jobId: string | null; interviewType: InterviewType; - answerStyle: AnswerStyle; - answerLength: AnswerLength; + answerFormat: AnswerFormat; pronunciation: boolean; isMock: boolean; // mock rehearsal — no mic capture; never persisted paused: boolean; @@ -70,8 +69,7 @@ export const sessionManager = { profileId: string; jobId: string | null; interviewType: InterviewType; - answerStyle: AnswerStyle; - answerLength: AnswerLength; + answerFormat: AnswerFormat; language: string; isMock?: boolean; }): void { @@ -82,9 +80,8 @@ export const sessionManager = { profileId: opts.profileId, jobId: opts.jobId, interviewType: opts.interviewType, - answerStyle: opts.answerStyle, - answerLength: opts.answerLength, - pronunciation: false, // off by default; toggled live from the Cue Card + answerFormat: opts.answerFormat, + pronunciation: true, // ON by default (v1.2); toggled live from the Cue Card isMock: !!opts.isMock, paused: false, busy: false, @@ -119,9 +116,8 @@ export const sessionManager = { EVENTS.answerPrefs, { interviewType: opts.interviewType, - style: opts.answerStyle, - length: opts.answerLength, - pronunciation: false, + format: opts.answerFormat, + pronunciation: true, }, ['overlay'], ); @@ -147,9 +143,8 @@ export const sessionManager = { start( profileId: string, interviewType: InterviewType, - answerStyle: AnswerStyle, jobId: string | null = null, - answerLength: AnswerLength = 'key_points', + answerFormat: AnswerFormat = 'key_points', ): Session { const profile = profilesRepo.get(profileId); if (!profile) throw new Error('Profile not found'); @@ -163,8 +158,7 @@ export const sessionManager = { profileId, jobId, interviewType, - answerStyle, - answerLength, + answerFormat, language: profile.language, }); return toSession(db().select().from(schema.sessions).where(eq(schema.sessions.id, id)).get()!); @@ -173,12 +167,8 @@ export 
const sessionManager = { /** Re-activate an existing (stopped) session and continue it, so one interview * reuses a single session row instead of piling up new ones. The interview * TYPE is restored from the session (it's switched live in the Cue Card, not - * chosen on resume); style/length default and are adjusted live too. */ - resume( - sessionId: string, - answerStyle: AnswerStyle = 'default', - answerLength: AnswerLength = 'key_points', - ): Session { + * chosen on resume); the answer format defaults and is adjusted live too. */ + resume(sessionId: string, answerFormat: AnswerFormat = 'key_points'): Session { const row = db().select().from(schema.sessions).where(eq(schema.sessions.id, sessionId)).get(); if (!row) throw new Error('Session not found'); const profile = profilesRepo.get(row.profileId); @@ -193,8 +183,7 @@ export const sessionManager = { profileId: row.profileId, jobId: row.jobId, interviewType: row.interviewType as InterviewType, - answerStyle, - answerLength, + answerFormat, language: profile.language, }); return toSession( @@ -462,10 +451,9 @@ export const sessionManager = { question: questionText, contextChunks: context, profile, - // Format, length, and pronunciation are chosen per run (this round) and - // can be toggled live from the Cue Card. - style: live?.answerStyle ?? 'default', - length: live?.answerLength ?? 'key_points', + // Answer format + pronunciation are chosen per run (this round) and can be + // toggled live from the Cue Card. + format: live?.answerFormat ?? 'key_points', pronunciation: live?.pronunciation ?? false, interviewType: (live?.interviewType ?? session.interviewType) as InterviewType, signal: abort.signal, @@ -529,16 +517,14 @@ export const sessionManager = { * regenerated) answer. 
*/ setAnswerPrefs(prefs: { interviewType?: InterviewType; - style?: AnswerStyle; - length?: AnswerLength; + format?: AnswerFormat; pronunciation?: boolean; - }): { interviewType: InterviewType; style: AnswerStyle; length: AnswerLength; pronunciation: boolean } { + }): { interviewType: InterviewType; format: AnswerFormat; pronunciation: boolean } { // No active session (idle Cue Card): no-op with sensible defaults. if (!live) { return { interviewType: prefs.interviewType ?? 'general', - style: prefs.style ?? 'default', - length: prefs.length ?? 'key_points', + format: prefs.format ?? 'key_points', pronunciation: prefs.pronunciation ?? false, }; } @@ -551,13 +537,11 @@ export const sessionManager = { .where(eq(schema.sessions.id, live.sessionId)) .run(); } - if (prefs.style !== undefined) live.answerStyle = prefs.style; - if (prefs.length !== undefined) live.answerLength = prefs.length; + if (prefs.format !== undefined) live.answerFormat = prefs.format; if (prefs.pronunciation !== undefined) live.pronunciation = prefs.pronunciation; return { interviewType: live.interviewType, - style: live.answerStyle, - length: live.answerLength, + format: live.answerFormat, pronunciation: live.pronunciation, }; }, diff --git a/src/preload/index.ts b/src/preload/index.ts index fdd6a65..c03ba80 100644 --- a/src/preload/index.ts +++ b/src/preload/index.ts @@ -134,19 +134,13 @@ const api = { start: ( profileId: string, interviewType: string, - answerStyle: string, jobId: string | null = null, - answerLength = 'key_points', - ) => invoke(IPC.session.start, { profileId, interviewType, answerStyle, jobId, answerLength }), - resume: (sessionId: string, answerStyle = 'default', answerLength = 'key_points') => - invoke(IPC.session.resume, { sessionId, answerStyle, answerLength }), - setAnswerPrefs: (prefs: { - interviewType?: string; - style?: string; - length?: string; - pronunciation?: boolean; - }) => - invoke<{ interviewType: string; style: string; length: string; pronunciation: boolean 
}>( + answerFormat = 'key_points', + ) => invoke(IPC.session.start, { profileId, interviewType, jobId, answerFormat }), + resume: (sessionId: string, answerFormat = 'key_points') => + invoke(IPC.session.resume, { sessionId, answerFormat }), + setAnswerPrefs: (prefs: { interviewType?: string; format?: string; pronunciation?: boolean }) => + invoke<{ interviewType: string; format: string; pronunciation: boolean }>( IPC.session.setAnswerPrefs, prefs, ), diff --git a/src/renderer/dashboard/pages/InterviewPage.tsx b/src/renderer/dashboard/pages/InterviewPage.tsx index 69cb99e..94cb691 100644 --- a/src/renderer/dashboard/pages/InterviewPage.tsx +++ b/src/renderer/dashboard/pages/InterviewPage.tsx @@ -156,8 +156,7 @@ export default function InterviewPage() { profileId, jobId: job.id, interviewType: 'general', - answerStyle: 'default', - answerLength: 'key_points', + answerFormat: 'key_points', source: a.source, micDeviceId: a.micDeviceId, }); diff --git a/src/renderer/dashboard/pages/ProfilesPage.tsx b/src/renderer/dashboard/pages/ProfilesPage.tsx index 0761365..9d34f09 100644 --- a/src/renderer/dashboard/pages/ProfilesPage.tsx +++ b/src/renderer/dashboard/pages/ProfilesPage.tsx @@ -41,7 +41,6 @@ export default function ProfilesPage() { targetRole, targetCompany: null, interviewType: 'general', - answerStyle: 'default', language: 'en', resumeText: null, jdText: null, diff --git a/src/renderer/overlay/Overlay.tsx b/src/renderer/overlay/Overlay.tsx index 5c41078..888d909 100644 --- a/src/renderer/overlay/Overlay.tsx +++ b/src/renderer/overlay/Overlay.tsx @@ -1,11 +1,10 @@ import type React from 'react'; import { useEffect, useRef, useState } from 'react'; import { api } from '../lib/api'; -import type { AnswerPrefs, ClientInfo } from '@shared/ipc'; +import type { ClientInfo } from '@shared/ipc'; import type { - AnswerLength, + AnswerFormat, AnswerMetaEvent, - AnswerStyle, AppSettings, ContextSentEvent, InterviewType, @@ -21,6 +20,7 @@ import { removeCard, toggleCollapsed, } 
from './answerCards'; +import { splitPronunciation } from './pronunciation'; import { BoltIcon, ChevronRightIcon, @@ -84,9 +84,8 @@ export default function Overlay() { const [showClient, setShowClient] = useState(false); // Live answer controls (mirrored to the active session via setAnswerPrefs). const [interviewType, setInterviewType] = useState('general'); - const [format, setFormat] = useState('default'); - const [length, setLength] = useState('key_points'); - const [pronunciation, setPronunciation] = useState(false); + const [answerFormat, setAnswerFormat] = useState('key_points'); + const [pronunciation, setPronunciation] = useState(true); // Coding sessions default to listen-only (don't auto-answer the interviewer, so a // generated coding answer isn't replaced). This toggle (coding-only) flips it on. const [answerInterviewer, setAnswerInterviewer] = useState(false); @@ -100,6 +99,7 @@ export default function Overlay() { // Switchable live so a hard problem can be bumped to a stronger model on the spot. const [codingModel, setCodingModel] = useState(''); const [codingEffort, setCodingEffort] = useState(''); + const [codingLanguage, setCodingLanguage] = useState('javascript'); const [codingDefaults, setCodingDefaults] = useState({ model: 'gpt-5-mini', effort: 'low' }); // The full override maps, so saving the coding pick doesn't clobber other tasks'. const modelsRef = useRef>({}); @@ -234,8 +234,7 @@ export default function Overlay() { }), api.events.onAnswerPrefs((p) => { setInterviewType(p.interviewType); - setFormat(p.style); - setLength(p.length); + setAnswerFormat(p.format); setPronunciation(p.pronunciation); }), api.events.onAudioLevel((p) => { @@ -255,6 +254,7 @@ export default function Overlay() { effortsRef.current = ss.reasoningEfforts ?? {}; setCodingModel(ss.models?.coding ?? ''); setCodingEffort(ss.reasoningEfforts?.coding ?? ''); + setCodingLanguage(ss.codingLanguage ?? 'javascript'); setCodingDefaults({ model: ss.modelDefaults?.coding ?? 
'gpt-5-mini', effort: ss.reasoningEffortDefaults?.coding ?? 'low', @@ -310,14 +310,9 @@ export default function Overlay() { await api.session.setAnswerPrefs({ interviewType: t }); if (question) await api.session.regenerate(); }; - const changeFormat = async (f: AnswerStyle) => { - setFormat(f); - await api.session.setAnswerPrefs({ style: f }); - if (question) await api.session.regenerate(); - }; - const changeLength = async (l: AnswerLength) => { - setLength(l); - await api.session.setAnswerPrefs({ length: l }); + const changeFormat = async (f: AnswerFormat) => { + setAnswerFormat(f); + await api.session.setAnswerPrefs({ format: f }); if (question) await api.session.regenerate(); }; const togglePronunciation = async () => { @@ -664,44 +659,29 @@ export default function Overlay() { ))} - - Length + Format - - + {( + [ + ['key_points', 'Key points', 'Short, glanceable key points'], + ['explanation', 'Explanation', 'A natural, spoken explanation'], + ['detailed', 'Detailed', 'Thorough, with a concrete example'], + ] as const + ).map(([value, label, title]) => ( + + ))} {interviewType === 'coding' && ( @@ -891,13 +871,14 @@ export default function Overlay() { {!c.collapsed && (
{c.answer ? ( - {c.answer} + {splitPronunciation(c.answer).body} ) : isCurrent && live && !paused ? ( Listening… ) : null} {c.streaming && } +
)} @@ -1014,6 +995,36 @@ export default function Overlay() {

Coding solver

+